# Linear Regression Model

In [None]:
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split

# Load the cleaned flight-delay dataset.
data = pd.read_csv('cleaned_flight_delay_data.csv')

# Target (y) and features (X).
# The target column must not also appear among the features: previously
# 'Air Carrier Delay' was both a feature and the target, so the regression
# could simply copy its input and the reported scores said nothing about
# predictive power. The feature list is therefore derived by excluding
# whatever column is chosen as the target.
target_column = 'Air Carrier Delay'  # You can set the target column as required
delay_columns = [
    'Air Carrier Delay',
    'Aircraft Arriving Late',
    'National Aviation System Delay',
    'Security Delay',
]
feature_columns = [col for col in delay_columns if col != target_column]

X = data[feature_columns]
y = data[target_column]

# Train-test split (80/20, fixed seed so the split is reproducible).
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Fit the linear regression model and predict on the held-out rows.
linear_model = LinearRegression()
linear_model.fit(X_train, y_train)
y_pred = linear_model.predict(X_test)

# Evaluate on the test set.
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

# Results
print("Linear Regression Results:")
print(f"Mean Squared Error: {mse:.5f}")
print(f"R-squared Score: {r2:.2f}")

# Plot Predicted vs Actual
import matplotlib.pyplot as plt

plt.figure(figsize=(8, 6))
plt.scatter(y_test, y_pred, color='blue', label='Predicted vs. Actual')
# Diagonal y = x: points on this line are predicted exactly.
plt.plot([y_test.min(), y_test.max()], [y_test.min(), y_test.max()], 'r--', label='Ideal Fit')
# Axis labels follow the chosen target so they stay correct if it is changed.
plt.xlabel(f'Actual {target_column}')
plt.ylabel(f'Predicted {target_column}')
plt.title('Linear Regression: Predicted vs. Actual')
plt.legend()
plt.show()


# Decision Tree Regression Model

In [None]:
import pandas as pd
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt

# Load the cleaned flight-delay dataset.
data = pd.read_csv('cleaned_flight_delay_data.csv')

# Target (y) and features (X).
# The target column must not also appear among the features: previously
# 'Air Carrier Delay' was both a feature and the target, so the tree could
# simply reproduce its input and the reported scores said nothing about
# predictive power. The feature list is therefore derived by excluding
# whatever column is chosen as the target.
target_column = 'Air Carrier Delay'  # Adjust the target variable if needed
delay_columns = [
    'Air Carrier Delay',
    'Aircraft Arriving Late',
    'National Aviation System Delay',
    'Security Delay',
]
feature_columns = [col for col in delay_columns if col != target_column]

X = data[feature_columns]
y = data[target_column]

# Train-test split (80/20, fixed seed so the split is reproducible).
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Fit the decision tree regressor and predict on the held-out rows.
dt = DecisionTreeRegressor(random_state=42)
dt.fit(X_train, y_train)
y_pred = dt.predict(X_test)

# Evaluate the model on the test set.
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

# Print Results
print("Decision Tree Regression Results:")
print(f"Mean Squared Error: {mse:.5f}")
print(f"R-squared Score: {r2:.2f}")

# Plot Residuals (actual minus predicted) against the actual values.
residuals = y_test - y_pred
plt.figure(figsize=(8, 6))
plt.scatter(y_test, residuals, color='purple', label='Residuals')
plt.axhline(y=0, color='red', linestyle='--', label='Zero Residual')
# Axis label follows the chosen target so it stays correct if it is changed.
plt.xlabel(f'Actual {target_column}')
plt.ylabel('Residuals (Actual - Predicted)')
plt.title('Decision Tree Regression: Residual Analysis')
plt.legend()
plt.show()


# Clustering Analysis

In [None]:
import pandas as pd
from sklearn.cluster import KMeans
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler

# Load the cleaned flight-delay dataset.
data = pd.read_csv('cleaned_flight_delay_data.csv')

# Features for clustering
cluster_columns = [
    'Air Carrier Delay',
    'Aircraft Arriving Late',
    'National Aviation System Delay',
    'Security Delay',
]
X = data[cluster_columns]

# Standardize the data so every column contributes on the same scale to the
# Euclidean distances KMeans uses.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# KMeans Clustering
# n_init is pinned explicitly: its default differs between scikit-learn
# releases, so leaving it unset makes the number of restarts (and possibly
# the resulting clusters) depend on the installed version.
kmeans = KMeans(n_clusters=3, n_init=10, random_state=42)
clusters = kmeans.fit_predict(X_scaled)

# Add cluster labels to the original dataset
data['Cluster'] = clusters

# Print cluster centers and inertia
print("Cluster Analysis Results:")
# These centers are in standardized units (the space the model was fitted in).
print(f"Cluster Centers:\n{kmeans.cluster_centers_}")
# The same centers mapped back to the original column units for readability.
centers_original = pd.DataFrame(
    scaler.inverse_transform(kmeans.cluster_centers_),
    columns=cluster_columns,
)
print(f"Cluster Centers (original units):\n{centers_original}")
print(f"Inertia: {kmeans.inertia_}")

# Plot Clusters: two of the four clustering columns, coloured by cluster label.
plt.figure(figsize=(8, 6))
plt.scatter(
    X['Aircraft Arriving Late'],
    X['National Aviation System Delay'],
    c=clusters,
    cmap='viridis',
)
plt.xlabel('Aircraft Arriving Late')
plt.ylabel('National Aviation System Delay')
plt.title('Cluster Analysis: Aircraft Arriving Late vs National Aviation System Delay')
plt.colorbar(label='Cluster')
plt.show()

# Save clustered data to a new CSV file
data.to_csv('clustered_flight_delay_data.csv', index=False)
print("Clustered data saved to: clustered_flight_delay_data.csv")
