In [2]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import GradientBoostingRegressor, RandomForestRegressor, StackingRegressor
from sklearn.neural_network import MLPRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split


In [3]:
jfk_df = pd.read_csv("JFK-Flights.csv")


In [4]:
print("Available columns:", jfk_df.columns.tolist())

Available columns: ['MONTH', 'DAY_OF_MONTH', 'DAY_OF_WEEK', 'OP_UNIQUE_CARRIER', 'TAIL_NUM', 'DEST', 'DEP_DELAY', 'CRS_ELAPSED_TIME', 'DISTANCE', 'CRS_DEP_M', 'DEP_TIME_M', 'CRS_ARR_M', 'Temperature', 'Dew Point', 'Humidity', 'Wind', 'Wind Speed', 'Wind Gust', 'Pressure', 'Condition', 'sch_dep', 'sch_arr', 'TAXI_OUT']


In [5]:
weather_features = [
    "Temperature", "Dew Point", "Humidity", "Wind", 
    "Wind Speed", "Wind Gust", "Pressure"
]
target = "DEP_DELAY"

In [6]:
jfk_df = jfk_df.dropna(subset=[target, "MONTH"] + weather_features)


In [7]:
def assign_season(month):
    if month in [12, 1, 2]:
        return "Winter"
    elif month in [3, 4, 5]:
        return "Spring"
    elif month in [6, 7, 8]:
        return "Summer"
    else:
        return "Fall"

jfk_df["Season"] = jfk_df["MONTH"].apply(assign_season)

In [8]:
# Encode categorical columns: Wind and Season
jfk_df["Wind"] = jfk_df["Wind"].astype(str)
jfk_df["Season"] = jfk_df["Season"].astype(str)

In [9]:
X = jfk_df[weather_features + ["Season"]]
X = pd.get_dummies(X, columns=["Wind", "Season"], drop_first=True)
y = jfk_df[target]

In [10]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [11]:
base_models = [
    ("lr", LinearRegression()),
    ("gb", GradientBoostingRegressor(random_state=42)),
    ("nn", MLPRegressor(max_iter=500, random_state=42))
]

In [12]:
meta_model = RandomForestRegressor(random_state=42)

In [13]:
ensemble = StackingRegressor(estimators=base_models, final_estimator=meta_model)


In [14]:
ensemble.fit(X_train, y_train)

In [15]:
y_pred = ensemble.predict(X_test)

In [16]:
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
mae = mean_absolute_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print("Weather + Season Model Results:")
print(f"RMSE: {rmse:.2f}")
print(f"MAE: {mae:.2f}")
print(f"R^2: {r2:.2f}")

Weather + Season Model Results:
RMSE: 45.46
MAE: 18.61
R^2: -0.12
