# In [9]:
import os
import joblib
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor, RandomForestClassifier, GradientBoostingClassifier
from sklearn.neighbors import KNeighborsClassifier, KNeighborsRegressor  # ✅ Add this
from xgboost import XGBRegressor, XGBClassifier

# Flight Price Prediction (Regression)
# Load the raw flight data, discarding rows that contain any missing value
# and rows that are exact duplicates of an earlier row.
df_flight = pd.read_csv("data/Flight_Price.csv").dropna().drop_duplicates()

# Each date/time column is parsed once and the parsed Series is reused for
# every feature derived from it.

# Journey date: parsed day-first, then split into day-of-month and month.
_journey_dt = pd.to_datetime(df_flight['Date_of_Journey'], dayfirst=True)
df_flight['Journey_day'] = _journey_dt.dt.day
df_flight['Journey_month'] = _journey_dt.dt.month

# Departure time: strict 'HH:MM'; a malformed value raises here.
_dep_dt = pd.to_datetime(df_flight['Dep_Time'], format='%H:%M')
df_flight['Dep_Time_hour'] = _dep_dt.dt.hour
df_flight['Dep_Time_minute'] = _dep_dt.dt.minute

# Arrival time: only the first whitespace-separated token is parsed. With an
# exact '%H:%M' format, a value carrying anything after the clock time would
# be coerced to NaT and its row discarded by the later NaN drop. Values that
# are already plain 'HH:MM' parse exactly as before; anything still
# unparseable remains NaT because of errors='coerce'.
# NOTE(review): assumes the unparseable arrival values look like
# 'HH:MM <extra text>' (e.g. a next-day date suffix) - confirm against the CSV.
_arrival_clock = df_flight['Arrival_Time'].str.split().str[0]
_arrival_dt = pd.to_datetime(_arrival_clock, format='%H:%M', errors='coerce')
df_flight['Arrival_Time_hour'] = _arrival_dt.dt.hour
df_flight['Arrival_Time_minute'] = _arrival_dt.dt.minute

def duration_to_minutes(duration):
    """Convert a duration string such as ``"2h 50m"`` into total minutes.

    The text is split on whitespace. A token containing ``h`` sets the hour
    count and, otherwise, a token containing ``m`` sets the minute count
    (the unit letter is removed before integer conversion). Tokens with
    neither letter are ignored, and when a unit occurs more than once the
    last occurrence wins.

    Args:
        duration: String made of optional ``<int>h`` and ``<int>m`` tokens.

    Returns:
        Total minutes as an ``int``; ``0`` when no unit token is present.

    Raises:
        ValueError: If a unit token is not an integer once its unit letter
            has been stripped out.
    """
    amounts = {'h': 0, 'm': 0}
    for token in duration.strip().split():
        # Hours are checked first, so a token holding both letters is
        # treated as an hour token.
        if 'h' in token:
            unit = 'h'
        elif 'm' in token:
            unit = 'm'
        else:
            continue
        amounts[unit] = int(token.replace(unit, ''))
    return amounts['h'] * 60 + amounts['m']

# Turn the textual 'Duration' column into a numeric total-minutes feature.
df_flight['Duration_mins'] = df_flight['Duration'].apply(duration_to_minutes)

# Integer-encode the categorical columns. Every column gets its own fitted
# LabelEncoder, kept in `flight_label_encoders` keyed by column name, so the
# category -> code mapping used for training can be reapplied (transform) or
# reversed (inverse_transform) later. Refitting one shared encoder would
# overwrite the mapping on each iteration and keep only the last column's.
# After the loop `le` still refers to the encoder fitted on the last column.
# Note: 'Additional_Info' is encoded here but is not part of the feature
# matrix selected below.
flight_label_encoders = {}
for col in ['Airline', 'Source', 'Destination', 'Route', 'Total_Stops', 'Additional_Info']:
    le = LabelEncoder()
    df_flight[col] = le.fit_transform(df_flight[col])
    flight_label_encoders[col] = le

# Feature matrix (encoded categoricals plus derived date/time/duration
# columns) and the regression target.
Xr = df_flight[['Airline', 'Source', 'Destination', 'Route', 'Total_Stops',
                'Journey_day', 'Journey_month', 'Dep_Time_hour', 'Dep_Time_minute',
                'Arrival_Time_hour', 'Arrival_Time_minute', 'Duration_mins']]
yr = df_flight['Price']

# Split: hold out 20% of the rows for evaluation. The fixed random_state
# makes the partition reproducible, so the split is performed only once.
Xr_train, Xr_test, yr_train, yr_test = train_test_split(Xr, yr, test_size=0.2, random_state=42)

# Drop rows whose features contain NaN (e.g. arrival times coerced to NaT
# during parsing) and keep the targets aligned with the surviving rows.
# dropna() returns a new frame rather than mutating the split result in
# place, and .loc makes the realignment explicitly label-based.
Xr_train = Xr_train.dropna()
yr_train = yr_train.loc[Xr_train.index]

Xr_test = Xr_test.dropna()
yr_test = yr_test.loc[Xr_test.index]

# Candidate regressors, keyed by the file stem used when each one is saved.
# Estimators whose training involves randomness are seeded with the same
# value as the train/test split so that repeated runs produce the same
# fitted models; LinearRegression and KNeighborsRegressor take no seed.
regressors = {
    "linear_regression": LinearRegression(),
    "random_forest": RandomForestRegressor(random_state=42),
    "gradient_boosting": GradientBoostingRegressor(random_state=42),
    "xgboost": XGBRegressor(random_state=42),
    "knn_regressor": KNeighborsRegressor(),
}

# Fit every candidate on the training split and persist it as
# flight_price_prediction/models/<name>.pkl (directory created if missing).
os.makedirs("flight_price_prediction/models", exist_ok=True)
for name, model in regressors.items():
    model.fit(Xr_train, yr_train)
    joblib.dump(model, f"flight_price_prediction/models/{name}.pkl")
