In [61]:
import pandas as pd
import joblib
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.neighbors import KNeighborsRegressor
from xgboost import XGBRegressor

# Load the raw flight-price data, then discard exact duplicate rows and any
# row containing a missing value before features are derived.
df = pd.read_csv(r"C:\Users\Admin\Project 3\data\Flight_Price.csv")
df.drop_duplicates(inplace=True)
df.dropna(inplace=True)

# Preprocessing
# Parse the journey date once (day-first) and reuse the parsed series for both
# derived columns instead of re-parsing the whole column per feature.
journey_date = pd.to_datetime(df['Date_of_Journey'], dayfirst=True)
df['Journey_day'] = journey_date.dt.day
df['Journey_month'] = journey_date.dt.month

for col in ['Dep_Time', 'Arrival_Time']:
    # Only the first whitespace-separated token of each value is parsed as
    # HH:MM; anything after it is ignored.
    # NOTE: errors='coerce' turns unparseable values into NaT, which become
    # NaN in the hour/minute columns. This runs after the dropna() above, so
    # such rows are NOT removed here.
    clock = pd.to_datetime(df[col].str.split().str[0], format='%H:%M', errors='coerce')
    df[col + '_hour'] = clock.dt.hour
    df[col + '_minute'] = clock.dt.minute

def duration_to_mins(duration: str) -> int:
    """Convert a duration string such as ``"2h 50m"`` to total minutes.

    Either component may be missing (``"19h"``, ``"5m"``). Unit letters are
    matched case-insensitively, components may be separated by any whitespace
    or by none at all (``"2h50m"``), and tokens that do not end in ``h`` or
    ``m`` are ignored. If a unit occurs more than once, the last occurrence
    wins.

    Args:
        duration: Text containing an hour component (``<int>h``) and/or a
            minute component (``<int>m``).

    Returns:
        ``hours * 60 + minutes``; ``0`` when no component is present.

    Raises:
        ValueError: If a unit letter is not directly preceded by an integer
            (for example ``"xh"`` or ``"1.5h"``).
    """
    hours, minutes = 0, 0
    # Lower-case so "2H 50M" is recognised, and put a space after every unit
    # letter so compact input ("2h50m") tokenises the same way as "2h 50m".
    spaced = duration.lower().replace('h', 'h ').replace('m', 'm ')
    # split() with no argument splits on any run of whitespace (spaces, tabs).
    for token in spaced.split():
        if token.endswith('h'):
            hours = int(token[:-1])
        elif token.endswith('m'):
            minutes = int(token[:-1])
    return hours * 60 + minutes

# Total flight duration in minutes, derived from the textual 'Duration' column.
df['Duration_mins'] = df['Duration'].apply(duration_to_mins)

from sklearn.preprocessing import LabelEncoder

# Fit a separate encoder per categorical column and keep every one of them.
# Refitting a single shared encoder would discard the category -> integer
# mapping of all but the last column, so it could not be reproduced or
# inverted later (e.g. when encoding new rows for prediction).
encoders = {}
for col in ['Airline', 'Source', 'Destination', 'Route', 'Total_Stops', 'Additional_Info']:
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col])
    encoders[col] = le

# Features & Target
# 'Additional_Info' is encoded above but is not used as a model feature.
X = df[['Airline', 'Source', 'Destination', 'Route', 'Total_Stops', 'Journey_day', 'Journey_month',
        'Dep_Time_hour', 'Dep_Time_minute', 'Arrival_Time_hour', 'Arrival_Time_minute', 'Duration_mins']]
y = df['Price']

# Hold out 20% of the rows for testing; the fixed seed keeps the split stable
# across runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

import os

import joblib

# Single definition of the output directory so the makedirs call and the
# dump paths cannot drift apart.
MODEL_DIR = "flight_price_prediction/models"
# Same seed as the train/test split, so the stochastic models are also
# reproducible from run to run.
RANDOM_STATE = 42

# Ensure directory exists
os.makedirs(MODEL_DIR, exist_ok=True)

# Candidate regressors keyed by the file stem used when saving them.
# LinearRegression and KNeighborsRegressor take no seed; the other three
# involve randomness and are seeded explicitly.
models = {
    "linear_regression": LinearRegression(),
    "random_forest": RandomForestRegressor(random_state=RANDOM_STATE),
    "gradient_boosting": GradientBoostingRegressor(random_state=RANDOM_STATE),
    "xgboost": XGBRegressor(random_state=RANDOM_STATE),
    "knn": KNeighborsRegressor(),
}

# Fit each model on the training split and persist it as <name>.pkl.
for name, model in models.items():
    model.fit(X_train, y_train)
    joblib.dump(model, f"{MODEL_DIR}/{name}.pkl")

print("All model saved.")

All model saved.
