In [16]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
import joblib
from sklearn.metrics import r2_score,mean_absolute_error,mean_squared_error


In [17]:
# Read the flight dataset from disk into a DataFrame
file_path = 'flight_dataset.csv'
df = pd.read_csv(file_path)

# The 'Price' column is the regression target; every remaining column
# is kept as a candidate feature.
y = df['Price']
X = df.drop('Price', axis=1)

In [18]:
# Define the preprocessing steps.
# Columns listed here are fed to the regressor unchanged.
numeric_features = ['Date', 'Month', 'Year', 'Dep_hours', 'Dep_min', 'Arrival_hours', 'Arrival_min', 'Duration_hours', 'Duration_min', 'Total_Stops']
# Columns listed here are expanded into one indicator column per category.
categorical_features = ['Airline', 'Source', 'Destination']

# NOTE: ColumnTransformer's default remainder='drop' applies, so any column of X
# that is not named in one of the two lists above is discarded before fitting.
preprocessor = ColumnTransformer(
    transformers=[
        ('num', 'passthrough', numeric_features),
        # handle_unknown='ignore': a category that was not present when the
        # encoder was fitted is encoded as all zeros instead of raising
        # ValueError at predict time (rare categories can land only in the
        # test split, and the saved model may later score unseen values).
        ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_features)
    ])

# Create a pipeline that first transforms the data and then applies the model,
# so the exact same preprocessing is used for fit() and predict().
model = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('regressor', LinearRegression())
])

In [26]:
# Hold out 20% of the rows for evaluation; the fixed random_state keeps the
# split identical from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Fit the whole pipeline (preprocessing + regressor) on the training rows only
model.fit(X_train, y_train)

# Persist the fitted pipeline so it can be reloaded without retraining
joblib.dump(model, 'Flight_price_model.pkl')

print("Model trained and saved successfully.")

# Predict on the held-out rows
y_pred = model.predict(X_test)

# Coefficient of determination on the held-out rows
r2 = r2_score(y_test, y_pred)
print(f"R² Score: {r2}")

# Mean absolute percentage error, expressed in percent.
# NOTE(review): a zero in y_test would cause a division by zero here —
# confirm the dataset contains no zero prices.
relative_errors = (y_test - y_pred) / y_test
mape = np.mean(np.abs(relative_errors)) * 100
print(f"Mean Absolute Percentage Error (MAPE): {mape}%")

Model trained and saved successfully.
R² Score: 0.6149447379894808
Mean Absolute Percentage Error (MAPE): 23.752633665651125%
