In [3]:
import os
from pathlib import Path

import joblib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import SGDRegressor
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.tree import DecisionTreeRegressor

ModuleNotFoundError: No module named 'matplotlib'

In [2]:
# Directory holding the Excel files. Defaults to the original location but can
# be overridden with the FLIGHT_DATA_DIR environment variable, so the notebook
# is not tied to one machine's absolute path.
DATA_DIR = Path(os.environ.get("FLIGHT_DATA_DIR", "/mnt/data"))

# Load training data
train_data = pd.read_excel(DATA_DIR / "Data_Train.xlsx")

# Load test data
test_data = pd.read_excel(DATA_DIR / "Test_set.xlsx")

# Display first few rows
train_data.head()

FileNotFoundError: [Errno 2] No such file or directory: '/mnt/data/Data_Train.xlsx'

In [None]:
# Number of missing entries in each column of the training frame
train_data.isna().sum()

In [None]:
# Derive numeric day-of-month and month features from Date_of_Journey, then
# remove the raw column (no year feature is extracted).
# NOTE(review): no explicit date format is given; if the raw values are
# day-first strings, confirm that pandas parses them as intended.
journey_dates = pd.to_datetime(train_data["Date_of_Journey"])
train_data["Journey_day"] = journey_dates.dt.day
train_data["Journey_month"] = journey_dates.dt.month
train_data.drop(columns=["Date_of_Journey"], inplace=True)

In [None]:
# Extract hour and minute from 'Dep_Time'
train_data["Dep_hour"] = pd.to_datetime(train_data["Dep_Time"]).dt.hour
train_data["Dep_min"] = pd.to_datetime(train_data["Dep_Time"]).dt.minute
train_data.drop(["Dep_Time"], axis=1, inplace=True)

# Extract hour and minute from 'Arrival_Time'
train_data["Arrival_hour"] = pd.to_datetime(train_data["Arrival_Time"]).dt.hour
train_data["Arrival_min"] = pd.to_datetime(train_data["Arrival_Time"]).dt.minute
train_data.drop(["Arrival_Time"], axis=1, inplace=True)

In [None]:
# Encode categorical variables like Airline, Source, Destination
encoder = LabelEncoder()
train_data["Airline"] = encoder.fit_transform(train_data["Airline"])
train_data["Source"] = encoder.fit_transform(train_data["Source"])
train_data["Destination"] = encoder.fit_transform(train_data["Destination"])

In [None]:
# Features used for training, in the exact order in which the /predict endpoint
# assembles its input row. Selecting them explicitly (instead of "everything
# except Price") keeps the training matrix aligned with what the API sends and
# keeps any unprocessed column out of the model. A missing column raises
# KeyError here rather than failing later during fitting.
FEATURE_COLUMNS = [
    "Airline",
    "Source",
    "Destination",
    "Journey_day",
    "Journey_month",
    "Dep_hour",
    "Dep_min",
    "Arrival_hour",
    "Arrival_min",
]

X = train_data[FEATURE_COLUMNS]
y = train_data["Price"]

# Split into training and validation sets (80/20, fixed seed for reproducibility)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)


Step 3: Train Models
3.1 Train Linear Regression using Gradient Descent

In [None]:
# Standardize data. The scaler is fitted on the training split only and then
# applied unchanged to the validation split.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_val_scaled = scaler.transform(X_val)

# Train SGDRegressor.
# The "invscaling" schedule (eta = eta0 / t**power_t) is used so that eta0
# actually sets the initial step size. Under learning_rate="optimal" the eta0
# argument is ignored, so the previously configured 0.01 had no effect.
sgd_regressor = SGDRegressor(max_iter=1000, learning_rate="invscaling", eta0=0.01, random_state=42)
sgd_regressor.fit(X_train_scaled, y_train)

# Predict on the (scaled) validation features
y_pred_sgd = sgd_regressor.predict(X_val_scaled)

# Compute Loss (MSE)
mse_sgd = mean_squared_error(y_val, y_pred_sgd)
print(f"SGD Linear Regression MSE: {mse_sgd}")


3.2 Train Decision Tree & Random Forest

In [None]:
# Both tree models are trained on the unscaled features and scored with MSE on
# the validation split.

# Decision Tree
# random_state is fixed so repeated runs build the same tree, consistent with
# the seeded split, SGD model and random forest.
dt = DecisionTreeRegressor(random_state=42)
dt.fit(X_train, y_train)
y_pred_dt = dt.predict(X_val)
mse_dt = mean_squared_error(y_val, y_pred_dt)
print(f"Decision Tree MSE: {mse_dt}")

# Random Forest
rf = RandomForestRegressor(n_estimators=100, random_state=42)
rf.fit(X_train, y_train)
y_pred_rf = rf.predict(X_val)
mse_rf = mean_squared_error(y_val, y_pred_rf)
print(f"Random Forest MSE: {mse_rf}")


3.3 Choose Best Model and Save

In [None]:
# Pick the model with the lowest validation MSE and save it for the API.
#
# The SGD model was fitted on standardized features, whereas the API passes raw
# feature values straight to `predict`. Bundle the already-fitted scaler with it
# so that, if it wins, the saved artifact applies the same scaling at inference
# time. make_pipeline does not refit or clone the fitted steps.
sgd_with_scaler = make_pipeline(scaler, sgd_regressor)

# `min` returns the first minimal entry, so this ordering keeps the original
# tie-breaking: the decision tree wins any tie, and SGD wins a tie with the
# random forest.
candidates = [(mse_dt, dt), (mse_sgd, sgd_with_scaler), (mse_rf, rf)]
best_mse, best_model = min(candidates, key=lambda candidate: candidate[0])

joblib.dump(best_model, "best_model.pkl")


Step 4: Create FastAPI
4.1 Install and Set Up FastAPI

In [None]:
# Use the %pip magic explicitly so the packages are installed into (and listed
# from) the environment of the running kernel, without relying on automagic.
%pip install fastapi pydantic uvicorn
%pip freeze > requirements.txt


4.2 Create prediction.py

In [None]:
from fastapi import FastAPI
import joblib
import numpy as np
from pydantic import BaseModel

# ASGI application object; the server is started against it as `prediction:app`.
app = FastAPI()

# Load the trained model once at import time so every request reuses it.
# The path is relative, so it is resolved against the working directory of the
# process that imports this module.
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary
# code; only load artifacts produced by a trusted training run.
model = joblib.load("best_model.pkl")

class FlightInput(BaseModel):
    """Request body for the /predict endpoint: one flight as nine integer features."""

    # Integer codes for the categorical fields. Presumably these must be the
    # codes assigned by the label encoders used during training; verify
    # against the training step.
    Airline: int
    Source: int
    Destination: int
    # Day of month and month number of the journey date.
    Journey_day: int
    Journey_month: int
    # Departure time, split into hour and minute.
    Dep_hour: int
    Dep_min: int
    # Arrival time, split into hour and minute.
    Arrival_hour: int
    Arrival_min: int

@app.post("/predict")
def predict_price(data: FlightInput):
    """Predict the price for a single flight.

    Args:
        data: Validated request body holding the nine integer features.

    Returns:
        A dict with one key, "predicted_price", holding the model's prediction
        for the submitted flight as a built-in float.
    """
    # Build a single-row 2-D array. The column order here is the order in which
    # the model receives the features, so it must match the order of the
    # feature columns used when the model was trained.
    input_data = np.array([[data.Airline, data.Source, data.Destination, data.Journey_day,
                            data.Journey_month, data.Dep_hour, data.Dep_min,
                            data.Arrival_hour, data.Arrival_min]])

    prediction = model.predict(input_data)
    # Convert the NumPy scalar to a built-in float so the JSON response does
    # not depend on how the encoder handles NumPy scalar types.
    return {"predicted_price": float(prediction[0])}


4.3 Run FastAPI Locally

In [None]:
# Start the development server for the `app` object in prediction.py;
# --reload restarts the server when the source changes.
# NOTE(review): this is a shell command, not Python. Run it in a terminal from
# the directory containing prediction.py and best_model.pkl (the model is loaded
# via a relative path). Prefixing it with `!` in a notebook cell would block the
# kernel while the server runs.
uvicorn prediction:app --reload
