In [8]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.model_selection import train_test_split

In [9]:
df = pd.read_csv('D:\\GUVI\\visual_studio\\Fligh Data\\flight_project\\Flight_price_cleaned_data_2.csv')
df.head()

Unnamed: 0.1,Unnamed: 0,Airline,Source,Destination,Additional_Info,Price,dur_min,stops,journey_day,dep_time
0,0,IndiGo,Banglore,New Delhi,No Info,3897.0,170.0,0,6,222000
1,1,Air India,Kolkata,Banglore,No Info,7662.0,445.0,2,5,55000
2,2,Jet Airways,Delhi,Cochin,No Info,13882.0,1140.0,2,4,92500
3,3,IndiGo,Kolkata,Banglore,No Info,6218.0,325.0,1,3,180500
4,4,IndiGo,Banglore,New Delhi,No Info,6494.0,285.0,1,3,165000


In [10]:
df= df.drop('Unnamed: 0', axis = 1)

In [11]:
x = df.drop('Price', axis = 1)
y = df.Price
x = pd.get_dummies(x)

x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)

## Multiple Models

In [12]:
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

In [13]:
models = [
    (
        'LinearRegression',
        {'n_jobs': None},
        LinearRegression(),
        (x_train, y_train),
        (x_test, y_test)
    ),
    (
        'Decision Tree',
        {'criterion': 'absolute_error',
        'max_depth': 7,
        'min_samples_leaf': 70,
        'min_samples_split': 220},
        DecisionTreeRegressor(),
        (x_train, y_train),
        (x_test, y_test)
    ),
    (
        'Decision Tree CV',
        {'criterion': 'friedman_mse',
        'max_depth': 25,
        'max_features': 32,
        'min_samples_leaf': 75,
        'min_samples_split': 210},
        DecisionTreeRegressor(),
        (x_train, y_train),
        (x_test, y_test)
    ),
    
    (
        'Gradient Boosting',
        {'learning_rate': 0.1,
        'max_depth': 3,
        'n_estimators': 50,
        },
        GradientBoostingRegressor(),
        (x_train, y_train),
        (x_test, y_test)
    ),
    
    (
        'Gradient Boosting CV',
        {'learning_rate': 0.3,
        'max_depth': 6,
        'n_estimators': 65,
        },
        GradientBoostingRegressor(),
        (x_train, y_train),
        (x_test, y_test)
    ),
    
    (
        'Random Forest',
        {'max_depth': None,
        'min_samples_leaf':1,
        'min_samples_split':2,
        'n_estimators': 100,
        },
        RandomForestRegressor(),
        (x_train, y_train),
        (x_test, y_test)
    ),
    
    (
        'Random Forest CV',
        {'max_depth': 5,
        'min_samples_leaf':1,
        'min_samples_split':2,
        'n_estimators': 125,
        },
        RandomForestRegressor(),
        (x_train, y_train),
        (x_test, y_test)
    )
]

In [14]:
report = []
for model_name, params, model, train_set, test_set in models:
    x_train = train_set[0]
    y_train = train_set[1]
    x_test = test_set[0]
    y_test = test_set[1]
    model.set_params(**params)
    model.fit(x_train, y_train)
    y_pred = model.predict(x_test)
    
    mse = mean_squared_error(y_test, y_pred)
    rmse = np.sqrt(mse)
    mae = mean_absolute_error(y_test, y_pred)
    R_score= r2_score(y_test, y_pred)
    
    report.append((model_name, mse, rmse, mae, R_score))

In [15]:
report

[('LinearRegression',
  3449941.155822323,
  np.float64(1857.401721712975),
  1381.3818586028156,
  0.7817513980637246),
 ('Decision Tree',
  4397528.119209171,
  np.float64(2097.0284020988297),
  1173.7802994852598,
  0.7218055843146419),
 ('Decision Tree CV',
  4091561.725881008,
  np.float64(2022.7609166386935),
  1192.6115433303116,
  0.7411614905655819),
 ('Gradient Boosting',
  2888492.5445162035,
  np.float64(1699.5565729084171),
  1216.1845379955762,
  0.8172695037188968),
 ('Gradient Boosting CV',
  1973810.1015080567,
  np.float64(1404.9235215868716),
  915.3008684205016,
  0.8751337267260866),
 ('Random Forest',
  2469844.4122935906,
  np.float64(1571.573864727201),
  948.2777270262688,
  0.8437538306781024),
 ('Random Forest CV',
  3565202.877496815,
  np.float64(1888.1744827999385),
  1261.206986600865,
  0.7744597636630127)]

In [16]:
import mlflow

In [38]:
mlflow.set_experiment('Flight_Price_Experiment_0')
mlflow.set_tracking_uri('http://127.0.0.1:5000')
for i, element in enumerate(models):
    model_name = element[0]
    params = element[1]
    model = element[2]
    score = report[i]
    
    with mlflow.start_run(run_name = model_name):
        mlflow.log_params(params)
        mlflow.log_metrics({'MSE': score[1],
                            'RMSE':score[2],
                            'MAE':score[3],
                            'R2': score[4]})
        mlflow.sklearn.log_model(model, 'model')

2025/04/26 21:15:00 INFO mlflow.tracking.fluent: Experiment with name 'Flight_Price_Experiment_0' does not exist. Creating a new experiment.




🏃 View run LinearRegression at: http://127.0.0.1:5000/#/experiments/730251750864844700/runs/1c39c13e54df49cbbc3108c1ba7bef7b
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/730251750864844700




🏃 View run Decision Tree at: http://127.0.0.1:5000/#/experiments/730251750864844700/runs/1adb3b46154e486fb2095edaf1d55eee
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/730251750864844700




🏃 View run Decision Tree CV at: http://127.0.0.1:5000/#/experiments/730251750864844700/runs/2ef7349aaaed4693b87b4a056f39e4a2
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/730251750864844700




🏃 View run Gradient Boosting at: http://127.0.0.1:5000/#/experiments/730251750864844700/runs/258aff331af3415a910018113bdfa1c2
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/730251750864844700




🏃 View run Gradient Boosting CV at: http://127.0.0.1:5000/#/experiments/730251750864844700/runs/7311f103f70a4f0094355c1e63a6bd3c
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/730251750864844700




🏃 View run Random Forest at: http://127.0.0.1:5000/#/experiments/730251750864844700/runs/94823fa68cb7421ab735b230f981881f
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/730251750864844700




🏃 View run Random Forest CV at: http://127.0.0.1:5000/#/experiments/730251750864844700/runs/a8be26a689824f2ab52dc97a042f9561
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/730251750864844700
