In [89]:
from sklearn.ensemble import VotingRegressor, GradientBoostingRegressor
from xgboost import XGBRegressor
import lightgbm as lgb
import pandas as pd
import numpy as np
from sklearn.metrics import mean_squared_error, r2_score
import pickle

In [109]:
df = pd.read_csv("DataSets/X_train.csv")
dft = pd.read_csv("DataSets/X_test.csv")

In [110]:
df['flight_code']=df.flight.apply(lambda x: x.split("-")[0])
df['flight_no']=df.flight.apply(lambda x: x.split("-")[1])
df['flight_no']=df['flight_no'].astype(int)
dft['flight_code']=dft.flight.apply(lambda x: x.split("-")[0])
dft['flight_no']=dft.flight.apply(lambda x: x.split("-")[1])
dft['flight_no']=dft['flight_no'].astype(int)

In [113]:
X = df.drop(columns=['flightId', 'flight', 'price'])
X['airline']=X.airline.map(dict(Vistara=0, Air_India=1, GO_FIRST=2, Indigo=3, AirAsia=4, SpiceJet=5))
X['source_city']=X.source_city.map(dict(Mumbai=0, Delhi=1, Bangalore=2, Kolkata=3, Hyderabad=4, Chennai=5))
X['destination_city']=X.destination_city.map(dict(Mumbai=0, Delhi=1, Bangalore=2, Kolkata=3, Hyderabad=4, Chennai=5))
X['departure_time']=X.departure_time.map(dict(Morning=0, Early_Morning=1, Evening=2, Night=3, Afternoon=4, Late_Night=5))
X['arrival_time']=X.arrival_time.map(dict(Morning=0, Early_Morning=1, Evening=2, Night=3, Afternoon=4, Late_Night=5))
X['stops']=X.stops.map(dict(zero=0, one=1, two_or_more=2))
X['flight_code']=X.flight_code.map(dict(UK=0, AI=1, G8=2, E6=3, I5=4, SG=5))

In [93]:
Xt = dft.drop(columns=['flightId', 'flight', 'price'])
Xt['airline']=Xt.airline.map(dict(Vistara=0, Air_India=1, GO_FIRST=2, Indigo=3, AirAsia=4, SpiceJet=5))
Xt['source_city']=Xt.source_city.map(dict(Mumbai=0, Delhi=1, Bangalore=2, Kolkata=3, Hyderabad=4, Chennai=5))
Xt['destination_city']=Xt.destination_city.map(dict(Mumbai=0, Delhi=1, Bangalore=2, Kolkata=3, Hyderabad=4, Chennai=5))
Xt['departure_time']=Xt.departure_time.map(dict(Morning=0, Early_Morning=1, Evening=2, Night=3, Afternoon=4, Late_Night=5))
Xt['arrival_time']=Xt.arrival_time.map(dict(Morning=0, Early_Morning=1, Evening=2, Night=3, Afternoon=4, Late_Night=5))
Xt['stops']=Xt.stops.map(dict(zero=0, one=1, two_or_more=2))
Xt['flight_code']=Xt.flight_code.map(dict(UK=0, AI=1, G8=2, E6=3, I5=4, SG=5))

In [94]:
y = df[['price']]

gb=GradientBoostingRegressor(max_depth=5, max_features=4, n_estimators=2300, learning_rate=0.056, random_state=0)
xgb=XGBRegressor(n_estimators=1300, learning_rate=0.04, n_jobs=-1)
lgbm=lgb.LGBMRegressor(learning_rate=0.09, n_estimators=5800, max_depth=4)
vot=VotingRegressor([('gb', gb), ('xgb', xgb), ('lgb', lgbm)])

rgs = [
    ('Gradient Boosting', gb), 
    ('XGBoost', xgb),
    ('LightGBM', lgbm),
    ('Voting Regressor', vot)
]

for name,rg in rgs:
    rg.fit(X,y)
    yt_pred = rg.predict(Xt)
    print("%s || R2 = %.4f and MSE = %.2f" % (name, r2_score(dft['price'],yt_pred), mean_squared_error(dft['price'], yt_pred)))

#Gradient Boosting || R2 = 0.8194 and MSE = 31978343.37
#XGBoost || R2 = 0.8140 and MSE = 32941617.74
#LightGBM || R2 = 0.7928 and MSE = 36696315.08
#Voting Regressor || R2 = 0.8193 and MSE = 31995161.06

A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


Gradient Boosting || R2 = 0.8194 and MSE = 31978343.37
XGBoost || R2 = 0.8140 and MSE = 32941617.74
LightGBM || R2 = 0.7928 and MSE = 36696315.08


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


Voting Regressor || R2 = 0.8193 and MSE = 31995161.06


In [95]:
#streamlit model saving
reg=vot.fit(X,y)
model = {
    'model': reg,
    'X':X
}
pickle.dump(model, open('flights_model.sav', 'wb'))

A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
