In [1]:
## Import important Libraries
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV

In [2]:
## Import ML Libraries
from sklearn.linear_model import LogisticRegression
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier
from imblearn.over_sampling import SMOTE


## Import Ensemble libraries
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import BaggingClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.ensemble import VotingClassifier

## Import Support Vector Classifier library
from sklearn import svm
from sklearn.svm import SVC



## Import Metrics Libraries
from sklearn.metrics import classification_report, confusion_matrix,  roc_curve, roc_auc_score,accuracy_score

In [3]:
df_Surveydata_train = pd.read_csv(r"C:\Users\ANAY TIWARI\OneDrive\Desktop\Hackathon_GL\Surveydata_train_(1)_(1).csv")
df_Traveldata_train = pd.read_csv(r"C:\Users\ANAY TIWARI\OneDrive\Desktop\Hackathon_GL\Traveldata_train_(1)_(2).csv")

In [4]:
## Merge Surveydata_train and Traveldata_train for Train and Test Set
df_train = pd.merge(df_Surveydata_train,df_Traveldata_train,on='ID',how='outer')

In [5]:
## Split the data into categorical variables and Numerical variables
df_train_cat = df_train.select_dtypes(['object'])
df_train_num = df_train.select_dtypes(['int64','float64'])

In [6]:
df_train_num.isnull().sum()

ID                          0
Overall_Experience          0
Age                        33
Travel_Distance             0
DepartureDelay_in_Mins     57
ArrivalDelay_in_Mins      357
dtype: int64

In [7]:
df_train_num.Age.mode()

0    39.0
Name: Age, dtype: float64

In [8]:
df_train_num.loc[df_train_num.Age.isnull()]

Unnamed: 0,ID,Overall_Experience,Age,Travel_Distance,DepartureDelay_in_Mins,ArrivalDelay_in_Mins
2292,98802293,1,,242,0.0,3.0
10887,98810888,1,,245,0.0,0.0
11562,98811563,1,,1933,0.0,0.0
11682,98811683,0,,4717,0.0,0.0
16203,98816204,1,,251,3.0,0.0
19731,98819732,0,,1566,9.0,28.0
21141,98821142,0,,1834,4.0,4.0
22085,98822086,0,,3385,0.0,0.0
24361,98824362,1,,1689,16.0,22.0
25782,98825783,1,,1346,0.0,0.0


In [9]:
df_train_num['Age'].fillna(df_train_num['Age'].mode()[0], inplace=True)

In [10]:
df_train_num.isnull().sum()

ID                          0
Overall_Experience          0
Age                         0
Travel_Distance             0
DepartureDelay_in_Mins     57
ArrivalDelay_in_Mins      357
dtype: int64

In [11]:
df_train_num.DepartureDelay_in_Mins

0         0.0
1         9.0
2        77.0
3        13.0
4         0.0
         ... 
94374    83.0
94375     5.0
94376     0.0
94377     0.0
94378    28.0
Name: DepartureDelay_in_Mins, Length: 94379, dtype: float64

In [12]:
df_train_num.DepartureDelay_in_Mins.mean()

14.647091876762579

In [13]:
df_train_num.DepartureDelay_in_Mins.fillna(15,inplace=True)

In [14]:
df_train_num.isnull().sum()

ID                          0
Overall_Experience          0
Age                         0
Travel_Distance             0
DepartureDelay_in_Mins      0
ArrivalDelay_in_Mins      357
dtype: int64

In [15]:
df_train_num.ArrivalDelay_in_Mins.mean()

15.0052221820425

In [16]:
df_train_num.ArrivalDelay_in_Mins.fillna(15,inplace=True)

In [17]:
df_train_num.isnull().sum()

ID                        0
Overall_Experience        0
Age                       0
Travel_Distance           0
DepartureDelay_in_Mins    0
ArrivalDelay_in_Mins      0
dtype: int64

In [18]:
df_train_cat.isnull().sum()

Seat_comfort                 61
Seat_Class                    0
Arrival_time_convenient    8930
Catering                   8741
Platform_location            30
Onboardwifi_service          30
Onboard_entertainment        18
Online_support               91
Onlinebooking_Ease           73
Onboard_service            7601
Leg_room                     90
Baggage_handling            142
Checkin_service              77
Cleanliness                   6
Online_boarding               6
Gender                       77
CustomerType               8951
TypeTravel                 9226
Travel_Class                  0
dtype: int64

In [19]:
df_train_cat.Seat_comfort.mode()

0    acceptable
Name: Seat_comfort, dtype: object

In [20]:
df_train_cat.Seat_comfort.fillna(df_train_cat.Seat_comfort.mode()[0],inplace=True)

In [21]:
df_train_cat.isnull().sum()

Seat_comfort                  0
Seat_Class                    0
Arrival_time_convenient    8930
Catering                   8741
Platform_location            30
Onboardwifi_service          30
Onboard_entertainment        18
Online_support               91
Onlinebooking_Ease           73
Onboard_service            7601
Leg_room                     90
Baggage_handling            142
Checkin_service              77
Cleanliness                   6
Online_boarding               6
Gender                       77
CustomerType               8951
TypeTravel                 9226
Travel_Class                  0
dtype: int64

In [22]:
df_train_cat.Arrival_time_convenient.mode()

0    good
Name: Arrival_time_convenient, dtype: object

In [23]:
df_train_cat.Arrival_time_convenient.fillna(df_train_cat.Arrival_time_convenient.mode()[0],inplace=True)

In [24]:
df_train_cat.Catering.fillna(df_train_cat.Catering.mode()[0],inplace=True)

In [25]:
df_train_cat.Onboard_service.fillna(df_train_cat.Onboard_service.mode()[0],inplace=True)

In [26]:
df_train_cat.Leg_room.fillna(df_train_cat.Leg_room.mode()[0],inplace=True)

In [27]:
df_train_cat.Baggage_handling.fillna(df_train_cat.Baggage_handling.mode()[0],inplace=True)

In [28]:
df_train_cat.Checkin_service.fillna(df_train_cat.Checkin_service.mode()[0],inplace=True)

In [29]:
df_train_cat.Cleanliness.fillna(df_train_cat.Cleanliness.mode()[0],inplace=True)

In [30]:
df_train_cat.Online_boarding.fillna(df_train_cat.Online_boarding.mode()[0],inplace=True)

In [31]:
df_train_cat.Gender.fillna(df_train_cat.Gender.mode()[0],inplace=True)

In [32]:
df_train_cat.CustomerType.fillna(df_train_cat.CustomerType.mode()[0],inplace=True)

In [33]:
df_train_cat.TypeTravel.fillna(df_train_cat.TypeTravel.mode()[0],inplace=True)

In [34]:
df_train_cat.isnull().sum()

Seat_comfort                0
Seat_Class                  0
Arrival_time_convenient     0
Catering                    0
Platform_location          30
Onboardwifi_service        30
Onboard_entertainment      18
Online_support             91
Onlinebooking_Ease         73
Onboard_service             0
Leg_room                    0
Baggage_handling            0
Checkin_service             0
Cleanliness                 0
Online_boarding             0
Gender                      0
CustomerType                0
TypeTravel                  0
Travel_Class                0
dtype: int64

In [35]:
df_train_cat.Platform_location.fillna(df_train_cat.Platform_location.mode()[0],inplace=True)

In [36]:
df_train_cat.Onboardwifi_service.fillna(df_train_cat.Onboardwifi_service.mode()[0],inplace=True)

In [37]:
df_train_cat.Onboard_entertainment.fillna(df_train_cat.Onboard_entertainment.mode()[0],inplace=True)

In [38]:
df_train_cat.Online_support.fillna(df_train_cat.Online_support.mode()[0],inplace=True)

In [39]:
df_train_cat.Onlinebooking_Ease.fillna(df_train_cat.Onlinebooking_Ease.mode()[0],inplace=True)

In [40]:
df_train_cat.isnull().sum()

Seat_comfort               0
Seat_Class                 0
Arrival_time_convenient    0
Catering                   0
Platform_location          0
Onboardwifi_service        0
Onboard_entertainment      0
Online_support             0
Onlinebooking_Ease         0
Onboard_service            0
Leg_room                   0
Baggage_handling           0
Checkin_service            0
Cleanliness                0
Online_boarding            0
Gender                     0
CustomerType               0
TypeTravel                 0
Travel_Class               0
dtype: int64

In [41]:
from scipy.stats import zscore

In [42]:
df_train_num_scaled = df_train_num[['Age','Travel_Distance','DepartureDelay_in_Mins','ArrivalDelay_in_Mins']].apply(zscore)

In [43]:
df_train_num_scaled

Unnamed: 0,Age,Travel_Distance,DepartureDelay_in_Mins,ArrivalDelay_in_Mins
0,0.832379,-1.660469,-0.384171,-0.260780
1,0.567722,0.215099,-0.148118,-0.391102
2,0.236901,-0.892926,1.635392,2.710567
3,0.303065,-1.166284,-0.043206,0.078058
4,0.700050,0.002054,-0.384171,-0.391102
...,...,...,...,...
94374,-0.490905,-0.604976,1.792761,2.866954
94375,0.303065,-1.349171,-0.253030,-0.104393
94376,1.560185,0.792945,-0.384171,-0.391102
94377,-1.549533,0.744304,-0.384171,-0.391102


In [44]:
df_train = pd.concat([df_train_num['Overall_Experience'],df_train_num_scaled,df_train_cat],axis=1,join = 'inner')

In [45]:
df_train['Overall_Experience'] = df_train['Overall_Experience'].astype('int64')

In [46]:
df_train['Seat_comfort'] = np.where(df_train['Seat_comfort'] == 'extremely poor','0',df_train['Seat_comfort'] )
df_train['Seat_comfort'] = np.where(df_train['Seat_comfort'] == 'poor','1',df_train['Seat_comfort'] )
df_train['Seat_comfort'] = np.where(df_train['Seat_comfort'] == 'need improvement','2',df_train['Seat_comfort'] )
df_train['Seat_comfort'] = np.where(df_train['Seat_comfort'] == 'acceptable','3',df_train['Seat_comfort'] )
df_train['Seat_comfort'] = np.where(df_train['Seat_comfort'] == 'good','4',df_train['Seat_comfort'] )
df_train['Seat_comfort'] = np.where(df_train['Seat_comfort'] == 'excellent','5',df_train['Seat_comfort'] )

In [47]:
df_train['Arrival_time_convenient'] = np.where(df_train['Arrival_time_convenient'] == 'extremely poor', '0',df_train['Arrival_time_convenient'])
df_train['Arrival_time_convenient'] = np.where(df_train['Arrival_time_convenient'] == 'poor', '1',df_train['Arrival_time_convenient'])
df_train['Arrival_time_convenient'] = np.where(df_train['Arrival_time_convenient'] == 'need improvement', '2',df_train['Arrival_time_convenient'])
df_train['Arrival_time_convenient'] = np.where(df_train['Arrival_time_convenient'] == 'acceptable', '3',df_train['Arrival_time_convenient'])
df_train['Arrival_time_convenient'] = np.where(df_train['Arrival_time_convenient'] == 'good', '4',df_train['Arrival_time_convenient'])
df_train['Arrival_time_convenient'] = np.where(df_train['Arrival_time_convenient'] == 'excellent', '5',df_train['Arrival_time_convenient'])

In [48]:
df_train['Platform_location'] = np.where(df_train['Platform_location'] == 'very inconvinient', '0',df_train['Platform_location'])
df_train['Platform_location'] = np.where(df_train['Platform_location'] == 'Inconvinient', '1',df_train['Platform_location'])
df_train['Platform_location'] = np.where(df_train['Platform_location'] == 'need improvement', '2',df_train['Platform_location'])
df_train['Platform_location'] = np.where(df_train['Platform_location'] == 'manageable', '3',df_train['Platform_location'])
df_train['Platform_location'] = np.where(df_train['Platform_location'] == 'Convinient', '4',df_train['Platform_location'])
df_train['Platform_location'] = np.where(df_train['Platform_location'] == 'very convinient', '5',df_train['Platform_location'])

In [49]:
df_train['Seat_Class'] = np.where(df_train['Seat_Class'] == 'Ordinary', '0',df_train['Seat_Class'])
df_train['Seat_Class'] = np.where(df_train['Seat_Class'] == 'Green Car', '1',df_train['Seat_Class'])

In [50]:
df_train['Catering'] = np.where(df_train['Catering'] == 'extremely poor', '0',df_train['Catering'])
df_train['Catering'] = np.where(df_train['Catering'] == 'poor', '1',df_train['Catering'])
df_train['Catering'] = np.where(df_train['Catering'] == 'need improvement', '2',df_train['Catering'])
df_train['Catering'] = np.where(df_train['Catering'] == 'acceptable', '3',df_train['Catering'])
df_train['Catering'] = np.where(df_train['Catering'] == 'good', '4',df_train['Catering'])
df_train['Catering'] = np.where(df_train['Catering'] == 'excellent', '5',df_train['Catering'])

In [51]:
df_train['Onboardwifi_service'] = np.where(df_train['Onboardwifi_service'] == 'extremely poor', '0',df_train['Onboardwifi_service'])
df_train['Onboardwifi_service'] = np.where(df_train['Onboardwifi_service'] == 'poor', '1',df_train['Onboardwifi_service'])
df_train['Onboardwifi_service'] = np.where(df_train['Onboardwifi_service'] == 'need improvement', '2',df_train['Onboardwifi_service'])
df_train['Onboardwifi_service'] = np.where(df_train['Onboardwifi_service'] == 'acceptable', '3',df_train['Onboardwifi_service'])
df_train['Onboardwifi_service'] = np.where(df_train['Onboardwifi_service'] == 'good', '4',df_train['Onboardwifi_service'])
df_train['Onboardwifi_service'] = np.where(df_train['Onboardwifi_service'] == 'excellent', '5',df_train['Onboardwifi_service'])

In [52]:
df_train['Online_support'] = np.where(df_train['Online_support'] == 'extremely poor', '0',df_train['Online_support'])
df_train['Online_support'] = np.where(df_train['Online_support'] == 'poor', '1',df_train['Online_support'])
df_train['Online_support'] = np.where(df_train['Online_support'] == 'need improvement', '2',df_train['Online_support'])
df_train['Online_support'] = np.where(df_train['Online_support'] == 'acceptable', '3',df_train['Online_support'])
df_train['Online_support'] = np.where(df_train['Online_support'] == 'good', '4',df_train['Online_support'])
df_train['Online_support'] = np.where(df_train['Online_support'] == 'excellent', '5',df_train['Online_support'])

In [53]:
df_train['Onboard_entertainment'] = np.where(df_train['Onboard_entertainment'] == 'extremely poor', '0',df_train['Onboard_entertainment'])
df_train['Onboard_entertainment'] = np.where(df_train['Onboard_entertainment'] == 'poor', '1',df_train['Onboard_entertainment'])
df_train['Onboard_entertainment'] = np.where(df_train['Onboard_entertainment'] == 'need improvement', '2',df_train['Onboard_entertainment'])
df_train['Onboard_entertainment'] = np.where(df_train['Onboard_entertainment'] == 'acceptable', '3',df_train['Onboard_entertainment'])
df_train['Onboard_entertainment'] = np.where(df_train['Onboard_entertainment'] == 'good', '4',df_train['Onboard_entertainment'])
df_train['Onboard_entertainment'] = np.where(df_train['Onboard_entertainment'] == 'excellent', '5',df_train['Onboard_entertainment'])

In [54]:
df_train['Onboard_service'] = np.where(df_train['Onboard_service'] == 'extremely poor', '0',df_train['Onboard_service'])
df_train['Onboard_service'] = np.where(df_train['Onboard_service'] == 'poor', '1',df_train['Onboard_service'])
df_train['Onboard_service'] = np.where(df_train['Onboard_service'] == 'need improvement', '2',df_train['Onboard_service'])
df_train['Onboard_service'] = np.where(df_train['Onboard_service'] == 'acceptable', '3',df_train['Onboard_service'])
df_train['Onboard_service'] = np.where(df_train['Onboard_service'] == 'good', '4',df_train['Onboard_service'])
df_train['Onboard_service'] = np.where(df_train['Onboard_service'] == 'excellent', '5',df_train['Onboard_service'])

In [55]:
df_train['Leg_room'] = np.where(df_train['Leg_room'] == 'extremely poor', '0',df_train['Leg_room'])
df_train['Leg_room'] = np.where(df_train['Leg_room'] == 'poor', '1',df_train['Leg_room'])
df_train['Leg_room'] = np.where(df_train['Leg_room'] == 'need improvement', '2',df_train['Leg_room'])
df_train['Leg_room'] = np.where(df_train['Leg_room'] == 'acceptable', '3',df_train['Leg_room'])
df_train['Leg_room'] = np.where(df_train['Leg_room'] == 'good', '4',df_train['Leg_room'])
df_train['Leg_room'] = np.where(df_train['Leg_room'] == 'excellent', '5',df_train['Leg_room'])

In [56]:
df_train['Checkin_service'] = np.where(df_train['Checkin_service'] == 'extremely poor', '0',df_train['Checkin_service'])
df_train['Checkin_service'] = np.where(df_train['Checkin_service'] == 'poor', '1',df_train['Checkin_service'])
df_train['Checkin_service'] = np.where(df_train['Checkin_service'] == 'need improvement', '2',df_train['Checkin_service'])
df_train['Checkin_service'] = np.where(df_train['Checkin_service'] == 'acceptable', '3',df_train['Checkin_service'])
df_train['Checkin_service'] = np.where(df_train['Checkin_service'] == 'good', '4',df_train['Checkin_service'])
df_train['Checkin_service'] = np.where(df_train['Checkin_service'] == 'excellent', '5',df_train['Checkin_service'])

In [57]:
df_train['Online_boarding'] = np.where(df_train['Online_boarding'] == 'extremely poor', '0',df_train['Online_boarding'])
df_train['Online_boarding'] = np.where(df_train['Online_boarding'] == 'poor', '1',df_train['Online_boarding'])
df_train['Online_boarding'] = np.where(df_train['Online_boarding'] == 'need improvement', '2',df_train['Online_boarding'])
df_train['Online_boarding'] = np.where(df_train['Online_boarding'] == 'acceptable', '3',df_train['Online_boarding'])
df_train['Online_boarding'] = np.where(df_train['Online_boarding'] == 'good', '4',df_train['Online_boarding'])
df_train['Online_boarding'] = np.where(df_train['Online_boarding'] == 'excellent', '5',df_train['Online_boarding'])

In [58]:
df_train['Travel_Class'] = np.where(df_train['Travel_Class'] == 'Eco', '0',df_train['Travel_Class'])
df_train['Travel_Class'] = np.where(df_train['Travel_Class'] == 'Business', '1',df_train['Travel_Class'])

In [59]:
df_train['TypeTravel'] = np.where(df_train['TypeTravel'] == 'Personal Travel', '0',df_train['TypeTravel'])
df_train['TypeTravel'] = np.where(df_train['TypeTravel'] == 'Business travel', '1',df_train['TypeTravel'])

In [60]:
df_train['CustomerType'] = np.where(df_train['CustomerType'] == 'disloyal Customer', '0',df_train['CustomerType'])
df_train['CustomerType'] = np.where(df_train['CustomerType'] == 'Loyal Customer', '1',df_train['CustomerType'])

In [61]:
df_train['Cleanliness'] = np.where(df_train['Cleanliness'] == 'extremely poor', '0',df_train['Cleanliness'])
df_train['Cleanliness'] = np.where(df_train['Cleanliness'] == 'poor', '1',df_train['Cleanliness'])
df_train['Cleanliness'] = np.where(df_train['Cleanliness'] == 'need improvement', '2',df_train['Cleanliness'])
df_train['Cleanliness'] = np.where(df_train['Cleanliness'] == 'acceptable', '3',df_train['Cleanliness'])
df_train['Cleanliness'] = np.where(df_train['Cleanliness'] == 'good', '4',df_train['Cleanliness'])
df_train['Cleanliness'] = np.where(df_train['Cleanliness'] == 'excellent', '5',df_train['Cleanliness'])

In [62]:
df_train['Baggage_handling'] = np.where(df_train['Baggage_handling'] == 'extremely poor', '0',df_train['Baggage_handling'])
df_train['Baggage_handling'] = np.where(df_train['Baggage_handling'] == 'poor', '1',df_train['Baggage_handling'])
df_train['Baggage_handling'] = np.where(df_train['Baggage_handling'] == 'need improvement', '2',df_train['Baggage_handling'])
df_train['Baggage_handling'] = np.where(df_train['Baggage_handling'] == 'acceptable', '3',df_train['Baggage_handling'])
df_train['Baggage_handling'] = np.where(df_train['Baggage_handling'] == 'good', '4',df_train['Baggage_handling'])
df_train['Baggage_handling'] = np.where(df_train['Baggage_handling'] == 'excellent', '5',df_train['Baggage_handling'])

In [63]:
df_train['Onlinebooking_Ease'] = np.where(df_train['Onlinebooking_Ease'] == 'extremely poor', '0',df_train['Onlinebooking_Ease'])
df_train['Onlinebooking_Ease'] = np.where(df_train['Onlinebooking_Ease'] == 'poor', '1',df_train['Onlinebooking_Ease'])
df_train['Onlinebooking_Ease'] = np.where(df_train['Onlinebooking_Ease'] == 'need improvement', '2',df_train['Onlinebooking_Ease'])
df_train['Onlinebooking_Ease'] = np.where(df_train['Onlinebooking_Ease'] == 'acceptable', '3',df_train['Onlinebooking_Ease'])
df_train['Onlinebooking_Ease'] = np.where(df_train['Onlinebooking_Ease'] == 'good', '4',df_train['Onlinebooking_Ease'])
df_train['Onlinebooking_Ease'] = np.where(df_train['Onlinebooking_Ease'] == 'excellent', '5',df_train['Onlinebooking_Ease'])

In [64]:
df_train['Gender'] = np.where(df_train['Gender'] == 'Female', '0',df_train['Gender'])
df_train['Gender'] = np.where(df_train['Gender'] == 'Male', '1',df_train['Gender'])

In [65]:
## Convert all the features data types into 'int64'
df_train['Seat_comfort'] = df_train['Seat_comfort'].astype('int64')
df_train['Seat_Class'] = df_train['Seat_Class'].astype('int64')
df_train['Arrival_time_convenient'] = df_train['Arrival_time_convenient'].astype('int64')
df_train['Catering'] = df_train['Catering'].astype('int64')
df_train['Platform_location'] = df_train['Platform_location'].astype('int64')
df_train['Onboardwifi_service'] = df_train['Onboardwifi_service'].astype('int64')
df_train['Onboard_entertainment'] = df_train['Onboard_entertainment'].astype('int64')
df_train['Online_support'] = df_train['Online_support'].astype('int64')
df_train['Onlinebooking_Ease'] = df_train['Onlinebooking_Ease'].astype('int64')
df_train['Onboard_service'] = df_train['Onboard_service'].astype('int64')
df_train['Leg_room'] = df_train['Leg_room'].astype('int64')
df_train['Baggage_handling'] = df_train['Baggage_handling'].astype('int64')
df_train['Checkin_service'] = df_train['Checkin_service'].astype('int64')
df_train['Cleanliness'] = df_train['Cleanliness'].astype('int64')
df_train['Online_boarding'] = df_train['Online_boarding'].astype('int64')
df_train['Gender'] = df_train['Gender'].astype('int64')
df_train['CustomerType'] = df_train['CustomerType'].astype('int64')
df_train['TypeTravel'] = df_train['TypeTravel'].astype('int64')
df_train['Travel_Class'] = df_train['Travel_Class'].astype('int64')

In [66]:
## Read the test data which is provided
df_Surveydata_test = pd.read_csv(r"C:\Users\ANAY TIWARI\OneDrive\Desktop\Hackathon_GL\Surveydata_test_(1).csv")
df_Traveldata_test = pd.read_csv(r"C:\Users\ANAY TIWARI\OneDrive\Desktop\Hackathon_GL\Traveldata_test_(1).csv")

In [67]:
## Merge the Survey test and Travel test data
df_test = pd.merge(df_Surveydata_test,df_Traveldata_test,on='ID',how='outer')

In [68]:
## Splitting the data into categorical variables and numerical variables
df_test_cat = df_test.select_dtypes(['object'])
df_test_num = df_test.select_dtypes(['int64','float64'])

In [69]:
df_test_cat.isnull().sum()

Seat_comfort                 22
Seat_Class                    0
Arrival_time_convenient    3325
Catering                   3357
Platform_location            12
Onboardwifi_service          12
Onboard_entertainment         8
Online_support               26
Onlinebooking_Ease           18
Onboard_service            2872
Leg_room                     25
Baggage_handling             40
Checkin_service              22
Cleanliness                   2
Online_boarding               2
Gender                       30
CustomerType               3383
TypeTravel                 3448
Travel_Class                  0
dtype: int64

In [70]:
df_test_cat.Seat_comfort.fillna(df_test_cat.Seat_comfort.mode()[0],inplace=True)

In [71]:
df_test_cat.Arrival_time_convenient.fillna(df_test_cat.Arrival_time_convenient.mode()[0],inplace=True)

In [72]:
df_test_cat.Catering.fillna(df_test_cat.Catering.mode()[0],inplace=True)

In [73]:
df_test_cat.Platform_location.fillna(df_test_cat.Platform_location.mode()[0],inplace=True)

In [74]:
df_test_cat.Onboardwifi_service.fillna(df_test_cat.Onboardwifi_service.mode()[0],inplace=True)

In [75]:
df_test_cat.Onboard_entertainment.fillna(df_test_cat.Onboard_entertainment.mode()[0],inplace=True)

In [76]:
df_test_cat.Online_support.fillna(df_test_cat.Online_support.mode()[0],inplace=True)

In [77]:
df_test_cat.Onlinebooking_Ease.fillna(df_test_cat.Onlinebooking_Ease.mode()[0],inplace=True)

In [78]:
df_test_cat.Onboard_service.fillna(df_test_cat.Onboard_service.mode()[0],inplace=True)

In [79]:
df_test_cat.Leg_room.fillna(df_test_cat.Leg_room.mode()[0],inplace=True)

In [80]:
df_test_cat.Baggage_handling.fillna(df_test_cat.Baggage_handling.mode()[0],inplace=True)

In [81]:
df_test_cat.Checkin_service.fillna(df_test_cat.Checkin_service.mode()[0],inplace=True)

In [82]:
df_test_cat.Cleanliness.fillna(df_test_cat.Cleanliness.mode()[0],inplace=True)

In [83]:
df_test_cat.Online_boarding.fillna(df_test_cat.Online_boarding.mode()[0],inplace=True)

In [84]:
df_test_cat.Gender.fillna(df_test_cat.Gender.mode()[0],inplace=True)

In [85]:
df_test_cat.CustomerType.fillna(df_test_cat.CustomerType.mode()[0],inplace=True)

In [86]:
df_test_cat.TypeTravel.fillna(df_test_cat.TypeTravel.mode()[0],inplace=True)

In [87]:
df_test_cat.isnull().sum()

Seat_comfort               0
Seat_Class                 0
Arrival_time_convenient    0
Catering                   0
Platform_location          0
Onboardwifi_service        0
Onboard_entertainment      0
Online_support             0
Onlinebooking_Ease         0
Onboard_service            0
Leg_room                   0
Baggage_handling           0
Checkin_service            0
Cleanliness                0
Online_boarding            0
Gender                     0
CustomerType               0
TypeTravel                 0
Travel_Class               0
dtype: int64

In [88]:
df_test_num.isnull().sum()

ID                          0
Age                        11
Travel_Distance             0
DepartureDelay_in_Mins     29
ArrivalDelay_in_Mins      123
dtype: int64

In [89]:
df_test_num.Age.fillna(df_test_num.Age.mode()[0],inplace=True)

In [90]:
df_test_num.Age.unique()

array([36., 21., 60., 29., 18., 49., 40., 11., 57., 43., 20., 15., 28.,
       17., 34., 63., 35., 25., 27., 32., 46., 54., 33., 30., 37., 47.,
       42., 58., 31., 48., 44., 65., 45., 24., 19., 66., 70., 10., 41.,
       52., 23., 22., 26., 68., 38., 51., 53., 50.,  7., 69., 39., 56.,
       59., 16., 14., 55., 62.,  8., 13.,  9., 64., 77., 12., 61., 80.,
       67., 76., 71., 72., 78., 73., 75., 74., 85., 79.])

In [91]:
df_test_num.DepartureDelay_in_Mins.mean()

14.88069603350856

In [92]:
df_test_num.DepartureDelay_in_Mins.fillna(15,inplace=True)

In [93]:
df_test_num.ArrivalDelay_in_Mins.fillna(15,inplace=True)

In [94]:
df_test_num.isnull().sum()

ID                        0
Age                       0
Travel_Distance           0
DepartureDelay_in_Mins    0
ArrivalDelay_in_Mins      0
dtype: int64

In [95]:
df_test_num_scaled = df_test_num[['Age','Travel_Distance','DepartureDelay_in_Mins','ArrivalDelay_in_Mins']].apply(zscore)

In [96]:
df_test = pd.concat([df_test_num_scaled,df_test_cat],axis=1,join='inner')
df_test

Unnamed: 0,Age,Travel_Distance,DepartureDelay_in_Mins,ArrivalDelay_in_Mins,Seat_comfort,Seat_Class,Arrival_time_convenient,Catering,Platform_location,Onboardwifi_service,...,Onboard_service,Leg_room,Baggage_handling,Checkin_service,Cleanliness,Online_boarding,Gender,CustomerType,TypeTravel,Travel_Class
0,-0.227741,-1.420638,-0.392846,-0.397974,acceptable,Green Car,acceptable,acceptable,manageable,need improvement,...,excellent,excellent,excellent,good,excellent,poor,Female,Loyal Customer,Business travel,Business
1,-1.218821,-0.548819,-0.155250,0.329977,extremely poor,Ordinary,good,poor,manageable,acceptable,...,excellent,acceptable,good,acceptable,excellent,acceptable,Female,disloyal Customer,Business travel,Business
2,1.357987,0.824810,-0.392846,-0.397974,excellent,Ordinary,excellent,excellent,very convinient,excellent,...,need improvement,need improvement,need improvement,good,need improvement,excellent,Male,Loyal Customer,Business travel,Business
3,-0.690245,-0.620087,-0.392846,-0.397974,acceptable,Green Car,excellent,acceptable,very convinient,poor,...,acceptable,need improvement,excellent,excellent,excellent,poor,Female,Loyal Customer,Personal Travel,Eco
4,-1.417037,-0.368206,0.055946,-0.397974,excellent,Ordinary,extremely poor,excellent,need improvement,excellent,...,good,acceptable,excellent,excellent,excellent,excellent,Male,disloyal Customer,Business travel,Business
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
35597,-2.077757,-0.637660,-0.392846,-0.397974,need improvement,Green Car,excellent,need improvement,manageable,acceptable,...,good,excellent,good,acceptable,good,acceptable,Male,Loyal Customer,Personal Travel,Eco
35598,0.895483,-0.210049,-0.392846,-0.397974,need improvement,Ordinary,need improvement,good,need improvement,acceptable,...,good,good,good,acceptable,good,good,Female,Loyal Customer,Business travel,Business
35599,-1.152749,-0.788008,-0.392846,-0.397974,good,Green Car,extremely poor,good,need improvement,need improvement,...,poor,acceptable,poor,poor,excellent,need improvement,Male,disloyal Customer,Business travel,Eco
35600,1.820491,-1.529982,0.214343,0.017998,excellent,Ordinary,excellent,excellent,Inconvinient,acceptable,...,excellent,excellent,excellent,acceptable,excellent,good,Female,Loyal Customer,Personal Travel,Eco


In [97]:
df_test['Seat_comfort'] = np.where(df_test['Seat_comfort'] == 'extremely poor', '0',df_test['Seat_comfort'])
df_test['Seat_comfort'] = np.where(df_test['Seat_comfort'] == 'poor', '1',df_test['Seat_comfort'])
df_test['Seat_comfort'] = np.where(df_test['Seat_comfort'] == 'need improvement', '2',df_test['Seat_comfort'])
df_test['Seat_comfort'] = np.where(df_test['Seat_comfort'] == 'acceptable', '3',df_test['Seat_comfort'])
df_test['Seat_comfort'] = np.where(df_test['Seat_comfort'] == 'good', '4',df_test['Seat_comfort'])
df_test['Seat_comfort'] = np.where(df_test['Seat_comfort'] == 'excellent', '5',df_test['Seat_comfort'])

In [98]:
df_test['Catering'] = np.where(df_test['Catering'] == 'extremely poor', '0',df_test['Catering'])
df_test['Catering'] = np.where(df_test['Catering'] == 'poor', '1',df_test['Catering'])
df_test['Catering'] = np.where(df_test['Catering'] == 'need improvement', '2',df_test['Catering'])
df_test['Catering'] = np.where(df_test['Catering'] == 'acceptable', '3',df_test['Catering'])
df_test['Catering'] = np.where(df_test['Catering'] == 'good', '4',df_test['Catering'])
df_test['Catering'] = np.where(df_test['Catering'] == 'excellent', '5',df_test['Catering'])

In [99]:
df_test['Arrival_time_convenient'] = np.where(df_test['Arrival_time_convenient'] == 'extremely poor', '0',df_test['Arrival_time_convenient'])
df_test['Arrival_time_convenient'] = np.where(df_test['Arrival_time_convenient'] == 'poor', '1',df_test['Arrival_time_convenient'])
df_test['Arrival_time_convenient'] = np.where(df_test['Arrival_time_convenient'] == 'need improvement', '2',df_test['Arrival_time_convenient'])
df_test['Arrival_time_convenient'] = np.where(df_test['Arrival_time_convenient'] == 'acceptable', '3',df_test['Arrival_time_convenient'])
df_test['Arrival_time_convenient'] = np.where(df_test['Arrival_time_convenient'] == 'good', '4',df_test['Arrival_time_convenient'])
df_test['Arrival_time_convenient'] = np.where(df_test['Arrival_time_convenient'] == 'excellent', '5',df_test['Arrival_time_convenient'])

In [100]:
df_test['Platform_location'] = np.where(df_test['Platform_location'] == 'very inconvinient', '0',df_test['Platform_location'])
df_test['Platform_location'] = np.where(df_test['Platform_location'] == 'Inconvinient', '1',df_test['Platform_location'])
df_test['Platform_location'] = np.where(df_test['Platform_location'] == 'need improvement', '2',df_test['Platform_location'])
df_test['Platform_location'] = np.where(df_test['Platform_location'] == 'manageable', '3',df_test['Platform_location'])
df_test['Platform_location'] = np.where(df_test['Platform_location'] == 'Convinient', '4',df_test['Platform_location'])
df_test['Platform_location'] = np.where(df_test['Platform_location'] == 'very convinient', '5',df_test['Platform_location'])

In [101]:
df_test['Seat_Class'] = np.where(df_test['Seat_Class'] == 'Ordinary', '0',df_test['Seat_Class'])
df_test['Seat_Class'] = np.where(df_test['Seat_Class'] == 'Green Car', '1',df_test['Seat_Class'])

In [102]:
df_test['Onboard_entertainment'] = np.where(df_test['Onboard_entertainment'] == 'extremely poor', '0',df_test['Onboard_entertainment'])
df_test['Onboard_entertainment'] = np.where(df_test['Onboard_entertainment'] == 'poor', '1',df_test['Onboard_entertainment'])
df_test['Onboard_entertainment'] = np.where(df_test['Onboard_entertainment'] == 'need improvement', '2',df_test['Onboard_entertainment'])
df_test['Onboard_entertainment'] = np.where(df_test['Onboard_entertainment'] == 'acceptable', '3',df_test['Onboard_entertainment'])
df_test['Onboard_entertainment'] = np.where(df_test['Onboard_entertainment'] == 'good', '4',df_test['Onboard_entertainment'])
df_test['Onboard_entertainment'] = np.where(df_test['Onboard_entertainment'] == 'excellent', '5',df_test['Onboard_entertainment'])

In [103]:
df_test['Online_support'] = np.where(df_test['Online_support'] == 'extremely poor', '0',df_test['Online_support'])
df_test['Online_support'] = np.where(df_test['Online_support'] == 'poor', '1',df_test['Online_support'])
df_test['Online_support'] = np.where(df_test['Online_support'] == 'need improvement', '2',df_test['Online_support'])
df_test['Online_support'] = np.where(df_test['Online_support'] == 'acceptable', '3',df_test['Online_support'])
df_test['Online_support'] = np.where(df_test['Online_support'] == 'good', '4',df_test['Online_support'])
df_test['Online_support'] = np.where(df_test['Online_support'] == 'excellent', '5',df_test['Online_support'])

In [104]:
df_test['Onboardwifi_service'] = np.where(df_test['Onboardwifi_service'] == 'extremely poor', '0',df_test['Onboardwifi_service'])
df_test['Onboardwifi_service'] = np.where(df_test['Onboardwifi_service'] == 'poor', '1',df_test['Onboardwifi_service'])
df_test['Onboardwifi_service'] = np.where(df_test['Onboardwifi_service'] == 'need improvement', '2',df_test['Onboardwifi_service'])
df_test['Onboardwifi_service'] = np.where(df_test['Onboardwifi_service'] == 'acceptable', '3',df_test['Onboardwifi_service'])
df_test['Onboardwifi_service'] = np.where(df_test['Onboardwifi_service'] == 'good', '4',df_test['Onboardwifi_service'])
df_test['Onboardwifi_service'] = np.where(df_test['Onboardwifi_service'] == 'excellent', '5',df_test['Onboardwifi_service'])

In [105]:
df_test['Leg_room'] = np.where(df_test['Leg_room'] == 'extremely poor', '0',df_test['Leg_room'])
df_test['Leg_room'] = np.where(df_test['Leg_room'] == 'poor', '1',df_test['Leg_room'])
df_test['Leg_room'] = np.where(df_test['Leg_room'] == 'need improvement', '2',df_test['Leg_room'])
df_test['Leg_room'] = np.where(df_test['Leg_room'] == 'acceptable', '3',df_test['Leg_room'])
df_test['Leg_room'] = np.where(df_test['Leg_room'] == 'good', '4',df_test['Leg_room'])
df_test['Leg_room'] = np.where(df_test['Leg_room'] == 'excellent', '5',df_test['Leg_room'])

In [106]:
df_test['Onboard_service'] = np.where(df_test['Onboard_service'] == 'extremely poor', '0',df_test['Onboard_service'])
df_test['Onboard_service'] = np.where(df_test['Onboard_service'] == 'poor', '1',df_test['Onboard_service'])
df_test['Onboard_service'] = np.where(df_test['Onboard_service'] == 'need improvement', '2',df_test['Onboard_service'])
df_test['Onboard_service'] = np.where(df_test['Onboard_service'] == 'acceptable', '3',df_test['Onboard_service'])
df_test['Onboard_service'] = np.where(df_test['Onboard_service'] == 'good', '4',df_test['Onboard_service'])
df_test['Onboard_service'] = np.where(df_test['Onboard_service'] == 'excellent', '5',df_test['Onboard_service'])

In [107]:
df_test['Baggage_handling'] = np.where(df_test['Baggage_handling'] == 'extremely poor', '0',df_test['Baggage_handling'])
df_test['Baggage_handling'] = np.where(df_test['Baggage_handling'] == 'poor', '1',df_test['Baggage_handling'])
df_test['Baggage_handling'] = np.where(df_test['Baggage_handling'] == 'need improvement', '2',df_test['Baggage_handling'])
df_test['Baggage_handling'] = np.where(df_test['Baggage_handling'] == 'acceptable', '3',df_test['Baggage_handling'])
df_test['Baggage_handling'] = np.where(df_test['Baggage_handling'] == 'good', '4',df_test['Baggage_handling'])
df_test['Baggage_handling'] = np.where(df_test['Baggage_handling'] == 'excellent', '5',df_test['Baggage_handling'])

In [108]:
df_test['TypeTravel'] = np.where(df_test['TypeTravel'] == 'Personal Travel', '0',df_test['TypeTravel'])
df_test['TypeTravel'] = np.where(df_test['TypeTravel'] == 'Business travel', '1',df_test['TypeTravel'])

In [109]:
df_test['Cleanliness'] = np.where(df_test['Cleanliness'] == 'extremely poor', '0',df_test['Cleanliness'])
df_test['Cleanliness'] = np.where(df_test['Cleanliness'] == 'poor', '1',df_test['Cleanliness'])
df_test['Cleanliness'] = np.where(df_test['Cleanliness'] == 'need improvement', '2',df_test['Cleanliness'])
df_test['Cleanliness'] = np.where(df_test['Cleanliness'] == 'acceptable', '3',df_test['Cleanliness'])
df_test['Cleanliness'] = np.where(df_test['Cleanliness'] == 'good', '4',df_test['Cleanliness'])
df_test['Cleanliness'] = np.where(df_test['Cleanliness'] == 'excellent', '5',df_test['Cleanliness'])

In [110]:
df_test['CustomerType'] = np.where(df_test['CustomerType'] == 'disloyal Customer', '0',df_test['CustomerType'])
df_test['CustomerType'] = np.where(df_test['CustomerType'] == 'Loyal Customer', '1',df_test['CustomerType'])

In [111]:
df_test['Travel_Class'] = np.where(df_test['Travel_Class'] == 'Eco', '0',df_test['Travel_Class'])
df_test['Travel_Class'] = np.where(df_test['Travel_Class'] == 'Business', '1',df_test['Travel_Class'])

In [112]:
df_test['Online_boarding'] = np.where(df_test['Online_boarding'] == 'extremely poor', '0',df_test['Online_boarding'])
df_test['Online_boarding'] = np.where(df_test['Online_boarding'] == 'poor', '1',df_test['Online_boarding'])
df_test['Online_boarding'] = np.where(df_test['Online_boarding'] == 'need improvement', '2',df_test['Online_boarding'])
df_test['Online_boarding'] = np.where(df_test['Online_boarding'] == 'acceptable', '3',df_test['Online_boarding'])
df_test['Online_boarding'] = np.where(df_test['Online_boarding'] == 'good', '4',df_test['Online_boarding'])
df_test['Online_boarding'] = np.where(df_test['Online_boarding'] == 'excellent', '5',df_test['Online_boarding'])

In [113]:
df_test['Checkin_service'] = np.where(df_test['Checkin_service'] == 'extremely poor', '0',df_test['Checkin_service'])
df_test['Checkin_service'] = np.where(df_test['Checkin_service'] == 'poor', '1',df_test['Checkin_service'])
df_test['Checkin_service'] = np.where(df_test['Checkin_service'] == 'need improvement', '2',df_test['Checkin_service'])
df_test['Checkin_service'] = np.where(df_test['Checkin_service'] == 'acceptable', '3',df_test['Checkin_service'])
df_test['Checkin_service'] = np.where(df_test['Checkin_service'] == 'good', '4',df_test['Checkin_service'])
df_test['Checkin_service'] = np.where(df_test['Checkin_service'] == 'excellent', '5',df_test['Checkin_service'])

In [114]:
df_test['Onlinebooking_Ease'] = np.where(df_test['Onlinebooking_Ease'] == 'extremely poor', '0',df_test['Onlinebooking_Ease'])
df_test['Onlinebooking_Ease'] = np.where(df_test['Onlinebooking_Ease'] == 'poor', '1',df_test['Onlinebooking_Ease'])
df_test['Onlinebooking_Ease'] = np.where(df_test['Onlinebooking_Ease'] == 'need improvement', '2',df_test['Onlinebooking_Ease'])
df_test['Onlinebooking_Ease'] = np.where(df_test['Onlinebooking_Ease'] == 'acceptable', '3',df_test['Onlinebooking_Ease'])
df_test['Onlinebooking_Ease'] = np.where(df_test['Onlinebooking_Ease'] == 'good', '4',df_test['Onlinebooking_Ease'])
df_test['Onlinebooking_Ease'] = np.where(df_test['Onlinebooking_Ease'] == 'excellent', '5',df_test['Onlinebooking_Ease'])

In [115]:
df_test['Gender'] = np.where(df_test['Gender'] == 'Female', '0',df_test['Gender'])
df_test['Gender'] = np.where(df_test['Gender'] == 'Male', '1',df_test['Gender'])

In [116]:
## Converting the Data types into int64
df_test['Seat_comfort'] = df_test['Seat_comfort'].astype('int64')
df_test['Seat_Class'] = df_test['Seat_Class'].astype('int64')
df_test['Arrival_time_convenient'] = df_test['Arrival_time_convenient'].astype('int64')
df_test['Catering'] = df_test['Catering'].astype('int64')
df_test['Platform_location'] = df_test['Platform_location'].astype('int64')
df_test['Onboardwifi_service'] = df_test['Onboardwifi_service'].astype('int64')
df_test['Onboard_entertainment'] = df_test['Onboard_entertainment'].astype('int64')
df_test['Online_support'] = df_test['Online_support'].astype('int64')
df_test['Onlinebooking_Ease'] = df_test['Onlinebooking_Ease'].astype('int64')
df_test['Onboard_service'] = df_test['Onboard_service'].astype('int64')
df_test['Leg_room'] = df_test['Leg_room'].astype('int64')
df_test['Baggage_handling'] = df_test['Baggage_handling'].astype('int64')
df_test['Checkin_service'] = df_test['Checkin_service'].astype('int64')
df_test['Cleanliness'] = df_test['Cleanliness'].astype('int64')
df_test['Online_boarding'] = df_test['Online_boarding'].astype('int64')
df_test['Gender'] = df_test['Gender'].astype('int64')
df_test['CustomerType'] = df_test['CustomerType'].astype('int64')
df_test['TypeTravel'] = df_test['TypeTravel'].astype('int64')
df_test['Travel_Class'] = df_test['Travel_Class'].astype('int64')

In [117]:
df_train['Overall_Experience'].value_counts(normalize = True)

1    0.546658
0    0.453342
Name: Overall_Experience, dtype: float64

In [118]:
# making copies of dataset
df_train_1 = df_train.copy()
df_train_2 = df_train.copy()

In [119]:
X = df_train_1.drop('Overall_Experience',axis=1)
y=df_train_1['Overall_Experience']

In [120]:
## Splitting the data into Xtrain,ytrain,Xtest,ytest
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.3,random_state=1)

In [121]:
rf1 = RandomForestClassifier(n_estimators=310, max_depth= 25, 
                             max_features= 15, random_state = 0,
                             n_jobs = -1)


#rf1.fit(X_train, y_train)
#ytrain_predict_rf1 = rf1.predict(X_train)
#display(accuracy_score(y_train, ytrain_predict_rf1))
#ytest_predict_rf1 = rf1.predict(X_test)
#display(accuracy_score(y_test, ytest_predict_rf1))

In [122]:
#ada8 = AdaBoostClassifier(rf1, n_estimators=46, learning_rate=1.0, random_state= 0)

#ada8.fit(X_train, y_train)
#ytrain_predict_ada8 = ada8.predict(X_train)
#display(accuracy_score(y_train, ytrain_predict_ada8))
#ytest_predict_ada8 = ada8.predict(X_test)
#display(accuracy_score(y_test, ytest_predict_ada8))

In [123]:
df_test1 = df_test_num.copy()

In [124]:
#pred = ada8.predict(df_test)
#df_test1['Overall_Experience'] = pred
#sample_9 = df_test1[['ID','Overall_Experience']]

In [125]:
#sample_9.to_csv(r"C:\Users\ANAY TIWARI\OneDrive\Desktop\Hackathon_GL\prediction_8.csv")

In [126]:
#ada8 = AdaBoostClassifier(rf1, n_estimators=47, learning_rate=1.0, random_state= 0)

#ada8.fit(X_train, y_train)
#ytrain_predict_ada8 = ada8.predict(X_train)
#display(accuracy_score(y_train, ytrain_predict_ada8))
#ytest_predict_ada8 = ada8.predict(X_test)
#display(accuracy_score(y_test, ytest_predict_ada8))

In [127]:
#ada8 = AdaBoostClassifier(rf1, n_estimators=46, learning_rate=1.0, random_state= 1)

#ada8.fit(X_train, y_train)
#ytrain_predict_ada8 = ada8.predict(X_train)
#display(accuracy_score(y_train, ytrain_predict_ada8))
#ytest_predict_ada8 = ada8.predict(X_test)
#display(accuracy_score(y_test, ytest_predict_ada8))

In [128]:
#ada9 = AdaBoostClassifier(rf1, n_estimators=35, learning_rate=1.0, random_state= 0)

#ada9.fit(X_train, y_train)
#ytrain_predict_ada9 = ada9.predict(X_train)
#display(accuracy_score(y_train, ytrain_predict_ada9))
#ytest_predict_ada9 = ada9.predict(X_test)
#display(accuracy_score(y_test, ytest_predict_ada9))

In [129]:
#pred = ada9.predict(df_test)
#df_test1['Overall_Experience'] = pred
#sample_9 = df_test1[['ID','Overall_Experience']]

In [130]:
#sample_9.to_csv(r"C:\Users\ANAY TIWARI\OneDrive\Desktop\Hackathon_GL\prediction_9.csv")

In [131]:
#ada9 = AdaBoostClassifier(rf1, n_estimators=35, learning_rate=1.1, random_state= 0)

#ada9.fit(X_train, y_train)
#ytrain_predict_ada9 = ada9.predict(X_train)
#display(accuracy_score(y_train, ytrain_predict_ada9))
#ytest_predict_ada9 = ada9.predict(X_test)
#display(accuracy_score(y_test, ytest_predict_ada9))

In [132]:
#ada9 = AdaBoostClassifier(rf1, n_estimators=35, learning_rate=1.2, random_state= 0)

#ada9.fit(X_train, y_train)
#ytrain_predict_ada9 = ada9.predict(X_train)
#display(accuracy_score(y_train, ytrain_predict_ada9))
#ytest_predict_ada9 = ada9.predict(X_test)
#display(accuracy_score(y_test, ytest_predict_ada9))

In [133]:
import xgboost as xgb

In [136]:
model = xgb.XGBClassifier(n_estimators=35, random_state = 0)

In [137]:
model.fit(X_train,y_train)

In [138]:
ytrain_predict = model.predict(X_train)
display(accuracy_score(y_train,ytrain_predict))
ytest_predict = model.predict(X_test)
display(accuracy_score(y_test,ytest_predict))

0.9539241655944902

0.94811753902663

In [139]:
pred = model.predict(df_test)
df_test1['Overall_Experience'] = pred
sample_10 = df_test1[['ID','Overall_Experience']]

In [140]:
sample_10.to_csv(r"C:\Users\ANAY TIWARI\OneDrive\Desktop\Hackathon_GL\prediction_10.csv")

In [142]:
ada8 = AdaBoostClassifier(rf1, n_estimators=46, learning_rate=1.0, random_state= 1)

ada8.fit(X_train, y_train)
ytrain_predict_ada8 = ada8.predict(X_train)
display(accuracy_score(y_train, ytrain_predict_ada8))
ytest_predict_ada8 = ada8.predict(X_test)
display(accuracy_score(y_test, ytest_predict_ada8))

0.9999545901763415

0.9544394998940453

In [143]:
pred = ada8.predict(df_test)
df_test1['Overall_Experience'] = pred
sample_11 = df_test1[['ID','Overall_Experience']]

In [144]:
sample_11.to_csv(r"C:\Users\ANAY TIWARI\OneDrive\Desktop\Hackathon_GL\prediction_11.csv")

In [None]:
ada8 = AdaBoostClassifier(rf1, n_estimators=46, learning_rate=1.0, random_state=2)

ada8.fit(X_train, y_train)
ytrain_predict_ada8 = ada8.predict(X_train)
display(accuracy_score(y_train, ytrain_predict_ada8))
ytest_predict_ada8 = ada8.predict(X_test)
display(accuracy_score(y_test, ytest_predict_ada8))