# Importing Packages

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import OneHotEncoder, OrdinalEncoder, LabelEncoder, StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.model_selection import train_test_split, cross_val_score, GridSearchCV
from sklearn.naive_bayes import GaussianNB, BernoulliNB, CategoricalNB,ComplementNB,MultinomialNB
from sklearn.tree import DecisionTreeClassifier,ExtraTreeClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.ensemble import AdaBoostClassifier,BaggingClassifier,GradientBoostingClassifier,RandomForestClassifier,HistGradientBoostingClassifier
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis,QuadraticDiscriminantAnalysis
from sklearn.svm import LinearSVC, SVC, NuSVC
from sklearn.linear_model import RidgeClassifier, LogisticRegression, Perceptron
from sklearn.neighbors import KNeighborsClassifier

In [2]:
from sklearn.decomposition import PCA
from sklearn.preprocessing import PolynomialFeatures

# Load and Separate Target

In [3]:
# Read the training table from disk.
df = pd.read_csv('train1.csv')

# Split by position: the final column is taken as the label, every column
# before it as a predictor.
target = df.columns[-1]
features = df.columns[:-1]
y = df[target]
X = df[features]

In [4]:
# Fit a label encoder on the target and replace `y` with its integer codes;
# the fitted encoder is kept in `lb`.
lb = LabelEncoder()
lb.fit(y)
y = lb.transform(y)

# Column names to strip out of the working list `l`.
order = ['offer_expiration(hrs)', 'age', 'income_range', 'temperature', 'travel_time']

# Start from every column of X and drop the names above one by one;
# `list.remove` raises ValueError if any of them is missing from X.
l = list(X.columns)
for col in order:
    l.remove(col)

# Feature Engineering

In [5]:
# One-hot encode the columns named in `l`; all other columns pass through as-is.
X = pd.get_dummies(X, columns=l)

In [6]:
# Preview the last five rows of the frame after one-hot encoding.
X.tail(5)

Unnamed: 0,offer_expiration(hrs),income_range,age,temperature,travel_time,shaadi_Divorced,shaadi_Married partner,shaadi_Single,shaadi_Unmarried partner,shaadi_Widowed,...,visit_restaurant_with_rating_(avg)_3,visit_restaurant_with_rating_(avg)_4,climate_Spring,climate_Summer,climate_Winter,drop_location_Location A,drop_location_Location B,drop_location_Location C,prefer_home_food_0,prefer_home_food_1
12374,10,31249.5,52,89,22,0,0,0,1,0,...,0,0,0,1,0,0,1,0,1,0
12375,48,31249.5,21,67,18,0,0,1,0,0,...,0,0,0,1,0,0,1,0,1,0
12376,10,93749.5,31,89,18,0,1,0,0,0,...,0,0,1,0,0,0,1,0,0,1
12377,10,18749.5,26,89,7,0,1,0,0,0,...,0,0,0,1,0,0,0,1,1,0
12378,48,43749.5,18,67,7,0,0,0,1,0,...,0,0,1,0,0,1,0,0,0,1


In [7]:
# Ordinal-encode every column of X. The encoder is fitted and applied in one
# step; `enc` remains the fitted encoder and `temp` holds the encoded array.
# (The previous intermediate name `ord` shadowed Python's builtin `ord()` and
# was only an alias of `enc`, since `fit` returns the estimator itself.)
enc = OrdinalEncoder()
temp = enc.fit_transform(X)

In [8]:
# Wrap the encoded array back into a DataFrame. Column labels AND the row
# index are carried over from the pre-encoding frame so rows stay traceable
# to the source data even if X does not use a default 0..n-1 index.
X = pd.DataFrame(data=temp, columns=X.columns, index=X.index)
X.tail()

Unnamed: 0,offer_expiration(hrs),income_range,age,temperature,travel_time,shaadi_Divorced,shaadi_Married partner,shaadi_Single,shaadi_Unmarried partner,shaadi_Widowed,...,visit_restaurant_with_rating_(avg)_3,visit_restaurant_with_rating_(avg)_4,climate_Spring,climate_Summer,climate_Winter,drop_location_Location A,drop_location_Location B,drop_location_Location C,prefer_home_food_0,prefer_home_food_1
12374,0.0,2.0,7.0,2.0,4.0,0.0,0.0,0.0,1.0,0.0,...,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0
12375,1.0,2.0,1.0,1.0,3.0,0.0,0.0,1.0,0.0,0.0,...,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0
12376,0.0,7.0,3.0,2.0,3.0,0.0,1.0,0.0,0.0,0.0,...,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0
12377,0.0,1.0,2.0,2.0,0.0,0.0,1.0,0.0,0.0,0.0,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,1.0,0.0
12378,1.0,3.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,...,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0


# Train Test Split

In [9]:
# Hold out 20% of the rows for testing. Rows are shuffled before splitting,
# and the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.20, random_state=42, shuffle=True
)

# Linear Classifiers

In [10]:
# Linear models to compare; LogisticRegression gets an explicit iteration cap.
classifers = [
    RidgeClassifier(),
    LogisticRegression(max_iter=1000),
    Perceptron(),
]

In [11]:
# For each linear classifier: fit on the training split, then report accuracy
# on the training split, on the held-out split, and under 5-fold CV.
for clf in classifers:
    clf.fit(X_train, y_train)
    print('####', clf, '######')
    # `score` returns mean accuracy (higher is better), not an error rate,
    # so the labels say "Accuracy".
    print('Empirical/Training Accuracy', clf.score(X_train, y_train))
    print('Generalisation/Test Accuracy', clf.score(X_test, y_test))
    # Cross-validation runs over the full X/y, not only the training split.
    scores = cross_val_score(clf, X, y, cv=5, n_jobs=-1)
    print("Accuracy of %0.3f \n with a \n Standard Deviation of %0.3f" % (scores.mean(), scores.std()))

#### RidgeClassifier() ######
Emperical/Training Error 0.6060789659699081
Genrealisation/Test Error 0.592891760904685
Accuracy of 0.596 
 with a 
 Standard Deviation of 0.007
#### LogisticRegression(max_iter=1000) ######
Emperical/Training Error 0.607189740482682
Genrealisation/Test Error 0.5916801292407108
Accuracy of 0.597 
 with a 
 Standard Deviation of 0.007
#### Perceptron() ######
Emperical/Training Error 0.5409471877208927
Genrealisation/Test Error 0.5468497576736672
Accuracy of 0.539 
 with a 
 Standard Deviation of 0.038


# Naive Bayes

In [12]:
# Naive Bayes variants to compare, all with default settings.
classifers = [
    GaussianNB(),
    BernoulliNB(),
    CategoricalNB(),
    ComplementNB(),
    MultinomialNB(),
]

In [13]:
# For each Naive Bayes model: fit on the training split, then report accuracy
# on the training split, on the held-out split, and under 5-fold CV.
for clf in classifers:
    clf.fit(X_train, y_train)
    print('####', clf, '######')
    # `score` returns mean accuracy (higher is better), not an error rate,
    # so the labels say "Accuracy".
    print('Empirical/Training Accuracy', clf.score(X_train, y_train))
    print('Generalisation/Test Accuracy', clf.score(X_test, y_test))
    # Cross-validation runs over the full X/y, not only the training split.
    scores = cross_val_score(clf, X, y, cv=5, n_jobs=-1)
    print("Accuracy of %0.3f \n with a \n Standard Deviation of %0.3f" % (scores.mean(), scores.std()))

#### GaussianNB() ######
Emperical/Training Error 0.5610421084519842
Genrealisation/Test Error 0.5496768982229402
Accuracy of 0.552 
 with a 
 Standard Deviation of 0.007
#### BernoulliNB() ######
Emperical/Training Error 0.5943653438352015
Genrealisation/Test Error 0.5751211631663974
Accuracy of 0.588 
 with a 
 Standard Deviation of 0.008
#### CategoricalNB() ######
Emperical/Training Error 0.5953751388468141
Genrealisation/Test Error 0.5803715670436187
Accuracy of 0.588 
 with a 
 Standard Deviation of 0.011
#### ComplementNB() ######
Emperical/Training Error 0.5798242956679794
Genrealisation/Test Error 0.5731017770597738
Accuracy of 0.573 
 with a 
 Standard Deviation of 0.006
#### MultinomialNB() ######
Emperical/Training Error 0.5986064828839746
Genrealisation/Test Error 0.5827948303715671
Accuracy of 0.590 
 with a 
 Standard Deviation of 0.005


# Tree Methods

In [14]:
# Single-tree models to compare. Both are seeded so that repeated runs of the
# notebook give the same trees, matching the seeded train/test split.
classifers = [
    DecisionTreeClassifier(random_state=42),
    ExtraTreeClassifier(random_state=42),
]

In [15]:
# For each tree model: fit on the training split, then report accuracy on the
# training split, on the held-out split, and under 5-fold CV.
for clf in classifers:
    clf.fit(X_train, y_train)
    print('####', clf, '######')
    # `score` returns mean accuracy (higher is better), not an error rate,
    # so the labels say "Accuracy".
    print('Empirical/Training Accuracy', clf.score(X_train, y_train))
    print('Generalisation/Test Accuracy', clf.score(X_test, y_test))
    # Cross-validation runs over the full X/y, not only the training split.
    scores = cross_val_score(clf, X, y, cv=5, n_jobs=-1)
    print("Accuracy of %0.3f \n with a \n Standard Deviation of %0.3f" % (scores.mean(), scores.std()))

#### DecisionTreeClassifier() ######
Emperical/Training Error 1.0
Genrealisation/Test Error 0.529886914378029
Accuracy of 0.541 
 with a 
 Standard Deviation of 0.011
#### ExtraTreeClassifier() ######
Emperical/Training Error 1.0
Genrealisation/Test Error 0.5424071082390953
Accuracy of 0.533 
 with a 
 Standard Deviation of 0.009


# Ensemble Classifiers

In [16]:
# Ensemble models to compare. Each one is seeded so that repeated runs of the
# notebook give the same results, matching the seeded train/test split.
classifers = [
    AdaBoostClassifier(random_state=42),
    BaggingClassifier(random_state=42),
    GradientBoostingClassifier(random_state=42),
    RandomForestClassifier(random_state=42),
    HistGradientBoostingClassifier(random_state=42),
]

In [17]:
# For each ensemble model: fit on the training split, then report accuracy on
# the training split, on the held-out split, and under 5-fold CV.
for clf in classifers:
    clf.fit(X_train, y_train)
    print('####', clf, '######')
    # `score` returns mean accuracy (higher is better), not an error rate,
    # so the labels say "Accuracy".
    print('Empirical/Training Accuracy', clf.score(X_train, y_train))
    print('Generalisation/Test Accuracy', clf.score(X_test, y_test))
    # Cross-validation runs over the full X/y, not only the training split.
    scores = cross_val_score(clf, X, y, cv=5, n_jobs=-1)
    print("Accuracy of %0.3f \n with a \n Standard Deviation of %0.3f" % (scores.mean(), scores.std()))

#### AdaBoostClassifier() ######
Emperical/Training Error 0.6058770069675856
Genrealisation/Test Error 0.5993537964458805
Accuracy of 0.597 
 with a 
 Standard Deviation of 0.007
#### BaggingClassifier() ######
Emperical/Training Error 0.9857618903362617
Genrealisation/Test Error 0.5678513731825525
Accuracy of 0.560 
 with a 
 Standard Deviation of 0.012
#### GradientBoostingClassifier() ######
Emperical/Training Error 0.6373826113299
Genrealisation/Test Error 0.6037964458804523
Accuracy of 0.602 
 with a 
 Standard Deviation of 0.010
#### RandomForestClassifier() ######
Emperical/Training Error 1.0
Genrealisation/Test Error 0.6090468497576736
Accuracy of 0.596 
 with a 
 Standard Deviation of 0.006
#### HistGradientBoostingClassifier() ######
Emperical/Training Error 0.7614864182570938
Genrealisation/Test Error 0.6037964458804523
Accuracy of 0.608 
 with a 
 Standard Deviation of 0.003


# KNN and Discriminant Analysis

In [18]:
# Nearest-neighbour and discriminant-analysis models to compare.
# The MLP entry below is currently disabled (left commented out).
classifers = [
    # MLPClassifier(max_iter=10000),
    KNeighborsClassifier(),
    LinearDiscriminantAnalysis(),
    QuadraticDiscriminantAnalysis(),
]

In [19]:
# For each KNN / discriminant-analysis model: fit on the training split, then
# report accuracy on the training split, on the held-out split, and under
# 5-fold CV.
for clf in classifers:
    clf.fit(X_train, y_train)
    print('####', clf, '######')
    # `score` returns mean accuracy (higher is better), not an error rate,
    # so the labels say "Accuracy".
    print('Empirical/Training Accuracy', clf.score(X_train, y_train))
    print('Generalisation/Test Accuracy', clf.score(X_test, y_test))
    # Cross-validation runs over the full X/y, not only the training split.
    scores = cross_val_score(clf, X, y, cv=5, n_jobs=-1)
    print("Accuracy of %0.3f \n with a \n Standard Deviation of %0.3f" % (scores.mean(), scores.std()))

#### KNeighborsClassifier() ######
Emperical/Training Error 0.7244269413309098
Genrealisation/Test Error 0.5646203554119548
Accuracy of 0.562 
 with a 
 Standard Deviation of 0.006
#### LinearDiscriminantAnalysis() ######
Emperical/Training Error 0.6060789659699081
Genrealisation/Test Error 0.5924878836833603
Accuracy of 0.597 
 with a 
 Standard Deviation of 0.007
#### QuadraticDiscriminantAnalysis() ######
Emperical/Training Error 0.5425628597394729
Genrealisation/Test Error 0.5205977382875606




Accuracy of 0.514 
 with a 
 Standard Deviation of 0.024


# SVC Classifiers

In [20]:
# Support-vector models to compare; LinearSVC gets a large iteration cap.
classifers = [
    LinearSVC(max_iter=100000),
    SVC(),
    NuSVC(),
]

In [21]:
# For each support-vector model: fit on the training split, then report
# accuracy on the training split, on the held-out split, and under 5-fold CV.
for clf in classifers:
    clf.fit(X_train, y_train)
    print('####', clf, '######')
    # `score` returns mean accuracy (higher is better), not an error rate,
    # so the labels say "Accuracy".
    print('Empirical/Training Accuracy', clf.score(X_train, y_train))
    print('Generalisation/Test Accuracy', clf.score(X_test, y_test))
    # Cross-validation runs over the full X/y, not only the training split.
    scores = cross_val_score(clf, X, y, cv=5, n_jobs=-1)
    print("Accuracy of %0.3f \n with a \n Standard Deviation of %0.3f" % (scores.mean(), scores.std()))

#### LinearSVC(max_iter=100000) ######
Emperical/Training Error 0.6061799454710693
Genrealisation/Test Error 0.592891760904685
Accuracy of 0.596 
 with a 
 Standard Deviation of 0.007
#### SVC() ######
Emperical/Training Error 0.6368777138240937
Genrealisation/Test Error 0.5876413570274637
Accuracy of 0.598 
 with a 
 Standard Deviation of 0.006
#### NuSVC() ######
Emperical/Training Error 0.9470867413914975
Genrealisation/Test Error 0.5779483037156704
Accuracy of 0.566 
 with a 
 Standard Deviation of 0.007
