# Airline Passenger Satisfaction - Model

### Ad 1. Split data

#### Algorithm

In [1]:
from numpy.random import RandomState
import pandas as pd
import numpy as np

In [19]:
class Splitter:
    
    def __init__(self, fileName):
        self.__df = pd.read_csv(fileName, sep=',')
        self.__df.drop(columns=['ind'])
        self.__train = self.__df.sample(frac=0.8, random_state=RandomState())
        
    def train_data(self):
        return np.array(self.__train, dtype=np.float64)
    
    def test_data(self):
        return np.array(self.__df.loc[~self.__df.index.isin(self.__train.index)], dtype=np.float64)
    
    def train_X_data(self):
        tmp_df = self.__train
        return np.array(tmp_df.drop(columns=['satisfaction']), dtype=np.float64)
    
    def train_Y_data(self):
        return np.array(self.__train['satisfaction'], dtype=np.float64)
    
    def test_X_data(self):
        tmp_df = self.__df.loc[~self.__df.index.isin(self.__train.index)]
        return np.array(tmp_df.drop(columns=['satisfaction']), dtype=np.float64)
    
    def test_Y_data(self):
        return np.array(self.__df.loc[~self.__df.index.isin(self.__train.index)]['satisfaction'], dtype=np.float64)

In [22]:
sp = Splitter('finalDataset.csv')
train_shape = sp.train_data().shape
test_shape =sp.test_data().shape

#### Result of split

In [23]:
total = train_shape[0] + test_shape[0]
print('> Train data percent: ' + str(train_shape[0]/total * 100) + ', and test data percent: ' + str(test_shape[0]/total * 100))

> Train data percent: 80.0, and test data percent: 20.0


### Ad 2. Classificators

In [40]:
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
from sklearn.ensemble import GradientBoostingClassifier

#### 2.1. SVC

In [36]:
clf = SVC()
clf.fit(sp.train_X_data(), sp.train_Y_data())
clf.score(sp.test_X_data(), sp.test_Y_data())

0.5703726516784724

#### 2.2. DecisionTreeClassifier

In [37]:
clf2 = DecisionTreeClassifier()
clf2.fit(sp.train_X_data(), sp.train_Y_data())
clf2.score(sp.test_X_data(), sp.test_Y_data())

0.9371342777948876

#### 2.3. QuadraticDiscriminantAnalysis

In [38]:
clf3 = QuadraticDiscriminantAnalysis()
clf3.fit(sp.train_X_data(), sp.train_Y_data())
clf3.score(sp.test_X_data(), sp.test_Y_data())

0.860409608869726

#### 2.4. QuadraticDiscriminantAnalysis

In [41]:
clf4 = GradientBoostingClassifier()
clf4.fit(sp.train_X_data(), sp.train_Y_data())
clf4.score(sp.test_X_data(), sp.test_Y_data())

0.9409839852171235