# Airline Passenger Satisfaction - Best Model (Voting Classifier) + PCA
----
## Load data

In [None]:
%run ./01_data_prep.ipynb
%run ./utils.ipynb

In [None]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier
from sklearn.linear_model import SGDClassifier
from sklearn.ensemble import RandomForestClassifier

# Base learners for the voting ensemble, each with fixed hyperparameters.
# NOTE(review): only the MLP is seeded (random_state=11); the decision tree,
# SGD and random-forest learners are unseeded, so results may differ between
# runs -- confirm whether that is intended.
model1 = KNeighborsClassifier(n_neighbors=5)
model2 = DecisionTreeClassifier(max_depth=13)
# The SVC and SGD learners are wrapped in pipelines that standardise their
# input features before the classifier sees them.
model3 = make_pipeline(StandardScaler(), SVC(gamma=0.03, C=0.8))
model4 = MLPClassifier(alpha=0.001, learning_rate_init=0.005, random_state=11)
model5 = make_pipeline(StandardScaler(), SGDClassifier(alpha=0.01))
ensemble = RandomForestClassifier(max_depth=20)

from sklearn.ensemble import VotingClassifier

# Combine the six learners with hard (majority-label) voting.
# Every estimator gets a non-empty, unique name so that it can be addressed
# through `named_estimators_` and `set_params(<name>__<param>=...)`; the SGD
# pipeline was previously registered under an empty string.
model = VotingClassifier(
    estimators=[
        ('knn', model1),
        ('dt', model2),
        ('svc', model3),
        ('mlp', model4),
        ('sgd', model5),
        ('rf', ensemble),
    ],
    voting='hard',
    n_jobs=-1,
)

# The ensemble is deliberately not fitted on the raw features here; it is
# fitted on the PCA-transformed training features in a later cell.

In [None]:
from sklearn.decomposition import PCA

## Find the optimal number of PCA components

In [None]:
# Candidate numbers of principal components to inspect.
n_components_poss = [2, 8, 12, 16, 20, 22]

# For each candidate, fit a PCA on the training features and print the
# explained-variance ratio of every kept component whose ratio exceeds 0.0001.
for n_components in n_components_poss:
    pca = PCA(n_components=n_components)
    # Only the fitted statistics are read below, so fit() is enough; the
    # projection returned by fit_transform() was computed and then discarded.
    pca.fit(X_train)
    print([ratio for ratio in pca.explained_variance_ratio_ if ratio > 0.0001])

In [None]:
# Reduce both splits to three principal components. The projection is learned
# from the training split only and then re-applied to the validation split.
# NOTE(review): 3 is not one of the component counts explored in the previous
# cell -- confirm this is the intended value.
# NOTE(review): PCA is applied to X_train as-is; presumably the features were
# already scaled during data preparation -- verify.
pca = PCA(n_components=3)
pca_X_train, pca_X_valid = pca.fit_transform(X_train), pca.transform(X_valid)

# Train the voting ensemble on the reduced training features.
model.fit(pca_X_train, y_train)

In [None]:
# get_mae is not defined in this notebook (presumably it comes from
# utils.ipynb -- verify). It receives the fitted model together with the
# PCA-transformed train/validation features and their labels, and its two
# return values are reported as the train and validation errors.
train_error, val_error = get_mae(model, pca_X_train, pca_X_valid, y_train, y_valid)

# MAE stands for "Mean Absolute Error"; the labels previously read
# "Mean Average Error".
print(f'Mean Absolute Error (train): {train_error}')
print(f'Mean Absolute Error (validation): {val_error}')