# Airline Passenger Satisfaction - Model 3: Support Vector Machine
----
### Prepare data

In [21]:
# Run the data-preparation notebook before anything else in this notebook.
# NOTE(review): presumably this is what defines X_train, X_valid, y_train and
# y_valid used by the cells below (they are not defined here) — confirm.
%run ./01_data_prep.ipynb
# Run the shared utilities notebook.
# NOTE(review): presumably this provides get_mae and plot_learning_curve,
# which are called below but not defined in this notebook — confirm.
%run ./utils.ipynb

## Run Classifier: SVM
#### Select the best hyperparameters

In [12]:
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
import pandas as pd

## Tuning gamma parameter

In [None]:
# Sweep the SVC hyperparameter `gamma` and record, for every candidate, the
# training and validation error returned by get_mae (plotted in the next cell).
gamma_values = [0.001, 0.01, 0.03, 0.05, 0.08, 0.1, 0.5]
mae_train = []
mae_val = []
for gamma in gamma_values:
    # SVC is the Support Vector Classifier; gamma is its kernel coefficient
    # (used by the rbf kernel, which is the default, as well as poly/sigmoid).
    svc = SVC(gamma=gamma)
    # StandardScaler removes the mean and scales each feature to unit variance
    # before the data reaches the classifier.
    model = make_pipeline(StandardScaler(), svc)
    mt, mv = get_mae(
        model,
        X_train, X_valid,
        y_train, y_valid,
    )
    mae_train += [mt]
    mae_val += [mv]
    print(f'Gamma value: {gamma}  \t\t Training Error: {mt}  \t\t Validation Error: {mv}')


In [None]:
# Plot training vs. validation error for every gamma candidate.
# The x axis is the candidate's position in gamma_values (evenly spaced) and
# the ticks are relabelled with the actual gamma values.
fig, ax = plt.subplots()
sns.lineplot(data=mae_train, label='train_error', ax=ax)
# mae_val was computed on the validation split (X_valid / y_valid), so the
# legend names it as validation error rather than test error.
sns.lineplot(data=mae_val, label='validation_error', ax=ax)
ax.set_title('Error for different gamma values')
ax.set_xlabel('gamma')
ax.set_ylabel('error')
ax.set_xticks(list(range(len(gamma_values))))
ax.set_xticklabels(gamma_values)
# Explicit show() keeps the cell output free of the Axes repr.
plt.show()

## Tuning C parameter
Using the previous result, we set gamma=0.03

In [None]:
# Sweep the SVC regularization hyperparameter `C` (gamma held at 0.03) and
# record, for every candidate, the training and validation error returned by
# get_mae (plotted in the next cell).
C_values = [0.02, 0.2, 0.8, 1.2, 2, 5, 10]
mae_train = []
mae_val = []
for c in C_values:
    # SVC is the Support Vector Classifier; C is its regularization parameter
    # (the higher C is, the higher the variance of the fitted model).
    svc = SVC(gamma=0.03, C=c)
    # StandardScaler removes the mean and scales each feature to unit variance
    # before the data reaches the classifier.
    model = make_pipeline(StandardScaler(), svc)
    mt, mv = get_mae(
        model,
        X_train, X_valid,
        y_train, y_valid,
    )
    mae_train += [mt]
    mae_val += [mv]
    print(f'C value: {c}  \t\t Training Error: {mt}  \t\t Validation Error: {mv}')


In [None]:
# Plot training vs. validation error for every C candidate (gamma=0.03).
# The x axis is the candidate's position in C_values (evenly spaced) and the
# ticks are relabelled with the actual C values.
fig, ax = plt.subplots()
sns.lineplot(data=mae_train, label='train_error', ax=ax)
# mae_val was computed on the validation split (X_valid / y_valid), so the
# legend names it as validation error rather than test error.
sns.lineplot(data=mae_val, label='validation_error', ax=ax)
ax.set_title('Error for different C values (gamma=0.03)')
ax.set_xlabel('C')
ax.set_ylabel('error')
ax.set_xticks(list(range(len(C_values))))
ax.set_xticklabels(C_values)
# Explicit show() keeps the cell output free of the Axes repr.
plt.show()

## Learning Curve
Using the previous results, we set gamma=0.03 and C=0.8

In [None]:
# Pipeline using the hyperparameters chosen from the two sweeps above:
# standardized features fed into an SVC with gamma=0.03 and C=0.8.
model = make_pipeline(
    StandardScaler(),
    SVC(C=0.8, gamma=0.03),
)

# plot_learning_curve is not defined in this notebook.
# NOTE(review): presumably it comes from utils.ipynb and draws onto the
# current matplotlib figure — confirm.
title = "Learning Curves"
plot_learning_curve(
    model,
    title,
    X_train,
    y_train,
    ylim=(0.8, 1.01),
    n_jobs=4,
)

plt.show()