# Compare Model Results and Select the Final Model

In this section, we will do the following:
1. Evaluate all the saved models on the validation set.
2. Select the best model based on performance on the validation set.
3. Evaluate the best model on the holdout test set.

In [20]:
# Import the relevant packages
from time import perf_counter, time

import joblib
import pandas as pd
from sklearn.metrics import accuracy_score, precision_score, recall_score

In [4]:
# Load the validation split and the holdout test split.
# Each split is stored as a features CSV and a labels CSV.
X_val, y_val = (
    pd.read_csv(csv_path)
    for csv_path in ('./data/val_features.csv', './data/val_labels.csv')
)

X_test, y_test = (
    pd.read_csv(csv_path)
    for csv_path in ('./data/test_features.csv', './data/test_labels.csv')
)

In [6]:
# Preview the first rows of the validation features as a quick load check.
# NOTE(review): the recorded preview shows an 'Unnamed: 0' column (849, 331, ...);
# it looks like a row index that was written into the CSV -- confirm whether the
# saved models were trained with this column before dropping it.
X_val.head()

Unnamed: 0.1,Unnamed: 0,Pclass,Sex,Age,Fare,Family_cnt,Cabin_ind
0,849,1,1,29.699118,89.1042,1,1
1,331,1,0,45.5,28.5,0,1
2,260,3,0,29.699118,7.75,0,0
3,316,2,1,24.0,26.0,1,0
4,292,2,0,36.0,12.875,0,1


In [8]:
# Preview the first rows of the validation labels (the 'Survived' column).
y_val.head()

Unnamed: 0,Survived
0,1
1,0
2,0
3,1
4,0


In [12]:
# Preview the first rows of the holdout test features; the columns should match X_val.
X_test.head()

Unnamed: 0.1,Unnamed: 0,Pclass,Sex,Age,Fare,Family_cnt,Cabin_ind
0,424,3,0,18.0,20.2125,2,0
1,837,3,0,29.699118,8.05,0,0
2,525,3,0,40.5,7.75,0,0
3,328,3,1,31.0,20.525,2,0
4,70,2,0,32.0,10.5,0,0


In [14]:
# Preview the first rows of the holdout test labels (the 'Survived' column).
y_test.head()

Unnamed: 0,Survived
0,0
1,0
2,0
3,1
4,0


In [40]:
# Load the three persisted candidate models that will be compared.
# NOTE(review): joblib.load unpickles arbitrary objects -- only load files you trust.
# NOTE(review): in the recorded evaluation output rf_mdl prints as
# "StackingClassifier" with metrics identical to stacked_mdl, so RF_model.pkl
# may not actually contain the random-forest model -- verify the pickle.
gb_mdl, rf_mdl, stacked_mdl = (
    joblib.load(pkl_path)
    for pkl_path in (
        './data/models/GB_model.pkl',
        './data/models/RF_model.pkl',
        './data/models/stacked_model.pkl',
    )
)

In [42]:
def evaluate_model(model, features, labels):
    """Score a fitted classifier on one data split and print a one-line summary.

    Parameters
    ----------
    model
        Fitted estimator exposing ``predict``.
    features
        Feature matrix handed unchanged to ``model.predict``.
    labels
        True labels the predictions are compared against.

    Returns
    -------
    None
        The result is printed as
        ``<ClassName> -- Accuracy: a / Precision: p / Recall: r / Latency: Xms``
        with the metrics rounded to 3 decimals and the latency to 0.1 ms.

    Notes
    -----
    Precision and recall are computed with scikit-learn's default settings.
    Latency covers only the ``predict`` call.
    """
    # perf_counter() is a high-resolution monotonic clock. The wall-clock
    # time() can tick too coarsely for a call this short, which is how a
    # prediction ends up being reported as "0.0ms".
    start = perf_counter()
    pred = model.predict(features)
    latency_ms = (perf_counter() - start) * 1000

    accuracy = round(accuracy_score(labels, pred), 3)
    precision = round(precision_score(labels, pred), 3)
    recall = round(recall_score(labels, pred), 3)

    # type(...).__name__ yields the class name directly, without building the
    # full estimator repr and without relying on the repr containing '('.
    print('{} -- Accuracy: {} / Precision: {} / Recall: {} / Latency: {}ms'.format(
        type(model).__name__,
        accuracy,
        precision,
        recall,
        round(latency_ms, 1)))

In [46]:
# Evaluate every candidate model on the validation set.
# Each output line is prefixed with the candidate's label, because the printed
# class name alone cannot tell the candidates apart (two of them print as
# "StackingClassifier" in the recorded run).
candidates = [('GB', gb_mdl), ('RF', rf_mdl), ('Stacked', stacked_mdl)]
for label, mdl in candidates:
    print('[{}]'.format(label), end=' ')
    evaluate_model(mdl, X_val, y_val)

GradientBoostingClassifier -- Accuracy: 0.809 / Precision: 0.804 / Recall: 0.631 / Latency: 19.4ms
StackingClassifier -- Accuracy: 0.809 / Precision: 0.792 / Recall: 0.646 / Latency: 20.5ms
StackingClassifier -- Accuracy: 0.809 / Precision: 0.792 / Recall: 0.646 / Latency: 0.0ms
