# Comparing All Binary Models for Titanic Dataset

In [1]:
import joblib
import pandas as pd
from time import time
from sklearn.metrics import accuracy_score, precision_score, recall_score, classification_report, confusion_matrix

In [2]:
tr_features=pd.read_csv('train_features.csv')
tr_labels=pd.read_csv('train_labels.csv')

te_features=pd.read_csv('test_features.csv')
te_labels=pd.read_csv('test_labels.csv')

val_features=pd.read_csv('val_features.csv')
val_labels=pd.read_csv('val_labels.csv')

In [3]:
models = {}
for mdl in ['LR', 'SVM', 'MLP', 'RF', 'GB', 'KNN', 'GNB', 'DT']:
    models[mdl]=joblib.load('{}_model.pkl'.format(mdl))
models

{'LR': LogisticRegression(C=1, class_weight=None, dual=False, fit_intercept=True,
                    intercept_scaling=1, l1_ratio=None, max_iter=100,
                    multi_class='auto', n_jobs=None, penalty='l2',
                    random_state=None, solver='liblinear', tol=0.0001, verbose=0,
                    warm_start=False),
 'SVM': SVC(C=1, break_ties=False, cache_size=200, class_weight=None, coef0=0.0,
     decision_function_shape='ovr', degree=3, gamma=1, kernel='rbf', max_iter=-1,
     probability=False, random_state=None, shrinking=True, tol=0.001,
     verbose=False),
 'MLP': MLPClassifier(activation='logistic', alpha=0.0001, batch_size='auto',
               beta_1=0.9, beta_2=0.999, early_stopping=False, epsilon=1e-08,
               hidden_layer_sizes=(50,), learning_rate='invscaling',
               learning_rate_init=0.001, max_fun=15000, max_iter=200,
               momentum=0.9, n_iter_no_change=10, nesterovs_momentum=True,
               power_t=0.5, random_s

In [4]:
def evaluate_model(name, model, features, labels):
    start=time()
    pred=model.predict(features)
    end=time()
    accuracy = round(accuracy_score(labels, pred), 3)
    precision = round(precision_score(labels, pred), 3)
    recall = round(recall_score(labels, pred), 3)
    print('{} --Accuracy: {} / Precision:{}/Recall:{}/Latency:{}ms'.format(name, accuracy, precision,recall, round(end-start),4))
   # or
    print(classification_report(labels, pred))
    print(confusion_matrix(labels, pred))

In [5]:
for name, mdl in models.items():
    evaluate_model(name, mdl, val_features,val_labels)

LR --Accuracy: 0.816 / Precision:0.831/Recall:0.711/Latency:0ms
              precision    recall  f1-score   support

           0       0.81      0.89      0.85       103
           1       0.83      0.71      0.77        76

    accuracy                           0.82       179
   macro avg       0.82      0.80      0.81       179
weighted avg       0.82      0.82      0.81       179

[[92 11]
 [22 54]]
SVM --Accuracy: 0.804 / Precision:0.902/Recall:0.605/Latency:0ms
              precision    recall  f1-score   support

           0       0.77      0.95      0.85       103
           1       0.90      0.61      0.72        76

    accuracy                           0.80       179
   macro avg       0.83      0.78      0.79       179
weighted avg       0.82      0.80      0.80       179

[[98  5]
 [30 46]]
MLP --Accuracy: 0.827 / Precision:0.846/Recall:0.724/Latency:0ms
              precision    recall  f1-score   support

           0       0.82      0.90      0.86       103
     

In [6]:
evaluate_model('Random Forest', models['RF'], te_features,te_labels)

Random Forest --Accuracy: 0.809 / Precision:0.772/Recall:0.677/Latency:0ms
              precision    recall  f1-score   support

           0       0.83      0.88      0.85       113
           1       0.77      0.68      0.72        65

    accuracy                           0.81       178
   macro avg       0.80      0.78      0.79       178
weighted avg       0.81      0.81      0.81       178

[[100  13]
 [ 21  44]]
