# Best Model Selection

In [1]:
# import libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns


In [2]:
# loada dataset

df = sns.load_dataset("titanic")

X = df[['pclass', 'sex', 'age', 'sibsp', 'parch', 'fare']]
y = df['survived']

X = pd.get_dummies(X, columns=['sex'])
X.age.fillna(value= X['age'].mean(), inplace=True)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  X.age.fillna(value= X['age'].mean(), inplace=True)


In [None]:
# import models
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.model_selection import train_test_split

In [9]:
# model train
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


# check based on accuracy score
models = [LogisticRegression(), SVC(), DecisionTreeClassifier(), RandomForestClassifier(), KNeighborsClassifier()]
model_names = ['Logistic Regression', 'SVM', 'Decision Tree', 'Random Forest', 'KNN']

models_scores = []
for model, model_name in zip(models, model_names):
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    accurracy = accuracy_score(y_test, y_pred)
    models_scores.append([model_name,accurracy])

sorted_models = sorted(models_scores, key=lambda x: x[1], reverse=True)
for model in sorted_models:
    print("Accuracy Scores: ", f'{model[0]} : {model[1]:.2f}')

Accuracy Scores:  Logistic Regression : 0.81
Accuracy Scores:  Random Forest : 0.80
Accuracy Scores:  Decision Tree : 0.77
Accuracy Scores:  KNN : 0.69
Accuracy Scores:  SVM : 0.66


In [10]:
# check based on precission score
models = [LogisticRegression(), SVC(), DecisionTreeClassifier(), RandomForestClassifier(), KNeighborsClassifier()]
model_names = ['Logistic Regression', 'SVM', 'Decision Tree', 'Random Forest', 'KNN']

models_scores = []
for model, model_name in zip(models, model_names):
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    Precission = precision_score(y_test, y_pred)
    models_scores.append([model_name,Precission])

sorted_models = sorted(models_scores, key=lambda x: x[1], reverse=True)
for model in sorted_models:
    print("Accuracy Scores: ", f'{model[0]} : {model[1]:.2f}')

Accuracy Scores:  Logistic Regression : 0.80
Accuracy Scores:  Random Forest : 0.79
Accuracy Scores:  SVM : 0.76
Accuracy Scores:  Decision Tree : 0.73
Accuracy Scores:  KNN : 0.66


In [13]:
# check based on recall score
models = [LogisticRegression(), SVC(), DecisionTreeClassifier(), RandomForestClassifier(), KNeighborsClassifier()]
model_names = ['Logistic Regression', 'SVM', 'Decision Tree', 'Random Forest', 'KNN']

models_scores = []
for model, model_name in zip(models, model_names):
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    Recall = recall_score(y_test, y_pred)
    models_scores.append([model_name,Recall])

sorted_models = sorted(models_scores, key=lambda x: x[1], reverse=True)
for model in sorted_models:
    print("Accuracy Scores: ", f'{model[0]} : {model[1]:.2f}')

Accuracy Scores:  Logistic Regression : 0.72
Accuracy Scores:  Random Forest : 0.72
Accuracy Scores:  Decision Tree : 0.70
Accuracy Scores:  KNN : 0.54
Accuracy Scores:  SVM : 0.26


In [14]:
# check based on f1 score
models = [LogisticRegression(), SVC(), DecisionTreeClassifier(), RandomForestClassifier(), KNeighborsClassifier()]
model_names = ['Logistic Regression', 'SVM', 'Decision Tree', 'Random Forest', 'KNN']

models_scores = []
for model, model_name in zip(models, model_names):
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    f1 = f1_score(y_test, y_pred)
    models_scores.append([model_name,f1])

sorted_models = sorted(models_scores, key=lambda x: x[1], reverse=True)
for model in sorted_models:
    print("Accuracy Scores: ", f'{model[0]} : {model[1]:.2f}')

Accuracy Scores:  Logistic Regression : 0.76
Accuracy Scores:  Random Forest : 0.75
Accuracy Scores:  Decision Tree : 0.72
Accuracy Scores:  KNN : 0.59
Accuracy Scores:  SVM : 0.38
