## Best Model Selection
Author: Ejaz-ur-Rehman\
Date Created: 26-03-2025\
Email ID: ijazfinance@gmail.com

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [7]:
# load the data set
df = sns.load_dataset('titanic')
# define X and y
X = df[['age', 'fare', 'pclass', 'sex', 'sibsp', 'parch']]
y = df['survived']
# labele encoding
X = pd.get_dummies(X, columns=['sex'])
# impute missing values with mean
X.age.fillna(value=X['age'].mean(), inplace=True)

# getting models from Sklearn
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier

# choose the matrics
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
# model selection
from sklearn.model_selection import train_test_split

# define the models split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# list of model used
models = [LogisticRegression(), SVC(), DecisionTreeClassifier(), RandomForestClassifier(), KNeighborsClassifier()]
model_names = ['Logistic Regression', 'SVM', 'Decision Tree', 'Random Forest', 'KNN']

model_scores = []
for model, model_name in zip(models, model_names):
    # fit the model
    model.fit(X_train, y_train)
    # make predictions
    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    model_scores.append ((model_name, accuracy))

sorted_models = sorted(model_scores, key=lambda x: x[1], reverse=True)
for model in sorted_models:
    print("Accuracy Score:", f'{model[0]}: {model[1]: .2f}')



The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  X.age.fillna(value=X['age'].mean(), inplace=True)


Accuracy Score: Logistic Regression:  0.81
Accuracy Score: Random Forest:  0.80
Accuracy Score: Decision Tree:  0.75
Accuracy Score: KNN:  0.69
Accuracy Score: SVM:  0.66


In [8]:
models = [LogisticRegression(), SVC(), DecisionTreeClassifier(), RandomForestClassifier(), KNeighborsClassifier()]
model_names = ['Logistic Regression', 'SVM', 'Decision Tree', 'Random Forest', 'KNN']

model_scores = []
for model, model_name in zip(models, model_names):
    # fit the model
    model.fit(X_train, y_train)
    # make predictions
    y_pred = model.predict(X_test)
    Percision = precision_score(y_test, y_pred)
    model_scores.append ((model_name, Percision))

sorted_models = sorted(model_scores, key=lambda x: x[1], reverse=True)
for model in sorted_models:
    print("Percision Score:", f'{model[0]}: {model[1]: .2f}')

Percision Score: Logistic Regression:  0.80
Percision Score: Random Forest:  0.78
Percision Score: SVM:  0.76
Percision Score: Decision Tree:  0.73
Percision Score: KNN:  0.66


In [9]:
models = [LogisticRegression(), SVC(), DecisionTreeClassifier(), RandomForestClassifier(), KNeighborsClassifier()]
model_names = ['Logistic Regression', 'SVM', 'Decision Tree', 'Random Forest', 'KNN']

model_scores = []
for model, model_name in zip(models, model_names):
    # fit the model
    model.fit(X_train, y_train)
    # make predictions
    y_pred = model.predict(X_test)
    Recall = recall_score(y_test, y_pred)
    model_scores.append ((model_name, Recall))

sorted_models = sorted(model_scores, key=lambda x: x[1], reverse=True)
for model in sorted_models:
    print("Recall Score:", f'{model[0]}: {model[1]: .2f}')

Recall Score: Logistic Regression:  0.72
Recall Score: Decision Tree:  0.72
Recall Score: Random Forest:  0.72
Recall Score: KNN:  0.54
Recall Score: SVM:  0.26


In [10]:
models = [LogisticRegression(), SVC(), DecisionTreeClassifier(), RandomForestClassifier(), KNeighborsClassifier()]
model_names = ['Logistic Regression', 'SVM', 'Decision Tree', 'Random Forest', 'KNN']

model_scores = []
for model, model_name in zip(models, model_names):
    # fit the model
    model.fit(X_train, y_train)
    # make predictions
    y_pred = model.predict(X_test)
    F1 = f1_score(y_test, y_pred)
    model_scores.append ((model_name, F1))

sorted_models = sorted(model_scores, key=lambda x: x[1], reverse=True)
for model in sorted_models:
    print("F1 Score:", f'{model[0]}: {model[1]: .2f}')

F1 Score: Logistic Regression:  0.76
F1 Score: Random Forest:  0.75
F1 Score: Decision Tree:  0.70
F1 Score: KNN:  0.59
F1 Score: SVM:  0.38
