# Best Model Selection

In [8]:
# Importing libraries
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

# Load the Titanic passenger dataset via seaborn
df = sns.load_dataset("titanic")
# Target vector: the 'survived' column
y = df['survived']
# Feature matrix: six predictor columns, with 'sex' one-hot encoded
# (get_dummies replaces it with one indicator column per category)
X = pd.get_dummies(
    df[['pclass', 'sex', 'age', 'sibsp', 'parch', 'fare']],
    columns=['sex'],
)
# Fill missing ages with the mean age.
# NOTE(review): the mean is computed over all rows, before the train/test split below,
# so rows that end up in the test set contribute to the imputed value.
X = X.assign(age=X['age'].fillna(X['age'].mean()))

# Importing machine learning models
# Importing supervised machine learning classification model (Logistic Regression)
from sklearn.linear_model import LogisticRegression 
# Importing supervised machine learning classification model (Support Vector Machine)
from sklearn.svm import SVC        
# Importing supervised machine learning classification model (Decision Tree)                
from sklearn.tree import DecisionTreeClassifier
# Importing supervised machine learning classification model (Random Forest)                
from sklearn.ensemble import RandomForestClassifier
# Importing supervised machine learning classification model (K Neighbors)                
from sklearn.neighbors import KNeighborsClassifier
# Importing classification model metrics                
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
# Importing train_test_split
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing; the fixed seed makes the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Candidate classifiers, paired by position with the display names in model_names.
# The tree-based estimators are randomized, so random_state is pinned to make
# re-running this cell reproduce the same scores.
models = [
    LogisticRegression(),
    SVC(),
    DecisionTreeClassifier(random_state=42),
    RandomForestClassifier(random_state=42),
    KNeighborsClassifier()
]

# Display name for each entry of models (same order)
model_names = [
    'LogisticRegression',
    'SVC',
    'DecisionTreeClassifier',
    'RandomForestClassifier',
    'KNeighborsClassifier'
]

# Collects (model name, accuracy) tuples
models_scores = []

# Fit each model on the training split and score it on the test split.
# The loop variable is model_name (singular) so the model_names list is not overwritten.
for model, model_name in zip(models, model_names):
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    models_scores.append((model_name, accuracy))

# Rank models from highest to lowest accuracy
sorted_models = sorted(models_scores, key=lambda pair: pair[1], reverse=True)
# Print one line per model; unpack into fresh names so `model` keeps referring to an estimator
for name, score in sorted_models:
    print("Accuracy Score: ",f'{name} : {score:.2f}')

Accuracy Score:  LogisticRegression : 0.81
Accuracy Score:  RandomForestClassifier : 0.80
Accuracy Score:  DecisionTreeClassifier : 0.76
Accuracy Score:  KNeighborsClassifier : 0.69
Accuracy Score:  SVC : 0.66


In [4]:
# Fresh, unfitted classifiers, paired by position with the display names in model_names.
# The tree-based estimators are randomized, so random_state is pinned to make
# re-running this cell reproduce the same scores.
models = [
    LogisticRegression(),
    SVC(),
    DecisionTreeClassifier(random_state=42),
    RandomForestClassifier(random_state=42),
    KNeighborsClassifier()
]
# Display name for each entry of models (same order)
model_names = [
    'LogisticRegression',
    'SVC',
    'DecisionTreeClassifier',
    'RandomForestClassifier',
    'KNeighborsClassifier'
]
# Collects (model name, precision) tuples
models_scores = []
# Fit each model on the training split and compute precision on the test split.
# The loop variable is model_name (singular) so the model_names list is not overwritten.
for model, model_name in zip(models, model_names):
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    precision = precision_score(y_test, y_pred)
    models_scores.append((model_name, precision))

# Rank models from highest to lowest precision
sorted_models = sorted(models_scores, key=lambda pair: pair[1], reverse=True)
# Print one line per model (label spelling corrected from "Percision")
for name, score in sorted_models:
    print("Precision: ",f'{name} : {score:.2f}')

Precision:  LogisticRegression : 0.80
Precision:  RandomForestClassifier : 0.79
Precision:  SVC : 0.76
Precision:  DecisionTreeClassifier : 0.70
Precision:  KNeighborsClassifier : 0.66


In [5]:
# Fresh, unfitted classifiers, paired by position with the display names in model_names.
# The tree-based estimators are randomized, so random_state is pinned to make
# re-running this cell reproduce the same scores.
models = [
    LogisticRegression(),
    SVC(),
    DecisionTreeClassifier(random_state=42),
    RandomForestClassifier(random_state=42),
    KNeighborsClassifier()
]
# Display name for each entry of models (same order)
model_names = [
    'LogisticRegression',
    'SVC',
    'DecisionTreeClassifier',
    'RandomForestClassifier',
    'KNeighborsClassifier'
]
# Collects (model name, recall) tuples
models_scores = []
# Fit each model on the training split and compute recall on the test split.
# The loop variable is model_name (singular) so the model_names list is not overwritten.
for model, model_name in zip(models, model_names):
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    recall = recall_score(y_test, y_pred)
    models_scores.append((model_name, recall))

# Rank models from highest to lowest recall
sorted_models = sorted(models_scores, key=lambda pair: pair[1], reverse=True)
# Print one line per model; unpack into fresh names so `model` keeps referring to an estimator
for name, score in sorted_models:
    print("Recall Score: ",f'{name} : {score:.2f}')

Recall Score:  RandomForestClassifier : 0.74
Recall Score:  LogisticRegression : 0.72
Recall Score:  DecisionTreeClassifier : 0.70
Recall Score:  KNeighborsClassifier : 0.54
Recall Score:  SVC : 0.26


In [6]:
# Fresh, unfitted classifiers, paired by position with the display names in model_names.
# The tree-based estimators are randomized, so random_state is pinned to make
# re-running this cell reproduce the same scores.
models = [
    LogisticRegression(),
    SVC(),
    DecisionTreeClassifier(random_state=42),
    RandomForestClassifier(random_state=42),
    KNeighborsClassifier()
]
# Display name for each entry of models (same order)
model_names = [
    'LogisticRegression',
    'SVC',
    'DecisionTreeClassifier',
    'RandomForestClassifier',
    'KNeighborsClassifier'
]
# Collects (model name, F1) tuples
models_scores = []
# Fit each model on the training split and compute the F1 score on the test split.
# The loop variable is model_name (singular) so the model_names list is not overwritten.
for model, model_name in zip(models, model_names):
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    f1 = f1_score(y_test, y_pred)
    models_scores.append((model_name, f1))

# Rank models from highest to lowest F1 score
sorted_models = sorted(models_scores, key=lambda pair: pair[1], reverse=True)
# Print one line per model; unpack into fresh names so `model` keeps referring to an estimator
for name, score in sorted_models:
    print("F1 Score: ",f'{name} : {score:.2f}')

F1 Score:  RandomForestClassifier : 0.76
F1 Score:  LogisticRegression : 0.76
F1 Score:  DecisionTreeClassifier : 0.71
F1 Score:  KNeighborsClassifier : 0.59
F1 Score:  SVC : 0.38
