# **Setup**

In [362]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn.naive_bayes import MultinomialNB
from sklearn.naive_bayes import BernoulliNB
from sklearn.ensemble import AdaBoostClassifier
from sklearn.ensemble import StackingClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.neural_network import MLPClassifier
from sklearn.naive_bayes import MultinomialNB
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import GradientBoostingClassifier
from matplotlib import pyplot as plt

from sklearn.metrics import accuracy_score
from sklearn.metrics import roc_auc_score
from sklearn.metrics import f1_score
from sklearn.metrics import recall_score
from sklearn.metrics import precision_score
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import cross_val_score
from sklearn import metrics

#from sklearn.metrics import plot_roc_curve
from sklearn.metrics import RocCurveDisplay
from sklearn.decomposition import PCA
from matplotlib.pyplot import figure
from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import Pipeline
from sklearn.model_selection import StratifiedShuffleSplit

import pandas as pd
import numpy as np

In [363]:
df = pd.read_csv('preprocessed_spam_ham_phishing.csv')

In [364]:
df['label'].value_counts()

label
1    50199
0    25220
2     1288
Name: count, dtype: int64

**GET ONLY PHISHING/HAM EMAILS**

In [365]:
# Remove spam emails, only consider phishing: (ham = 0, spam = 1, phishing = 2)
df_phish = df[(df['label'] == 2)]
print(df_phish.shape)

(1288, 95)


In [366]:
# Ham data
df_ham = df[(df['label'] == 0)]
df_ham.shape

(25220, 95)

In [367]:
# Split ham data into 50/50 so some can be used for validation
df_split = df_ham[:int(len(df_ham)/2)]
df_split = df_split.reset_index()
df_split = df_split.drop('index', axis=1)

df_val_ham = df_ham[int(len(df_ham)/2):]
df_val_ham = df_val_ham.reset_index()
df_val_ham = df_val_ham.drop('index', axis=1)

df = pd.concat([df_phish, df_split])
print(df.shape)

(13898, 95)


In [368]:
df['label'].value_counts()

label
0    12610
2     1288
Name: count, dtype: int64

In [369]:
# Randomly Sample 1288 Ham emails to create a balanced dataset (match the number of phishing emails)
df_ham = df[df['label'] == 0].sample(1288)
df_phish = df[df['label'] == 2]

df_phish = df_phish.assign(label=1)

df_new = df_ham._append(df_phish, ignore_index=True)
df_new = df_new.sample(frac=1)
df = df_new.reset_index(drop=True)
df

Unnamed: 0,hops,missing_subject,missing_to,missing_content-type,missing_mime-version,missing_x-mailer,missing_content-transfer-encoding,missing_x-mimeole,missing_x-priority,missing_list-id,...,domain_match_errors-to_reply-to,domain_match_sender_from,domain_match_references_reply-to,domain_match_references_in-reply-to,domain_match_references_to,domain_match_from_reply-to,domain_match_to_from,domain_match_to_message-id,domain_match_to_received,label
0,0,0,0,0,0,1,1,0,1,1,...,0,0,0,0,0,0,0,0,1,0
1,1,0,0,0,0,1,1,1,1,1,...,0,0,0,0,0,1,0,0,0,1
2,2,0,0,0,0,1,0,1,1,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,1,1,1,1,1,...,0,0,0,1,0,0,1,0,0,1
4,1,0,0,0,1,1,0,1,1,0,...,1,1,0,0,0,1,1,1,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2571,0,0,0,0,0,1,0,1,1,1,...,0,0,0,0,0,0,0,0,0,1
2572,1,0,0,0,0,1,0,1,1,0,...,0,0,0,0,0,0,0,0,0,0
2573,1,0,0,0,0,1,0,1,1,0,...,0,1,0,0,0,0,1,1,0,0
2574,0,0,0,0,0,1,1,1,1,1,...,0,0,0,0,0,0,0,0,0,1


In [370]:
df['label'].value_counts()

label
0    1288
1    1288
Name: count, dtype: int64

**FEATURE SELECTION**

In [371]:
# Reduce feature set
# The only features that are kept are domain matching features, as these should generalize across very different email datasets without issue.

feature_list = [
'domain_match_from_return-path',
'domain_match_message-id_from',
'domain_match_message-id_return-path',
'domain_match_to_from',
'domain_match_errors-to_from',
'domain_match_message-id_reply-to',
'domain_match_errors-to_message-id',
'domain_match_sender_from',
'domain_match_to_received',
'domain_match_errors-to_reply-to',
'domain_match_to_message-id',
'label']

feature_list = ['domain_val_message-id',
       'domain_match_message-id_from', 'domain_match_from_return-path',
       'domain_match_message-id_return-path', 'domain_match_message-id_sender',
       'domain_match_message-id_reply-to', 'domain_match_return-path_reply-to',
       'domain_match_reply-to_to', 'domain_match_to_in-reply-to',
       'domain_match_errors-to_message-id', 'domain_match_errors-to_from',
       'domain_match_errors-to_sender', 'domain_match_errors-to_reply-to',
       'domain_match_sender_from', 'domain_match_references_reply-to',
       'domain_match_references_in-reply-to', 'domain_match_references_to',
       'domain_match_from_reply-to', 'domain_match_to_from',
       'domain_match_to_message-id', 'domain_match_to_received', 'label']

df = df[feature_list]

In [372]:
df_Y = df['label']
df_X = df.drop('label', axis=1)

features_list = df_X.columns

In [373]:
# Apply a standard scaler to the full data set
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()

scaler.fit(df_X)
df_X = scaler.transform(df_X)
df_X = pd.DataFrame(df_X, columns=features_list)

In [374]:
# Breaking the data into a test and training set (20% test, 80% train)
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(df_X, df_Y, test_size=0.20, random_state=42)

In [375]:
df.shape

(2576, 22)

In [376]:
df.head(5)

Unnamed: 0,domain_val_message-id,domain_match_message-id_from,domain_match_from_return-path,domain_match_message-id_return-path,domain_match_message-id_sender,domain_match_message-id_reply-to,domain_match_return-path_reply-to,domain_match_reply-to_to,domain_match_to_in-reply-to,domain_match_errors-to_message-id,...,domain_match_errors-to_reply-to,domain_match_sender_from,domain_match_references_reply-to,domain_match_references_in-reply-to,domain_match_references_to,domain_match_from_reply-to,domain_match_to_from,domain_match_to_message-id,domain_match_to_received,label
0,0,0,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,1,0,0,0,1
2,0,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,1,0,0,0,0,0,0,0,...,0,0,0,1,0,0,1,0,0,1
4,0,1,1,1,1,1,1,1,0,1,...,1,1,0,0,0,1,1,1,0,0


**NOVELTY: GET VALIDATION DATA**

In [377]:
# This is needed in order to have ham data
df_val = pd.read_csv('preprocessed_phishing_2022.csv')

In [378]:
# The solution to get ham data is to transfer from other data...

df_val = pd.concat([df_val, df_val_ham])

In [379]:
df_val['label'].value_counts()

label
0    12610
2      245
Name: count, dtype: int64

In [380]:
# Randomly Sample 245 Ham emails to create a balanced dataset
df_ham_val = df_val[df_val['label'] == 0].sample(245)

df_phish_val = df_val[df_val['label'] == 2]

df_phish_val = df_phish_val.assign(label=1)

df_new_val = df_ham_val._append(df_phish_val, ignore_index=True)

df_new_val = df_new_val.sample(frac=1)
df_val = df_new_val.reset_index(drop=True)

In [381]:
# Reduce feature set
# The only features that are kept are domain matching features, as these should generalize across very different email datasets without issue.

feature_list = [
'domain_match_from_return-path',
'domain_match_message-id_from',
'domain_match_message-id_return-path',
'domain_match_to_from',
'domain_match_errors-to_from',
'domain_match_message-id_reply-to',
'domain_match_errors-to_message-id',
'domain_match_sender_from',
'domain_match_to_received',
'domain_match_errors-to_reply-to',
'domain_match_to_message-id',
'label']

feature_list = ['domain_val_message-id',
       'domain_match_message-id_from', 'domain_match_from_return-path',
       'domain_match_message-id_return-path', 'domain_match_message-id_sender',
       'domain_match_message-id_reply-to', 'domain_match_return-path_reply-to',
       'domain_match_reply-to_to', 'domain_match_to_in-reply-to',
       'domain_match_errors-to_message-id', 'domain_match_errors-to_from',
       'domain_match_errors-to_sender', 'domain_match_errors-to_reply-to',
       'domain_match_sender_from', 'domain_match_references_reply-to',
       'domain_match_references_in-reply-to', 'domain_match_references_to',
       'domain_match_from_reply-to', 'domain_match_to_from',
       'domain_match_to_message-id', 'domain_match_to_received', 'label']

df_val = df_val[feature_list]

X_val = df_val.drop('label', axis=1)
y_val = df_val['label']

In [382]:
df_val.shape

(490, 22)

In [383]:
df_val.head(5)

Unnamed: 0,domain_val_message-id,domain_match_message-id_from,domain_match_from_return-path,domain_match_message-id_return-path,domain_match_message-id_sender,domain_match_message-id_reply-to,domain_match_return-path_reply-to,domain_match_reply-to_to,domain_match_to_in-reply-to,domain_match_errors-to_message-id,...,domain_match_errors-to_reply-to,domain_match_sender_from,domain_match_references_reply-to,domain_match_references_in-reply-to,domain_match_references_to,domain_match_from_reply-to,domain_match_to_from,domain_match_to_message-id,domain_match_to_received,label
0,0,0,0,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
1,0,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,1,0,0,0,0,0,0,0,0,...,0,0,0,1,0,0,0,0,0,0
3,0,1,1,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1


# **Random Forest**

In [384]:
%%time
# Original: Hyperparameter Tuning with GRID SEARCH
pipe = Pipeline([("scale", StandardScaler()),
                ("rf", RandomForestClassifier())
                ])

param_grid_list = {'rf__n_estimators': [100, 150],
                  'rf__criterion': ['entropy', 'gini'],
                  'rf__min_samples_split': [2, 3],
                  'rf__min_samples_leaf': [1, 2],
                  'rf__max_features': ['sqrt', 'log2']} # removed 'auto' 
         
grid = GridSearchCV(pipe, param_grid=param_grid_list, cv=10, n_jobs=-1, verbose=0)

# Find the best hyperparameters (using 10 fold CV with the hold out fold being the validation set)
grid.fit(X_train, y_train)

# Check the hyperparameter results
rf_df = pd.DataFrame(grid.cv_results_)
print(grid.best_params_, '\n')
print(grid.best_estimator_)

# Get the best performing model
best_model = grid.best_estimator_

# Test the best performing model on the test set
original_predict = best_model.predict(X_test)

### Validation
val_pred = best_model.predict(X_val)

# Get the evaluation metrics
print('ORIGINAL EVALUATION METRICS')
print('-----------------------------')
print('Accuracy:', accuracy_score(y_test, original_predict)*100)
print('F1 Score:', f1_score(y_test, original_predict)*100)
print('Recall:', recall_score(y_test, original_predict)*100)
print('Precision:', precision_score(y_test, original_predict)*100)
print('ROC AUC:', roc_auc_score(y_test, original_predict)*100)
print('Confusion Matrix:', confusion_matrix(y_test, original_predict))

# Get the validation evaluation metrics
print('\nVALIDATION EVALUATION METRICS')
print('-----------------------------')
print('Accuracy:', accuracy_score(y_val, val_pred)*100)
print('F1 Score:', f1_score(y_val, val_pred)*100)
print('Recall:', recall_score(y_val, val_pred)*100)
print('Precision:', precision_score(y_val, val_pred)*100)
print('ROC AUC:', roc_auc_score(y_val, val_pred)*100)
print('Confusion Matrix:', confusion_matrix(y_val, val_pred))

{'rf__criterion': 'entropy', 'rf__max_features': 'log2', 'rf__min_samples_leaf': 2, 'rf__min_samples_split': 3, 'rf__n_estimators': 100} 

Pipeline(steps=[('scale', StandardScaler()),
                ('rf',
                 RandomForestClassifier(criterion='entropy',
                                        max_features='log2', min_samples_leaf=2,
                                        min_samples_split=3))])
ORIGINAL EVALUATION METRICS
-----------------------------
Accuracy: 97.86821705426357
F1 Score: 98.05996472663139
Recall: 98.93238434163702
Precision: 97.2027972027972
ROC AUC: 97.76406451124403
Confusion Matrix: [[227   8]
 [  3 278]]

VALIDATION EVALUATION METRICS
-----------------------------
Accuracy: 76.73469387755102
F1 Score: 81.12582781456953
Recall: 100.0
Precision: 68.24512534818942
ROC AUC: 76.73469387755102
Confusion Matrix: [[131 114]
 [  0 245]]
CPU times: user 392 ms, sys: 57.5 ms, total: 449 ms
Wall time: 3.98 s


**Novelty: Trying hyperparameter tuning with RANDOM SEARCH**

In [385]:
%%time
# Hyperparameter Tuning with RANDOM SEARCH
from scipy.stats import randint

pipe = Pipeline([("scale", StandardScaler()),
                ("rf", RandomForestClassifier())
                ])

rs_space={'rf__n_estimators': [100, 150],
               'rf__criterion': ['entropy', 'gini'],
               'rf__min_samples_split': [2, 3],
               'rf__min_samples_leaf': [1, 2],
               'rf__max_features': ['sqrt', 'log2']
         }

from sklearn.model_selection import RandomizedSearchCV

rf_random = RandomizedSearchCV(pipe, rs_space, n_iter=500, scoring='accuracy', n_jobs=-1, cv=10)
model_random = rf_random.fit(X_train, y_train)

rf_df = pd.DataFrame(model_random.cv_results_)
print('Best hyperparameters are: '+str(model_random.best_params_))
print('Best score is: '+str(model_random.best_score_))
print('Best model is: '+str(model_random.best_estimator_))

# Get the best performing model
best_model = model_random.best_estimator_

# Test the best performing model on the test set
original_predict = best_model.predict(X_test)

### Validation
val_pred = best_model.predict(X_val)

# Get the evaluation metrics
print('\nORIGINAL EVALUATION METRICS')
print('-----------------------------')
print('Accuracy:', accuracy_score(y_test, original_predict)*100)
print('F1 Score:', f1_score(y_test, original_predict)*100)
print('Recall:', recall_score(y_test, original_predict)*100)
print('Precision:', precision_score(y_test, original_predict)*100)
print('ROC AUC:', roc_auc_score(y_test, original_predict)*100)
print('Confusion Matrix:', confusion_matrix(y_test, original_predict))

# Get the validation evaluation metrics
print('\nVALIDATION EVALUATION METRICS')
print('-----------------------------')
print('Accuracy:', accuracy_score(y_val, val_pred)*100)
print('F1 Score:', f1_score(y_val, val_pred)*100)
print('Recall:', recall_score(y_val, val_pred)*100)
print('Precision:', precision_score(y_val, val_pred)*100)
print('ROC AUC:', roc_auc_score(y_val, val_pred)*100)
print('Confusion Matrix:', confusion_matrix(y_val, val_pred))

#rf_df[rf_df['rank_test_score'] <= 5].head(5)

Best hyperparameters are: {'rf__n_estimators': 150, 'rf__min_samples_split': 2, 'rf__min_samples_leaf': 2, 'rf__max_features': 'sqrt', 'rf__criterion': 'entropy'}
Best score is: 0.979611650485437
Best model is: Pipeline(steps=[('scale', StandardScaler()),
                ('rf',
                 RandomForestClassifier(criterion='entropy', min_samples_leaf=2,
                                        n_estimators=150))])

ORIGINAL EVALUATION METRICS
-----------------------------
Accuracy: 97.86821705426357
F1 Score: 98.05996472663139
Recall: 98.93238434163702
Precision: 97.2027972027972
ROC AUC: 97.76406451124403
Confusion Matrix: [[227   8]
 [  3 278]]

VALIDATION EVALUATION METRICS
-----------------------------
Accuracy: 76.73469387755102
F1 Score: 81.12582781456953
Recall: 100.0
Precision: 68.24512534818942
ROC AUC: 76.73469387755102
Confusion Matrix: [[131 114]
 [  0 245]]
CPU times: user 411 ms, sys: 56.6 ms, total: 467 ms
Wall time: 3.9 s


# **MLP**

In [386]:
%%time

# Original: Hyperparameter Tuning with GRID SEARCH
pipe = Pipeline([("scale", StandardScaler()),
                ("mlp", MLPClassifier(max_iter=500))
                ])

param_grid_list = {'mlp__hidden_layer_sizes': [(20,), (20,20), (40,), (40,40)],
                   'mlp__activation': ['tanh', 'relu'],
                   'mlp__learning_rate': ['constant', 'adaptive'],
                   'mlp__solver': ['adam', 'sgd'],
                   'mlp__alpha': [0.0001, 0.001, 0.01]}

grid = GridSearchCV(pipe, param_grid=param_grid_list, cv=10, n_jobs=-1, verbose=0)

# Find the best hyperparameters (using 10 fold CV with the hold out fold being the validation set)
grid.fit(X_train, y_train)

# Check the hyperparameter results
mlp_df = pd.DataFrame(grid.cv_results_)
print(grid.best_params_, '\n')
print(grid.best_estimator_)

# Get the best performing model
best_model = grid.best_estimator_

# Train the best model on the full training data
best_model.fit(X_train, y_train)

# Test the best performing model on the test set
predictions = best_model.predict(X_test)

### Validation
val_pred = best_model.predict(X_val)

# Get the evaluation metrics
print('ORIGINAL EVALUATION METRICS')
print('-----------------------------')
print('Accuracy:', accuracy_score(y_test, predictions)*100)
print('F1 Score:', f1_score(y_test, predictions)*100)
print('Recall:', recall_score(y_test, predictions)*100)
print('Precision:', precision_score(y_test, predictions)*100)
print('ROC AUC:', roc_auc_score(y_test, predictions)*100)
print('Confusion Matrix:', confusion_matrix(y_test, predictions))

# Get the validation evaluation metrics
print('\nVALIDATION EVALUATION METRICS')
print('-----------------------------')
print('Accuracy:', accuracy_score(y_val, val_pred)*100)
print('F1 Score:', f1_score(y_val, val_pred)*100)
print('Recall:', recall_score(y_val, val_pred)*100)
print('Precision:', precision_score(y_val, val_pred)*100)
print('ROC AUC:', roc_auc_score(y_val, val_pred)*100)
print('Confusion Matrix:', confusion_matrix(y_val, val_pred))

#mlp_df[mlp_df['rank_test_score'] <= 5].head(5)

{'mlp__activation': 'relu', 'mlp__alpha': 0.001, 'mlp__hidden_layer_sizes': (40, 40), 'mlp__learning_rate': 'constant', 'mlp__solver': 'sgd'} 

Pipeline(steps=[('scale', StandardScaler()),
                ('mlp',
                 MLPClassifier(alpha=0.001, hidden_layer_sizes=(40, 40),
                               max_iter=500, solver='sgd'))])
ORIGINAL EVALUATION METRICS
-----------------------------
Accuracy: 97.67441860465115
F1 Score: 97.86476868327402
Recall: 97.86476868327402
Precision: 97.86476868327402
ROC AUC: 97.65578859695616
Confusion Matrix: [[229   6]
 [  6 275]]

VALIDATION EVALUATION METRICS
-----------------------------
Accuracy: 94.08163265306122
F1 Score: 94.06952965235175
Recall: 93.87755102040816
Precision: 94.26229508196722
ROC AUC: 94.08163265306123
Confusion Matrix: [[231  14]
 [ 15 230]]
CPU times: user 20.1 s, sys: 559 ms, total: 20.6 s
Wall time: 40.1 s


**Novelty: Trying hyperparameter tuning with RANDOM SEARCH**

In [387]:
%%time
# Hyperparameter Tuning with RANDOM SEARCH
from scipy.stats import randint

pipe = Pipeline([("scale", StandardScaler()),
                ("mlp", MLPClassifier(max_iter=500))
                ])

rs_space={'mlp__hidden_layer_sizes': [(20,), (20,20), (40,), (40,40)],
            'mlp__activation': ['tanh', 'relu'],
            'mlp__learning_rate': ['constant', 'adaptive'],
            'mlp__solver': ['adam', 'sgd'],
            'mlp__alpha': [0.0001, 0.001, 0.01]}

from sklearn.model_selection import RandomizedSearchCV

mlp_random = RandomizedSearchCV(pipe, rs_space, scoring='accuracy', n_jobs=-1, cv=3)
model_random = mlp_random.fit(X_train, y_train)

mlp_df = pd.DataFrame(model_random.cv_results_)
print('Best hyperparameters are: '+str(model_random.best_params_))
print('Best score is: '+str(model_random.best_score_))
print('Best model is: '+str(model_random.best_estimator_))

# Get the best performing model
best_model = model_random.best_estimator_

# Test the best performing model on the test set
predictions = best_model.predict(X_test)

### Validation
val_pred = best_model.predict(X_val)

# Get the evaluation metrics
print('ORIGINAL EVALUATION METRICS')
print('-----------------------------')
print('Accuracy:', accuracy_score(y_test, predictions)*100)
print('F1 Score:', f1_score(y_test, predictions)*100)
print('Recall:', recall_score(y_test, predictions)*100)
print('Precision:', precision_score(y_test, predictions)*100)
print('ROC AUC:', roc_auc_score(y_test, predictions)*100)
print('Confusion Matrix:', confusion_matrix(y_test, predictions))

# Get the validation evaluation metrics
print('\nVALIDATION EVALUATION METRICS')
print('-----------------------------')
print('Accuracy:', accuracy_score(y_val, val_pred)*100)
print('F1 Score:', f1_score(y_val, val_pred)*100)
print('Recall:', recall_score(y_val, val_pred)*100)
print('Precision:', precision_score(y_val, val_pred)*100)
print('ROC AUC:', roc_auc_score(y_val, val_pred)*100)
print('Confusion Matrix:', confusion_matrix(y_val, val_pred))

#mlp_df[mlp_df['rank_test_score'] <= 5].head(5)

Best hyperparameters are: {'mlp__solver': 'sgd', 'mlp__learning_rate': 'adaptive', 'mlp__hidden_layer_sizes': (40, 40), 'mlp__alpha': 0.0001, 'mlp__activation': 'relu'}
Best score is: 0.9800975495209521
Best model is: Pipeline(steps=[('scale', StandardScaler()),
                ('mlp',
                 MLPClassifier(hidden_layer_sizes=(40, 40),
                               learning_rate='adaptive', max_iter=500,
                               solver='sgd'))])
ORIGINAL EVALUATION METRICS
-----------------------------
Accuracy: 97.86821705426357
F1 Score: 98.05996472663139
Recall: 98.93238434163702
Precision: 97.2027972027972
ROC AUC: 97.76406451124403
Confusion Matrix: [[227   8]
 [  3 278]]

VALIDATION EVALUATION METRICS
-----------------------------
Accuracy: 93.46938775510203
F1 Score: 93.57429718875503
Recall: 95.10204081632652
Precision: 92.09486166007905
ROC AUC: 93.46938775510203
Confusion Matrix: [[225  20]
 [ 12 233]]
CPU times: user 12.2 s, sys: 278 ms, total: 12.5 s
Wall ti

# **Logistic Regression**

In [388]:
%%time

# Original: Hyperparameter Tuning with GRID SEARCH
pipe = Pipeline([("scale", StandardScaler()),
                ("lr", LogisticRegression(max_iter=2000))
                ])

param_grid_list = {'lr__solver': ['newton-cg', 'lbfgs', 'sag', 'saga'],
                  'lr__fit_intercept': [True, False],
                  'lr__tol': [0.0001, 0.001],
                  'lr__penalty': ['l1', 'l2', 'elasticnet'],
                  'lr__C': [0.1, 1, 10]}

grid = GridSearchCV(pipe, param_grid=param_grid_list, cv=10, n_jobs=-1, verbose=0)

# Find the best hyperparameters (using 10 fold CV with the hold out fold being the validation set)
grid.fit(X_train, y_train)

# Check the hyperparameter results
lr_df = pd.DataFrame(grid.cv_results_)
print(grid.best_params_, '\n')
print(grid.best_estimator_)

# Get the best performing model
best_model = grid.best_estimator_

# Train the best model on the full training data
best_model.fit(X_train, y_train)

# Test the best performing model on the test set
predictions = best_model.predict(X_test)

### Validation
val_pred = best_model.predict(X_val)

# Get the evaluation metrics
print('ORIGINAL EVALUATION METRICS')
print('-----------------------------')
print('Accuracy:', accuracy_score(y_test, predictions)*100)
print('F1 Score:', f1_score(y_test, predictions)*100)
print('Recall:', recall_score(y_test, predictions)*100)
print('Precision:', precision_score(y_test, predictions)*100)
print('ROC AUC:', roc_auc_score(y_test, predictions)*100)
print('Confusion Matrix:', confusion_matrix(y_test, predictions))

# Get the validation evaluation metrics
print('\nVALIDATION EVALUATION METRICS')
print('-----------------------------')
print('Accuracy:', accuracy_score(y_val, val_pred)*100)
print('F1 Score:', f1_score(y_val, val_pred)*100)
print('Recall:', recall_score(y_val, val_pred)*100)
print('Precision:', precision_score(y_val, val_pred)*100)
print('ROC AUC:', roc_auc_score(y_val, val_pred)*100)
print('Confusion Matrix:', confusion_matrix(y_val, val_pred))

#lr_df[lr_df['rank_test_score'] <= 5].head(5)

{'lr__C': 1, 'lr__fit_intercept': True, 'lr__penalty': 'l2', 'lr__solver': 'newton-cg', 'lr__tol': 0.0001} 

Pipeline(steps=[('scale', StandardScaler()),
                ('lr',
                 LogisticRegression(C=1, max_iter=2000, solver='newton-cg'))])
ORIGINAL EVALUATION METRICS
-----------------------------
Accuracy: 97.67441860465115
F1 Score: 97.87234042553192
Recall: 98.22064056939502
Precision: 97.52650176678446
ROC AUC: 97.62095858256986
Confusion Matrix: [[228   7]
 [  5 276]]

VALIDATION EVALUATION METRICS
-----------------------------
Accuracy: 80.81632653061224
F1 Score: 76.61691542288557
Recall: 62.857142857142854
Precision: 98.08917197452229
ROC AUC: 80.81632653061224
Confusion Matrix: [[242   3]
 [ 91 154]]
CPU times: user 1.21 s, sys: 661 ms, total: 1.87 s
Wall time: 4 s


# **SVM**

In [389]:
%%time

pipe = Pipeline([("scale", StandardScaler()),
                ("svc", SVC())
                ])

param_grid_list = {'svc__C': [0.1, 1, 10],
                  'svc__kernel': ['linear', 'poly', 'rbf', 'sigmoid'],
                  'svc__degree': [3, 4, 5],
                  'svc__tol': [0.001, 0.0001, 0.01]}

grid = GridSearchCV(pipe, param_grid=param_grid_list, cv=10, n_jobs=-1, verbose=0)

# Find the best hyperparameters (using 10 fold CV with the hold out fold being the validation set)
grid.fit(X_train, y_train)

# Check the hyperparameter results
svm_df = pd.DataFrame(grid.cv_results_)
print(grid.best_params_, '\n')
print(grid.best_estimator_)

# Get the best performing model
best_model = grid.best_estimator_

# Train the best model on the full training data
best_model.fit(X_train, y_train)

# Test the best performing model on the test set
predictions = best_model.predict(X_test)

### Validation
val_pred = best_model.predict(X_val)

# Get the evaluation metrics
print('ORIGINAL EVALUATION METRICS')
print('-----------------------------')
print('Accuracy:', accuracy_score(y_test, predictions)*100)
print('F1 Score:', f1_score(y_test, predictions)*100)
print('Recall:', recall_score(y_test, predictions)*100)
print('Precision:', precision_score(y_test, predictions)*100)
print('ROC AUC:', roc_auc_score(y_test, predictions)*100)
print('Confusion Matrix:', confusion_matrix(y_test, predictions))

# Get the validation evaluation metrics
print('\nVALIDATION EVALUATION METRICS')
print('-----------------------------')
print('Accuracy:', accuracy_score(y_val, val_pred)*100)
print('F1 Score:', f1_score(y_val, val_pred)*100)
print('Recall:', recall_score(y_val, val_pred)*100)
print('Precision:', precision_score(y_val, val_pred)*100)
print('ROC AUC:', roc_auc_score(y_val, val_pred)*100)
print('Confusion Matrix:', confusion_matrix(y_val, val_pred))

#svm_df[svm_df['rank_test_score'] <= 5].head(5)

{'svc__C': 1, 'svc__degree': 3, 'svc__kernel': 'rbf', 'svc__tol': 0.001} 

Pipeline(steps=[('scale', StandardScaler()), ('svc', SVC(C=1))])
ORIGINAL EVALUATION METRICS
-----------------------------
Accuracy: 97.48062015503875
F1 Score: 97.71528998242532
Recall: 98.93238434163702
Precision: 96.52777777777779
ROC AUC: 97.33853259635043
Confusion Matrix: [[225  10]
 [  3 278]]

VALIDATION EVALUATION METRICS
-----------------------------
Accuracy: 81.22448979591836
F1 Score: 83.51254480286738
Recall: 95.10204081632652
Precision: 74.4408945686901
ROC AUC: 81.22448979591836
Confusion Matrix: [[165  80]
 [ 12 233]]
CPU times: user 1.5 s, sys: 272 ms, total: 1.77 s
Wall time: 2.49 s


# **Decision Tree**

In [390]:
%%time

pipe = Pipeline([("scale", StandardScaler()),
                ("dt", DecisionTreeClassifier())
                ])

param_grid_list = {'dt__criterion': ['entropy', 'gini'],
                  'dt__min_samples_split': [2, 3, 4],
                  'dt__min_samples_leaf': [1, 2, 3],
                  'dt__ccp_alpha': [0, 0.005, 0.01, 0.025, 0.05, 0.1]}

grid = GridSearchCV(pipe, param_grid=param_grid_list, cv=10, n_jobs=-1, verbose=0)

# Find the best hyperparameters (using 10 fold CV with the hold out fold being the validation set)
grid.fit(X_train, y_train)

# Check the hyperparameter results
dt_df = pd.DataFrame(grid.cv_results_)
print(grid.best_params_, '\n')
print(grid.best_estimator_)

# Get the best performing model
best_model = grid.best_estimator_

# Train the best model on the full training data
best_model.fit(X_train, y_train)

# Test the best performing model on the test set
predictions = best_model.predict(X_test)

### Validation
val_pred = best_model.predict(X_val)

# Get the evaluation metrics
print('ORIGINAL EVALUATION METRICS')
print('-----------------------------')
print('Accuracy:', accuracy_score(y_test, predictions)*100)
print('F1 Score:', f1_score(y_test, predictions)*100)
print('Recall:', recall_score(y_test, predictions)*100)
print('Precision:', precision_score(y_test, predictions)*100)
print('ROC AUC:', roc_auc_score(y_test, predictions)*100)
print('Confusion Matrix:', confusion_matrix(y_test, predictions))

# Get the validation evaluation metrics
print('\nVALIDATION EVALUATION METRICS')
print('-----------------------------')
print('Accuracy:', accuracy_score(y_val, val_pred)*100)
print('F1 Score:', f1_score(y_val, val_pred)*100)
print('Recall:', recall_score(y_val, val_pred)*100)
print('Precision:', precision_score(y_val, val_pred)*100)
print('ROC AUC:', roc_auc_score(y_val, val_pred)*100)
print('Confusion Matrix:', confusion_matrix(y_val, val_pred))

#dt_df[dt_df['rank_test_score'] <= 5].head(5)

{'dt__ccp_alpha': 0, 'dt__criterion': 'entropy', 'dt__min_samples_leaf': 1, 'dt__min_samples_split': 2} 

Pipeline(steps=[('scale', StandardScaler()),
                ('dt',
                 DecisionTreeClassifier(ccp_alpha=0, criterion='entropy'))])
ORIGINAL EVALUATION METRICS
-----------------------------
Accuracy: 97.86821705426357
F1 Score: 98.05996472663139
Recall: 98.93238434163702
Precision: 97.2027972027972
ROC AUC: 97.76406451124403
Confusion Matrix: [[227   8]
 [  3 278]]

VALIDATION EVALUATION METRICS
-----------------------------
Accuracy: 76.73469387755102
F1 Score: 81.12582781456953
Recall: 100.0
Precision: 68.24512534818942
ROC AUC: 76.73469387755102
Confusion Matrix: [[131 114]
 [  0 245]]
CPU times: user 337 ms, sys: 52.7 ms, total: 390 ms
Wall time: 581 ms


# **Naive Bayes (Gaussian)**

In [391]:
%%time

pipe = Pipeline([("scale", StandardScaler()),
                ("gnb", GaussianNB())
                ])

param_grid_list = {'gnb__var_smoothing': [1E-9, 1E-10, 1E-8]}

grid = GridSearchCV(pipe, param_grid=param_grid_list, cv=10, n_jobs=-1, verbose=0)

# Find the best hyperparameters (using 10 fold CV with the hold out fold being the validation set)
grid.fit(X_train, y_train)

# Check the hyperparameter results
nb_df = pd.DataFrame(grid.cv_results_)
print(grid.best_params_, '\n')
print(grid.best_estimator_)

# Get the best performing model
best_model = grid.best_estimator_

# Train the best model on the full training data
best_model.fit(X_train, y_train)

# Test the best performing model on the test set
predictions = best_model.predict(X_test)

### Validation
val_pred = best_model.predict(X_val)

# Get the evaluation metrics
print('ORIGINAL EVALUATION METRICS')
print('-----------------------------')
print('Accuracy:', accuracy_score(y_test, predictions)*100)
print('F1 Score:', f1_score(y_test, predictions)*100)
print('Recall:', recall_score(y_test, predictions)*100)
print('Precision:', precision_score(y_test, predictions)*100)
print('ROC AUC:', roc_auc_score(y_test, predictions)*100)
print('Confusion Matrix:', confusion_matrix(y_test, predictions))

# Get the validation evaluation metrics
print('\nVALIDATION EVALUATION METRICS')
print('-----------------------------')
print('Accuracy:', accuracy_score(y_val, val_pred)*100)
print('F1 Score:', f1_score(y_val, val_pred)*100)
print('Recall:', recall_score(y_val, val_pred)*100)
print('Precision:', precision_score(y_val, val_pred)*100)
print('ROC AUC:', roc_auc_score(y_val, val_pred)*100)
print('Confusion Matrix:', confusion_matrix(y_val, val_pred))

#nb_df[nb_df['rank_test_score'] <= 5].head(5)

{'gnb__var_smoothing': 1e-08} 

Pipeline(steps=[('scale', StandardScaler()),
                ('gnb', GaussianNB(var_smoothing=1e-08))])
ORIGINAL EVALUATION METRICS
-----------------------------
Accuracy: 87.59689922480621
F1 Score: 89.64401294498381
Recall: 98.57651245551602
Precision: 82.19584569732937
ROC AUC: 86.5222987809495
Confusion Matrix: [[175  60]
 [  4 277]]

VALIDATION EVALUATION METRICS
-----------------------------
Accuracy: 50.0
F1 Score: 0.0
Recall: 0.0
Precision: 0.0
ROC AUC: 50.0
Confusion Matrix: [[245   0]
 [245   0]]
CPU times: user 30 ms, sys: 6.08 ms, total: 36.1 ms
Wall time: 50.6 ms


# **AdaBoost**

In [392]:
%%time

pipe = Pipeline([("scale", StandardScaler()),
                ("ab", AdaBoostClassifier())
                ])

param_grid_list = {'ab__n_estimators': [50, 100, 150, 200],
                  'ab__learning_rate': [0.95, 1, 1.05, 1.25, 1.5, 1.75, 2],
                  'ab__algorithm': ['SAMME', 'SAMME.R']}

grid = GridSearchCV(pipe, param_grid=param_grid_list, cv=10, n_jobs=-1, verbose=0)

# Find the best hyperparameters (using 10 fold CV with the hold out fold being the validation set)
grid.fit(X_train, y_train)

# Check the hyperparameter results
ab_df = pd.DataFrame(grid.cv_results_)
print(grid.best_params_, '\n')
print(grid.best_estimator_)

# Get the best performing model
best_model = grid.best_estimator_

# Train the best model on the full training data
best_model.fit(X_train, y_train)

# Test the best performing model on the test set
predictions = best_model.predict(X_test)

### Validation
val_pred = best_model.predict(X_val)

# Get the evaluation metrics
print('ORIGINAL EVALUATION METRICS')
print('-----------------------------')
print('Accuracy:', accuracy_score(y_test, predictions)*100)
print('F1 Score:', f1_score(y_test, predictions)*100)
print('Recall:', recall_score(y_test, predictions)*100)
print('Precision:', precision_score(y_test, predictions)*100)
print('ROC AUC:', roc_auc_score(y_test, predictions)*100)
print('Confusion Matrix:', confusion_matrix(y_test, predictions))

# Get the validation evaluation metrics
print('\nVALIDATION EVALUATION METRICS')
print('-----------------------------')
print('Accuracy:', accuracy_score(y_val, val_pred)*100)
print('F1 Score:', f1_score(y_val, val_pred)*100)
print('Recall:', recall_score(y_val, val_pred)*100)
print('Precision:', precision_score(y_val, val_pred)*100)
print('ROC AUC:', roc_auc_score(y_val, val_pred)*100)
print('Confusion Matrix:', confusion_matrix(y_val, val_pred))

#ab_df[ab_df['rank_test_score'] <= 5].head(5)

{'ab__algorithm': 'SAMME.R', 'ab__learning_rate': 0.95, 'ab__n_estimators': 50} 

Pipeline(steps=[('scale', StandardScaler()),
                ('ab', AdaBoostClassifier(learning_rate=0.95))])
ORIGINAL EVALUATION METRICS
-----------------------------
Accuracy: 97.28682170542635
F1 Score: 97.52650176678446
Recall: 98.22064056939502
Precision: 96.84210526315789
ROC AUC: 97.19542666767624
Confusion Matrix: [[226   9]
 [  5 276]]

VALIDATION EVALUATION METRICS
-----------------------------
Accuracy: 81.22448979591836
F1 Score: 84.19243986254295
Recall: 100.0
Precision: 72.70029673590504
ROC AUC: 81.22448979591836
Confusion Matrix: [[153  92]
 [  0 245]]
CPU times: user 601 ms, sys: 106 ms, total: 707 ms
Wall time: 8.77 s


# **GradientBoostClassifier**

In [393]:
%%time

pipe = Pipeline([("scale", StandardScaler()),
                ("gbc", GradientBoostingClassifier())
                ])

param_grid_list = {'gbc__max_features': ['sqrt', 'log2'], # Removed 'auto'
                   'gbc__learning_rate': [0.05, 0.1, 0.2, 0.25, 0.30, 0.35, 0.40, 0.5, 0.6, 0.7, 0.9],
                   'gbc__n_estimators': [100, 200]
                  }

grid = GridSearchCV(pipe, param_grid=param_grid_list, cv=10, n_jobs=-1, verbose=0)

# Find the best hyperparameters (using 10 fold CV with the hold out fold being the validation set)
grid.fit(X_train, y_train)

# Check the hyperparameter results
gb_df = pd.DataFrame(grid.cv_results_)
print(grid.best_params_, '\n')
print(grid.best_estimator_)

# Get the best performing model
best_model = grid.best_estimator_

# Train the best model on the full training data
best_model.fit(X_train, y_train)

# Test the best performing model on the test set
predictions = best_model.predict(X_test)

### Validation
val_pred = best_model.predict(X_val)

# Get the evaluation metrics
print('ORIGINAL EVALUATION METRICS')
print('-----------------------------')
print('Accuracy:', accuracy_score(y_test, predictions)*100)
print('F1 Score:', f1_score(y_test, predictions)*100)
print('Recall:', recall_score(y_test, predictions)*100)
print('Precision:', precision_score(y_test, predictions)*100)
print('ROC AUC:', roc_auc_score(y_test, predictions)*100)
print('Confusion Matrix:', confusion_matrix(y_test, predictions))

# Get the validation evaluation metrics
print('\nVALIDATION EVALUATION METRICS')
print('-----------------------------')
print('Accuracy:', accuracy_score(y_val, val_pred)*100)
print('F1 Score:', f1_score(y_val, val_pred)*100)
print('Recall:', recall_score(y_val, val_pred)*100)
print('Precision:', precision_score(y_val, val_pred)*100)
print('ROC AUC:', roc_auc_score(y_val, val_pred)*100)
print('Confusion Matrix:', confusion_matrix(y_val, val_pred))

#gb_df[gb_df['rank_test_score'] <= 5].head(5)

{'gbc__learning_rate': 0.25, 'gbc__max_features': 'sqrt', 'gbc__n_estimators': 100} 

Pipeline(steps=[('scale', StandardScaler()),
                ('gbc',
                 GradientBoostingClassifier(learning_rate=0.25,
                                            max_features='sqrt'))])
ORIGINAL EVALUATION METRICS
-----------------------------
Accuracy: 97.86821705426357
F1 Score: 98.05996472663139
Recall: 98.93238434163702
Precision: 97.2027972027972
ROC AUC: 97.76406451124403
Confusion Matrix: [[227   8]
 [  3 278]]

VALIDATION EVALUATION METRICS
-----------------------------
Accuracy: 76.73469387755102
F1 Score: 81.12582781456953
Recall: 100.0
Precision: 68.24512534818942
ROC AUC: 76.73469387755102
Confusion Matrix: [[131 114]
 [  0 245]]
CPU times: user 502 ms, sys: 80.2 ms, total: 583 ms
Wall time: 4.04 s


# **KNN**

In [394]:
%%time

pipe = Pipeline([("scale", StandardScaler()),
                ("knn", KNeighborsClassifier())
                ])

param_grid_list = {'knn__n_neighbors': [1, 10, 20],
                  'knn__weights': ['uniform', 'distance'],
                  'knn__p': [1, 2],
                  'knn__algorithm': ['auto'],
                  'knn__leaf_size': [15, 30, 45]}

grid = GridSearchCV(pipe, param_grid=param_grid_list, cv=10, n_jobs=-1, verbose=0)

# Find the best hyperparameters (using 10 fold CV with the hold out fold being the validation set)
grid.fit(X_train, y_train)

# Check the hyperparameter results
knn_df = pd.DataFrame(grid.cv_results_)
print(grid.best_params_, '\n')
print(grid.best_estimator_)

# Get the best performing model
best_model = grid.best_estimator_

# Train the best model on the full training data
best_model.fit(X_train, y_train)

# Test the best performing model on the test set
predictions = best_model.predict(X_test)

### Validation
val_pred = best_model.predict(X_val)

# Get the evaluation metrics
print('ORIGINAL EVALUATION METRICS')
print('-----------------------------')
print('Accuracy:', accuracy_score(y_test, predictions)*100)
print('F1 Score:', f1_score(y_test, predictions)*100)
print('Recall:', recall_score(y_test, predictions)*100)
print('Precision:', precision_score(y_test, predictions)*100)
print('ROC AUC:', roc_auc_score(y_test, predictions)*100)
print('Confusion Matrix:', confusion_matrix(y_test, predictions))

# Get the validation evaluation metrics
print('\nVALIDATION EVALUATION METRICS')
print('-----------------------------')
print('Accuracy:', accuracy_score(y_val, val_pred)*100)
print('F1 Score:', f1_score(y_val, val_pred)*100)
print('Recall:', recall_score(y_val, val_pred)*100)
print('Precision:', precision_score(y_val, val_pred)*100)
print('ROC AUC:', roc_auc_score(y_val, val_pred)*100)
print('Confusion Matrix:', confusion_matrix(y_val, val_pred))

#knn_df[knn_df['rank_test_score'] <= 5].head(5)

{'knn__algorithm': 'auto', 'knn__leaf_size': 15, 'knn__n_neighbors': 20, 'knn__p': 1, 'knn__weights': 'distance'} 

Pipeline(steps=[('scale', StandardScaler()),
                ('knn',
                 KNeighborsClassifier(leaf_size=15, n_neighbors=20, p=1,
                                      weights='distance'))])
ORIGINAL EVALUATION METRICS
-----------------------------
Accuracy: 97.86821705426357
F1 Score: 98.05996472663139
Recall: 98.93238434163702
Precision: 97.2027972027972
ROC AUC: 97.76406451124403
Confusion Matrix: [[227   8]
 [  3 278]]

VALIDATION EVALUATION METRICS
-----------------------------
Accuracy: 72.24489795918367
F1 Score: 77.40863787375415
Recall: 95.10204081632652
Precision: 65.26610644257703
ROC AUC: 72.24489795918367
Confusion Matrix: [[121 124]
 [ 12 233]]
CPU times: user 299 ms, sys: 86.7 ms, total: 386 ms
Wall time: 401 ms


In [395]:
from sklearn.ensemble import StackingClassifier

'''
base_learners = [('rf', RandomForestClassifier(criterion='entropy', max_features='auto', min_samples_leaf=1, min_samples_split=3, n_estimators=100)), 
                ('mlp', MLPClassifier(max_iter=500, activation='relu', alpha=0.001, hidden_layer_sizes=(20,), learning_rate='adaptive', solver='adam')),
                ('knn', KNeighborsClassifier(algorithm='auto', leaf_size=15, n_neighbors=20, p=1, weights='distance')), 
                ('svm', SVC(C=10, kernel='rbf', tol=0.001))]
'''

base_learners_set1 = [('rf', RandomForestClassifier(criterion='entropy', min_samples_leaf=1, min_samples_split=3, n_estimators=100)), 
                ('mlp', MLPClassifier(max_iter=500, activation='relu', alpha=0.001, hidden_layer_sizes=(20,), learning_rate='adaptive', solver='adam')),
                ('knn', KNeighborsClassifier(algorithm='auto', leaf_size=15, n_neighbors=20, p=1, weights='distance'))]

base_learners_set2 = [('rf', RandomForestClassifier(criterion='entropy', min_samples_leaf=1, min_samples_split=3, n_estimators=100)), 
                ('mlp', MLPClassifier(max_iter=500, activation='relu', alpha=0.001, hidden_layer_sizes=(20,), learning_rate='adaptive', solver='adam')), 
                ('svm', SVC(C=10, kernel='rbf', tol=0.001))]

base_learners_set3 = [('rf', RandomForestClassifier(criterion='entropy',  min_samples_leaf=1, min_samples_split=3, n_estimators=100)),
                ('knn', KNeighborsClassifier(algorithm='auto', leaf_size=15, n_neighbors=20, p=1, weights='distance')), 
                ('svm', SVC(C=10, kernel='rbf', tol=0.001))]

base_learners_set4 = [('mlp', MLPClassifier(max_iter=500, activation='relu', alpha=0.001, hidden_layer_sizes=(20,), learning_rate='adaptive', solver='adam')),
                ('knn', KNeighborsClassifier(algorithm='auto', leaf_size=15, n_neighbors=20, p=1, weights='distance')), 
                ('svm', SVC(C=10, kernel='rbf', tol=0.001))]

base_learners = []
base_learners.append(base_learners_set1)
base_learners.append(base_learners_set2)
base_learners.append(base_learners_set3)
base_learners.append(base_learners_set4)

for base_learner_group in base_learners:

    meta_learner = LogisticRegression()

    clf = StackingClassifier(estimators=base_learner_group, final_estimator=meta_learner)

    # Train the stacked model on the full training data
    clf.fit(X_train, y_train)

    predictions = clf.predict(X_test)

    ### Validation
    val_pred = clf.predict(X_val)

    # Get the evaluation metrics
    print('ORIGINAL EVALUATION METRICS')
    print('-----------------------------')
    print('Accuracy:', accuracy_score(y_test, predictions)*100)
    print('F1 Score:', f1_score(y_test, predictions)*100)
    print('Recall:', recall_score(y_test, predictions)*100)
    print('Precision:', precision_score(y_test, predictions)*100)
    print('ROC AUC:', roc_auc_score(y_test, predictions)*100)
    print('Confusion Matrix:', confusion_matrix(y_test, predictions))
    print('-----------------------------------------\n')

    # Get the validation evaluation metrics
    print('\nVALIDATION EVALUATION METRICS')
    print('-----------------------------')
    print('Accuracy:', accuracy_score(y_val, val_pred)*100)
    print('F1 Score:', f1_score(y_val, val_pred)*100)
    print('Recall:', recall_score(y_val, val_pred)*100)
    print('Precision:', precision_score(y_val, val_pred)*100)
    print('ROC AUC:', roc_auc_score(y_val, val_pred)*100)
    print('Confusion Matrix:', confusion_matrix(y_val, val_pred))

    

ORIGINAL EVALUATION METRICS
-----------------------------
Accuracy: 97.86821705426357
F1 Score: 98.05996472663139
Recall: 98.93238434163702
Precision: 97.2027972027972
ROC AUC: 97.76406451124403
Confusion Matrix: [[227   8]
 [  3 278]]
-----------------------------------------


VALIDATION EVALUATION METRICS
-----------------------------
Accuracy: 80.40816326530611
F1 Score: 82.91814946619216
Recall: 95.10204081632652
Precision: 73.50157728706624
ROC AUC: 80.40816326530611
Confusion Matrix: [[161  84]
 [ 12 233]]
ORIGINAL EVALUATION METRICS
-----------------------------
Accuracy: 97.86821705426357
F1 Score: 98.05996472663139
Recall: 98.93238434163702
Precision: 97.2027972027972
ROC AUC: 97.76406451124403
Confusion Matrix: [[227   8]
 [  3 278]]
-----------------------------------------


VALIDATION EVALUATION METRICS
-----------------------------
Accuracy: 93.06122448979592
F1 Score: 93.19999999999999
Recall: 95.10204081632652
Precision: 91.37254901960785
ROC AUC: 93.06122448979592
Con