# **Setup**

In [294]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn.naive_bayes import MultinomialNB
from sklearn.naive_bayes import BernoulliNB
from sklearn.ensemble import AdaBoostClassifier
from sklearn.ensemble import StackingClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.neural_network import MLPClassifier
from sklearn.naive_bayes import MultinomialNB
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import GradientBoostingClassifier
from matplotlib import pyplot as plt

from sklearn.metrics import accuracy_score
from sklearn.metrics import roc_auc_score
from sklearn.metrics import f1_score
from sklearn.metrics import recall_score
from sklearn.metrics import precision_score
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import cross_val_score
from sklearn import metrics

#from sklearn.metrics import plot_roc_curve
from sklearn.metrics import RocCurveDisplay
from sklearn.decomposition import PCA
from matplotlib.pyplot import figure
from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import Pipeline
from sklearn.model_selection import StratifiedShuffleSplit

import pandas as pd
import numpy as np

In [295]:
df = pd.read_csv('preprocessed_spam_ham_phishing.csv')

In [296]:
df['label'].value_counts()

label
1    50199
0    25220
2     1288
Name: count, dtype: int64

**GET ONLY PHISHING/HAM EMAILS**

In [297]:
# Remove spam emails, only consider phishing: (ham = 0, spam = 1, phishing = 2)
df_phish = df[(df['label'] == 2)]
print(df_phish.shape)

(1288, 95)


In [298]:
# Ham data
df_ham = df[(df['label'] == 0)]
df_ham.shape

(25220, 95)

In [299]:
# Split ham data into 50/50 so some can be used for validation
df_split = df_ham[:int(len(df_ham)/2)]
df_split = df_split.reset_index()
df_split = df_split.drop('index', axis=1)

df_val_ham = df_ham[int(len(df_ham)/2):]
df_val_ham = df_val_ham.reset_index()
df_val_ham = df_val_ham.drop('index', axis=1)

df = pd.concat([df_phish, df_split])
print(df.shape)

(13898, 95)


In [300]:
df['label'].value_counts()

label
0    12610
2     1288
Name: count, dtype: int64

In [301]:
# Randomly Sample 1288 Ham emails to create a balanced dataset (match the number of phishing emails)
df_ham = df[df['label'] == 0].sample(1288)
df_phish = df[df['label'] == 2]

df_phish = df_phish.assign(label=1)

df_new = df_ham._append(df_phish, ignore_index=True)
df_new = df_new.sample(frac=1)
df = df_new.reset_index(drop=True)
df

Unnamed: 0,hops,missing_subject,missing_to,missing_content-type,missing_mime-version,missing_x-mailer,missing_content-transfer-encoding,missing_x-mimeole,missing_x-priority,missing_list-id,...,domain_match_errors-to_reply-to,domain_match_sender_from,domain_match_references_reply-to,domain_match_references_in-reply-to,domain_match_references_to,domain_match_from_reply-to,domain_match_to_from,domain_match_to_message-id,domain_match_to_received,label
0,0,0,0,0,0,1,1,1,1,1,...,0,0,0,0,0,0,0,0,0,1
1,1,0,0,0,0,1,1,1,1,1,...,0,0,0,1,1,0,1,1,0,1
2,0,0,1,0,0,1,1,1,1,1,...,0,0,0,0,0,0,0,0,0,1
3,1,0,0,0,0,1,1,1,1,1,...,0,0,0,0,0,0,0,0,0,1
4,1,0,0,0,0,1,0,1,1,0,...,0,1,0,1,1,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2571,1,0,1,0,0,1,1,1,1,1,...,0,0,0,0,0,0,0,0,0,1
2572,2,0,0,0,0,1,0,1,1,0,...,0,0,0,0,0,0,0,0,0,0
2573,1,0,0,0,0,1,1,1,1,1,...,0,0,0,1,0,0,1,0,0,1
2574,1,0,0,0,0,1,1,1,1,1,...,0,0,0,0,0,0,0,0,0,1


In [302]:
df['label'].value_counts()

label
1    1288
0    1288
Name: count, dtype: int64

**FEATURE SELECTION**

In [303]:
# Reduce feature set
# The only features that are kept are domain matching features, as these should generalize across very different email datasets without issue.

feature_list = [
'domain_match_from_return-path',
'domain_match_message-id_from',
'domain_match_message-id_return-path',
'domain_match_to_from',
'domain_match_errors-to_from',
'domain_match_message-id_reply-to',
'domain_match_errors-to_message-id',
'domain_match_sender_from',
'domain_match_to_received',
'domain_match_errors-to_reply-to',
'domain_match_to_message-id',
'label']

feature_list = ['domain_val_message-id',
       'domain_match_message-id_from', 'domain_match_from_return-path',
       'domain_match_message-id_return-path', 'domain_match_message-id_sender',
       'domain_match_message-id_reply-to', 'domain_match_return-path_reply-to',
       'domain_match_reply-to_to', 'domain_match_to_in-reply-to',
       'domain_match_errors-to_message-id', 'domain_match_errors-to_from',
       'domain_match_errors-to_sender', 'domain_match_errors-to_reply-to',
       'domain_match_sender_from', 'domain_match_references_reply-to',
       'domain_match_references_in-reply-to', 'domain_match_references_to',
       'domain_match_from_reply-to', 'domain_match_to_from',
       'domain_match_to_message-id', 'domain_match_to_received', 'label']

df = df[feature_list]

In [304]:
df_Y = df['label']
df_X = df.drop('label', axis=1)

features_list = df_X.columns

In [305]:
# Apply a standard scaler to the full data set
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()

scaler.fit(df_X)
df_X = scaler.transform(df_X)
df_X = pd.DataFrame(df_X, columns=features_list)

In [306]:
# Breaking the data into a test and training set (20% test, 80% train)
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(df_X, df_Y, test_size=0.20, random_state=42)

In [307]:
df.shape

(2576, 22)

In [308]:
df.head(5)

Unnamed: 0,domain_val_message-id,domain_match_message-id_from,domain_match_from_return-path,domain_match_message-id_return-path,domain_match_message-id_sender,domain_match_message-id_reply-to,domain_match_return-path_reply-to,domain_match_reply-to_to,domain_match_to_in-reply-to,domain_match_errors-to_message-id,...,domain_match_errors-to_reply-to,domain_match_sender_from,domain_match_references_reply-to,domain_match_references_in-reply-to,domain_match_references_to,domain_match_from_reply-to,domain_match_to_from,domain_match_to_message-id,domain_match_to_received,label
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
1,0,1,1,1,0,0,0,0,1,0,...,0,0,0,1,1,0,1,1,0,1
2,0,0,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
3,0,0,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
4,0,1,1,1,1,0,0,0,1,1,...,0,1,0,1,1,0,0,0,0,0


**NOVELTY: GET VALIDATION DATA**

In [309]:
# This is needed in order to have ham data
df_val = pd.read_csv('preprocessed_phishing_2022.csv')

In [310]:
# The solution to get ham data is to transfer from other data...

df_val = pd.concat([df_val, df_val_ham])

In [311]:
df_val['label'].value_counts()

label
0    12610
2      245
Name: count, dtype: int64

In [312]:
# Randomly Sample 245 Ham emails to create a balanced dataset
df_ham_val = df_val[df_val['label'] == 0].sample(245)

df_phish_val = df_val[df_val['label'] == 2]

df_phish_val = df_phish_val.assign(label=1)

df_new_val = df_ham_val._append(df_phish_val, ignore_index=True)

df_new_val = df_new_val.sample(frac=1)
df_val = df_new_val.reset_index(drop=True)

In [313]:
# Reduce feature set
# The only features that are kept are domain matching features, as these should generalize across very different email datasets without issue.

feature_list = [
'domain_match_from_return-path',
'domain_match_message-id_from',
'domain_match_message-id_return-path',
'domain_match_to_from',
'domain_match_errors-to_from',
'domain_match_message-id_reply-to',
'domain_match_errors-to_message-id',
'domain_match_sender_from',
'domain_match_to_received',
'domain_match_errors-to_reply-to',
'domain_match_to_message-id',
'label']

feature_list = ['domain_val_message-id',
       'domain_match_message-id_from', 'domain_match_from_return-path',
       'domain_match_message-id_return-path', 'domain_match_message-id_sender',
       'domain_match_message-id_reply-to', 'domain_match_return-path_reply-to',
       'domain_match_reply-to_to', 'domain_match_to_in-reply-to',
       'domain_match_errors-to_message-id', 'domain_match_errors-to_from',
       'domain_match_errors-to_sender', 'domain_match_errors-to_reply-to',
       'domain_match_sender_from', 'domain_match_references_reply-to',
       'domain_match_references_in-reply-to', 'domain_match_references_to',
       'domain_match_from_reply-to', 'domain_match_to_from',
       'domain_match_to_message-id', 'domain_match_to_received', 'label']

df_val = df_val[feature_list]

X_val = df_val.drop('label', axis=1)
y_val = df_val['label']

In [314]:
df_val.shape

(490, 22)

In [315]:
df_val.head(5)

Unnamed: 0,domain_val_message-id,domain_match_message-id_from,domain_match_from_return-path,domain_match_message-id_return-path,domain_match_message-id_sender,domain_match_message-id_reply-to,domain_match_return-path_reply-to,domain_match_reply-to_to,domain_match_to_in-reply-to,domain_match_errors-to_message-id,...,domain_match_errors-to_reply-to,domain_match_sender_from,domain_match_references_reply-to,domain_match_references_in-reply-to,domain_match_references_to,domain_match_from_reply-to,domain_match_to_from,domain_match_to_message-id,domain_match_to_received,label
0,0,0,0,0,0,0,0,0,1,0,...,0,0,0,1,1,0,0,0,0,0
1,0,1,1,1,0,0,0,1,0,0,...,0,0,0,0,0,0,0,0,0,1
2,0,1,1,1,0,1,1,0,0,0,...,0,0,0,0,0,1,0,0,1,0
3,0,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0


# **Random Forest**

In [316]:
%%time
# Original: Hyperparameter Tuning with GRID SEARCH
pipe = Pipeline([("scale", StandardScaler()),
                ("rf", RandomForestClassifier())
                ])

param_grid_list = {'rf__n_estimators': [100, 150],
                  'rf__criterion': ['entropy', 'gini'],
                  'rf__min_samples_split': [2, 3],
                  'rf__min_samples_leaf': [1, 2],
                  'rf__max_features': ['sqrt', 'log2']} # removed 'auto' 
         
grid = GridSearchCV(pipe, param_grid=param_grid_list, cv=10, n_jobs=-1, verbose=0)

# Find the best hyperparameters (using 10 fold CV with the hold out fold being the validation set)
grid.fit(X_train, y_train)

# Check the hyperparameter results
rf_df = pd.DataFrame(grid.cv_results_)
print(grid.best_params_, '\n')
print(grid.best_estimator_)

# Get the best performing model
best_model = grid.best_estimator_

# Test the best performing model on the test set
original_predict = best_model.predict(X_test)

### Validation
val_pred = best_model.predict(X_val)

# Get the evaluation metrics
print('ORIGINAL EVALUATION METRICS')
print('-----------------------------')
print('Accuracy:', accuracy_score(y_test, original_predict)*100)
print('F1 Score:', f1_score(y_test, original_predict)*100)
print('Recall:', recall_score(y_test, original_predict)*100)
print('Precision:', precision_score(y_test, original_predict)*100)
print('ROC AUC:', roc_auc_score(y_test, original_predict)*100)
print('Confusion Matrix:', confusion_matrix(y_test, original_predict))

# Get the validation evaluation metrics
print('\nVALIDATION EVALUATION METRICS')
print('-----------------------------')
print('Accuracy:', accuracy_score(y_val, val_pred)*100)
print('F1 Score:', f1_score(y_val, val_pred)*100)
print('Recall:', recall_score(y_val, val_pred)*100)
print('Precision:', precision_score(y_val, val_pred)*100)
print('ROC AUC:', roc_auc_score(y_val, val_pred)*100)
print('Confusion Matrix:', confusion_matrix(y_val, val_pred))

{'rf__criterion': 'entropy', 'rf__max_features': 'sqrt', 'rf__min_samples_leaf': 1, 'rf__min_samples_split': 2, 'rf__n_estimators': 150} 

Pipeline(steps=[('scale', StandardScaler()),
                ('rf',
                 RandomForestClassifier(criterion='entropy',
                                        n_estimators=150))])
ORIGINAL EVALUATION METRICS
-----------------------------
Accuracy: 99.03100775193798
F1 Score: 99.08256880733946
Recall: 98.9010989010989
Precision: 99.26470588235294
ROC AUC: 99.0390268168046
Confusion Matrix: [[241   2]
 [  3 270]]

VALIDATION EVALUATION METRICS
-----------------------------
Accuracy: 75.91836734693878
F1 Score: 80.5921052631579
Recall: 100.0
Precision: 67.4931129476584
ROC AUC: 75.91836734693878
Confusion Matrix: [[127 118]
 [  0 245]]
CPU times: user 425 ms, sys: 57.6 ms, total: 482 ms
Wall time: 4.12 s


**Novelty: Trying hyperparameter tuning with RANDOM SEARCH**

In [317]:
%%time
# Hyperparameter Tuning with RANDOM SEARCH
from scipy.stats import randint

pipe = Pipeline([("scale", StandardScaler()),
                ("rf", RandomForestClassifier())
                ])

rs_space={'rf__n_estimators': [100, 150],
               'rf__criterion': ['entropy', 'gini'],
               'rf__min_samples_split': [2, 3],
               'rf__min_samples_leaf': [1, 2],
               'rf__max_features': ['sqrt', 'log2']
         }

from sklearn.model_selection import RandomizedSearchCV

rf_random = RandomizedSearchCV(pipe, rs_space, n_iter=500, scoring='accuracy', n_jobs=-1, cv=10)
model_random = rf_random.fit(X_train, y_train)

rf_df = pd.DataFrame(model_random.cv_results_)
print('Best hyperparameters are: '+str(model_random.best_params_))
print('Best score is: '+str(model_random.best_score_))
print('Best model is: '+str(model_random.best_estimator_))

# Get the best performing model
best_model = model_random.best_estimator_

# Test the best performing model on the test set
original_predict = best_model.predict(X_test)

### Validation
val_pred = best_model.predict(X_val)

# Get the evaluation metrics
print('\nORIGINAL EVALUATION METRICS')
print('-----------------------------')
print('Accuracy:', accuracy_score(y_test, original_predict)*100)
print('F1 Score:', f1_score(y_test, original_predict)*100)
print('Recall:', recall_score(y_test, original_predict)*100)
print('Precision:', precision_score(y_test, original_predict)*100)
print('ROC AUC:', roc_auc_score(y_test, original_predict)*100)
print('Confusion Matrix:', confusion_matrix(y_test, original_predict))

# Get the validation evaluation metrics
print('\nVALIDATION EVALUATION METRICS')
print('-----------------------------')
print('Accuracy:', accuracy_score(y_val, val_pred)*100)
print('F1 Score:', f1_score(y_val, val_pred)*100)
print('Recall:', recall_score(y_val, val_pred)*100)
print('Precision:', precision_score(y_val, val_pred)*100)
print('ROC AUC:', roc_auc_score(y_val, val_pred)*100)
print('Confusion Matrix:', confusion_matrix(y_val, val_pred))

#rf_df[rf_df['rank_test_score'] <= 5].head(5)

Best hyperparameters are: {'rf__n_estimators': 100, 'rf__min_samples_split': 2, 'rf__min_samples_leaf': 1, 'rf__max_features': 'sqrt', 'rf__criterion': 'entropy'}
Best score is: 0.9859223300970873
Best model is: Pipeline(steps=[('scale', StandardScaler()),
                ('rf', RandomForestClassifier(criterion='entropy'))])

ORIGINAL EVALUATION METRICS
-----------------------------
Accuracy: 99.03100775193798
F1 Score: 99.08256880733946
Recall: 98.9010989010989
Precision: 99.26470588235294
ROC AUC: 99.0390268168046
Confusion Matrix: [[241   2]
 [  3 270]]

VALIDATION EVALUATION METRICS
-----------------------------
Accuracy: 75.91836734693878
F1 Score: 80.5921052631579
Recall: 100.0
Precision: 67.4931129476584
ROC AUC: 75.91836734693878
Confusion Matrix: [[127 118]
 [  0 245]]
CPU times: user 391 ms, sys: 56 ms, total: 447 ms
Wall time: 3.9 s


# **MLP**

In [318]:
%%time

# Original: Hyperparameter Tuning with GRID SEARCH
pipe = Pipeline([("scale", StandardScaler()),
                ("mlp", MLPClassifier(max_iter=500))
                ])

param_grid_list = {'mlp__hidden_layer_sizes': [(20,), (20,20), (40,), (40,40)],
                   'mlp__activation': ['tanh', 'relu'],
                   'mlp__learning_rate': ['constant', 'adaptive'],
                   'mlp__solver': ['adam', 'sgd'],
                   'mlp__alpha': [0.0001, 0.001, 0.01]}

grid = GridSearchCV(pipe, param_grid=param_grid_list, cv=10, n_jobs=-1, verbose=0)

# Find the best hyperparameters (using 10 fold CV with the hold out fold being the validation set)
grid.fit(X_train, y_train)

# Check the hyperparameter results
mlp_df = pd.DataFrame(grid.cv_results_)
print(grid.best_params_, '\n')
print(grid.best_estimator_)

# Get the best performing model
best_model = grid.best_estimator_

# Train the best model on the full training data
best_model.fit(X_train, y_train)

# Test the best performing model on the test set
predictions = best_model.predict(X_test)

### Validation
val_pred = best_model.predict(X_val)

# Get the evaluation metrics
print('ORIGINAL EVALUATION METRICS')
print('-----------------------------')
print('Accuracy:', accuracy_score(y_test, predictions)*100)
print('F1 Score:', f1_score(y_test, predictions)*100)
print('Recall:', recall_score(y_test, predictions)*100)
print('Precision:', precision_score(y_test, predictions)*100)
print('ROC AUC:', roc_auc_score(y_test, predictions)*100)
print('Confusion Matrix:', confusion_matrix(y_test, predictions))

# Get the validation evaluation metrics
print('\nVALIDATION EVALUATION METRICS')
print('-----------------------------')
print('Accuracy:', accuracy_score(y_val, val_pred)*100)
print('F1 Score:', f1_score(y_val, val_pred)*100)
print('Recall:', recall_score(y_val, val_pred)*100)
print('Precision:', precision_score(y_val, val_pred)*100)
print('ROC AUC:', roc_auc_score(y_val, val_pred)*100)
print('Confusion Matrix:', confusion_matrix(y_val, val_pred))

#mlp_df[mlp_df['rank_test_score'] <= 5].head(5)

{'mlp__activation': 'tanh', 'mlp__alpha': 0.0001, 'mlp__hidden_layer_sizes': (20,), 'mlp__learning_rate': 'constant', 'mlp__solver': 'adam'} 

Pipeline(steps=[('scale', StandardScaler()),
                ('mlp',
                 MLPClassifier(activation='tanh', hidden_layer_sizes=(20,),
                               max_iter=500))])
ORIGINAL EVALUATION METRICS
-----------------------------
Accuracy: 99.03100775193798
F1 Score: 99.08256880733946
Recall: 98.9010989010989
Precision: 99.26470588235294
ROC AUC: 99.0390268168046
Confusion Matrix: [[241   2]
 [  3 270]]

VALIDATION EVALUATION METRICS
-----------------------------
Accuracy: 80.40816326530611
F1 Score: 76.11940298507463
Recall: 62.44897959183674
Precision: 97.45222929936305
ROC AUC: 80.40816326530613
Confusion Matrix: [[241   4]
 [ 92 153]]
CPU times: user 2 s, sys: 350 ms, total: 2.35 s
Wall time: 39.4 s


**Novelty: Trying hyperparameter tuning with RANDOM SEARCH**

In [319]:
%%time
# Hyperparameter Tuning with RANDOM SEARCH
from scipy.stats import randint

pipe = Pipeline([("scale", StandardScaler()),
                ("mlp", MLPClassifier(max_iter=500))
                ])

rs_space={'mlp__hidden_layer_sizes': [(20,), (20,20), (40,), (40,40)],
            'mlp__activation': ['tanh', 'relu'],
            'mlp__learning_rate': ['constant', 'adaptive'],
            'mlp__solver': ['adam', 'sgd'],
            'mlp__alpha': [0.0001, 0.001, 0.01]}

from sklearn.model_selection import RandomizedSearchCV

mlp_random = RandomizedSearchCV(pipe, rs_space, scoring='accuracy', n_jobs=-1, cv=3)
model_random = mlp_random.fit(X_train, y_train)

mlp_df = pd.DataFrame(model_random.cv_results_)
print('Best hyperparameters are: '+str(model_random.best_params_))
print('Best score is: '+str(model_random.best_score_))
print('Best model is: '+str(model_random.best_estimator_))

# Get the best performing model
best_model = model_random.best_estimator_

# Test the best performing model on the test set
predictions = best_model.predict(X_test)

### Validation
val_pred = best_model.predict(X_val)

# Get the evaluation metrics
print('ORIGINAL EVALUATION METRICS')
print('-----------------------------')
print('Accuracy:', accuracy_score(y_test, predictions)*100)
print('F1 Score:', f1_score(y_test, predictions)*100)
print('Recall:', recall_score(y_test, predictions)*100)
print('Precision:', precision_score(y_test, predictions)*100)
print('ROC AUC:', roc_auc_score(y_test, predictions)*100)
print('Confusion Matrix:', confusion_matrix(y_test, predictions))

# Get the validation evaluation metrics
print('\nVALIDATION EVALUATION METRICS')
print('-----------------------------')
print('Accuracy:', accuracy_score(y_val, val_pred)*100)
print('F1 Score:', f1_score(y_val, val_pred)*100)
print('Recall:', recall_score(y_val, val_pred)*100)
print('Precision:', precision_score(y_val, val_pred)*100)
print('ROC AUC:', roc_auc_score(y_val, val_pred)*100)
print('Confusion Matrix:', confusion_matrix(y_val, val_pred))

#mlp_df[mlp_df['rank_test_score'] <= 5].head(5)

Best hyperparameters are: {'mlp__solver': 'adam', 'mlp__learning_rate': 'adaptive', 'mlp__hidden_layer_sizes': (40, 40), 'mlp__alpha': 0.001, 'mlp__activation': 'relu'}
Best score is: 0.9859192585331075
Best model is: Pipeline(steps=[('scale', StandardScaler()),
                ('mlp',
                 MLPClassifier(alpha=0.001, hidden_layer_sizes=(40, 40),
                               learning_rate='adaptive', max_iter=500))])
ORIGINAL EVALUATION METRICS
-----------------------------
Accuracy: 99.03100775193798
F1 Score: 99.08256880733946
Recall: 98.9010989010989
Precision: 99.26470588235294
ROC AUC: 99.0390268168046
Confusion Matrix: [[241   2]
 [  3 270]]

VALIDATION EVALUATION METRICS
-----------------------------
Accuracy: 94.89795918367348
F1 Score: 94.88752556237218
Recall: 94.6938775510204
Precision: 95.08196721311475
ROC AUC: 94.89795918367346
Confusion Matrix: [[233  12]
 [ 13 232]]
CPU times: user 6.27 s, sys: 510 ms, total: 6.78 s
Wall time: 1.81 s


# **Logistic Regression**

In [320]:
%%time

# Original: Hyperparameter Tuning with GRID SEARCH
pipe = Pipeline([("scale", StandardScaler()),
                ("lr", LogisticRegression(max_iter=2000))
                ])

param_grid_list = {'lr__solver': ['newton-cg', 'lbfgs', 'sag', 'saga'],
                  'lr__fit_intercept': [True, False],
                  'lr__tol': [0.0001, 0.001],
                  'lr__penalty': ['l1', 'l2', 'elasticnet'],
                  'lr__C': [0.1, 1, 10]}

grid = GridSearchCV(pipe, param_grid=param_grid_list, cv=10, n_jobs=-1, verbose=0)

# Find the best hyperparameters (using 10 fold CV with the hold out fold being the validation set)
grid.fit(X_train, y_train)

# Check the hyperparameter results
lr_df = pd.DataFrame(grid.cv_results_)
print(grid.best_params_, '\n')
print(grid.best_estimator_)

# Get the best performing model
best_model = grid.best_estimator_

# Train the best model on the full training data
best_model.fit(X_train, y_train)

# Test the best performing model on the test set
predictions = best_model.predict(X_test)

### Validation
val_pred = best_model.predict(X_val)

# Get the evaluation metrics
print('ORIGINAL EVALUATION METRICS')
print('-----------------------------')
print('Accuracy:', accuracy_score(y_test, predictions)*100)
print('F1 Score:', f1_score(y_test, predictions)*100)
print('Recall:', recall_score(y_test, predictions)*100)
print('Precision:', precision_score(y_test, predictions)*100)
print('ROC AUC:', roc_auc_score(y_test, predictions)*100)
print('Confusion Matrix:', confusion_matrix(y_test, predictions))

# Get the validation evaluation metrics
print('\nVALIDATION EVALUATION METRICS')
print('-----------------------------')
print('Accuracy:', accuracy_score(y_val, val_pred)*100)
print('F1 Score:', f1_score(y_val, val_pred)*100)
print('Recall:', recall_score(y_val, val_pred)*100)
print('Precision:', precision_score(y_val, val_pred)*100)
print('ROC AUC:', roc_auc_score(y_val, val_pred)*100)
print('Confusion Matrix:', confusion_matrix(y_val, val_pred))

#lr_df[lr_df['rank_test_score'] <= 5].head(5)

{'lr__C': 1, 'lr__fit_intercept': True, 'lr__penalty': 'l1', 'lr__solver': 'saga', 'lr__tol': 0.0001} 

Pipeline(steps=[('scale', StandardScaler()),
                ('lr',
                 LogisticRegression(C=1, max_iter=2000, penalty='l1',
                                    solver='saga'))])
ORIGINAL EVALUATION METRICS
-----------------------------
Accuracy: 98.83720930232558
F1 Score: 98.9010989010989
Recall: 98.9010989010989
Precision: 98.9010989010989
ROC AUC: 98.83326549993217
Confusion Matrix: [[240   3]
 [  3 270]]

VALIDATION EVALUATION METRICS
-----------------------------
Accuracy: 91.42857142857143
F1 Score: 90.78947368421053
Recall: 84.48979591836735
Precision: 98.10426540284361
ROC AUC: 91.42857142857143
Confusion Matrix: [[241   4]
 [ 38 207]]
CPU times: user 1.72 s, sys: 985 ms, total: 2.7 s
Wall time: 5.06 s


# **SVM**

In [321]:
%%time

pipe = Pipeline([("scale", StandardScaler()),
                ("svc", SVC())
                ])

param_grid_list = {'svc__C': [0.1, 1, 10],
                  'svc__kernel': ['linear', 'poly', 'rbf', 'sigmoid'],
                  'svc__degree': [3, 4, 5],
                  'svc__tol': [0.001, 0.0001, 0.01]}

grid = GridSearchCV(pipe, param_grid=param_grid_list, cv=10, n_jobs=-1, verbose=0)

# Find the best hyperparameters (using 10 fold CV with the hold out fold being the validation set)
grid.fit(X_train, y_train)

# Check the hyperparameter results
svm_df = pd.DataFrame(grid.cv_results_)
print(grid.best_params_, '\n')
print(grid.best_estimator_)

# Get the best performing model
best_model = grid.best_estimator_

# Train the best model on the full training data
best_model.fit(X_train, y_train)

# Test the best performing model on the test set
predictions = best_model.predict(X_test)

### Validation
val_pred = best_model.predict(X_val)

# Get the evaluation metrics
print('ORIGINAL EVALUATION METRICS')
print('-----------------------------')
print('Accuracy:', accuracy_score(y_test, predictions)*100)
print('F1 Score:', f1_score(y_test, predictions)*100)
print('Recall:', recall_score(y_test, predictions)*100)
print('Precision:', precision_score(y_test, predictions)*100)
print('ROC AUC:', roc_auc_score(y_test, predictions)*100)
print('Confusion Matrix:', confusion_matrix(y_test, predictions))

# Get the validation evaluation metrics
print('\nVALIDATION EVALUATION METRICS')
print('-----------------------------')
print('Accuracy:', accuracy_score(y_val, val_pred)*100)
print('F1 Score:', f1_score(y_val, val_pred)*100)
print('Recall:', recall_score(y_val, val_pred)*100)
print('Precision:', precision_score(y_val, val_pred)*100)
print('ROC AUC:', roc_auc_score(y_val, val_pred)*100)
print('Confusion Matrix:', confusion_matrix(y_val, val_pred))

#svm_df[svm_df['rank_test_score'] <= 5].head(5)

{'svc__C': 1, 'svc__degree': 3, 'svc__kernel': 'rbf', 'svc__tol': 0.001} 

Pipeline(steps=[('scale', StandardScaler()), ('svc', SVC(C=1))])
ORIGINAL EVALUATION METRICS
-----------------------------
Accuracy: 99.03100775193798
F1 Score: 99.08256880733946
Recall: 98.9010989010989
Precision: 99.26470588235294
ROC AUC: 99.0390268168046
Confusion Matrix: [[241   2]
 [  3 270]]

VALIDATION EVALUATION METRICS
-----------------------------
Accuracy: 80.20408163265306
F1 Score: 82.77087033747779
Recall: 95.10204081632652
Precision: 73.27044025157232
ROC AUC: 80.20408163265307
Confusion Matrix: [[160  85]
 [ 12 233]]
CPU times: user 1.43 s, sys: 479 ms, total: 1.91 s
Wall time: 2.47 s


# **Decision Tree**

In [322]:
%%time

pipe = Pipeline([("scale", StandardScaler()),
                ("dt", DecisionTreeClassifier())
                ])

param_grid_list = {'dt__criterion': ['entropy', 'gini'],
                  'dt__min_samples_split': [2, 3, 4],
                  'dt__min_samples_leaf': [1, 2, 3],
                  'dt__ccp_alpha': [0, 0.005, 0.01, 0.025, 0.05, 0.1]}

grid = GridSearchCV(pipe, param_grid=param_grid_list, cv=10, n_jobs=-1, verbose=0)

# Find the best hyperparameters (using 10 fold CV with the hold out fold being the validation set)
grid.fit(X_train, y_train)

# Check the hyperparameter results
dt_df = pd.DataFrame(grid.cv_results_)
print(grid.best_params_, '\n')
print(grid.best_estimator_)

# Get the best performing model
best_model = grid.best_estimator_

# Train the best model on the full training data
best_model.fit(X_train, y_train)

# Test the best performing model on the test set
predictions = best_model.predict(X_test)

### Validation
val_pred = best_model.predict(X_val)

# Get the evaluation metrics
print('ORIGINAL EVALUATION METRICS')
print('-----------------------------')
print('Accuracy:', accuracy_score(y_test, predictions)*100)
print('F1 Score:', f1_score(y_test, predictions)*100)
print('Recall:', recall_score(y_test, predictions)*100)
print('Precision:', precision_score(y_test, predictions)*100)
print('ROC AUC:', roc_auc_score(y_test, predictions)*100)
print('Confusion Matrix:', confusion_matrix(y_test, predictions))

# Get the validation evaluation metrics
print('\nVALIDATION EVALUATION METRICS')
print('-----------------------------')
print('Accuracy:', accuracy_score(y_val, val_pred)*100)
print('F1 Score:', f1_score(y_val, val_pred)*100)
print('Recall:', recall_score(y_val, val_pred)*100)
print('Precision:', precision_score(y_val, val_pred)*100)
print('ROC AUC:', roc_auc_score(y_val, val_pred)*100)
print('Confusion Matrix:', confusion_matrix(y_val, val_pred))

#dt_df[dt_df['rank_test_score'] <= 5].head(5)

{'dt__ccp_alpha': 0, 'dt__criterion': 'entropy', 'dt__min_samples_leaf': 1, 'dt__min_samples_split': 2} 

Pipeline(steps=[('scale', StandardScaler()),
                ('dt',
                 DecisionTreeClassifier(ccp_alpha=0, criterion='entropy'))])
ORIGINAL EVALUATION METRICS
-----------------------------
Accuracy: 99.03100775193798
F1 Score: 99.08256880733946
Recall: 98.9010989010989
Precision: 99.26470588235294
ROC AUC: 99.0390268168046
Confusion Matrix: [[241   2]
 [  3 270]]

VALIDATION EVALUATION METRICS
-----------------------------
Accuracy: 75.91836734693878
F1 Score: 80.5921052631579
Recall: 100.0
Precision: 67.4931129476584
ROC AUC: 75.91836734693878
Confusion Matrix: [[127 118]
 [  0 245]]
CPU times: user 353 ms, sys: 50.5 ms, total: 403 ms
Wall time: 564 ms


# **Naive Bayes (Gaussian)**

In [323]:
%%time

pipe = Pipeline([("scale", StandardScaler()),
                ("gnb", GaussianNB())
                ])

param_grid_list = {'gnb__var_smoothing': [1E-9, 1E-10, 1E-8]}

grid = GridSearchCV(pipe, param_grid=param_grid_list, cv=10, n_jobs=-1, verbose=0)

# Find the best hyperparameters (using 10 fold CV with the hold out fold being the validation set)
grid.fit(X_train, y_train)

# Check the hyperparameter results
nb_df = pd.DataFrame(grid.cv_results_)
print(grid.best_params_, '\n')
print(grid.best_estimator_)

# Get the best performing model
best_model = grid.best_estimator_

# Train the best model on the full training data
best_model.fit(X_train, y_train)

# Test the best performing model on the test set
predictions = best_model.predict(X_test)

### Validation
val_pred = best_model.predict(X_val)

# Get the evaluation metrics
print('ORIGINAL EVALUATION METRICS')
print('-----------------------------')
print('Accuracy:', accuracy_score(y_test, predictions)*100)
print('F1 Score:', f1_score(y_test, predictions)*100)
print('Recall:', recall_score(y_test, predictions)*100)
print('Precision:', precision_score(y_test, predictions)*100)
print('ROC AUC:', roc_auc_score(y_test, predictions)*100)
print('Confusion Matrix:', confusion_matrix(y_test, predictions))

# Get the validation evaluation metrics
print('\nVALIDATION EVALUATION METRICS')
print('-----------------------------')
print('Accuracy:', accuracy_score(y_val, val_pred)*100)
print('F1 Score:', f1_score(y_val, val_pred)*100)
print('Recall:', recall_score(y_val, val_pred)*100)
print('Precision:', precision_score(y_val, val_pred)*100)
print('ROC AUC:', roc_auc_score(y_val, val_pred)*100)
print('Confusion Matrix:', confusion_matrix(y_val, val_pred))

#nb_df[nb_df['rank_test_score'] <= 5].head(5)

{'gnb__var_smoothing': 1e-09} 

Pipeline(steps=[('scale', StandardScaler()), ('gnb', GaussianNB())])
ORIGINAL EVALUATION METRICS
-----------------------------
Accuracy: 89.72868217054264
F1 Score: 91.00169779286928
Recall: 98.16849816849816
Precision: 84.81012658227847
ROC AUC: 89.20770587437255
Confusion Matrix: [[195  48]
 [  5 268]]

VALIDATION EVALUATION METRICS
-----------------------------
Accuracy: 50.0
F1 Score: 0.0
Recall: 0.0
Precision: 0.0
ROC AUC: 50.0
Confusion Matrix: [[245   0]
 [245   0]]
CPU times: user 28.8 ms, sys: 5.6 ms, total: 34.4 ms
Wall time: 52.1 ms


# **AdaBoost**

In [324]:
%%time

pipe = Pipeline([("scale", StandardScaler()),
                ("ab", AdaBoostClassifier())
                ])

param_grid_list = {'ab__n_estimators': [50, 100, 150, 200],
                  'ab__learning_rate': [0.95, 1, 1.05, 1.25, 1.5, 1.75, 2],
                  'ab__algorithm': ['SAMME', 'SAMME.R']}

grid = GridSearchCV(pipe, param_grid=param_grid_list, cv=10, n_jobs=-1, verbose=0)

# Find the best hyperparameters (using 10 fold CV with the hold out fold being the validation set)
grid.fit(X_train, y_train)

# Check the hyperparameter results
ab_df = pd.DataFrame(grid.cv_results_)
print(grid.best_params_, '\n')
print(grid.best_estimator_)

# Get the best performing model
best_model = grid.best_estimator_

# Train the best model on the full training data
best_model.fit(X_train, y_train)

# Test the best performing model on the test set
predictions = best_model.predict(X_test)

### Validation
val_pred = best_model.predict(X_val)

# Get the evaluation metrics
print('ORIGINAL EVALUATION METRICS')
print('-----------------------------')
print('Accuracy:', accuracy_score(y_test, predictions)*100)
print('F1 Score:', f1_score(y_test, predictions)*100)
print('Recall:', recall_score(y_test, predictions)*100)
print('Precision:', precision_score(y_test, predictions)*100)
print('ROC AUC:', roc_auc_score(y_test, predictions)*100)
print('Confusion Matrix:', confusion_matrix(y_test, predictions))

# Get the validation evaluation metrics
print('\nVALIDATION EVALUATION METRICS')
print('-----------------------------')
print('Accuracy:', accuracy_score(y_val, val_pred)*100)
print('F1 Score:', f1_score(y_val, val_pred)*100)
print('Recall:', recall_score(y_val, val_pred)*100)
print('Precision:', precision_score(y_val, val_pred)*100)
print('ROC AUC:', roc_auc_score(y_val, val_pred)*100)
print('Confusion Matrix:', confusion_matrix(y_val, val_pred))

#ab_df[ab_df['rank_test_score'] <= 5].head(5)

{'ab__algorithm': 'SAMME', 'ab__learning_rate': 1.75, 'ab__n_estimators': 150} 

Pipeline(steps=[('scale', StandardScaler()),
                ('ab',
                 AdaBoostClassifier(algorithm='SAMME', learning_rate=1.75,
                                    n_estimators=150))])
ORIGINAL EVALUATION METRICS
-----------------------------
Accuracy: 98.83720930232558
F1 Score: 98.9010989010989
Recall: 98.9010989010989
Precision: 98.9010989010989
ROC AUC: 98.83326549993217
Confusion Matrix: [[240   3]
 [  3 270]]

VALIDATION EVALUATION METRICS
-----------------------------
Accuracy: 81.22448979591836
F1 Score: 84.19243986254295
Recall: 100.0
Precision: 72.70029673590504
ROC AUC: 81.22448979591836
Confusion Matrix: [[153  92]
 [  0 245]]
CPU times: user 742 ms, sys: 105 ms, total: 847 ms
Wall time: 8.28 s


# **GradientBoostClassifier**

In [325]:
%%time

pipe = Pipeline([("scale", StandardScaler()),
                ("gbc", GradientBoostingClassifier())
                ])

param_grid_list = {'gbc__max_features': ['sqrt', 'log2'], # Removed 'auto'
                   'gbc__learning_rate': [0.05, 0.1, 0.2, 0.25, 0.30, 0.35, 0.40, 0.5, 0.6, 0.7, 0.9],
                   'gbc__n_estimators': [100, 200]
                  }

grid = GridSearchCV(pipe, param_grid=param_grid_list, cv=10, n_jobs=-1, verbose=0)

# Find the best hyperparameters (using 10 fold CV with the hold out fold being the validation set)
grid.fit(X_train, y_train)

# Check the hyperparameter results
gb_df = pd.DataFrame(grid.cv_results_)
print(grid.best_params_, '\n')
print(grid.best_estimator_)

# Get the best performing model
best_model = grid.best_estimator_

# Train the best model on the full training data
best_model.fit(X_train, y_train)

# Test the best performing model on the test set
predictions = best_model.predict(X_test)

### Validation
val_pred = best_model.predict(X_val)

# Get the evaluation metrics
print('ORIGINAL EVALUATION METRICS')
print('-----------------------------')
print('Accuracy:', accuracy_score(y_test, predictions)*100)
print('F1 Score:', f1_score(y_test, predictions)*100)
print('Recall:', recall_score(y_test, predictions)*100)
print('Precision:', precision_score(y_test, predictions)*100)
print('ROC AUC:', roc_auc_score(y_test, predictions)*100)
print('Confusion Matrix:', confusion_matrix(y_test, predictions))

# Get the validation evaluation metrics
print('\nVALIDATION EVALUATION METRICS')
print('-----------------------------')
print('Accuracy:', accuracy_score(y_val, val_pred)*100)
print('F1 Score:', f1_score(y_val, val_pred)*100)
print('Recall:', recall_score(y_val, val_pred)*100)
print('Precision:', precision_score(y_val, val_pred)*100)
print('ROC AUC:', roc_auc_score(y_val, val_pred)*100)
print('Confusion Matrix:', confusion_matrix(y_val, val_pred))

#gb_df[gb_df['rank_test_score'] <= 5].head(5)

{'gbc__learning_rate': 0.1, 'gbc__max_features': 'sqrt', 'gbc__n_estimators': 200} 

Pipeline(steps=[('scale', StandardScaler()),
                ('gbc',
                 GradientBoostingClassifier(max_features='sqrt',
                                            n_estimators=200))])
ORIGINAL EVALUATION METRICS
-----------------------------
Accuracy: 99.03100775193798
F1 Score: 99.08256880733946
Recall: 98.9010989010989
Precision: 99.26470588235294
ROC AUC: 99.0390268168046
Confusion Matrix: [[241   2]
 [  3 270]]

VALIDATION EVALUATION METRICS
-----------------------------
Accuracy: 75.91836734693878
F1 Score: 80.5921052631579
Recall: 100.0
Precision: 67.4931129476584
ROC AUC: 75.91836734693878
Confusion Matrix: [[127 118]
 [  0 245]]
CPU times: user 593 ms, sys: 76 ms, total: 669 ms
Wall time: 4.09 s


# **KNN**

In [326]:
%%time

pipe = Pipeline([("scale", StandardScaler()),
                ("knn", KNeighborsClassifier())
                ])

param_grid_list = {'knn__n_neighbors': [1, 10, 20],
                  'knn__weights': ['uniform', 'distance'],
                  'knn__p': [1, 2],
                  'knn__algorithm': ['auto'],
                  'knn__leaf_size': [15, 30, 45]}

grid = GridSearchCV(pipe, param_grid=param_grid_list, cv=10, n_jobs=-1, verbose=0)

# Find the best hyperparameters (using 10 fold CV with the hold out fold being the validation set)
grid.fit(X_train, y_train)

# Check the hyperparameter results
knn_df = pd.DataFrame(grid.cv_results_)
print(grid.best_params_, '\n')
print(grid.best_estimator_)

# Get the best performing model
best_model = grid.best_estimator_

# Train the best model on the full training data
best_model.fit(X_train, y_train)

# Test the best performing model on the test set
predictions = best_model.predict(X_test)

### Validation
val_pred = best_model.predict(X_val)

# Get the evaluation metrics
print('ORIGINAL EVALUATION METRICS')
print('-----------------------------')
print('Accuracy:', accuracy_score(y_test, predictions)*100)
print('F1 Score:', f1_score(y_test, predictions)*100)
print('Recall:', recall_score(y_test, predictions)*100)
print('Precision:', precision_score(y_test, predictions)*100)
print('ROC AUC:', roc_auc_score(y_test, predictions)*100)
print('Confusion Matrix:', confusion_matrix(y_test, predictions))

# Get the validation evaluation metrics
print('\nVALIDATION EVALUATION METRICS')
print('-----------------------------')
print('Accuracy:', accuracy_score(y_val, val_pred)*100)
print('F1 Score:', f1_score(y_val, val_pred)*100)
print('Recall:', recall_score(y_val, val_pred)*100)
print('Precision:', precision_score(y_val, val_pred)*100)
print('ROC AUC:', roc_auc_score(y_val, val_pred)*100)
print('Confusion Matrix:', confusion_matrix(y_val, val_pred))

#knn_df[knn_df['rank_test_score'] <= 5].head(5)

{'knn__algorithm': 'auto', 'knn__leaf_size': 15, 'knn__n_neighbors': 10, 'knn__p': 1, 'knn__weights': 'distance'} 

Pipeline(steps=[('scale', StandardScaler()),
                ('knn',
                 KNeighborsClassifier(leaf_size=15, n_neighbors=10, p=1,
                                      weights='distance'))])
ORIGINAL EVALUATION METRICS
-----------------------------
Accuracy: 99.03100775193798
F1 Score: 99.08256880733946
Recall: 98.9010989010989
Precision: 99.26470588235294
ROC AUC: 99.0390268168046
Confusion Matrix: [[241   2]
 [  3 270]]

VALIDATION EVALUATION METRICS
-----------------------------
Accuracy: 76.93877551020408
F1 Score: 80.48359240069084
Recall: 95.10204081632652
Precision: 69.76047904191617
ROC AUC: 76.93877551020407
Confusion Matrix: [[144 101]
 [ 12 233]]
CPU times: user 1.13 s, sys: 616 ms, total: 1.75 s
Wall time: 576 ms


# **Stacked Testing:**

In [327]:
from sklearn.ensemble import StackingClassifier

'''
base_learners = [('rf', RandomForestClassifier(criterion='entropy', max_features='auto', min_samples_leaf=1, min_samples_split=3, n_estimators=100)), 
                ('mlp', MLPClassifier(max_iter=500, activation='relu', alpha=0.001, hidden_layer_sizes=(20,), learning_rate='adaptive', solver='adam')),
                ('knn', KNeighborsClassifier(algorithm='auto', leaf_size=15, n_neighbors=20, p=1, weights='distance')), 
                ('svm', SVC(C=10, kernel='rbf', tol=0.001))]
'''

base_learners_set1 = [('rf', RandomForestClassifier(criterion='entropy', min_samples_leaf=1, min_samples_split=3, n_estimators=100)), 
                ('mlp', MLPClassifier(max_iter=500, activation='relu', alpha=0.001, hidden_layer_sizes=(20,), learning_rate='adaptive', solver='adam')),
                ('knn', KNeighborsClassifier(algorithm='auto', leaf_size=15, n_neighbors=20, p=1, weights='distance'))]

base_learners_set2 = [('rf', RandomForestClassifier(criterion='entropy', min_samples_leaf=1, min_samples_split=3, n_estimators=100)), 
                ('mlp', MLPClassifier(max_iter=500, activation='relu', alpha=0.001, hidden_layer_sizes=(20,), learning_rate='adaptive', solver='adam')), 
                ('svm', SVC(C=10, kernel='rbf', tol=0.001))]

base_learners_set3 = [('rf', RandomForestClassifier(criterion='entropy',  min_samples_leaf=1, min_samples_split=3, n_estimators=100)),
                ('knn', KNeighborsClassifier(algorithm='auto', leaf_size=15, n_neighbors=20, p=1, weights='distance')), 
                ('svm', SVC(C=10, kernel='rbf', tol=0.001))]

base_learners_set4 = [('mlp', MLPClassifier(max_iter=500, activation='relu', alpha=0.001, hidden_layer_sizes=(20,), learning_rate='adaptive', solver='adam')),
                ('knn', KNeighborsClassifier(algorithm='auto', leaf_size=15, n_neighbors=20, p=1, weights='distance')), 
                ('svm', SVC(C=10, kernel='rbf', tol=0.001))]

base_learners = []
base_learners.append(base_learners_set1)
base_learners.append(base_learners_set2)
base_learners.append(base_learners_set3)
base_learners.append(base_learners_set4)

for base_learner_group in base_learners:

    meta_learner = LogisticRegression()

    clf = StackingClassifier(estimators=base_learner_group, final_estimator=meta_learner)

    # Train the stacked model on the full training data
    clf.fit(X_train, y_train)

    predictions = clf.predict(X_test)

    ### Validation
    val_pred = clf.predict(X_val)

    # Get the evaluation metrics
    print('ORIGINAL EVALUATION METRICS')
    print('-----------------------------')
    print('Accuracy:', accuracy_score(y_test, predictions)*100)
    print('F1 Score:', f1_score(y_test, predictions)*100)
    print('Recall:', recall_score(y_test, predictions)*100)
    print('Precision:', precision_score(y_test, predictions)*100)
    print('ROC AUC:', roc_auc_score(y_test, predictions)*100)
    print('Confusion Matrix:', confusion_matrix(y_test, predictions))
    print('-----------------------------------------\n')

    # Get the validation evaluation metrics
    print('\nVALIDATION EVALUATION METRICS')
    print('-----------------------------')
    print('Accuracy:', accuracy_score(y_val, val_pred)*100)
    print('F1 Score:', f1_score(y_val, val_pred)*100)
    print('Recall:', recall_score(y_val, val_pred)*100)
    print('Precision:', precision_score(y_val, val_pred)*100)
    print('ROC AUC:', roc_auc_score(y_val, val_pred)*100)
    print('Confusion Matrix:', confusion_matrix(y_val, val_pred))

    

ORIGINAL EVALUATION METRICS
-----------------------------
Accuracy: 99.03100775193798
F1 Score: 99.08256880733946
Recall: 98.9010989010989
Precision: 99.26470588235294
ROC AUC: 99.0390268168046
Confusion Matrix: [[241   2]
 [  3 270]]
-----------------------------------------


VALIDATION EVALUATION METRICS
-----------------------------
Accuracy: 82.24489795918367
F1 Score: 84.26763110307414
Recall: 95.10204081632652
Precision: 75.64935064935064
ROC AUC: 82.24489795918367
Confusion Matrix: [[170  75]
 [ 12 233]]
ORIGINAL EVALUATION METRICS
-----------------------------
Accuracy: 99.03100775193798
F1 Score: 99.08256880733946
Recall: 98.9010989010989
Precision: 99.26470588235294
ROC AUC: 99.0390268168046
Confusion Matrix: [[241   2]
 [  3 270]]
-----------------------------------------


VALIDATION EVALUATION METRICS
-----------------------------
Accuracy: 83.46938775510205
F1 Score: 85.19195612431444
Recall: 95.10204081632652
Precision: 77.15231788079471
ROC AUC: 83.46938775510203
Confu