# Libraries
---

In [1]:
# Data Prep Libraries
import pandas as pd
import numpy as np
import os
from datetime import datetime, timedelta
from dateutil.relativedelta import relativedelta
import warnings

# Model Prep Library
from sklearn.model_selection import train_test_split, GridSearchCV

# Model Evaluation Libraries
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
from sklearn.metrics import confusion_matrix, roc_auc_score, classification_report

# Model Libraries
from sklearn.naive_bayes import GaussianNB
from sklearn.neural_network import MLPClassifier

# Notebook-wide display settings: show up to 100 rows and 100 columns of a
# DataFrame, and silence every warning for the rest of the session.
pd.set_option("display.max_rows", 100, "display.max_columns", 100)
warnings.filterwarnings("ignore")

# Read Data
---

In [2]:
# `data` keeps the frame exactly as loaded from disk; `df` is an independent
# deep copy that the following cells work on.
data = pd.read_csv(filepath_or_buffer="../data/emails.csv")
df = data.copy(deep=True)

# Model Evaluation Function
---

In [3]:
# Notebook-level result log filled by model_eval_: each evaluation appends one
# entry to every list, so index i across the six lists describes one run.
model, resample = [], []
precision, recall, F1score, AUCROC = [], [], [], []

In [4]:
def model_eval_(clf_model, X_test, y_test, algo=None, sampling=None):
    """Print an evaluation report for a fitted classifier and log its scores.

    Prints the confusion matrix, the classification report and the AUC-ROC
    for the supplied evaluation set, then appends one entry to each of the
    notebook-level result lists ``model``, ``resample``, ``precision``,
    ``recall``, ``F1score`` and ``AUCROC``.

    Parameters
    ----------
    clf_model : fitted estimator
        Must expose ``predict`` and ``predict_proba``.
    X_test : features of the split to evaluate.
    y_test : true labels of the split to evaluate.
    algo : optional
        Label appended to ``model`` for this run.
    sampling : optional
        Label appended to ``resample`` for this run.

    Returns
    -------
    None
        Results are printed and recorded in the result lists.
    """
    # Test set prediction
    y_prob = clf_model.predict_proba(X_test)
    y_pred = clf_model.predict(X_test)

    print('Confusion Matrix')
    print('='*60)
    print(confusion_matrix(y_test, y_pred), "\n")
    print('Classification Report')
    print('='*60)
    print(classification_report(y_test, y_pred), "\n")

    # Column 1 of predict_proba is used as the score
    # (assumes a binary target whose positive class sits in column 1 -- TODO confirm).
    # Computed once and reused for both the printout and the log.
    auc = roc_auc_score(y_test, y_prob[:, 1])
    print('AUC-ROC')
    print('='*60)
    print(auc)

    # Compute every metric before touching the result lists: if one of the
    # scorers raises, nothing has been appended and the lists stay aligned.
    prec = precision_score(y_test, y_pred)
    rec = recall_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred)

    model.append(algo)
    precision.append(prec)
    recall.append(rec)
    F1score.append(f1)
    AUCROC.append(auc)
    resample.append(sampling)

# Data Prep
---

In [5]:
# Remove the "Email No." column (presumably a row identifier -- confirm) before
# modelling. `df` is rebuilt from the untouched `data` frame rather than
# dropped in place, so re-running this cell is safe: the in-place version
# raised KeyError on a second run because the column was already gone.
df = data.drop(columns=["Email No."])
print(df.shape)
print()
# Total number of missing values across the whole frame.
print(df.isna().sum().sum())

(5172, 3001)

0


In [6]:
# Separate the feature matrix from the label column "Prediction".
X = df.drop(columns=["Prediction"])
y = df["Prediction"]

# Create Train, Validation and Test Datasets
---

In [7]:
# Step 1: hold out 25% of the rows; the remaining 75% is the training set.
X_train, X_valid, y_train, y_valid = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=42,
)
# Step 2: split that hold-out again -- 75% of it stays as the validation set,
# 25% becomes the test set (the names X_valid / y_valid are reused on purpose).
X_valid, X_test, y_valid, y_test = train_test_split(
    X_valid,
    y_valid,
    test_size=0.25,
    random_state=42,
)

In [8]:
# Shapes of the three feature splits followed by the three label splits.
tuple(part.shape for part in (X_train, X_valid, X_test, y_train, y_valid, y_test))

((3879, 3000), (969, 3000), (324, 3000), (3879,), (969,), (324,))

# Model
---

In [None]:
# Hyper-parameter grid for the MLPClassifier grid search.
# The original cell was an unfinished placeholder (empty keys, `range()` with
# no arguments, and a `,` where a `:` was needed), so it could not even be
# parsed. The values below are starting points only -- TODO tune.
param_grid = {
    # One hidden layer with 50, 100, 150 or 200 units.
    "hidden_layer_sizes": [(units,) for units in range(50, 201, 50)],
    # Upper bound on training iterations: 200, 400, 600.
    "max_iter": list(range(200, 601, 200)),
}

In [None]:
# Grid search over `param_grid` for a multi-layer perceptron.
# `param_grid` must be passed by keyword: a positional argument after
# `estimator=` is a SyntaxError. The fixed random_state (same seed as the
# data split) makes the network initialisation repeatable between runs.
nn_cv = GridSearchCV(
    estimator=MLPClassifier(random_state=42),
    param_grid=param_grid,
)

In [10]:
# Fit a Gaussian Naive Bayes classifier on the training split.
# GaussianNB comes from sklearn.naive_bayes and must be imported in the
# Libraries cell; without that import this cell raises NameError on a fresh kernel.
nb = GaussianNB().fit(X_train, y_train)

In [11]:
# Report and log the Naive Bayes scores on the validation split.
model_eval_(
    nb,
    X_valid,
    y_valid,
    algo='Naive Bayes',
    sampling='Validation Evaluation',
)

Confusion Matrix
[[654  35]
 [ 13 267]] 

Classification Report
              precision    recall  f1-score   support

           0       0.98      0.95      0.96       689
           1       0.88      0.95      0.92       280

    accuracy                           0.95       969
   macro avg       0.93      0.95      0.94       969
weighted avg       0.95      0.95      0.95       969
 

AUC-ROC
0.9555618909392495


### Validation results are strong: accuracy is 0.95, and for the positive class (1) precision is 0.88 and recall is 0.95, with an AUC-ROC of about 0.956.

In [12]:
# Report and log the Naive Bayes scores on the held-out test split.
model_eval_(
    nb,
    X_test,
    y_test,
    algo='Naive Bayes',
    sampling='Test Evaluation',
)

Confusion Matrix
[[209  15]
 [  4  96]] 

Classification Report
              precision    recall  f1-score   support

           0       0.98      0.93      0.96       224
           1       0.86      0.96      0.91       100

    accuracy                           0.94       324
   macro avg       0.92      0.95      0.93       324
weighted avg       0.95      0.94      0.94       324
 

AUC-ROC
0.9510491071428572


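### Test results are in line with validation: accuracy is 0.94, and for the positive class (1) precision is 0.86 and recall is 0.96, with an AUC-ROC of about 0.951. Note that the test set has only 324 rows.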