## For evaluating model metrics (load saved models)

In [1]:
import import_ipynb # Allows me to import previous notebooks
from classification import accuracy_fn, X_train, y_train_mapped, X_test, y_test_mapped, PhishingModelV0, PhishingModelV1
from sklearn.metrics import confusion_matrix, classification_report
import torch
from torch import nn

object


In [2]:
# Run on the GPU when PyTorch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Sizes handed to the model constructors below.
in_features, out_features = 30, 1
hidden_units = 5      # baseline hidden_units
hidden_units_v2 = 10  # wider variant, for testing if more neurons = better

## Loading models

In [3]:
# Instantiate all models again (required): the files loaded below are state
# dicts, which are applied to an already-constructed module via load_state_dict.
modelV0 = PhishingModelV0(in_features=in_features, out_features=out_features, hidden_units=hidden_units).to(device)  # 3 layers, 5 neurons (baseline)
modelV1 = PhishingModelV1(in_features=in_features, out_features=out_features, hidden_units=hidden_units).to(device)  # 6 layers, 5 neurons
modelV2 = PhishingModelV0(in_features=in_features, out_features=out_features, hidden_units=hidden_units_v2).to(device)  # 3 layers, 10 neurons
modelV3 = PhishingModelV1(in_features=in_features, out_features=out_features, hidden_units=hidden_units_v2).to(device)  # 6 layers and 10 neurons

# Load state dict of all models.
# map_location=device remaps the stored tensors onto the device selected above,
# so a checkpoint written on a CUDA machine still loads on a CPU-only kernel
# (without it, torch.load tries to restore tensors to the device they were saved from).
modelV0.load_state_dict(torch.load("saved_models/modelV0.pt", map_location=device))
modelV1.load_state_dict(torch.load("saved_models/modelV1.pt", map_location=device))
modelV2.load_state_dict(torch.load("saved_models/modelV2.pt", map_location=device))
# Left as the cell's last expression so the key-matching result is displayed.
modelV3.load_state_dict(torch.load("saved_models/modelV3.pt", map_location=device))

<All keys matched successfully>

## Summary of metrics for all 4 models

In [4]:
target_names = ["Phishy", "Legit"]


def evaluate_model(name, model):
    """Evaluate ``model`` on the test split and print its metrics.

    Prints the accuracy, the confusion matrix and the classification report,
    each labelled with ``name``, followed by a separator line.

    Args:
        name: Label used in the printed output, e.g. ``"ModelV0"``.
        model: Module to evaluate. It is called on ``X_test`` and its squeezed
            output is treated as logits for a binary classifier
            (sigmoid, then round to 0/1).

    Returns:
        A ``(accuracy, confusion_matrix, classification_report)`` tuple, where
        ``accuracy`` is whatever ``accuracy_fn`` returns.
    """
    model.eval()
    with torch.inference_mode():
        # The models were moved to `device`; move the data there too so the
        # forward pass and accuracy_fn never mix devices. `.to(device)` is a
        # no-op when the tensors already live on that device.
        logits = model(X_test.to(device)).squeeze()
        preds = torch.sigmoid(logits).round()
        accuracy = accuracy_fn(preds, y_test_mapped.to(device))
        print(f"{name} Accuracy: {accuracy}%")

    # scikit-learn operates on NumPy arrays, which cannot be built directly
    # from CUDA tensors, so both metrics receive explicit CPU NumPy copies.
    y_true_np = y_test_mapped.cpu().numpy()
    y_pred_np = preds.cpu().numpy()
    matrix = confusion_matrix(y_true_np, y_pred_np)
    report = classification_report(y_true_np, y_pred_np, target_names=target_names)
    print(f"{name} Confusion Matrix:\n{matrix}")
    print(f"{name} Classification Report:\n{report}")
    print("==================================\n")
    return accuracy, matrix, report


print("==================================\n")

# Per-model result names are kept so code that reads them keeps working.
modelV0_accuracy, modelV0_confusion_matrix, modelV0_classification_report = evaluate_model("ModelV0", modelV0)
modelV1_accuracy, modelV1_confusion_matrix, modelV1_classification_report = evaluate_model("ModelV1", modelV1)
modelV2_accuracy, modelV2_confusion_matrix, modelV2_classification_report = evaluate_model("ModelV2", modelV2)
modelV3_accuracy, modelV3_confusion_matrix, modelV3_classification_report = evaluate_model("ModelV3", modelV3)


ModelV0 Accuracy: 93.8%
ModelV0 Confusion Matrix:
[[ 904   92]
 [  45 1170]]
ModelV0 Classification Report:
              precision    recall  f1-score   support

      Phishy       0.95      0.91      0.93       996
       Legit       0.93      0.96      0.94      1215

    accuracy                           0.94      2211
   macro avg       0.94      0.94      0.94      2211
weighted avg       0.94      0.94      0.94      2211


ModelV1 Accuracy: 93.22%
ModelV1 Confusion Matrix:
[[ 942   54]
 [  96 1119]]
ModelV1 Classification Report:
              precision    recall  f1-score   support

      Phishy       0.91      0.95      0.93       996
       Legit       0.95      0.92      0.94      1215

    accuracy                           0.93      2211
   macro avg       0.93      0.93      0.93      2211
weighted avg       0.93      0.93      0.93      2211


ModelV2 Accuracy: 94.53%
ModelV2 Confusion Matrix:
[[ 921   75]
 [  46 1169]]
ModelV2 Classification Report:
              pre