Helper notebook that runs a model against the dev set and collects the URLs that were misclassified, split into false negatives and false positives.

In [1]:
from torch.utils.data import DataLoader
import torch
from classifiers.basic_url_skinny_bert_classifier import BasicUrlSkinnyBertClassifier
from classifiers.ensemble_model_classifier import EnsembleModel
from dataset.phishing_dataset import PhishingDataset
import os
from utils import get_filtered_inputs
import inspect
from transformers import DistilBertTokenizer
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score

# Location of the tokenized dataset file; "~" is expanded to the current user's home directory.
dataset_path = os.path.expanduser("~/transfer/phishing_output_tokenized.h5")

# Prefer CUDA, then MPS, and fall back to CPU when neither backend is available.
device = torch.device('cuda' if torch.cuda.is_available() else 'mps' if torch.backends.mps.is_available() else 'cpu')
print(device)

# Load the model
model = EnsembleModel(device).to(device)
checkpoint_path = "models/canonical/ensemble_phishing_classifier.pt"
# weights_only=True keeps torch.load from unpickling arbitrary Python objects;
# a plain state_dict checkpoint only needs tensors and standard containers.
# NOTE(review): the saved output of this cell shows load_state_dict raising a
# RuntimeError (unexpected "models.BasicCNNClassifier.*" keys and a
# classifier.0.weight shape mismatch, [512, 2048] in the checkpoint vs
# [512, 1536] in the model). Presumably the checkpoint was saved from a
# different EnsembleModel configuration -- confirm before relying on this cell.
model.load_state_dict(torch.load(checkpoint_path, map_location=device, weights_only=True))
# The model was already moved to `device` when it was constructed above, so no
# second .to(device) is needed here.
model.eval()

# The names of forward()'s parameters are handed to PhishingDataset
# (presumably to select which fields each sample provides -- verify against
# the dataset class).
required_data = inspect.signature(model.forward).parameters.keys()
test_dataset = PhishingDataset(required_data, split='dev', local_file_path=dataset_path)
test_dataloader = DataLoader(
    test_dataset,
    batch_size=128,
    shuffle=False,
    # Pinned host memory only helps host-to-CUDA copies; skip it on MPS/CPU.
    pin_memory=(device.type == 'cuda'),
)

# Accumulators for the dataset-level metrics.
all_preds, all_labels = [], []
total_loss = 0.0  # running SUM of per-sample cross-entropy losses
num_samples = 0   # number of samples seen so far

# Misclassified URLs mapped to the softmax probability of class 1 ("phishing").
# Both are keyed by URL, so a URL that occurs more than once in the split keeps
# only the score from its last occurrence.
false_positives = {}
false_negatives = {}

with torch.no_grad():
    for batch in test_dataloader:
        labels = batch['label'].to(device)
        urls = batch['url']  # assumes the dataset yields a 'url' field in every batch
        # get_filtered_inputs is a project helper; presumably it keeps only the
        # entries that model.forward accepts -- verify against utils.
        filtered_inputs = get_filtered_inputs(batch)
        outputs = model(**filtered_inputs)

        # Sum the loss over the batch instead of taking the batch mean: averaging
        # per-batch means would over-weight a smaller final batch. The sum is
        # divided by the total sample count after the loop.
        batch_loss = torch.nn.functional.cross_entropy(outputs, labels, reduction='sum')
        total_loss += batch_loss.item()
        num_samples += labels.size(0)

        preds = torch.argmax(outputs, dim=1)
        scores = torch.softmax(outputs, dim=1)  # Get predicted probabilities

        # Move each tensor to the host once per batch rather than calling
        # .item()/.cpu() per sample, which forces a device sync on every element.
        batch_labels = labels.cpu().tolist()
        batch_preds = preds.cpu().tolist()
        phishing_scores = scores[:, 1].cpu().numpy()  # probability of class 1

        all_preds.extend(batch_preds)
        all_labels.extend(batch_labels)

        for url, label, pred, score in zip(urls, batch_labels, batch_preds, phishing_scores):
            if label == 1 and pred == 0:  # False Negative
                false_negatives[url] = score
            elif label == 0 and pred == 1:  # False Positive
                false_positives[url] = score

# Per-sample average loss over the whole split; 0.0 when the loader was empty.
avg_loss = total_loss / num_samples if num_samples > 0 else 0.0

precision = precision_score(all_labels, all_preds, average='binary')
recall = recall_score(all_labels, all_preds, average='binary')
f1 = f1_score(all_labels, all_preds, average='binary')
accuracy = accuracy_score(all_labels, all_preds)

print(f"Dev Loss: {avg_loss:.4f}, Precision: {precision:.4f}, Recall: {recall:.4f}, F1 Score: {f1:.4f}, Accuracy: {accuracy:.4f}")

# These are counts of distinct URLs (see the note on the dicts above).
print(f"False Positives: {len(false_positives)}")
print(f"False Negatives: {len(false_negatives)}")



  Referenced from: <6DFB383A-E1D9-3EC6-8A60-382AF4E3C226> /opt/homebrew/Caskroom/miniforge/base/envs/phishing-edge/lib/python3.10/site-packages/torchvision/image.so
  warn(


mps


  model.load_state_dict(torch.load(model_path, map_location=device), strict=True)
  model.load_state_dict(torch.load(checkpoint_path, map_location=device))


RuntimeError: Error(s) in loading state_dict for EnsembleModel:
	Unexpected key(s) in state_dict: "models.BasicCNNClassifier.cnn.conv1.weight", "models.BasicCNNClassifier.cnn.bn1.weight", "models.BasicCNNClassifier.cnn.bn1.bias", "models.BasicCNNClassifier.cnn.bn1.running_mean", "models.BasicCNNClassifier.cnn.bn1.running_var", "models.BasicCNNClassifier.cnn.bn1.num_batches_tracked", "models.BasicCNNClassifier.cnn.layer1.0.conv1.weight", "models.BasicCNNClassifier.cnn.layer1.0.bn1.weight", "models.BasicCNNClassifier.cnn.layer1.0.bn1.bias", "models.BasicCNNClassifier.cnn.layer1.0.bn1.running_mean", "models.BasicCNNClassifier.cnn.layer1.0.bn1.running_var", "models.BasicCNNClassifier.cnn.layer1.0.bn1.num_batches_tracked", "models.BasicCNNClassifier.cnn.layer1.0.conv2.weight", "models.BasicCNNClassifier.cnn.layer1.0.bn2.weight", "models.BasicCNNClassifier.cnn.layer1.0.bn2.bias", "models.BasicCNNClassifier.cnn.layer1.0.bn2.running_mean", "models.BasicCNNClassifier.cnn.layer1.0.bn2.running_var", "models.BasicCNNClassifier.cnn.layer1.0.bn2.num_batches_tracked", "models.BasicCNNClassifier.cnn.layer1.1.conv1.weight", "models.BasicCNNClassifier.cnn.layer1.1.bn1.weight", "models.BasicCNNClassifier.cnn.layer1.1.bn1.bias", "models.BasicCNNClassifier.cnn.layer1.1.bn1.running_mean", "models.BasicCNNClassifier.cnn.layer1.1.bn1.running_var", "models.BasicCNNClassifier.cnn.layer1.1.bn1.num_batches_tracked", "models.BasicCNNClassifier.cnn.layer1.1.conv2.weight", "models.BasicCNNClassifier.cnn.layer1.1.bn2.weight", "models.BasicCNNClassifier.cnn.layer1.1.bn2.bias", "models.BasicCNNClassifier.cnn.layer1.1.bn2.running_mean", "models.BasicCNNClassifier.cnn.layer1.1.bn2.running_var", "models.BasicCNNClassifier.cnn.layer1.1.bn2.num_batches_tracked", "models.BasicCNNClassifier.cnn.layer2.0.conv1.weight", "models.BasicCNNClassifier.cnn.layer2.0.bn1.weight", "models.BasicCNNClassifier.cnn.layer2.0.bn1.bias", "models.BasicCNNClassifier.cnn.layer2.0.bn1.running_mean", "models.BasicCNNClassifier.cnn.layer2.0.bn1.running_var", 
"models.BasicCNNClassifier.cnn.layer2.0.bn1.num_batches_tracked", "models.BasicCNNClassifier.cnn.layer2.0.conv2.weight", "models.BasicCNNClassifier.cnn.layer2.0.bn2.weight", "models.BasicCNNClassifier.cnn.layer2.0.bn2.bias", "models.BasicCNNClassifier.cnn.layer2.0.bn2.running_mean", "models.BasicCNNClassifier.cnn.layer2.0.bn2.running_var", "models.BasicCNNClassifier.cnn.layer2.0.bn2.num_batches_tracked", "models.BasicCNNClassifier.cnn.layer2.0.downsample.0.weight", "models.BasicCNNClassifier.cnn.layer2.0.downsample.1.weight", "models.BasicCNNClassifier.cnn.layer2.0.downsample.1.bias", "models.BasicCNNClassifier.cnn.layer2.0.downsample.1.running_mean", "models.BasicCNNClassifier.cnn.layer2.0.downsample.1.running_var", "models.BasicCNNClassifier.cnn.layer2.0.downsample.1.num_batches_tracked", "models.BasicCNNClassifier.cnn.layer2.1.conv1.weight", "models.BasicCNNClassifier.cnn.layer2.1.bn1.weight", "models.BasicCNNClassifier.cnn.layer2.1.bn1.bias", "models.BasicCNNClassifier.cnn.layer2.1.bn1.running_mean", "models.BasicCNNClassifier.cnn.layer2.1.bn1.running_var", "models.BasicCNNClassifier.cnn.layer2.1.bn1.num_batches_tracked", "models.BasicCNNClassifier.cnn.layer2.1.conv2.weight", "models.BasicCNNClassifier.cnn.layer2.1.bn2.weight", "models.BasicCNNClassifier.cnn.layer2.1.bn2.bias", "models.BasicCNNClassifier.cnn.layer2.1.bn2.running_mean", "models.BasicCNNClassifier.cnn.layer2.1.bn2.running_var", "models.BasicCNNClassifier.cnn.layer2.1.bn2.num_batches_tracked", "models.BasicCNNClassifier.cnn.layer3.0.conv1.weight", "models.BasicCNNClassifier.cnn.layer3.0.bn1.weight", "models.BasicCNNClassifier.cnn.layer3.0.bn1.bias", "models.BasicCNNClassifier.cnn.layer3.0.bn1.running_mean", "models.BasicCNNClassifier.cnn.layer3.0.bn1.running_var", "models.BasicCNNClassifier.cnn.layer3.0.bn1.num_batches_tracked", "models.BasicCNNClassifier.cnn.layer3.0.conv2.weight", "models.BasicCNNClassifier.cnn.layer3.0.bn2.weight", "models.BasicCNNClassifier.cnn.layer3.0.bn2.bias", 
"models.BasicCNNClassifier.cnn.layer3.0.bn2.running_mean", "models.BasicCNNClassifier.cnn.layer3.0.bn2.running_var", "models.BasicCNNClassifier.cnn.layer3.0.bn2.num_batches_tracked", "models.BasicCNNClassifier.cnn.layer3.0.downsample.0.weight", "models.BasicCNNClassifier.cnn.layer3.0.downsample.1.weight", "models.BasicCNNClassifier.cnn.layer3.0.downsample.1.bias", "models.BasicCNNClassifier.cnn.layer3.0.downsample.1.running_mean", "models.BasicCNNClassifier.cnn.layer3.0.downsample.1.running_var", "models.BasicCNNClassifier.cnn.layer3.0.downsample.1.num_batches_tracked", "models.BasicCNNClassifier.cnn.layer3.1.conv1.weight", "models.BasicCNNClassifier.cnn.layer3.1.bn1.weight", "models.BasicCNNClassifier.cnn.layer3.1.bn1.bias", "models.BasicCNNClassifier.cnn.layer3.1.bn1.running_mean", "models.BasicCNNClassifier.cnn.layer3.1.bn1.running_var", "models.BasicCNNClassifier.cnn.layer3.1.bn1.num_batches_tracked", "models.BasicCNNClassifier.cnn.layer3.1.conv2.weight", "models.BasicCNNClassifier.cnn.layer3.1.bn2.weight", "models.BasicCNNClassifier.cnn.layer3.1.bn2.bias", "models.BasicCNNClassifier.cnn.layer3.1.bn2.running_mean", "models.BasicCNNClassifier.cnn.layer3.1.bn2.running_var", "models.BasicCNNClassifier.cnn.layer3.1.bn2.num_batches_tracked", "models.BasicCNNClassifier.cnn.layer4.0.conv1.weight", "models.BasicCNNClassifier.cnn.layer4.0.bn1.weight", "models.BasicCNNClassifier.cnn.layer4.0.bn1.bias", "models.BasicCNNClassifier.cnn.layer4.0.bn1.running_mean", "models.BasicCNNClassifier.cnn.layer4.0.bn1.running_var", "models.BasicCNNClassifier.cnn.layer4.0.bn1.num_batches_tracked", "models.BasicCNNClassifier.cnn.layer4.0.conv2.weight", "models.BasicCNNClassifier.cnn.layer4.0.bn2.weight", "models.BasicCNNClassifier.cnn.layer4.0.bn2.bias", "models.BasicCNNClassifier.cnn.layer4.0.bn2.running_mean", "models.BasicCNNClassifier.cnn.layer4.0.bn2.running_var", "models.BasicCNNClassifier.cnn.layer4.0.bn2.num_batches_tracked", 
"models.BasicCNNClassifier.cnn.layer4.0.downsample.0.weight", "models.BasicCNNClassifier.cnn.layer4.0.downsample.1.weight", "models.BasicCNNClassifier.cnn.layer4.0.downsample.1.bias", "models.BasicCNNClassifier.cnn.layer4.0.downsample.1.running_mean", "models.BasicCNNClassifier.cnn.layer4.0.downsample.1.running_var", "models.BasicCNNClassifier.cnn.layer4.0.downsample.1.num_batches_tracked", "models.BasicCNNClassifier.cnn.layer4.1.conv1.weight", "models.BasicCNNClassifier.cnn.layer4.1.bn1.weight", "models.BasicCNNClassifier.cnn.layer4.1.bn1.bias", "models.BasicCNNClassifier.cnn.layer4.1.bn1.running_mean", "models.BasicCNNClassifier.cnn.layer4.1.bn1.running_var", "models.BasicCNNClassifier.cnn.layer4.1.bn1.num_batches_tracked", "models.BasicCNNClassifier.cnn.layer4.1.conv2.weight", "models.BasicCNNClassifier.cnn.layer4.1.bn2.weight", "models.BasicCNNClassifier.cnn.layer4.1.bn2.bias", "models.BasicCNNClassifier.cnn.layer4.1.bn2.running_mean", "models.BasicCNNClassifier.cnn.layer4.1.bn2.running_var", "models.BasicCNNClassifier.cnn.layer4.1.bn2.num_batches_tracked". 
	size mismatch for classifier.0.weight: copying a param with shape torch.Size([512, 2048]) from checkpoint, the shape in current model is torch.Size([512, 1536]).

In [3]:
# Quick sanity check: the size of each error bucket on one line, followed by
# the raw false-positive and false-negative collections.
fp_total = len(false_positives)
fn_total = len(false_negatives)
print(fp_total, fn_total)

for misclassified in (false_positives, false_negatives):
    print(misclassified)

79 140
['https://businessangelsummit.at/page.cfm', 'https://login.squarespace.com/api/1/login/oauth/provider/authorize', 'https://methocarbamol.site/', 'https://mymilestonecard.website/', 'https://globalk9protectiongroup.com/', 'https://24h-emploi-formation.com/', 'https://trennungsfaq.com/', 'https://monitoring-obmennikov-belarus.site/', 'https://tigerdave.wixsite.com/website', 'https://error.alibaba.com/error404.htm', 'https://www.beebella.buzz/', 'https://misrecursosdidacticosparaparvulos.blogspot.com/', 'https://www.irctc.co.in/nget/', 'https://semver.npmjs.com/', 'https://www.th4ts3cur1ty.company/', 'https://celebjihad.com/main6', 'https://help.unicef.org/node/11336', 'https://nubilefilms.com/tube/sfw', 'https://hanabisushi.menu11.com/', 'https://www.banorte.com/wps/portal/banorte/Home/inicio', 'https://prologis.getbynder.com/login/', 'https://embedy.cc/', 'https://donstroy.moscow/objects/serdtse-stolitsy/', 'https://crazytimebangladesh.click/', 'https://deploy-preview-6073--types

In [None]:
from tabulate import tabulate

def _score_of(entry):
    """Return the score half of a (url, score) pair; used as the sort key."""
    _, score = entry
    return score


def _to_rows(ranked_pairs):
    """Convert (url, score) pairs into [url, score-with-4-decimals] table rows."""
    return [[url, f"{score:.4f}"] for url, score in ranked_pairs]


# Rank each error bucket from the highest stored score to the lowest.
sorted_false_positives = sorted(false_positives.items(), key=_score_of, reverse=True)
sorted_false_negatives = sorted(false_negatives.items(), key=_score_of, reverse=True)

# Rows in the shape tabulate expects: one [URL, formatted score] list per entry.
false_positives_table = _to_rows(sorted_false_positives)
false_negatives_table = _to_rows(sorted_false_negatives)

# Render both buckets with the same headers and grid layout.
for heading, rows in (
    ("\nFalse Positives (sorted by score):", false_positives_table),
    ("\nFalse Negatives (sorted by score):", false_negatives_table),
):
    print(heading)
    print(tabulate(rows, headers=["URL", "Score"], tablefmt="grid"))