Helper notebook to run a model against dev set and get list of URLs that were miscategorized as False Negative or False Positive.

In [1]:
from torch.utils.data import DataLoader
import torch
from classifiers.basic_url_skinny_bert_classifier import BasicUrlSkinnyBertClassifier
from dataset.phishing_dataset import PhishingDataset
import os
from utils import get_filtered_inputs
import shap
from transformers import DistilBertTokenizer

# Define dataset path and load the dataset
dataset_path = os.path.expanduser("~/transfer/phishing_output_tokenized.h5")

test_dataset = PhishingDataset(['url_input_ids', 'url_attention_mask', 'labels', 'url'], split='dev', local_file_path=dataset_path)
test_dataloader = DataLoader(test_dataset, batch_size=128, shuffle=False)

# Set device
device = torch.device('cuda' if torch.cuda.is_available() else 'mps' if torch.backends.mps.is_available() else 'cpu')
print(device)

# Load the model
model = BasicUrlSkinnyBertClassifier()
checkpoint_path = "../models/canonical/basic_url_skinny_bert_phishing_classifier_epoch_1.pt"
model.load_state_dict(torch.load(checkpoint_path, map_location=device))
model.to(device)
model.eval()

# Load tokenizer for encoding
tokenizer = DistilBertTokenizer.from_pretrained('distilbert-base-uncased')

# Create a prediction function for SHAP
def predict(inputs):
    if isinstance(inputs, list):
        inputs = tokenizer(inputs, return_tensors='pt', padding=True, truncation=True, max_length=128)
    else:
        inputs = tokenizer([inputs], return_tensors='pt', padding=True, truncation=True, max_length=128)
    inputs = {k: v.to(device) for k, v in inputs.items()}
    with torch.no_grad():
        logits = model(inputs['input_ids'], inputs['attention_mask'])
    return torch.softmax(logits, dim=1).cpu().numpy()

# Use SHAP to explain the model's predictions
explainer = shap.Explainer(predict, tokenizer)

# Get the first batch and calculate SHAP values
with torch.no_grad():
    for batch in test_dataloader:
        urls = batch['url']
        # Get SHAP values for the first batch
        shap_values = explainer(urls[0])
        shap.plots.text(shap_values)  # Display SHAP values as a text plot for the first example
        break  # Only process the first batch




mps


  model.load_state_dict(torch.load(checkpoint_path, map_location=device))


ValueError: text input must be of type `str` (single example), `List[str]` (batch or single pretokenized example) or `List[List[str]]` (batch of pretokenized examples).

In [3]:
print(len(false_positives), len(false_negatives))

print(false_positives)
print(false_negatives)

79 140
['https://businessangelsummit.at/page.cfm', 'https://login.squarespace.com/api/1/login/oauth/provider/authorize', 'https://methocarbamol.site/', 'https://mymilestonecard.website/', 'https://globalk9protectiongroup.com/', 'https://24h-emploi-formation.com/', 'https://trennungsfaq.com/', 'https://monitoring-obmennikov-belarus.site/', 'https://tigerdave.wixsite.com/website', 'https://error.alibaba.com/error404.htm', 'https://www.beebella.buzz/', 'https://misrecursosdidacticosparaparvulos.blogspot.com/', 'https://www.irctc.co.in/nget/', 'https://semver.npmjs.com/', 'https://www.th4ts3cur1ty.company/', 'https://celebjihad.com/main6', 'https://help.unicef.org/node/11336', 'https://nubilefilms.com/tube/sfw', 'https://hanabisushi.menu11.com/', 'https://www.banorte.com/wps/portal/banorte/Home/inicio', 'https://prologis.getbynder.com/login/', 'https://embedy.cc/', 'https://donstroy.moscow/objects/serdtse-stolitsy/', 'https://crazytimebangladesh.click/', 'https://deploy-preview-6073--types

In [None]:
# Store false positives and false negatives in CSV files
fp_df = pd.DataFrame(false_positives, columns=['False Positive URLs'])
fn_df = pd.DataFrame(false_negatives, columns=['False Negative URLs'])

fp_df.to_csv('false_positives.csv', index=False)
fn_df.to_csv('false_negatives.csv', index=False)

print("False positives and false negatives have been saved to 'false_positives.csv' and 'false_negatives.csv'")