In [2]:
from torch.utils.data import DataLoader

import torch
from classifiers.basic_url_bert_classifier import BasicUrlBertClassifier
from dataset.phishing_dataset import PhishingDataset
import os
from utils import get_filtered_inputs
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score
import torch.nn as nn
import pandas as pd

dataset_path = os.path.expanduser("~/transfer/phishing_output_tokenized_tmp.h5")

test_dataset = PhishingDataset(['url_input_ids', 'url_attention_mask', 'labels', 'url'], split='dev', local_file_path=dataset_path)
test_dataloader = DataLoader(test_dataset, batch_size=128, shuffle=False)

device = torch.device('cuda' if torch.cuda.is_available() else 'mps' if torch.backends.mps.is_available() else 'cpu')
print(device)

model = BasicUrlBertClassifier()
checkpoint_path = "../models/basic_url_bert_phishing_classifier.pt"
model.load_state_dict(torch.load(checkpoint_path, map_location=device))
model.to(device)
model.eval()

criterion = nn.CrossEntropyLoss()
all_preds, all_labels = [], []
total_loss = 0.0
num_batches = 0
false_positives = []
false_negatives = []

with torch.no_grad():
    for batch in test_dataloader:
        labels = batch['label'].to(device)
        filtered_inputs = get_filtered_inputs(batch)
        outputs = model(**filtered_inputs)

        loss = criterion(outputs, labels)
        total_loss += loss.item()
        num_batches += 1

        preds = torch.argmax(outputs, dim=1)

        all_preds.extend(preds.cpu().numpy())
        all_labels.extend(labels.cpu().numpy())

        # Track false positives and false negatives
        urls = batch['url']
        for i in range(len(labels)):
            if labels[i].item() == 1 and preds[i].item() == 0:
                false_negatives.append(urls[i])
            elif labels[i].item() == 0 and preds[i].item() == 1:
                false_positives.append(urls[i])

avg_loss = total_loss / num_batches if num_batches > 0 else 0.0

precision = precision_score(all_labels, all_preds, average='binary')
recall = recall_score(all_labels, all_preds, average='binary')
f1 = f1_score(all_labels, all_preds, average='binary')
accuracy = accuracy_score(all_labels, all_preds)
print(f"Dev Loss: {loss:.4f}, Precision: {precision:.4f}, Recall: {recall:.4f}, F1 Score: {f1:.4f}, Accuracy: {accuracy:.4f}")





mps


  model.load_state_dict(torch.load(checkpoint_path, map_location=device))


Dev Loss: 0.3683, Precision: 0.9297, Recall: 0.9528, F1 Score: 0.9411, Accuracy: 0.9554


In [4]:
print(len(false_positives), len(false_negatives))

print(false_positives)
print(false_negatives)

192 126
['https://mine.fennecblockchain.com/', 'https://businessangelsummit.at/page.cfm', 'https://1000.menu/', 'https://login.squarespace.com/api/1/login/oauth/provider/authorize', 'https://naver.worksmobile.com/', 'https://www.credem.it/content/credem/it/home.html', 'https://gladly.io/', 'https://methocarbamol.site/', 'https://rustoria-topnews.ru/', 'http://www.100bt.com/', 'https://ataulswebdesigns.com/', 'https://msprivatecars.com/', 'https://drcarloschacon.mystrikingly.com/', 'https://www.qr-code-generator.com/', 'http://portloechurch.org.uk/', 'https://globalk9protectiongroup.com/', 'https://www.active24.cz/servery/virtualni-privatni-servery', 'https://bankofmaharashtra.in/', 'https://allin-mke.com/', 'https://trennungsfaq.com/', 'https://www.clickminded.com/button-generator/', 'https://tigerdave.wixsite.com/website', 'https://data4cure.com/', 'https://lakemartinlocations.com/', 'https://misrecursosdidacticosparaparvulos.blogspot.com/', 'https://unprojects.net/', 'https://discoin

In [None]:
# Store false positives and false negatives in CSV files
fp_df = pd.DataFrame(false_positives, columns=['False Positive URLs'])
fn_df = pd.DataFrame(false_negatives, columns=['False Negative URLs'])

fp_df.to_csv('false_positives.csv', index=False)
fn_df.to_csv('false_negatives.csv', index=False)

print("False positives and false negatives have been saved to 'false_positives.csv' and 'false_negatives.csv'")