In [1]:
import torch
import torch.nn as nn
from transformers import DistilBertModel
from test_harness import test_harness

class PhishingClassifier(nn.Module):
    """DistilBERT encoder followed by a small feed-forward classification head.

    The encoder output at sequence position 0 is used as the feature vector
    for the whole input and is mapped to ``num_labels`` logits by a
    Linear -> ReLU -> Dropout -> Linear head.

    Args:
        pretrained_name: Checkpoint identifier passed to
            ``DistilBertModel.from_pretrained``.
        hidden_dim: Width of the intermediate layer of the head.
        dropout: Dropout probability applied inside the head.
        num_labels: Number of output logits (classes).

    The defaults reproduce the original fixed configuration, so
    ``PhishingClassifier()`` builds the same architecture and state-dict
    layout as before.
    """

    def __init__(
        self,
        pretrained_name: str = 'distilbert-base-uncased',
        hidden_dim: int = 256,
        dropout: float = 0.3,
        num_labels: int = 2,
    ):
        super().__init__()

        # Attribute names are part of the state_dict keys; keep them stable so
        # previously saved checkpoints still load.
        self.bert = DistilBertModel.from_pretrained(pretrained_name)

        # The head's input width is read from the encoder config rather than
        # hard-coded, so it follows whichever checkpoint was loaded.
        self.classifier = nn.Sequential(
            nn.Linear(self.bert.config.hidden_size, hidden_dim),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(hidden_dim, num_labels),
        )

    def forward(
        self,
        url_input_ids: torch.Tensor,
        url_attention_mask: torch.Tensor,
    ) -> torch.Tensor:
        """Return classification logits for a batch of tokenized inputs.

        Args:
            url_input_ids: Token ids forwarded to the encoder as ``input_ids``
                (assumed shape ``(batch, seq_len)`` -- TODO confirm against
                the data pipeline).
            url_attention_mask: Mask forwarded to the encoder as
                ``attention_mask``; assumed to match ``url_input_ids`` in
                shape.

        Returns:
            The output of the classification head, one row of logits per
            batch element.
        """
        bert_outputs = self.bert(
            input_ids=url_input_ids,
            attention_mask=url_attention_mask,
        )
        # Pool by taking the hidden state at position 0 of every sequence
        # (the [CLS] token when inputs come from the standard DistilBERT
        # tokenizer -- the tokenizer is not visible here, so verify).
        url_features = bert_outputs.last_hidden_state[:, 0, :]

        return self.classifier(url_features)

    def test_name(self) -> str:
        """Return the fixed identifier string for this model variant."""
        return 'basic_url_bert'

# Choose the compute device in priority order: CUDA, then Apple MPS, then CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
elif torch.backends.mps.is_available():
    device = torch.device('mps')
else:
    device = torch.device('cpu')

# Instantiate the classifier with its default configuration on that device.
model = PhishingClassifier().to(device)

# Hand the model to the project's harness (imported from test_harness).
# NOTE(review): the captured notebook output suggests this runs the
# training/evaluation epochs -- confirm against the harness source.
test_harness(model)

# Save only the weights; the file name is prefixed with the model's test name.
torch.save(
    model.state_dict(),
    f"{model.test_name()}_phishing_classifier.pt",
)





Epoch 1/10:   0%|          | 0/7080 [00:00<?, ?it/s]

Epoch 1/10, Average Loss: 0.15691078218702478
Epoch 1/10, Loss: 0.1167, Precision: 0.9466, Recall: 0.9490, F1 Score: 0.9478, Accuracy: 0.9603


Epoch 2/10:   0%|          | 0/7080 [00:00<?, ?it/s]

Epoch 2/10, Average Loss: 0.08867528729243034
Epoch 2/10, Loss: 0.1158, Precision: 0.9648, Recall: 0.9416, F1 Score: 0.9531, Accuracy: 0.9648


Epoch 3/10:   0%|          | 0/7080 [00:00<?, ?it/s]

Epoch 3/10, Average Loss: 0.048854169860723266
Epoch 3/10, Loss: 0.1371, Precision: 0.9849, Recall: 0.9150, F1 Score: 0.9487, Accuracy: 0.9624


Epoch 4/10:   0%|          | 0/7080 [00:00<?, ?it/s]

Epoch 4/10, Average Loss: 0.02752620269979274
Epoch 4/10, Loss: 0.1854, Precision: 0.9337, Recall: 0.9527, F1 Score: 0.9431, Accuracy: 0.9564


Epoch 5/10:   0%|          | 0/7080 [00:00<?, ?it/s]

Epoch 5/10, Average Loss: 0.01760292832276189
Epoch 5/10, Loss: 0.1845, Precision: 0.9459, Recall: 0.9494, F1 Score: 0.9476, Accuracy: 0.9602


Epoch 6/10:   0%|          | 0/7080 [00:00<?, ?it/s]

Epoch 6/10, Average Loss: 0.014946453116117173
Epoch 6/10, Loss: 0.2182, Precision: 0.9625, Recall: 0.9383, F1 Score: 0.9502, Accuracy: 0.9627


Epoch 7/10:   0%|          | 0/7080 [00:00<?, ?it/s]

Epoch 7/10, Average Loss: 0.011528324271586239
Epoch 7/10, Loss: 0.2225, Precision: 0.9486, Recall: 0.9475, F1 Score: 0.9481, Accuracy: 0.9606


Epoch 8/10:   0%|          | 0/7080 [00:00<?, ?it/s]

Epoch 8/10, Average Loss: 0.009794402939279296
Epoch 8/10, Loss: 0.1903, Precision: 0.9758, Recall: 0.9232, F1 Score: 0.9487, Accuracy: 0.9622


Epoch 9/10:   0%|          | 0/7080 [00:00<?, ?it/s]

Epoch 9/10, Average Loss: 0.009046823427710694
Epoch 9/10, Loss: 0.1916, Precision: 0.9554, Recall: 0.9413, F1 Score: 0.9483, Accuracy: 0.9610


Epoch 10/10:   0%|          | 0/7080 [00:00<?, ?it/s]

Epoch 10/10, Average Loss: 0.0073852659671066415
Epoch 10/10, Loss: 0.2392, Precision: 0.9685, Recall: 0.9324, F1 Score: 0.9501, Accuracy: 0.9629
