In [None]:
import import_ipynb
from DataPrep import X_train, X_test, y_train, y_test # Previous notebook with data now processed
import torch
from torch import nn
from torch.utils.data import DataLoader, TensorDataset

## Further preparing data & setting a batch size

In [None]:
# Run on the GPU when one is available, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Layer sizes handed to the model constructors further down.
in_features = 30
out_features = 1
hidden_units = 5 # baseline hidden_units
hidden_units_v2 = 10 # For testing if more neurons = better

# Optimiser step size and mini-batch size.
learning_rate = 0.1
batch_size = 64

# Mapping values: -1 -> 0 and 1 -> 1 on all dataset labels, because BCE targets must lie in [0, 1]
mapped_values = {-1:0, 1:1}


def _remap_labels(labels):
    """Return a new int64 tensor with every label translated through `mapped_values`.

    Uses vectorised masking instead of `Tensor.apply_`, which only works on CPU
    tensors and invokes a Python callback once per element.

    Raises:
        KeyError: if `labels` contains a value that is not a key of `mapped_values`.
    """
    source = labels.clone().long()
    remapped = torch.empty_like(source)
    recognised = torch.zeros_like(source, dtype=torch.bool)
    for old_value, new_value in mapped_values.items():
        matches = source == old_value
        remapped[matches] = new_value
        recognised |= matches
    # Fail loudly on unexpected labels rather than leaving entries unmapped.
    if not bool(recognised.all()):
        unexpected = source[~recognised].unique().tolist()
        raise KeyError(f"labels not covered by mapped_values: {unexpected}")
    return remapped


y_train_mapped = _remap_labels(y_train)
y_test_mapped = _remap_labels(y_test)

# Get dataset ready for DataLoader
train_dataset = TensorDataset(X_train,y_train_mapped)
test_dataset = TensorDataset(X_test,y_test_mapped)

# Mini-batching data: training batches are reshuffled every epoch, test batches keep their order
train_loader = DataLoader(train_dataset, batch_size=batch_size,shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size,shuffle=False)

## ModelV0: 3 layers deep, 5 hidden neurons. Baseline

In [None]:
class PhishingModelV0(nn.Module):
    """Three linear layers with a ReLU after the first two.

    The last layer's output is returned unchanged (no activation is applied to it).
    """

    def __init__(self, in_features, out_features, hidden_units):
        """Create the layers.

        Args:
            in_features: input width of the first linear layer.
            out_features: output width of the last linear layer.
            hidden_units: width of both hidden layers.
        """
        super().__init__()
        # Built in input -> output order; the attribute names are the state_dict key prefixes.
        self.layer_1 = nn.Linear(in_features, hidden_units)
        self.layer_2 = nn.Linear(hidden_units, hidden_units)
        self.layer_3 = nn.Linear(hidden_units, out_features)
        self.relu = nn.ReLU()

    def forward(self, X: torch.Tensor):
        """Run `X` through the network and return the final layer's output."""
        hidden = self.relu(self.layer_1(X))
        hidden = self.relu(self.layer_2(hidden))
        return self.layer_3(hidden)

# Baseline instance: 5 hidden units, placed on the selected device.
modelV0 = PhishingModelV0(
    in_features=in_features,
    out_features=out_features,
    hidden_units=hidden_units,
).to(device=device)

## ModelV1: 6 layers deep, 5 hidden neurons. Is deeper better?

In [None]:
class PhishingModelV1(nn.Module):
    """Six linear layers with a ReLU after each of the first five.

    nn.Sequential would be easier to read here, but the layout deliberately
    mirrors the previous model to stay consistent with it. The last layer's
    output is returned unchanged (no activation is applied to it).
    """

    def __init__(self, in_features, out_features, hidden_units):
        """Create the layers.

        Args:
            in_features: input width of the first linear layer.
            out_features: output width of the last linear layer.
            hidden_units: width of every hidden layer.
        """
        super().__init__()
        # Built in input -> output order; the attribute names are the state_dict key prefixes.
        self.layer_1 = nn.Linear(in_features, hidden_units)
        self.layer_2 = nn.Linear(hidden_units, hidden_units)
        self.layer_3 = nn.Linear(hidden_units, hidden_units)
        self.layer_4 = nn.Linear(hidden_units, hidden_units)
        self.layer_5 = nn.Linear(hidden_units, hidden_units)
        self.layer_6 = nn.Linear(hidden_units, out_features)
        self.relu = nn.ReLU()

    def forward(self, X: torch.Tensor):
        """Run `X` through the network and return the final layer's output."""
        activations = X
        # Every layer except the last is followed by a ReLU.
        for hidden_layer in (self.layer_1, self.layer_2, self.layer_3, self.layer_4, self.layer_5):
            activations = self.relu(hidden_layer(activations))
        return self.layer_6(activations)
# Deeper variant: same hidden width as the baseline, placed on the selected device.
modelV1 = PhishingModelV1(
    in_features=in_features,
    out_features=out_features,
    hidden_units=hidden_units,
).to(device=device)

## ModelV2: 3 layers deep, 10 hidden neurons. Is wider better?

In [None]:
# Wider variant: the baseline architecture with hidden_units_v2 hidden units.
modelV2 = PhishingModelV0(
    in_features=in_features,
    out_features=out_features,
    hidden_units=hidden_units_v2,
).to(device=device)

## ModelV3: 6 layers deep, 10 hidden neurons. Are both deep & wide better?

In [None]:
# Deeper and wider variant: the six-layer architecture with hidden_units_v2 hidden units.
modelV3 = PhishingModelV1(
    in_features=in_features,
    out_features=out_features,
    hidden_units=hidden_units_v2,
).to(device=device)

In [None]:
def accuracy_fn(y_preds, y_true):
    """Return the percentage of predictions that equal their labels, rounded to 2 decimals.

    Args:
        y_preds: tensor of predicted class labels.
        y_true: tensor of reference labels, one per prediction.

    Returns:
        A float between 0 and 100; 0.0 when there are no predictions.

    Raises:
        ValueError: if the two tensors hold a different number of elements.
    """
    # Flatten both sides so (N,) predictions against (N, 1) labels are compared
    # element-for-element instead of broadcasting to an (N, N) grid, which
    # would inflate the number of "correct" matches.
    preds_flat = y_preds.reshape(-1)
    true_flat = y_true.reshape(-1).to(preds_flat.device)

    total = preds_flat.numel()
    if total != true_flat.numel():
        raise ValueError(
            f"y_preds has {total} elements but y_true has {true_flat.numel()}"
        )
    if total == 0:
        # Nothing to score; avoid dividing by zero.
        return 0.0

    correct = torch.eq(preds_flat, true_flat).sum().item()
    return round(correct / total * 100, 2)

## Training & Testing loop

In [None]:
def training_loop(model, model_name, epochs=101):
    """Train `model` with mini-batch SGD and print test metrics every 10th epoch.

    Reads the notebook-level `train_loader`, `test_loader`, `learning_rate`
    and `accuracy_fn`. The model is updated in place; nothing is returned.

    Args:
        model: module whose output is one logit per sample in a trailing
            axis of size 1 (that axis is squeezed away before the loss).
        model_name: label printed once before training starts.
        epochs: number of passes over `train_loader`.
    """
    loss_fn = nn.BCEWithLogitsLoss()
    optimizer = torch.optim.SGD(params=model.parameters(), lr = learning_rate)
    # Batches are moved to wherever the model's parameters live, so the loop
    # works whether the model was placed on the CPU or on the GPU.
    model_device = next(model.parameters()).device
    print(f"Model: {model_name}")

    for epoch in range(epochs):
        model.train()

        for X_batch_train, y_batch_train in train_loader:
            X_batch_train = X_batch_train.to(model_device)
            y_batch_train = y_batch_train.to(model_device)

            # Squeeze only the trailing axis: a bare squeeze() would also drop
            # the batch axis of a one-sample batch and break the loss call.
            train_logits = model(X_batch_train).squeeze(dim=-1)
            train_loss = loss_fn(train_logits,y_batch_train.type(torch.float))

            optimizer.zero_grad()
            train_loss.backward()
            optimizer.step()

        if epoch % 10 == 0:
            model.eval()
            all_preds = []
            all_labels = []
            loss_sum = 0.0
            sample_count = 0

            with torch.inference_mode():
                for X_batch_test, y_batch_test in test_loader:
                    X_batch_test = X_batch_test.to(model_device)
                    y_batch_test = y_batch_test.to(model_device)

                    test_logits = model(X_batch_test).squeeze(dim=-1)
                    batch_loss = loss_fn(test_logits,y_batch_test.type(torch.float))

                    # Weight each batch's mean loss by its size so the reported
                    # figure is the mean over the whole test set, not just the
                    # value from the last batch.
                    batch_len = y_batch_test.shape[0]
                    loss_sum += batch_loss.item() * batch_len
                    sample_count += batch_len

                    # Sigmoid turns logits into probabilities; rounding yields 0/1 labels.
                    all_preds.append(torch.sigmoid(test_logits).round())
                    all_labels.append(y_batch_test)

            all_preds = torch.cat(all_preds)
            all_labels = torch.cat(all_labels)
            test_accuracy = accuracy_fn(all_preds,all_labels)
            test_loss = loss_sum / sample_count

            print(f"Epoch: {epoch} | Test Accuracy: {test_accuracy}% | Test Loss: {test_loss:.4f}")

## Training & saving the best of 5 runs of each model (commented out to avoid rerunning)

In [None]:
# training_loop(modelV0,"ModelV0") # 3 layers, 5 neurons

In [None]:
# training_loop(modelV1,"ModelV1") # 6 layers, 5 neurons

In [None]:
# training_loop(modelV2,"ModelV2") # 3 layers, 10 neurons

In [None]:
# training_loop(modelV3,"ModelV3") # 6 layers and 10 neurons

### Saving all 4 models to evaluate their metrics

In [None]:
# torch.save(obj=modelV0.state_dict(), f="saved_models/modelV0.pt")
# torch.save(obj=modelV1.state_dict(), f="saved_models/modelV1.pt")
# torch.save(obj=modelV2.state_dict(), f="saved_models/modelV2.pt")
# torch.save(obj=modelV3.state_dict(), f="saved_models/modelV3.pt")