# E-Mail Spam Classification
## YZV 311E Term Project

Abdullah Bilici, 150200330

Bora Boyacıoğlu, 150200310

Import the necessary libraries.

In [1]:
import numpy as np
import torch
from torch import nn, optim 
from torch.nn import functional as F

from dataloader import DataLoader

ModuleNotFoundError: No module named 'torch'

## Load Data

In [None]:
# Load the pre-computed BERT representations from disk and wrap them in a tensor
bert_array = np.load("../Data/bert_representations.npy")
data = torch.tensor(bert_array)

In [None]:
# Row boundaries of the split: [0, 3000) train, [3000, 4000) test, [4000, end) validation
TRAIN_END = 3000
TEST_END = 4000

# Every column except the last is passed as input; the last column is passed as the target
features = data[:, :-1]
labels = data[:, -1]

# Wrap each split in a loader so it can be used easily; only the training loader
# is asked to shuffle and to use a batch size of 64
train_loader = DataLoader(
    [features[:TRAIN_END].to(torch.float), labels[:TRAIN_END]],
    shuffle=True,
    batch_size=64,
)
test_loader = DataLoader(
    [features[TRAIN_END:TEST_END].to(torch.float), labels[TRAIN_END:TEST_END]]
)
validation_loader = DataLoader(
    [features[TEST_END:].to(torch.float), labels[TEST_END:]]
)

## Creating a model

In [None]:
# Simple fully connected neural network
class FCNN(nn.Module):
    """Four-layer fully connected network: input -> 1024 -> 256 -> 32 -> output.

    The three hidden layers are followed by ReLU; the last layer returns its
    raw output without an activation.
    """

    def __init__(self, input_shape, output_dim):
        """Create the linear layers.

        Args:
            input_shape: Number of input features per sample.
            output_dim: Number of values produced per sample.
        """
        super().__init__()

        # Hidden layers, progressively narrower
        self.fc1 = nn.Linear(input_shape, 1024)
        self.fc2 = nn.Linear(1024, 256)
        self.fc3 = nn.Linear(256, 32)
        # Output layer
        self.fc4 = nn.Linear(32, output_dim)

    def forward(self, x):
        """Run ``x`` through the hidden layers (with ReLU) and the output layer."""
        hidden = x
        for layer in (self.fc1, self.fc2, self.fc3):
            hidden = F.relu(layer(hidden))

        return self.fc4(hidden)


In [None]:
# The loader's shape is unpacked into two values; the second one is used as
# the width of the network's input layer
# (presumably shape is (number of samples, number of features) -- confirm in dataloader.py)
N, input_shape = train_loader.shape

# Two outputs per sample
net = FCNN(input_shape=input_shape, output_dim=2)

## Training the model

In [None]:
def test(net, test_loader, criterion, device, verbose = True):
    # We dont want to store gradients during forward pass
    with torch.no_grad():

        # Extracting the data and targets
        data, target = test_loader.X, test_loader.y
        data.to(device)
        target.to(device)
        
        # Model makes predictions
        target_pred = net(data)
        
        # Test loss
        test_loss = criterion(target_pred, target.to(torch.long)) / test_loader.size

        # Predicted values to caluclate accuracy
        target_predicted = torch.argmax(target_pred, axis = 1)

        # Calculating the accuracy
        test_accuracy = (target_predicted == target).sum() / test_loader.size
    
    if verbose:
        print("Test results: \n")

        print(f"Test loss: {test_loss:.4f}, Test accuracy: {test_accuracy:.4f}") 

    return target_predicted

In [None]:
# Training hyperparameters: number of passes over the training data and learning rate
epochs = 10
lr = 1e-3

# Use CUDA when PyTorch reports it as available, otherwise fall back to the CPU
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Cross entropy as the loss function; reduction="sum" returns the total over
# a batch rather than the mean
criterion = nn.CrossEntropyLoss(reduction="sum")
# Adam as the optimizer, updating every parameter of the network
optimizer = optim.Adam(params=net.parameters(), lr=lr)


### Train loop

In [None]:
# Per-epoch history, stored as plain Python floats
train_losses = list()
validation_losses = list()

# Remember where the model lives, then place it on the training device.
# NOTE(review): the optimizer was created before this move; nn.Module.to() is
# expected to update CPU/CUDA parameters in place so the optimizer keeps
# tracking them -- confirm on the installed torch version.
home_device = next(net.parameters()).device
net.to(device)

# The validation tensors are the same every epoch, so move them only once.
# Tensor.to() returns a new tensor; the result has to be kept.
X_val = validation_loader.X.to(device)
y_val = validation_loader.y.to(device)

print("Training starting...")
for epoch in range(epochs):

    net.train()
    running_loss = .0

    for X_train, y_train in train_loader:
        # Keep the moved tensors (calling .to() without assigning does nothing)
        X_train = X_train.to(device)
        y_train = y_train.to(device)

        # Forward pass
        y_pred = net(X_train)

        # Summed loss over the batch
        loss = criterion(y_pred, y_train.to(torch.long))

        # Zeros the past gradients
        optimizer.zero_grad()
        # Recalculates gradients
        loss.backward()
        # Updates weights
        optimizer.step()

        running_loss += loss.item()

    # Summed batch losses divided by the loader's size
    running_loss /= train_loader.size

    train_losses.append(running_loss)

    # Calculates validation loss and accuracy
    net.eval()
    with torch.no_grad():

        y_val_pred = net(X_val)

        # .item() turns the 0-dim tensors into floats, matching train_losses
        val_loss = (criterion(y_val_pred, y_val.to(torch.long)) / validation_loader.size).item()

        y_val_predicted = torch.argmax(y_val_pred, dim = 1)

        val_accuracy = ((y_val_predicted == y_val).sum() / validation_loader.size).item()

    validation_losses.append(val_loss)

    # Report every epoch
    print(f"Epoch: {epoch+1}, Training loss: {running_loss:.4f}, Validation loss: {val_loss:.4f}, Validation accuracy: {val_accuracy:.4f}")

# Put the model back where it was: the loaders keep their tensors on their
# original device, so code that feeds them to the model directly keeps working
net.to(home_device)
print("Training ends.")

Training starting...
Epoch: 1, Training loss: 0.3218, Validation loss: 0.1050, Validation accuracy: 0.9549
Epoch: 2, Training loss: 0.0969, Validation loss: 0.0692, Validation accuracy: 0.9705
Epoch: 3, Training loss: 0.0540, Validation loss: 0.0534, Validation accuracy: 0.9809
Epoch: 4, Training loss: 0.0530, Validation loss: 0.0661, Validation accuracy: 0.9740
Epoch: 5, Training loss: 0.0319, Validation loss: 0.0441, Validation accuracy: 0.9821
Epoch: 6, Training loss: 0.0288, Validation loss: 0.1389, Validation accuracy: 0.9520
Epoch: 7, Training loss: 0.0552, Validation loss: 0.0536, Validation accuracy: 0.9826
Epoch: 8, Training loss: 0.0319, Validation loss: 0.0433, Validation accuracy: 0.9850
Epoch: 9, Training loss: 0.0189, Validation loss: 0.0637, Validation accuracy: 0.9792
Epoch: 10, Training loss: 0.0226, Validation loss: 0.0484, Validation accuracy: 0.9844
Training ends.


## Model Evaluation

In [None]:
# Predictions on the validation split, without printing loss/accuracy
val_preds = test(net, validation_loader, criterion, device, verbose=False)
# Predictions on the test split; loss and accuracy are printed
test_preds = test(net=net, test_loader=test_loader, criterion=criterion, device=device)

Test results: 

Test loss: 0.0618, Test accuracy: 0.9770


In [None]:
from utils import evaluate_model

# True labels of both held-out splits
validation_targets = validation_loader.y
test_targets = test_loader.y

# Report for the validation split
print("Validation Results:")
evaluate_model(validation_targets, val_preds)

# Report for the test split
print("Test Results:")
evaluate_model(test_targets, test_preds)

Validation Results:
[4mConfusion Matrix:[0m
[[TP: [91m394[0m	FP: [91m15[0m	]
 [FN: [91m12[0m	TN: [91m1307[0m	]]

[4mClassification Report:[0m
Accuracy : [91m0.9844[0m
Precision: [91m0.9633[0m
Recall   : [91m0.9704[0m
F1 Score : [91m0.9669[0m

Test Results:
[4mConfusion Matrix:[0m
[[TP: [91m226[0m	FP: [91m16[0m	]
 [FN: [91m7[0m	TN: [91m751[0m	]]

[4mClassification Report:[0m
Accuracy : [91m0.9770[0m
Precision: [91m0.9339[0m
Recall   : [91m0.9700[0m
F1 Score : [91m0.9516[0m



## Save the model

In [None]:
# Persist the trained network object itself (the module, not only its weights)
model_path = "models/FCNN_1"
torch.save(net, model_path)