In [162]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, Dataset, RandomSampler

from sklearn.metrics import accuracy_score, f1_score

import numpy as np
import pandas as pd
from tqdm import tqdm

import random
import time

from dataset import CSICDataset, Vocab

In [2]:
# Defining global constants
RANDOM_SEED = 42
BATCH_SIZE = 64

torch.manual_seed(RANDOM_SEED)
random.seed(RANDOM_SEED)
np.random.seed(RANDOM_SEED)
device = torch.device('cuda' if torch.cuda.is_available() else 'mps' if torch.backends.mps.is_available() else 'cpu')

In [3]:
# This is how we select a GPU if it's available on your computer or in the Colab environment.
print('Device of execution - ', device)

Device of execution -  mps


In [4]:
df = pd.read_csv('./dataset/dataset.csv')
df = CSICDataset.process_df(df)

# The following two lines are used to load the indices of the training and validation sets
train_indices = np.load('./dataset/train_indices.npy')
val_indices = np.load('./dataset/val_indices.npy')

# The following two lines are used to select the training and validation sets from the dataframe based on the indices loaded above
train_data = df.loc[train_indices].reset_index(drop=True)
val_data = df.loc[val_indices].reset_index(drop=True)

In [183]:
train_dataset = CSICDataset(df=train_data, vocab_size=5000, min_frequency=1, tokenization_algorithm='bpe')
train_vocab = train_dataset.vocab

val_dataset = CSICDataset(df=val_data, vocab=train_vocab)

train_sampler = RandomSampler(train_dataset)
val_sampler   = RandomSampler(val_dataset)






In [184]:
len(train_vocab)

5000

In [None]:
# Check Dataset Lengths
assert len(train_dataset) == 45319, "Training Dataset is of incorrect size"
assert len(val_dataset) == 11330, "Validation Dataset is of incorrect size"

print('Training and Validation dataset sizes match!')

Training and Validation dataset sizes match!


In [None]:
PADDING_VALUE = train_vocab.pad_id

In [None]:
def collate_fn(batch, padding_value=PADDING_VALUE):
    # Batch is of the form List[Tuple(Features(tokenized_ids,...), Labels)]
    sequences = [torch.tensor(sample[0]['tokenized_ids'], dtype=torch.long, device=device) for sample in batch]
    padded_tokens = torch.nn.utils.rnn.pad_sequence(sequences=sequences,batch_first=True, padding_value=padding_value)
    
    labels = torch.tensor([sample[1] for sample in batch])

    return padded_tokens, labels

In [None]:
train_iterator = DataLoader(train_dataset, batch_size=BATCH_SIZE, sampler=train_sampler, collate_fn=collate_fn)
val_iterator   = DataLoader(val_dataset, batch_size=BATCH_SIZE, sampler=val_sampler, collate_fn=collate_fn)

In [10]:
for x, y in train_iterator:
    print(f'x: {x.shape}')
    print(f'y: {y.shape}')
    break

x: torch.Size([64, 199])
y: torch.Size([64])


In [44]:
class RecurrentWAF(nn.Module):
    def __init__(self, vocab_size, embedding_dim, rec_hidden_size, fc_hidden_size, recurrent_type='LSTM', dropout=None):
        super(RecurrentWAF, self).__init__()
        
        self.embed = nn.Embedding(num_embeddings=vocab_size, embedding_dim=embedding_dim, padding_idx=PADDING_VALUE)
        
        match recurrent_type:
            case 'LSTM':
                self.recurrent = nn.LSTM(input_size=embedding_dim, hidden_size=rec_hidden_size, batch_first=True)
            case 'RNN':
                self.recurrent = nn.RNN(input_size=embedding_dim, hidden_size=rec_hidden_size, batch_first=True)
            case 'GRU':
                self.recurrent = nn.GRU(input_size=embedding_dim, hidden_size=rec_hidden_size, batch_first=True)
            
            case _:
                raise TypeError("Unsupported Recurrent Layer Type received")

        self.fc = nn.Sequential(
            nn.Linear(in_features=rec_hidden_size, out_features=fc_hidden_size, bias=True),
            nn.ReLU(),
            nn.Linear(in_features=fc_hidden_size, out_features=1, bias=True)
        )

        self.activation = nn.Sigmoid()

        if dropout:
            self.dropout = nn.Dropout(p=dropout)
        else:
            self.dropout = None

        self.embedding_dim = embedding_dim
        self.rec_hidden_size = rec_hidden_size
        self.fc_hidden_size = fc_hidden_size
        self.recurrent_type = recurrent_type

    def forward(self, input):
        embed = self.embed_input(input)
        
        if self.recurrent_type == 'RNN' or self.recurrent_type == 'GRU':
            _, hidden = self.recurrent(embed)
        else:
            _, (hidden, cell) = self.recurrent(embed)

        hidden = hidden.squeeze(dim=0)
        out = self.activation(self.fc(hidden))

        return out
    
    def embed_input(self, input):
        if self.dropout:
            return self.dropout(self.embed(input))
        else:
            return self.embed(input)
        
    # @staticmethod
    # def load(model_path: str):
    #     params = torch.load(model_path, map_location=lambda storage, loc: storage)
    #     args = params['args']
    #     model = RecurrentWAF(vocab=params['vocab'], **args)
    #     model.load_state_dict(params['state_dict'])

    #     return model

    # def save(self, path: str):
    #     print('save model parameters to [%s]' % path, file=sys.stderr)

    #     params = {
    #         'args': dict(embed_size=self.embed_size, hidden_size=self.hidden_size, dropout_rate=self.dropout_rate,
    #                      label_smoothing=self.label_smoothing),
    #         'vocab': self.vocab,
    #         'state_dict': self.state_dict()
    #     }

    #     torch.save(params, path)

In [45]:
def get_accuracy_and_f1_score(y_true, y_predicted):
    """
    This function takes in two numpy arrays and computes the accuracy and F1 score
    between them. You can use the imported sklearn functions to do this.

    Args:
        y_true (list) : A 1D numpy array of ground truth labels
        y_predicted (list) : A 1D numpy array of predicted labels

    Returns:
        accuracy (float) : The accuracy of the predictions
        f1_score (float) : The F1 score of the predictions
    """

    # Get the accuracy
    accuracy = accuracy_score(y_true, y_predicted)

    # Get the F1 score
    f1 = f1_score(y_true, y_predicted)

    return accuracy, f1


In [46]:
def train_loop(model, criterion, optimizer, iterator):
    """
    This function is used to train a model for one epoch.
    :param model: The model to be trained
    :param criterion: The loss function
    :param optim: The optimizer
    :param iterator: The training data iterator
    :return: The average loss for this epoch for all batches
    """
    # Set the model to train mode (build computation graph)
    model = model.to(device)
    model.train()
    
    total_loss = 0
    for x, y in tqdm(iterator, total=len(iterator), desc="Training Model"):

        optimizer.zero_grad()

        x, y = x.to(device), y.to(device)
        outs = model(x).squeeze(dim=-1)

        loss = criterion(outs, y.float())
        loss.backward()

        optimizer.step()

        total_loss += loss.item()

    average_loss = total_loss / len(iterator)

    return average_loss

In [164]:
def val_loop(model, criterion, iterator):
    """
    This function is used to evaluate a model on the validation set.
    :param model: The model to be evaluated
    :param iterator: The validation data iterator
    :return: true: a Python boolean array of all the ground truth values
             pred: a Python boolean array of all model predictions.
            average_loss: The average loss over the validation set
    """

    true, pred = [], []
    total_loss = 0
    total_comp_time = 0

    # Set the model to evaluation mode
    model.eval()

    # Don't calculate gradients
    with torch.no_grad():
        for x, y in tqdm(iterator, total=len(iterator), desc="Validating Model"):
            x, y = x.to(device), y.to(device)

            start = time.time()
            outs = model(x)
            total_comp_time += time.time() - start
            
            outs = outs.squeeze(dim=-1)

            predictions = [True if out >= 0.5 else False for out in outs]
            labels = [True if label == 1 else False for label in y]

            loss = criterion(outs, y.float())
            total_loss += loss.item()

            # Add the predictions and labels to the lists
            pred.extend(predictions)
            true.extend(labels)
        average_loss = total_loss / len(iterator)
        average_comp_time = total_comp_time / len(iterator)

    return true, pred, average_loss, average_comp_time

In [189]:
# HYPERPARAMETERS:

EMBEDDING_DIM = 64
REC_HIDDEN_DIM = 64
FC_HIDDEN_DIM = 64
REC_LAYER_TYPE = 'LSTM'
DROPOUT = 0.2

BETAS = (0.9,0.999)
LR = 1e-3
EPOCHS = 10

In [190]:
model = RecurrentWAF(vocab_size=len(train_vocab),embedding_dim=EMBEDDING_DIM,
                     rec_hidden_size=REC_HIDDEN_DIM, fc_hidden_size=FC_HIDDEN_DIM,
                     recurrent_type=REC_LAYER_TYPE, dropout=DROPOUT).to(device)

In [191]:
criterion = nn.BCELoss()
optimizer = torch.optim.Adam(params=model.parameters(),lr=LR, betas=BETAS)

In [192]:
for epoch in range(EPOCHS):
    train_loss = train_loop(model, criterion, optimizer, train_iterator)
    true, pred, val_loss, comp_time = val_loop(model, criterion, val_iterator)
    accuracy, f1 = get_accuracy_and_f1_score(true, pred)
    print(f"Epoch {epoch+1} -- Train_Loss: {train_loss} -- Val_Loss: {val_loss} -- Val_Accuracy: {accuracy} -- Val_F1: {f1}")

Training Model: 100%|██████████| 709/709 [01:08<00:00, 10.33it/s]
Validating Model: 100%|██████████| 178/178 [00:17<00:00, 10.13it/s]


Epoch 1 -- Train_Loss: 0.6792557511074082 -- Val_Loss: 0.6720932340354062 -- Val_Accuracy: 0.5684907325684024 -- Val_F1: 0.0


Training Model: 100%|██████████| 709/709 [01:12<00:00,  9.83it/s]
Validating Model: 100%|██████████| 178/178 [00:16<00:00, 10.66it/s]


Epoch 2 -- Train_Loss: 0.5240518565490653 -- Val_Loss: 0.40687665962771086 -- Val_Accuracy: 0.7902912621359224 -- Val_F1: 0.8012048192771084


Training Model: 100%|██████████| 709/709 [01:19<00:00,  8.96it/s]
Validating Model: 100%|██████████| 178/178 [00:20<00:00,  8.90it/s]


Epoch 3 -- Train_Loss: 0.38124701969209945 -- Val_Loss: 0.3413784425245242 -- Val_Accuracy: 0.8528684907325684 -- Val_F1: 0.8415549852675601


Training Model: 100%|██████████| 709/709 [01:18<00:00,  9.04it/s]
Validating Model: 100%|██████████| 178/178 [00:18<00:00,  9.82it/s]


Epoch 4 -- Train_Loss: 0.31088578585950377 -- Val_Loss: 0.2851855932714305 -- Val_Accuracy: 0.8664607237422771 -- Val_F1: 0.8456909739928608


Training Model: 100%|██████████| 709/709 [01:18<00:00,  8.98it/s]
Validating Model: 100%|██████████| 178/178 [00:16<00:00, 10.71it/s]


Epoch 5 -- Train_Loss: 0.2711170658006823 -- Val_Loss: 0.24995146217766437 -- Val_Accuracy: 0.8997352162400706 -- Val_F1: 0.8937125748502994


Training Model: 100%|██████████| 709/709 [01:14<00:00,  9.46it/s]
Validating Model: 100%|██████████| 178/178 [00:14<00:00, 12.21it/s]


Epoch 6 -- Train_Loss: 0.2387723523298507 -- Val_Loss: 0.19953990409850847 -- Val_Accuracy: 0.923477493380406 -- Val_F1: 0.9167066961283504


Training Model: 100%|██████████| 709/709 [01:14<00:00,  9.56it/s]
Validating Model: 100%|██████████| 178/178 [00:13<00:00, 13.30it/s]


Epoch 7 -- Train_Loss: 0.1890005446150794 -- Val_Loss: 0.1807700813737478 -- Val_Accuracy: 0.9327449249779347 -- Val_F1: 0.925904317386231


Training Model: 100%|██████████| 709/709 [01:14<00:00,  9.48it/s]
Validating Model: 100%|██████████| 178/178 [00:16<00:00, 10.79it/s]


Epoch 8 -- Train_Loss: 0.17242909855982985 -- Val_Loss: 0.17275825446325072 -- Val_Accuracy: 0.9378640776699029 -- Val_F1: 0.9322164452147121


Training Model: 100%|██████████| 709/709 [01:15<00:00,  9.45it/s]
Validating Model: 100%|██████████| 178/178 [00:15<00:00, 11.19it/s]


Epoch 9 -- Train_Loss: 0.16348104615274117 -- Val_Loss: 0.17686632340460012 -- Val_Accuracy: 0.9318623124448367 -- Val_F1: 0.926657799733992


Training Model: 100%|██████████| 709/709 [01:16<00:00,  9.22it/s]
Validating Model: 100%|██████████| 178/178 [00:15<00:00, 11.65it/s]

Epoch 10 -- Train_Loss: 0.15568438897957587 -- Val_Loss: 0.1611709658244855 -- Val_Accuracy: 0.9423654015887025 -- Val_F1: 0.9371087354329192





In [167]:
torch.save(model,'./models/lstm_waf_new.bin')

In [151]:
true, pred, val_loss, comp_time = val_loop(model, criterion, val_iterator)
accuracy, f1 = get_accuracy_and_f1_score(true, pred)
print(f"Final Validation Accuracy: {accuracy}")
print(f"Final Validation F1-Score: {f1}")

Validating Model: 100%|██████████| 178/178 [00:15<00:00, 11.43it/s]

Final Validation Accuracy: 0.9029126213592233
Final Validation F1-Score: 0.8972154737432255





In [248]:
model = torch.load('./models/lstm_waf_new.bin', map_location='mps')

## Global Unstructured Pruning: ##

In [212]:
import torch.nn.utils.prune as prune

parameters_to_prune = (
    (model.recurrent, 'weight_ih_l0'),
    (model.recurrent, 'weight_hh_l0'),
    (model.recurrent, 'bias_ih_l0'),
    (model.recurrent, 'bias_hh_l0'),
    (model.fc[0], 'weight'),
    (model.fc[0], 'bias'),
    (model.fc[2], 'weight'),
    (model.fc[2], 'bias'),
)

prune.global_unstructured(
    parameters=parameters_to_prune,
    pruning_method=prune.L1Unstructured,
    amount=0.2,
)

# Apply pruning to the model
# prune.remove(model.recurrent, 'weight_ih_l0')
# prune.remove(model.recurrent, 'weight_hh_l0')
# prune.remove(model.recurrent, 'bias_ih_l0')
# prune.remove(model.recurrent, 'bias_hh_l0')
# prune.remove(model.fc[0], 'weight')
# prune.remove(model.fc[0], 'bias')
# prune.remove(model.fc[2], 'weight')
# prune.remove(model.fc[2], 'bias')

## Local Structured Pruning: ##

In [254]:
import torch.nn.utils.prune as prune

parameters_to_prune = (
    (model.recurrent, 'weight_ih_l0'),
    (model.recurrent, 'weight_hh_l0'),
    # (model.recurrent, 'bias_ih_l0'),
    # (model.recurrent, 'bias_hh_l0'),
    # (model.fc[0], 'weight'),
    # (model.fc[0], 'bias'),
    (model.fc[2], 'weight'),
    # (model.fc[2], 'bias'),
)

for module, name in parameters_to_prune:
    prune.ln_structured(
        module=module,
        name=name,
        amount=0.2,
        n=2,
        dim=1
    )
    prune.remove(module=module, name=name)

# Apply pruning to the model
# prune.remove(model.recurrent, 'weight_ih_l0')
# prune.remove(model.recurrent, 'weight_hh_l0')
# prune.remove(model.recurrent, 'bias_ih_l0')
# prune.remove(model.recurrent, 'bias_hh_l0')
# prune.remove(model.fc[0], 'weight')
# prune.remove(model.fc[0], 'bias')
# prune.remove(model.fc[2], 'weight')
# prune.remove(model.fc[2], 'bias')

## Model Sparsity After Pruning: ##

In [255]:
print(
    "Sparsity in recurrent.weight_ih_l0': {:.2f}%".format(
        100. * float(torch.sum(model.recurrent.weight_ih_l0 == 0))
        / float(model.recurrent.weight_ih_l0.nelement())
    )
)
print(
    "Sparsity in recurrent.weight_hh_l0: {:.2f}%".format(
        100. * float(torch.sum(model.recurrent.weight_hh_l0 == 0))
        / float(model.recurrent.weight_hh_l0.nelement())
    )
)
print(
    "Sparsity in recurrent.bias_ih_l0': {:.2f}%".format(
        100. * float(torch.sum(model.recurrent.bias_ih_l0 == 0))
        / float(model.recurrent.bias_ih_l0.nelement())
    )
)
print(
    "Sparsity in recurrent.bias_hh_l0: {:.2f}%".format(
        100. * float(torch.sum(model.recurrent.bias_hh_l0 == 0))
        / float(model.recurrent.bias_hh_l0.nelement())
    )
)
print(
    "Sparsity in fc1.weight: {:.2f}%".format(
        100. * float(torch.sum(model.fc[0].weight == 0))
        / float(model.fc[0].weight.nelement())
    )
)
print(
    "Sparsity in fc1.bias: {:.2f}%".format(
        100. * float(torch.sum(model.fc[0].bias == 0))
        / float(model.fc[0].bias.nelement())
    )
)
print(
    "Sparsity in fc2.weight: {:.2f}%".format(
        100. * float(torch.sum(model.fc[2].weight == 0))
        / float(model.fc[2].weight.nelement())
    )
)
print(
    "Sparsity in fc2.bias: {:.2f}%".format(
        100. * float(torch.sum(model.fc[2].bias == 0))
        / float(model.fc[2].bias.nelement())
    )
)
print(
    "Global sparsity: {:.2f}%".format(
        100. * float(
            torch.sum(model.recurrent.bias_ih_l0 == 0)
            + torch.sum(model.recurrent.weight_ih_l0 == 0)
            + torch.sum(model.recurrent.weight_hh_l0 == 0)
            + torch.sum(model.recurrent.bias_hh_l0 == 0)
            + torch.sum(model.fc[0].weight == 0)
            + torch.sum(model.fc[2].weight == 0)
            + torch.sum(model.fc[0].bias == 0)
            + torch.sum(model.fc[2].bias == 0)
        )
        / float(
            model.recurrent.bias_ih_l0.nelement()
            + model.recurrent.bias_hh_l0.nelement()
            + model.recurrent.weight_ih_l0.nelement()
            + model.recurrent.weight_hh_l0.nelement()
            + model.fc[0].weight.nelement()
            + model.fc[0].bias.nelement()
            + model.fc[2].weight.nelement()
            + model.fc[2].bias.nelement()
        )
    )
)

Sparsity in recurrent.weight_ih_l0': 20.31%
Sparsity in recurrent.weight_hh_l0: 20.31%
Sparsity in recurrent.bias_ih_l0': 0.00%
Sparsity in recurrent.bias_hh_l0: 0.00%
Sparsity in fc1.weight: 0.00%
Sparsity in fc1.bias: 0.00%
Sparsity in fc2.weight: 40.62%
Sparsity in fc2.bias: 0.00%
Global sparsity: 18.98%


In [256]:
true, pred, val_loss, comp_time = val_loop(model, criterion, val_iterator)
accuracy, f1 = get_accuracy_and_f1_score(true, pred)
print(f"Final Validation Accuracy: {accuracy}")
print(f"Final Validation F1-Score: {f1}")
print(f"Average Time to Compute Forward Pass: {comp_time*1000} ms")

Validating Model: 100%|██████████| 178/178 [00:16<00:00, 10.90it/s]

Final Validation Accuracy: 0.9326566637246249
Final Validation F1-Score: 0.9270624223305611
Average Time to Compute Forward Pass: 4.835521237234052 ms





In [223]:
true, pred, val_loss, comp_time = val_loop(model, criterion, val_iterator)
accuracy, f1 = get_accuracy_and_f1_score(true, pred)
print(f"Final Validation Accuracy: {accuracy}")
print(f"Final Validation F1-Score: {f1}")
print(f"Average Time to Compute Forward Pass: {comp_time*1000} ms")

Validating Model: 100%|██████████| 178/178 [00:19<00:00,  9.19it/s]

Final Validation Accuracy: 0.9494263018534863
Final Validation F1-Score: 0.9443310988050131
Average Time to Compute Forward Pass: 11.048862103665813 ms



