# Imports

In [1]:
from Model import *
import torch.nn as nn
from torch.utils.data import DataLoader
import pandas as pd

# LSTM Definition

In [2]:
class PositionalEncoding(nn.Module):
    """Add fixed sinusoidal position information to a sequence-first tensor.

    A table ``pe`` of shape ``[max_len, 1, d_model]`` is precomputed once: even
    feature indices hold ``sin(position * div_term)`` and odd feature indices
    hold ``cos(position * div_term)``. The table is registered as a buffer, so
    it is not a trainable parameter.

    Args:
        d_model: Size of the feature (embedding) dimension. May be odd.
        dropout: Dropout probability applied after the encoding is added.
        max_len: Number of positions that are precomputed.
    """

    def __init__(self, d_model: int, dropout: float = 0.1, max_len: int = 5000):
        super().__init__()
        self.dropout = nn.Dropout(p=dropout)

        position = torch.arange(max_len).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2) * (-math.log(10000.0) / d_model))
        pe = torch.zeros(max_len, 1, d_model)
        pe[:, 0, 0::2] = torch.sin(position * div_term)
        # An odd d_model has one fewer odd-indexed column than even-indexed ones,
        # so div_term is trimmed to fit; for an even d_model the slice is a no-op.
        pe[:, 0, 1::2] = torch.cos(position * div_term[: d_model // 2])
        self.register_buffer('pe', pe)

    def forward(self, x: Tensor) -> Tensor:
        """Add the positional table to ``x`` and apply dropout.

        Arguments:
            x: Tensor, shape ``[seq_len, batch_size, embedding_dim]``;
               ``seq_len`` is expected not to exceed ``max_len``.

        Returns:
            Tensor with the same shape as ``x``.
        """
        # pe[:seq_len] is [seq_len, 1, d_model] and broadcasts over the batch axis.
        x = x + self.pe[:x.size(0)]
        return self.dropout(x)
    
class RNN(nn.Module):
    """LSTM classifier over sequences of integer symbols.

    Each symbol is one-hot encoded into ``input_size`` features, the sequence
    is run through a stacked LSTM, the LSTM outputs are averaged over the time
    axis, and a linear layer followed by a sigmoid produces one score between
    0 and 1 per class.

    Args:
        input_size: Width of the one-hot encoding, i.e. the number of distinct
            symbol values; also the feature size fed to the LSTM.
        hidden_size: Hidden state size of every LSTM layer.
        num_layers: Number of stacked LSTM layers.
        num_classes: Number of output scores.
    """

    def __init__(self, input_size, hidden_size, num_layers, num_classes):
        super(RNN, self).__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        # Retained so code that reads ``model.device`` keeps working; forward()
        # takes the device from its input instead of from this attribute.
        self.device = 'cpu'
        self.rnn = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True, dropout = 0.2)
        self.fc = nn.Sequential(nn.Linear(hidden_size, num_classes), nn.Sigmoid())

    def forward(self, x):
        """Score a batch of symbol sequences.

        Arguments:
            x: Integer-valued tensor of shape ``[batch_size, seq_len]`` whose
               values lie in ``[0, input_size)``.

        Returns:
            Tensor of shape ``[batch_size, num_classes]``.
        """
        # The one-hot width must equal the LSTM feature size, so it follows
        # input_size rather than a fixed 2.
        x = torch.nn.functional.one_hot(x.to(torch.int64), num_classes=self.input_size).float()
        # Create the initial states on the input's device so the model also
        # works after being moved off the CPU.
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size, device=x.device)
        c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size, device=x.device)
        out, _ = self.rnn(x, (h0, c0))
        # Average over the time axis (dim 1 because batch_first=True).
        out = torch.mean(out, 1)
        return self.fc(out)


# Build the LSTM baseline on the CPU together with its loss and optimizer.
# The variable is called ``transformer`` although it holds the RNN defined above.
transformer = RNN(
    input_size=2,
    hidden_size=1024,
    num_layers=2,
    num_classes=7,
)
transformer = transformer.to('cpu')

criterion = nn.BCELoss()
criterion = criterion.to('cpu')

# Adam with its default settings over all model parameters.
optimizer = torch.optim.Adam(transformer.parameters())

# Loading the Datasets

In [3]:
#get the data
load_lstm = False

split_ratio = 0.8  # fraction of all rows kept for train+validation; the rest is the test set
val_ratio = 0.2    # fraction of the train+validation rows that is held out for validation
split_seed = 42    # fixed seed so the splits are reproducible; set to None for a fresh random split


def make_split_loaders(csv_path, dataset_cls, train_frac, val_frac, batch_size=128, seed=None):
    """Read one CSV, split it into train/test/validation and wrap each part in a DataLoader.

    Args:
        csv_path: Path of the CSV file; every column is read as ``str``.
        dataset_cls: Dataset class that is constructed from a DataFrame.
        train_frac: Fraction of all rows sampled for train+validation; the
            remaining rows form the test set.
        val_frac: Fraction of the train+validation rows moved to validation.
        batch_size: Batch size of all three loaders.
        seed: ``random_state`` passed to ``DataFrame.sample`` for both splits.

    Returns:
        Tuple ``(train_loader, test_loader, val_loader)``; all three shuffle.
    """
    frame = pd.read_csv(csv_path, dtype=str)
    train_val_part = frame.sample(frac=train_frac, random_state=seed)
    test_part = frame.drop(train_val_part.index)
    val_part = train_val_part.sample(frac=val_frac, random_state=seed)
    train_part = train_val_part.drop(val_part.index)
    return tuple(
        DataLoader(dataset_cls(part), batch_size=batch_size, shuffle=True)
        for part in (train_part, test_part, val_part)
    )


# Only the selected class name is evaluated, exactly as in a per-call conditional.
dataset_cls = QueueDataset_LSTM if load_lstm else QueueDataset

#initialize the data loaders, one train/test/val triple per dataset
train_loader_512, test_loader_512, val_loader_512 = make_split_loaders(
    "Data/dataset_512.csv", dataset_cls, split_ratio, val_ratio, seed=split_seed)

train_loader_1024, test_loader_1024, val_loader_1024 = make_split_loaders(
    "Data/dataset_1024.csv", dataset_cls, split_ratio, val_ratio, seed=split_seed)

train_loader_2048, test_loader_2048, val_loader_2048 = make_split_loaders(
    "Data/dataset_2048.csv", dataset_cls, split_ratio, val_ratio, seed=split_seed)

train_loader_4096, test_loader_4096, val_loader_4096 = make_split_loaders(
    "Data/dataset_4096.csv", dataset_cls, split_ratio, val_ratio, seed=split_seed)

# Defining the Model

In [None]:
#model hyperparameters 
ntokens = 65536  # size of vocabulary
emsize = 192  # embedding dimension
d_hid = 192  # dimension of the feedforward network model in ``nn.TransformerEncoder``
nlayers = 1  # number of ``nn.TransformerEncoderLayer`` in ``nn.TransformerEncoder``
nhead = 1 # number of heads in ``nn.MultiheadAttention``
dropout = 0.05  # dropout probability
threshold = 0.5  # passed to train()/inference(); presumably the score cut-off for a positive label - confirm in Model
input_size = 32  # same value the training cell pairs with the 512 dataset
# Fall back to the CPU when no GPU is present instead of failing on ``.to("cuda")``.
device = "cuda" if torch.cuda.is_available() else "cpu"
if device == "cuda":
    torch.cuda.empty_cache()
# The final positional argument is the ``averaging`` flag (the training cell passes it in the same slot).
transformer = RandomLM(ntokens, emsize, nhead, d_hid, nlayers, input_size, dropout, True).to(device)
criterion = nn.BCELoss().to(device)
optimizer = torch.optim.Adam(transformer.parameters())

# Training Model

In [None]:
# Sweep over the number of encoder layers. For every layer count a model is
# trained with and without averaging on each of the four datasets, evaluated on
# the matching test loader, and saved under
# "EncoderResults/<Averaging|NonAveraging>/<N>encoder/<dataset>".
test = "EncoderResults/"
test_types = [1, 2, 3, 4, 5, 6, 7, 8]
num_epochs = 3  # epoch count handed to train()

# dataset name -> (train loader, test loader, val loader, input_size given to RandomLM)
experiment_inputs = {
    "512": (train_loader_512, test_loader_512, val_loader_512, 32),
    "1024": (train_loader_1024, test_loader_1024, val_loader_1024, 64),
    "2048": (train_loader_2048, test_loader_2048, val_loader_2048, 128),
    "4096": (train_loader_4096, test_loader_4096, val_loader_4096, 256),
}

for test_number in test_types:
    # The results are labelled "<N>encoder", so N is used as the number of
    # encoder layers. emsize and d_hid keep the values from the hyperparameter
    # cell and are no longer overwritten by the sweep.
    num_encoder_layers = test_number
    test_type = "/" + str(test_number) + "encoder/"

    for averaging in [False, True]:
        average = "Averaging" if averaging else "NonAveraging"
        # ``dataset_name`` replaces a loop variable that shadowed the builtin ``input``.
        for dataset_name, (train_loader, test_loader, val_loader, input_size) in experiment_inputs.items():
            path = test + average + test_type + dataset_name

            print("TRAIN " + dataset_name)

            if torch.cuda.is_available():
                torch.cuda.empty_cache()

            # A fresh model, loss and optimizer for every configuration.
            transformer = RandomLM(ntokens, emsize, nhead, d_hid, num_encoder_layers, input_size, dropout, averaging).to(device)
            criterion = nn.BCELoss().to(device)
            optimizer = torch.optim.Adam(transformer.parameters())

            train_metrics, val_metrics = train(transformer, criterion, optimizer, train_loader, val_loader, num_epochs, threshold, device = device)

            print("TEST " + dataset_name)

            test_metrics = inference(transformer, criterion, test_loader, threshold, device = device)
            model_save(transformer, path, train_metrics, val_metrics, test_metrics)

    # Drop the references to the last model of this layer count before the next one starts.
    del transformer, criterion, optimizer

# Pre-Trained Models

## Metrics

In [None]:
# Result roots and per-configuration sub-folders of the three sweeps.
# Only the embedding-size sweep is plotted below; pass another path/tests pair
# to visualize a different sweep.
path1 = 'EncoderResults/'
tests1 = [f'{count}encoder/' for count in range(1, 9)]

path2 = 'HeadsResults/'
tests2 = [f'{count}head/' for count in (1, 2, 4, 6, 8, 12, 16, 20, 24)]

path3 = 'EmbeddingsResults/'
# 144, 192, ..., 528 in steps of 48
tests3 = [f'{size}emsize/' for size in range(144, 529, 48)]

visualize_exeperiment(path3, tests3, 'Macro F1')

## Inference

In [None]:
# Choose the device first: the loss below is moved to it, so it must be set
# before use. Falls back to the CPU when no GPU is present.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# NOTE: torch.load unpickles the file, which can execute arbitrary code - only
# load checkpoints from a trusted source. map_location lets a checkpoint saved
# on a GPU be opened on a CPU-only machine.
model = torch.load("temp/model.pt", map_location=device)
data = pd.read_csv("Data/dataset_512.csv", dtype = str)
dataset = QueueDataset(data)
data_loader = DataLoader(dataset, batch_size = 128, shuffle = True)

criterion = nn.BCELoss().to(device)

# ``threshold`` comes from the hyperparameter cell, which must have been run.
inference(model, criterion, data_loader, threshold, device = device)