# Imports

In [1]:
from Model import *
import torch.nn as nn
from torch.utils.data import DataLoader
import pandas as pd

# Loading the Datasets

In [2]:
# Fractions used to split every dataset.
split_ratio = 0.8  # share of all rows used for train + validation; the rest is the test set
val_ratio = 0.2    # share of the train + validation rows held out for validation


def make_loaders(csv_path, split_ratio, val_ratio, batch_size = 128, random_state = None):
    """Read one CSV and build its train / test / validation data loaders.

    The rows are split in two steps: ``split_ratio`` of all rows are sampled
    for training + validation and the remaining rows become the test set;
    then ``val_ratio`` of the sampled rows are moved to the validation set.
    With the defaults used in this notebook (0.8 and 0.2) that is roughly
    64% train, 16% validation and 20% test.

    Args:
        csv_path: path of the CSV file; every column is read as ``str``.
        split_ratio: fraction of all rows sampled for train + validation.
        val_ratio: fraction of the sampled rows held out for validation.
        batch_size: batch size shared by the three loaders.
        random_state: forwarded to ``DataFrame.sample``. The default ``None``
            keeps the unseeded sampling this notebook has always used; pass
            an int to make the split reproducible.

    Returns:
        A ``(train_loader, test_loader, val_loader)`` tuple of shuffling
        ``DataLoader`` objects, each wrapping a ``QueueDataset``.
    """
    data = pd.read_csv(csv_path, dtype = str)

    train_val_df = data.sample(frac = split_ratio, random_state = random_state)
    test_df = data.drop(train_val_df.index)
    val_df = train_val_df.sample(frac = val_ratio, random_state = random_state)
    train_df = train_val_df.drop(val_df.index)

    # Order matters: callers unpack the result as (train, test, val).
    return tuple(
        DataLoader(QueueDataset(frame), batch_size = batch_size, shuffle = True)
        for frame in (train_df, test_df, val_df)
    )


# One set of loaders per dataset file; the names are used by the training cell.
train_loader_512, test_loader_512, val_loader_512 = make_loaders(
    "Data/dataset_512.csv", split_ratio, val_ratio)
train_loader_1024, test_loader_1024, val_loader_1024 = make_loaders(
    "Data/dataset_1024.csv", split_ratio, val_ratio)
train_loader_2048, test_loader_2048, val_loader_2048 = make_loaders(
    "Data/dataset_2048.csv", split_ratio, val_ratio)

# The 4096 dataset is currently disabled. To enable it:
# train_loader_4096, test_loader_4096, val_loader_4096 = make_loaders(
#     "Data/dataset_4096.csv", split_ratio, val_ratio)

# Defining the Model

In [9]:
# Model hyperparameters.
# emsize and d_hid are starting values only: the training cell overwrites both
# with the width under test before it builds each model.
ntokens = 65536        # vocabulary size
emsize = d_hid = 240   # embedding dimension / feedforward dimension of the ``nn.TransformerEncoder``
nlayers = 3            # how many ``nn.TransformerEncoderLayer`` are stacked in the encoder
nhead = 8              # attention heads in each ``nn.MultiheadAttention``
dropout = 0.2          # dropout probability
threshold = 0.5        # passed to train() / inference() below

# Everything runs on the CPU; the CUDA cache is still emptied when a GPU is visible.
device = "cpu"
if torch.cuda.is_available():
    torch.cuda.empty_cache()

# Training Model

In [10]:
# Sweep: for every embedding width, with and without averaging, train and
# evaluate one model per dataset and save it together with its metrics.
test = "EmbeddingsResults/"   # root folder of the saved results
test_types = [480, 528]       # embedding widths to try (also used as d_hid)
n_epochs = 3                  # value passed to train() for every run

# Dataset label -> (train loader, test loader, validation loader, input_size).
# In every entry input_size equals the label divided by 16.
loaders_by_length = {
    "512": (train_loader_512, test_loader_512, val_loader_512, 32),
    "1024": (train_loader_1024, test_loader_1024, val_loader_1024, 64),
    "2048": (train_loader_2048, test_loader_2048, val_loader_2048, 128),
    # "4096": (train_loader_4096, test_loader_4096, val_loader_4096, 256),
}

for test_number in test_types:
    emsize = test_number
    d_hid = test_number
    test_type = "/" + str(test_number) + "emsize/"

    for averaging in [True, False]:
        average = "Averaging" if averaging else "NonAveraging"

        # `length` replaces the old loop variable `input`, which shadowed the builtin.
        for length, (train_loader, test_loader, val_loader, input_size) in loaders_by_length.items():
            # e.g. "EmbeddingsResults/Averaging/480emsize/512"
            path = test + average + test_type + length

            print("TRAIN " + length)

            if torch.cuda.is_available():
                torch.cuda.empty_cache()

            transformer = RandomLM(ntokens, emsize, nhead, d_hid, nlayers, input_size, dropout, averaging).to(device)
            criterion = nn.BCELoss().to(device)
            optimizer = torch.optim.Adam(transformer.parameters())

            train_metrics, val_metrics = train(transformer, criterion, optimizer, train_loader, val_loader, n_epochs, threshold, device = device)

            print("TEST " + length)

            test_metrics = inference(transformer, criterion, test_loader, threshold, device = device)
            model_save(transformer, path, train_metrics, val_metrics, test_metrics)

            # Drop the references once the run is saved so the previous model
            # is no longer referenced when the cache is emptied and the next
            # model is built.
            del transformer, criterion, optimizer

TRAIN 2048
epoch: 1, batch: 100, train loss: 35.764, train macro: 0.546, train micro: 0.711, train sample: 0.647, train weighted 0.641, val loss: 36.566, val macro: 0.542, val micro: 0.708 val sample: 0.643 val weighted: 0.639
epoch: 1, batch: 200, train loss: 36.196, train macro: 0.547, train micro: 0.713, train sample: 0.650, train weighted 0.643, val loss: 36.566, val macro: 0.542, val micro: 0.708 val sample: 0.643 val weighted: 0.639
epoch: 1, batch: 300, train loss: 36.612, train macro: 0.544, train micro: 0.708, train sample: 0.644, train weighted 0.638, val loss: 36.566, val macro: 0.542, val micro: 0.708 val sample: 0.643 val weighted: 0.639
epoch: 1, batch: 400, train loss: 36.485, train macro: 0.543, train micro: 0.708, train sample: 0.644, train weighted 0.640, val loss: 36.566, val macro: 0.542, val micro: 0.708 val sample: 0.643 val weighted: 0.639
epoch: 1, batch: 500, train loss: 36.796, train macro: 0.541, train micro: 0.705, train sample: 0.641, train weighted 0.636, 

# Pre-Trained Models

## Metrics

In [None]:
# Unpack a saved run into the scratch folder "temp/" and plot its training
# and validation metrics.
path = "TrainedModels/240embeddings-3encoder-24heads-128tokensize"
archive_name = path + ".zip"

with ZipFile(archive_name, mode = "r") as archive:
    archive.extractall(path = "temp/")

# Both CSV files keep their index in the first column.
train_metrics, val_metrics = (
    pd.read_csv(csv_file, index_col = 0)
    for csv_file in ("temp/train_metrics.csv", "temp/val_metrics.csv")
)

plot_metrics(train_metrics, val_metrics)

## Inference

In [None]:
# Load the model file that the metrics cell extracted into "temp/".
# NOTE(review): torch.load unpickles the file, so only open archives from a
# trusted source.
checkpoint_file = "temp/model.pt"
model = torch.load(checkpoint_file)

# Evaluation is left disabled; it needs a criterion and a data loader:
# data_loader = None #loader for the data
# inference(model, criterion, test_loader, threshold)

In [2]:
import os
import shutil


def remove_extracted_files(temp_dir = "temp"):
    """Delete the scratch directory that the saved-run archive was extracted into.

    The whole directory is removed, so files that were in the archive but
    were never listed here by name (anything other than the two metrics CSVs
    and the model file) no longer make the cleanup fail. Calling it when the
    directory does not exist is a no-op, so the cell can be run repeatedly.

    Args:
        temp_dir: directory to remove together with everything inside it.
    """
    if os.path.isdir(temp_dir):
        shutil.rmtree(temp_dir)


remove_extracted_files()