# Loading the Datasets

In [1]:
from Model import *
import torch.nn as nn
from torch.utils.data import DataLoader
import pandas as pd

# --- Split configuration ---------------------------------------------------
# Fixed seed for the random row sampling below, so that a fresh
# "Restart Kernel -> Run All" reproduces the same train/val/test partition.
split_seed = 42
split_ratio = 0.8  # fraction of all rows sampled into the train+validation pool
val_ratio = 0.2    # fraction of that pool held out as the validation set
batch_size = 128   # batch size shared by all three loaders

# --- Load and split the data -----------------------------------------------
# dtype=str makes pandas read every column as a string.
data = pd.read_csv("Data/dataset_2048.csv", dtype=str)

# Rows not sampled into the train+validation pool become the test set;
# the validation rows are then carved out of the pool.
train_val_df = data.sample(frac=split_ratio, random_state=split_seed)
test_df = data.drop(train_val_df.index)
val_df = train_val_df.sample(frac=val_ratio, random_state=split_seed)
train_df = train_val_df.drop(val_df.index)

# The three partitions must account for every row exactly once.
assert len(train_df) + len(val_df) + len(test_df) == len(data), \
    "train/val/test split does not cover the dataset"

train_dataset = QueueDataset(train_df)
test_dataset = QueueDataset(test_df)
val_dataset = QueueDataset(val_df)

# --- Data loaders ------------------------------------------------------------
# NOTE(review): the validation and test loaders are shuffled as well. Batch
# order should not matter for aggregate metrics, but this is left unchanged
# because train()/test() are not visible here -- confirm before switching
# them to shuffle=False.
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=True)

# Defining the Model

In [2]:
# Seed PyTorch's global random number generator before anything stochastic
# is created, so that re-running the notebook from a fresh kernel starts from
# the same random state.
model_seed = 42
torch.manual_seed(model_seed)

# Model hyperparameters
ntokens = 65536  # size of vocabulary
emsize = 240  # embedding dimension
d_hid = 240  # dimension of the feedforward network model in ``nn.TransformerEncoder``
nlayers = 1  # number of ``nn.TransformerEncoderLayer`` in ``nn.TransformerEncoder``
nhead = 1  # number of heads in ``nn.MultiheadAttention``
dropout = 0.2  # dropout probability
input_size = 128  # passed to RandomLM; presumably the input sequence length -- TODO confirm
threshold = 0.5  # not used in this cell; presumably the cutoff applied to predictions -- TODO confirm

# Build the model, loss and optimizer.
# NOTE(review): the device is fixed to the CPU. Whether train()/test() move
# batches to a device is not visible here, so this is deliberately left as is.
device = "cpu"
transformer = RandomLM(ntokens, emsize, nhead, d_hid, nlayers, input_size, dropout).to(device)
criterion = nn.BCELoss().to(device)  # binary cross-entropy on probabilities
optimizer = torch.optim.Adam(transformer.parameters())  # Adam with its default settings



# Training the Model

In [3]:
# Sixth positional argument of train(). Presumably the number of training
# epochs (the recorded log stops after epoch 1) -- TODO confirm against train().
num_epochs = 1

# Train the model; train() returns the training and validation metrics,
# which are plotted here and reused later when the results are saved.
train_metrics, val_metrics = train(
    transformer, criterion, optimizer, train_loader, val_loader, num_epochs, threshold
)
plot_metrics(train_metrics, val_metrics)

# Evaluate on the held-out test set. The returned values are kept in a named
# variable instead of being unpacked into `_`: that discarded every result and
# rebound `_`, which IPython uses to hold the previous cell output.
test_results = test(transformer, criterion, test_loader, threshold)

epoch: 1, batch: 100, train loss: 0.815, train macro: 0.723, train micro: 0.771, train sample: 0.694, train weighted 0.746, val loss: 0.323, val macro: 0.877, val micro: 0.880 val sample: 0.809 val weighted: 0.883
epoch: 1, batch: 200, train loss: 0.310, train macro: 0.870, train micro: 0.875, train sample: 0.799, train weighted 0.877, val loss: 0.316, val macro: 0.876, val micro: 0.880 val sample: 0.733 val weighted: 0.880
epoch: 1, batch: 300, train loss: 0.291, train macro: 0.879, train micro: 0.885, train sample: 0.811, train weighted 0.886, val loss: 0.269, val macro: 0.897, val micro: 0.902 val sample: 0.814 val weighted: 0.903


# Saving Results

In [9]:
from zipfile import ZipFile 
import os 

import tempfile

# The archive is written to "<dir_name>.zip".
dir_name = "TrainedModels/240embeddings-1encoder-1heads-128tokensize"
archive_path = dir_name + ".zip"

# Make sure the output directory exists, so that saving cannot fail with
# FileNotFoundError after training has already completed.
archive_dir = os.path.dirname(archive_path)
if archive_dir:
    os.makedirs(archive_dir, exist_ok=True)

# Stage the artifacts in a private temporary directory rather than in the
# working directory: nothing with the same name is overwritten or deleted
# there, and the staging files are removed even if a step below raises.
with tempfile.TemporaryDirectory() as staging_dir:
    model_path = os.path.join(staging_dir, "model.pt")
    train_metrics_path = os.path.join(staging_dir, "train_metrics.csv")
    val_metrics_path = os.path.join(staging_dir, "val_metrics.csv")

    torch.save(transformer.state_dict(), model_path)
    train_metrics.to_csv(train_metrics_path)
    val_metrics.to_csv(val_metrics_path)

    # `arcname` keeps the member names flat inside the archive. The `with`
    # block closes the archive, so no explicit close() call is needed.
    with ZipFile(archive_path, "w") as myzip:
        myzip.write(train_metrics_path, arcname="train_metrics.csv")
        myzip.write(val_metrics_path, arcname="val_metrics.csv")
        myzip.write(model_path, arcname="model.pt")