# Import Modules

In [1]:
import os
import pickle
import torch
import timeit
import matplotlib.pyplot as plt
import numpy as np

from datetime import datetime
from itertools import chain
from time import time
from torch import nn, optim
from torchmetrics import F1Score
from torch.optim import lr_scheduler
from tqdm.notebook import tqdm
from torch.utils.data import TensorDataset, DataLoader
from polyglot.mapping import Embedding, CaseExpander, DigitExpander

device = "cuda" if torch.cuda.is_available() else "cpu"
device

'cpu'

# Hyperparameters Config

In [2]:
class HyperparamsDataset:
    def __init__(
        self,
        left_context_seq_len = None,
        oov_context_seq_len = None,
        right_context_seq_len = None,
        n_features_left_context = None,
        n_features_oov_context = None,
        n_features_right_context = None,
        device=device
    ):
        self.left_context_seq_len = left_context_seq_len
        self.oov_context_seq_len = oov_context_seq_len
        self.right_context_seq_len = right_context_seq_len
        self.n_features_left_context = n_features_left_context,
        self.n_features_oov_context = n_features_oov_context,
        self.n_features_right_context = n_features_right_context,
        self.device = device
        
        
class HyperparamsModel:
     def __init__(
        self,
        num_hidden_layer=None,
        hidden_size=None,
        device=device
    ):
        self.num_hidden_layer = num_hidden_layer
        self.hidden_size = hidden_size
        self.device = device
        
dataset_hyperparams_config = HyperparamsDataset(
    left_context_seq_len = 79,
    oov_context_seq_len = 30,
    right_context_seq_len = 79,
    n_features_left_context = 64,
    n_features_oov_context = 20,
    n_features_right_context = 64,
    device=device
)

context_size = 79

# Prepare Feature Dataset

In [3]:
def convert_doc_to_idxs(docs, dict_vocabs):
    doc_to_idx = []
    
    for doc in docs:
        doc_to_idx.append([dict_vocabs[token] for token in doc])
        
    return np.array(doc_to_idx)

# Left context
left_context = open("../../datasets/features/79_context/left_context_with_pad.pkl", "rb")
left_context = pickle.load(left_context)
left_context_to_idx = open("../../datasets/features/79_context/token2idx_left_context.pkl", "rb")
left_context_to_idx = pickle.load(left_context_to_idx)
doc_left_context_to_idx = convert_doc_to_idxs(left_context, left_context_to_idx)

# OOV context
oov_context = open("../../datasets/features/79_context/oov_context_with_pad.pkl", "rb")
oov_context = pickle.load(oov_context)
oov_context_to_idx = open("../../datasets/features/79_context/token2idx_oov_context.pkl", "rb")
oov_context_to_idx = pickle.load(oov_context_to_idx)
doc_oov_context_to_idx = convert_doc_to_idxs(oov_context, oov_context_to_idx)

# Right context
right_context = open("../../datasets/features/79_context/right_context_with_pad.pkl", "rb")
right_context = pickle.load(right_context)
right_context_to_idx = open("../../datasets/features/79_context/token2idx_right_context.pkl", "rb")
right_context_to_idx = pickle.load(right_context_to_idx)
doc_right_context_to_idx = convert_doc_to_idxs(right_context, right_context_to_idx)

# Actual labels
labels_context = open("../../datasets/features/79_context/lables.pkl", "rb")
labels_context = pickle.load(labels_context)
labels_to_idx = open("../../datasets/features/79_context/lable_vocabs.pkl", "rb")
labels_to_idx = pickle.load(labels_to_idx)
doc_labels_to_idx = convert_doc_to_idxs(labels_context, labels_to_idx).flatten()

print(f"Left context shape: {doc_left_context_to_idx.shape}")
print(f"OOV context shape: {doc_oov_context_to_idx.shape}")
print(f"Right context shape: {doc_right_context_to_idx.shape}")
print(f"Actual lable shape: {doc_labels_to_idx.shape}")

Left context shape: (16562, 71)
OOV context shape: (16562, 28)
Right context shape: (16562, 79)
Actual lable shape: (16562,)


In [4]:
# Convert to Tensor
left_contexts = torch.LongTensor(doc_left_context_to_idx)
oov_contexts = torch.LongTensor(doc_oov_context_to_idx)
right_contexts = torch.LongTensor(doc_right_context_to_idx)
actual_labels = torch.LongTensor(doc_labels_to_idx)
dataset = TensorDataset(left_contexts, oov_contexts, right_contexts, actual_labels)

dataset[0]

(tensor([ 2968, 10710,  1320,  1320,  1320,  1320,  1320,  1320,  1320,  1320,
          1320,  1320,  1320,  1320,  1320,  1320,  1320,  1320,  1320,  1320,
          1320,  1320,  1320,  1320,  1320,  1320,  1320,  1320,  1320,  1320,
          1320,  1320,  1320,  1320,  1320,  1320,  1320,  1320,  1320,  1320,
          1320,  1320,  1320,  1320,  1320,  1320,  1320,  1320,  1320,  1320,
          1320,  1320,  1320,  1320,  1320,  1320,  1320,  1320,  1320,  1320,
          1320,  1320,  1320,  1320,  1320,  1320,  1320,  1320,  1320,  1320,
          1320]),
 tensor([19, 31, 19, 32, 29, 19, 32, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17,
         17, 17, 17, 17, 17, 17, 17, 17, 17, 17]),
 tensor([1929, 1929, 1929, 1929, 1929, 1929, 1929, 1929, 1929, 1929, 1929, 1929,
         1929, 1929, 1929, 1929, 1929, 1929, 1929, 1929, 1929, 1929, 1929, 1929,
         1929, 1929, 1929, 1929, 1929, 1929, 1929, 1929, 1929, 1929, 1929, 1929,
         1929, 1929, 1929, 1929, 1929, 1929, 1929, 192

# Char and Word Embedding

In [5]:
# Word Embedding
word_embeddings = Embedding.load("../../word_embeddings/polyglot/idn_embeddings.tar.bz2")
word_embeddings.apply_expansion(DigitExpander)
word_embeddings.apply_expansion(CaseExpander)

left_vocabs = open("../../datasets/features/79_context/left_context_vocabs.pkl", "rb")
left_vocabs = pickle.load(left_vocabs)

right_vocabs = open("../../datasets/features/79_context/right_context_vocabs.pkl", "rb")
right_vocabs = pickle.load(right_vocabs)

left_word_embedding_dict = {left_context_to_idx[vocab] : word_embeddings[vocab] for vocab in left_vocabs}
right_word_embedding_dict = {right_context_to_idx[vocab] : word_embeddings[vocab] for vocab in right_vocabs}

# Char Embedding
char_embedding_dict = open("../../word_embeddings/chars_embedding/char_embeddings.pkl", "rb")
char_embedding_dict = pickle.load(char_embedding_dict)

# Context embedding
left_context_embedding = nn.Embedding.from_pretrained(torch.FloatTensor(np.array(list(left_word_embedding_dict.values()))), freeze=True)
oov_context_embedding = nn.Embedding.from_pretrained(torch.FloatTensor(np.array(list(char_embedding_dict.values()))), freeze=True)
right_context_embedding = nn.Embedding.from_pretrained(torch.FloatTensor(np.array(list(right_word_embedding_dict.values()))), freeze=True)

# Dataloader

In [6]:
dataloader = DataLoader(dataset, batch_size=32, shuffle=True)
next(iter(dataloader))

[tensor([[    9,  4780,  4638,  ...,  1320,  1320,  1320],
         [ 1320,  1320,  1320,  ...,  1320,  1320,  1320],
         [ 9808,  8984,  7151,  ...,  1320,  1320,  1320],
         ...,
         [ 4511, 10790,  6234,  ...,  1320,  1320,  1320],
         [ 4315,   149,  5260,  ...,  1320,  1320,  1320],
         [ 5603,  7726,  1320,  ...,  1320,  1320,  1320]]),
 tensor([[34, 33, 26, 33, 32, 25, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17,
          17, 17, 17, 17, 17, 17, 17, 17, 17, 17],
         [37, 19, 41, 23, 25, 19, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17,
          17, 17, 17, 17, 17, 17, 17, 17, 17, 17],
         [36, 23, 24, 27, 32, 19, 32, 21, 27, 32, 25, 17, 17, 17, 17, 17, 17, 17,
          17, 17, 17, 17, 17, 17, 17, 17, 17, 17],
         [24, 19, 22, 26, 23, 30, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17,
          17, 17, 17, 17, 17, 17, 17, 17, 17, 17],
         [30, 19, 20, 19, 18, 20, 23, 36, 37, 27, 26, 17, 17, 17, 17, 17, 17, 17,
          17, 17

# Model

In [7]:
class Comick(nn.Module):
    def __init__(
        self,
        input_size_left_context=64,
        input_size_oov_context=20,
        input_size_right_context=64,
        hidden_size = 128,
        num_layers = 2,
        output_size = len(labels_to_idx),
        batch_first = True,
        bidirectional = True,
        init_wb_with_kaiming_normal=True
    ):
        super(Comick, self).__init__()
        
        self.input_size_left_context = input_size_left_context
        self.input_size_oov_context = input_size_oov_context
        self.input_size_right_context = input_size_right_context
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.output_size = output_size
        self.batch_first = batch_first
        self.bidirectional = bidirectional
        
        self.bilstm_left_context_feature = nn.LSTM(
            input_size = self.input_size_left_context,
            hidden_size = self.hidden_size,
            num_layers = self.num_layers,
            batch_first = self.batch_first,
            bidirectional = self.bidirectional
        )
        
        self.bilstm_oov_context_feature = nn.LSTM(
            input_size = self.input_size_oov_context,
            hidden_size = self.hidden_size,
            num_layers = self.num_layers,
            batch_first = self.batch_first,
            bidirectional = self.bidirectional
        )
        
        self.bilstm_right_context_feature = nn.LSTM(
            input_size = self.input_size_right_context,
            hidden_size = self.hidden_size,
            num_layers = self.num_layers,
            batch_first = self.batch_first,
            bidirectional = self.bidirectional
        )
        
        self.fc = nn.Sequential(
            nn.Linear(2 * self.hidden_size, 64),
            nn.Tanh()
        )
        
        self.oov_embedding = nn.Linear(in_features=3 * 64, out_features=64)
        
        self.embedding = np.empty((output_size, 64), dtype=np.float32)
        
        self.output = nn.Sequential(
            nn.Linear(64, self.output_size),
            nn.LogSoftmax(dim=1)
        )
                
        if init_wb_with_kaiming_normal:
            self.init_wb()
            
    def init_wb(self):
        for module in self.modules():
            if isinstance(module, (nn.Linear, nn.LSTM)):
                for name, param in module.named_parameters():
                    if "weight" in name:
                        nn.init.kaiming_normal_(param)
                    else:
                        nn.init.kaiming_normal_(param.reshape(1, -1))

    def forward(
        self,
        input_left_context,
        input_oov_context,
        input_right_context,
        idxs_target,
        hidden_left_context=None,
        hidden_oov_context=None,
        hidden_right_context=None,
    ):
        # BiLSTM left, oov, and right context
        output_left_context, (hidden_left_context, memory_left_context) = self.bilstm_left_context_feature(input_left_context, hidden_left_context)
        output_oov_context, (hidden_oov_context, memory_oov_context) = self.bilstm_oov_context_feature(input_oov_context, hidden_oov_context)
        output_right_context, (hidden_right_context, memory_right_context) = self.bilstm_right_context_feature(input_right_context, hidden_right_context)
        
        # Concate hidden (forward and backward hidden BiLSTM)
        hidden_left_bidirectional = torch.cat((hidden_left_context[0], hidden_left_context[-1]), dim=1)
        hidden_oov_bidirectional = torch.cat((hidden_oov_context[0], hidden_oov_context[-1]), dim=1)
        hidden_right_bidirectional = torch.cat((hidden_right_context[0], hidden_right_context[-1]), dim=1)
        
        # Fully connected
        output_left_fc = self.fc(hidden_left_bidirectional)
        output_oov_fc = self.fc(hidden_oov_bidirectional)
        output_right_fc = self.fc(hidden_right_bidirectional)
        
        # Concate output left, oov, and right context feature
        output = torch.cat((output_left_fc, output_oov_fc, output_right_fc), dim=1)
        
        # OOV embedding
        output = self.oov_embedding(output)
                
        # save OOV embedding
        self.embedding[idxs_target.tolist()] = output.detach().numpy()
        
        # Projection OOV embedding
        output = self.output(output)
        
        return output

# Model, Optimizer, Criterion, Metric, and Learning Rate Scheduler

In [8]:
model = Comick(init_wb_with_kaiming_normal=True).to(device)
model.output[0].requires_grad_ = False # disable gradient for projection layer
optimizer = optim.Adam(model.parameters(), lr=0.005)
criterion = nn.NLLLoss().to(device)
metric = F1Score().to(device)
scheduler = lr_scheduler.ReduceLROnPlateau(optimizer, mode="min", factor=0.5)
model

Comick(
  (bilstm_left_context_feature): LSTM(64, 128, num_layers=2, batch_first=True, bidirectional=True)
  (bilstm_oov_context_feature): LSTM(20, 128, num_layers=2, batch_first=True, bidirectional=True)
  (bilstm_right_context_feature): LSTM(64, 128, num_layers=2, batch_first=True, bidirectional=True)
  (fc): Sequential(
    (0): Linear(in_features=256, out_features=64, bias=True)
    (1): Tanh()
  )
  (oov_embedding): Linear(in_features=192, out_features=64, bias=True)
  (output): Sequential(
    (0): Linear(in_features=64, out_features=3611, bias=True)
    (1): LogSoftmax(dim=1)
  )
)

In [9]:
sum([param.numel() for param in model.parameters() if param.requires_grad_])

2000219

In [10]:
model.embedding

array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]], dtype=float32)

# Training and Looping Step

In [11]:
def training_step(dataloader, model, optimizer, criterion, metric, scheduler=None, path_name=None):
    batch_losses = []
    batch_metric_scores = []
    
    for batch, (input_left_context, input_oov_context, input_right_context, actual_label) in enumerate(tqdm(dataloader), 1):
        # Forward Propagation
        probs = model(
            left_context_embedding(input_left_context).to(device),
            oov_context_embedding(input_oov_context).to(device),
            right_context_embedding(input_right_context).to(device),
            actual_label.to(device)
        )
                
        loss = criterion(probs, actual_label.to(device))
        metric_score = metric(probs.argmax(dim=1), actual_label.to(device))
        metric_score = metric.compute()
        
        if scheduler is not None:
            scheduler.step(loss)
        
        batch_losses.append(loss.item())
        batch_metric_scores.append(metric_score)
        
        if batch % 50 == 0 or batch == len(dataloader):
            print(f"Batch-{batch}: {str(criterion).split('(')[0]}={loss.item()} | {str(metric).split('(')[0]}={metric_score}")
            with open(f"../../logs/comick/{context_size}_contexts/{path_name}/training_history.txt", "a") as f:
                f.write(f"Batch-{batch}: {str(criterion).split('(')[0]}={loss.item()} | {str(metric).split('(')[0]}={metric_score}\n")

        # Backward Propagation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    
    return batch_losses, batch_metric_scores

In [None]:
def looping_step(dataloader, model, optimizer, criterion, metric, n_epoch=15, patience=3, monitor="loss"):
    start_time = time()
    
    epoch_losses = []
    epoch_metric_scores = []
    patience_counter = 0
    
    now = datetime.now()
    path_name = now.strftime("%m-%d-%Y_%H-%M-%S")
    os.makedirs(f"../../logs/comick/{context_size}_contexts/{path_name}")

    for epoch in range(1, n_epoch + 1):
        print(f"EPOCH-{epoch}")
        with open(f"../../logs/comick/{context_size}_contexts/{path_name}/training_history.txt", "a") as f:
            f.write(f"EPOCH-{epoch}\n")
        
        batch_losses, batch_metric_scores = training_step(dataloader, model, optimizer, criterion, metric, path_name=path_name)
        epoch_loss = torch.mean(torch.FloatTensor(batch_losses))

        epoch_loss = torch.mean(torch.FloatTensor(batch_losses))
        epoch_losses.append(epoch_loss.item())

        epoch_metric_score = torch.mean(torch.FloatTensor(batch_metric_scores))
        epoch_metric_scores.append(epoch_metric_score.item())
        
        with open(f"../../logs/comick/{context_size}_contexts/{path_name}/training_history.txt", "a") as f:
            if monitor == "loss":
                if epoch == 1:
                    print(f"\nMean {str(criterion).split('(')[0]}: {epoch_loss} | Mean {str(metric).split('(')[0]}: {epoch_metric_score}")
                    
                    f.write(f"\nMean {str(criterion).split('(')[0]}: {epoch_loss} | Mean {str(metric).split('(')[0]}: {epoch_metric_score}\n")
                elif epoch_losses[-1] < epoch_losses[-2]:
                    print("\nYeah 🎉😄! Model improved.")
                    print(f"Mean {str(criterion).split('(')[0]}: {epoch_loss} | Mean {str(metric).split('(')[0]}: {epoch_metric_score}")
                    
                    f.write("\nYeah 🎉😄! Model improved.\n")
                    f.write(f"Mean {str(criterion).split('(')[0]}: {epoch_loss} | Mean {str(metric).split('(')[0]}: {epoch_metric_score}\n")
                else:
                    patience_counter += 1
                    
                    print("\nHuft 😥! Model not improved.")
                    print(f"Mean {str(criterion).split('(')[0]}: {epoch_loss} | Mean {str(metric).split('(')[0]}: {epoch_metric_score}")
                    print(f"Patience = {patience_counter}/{patience}❗")
                    
                    f.write("\nHuft 😥! Model not improved.\n")
                    f.write(f"Mean {str(criterion).split('(')[0]}: {epoch_loss} | Mean {str(metric).split('(')[0]}: {epoch_metric_score}\n")                    
                    f.write(f"Patience = {patience_counter}/{patience}❗")
            else:
                if epoch == 1:
                    print(f"\nMean {str(criterion).split('(')[0]}: {epoch_loss} | Mean {str(metric).split('(')[0]}: {epoch_metric_score}")
                    
                    f.write(f"\nMean {str(criterion).split('(')[0]}: {epoch_loss} | Mean {str(metric).split('(')[0]}: {epoch_metric_score}\n")
                elif epoch_metric_scores[-1] > epoch_metric_scores[-2]:
                    print("\nYeah 🎉😄! Model improved.")
                    print(f"Mean {str(criterion).split('(')[0]}: {epoch_loss} | Mean Mean {str(metric).split('(')[0]}: {epoch_metric_score}")
                    
                    f.write("\nYeah 🎉😄! Model improved.\n")
                    f.write(f"Mean {str(criterion).split('(')[0]}: {epoch_loss} | Mean {str(metric).split('(')[0]}: {epoch_metric_score}\n")
                else:
                    patience_counter += 1
                    
                    print("\nHuft 😥! Model not improved.")
                    print(f"Mean {str(criterion).split('(')[0]}: {epoch_loss} | Mean {str(metric).split('(')[0]}: {epoch_metric_score}\n")
                    print(f"Patience = {patience_counter}/{patience}❗\n")
                    
                    f.write("\nHuft 😥! Model not improved.\n")
                    f.write(f"Mean {str(criterion).split('(')[0]}: {epoch_loss} | Mean {str(metric).split('(')[0]}: {epoch_metric_score}\n")
                    f.write(f"Patience = {patience_counter}/{patience}❗")
                    
            print("=" * 75, end="\n\n")
            f.write(f"{'=' * 75}\n\n")
            
            if patience_counter > patience:
                print(f"Early stopping, patience = {patience_counter}/{patience}❗")
                break
        
        metric.reset()
    
    finish_time = time()
    
    # Training plot 
    fig, (ax_loss, ax_metric_score) = plt.subplots(nrows=1, ncols=2, figsize=(15, 5))

    fig.suptitle("Training with context size = 79")

    ax_loss.set_title("Loss")
    ax_loss.set_xlabel("Epoch")
    ax_loss.set_ylabel("Score")
    ax_loss.plot(epoch_losses, "green")
    ax_loss.grid()

    ax_metric_score.set_title("F1 Score")
    ax_metric_score.set_xlabel("Epoch")
    ax_metric_score.set_ylabel("Score")
    ax_metric_score.plot(epoch_metric_scores, "orange")
    ax_metric_score.grid()

    plt.savefig(f"../../logs/comick/{context_size}_contexts/{path_name}/training_plot.jpg", dpi=200)                        
    
    print(f"Training duration : {((finish_time - start_time) / 60):.3f} minutes.")
    print(f"Training date     : {now}")
    
    with open(f"../../logs/comick/{context_size}_contexts/{path_name}/training_history.txt", "a") as f:
        f.write(f"\nTraining duration : {((finish_time - start_time) / 60):.3f} minutes.\n")
        f.write(f"Training date     : {now}\n")
    
    # Save epoch losses, epoch metric scores, model, state dict, and oov embedding dict
    filename_epoch_losses = open(f"../../logs/comick/{context_size}_contexts/{path_name}/epoch_losses.pkl", "ab")
    filename_epoch_metric_scores = open(f"../../logs/comick/{context_size}_contexts/{path_name}/epoch_metric_scores.pkl", "ab")
    filename_model = f"../../logs/comick/{context_size}_contexts/{path_name}/model.pth"
    filename_model_params = f"../../logs/comick/{context_size}_contexts/{path_name}/model_params.pth"
    filename_oov_embedding_dict = open(f"../../logs/comick/{context_size}_contexts/{path_name}/oov_embedding_dict.pkl", "ab")
    
    pickle.dump(epoch_losses, filename_epoch_losses)
    pickle.dump(epoch_metric_scores, filename_epoch_metric_scores)
    torch.save(model, filename_model)
    torch.save(model.state_dict(), filename_model_params)
    pickle.dump({token : embedding for token, embedding in zip(list(labels_to_idx.keys()), model.embedding)}, filename_oov_embedding_dict)
    
    return epoch_losses, epoch_metric_scores

epoch_losses, epoch_metric_scores = looping_step(dataloader, model, optimizer, criterion, metric)

EPOCH-1


  0%|          | 0/518 [00:00<?, ?it/s]

Batch-50: NLLLoss=4.850069046020508 | F1Score=0.31437501311302185
Batch-100: NLLLoss=4.076752185821533 | F1Score=0.3578124940395355
Batch-150: NLLLoss=3.0721304416656494 | F1Score=0.40166667103767395
Batch-200: NLLLoss=2.454435110092163 | F1Score=0.43187499046325684
Batch-250: NLLLoss=3.0282704830169678 | F1Score=0.4543750286102295
Batch-300: NLLLoss=3.1269445419311523 | F1Score=0.4779166579246521
Batch-350: NLLLoss=3.8457987308502197 | F1Score=0.4952678680419922
Batch-400: NLLLoss=2.8274483680725098 | F1Score=0.5121874809265137
Batch-450: NLLLoss=2.3727686405181885 | F1Score=0.5247916579246521
Batch-500: NLLLoss=1.8678960800170898 | F1Score=0.5375000238418579
Batch-518: NLLLoss=2.8908822536468506 | F1Score=0.5418427586555481

Mean NLLLoss: 3.61639404296875 | Mean F1Score: 0.4396011233329773

EPOCH-2


  0%|          | 0/518 [00:00<?, ?it/s]

Batch-50: NLLLoss=1.3746342658996582 | F1Score=0.6918749809265137
Batch-100: NLLLoss=1.6348791122436523 | F1Score=0.6915624737739563
Batch-150: NLLLoss=2.2847845554351807 | F1Score=0.6931250095367432
Batch-200: NLLLoss=2.1237876415252686 | F1Score=0.6976562738418579
Batch-250: NLLLoss=2.4064762592315674 | F1Score=0.7018749713897705
Batch-300: NLLLoss=2.7860002517700195 | F1Score=0.7073958516120911
Batch-350: NLLLoss=1.825637936592102 | F1Score=0.7119643092155457
Batch-400: NLLLoss=1.4622207880020142 | F1Score=0.7153905630111694
Batch-450: NLLLoss=1.3926361799240112 | F1Score=0.7186805009841919
Batch-500: NLLLoss=1.684211015701294 | F1Score=0.7216874957084656
Batch-518: NLLLoss=2.3933305740356445 | F1Score=0.7225577235221863

Yeah 🎉😄! Model improved.
Mean NLLLoss: 1.8895258903503418 | Mean F1Score: 0.7045383453369141

EPOCH-3


  0%|          | 0/518 [00:00<?, ?it/s]

Batch-50: NLLLoss=1.0514910221099854 | F1Score=0.8081250190734863
Batch-100: NLLLoss=0.9198795557022095 | F1Score=0.7943750023841858
Batch-150: NLLLoss=0.7386568784713745 | F1Score=0.7916666865348816
Batch-200: NLLLoss=1.5427154302597046 | F1Score=0.7832812666893005
Batch-250: NLLLoss=1.2860729694366455 | F1Score=0.7806249856948853
Batch-300: NLLLoss=1.0445141792297363 | F1Score=0.7788541913032532
Batch-350: NLLLoss=1.6058205366134644 | F1Score=0.7799999117851257
Batch-400: NLLLoss=1.3499650955200195 | F1Score=0.7799218893051147
Batch-450: NLLLoss=1.2267979383468628 | F1Score=0.7826389074325562
Batch-500: NLLLoss=1.093660831451416 | F1Score=0.7866874933242798
Batch-518: NLLLoss=1.2517352104187012 | F1Score=0.787646472454071

Yeah 🎉😄! Model improved.
Mean NLLLoss: 1.1229726076126099 | Mean F1Score: 0.7873852849006653

EPOCH-4


  0%|          | 0/518 [00:00<?, ?it/s]

Batch-50: NLLLoss=0.44505399465560913 | F1Score=0.8737499713897705


In [None]:
model.embedding