# Assignment 1 - Part of Speech Tagging

## Dependencies

In [1]:
# !pip install lightning
# !pip install torchtext.data
# !pip install torchtext
# !pip install torch

In [2]:
# TODO: remove unused dependencies

# file management
import urllib
from pathlib import Path
import zipfile
import os

# dataframe management
import pandas as pd

# data manipulation
import numpy as np

# for readability
from tqdm import tqdm

# pytorch
import torch
from torch import nn
from torch.utils.data import Dataset
from torch.utils.data import DataLoader

# pytorch lightning
from lightning import LightningModule
from lightning.pytorch import Trainer, seed_everything
from lightning.pytorch.loggers import TensorBoardLogger
from lightning.pytorch.callbacks import ModelCheckpoint
from lightning.pytorch.callbacks.early_stopping import EarlyStopping

# Glove and vocabulary
from torchtext.vocab import GloVe, build_vocab_from_iterator

## TASK 1: Corpus

* **Download** the corpus.
* **Encode** the corpus into a pandas.DataFrame object.
* **Split** it in training, validation, and test sets.

### Download the corpus

In [3]:
class DownloadProgressBar(tqdm):
    def update_to(self, b=1, bsize=1, tsize=None):
        if tsize is not None:
            self.total = tsize
        self.update(b * bsize - self.n)


def download_url(download_path: Path, url: str):
    with DownloadProgressBar(unit='B', unit_scale=True,
                             miniters=1, desc=url.split('/')[-1]) as t:
        urllib.request.urlretrieve(url, filename=download_path, reporthook=t.update_to)


def download_dataset(download_path: Path, url: str):
    print("Downloading dataset...")
    download_url(url=url, download_path=download_path)
    print("Download complete!")


def extract_dataset(download_path: Path, extract_path: Path):
    print("Extracting dataset... (it may take a while...)")
    with zipfile.ZipFile(download_path, 'r') as zip_file:
        zip_file.extractall(extract_path)

    print("Extraction completed!")

    Path.unlink(download_path)
    print("Deleted .zip dataset file")

In [4]:
url = "https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/dependency_treebank.zip"
dataset_name = "dependency_treebank"

print(f"Current work directory: {Path.cwd()}")

dataset_folder = Path.cwd().joinpath("Datasets")

if not dataset_folder.exists():
    dataset_folder.mkdir(parents=True)

dataset_zip_path = dataset_folder.joinpath("dependency_treebank.zip")
dataset_path = dataset_folder.joinpath(dataset_name)

if not dataset_zip_path.exists():
    download_dataset(dataset_zip_path, url)

if not dataset_path.exists():
    extract_dataset(dataset_zip_path, dataset_folder)


Current work directory: c:\Users\merli\OneDrive\Desktop\Github repos\NLP\A1


### Encode the corpus into a pandas.DataFrame object and split it into train, validation and test sets

The corpus contains 200 documents.

   * **Train**: Documents 1-100
   * **Validation**: Documents 101-150
   * **Test**: Documents 151-199

In [5]:
dataframe_rows = []
id = 0

for i, file_path in enumerate(sorted(dataset_path.iterdir())):
    if file_path.is_file():  # split corpus documents in the tree categories: train, validation, tests
        if 1 <= i + 1 <= 100:
            split = 'train'
        elif 101 <= i + 1 <= 150:
            split = 'validation'
        else:
            split = 'test'

        with file_path.open(mode='r', encoding='utf-8') as text_file:  # read corpus lines
            lines = text_file.readlines()

        for line in lines:
            fields = line.strip().split('\t')
            if len(fields) == 1:
                id = id + 1
            if len(fields) >= 2:
                text = fields[0]  # store the first field as 'text'
                POS = fields[1]  # store the second field as 'POS'
                dataframe_row = {  #build DataFrame rows
                    "text": text,
                    "POS": POS,
                    "split": split,
                    "id": id
                }

                dataframe_rows.append(dataframe_row)  #append rows
# corpus DataFrame
corpus_df = pd.DataFrame(dataframe_rows)

#### Data inspection

In [6]:
corpus_df.head(10)

Unnamed: 0,text,POS,split,id
0,Pierre,NNP,train,0
1,Vinken,NNP,train,0
2,",",",",train,0
3,61,CD,train,0
4,years,NNS,train,0
5,old,JJ,train,0
6,",",",",train,0
7,will,MD,train,0
8,join,VB,train,0
9,the,DT,train,0


In [7]:
print("Dataframe structure:")
print(corpus_df)
print()

print(f"Total rows {len(corpus_df)}")
print()

Dataframe structure:
          text  POS  split    id
0       Pierre  NNP  train     0
1       Vinken  NNP  train     0
2            ,    ,  train     0
3           61   CD  train     0
4        years  NNS  train     0
...        ...  ...    ...   ...
94079  quarter   NN   test  3715
94080       of   IN   test  3715
94081     next   JJ   test  3715
94082     year   NN   test  3715
94083        .    .   test  3715

[94084 rows x 4 columns]

Total rows 94084



In [8]:
# Train, test, validation split
df_train = corpus_df[corpus_df['split'] == 'train'].drop(columns=['split'])
df_test = corpus_df[corpus_df['split'] == 'test'].drop(columns=['split'])
df_val = corpus_df[corpus_df['split'] == 'validation'].drop(columns=['split'])

## TASK 2: Text encoding

* Embed words using **GloVe embeddings**.
* TODO: see if we want to do it, otherwise remove it -> [Optional] You are free to experiment with text pre-processing: **make sure you do not delete any token!**

In [9]:
def load_embedding_model(embedding_dimension: int = 300):
    emb_model = GloVe(name="6B", dim=embedding_dimension)
    return emb_model

In [10]:
punctuation_and_symbol_pos = [".", ",", ":", '``', "''", "$", "#", "-LRB-", "-RRB-", "SYM", "LS"] #TODO check "LS" 

### TASK 4.b: OOV tokens

Our vocabulary is stored in the GloVe object, and we simply edit its fields to add tokens and embedding vectors

In [11]:
# Find training set OOV tokens
embedding_dim = 50
embedder = load_embedding_model(embedding_dim)

existing_vocab_tokens = set(embedder.itos)  # Tokens in the vocabulary, i.e. present in GloVe
train_text = set([word.lower() for word in df_train['text']])

train_oov_tokens = train_text - existing_vocab_tokens
print(f"OOV tokens in the training set: {len(train_oov_tokens)}")
print(f"Some OOV tokens: {list(train_oov_tokens)[:50]}")

OOV tokens in the training set: 359
Some OOV tokens: ['savers\\/investors', '520-lawyer', 'glenham', '3\\/4', 'index-arbitrage', 'ghkm', '436.01', '37-a-share', 'new-home', 'wtd', 'sometimes-tawdry', 'anti-china', 'intellectual-property', 'one-country', '-lrb-', 'bridgestone\\/firestone', '84-month', 'purepac', 'boorse', 'school-research', '2645.90', 'integra-a', 'junk-bond', '1\\/4', 'equal-opportunity', 'landonne', '18,444', 'satrum', 'rope-sight', 'nekoosa', 'language-housekeeper', 'tarwhine', 'securities-based', 'purhasing', '446.62', 'ratners', 'abortion-related', '497.34', 'chilver', 'less-serious', '456.64', 'incentive-bonus', 'school-board', 'veselich', 'we-japanese', 'higher-salaried', 'hummerstone', 'test-prep', 'ariail', '1.457']


In [12]:
# Add training set OOV tokens to the GloVe embedder, sampling from a random uniform distribution for each feature in the respective range
mins = torch.min(embedder.vectors, dim=0).values
ranges = mins - torch.max(embedder.vectors, dim=0).values

for token in train_oov_tokens:
    embedder.itos.append(token)
    embedder.stoi[token] = len(embedder.itos) - 1
    embedder.vectors = torch.cat((embedder.vectors, (torch.rand(embedding_dim) * ranges + mins).unsqueeze(dim=0)), dim=0)

# For the '[UNK]' token embedding, sample a vector from a normal distribution for each feature

# Mean and std of the GloVe embeddings, for each feature
means = torch.mean(embedder.vectors, dim=0)
stds = torch.std(embedder.vectors, dim=0)
unk_vector = torch.normal(means, stds)

# The '[UNK]' token is not really added to the vocabulary (i.e. GloVe)
# instead, we redefine the 'unk_init' function of the embedder to return the embedding vector corresponding to '[UNK]'
embedder.unk_init = lambda x: unk_vector

print(f"New vocabulary size: {len(embedder.itos)}")

assert len(embedder.itos) == len(embedder.stoi) == embedder.vectors.shape[0]

New vocabulary size: 400359


### Embed words using GloVe embeddings

For the token to embedding mapping, we decided to use the following approach: the Dataset object takes care of calling the embedder every time it is queried to return a datapoint (\_\_getitem\_\_ method)

In [13]:
iterator = ([pos] for pos in corpus_df["POS"].unique())
pos_vocab = build_vocab_from_iterator(iterator)
pos_vocab.append_token("<PAD>")

pos_padding_value = pos_vocab["<PAD>"]
punctuation_and_symbol_pos_indices = [pos_vocab[token] for token in punctuation_and_symbol_pos]


class CorpusDataset(Dataset):
    def __init__(self, dataframe: pd.DataFrame, embedder):
        min_id = dataframe['id'].min()
        dataframe['id'] = dataframe['id'] - min_id
        self.dataframe = dataframe.groupby("id")
        self.embedder = embedder

    def __len__(self):
        return len(self.dataframe)

    def __getitem__(self, idx):
        sentence = self.dataframe.get_group(idx)
        text = sentence['text'].to_list()
        text = [token.lower() for token in text]        
        
        POS = sentence['POS'].to_list()
        POS = torch.Tensor([pos_vocab[token] for token in POS])

        embedded_text = self.embedder.get_vecs_by_tokens(text)

        return embedded_text, POS


In [14]:
# Definition of the dataset
dataset_train = CorpusDataset(df_train, embedder)
dataset_test = CorpusDataset(df_test, embedder)
dataset_val = CorpusDataset(df_val, embedder)
dataset_all = CorpusDataset(corpus_df, embedder)    # TODO: remove if not used


# This collate function takes care of adding padding to the sequences
# TODO - test if it works in the LSTM training
def my_collate(batch):
    sequences, labels = zip(*batch)

    # max_len = max([len(seq) for seq in sequences])
    # sequences_padded = [seq + ["<PAD>"] * (max_len - len(seq)) for seq in sequences]

    sequences_padded = torch.nn.utils.rnn.pad_sequence(sequences, batch_first=True, padding_value=0)
    labels_padded = torch.nn.utils.rnn.pad_sequence(labels, batch_first=True, padding_value=pos_padding_value)

    sequences_padded = sequences_padded.type(torch.float)
    labels_padded = labels_padded.type(torch.long)

    return [sequences_padded, labels_padded]


train_loader = DataLoader(dataset_train, batch_size=32, collate_fn=my_collate)
val_loader = DataLoader(dataset_val, batch_size=32, collate_fn=my_collate)
test_loader = DataLoader(dataset_test, batch_size=32, collate_fn=my_collate)

## TASK 4: Metrics

* Evaluate models using macro F1-score, computed over **all** tokens.

We defined our own macro F1-score metric in order to accumulate FP, TP, FN, TN iteratively

In [15]:
from torchmetrics import Metric
from torchmetrics import ConfusionMatrix


class F1ScoreCustom(Metric):
    def __init__(self, num_classes: int, pos_padding_value: int = pos_padding_value, punctuation_and_symbol_pos_indices: list = punctuation_and_symbol_pos_indices):
        super().__init__()

        self.num_classes = num_classes
        self.mask = torch.ones([num_classes])
        self.mask[[pos_padding_value] + punctuation_and_symbol_pos_indices] = 0
        
        self.add_state("true_positive", default=torch.zeros([num_classes]), dist_reduce_fx="sum")
        self.add_state("false_negative", default=torch.zeros([num_classes]), dist_reduce_fx="sum")
        self.add_state("false_positive", default=torch.zeros([num_classes]), dist_reduce_fx="sum")

    def update(self, y_hat_class: torch.Tensor, y_class: torch.Tensor):
        confusion_matrix_metric = ConfusionMatrix(num_classes=self.num_classes, task="multiclass")
        confusion_matrix = confusion_matrix_metric(y_hat_class, y_class)

        # # Confusion matrix, TP, FN and FP for class 0 
        # #   TRUE LABEL
        # #   0               TP     FN     FN     FN     FN       
        # #   1               FP       
        # #   2               FP               
        # #   3               FP                       
        # #   4               FP                                
        # # PREDICTED LABEL   0       1      2      3      4
        # 

        true_positive = torch.Tensor([confusion_matrix[i][i] for i in range(self.num_classes)])
        false_negative = torch.Tensor([sum(confusion_matrix[i, :]) - true_positive[i] for i in range(self.num_classes)])
        false_positive = torch.Tensor([sum(confusion_matrix[:, i]) - true_positive[i] for i in range(self.num_classes)])

        self.true_positive += true_positive
        self.false_negative += false_negative
        self.false_positive += false_positive

    def compute(self):
        precision = self.true_positive / (self.true_positive + self.false_positive)
        recall = self.true_positive / (self.true_positive + self.false_negative)

        f1 = 2 * (precision * recall) / (precision + recall)

        f1 = f1 * self.mask

        return f1



## TASK 3: Model definition

* **Baseline**: implement a Bidirectional LSTM with a Dense layer on top.

* **Model 1**: add an additional LSTM layer to the Baseline model.
* **Model 2**: add an additional Dense layer to the Baseline model.

### Baseline model: Bidirectional LSTM + Dense layer

In [16]:
class BiLSTMModel(LightningModule):
    def __init__(self, input_dim, hidden_dim, output_dim, target_padding_value=pos_padding_value):
        super(BiLSTMModel, self).__init__()
        self.output_dim = output_dim
        self.target_padding_value = target_padding_value

        self.lstm = nn.LSTM(input_size=input_dim,
                            hidden_size=hidden_dim,
                            batch_first=True,
                            bidirectional=True)
        self.fc = nn.Linear(hidden_dim * 2, output_dim)  # Multiplied by 2 due to the bidirectionality

        self._train_f1_metric = F1ScoreCustom(num_classes=output_dim, pos_padding_value=self.target_padding_value)
        self._val_f1_metric = F1ScoreCustom(num_classes=output_dim, pos_padding_value=self.target_padding_value)
        self._test_f1_metric = F1ScoreCustom(num_classes=output_dim, pos_padding_value=self.target_padding_value)

    def forward(self, x):
        # embedding = self.embedding_layer(x)
        lstm_out, _ = self.lstm(x)
        # lstm_out (batch_size, seq_length, hidden_size * 2)
        out = self.fc(lstm_out)
        # out (batch_size, seq_length, output_dim)
        return out

    def training_step(self, batch, batch_idx):
        x, y = batch
        y_hat = self(x)

        # Change shape from (batchsize, sequence_len, classes) to be (batchsize, classes, sequence_len) to compute loss function
        y_hat = torch.movedim(y_hat, 1, 2)
        loss = nn.functional.cross_entropy(y_hat, y, ignore_index=self.target_padding_value)

        self.log_dict({'train_loss': loss, 'step': float(self.current_epoch)}, on_epoch=True, prog_bar=True, logger=True,
                      on_step=False, reduce_fx="mean")

        y_hat_class = torch.argmax(y_hat, dim=1)
        self._train_f1_metric.update(y_hat_class, y)

        return loss

    def validation_step(self, batch, batch_idx):
        x, y = batch
        y_hat = self(x)
        y_hat = torch.movedim(y_hat, 1, 2)

        loss = nn.functional.cross_entropy(y_hat, y, ignore_index=self.target_padding_value)
        self.log_dict({'val_loss': loss, 'step': float(self.current_epoch)}, on_epoch=True, prog_bar=True, logger=True,
                      reduce_fx="mean")

        y_hat_class = torch.argmax(y_hat, dim=1)
        self._val_f1_metric.update(y_hat_class, y)

        return loss

    def test_step(self, batch, batch_idx):
        x, y = batch
        y_hat = self(x)
        y_hat = torch.movedim(y_hat, 1, 2)
        loss = nn.functional.cross_entropy(y_hat, y, ignore_index=self.target_padding_value)

        self.log_dict({'test_loss': loss, 'step': float(self.current_epoch)}, on_epoch=True, prog_bar=True, logger=True,
                      reduce_fx="mean")

        y_hat_class = torch.argmax(y_hat, dim=1)
        self._test_f1_metric.update(y_hat_class, y)

        return loss

    def configure_optimizers(self):
        optimizer = torch.optim.Adam(self.parameters(), lr=1e-3)
        return optimizer

    def _compute_f1(self, f1_metric):
        mean_f1_score = f1_metric.compute()

        # Create a mask that is False for NaNs
        mask = torch.isnan(mean_f1_score)

        # Invert the mask: True for valid entries, False for NaNs
        valid_data = mean_f1_score[~mask]

        # Compute the mean of the non-NaN values
        mean_value = torch.mean(valid_data)

        return mean_value

    def on_train_epoch_end(self) -> None:
        mean_f1_score = self._compute_f1(self._train_f1_metric)
        self.log_dict({"train_f1": mean_f1_score, 'step': float(self.current_epoch)}, on_epoch=True, prog_bar=True, logger=True)
        self._train_f1_metric.reset()

    def on_validation_epoch_end(self) -> None:
        mean_f1_score = self._compute_f1(self._val_f1_metric)
        self.log_dict({"val_f1": mean_f1_score, 'step': float(self.current_epoch)}, on_epoch=True, prog_bar=True, logger=True)
        self._val_f1_metric.reset()

    def on_test_epoch_end(self) -> None:
        mean_f1_score = self._compute_f1(self._test_f1_metric)
        self.log_dict({"test_f1": mean_f1_score, 'step': float(self.current_epoch)}, on_epoch=True, prog_bar=True, logger=True)
        self._test_f1_metric.reset()


### Model 1: Bidirectional 2-layers LSTM + Dense layer

In [17]:
class Model1(LightningModule):
    def __init__(self, input_dim, hidden_dim, output_dim, target_padding_value=pos_padding_value):
        super(Model1, self).__init__()
        self.output_dim = output_dim
        self.target_padding_value = target_padding_value

        self.lstm = nn.LSTM(input_size=input_dim,
                            hidden_size=hidden_dim,
                            num_layers=2,
                            batch_first=True,
                            bidirectional=True)
        self.fc = nn.Linear(hidden_dim * 2, output_dim)  # Multiplied by 2 due to the bidirectionality

        self._train_f1_metric = F1ScoreCustom(num_classes=output_dim, pos_padding_value=self.target_padding_value)
        self._val_f1_metric = F1ScoreCustom(num_classes=output_dim, pos_padding_value=self.target_padding_value)
        self._test_f1_metric = F1ScoreCustom(num_classes=output_dim, pos_padding_value=self.target_padding_value)

    def forward(self, x):
        # embedding = self.embedding_layer(x)
        lstm_out, _ = self.lstm(x)
        # lstm_out (batch_size, seq_length, hidden_size * 2)
        out = self.fc(lstm_out)
        # out (batch_size, seq_length, output_dim)
        return out

    def training_step(self, batch, batch_idx):
        x, y = batch
        y_hat = self(x)

        # Change shape from (batchsize, sequence_len, classes) to be (batchsize, classes, sequence_len) to compute loss function
        y_hat = torch.movedim(y_hat, 1, 2)
        loss = nn.functional.cross_entropy(y_hat, y, ignore_index=self.target_padding_value)

        self.log_dict({'train_loss': loss, 'step': float(self.current_epoch)}, on_epoch=True, prog_bar=True, logger=True,
                      on_step=False, reduce_fx="mean")

        y_hat_class = torch.argmax(y_hat, dim=1)
        self._train_f1_metric.update(y_hat_class, y)

        return loss

    def validation_step(self, batch, batch_idx):
        x, y = batch
        y_hat = self(x)
        y_hat = torch.movedim(y_hat, 1, 2)

        loss = nn.functional.cross_entropy(y_hat, y, ignore_index=self.target_padding_value)
        self.log_dict({'val_loss': loss, 'step': float(self.current_epoch)}, on_epoch=True, prog_bar=True, logger=True,
                      reduce_fx="mean")

        y_hat_class = torch.argmax(y_hat, dim=1)
        self._val_f1_metric.update(y_hat_class, y)

        return loss

    def test_step(self, batch, batch_idx):
        x, y = batch
        y_hat = self(x)
        y_hat = torch.movedim(y_hat, 1, 2)
        loss = nn.functional.cross_entropy(y_hat, y, ignore_index=self.target_padding_value)

        self.log_dict({'test_loss': loss, 'step': float(self.current_epoch)}, on_epoch=True, prog_bar=True, logger=True,
                      reduce_fx="mean")

        y_hat_class = torch.argmax(y_hat, dim=1)
        self._test_f1_metric.update(y_hat_class, y)

        return loss

    def configure_optimizers(self):
        optimizer = torch.optim.Adam(self.parameters(), lr=1e-3)
        return optimizer

    def _compute_f1(self, f1_metric):
        mean_f1_score = f1_metric.compute()

        # Create a mask that is False for NaNs
        mask = torch.isnan(mean_f1_score)

        # Invert the mask: True for valid entries, False for NaNs
        valid_data = mean_f1_score[~mask]

        # Compute the mean of the non-NaN values
        mean_value = torch.mean(valid_data)

        return mean_value

    def on_train_epoch_end(self) -> None:
        mean_f1_score = self._compute_f1(self._train_f1_metric)
        self.log_dict({"train_f1": mean_f1_score, 'step': float(self.current_epoch)}, on_epoch=True, prog_bar=True, logger=True)
        self._train_f1_metric.reset()

    def on_validation_epoch_end(self) -> None:
        mean_f1_score = self._compute_f1(self._val_f1_metric)
        self.log_dict({"val_f1": mean_f1_score, 'step': float(self.current_epoch)}, on_epoch=True, prog_bar=True, logger=True)
        self._val_f1_metric.reset()

    def on_test_epoch_end(self) -> None:
        mean_f1_score = self._compute_f1(self._test_f1_metric)
        self.log_dict({"test_f1": mean_f1_score, 'step': float(self.current_epoch)}, on_epoch=True, prog_bar=True, logger=True)
        self._test_f1_metric.reset()

### Model 2: Bidirectional LSTM + 2 Dense layers

In [18]:
class Model2(LightningModule):
    def __init__(self, input_dim, hidden_dim, output_dim, fc_size,
                 target_padding_value=pos_padding_value):
        super(Model2, self).__init__()
        self.output_dim = output_dim
        self.target_padding_value = target_padding_value

        self.lstm = nn.LSTM(input_size=input_dim,
                            hidden_size=hidden_dim,
                            num_layers=2,
                            batch_first=True,
                            bidirectional=True)
        self.fc_1 = nn.Linear(hidden_dim * 2, fc_size)  # Multiplied by 2 due to bidirectionality
        self.fc_2 = nn.Linear(fc_size, output_dim)

        self._train_f1_metric = F1ScoreCustom(num_classes=output_dim, pos_padding_value=self.target_padding_value)
        self._val_f1_metric = F1ScoreCustom(num_classes=output_dim, pos_padding_value=self.target_padding_value)
        self._test_f1_metric = F1ScoreCustom(num_classes=output_dim, pos_padding_value=self.target_padding_value)

    def forward(self, x):
        # embedding = self.embedding_layer(x)
        lstm_out, _ = self.lstm(x)
        # lstm_out (batch_size, seq_length, hidden_size * 2)
        out = self.fc_1(lstm_out)
        out = self.fc_2(out)
        # out (batch_size, seq_length, output_dim)
        return out

    def training_step(self, batch, batch_idx):
        x, y = batch
        y_hat = self(x)

        # Change shape from (batchsize, sequence_len, classes) to be (batchsize, classes, sequence_len) to compute loss function
        y_hat = torch.movedim(y_hat, 1, 2)
        loss = nn.functional.cross_entropy(y_hat, y, ignore_index=self.target_padding_value)

        self.log_dict({'train_loss': loss, 'step': float(self.current_epoch)}, on_epoch=True, prog_bar=True, logger=True,
                      on_step=False, reduce_fx="mean")

        y_hat_class = torch.argmax(y_hat, dim=1)
        self._train_f1_metric.update(y_hat_class, y)

        return loss

    def validation_step(self, batch, batch_idx):
        x, y = batch
        y_hat = self(x)
        y_hat = torch.movedim(y_hat, 1, 2)

        loss = nn.functional.cross_entropy(y_hat, y, ignore_index=self.target_padding_value)
        self.log_dict({'val_loss': loss, 'step': float(self.current_epoch)}, on_epoch=True, prog_bar=True, logger=True,
                      reduce_fx="mean")

        y_hat_class = torch.argmax(y_hat, dim=1)
        self._val_f1_metric.update(y_hat_class, y)

        return loss

    def test_step(self, batch, batch_idx):
        x, y = batch
        y_hat = self(x)
        y_hat = torch.movedim(y_hat, 1, 2)
        loss = nn.functional.cross_entropy(y_hat, y, ignore_index=self.target_padding_value)

        self.log_dict({'test_loss': loss, 'step': float(self.current_epoch)}, on_epoch=True, prog_bar=True, logger=True,
                      reduce_fx="mean")

        y_hat_class = torch.argmax(y_hat, dim=1)
        self._test_f1_metric.update(y_hat_class, y)

        return loss

    def configure_optimizers(self):
        optimizer = torch.optim.Adam(self.parameters(), lr=1e-3)
        return optimizer

    def _compute_f1(self, f1_metric):
        mean_f1_score = f1_metric.compute()

        # Create a mask that is False for NaNs
        mask = torch.isnan(mean_f1_score)

        # Invert the mask: True for valid entries, False for NaNs
        valid_data = mean_f1_score[~mask]

        # Compute the mean of the non-NaN values
        mean_value = torch.mean(valid_data)

        return mean_value

    def on_train_epoch_end(self) -> None:
        mean_f1_score = self._compute_f1(self._train_f1_metric)
        self.log_dict({"train_f1": mean_f1_score, 'step': float(self.current_epoch)}, on_epoch=True, prog_bar=True, logger=True)
        self._train_f1_metric.reset()

    def on_validation_epoch_end(self) -> None:
        mean_f1_score = self._compute_f1(self._val_f1_metric)
        self.log_dict({"val_f1": mean_f1_score, 'step': float(self.current_epoch)}, on_epoch=True, prog_bar=True, logger=True)
        self._val_f1_metric.reset()

    def on_test_epoch_end(self) -> None:
        mean_f1_score = self._compute_f1(self._test_f1_metric)
        self.log_dict({"test_f1": mean_f1_score, 'step': float(self.current_epoch)}, on_epoch=True, prog_bar=True, logger=True)
        self._test_f1_metric.reset()

## TASK 5: Training and Evaluation

* Train **all** models on the train set.
* Evaluate **all** models on the validation set.
* Compute metrics on the validation set.
* Pick **at least** three seeds for robust estimation.
* Pick the **best** performing model according to the observed validation set performance.

In [19]:
# Fix all possible sources of randomness
torch.use_deterministic_algorithms(True)

torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True

### Training and evaluation

#### Training all models on training set and evalutating on validation set at every epoch s.t. to keep the best model for each model/seed pair.
#### Both cross entropy loss and F1-Score are computed at each evaluation

In [20]:
logs_path = Path.cwd() / "logs" / "lightning_logs"


seeds = [6, 90, 157]

epochs = 100
output_dim = len(df_train["POS"].unique()) + 1  # +1 for padding
hidden_dim = 128
input_dim = embedding_dim

model_classes = [BiLSTMModel, Model1, Model2]
model_names = ["baseline", "model1", "model2"]
hyperparameters = [
    {'input_dim': input_dim, 'hidden_dim': hidden_dim, 'output_dim': output_dim},
    {'input_dim': input_dim, 'hidden_dim': hidden_dim, 'output_dim': output_dim},
    {'input_dim': input_dim, 'hidden_dim': hidden_dim, 'output_dim': output_dim, 'fc_size': 64} 
]

for model_class, model_name, hyperparameter in zip(model_classes, model_names, hyperparameters):
    for seed in seeds:
        print(f"Training model {model_name} with seed {seed}...")
        seed_everything(seed, workers=True)

        model = model_class(**hyperparameter)

        logger = TensorBoardLogger(logs_path, name=f"{model_name}_seed{seed}")
        checkpoint_callback = ModelCheckpoint(
            monitor='val_loss',
            dirpath=None,
            filename=f'{model_name}-seed={seed}' + '-{epoch:02d}-{val_loss:.2f}-{val_f1:.2f}',
            save_top_k=1,
        )
        early_stop_callback = EarlyStopping(
            monitor='val_loss',
            patience=3,
            verbose=True,
            mode='min'
        )


        trainer = Trainer(
            max_epochs=epochs,
            logger=logger,
            log_every_n_steps=1,
            callbacks=[checkpoint_callback, early_stop_callback],
            deterministic=True
        )
        
        trainer.fit(model, train_loader, val_loader)


Seed set to 6


Training model baseline with seed 6...


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
Missing logger folder: c:\Users\merli\OneDrive\Desktop\Github repos\NLP\A1\logs\lightning_logs\baseline_seed6

  | Name             | Type          | Params
---------------------------------------------------
0 | lstm             | LSTM          | 184 K 
1 | fc               | Linear        | 11.8 K
2 | _train_f1_metric | F1ScoreCustom | 0     
3 | _val_f1_metric   | F1ScoreCustom | 0     
4 | _test_f1_metric  | F1ScoreCustom | 0     
---------------------------------------------------
196 K     Trainable params
0         Non-trainable params
196 K     Total params
0.785     Total estimated model params size (MB)


Sanity Checking DataLoader 0:   0%|          | 0/2 [00:00<?, ?it/s]

d:\Users\edo\envs\nlp\Lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:441: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.


                                                                           

d:\Users\edo\envs\nlp\Lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:441: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.


Epoch 0: 100%|██████████| 59/59 [00:38<00:00,  1.54it/s, v_num=0, val_loss=2.390, step=0.000, val_f1=0.237, train_loss=2.940]

Metric val_loss improved. New best score: 2.393


Epoch 1: 100%|██████████| 59/59 [00:36<00:00,  1.63it/s, v_num=0, val_loss=1.500, step=1.000, val_f1=0.293, train_loss=1.880, train_f1=0.0928]

Metric val_loss improved by 0.892 >= min_delta = 0.0. New best score: 1.501


Epoch 2: 100%|██████████| 59/59 [00:35<00:00,  1.66it/s, v_num=0, val_loss=1.160, step=2.000, val_f1=0.410, train_loss=1.270, train_f1=0.241] 

Metric val_loss improved by 0.343 >= min_delta = 0.0. New best score: 1.158


Epoch 3: 100%|██████████| 59/59 [00:37<00:00,  1.58it/s, v_num=0, val_loss=0.985, step=3.000, val_f1=0.435, train_loss=1.010, train_f1=0.371]

Metric val_loss improved by 0.173 >= min_delta = 0.0. New best score: 0.985


Epoch 4: 100%|██████████| 59/59 [00:33<00:00,  1.75it/s, v_num=0, val_loss=0.878, step=4.000, val_f1=0.451, train_loss=0.869, train_f1=0.438]

Metric val_loss improved by 0.107 >= min_delta = 0.0. New best score: 0.878


Epoch 5: 100%|██████████| 59/59 [00:34<00:00,  1.72it/s, v_num=0, val_loss=0.806, step=5.000, val_f1=0.475, train_loss=0.775, train_f1=0.450]

Metric val_loss improved by 0.072 >= min_delta = 0.0. New best score: 0.806


Epoch 6: 100%|██████████| 59/59 [00:33<00:00,  1.78it/s, v_num=0, val_loss=0.753, step=6.000, val_f1=0.516, train_loss=0.708, train_f1=0.482]

Metric val_loss improved by 0.053 >= min_delta = 0.0. New best score: 0.753


Epoch 7: 100%|██████████| 59/59 [00:36<00:00,  1.60it/s, v_num=0, val_loss=0.713, step=7.000, val_f1=0.525, train_loss=0.656, train_f1=0.513]

Metric val_loss improved by 0.040 >= min_delta = 0.0. New best score: 0.713


Epoch 8: 100%|██████████| 59/59 [00:31<00:00,  1.89it/s, v_num=0, val_loss=0.681, step=8.000, val_f1=0.538, train_loss=0.614, train_f1=0.514]

Metric val_loss improved by 0.032 >= min_delta = 0.0. New best score: 0.681


Epoch 9: 100%|██████████| 59/59 [00:28<00:00,  2.08it/s, v_num=0, val_loss=0.654, step=9.000, val_f1=0.531, train_loss=0.578, train_f1=0.535]

Metric val_loss improved by 0.027 >= min_delta = 0.0. New best score: 0.654


Epoch 10: 100%|██████████| 59/59 [00:24<00:00,  2.44it/s, v_num=0, val_loss=0.630, step=10.00, val_f1=0.540, train_loss=0.545, train_f1=0.549]

Metric val_loss improved by 0.023 >= min_delta = 0.0. New best score: 0.630


Epoch 11: 100%|██████████| 59/59 [00:23<00:00,  2.52it/s, v_num=0, val_loss=0.610, step=11.00, val_f1=0.540, train_loss=0.517, train_f1=0.562]

Metric val_loss improved by 0.020 >= min_delta = 0.0. New best score: 0.610


Epoch 12: 100%|██████████| 59/59 [00:25<00:00,  2.32it/s, v_num=0, val_loss=0.593, step=12.00, val_f1=0.536, train_loss=0.490, train_f1=0.559]

Metric val_loss improved by 0.017 >= min_delta = 0.0. New best score: 0.593


Epoch 13: 100%|██████████| 59/59 [00:24<00:00,  2.40it/s, v_num=0, val_loss=0.578, step=13.00, val_f1=0.553, train_loss=0.466, train_f1=0.572]

Metric val_loss improved by 0.015 >= min_delta = 0.0. New best score: 0.578


Epoch 14: 100%|██████████| 59/59 [00:25<00:00,  2.36it/s, v_num=0, val_loss=0.565, step=14.00, val_f1=0.563, train_loss=0.443, train_f1=0.570]

Metric val_loss improved by 0.013 >= min_delta = 0.0. New best score: 0.565


Epoch 15: 100%|██████████| 59/59 [00:24<00:00,  2.44it/s, v_num=0, val_loss=0.554, step=15.00, val_f1=0.570, train_loss=0.422, train_f1=0.581]

Metric val_loss improved by 0.012 >= min_delta = 0.0. New best score: 0.554


Epoch 16: 100%|██████████| 59/59 [00:26<00:00,  2.19it/s, v_num=0, val_loss=0.543, step=16.00, val_f1=0.570, train_loss=0.401, train_f1=0.590]

Metric val_loss improved by 0.010 >= min_delta = 0.0. New best score: 0.543


Epoch 17: 100%|██████████| 59/59 [00:24<00:00,  2.40it/s, v_num=0, val_loss=0.534, step=17.00, val_f1=0.576, train_loss=0.382, train_f1=0.588]

Metric val_loss improved by 0.009 >= min_delta = 0.0. New best score: 0.534


Epoch 18: 100%|██████████| 59/59 [00:26<00:00,  2.25it/s, v_num=0, val_loss=0.527, step=18.00, val_f1=0.588, train_loss=0.363, train_f1=0.600]

Metric val_loss improved by 0.008 >= min_delta = 0.0. New best score: 0.527


Epoch 19: 100%|██████████| 59/59 [00:32<00:00,  1.80it/s, v_num=0, val_loss=0.520, step=19.00, val_f1=0.591, train_loss=0.345, train_f1=0.608]

Metric val_loss improved by 0.006 >= min_delta = 0.0. New best score: 0.520


Epoch 20: 100%|██████████| 59/59 [00:35<00:00,  1.67it/s, v_num=0, val_loss=0.516, step=20.00, val_f1=0.579, train_loss=0.328, train_f1=0.606]

Metric val_loss improved by 0.005 >= min_delta = 0.0. New best score: 0.516


Epoch 21: 100%|██████████| 59/59 [00:35<00:00,  1.67it/s, v_num=0, val_loss=0.511, step=21.00, val_f1=0.578, train_loss=0.312, train_f1=0.617]

Metric val_loss improved by 0.004 >= min_delta = 0.0. New best score: 0.511


Epoch 22: 100%|██████████| 59/59 [00:33<00:00,  1.78it/s, v_num=0, val_loss=0.508, step=22.00, val_f1=0.579, train_loss=0.296, train_f1=0.625]

Metric val_loss improved by 0.004 >= min_delta = 0.0. New best score: 0.508


Epoch 23: 100%|██████████| 59/59 [00:24<00:00,  2.37it/s, v_num=0, val_loss=0.505, step=23.00, val_f1=0.579, train_loss=0.281, train_f1=0.629]

Metric val_loss improved by 0.003 >= min_delta = 0.0. New best score: 0.505


Epoch 24: 100%|██████████| 59/59 [00:27<00:00,  2.18it/s, v_num=0, val_loss=0.502, step=24.00, val_f1=0.580, train_loss=0.266, train_f1=0.635]

Metric val_loss improved by 0.002 >= min_delta = 0.0. New best score: 0.502


Epoch 25: 100%|██████████| 59/59 [00:21<00:00,  2.69it/s, v_num=0, val_loss=0.501, step=25.00, val_f1=0.584, train_loss=0.253, train_f1=0.641]

Metric val_loss improved by 0.001 >= min_delta = 0.0. New best score: 0.501


Epoch 28: 100%|██████████| 59/59 [00:23<00:00,  2.53it/s, v_num=0, val_loss=0.497, step=28.00, val_f1=0.574, train_loss=0.218, train_f1=0.655]

Metric val_loss improved by 0.004 >= min_delta = 0.0. New best score: 0.497


Epoch 31: 100%|██████████| 59/59 [00:24<00:00,  2.41it/s, v_num=0, val_loss=0.502, step=31.00, val_f1=0.571, train_loss=0.179, train_f1=0.670]

Monitored metric val_loss did not improve in the last 3 records. Best score: 0.497. Signaling Trainer to stop.


Epoch 31: 100%|██████████| 59/59 [00:24<00:00,  2.41it/s, v_num=0, val_loss=0.502, step=31.00, val_f1=0.571, train_loss=0.179, train_f1=0.670]

Seed set to 90
GPU available: False, used: False
TPU available: False, using: 0 TPU cores



Training model baseline with seed 90...


IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
Missing logger folder: c:\Users\merli\OneDrive\Desktop\Github repos\NLP\A1\logs\lightning_logs\baseline_seed90

  | Name             | Type          | Params
---------------------------------------------------
0 | lstm             | LSTM          | 184 K 
1 | fc               | Linear        | 11.8 K
2 | _train_f1_metric | F1ScoreCustom | 0     
3 | _val_f1_metric   | F1ScoreCustom | 0     
4 | _test_f1_metric  | F1ScoreCustom | 0     
---------------------------------------------------
196 K     Trainable params
0         Non-trainable params
196 K     Total params
0.785     Total estimated model params size (MB)


Epoch 0: 100%|██████████| 59/59 [00:26<00:00,  2.27it/s, v_num=0, val_loss=2.360, step=0.000, val_f1=0.299, train_loss=2.920]

Metric val_loss improved. New best score: 2.361


Epoch 1: 100%|██████████| 59/59 [00:23<00:00,  2.48it/s, v_num=0, val_loss=1.500, step=1.000, val_f1=0.303, train_loss=1.870, train_f1=0.112]

Metric val_loss improved by 0.856 >= min_delta = 0.0. New best score: 1.505


Epoch 2: 100%|██████████| 59/59 [01:08<00:00,  0.86it/s, v_num=0, val_loss=1.170, step=2.000, val_f1=0.397, train_loss=1.280, train_f1=0.238]

Metric val_loss improved by 0.337 >= min_delta = 0.0. New best score: 1.167


Epoch 3: 100%|██████████| 59/59 [01:50<00:00,  0.54it/s, v_num=0, val_loss=0.995, step=3.000, val_f1=0.429, train_loss=1.020, train_f1=0.366]

Metric val_loss improved by 0.172 >= min_delta = 0.0. New best score: 0.995


Epoch 4: 100%|██████████| 59/59 [01:19<00:00,  0.74it/s, v_num=0, val_loss=0.887, step=4.000, val_f1=0.456, train_loss=0.878, train_f1=0.415]

Metric val_loss improved by 0.108 >= min_delta = 0.0. New best score: 0.887


Epoch 5: 100%|██████████| 59/59 [01:29<00:00,  0.66it/s, v_num=0, val_loss=0.812, step=5.000, val_f1=0.489, train_loss=0.784, train_f1=0.446]

Metric val_loss improved by 0.075 >= min_delta = 0.0. New best score: 0.812


Epoch 6: 100%|██████████| 59/59 [00:43<00:00,  1.34it/s, v_num=0, val_loss=0.757, step=6.000, val_f1=0.525, train_loss=0.715, train_f1=0.485]

Metric val_loss improved by 0.055 >= min_delta = 0.0. New best score: 0.757


Epoch 7: 100%|██████████| 59/59 [00:31<00:00,  1.90it/s, v_num=0, val_loss=0.716, step=7.000, val_f1=0.512, train_loss=0.663, train_f1=0.514]

Metric val_loss improved by 0.042 >= min_delta = 0.0. New best score: 0.716


Epoch 8: 100%|██████████| 59/59 [00:34<00:00,  1.71it/s, v_num=0, val_loss=0.683, step=8.000, val_f1=0.521, train_loss=0.620, train_f1=0.527]

Metric val_loss improved by 0.033 >= min_delta = 0.0. New best score: 0.683


Epoch 9: 100%|██████████| 59/59 [00:28<00:00,  2.06it/s, v_num=0, val_loss=0.656, step=9.000, val_f1=0.535, train_loss=0.583, train_f1=0.531]

Metric val_loss improved by 0.026 >= min_delta = 0.0. New best score: 0.656


Epoch 10: 100%|██████████| 59/59 [00:29<00:00,  1.98it/s, v_num=0, val_loss=0.634, step=10.00, val_f1=0.529, train_loss=0.551, train_f1=0.547]

Metric val_loss improved by 0.022 >= min_delta = 0.0. New best score: 0.634


Epoch 11: 100%|██████████| 59/59 [00:25<00:00,  2.35it/s, v_num=0, val_loss=0.615, step=11.00, val_f1=0.542, train_loss=0.522, train_f1=0.562]

Metric val_loss improved by 0.019 >= min_delta = 0.0. New best score: 0.615


Epoch 12: 100%|██████████| 59/59 [00:26<00:00,  2.21it/s, v_num=0, val_loss=0.598, step=12.00, val_f1=0.553, train_loss=0.496, train_f1=0.549]

Metric val_loss improved by 0.017 >= min_delta = 0.0. New best score: 0.598


Epoch 13: 100%|██████████| 59/59 [00:24<00:00,  2.45it/s, v_num=0, val_loss=0.583, step=13.00, val_f1=0.549, train_loss=0.472, train_f1=0.565]

Metric val_loss improved by 0.015 >= min_delta = 0.0. New best score: 0.583


Epoch 14: 100%|██████████| 59/59 [00:26<00:00,  2.21it/s, v_num=0, val_loss=0.570, step=14.00, val_f1=0.555, train_loss=0.449, train_f1=0.578]

Metric val_loss improved by 0.013 >= min_delta = 0.0. New best score: 0.570


Epoch 15: 100%|██████████| 59/59 [00:23<00:00,  2.46it/s, v_num=0, val_loss=0.559, step=15.00, val_f1=0.562, train_loss=0.428, train_f1=0.589]

Metric val_loss improved by 0.012 >= min_delta = 0.0. New best score: 0.559


Epoch 16: 100%|██████████| 59/59 [00:27<00:00,  2.11it/s, v_num=0, val_loss=0.548, step=16.00, val_f1=0.567, train_loss=0.407, train_f1=0.602]

Metric val_loss improved by 0.010 >= min_delta = 0.0. New best score: 0.548


Epoch 17: 100%|██████████| 59/59 [00:23<00:00,  2.54it/s, v_num=0, val_loss=0.539, step=17.00, val_f1=0.569, train_loss=0.388, train_f1=0.598]

Metric val_loss improved by 0.009 >= min_delta = 0.0. New best score: 0.539


Epoch 18: 100%|██████████| 59/59 [00:24<00:00,  2.37it/s, v_num=0, val_loss=0.531, step=18.00, val_f1=0.567, train_loss=0.369, train_f1=0.597]

Metric val_loss improved by 0.008 >= min_delta = 0.0. New best score: 0.531


Epoch 19: 100%|██████████| 59/59 [00:27<00:00,  2.16it/s, v_num=0, val_loss=0.524, step=19.00, val_f1=0.584, train_loss=0.351, train_f1=0.606]

Metric val_loss improved by 0.007 >= min_delta = 0.0. New best score: 0.524


Epoch 20: 100%|██████████| 59/59 [00:24<00:00,  2.40it/s, v_num=0, val_loss=0.518, step=20.00, val_f1=0.585, train_loss=0.334, train_f1=0.612]

Metric val_loss improved by 0.006 >= min_delta = 0.0. New best score: 0.518


Epoch 21: 100%|██████████| 59/59 [00:23<00:00,  2.48it/s, v_num=0, val_loss=0.514, step=21.00, val_f1=0.587, train_loss=0.317, train_f1=0.624]

Metric val_loss improved by 0.005 >= min_delta = 0.0. New best score: 0.514


Epoch 22: 100%|██████████| 59/59 [00:24<00:00,  2.42it/s, v_num=0, val_loss=0.510, step=22.00, val_f1=0.590, train_loss=0.301, train_f1=0.635]

Metric val_loss improved by 0.004 >= min_delta = 0.0. New best score: 0.510


Epoch 23: 100%|██████████| 59/59 [00:24<00:00,  2.41it/s, v_num=0, val_loss=0.507, step=23.00, val_f1=0.590, train_loss=0.285, train_f1=0.641]

Metric val_loss improved by 0.003 >= min_delta = 0.0. New best score: 0.507


Epoch 24: 100%|██████████| 59/59 [00:23<00:00,  2.53it/s, v_num=0, val_loss=0.505, step=24.00, val_f1=0.587, train_loss=0.270, train_f1=0.649]

Metric val_loss improved by 0.002 >= min_delta = 0.0. New best score: 0.505


Epoch 25: 100%|██████████| 59/59 [00:25<00:00,  2.30it/s, v_num=0, val_loss=0.503, step=25.00, val_f1=0.576, train_loss=0.256, train_f1=0.657]

Metric val_loss improved by 0.001 >= min_delta = 0.0. New best score: 0.503


Epoch 26: 100%|██████████| 59/59 [00:23<00:00,  2.51it/s, v_num=0, val_loss=0.503, step=26.00, val_f1=0.576, train_loss=0.242, train_f1=0.660]

Metric val_loss improved by 0.000 >= min_delta = 0.0. New best score: 0.503


Epoch 29: 100%|██████████| 59/59 [00:25<00:00,  2.28it/s, v_num=0, val_loss=0.505, step=29.00, val_f1=0.560, train_loss=0.204, train_f1=0.663]

Monitored metric val_loss did not improve in the last 3 records. Best score: 0.503. Signaling Trainer to stop.


Epoch 29: 100%|██████████| 59/59 [00:25<00:00,  2.28it/s, v_num=0, val_loss=0.505, step=29.00, val_f1=0.560, train_loss=0.204, train_f1=0.663]

Seed set to 157
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
Missing logger folder: c:\Users\merli\OneDrive\Desktop\Github repos\NLP\A1\logs\lightning_logs\baseline_seed157

  | Name             | Type          | Params
---------------------------------------------------
0 | lstm             | LSTM          | 184 K 
1 | fc               | Linear        | 11.8 K
2 | _train_f1_metric | F1ScoreCustom | 0     
3 | _val_f1_metric   | F1ScoreCustom | 0     
4 | _test_f1_metric  | F1ScoreCustom | 0     
---------------------------------------------------
196 K     Trainable params
0         Non-trainable params
196 K     Total params
0.785     Total estimated model params size (MB)



Training model baseline with seed 157...
Epoch 0: 100%|██████████| 59/59 [00:23<00:00,  2.53it/s, v_num=0, val_loss=2.320, step=0.000, val_f1=0.302, train_loss=2.920]

Metric val_loss improved. New best score: 2.322


Epoch 1: 100%|██████████| 59/59 [00:24<00:00,  2.37it/s, v_num=0, val_loss=1.480, step=1.000, val_f1=0.304, train_loss=1.830, train_f1=0.104]

Metric val_loss improved by 0.842 >= min_delta = 0.0. New best score: 1.480


Epoch 2: 100%|██████████| 59/59 [00:24<00:00,  2.39it/s, v_num=0, val_loss=1.150, step=2.000, val_f1=0.418, train_loss=1.260, train_f1=0.242]

Metric val_loss improved by 0.326 >= min_delta = 0.0. New best score: 1.154


Epoch 3: 100%|██████████| 59/59 [00:29<00:00,  2.01it/s, v_num=0, val_loss=0.988, step=3.000, val_f1=0.425, train_loss=1.020, train_f1=0.380]

Metric val_loss improved by 0.166 >= min_delta = 0.0. New best score: 0.988


Epoch 4: 100%|██████████| 59/59 [00:23<00:00,  2.52it/s, v_num=0, val_loss=0.886, step=4.000, val_f1=0.470, train_loss=0.878, train_f1=0.418]

Metric val_loss improved by 0.101 >= min_delta = 0.0. New best score: 0.886


Epoch 5: 100%|██████████| 59/59 [00:17<00:00,  3.43it/s, v_num=0, val_loss=0.816, step=5.000, val_f1=0.485, train_loss=0.786, train_f1=0.446]

Metric val_loss improved by 0.070 >= min_delta = 0.0. New best score: 0.816


Epoch 6: 100%|██████████| 59/59 [00:18<00:00,  3.19it/s, v_num=0, val_loss=0.764, step=6.000, val_f1=0.524, train_loss=0.718, train_f1=0.468]

Metric val_loss improved by 0.052 >= min_delta = 0.0. New best score: 0.764


Epoch 7: 100%|██████████| 59/59 [00:19<00:00,  3.02it/s, v_num=0, val_loss=0.723, step=7.000, val_f1=0.542, train_loss=0.666, train_f1=0.512]

Metric val_loss improved by 0.041 >= min_delta = 0.0. New best score: 0.723


Epoch 8: 100%|██████████| 59/59 [00:20<00:00,  2.86it/s, v_num=0, val_loss=0.690, step=8.000, val_f1=0.522, train_loss=0.623, train_f1=0.538]

Metric val_loss improved by 0.033 >= min_delta = 0.0. New best score: 0.690


Epoch 9: 100%|██████████| 59/59 [00:19<00:00,  3.05it/s, v_num=0, val_loss=0.663, step=9.000, val_f1=0.519, train_loss=0.586, train_f1=0.529]

Metric val_loss improved by 0.028 >= min_delta = 0.0. New best score: 0.663


Epoch 10: 100%|██████████| 59/59 [00:18<00:00,  3.15it/s, v_num=0, val_loss=0.639, step=10.00, val_f1=0.528, train_loss=0.554, train_f1=0.544]

Metric val_loss improved by 0.023 >= min_delta = 0.0. New best score: 0.639


Epoch 11: 100%|██████████| 59/59 [00:19<00:00,  3.10it/s, v_num=0, val_loss=0.619, step=11.00, val_f1=0.536, train_loss=0.526, train_f1=0.558]

Metric val_loss improved by 0.020 >= min_delta = 0.0. New best score: 0.619


Epoch 12: 100%|██████████| 59/59 [00:19<00:00,  3.09it/s, v_num=0, val_loss=0.602, step=12.00, val_f1=0.543, train_loss=0.499, train_f1=0.568]

Metric val_loss improved by 0.017 >= min_delta = 0.0. New best score: 0.602


Epoch 13: 100%|██████████| 59/59 [00:19<00:00,  3.04it/s, v_num=0, val_loss=0.586, step=13.00, val_f1=0.554, train_loss=0.475, train_f1=0.563]

Metric val_loss improved by 0.015 >= min_delta = 0.0. New best score: 0.586


Epoch 14: 100%|██████████| 59/59 [00:18<00:00,  3.16it/s, v_num=0, val_loss=0.573, step=14.00, val_f1=0.548, train_loss=0.452, train_f1=0.560]

Metric val_loss improved by 0.013 >= min_delta = 0.0. New best score: 0.573


Epoch 15: 100%|██████████| 59/59 [00:18<00:00,  3.11it/s, v_num=0, val_loss=0.561, step=15.00, val_f1=0.547, train_loss=0.431, train_f1=0.571]

Metric val_loss improved by 0.012 >= min_delta = 0.0. New best score: 0.561


Epoch 16: 100%|██████████| 59/59 [00:19<00:00,  3.06it/s, v_num=0, val_loss=0.550, step=16.00, val_f1=0.554, train_loss=0.411, train_f1=0.571]

Metric val_loss improved by 0.010 >= min_delta = 0.0. New best score: 0.550


Epoch 17: 100%|██████████| 59/59 [00:18<00:00,  3.22it/s, v_num=0, val_loss=0.541, step=17.00, val_f1=0.557, train_loss=0.391, train_f1=0.579]

Metric val_loss improved by 0.009 >= min_delta = 0.0. New best score: 0.541


Epoch 18: 100%|██████████| 59/59 [00:18<00:00,  3.12it/s, v_num=0, val_loss=0.533, step=18.00, val_f1=0.561, train_loss=0.373, train_f1=0.591]

Metric val_loss improved by 0.008 >= min_delta = 0.0. New best score: 0.533


Epoch 19: 100%|██████████| 59/59 [00:18<00:00,  3.19it/s, v_num=0, val_loss=0.527, step=19.00, val_f1=0.561, train_loss=0.355, train_f1=0.598]

Metric val_loss improved by 0.007 >= min_delta = 0.0. New best score: 0.527


Epoch 20: 100%|██████████| 59/59 [00:20<00:00,  2.83it/s, v_num=0, val_loss=0.521, step=20.00, val_f1=0.547, train_loss=0.338, train_f1=0.590]

Metric val_loss improved by 0.006 >= min_delta = 0.0. New best score: 0.521


Epoch 21: 100%|██████████| 59/59 [00:21<00:00,  2.73it/s, v_num=0, val_loss=0.516, step=21.00, val_f1=0.547, train_loss=0.322, train_f1=0.605]

Metric val_loss improved by 0.005 >= min_delta = 0.0. New best score: 0.516


Epoch 22: 100%|██████████| 59/59 [00:18<00:00,  3.17it/s, v_num=0, val_loss=0.512, step=22.00, val_f1=0.554, train_loss=0.306, train_f1=0.610]

Metric val_loss improved by 0.004 >= min_delta = 0.0. New best score: 0.512


Epoch 23: 100%|██████████| 59/59 [00:19<00:00,  3.08it/s, v_num=0, val_loss=0.510, step=23.00, val_f1=0.553, train_loss=0.291, train_f1=0.614]

Metric val_loss improved by 0.003 >= min_delta = 0.0. New best score: 0.510


Epoch 24: 100%|██████████| 59/59 [00:18<00:00,  3.15it/s, v_num=0, val_loss=0.508, step=24.00, val_f1=0.560, train_loss=0.277, train_f1=0.620]

Metric val_loss improved by 0.002 >= min_delta = 0.0. New best score: 0.508


Epoch 25: 100%|██████████| 59/59 [00:19<00:00,  3.04it/s, v_num=0, val_loss=0.508, step=25.00, val_f1=0.560, train_loss=0.263, train_f1=0.623]

Metric val_loss improved by 0.000 >= min_delta = 0.0. New best score: 0.508


Epoch 28: 100%|██████████| 59/59 [00:20<00:00,  2.94it/s, v_num=0, val_loss=0.513, step=28.00, val_f1=0.558, train_loss=0.227, train_f1=0.637]

Monitored metric val_loss did not improve in the last 3 records. Best score: 0.508. Signaling Trainer to stop.


Epoch 28: 100%|██████████| 59/59 [00:20<00:00,  2.94it/s, v_num=0, val_loss=0.513, step=28.00, val_f1=0.558, train_loss=0.227, train_f1=0.637]

Seed set to 6



Training model model1 with seed 6...


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
Missing logger folder: c:\Users\merli\OneDrive\Desktop\Github repos\NLP\A1\logs\lightning_logs\model1_seed6

  | Name             | Type          | Params
---------------------------------------------------
0 | lstm             | LSTM          | 579 K 
1 | fc               | Linear        | 11.8 K
2 | _train_f1_metric | F1ScoreCustom | 0     
3 | _val_f1_metric   | F1ScoreCustom | 0     
4 | _test_f1_metric  | F1ScoreCustom | 0     
---------------------------------------------------
591 K     Trainable params
0         Non-trainable params
591 K     Total params
2.366     Total estimated model params size (MB)


Epoch 0: 100%|██████████| 59/59 [00:22<00:00,  2.63it/s, v_num=0, val_loss=2.490, step=0.000, val_f1=0.168, train_loss=2.960]

Metric val_loss improved. New best score: 2.493


Epoch 1: 100%|██████████| 59/59 [00:22<00:00,  2.57it/s, v_num=0, val_loss=1.500, step=1.000, val_f1=0.346, train_loss=1.890, train_f1=0.0797]

Metric val_loss improved by 0.995 >= min_delta = 0.0. New best score: 1.497


Epoch 2: 100%|██████████| 59/59 [00:24<00:00,  2.44it/s, v_num=0, val_loss=1.120, step=2.000, val_f1=0.406, train_loss=1.250, train_f1=0.238] 

Metric val_loss improved by 0.379 >= min_delta = 0.0. New best score: 1.118


Epoch 3: 100%|██████████| 59/59 [00:23<00:00,  2.49it/s, v_num=0, val_loss=0.932, step=3.000, val_f1=0.471, train_loss=0.973, train_f1=0.364]

Metric val_loss improved by 0.186 >= min_delta = 0.0. New best score: 0.932


Epoch 4: 100%|██████████| 59/59 [00:23<00:00,  2.52it/s, v_num=0, val_loss=0.810, step=4.000, val_f1=0.458, train_loss=0.813, train_f1=0.448]

Metric val_loss improved by 0.122 >= min_delta = 0.0. New best score: 0.810


Epoch 5: 100%|██████████| 59/59 [00:23<00:00,  2.52it/s, v_num=0, val_loss=0.721, step=5.000, val_f1=0.512, train_loss=0.703, train_f1=0.444]

Metric val_loss improved by 0.089 >= min_delta = 0.0. New best score: 0.721


Epoch 6: 100%|██████████| 59/59 [00:23<00:00,  2.52it/s, v_num=0, val_loss=0.664, step=6.000, val_f1=0.538, train_loss=0.623, train_f1=0.484]

Metric val_loss improved by 0.058 >= min_delta = 0.0. New best score: 0.664


Epoch 7: 100%|██████████| 59/59 [00:38<00:00,  1.53it/s, v_num=0, val_loss=0.625, step=7.000, val_f1=0.540, train_loss=0.560, train_f1=0.519]

Metric val_loss improved by 0.039 >= min_delta = 0.0. New best score: 0.625


Epoch 8: 100%|██████████| 59/59 [00:41<00:00,  1.41it/s, v_num=0, val_loss=0.589, step=8.000, val_f1=0.546, train_loss=0.510, train_f1=0.549]

Metric val_loss improved by 0.035 >= min_delta = 0.0. New best score: 0.589


Epoch 9: 100%|██████████| 59/59 [00:43<00:00,  1.36it/s, v_num=0, val_loss=0.549, step=9.000, val_f1=0.567, train_loss=0.468, train_f1=0.569]

Metric val_loss improved by 0.041 >= min_delta = 0.0. New best score: 0.549


Epoch 10: 100%|██████████| 59/59 [00:43<00:00,  1.37it/s, v_num=0, val_loss=0.518, step=10.00, val_f1=0.578, train_loss=0.429, train_f1=0.573]

Metric val_loss improved by 0.031 >= min_delta = 0.0. New best score: 0.518


Epoch 11: 100%|██████████| 59/59 [00:41<00:00,  1.42it/s, v_num=0, val_loss=0.495, step=11.00, val_f1=0.589, train_loss=0.392, train_f1=0.589]

Metric val_loss improved by 0.023 >= min_delta = 0.0. New best score: 0.495


Epoch 12: 100%|██████████| 59/59 [00:41<00:00,  1.41it/s, v_num=0, val_loss=0.476, step=12.00, val_f1=0.599, train_loss=0.360, train_f1=0.599]

Metric val_loss improved by 0.019 >= min_delta = 0.0. New best score: 0.476


Epoch 13: 100%|██████████| 59/59 [00:42<00:00,  1.38it/s, v_num=0, val_loss=0.461, step=13.00, val_f1=0.592, train_loss=0.330, train_f1=0.612]

Metric val_loss improved by 0.016 >= min_delta = 0.0. New best score: 0.461


Epoch 14: 100%|██████████| 59/59 [00:45<00:00,  1.29it/s, v_num=0, val_loss=0.449, step=14.00, val_f1=0.598, train_loss=0.303, train_f1=0.628]

Metric val_loss improved by 0.012 >= min_delta = 0.0. New best score: 0.449


Epoch 15: 100%|██████████| 59/59 [00:43<00:00,  1.37it/s, v_num=0, val_loss=0.441, step=15.00, val_f1=0.610, train_loss=0.278, train_f1=0.638]

Metric val_loss improved by 0.008 >= min_delta = 0.0. New best score: 0.441


Epoch 16: 100%|██████████| 59/59 [00:42<00:00,  1.39it/s, v_num=0, val_loss=0.437, step=16.00, val_f1=0.612, train_loss=0.255, train_f1=0.627]

Metric val_loss improved by 0.003 >= min_delta = 0.0. New best score: 0.437


Epoch 18: 100%|██████████| 59/59 [00:41<00:00,  1.41it/s, v_num=0, val_loss=0.435, step=18.00, val_f1=0.603, train_loss=0.212, train_f1=0.632]

Metric val_loss improved by 0.003 >= min_delta = 0.0. New best score: 0.435


Epoch 19: 100%|██████████| 59/59 [00:43<00:00,  1.37it/s, v_num=0, val_loss=0.429, step=19.00, val_f1=0.601, train_loss=0.193, train_f1=0.644]

Metric val_loss improved by 0.006 >= min_delta = 0.0. New best score: 0.429


Epoch 22: 100%|██████████| 59/59 [00:41<00:00,  1.41it/s, v_num=0, val_loss=0.444, step=22.00, val_f1=0.594, train_loss=0.135, train_f1=0.668]

Monitored metric val_loss did not improve in the last 3 records. Best score: 0.429. Signaling Trainer to stop.


Epoch 22: 100%|██████████| 59/59 [00:41<00:00,  1.40it/s, v_num=0, val_loss=0.444, step=22.00, val_f1=0.594, train_loss=0.135, train_f1=0.668]

Seed set to 90
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs



Training model model1 with seed 90...


HPU available: False, using: 0 HPUs
Missing logger folder: c:\Users\merli\OneDrive\Desktop\Github repos\NLP\A1\logs\lightning_logs\model1_seed90

  | Name             | Type          | Params
---------------------------------------------------
0 | lstm             | LSTM          | 579 K 
1 | fc               | Linear        | 11.8 K
2 | _train_f1_metric | F1ScoreCustom | 0     
3 | _val_f1_metric   | F1ScoreCustom | 0     
4 | _test_f1_metric  | F1ScoreCustom | 0     
---------------------------------------------------
591 K     Trainable params
0         Non-trainable params
591 K     Total params
2.366     Total estimated model params size (MB)


Epoch 0: 100%|██████████| 59/59 [00:37<00:00,  1.55it/s, v_num=0, val_loss=2.500, step=0.000, val_f1=0.191, train_loss=2.950]

Metric val_loss improved. New best score: 2.505


Epoch 1: 100%|██████████| 59/59 [00:33<00:00,  1.76it/s, v_num=0, val_loss=1.510, step=1.000, val_f1=0.330, train_loss=1.930, train_f1=0.0848]

Metric val_loss improved by 0.998 >= min_delta = 0.0. New best score: 1.507


Epoch 2: 100%|██████████| 59/59 [00:29<00:00,  2.00it/s, v_num=0, val_loss=1.120, step=2.000, val_f1=0.411, train_loss=1.250, train_f1=0.249] 

Metric val_loss improved by 0.384 >= min_delta = 0.0. New best score: 1.124


Epoch 3: 100%|██████████| 59/59 [00:28<00:00,  2.07it/s, v_num=0, val_loss=0.930, step=3.000, val_f1=0.417, train_loss=0.970, train_f1=0.367]

Metric val_loss improved by 0.194 >= min_delta = 0.0. New best score: 0.930


Epoch 4: 100%|██████████| 59/59 [00:29<00:00,  2.02it/s, v_num=0, val_loss=0.809, step=4.000, val_f1=0.464, train_loss=0.810, train_f1=0.420]

Metric val_loss improved by 0.121 >= min_delta = 0.0. New best score: 0.809


Epoch 5: 100%|██████████| 59/59 [00:28<00:00,  2.09it/s, v_num=0, val_loss=0.725, step=5.000, val_f1=0.513, train_loss=0.704, train_f1=0.458]

Metric val_loss improved by 0.084 >= min_delta = 0.0. New best score: 0.725


Epoch 6: 100%|██████████| 59/59 [00:29<00:00,  2.02it/s, v_num=0, val_loss=0.665, step=6.000, val_f1=0.544, train_loss=0.623, train_f1=0.479]

Metric val_loss improved by 0.060 >= min_delta = 0.0. New best score: 0.665


Epoch 7: 100%|██████████| 59/59 [00:30<00:00,  1.96it/s, v_num=0, val_loss=0.622, step=7.000, val_f1=0.548, train_loss=0.560, train_f1=0.517]

Metric val_loss improved by 0.044 >= min_delta = 0.0. New best score: 0.622


Epoch 8: 100%|██████████| 59/59 [00:30<00:00,  1.95it/s, v_num=0, val_loss=0.589, step=8.000, val_f1=0.561, train_loss=0.509, train_f1=0.548]

Metric val_loss improved by 0.032 >= min_delta = 0.0. New best score: 0.589


Epoch 9: 100%|██████████| 59/59 [00:29<00:00,  2.00it/s, v_num=0, val_loss=0.560, step=9.000, val_f1=0.546, train_loss=0.466, train_f1=0.570]

Metric val_loss improved by 0.029 >= min_delta = 0.0. New best score: 0.560


Epoch 10: 100%|██████████| 59/59 [00:29<00:00,  1.99it/s, v_num=0, val_loss=0.528, step=10.00, val_f1=0.557, train_loss=0.430, train_f1=0.571]

Metric val_loss improved by 0.032 >= min_delta = 0.0. New best score: 0.528


Epoch 11: 100%|██████████| 59/59 [00:30<00:00,  1.93it/s, v_num=0, val_loss=0.508, step=11.00, val_f1=0.575, train_loss=0.396, train_f1=0.564]

Metric val_loss improved by 0.020 >= min_delta = 0.0. New best score: 0.508


Epoch 12: 100%|██████████| 59/59 [00:28<00:00,  2.10it/s, v_num=0, val_loss=0.495, step=12.00, val_f1=0.586, train_loss=0.362, train_f1=0.592]

Metric val_loss improved by 0.013 >= min_delta = 0.0. New best score: 0.495


Epoch 13: 100%|██████████| 59/59 [00:28<00:00,  2.05it/s, v_num=0, val_loss=0.481, step=13.00, val_f1=0.595, train_loss=0.333, train_f1=0.611]

Metric val_loss improved by 0.014 >= min_delta = 0.0. New best score: 0.481


Epoch 14: 100%|██████████| 59/59 [00:28<00:00,  2.05it/s, v_num=0, val_loss=0.465, step=14.00, val_f1=0.599, train_loss=0.308, train_f1=0.626]

Metric val_loss improved by 0.015 >= min_delta = 0.0. New best score: 0.465


Epoch 15: 100%|██████████| 59/59 [00:28<00:00,  2.04it/s, v_num=0, val_loss=0.459, step=15.00, val_f1=0.607, train_loss=0.285, train_f1=0.626]

Metric val_loss improved by 0.006 >= min_delta = 0.0. New best score: 0.459


Epoch 16: 100%|██████████| 59/59 [00:27<00:00,  2.12it/s, v_num=0, val_loss=0.453, step=16.00, val_f1=0.606, train_loss=0.261, train_f1=0.634]

Metric val_loss improved by 0.006 >= min_delta = 0.0. New best score: 0.453


Epoch 17: 100%|██████████| 59/59 [00:31<00:00,  1.86it/s, v_num=0, val_loss=0.446, step=17.00, val_f1=0.594, train_loss=0.238, train_f1=0.629]

Metric val_loss improved by 0.007 >= min_delta = 0.0. New best score: 0.446


Epoch 18: 100%|██████████| 59/59 [00:29<00:00,  1.99it/s, v_num=0, val_loss=0.437, step=18.00, val_f1=0.601, train_loss=0.215, train_f1=0.628]

Metric val_loss improved by 0.010 >= min_delta = 0.0. New best score: 0.437


Epoch 19: 100%|██████████| 59/59 [00:30<00:00,  1.93it/s, v_num=0, val_loss=0.429, step=19.00, val_f1=0.619, train_loss=0.194, train_f1=0.640]

Metric val_loss improved by 0.008 >= min_delta = 0.0. New best score: 0.429


Epoch 22: 100%|██████████| 59/59 [00:29<00:00,  1.97it/s, v_num=0, val_loss=0.429, step=22.00, val_f1=0.607, train_loss=0.144, train_f1=0.676]

Metric val_loss improved by 0.000 >= min_delta = 0.0. New best score: 0.429


Epoch 25: 100%|██████████| 59/59 [00:29<00:00,  1.99it/s, v_num=0, val_loss=0.471, step=25.00, val_f1=0.613, train_loss=0.103, train_f1=0.698]

Monitored metric val_loss did not improve in the last 3 records. Best score: 0.429. Signaling Trainer to stop.


Epoch 25: 100%|██████████| 59/59 [00:29<00:00,  1.99it/s, v_num=0, val_loss=0.471, step=25.00, val_f1=0.613, train_loss=0.103, train_f1=0.698]

Seed set to 157
GPU available: False, used: False



Training model model1 with seed 157...


TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
Missing logger folder: c:\Users\merli\OneDrive\Desktop\Github repos\NLP\A1\logs\lightning_logs\model1_seed157

  | Name             | Type          | Params
---------------------------------------------------
0 | lstm             | LSTM          | 579 K 
1 | fc               | Linear        | 11.8 K
2 | _train_f1_metric | F1ScoreCustom | 0     
3 | _val_f1_metric   | F1ScoreCustom | 0     
4 | _test_f1_metric  | F1ScoreCustom | 0     
---------------------------------------------------
591 K     Trainable params
0         Non-trainable params
591 K     Total params
2.366     Total estimated model params size (MB)


Epoch 0: 100%|██████████| 59/59 [00:32<00:00,  1.83it/s, v_num=0, val_loss=2.460, step=0.000, val_f1=0.192, train_loss=2.960]

Metric val_loss improved. New best score: 2.464


Epoch 1: 100%|██████████| 59/59 [00:30<00:00,  1.94it/s, v_num=0, val_loss=1.490, step=1.000, val_f1=0.355, train_loss=1.860, train_f1=0.0825]

Metric val_loss improved by 0.975 >= min_delta = 0.0. New best score: 1.490


Epoch 2: 100%|██████████| 59/59 [00:28<00:00,  2.10it/s, v_num=0, val_loss=1.120, step=2.000, val_f1=0.423, train_loss=1.240, train_f1=0.254] 

Metric val_loss improved by 0.374 >= min_delta = 0.0. New best score: 1.116


Epoch 3: 100%|██████████| 59/59 [00:28<00:00,  2.08it/s, v_num=0, val_loss=0.929, step=3.000, val_f1=0.455, train_loss=0.970, train_f1=0.388]

Metric val_loss improved by 0.186 >= min_delta = 0.0. New best score: 0.929


Epoch 4: 100%|██████████| 59/59 [00:29<00:00,  1.98it/s, v_num=0, val_loss=0.819, step=4.000, val_f1=0.448, train_loss=0.817, train_f1=0.447]

Metric val_loss improved by 0.110 >= min_delta = 0.0. New best score: 0.819


Epoch 5: 100%|██████████| 59/59 [00:28<00:00,  2.06it/s, v_num=0, val_loss=0.740, step=5.000, val_f1=0.496, train_loss=0.712, train_f1=0.449]

Metric val_loss improved by 0.080 >= min_delta = 0.0. New best score: 0.740


Epoch 6: 100%|██████████| 59/59 [00:31<00:00,  1.86it/s, v_num=0, val_loss=0.677, step=6.000, val_f1=0.517, train_loss=0.631, train_f1=0.485]

Metric val_loss improved by 0.063 >= min_delta = 0.0. New best score: 0.677


Epoch 7: 100%|██████████| 59/59 [00:28<00:00,  2.10it/s, v_num=0, val_loss=0.629, step=7.000, val_f1=0.545, train_loss=0.567, train_f1=0.504]

Metric val_loss improved by 0.048 >= min_delta = 0.0. New best score: 0.629


Epoch 8: 100%|██████████| 59/59 [00:28<00:00,  2.07it/s, v_num=0, val_loss=0.590, step=8.000, val_f1=0.550, train_loss=0.515, train_f1=0.544]

Metric val_loss improved by 0.039 >= min_delta = 0.0. New best score: 0.590


Epoch 9: 100%|██████████| 59/59 [00:29<00:00,  2.03it/s, v_num=0, val_loss=0.557, step=9.000, val_f1=0.555, train_loss=0.470, train_f1=0.568]

Metric val_loss improved by 0.032 >= min_delta = 0.0. New best score: 0.557


Epoch 10: 100%|██████████| 59/59 [00:29<00:00,  2.00it/s, v_num=0, val_loss=0.533, step=10.00, val_f1=0.575, train_loss=0.430, train_f1=0.586]

Metric val_loss improved by 0.024 >= min_delta = 0.0. New best score: 0.533


Epoch 11: 100%|██████████| 59/59 [00:40<00:00,  1.44it/s, v_num=0, val_loss=0.514, step=11.00, val_f1=0.571, train_loss=0.394, train_f1=0.576]

Metric val_loss improved by 0.019 >= min_delta = 0.0. New best score: 0.514


Epoch 12: 100%|██████████| 59/59 [00:41<00:00,  1.43it/s, v_num=0, val_loss=0.499, step=12.00, val_f1=0.577, train_loss=0.362, train_f1=0.578]

Metric val_loss improved by 0.015 >= min_delta = 0.0. New best score: 0.499


Epoch 13: 100%|██████████| 59/59 [00:42<00:00,  1.40it/s, v_num=0, val_loss=0.487, step=13.00, val_f1=0.584, train_loss=0.332, train_f1=0.602]

Metric val_loss improved by 0.012 >= min_delta = 0.0. New best score: 0.487


Epoch 14: 100%|██████████| 59/59 [00:34<00:00,  1.70it/s, v_num=0, val_loss=0.476, step=14.00, val_f1=0.573, train_loss=0.306, train_f1=0.612]

Metric val_loss improved by 0.011 >= min_delta = 0.0. New best score: 0.476


Epoch 15: 100%|██████████| 59/59 [00:31<00:00,  1.89it/s, v_num=0, val_loss=0.459, step=15.00, val_f1=0.574, train_loss=0.284, train_f1=0.624]

Metric val_loss improved by 0.017 >= min_delta = 0.0. New best score: 0.459


Epoch 17: 100%|██████████| 59/59 [00:31<00:00,  1.87it/s, v_num=0, val_loss=0.448, step=17.00, val_f1=0.585, train_loss=0.246, train_f1=0.628]

Metric val_loss improved by 0.011 >= min_delta = 0.0. New best score: 0.448


Epoch 18: 100%|██████████| 59/59 [00:29<00:00,  2.01it/s, v_num=0, val_loss=0.439, step=18.00, val_f1=0.582, train_loss=0.217, train_f1=0.634]

Metric val_loss improved by 0.009 >= min_delta = 0.0. New best score: 0.439


Epoch 21: 100%|██████████| 59/59 [00:30<00:00,  1.95it/s, v_num=0, val_loss=0.460, step=21.00, val_f1=0.591, train_loss=0.156, train_f1=0.655]

Monitored metric val_loss did not improve in the last 3 records. Best score: 0.439. Signaling Trainer to stop.


Epoch 21: 100%|██████████| 59/59 [00:30<00:00,  1.95it/s, v_num=0, val_loss=0.460, step=21.00, val_f1=0.591, train_loss=0.156, train_f1=0.655]

Seed set to 6



Training model model2 with seed 6...


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
Missing logger folder: c:\Users\merli\OneDrive\Desktop\Github repos\NLP\A1\logs\lightning_logs\model2_seed6

  | Name             | Type          | Params
---------------------------------------------------
0 | lstm             | LSTM          | 579 K 
1 | fc_1             | Linear        | 16.4 K
2 | fc_2             | Linear        | 3.0 K 
3 | _train_f1_metric | F1ScoreCustom | 0     
4 | _val_f1_metric   | F1ScoreCustom | 0     
5 | _test_f1_metric  | F1ScoreCustom | 0     
---------------------------------------------------
599 K     Trainable params
0         Non-trainable params
599 K     Total params
2.396     Total estimated model params size (MB)


Epoch 0: 100%|██████████| 59/59 [00:32<00:00,  1.82it/s, v_num=0, val_loss=2.630, step=0.000, val_f1=0.164, train_loss=3.040]

Metric val_loss improved. New best score: 2.628


Epoch 1: 100%|██████████| 59/59 [00:32<00:00,  1.79it/s, v_num=0, val_loss=1.590, step=1.000, val_f1=0.304, train_loss=2.000, train_f1=0.079]

Metric val_loss improved by 1.040 >= min_delta = 0.0. New best score: 1.588


Epoch 2: 100%|██████████| 59/59 [00:30<00:00,  1.95it/s, v_num=0, val_loss=1.170, step=2.000, val_f1=0.380, train_loss=1.300, train_f1=0.215]

Metric val_loss improved by 0.423 >= min_delta = 0.0. New best score: 1.165


Epoch 3: 100%|██████████| 59/59 [00:32<00:00,  1.79it/s, v_num=0, val_loss=0.953, step=3.000, val_f1=0.433, train_loss=0.998, train_f1=0.336]

Metric val_loss improved by 0.212 >= min_delta = 0.0. New best score: 0.953


Epoch 4: 100%|██████████| 59/59 [00:30<00:00,  1.96it/s, v_num=0, val_loss=0.819, step=4.000, val_f1=0.435, train_loss=0.827, train_f1=0.402]

Metric val_loss improved by 0.134 >= min_delta = 0.0. New best score: 0.819


Epoch 5: 100%|██████████| 59/59 [00:31<00:00,  1.85it/s, v_num=0, val_loss=0.738, step=5.000, val_f1=0.458, train_loss=0.720, train_f1=0.434]

Metric val_loss improved by 0.081 >= min_delta = 0.0. New best score: 0.738


Epoch 6: 100%|██████████| 59/59 [00:32<00:00,  1.81it/s, v_num=0, val_loss=0.687, step=6.000, val_f1=0.501, train_loss=0.638, train_f1=0.464]

Metric val_loss improved by 0.051 >= min_delta = 0.0. New best score: 0.687


Epoch 7: 100%|██████████| 59/59 [00:31<00:00,  1.90it/s, v_num=0, val_loss=0.638, step=7.000, val_f1=0.532, train_loss=0.570, train_f1=0.483]

Metric val_loss improved by 0.049 >= min_delta = 0.0. New best score: 0.638


Epoch 8: 100%|██████████| 59/59 [00:31<00:00,  1.89it/s, v_num=0, val_loss=0.590, step=8.000, val_f1=0.557, train_loss=0.515, train_f1=0.518]

Metric val_loss improved by 0.049 >= min_delta = 0.0. New best score: 0.590


Epoch 9: 100%|██████████| 59/59 [00:30<00:00,  1.95it/s, v_num=0, val_loss=0.557, step=9.000, val_f1=0.558, train_loss=0.466, train_f1=0.550]

Metric val_loss improved by 0.033 >= min_delta = 0.0. New best score: 0.557


Epoch 10: 100%|██████████| 59/59 [00:31<00:00,  1.88it/s, v_num=0, val_loss=0.533, step=10.00, val_f1=0.549, train_loss=0.423, train_f1=0.572]

Metric val_loss improved by 0.024 >= min_delta = 0.0. New best score: 0.533


Epoch 11: 100%|██████████| 59/59 [00:31<00:00,  1.85it/s, v_num=0, val_loss=0.513, step=11.00, val_f1=0.558, train_loss=0.386, train_f1=0.580]

Metric val_loss improved by 0.020 >= min_delta = 0.0. New best score: 0.513


Epoch 12: 100%|██████████| 59/59 [00:32<00:00,  1.82it/s, v_num=0, val_loss=0.494, step=12.00, val_f1=0.571, train_loss=0.354, train_f1=0.583]

Metric val_loss improved by 0.019 >= min_delta = 0.0. New best score: 0.494


Epoch 13: 100%|██████████| 59/59 [00:31<00:00,  1.89it/s, v_num=0, val_loss=0.486, step=13.00, val_f1=0.583, train_loss=0.324, train_f1=0.588]

Metric val_loss improved by 0.008 >= min_delta = 0.0. New best score: 0.486


Epoch 14: 100%|██████████| 59/59 [00:33<00:00,  1.77it/s, v_num=0, val_loss=0.478, step=14.00, val_f1=0.584, train_loss=0.300, train_f1=0.601]

Metric val_loss improved by 0.008 >= min_delta = 0.0. New best score: 0.478


Epoch 15: 100%|██████████| 59/59 [00:30<00:00,  1.91it/s, v_num=0, val_loss=0.454, step=15.00, val_f1=0.589, train_loss=0.281, train_f1=0.614]

Metric val_loss improved by 0.024 >= min_delta = 0.0. New best score: 0.454


Epoch 18: 100%|██████████| 59/59 [00:33<00:00,  1.75it/s, v_num=0, val_loss=0.459, step=18.00, val_f1=0.584, train_loss=0.195, train_f1=0.649]

Monitored metric val_loss did not improve in the last 3 records. Best score: 0.454. Signaling Trainer to stop.


Epoch 18: 100%|██████████| 59/59 [00:33<00:00,  1.75it/s, v_num=0, val_loss=0.459, step=18.00, val_f1=0.584, train_loss=0.195, train_f1=0.649]

Seed set to 90
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
Missing logger folder: c:\Users\merli\OneDrive\Desktop\Github repos\NLP\A1\logs\lightning_logs\model2_seed90

  | Name             | Type          | Params
---------------------------------------------------
0 | lstm             | LSTM          | 579 K 
1 | fc_1             | Linear        | 16.4 K
2 | fc_2             | Linear        | 3.0 K 
3 | _train_f1_metric | F1ScoreCustom | 0     
4 | _val_f1_metric   | F1ScoreCustom | 0     
5 | _test_f1_metric  | F1ScoreCustom | 0     
---------------------------------------------------
599 K     Trainable params
0         Non-trainable params
599 K     Total params
2.396     Total estimated model params size (MB)



Training model model2 with seed 90...
Epoch 0: 100%|██████████| 59/59 [00:30<00:00,  1.90it/s, v_num=0, val_loss=2.550, step=0.000, val_f1=0.207, train_loss=3.010]

Metric val_loss improved. New best score: 2.548


Epoch 1: 100%|██████████| 59/59 [00:32<00:00,  1.82it/s, v_num=0, val_loss=1.570, step=1.000, val_f1=0.306, train_loss=1.980, train_f1=0.0734]

Metric val_loss improved by 0.974 >= min_delta = 0.0. New best score: 1.574


Epoch 2: 100%|██████████| 59/59 [00:31<00:00,  1.88it/s, v_num=0, val_loss=1.160, step=2.000, val_f1=0.381, train_loss=1.280, train_f1=0.223] 

Metric val_loss improved by 0.416 >= min_delta = 0.0. New best score: 1.158


Epoch 3: 100%|██████████| 59/59 [00:35<00:00,  1.67it/s, v_num=0, val_loss=0.950, step=3.000, val_f1=0.431, train_loss=0.977, train_f1=0.359]

Metric val_loss improved by 0.209 >= min_delta = 0.0. New best score: 0.950


Epoch 4: 100%|██████████| 59/59 [00:32<00:00,  1.82it/s, v_num=0, val_loss=0.815, step=4.000, val_f1=0.471, train_loss=0.818, train_f1=0.432]

Metric val_loss improved by 0.135 >= min_delta = 0.0. New best score: 0.815


Epoch 5: 100%|██████████| 59/59 [00:30<00:00,  1.94it/s, v_num=0, val_loss=0.740, step=5.000, val_f1=0.484, train_loss=0.709, train_f1=0.460]

Metric val_loss improved by 0.075 >= min_delta = 0.0. New best score: 0.740


Epoch 6: 100%|██████████| 59/59 [00:34<00:00,  1.71it/s, v_num=0, val_loss=0.674, step=6.000, val_f1=0.518, train_loss=0.631, train_f1=0.494]

Metric val_loss improved by 0.065 >= min_delta = 0.0. New best score: 0.674


Epoch 7: 100%|██████████| 59/59 [00:30<00:00,  1.91it/s, v_num=0, val_loss=0.621, step=7.000, val_f1=0.543, train_loss=0.570, train_f1=0.493]

Metric val_loss improved by 0.053 >= min_delta = 0.0. New best score: 0.621


Epoch 8: 100%|██████████| 59/59 [00:32<00:00,  1.81it/s, v_num=0, val_loss=0.589, step=8.000, val_f1=0.553, train_loss=0.515, train_f1=0.522]

Metric val_loss improved by 0.031 >= min_delta = 0.0. New best score: 0.589


Epoch 9: 100%|██████████| 59/59 [00:29<00:00,  1.98it/s, v_num=0, val_loss=0.561, step=9.000, val_f1=0.563, train_loss=0.466, train_f1=0.552]

Metric val_loss improved by 0.028 >= min_delta = 0.0. New best score: 0.561


Epoch 10: 100%|██████████| 59/59 [00:33<00:00,  1.74it/s, v_num=0, val_loss=0.539, step=10.00, val_f1=0.559, train_loss=0.423, train_f1=0.556]

Metric val_loss improved by 0.023 >= min_delta = 0.0. New best score: 0.539


Epoch 11: 100%|██████████| 59/59 [00:32<00:00,  1.81it/s, v_num=0, val_loss=0.523, step=11.00, val_f1=0.554, train_loss=0.384, train_f1=0.561]

Metric val_loss improved by 0.015 >= min_delta = 0.0. New best score: 0.523


Epoch 12: 100%|██████████| 59/59 [00:32<00:00,  1.82it/s, v_num=0, val_loss=0.509, step=12.00, val_f1=0.557, train_loss=0.351, train_f1=0.579]

Metric val_loss improved by 0.015 >= min_delta = 0.0. New best score: 0.509


Epoch 13: 100%|██████████| 59/59 [00:30<00:00,  1.91it/s, v_num=0, val_loss=0.492, step=13.00, val_f1=0.550, train_loss=0.321, train_f1=0.584]

Metric val_loss improved by 0.017 >= min_delta = 0.0. New best score: 0.492


Epoch 14: 100%|██████████| 59/59 [00:37<00:00,  1.59it/s, v_num=0, val_loss=0.474, step=14.00, val_f1=0.579, train_loss=0.293, train_f1=0.601]

Metric val_loss improved by 0.019 >= min_delta = 0.0. New best score: 0.474


Epoch 15: 100%|██████████| 59/59 [00:36<00:00,  1.62it/s, v_num=0, val_loss=0.462, step=15.00, val_f1=0.571, train_loss=0.267, train_f1=0.619]

Metric val_loss improved by 0.012 >= min_delta = 0.0. New best score: 0.462


Epoch 16: 100%|██████████| 59/59 [00:32<00:00,  1.82it/s, v_num=0, val_loss=0.455, step=16.00, val_f1=0.562, train_loss=0.244, train_f1=0.634]

Metric val_loss improved by 0.006 >= min_delta = 0.0. New best score: 0.455


Epoch 18: 100%|██████████| 59/59 [00:30<00:00,  1.91it/s, v_num=0, val_loss=0.447, step=18.00, val_f1=0.570, train_loss=0.192, train_f1=0.640]

Metric val_loss improved by 0.008 >= min_delta = 0.0. New best score: 0.447


Epoch 21: 100%|██████████| 59/59 [00:33<00:00,  1.75it/s, v_num=0, val_loss=0.490, step=21.00, val_f1=0.581, train_loss=0.125, train_f1=0.675]

Monitored metric val_loss did not improve in the last 3 records. Best score: 0.447. Signaling Trainer to stop.


Epoch 21: 100%|██████████| 59/59 [00:33<00:00,  1.75it/s, v_num=0, val_loss=0.490, step=21.00, val_f1=0.581, train_loss=0.125, train_f1=0.675]

Seed set to 157
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
Missing logger folder: c:\Users\merli\OneDrive\Desktop\Github repos\NLP\A1\logs\lightning_logs\model2_seed157

  | Name             | Type          | Params
---------------------------------------------------
0 | lstm             | LSTM          | 579 K 
1 | fc_1             | Linear        | 16.4 K
2 | fc_2             | Linear        | 3.0 K 
3 | _train_f1_metric | F1ScoreCustom | 0     
4 | _val_f1_metric   | F1ScoreCustom | 0     
5 | _test_f1_metric  | F1ScoreCustom | 0     
---------------------------------------------------
599 K     Trainable params
0         Non-trainable params
599 K     Total params
2.396     Total estimated model params size (MB)



Training model model2 with seed 157...
Epoch 0: 100%|██████████| 59/59 [00:32<00:00,  1.80it/s, v_num=0, val_loss=2.570, step=0.000, val_f1=0.169, train_loss=3.010]

Metric val_loss improved. New best score: 2.566


Epoch 1: 100%|██████████| 59/59 [00:31<00:00,  1.86it/s, v_num=0, val_loss=1.450, step=1.000, val_f1=0.331, train_loss=1.920, train_f1=0.0795]

Metric val_loss improved by 1.118 >= min_delta = 0.0. New best score: 1.449


Epoch 2: 100%|██████████| 59/59 [00:33<00:00,  1.78it/s, v_num=0, val_loss=1.120, step=2.000, val_f1=0.393, train_loss=1.220, train_f1=0.218] 

Metric val_loss improved by 0.331 >= min_delta = 0.0. New best score: 1.117


Epoch 3: 100%|██████████| 59/59 [00:36<00:00,  1.62it/s, v_num=0, val_loss=0.924, step=3.000, val_f1=0.426, train_loss=0.959, train_f1=0.363]

Metric val_loss improved by 0.193 >= min_delta = 0.0. New best score: 0.924


Epoch 4: 100%|██████████| 59/59 [00:32<00:00,  1.79it/s, v_num=0, val_loss=0.805, step=4.000, val_f1=0.459, train_loss=0.797, train_f1=0.403]

Metric val_loss improved by 0.119 >= min_delta = 0.0. New best score: 0.805


Epoch 5: 100%|██████████| 59/59 [00:33<00:00,  1.74it/s, v_num=0, val_loss=0.722, step=5.000, val_f1=0.496, train_loss=0.689, train_f1=0.433]

Metric val_loss improved by 0.084 >= min_delta = 0.0. New best score: 0.722


Epoch 6: 100%|██████████| 59/59 [00:31<00:00,  1.90it/s, v_num=0, val_loss=0.657, step=6.000, val_f1=0.493, train_loss=0.609, train_f1=0.492]

Metric val_loss improved by 0.064 >= min_delta = 0.0. New best score: 0.657


Epoch 7: 100%|██████████| 59/59 [00:38<00:00,  1.53it/s, v_num=0, val_loss=0.613, step=7.000, val_f1=0.530, train_loss=0.547, train_f1=0.497]

Metric val_loss improved by 0.044 >= min_delta = 0.0. New best score: 0.613


Epoch 8: 100%|██████████| 59/59 [00:33<00:00,  1.77it/s, v_num=0, val_loss=0.582, step=8.000, val_f1=0.539, train_loss=0.490, train_f1=0.520]

Metric val_loss improved by 0.031 >= min_delta = 0.0. New best score: 0.582


Epoch 9: 100%|██████████| 59/59 [00:36<00:00,  1.60it/s, v_num=0, val_loss=0.549, step=9.000, val_f1=0.550, train_loss=0.439, train_f1=0.557]

Metric val_loss improved by 0.033 >= min_delta = 0.0. New best score: 0.549


Epoch 10: 100%|██████████| 59/59 [00:32<00:00,  1.81it/s, v_num=0, val_loss=0.512, step=10.00, val_f1=0.558, train_loss=0.399, train_f1=0.583]

Metric val_loss improved by 0.037 >= min_delta = 0.0. New best score: 0.512


Epoch 11: 100%|██████████| 59/59 [00:32<00:00,  1.82it/s, v_num=0, val_loss=0.488, step=11.00, val_f1=0.559, train_loss=0.361, train_f1=0.568]

Metric val_loss improved by 0.024 >= min_delta = 0.0. New best score: 0.488


Epoch 12: 100%|██████████| 59/59 [00:33<00:00,  1.77it/s, v_num=0, val_loss=0.472, step=12.00, val_f1=0.575, train_loss=0.323, train_f1=0.577]

Metric val_loss improved by 0.016 >= min_delta = 0.0. New best score: 0.472


Epoch 13: 100%|██████████| 59/59 [00:32<00:00,  1.82it/s, v_num=0, val_loss=0.462, step=13.00, val_f1=0.567, train_loss=0.290, train_f1=0.600]

Metric val_loss improved by 0.009 >= min_delta = 0.0. New best score: 0.462


Epoch 14: 100%|██████████| 59/59 [00:35<00:00,  1.65it/s, v_num=0, val_loss=0.460, step=14.00, val_f1=0.591, train_loss=0.260, train_f1=0.618]

Metric val_loss improved by 0.003 >= min_delta = 0.0. New best score: 0.460


Epoch 16: 100%|██████████| 59/59 [00:31<00:00,  1.85it/s, v_num=0, val_loss=0.455, step=16.00, val_f1=0.591, train_loss=0.208, train_f1=0.632]

Metric val_loss improved by 0.004 >= min_delta = 0.0. New best score: 0.455


Epoch 17: 100%|██████████| 59/59 [00:35<00:00,  1.65it/s, v_num=0, val_loss=0.444, step=17.00, val_f1=0.593, train_loss=0.181, train_f1=0.645]

Metric val_loss improved by 0.012 >= min_delta = 0.0. New best score: 0.444


Epoch 20: 100%|██████████| 59/59 [00:42<00:00,  1.38it/s, v_num=0, val_loss=0.504, step=20.00, val_f1=0.595, train_loss=0.127, train_f1=0.659]

Monitored metric val_loss did not improve in the last 3 records. Best score: 0.444. Signaling Trainer to stop.


Epoch 20: 100%|██████████| 59/59 [00:42<00:00,  1.38it/s, v_num=0, val_loss=0.504, step=20.00, val_f1=0.595, train_loss=0.127, train_f1=0.659]


### Robust estimation across seeds and choice of best model

In [None]:
# test the model
# validation_results_baseline_model = trainer.validate(model, dataloaders=val_loader)

In [1]:
%reload_ext tensorboard

In [2]:
%tensorboard --logdir logs

Reusing TensorBoard on port 6006 (pid 8392), started 3 days, 0:53:09 ago. (Use '!kill 8392' to kill it.)

## TASK 6: Error analysis

* Compare the errors made on the validation and test sets.
* Aggregate model errors into categories (if possible) 
* Comment about errors and propose possible solutions on how to address them.

# Task 7: Report

Wrap up your experiment in a short report (up to 2 pages).

### Instructions

* Use the NLP course report template.
* Summarize each task in the report following the provided template.

### Recommendations

The report is not a copy-paste of graphs, tables, and command outputs.

* Summarize classification performance in Table format.
* **Do not** report command outputs or screenshots.
* Report learning curves in Figure format.
* The error analysis section should summarize your findings.

# Submission

* **Submit** your report in PDF format.
* **Submit** your python notebook.
* Make sure your notebook is **well organized**, with no temporary code, commented sections, tests, etc...
* You can upload **model weights** in a cloud repository and report the link in the report.