In [1]:
import torch.nn as nn
import torch
from torch.utils.data import DataLoader, Dataset
from typing import Tuple, List , Optional, Dict
import numpy as np
import torch.optim as optim
from tqdm import tqdm
import torch.nn.functional as F
from torch.nn.utils.rnn import pad_sequence
from torch.utils.data.dataloader import default_collate
from sklearn.metrics import precision_score, recall_score, f1_score
import os 

from itertools import product
from sklearn.utils.class_weight import compute_class_weight
from collections import Counter

import warnings 
warnings.filterwarnings('ignore')

# Pick the compute device once; later cells pass this value to the class-weight
# tensors and to train_model. The bare name on the last line displays the choice.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
device

'cuda'

In [2]:
def build_vocab(data_files: list[str]) -> tuple[dict[str, int], dict[str, int]]:
    """Build word and tag vocabularies from whitespace-separated token files.

    Every usable line is expected to hold exactly three fields:
    ``<index> <word> <tag>``. Blank lines (sentence separators) and lines with
    any other number of fields are skipped, which matches how
    ``get_class_weights`` treats the same files.

    Args:
        data_files (list[str]): Paths of the files to scan, in order.

    Returns:
        tuple[dict[str, int], dict[str, int]]: ``(word_vocab, tag_vocab)``.
            - ``word_vocab`` maps each word to an index; ``'<PAD>'`` is 0 and
              ``'<UNK>'`` is 1, further words are numbered in order of first
              appearance starting at 2.
            - ``tag_vocab`` maps each tag to an index, numbered from 0 in order
              of first appearance.
    """
    word_vocab = {'<PAD>': 0, '<UNK>': 1}
    tag_vocab = {}

    for file_path in data_files:
        with open(file_path, 'r', encoding='utf-8') as file:
            for line in file:
                parts = line.split()
                if len(parts) != 3:
                    # Blank separator or malformed line: nothing to index.
                    continue
                _, word, tag = parts
                # The current dict size is the next free index, so no separate
                # counters are needed.
                word_vocab.setdefault(word, len(word_vocab))
                tag_vocab.setdefault(tag, len(tag_vocab))
    return word_vocab, tag_vocab


def get_class_weights(file_paths: list[str], tag_vocab: dict[str, int]) -> tuple[torch.Tensor, torch.Tensor]:
    """Derive per-tag weights from how often each tag occurs in the given files.

    Only lines with exactly three whitespace-separated fields are counted, and
    only when their third field is a key of ``tag_vocab``.

    Args:
        file_paths (list[str]): Paths of the files to count tags in.
        tag_vocab (dict[str, int]): Mapping from tag to its index; the index is
            the position of that tag's weight in both returned tensors.

    Returns:
        tuple[torch.Tensor, torch.Tensor]: Two float tensors of length
        ``len(tag_vocab)``:
            - `regular_weights_tensor`: ``count / total`` for each tag.
            - `inv_weights_tensor`: ``total / (count + 1e-9)`` for each tag.
        Tags that never occur keep a weight of 0 in both tensors.
    """
    observed = Counter()
    for path in file_paths:
        with open(path, 'r', encoding='utf-8') as handle:
            for raw in handle:
                fields = raw.strip().split()
                if len(fields) != 3:
                    continue
                label = fields[2]
                if label in tag_vocab:
                    observed[label] += 1

    n_total = sum(observed.values())
    size = len(tag_vocab)
    regular_weights_tensor = torch.zeros(size, dtype=torch.float)
    inv_weights_tensor = torch.zeros(size, dtype=torch.float)

    # Fill by vocabulary index so the tensors line up with the model's outputs.
    for label, position in tag_vocab.items():
        hits = observed.get(label)
        if hits is None:
            # Unseen tag: both weights stay at their initial zero.
            continue
        regular_weights_tensor[position] = hits / n_total
        inv_weights_tensor[position] = n_total / (hits + 1e-9)

    return regular_weights_tensor, inv_weights_tensor


class BILSTMForNER(nn.Module):
    """A Bidirectional LSTM model for Named Entity Recognition.

    Pipeline: embedding -> bidirectional LSTM -> Linear(2 * hidden_dim, 128)
    -> ELU -> Linear(128, output_dim).

    Args:
        input_dim (int): Number of rows in the embedding table (vocabulary size).
        embedding_dim (int): The dimensionality of the word embeddings.
        hidden_dim (int): The dimensionality of the hidden state of the LSTM
            (per direction).
        output_dim (int): Number of output classes (tags).
        glove_embeddings (torch.Tensor, optional): Pre-trained embedding matrix.
            When given, it replaces the randomly initialised embedding weights
            and remains trainable. Defaults to None.
        num_layers (int, optional): Number of LSTM layers. Defaults to 1.
        dropout (float, optional): Dropout probability applied between stacked
            LSTM layers. It has no effect when ``num_layers`` is 1, because
            ``nn.LSTM`` applies dropout only to the outputs of non-final layers.
            Defaults to 0.33.
    """
    def __init__(self, input_dim, embedding_dim, hidden_dim, output_dim, glove_embeddings = None, num_layers = 1, dropout = 0.33):
        super(BILSTMForNER, self).__init__()
        self.embedding = nn.Embedding(input_dim, embedding_dim)

        if glove_embeddings is not None:
            self.embedding.weight = nn.Parameter(glove_embeddings)

        # nn.LSTM warns when dropout is non-zero on a single-layer network,
        # where it is a no-op anyway, so only forward it for stacked LSTMs.
        lstm_dropout = dropout if num_layers > 1 else 0.0
        self.lstm = nn.LSTM(embedding_dim, hidden_dim, num_layers, batch_first = True, bidirectional = True, dropout = lstm_dropout)
        self.fc = nn.Linear(hidden_dim * 2, 128)  # * 2: forward and backward states are concatenated
        self.classifier = nn.Linear(128, output_dim)
        self.elu = nn.ELU()

    def forward(self, x):
        """Return per-token tag logits for a batch of word-index sequences.

        Args:
            x (torch.Tensor): Word indices; with ``batch_first=True`` the LSTM
                treats the first dimension as the batch and the second as the
                sequence.

        Returns:
            torch.Tensor: Logits of shape (batch, seq_len, output_dim).
        """
        x = self.embedding(x)
        lstm_out, _ = self.lstm(x)
        out = self.elu(self.fc(lstm_out))
        logits = self.classifier(out)
        return logits  # (batch, seq_len, output_dim)


class IndexedNERDataset(Dataset):

    """Dataset of index-encoded sentences for Named Entity Recognition.

    The file is read once at construction. Sentences are separated by blank
    lines; each token line is ``<index> <word> <tag>`` when ``use_tags`` is
    True. When ``use_tags`` is False the word is taken from the second field if
    the line has one (``<index> <word>``), otherwise from the only field.

    Attributes:
        word_vocab (dict[str, int]): A dictionary mapping words to their indices.
            Must contain ``'<UNK>'``, which is used for unknown words.
        tag_vocab (Optional[dict[str, int]]): A dictionary mapping tags to their indices if `use_tags` is True, otherwise None.
        use_tags (bool): Whether to include tags.
        data (list): One entry per sentence; each entry is a non-empty list of
            ``(word_idx, tag_idx)`` pairs. ``tag_idx`` is -1 for unknown tags
            and for every token when ``use_tags`` is False.

    Methods:
        __init__: Initialize the dataset.
        _load_data: Load data from the dataset file.
        __len__: Return the number of sentences in the dataset.
        __getitem__: Retrieve one sentence as a pair of LongTensors.
    """


    def __init__(self, file_path, word_vocab, tag_vocab = None, use_tags = True):
        self.word_vocab = word_vocab
        self.tag_vocab = tag_vocab if use_tags else None
        self.use_tags = use_tags
        self.data = []
        self._load_data(file_path)

    def _load_data(self, file_path):
        """Parse ``file_path`` into ``self.data``, one list of pairs per sentence."""
        unk_idx = self.word_vocab['<UNK>']
        with open(file_path, 'r', encoding='utf-8') as f:
            sentence = []
            for line in f:
                parts = line.split()
                if not parts:
                    # Blank line ends a sentence. Runs of blank lines must not
                    # produce empty sentences: __getitem__ cannot unzip them.
                    if sentence:
                        self.data.append(sentence)
                        sentence = []
                    continue

                if self.use_tags:
                    if len(parts) != 3:
                        # Malformed token line; skipped, as get_class_weights does.
                        continue
                    _, word, tag = parts
                    tag_idx = self.tag_vocab.get(tag, -1)
                else:
                    # Drop the leading token index when present so the lookup
                    # uses the word itself rather than the whole line.
                    word = parts[1] if len(parts) > 1 else parts[0]
                    tag_idx = -1
                sentence.append((self.word_vocab.get(word, unk_idx), tag_idx))

            if sentence:  # File did not end with a blank line
                self.data.append(sentence)

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        """Return ``(word_indices, tag_indices)`` for sentence ``idx`` as LongTensors."""
        sentence, tags = zip(*self.data[idx])
        return torch.tensor(sentence, dtype=torch.long), torch.tensor(tags, dtype=torch.long)


def pad_collate(batch):
    """Collate (word-index, tag-index) tensor pairs into two padded batch tensors.

    Word sequences are padded with the ``'<PAD>'`` index looked up in the
    module-level ``word_vocab``; tag sequences are padded with -1, the same
    value ``evaluate_model`` filters out as padding.

    Args:
        batch: An iterable of ``(words, tags)`` tensor pairs.

    Returns:
        tuple: ``(padded_words, padded_tags)``, both batch-first.
    """
    word_seqs, tag_seqs = zip(*batch)
    pad_word = word_vocab['<PAD>']
    padded_words = pad_sequence(word_seqs, batch_first=True, padding_value=pad_word)
    padded_tags = pad_sequence(tag_seqs, batch_first=True, padding_value=-1)
    return padded_words, padded_tags


def compute_metrics(preds: List[int], labels: List[int]) -> Tuple[float, float, float]:
    """Score predictions against labels with three F1 averaging modes.

    Args:
        preds (List[int]): Predicted labels.
        labels (List[int]): True labels.

    Returns:
        Tuple[float, float, float]: F1 scores in the order (macro, micro,
        weighted). ``zero_division=0`` is passed to every ``f1_score`` call.
    """
    macro, micro, weighted = (
        f1_score(labels, preds, average=mode, zero_division=0)
        for mode in ('macro', 'micro', 'weighted')
    )
    return macro, micro, weighted


def evaluate_model(model: nn.Module, dataloader: DataLoader, criterion: nn.Module, device: torch.device, pad_label: int = -1) -> Tuple[float, float, float, float, float]:
    """Evaluate the model on the given dataloader using the specified criterion.

    Args:
        model (nn.Module): The model to evaluate; switched to eval mode here.
        dataloader (DataLoader): Yields ``(inputs, labels)`` batches.
        criterion (nn.Module): The loss criterion, applied to logits flattened
            to (tokens, classes) and labels flattened to (tokens,).
        device (torch.device): The device (CPU or GPU) to perform evaluation on.
        pad_label (int, optional): Label value that marks padding positions;
            those positions are excluded from accuracy and F1. Defaults to -1.

    Returns:
        Tuple[float, float, float, float, float]: Mean loss per batch, accuracy,
        and F1 scores for macro, micro, and weighted averages. If every label
        is padding, accuracy and all F1 scores are reported as 0.0.

    Raises:
        ValueError: If the dataloader yields no batches.
    """

    model.eval()
    total_loss = 0.0
    num_batches = 0
    pred_chunks, label_chunks = [], []

    with torch.no_grad():
        for inputs, labels in dataloader:
            # (batch sz, seq len)
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            loss = criterion(outputs.view(-1, outputs.shape[-1]), labels.view(-1))
            total_loss += loss.item()
            num_batches += 1
            _, preds = torch.max(outputs, dim=2)
            # Keep one array per batch; concatenating once afterwards avoids
            # building Python lists with one element per token.
            pred_chunks.append(preds.reshape(-1).cpu().numpy())
            label_chunks.append(labels.reshape(-1).cpu().numpy())

    if num_batches == 0:
        raise ValueError("evaluate_model received a dataloader that yielded no batches")

    flat_preds = np.concatenate(pred_chunks)
    flat_labels = np.concatenate(label_chunks)

    # Boolean mask drops padded positions in a single vectorised step.
    keep = flat_labels != pad_label
    valid_preds = flat_preds[keep]
    valid_labels = flat_labels[keep]

    avg_loss = total_loss / num_batches
    if valid_labels.size == 0:
        # Nothing to score: avoid NaN accuracy, in line with zero_division=0.
        return avg_loss, 0.0, 0.0, 0.0, 0.0

    accuracy = float(np.mean(valid_preds == valid_labels))
    f1_mac, f1_mic, f1_weight = compute_metrics(valid_preds, valid_labels)

    return avg_loss, accuracy, f1_mac, f1_mic, f1_weight


def train_model(model: nn.Module, dataloaders: dict[str, DataLoader], optimizer: optim.Optimizer, criterion: nn.Module, device: torch.device, num_epochs: int = 50, patience: int = 10, scheduler: Optional[optim.lr_scheduler._LRScheduler] = None, ckpt_name: str = 'best_model.pth') -> Tuple[float, float, float]:
    """Train the model using the provided dataloaders, optimizer, and criterion.

    After every epoch the model is evaluated on both the 'train' and 'dev'
    dataloaders. Whenever the dev macro-F1 improves, the model's state dict is
    saved to ``ckpt_name``. The model object itself is left with the weights of
    the last epoch run, not the best checkpoint.

    Args:
        model (nn.Module): The model to train.
        dataloaders (dict[str, DataLoader]): Must contain the keys 'train' and 'dev'.
        optimizer (optim.Optimizer): The optimizer for updating the model's parameters.
        criterion (nn.Module): The loss criterion.
        device (torch.device): The device (CPU or GPU) to perform training on.
        num_epochs (int, optional): Number of epochs for training. Defaults to 50.
        patience (int, optional): Number of consecutive epochs without improvement in dev macro-F1 before early stopping. Defaults to 10.
        scheduler (Optional[optim.lr_scheduler._LRScheduler], optional): Learning rate scheduler, stepped once per epoch. Defaults to None.
        ckpt_name (str, optional): File name to save the best model checkpoint. Its parent directory is created if missing. Defaults to 'best_model.pth'.

    Returns:
        Tuple[float, float, float]: ``(val_acc, train_acc, best_val_f1)``. The
        two accuracies come from the last epoch that ran (NaN if no epoch ran);
        ``best_val_f1`` is the best dev macro-F1 seen (-inf if no epoch ran).
    """

    best_val_f1 = -float('inf')
    patience_counter = 0
    # Defined up front so the return statement is valid even when num_epochs is 0.
    train_acc = val_acc = float('nan')
    model.to(device)

    # torch.save does not create missing directories.
    ckpt_dir = os.path.dirname(ckpt_name)
    if ckpt_dir:
        os.makedirs(ckpt_dir, exist_ok=True)

    for epoch in tqdm( range(num_epochs) ):
        model.train()
        for inputs, labels in dataloaders['train']:
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            # Flatten to (tokens, classes) vs (tokens,) as the criterion is called in evaluate_model.
            loss = criterion(outputs.view(-1, outputs.shape[-1]), labels.view(-1))
            loss.backward()
            optimizer.step()

        if scheduler is not None:
            scheduler.step()

        train_loss, train_acc, train_f1_mac, train_f1_mic, train_f1_weighted = evaluate_model(model, dataloaders['train'], criterion, device)
        val_loss, val_acc, val_f1_mac, val_f1_mic, val_f1_weighted = evaluate_model(model, dataloaders['dev'], criterion, device)

        if epoch % 5 == 0:
            print(f'Epoch {epoch+1}:')
            print(f'Train - Loss: {train_loss:.4f}, Acc: {train_acc:.4f},  F1Mac: {train_f1_mac:.4f}')
            print(f'Val - Loss: {val_loss:.4f}, Acc: {val_acc:.4f}, F1Mac: {val_f1_mac:.4f}')

        # Checkpointing and early stopping are both driven by dev macro-F1.
        if val_f1_mac > best_val_f1:
            best_val_f1 = val_f1_mac
            patience_counter = 0
            torch.save(model.state_dict(), ckpt_name)
        else:
            patience_counter += 1
            if patience_counter >= patience:
                print("Early stopping triggered.")
                break

    return val_acc, train_acc, best_val_f1

# Vocabularies are built from both the train and dev files, so words and tags that
# occur only in dev still receive their own indices.
# NOTE: pad_collate reads `word_vocab` as a global, so this must run before any
# DataLoader that uses pad_collate is iterated.
data_files = ['../../data/lstm-data/train', '../../data/lstm-data/dev']
word_vocab, tag_vocab = build_vocab(data_files)

### CAN IGNORE FOR NOW UNTIL HYPERPARAM GRID SEARCH DONE

In [3]:
# def call_eval_script(preds_file_path, gold_file_path):
#     # Ensure preds_file_path is a single string
#     # If preds_file_path can be a list but you only need to handle one file at a time, adjust accordingly
#     if isinstance(preds_file_path, list):
#         preds_file_path = preds_file_path[0]  # Assuming you're handling one file at a time

#     print('preds_file_path: ', preds_file_path, "gold_file_path: ", gold_file_path)

#     # Correctly formatted command
#     cmd = f"python ../eval.py -p {preds_file_path} -g {gold_file_path}"
    
#     # Running the subprocess and capturing output
#     result = subprocess.run(cmd, capture_output=True, text=True, shell=True)
#     return result

# def parse_eval_output(output):
#     # Parse the output string from your script to extract F1 and potentially other metrics
#     # This is highly dependent on your script's output format
#     print('outputs from eval script: ', output)
#     lines = output.strip().split('\n')
#     summary_line = lines[0]  # Assuming the first line contains the overall metrics
#     metrics = summary_line.split(';')
#     # Extract precision, recall, and F1 - adjust parsing as per your actual output
#     precision = float(metrics[1].split(':')[1].strip().replace('%', ''))
#     recall = float(metrics[2].split(':')[1].strip().replace('%', ''))
#     f1 = float(metrics[3].split(':')[1].strip().replace('%', ''))
#     print('f1: ',f1, '\n Line: ', lines)
#     return precision, recall, f1

# def evaluate_fb1_model(preds_file_path, gold_file_path):
#     """
#     Evaluates the model on the given dataloader and calls an external script for metrics.
#     """
#     # Your code to generate predictions and save them to preds_file_path
#     # Ensure predictions are in the correct format for your eval script

#     # After saving predictions, call the eval script
#     eval_output = call_eval_script(preds_file_path, gold_file_path)
#     precision, recall, f1 = parse_eval_output(eval_output)

#     return precision, recall, f1

### Custom Embedding Model Training

In [4]:
input_dim = len(word_vocab)
output_dim = len(tag_vocab)

# Parse each split once; the DataLoaders below wrap these same Dataset objects
# instead of re-reading the files.
dataset_dict = {
    'train': IndexedNERDataset('../../data/lstm-data/train', word_vocab, tag_vocab),
    'dev': IndexedNERDataset('../../data/lstm-data/dev', word_vocab, tag_vocab)
}

# Class weights are counted over data_files (train + dev).
regular_class_weight, inv_class_weight = get_class_weights(data_files, tag_vocab)
regular_class_weight, inv_class_weight = regular_class_weight.to(device),  inv_class_weight.to(device)

model = BILSTMForNER(input_dim = input_dim, embedding_dim = 100, hidden_dim = 256, dropout = 0.33, output_dim = output_dim)
# tag_vocab has no '<PAD>' entry unless the data contains that tag, so ignore_index
# falls back to -1, the value pad_collate uses to pad tag sequences.
criterion = nn.CrossEntropyLoss(weight = inv_class_weight, ignore_index = tag_vocab.get('<PAD>', -1))
optimizer = optim.SGD(model.parameters(), lr = 0.1, momentum = 0.9, weight_decay = 0.0001)
# NOTE(review): this scheduler is created but not passed to train_model below, so the
# learning rate stays constant for the whole run - confirm whether that is intended.
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size = 50, gamma = 0.1)

dataloaders = {
    'train': DataLoader(dataset_dict['train'], batch_size = 16, shuffle = True, collate_fn = pad_collate),
    'dev': DataLoader(dataset_dict['dev'], batch_size = 16, shuffle = False, collate_fn = pad_collate),
    # 'test': DataLoader(test_dataset, batch_size=32, shuffle=False, collate_fn=pad_collate)
}

# The checkpoint directory must exist before train_model saves into it.
os.makedirs('./ckpts', exist_ok = True)

# patience (20) exceeds num_epochs (10), so early stopping cannot trigger in this run.
train_model(model, dataloaders, optimizer, criterion, device, ckpt_name = './ckpts/custom_BiLSTM.pth', patience = 20, num_epochs = 10) # MODEL SAVED AT '/NER-Tagging/notebooks/ckpts/custom_BiLSTM.pth'

 10%|█         | 1/10 [00:19<02:55, 19.46s/it]

Epoch 1:
Train - Loss: 0.7575, Acc: 0.7889,  F1Mac: 0.5193
Val - Loss: 1.0084, Acc: 0.7755, F1Mac: 0.4669


 60%|██████    | 6/10 [01:54<01:16, 19.11s/it]

Epoch 6:
Train - Loss: 0.1401, Acc: 0.9161,  F1Mac: 0.7726
Val - Loss: 0.9824, Acc: 0.8732, F1Mac: 0.6337


100%|██████████| 10/10 [02:52<00:00, 17.26s/it]


(0.8711078366745512, 0.9168194283535468, 0.6671650354068519)

### GLOVE Model

In [5]:
def load_glove_embeddings(path: str, word_vocab: dict[str, int], embedding_dim: int) -> torch.Tensor:
    """Load pre-trained GloVe embeddings from the specified path and create an embedding matrix.

    Each vocabulary word is looked up by its exact form first and by its
    lower-cased form second. Words found neither way receive a vector drawn
    from ``np.random.randn``. Only vectors for words that can be looked up are
    kept in memory while reading the file.

    Lines that do not contain exactly ``embedding_dim + 1`` whitespace-separated
    fields (blank lines, truncated rows, rows of a different width) are ignored.

    Args:
        path (str): The path to the GloVe embeddings file.
        word_vocab (dict[str, int]): A dictionary mapping words to their indices.
        embedding_dim (int): The dimensionality of the word embeddings.

    Returns:
        torch.Tensor: A float tensor of shape (len(word_vocab), embedding_dim);
        row ``i`` is the vector for the word whose index is ``i``.
    """

    # Every key the lookup below can ask for: the word itself and its lower-case form.
    wanted = set(word_vocab)
    wanted.update(word.lower() for word in word_vocab)

    embedding_dict = {}
    with open(path, 'r', encoding='utf-8') as f:
        for line in f:
            values = line.split()
            if len(values) != embedding_dim + 1:
                # A vector of the wrong width would either fail to assign or be
                # silently broadcast across the row, so treat it as absent.
                continue
            word = values[0]
            if word in wanted:
                embedding_dict[word] = np.asarray(values[1:], "float32")

    vocab_size = len(word_vocab)
    embedding_matrix = np.zeros((vocab_size, embedding_dim))

    for word, idx in word_vocab.items():
        embedding_vector = embedding_dict.get(word)
        if embedding_vector is None:
            embedding_vector = embedding_dict.get(word.lower())
        if embedding_vector is None:
            # No pre-trained vector: fall back to a random one.
            embedding_vector = np.random.randn(embedding_dim)
        embedding_matrix[idx] = embedding_vector

    return torch.tensor(embedding_matrix, dtype=torch.float)


In [6]:
# The file name indicates 100-dimensional GloVe vectors; embedding_dim below is set to
# match, and the GloVe model cell uses the same embedding_dim of 100.
glove_path = '../../data/lstm-data/glove.6B.100d/glove.6B.100d.txt' 
glove_embeddings = load_glove_embeddings(glove_path, word_vocab, embedding_dim = 100)

### GLOVE Model Training

In [7]:
# Reuse the datasets parsed in the custom-embedding cell rather than re-reading the
# files; only the batch size differs for this run.
dataloaders = {
    'train': DataLoader(dataset_dict['train'], batch_size = 32, shuffle = True, collate_fn = pad_collate),
    'dev': DataLoader(dataset_dict['dev'], batch_size = 32, shuffle = False, collate_fn = pad_collate),
    # 'test': DataLoader(test_dataset, batch_size=32, shuffle=False, collate_fn=pad_collate)
}

glove_model = BILSTMForNER(input_dim = input_dim, embedding_dim = 100, hidden_dim = 256, glove_embeddings = glove_embeddings, dropout = 0.33, output_dim = output_dim)
criterion = nn.CrossEntropyLoss(weight = inv_class_weight, ignore_index = tag_vocab.get('<PAD>', -1))
glove_optimizer = optim.SGD(glove_model.parameters(), lr = 0.05, momentum = 0.9, weight_decay = 0.0001)

# The checkpoint directory must exist before train_model saves into it.
os.makedirs('./ckpts', exist_ok = True)

# patience equals num_epochs, so early stopping cannot trigger; the checkpoint on disk
# is still the epoch with the best dev macro-F1.
train_model(glove_model, dataloaders, glove_optimizer, criterion, device, ckpt_name = './ckpts/glove_BiLSTM.pth', patience = 150, num_epochs = 150)

  1%|          | 1/150 [00:11<29:04, 11.71s/it]

Epoch 1:
Train - Loss: 0.5651, Acc: 0.7875,  F1Mac: 0.5387
Val - Loss: 0.6399, Acc: 0.7913, F1Mac: 0.5325


  4%|▍         | 6/150 [01:04<25:03, 10.44s/it]

Epoch 6:
Train - Loss: 0.1285, Acc: 0.9134,  F1Mac: 0.7666
Val - Loss: 0.3761, Acc: 0.9037, F1Mac: 0.7123


  7%|▋         | 11/150 [01:58<25:31, 11.02s/it]

Epoch 11:
Train - Loss: 0.0589, Acc: 0.9683,  F1Mac: 0.8933
Val - Loss: 0.4414, Acc: 0.9527, F1Mac: 0.8173


 11%|█         | 16/150 [02:50<23:16, 10.42s/it]

Epoch 16:
Train - Loss: 0.0241, Acc: 0.9841,  F1Mac: 0.9486
Val - Loss: 0.6143, Acc: 0.9653, F1Mac: 0.8523


 14%|█▍        | 21/150 [03:41<22:06, 10.28s/it]

Epoch 21:
Train - Loss: 0.0243, Acc: 0.9633,  F1Mac: 0.8938
Val - Loss: 0.5233, Acc: 0.9464, F1Mac: 0.8070


 17%|█▋        | 26/150 [04:33<21:11, 10.25s/it]

Epoch 26:
Train - Loss: 0.0102, Acc: 0.9880,  F1Mac: 0.9574
Val - Loss: 0.5701, Acc: 0.9676, F1Mac: 0.8581


 21%|██        | 31/150 [05:30<22:19, 11.26s/it]

Epoch 31:
Train - Loss: 0.0118, Acc: 0.9865,  F1Mac: 0.9547
Val - Loss: 0.5461, Acc: 0.9667, F1Mac: 0.8590


 24%|██▍       | 36/150 [06:24<20:29, 10.79s/it]

Epoch 36:
Train - Loss: 0.0166, Acc: 0.9858,  F1Mac: 0.9587
Val - Loss: 0.5441, Acc: 0.9664, F1Mac: 0.8607


 27%|██▋       | 41/150 [07:21<20:28, 11.27s/it]

Epoch 41:
Train - Loss: 0.0073, Acc: 0.9928,  F1Mac: 0.9775
Val - Loss: 0.5671, Acc: 0.9717, F1Mac: 0.8745


 31%|███       | 46/150 [08:17<19:54, 11.48s/it]

Epoch 46:
Train - Loss: 0.0076, Acc: 0.9945,  F1Mac: 0.9814
Val - Loss: 0.6009, Acc: 0.9723, F1Mac: 0.8742


 34%|███▍      | 51/150 [09:20<21:14, 12.88s/it]

Epoch 51:
Train - Loss: 0.0180, Acc: 0.9869,  F1Mac: 0.9411
Val - Loss: 0.5012, Acc: 0.9632, F1Mac: 0.8410


 37%|███▋      | 56/150 [10:29<21:25, 13.67s/it]

Epoch 56:
Train - Loss: 0.0068, Acc: 0.9958,  F1Mac: 0.9849
Val - Loss: 0.6045, Acc: 0.9729, F1Mac: 0.8766


 41%|████      | 61/150 [11:38<19:51, 13.39s/it]

Epoch 61:
Train - Loss: 0.0057, Acc: 0.9961,  F1Mac: 0.9877
Val - Loss: 0.6032, Acc: 0.9731, F1Mac: 0.8808


 44%|████▍     | 66/150 [12:37<16:35, 11.85s/it]

Epoch 66:
Train - Loss: 0.0078, Acc: 0.9926,  F1Mac: 0.9789
Val - Loss: 0.5620, Acc: 0.9690, F1Mac: 0.8699


 47%|████▋     | 71/150 [13:31<14:30, 11.02s/it]

Epoch 71:
Train - Loss: 0.0047, Acc: 0.9972,  F1Mac: 0.9901
Val - Loss: 0.6237, Acc: 0.9744, F1Mac: 0.8874


 51%|█████     | 76/150 [14:25<12:22, 10.03s/it]

Epoch 76:
Train - Loss: 0.0035, Acc: 0.9980,  F1Mac: 0.9926
Val - Loss: 0.6180, Acc: 0.9745, F1Mac: 0.8865


 54%|█████▍    | 81/150 [15:05<09:38,  8.38s/it]

Epoch 81:
Train - Loss: 0.0263, Acc: 0.9920,  F1Mac: 0.9742
Val - Loss: 0.5642, Acc: 0.9667, F1Mac: 0.8620


 57%|█████▋    | 86/150 [15:45<08:32,  8.01s/it]

Epoch 86:
Train - Loss: 0.0039, Acc: 0.9978,  F1Mac: 0.9908
Val - Loss: 0.5979, Acc: 0.9739, F1Mac: 0.8840


 61%|██████    | 91/150 [16:31<09:17,  9.45s/it]

Epoch 91:
Train - Loss: 0.0030, Acc: 0.9984,  F1Mac: 0.9947
Val - Loss: 0.5987, Acc: 0.9734, F1Mac: 0.8849


 64%|██████▍   | 96/150 [17:32<10:18, 11.46s/it]

Epoch 96:
Train - Loss: 0.0470, Acc: 0.9894,  F1Mac: 0.9627
Val - Loss: 0.6376, Acc: 0.9649, F1Mac: 0.8501


 67%|██████▋   | 101/150 [18:24<08:39, 10.61s/it]

Epoch 101:
Train - Loss: 0.0035, Acc: 0.9980,  F1Mac: 0.9931
Val - Loss: 0.6408, Acc: 0.9733, F1Mac: 0.8834


 71%|███████   | 106/150 [19:26<08:50, 12.05s/it]

Epoch 106:
Train - Loss: 0.0027, Acc: 0.9989,  F1Mac: 0.9945
Val - Loss: 0.5659, Acc: 0.9734, F1Mac: 0.8823


 74%|███████▍  | 111/150 [20:14<06:21,  9.78s/it]

Epoch 111:
Train - Loss: 0.0154, Acc: 0.9897,  F1Mac: 0.9639
Val - Loss: 0.5586, Acc: 0.9611, F1Mac: 0.8466


 77%|███████▋  | 116/150 [21:05<05:55, 10.46s/it]

Epoch 116:
Train - Loss: 0.0034, Acc: 0.9984,  F1Mac: 0.9927
Val - Loss: 0.6539, Acc: 0.9724, F1Mac: 0.8816


 81%|████████  | 121/150 [21:59<05:22, 11.13s/it]

Epoch 121:
Train - Loss: 0.0021, Acc: 0.9992,  F1Mac: 0.9971
Val - Loss: 0.6069, Acc: 0.9728, F1Mac: 0.8840


 84%|████████▍ | 126/150 [22:49<03:55,  9.82s/it]

Epoch 126:
Train - Loss: 0.0064, Acc: 0.9969,  F1Mac: 0.9861
Val - Loss: 0.6293, Acc: 0.9707, F1Mac: 0.8760


 87%|████████▋ | 131/150 [23:34<02:53,  9.13s/it]

Epoch 131:
Train - Loss: 0.0028, Acc: 0.9991,  F1Mac: 0.9960
Val - Loss: 0.6131, Acc: 0.9729, F1Mac: 0.8830


 91%|█████████ | 136/150 [24:21<02:09,  9.24s/it]

Epoch 136:
Train - Loss: 0.0025, Acc: 0.9991,  F1Mac: 0.9957
Val - Loss: 0.6058, Acc: 0.9719, F1Mac: 0.8800


 94%|█████████▍| 141/150 [25:06<01:21,  9.10s/it]

Epoch 141:
Train - Loss: 0.0925, Acc: 0.9934,  F1Mac: 0.9598
Val - Loss: 0.8607, Acc: 0.9641, F1Mac: 0.8264


 97%|█████████▋| 146/150 [25:52<00:36,  9.04s/it]

Epoch 146:
Train - Loss: 0.0038, Acc: 0.9985,  F1Mac: 0.9941
Val - Loss: 0.6661, Acc: 0.9717, F1Mac: 0.8804


100%|██████████| 150/150 [26:29<00:00, 10.60s/it]


(0.9718290744115708, 0.9989636647162055, 0.8906130444694725)

### MAKE PREDS AND RUN EVAL

In [13]:
def get_eval_preds(model, file_paths, word_vocab, device = torch.device("cuda" if torch.cuda.is_available() else "cpu"), idx_to_tag = None, output_dir='../../data/lstm-data/preds/', output_postfix = None):
    """Generate predictions for evaluation using the provided model.

    For every input file a prediction file is written to
    ``<output_dir>/new/<input basename>_preds_<output_postfix>``. Each sentence
    is written as ``<position>\\t<word>\\t<predicted tag>`` lines followed by a
    blank line. The word written is the one read from the input file, including
    words that are not in ``word_vocab``.

    Args:
        model (nn.Module): The trained model.
        file_paths (list[str]): A list of file paths containing evaluation data.
            Token lines are whitespace-separated with the word in the second
            field; blank lines separate sentences.
        word_vocab (dict[str, int]): A dictionary mapping words to their indices.
            Must contain ``'<UNK>'``.
        device (torch.device, optional): The device (CPU or GPU) to perform evaluation on. Defaults to GPU if available, otherwise CPU.
        idx_to_tag (dict[int, str], optional): A dictionary mapping tag indices to tag names. Defaults to None, in which case it is built from the notebook-level ``tag_vocab`` when the function is called.
        output_dir (str, optional): The directory to save prediction files. Defaults to '../../data/lstm-data/preds/'.
        output_postfix (str, optional): A postfix to append to the output file names. Defaults to None, which appends nothing.

    Returns:
        list[str]: A list of file paths where the predictions are saved.
    """

    if idx_to_tag is None:
        # Resolved at call time so the mapping reflects the current tag_vocab.
        idx_to_tag = {idx: tag for tag, idx in tag_vocab.items()}
    postfix = '' if output_postfix is None else output_postfix
    unk_idx = word_vocab['<UNK>']

    # open(..., 'w') does not create missing directories.
    os.makedirs(os.path.join(output_dir, 'new'), exist_ok=True)

    model.eval()
    model.to(device)
    output_paths = []

    for file_path in file_paths:
        print(file_path)
        output_file_name = os.path.basename(file_path) + "_preds_" + postfix
        output_path = os.path.join(output_dir, 'new', output_file_name)
        output_paths.append(output_path)

        # Keep the surface words: they are needed verbatim in the output and
        # cannot be recovered from indices once a word has mapped to <UNK>.
        sentences = []
        current_sentence = []
        with open(file_path, 'r', encoding='utf-8') as f:
            for line in f:
                parts = line.split()
                if parts:
                    current_sentence.append(parts[1] if len(parts) > 1 else parts[0])
                elif current_sentence:  # Blank line closes a non-empty sentence
                    sentences.append(current_sentence)
                    current_sentence = []
        # Add the last sentence if the file doesn't end with a blank line
        if current_sentence:
            sentences.append(current_sentence)

        # Predict one sentence at a time (batch of 1, so no padding is needed).
        with open(output_path, 'w', encoding='utf-8') as out_f, torch.no_grad():
            for words in sentences:
                indices = [word_vocab.get(word, unk_idx) for word in words]
                sentence_tensor = torch.tensor([indices], dtype = torch.long, device = device)
                outputs = model(sentence_tensor)
                _, preds = torch.max(outputs, dim = 2)
                pred_tags = [idx_to_tag[tag_idx] for tag_idx in preds[0].tolist()]

                for position, (word, tag) in enumerate(zip(words, pred_tags), start = 1):
                    out_f.write(f"{position}\t{word}\t{tag}\n")
                out_f.write("\n")  # New line after each sentence
    return output_paths

### Prediction files generated in next two cells

In [14]:
file_paths = ['../../data/lstm-data/test', '../../data/lstm-data/dev']

# map_location remaps the saved tensors onto the current device, so a checkpoint
# written on a GPU machine still loads when only the CPU is available.
checkpoint = torch.load('./ckpts/custom_BiLSTM.pth', map_location = device)
model.load_state_dict(checkpoint)

get_eval_preds(model, file_paths, word_vocab, output_postfix = 'custom_BiLSTM')

../../data/lstm-data/test
../../data/lstm-data/dev


['../../data/lstm-data/preds/new\\test_preds_custom_BiLSTM',
 '../../data/lstm-data/preds/new\\dev_preds_custom_BiLSTM']

In [15]:
# Load the GloVe checkpoint into glove_model rather than into `model`, so the
# custom-embedding model keeps its own weights and each name refers to one model.
checkpoint = torch.load('./ckpts/glove_BiLSTM.pth', map_location = device)
glove_model.load_state_dict(checkpoint)

get_eval_preds(glove_model, file_paths, word_vocab, output_postfix = 'glove_BiLSTM')

../../data/lstm-data/test
../../data/lstm-data/dev


['../../data/lstm-data/preds/new\\test_preds_glove_BiLSTM',
 '../../data/lstm-data/preds/new\\dev_preds_glove_BiLSTM']

### GRID SEARCH

In [15]:
# def write_params(params, str):
#     f = open(str, 'w')
#     f.write(repr(params) + '\n')
#     f.close()


# def pad_collate(batch):
#     (xx, yy) = zip(*batch)
#     x_lens = [len(x) for x in xx]
    
#     xx_pad = pad_sequence(xx, batch_first=True, padding_value=word_vocab.get('<PAD>', 0))
#     yy_pad = pad_sequence(yy, batch_first=True, padding_value=-1)
    
#     return xx_pad, yy_pad, x_lens

# def get_class_weights(file_paths, tag_vocab):
#     tag_counts = Counter()

#     for file_path in file_paths:
#         with open(file_path, 'r', encoding='utf-8') as f:
#             for line in f:
#                 parts = line.strip().split()
#                 if len(parts) == 3:
#                     _, _, tag = parts
#                     if tag in tag_vocab:  # Count only tags present in tag_vocab
#                         tag_counts[tag] += 1

#     # Total number of tags to normalize tag frequencies
#     total_tags = sum(tag_counts.values())
    
#     # Calculate regular class weights (proportional to frequency)
#     regular_weights = {tag: (count / total_tags) for tag, count in tag_counts.items()}

#     # Calculate inverse class weights (inversely proportional to frequency)
#     inv_weights = {tag: (total_tags / (count + 1e-9)) for tag, count in tag_counts.items()}

#     # Initialize weights tensors based on tag_vocab ordering
#     num_tags = len(tag_vocab)
#     regular_weights_tensor = torch.zeros(num_tags, dtype=torch.float)
#     inv_weights_tensor = torch.zeros(num_tags, dtype=torch.float)

#     # Populate the tensors according to the tag_vocab indices
#     for tag, idx in tag_vocab.items():
#         regular_weights_tensor[idx] = regular_weights.get(tag, 0)
#         inv_weights_tensor[idx] = inv_weights.get(tag, 0)

#     return regular_weights_tensor, inv_weights_tensor


# def evaluate_model(model, dataloader, criterion, device):
#     """
#     Evaluates the model on the given dataloader.
#     """
#     model.eval()
#     total_loss = 0
#     all_preds, all_labels = [], []

#     with torch.no_grad():
#         for inputs, labels, _ in dataloader:
#             # (batch sz, seq len) 
#             inputs, labels = inputs.to(device), labels.to(device)
#             outputs = model(inputs)
#             # print(torch.max(outputs, dim=2))
#             loss = criterion(outputs.view(-1, outputs.shape[-1]), labels.view(-1))
#             # print(outputs.view(-1, outputs.shape[-1]).shape,  labels.view(-1).shape)
#             total_loss += loss.item()
#             _, preds = torch.max(outputs, dim=2)
#             all_preds.extend(preds.view(-1).cpu().numpy())
#             all_labels.extend(labels.view(-1).cpu().numpy())

#     valid_indices = [i for i, label in enumerate(all_labels) if label != -1]
#     valid_preds = [all_preds[i] for i in valid_indices]
#     valid_labels = [all_labels[i] for i in valid_indices]

#     accuracy = np.mean(np.array(valid_preds) == np.array(valid_labels))
#     f1_mac, f1_mic, f1_weight = compute_metrics(valid_preds, valid_labels)

#     return total_loss / len(dataloader), accuracy, f1_mac, f1_mic, f1_weight


# def train_model(model, dataloaders, optimizer, criterion, device,  num_epochs = 50, patience = 10, scheduler = None, ckpt_name = 'best_model.pth'):
#     best_val_f1 = -float('inf')
#     patience_counter = 0
#     model.to(device)

#     for epoch in tqdm( range(num_epochs) ):
#         model.train()
#         for inputs, labels, _ in dataloaders['train']:

#             inputs, labels = inputs.to(device), labels.to(device)
            
#             optimizer.zero_grad()
#             outputs = model(inputs)

#             a_max, a_indx = torch.max(outputs, dim = 2)
#             # print(torch.max(outputs, dim=2))
#             # print('prediction shape ',outputs.shape, 'label shape', labels.shape)
#             # print('prediction',outputs, 'label', labels)
#             loss = criterion(outputs.view(-1, outputs.shape[-1]), labels.view(-1))
#             loss.backward()
#             optimizer.step()

#         if scheduler is not None:
#             scheduler.step()

#             # torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)

        
#         train_loss, train_acc, train_f1_mac, train_f1_mic, train_f1_weighted = evaluate_model(model, dataloaders['train'], criterion, device)
#         val_loss, val_acc, val_f1_mac, val_f1_mic, val_f1_weighted = evaluate_model(model, dataloaders['dev'], criterion, device)

#         # if epoch % 5 == 0: 
#         #     print(f'Epoch {epoch+1}:')
#         #     print(f'Train - Loss: {train_loss:.4f}, Acc: {train_acc:.4f},  F1Mac: {train_f1_mac:.4f}, F1Mic: {train_f1_mic:.4f},  F1W: {train_f1_weighted:.4f}')
#         #     print(f'Val -   Loss: {val_loss:.4f}, Acc: {val_acc:.4f}, F1Mac: {val_f1_mac:.4f}, F1Mic: {val_f1_mic:.4f},  F1W: {val_f1_weighted:.4f}')

            
#         # Early stopping based on validation F1 score
#         if val_f1_mac > best_val_f1:
#             best_val_f1 = val_f1_mac
#             patience_counter = 0
#             torch.save(model.state_dict(), ckpt_name + str(best_val_f1))
#         else:
#             patience_counter += 1
#             if patience_counter >= patience:
#                 print("Early stopping triggered.")
#                 break
#         if epoch > 3 and best_val_f1 < 0.45:
#             print('very poor model')
#             break
#     ### after saving location of model in ckpt_name
#         #checkpoint = torch.load(ckpt_name)
#         # model.load_state_dict(checkpoint)
#     ### and run lower eval, retrieve predictions => return dev prediction file path custom/glove,
#         # file_paths = ['../../data/lstm-data/dev']#, '../../data/lstm-data/dev']
#         # word_vocab, _ = build_vocab(data_files)
#         # output_paths = get_eval_preds(model, file_paths, word_vocab)
#     #  the run   ### evaluate_fb1_model(preds_file_path, gold_file_path)
#         # evaluate_fb1_model(output_paths, gold_file_path = '../../data/lstm-data/dev')
#     print(f'Epoch {epoch+1}:')
#     print(f'Train - Loss: {train_loss:.4f}, Acc: {train_acc:.4f},  F1Mac: {train_f1_mac:.4f}, F1Mic: {train_f1_mic:.4f},  F1W: {train_f1_weighted:.4f}')
#     print(f'Val -   Loss: {val_loss:.4f}, Acc: {val_acc:.4f}, F1Mac: {val_f1_mac:.4f}, F1Mic: {val_f1_mic:.4f},  F1W: {val_f1_weighted:.4f}')

#     return val_acc, train_acc, best_val_f1


# def grid_search(tag_vocab, dim, model_init, datasets, param_grid, class_weights, device):
#     max_f1 = -np.inf
#     best_params_f1 = None
#     all_combinations = list(product(*param_grid.values()))
    
#     for combination in tqdm(all_combinations):
#         lr, optimizer_class, criterion_class, epochs, batch_size, patience, momentum, weight_decay, gamma, step_size, scheduler_type = combination
#         model = model_init(input_dim = dim[0], embedding_dim = 100, hidden_dim = 256, dropout = 0.33, glove_embeddings = glove_embeddings, output_dim = dim[1]).to(device)
        
#         dataloader_dict = {
#             'train': DataLoader(datasets['train'], batch_size=batch_size, shuffle=True, collate_fn=pad_collate),
#             'dev': DataLoader(datasets['dev'], batch_size=batch_size, shuffle=False, collate_fn=pad_collate)
#         }

#         if optimizer_class == optim.SGD:
#             optimizer = optimizer_class(model.parameters(), lr=lr, momentum=momentum, weight_decay=weight_decay)
#         else:
#             optimizer = optimizer_class(model.parameters(), lr=lr)  # Adjust accordingly for other optimizers
        
#         criterion = criterion_class(weight=class_weights, ignore_index=tag_vocab.get('<PAD>', -1)).to(device)
        
#         if scheduler_type == 'StepLR':
#             scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=step_size, gamma=gamma)
#         elif scheduler_type == 'ExponentialLR':
#             scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=gamma)
#         else:
#             scheduler = None  # Handle other scheduler types or lack thereof

#         # Call the training function
#         val_accuracy, train_acc, best_val_f1 = train_model(
#             model, dataloader_dict, optimizer, criterion, device, num_epochs = epochs, patience = patience, scheduler = scheduler, ckpt_name = 'custom_embedding_best_f1.pth',
#         )
        
#         print(f"LR: {lr}, Batch: {batch_size}, Patience: {patience}, Momentum: {momentum}, Weight_decay: {weight_decay}, Val_Acc: {val_accuracy:.4f}, Scheduler Params: {gamma}, {step_size}, {scheduler_type}, Train_Acc: {train_acc:.4f}, Val_F1: {best_val_f1:.4f}")

#         # Update best parameters based on F1 score
#         if best_val_f1 > max_f1: 
#             max_f1 = best_val_f1
#             best_params_f1 = {
#                 'learning_rate': lr, 
#                 'optimizer': optimizer_class.__name__, 
#                 'criterion': criterion_class.__name__,
#                 'epochs': epochs, 
#                 'batch_size': batch_size, 
#                 'patience': patience, 
#                 'momentum': momentum, 
#                 'weight_decay': weight_decay, 
#                 'gamma': gamma, 
#                 'step_size': step_size,
#                 'scheduler_type': scheduler_type, 
#                 'validation_accuracy': val_accuracy, 
#                 'F1': best_val_f1, 
#                 'train_acc': train_acc
#             }
#             write_params(best_params_f1, 'max_f1.txt')


#     return best_params_f1


# data_files = [ '../../data/lstm-data/train', '../../data/lstm-data/dev']
# word_vocab, tag_vocab = build_vocab(data_files)
# dim = len(word_vocab), len(tag_vocab)
# print('dim: ', dim)

# dataset_dict = {
#     'train': IndexedNERDataset('../../data/lstm-data/train', word_vocab, tag_vocab),
#     'dev': IndexedNERDataset('../../data/lstm-data/dev', word_vocab, tag_vocab)
# }


# regular_class_weight, inv_class_weight = get_class_weights(data_files, tag_vocab)
# regular_class_weight, inv_class_weight = regular_class_weight.to(device),  inv_class_weight.to(device)


# print(f"class weight: {regular_class_weight.shape},\n INV: {inv_class_weight.shape}")

# # param_grid = {
# #     'learning_rate': [1, 5e-1, 1e-1, 5e-2, 1e-2],
# #     'optimizer': [optim.SGD], 
# #     'criterion': [torch.nn.CrossEntropyLoss], 
# #     'epochs': [50, 100],
# #     'batch_size': [8, 16, 32, 64],
# #     'patience': [10, 30],
# #     'momentum': [0.9, 0.95, 0.99], 
# #     'weight_decay': [0, 1e-4, 1e-3], 
# #     'gamma': [1e-1, 1e-2, 1e-3], 
# #     'step_size': [30, 50, 70],  
# #     'scheduler_type': ['ExponentialLR', 'StepLR', None], 
# # }
# param_grid = {
#     'learning_rate': [1e-2, 0.05, 1e-1],
#     'optimizer': [optim.SGD], 
#     'criterion': [torch.nn.CrossEntropyLoss], 
#     'epochs': [100],
#     'batch_size': [32],
#     'patience': [100],
#     'momentum': [0.9], 
#     'weight_decay': [1e-4], 
#     'gamma': [1e-1, 1e-2], 
#     'step_size': [25, 50],  
#     'scheduler_type': [ None, 'StepLR'], 
# }


# best_params_f1 = grid_search(tag_vocab, dim, model_init = BILSTMForNER, datasets = dataset_dict, param_grid = param_grid, class_weights = inv_class_weight, device = device)

dim:  (26886, 9)
class weight: torch.Size([9]),
 INV: torch.Size([9])


100%|██████████| 100/100 [16:45<00:00, 10.05s/it]
  4%|▍         | 1/24 [16:45<6:25:20, 1005.25s/it]

Epoch 100:
Train - Loss: 0.0191, Acc: 0.9694,  F1Mac: 0.8971, F1Mic: 0.9694,  F1W: 0.9714
Val -   Loss: 0.3298, Acc: 0.9536, F1Mac: 0.8165, F1Mic: 0.9536,  F1W: 0.9571
LR: 0.01, Batch: 32, Patience: 100, Momentum: 0.9, Weight_decay: 0.0001, Val_Acc: 0.9536, Scheduler Params: 0.1, 25, None, Train_Acc: 0.9694, Val_F1: 0.8467


100%|██████████| 100/100 [17:28<00:00, 10.48s/it]
  8%|▊         | 2/24 [34:13<6:17:51, 1030.53s/it]

Epoch 100:
Train - Loss: 0.0856, Acc: 0.9388,  F1Mac: 0.8147, F1Mic: 0.9388,  F1W: 0.9450
Val -   Loss: 0.2947, Acc: 0.9295, F1Mac: 0.7627, F1Mic: 0.9295,  F1W: 0.9372
LR: 0.01, Batch: 32, Patience: 100, Momentum: 0.9, Weight_decay: 0.0001, Val_Acc: 0.9295, Scheduler Params: 0.1, 25, StepLR, Train_Acc: 0.9388, Val_F1: 0.7643


100%|██████████| 100/100 [16:44<00:00, 10.05s/it]
 12%|█▎        | 3/24 [50:58<5:56:35, 1018.84s/it]

Epoch 100:
Train - Loss: 0.0213, Acc: 0.9707,  F1Mac: 0.9056, F1Mic: 0.9707,  F1W: 0.9724
Val -   Loss: 0.4010, Acc: 0.9535, F1Mac: 0.8193, F1Mic: 0.9535,  F1W: 0.9564
LR: 0.01, Batch: 32, Patience: 100, Momentum: 0.9, Weight_decay: 0.0001, Val_Acc: 0.9535, Scheduler Params: 0.1, 50, None, Train_Acc: 0.9707, Val_F1: 0.8527


100%|██████████| 100/100 [16:43<00:00, 10.03s/it]
 17%|█▋        | 4/24 [1:07:41<5:37:32, 1012.62s/it]

Epoch 100:
Train - Loss: 0.0306, Acc: 0.9648,  F1Mac: 0.8804, F1Mic: 0.9648,  F1W: 0.9672
Val -   Loss: 0.3087, Acc: 0.9503, F1Mac: 0.8111, F1Mic: 0.9503,  F1W: 0.9544
LR: 0.01, Batch: 32, Patience: 100, Momentum: 0.9, Weight_decay: 0.0001, Val_Acc: 0.9503, Scheduler Params: 0.1, 50, StepLR, Train_Acc: 0.9648, Val_F1: 0.8187


100%|██████████| 100/100 [17:22<00:00, 10.43s/it]
 21%|██        | 5/24 [1:25:04<5:24:06, 1023.50s/it]

Epoch 100:
Train - Loss: 0.0166, Acc: 0.9756,  F1Mac: 0.9197, F1Mic: 0.9756,  F1W: 0.9769
Val -   Loss: 0.3395, Acc: 0.9603, F1Mac: 0.8401, F1Mic: 0.9603,  F1W: 0.9626
LR: 0.01, Batch: 32, Patience: 100, Momentum: 0.9, Weight_decay: 0.0001, Val_Acc: 0.9603, Scheduler Params: 0.01, 25, None, Train_Acc: 0.9756, Val_F1: 0.8529


100%|██████████| 100/100 [17:23<00:00, 10.44s/it]
 25%|██▌       | 6/24 [1:42:28<5:09:07, 1030.40s/it]

Epoch 100:
Train - Loss: 0.1079, Acc: 0.9290,  F1Mac: 0.7903, F1Mic: 0.9290,  F1W: 0.9371
Val -   Loss: 0.2935, Acc: 0.9203, F1Mac: 0.7447, F1Mic: 0.9203,  F1W: 0.9300
LR: 0.01, Batch: 32, Patience: 100, Momentum: 0.9, Weight_decay: 0.0001, Val_Acc: 0.9203, Scheduler Params: 0.01, 25, StepLR, Train_Acc: 0.9290, Val_F1: 0.7464


100%|██████████| 100/100 [17:20<00:00, 10.41s/it]
 29%|██▉       | 7/24 [1:59:49<4:52:55, 1033.87s/it]

Epoch 100:
Train - Loss: 0.0183, Acc: 0.9712,  F1Mac: 0.9035, F1Mic: 0.9712,  F1W: 0.9729
Val -   Loss: 0.3415, Acc: 0.9577, F1Mac: 0.8329, F1Mic: 0.9577,  F1W: 0.9603
LR: 0.01, Batch: 32, Patience: 100, Momentum: 0.9, Weight_decay: 0.0001, Val_Acc: 0.9577, Scheduler Params: 0.01, 50, None, Train_Acc: 0.9712, Val_F1: 0.8494


100%|██████████| 100/100 [17:22<00:00, 10.43s/it]
 33%|███▎      | 8/24 [2:17:11<4:36:27, 1036.69s/it]

Epoch 100:
Train - Loss: 0.0378, Acc: 0.9599,  F1Mac: 0.8708, F1Mic: 0.9599,  F1W: 0.9627
Val -   Loss: 0.3049, Acc: 0.9481, F1Mac: 0.8057, F1Mic: 0.9481,  F1W: 0.9521
LR: 0.01, Batch: 32, Patience: 100, Momentum: 0.9, Weight_decay: 0.0001, Val_Acc: 0.9481, Scheduler Params: 0.01, 50, StepLR, Train_Acc: 0.9599, Val_F1: 0.8088


100%|██████████| 100/100 [17:23<00:00, 10.43s/it]
 38%|███▊      | 9/24 [2:34:35<4:19:40, 1038.73s/it]

Epoch 100:
Train - Loss: 0.0050, Acc: 0.9938,  F1Mac: 0.9733, F1Mic: 0.9938,  F1W: 0.9939
Val -   Loss: 0.4870, Acc: 0.9705, F1Mac: 0.8657, F1Mic: 0.9705,  F1W: 0.9711
LR: 0.05, Batch: 32, Patience: 100, Momentum: 0.9, Weight_decay: 0.0001, Val_Acc: 0.9705, Scheduler Params: 0.1, 25, None, Train_Acc: 0.9938, Val_F1: 0.8862


100%|██████████| 100/100 [17:21<00:00, 10.41s/it]
 42%|████▏     | 10/24 [2:51:56<4:02:34, 1039.59s/it]

Epoch 100:
Train - Loss: 0.0162, Acc: 0.9782,  F1Mac: 0.9256, F1Mic: 0.9782,  F1W: 0.9790
Val -   Loss: 0.3430, Acc: 0.9639, F1Mac: 0.8447, F1Mic: 0.9639,  F1W: 0.9656
LR: 0.05, Batch: 32, Patience: 100, Momentum: 0.9, Weight_decay: 0.0001, Val_Acc: 0.9639, Scheduler Params: 0.1, 25, StepLR, Train_Acc: 0.9782, Val_F1: 0.8458


100%|██████████| 100/100 [17:19<00:00, 10.40s/it]
 46%|████▌     | 11/24 [3:09:16<3:45:16, 1039.71s/it]

Epoch 100:
Train - Loss: 0.0040, Acc: 0.9963,  F1Mac: 0.9859, F1Mic: 0.9963,  F1W: 0.9963
Val -   Loss: 0.5621, Acc: 0.9748, F1Mac: 0.8858, F1Mic: 0.9748,  F1W: 0.9748
LR: 0.05, Batch: 32, Patience: 100, Momentum: 0.9, Weight_decay: 0.0001, Val_Acc: 0.9748, Scheduler Params: 0.1, 50, None, Train_Acc: 0.9963, Val_F1: 0.8922


100%|██████████| 100/100 [17:20<00:00, 10.41s/it]
 50%|█████     | 12/24 [3:26:37<3:28:00, 1040.06s/it]

Epoch 100:
Train - Loss: 0.0067, Acc: 0.9894,  F1Mac: 0.9625, F1Mic: 0.9894,  F1W: 0.9896
Val -   Loss: 0.4000, Acc: 0.9713, F1Mac: 0.8685, F1Mic: 0.9713,  F1W: 0.9720
LR: 0.05, Batch: 32, Patience: 100, Momentum: 0.9, Weight_decay: 0.0001, Val_Acc: 0.9713, Scheduler Params: 0.1, 50, StepLR, Train_Acc: 0.9894, Val_F1: 0.8765


100%|██████████| 100/100 [17:18<00:00, 10.39s/it]
 54%|█████▍    | 13/24 [3:43:56<3:10:37, 1039.75s/it]

Epoch 100:
Train - Loss: 0.0065, Acc: 0.9926,  F1Mac: 0.9718, F1Mic: 0.9926,  F1W: 0.9928
Val -   Loss: 0.4920, Acc: 0.9682, F1Mac: 0.8555, F1Mic: 0.9682,  F1W: 0.9691
LR: 0.05, Batch: 32, Patience: 100, Momentum: 0.9, Weight_decay: 0.0001, Val_Acc: 0.9682, Scheduler Params: 0.01, 25, None, Train_Acc: 0.9926, Val_F1: 0.8859


100%|██████████| 100/100 [17:16<00:00, 10.37s/it]
 58%|█████▊    | 14/24 [4:01:13<2:53:08, 1038.88s/it]

Epoch 100:
Train - Loss: 0.0228, Acc: 0.9734,  F1Mac: 0.9089, F1Mic: 0.9734,  F1W: 0.9747
Val -   Loss: 0.3456, Acc: 0.9583, F1Mac: 0.8280, F1Mic: 0.9583,  F1W: 0.9608
LR: 0.05, Batch: 32, Patience: 100, Momentum: 0.9, Weight_decay: 0.0001, Val_Acc: 0.9583, Scheduler Params: 0.01, 25, StepLR, Train_Acc: 0.9734, Val_F1: 0.8289


100%|██████████| 100/100 [17:17<00:00, 10.37s/it]
 62%|██████▎   | 15/24 [4:18:30<2:35:45, 1038.38s/it]

Epoch 100:
Train - Loss: 0.0040, Acc: 0.9970,  F1Mac: 0.9874, F1Mic: 0.9970,  F1W: 0.9970
Val -   Loss: 0.5438, Acc: 0.9758, F1Mac: 0.8892, F1Mic: 0.9758,  F1W: 0.9756
LR: 0.05, Batch: 32, Patience: 100, Momentum: 0.9, Weight_decay: 0.0001, Val_Acc: 0.9758, Scheduler Params: 0.01, 50, None, Train_Acc: 0.9970, Val_F1: 0.8930


100%|██████████| 100/100 [17:17<00:00, 10.38s/it]
 67%|██████▋   | 16/24 [4:35:48<2:18:25, 1038.16s/it]

Epoch 100:
Train - Loss: 0.0075, Acc: 0.9885,  F1Mac: 0.9590, F1Mic: 0.9885,  F1W: 0.9887
Val -   Loss: 0.3820, Acc: 0.9723, F1Mac: 0.8777, F1Mic: 0.9723,  F1W: 0.9729
LR: 0.05, Batch: 32, Patience: 100, Momentum: 0.9, Weight_decay: 0.0001, Val_Acc: 0.9723, Scheduler Params: 0.01, 50, StepLR, Train_Acc: 0.9885, Val_F1: 0.8786


100%|██████████| 100/100 [17:13<00:00, 10.33s/it]
 71%|███████   | 17/24 [4:53:01<2:00:57, 1036.75s/it]

Epoch 100:
Train - Loss: 0.0175, Acc: 0.9856,  F1Mac: 0.9265, F1Mic: 0.9856,  F1W: 0.9875
Val -   Loss: 0.7835, Acc: 0.9496, F1Mac: 0.8044, F1Mic: 0.9496,  F1W: 0.9524
LR: 0.1, Batch: 32, Patience: 100, Momentum: 0.9, Weight_decay: 0.0001, Val_Acc: 0.9496, Scheduler Params: 0.1, 25, None, Train_Acc: 0.9856, Val_F1: 0.8841


100%|██████████| 100/100 [17:13<00:00, 10.34s/it]
 75%|███████▌  | 18/24 [5:10:15<1:43:35, 1035.84s/it]

Epoch 100:
Train - Loss: 0.0071, Acc: 0.9894,  F1Mac: 0.9635, F1Mic: 0.9894,  F1W: 0.9896
Val -   Loss: 0.4321, Acc: 0.9715, F1Mac: 0.8731, F1Mic: 0.9715,  F1W: 0.9721
LR: 0.1, Batch: 32, Patience: 100, Momentum: 0.9, Weight_decay: 0.0001, Val_Acc: 0.9715, Scheduler Params: 0.1, 25, StepLR, Train_Acc: 0.9894, Val_F1: 0.8750


100%|██████████| 100/100 [17:18<00:00, 10.38s/it]
 79%|███████▉  | 19/24 [5:27:33<1:26:23, 1036.61s/it]

Epoch 100:
Train - Loss: 0.0071, Acc: 0.9964,  F1Mac: 0.9830, F1Mic: 0.9964,  F1W: 0.9965
Val -   Loss: 0.7935, Acc: 0.9638, F1Mac: 0.8465, F1Mic: 0.9638,  F1W: 0.9634
LR: 0.1, Batch: 32, Patience: 100, Momentum: 0.9, Weight_decay: 0.0001, Val_Acc: 0.9638, Scheduler Params: 0.1, 50, None, Train_Acc: 0.9964, Val_F1: 0.8824


100%|██████████| 100/100 [17:15<00:00, 10.35s/it]
 83%|████████▎ | 20/24 [5:44:49<1:09:04, 1036.22s/it]

Epoch 100:
Train - Loss: 0.0026, Acc: 0.9977,  F1Mac: 0.9899, F1Mic: 0.9977,  F1W: 0.9977
Val -   Loss: 0.5741, Acc: 0.9759, F1Mac: 0.8898, F1Mic: 0.9759,  F1W: 0.9757
LR: 0.1, Batch: 32, Patience: 100, Momentum: 0.9, Weight_decay: 0.0001, Val_Acc: 0.9759, Scheduler Params: 0.1, 50, StepLR, Train_Acc: 0.9977, Val_F1: 0.8914


100%|██████████| 100/100 [17:11<00:00, 10.32s/it]
 88%|████████▊ | 21/24 [6:02:00<51:44, 1034.84s/it]  

Epoch 100:
Train - Loss: 0.0059, Acc: 0.9958,  F1Mac: 0.9836, F1Mic: 0.9958,  F1W: 0.9959
Val -   Loss: 0.7358, Acc: 0.9656, F1Mac: 0.8572, F1Mic: 0.9656,  F1W: 0.9653
LR: 0.1, Batch: 32, Patience: 100, Momentum: 0.9, Weight_decay: 0.0001, Val_Acc: 0.9656, Scheduler Params: 0.01, 25, None, Train_Acc: 0.9958, Val_F1: 0.8882


100%|██████████| 100/100 [17:12<00:00, 10.32s/it]
 92%|█████████▏| 22/24 [6:19:12<34:28, 1034.07s/it]

Epoch 100:
Train - Loss: 0.0108, Acc: 0.9858,  F1Mac: 0.9497, F1Mic: 0.9858,  F1W: 0.9861
Val -   Loss: 0.3964, Acc: 0.9688, F1Mac: 0.8606, F1Mic: 0.9688,  F1W: 0.9698
LR: 0.1, Batch: 32, Patience: 100, Momentum: 0.9, Weight_decay: 0.0001, Val_Acc: 0.9688, Scheduler Params: 0.01, 25, StepLR, Train_Acc: 0.9858, Val_F1: 0.8635


100%|██████████| 100/100 [17:14<00:00, 10.34s/it]
 96%|█████████▌| 23/24 [6:36:27<17:14, 1034.18s/it]

Epoch 100:
Train - Loss: 0.0075, Acc: 0.9972,  F1Mac: 0.9898, F1Mic: 0.9972,  F1W: 0.9973
Val -   Loss: 0.8958, Acc: 0.9640, F1Mac: 0.8569, F1Mic: 0.9640,  F1W: 0.9631
LR: 0.1, Batch: 32, Patience: 100, Momentum: 0.9, Weight_decay: 0.0001, Val_Acc: 0.9640, Scheduler Params: 0.01, 50, None, Train_Acc: 0.9972, Val_F1: 0.8860


100%|██████████| 100/100 [17:10<00:00, 10.31s/it]
100%|██████████| 24/24 [6:53:38<00:00, 1034.10s/it]

Epoch 100:
Train - Loss: 0.0041, Acc: 0.9958,  F1Mac: 0.9825, F1Mic: 0.9958,  F1W: 0.9958
Val -   Loss: 0.5586, Acc: 0.9738, F1Mac: 0.8782, F1Mic: 0.9738,  F1W: 0.9738
LR: 0.1, Batch: 32, Patience: 100, Momentum: 0.9, Weight_decay: 0.0001, Val_Acc: 0.9738, Scheduler Params: 0.01, 50, StepLR, Train_Acc: 0.9958, Val_F1: 0.8867





In [None]:
# Early stopping triggered.
# Epoch 65:
# Train - Loss: 0.0191, Acc: 0.9946,  F1Mac: 0.9692, F1Mic: 0.9946,  F1W: 0.9948
# Val -   Loss: 0.6446, Acc: 0.9683, F1Mac: 0.8465, F1Mic: 0.9683,  F1W: 0.9690
# LR: 0.09, Batch: 32, Patience: 15, Momentum: 0.9, Weight_decay: 0.0001, Val_Acc: 0.9683, Scheduler Params: 0.1, 25, None, Train_Acc: 0.9946, Val_F1: 0.8902

The bonus task is to equip the BLSTM model in Task 2 with a CNN module
to capture character-level information (see page 45 of the lecture 12 slides
for the network architecture). The character embedding dimension is set to 30.
You need to tune the other hyper-parameters of the CNN module, such as the
number of CNN layers, the kernel size and the output dimension of each CNN layer.
What are the precision, recall and F1 score on the dev data? Predict the NER
tags of the sentences in the test data and output the predictions in a file
named pred, in the same format as the training data. (Hint: the bonus points
are assigned based on the ranking of your model's F1 score on the test data.)