# 1.1 Word Embedding

In [1]:
import gensim.downloader
import os
model_name = 'word2vec-google-news-300'

# Define the full path for the model
model_path = os.path.join('models', model_name)
# Check if the model is already in the specified directory
if os.path.isfile(model_path):
    # Load the model from the specified directory
    print(f"Loading {model_name}...")
    w2v = gensim.models.KeyedVectors.load(model_path)
else:
    print(f"Downloading...")
    # Download the model using gensim.downloader and save it to the directory
    w2v = gensim.downloader.load(model_name)
    w2v.save(model_path)

Downloading...


FileNotFoundError: [Errno 2] No such file or directory: 'models/word2vec-google-news-300.vectors.npy'

## Question 1.1
use cosine similarity to find the most similar 
word to each of these worsds

In [2]:
words = ["student", "Apple", "apple"]

# Print the header
print("Word\t\tMost similar word\tCosine similarity")
print("=======================================================================")

for word in words:
    # Use the downloaded vectors as usual:
    most_similar = w2v.most_similar(positive=[word], topn=1)[0]
    print("{:<15}\t{:<15}\t\t{:.4f}".format(word, most_similar[0], most_similar[1]))


Word		Most similar word	Cosine similarity
student        	students       		0.7295
Apple          	Apple_AAPL     		0.7457
apple          	apples         		0.7204


# 1.2 Data
process: https://wandb.ai/mostafaibrahim17/ml-articles/reports/Named-Entity-Recognition-With-HuggingFace-Using-PyTorch-and-W-B--Vmlldzo0NDgzODA2

In [3]:
# read data
from datasets import Dataset, DatasetDict

def read_conll_file(file_path):
    with open(file_path, "r") as f:
        content = f.read().strip()
        sentences = content.split("\n\n")
        data = []
        for sentence in sentences:
            tokens = sentence.split("\n")
            token_data = []
            for token in tokens:
                token_data.append(token.split())
            data.append(token_data)
    return data


train_data = read_conll_file("../datasets/CoNLL2003/eng.train")
validation_data = read_conll_file("../datasets/CoNLL2003/eng.testa")
test_data = read_conll_file("../datasets/CoNLL2003/eng.testb")

  from .autonotebook import tqdm as notebook_tqdm


In [4]:
# prepare data
def convert_to_dataset(data, label_map):
    formatted_data = {"tokens": [], "ner_tags": []}
    for sentence in data:
        tokens = [token_data[0] for token_data in sentence]
        ner_tags = [label_map[token_data[3]] for token_data in sentence]
        formatted_data["tokens"].append(tokens)
        formatted_data["ner_tags"].append(ner_tags)
    return Dataset.from_dict(formatted_data)


label_list = sorted(list(set([token_data[3] for sentence in train_data for token_data in sentence])))
label_map = {label: i for i, label in enumerate(label_list)}

train_dataset = convert_to_dataset(train_data, label_map)
validation_dataset = convert_to_dataset(validation_data, label_map)
test_dataset = convert_to_dataset(test_data, label_map)


In [5]:
label_map

{'B-LOC': 0,
 'B-MISC': 1,
 'B-ORG': 2,
 'I-LOC': 3,
 'I-MISC': 4,
 'I-ORG': 5,
 'I-PER': 6,
 'O': 7}

## Question 1.2
(a) Describe the size (number of sentences) of the training, development and test file for CoNLL2003.
Specify the complete set of all possible word labels based on the tagging scheme (IO, BIO,
etc.) you chos

(b) Choose an example sentence from the training set of CoNLL2003 that has at least two named
entities with more than one word. Explain how to form complete named entities from the label
for each word, and list all the named entities in this sentence.e.

In [6]:
# (a)
print("Dataset Sizes:")
print(f"Training:\t{train_dataset.num_rows} sentences")
print(f"Development:\t{validation_dataset.num_rows} sentences")
print(f"Test:\t\t{test_dataset.num_rows} sentences")

print("=======================================================================")
print("All Possible Word Labels (BIO):\n", label_list)


!! (b) means finding the sentence that contains at least two distinct named entities, and each of those entities consists of more than one word.
=> but seems in training dataset, there isn't this kind of sentence

In [7]:
from tqdm import tqdm
lists2 = [0,1,2] # ['B-LOC', 'B-MISC', 'B-ORG']

def has_at_least_two_common_elements(list1, list2=[0,1,2]):
    common_elements = [value for value in list1 if value in list2]
    return len(common_elements) >= 2

for i in tqdm(range(len(train_dataset))):
    tokens = train_dataset['tokens'][i]
    ner_tags = train_dataset['ner_tags'][i]

    if has_at_least_two_common_elements(ner_tags):
        print(i)
        print(tokens)
        print(ner_tags)
        print([label_list[tag] for tag in ner_tags])
        break


In [8]:
# [label_list[tag] for tag in train_dataset[5969]['ner_tags']]

In [9]:
# (b)
def form_complete_ne(dataset, i):
    # define sets of tags
    begin_tags = {'B-LOC', 'B-ORG', 'B-MISC'}
    inside_tags = {'I-ORG', 'I-LOC', 'I-PER', 'I-MISC'}
    outside_tags = {'O'}

    words = []
    word = []
    entities = []
    entity = []

    tokens = dataset['tokens'][i]
    ner_tags = dataset['ner_tags'][i]

    for token, tag in zip(tokens, ner_tags):
        tag = label_list[tag]

        if (tag in begin_tags or tag in outside_tags) and word:
            words.append(' '.join(word))
            entities.append(' '.join(entity))
            word = []
            entity = []

        if tag in begin_tags or tag in inside_tags:
            word.append(token)
            entity.append(tag)

    if word:
        words.append(' '.join(word))
        entities.append(' '.join(entity))

    return words, entities

form_complete_ne(train_dataset, 5969)


# 1.3 Model

1. `<PAD>` Token:

This token is typically initialized to a zero vector because it's meant to be a neutral padding value that doesn't interfere with computation

2. `<UNK>` Token:
- Zero Vector: Similar to the <PAD> token, you can initialize it to a zero vector.
- Average Vector: Initialize it as the average of all word vectors in your pretrained embeddings. This gives it a kind of "average" representation of the language.
- Random Vector: Randomly initialize it, which might add some noise and robustness to the embeddi

For many tasks, initializing the <UNK> token as the average of all word vectors works well. It makes the <UNK> token have a representation that is, on average, similar to any random word from the vocabulary, which can be beneficial since the <UNK> token is used for words that aren't in the training vocabulary but could be anywhere in the semantic space.ngs.s.

In [10]:
# prepare dataset
datasets = DatasetDict({
    "train": train_dataset,
    "validation": validation_dataset,
    "test": test_dataset,
})


In [11]:
import numpy as np

# Out-of-vocabulary (OOV) words
# 1. can be replaced with a special token, such as "<OOV>" or "<UNK>".
# 2. can be ignored.

word2idx = w2v.key_to_index
print(f"whether <UNK> in w2v: {'<UNK>' in word2idx}") # False
print(f"whether <PAD> in w2v: {'<PAD>' in word2idx}") # False

# Add '<UNK>' and '<PAD>' tokens to the vocabulary index
word2idx['<UNK>'] = len(word2idx)
word2idx['<PAD>'] = len(word2idx)

print(f"word2idx['<UNK>']: {word2idx['<UNK>']}")
print(f"word2idx['<PAD>']: {word2idx['<PAD>']}")

# add the '<UNK>' word to the vocabulary of the Word2Vec model
# initialize it with the average of all word vectors in the pretrained embeddings.
unk_vector = np.mean(w2v.vectors, axis=0)
w2v.vectors = np.vstack([w2v.vectors, unk_vector])
print("after insert UNK: ", w2v.vectors.shape)

# add the '<PAD>' word to the vocabulary of the Word2Vec model
# initialize it with a row of zeros in the vectors matrix.
w2v.vectors = np.vstack([w2v.vectors, np.zeros(w2v.vectors[0].shape)])
print("after insert UNK: ", w2v.vectors.shape)


whether <UNK> in w2v: False
whether <PAD> in w2v: False
word2idx['<UNK>']: 3000000
word2idx['<PAD>']: 3000001


after insert UNK:  (3000001, 300)
after insert UNK:  (3000002, 300)


# Tokenize Dataset

In [12]:
# Map words to Indices
def sentence_to_indices(sentence, vocab):
    return [vocab.get(word, vocab.get('<UNK>')) for word in sentence]

tag2idx = {
    'B-LOC': 0,
    'B-MISC': 1,
    'B-ORG': 2,
    'I-LOC': 3,
    'I-MISC': 4,
    'I-ORG': 5,
    'I-PER': 6,
    'O': 7,
    'PAD': 8
}

In [13]:
import torch
import torch.nn as nn
from torch.nn.utils.rnn import pad_sequence
from torch.utils.data import Dataset, DataLoader

class NERDataset(Dataset):
    def __init__(self, sentences, tags, vocab):
        self.sentences = [torch.tensor(sentence_to_indices(sentence, vocab)) for sentence in sentences]
        self.tags = [torch.tensor(tag) for tag in tags]

    def __len__(self):
        return len(self.sentences)

    def __getitem__(self, idx):
        return self.sentences[idx], self.tags[idx]

# Create PyTorch datasets and data loaders
train_dataset = NERDataset(train_dataset['tokens'], train_dataset['ner_tags'], word2idx)
validation_dataset = NERDataset(validation_dataset['tokens'], validation_dataset['ner_tags'], word2idx)
test_dataset = NERDataset(test_dataset['tokens'], test_dataset['ner_tags'], word2idx)


def collate_fn(batch):
    sentences, tags = zip(*batch)
    sentences_padded = pad_sequence(sentences, batch_first=True, padding_value=word2idx['<PAD>'])
    tags_padded = pad_sequence(tags, batch_first=True, padding_value=tag2idx['PAD'])
    return sentences_padded, tags_padded


# Model

In [28]:
embedding_matrix = torch.FloatTensor(w2v.vectors)

class LSTMNERModel(nn.Module):
    def __init__(self, embedding_dim, hidden_dim, output_dim):
        super(LSTMNERModel, self).__init__()
        self.embedding = nn.Embedding.from_pretrained(embedding_matrix, padding_idx=word2idx['<PAD>'], freeze=True)
        self.lstm = nn.LSTM(embedding_dim, hidden_dim, batch_first=True)
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        x = self.embedding(x)
        lstm_out, _ = self.lstm(x)
        tag_space = self.fc(lstm_out)
        tag_scores = torch.log_softmax(tag_space, dim=-1)
        return tag_scores

class BiLSTMNERModel(nn.Module):
    def __init__(self, embedding_dim, hidden_dim, num_of_layers, output_dim):
        super(BiLSTMNERModel, self).__init__()
        self.embedding = nn.Embedding.from_pretrained(embedding_matrix, padding_idx=word2idx['<PAD>'], freeze=True)
        self.lstm = nn.LSTM(embedding_dim, hidden_dim, num_of_layers, batch_first=True, bidirectional=True)
        self.fc = nn.Linear(hidden_dim*2, output_dim)

    def forward(self, x):
        x = self.embedding(x)
        lstm_out, _ = self.lstm(x)
        tag_space = self.fc(lstm_out)
        tag_scores = torch.log_softmax(tag_space, dim=-1)
        return tag_scores

# Utilities

In [15]:
# Hyperparameters
EMBEDDING_DIM = w2v[0].shape[0]
print(f"EMBEDDING_DIM: {EMBEDDING_DIM}")
HIDDEN_DIM = 150
VOCAB_SIZE = len(word2idx)
print(f"VOCAB_SIZE: {VOCAB_SIZE}")
TAGSET_SIZE = len(tag2idx)
print(f"TAGSET_SIZE: {TAGSET_SIZE}")
MAX_EPOCHS = 50

class EarlyStopper:
    def __init__(self, patience=5):
        self.patience = patience
        self.counter = 0
        self.max_f1 = 0

    def early_stop(self, f1):
        if f1 > self.max_f1:
            self.max_f1 = f1
            self.counter = 0
        else:
            self.counter += 1
            if self.counter >= self.patience:
                return True
        return False


EMBEDDING_DIM: 300
VOCAB_SIZE: 3000002
TAGSET_SIZE: 9


# Train & Test
https://necromuralist.github.io/Neurotic-Networking/posts/nlp/ner-evaluating-the-model/index.html

In [23]:
from seqeval.metrics import f1_score
from seqeval.metrics import classification_report
from seqeval.scheme import IOB1
idx2tag = {v: k for k, v in tag2idx.items()}
def idx_to_tags(indices):
    return [idx2tag[idx] for idx in indices]

def test(model, batch_size):
    # Placeholder to store true and predicted tags for the test set
    y_true_test = []
    y_pred_test = []
    test_loader = DataLoader(test_dataset, batch_size, shuffle=False, collate_fn=collate_fn)

    # Evaluate the model on the test dataset
    model.eval()  # Set the model to evaluation mode
    with torch.no_grad():
        for sentences, tags in test_loader:
            tag_scores = model(sentences)
            predictions = tag_scores.argmax(dim=-1).tolist()

            # Convert index to tags
            # Note: filtering out padding tokens
            for sentence, true_seq, pred_seq in zip(sentences, tags.tolist(), predictions):
                valid_len = (sentence != word2idx['<PAD>']).sum().item()
                true_tags = idx_to_tags(true_seq[:valid_len])
                pred_tags = idx_to_tags(pred_seq[:valid_len])
                y_true_test.append(true_tags)
                y_pred_test.append(pred_tags)

    # Compute F1 score for the test set
    f1_test = f1_score(y_true_test, y_pred_test)
    #report_test = classification_report(y_true_test, y_pred_test)

    print("F1 Score on Test Set:", f1_test)
    return f1_test
    #print("Classification Report on Test Set:\n", report_test)


In [25]:
def find_optimal_batch_size(parameters):
    f1_score_batch = []
    f1_score_test = []
    for batch_size in parameters:
        model = BiLSTMNERModel(EMBEDDING_DIM, HIDDEN_DIM, TAGSET_SIZE)
        loss_function = nn.NLLLoss(ignore_index=tag2idx['PAD'])
        optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
        print("Batch Size: ", batch_size)
        train_loader = DataLoader(train_dataset, batch_size, shuffle=True, collate_fn=collate_fn)
        validation_loader = DataLoader(validation_dataset, batch_size, shuffle=False, collate_fn=collate_fn)
        early_stopper = EarlyStopper()

        for epoch in range(MAX_EPOCHS):
            total_loss = 0
            model.train()
            for sentences, tags in train_loader:
                tag_scores = model(sentences)
                loss = loss_function(tag_scores.view(-1, TAGSET_SIZE), tags.view(-1))
                # Backpropagation
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
                total_loss += loss.item()
            print(f"Epoch {epoch+1}, Loss: {total_loss}")

            model.eval()
            y_true = []
            y_pred = []
            with torch.no_grad():
                for sentences, tags in validation_loader:
                    tag_scores = model(sentences)
                    predictions = tag_scores.argmax(dim=-1).tolist()
                    # Convert index to tags and compute F1 score
                    for sentence, true_seq, pred_seq in zip(sentences, tags.tolist(), predictions):
                        valid_len = (sentence != word2idx['<PAD>']).sum().item()
                        true_tags = [idx2tag[idx] for idx in true_seq[:valid_len]]
                        pred_tags = [idx2tag[idx] for idx in pred_seq[:valid_len]]
                        y_true.append(true_tags)
                        y_pred.append(pred_tags)

            f1 = f1_score(y_true, y_pred)
            print("F1 Score:", f1)
            if early_stopper.early_stop(f1):
                f1_score_batch.append(f1)
                break
        f1_score_test.append(test(model, batch_size))
    print("f1 of testset with different batch size: ", f1_score_test)
    print("f1 of trainset with different batch size: ", f1_score_batch)
    return f1_score_test

batch_sizes = [16, 32, 64, 128, 256]
f1_score_batch = find_optimal_batch_size(batch_sizes)
optimal_batch_size = batch_sizes[np.argmax(f1_score_batch)]


Batch Size:  16
Epoch 1, Loss: 199.0441536065191
F1 Score: 0.8257945182444929
Epoch 2, Loss: 85.18818697798997
F1 Score: 0.8537429318929868
Epoch 3, Loss: 64.29607850406319
F1 Score: 0.8659322033898305
Epoch 4, Loss: 49.4091847599484
F1 Score: 0.8773888043294436
Epoch 5, Loss: 37.594765432178974
F1 Score: 0.884465526003727
Epoch 6, Loss: 28.330530517501757
F1 Score: 0.8881350182342465
Epoch 7, Loss: 20.579610534594394
F1 Score: 0.8809140922556072
Epoch 8, Loss: 13.929533022630494
F1 Score: 0.888926544098958
Epoch 9, Loss: 10.1325377827161
F1 Score: 0.8877041550308877
Epoch 10, Loss: 6.61751740440377
F1 Score: 0.8836342062886685
Epoch 11, Loss: 6.029687131769606
F1 Score: 0.8846056729231546
Epoch 12, Loss: 4.695050049253041
F1 Score: 0.8828480053862986
Epoch 13, Loss: 4.621472282116883
F1 Score: 0.8882288042094543
F1 Score on Test Set: 0.8262062859672421
Batch Size:  32
Epoch 1, Loss: 130.979077398777
F1 Score: 0.79986517232662
Epoch 2, Loss: 49.51226925291121
F1 Score: 0.84066026612767

In [26]:
# find optimal hidden dimension
def find_optimal_hidden_dim(parameters):
    f1_score_hidden = []
    f1_score_test = []
    for hidden_dim in parameters:
        model = BiLSTMNERModel(EMBEDDING_DIM, hidden_dim, TAGSET_SIZE)
        loss_function = nn.NLLLoss(ignore_index=tag2idx['PAD'])
        optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
        print("Hidden Dimension: ", hidden_dim)
        train_loader = DataLoader(train_dataset, optimal_batch_size, shuffle=True, collate_fn=collate_fn)
        validation_loader = DataLoader(validation_dataset, optimal_batch_size, shuffle=False, collate_fn=collate_fn)
        early_stopper = EarlyStopper()

        for epoch in range(MAX_EPOCHS):
            total_loss = 0
            model.train()
            for sentences, tags in train_loader:
                tag_scores = model(sentences)
                loss = loss_function(tag_scores.view(-1, TAGSET_SIZE), tags.view(-1))
                total_loss += loss.item()
                # Backpropagation
                model.zero_grad()
                loss.backward()
                optimizer.step()
            print(f"Epoch {epoch+1}, Loss: {total_loss}")

            model.eval()
            y_true = []
            y_pred = []
            with torch.no_grad():
                for sentences, tags in validation_loader:
                    tag_scores = model(sentences)
                    predictions = tag_scores.argmax(dim=-1).tolist()
                    # Convert index to tags and compute F1 score
                    for sentence, true_seq, pred_seq in zip(sentences, tags.tolist(), predictions):
                        valid_len = (sentence != word2idx['<PAD>']).sum().item()
                        true_tags = idx_to_tags(true_seq[:valid_len])
                        pred_tags = idx_to_tags(pred_seq[:valid_len])
                        y_true.append(true_tags)
                        y_pred.append(pred_tags)
            f1 = f1_score(y_true, y_pred)
            print("F1 Score:", f1)
            if early_stopper.early_stop(f1):
                f1_score_hidden.append(f1)
                break
        f1_score_test.append(test(model, optimal_batch_size))
    print("f1 of testset with different hidden dim: ", f1_score_test)
    print("f1 of trainset with different hidden dim: ", f1_score_hidden)
    return f1_score_test

hidden_dims = [32, 64, 128, 256]
f1_score_hidden = find_optimal_hidden_dim(hidden_dims)
optimal_hidden_dim = hidden_dims[np.argmax(f1_score_hidden)]


Hidden Dimension:  32
Epoch 1, Loss: 293.0157448761165
F1 Score: 0.7713503195285916
Epoch 2, Loss: 113.53828140161932
F1 Score: 0.8185937237791576
Epoch 3, Loss: 92.73915387224406
F1 Score: 0.8364674278038953
Epoch 4, Loss: 77.6808013478294
F1 Score: 0.8493335582925595
Epoch 5, Loss: 68.0739809833467
F1 Score: 0.8589754409654823
Epoch 6, Loss: 59.714668814092875
F1 Score: 0.863010225640159
Epoch 7, Loss: 52.77886861003935
F1 Score: 0.8632558139534884
Epoch 8, Loss: 46.79307142365724
F1 Score: 0.8727733220768256
Epoch 9, Loss: 41.281550084240735
F1 Score: 0.8693739424703891
Epoch 10, Loss: 36.87571499473415
F1 Score: 0.8758335443572213
Epoch 11, Loss: 32.7561949826777
F1 Score: 0.8726658217152513
Epoch 12, Loss: 29.140497231506743
F1 Score: 0.8734487125369355
Epoch 13, Loss: 26.04674756480381
F1 Score: 0.8753373819163293
Epoch 14, Loss: 22.698843191028573
F1 Score: 0.8769438810006761
Epoch 15, Loss: 19.987431074492633
F1 Score: 0.8735127837313307
Epoch 16, Loss: 17.727546258829534
F1 Sc

In [29]:
# find optimal number of layers
def find_optimal_num_of_layers(parameters):
    f1_score_layers = []
    f1_score_test = []
    for num_of_layers in parameters:
        model = BiLSTMNERModel(EMBEDDING_DIM, optimal_hidden_dim, num_of_layers, TAGSET_SIZE)
        loss_function = nn.NLLLoss(ignore_index=tag2idx['PAD'])
        optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
        print("Number of Layers: ", num_of_layers)
        train_loader = DataLoader(train_dataset, optimal_batch_size, shuffle=True, collate_fn=collate_fn)
        validation_loader = DataLoader(validation_dataset, optimal_batch_size, shuffle=False, collate_fn=collate_fn)
        early_stopper = EarlyStopper()

        for epoch in range(MAX_EPOCHS):
            total_loss = 0
            model.train()
            for sentences, tags in train_loader:
                tag_scores = model(sentences)
                loss = loss_function(tag_scores.view(-1, TAGSET_SIZE), tags.view(-1))
                total_loss += loss.item()
                # Backpropagation
                model.zero_grad()
                loss.backward()
                optimizer.step()
            print(f"Epoch {epoch+1}, Loss: {total_loss}")

            model.eval()
            y_true = []
            y_pred = []
            with torch.no_grad():
                for sentences, tags in validation_loader:
                    tag_scores = model(sentences)
                    predictions = tag_scores.argmax(dim=-1).tolist()
                    # Convert index to tags and compute F1 score
                    for sentence, true_seq, pred_seq in zip(sentences, tags.tolist(), predictions):
                        valid_len = (sentence != word2idx['<PAD>']).sum().item()
                        true_tags = idx_to_tags(true_seq[:valid_len])
                        pred_tags = idx_to_tags(pred_seq[:valid_len])
                        y_true.append(true_tags)
                        y_pred.append(pred_tags)
            f1 = f1_score(y_true, y_pred)
            print("F1 Score:", f1)
            if early_stopper.early_stop(f1):
                f1_score_layers.append(f1)
                break
        f1_score_test.append(test(model, optimal_batch_size))
    print("f1 of testset with different number of layers: ", f1_score_test)
    print("f1 of trainset with different number of layers: ", f1_score_layers)
    return f1_score_test

num_of_layers = [1,2,3,4,5]
f1_score_layers = find_optimal_num_of_layers(num_of_layers)
optimal_num_of_layers = num_of_layers[np.argmax(f1_score_layers)]
optimal_num_of_layers


Number of Layers:  1
Epoch 1, Loss: 183.8239844245836
F1 Score: 0.823187414500684
Epoch 2, Loss: 79.49619261920452
F1 Score: 0.8633508742149041
Epoch 3, Loss: 58.63065435970202
F1 Score: 0.8780981284774912
Epoch 4, Loss: 44.700760033680126
F1 Score: 0.8831124702684336
Epoch 5, Loss: 32.25607054203283
F1 Score: 0.884514212982605
Epoch 6, Loss: 23.299222364788875
F1 Score: 0.8850690933602966
Epoch 7, Loss: 15.971395267988555
F1 Score: 0.8807416772018541
Epoch 8, Loss: 10.239005498384358
F1 Score: 0.8859470468431772
Epoch 9, Loss: 7.2644547176896594
F1 Score: 0.8842443729903536
Epoch 10, Loss: 5.662726964641479
F1 Score: 0.8845795579642645
Epoch 11, Loss: 5.449617199505155
F1 Score: 0.8761440927029978
Epoch 12, Loss: 4.49406147076661
F1 Score: 0.8848823429829017
Epoch 13, Loss: 4.005842244201631
F1 Score: 0.8899796885578876
Epoch 14, Loss: 4.083229969583044
F1 Score: 0.8824871166680748
Epoch 15, Loss: 3.390537276485702
F1 Score: 0.8860865910046238
Epoch 16, Loss: 3.7772921284158656
F1 Sco



F1 Score: 0.8492056749638943
Epoch 2, Loss: 71.19694637879729
F1 Score: 0.8822287885183623
Epoch 3, Loss: 51.88021343154833
F1 Score: 0.8898604338321844
Epoch 4, Loss: 38.12816311826464
F1 Score: 0.8943459915611813
Epoch 5, Loss: 28.71021058491897
F1 Score: 0.8878931349340548
Epoch 6, Loss: 22.24855596909765
F1 Score: 0.8938105263157894
Epoch 7, Loss: 17.218069708556868
F1 Score: 0.896849593495935
Epoch 8, Loss: 13.590502724488033
F1 Score: 0.8964997487857981
Epoch 9, Loss: 11.288492613952258
F1 Score: 0.8931116389548693
Epoch 10, Loss: 9.044924925932719
F1 Score: 0.8977702986958349
Epoch 11, Loss: 7.881069247639971
F1 Score: 0.8990872210953347
Epoch 12, Loss: 9.99096315215138
F1 Score: 0.9045099521289998
Epoch 13, Loss: 3.6907899444704526
F1 Score: 0.9063997981666808
Epoch 14, Loss: 2.7473539108609657
F1 Score: 0.899613250378342
Epoch 15, Loss: 2.9962779560319177
F1 Score: 0.9026743398781314
Epoch 16, Loss: 3.7344546610993348
F1 Score: 0.8995713925539962
Epoch 17, Loss: 4.789533712053

3

In [32]:
# Final training
from torch.utils.data import DataLoader, ConcatDataset

model = BiLSTMNERModel(EMBEDDING_DIM, optimal_hidden_dim, optimal_num_of_layers, TAGSET_SIZE)
loss_function = nn.NLLLoss(ignore_index=tag2idx['PAD'])
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
print("Final Training")

merged_train_dataset = ConcatDataset([train_dataset, validation_dataset])
merged_train_loader = DataLoader(merged_train_dataset, optimal_batch_size, shuffle=True, collate_fn=collate_fn)
early_stopper = EarlyStopper()

for epoch in range(MAX_EPOCHS):
    total_loss = 0
    model.train()
    for sentences, tags in merged_train_loader:
        tag_scores = model(sentences)
        loss = loss_function(tag_scores.view(-1, TAGSET_SIZE), tags.view(-1))
        total_loss += loss.item()
        # Backpropagation
        model.zero_grad()
        loss.backward()
        optimizer.step()
    print(f"Epoch {epoch+1}, Loss: {total_loss}")
    f1_test = test(model, optimal_batch_size)
    if early_stopper.early_stop(f1_test):
        break
print("F1 Score on Test Set:", f1_test)


Final Training
Epoch 1, Loss: 211.2243566084653
F1 Score on Test Set: 0.8203576830678887
Epoch 2, Loss: 85.49815228581429
F1 Score on Test Set: 0.8310655592860929
Epoch 3, Loss: 63.95651657227427
F1 Score on Test Set: 0.8478935698447895
Epoch 4, Loss: 49.66351771284826
F1 Score on Test Set: 0.8482000354672814
Epoch 5, Loss: 38.52881736995187
F1 Score on Test Set: 0.8559714795008913
Epoch 6, Loss: 31.395666539494414
F1 Score on Test Set: 0.8584579976985041
Epoch 7, Loss: 24.271331545722205
F1 Score on Test Set: 0.8539925306775743
Epoch 8, Loss: 19.308925223827828
F1 Score on Test Set: 0.8639305026150165
Epoch 9, Loss: 17.019748357968638
F1 Score on Test Set: 0.8573200992555832
Epoch 10, Loss: 13.347994626405125
F1 Score on Test Set: 0.8584507042253522
Epoch 11, Loss: 12.555585913782124
F1 Score on Test Set: 0.8533427912750488
Epoch 12, Loss: 11.178530612298346
F1 Score on Test Set: 0.8586966211999643
Epoch 13, Loss: 9.511184494767804
F1 Score on Test Set: 0.8604405286343613
F1 Score on 

# Inference

In [None]:
# def infer(sentence):
#     # Tokenize the sentence
#     tokens = sentence.split()

#     # Convert tokens to indices
#     token_indices = torch.tensor([sentence_to_indices(tokens, word2idx)])

#     # Get predictions from the model
#     model.eval()
#     with torch.no_grad():
#         tag_scores = model(token_indices)
#         predictions = tag_scores.argmax(dim=-1).tolist()[0]

#     # Convert index to tags
#     predicted_tags = idx_to_tags(predictions, {v: k for k, v in tag2idx.items()})

#     """
#     # Display the results
#     for token, tag in zip(tokens, predicted_tags):
#         print(f"{token}: {tag}")
#     """
#     # Prepare aligned output
#     token_line = ""
#     tag_line = ""
#     for token, tag in zip(tokens, predicted_tags):
#         space_padding = max(len(token), len(tag)) + 2  # +2 to add some space between words for better readability
#         token_line += token.ljust(space_padding)
#         tag_line += tag.ljust(space_padding)

#     # Display the results
#     print(token_line)
#     print(tag_line)


# # Example usage:
# sentence = "EU rejects German call to boycott British lamb ."
# sentence = "Barack Obama was born in Hawaii and worked as the President of the United States."
# infer(sentence)

# sentence = "Jiang Yuxin was born in Shenyang and is now a student in Nanyang Technological University."
# infer(sentence)


# Analysis
e.g. f1 score per class: https://medium.com/illuin/named-entity-recognition-with-bilstm-cnns-632ba83d3d41
## data report
https://github.com/senadkurtisi/pytorch-NER