In [None]:
# Import Libraries
import numpy as np
import torch
import torch.nn as nn
from torch.optim import Adam
from torch.utils.data import Dataset, DataLoader
from sklearn.metrics import classification_report
from tqdm import tqdm

In [3]:
# Load CoNLL-2003 Dataset
def load_conll2003_data(file_path):
    sentences, tags = [], []
    sentence, sentence_tags = [], []
    with open(file_path, 'r') as f:
        for line in f:
            if line.strip() == "":
                if sentence:
                    sentences.append(sentence)
                    tags.append(sentence_tags)
                    sentence, sentence_tags = [], []
            else:
                word, _, _, tag = line.split()
                sentence.append(word)
                sentence_tags.append(tag)
    return sentences, tags

train_sentences, train_tags = load_conll2003_data('train.txt')
test_sentences, test_tags = load_conll2003_data('test.txt')

In [4]:
# Preprocess Data
def build_vocab(data):
    vocab = {"<PAD>": 0, "<UNK>": 1}
    for sentence in data:
        for word in sentence:
            if word not in vocab:
                vocab[word] = len(vocab)
    return vocab

word_vocab = build_vocab(train_sentences)
tag_vocab = build_vocab(train_tags)
tag_reverse_vocab = {v: k for k, v in tag_vocab.items()}

def encode_data(sentences, tags, word_vocab, tag_vocab, max_len=50):
    encoded_sentences, encoded_tags = [], []
    for sent, tag in zip(sentences, tags):
        encoded_sentences.append([word_vocab.get(w, 1) for w in sent][:max_len] + [0] * (max_len - len(sent)))
        encoded_tags.append([tag_vocab[t] for t in tag][:max_len] + [0] * (max_len - len(tag)))
    return np.array(encoded_sentences), np.array(encoded_tags)

train_inputs, train_labels = encode_data(train_sentences, train_tags, word_vocab, tag_vocab)
test_inputs, test_labels = encode_data(test_sentences, test_tags, word_vocab, tag_vocab)

In [7]:
# Step 4: Define Dataset and DataLoader
class NERDataset(Dataset):
    def __init__(self, inputs, labels):
        self.inputs = inputs
        self.labels = labels

    def __len__(self):
        return len(self.inputs)

    def __getitem__(self, idx):
        return torch.tensor(self.inputs[idx]), torch.tensor(self.labels[idx])

train_dataset = NERDataset(train_inputs, train_labels)
test_dataset = NERDataset(test_inputs, test_labels)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32)

# Define RNN Model
class NERModel(nn.Module):
    def __init__(self, vocab_size, tag_size, embedding_dim, hidden_dim):
        super(NERModel, self).__init__()
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        self.rnn = nn.LSTM(embedding_dim, hidden_dim, batch_first=True)
        self.fc = nn.Linear(hidden_dim, tag_size)

    def forward(self, x):
        x = self.embedding(x)
        x, _ = self.rnn(x)
        x = self.fc(x)
        return x

vocab_size = len(word_vocab)
tag_size = len(tag_vocab)
model = NERModel(vocab_size, tag_size, embedding_dim=100, hidden_dim=128)

# Train the Model
criterion = nn.CrossEntropyLoss()
optimizer = Adam(model.parameters(), lr=0.001)

def train_model(model, data_loader, criterion, optimizer, num_epochs=10):
    model.train()
    for epoch in range(num_epochs):
        total_loss = 0
        for inputs, labels in tqdm(data_loader):
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs.view(-1, outputs.shape[-1]), labels.view(-1))
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
        print(f"Epoch {epoch+1}, Loss: {total_loss/len(data_loader)}")

train_model(model, train_loader, criterion, optimizer)

100%|██████████| 469/469 [00:36<00:00, 12.94it/s]


Epoch 1, Loss: 0.24618404563556093


100%|██████████| 469/469 [00:33<00:00, 14.08it/s]


Epoch 2, Loss: 0.11880475890312368


100%|██████████| 469/469 [00:32<00:00, 14.22it/s]


Epoch 3, Loss: 0.07616251681659267


100%|██████████| 469/469 [00:31<00:00, 14.70it/s]


Epoch 4, Loss: 0.05158718096326663


100%|██████████| 469/469 [00:32<00:00, 14.56it/s]


Epoch 5, Loss: 0.03581014247353016


100%|██████████| 469/469 [00:31<00:00, 14.86it/s]


Epoch 6, Loss: 0.025151696399664445


100%|██████████| 469/469 [00:32<00:00, 14.63it/s]


Epoch 7, Loss: 0.017532751319616208


100%|██████████| 469/469 [00:32<00:00, 14.65it/s]


Epoch 8, Loss: 0.012095221706521091


100%|██████████| 469/469 [00:32<00:00, 14.37it/s]


Epoch 9, Loss: 0.008388328453603346


100%|██████████| 469/469 [00:32<00:00, 14.42it/s]

Epoch 10, Loss: 0.005872354603686265





In [9]:
# Evaluate the Model
def evaluate_model(model, data_loader, tag_reverse_vocab):
    model.eval()
    all_preds, all_labels = [], []
    with torch.no_grad():
        for inputs, labels in data_loader:
            outputs = model(inputs)
            preds = outputs.argmax(dim=-1).view(-1).cpu().numpy()
            labels = labels.view(-1).cpu().numpy()
            mask = labels != 0
            all_preds.extend(preds[mask])
            all_labels.extend(labels[mask])

    # Get unique labels from predictions and ground truth
    unique_labels = list(set(all_labels))

    # Filter target names to match unique labels
    target_names = [tag_reverse_vocab[label] for label in unique_labels if label in tag_reverse_vocab]

    #Generate and print classification report
    print(classification_report(all_labels, all_preds, target_names=target_names, labels=unique_labels))

evaluate_model(model, test_loader, tag_reverse_vocab)

              precision    recall  f1-score   support

           O       0.97      0.96      0.96     38378
       B-ORG       0.67      0.59      0.63      1658
      B-MISC       0.73      0.62      0.67       701
       B-PER       0.84      0.59      0.70      1580
       I-PER       0.87      0.50      0.64      1111
       B-LOC       0.83      0.79      0.81      1656
       I-ORG       0.28      0.79      0.41       827
      I-MISC       0.65      0.52      0.58       216
       I-LOC       0.70      0.65      0.68       255

    accuracy                           0.91     46382
   macro avg       0.73      0.67      0.67     46382
weighted avg       0.93      0.91      0.91     46382

