In [7]:
from collections import defaultdict
from preprocess import NERDataset
from torch.utils.data import DataLoader
import torch
import torch.nn as nn
import torch.optim as optim

<H1>Preprocess

In [8]:
dataset = NERDataset("train.txt")
dataloader = DataLoader(dataset, batch_size=1, shuffle=True)
word2idx = dataset.word2idx
label2idx = dataset.label2idx

In [None]:
class NERModel(nn.Module):
    def __init__(self, vocab_size, tagset_size, embedding_dim=50, hidden_dim=100):
        super(NERModel, self).__init__()
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        self.lstm = nn.LSTM(embedding_dim, hidden_dim, batch_first=True)
        self.fc = nn.Linear(hidden_dim, tagset_size)

    def forward(self, x):
        x = self.embedding(x)
        lstm_out, _ = self.lstm(x)
        output = self.fc(lstm_out)
        return output
    
# Set model parameters
vocab_size = len(word2idx)
tagset_size = len(label2idx)

# Create model
model = NERModel(vocab_size, tagset_size)

# Define loss and optimizer
loss_fn = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)


In [10]:
# Training loop
for epoch in range(10):  # for simplicity, we use 10 epochs
    for inputs, labels in dataloader:
        # Forward pass
        outputs = model(inputs)
        
        # Flatten the outputs and labels for the loss function
        outputs = outputs.view(-1, tagset_size)
        labels = labels.view(-1)
        
        # Compute loss
        loss = loss_fn(outputs, labels)
        
        # Backward pass and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    print(f'Epoch {epoch+1}, Loss: {loss.item()}')


Epoch 1, Loss: 1.6067739725112915
Epoch 2, Loss: 1.5059528350830078
Epoch 3, Loss: 1.4319322109222412
Epoch 4, Loss: 1.3395683765411377
Epoch 5, Loss: 1.2412277460098267
Epoch 6, Loss: 1.1413650512695312
Epoch 7, Loss: 1.04754638671875
Epoch 8, Loss: 0.9547275304794312
Epoch 9, Loss: 0.8888961672782898
Epoch 10, Loss: 0.8287760615348816


In [6]:
# Predict function
def predict(sentence):
    inputs = [word2idx[word] for word in sentence]
    inputs = torch.tensor([inputs]).long()

    outputs = model(inputs)
    _, predicted = torch.max(outputs, dim=2)
    predicted_labels = [list(label2idx.keys())[i] for i in predicted[0]]

    return predicted_labels

# Test the model
test_sentence = ["Barack", "Obama", "is", "the", "president"]
predictions = predict(test_sentence)
print(predictions)


['B-PER', 'I-PER', 'O', 'O', 'O']
