In [1]:
import torch
import pandas as pd
import torch.nn as nn
from torch.utils.data import DataLoader
from torchtext.vocab import build_vocab_from_iterator, GloVe
from app.utils import getData, iterator, train_and_eval, predict, Data
from app.model import NER

### Data

In [2]:
# Read the raw CSV and extract the sentences together with their labels.
data = pd.read_csv("./data/ner_datasetreference.csv", encoding='latin1')
sentences, labels = getData(data)

# Ordered 90/10 split: the first 90% of the sentences are used for training,
# the remainder for testing (no shuffling happens before the split).
split_idx = int(0.9 * len(sentences))
train, test = sentences[:split_idx], sentences[split_idx:]
train_labels, test_labels = labels[:split_idx], labels[split_idx:]

# Both vocabularies are built from the training split only. Tokens missing
# from `vocab` fall back to the "<unk>" index; `labels_dict` has no default.
vocab = build_vocab_from_iterator(iterator(train), specials=["<unk>", "<pad>"])
vocab.set_default_index(vocab["<unk>"])
labels_dict = build_vocab_from_iterator(iterator(train_labels), specials=["<pad>"])

# Pre-trained vectors are loaded regardless of the `use_glove` flag because
# they are passed to `Data` (and later `predict`) as an argument.
use_glove = False
glove_vocab = GloVe(name='42B', dim=300)

# Longest sentence over the whole dataset (train and test alike).
max_len = max(len(s) for s in sentences)

train_pipeline = Data(train, train_labels, max_len, vocab, glove_vocab, labels_dict, use_glove=use_glove)
test_pipeline = Data(test, test_labels, max_len, vocab, glove_vocab, labels_dict, use_glove=use_glove)

# Training batches are shuffled; the test loader keeps the DataLoader
# defaults (batch size 1, original order).
train_loader = DataLoader(train_pipeline, batch_size=64, shuffle=True)
test_loader = DataLoader(test_pipeline)

In [3]:
# Show the longest sentence length, computed over all sentences (train and test).
print(max_len)

104


### Hyperparameters

In [4]:
# Sizes derived from the vocabularies built in the data cell.
vocab_size = len(vocab)
tag_size = len(labels_dict)

# Model architecture. `embed_size` is the same value as the `dim` requested
# from GloVe in the data cell.
embed_size = 300
num_layers = 5
hidden_size = 64

# Optimisation settings.
n_epochs = 20
learning_rate = 1e-3

# Seed PyTorch's global RNG before the model is created and before the
# shuffled training loader is iterated, so that a Restart & Run All gives
# repeatable torch-based random initialisation and batch order.
seed = 42
torch.manual_seed(seed)

# Prefer the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

### Model

In [5]:
# Build the tagger from the hyperparameters above and move it to the
# selected device.
model = NER(
    vocab_size,
    embed_size,
    hidden_size,
    num_layers,
    tag_size,
    use_glove=use_glove,
).to(device=device)

### Training

In [6]:
# Loss and optimiser for the tagger.
# NOTE(review): the loss is created without `ignore_index`, so padded
# positions presumably contribute to it -- confirm against `Data` and
# `train_and_eval`.
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(params=model.parameters(), lr=learning_rate)

# One call per epoch; `train_and_eval` receives both loaders and the
# zero-based epoch index.
for epoch in range(n_epochs):
    train_and_eval(
        model,
        train_loader,
        test_loader,
        optimizer,
        criterion,
        device,
        epoch,
    )

training loss of epoch 1 is 0.69902
test acc after epoch 1 is 0.88465
training loss of epoch 2 is 0.59868
test acc after epoch 2 is 0.91017
training loss of epoch 3 is 0.58275
test acc after epoch 3 is 0.93792
training loss of epoch 4 is 0.57093
test acc after epoch 4 is 0.95058
training loss of epoch 5 is 0.56412
test acc after epoch 5 is 0.95521
training loss of epoch 6 is 0.55972
test acc after epoch 6 is 0.95976
training loss of epoch 7 is 0.55741
test acc after epoch 7 is 0.95913
training loss of epoch 8 is 0.55530
test acc after epoch 8 is 0.95998
training loss of epoch 9 is 0.55328
test acc after epoch 9 is 0.96087
training loss of epoch 10 is 0.55197
test acc after epoch 10 is 0.95852
training loss of epoch 11 is 0.55155
test acc after epoch 11 is 0.96200
training loss of epoch 12 is 0.54963
test acc after epoch 12 is 0.95976
training loss of epoch 13 is 0.54934
test acc after epoch 13 is 0.96021
training loss of epoch 14 is 0.54837
test acc after epoch 14 is 0.96200
training l

### Testing

In [12]:
# Tag a single hand-written sentence with the trained model and print the
# predicted labels.
sample_sentence = "Alex and Tom expected the American leader to return"
predicted_tags = predict(
    model,
    sample_sentence,
    max_len,
    vocab,
    glove_vocab,
    labels_dict,
    use_glove,
    device,
)
print(predicted_tags)

['B-per', 'O', 'B-per', 'O', 'O', 'B-gpe', 'O', 'O', 'O']


### Saving Model

In [None]:
# Persist only the learned weights (the model's state_dict).
# A forward slash is used as the separator: '\s' is an invalid escape
# sequence in a regular string literal, and on non-Windows systems a
# backslash would become part of the file name instead of separating
# directories. './saved_no_glove.pth' resolves to the same file on Windows.
torch.save(model.state_dict(), './saved_no_glove.pth')