In [1]:
from scripts.utils import *
from scripts.embedding_fabric import EmbeddingFabric
from scripts.indexer import Indexer
from scripts.metrics import *
from scripts.model import *
from scripts.training_model import *
from scripts.visual_util import *

In [2]:
import os

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

In [3]:
# Input files, given relative to the notebook's working directory:
# the three dataset splits first, then the pretrained embedding file.
TRAIN_PATH, DEV_PATH, TEST_PATH = (
    'data/train.txt',
    'data/dev.txt',
    'data/test.txt',
)
EMBEDDINGS_PATH = 'embeddings/glove.6B.100d.txt'

In [4]:
# Load the pretrained embeddings once up front; `glove` is later handed to
# EmbeddingFabric.get_embedding_layer when the models are built.
# NOTE(review): presumably a word -> vector mapping — confirm against
# load_embedding_dict.
glove = load_embedding_dict(EMBEDDINGS_PATH)

100%|██████████| 400000/400000 [00:03<00:00, 117832.89it/s]


In [5]:
# Read every split with the same loader and keep the (words, tags) pairs
# in `data_dict`, keyed by split name.
data_dict = {}
for split_name, split_path in (('train', TRAIN_PATH),
                               ('dev', DEV_PATH),
                               ('test', TEST_PATH)):
    split_words, split_tags = read_ner_data_from_connl(split_path)
    data_dict[split_name] = (split_words, split_tags)

# Expose each split under its own pair of names as well.
words, tags = data_dict['train']
val_words, val_tags = data_dict['dev']
test_words, test_tags = data_dict['test']

# Both indexers are built from the training split only.
words_indexer, tags_indexer = Indexer(words), Indexer(tags)


In [6]:
# Sanity check of the tag indexer: the element stored at index 0,
# followed by the elements stored at indices 0 through 9.
first_tag = tags_indexer.index_to_element(0)
print(first_tag)
print(tags_indexer.indices_to_elements(list(range(10))))

<UNKNOWN>
['<UNKNOWN>', 'B-ORG', 'O', 'B-MISC', 'B-PER', 'I-PER', 'B-LOC', 'I-ORG', 'I-MISC', 'I-LOC']


In [7]:
# Model initialization
#
# Builds one LSTMTagger per embedding strategy and stores it in `models`,
# keyed by the strategy name ("strategy_b", "strategy_c").

# NOTE(review): EMBEDDING_DIM presumably has to match the vector size of the
# file named in EMBEDDINGS_PATH (glove.6B.100d) — confirm.
EMBEDDING_DIM = 100
HIDDEN_DIM = 100
SEED = 42

# Seed torch's global RNG so that re-running this cell starts model
# construction from the same random state every time.
torch.manual_seed(SEED)

models = {}
for strat in ['b', 'c']: # original ['a', 'b', 'c']
    strategy = f"strategy_{strat}"
    # `strategy` is bound as a default argument so the factory keeps the value
    # from *this* iteration. A plain closure would look the name up when the
    # factory is called, and would see the last strategy if LSTMTagger invokes
    # it after the loop has moved on. The factory is still callable with no
    # arguments, exactly as before.
    model = LSTMTagger(EMBEDDING_DIM, HIDDEN_DIM,
                   tags_indexer.size(),
                   lambda strategy=strategy: EmbeddingFabric.get_embedding_layer(words_indexer, glove, strategy))

    models[strategy] = model

In [8]:
# Integer ids of every element known to the tag indexer, in the order the
# element -> index dictionary yields them.
labels = list(tags_indexer.get_element_to_index_dict().values())
print(labels)

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]


In [9]:
# Empty containers filled by later cells, both keyed by model name:
# plotting_data holds per-model metric/loss histories, trained_models holds
# the model objects themselves.
plotting_data, trained_models = {}, {}

In [15]:
# Main training loop
#
# Trains every model in `models` with the same optimizer settings and records
# the trained model in `trained_models` and its metric/loss histories in
# `plotting_data`, both keyed by model name.

# Hyperparameters shared by all models (previously inline literals).
BATCH_SIZE = 128
LEARNING_RATE = 0.1
NUM_EPOCHS = 10

for name, model in models.items():
    print(f"Training {name} model")
    loss_function = nn.NLLLoss()
    optimizer = optim.SGD(model.parameters(), lr=LEARNING_RATE)

    # Separate handlers for training and validation, created per model so
    # nothing carries over from the previous model's run.
    metric_handler = MetricsHandler(labels)
    valid_metric = MetricsHandler(labels)

    # Distinct result names: the returned model no longer overwrites the loop
    # variable, and the metric handlers get names that say what they hold.
    trained_model, train_metrics, valid_metrics, losses = train_model(
        model, optimizer,
        loss_function,
        data_dict,
        BATCH_SIZE,
        words_indexer,
        tags_indexer,
        metric_handler,
        valid_metric,
        num_epochs=NUM_EPOCHS)

    # Keep whatever train_model returned instead of dropping it at the end of
    # the iteration.
    trained_models[name] = trained_model
    plotting_data[name] = (train_metrics.get_metrics(), losses, valid_metrics.get_metrics())

Training strategy_b model
Epoch 1/10
----------


  0%|          | 0/1591 [00:00<?, ?it/s]

100%|██████████| 1591/1591 [01:27<00:00, 18.23it/s]


Loss per epoch - 48.10175979940079
Precision - 0.6486940405510399
Recall - 0.37005806110498374
F1-score - 0.4973725761716654
F0.5-score - 0.5297032602522971

Validating on dev test: 
Precision - 0.7209014270928104
Recall - 0.3962113927009723
F1-score - 0.5495764382332381
F0.5-score - 0.6098256065680436


Epoch 2/10
----------


100%|██████████| 1591/1591 [01:27<00:00, 18.27it/s]


Loss per epoch - 31.869849692433043
Precision - 0.7279421680540632
Recall - 0.5439108262600213
F1-score - 0.5746032519562444
F0.5-score - 0.6140414445680379

Validating on dev test: 
Precision - 0.7597182235727741
Recall - 0.5302891611345213
F1-score - 0.5829270966298868
F0.5-score - 0.6364290902187445


Epoch 3/10
----------


100%|██████████| 1591/1591 [01:26<00:00, 18.39it/s]


Loss per epoch - 25.18623580990592
Precision - 0.78532404936878
Recall - 0.6519632782675134
F1-score - 0.6948613947407765
F0.5-score - 0.7370530500899704

Validating on dev test: 
Precision - 0.7909236986886817
Recall - 0.5918669970201372
F1-score - 0.6525800116189573
F0.5-score - 0.7111836228801584


Epoch 4/10
----------


100%|██████████| 1591/1591 [01:29<00:00, 17.87it/s]


Loss per epoch - 21.86498760194111
Precision - 0.8080631985278025
Recall - 0.7044387595883508
F1-score - 0.7435945386109684
F0.5-score - 0.7772205141118378

Validating on dev test: 
Precision - 0.80390495444187
Recall - 0.6315036091737652
F1-score - 0.6897865415615755
F0.5-score - 0.7436625488344502


Epoch 5/10
----------


100%|██████████| 1591/1591 [01:28<00:00, 18.06it/s]


Loss per epoch - 19.669263324512393
Precision - 0.8230944993950323
Recall - 0.7341368716619234
F1-score - 0.7702676177690388
F0.5-score - 0.799027943529075

Validating on dev test: 
Precision - 0.8353179282073109
Recall - 0.6591475154169802
F1-score - 0.721926209636962
F0.5-score - 0.7784072373958142


Epoch 6/10
----------


100%|██████████| 1591/1591 [01:28<00:00, 17.99it/s]


Loss per epoch - 17.95474897267907
Precision - 0.8317151450187403
Recall - 0.7520122970700416
F1-score - 0.7854047572598176
F0.5-score - 0.8110460390985672

Validating on dev test: 
Precision - 0.8418237987051596
Recall - 0.6785899387887897
F1-score - 0.7379092720092764
F0.5-score - 0.7905378985608905


Epoch 7/10
----------


100%|██████████| 1591/1591 [01:28<00:00, 17.95it/s]


Loss per epoch - 16.855773874704266
Precision - 0.8444090325187655
Recall - 0.7725037820526713
F1-score - 0.8033511547685779
F0.5-score - 0.8263498400500207

Validating on dev test: 
Precision - 0.8480444593161743
Recall - 0.6947769953340359
F1-score - 0.7511786706449415
F0.5-score - 0.8004642940748162


Epoch 8/10
----------


100%|██████████| 1591/1591 [01:28<00:00, 17.93it/s]


Loss per epoch - 15.722947502769834
Precision - 0.8550182247851792
Recall - 0.78720408015894
F1-score - 0.8168115496397301
F0.5-score - 0.8384135300914743

Validating on dev test: 
Precision - 0.853591996929259
Recall - 0.7112664062272344
F1-score - 0.7692187643784673
F0.5-score - 0.8151913781440484


Epoch 9/10
----------


100%|██████████| 1591/1591 [01:29<00:00, 17.72it/s]


Loss per epoch - 14.692758808979011
Precision - 0.8584387738837393
Recall - 0.7975906060894155
F1-score - 0.8245632285878337
F0.5-score - 0.8438533693109188

Validating on dev test: 
Precision - 0.8489940737420469
Recall - 0.7243156845846408
F1-score - 0.7744886594075893
F0.5-score - 0.8148915804610622


Epoch 10/10
----------


100%|██████████| 1591/1591 [01:29<00:00, 17.87it/s]


Loss per epoch - 13.887930518469295
Precision - 0.8688643893828026
Recall - 0.8111303166438659
F1-score - 0.8370337584242439
F0.5-score - 0.8552607450745344

Validating on dev test: 
Precision - 0.8541094644987943
Recall - 0.7405643209359338
F1-score - 0.7868304868280891
F0.5-score - 0.8235412864475213


Training strategy_c model
Epoch 1/10
----------


100%|██████████| 1591/1591 [01:29<00:00, 17.77it/s]


Loss per epoch - 73.01093282898516
Precision - 0.4003498029650982
Recall - 0.16529801823431034
F1-score - 0.27217713584438724
F0.5-score - 0.3218076099451993

Validating on dev test: 
Precision - 0.6999009608703647
Recall - 0.2804065458151064
F1-score - 0.5284035286088284
F0.5-score - 0.585901789596608


Epoch 2/10
----------


100%|██████████| 1591/1591 [01:29<00:00, 17.86it/s]


Loss per epoch - 42.5795610753865
Precision - 0.6781819578901798
Recall - 0.42067628065525914
F1-score - 0.5630118332304334
F0.5-score - 0.5991488828000391

Validating on dev test: 
Precision - 0.7253244418433605
Recall - 0.4345057376038695
F1-score - 0.5927697857048532
F0.5-score - 0.6459546790072759


Epoch 3/10
----------


  9%|▉         | 144/1591 [00:08<01:22, 17.51it/s]


KeyboardInterrupt: 

In [None]:
# Build one training visualization per model from the histories stored in
# `plotting_data` as (train metrics, losses, validation metrics).
# NOTE(review): build_training_visualization presumably writes the figure to
# the path passed as its last argument — confirm. The directory is created
# up front so a fresh checkout does not fail on a missing folder.
os.makedirs('visualizations', exist_ok=True)
for name, (train, losses, valid) in plotting_data.items():
    build_training_visualization(name, train, losses, valid, f'visualizations/{name}.png')

In [None]:
# Save the models
#
# Writes each model's state_dict to saved_models/<name>.pt and records the
# model in `trained_models`.

# torch.save opens the target path directly and does not create missing
# parent directories, so make sure the folder exists first.
os.makedirs('saved_models', exist_ok=True)
for name, model in models.items():
    torch.save(model.state_dict(), f'saved_models/{name}.pt')
    trained_models[name] = model

In [14]:
##Printing test set results
##TODO: Extract to scripts and merge with validate method

# Runs every model once over the test split and prints the latest value of
# each metric tracked by its MetricsHandler.
for name, model in models.items():
    print(f"{name} results on test set:")

    # One handler per model, as in the training loop, so that anything the
    # handler accumulates for one model cannot leak into the next model's
    # numbers.
    test_metrics = MetricsHandler(labels)

    # Put the model in inference mode for the forward pass and restore the
    # previous mode afterwards, so later training cells are unaffected.
    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            inputs = torch.tensor(words_indexer.elements_to_indices(test_words), dtype=torch.long)
            true_vals = tags_indexer.elements_to_indices(test_tags)
            tag_scores = model(inputs)
            prediction = get_tag_indices_from_scores(tag_scores)
            test_metrics.update(prediction, true_vals)
            test_metrics.collect()
    finally:
        model.train(was_training)

    # metrics_dict maps a metric name to its history; the last entry is the
    # value produced by the collect() call above.
    for metric, history in test_metrics.metrics_dict.items():
        print(f"{metric} - {history[-1]}")
    print()

strategy_b results on test set:


  return x[1, 1]/(x[1, 1] + x[0, 1])
  return x[1, 1] / (x[1, 0] + x[1, 1])


Precision - 0.5760847949651751
Recall - 0.13429124392375907
F1-score - 0.3087931440395692
F0.5-score - 0.34340604829976557

strategy_c results on test set:
Precision - 0.1661809134631242
Recall - 0.10629109231351373
F1-score - 0.023897488028865127
F0.5-score - 0.020430536329697522



  return ((1 + beta**2)*precision*recall)/(beta**2 * precision + recall)
