In [1]:
%load_ext autoreload
%autoreload 2

# NOTE(review): `os` is imported here but not used in any of the visible cells.
import sys, os
# Add the parent directory to the module search path. The path is relative, so it is
# resolved against the kernel's working directory at import time.
# Presumably this is what lets the `elmo_on_md` imports in this cell resolve when the
# notebook is run from a sub-directory of the repository — confirm.
sys.path.append('../')
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

from elmo_on_md.data_loaders.ner_loader import NERLoader
from elmo_on_md.evaluation.model_loader import load_model
from elmo_on_md.evaluation.named_entitiy_recognition import NER

In [2]:
# Shared experiment settings; every model cell in this notebook reads them.
# pos_weight is forwarded unchanged to NER(..., pos_weight=pos_weight).
# NOTE(review): presumably the weight given to positive labels in the loss — confirm in NER.
pos_weight = 10
# Seed used for the train/test split below. Nothing else is seeded in this cell, so
# model initialisation and training may still differ between runs — TODO confirm.
random_state = 42

# Load the NER data once; ner_loader is kept because later cells read ner_loader.types.
ner_loader = NERLoader()
data = ner_loader.load_data()
# Hold out 20% of the loaded data as test_set; the split is reproducible via random_state.
train_set, test_set = train_test_split(data, test_size=0.2, random_state = random_state)

### Original ELMo

In [3]:
# Baseline experiment: an NER model built on the 'original' ELMo model only.
elmo = load_model('original')
ner_model = NER([elmo], pos_weight=pos_weight)

# Train for 50 epochs. test_set is passed as the second argument, and the training log
# reports a "Validation Loss" per epoch next to the train loss.
# NOTE(review): the same test_set is scored by the evaluation cell that follows. If
# train() uses this loss for early stopping or checkpoint selection, the reported
# metrics are optimistic — confirm, and consider a separate validation split.
# This call is the cell's last expression, so its return value is echoed as cell output.
ner_model.train(train_set, test_set, ner_loader.types, n_epochs=50)

2019-09-08 21:54:14,828 INFO: char embedding size: 2289
2019-09-08 21:54:15,495 INFO: word embedding size: 189561
2019-09-08 21:54:20,142 INFO: Model(
  (token_embedder): ConvTokenEmbedder(
    (word_emb_layer): EmbeddingLayer(
      (embedding): Embedding(189561, 100, padding_idx=3)
    )
    (char_emb_layer): EmbeddingLayer(
      (embedding): Embedding(2289, 50, padding_idx=2286)
    )
    (convolutions): ModuleList(
      (0): Conv1d(50, 32, kernel_size=(1,), stride=(1,))
      (1): Conv1d(50, 32, kernel_size=(2,), stride=(1,))
      (2): Conv1d(50, 64, kernel_size=(3,), stride=(1,))
      (3): Conv1d(50, 128, kernel_size=(4,), stride=(1,))
      (4): Conv1d(50, 256, kernel_size=(5,), stride=(1,))
      (5): Conv1d(50, 512, kernel_size=(6,), stride=(1,))
      (6): Conv1d(50, 1024, kernel_size=(7,), stride=(1,))
    )
    (highways): Highway(
      (_layers): ModuleList(
        (0): Linear(in_features=2048, out_features=4096, bias=True)
        (1): Linear(in_features=2048, out_fe

Epoch: 0	 Train Loss: 0.22913484275341034	 Validation Loss: 0.3744853436946869
Epoch: 1	 Train Loss: 0.12010227143764496	 Validation Loss: 0.24791452288627625
Epoch: 2	 Train Loss: 0.08312205970287323	 Validation Loss: 0.21563656628131866
Epoch: 3	 Train Loss: 0.07143276184797287	 Validation Loss: 0.2002323716878891
Epoch: 4	 Train Loss: 0.07354626059532166	 Validation Loss: 0.19526444375514984
Epoch: 5	 Train Loss: 0.03902340680360794	 Validation Loss: 0.20250344276428223
Epoch: 6	 Train Loss: 0.04654436931014061	 Validation Loss: 0.20442716777324677
Epoch: 7	 Train Loss: 0.0317196324467659	 Validation Loss: 0.213932067155838
Epoch: 8	 Train Loss: 0.026396067813038826	 Validation Loss: 0.23688848316669464
Epoch: 9	 Train Loss: 0.014204113744199276	 Validation Loss: 0.22602225840091705
Epoch: 10	 Train Loss: 0.01432726439088583	 Validation Loss: 0.21499918401241302
Epoch: 11	 Train Loss: 0.015616772696375847	 Validation Loss: 0.20285773277282715
Epoch: 12	 Train Loss: 0.013266916386783

<elmo_on_md.evaluation.named_entitiy_recognition.NER at 0x240bb709ef0>

In [4]:
# Score the model trained in the preceding cell on the held-out split.
y_pred = ner_model.predict(test_set)

# Build the gold labels with the model's own (underscore-prefixed) helper, sized to the
# second dimension of the predictions, then move them to the CPU.
n_label_cols = y_pred.shape[1]
y_true = ner_model._create_labels(test_set, n_label_cols, ner_loader.types)
y_true = y_true.to('cpu')

# Report only label ids 0 .. len(types) - 1, named after the entity types.
# NOTE(review): presumably any other label id (e.g. a non-entity class) is left out of
# the report on purpose — confirm against the label encoding.
desired_labels = range(len(ner_loader.types))
report = classification_report(
    y_true.flatten(),
    y_pred.flatten(),
    labels=desired_labels,
    target_names=ner_loader.types,
)
print(report)

2019-09-08 21:56:18,063 INFO: 11 batches, avg len: 20.9


              precision    recall  f1-score   support

        PERS       0.91      0.85      0.88       453
        MISC       0.75      0.48      0.59       196
         LOC       0.75      0.64      0.69       337
        TIME       1.00      0.33      0.50         6
       MONEY       0.93      0.98      0.96        88
        DATE       0.90      0.67      0.77       162
     PERCENT       0.91      0.93      0.92        46
         ORG       0.61      0.59      0.60       373

   micro avg       0.79      0.70      0.74      1661
   macro avg       0.85      0.69      0.74      1661
weighted avg       0.79      0.70      0.74      1661



### The new Enhanced ELMo

In [3]:
# Second experiment: same NER setup, but built on the 'elmo_on_md' model only.
# Note that `elmo` and `ner_model` are rebound here, replacing any earlier values.
elmo = load_model('elmo_on_md')
ner_model = NER([elmo], pos_weight=pos_weight)

# Train for 50 epochs. test_set is passed as the second argument, and the training log
# reports a "Validation Loss" per epoch next to the train loss.
# NOTE(review): the same test_set is scored by the evaluation cell that follows. If
# train() uses this loss for early stopping or checkpoint selection, the reported
# metrics are optimistic — confirm, and consider a separate validation split.
# This call is the cell's last expression, so its return value is echoed as cell output.
ner_model.train(train_set, test_set, ner_loader.types, n_epochs=50)

2019-09-08 21:40:35,206 INFO: 42 batches, avg len: 21.1
2019-09-08 21:40:38,527 INFO: Finished 1000 sentences.
2019-09-08 21:40:42,289 INFO: Finished 2000 sentences.
2019-09-08 21:40:50,107 INFO: 11 batches, avg len: 20.9


Epoch: 0	 Train Loss: 0.17130769789218903	 Validation Loss: 0.2864668667316437
Epoch: 1	 Train Loss: 0.08408407866954803	 Validation Loss: 0.18905964493751526
Epoch: 2	 Train Loss: 0.06812340766191483	 Validation Loss: 0.16057804226875305
Epoch: 3	 Train Loss: 0.03649020940065384	 Validation Loss: 0.14847993850708008
Epoch: 4	 Train Loss: 0.02776391990482807	 Validation Loss: 0.1410379409790039
Epoch: 5	 Train Loss: 0.020519785583019257	 Validation Loss: 0.141128808259964
Epoch: 6	 Train Loss: 0.025155803188681602	 Validation Loss: 0.13629546761512756
Epoch: 7	 Train Loss: 0.017894530668854713	 Validation Loss: 0.14307568967342377
Epoch: 8	 Train Loss: 0.011906404048204422	 Validation Loss: 0.1482289880514145
Epoch: 9	 Train Loss: 0.01388997957110405	 Validation Loss: 0.1459144651889801
Epoch: 10	 Train Loss: 0.009685349650681019	 Validation Loss: 0.14278726279735565
Epoch: 11	 Train Loss: 0.006459803786128759	 Validation Loss: 0.15313859283924103
Epoch: 12	 Train Loss: 0.0043182522058

<elmo_on_md.evaluation.named_entitiy_recognition.NER at 0x272b4a09eb8>

In [4]:
# Score the model trained in the preceding cell on the held-out split.
y_pred = ner_model.predict(test_set)

# Build the gold labels with the model's own (underscore-prefixed) helper, sized to the
# second dimension of the predictions, then move them to the CPU.
n_label_cols = y_pred.shape[1]
y_true = ner_model._create_labels(test_set, n_label_cols, ner_loader.types)
y_true = y_true.to('cpu')

# Report only label ids 0 .. len(types) - 1, named after the entity types.
# NOTE(review): presumably any other label id (e.g. a non-entity class) is left out of
# the report on purpose — confirm against the label encoding.
desired_labels = range(len(ner_loader.types))
report = classification_report(
    y_true.flatten(),
    y_pred.flatten(),
    labels=desired_labels,
    target_names=ner_loader.types,
)
print(report)

2019-09-08 21:42:26,868 INFO: 11 batches, avg len: 20.9


              precision    recall  f1-score   support

        PERS       0.92      0.89      0.90       453
        MISC       0.72      0.53      0.61       196
         LOC       0.83      0.73      0.78       337
        TIME       1.00      0.33      0.50         6
       MONEY       0.96      0.98      0.97        88
        DATE       0.90      0.76      0.82       162
     PERCENT       1.00      0.83      0.90        46
         ORG       0.63      0.59      0.61       373

   micro avg       0.82      0.74      0.77      1661
   macro avg       0.87      0.70      0.76      1661
weighted avg       0.81      0.74      0.77      1661



### Both Models

In [3]:
# Third experiment: load both the 'original' and the 'elmo_on_md' models and hand them
# to NER together in one list.
# NOTE(review): presumably NER combines the representations of all models in the list —
# confirm in the NER implementation.
elmo = load_model('original')
new_elmo = load_model('elmo_on_md')
ner_model = NER([elmo, new_elmo], pos_weight=pos_weight)

# Train for 50 epochs. test_set is passed as the second argument, and the training log
# reports a "Validation Loss" per epoch next to the train loss.
# NOTE(review): the same test_set is scored by the evaluation cell that follows. If
# train() uses this loss for early stopping or checkpoint selection, the reported
# metrics are optimistic — confirm, and consider a separate validation split.
# This call is the cell's last expression, so its return value is echoed as cell output.
ner_model.train(train_set, test_set, ner_loader.types, n_epochs=50)

2019-09-08 21:56:38,787 INFO: char embedding size: 2289
2019-09-08 21:56:39,454 INFO: word embedding size: 189561
2019-09-08 21:56:44,164 INFO: Model(
  (token_embedder): ConvTokenEmbedder(
    (word_emb_layer): EmbeddingLayer(
      (embedding): Embedding(189561, 100, padding_idx=3)
    )
    (char_emb_layer): EmbeddingLayer(
      (embedding): Embedding(2289, 50, padding_idx=2286)
    )
    (convolutions): ModuleList(
      (0): Conv1d(50, 32, kernel_size=(1,), stride=(1,))
      (1): Conv1d(50, 32, kernel_size=(2,), stride=(1,))
      (2): Conv1d(50, 64, kernel_size=(3,), stride=(1,))
      (3): Conv1d(50, 128, kernel_size=(4,), stride=(1,))
      (4): Conv1d(50, 256, kernel_size=(5,), stride=(1,))
      (5): Conv1d(50, 512, kernel_size=(6,), stride=(1,))
      (6): Conv1d(50, 1024, kernel_size=(7,), stride=(1,))
    )
    (highways): Highway(
      (_layers): ModuleList(
        (0): Linear(in_features=2048, out_features=4096, bias=True)
        (1): Linear(in_features=2048, out_fe

Epoch: 0	 Train Loss: 0.1399768441915512	 Validation Loss: 0.2669811248779297
Epoch: 1	 Train Loss: 0.07062292098999023	 Validation Loss: 0.1994614154100418
Epoch: 2	 Train Loss: 0.05533478781580925	 Validation Loss: 0.18148314952850342
Epoch: 3	 Train Loss: 0.04072807729244232	 Validation Loss: 0.16703681647777557
Epoch: 4	 Train Loss: 0.027625294402241707	 Validation Loss: 0.1620883345603943
Epoch: 5	 Train Loss: 0.02794036827981472	 Validation Loss: 0.15895597636699677
Epoch: 6	 Train Loss: 0.025657862424850464	 Validation Loss: 0.15631170570850372
Epoch: 7	 Train Loss: 0.014933017082512379	 Validation Loss: 0.15964637696743011
Epoch: 8	 Train Loss: 0.011895806528627872	 Validation Loss: 0.1631808876991272
Epoch: 9	 Train Loss: 0.008296997286379337	 Validation Loss: 0.1652330905199051
Epoch: 10	 Train Loss: 0.006309822667390108	 Validation Loss: 0.17537061870098114
Epoch: 11	 Train Loss: 0.019310949370265007	 Validation Loss: 0.24337944388389587
Epoch: 12	 Train Loss: 0.007316636387

<elmo_on_md.evaluation.named_entitiy_recognition.NER at 0x1bca2a609e8>

In [4]:
# Score the model trained in the preceding cell on the held-out split.
y_pred = ner_model.predict(test_set)

# Build the gold labels with the model's own (underscore-prefixed) helper, sized to the
# second dimension of the predictions, then move them to the CPU.
n_label_cols = y_pred.shape[1]
y_true = ner_model._create_labels(test_set, n_label_cols, ner_loader.types)
y_true = y_true.to('cpu')

# Report only label ids 0 .. len(types) - 1, named after the entity types.
# NOTE(review): presumably any other label id (e.g. a non-entity class) is left out of
# the report on purpose — confirm against the label encoding.
desired_labels = range(len(ner_loader.types))
report = classification_report(
    y_true.flatten(),
    y_pred.flatten(),
    labels=desired_labels,
    target_names=ner_loader.types,
)
print(report)

2019-09-08 22:00:14,910 INFO: 11 batches, avg len: 20.9
2019-09-08 22:00:18,987 INFO: 11 batches, avg len: 20.9


              precision    recall  f1-score   support

        PERS       0.93      0.91      0.92       453
        MISC       0.76      0.56      0.64       196
         LOC       0.81      0.77      0.79       337
        TIME       0.57      0.67      0.62         6
       MONEY       0.96      0.99      0.97        88
        DATE       0.90      0.77      0.83       162
     PERCENT       0.91      0.93      0.92        46
         ORG       0.61      0.66      0.64       373

   micro avg       0.81      0.78      0.79      1661
   macro avg       0.81      0.78      0.79      1661
weighted avg       0.81      0.78      0.79      1661

