In [1]:
# Enable IPython's autoreload extension in mode 2 so that edits to the
# project's .py modules are picked up live, without restarting the kernel.
%load_ext autoreload
%autoreload 2

# Append the parent directory to the import search path. The path is relative,
# so it resolves against the kernel's working directory; presumably this is
# where the `elmo_on_md` package lives -- TODO confirm.
# NOTE(review): `os` is imported here but not used in the visible cells.
import sys, os
sys.path.append('../')
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

from elmo_on_md.data_loaders.ner_loader import NERLoader
from elmo_on_md.evaluation.model_loader import load_model
from elmo_on_md.evaluation.named_entitiy_recognition import NER

In [2]:
# Experiment configuration shared by the cells below.
pos_weight = 7      # forwarded to NER(..., pos_weight=...) when the taggers are built
random_state = 7    # seed for the train/test split

# Load the NER corpus and hold out 20% of it as `test_set`.
ner_loader = NERLoader()
data = ner_loader.load_data()
train_set, test_set = train_test_split(
    data,
    test_size=0.2,
    random_state=random_state,
)

### Original ELMo

In [4]:
# Baseline experiment: build the NER tagger on top of the 'original' ELMo model.
elmo = load_model('original')
ner_model = NER(elmo, pos_weight=pos_weight)

# NOTE(review): `test_set` is passed as the second argument, which appears to
# be used for the "Validation Loss" printed each epoch -- if so, the split that
# is later used for the final report is also the one monitored during training.
# Confirm against NER.train before trusting the reported scores as held-out.
ner_model.train(
    train_set,
    test_set,
    ner_loader.types,
    n_epochs=50,
)

2019-08-31 20:54:03,618 INFO: char embedding size: 2289
2019-08-31 20:54:04,289 INFO: word embedding size: 189561
2019-08-31 20:54:09,519 INFO: Model(
  (token_embedder): ConvTokenEmbedder(
    (word_emb_layer): EmbeddingLayer(
      (embedding): Embedding(189561, 100, padding_idx=3)
    )
    (char_emb_layer): EmbeddingLayer(
      (embedding): Embedding(2289, 50, padding_idx=2286)
    )
    (convolutions): ModuleList(
      (0): Conv1d(50, 32, kernel_size=(1,), stride=(1,))
      (1): Conv1d(50, 32, kernel_size=(2,), stride=(1,))
      (2): Conv1d(50, 64, kernel_size=(3,), stride=(1,))
      (3): Conv1d(50, 128, kernel_size=(4,), stride=(1,))
      (4): Conv1d(50, 256, kernel_size=(5,), stride=(1,))
      (5): Conv1d(50, 512, kernel_size=(6,), stride=(1,))
      (6): Conv1d(50, 1024, kernel_size=(7,), stride=(1,))
    )
    (highways): Highway(
      (_layers): ModuleList(
        (0): Linear(in_features=2048, out_features=4096, bias=True)
        (1): Linear(in_features=2048, out_fe

Epoch: 0	 Train Loss: 0.42872563004493713	 Validation Loss: 0.4142396152019501
Epoch: 1	 Train Loss: 0.3087858259677887	 Validation Loss: 0.2704983055591583
Epoch: 2	 Train Loss: 0.24886779487133026	 Validation Loss: 0.20035143196582794
Epoch: 3	 Train Loss: 0.17772328853607178	 Validation Loss: 0.1681295782327652
Epoch: 4	 Train Loss: 0.15167826414108276	 Validation Loss: 0.1561121791601181
Epoch: 5	 Train Loss: 0.0848379135131836	 Validation Loss: 0.14658507704734802
Epoch: 6	 Train Loss: 0.10155167430639267	 Validation Loss: 0.14034660160541534
Epoch: 7	 Train Loss: 0.08674357086420059	 Validation Loss: 0.13875041902065277
Epoch: 8	 Train Loss: 0.07598503679037094	 Validation Loss: 0.1436724066734314
Epoch: 9	 Train Loss: 0.0571456141769886	 Validation Loss: 0.1422075778245926
Epoch: 10	 Train Loss: 0.048719026148319244	 Validation Loss: 0.15055817365646362
Epoch: 11	 Train Loss: 0.04883527383208275	 Validation Loss: 0.13898865878582
Epoch: 12	 Train Loss: 0.0322318896651268	 Valida

<elmo_on_md.evaluation.named_entitiy_recognition.NER at 0x20195aa3940>

In [5]:
# Predict on the held-out split with the tagger trained in the previous cell.
y_pred = ner_model.predict(test_set)

# Build the reference labels, sized from the second dimension of the
# predictions, and move them to the CPU before handing them to scikit-learn.
# NOTE(review): this reaches into a private helper (`_create_labels`).
y_true = ner_model._create_labels(
    test_set,
    y_pred.shape[1],
    ner_loader.types,
).to('cpu')

# Report only label ids 0 .. len(types) - 1, named after the entity types.
# Any label value outside that range (presumably a "no entity" class --
# TODO confirm) is excluded from the report.
desired_labels = range(len(ner_loader.types))
report = classification_report(
    y_true.flatten(),
    y_pred.flatten(),
    labels=desired_labels,
    target_names=ner_loader.types,
)
print(report)

2019-08-31 20:56:06,317 INFO: 11 batches, avg len: 21.6


              precision    recall  f1-score   support

        PERS       0.88      0.89      0.88       406
        MISC       0.65      0.63      0.64       186
         LOC       0.75      0.72      0.74       342
        TIME       0.83      0.29      0.43        17
       MONEY       0.92      0.97      0.95       133
        DATE       0.81      0.84      0.82       113
     PERCENT       0.91      0.95      0.93        44
         ORG       0.62      0.65      0.64       412

   micro avg       0.76      0.76      0.76      1653
   macro avg       0.80      0.74      0.75      1653
weighted avg       0.76      0.76      0.76      1653



### The new Enhanced ELMo

In [3]:
# Enhanced experiment: same NER tagger and hyper-parameters, but built on the
# ELMo model stored under the name below.
elmo = load_model('pos_weight8_lr-4_new_tags_30epochs')
ner_model = NER(elmo, pos_weight=pos_weight)

# NOTE(review): `test_set` is passed as the second argument, which appears to
# be used for the "Validation Loss" printed each epoch -- if so, the split that
# is later used for the final report is also the one monitored during training.
# Confirm against NER.train before trusting the reported scores as held-out.
ner_model.train(
    train_set,
    test_set,
    ner_loader.types,
    n_epochs=50,
)

2019-09-01 15:29:59,356 INFO: 42 batches, avg len: 21.0
2019-09-01 15:30:03,124 INFO: Finished 1000 sentences.
2019-09-01 15:30:06,653 INFO: Finished 2000 sentences.
2019-09-01 15:30:12,555 INFO: 11 batches, avg len: 21.6


Epoch: 0	 Train Loss: 0.4129904508590698	 Validation Loss: 0.3888172507286072
Epoch: 1	 Train Loss: 0.3129417896270752	 Validation Loss: 0.29219964146614075
Epoch: 2	 Train Loss: 0.25284793972969055	 Validation Loss: 0.22670434415340424
Epoch: 3	 Train Loss: 0.20774218440055847	 Validation Loss: 0.19525742530822754
Epoch: 4	 Train Loss: 0.1815802901983261	 Validation Loss: 0.18141847848892212
Epoch: 5	 Train Loss: 0.14964720606803894	 Validation Loss: 0.16772224009037018
Epoch: 6	 Train Loss: 0.14665023982524872	 Validation Loss: 0.1632736325263977
Epoch: 7	 Train Loss: 0.14451821148395538	 Validation Loss: 0.1627836674451828
Epoch: 8	 Train Loss: 0.07470446079969406	 Validation Loss: 0.15452244877815247
Epoch: 9	 Train Loss: 0.0878874734044075	 Validation Loss: 0.15380635857582092
Epoch: 10	 Train Loss: 0.05690300837159157	 Validation Loss: 0.16406579315662384
Epoch: 11	 Train Loss: 0.06901171803474426	 Validation Loss: 0.17002391815185547
Epoch: 12	 Train Loss: 0.044879619032144547	 

<elmo_on_md.evaluation.named_entitiy_recognition.NER at 0x1fb146e6e48>

In [4]:
# Predict on the held-out split with the tagger trained in the previous cell.
y_pred = ner_model.predict(test_set)

# Build the reference labels, sized from the second dimension of the
# predictions, and move them to the CPU before handing them to scikit-learn.
# NOTE(review): this reaches into a private helper (`_create_labels`).
y_true = ner_model._create_labels(
    test_set,
    y_pred.shape[1],
    ner_loader.types,
).to('cpu')

# Report only label ids 0 .. len(types) - 1, named after the entity types.
# Any label value outside that range (presumably a "no entity" class --
# TODO confirm) is excluded from the report.
desired_labels = range(len(ner_loader.types))
report = classification_report(
    y_true.flatten(),
    y_pred.flatten(),
    labels=desired_labels,
    target_names=ner_loader.types,
)
print(report)

2019-09-01 15:31:52,354 INFO: 11 batches, avg len: 21.6


              precision    recall  f1-score   support

        PERS       0.86      0.89      0.87       406
        MISC       0.69      0.51      0.58       186
         LOC       0.77      0.70      0.73       342
        TIME       1.00      0.06      0.11        17
       MONEY       0.98      0.97      0.97       133
        DATE       0.83      0.81      0.82       113
     PERCENT       0.84      0.98      0.91        44
         ORG       0.60      0.56      0.58       412

   micro avg       0.77      0.72      0.74      1653
   macro avg       0.82      0.68      0.70      1653
weighted avg       0.76      0.72      0.74      1653

