In [1]:
# Enable IPython's autoreload extension in mode 2 so that edits to imported
# modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

# NOTE(review): `os` is not used in any cell visible here - confirm before removing.
import sys, os
# Add the parent directory to the module search path; presumably this is what
# makes the local `elmo_on_md` package importable from this notebook's folder.
sys.path.append('../')
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

from elmo_on_md.data_loaders.ner_loader import NERLoader
from elmo_on_md.evaluation.model_loader import load_model
from elmo_on_md.evaluation.named_entitiy_recognition import NER

In [2]:
# Experiment settings shared by the training cells further down.
random_state = 7  # seed handed to train_test_split so the split is repeatable
pos_weight = 7    # passed on to NER(...) as its `pos_weight` argument

# Load the data exposed by NERLoader and hold out 20% of it.
ner_loader = NERLoader()
data = ner_loader.load_data()
train_set, test_set = train_test_split(
    data,
    test_size=0.2,
    random_state=random_state,
)

### Original ELMo

In [3]:
# Baseline run: train the NER model on top of the 'original' ELMo checkpoint.
elmo = load_model('original')

# Build a fresh NER model around the embedder loaded above. Without this the
# cell raises NameError on a fresh kernel, or silently trains whatever
# `ner_model` was left over from an earlier run (wrapping a different ELMo).
ner_model = NER(elmo, pos_weight=pos_weight)

# NOTE(review): test_set is passed in the second position and the log prints
# "Validation Loss", so the held-out split appears to double as validation
# data - confirm this is intended.
ner_model.train(train_set, test_set, ner_loader.types, n_epochs=50)

2019-08-17 21:47:06,067 INFO: char embedding size: 2289
2019-08-17 21:47:06,759 INFO: word embedding size: 189561
2019-08-17 21:47:11,469 INFO: Model(
  (token_embedder): ConvTokenEmbedder(
    (word_emb_layer): EmbeddingLayer(
      (embedding): Embedding(189561, 100, padding_idx=3)
    )
    (char_emb_layer): EmbeddingLayer(
      (embedding): Embedding(2289, 50, padding_idx=2286)
    )
    (convolutions): ModuleList(
      (0): Conv1d(50, 32, kernel_size=(1,), stride=(1,))
      (1): Conv1d(50, 32, kernel_size=(2,), stride=(1,))
      (2): Conv1d(50, 64, kernel_size=(3,), stride=(1,))
      (3): Conv1d(50, 128, kernel_size=(4,), stride=(1,))
      (4): Conv1d(50, 256, kernel_size=(5,), stride=(1,))
      (5): Conv1d(50, 512, kernel_size=(6,), stride=(1,))
      (6): Conv1d(50, 1024, kernel_size=(7,), stride=(1,))
    )
    (highways): Highway(
      (_layers): ModuleList(
        (0): Linear(in_features=2048, out_features=4096, bias=True)
        (1): Linear(in_features=2048, out_fe

Epoch: 0	 Train Loss: 0.34759122133255005	 Validation Loss: 0.30664557218551636
Epoch: 1	 Train Loss: 0.16652123630046844	 Validation Loss: 0.19582664966583252
Epoch: 2	 Train Loss: 0.13033396005630493	 Validation Loss: 0.16199418902397156
Epoch: 3	 Train Loss: 0.11020185053348541	 Validation Loss: 0.14724349975585938
Epoch: 4	 Train Loss: 0.08663025498390198	 Validation Loss: 0.13859716057777405
Epoch: 5	 Train Loss: 0.06982453912496567	 Validation Loss: 0.13293901085853577
Epoch: 6	 Train Loss: 0.06554021686315536	 Validation Loss: 0.13033433258533478
Epoch: 7	 Train Loss: 0.055042609572410583	 Validation Loss: 0.12927770614624023
Epoch: 8	 Train Loss: 0.050155159085989	 Validation Loss: 0.13703952729701996
Epoch: 9	 Train Loss: 0.049948617815971375	 Validation Loss: 0.12496048212051392
Epoch: 10	 Train Loss: 0.045917145907878876	 Validation Loss: 0.128797248005867
Epoch: 11	 Train Loss: 0.033340729773044586	 Validation Loss: 0.12669862806797028
Epoch: 12	 Train Loss: 0.0237313602119

<elmo_on_md.evaluation.named_entitiy_recognition.NER at 0x2640ed18d68>

In [4]:
# Predict on the held-out split.
y_pred = ner_model.predict(test_set)

# Build the reference labels using the predictions' second dimension as the
# size argument, then move them to the CPU.
y_true = ner_model._create_labels(
    test_set,
    y_pred.shape[1],
    ner_loader.types,
).to('cpu')

# One integer label per entry of ner_loader.types, reported under that entry's name.
desired_labels = range(len(ner_loader.types))
report = classification_report(
    y_true.flatten(),
    y_pred.flatten(),
    labels=desired_labels,
    target_names=ner_loader.types,
)
print(report)

2019-08-17 21:49:07,484 INFO: 11 batches, avg len: 21.6


              precision    recall  f1-score   support

        PERS       0.92      0.86      0.89       406
        MISC       0.78      0.54      0.63       186
         LOC       0.74      0.75      0.74       342
        TIME       0.83      0.29      0.43        17
       MONEY       0.96      0.97      0.97       133
        DATE       0.84      0.78      0.81       113
     PERCENT       0.91      0.95      0.93        44
         ORG       0.62      0.64      0.63       412

   micro avg       0.79      0.75      0.77      1653
   macro avg       0.83      0.72      0.76      1653
weighted avg       0.79      0.75      0.76      1653



### The new Enhanced ELMo

In [3]:
# Enhanced run: same NER training call as the baseline, different ELMo checkpoint.
# NOTE(review): the checkpoint name mentions "pos_weight_8" while the NER model
# here is built with pos_weight = 7 - presumably these are independent
# settings; confirm.
elmo = load_model('pos_weight_8_lr_1e-5')
ner_model = NER(elmo, pos_weight=pos_weight)

ner_model.train(
    train_set,
    test_set,
    ner_loader.types,
    n_epochs=50,
)

2019-08-17 21:54:06,723 INFO: 42 batches, avg len: 21.0
2019-08-17 21:54:10,976 INFO: Finished 1000 sentences.
2019-08-17 21:54:13,928 INFO: Finished 2000 sentences.
2019-08-17 21:54:19,687 INFO: 11 batches, avg len: 21.6


Epoch: 0	 Train Loss: 0.4133738875389099	 Validation Loss: 0.3831273317337036
Epoch: 1	 Train Loss: 0.26489901542663574	 Validation Loss: 0.2659037113189697
Epoch: 2	 Train Loss: 0.2097492218017578	 Validation Loss: 0.21513010561466217
Epoch: 3	 Train Loss: 0.13066917657852173	 Validation Loss: 0.19195672869682312
Epoch: 4	 Train Loss: 0.11398670077323914	 Validation Loss: 0.17549945414066315
Epoch: 5	 Train Loss: 0.11411567032337189	 Validation Loss: 0.1702551245689392
Epoch: 6	 Train Loss: 0.08149869740009308	 Validation Loss: 0.16620494425296783
Epoch: 7	 Train Loss: 0.08925852924585342	 Validation Loss: 0.15475761890411377
Epoch: 8	 Train Loss: 0.07055612653493881	 Validation Loss: 0.15756797790527344
Epoch: 9	 Train Loss: 0.0619855560362339	 Validation Loss: 0.15685847401618958
Epoch: 10	 Train Loss: 0.0579092800617218	 Validation Loss: 0.15990108251571655
Epoch: 11	 Train Loss: 0.039520014077425	 Validation Loss: 0.1642831563949585
Epoch: 12	 Train Loss: 0.04038253426551819	 Vali

<elmo_on_md.evaluation.named_entitiy_recognition.NER at 0x2024ac4a860>

In [4]:
# Predict on the held-out split with the model trained in the previous cell.
y_pred = ner_model.predict(test_set)

# Build the reference labels using the predictions' second dimension as the
# size argument, then move them to the CPU.
y_true = ner_model._create_labels(
    test_set,
    y_pred.shape[1],
    ner_loader.types,
).to('cpu')

# One integer label per entry of ner_loader.types, reported under that entry's name.
desired_labels = range(len(ner_loader.types))
report = classification_report(
    y_true.flatten(),
    y_pred.flatten(),
    labels=desired_labels,
    target_names=ner_loader.types,
)
print(report)

2019-08-17 21:55:55,531 INFO: 11 batches, avg len: 21.6


              precision    recall  f1-score   support

        PERS       0.85      0.87      0.86       406
        MISC       0.60      0.51      0.55       186
         LOC       0.69      0.75      0.72       342
        TIME       1.00      0.12      0.21        17
       MONEY       0.98      0.95      0.97       133
        DATE       0.85      0.68      0.75       113
     PERCENT       0.91      0.95      0.93        44
         ORG       0.63      0.51      0.57       412

   micro avg       0.75      0.70      0.73      1653
   macro avg       0.81      0.67      0.69      1653
weighted avg       0.75      0.70      0.72      1653

