In [9]:
%load_ext autoreload
%autoreload 2

import sys, os
sys.path.append('../')
from sklearn.model_selection import train_test_split

from elmo_on_md.data_loaders.ner_loader import NERLoader
from elmo_on_md.evaluation.model_loader import load_model
from elmo_on_md.evaluation.named_entitiy_recognition import NER


The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [10]:
pos_weight = 7

### Original ELMo

In [11]:
elmo = load_model('original')
ner_model = NER(elmo, pos_weight=pos_weight)

ner_loader = NERLoader()
data = ner_loader.load_data()
train_set, test_set = train_test_split(data, test_size=0.2)

2019-08-06 19:02:58,130 INFO: char embedding size: 2289
2019-08-06 19:02:58,868 INFO: word embedding size: 189561
2019-08-06 19:03:02,226 INFO: Model(
  (token_embedder): ConvTokenEmbedder(
    (word_emb_layer): EmbeddingLayer(
      (embedding): Embedding(189561, 100, padding_idx=3)
    )
    (char_emb_layer): EmbeddingLayer(
      (embedding): Embedding(2289, 50, padding_idx=2286)
    )
    (convolutions): ModuleList(
      (0): Conv1d(50, 32, kernel_size=(1,), stride=(1,))
      (1): Conv1d(50, 32, kernel_size=(2,), stride=(1,))
      (2): Conv1d(50, 64, kernel_size=(3,), stride=(1,))
      (3): Conv1d(50, 128, kernel_size=(4,), stride=(1,))
      (4): Conv1d(50, 256, kernel_size=(5,), stride=(1,))
      (5): Conv1d(50, 512, kernel_size=(6,), stride=(1,))
      (6): Conv1d(50, 1024, kernel_size=(7,), stride=(1,))
    )
    (highways): Highway(
      (_layers): ModuleList(
        (0): Linear(in_features=2048, out_features=4096, bias=True)
        (1): Linear(in_features=2048, out_fe

In [12]:
ner_model.train(train_set, ner_loader.types, n_epochs=10)

  0%|                                                                                           | 0/40 [00:00<?, ?it/s]2019-08-06 19:03:05,533 INFO: 1 batches, avg len: 18.8
2019-08-06 19:03:06,186 INFO: 1 batches, avg len: 20.9
2019-08-06 19:03:06,771 INFO: 1 batches, avg len: 21.5
2019-08-06 19:03:07,494 INFO: 1 batches, avg len: 19.9
2019-08-06 19:03:08,138 INFO: 1 batches, avg len: 21.7
2019-08-06 19:03:09,286 INFO: 1 batches, avg len: 21.4
2019-08-06 19:03:09,976 INFO: 1 batches, avg len: 20.3
2019-08-06 19:03:10,578 INFO: 1 batches, avg len: 20.5
2019-08-06 19:03:11,285 INFO: 1 batches, avg len: 21.3
2019-08-06 19:03:11,932 INFO: 1 batches, avg len: 22.7
2019-08-06 19:03:12,683 INFO: 1 batches, avg len: 21.8
2019-08-06 19:03:13,377 INFO: 1 batches, avg len: 19.8
2019-08-06 19:03:13,983 INFO: 1 batches, avg len: 21.1
2019-08-06 19:03:14,726 INFO: 1 batches, avg len: 20.4
2019-08-06 19:03:15,307 INFO: 1 batches, avg len: 20.8
2019-08-06 19:03:15,903 INFO: 1 batches, avg len: 23.5
2

<elmo_on_md.evaluation.named_entitiy_recognition.NER at 0x1699ade6da0>

In [13]:
from sklearn.metrics import classification_report

y_pred = ner_model.predict(test_set)
y_true = ner_model._create_labels(test_set, y_pred.shape[1], ner_loader.types).to('cpu')
desired_labels = range(len(ner_loader.types))
report = classification_report(y_true, y_pred.flatten(), labels=desired_labels,target_names=ner_loader.types)
print(report)

2019-08-06 19:03:35,745 INFO: 11 batches, avg len: 20.8


              precision    recall  f1-score   support

        PERS       0.78      0.87      0.83       456
        MISC       0.36      0.58      0.45       211
         LOC       0.53      0.67      0.59       291
        TIME       0.00      0.00      0.00        11
       MONEY       0.74      0.96      0.84       106
        DATE       0.84      0.38      0.52       125
     PERCENT       1.00      0.30      0.47        33
         ORG       0.28      0.59      0.38       365

   micro avg       0.50      0.68      0.58      1598
   macro avg       0.57      0.54      0.51      1598
weighted avg       0.57      0.68      0.60      1598



### The new Enhanced ELMo

In [6]:
elmo = load_model('pos_factor_30')
ner_model = NER(elmo, pos_weight=pos_weight)

In [7]:
ner_model.train(train_set, ner_loader.types, n_epochs=10)

  0%|                                                                                           | 0/10 [00:00<?, ?it/s]2019-08-06 08:29:15,367 INFO: 1 batches, avg len: 21.0
2019-08-06 08:29:16,023 INFO: 1 batches, avg len: 23.1
2019-08-06 08:29:16,699 INFO: 1 batches, avg len: 20.4
2019-08-06 08:29:17,412 INFO: 1 batches, avg len: 19.4
2019-08-06 08:29:18,215 INFO: 1 batches, avg len: 22.2
2019-08-06 08:29:18,844 INFO: 1 batches, avg len: 22.3
2019-08-06 08:29:19,605 INFO: 1 batches, avg len: 20.1
2019-08-06 08:29:20,269 INFO: 1 batches, avg len: 22.0
2019-08-06 08:29:21,024 INFO: 1 batches, avg len: 20.8
2019-08-06 08:29:21,664 INFO: 1 batches, avg len: 24.2
2019-08-06 08:29:22,381 INFO: 1 batches, avg len: 20.3
2019-08-06 08:29:23,017 INFO: 1 batches, avg len: 22.1
2019-08-06 08:29:23,646 INFO: 1 batches, avg len: 20.7
2019-08-06 08:29:24,342 INFO: 1 batches, avg len: 19.5
2019-08-06 08:29:25,327 INFO: 1 batches, avg len: 21.2
2019-08-06 08:29:25,964 INFO: 1 batches, avg len: 20.6
2

<elmo_on_md.evaluation.named_entitiy_recognition.NER at 0x169998a4eb8>

In [8]:
y_pred = ner_model.predict(test_set)
y_true = ner_model._create_labels(test_set, y_pred.shape[1], ner_loader.types).to('cpu')
desired_labels = range(len(ner_loader.types))
report = classification_report(y_true, y_pred.flatten(), labels=desired_labels,target_names=ner_loader.types)
print(report)

2019-08-06 08:29:44,470 INFO: 11 batches, avg len: 21.7


              precision    recall  f1-score   support

        PERS       0.65      0.92      0.76       442
        MISC       0.63      0.28      0.38       214
         LOC       0.49      0.53      0.51       385
        TIME       0.00      0.00      0.00        18
       MONEY       0.85      0.90      0.87        89
        DATE       0.89      0.12      0.22       138
     PERCENT       0.00      0.00      0.00        43
         ORG       0.28      0.50      0.36       430

   micro avg       0.49      0.56      0.52      1759
   macro avg       0.47      0.40      0.39      1759
weighted avg       0.53      0.56      0.50      1759

