In [10]:
%load_ext autoreload
%autoreload 2

import sys, os
sys.path.append('../')
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

from elmo_on_md.data_loaders.ner_loader import NERLoader
from elmo_on_md.evaluation.model_loader import load_model
from elmo_on_md.evaluation.named_entitiy_recognition import NER


The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [3]:
pos_weight = 7

### Original ELMo

In [6]:
elmo = load_model('original')
ner_model = NER(elmo, pos_weight=pos_weight)

ner_loader = NERLoader()
data = ner_loader.load_data()
train_set, test_set = train_test_split(data, test_size=0.2)

2019-08-10 16:14:05,218 INFO: char embedding size: 2289
2019-08-10 16:14:05,903 INFO: word embedding size: 189561
2019-08-10 16:14:08,792 INFO: Model(
  (token_embedder): ConvTokenEmbedder(
    (word_emb_layer): EmbeddingLayer(
      (embedding): Embedding(189561, 100, padding_idx=3)
    )
    (char_emb_layer): EmbeddingLayer(
      (embedding): Embedding(2289, 50, padding_idx=2286)
    )
    (convolutions): ModuleList(
      (0): Conv1d(50, 32, kernel_size=(1,), stride=(1,))
      (1): Conv1d(50, 32, kernel_size=(2,), stride=(1,))
      (2): Conv1d(50, 64, kernel_size=(3,), stride=(1,))
      (3): Conv1d(50, 128, kernel_size=(4,), stride=(1,))
      (4): Conv1d(50, 256, kernel_size=(5,), stride=(1,))
      (5): Conv1d(50, 512, kernel_size=(6,), stride=(1,))
      (6): Conv1d(50, 1024, kernel_size=(7,), stride=(1,))
    )
    (highways): Highway(
      (_layers): ModuleList(
        (0): Linear(in_features=2048, out_features=4096, bias=True)
        (1): Linear(in_features=2048, out_fe

In [4]:
ner_model.train(train_set, test_set, ner_loader.types, n_epochs=10)

2019-08-10 16:11:28,779 INFO: 42 batches, avg len: 21.1
2019-08-10 16:11:32,184 INFO: Finished 1000 sentences.
2019-08-10 16:11:35,852 INFO: Finished 2000 sentences.
2019-08-10 16:11:42,154 INFO: 11 batches, avg len: 21.0


Epoch: 0	 Train Loss: 0.44009220600128174	 Validation Loss: 0.4078238308429718
Epoch: 1	 Train Loss: 0.263390451669693	 Validation Loss: 0.24798768758773804
Epoch: 2	 Train Loss: 0.18455564975738525	 Validation Loss: 0.18591977655887604
Epoch: 3	 Train Loss: 0.14358460903167725	 Validation Loss: 0.16115854680538177
Epoch: 4	 Train Loss: 0.11507905274629593	 Validation Loss: 0.14806154370307922
Epoch: 5	 Train Loss: 0.09379848092794418	 Validation Loss: 0.1416408121585846
Epoch: 6	 Train Loss: 0.07475953549146652	 Validation Loss: 0.13818854093551636
Epoch: 7	 Train Loss: 0.05891880765557289	 Validation Loss: 0.13678371906280518
Epoch: 8	 Train Loss: 0.0503205843269825	 Validation Loss: 0.13715003430843353
Epoch: 9	 Train Loss: 0.046089161187410355	 Validation Loss: 0.1476268619298935


<elmo_on_md.evaluation.named_entitiy_recognition.NER at 0x1a351ec70f0>

In [5]:

y_pred = ner_model.predict(test_set)
y_true = ner_model._create_labels(test_set, y_pred.shape[1], ner_loader.types).to('cpu')
desired_labels = range(len(ner_loader.types))
report = classification_report(y_true.flatten(), y_pred.flatten(), labels=desired_labels,target_names=ner_loader.types)
print(report)

2019-08-10 16:12:03,720 INFO: 11 batches, avg len: 21.0


              precision    recall  f1-score   support

        PERS       0.88      0.90      0.89       423
        MISC       0.64      0.51      0.57       226
         LOC       0.72      0.68      0.70       324
        TIME       0.75      0.30      0.43        10
       MONEY       0.93      0.95      0.94       121
        DATE       0.81      0.70      0.75       137
     PERCENT       0.83      0.94      0.88        31
         ORG       0.50      0.75      0.60       423

   micro avg       0.69      0.75      0.72      1695
   macro avg       0.76      0.72      0.72      1695
weighted avg       0.72      0.75      0.73      1695



### The new Enhanced ELMo

In [7]:
elmo = load_model('BiLSTM_pos_weight_8')
ner_model = NER(elmo, pos_weight=pos_weight)

In [8]:
ner_model.train(train_set, test_set, ner_loader.types, n_epochs=10)

2019-08-10 16:14:18,141 INFO: 42 batches, avg len: 21.1
2019-08-10 16:14:21,586 INFO: Finished 1000 sentences.
2019-08-10 16:14:25,692 INFO: Finished 2000 sentences.
2019-08-10 16:14:31,700 INFO: 11 batches, avg len: 21.2


Epoch: 0	 Train Loss: 0.4411655366420746	 Validation Loss: 0.4643690884113312
Epoch: 1	 Train Loss: 0.30454781651496887	 Validation Loss: 0.33242329955101013
Epoch: 2	 Train Loss: 0.2343941032886505	 Validation Loss: 0.2660597860813141
Epoch: 3	 Train Loss: 0.18478761613368988	 Validation Loss: 0.22832244634628296
Epoch: 4	 Train Loss: 0.14929310977458954	 Validation Loss: 0.20719395577907562
Epoch: 5	 Train Loss: 0.12783607840538025	 Validation Loss: 0.19445723295211792
Epoch: 6	 Train Loss: 0.11144714057445526	 Validation Loss: 0.1879783719778061
Epoch: 7	 Train Loss: 0.0975450873374939	 Validation Loss: 0.18154528737068176
Epoch: 8	 Train Loss: 0.08934814482927322	 Validation Loss: 0.1820758581161499
Epoch: 9	 Train Loss: 0.0698544979095459	 Validation Loss: 0.17740774154663086


<elmo_on_md.evaluation.named_entitiy_recognition.NER at 0x1f8832c9860>

In [12]:
y_pred = ner_model.predict(test_set)
y_true = ner_model._create_labels(test_set, y_pred.shape[1], ner_loader.types).to('cpu')
desired_labels = range(len(ner_loader.types))
report = classification_report(y_true.flatten(), y_pred.flatten(), labels=desired_labels,target_names=ner_loader.types)
print(report)

2019-08-10 16:15:33,242 INFO: 11 batches, avg len: 21.2
  'precision', 'predicted', average, warn_for)


              precision    recall  f1-score   support

        PERS       0.59      0.93      0.72       393
        MISC       0.39      0.63      0.48       177
         LOC       0.56      0.64      0.60       295
        TIME       0.00      0.00      0.00         8
       MONEY       0.90      0.97      0.93       145
        DATE       0.63      0.69      0.66       136
     PERCENT       0.69      0.85      0.76        60
         ORG       0.35      0.72      0.47       394

   micro avg       0.51      0.77      0.61      1608
   macro avg       0.51      0.68      0.58      1608
weighted avg       0.54      0.77      0.62      1608

