In [1]:
import tensorflow as tf
import tensorflow_hub as hub
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import f1_score
from typing import *
import copy
from deeppavlov.dataset_readers.ontonotes_reader import OntonotesReader

2018-09-04 13:09:12.711 DEBUG in 'matplotlib.backends'['__init__'] at line 90: backend module://ipykernel.pylab.backend_inline version unknown


In [2]:
from src.fewshot_ner_viz_component.data_processing import *
from src.fewshot_ner_viz_component.utils import *
from src.fewshot_ner_viz_component.fewshot_ner_binary_classifier import *

2018-09-04 13:09:12.801 INFO in 'summarizer.preprocessing.cleaner'['textcleaner'] at line 37: 'pattern' package not found; tag filters are not available for English


In [3]:
# Entity type this experiment targets; handed to the filter step below.
ne_type = 'PERSON'

# Load the full corpus (read_data reports the train/valid/test sentence counts),
# then derive the working dataset for the chosen entity type.
# NOTE(review): the filter presumably relabels tokens as 'T' (target) vs. other,
# since later cells test `t == 'T'` -- confirm in data_processing.
dataset_orig = read_data()
dataset = filter_dataset_by_ne_types(dataset_orig, ne_type)

Num of train sentences: 75187
Num of valid sentences: 9603
Num of test sentences: 9479
[(['Actions', 'had', 'to', 'be', 'taken', 'to', 'break', 'through', 'the', 'blockade', '.'], ['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O']), (['On', 'a', 'night', 'in', 'late', 'July', '1940', ',', 'the', 'atmosphere', 'in', 'Zhuanbi', 'Village', 'in', 'Shaanxi', 'was', 'unusual', '.'], ['O', 'B-DATE', 'I-DATE', 'I-DATE', 'I-DATE', 'I-DATE', 'I-DATE', 'O', 'O', 'O', 'O', 'B-GPE', 'I-GPE', 'O', 'B-GPE', 'O', 'O', 'O']), (['Villager', 'Xiao', 'Jianghe', 'has', 'a', 'vivid', 'memory', 'of', 'this', 'piece', 'of', 'history', '.'], ['O', 'B-PERSON', 'I-PERSON', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O']), (['On', 'that', 'dark', 'night', ',', 'everyone', 'was', 'sleeping', 'when', 'human', 'voices', 'and', 'neighing', 'horses', 'were', 'heard', 'within', 'the', 'village', '.'], ['O', 'B-TIME', 'I-TIME', 'I-TIME', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', '

In [4]:
# --- Experiment configuration -------------------------------------------------
# Size argument passed to get_data_sample() for each training sample.
N_EXAMPLES = 10
# Upper bound on the size of each test sample (capped by the test split length).
N_TEST_SENTENCES = 100
# How many independent train/test samples are drawn and evaluated.
N_SAMPLES = 10

# Names of the few-shot models to evaluate.
# For a quick run this can be narrowed to a single entry, e.g. ['svm'].
MODELS = ['ne_centroid', 'weighted_kNN', 'centroid_kNN', 'svm']

# Per-model hyper-parameters, keyed by model name.
# 'ne_nearest' has an entry here but is not listed in MODELS.
MODEL_PARAMS = {
    'ne_centroid': dict(sim_type='cosine'),
    'ne_nearest': dict(sim_type='cosine'),
    'weighted_kNN': dict(
        k=1,
        metric='cosine',
        use_class_weights=False,
        use_sim_weights=True,
    ),
    'centroid_kNN': dict(
        k=10,
        metric='dot_prod',
        use_class_weights=False,
    ),
    'svm': dict(),
}

### Make samples

In [5]:
# Draw N_SAMPLES train/test samples up front so that every embedder
# configuration is later evaluated on exactly the same data.
data_samples = []
for i in range(N_SAMPLES):
    # Train sample first, then test sample (same call order on every iteration).
    tokens_train, tags_train = get_data_sample(dataset['train'], N_EXAMPLES)
    n_test = min(N_TEST_SENTENCES, len(dataset['test']))
    tokens_test, tags_test = get_data_sample(dataset['test'], n_test)

    # Flatten the per-sentence tags into one binary vector: 1 for tag 'T', else 0.
    y_test = np.array([int(t == 'T') for seq in tags_test for t in seq])

    train_part = {'tokens': tokens_train, 'tags': tags_train}
    test_part = {'tokens': tokens_test, 'tags': tags_test, 'y': y_test}
    data_samples.append({'train': train_part, 'test': test_part})

### Prepare embedder params

In [6]:
# Checkpoint used by both configurations that restore fine-tuned ELMo parameters.
_elmo_ckpt = "./model_params/elmo.ckpt"

# One entry per embedder configuration to compare:
#   'elmo_params' -> keyword settings forwarded to the embedder,
#   'name'        -> human-readable title printed while evaluating,
#   'label'       -> short key used to index the results.
embedder_params = [
    {
        'elmo_params': {'trainable_cells': False},
        'name': 'Default ELMo',
        'label': 'default',
    },
    {
        'elmo_params': {'trainable_cells': False, 'restore_path': _elmo_ckpt},
        'name': 'Learned ELMo layer aggregation coefficients',
        'label': 'elmo_coef',
    },
    {
        'elmo_params': {'trainable_cells': True, 'restore_path': _elmo_ckpt},
        'name': 'ELMo coefficients + cell params',
        'label': 'elmo_coef_and_cells',
    },
]

In [7]:
# Evaluate every model in MODELS on every pre-drawn data sample, once per
# embedder configuration.
# Result layout:
#   f1_scores[label][model] = {'vals': [F1 per data sample, ...], 'avg': mean of 'vals'}
f1_scores = {}
# NOTE(review): `sims` is never filled in this cell; kept in case other cells read it.
sims = {}
# Use the real number of samples for the progress line, so it stays correct even
# if N_SAMPLES was edited after data_samples had been generated.
n_data_samples = len(data_samples)
for embedder_params_cur in embedder_params:
    print('============ {} ============'.format(embedder_params_cur['name']))
    elmo_params = embedder_params_cur['elmo_params']
    label = embedder_params_cur['label']

    # Clear the default TF graph before building the embedder for this configuration.
    tf.reset_default_graph()
    # ELMo-only embedder: capitalisation features and GloVe are switched off.
    embedder = CompositeEmbedder(use_elmo=True, elmo_scale=1, cap_scale=1, use_cap_feat=False, use_glove=False, elmo_params=elmo_params)

    # One accumulator per model for this configuration.
    f1_scores[label] = {model: {'vals': [], 'avg': 0} for model in MODELS}

    for j, data_sample in enumerate(data_samples):
        print('Sample {}/{}'.format(j+1, n_data_samples))
        tokens_train = data_sample['train']['tokens']
        tags_train = data_sample['train']['tags']
        tokens_test = data_sample['test']['tokens']
        # Flat per-token 0/1 ground truth stored alongside the sample.
        y_test = data_sample['test']['y']

        # A fresh classifier per sample, sharing the embedder built above.
        ner_bin_clf = FewshotNerBinaryClassifier(embedder)
        ner_bin_clf.train_on_batch(tokens_train, tags_train)
        results = ner_bin_clf.predict(tokens_test, MODELS, MODEL_PARAMS)

        for model in MODELS:
            pred = results[model]['pred']
            # Token-level binary F1 for this model on this sample.
            f1 = f1_score(y_test, pred)
            f1_scores[label][model]['vals'].append(f1)
            print('Model: {}, F1 (tokens) = {:.2f}'.format(model, f1*100))

    print('Average F1 scores:')
    for model in MODELS:
        f1_avg = np.mean(f1_scores[label][model]['vals'])
        f1_scores[label][model]['avg'] = f1_avg
        print('Model: {}, avg F1 (tokens) = {:.2f}'.format(model, f1_avg*100))

INFO:tensorflow:Using /tmp/tfhub_modules to cache modules.


2018-09-04 13:09:15.811 INFO in 'tensorflow'['tf_logging'] at line 159: Using /tmp/tfhub_modules to cache modules.


INFO:tensorflow:Saver not created because there are no variables in the graph to restore


2018-09-04 13:09:16.988 INFO in 'tensorflow'['tf_logging'] at line 115: Saver not created because there are no variables in the graph to restore


Sample 1/10
Train
INFO:tensorflow:Saver not created because there are no variables in the graph to restore


2018-09-04 13:09:18.68 INFO in 'tensorflow'['tf_logging'] at line 115: Saver not created because there are no variables in the graph to restore


# ne: 29, # tokens: 212
n_samples: 212
INFO:tensorflow:Saver not created because there are no variables in the graph to restore


2018-09-04 13:09:19.149 INFO in 'tensorflow'['tf_logging'] at line 115: Saver not created because there are no variables in the graph to restore


NE centroid similarity model
SVM classifier model
Weighted kNN model
k = 1, metric: cosine
NE centroid + words kNN similarity model
k = 10, metric: dot_prod
Model: ne_centroid, F1 (tokens) = 73.60
Model: weighted_kNN, F1 (tokens) = 66.44
Model: centroid_kNN, F1 (tokens) = 85.23
Model: svm, F1 (tokens) = 83.40
Sample 2/10
Train
INFO:tensorflow:Saver not created because there are no variables in the graph to restore


2018-09-04 13:09:55.170 INFO in 'tensorflow'['tf_logging'] at line 115: Saver not created because there are no variables in the graph to restore


# ne: 21, # tokens: 326
n_samples: 326
INFO:tensorflow:Saver not created because there are no variables in the graph to restore


2018-09-04 13:09:56.521 INFO in 'tensorflow'['tf_logging'] at line 115: Saver not created because there are no variables in the graph to restore


NE centroid similarity model
SVM classifier model
Weighted kNN model
k = 1, metric: cosine
NE centroid + words kNN similarity model
k = 10, metric: dot_prod
Model: ne_centroid, F1 (tokens) = 78.22
Model: weighted_kNN, F1 (tokens) = 84.17
Model: centroid_kNN, F1 (tokens) = 86.61
Model: svm, F1 (tokens) = 85.22
Sample 3/10
Train
INFO:tensorflow:Saver not created because there are no variables in the graph to restore


2018-09-04 13:10:46.130 INFO in 'tensorflow'['tf_logging'] at line 115: Saver not created because there are no variables in the graph to restore


# ne: 18, # tokens: 320
n_samples: 320
INFO:tensorflow:Saver not created because there are no variables in the graph to restore


2018-09-04 13:10:47.221 INFO in 'tensorflow'['tf_logging'] at line 115: Saver not created because there are no variables in the graph to restore


NE centroid similarity model
SVM classifier model
Weighted kNN model
k = 1, metric: cosine
NE centroid + words kNN similarity model
k = 10, metric: dot_prod
Model: ne_centroid, F1 (tokens) = 67.74
Model: weighted_kNN, F1 (tokens) = 78.34
Model: centroid_kNN, F1 (tokens) = 77.54
Model: svm, F1 (tokens) = 82.94
Sample 4/10
Train
INFO:tensorflow:Saver not created because there are no variables in the graph to restore


2018-09-04 13:11:30.606 INFO in 'tensorflow'['tf_logging'] at line 115: Saver not created because there are no variables in the graph to restore


# ne: 17, # tokens: 176
n_samples: 176
INFO:tensorflow:Saver not created because there are no variables in the graph to restore


2018-09-04 13:11:31.661 INFO in 'tensorflow'['tf_logging'] at line 115: Saver not created because there are no variables in the graph to restore


NE centroid similarity model
SVM classifier model
Weighted kNN model
k = 1, metric: cosine
NE centroid + words kNN similarity model
k = 10, metric: dot_prod
Model: ne_centroid, F1 (tokens) = 77.48
Model: weighted_kNN, F1 (tokens) = 83.63
Model: centroid_kNN, F1 (tokens) = 89.95
Model: svm, F1 (tokens) = 87.00
Sample 5/10
Train
INFO:tensorflow:Saver not created because there are no variables in the graph to restore


2018-09-04 13:12:03.41 INFO in 'tensorflow'['tf_logging'] at line 115: Saver not created because there are no variables in the graph to restore


# ne: 17, # tokens: 232
n_samples: 232
INFO:tensorflow:Saver not created because there are no variables in the graph to restore


2018-09-04 13:12:04.261 INFO in 'tensorflow'['tf_logging'] at line 115: Saver not created because there are no variables in the graph to restore


NE centroid similarity model
SVM classifier model
Weighted kNN model
k = 1, metric: cosine
NE centroid + words kNN similarity model
k = 10, metric: dot_prod
Model: ne_centroid, F1 (tokens) = 80.43
Model: weighted_kNN, F1 (tokens) = 85.85
Model: centroid_kNN, F1 (tokens) = 85.92
Model: svm, F1 (tokens) = 89.32
Sample 6/10
Train
INFO:tensorflow:Saver not created because there are no variables in the graph to restore


2018-09-04 13:12:42.949 INFO in 'tensorflow'['tf_logging'] at line 115: Saver not created because there are no variables in the graph to restore


# ne: 21, # tokens: 259
n_samples: 259
INFO:tensorflow:Saver not created because there are no variables in the graph to restore


2018-09-04 13:12:44.218 INFO in 'tensorflow'['tf_logging'] at line 115: Saver not created because there are no variables in the graph to restore


NE centroid similarity model
SVM classifier model
Weighted kNN model
k = 1, metric: cosine
NE centroid + words kNN similarity model
k = 10, metric: dot_prod
Model: ne_centroid, F1 (tokens) = 81.39
Model: weighted_kNN, F1 (tokens) = 83.54
Model: centroid_kNN, F1 (tokens) = 89.85
Model: svm, F1 (tokens) = 90.26
Sample 7/10
Train
INFO:tensorflow:Saver not created because there are no variables in the graph to restore


2018-09-04 13:13:26.271 INFO in 'tensorflow'['tf_logging'] at line 115: Saver not created because there are no variables in the graph to restore


# ne: 23, # tokens: 252
n_samples: 252
INFO:tensorflow:Saver not created because there are no variables in the graph to restore


2018-09-04 13:13:27.663 INFO in 'tensorflow'['tf_logging'] at line 115: Saver not created because there are no variables in the graph to restore


NE centroid similarity model
SVM classifier model
Weighted kNN model
k = 1, metric: cosine
NE centroid + words kNN similarity model
k = 10, metric: dot_prod
Model: ne_centroid, F1 (tokens) = 77.85
Model: weighted_kNN, F1 (tokens) = 80.90
Model: centroid_kNN, F1 (tokens) = 85.39
Model: svm, F1 (tokens) = 84.53
Sample 8/10
Train
INFO:tensorflow:Saver not created because there are no variables in the graph to restore


2018-09-04 13:14:11.607 INFO in 'tensorflow'['tf_logging'] at line 115: Saver not created because there are no variables in the graph to restore


# ne: 21, # tokens: 206
n_samples: 206
INFO:tensorflow:Saver not created because there are no variables in the graph to restore


2018-09-04 13:14:13.234 INFO in 'tensorflow'['tf_logging'] at line 115: Saver not created because there are no variables in the graph to restore


NE centroid similarity model
SVM classifier model
Weighted kNN model
k = 1, metric: cosine
NE centroid + words kNN similarity model
k = 10, metric: dot_prod
Model: ne_centroid, F1 (tokens) = 78.07
Model: weighted_kNN, F1 (tokens) = 80.89
Model: centroid_kNN, F1 (tokens) = 85.43
Model: svm, F1 (tokens) = 87.56
Sample 9/10
Train
INFO:tensorflow:Saver not created because there are no variables in the graph to restore


2018-09-04 13:14:45.464 INFO in 'tensorflow'['tf_logging'] at line 115: Saver not created because there are no variables in the graph to restore


# ne: 15, # tokens: 209
n_samples: 209
INFO:tensorflow:Saver not created because there are no variables in the graph to restore


2018-09-04 13:14:46.925 INFO in 'tensorflow'['tf_logging'] at line 115: Saver not created because there are no variables in the graph to restore


NE centroid similarity model
SVM classifier model
Weighted kNN model
k = 1, metric: cosine
NE centroid + words kNN similarity model
k = 10, metric: dot_prod
Model: ne_centroid, F1 (tokens) = 78.26
Model: weighted_kNN, F1 (tokens) = 85.51
Model: centroid_kNN, F1 (tokens) = 88.04
Model: svm, F1 (tokens) = 89.43
Sample 10/10
Train
INFO:tensorflow:Saver not created because there are no variables in the graph to restore


2018-09-04 13:15:20.210 INFO in 'tensorflow'['tf_logging'] at line 115: Saver not created because there are no variables in the graph to restore


# ne: 23, # tokens: 277
n_samples: 277
INFO:tensorflow:Saver not created because there are no variables in the graph to restore


2018-09-04 13:15:21.816 INFO in 'tensorflow'['tf_logging'] at line 115: Saver not created because there are no variables in the graph to restore


NE centroid similarity model
SVM classifier model
Weighted kNN model
k = 1, metric: cosine
NE centroid + words kNN similarity model
k = 10, metric: dot_prod
Model: ne_centroid, F1 (tokens) = 67.57
Model: weighted_kNN, F1 (tokens) = 83.54
Model: centroid_kNN, F1 (tokens) = 87.53
Model: svm, F1 (tokens) = 93.91
Average F1 scores:
Model: ne_centroid, avg F1 (tokens) = 76.06
Model: weighted_kNN, avg F1 (tokens) = 81.28
Model: centroid_kNN, avg F1 (tokens) = 86.15
Model: svm, avg F1 (tokens) = 87.36
Restoring finetuned ELMo params from ./model_params/elmo.ckpt
INFO:tensorflow:Restoring parameters from ./model_params/elmo.ckpt


2018-09-04 13:16:08.943 INFO in 'tensorflow'['tf_logging'] at line 115: Restoring parameters from ./model_params/elmo.ckpt


INFO:tensorflow:Saver not created because there are no variables in the graph to restore


2018-09-04 13:16:09.479 INFO in 'tensorflow'['tf_logging'] at line 115: Saver not created because there are no variables in the graph to restore


Sample 1/10
Train
INFO:tensorflow:Saver not created because there are no variables in the graph to restore


2018-09-04 13:16:10.341 INFO in 'tensorflow'['tf_logging'] at line 115: Saver not created because there are no variables in the graph to restore


# ne: 29, # tokens: 212
n_samples: 212
INFO:tensorflow:Saver not created because there are no variables in the graph to restore


2018-09-04 13:16:11.449 INFO in 'tensorflow'['tf_logging'] at line 115: Saver not created because there are no variables in the graph to restore


NE centroid similarity model
SVM classifier model
Weighted kNN model
k = 1, metric: cosine
NE centroid + words kNN similarity model
k = 10, metric: dot_prod
Model: ne_centroid, F1 (tokens) = 80.53
Model: weighted_kNN, F1 (tokens) = 65.31
Model: centroid_kNN, F1 (tokens) = 84.98
Model: svm, F1 (tokens) = 84.26
Sample 2/10
Train
INFO:tensorflow:Saver not created because there are no variables in the graph to restore


2018-09-04 13:16:47.957 INFO in 'tensorflow'['tf_logging'] at line 115: Saver not created because there are no variables in the graph to restore


# ne: 21, # tokens: 326
n_samples: 326
INFO:tensorflow:Saver not created because there are no variables in the graph to restore


2018-09-04 13:16:49.173 INFO in 'tensorflow'['tf_logging'] at line 115: Saver not created because there are no variables in the graph to restore


NE centroid similarity model
SVM classifier model
Weighted kNN model
k = 1, metric: cosine
NE centroid + words kNN similarity model
k = 10, metric: dot_prod
Model: ne_centroid, F1 (tokens) = 80.50
Model: weighted_kNN, F1 (tokens) = 83.88
Model: centroid_kNN, F1 (tokens) = 87.11
Model: svm, F1 (tokens) = 84.93
Sample 3/10
Train
INFO:tensorflow:Saver not created because there are no variables in the graph to restore


2018-09-04 13:17:40.719 INFO in 'tensorflow'['tf_logging'] at line 115: Saver not created because there are no variables in the graph to restore


# ne: 18, # tokens: 320
n_samples: 320
INFO:tensorflow:Saver not created because there are no variables in the graph to restore


2018-09-04 13:17:41.887 INFO in 'tensorflow'['tf_logging'] at line 115: Saver not created because there are no variables in the graph to restore


NE centroid similarity model
SVM classifier model
Weighted kNN model
k = 1, metric: cosine
NE centroid + words kNN similarity model
k = 10, metric: dot_prod
Model: ne_centroid, F1 (tokens) = 72.68
Model: weighted_kNN, F1 (tokens) = 80.23
Model: centroid_kNN, F1 (tokens) = 78.05
Model: svm, F1 (tokens) = 82.80
Sample 4/10
Train
INFO:tensorflow:Saver not created because there are no variables in the graph to restore


2018-09-04 13:18:28.281 INFO in 'tensorflow'['tf_logging'] at line 115: Saver not created because there are no variables in the graph to restore


# ne: 17, # tokens: 176
n_samples: 176
INFO:tensorflow:Saver not created because there are no variables in the graph to restore


2018-09-04 13:18:29.448 INFO in 'tensorflow'['tf_logging'] at line 115: Saver not created because there are no variables in the graph to restore


NE centroid similarity model
SVM classifier model
Weighted kNN model
k = 1, metric: cosine
NE centroid + words kNN similarity model
k = 10, metric: dot_prod
Model: ne_centroid, F1 (tokens) = 80.64
Model: weighted_kNN, F1 (tokens) = 84.90
Model: centroid_kNN, F1 (tokens) = 89.74
Model: svm, F1 (tokens) = 85.91
Sample 5/10
Train
INFO:tensorflow:Saver not created because there are no variables in the graph to restore


2018-09-04 13:19:00.147 INFO in 'tensorflow'['tf_logging'] at line 115: Saver not created because there are no variables in the graph to restore


# ne: 17, # tokens: 232
n_samples: 232
INFO:tensorflow:Saver not created because there are no variables in the graph to restore


2018-09-04 13:19:01.551 INFO in 'tensorflow'['tf_logging'] at line 115: Saver not created because there are no variables in the graph to restore


NE centroid similarity model
SVM classifier model
Weighted kNN model
k = 1, metric: cosine
NE centroid + words kNN similarity model
k = 10, metric: dot_prod
Model: ne_centroid, F1 (tokens) = 82.11
Model: weighted_kNN, F1 (tokens) = 86.94
Model: centroid_kNN, F1 (tokens) = 84.33
Model: svm, F1 (tokens) = 88.73
Sample 6/10
Train
INFO:tensorflow:Saver not created because there are no variables in the graph to restore


2018-09-04 13:19:41.751 INFO in 'tensorflow'['tf_logging'] at line 115: Saver not created because there are no variables in the graph to restore


# ne: 21, # tokens: 259
n_samples: 259
INFO:tensorflow:Saver not created because there are no variables in the graph to restore


2018-09-04 13:19:43.76 INFO in 'tensorflow'['tf_logging'] at line 115: Saver not created because there are no variables in the graph to restore


NE centroid similarity model
SVM classifier model
Weighted kNN model
k = 1, metric: cosine
NE centroid + words kNN similarity model
k = 10, metric: dot_prod
Model: ne_centroid, F1 (tokens) = 81.93
Model: weighted_kNN, F1 (tokens) = 83.08
Model: centroid_kNN, F1 (tokens) = 92.91
Model: svm, F1 (tokens) = 88.37
Sample 7/10
Train
INFO:tensorflow:Saver not created because there are no variables in the graph to restore


2018-09-04 13:20:26.765 INFO in 'tensorflow'['tf_logging'] at line 115: Saver not created because there are no variables in the graph to restore


# ne: 23, # tokens: 252
n_samples: 252
INFO:tensorflow:Saver not created because there are no variables in the graph to restore


2018-09-04 13:20:28.123 INFO in 'tensorflow'['tf_logging'] at line 115: Saver not created because there are no variables in the graph to restore


NE centroid similarity model
SVM classifier model
Weighted kNN model
k = 1, metric: cosine
NE centroid + words kNN similarity model
k = 10, metric: dot_prod
Model: ne_centroid, F1 (tokens) = 78.81
Model: weighted_kNN, F1 (tokens) = 80.82
Model: centroid_kNN, F1 (tokens) = 84.48
Model: svm, F1 (tokens) = 82.77
Sample 8/10
Train
INFO:tensorflow:Saver not created because there are no variables in the graph to restore


2018-09-04 13:21:13.688 INFO in 'tensorflow'['tf_logging'] at line 115: Saver not created because there are no variables in the graph to restore


# ne: 21, # tokens: 206
n_samples: 206
INFO:tensorflow:Saver not created because there are no variables in the graph to restore


2018-09-04 13:21:15.546 INFO in 'tensorflow'['tf_logging'] at line 115: Saver not created because there are no variables in the graph to restore


NE centroid similarity model
SVM classifier model
Weighted kNN model
k = 1, metric: cosine
NE centroid + words kNN similarity model
k = 10, metric: dot_prod
Model: ne_centroid, F1 (tokens) = 78.28
Model: weighted_kNN, F1 (tokens) = 80.63
Model: centroid_kNN, F1 (tokens) = 86.14
Model: svm, F1 (tokens) = 85.91
Sample 9/10
Train
INFO:tensorflow:Saver not created because there are no variables in the graph to restore


2018-09-04 13:21:46.717 INFO in 'tensorflow'['tf_logging'] at line 115: Saver not created because there are no variables in the graph to restore


# ne: 15, # tokens: 209
n_samples: 209
INFO:tensorflow:Saver not created because there are no variables in the graph to restore


2018-09-04 13:21:48.557 INFO in 'tensorflow'['tf_logging'] at line 115: Saver not created because there are no variables in the graph to restore


NE centroid similarity model
SVM classifier model
Weighted kNN model
k = 1, metric: cosine
NE centroid + words kNN similarity model
k = 10, metric: dot_prod
Model: ne_centroid, F1 (tokens) = 81.00
Model: weighted_kNN, F1 (tokens) = 85.92
Model: centroid_kNN, F1 (tokens) = 87.74
Model: svm, F1 (tokens) = 89.52
Sample 10/10
Train
INFO:tensorflow:Saver not created because there are no variables in the graph to restore


2018-09-04 13:22:24.445 INFO in 'tensorflow'['tf_logging'] at line 115: Saver not created because there are no variables in the graph to restore


# ne: 23, # tokens: 277
n_samples: 277
INFO:tensorflow:Saver not created because there are no variables in the graph to restore


2018-09-04 13:22:26.79 INFO in 'tensorflow'['tf_logging'] at line 115: Saver not created because there are no variables in the graph to restore


NE centroid similarity model
SVM classifier model
Weighted kNN model
k = 1, metric: cosine
NE centroid + words kNN similarity model
k = 10, metric: dot_prod
Model: ne_centroid, F1 (tokens) = 69.31
Model: weighted_kNN, F1 (tokens) = 84.36
Model: centroid_kNN, F1 (tokens) = 88.11
Model: svm, F1 (tokens) = 92.64
Average F1 scores:
Model: ne_centroid, avg F1 (tokens) = 78.58
Model: weighted_kNN, avg F1 (tokens) = 81.61
Model: centroid_kNN, avg F1 (tokens) = 86.36
Model: svm, avg F1 (tokens) = 86.58
Restoring finetuned ELMo params from ./model_params/elmo.ckpt
INFO:tensorflow:Restoring parameters from ./model_params/elmo.ckpt


2018-09-04 13:23:12.993 INFO in 'tensorflow'['tf_logging'] at line 115: Restoring parameters from ./model_params/elmo.ckpt


INFO:tensorflow:Saver not created because there are no variables in the graph to restore


2018-09-04 13:23:13.697 INFO in 'tensorflow'['tf_logging'] at line 115: Saver not created because there are no variables in the graph to restore


Sample 1/10
Train
INFO:tensorflow:Saver not created because there are no variables in the graph to restore


2018-09-04 13:23:14.598 INFO in 'tensorflow'['tf_logging'] at line 115: Saver not created because there are no variables in the graph to restore


# ne: 29, # tokens: 212
n_samples: 212
INFO:tensorflow:Saver not created because there are no variables in the graph to restore


2018-09-04 13:23:15.717 INFO in 'tensorflow'['tf_logging'] at line 115: Saver not created because there are no variables in the graph to restore


NE centroid similarity model
SVM classifier model
Weighted kNN model
k = 1, metric: cosine
NE centroid + words kNN similarity model
k = 10, metric: dot_prod
Model: ne_centroid, F1 (tokens) = 83.47
Model: weighted_kNN, F1 (tokens) = 75.39
Model: centroid_kNN, F1 (tokens) = 88.52
Model: svm, F1 (tokens) = 81.38
Sample 2/10
Train
INFO:tensorflow:Saver not created because there are no variables in the graph to restore


2018-09-04 13:23:53.992 INFO in 'tensorflow'['tf_logging'] at line 115: Saver not created because there are no variables in the graph to restore


# ne: 21, # tokens: 326
n_samples: 326
INFO:tensorflow:Saver not created because there are no variables in the graph to restore


2018-09-04 13:23:55.168 INFO in 'tensorflow'['tf_logging'] at line 115: Saver not created because there are no variables in the graph to restore


NE centroid similarity model
SVM classifier model
Weighted kNN model
k = 1, metric: cosine
NE centroid + words kNN similarity model
k = 10, metric: dot_prod
Model: ne_centroid, F1 (tokens) = 85.95
Model: weighted_kNN, F1 (tokens) = 85.47
Model: centroid_kNN, F1 (tokens) = 85.12
Model: svm, F1 (tokens) = 84.25
Sample 3/10
Train
INFO:tensorflow:Saver not created because there are no variables in the graph to restore


2018-09-04 13:24:46.250 INFO in 'tensorflow'['tf_logging'] at line 115: Saver not created because there are no variables in the graph to restore


# ne: 18, # tokens: 320
n_samples: 320
INFO:tensorflow:Saver not created because there are no variables in the graph to restore


2018-09-04 13:24:47.385 INFO in 'tensorflow'['tf_logging'] at line 115: Saver not created because there are no variables in the graph to restore


NE centroid similarity model
SVM classifier model
Weighted kNN model
k = 1, metric: cosine
NE centroid + words kNN similarity model
k = 10, metric: dot_prod
Model: ne_centroid, F1 (tokens) = 84.70
Model: weighted_kNN, F1 (tokens) = 88.52
Model: centroid_kNN, F1 (tokens) = 86.59
Model: svm, F1 (tokens) = 86.04
Sample 4/10
Train
INFO:tensorflow:Saver not created because there are no variables in the graph to restore


2018-09-04 13:25:32.967 INFO in 'tensorflow'['tf_logging'] at line 115: Saver not created because there are no variables in the graph to restore


# ne: 17, # tokens: 176
n_samples: 176
INFO:tensorflow:Saver not created because there are no variables in the graph to restore


2018-09-04 13:25:34.149 INFO in 'tensorflow'['tf_logging'] at line 115: Saver not created because there are no variables in the graph to restore


NE centroid similarity model
SVM classifier model
Weighted kNN model
k = 1, metric: cosine
NE centroid + words kNN similarity model
k = 10, metric: dot_prod
Model: ne_centroid, F1 (tokens) = 87.67
Model: weighted_kNN, F1 (tokens) = 90.99
Model: centroid_kNN, F1 (tokens) = 85.90
Model: svm, F1 (tokens) = 85.84
Sample 5/10
Train
INFO:tensorflow:Saver not created because there are no variables in the graph to restore


2018-09-04 13:26:05.148 INFO in 'tensorflow'['tf_logging'] at line 115: Saver not created because there are no variables in the graph to restore


# ne: 17, # tokens: 232
n_samples: 232
INFO:tensorflow:Saver not created because there are no variables in the graph to restore


2018-09-04 13:26:06.710 INFO in 'tensorflow'['tf_logging'] at line 115: Saver not created because there are no variables in the graph to restore


NE centroid similarity model
SVM classifier model
Weighted kNN model
k = 1, metric: cosine
NE centroid + words kNN similarity model
k = 10, metric: dot_prod
Model: ne_centroid, F1 (tokens) = 91.04
Model: weighted_kNN, F1 (tokens) = 91.57
Model: centroid_kNN, F1 (tokens) = 78.46
Model: svm, F1 (tokens) = 90.26
Sample 6/10
Train
INFO:tensorflow:Saver not created because there are no variables in the graph to restore


2018-09-04 13:26:46.781 INFO in 'tensorflow'['tf_logging'] at line 115: Saver not created because there are no variables in the graph to restore


# ne: 21, # tokens: 259
n_samples: 259
INFO:tensorflow:Saver not created because there are no variables in the graph to restore


2018-09-04 13:26:48.110 INFO in 'tensorflow'['tf_logging'] at line 115: Saver not created because there are no variables in the graph to restore


NE centroid similarity model
SVM classifier model
Weighted kNN model
k = 1, metric: cosine
NE centroid + words kNN similarity model
k = 10, metric: dot_prod
Model: ne_centroid, F1 (tokens) = 89.83
Model: weighted_kNN, F1 (tokens) = 89.36
Model: centroid_kNN, F1 (tokens) = 88.09
Model: svm, F1 (tokens) = 90.52
Sample 7/10
Train
INFO:tensorflow:Saver not created because there are no variables in the graph to restore


2018-09-04 13:27:31.214 INFO in 'tensorflow'['tf_logging'] at line 115: Saver not created because there are no variables in the graph to restore


# ne: 23, # tokens: 252
n_samples: 252
INFO:tensorflow:Saver not created because there are no variables in the graph to restore


2018-09-04 13:27:32.630 INFO in 'tensorflow'['tf_logging'] at line 115: Saver not created because there are no variables in the graph to restore


NE centroid similarity model
SVM classifier model
Weighted kNN model
k = 1, metric: cosine
NE centroid + words kNN similarity model
k = 10, metric: dot_prod
Model: ne_centroid, F1 (tokens) = 85.59
Model: weighted_kNN, F1 (tokens) = 83.50
Model: centroid_kNN, F1 (tokens) = 83.06
Model: svm, F1 (tokens) = 83.75
Sample 8/10
Train
INFO:tensorflow:Saver not created because there are no variables in the graph to restore


2018-09-04 13:28:17.227 INFO in 'tensorflow'['tf_logging'] at line 115: Saver not created because there are no variables in the graph to restore


# ne: 21, # tokens: 206
n_samples: 206
INFO:tensorflow:Saver not created because there are no variables in the graph to restore


2018-09-04 13:28:18.806 INFO in 'tensorflow'['tf_logging'] at line 115: Saver not created because there are no variables in the graph to restore


NE centroid similarity model
SVM classifier model
Weighted kNN model
k = 1, metric: cosine
NE centroid + words kNN similarity model
k = 10, metric: dot_prod
Model: ne_centroid, F1 (tokens) = 88.74
Model: weighted_kNN, F1 (tokens) = 87.53
Model: centroid_kNN, F1 (tokens) = 91.99
Model: svm, F1 (tokens) = 89.93
Sample 9/10
Train
INFO:tensorflow:Saver not created because there are no variables in the graph to restore


2018-09-04 13:28:51.415 INFO in 'tensorflow'['tf_logging'] at line 115: Saver not created because there are no variables in the graph to restore


# ne: 15, # tokens: 209
n_samples: 209
INFO:tensorflow:Saver not created because there are no variables in the graph to restore


2018-09-04 13:28:52.918 INFO in 'tensorflow'['tf_logging'] at line 115: Saver not created because there are no variables in the graph to restore


NE centroid similarity model
SVM classifier model
Weighted kNN model
k = 1, metric: cosine
NE centroid + words kNN similarity model
k = 10, metric: dot_prod
Model: ne_centroid, F1 (tokens) = 90.12
Model: weighted_kNN, F1 (tokens) = 89.51
Model: centroid_kNN, F1 (tokens) = 86.17
Model: svm, F1 (tokens) = 89.52
Sample 10/10
Train
INFO:tensorflow:Saver not created because there are no variables in the graph to restore


2018-09-04 13:29:28.100 INFO in 'tensorflow'['tf_logging'] at line 115: Saver not created because there are no variables in the graph to restore


# ne: 23, # tokens: 277
n_samples: 277
INFO:tensorflow:Saver not created because there are no variables in the graph to restore


2018-09-04 13:29:29.741 INFO in 'tensorflow'['tf_logging'] at line 115: Saver not created because there are no variables in the graph to restore


NE centroid similarity model
SVM classifier model
Weighted kNN model
k = 1, metric: cosine
NE centroid + words kNN similarity model
k = 10, metric: dot_prod
Model: ne_centroid, F1 (tokens) = 90.75
Model: weighted_kNN, F1 (tokens) = 89.07
Model: centroid_kNN, F1 (tokens) = 90.25
Model: svm, F1 (tokens) = 92.53
Average F1 scores:
Model: ne_centroid, avg F1 (tokens) = 87.79
Model: weighted_kNN, avg F1 (tokens) = 87.09
Model: centroid_kNN, avg F1 (tokens) = 86.42
Model: svm, avg F1 (tokens) = 87.40


In [8]:
# Dump the raw nested results for inspection:
# f1_scores[label][model] -> {'vals': [per-sample F1, ...], 'avg': mean F1}.
print(f1_scores)

{'default': {'ne_centroid': {'vals': [0.736, 0.7822410147991543, 0.6774193548387096, 0.7747747747747749, 0.8042895442359248, 0.8138957816377171, 0.778494623655914, 0.780701754385965, 0.7826086956521738, 0.6756756756756755], 'avg': 0.7606101219656008}, 'weighted_kNN': {'vals': [0.6643835616438357, 0.8416666666666667, 0.7833827893175075, 0.836283185840708, 0.8585131894484412, 0.8354430379746836, 0.8089887640449438, 0.8088888888888888, 0.8551401869158879, 0.8353909465020577], 'avg': 0.812808121724362}, 'centroid_kNN': {'vals': [0.8523002421307506, 0.8660714285714286, 0.7753846153846154, 0.8995215311004785, 0.8591885441527446, 0.8984771573604061, 0.8539325842696629, 0.8542713567839195, 0.8803827751196172, 0.8752941176470588], 'avg': 0.8614824352520681}, 'svm': {'vals': [0.8340425531914893, 0.8522483940042828, 0.8294117647058824, 0.8699551569506727, 0.8932038834951457, 0.9026128266033254, 0.8453159041394337, 0.8755760368663594, 0.8943488943488944, 0.9391304347826086], 'avg': 0.8735845849088

In [9]:
# Summary table: one row per model, one column per embedder configuration;
# each cell is that model's average token-level F1 in percent.
# Header and rows are built from `labels`, so the table adapts to however many
# configurations embedder_params holds instead of assuming exactly three.
labels = [par['label'] for par in embedder_params]
print('\t'.join('{:<8s}'.format(title) for title in ['MODEL'] + labels))
for model in MODELS:
    avg_cells = ['{:<8.2f}'.format(f1_scores[label][model]['avg']*100) for label in labels]
    print('\t'.join(['{:<8s}'.format(model)] + avg_cells))

MODEL   	default 	elmo_coef	elmo_coef_and_cells
ne_centroid	76.06   	78.58   	87.79   
weighted_kNN	81.28   	81.61   	87.09   
centroid_kNN	86.15   	86.36   	86.42   
svm     	87.36   	86.58   	87.40   
