## Setup

In [1]:
import pandas as pd
from src.base import Base
from src.gbrt import GBRT
from src.utils import test_local, test_global

Loading GBRT data files...
Done.


In [2]:
# Directory that holds the pretrained embedding files. This is an absolute,
# machine-specific Windows path. The trailing separator is required because
# file paths are built by plain string concatenation (EMB_PATH + name).
EMB_PATH = "C:\\Personal Files\\NED-using-KG\\embeddings\\"

# Embedding files to evaluate, one per line. The final entry is the wiki2vec
# pickle; it is addressed as embs[-1] and loaded with is_wiki2vec=True, while
# the others are addressed as embs[:-1].
embs = [
    'word2vec-google-news-300',
    'glove-wiki-gigaword-300',
    'fasttext-wiki-news-subwords-300',
    'en.wiki.bpe.vs200000.d300.w2v',
    'wiki2vec_w10_100d.pkl',
]

## Test Baseline Models

In [4]:
# Evaluate the baseline model with every non-wiki2vec embedding. Each model is
# scored twice on the same instance: once with `cased` set to True and once
# with it set to False, producing one table row per embedding.
result_columns = ['Embedding', 'Accuracy (cased)', 'Accuracy (uncased)']
base_results = []

for emb in embs[:-1]:
    model = Base(EMB_PATH + emb)
    row = [emb]
    for cased in (True, False):
        model.cased = cased
        acc, _ = test_local(model)
        row.append(acc)
    base_results.append(row)

# The wiki2vec embedding (last entry of `embs`) is evaluated a single time;
# its score is placed in the uncased column and the cased column is left empty.
acc, _ = test_local(Base(EMB_PATH + embs[-1], is_wiki2vec=True))
base_results.append(['wiki2vec_w10_100d', None, acc])

res = pd.DataFrame(base_results, columns=result_columns)
res

100%|██████████| 231/231 [05:34<00:00,  1.45s/it]


Unnamed: 0,Embedding,Accuracy (cased),Accuracy (uncased)
0,word2vec-google-news-300,50.546,44.245
1,glove-wiki-gigaword-300,27.842,50.158
2,fasttext-wiki-news-subwords-300,44.597,42.52
3,en.wiki.bpe.vs200000.d300.w2v,23.126,41.851
4,wiki2vec_w10_100d,,65.329


## Test GBRT

In [5]:
# Evaluate each pretrained GBRT checkpoint on top of the wiki2vec embedding
# (the last entry of `embs`) and record accuracy plus the row count of the
# second value returned by test_global.
pretrained = ['base.pkl', 'string_sim.pkl', 'context.pkl', 'coherence.pkl']
wiki2vec_path = EMB_PATH + embs[-1]
results = []

for model_file in pretrained:
    model = GBRT(wiki2vec_path, model_path=model_file, is_wiki2vec=True)
    acc, res = test_global(model)
    # Row label is the checkpoint file name without its 4-character '.pkl' suffix.
    label = model_file[:-4]
    results.append([label, acc, res.shape[0]])

# `model` and `label` still refer to the last checkpoint in `pretrained`;
# that same model instance is re-evaluated with two_step switched on.
model.two_step = True
acc, res = test_global(model)
results.append([label + ' (two - step)', acc, res.shape[0]])

pd.DataFrame(results, columns=['Model', 'Accuracy', 'Mentions Tested'])

100%|██████████| 231/231 [22:11<00:00,  5.76s/it]
100%|██████████| 231/231 [08:51<00:00,  2.30s/it]
100%|██████████| 231/231 [07:14<00:00,  1.88s/it]
100%|██████████| 231/231 [07:20<00:00,  1.91s/it]
100%|██████████| 231/231 [07:25<00:00,  1.93s/it]


Unnamed: 0,Model,Accuracy,Mentions Tested
0,base,84.913,3029
1,string_sim,86.893,3029
2,context,89.931,3029
3,coherence,92.671,3029
4,coherence (two - step),92.638,3029


In [8]:
# Evaluate the GBRT checkpoints paired with each non-wiki2vec embedding.
# The checkpoint for an embedding is named '<embedding>_trained.pkl'.
word_embs = embs[:-1]
pretrained = [name + '_trained.pkl' for name in word_embs]
results = []

for emb, model_file in zip(word_embs, pretrained):
    print("Testing", model_file)
    model = GBRT(EMB_PATH + emb, model_path=model_file)
    acc, res = test_global(model)
    results.append([emb, acc])

pd.DataFrame(results, columns=['Model', 'Accuracy'])

Unnamed: 0,Model,Accuracy
0,word2vec-google-news-300,37.141
1,glove-wiki-gigaword-300,56.883
2,fasttext-wiki-news-subwords-300,38.131
3,en.wiki.bpe.vs200000.d300.w2v,33.278
