In [13]:
from nltk.corpus import wordnet as wn
from collections import defaultdict
import numpy as np
import json

In [89]:
# Tally how often each sense annotation occurs for every lemma in SemCor.
# Layout: mfs_kb[lemma][sense] -> number of occurrences.
# The `sense` field may hold several annotations separated by ';'; each one
# is counted separately.
mfs_kb = defaultdict(lambda: defaultdict(int))
with open('data/jsonl/SemCor.jsonl', 'r') as f:
    for items in map(json.loads, f):
        senses = items['sense'].split(';')
        lemma_counts = mfs_kb[items['lemma']]
        for sense in senses:
            lemma_counts[sense] += 1

In [91]:
# For each lemma keep the sense with the highest count in `mfs_kb`.
# Ties resolve to the sense that was inserted first, since `max` returns the
# first maximal element it encounters.
mfs = defaultdict(str)
mfs.update(
    (lemma, max(counts, key=counts.get))
    for lemma, counts in mfs_kb.items()
)

In [2]:
# Presumably the names of the evaluation sets (cf. 'data/jsonl/ALL.jsonl');
# not referenced by the cells visible here -- TODO confirm usage.
wsd = [
    'ALL',
    'semeval2007',
    'semeval2013',
    'semeval2015',
    'senseval2',
    'senseval3',
]

In [82]:
def wn_first(word, pos = None):
    """Return the lemma keys of the first WordNet synset found for ``word``.

    Args:
        word: Lemma to look up in WordNet.
        pos: Optional part-of-speech restriction forwarded to ``wn.synsets``
            (a single code such as ``'n'`` or an iterable of codes such as
            ``['a', 's']``). ``None`` applies no restriction.

    Returns:
        A list with the ``key()`` of every lemma in the first synset that
        ``wn.synsets`` returns, or an empty list when WordNet has no synset
        for the given word / part of speech.
    """
    # Passing pos=None is the same as omitting it, so one call covers both cases.
    synsets = wn.synsets(word, pos)
    if not synsets:
        # Unknown lemma (or no entry for this POS): report "no prediction"
        # instead of raising IndexError on the [0] lookup.
        return []
    return [lemma.key() for lemma in synsets[0].lemmas()]

In [11]:
def evaluate(system: list, reference: list):
    """Score one prediction against the gold annotations.

    Both arguments are lists of strings; surrounding whitespace is stripped
    from every entry before comparing.

    Returns:
        1.0 if at least one stripped system entry also appears among the
        stripped reference entries, otherwise 0.0.
    """
    predicted = {entry.strip() for entry in system}
    gold = {entry.strip() for entry in reference}

    # Any shared element counts as a hit.
    return 0.0 if predicted.isdisjoint(gold) else 1.0

In [71]:
# Translates the tags stored in the JSONL `pos` field into the part-of-speech
# argument handed to WordNet lookups.
pos = dict(
    VERB='v',
    NOUN='n',
    ADJ=['a', 's'],  # adjectives map to two WordNet codes
    ADV='r',
)

In [96]:
# Score the WordNet first-sense prediction on every instance of ALL.jsonl.
# `scores` collects one 0.0/1.0 value per instance; `pos_wise` groups the same
# values by the instance's `pos` tag.
scores = []
pos_wise = defaultdict(list)
with open('data/jsonl/ALL.jsonl', 'r') as f:
    for line in f:
        items = json.loads(line)
        # Kept under its own name: assigning to `mfs` here would overwrite the
        # SemCor most-frequent-sense table that is stored in `mfs`.
        predicted = wn_first(items['lemma'], pos[items['pos']])
        truth = items['sense'].split(';')
        # Evaluate once and record the result in both collections.
        score = evaluate(predicted, truth)
        scores.append(score)
        pos_wise[items['pos']].append(score)

In [97]:
# Mean score per part-of-speech tag.
{tag: np.mean(tag_scores) for tag, tag_scores in pos_wise.items()}

{'NOUN': 0.676046511627907,
 'ADJ': 0.743455497382199,
 'VERB': 0.5030266343825666,
 'ADV': 0.8092485549132948}

In [98]:
# Mean score over all evaluated instances.
np.asarray(scores).mean()

0.6518681924720805