In [None]:
import glob
import itertools
import os
import numpy as np

import pandas
from gensim.models import KeyedVectors
from scipy.spatial.distance import euclidean, minkowski, cosine
from sklearn.metrics import balanced_accuracy_score
import re
import matplotlib.pyplot as plt
import seaborn

from tqdm.notebook import tqdm

def sort_alphanum(key):
    """Return a natural-sort key for ``key`` (so 'm2' orders before 'm10').

    The string is split with a number-matching pattern; tokens consisting only
    of decimal digits are compared numerically, all other tokens as text.

    Every token is wrapped in a ``(rank, value)`` pair -- rank 0 for numbers,
    rank 1 for text -- so that a number and a text token that end up at the
    same position can still be ordered (numbers first) instead of raising
    ``TypeError``. Keys that only ever compared number-to-number and
    text-to-text keep the same relative order as with bare values.

    Parameters
    ----------
    key : str
        The string to build a sort key for.

    Returns
    -------
    list of tuple
        One ``(0, float)`` or ``(1, str)`` pair per token.
    """
    # Raw string: '\.' is not a valid escape in a normal string literal.
    tokens = re.split(r'([-+]?[0-9]*\.?[0-9]*)', key)
    # isdecimal() rather than isdigit(): characters such as superscript digits
    # satisfy isdigit() but cannot be parsed by float().
    return [(0, float(tok)) if tok.isdecimal() else (1, tok) for tok in tokens]

In [None]:
# Load every saved KeyedVectors file found under ./embeddings and index it by
# the part of its file name that precedes the first dot.
embeddings = {
    os.path.basename(path).split('.')[0]: KeyedVectors.load(path)
    for path in glob.glob('./embeddings/*.kv')
}

In [None]:
# Raw annotation table (tab-separated, no index column).
annotations = pandas.read_csv('./raw_annotations.tsv', sep='\t', index_col=None)
# Annotation columns for which a majority label is derived.
dimensions = ['Content', 'Form', 'Style', 'Emotion', 'Overall']
# One row per distinct (base_ID, left_ID, right_ID) combination.
annotated_triples = annotations[['base_ID', 'left_ID', 'right_ID']].drop_duplicates()
# Majority table indexed by triple with one column per dimension. The columns
# are declared up front so that majority[dim] exists for every dimension even
# when no triple ends up with a label for it; cells stay NaN until assigned.
majority = pandas.DataFrame(
    index=pandas.MultiIndex.from_arrays(
        annotated_triples.values.T,
        names=list(annotated_triples.columns),
    ),
    columns=dimensions,
)

# Fill `majority` with one label per triple and dimension, where one can be
# decided from the annotations.
# Rows belonging to the round the original author marked as gold standard.
is_gold = annotations['Runde'] == 'FJ+MK+SW'

for triple in majority.index:
    mask = (
        (annotations['base_ID'] == triple[0])
        & (annotations['left_ID'] == triple[1])
        & (annotations['right_ID'] == triple[2])
    )

    # goldstandard: if this triple has rows from that round, use only those
    if (mask & is_gold).any():
        mask = mask & is_gold

    rows = annotations[mask]
    # All remaining rows for a triple must come from a single round.
    assert len(rows['Runde'].unique()) == 1

    for d in dimensions:
        modes = set(rows[d].mode().values)
        if len(modes) == 1 and modes != {'same'}:
            # Unique most frequent answer that is not 'same'.
            (winner,) = modes
            assert winner in ('left', 'right')
            majority.loc[triple, d] = winner
        elif len(modes) == 2 and modes != {'left', 'right'}:
            # Two-way tie that is not left-vs-right: discard 'same' and keep
            # the remaining answer.
            decided = modes - {'same'}
            assert decided == {'left'} or decided == {'right'}
            majority.loc[triple, d] = next(iter(decided))
        # Any other outcome (only 'same', a left/right tie, a three-way tie,
        # no answers) assigns no label for this triple and dimension.

In [None]:
def _l1_distance(u, v):
    """Minkowski distance of order 1 between ``u`` and ``v``."""
    return minkowski(u, v, p=1)


# Distance functions under comparison, keyed by the label used in the result
# table; each is called with two vectors.
distfns = dict(L2=euclidean, L1=_l1_distance, cosine=cosine)

def eval_embedding(distfn, triple_vectors, y_true, **args):
    """Score a distance function against annotated triples.

    For every element of ``triple_vectors`` the distance between item ``[0]``
    and item ``[1]`` is compared with the distance between item ``[0]`` and
    item ``[2]``. The prediction is ``'left'`` when the first distance is
    strictly smaller and ``'right'`` otherwise (ties included).

    Parameters
    ----------
    distfn : callable
        Called as ``distfn(a, b)``; the two results are compared with ``<``.
    triple_vectors : iterable
        Elements indexable at positions 0, 1 and 2.
    y_true : sequence
        Reference labels, passed to ``balanced_accuracy_score``.
    **args
        Forwarded unchanged to ``balanced_accuracy_score``.

    Returns
    -------
    The value returned by ``balanced_accuracy_score``.
    """
    y_pred = []
    for triple in triple_vectors:
        dist_to_left = distfn(triple[0], triple[1])
        dist_to_right = distfn(triple[0], triple[2])
        if dist_to_left < dist_to_right:
            y_pred.append('left')
        else:
            y_pred.append('right')
    return balanced_accuracy_score(y_true=y_true, y_pred=y_pred, **args)

# Result table: one row per (embedding name, distance name) pair, one column
# per annotation dimension. Embedding names are ordered with sort_alphanum.
embedding_names = sorted(embeddings.keys(), key=sort_alphanum)
single_run_df = pandas.DataFrame(
    index=pandas.MultiIndex.from_product([embedding_names, distfns.keys()]),
    columns=dimensions,
)

with tqdm(total=len(dimensions) * len(embeddings) * len(distfns)) as pbar:
    for ename in embedding_names:
        kv = embeddings[ename]
        # Build the vocabulary set once per embedding instead of once per
        # triple; it only serves the membership test below.
        vocab = set(kv.index_to_key)
        for dim in dimensions:
            # Triples that received a majority label for this dimension.
            labelled = majority[dim].dropna()

            # Keep only triples whose three IDs are all present in this
            # embedding. The selection does not depend on the distance
            # function, so it is done once and reused for every distance.
            vectors = []
            y_true = []
            for triple, label in zip(labelled.index, labelled.values):
                if set(triple) <= vocab:
                    vectors.append(kv[triple])
                    y_true.append(label)
            vectors = np.array(vectors)

            for distname, distfn in distfns.items():
                if y_true:
                    score = eval_embedding(distfn=distfn, triple_vectors=vectors, y_true=y_true)
                else:
                    # No labelled triple is covered by this embedding: record
                    # the cell as missing rather than passing empty inputs to
                    # the scorer.
                    score = np.nan
                single_run_df.loc[(ename, distname), dim] = score
                pbar.update(1)


In [None]:
# Three side-by-side heatmaps (one per distance, in the order below) with a
# shared y axis and a single colour-bar axis placed at the right edge.
panel_order = ['L1', 'cosine', 'L2']
fig, axs = plt.subplots(ncols=3, figsize=(16, len(embeddings) * 0.6), sharey=True)
cbar_ax = fig.add_axes([0.94, .18, .02, .65])
fig.suptitle('Balanced Accuracy Score for each model and dimension', fontsize='x-large')

for i, distname in enumerate(panel_order):
    ax = axs[i]
    # Rows for this distance only, indexed by embedding name.
    scores = single_run_df.xs(distname, level=1)
    seaborn.heatmap(
        scores.astype(float),
        annot=True,
        ax=ax,
        vmin=0.5,
        vmax=0.75,
        cbar_ax=cbar_ax,
    )
    ax.set_title(f'Evaluated on {distname} norm')