In [5]:
%load_ext autoreload
%autoreload 2

import evals2

import numpy as np
import scipy
import torch
from torch import nn

# Seed both NumPy's and PyTorch's global random number generators with the
# same fixed value so that re-running the notebook starts from the same
# random state.
SEED = 0
np.random.seed(SEED)
torch.manual_seed(SEED)

from matplotlib import pyplot as plt
from pandas import DataFrame
import seaborn as sns
# Global seaborn appearance for every figure drawn below: enlarged fonts,
# the "ticks" style and a serif font family.
sns.set(font_scale=1.5)
sns.set_style("ticks", {'font.family': 'serif'})
#plt.tight_layout()

# Separator looked for inside tokens of the vector files: a token containing
# SEP is split on it and treated as a multi-word (bigram) key.
SEP = '__'

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [6]:
# Parse the ratings file into the two lookup structures used by the cells below:
#   unigrams -- set of every individual word seen in the first two columns
#   bigrams  -- (word1, word2) -> float value of column index 6 (reported
#               further down as the human compositionality score)
unigrams = set()
bigrams = {}
with open('rep2_data/reddy.txt') as f:
    next(f, None) # header (the default avoids StopIteration on an empty file)
    for line in f:
        fields = line.split()
        if not fields:
            # Tolerate blank lines, e.g. a trailing newline at the end of the file.
            continue
        w1, w2 = fields[:2]
        sim = float(fields[6])
        # Each token has the form "<word>-<tag>" and the tag is discarded
        # (presumably a part-of-speech marker -- TODO confirm). Splitting on
        # the LAST hyphen only keeps words that themselves contain a hyphen.
        w1, _ = w1.rsplit('-', 1)
        w2, _ = w2.rsplit('-', 1)
        bigrams[w1, w2] = sim
        unigrams.add(w1)
        unigrams.add(w2)

In [7]:
class Composition(nn.Module):
    """Parameter-free composition module that combines two inputs by addition."""

    def forward(self, x, y):
        """Return ``x + y``."""
        combined = x + y
        return combined
    
def ranks(arr):
    """Return the 0-based rank of every element of ``arr``.

    The smallest value gets rank 0, the next smallest rank 1, and so on.
    Implemented as a double argsort: the first pass yields the sorting
    permutation, the second pass inverts it into per-element ranks.
    """
    sort_order = np.argsort(np.array(arr))
    return np.argsort(sort_order)

def _load_vectors(path):
    """Read the vectors of all known unigrams/bigrams from a text vector file.

    Each non-blank line is ``<token> <float> <float> ...``. A token containing
    ``SEP`` is split on it and looked up as a tuple key. Only tokens present in
    the module-level ``bigrams`` / ``unigrams`` collections are kept; all other
    lines are skipped without parsing their numbers.

    Returns a dict mapping word (str) or word tuple to a 1-D numpy array.
    """
    vecs = {}
    with open(path) as f:
        for line in f:
            parts = line.split()
            if not parts:
                # Blank line: nothing to unpack.
                continue
            word = parts[0]
            if SEP in word:
                word = tuple(word.split(SEP))
            if word in bigrams or word in unigrams:
                # A token occurring twice in the file would silently overwrite
                # its earlier vector, so treat it as an error.
                assert word not in vecs
                # Convert to floats only for the entries that are actually used.
                vecs[word] = np.asarray([float(v) for v in parts[1:]])
    return vecs


def _print_extremes(title, rows):
    """Print a titled table of (human score, model err, words) rows."""
    print(title)
    print("%20s %20s %40s" % ("human score", "model err", "words"))
    for row in rows:
        print("%20.2f %20.2f %40s" % row)


def measure_comp(context, suffix):
    """Compare model composition errors with human compositionality scores.

    Loads ``rep2_data/model<context><suffix>.vec``, keeps every bigram for which
    both constituent words and the bigram itself have a vector, and passes the
    vectors to ``evals2.evaluate`` with an additive ``Composition`` and
    ``evals2.CosDist``. The last ``len(bigrams kept)`` entries of the returned
    errors are taken as the per-bigram errors.

    Side effects:
      * draws a rank-vs-rank regression plot and saves it as
        ``rep_ctx<context><suffix>.pdf`` (the suffix is part of the name so
        that runs with different suffixes do not overwrite each other),
      * prints the Spearman correlation between errors and human scores,
      * prints the five bigrams with the lowest and the five with the highest
        error.

    Args:
        context: integer identifying the model file (formatted with ``%d``).
        suffix: string appended to the model file name, e.g. ``''``.

    Returns:
        None.
    """
    # Import the submodule explicitly: a bare ``import scipy`` does not
    # guarantee that ``scipy.stats`` is loaded on every SciPy version.
    from scipy.stats import spearmanr

    print('----')
    print('context: %s%s' % (context, suffix))
    vecs = _load_vectors('rep2_data/model%d%s.vec' % (context, suffix))

    # "e*" lists hold the expressions (words / word pairs) and "r*" lists the
    # matching representations; primitives and compounds are kept separately.
    eprim, rprim = [], []
    ecomp, rcomp = [], []
    for bigram in bigrams:
        w1, w2 = bigram
        if not (w1 in vecs and w2 in vecs and bigram in vecs):
            continue
        eprim.extend([w1, w2])
        rprim.extend([vecs[w1], vecs[w2]])
        ecomp.append(bigram)
        rcomp.append(vecs[bigram])

    if not ecomp:
        # Without this guard ``errs[-0:]`` below would select the whole list
        # instead of nothing.
        print("no bigram has vectors for both words and the compound; nothing to evaluate")
        return

    errs = evals2.evaluate(rprim + rcomp, eprim + ecomp, Composition(), evals2.CosDist(), zero_init=False)
    # Compounds were passed last, so their errors are the trailing entries.
    errs = errs[-len(ecomp):]

    scores = [bigrams[e] for e in ecomp]

    # Plot ranks rather than raw values.
    data = DataFrame({'err': ranks(errs), 'score': ranks(scores)})
    sns.lmplot(x='err', y='score', data=data)
    plt.xlabel('TRE (rank)')
    plt.ylabel('compositionality (rank)')
    plt.savefig('rep_ctx%d%s.pdf' % (context, suffix), format='pdf')
    plt.show()
    print(spearmanr(errs, scores))

    # Order bigrams by model error: lowest first.
    comb = sorted(zip(scores, errs, ecomp), key=lambda row: row[1])
    _print_extremes("compositional:", comb[:5])
    print()
    _print_extremes("non-compositional:", comb[-5:])

In [None]:
# Model variants to evaluate; the commented values are the alternative sweeps.
contexts = [7]   # [1, 3, 5, 7]
suffixes = ['']  # ['', '_tiny']
for context in contexts:
    for suffix in suffixes:
        measure_comp(context, suffix)

----
context: 7
184.9040985107422
19.09400177001953
9.937151908874512
8.994105339050293
