In [1]:
from pandas import DataFrame, read_csv, concat
from csv import reader
from numpy import mean, array, zeros, errstate, seterr, isfinite
from collections import defaultdict
from scipy.spatial.distance import cosine
from scipy.stats import spearmanr
from os import path
from gensim.models import KeyedVectors, Word2Vec
from pymorphy2 import MorphAnalyzer
from itertools import product

morph = MorphAnalyzer()
seterr(all='raise')

{'divide': 'warn', 'invalid': 'warn', 'over': 'warn', 'under': 'ignore'}

In [2]:
# df_en = DataFrame.from_csv('data/eye-tracking/eye_tracking_data_en_raw.csv').reset_index(drop=True)

In [3]:
# df_ru = DataFrame.from_csv('data/eye-tracking/eye_tracking_data_ru.csv')

Load raw messy corpus proposed by the Laboratory of Neurolinguistics and transform it to a decent dataset

In [4]:
# with open(path.join('data', 'data.csv'), mode='r') as infile:
#     corpus_bytes = reader(infile)
#     corpus = [i for i in corpus_bytes]

# corpus = [i[0].split('\t') if len(i) == 1 else ''.join(i).split('\t') for i in corpus]
# df = DataFrame(corpus[1:], columns=corpus[0])

Amount of unique words in a corpus

In [5]:
# len(df['word.id'].unique())

Aggregate corpus data by unqiue words

In [6]:
# df_en = df_en.astype('str').groupby('Word_Cleaned').agg({
#                             'IA_DWELL_TIME': ', '.join,
#                             'IA_FIRST_FIXATION_DURATION': ', '.join,
#                             'IA_FIRST_FIXATION_VISITED_IA_COUNT': ', '.join,
#                             'IA_FIRST_FIXATION_X': ', '.join,
#                             'IA_FIRST_FIXATION_Y': ', '.join,
#                             'IA_FIRST_FIX_PROGRESSIVE': ', '.join,
#                             'IA_FIRST_FIXATION_TIME': ', '.join,
#                             'IA_FIRST_RUN_DWELL_TIME': ', '.join,
#                             'IA_FIRST_RUN_START_TIME': ', '.join,
#                             'IA_FIRST_RUN_END_TIME': ', '.join,
#                             'IA_FIRST_FIXATION_TIME': ', '.join,
#                             'IA_FIRST_RUN_FIXATION_COUNT': ', '.join,
#                             'IA_DWELL_TIME': ', '.join,
#                             'IA_FIXATION_COUNT': ', '.join,
#                             'IA_RUN_COUNT': ', '.join,
#                             'IA_SKIP': ', '.join,
#                             'IA_REGRESSION_IN': ', '.join,
#                             'IA_REGRESSION_IN_COUNT': ', '.join,
#                             'IA_REGRESSION_OUT': ', '.join,
#                             'IA_REGRESSION_OUT_COUNT': ', '.join,
#                             'IA_REGRESSION_OUT_FULL': ', '.join,
#                             'IA_REGRESSION_OUT_FULL_COUNT': ', '.join,
#                             'IA_REGRESSION_PATH_DURATION': ', '.join,
#                             'IA_REGRESSION_OUT_FULL_COUNT': ', '.join,
#                             'IA_REGRESSION_PATH_DURATION': ', '.join,
#                             'IA_FIRST_SACCADE_AMPLITUDE': ', '.join,
#                             'IA_FIRST_SACCADE_ANGLE': ', '.join,
#                             'IA_FIRST_SACCADE_END_TIME': ', '.join,
#                             'IA_FIRST_SACCADE_START_TIME': ', '.join
#                         }).reset_index()

In [7]:
# df = df.groupby('Lemma').agg({
#                             'average.accuracy': ', '.join,
#                             'IA_DWELL_TIME': ', '.join,
#                             'IA_FIRST_FIXATION_DURATION': ', '.join,
#                             'IA_FIRST_RUN_DWELL_TIME': ', '.join,
#                             'IA_FIRST_RUN_FIXATION_COUNT': ', '.join,
#                             'IA_FIXATION_COUNT': ', '.join,
#                             'IA_LEGAL': ', '.join,
#                             'IA_REGRESSION_IN': ', '.join,
#                             'IA_REGRESSION_OUT_FULL': ', '.join,
#                             'IA_REGRESSION_PATH_DURATION': ', '.join,
#                             'IA_SECOND_RUN_DWELL_TIME': ', '.join,
#                             'ao': ', '.join,
#                             'IA_SELECTIVE_REGRESSION_PATH_DURATION': ', '.join,
#                             'IA_SKIP': ', '.join,
#                             'IA_SPILLOVER': ', '.join,
#                             'landing': ', '.join,
#                             'dir': ', '.join,
#                             'fixated.letter': ', '.join,
#                             'one_fix': ', '.join,
#                             'twoplus_fix': ', '.join,
#                             'logit.acc': ', '.join,
#                         }).reset_index()

Serialize transformed dataset

In [8]:
# df_ru.to_csv('data/eye-tracking/eye_tracking_data_ru.csv')
df_ru = DataFrame.from_csv('data/eye-tracking/eye_tracking_data_ru.csv').fillna(',')

In [9]:
# df_en[7:].reset_index(drop=True).to_csv('data/eye-tracking/eye_tracking_data_en.csv')
df_en = DataFrame.from_csv('data/eye-tracking/eye_tracking_data_en.csv').fillna(',')

In [10]:
ru_en_dict = DataFrame.from_csv('data/dictionaries/en-ru-dict.csv')

Calculate mean value of aggregated values for each word in the dataset

In [11]:
def calculate_mean(df, na):
    for column in df:
        for i in range(len(df)):
            try:
                values = list(filter(lambda a: a != na, [a.strip() for a in df[column][i].split(',')]))
                if not values:
                    df[column][i] = 0
                    continue
                df[column][i] = mean([float(a) for a in values])
            except ValueError: # if column is a column of words
                continue 
            except FloatingPointError: # if all are NA
                df[column][i] = 0
    return df

In [12]:
df_en = calculate_mean(df_en, 'nan')
df_ru = calculate_mean(df_ru, 'NA')

Loading and processing dataset of human judgements of embeddings

In [13]:
def load_sim_dataset(name, embeddings, verbose=False, rusvectores=False):
    df = read_csv(path.join('data', '{}.csv'.format(name))).dropna()
    old_len = len(df)
    for i, m in df.iterrows():
        if not rusvectores:
            if not m['word1'] in embeddings or not m['word2'] in embeddings:
                df.drop(i, inplace=True)
        else:
            if not add_pos_tag(m['word1']) in embeddings or not add_pos_tag(m['word2']) in embeddings:
                df.drop(i, inplace=True)
    if verbose:
        print('Percent of dropped = {:2.1f}%, amount of remanining words = {}'.format((old_len - len(df))/old_len*100, len(df)))
    return df.reset_index(drop=True)

In [14]:
def load_en_sim_dataset(name, embeddings, verbose=False, rusvectores=False):
    df = read_csv(path.join('data', '{}.csv'.format(name))).dropna()
    old_len = len(df)
    for i, m in df.iterrows():
        try:
            if not ru_en_dict[ru_en_dict['ru'] == m['word1']].iloc[0, 0] in embeddings or not ru_en_dict[ru_en_dict['ru'] == m['word2']].iloc[0, 0] in embeddings:
                df.drop(i, inplace=True)
        except IndexError:
            df.drop(i, inplace=True)
    if verbose:
        print('Percent of dropped = {:2.1f}%, amount of remanining words = {}'.format((old_len - len(df))/old_len*100, len(df)))
    return df.reset_index(drop=True)

In [15]:
def load_ru_sim_dataset(name, embeddings, verbose=False, rusvectores=False):
    df = read_csv(path.join('data', '{}.csv'.format(name))).dropna()
    old_len = len(df)
    for i, m in df.iterrows():
        try:
            if not add_pos_tag(ru_en_dict[ru_en_dict['en'] == m['word1']].iloc[0, 1]) in embeddings or not add_pos_tag(ru_en_dict[ru_en_dict['en'] == m['word2']].iloc[0, 1]) in embeddings:
                df.drop(i, inplace=True)
        except IndexError:
            df.drop(i, inplace=True)
    if verbose:
        print('Percent of dropped = {:2.1f}%, amount of remanining words = {}'.format((old_len - len(df))/old_len*100, len(df)))
    return df.reset_index(drop=True)

In [16]:
def make_sims_dataset(dataset, embeddings, rusvectores=False):
    sims = zeros(shape=len(dataset), dtype='float32')
    for i, m in dataset.iterrows():
        if not rusvectores:
            sims[i] = 1 - cosine(embeddings[m['word1']], embeddings[m['word2']])
        else:
            sims[i] = 1 - cosine(embeddings[add_pos_tag(m['word1'])], embeddings[add_pos_tag(m['word2'])])
    return sims

In [17]:
def make_en_sims_dataset(dataset, embeddings, rusvectores=False):
    sims = []
    for i, m in dataset.iterrows():
        
        try:
            sims.append(1 - cosine(embeddings[ru_en_dict[ru_en_dict['ru'] == m['word1']].iloc[0, 0]], embeddings[ru_en_dict[ru_en_dict['ru'] == m['word2']].iloc[0, 0]]))
        except IndexError:
            continue
    return array(sims)

In [18]:
def make_ru_sims_dataset(dataset, embeddings, rusvectores=False):
    sims = []
    for i, m in dataset.iterrows():
        try:
            sims.append(1 - cosine(embeddings[add_pos_tag(ru_en_dict[ru_en_dict['en'] == m['word1']].iloc[0, 1])], 
                        embeddings[add_pos_tag(ru_en_dict[ru_en_dict['en'] == m['word2']].iloc[0, 1])]))
        except IndexError:
            continue
    return array(sims)

In [19]:
def make_mapped_datasets_en_ru(dataset_en, dataset_ru):
    sims_en = []
    sims_ru = []
    for i, m in dataset_en.iterrows():
        try:
            sims_ru.append(dataset_ru[(dataset_ru['word1'] == morph.parse(ru_en_dict[ru_en_dict['en'] == m['word1']].iloc[0, 1])[0].normal_form) 
                & (dataset_ru['word2'] == morph.parse(ru_en_dict[ru_en_dict['en'] == m['word2']].iloc[0, 1])[0].normal_form)].iloc[0, 2])
            sims_en.append(m['similarity'])
        except IndexError:
            continue
    return array(sims_en), array(sims_ru)

In [20]:
def make_mapped_datasets_ru_en(dataset_ru, dataset_en):
    sims_ru = []
    sims_en = []
    for i, m in dataset_ru.iterrows():
        try:
            sims_en.append(dataset_ru[(dataset_ru['word1'] == ru_en_dict[ru_en_dict['ru'] == m['word1']].iloc[0, 0]) 
                & (dataset_ru['word2'] == ru_en_dict[ru_en_dict['ru'] == m['word2']].iloc[0, 0])].iloc[0, 2])
            sims_ru.append(m['sim'])
        except IndexError:
            continue
    return array(sims_en), array(sims_ru)

In [21]:
def make_word2vec_dataset(dataset, model):
    sims = zeros(shape=len(dataset), dtype='float32')
    for i, m in dataset.iterrows():
        sims[i] = 1 - cosine(model[add_pos_tag(m['word1'])], model[add_pos_tag(m['word2'])])
    return sims

In [22]:
def make_word2vec_eye_dataset(dataset, model, lang='ru'):
    pairs = list(product(dataset, repeat=2))
    sims_w2v = zeros(shape=len(pairs), dtype='float32')
    sims_eye = zeros(shape=len(pairs), dtype='float32')
    for i in enumerate(pairs):
        try:
            if lang == 'en':
                sims_w2v[i[0]] = 1 - cosine(model[i[1][0]], model[i[1][1]])
                sims_eye[i[0]] = 1 - cosine(dataset[i[1][0]], dataset[i[1][1]])
            else: # add pos tags
                sims_w2v[i[0]] = 1 - cosine(model[add_pos_tag(i[1][0])], model[add_pos_tag(i[1][1])])
                sims_eye[i[0]] = 1 - cosine(dataset[i[1][0]], dataset[i[1][1]])
        except KeyError:
            continue
    return sims_w2v, sims_eye

Load Word2Vec-model

In [23]:
rusvectores = KeyedVectors.load_word2vec_format(path.join('..', 'word2vec-models', 'ruwikiruscorpora.bin'), binary=True)

In [24]:
googlenews = KeyedVectors.load_word2vec_format(path.join('..', 'word2vec-models', 'google-news.bin'), binary=True)

In [25]:
def add_pos_tag(word):
    tag = str(morph.parse(word)[0].tag.POS)
    if tag == 'ADJF':
        tag = 'ADJ'
    elif tag == 'INFN':
        tag = 'VERB'
    if word == 'объем': 
        tag = 'NOUN'
    if word == 'струя':
        tag = 'NOUN'
    if word == 'чай':
        tag = 'NOUN'
    if word == 'два':
        word = 'двадцать'
        tag = 'NUM'
    return '{}_{}'.format(word, tag)

Make a comparison

In [26]:
eye_embeddings_en = {}
eye_embeddings_en_ru = {}

for i, k in df_en.iloc[:,0:len(df_en.columns)].iterrows():
    try:
        eye_embeddings_en_ru[ru_en_dict[ru_en_dict['en'] == k['Word_Cleaned']].iloc[0, 1]] = array(k[1:].values)
        eye_embeddings_en[k['Word_Cleaned']] = array(k[1:].values)
    except IndexError: # no such word in the en-ru dict
        continue

In [27]:
eye_embeddings_ru = {}
eye_embeddings_ru_en = {}

for i, k in df_ru.iloc[:,0:len(df_ru.columns)].iterrows():
    try:
        eye_embeddings_ru_en[ru_en_dict[ru_en_dict['ru'] == k['Lemma']].iloc[0, 0]] = array(k[1:].values)
        eye_embeddings_ru[k['Lemma']] = array(k[1:].values)
    except IndexError: # no such word in the en-ru dict
        continue

In [28]:
for name, printed_name in [
                ('human_judgements/ru/simlex999', 'RuSimLex999'),
                ('human_judgements/ru/simlex965', 'RuSimLex965'),
                ('human_judgements/ru/hj', 'HJ: Human Judgements of Word Pairs'),
                # ('human_judgements/ru/rt', 'RT: Synonyms and Hypernyms from the Thesaurus RuThes'), 
                # ('human_judgements/ru/ae2', 'AE: Cognitive Associations from the Sociation.org Experiment'),
                ]:
        print('Dataset: {}'.format(printed_name))
        
        dataset = load_sim_dataset(name, eye_embeddings_ru, True)
        eye_sims_ru = make_sims_dataset(dataset, eye_embeddings_ru)
        print('Correlation with Russian human judgements of Russian gaze vectors: {:0.2f} ({:0.2f})'.format(*spearmanr(eye_sims_ru, dataset.sim)))
        print()
        
        dataset = load_sim_dataset(name, eye_embeddings_en_ru, True)
        eye_sims_en = make_sims_dataset(dataset, eye_embeddings_en_ru)
        print('Correlation with Russian human judgements of English gaze vectors: {:0.2f} ({:0.2f})'.format(*spearmanr(eye_sims_en, dataset.sim)))
        print('=================================')
        print()

Dataset: RuSimLex999
Percent of dropped = 99.1%, amount of remanining words = 9
Correlation with Russian human judgements of Russian gaze vectors: 0.47 (0.21)

Percent of dropped = 98.7%, amount of remanining words = 13
Correlation with Russian human judgements of English gaze vectors: -0.40 (0.17)

Dataset: RuSimLex965
Percent of dropped = 99.1%, amount of remanining words = 9
Correlation with Russian human judgements of Russian gaze vectors: 0.47 (0.21)

Percent of dropped = 98.7%, amount of remanining words = 13
Correlation with Russian human judgements of English gaze vectors: -0.40 (0.17)

Dataset: HJ: Human Judgements of Word Pairs
Percent of dropped = 97.0%, amount of remanining words = 12
Correlation with Russian human judgements of Russian gaze vectors: 0.63 (0.03)

Percent of dropped = 97.2%, amount of remanining words = 11
Correlation with Russian human judgements of English gaze vectors: -0.31 (0.36)



In [29]:
for name, printed_name in [
                ('human_judgements/en/wordsim353-rel', 'wordsim353'),
                ]:
        print('{}'.format(printed_name))
        
        dataset = load_sim_dataset(name, eye_embeddings_en, True)
        eye_sims_en = make_sims_dataset(dataset, eye_embeddings_en)
        print('Correlation with English human judgements of English gaze vectors: {:0.2f} ({:0.2f})'.format(*spearmanr(eye_sims_en, dataset.similarity)))
        print()
        
        dataset = load_sim_dataset(name, eye_embeddings_ru_en, True)
        eye_sims_ru = make_sims_dataset(dataset, eye_embeddings_ru_en)
        print('Correlation with English human judgements of Russian gaze vectors: {:0.2f} ({:0.2f})'.format(*spearmanr(eye_sims_ru, dataset.similarity)))
        print('=================================')

wordsim353
Percent of dropped = 95.6%, amount of remanining words = 11
Correlation with English human judgements of English gaze vectors: 0.05 (0.89)

Percent of dropped = 97.6%, amount of remanining words = 6
Correlation with English human judgements of Russian gaze vectors: 0.52 (0.29)


In [30]:
seterr(all='ignore')

{'divide': 'raise', 'invalid': 'raise', 'over': 'raise', 'under': 'raise'}

In [31]:
print('Correlation with Russian gaze vectors of Russian Word2Vec: {:0.2f} ({:0.2f})'.format(*spearmanr(*make_word2vec_eye_dataset(eye_embeddings_ru, rusvectores))))
print('Correlation with English gaze vectors of Russian Word2Vec: {:0.2f} ({:0.2f})'.format(*spearmanr(*make_word2vec_eye_dataset(eye_embeddings_en_ru, rusvectores))))

Correlation with Russian gaze vectors of Russian Word2Vec: 0.06 (0.00)
Correlation with English gaze vectors of Russian Word2Vec: 0.09 (0.00)


In [32]:
print('Correlation with Russian gaze vectors of English Word2Vec: {:0.2f} ({:0.2f})'.format(*spearmanr(*make_word2vec_eye_dataset(eye_embeddings_ru_en, googlenews, 'en'))))
print('Correlation with English gaze vectors of English Word2Vec: {:0.2f} ({:0.2f})'.format(*spearmanr(*make_word2vec_eye_dataset(eye_embeddings_en, googlenews, 'en'))))

Correlation with Russian gaze vectors of English Word2Vec: 0.14 (0.00)
Correlation with English gaze vectors of English Word2Vec: 0.09 (0.00)


In [33]:
for en_dataset, printed_name_en in [
                ('data/human_judgements/en/wordsim353-rel.csv', 'wordsim353'),
            ]:
    for ru_dataset, printed_name_ru in [
                ('data/human_judgements/ru/simlex999.csv', 'RuSimLex999'),
                ('data/human_judgements/ru/simlex965.csv', 'RuSimLex965'),
                ('data/human_judgements/ru/hj.csv', 'HJ: Human Judgements of Word Pairs'),
                # ('human_judgements/ru/rt', 'RT: Synonyms and Hypernyms from the Thesaurus RuThes'), 
                # ('human_judgements/ru/ae2', 'AE: Cognitive Associations from the Sociation.org Experiment'),
                ]:
        print('English Dataset: {}'.format(printed_name_en))
        print('Russian Dataset: {}'.format(printed_name_ru))
        print('Correlation of English human judgements with Russian human judgements: {:0.2f} ({:0.2f})'.
              format(*spearmanr(*make_mapped_datasets_en_ru(
                  DataFrame.from_csv(en_dataset), DataFrame.from_csv(ru_dataset)))))
        print()

English Dataset: wordsim353
Russian Dataset: RuSimLex999
Correlation of English human judgements with Russian human judgements: nan (nan)

English Dataset: wordsim353
Russian Dataset: RuSimLex965
Correlation of English human judgements with Russian human judgements: nan (nan)

English Dataset: wordsim353
Russian Dataset: HJ: Human Judgements of Word Pairs
Correlation of English human judgements with Russian human judgements: 0.81 (0.00)



In [34]:
for ru_dataset, printed_name_ru in [
            ('data/human_judgements/ru/simlex999.csv', 'RuSimLex999'),
            ('data/human_judgements/ru/simlex965.csv', 'RuSimLex965'),
            ('data/human_judgements/ru/hj.csv', 'HJ: Human Judgements of Word Pairs'),
            # ('human_judgements/ru/rt', 'RT: Synonyms and Hypernyms from the Thesaurus RuThes'), 
            # ('human_judgements/ru/ae2', 'AE: Cognitive Associations from the Sociation.org Experiment'),
            ]:
        for en_dataset, printed_name_en in [
                ('data/human_judgements/en/wordsim353-rel.csv', 'wordsim353'),
            ]:
            print('Russian Dataset: {}'.format(printed_name_ru))
            print('English Dataset: {}'.format(printed_name_en))
            print('Correlation of Russian human judgements with English human judgements: {:0.2f} ({:0.2f})'.
                  format(*spearmanr(*make_mapped_datasets_ru_en(
                      DataFrame.from_csv(ru_dataset), DataFrame.from_csv(en_dataset)))))
            print()

Russian Dataset: RuSimLex999
English Dataset: wordsim353
Correlation of Russian human judgements with English human judgements: nan (nan)

Russian Dataset: RuSimLex965
English Dataset: wordsim353
Correlation of Russian human judgements with English human judgements: nan (nan)

Russian Dataset: HJ: Human Judgements of Word Pairs
English Dataset: wordsim353
Correlation of Russian human judgements with English human judgements: nan (nan)



In [35]:
for name, printed_name in [
                ('human_judgements/ru/simlex999', 'RuSimLex999'),
                ('human_judgements/ru/simlex965', 'RuSimLex965'),
                ('human_judgements/ru/hj', 'HJ: Human Judgements of Word Pairs'),
                # ('human_judgements/ru/rt', 'RT: Synonyms and Hypernyms from the Thesaurus RuThes'), 
                # ('human_judgements/ru/ae2', 'AE: Cognitive Associations from the Sociation.org Experiment'),
                ]:
        print('Dataset: {}'.format(printed_name))
        
        dataset = load_sim_dataset(name, rusvectores, True, True)
        dataset.to_csv('data/temp.csv')
        dataset = load_en_sim_dataset('temp', googlenews, True)
        
        w2v_en = make_en_sims_dataset(dataset, googlenews)
        print('Correlation with Russian human judgements of English word2vec vectors: {:0.2f} ({:0.2f})'.format(*spearmanr(w2v_en, dataset.sim)))
        print()
       
        w2v_ru = make_sims_dataset(dataset, rusvectores, rusvectores=True)
        print('Correlation with Russian human judgements of Russian word2vec vectors: {:0.2f} ({:0.2f})'.format(*spearmanr(w2v_ru, dataset.sim)))
        print()
        print('=================================')
      

Dataset: RuSimLex999
Percent of dropped = 2.6%, amount of remanining words = 973
Percent of dropped = 43.0%, amount of remanining words = 555
Correlation with Russian human judgements of English word2vec vectors: 0.24 (0.00)

Correlation with Russian human judgements of Russian word2vec vectors: 0.28 (0.00)

Dataset: RuSimLex965
Percent of dropped = 1.8%, amount of remanining words = 948
Percent of dropped = 42.8%, amount of remanining words = 542
Correlation with Russian human judgements of English word2vec vectors: 0.22 (0.00)

Correlation with Russian human judgements of Russian word2vec vectors: 0.27 (0.00)

Dataset: HJ: Human Judgements of Word Pairs
Percent of dropped = 5.8%, amount of remanining words = 375
Percent of dropped = 21.1%, amount of remanining words = 296
Correlation with Russian human judgements of English word2vec vectors: 0.58 (0.00)

Correlation with Russian human judgements of Russian word2vec vectors: 0.69 (0.00)



In [36]:
for name, printed_name in [
                ('human_judgements/en/wordsim353-rel', 'wordsim353'),
                ]:
        print('Dataset: {}'.format(printed_name))
        
        dataset = load_ru_sim_dataset(name, rusvectores, True, rusvectores=True)
        w2v_ru = make_ru_sims_dataset(dataset, rusvectores, rusvectores=True)
        print('Correlation with English human judgements of Russian word2vec vectors: {:0.2f} ({:0.2f})'.format(*spearmanr(w2v_ru, dataset.similarity)))
        print()
        
        w2v_en = make_sims_dataset(dataset, googlenews)
        print('Correlation with Enlglish human judgements of English word2vec vectors: {:0.2f} ({:0.2f})'.format(*spearmanr(w2v_en, dataset.similarity)))
        print('=================================')
        print()

Dataset: wordsim353
Percent of dropped = 57.5%, amount of remanining words = 107
Correlation with English human judgements of Russian word2vec vectors: 0.46 (0.00)

Correlation with Enlglish human judgements of English word2vec vectors: 0.65 (0.00)

