# 1. Load results

In [1]:
import os
import sys
import csv
import codecs
import math
import random
import itertools
import pandas as pd

In [2]:
# Maps each model name to the CSV file that holds that model's results.
# Every backslash is written as an escaped "\\". The previous spelling
# "..\Model_Results" only worked because "\M" is not a recognised escape
# sequence, which newer Python versions warn about. The resulting path
# strings are exactly the same as before.
# NOTE(review): "n2v_standard" points at the *consistency* file and
# "n2v_consistency" at the *standard* file -- confirm that this cross-over is
# intentional (e.g. mislabelled files) and not a swapped pair.
file_dict = {
    "n2v_additive": "..\\Model_Results\\Additive_Refined.csv",
    "n2v_standard": "..\\Model_Results\\N2V_consistency_refined.csv",
    "n2v_consistency": "..\\Model_Results\\N2V_standard_refined.csv",
    "w2v": "..\\Model_Results\\w2v_all_results_refined.csv",
    "hyper_all": "..\\Model_Results\\Hyperword_results_Quine_wiki_processed_Refined.csv",
    "hyper_quine": "..\\Model_Results\\Hyperword_results_quine_processed_Refined.csv",
}

In [3]:
def create_data_list(file, dict_name):
    """Load one model's result CSV into ``dict_name``, keyed by target term.

    Every non-blank row must have exactly nine columns, read as
    ``n, term, nn1, nn2, nn3, nn5, nn10, nn50, outlier``. The first column is
    ignored. Rows whose term column is the literal string ``"term"`` are
    treated as the header and skipped; blank rows are skipped as well.

    Args:
        file: Path of the UTF-8 encoded CSV file to read.
        dict_name: Dictionary that is filled in place. Each term maps to a
            dict with the keys ``"nn1"``, ``"nn2"``, ``"nn3"``, ``"nn5"``,
            ``"nn10"``, ``"nn50"`` and ``"outlier"``; the values are the raw
            cell strings from the file.

    Returns:
        None. The result is delivered through ``dict_name``.

    Raises:
        ValueError: If a non-blank row does not have exactly nine columns.
    """
    # newline="" is what the csv module asks for, so that quoted fields with
    # embedded line breaks are parsed correctly on every platform.
    with open(file, 'r', encoding="utf8", newline="") as csvfile:
        for row in csv.reader(csvfile):
            if not row:
                # csv.reader yields [] for an empty line (e.g. a trailing
                # blank line); unpacking it would raise.
                continue
            _, term, nn1, nn2, nn3, nn5, nn10, nn50, outlier = row
            if term == "term":
                continue
            dict_name[term] = {
                "nn1": nn1,
                "nn2": nn2,
                "nn3": nn3,
                "nn5": nn5,
                "nn10": nn10,
                "nn50": nn50,
                "outlier": outlier,
            }

In [4]:
# One (initially empty) result dictionary per model; each is filled in place
# by create_data_list below.
additive, n2v, consistency, w2v, hyper_all, hyper_quine = {}, {}, {}, {}, {}, {}

# (model name, result dictionary) pairs; the name is the key into file_dict.
model_dicts = [("n2v_additive", additive), ("n2v_standard", n2v), ("n2v_consistency", consistency),
               ("w2v", w2v), ("hyper_all", hyper_all), ("hyper_quine", hyper_quine)]
# The loop variable must not be called `dict`: at notebook top level that
# would rebind the builtin for every later cell in the kernel session.
for name, model_results in model_dicts:
    create_data_list(file_dict[name], model_results)

In [5]:
# Read the term/frequency table into term_freq_dict (term -> frequency).
# Each data row is expected to have three columns, of which the second is the
# term and the third its frequency. Frequencies are stored exactly as read
# from the file, i.e. as strings.
target_file = '..\\term_with_freq.csv'
term_freq_dict = {}
# newline="" is what the csv module asks for when opening files for a reader.
with open(target_file, 'r', encoding="utf8", newline="") as csvfile:
    reader = csv.reader(csvfile)
    for n, row in enumerate(reader):
        # Row 0 is the header; csv.reader yields [] for blank lines, which
        # cannot be unpacked into three columns.
        if n == 0 or not row:
            continue
        _, term, freq = row
        term_freq_dict[term] = freq

In [6]:
# Copies of the two hyperword result dictionaries whose keys have every
# "__xx" occurrence removed. Only the keys are rewritten; the value
# dictionaries are shared with the originals and left untouched.
hyper_all_refined = {
    target.replace("__xx", ""): values
    for target, values in hyper_all.items()
}
hyper_quine_refined = {
    target.replace("__xx", ""): values
    for target, values in hyper_quine.items()
}

In [7]:
# Rebind model_dicts so that the two hyperword models use the dictionaries
# whose keys had "__xx" stripped; the other four entries are unchanged.
model_dicts = [
    ("n2v_additive", additive),
    ("n2v_standard", n2v),
    ("n2v_consistency", consistency),
    ("w2v", w2v),
    ("hyper_all", hyper_all_refined),
    ("hyper_quine", hyper_quine_refined),
]

# 2. Task 1 Stimuli

In [8]:
def get_stimuli(term, rank, models=None):
    """Return each model's stored entry for ``term`` at neighbour rank ``rank``.

    Args:
        term: Target term; must be a key of every model dictionary.
        rank: Neighbour rank. It is turned into the lookup key
            ``"nn" + str(rank)`` (e.g. ``2`` -> ``"nn2"``).
        models: Optional iterable of ``(name, model_dict)`` pairs to query.
            When omitted, the notebook-level ``model_dicts`` list is used,
            looked up at call time.

    Returns:
        dict mapping each model name to ``model_dict[term]["nn<rank>"]``.

    Raises:
        KeyError: If a model has no entry for ``term`` or for the rank key.
    """
    if models is None:
        # Default to the notebook-wide list, resolved at call time so that a
        # later rebinding of model_dicts is picked up.
        models = model_dicts
    key = "nn" + str(rank)
    return {name: model[term][key] for name, model in models}

In [9]:
# Example: the entry each model stores under "nn2" for the target term 'ideas'.
get_stimuli("ideas", 2)

{'n2v_additive': "('parts', 0.9998624324798584)",
 'n2v_standard': "('parts', 0.9998587965965271)",
 'n2v_consistency': "('relative_terms', 0.7685747146606445)",
 'w2v': "('input', 0.6655906438827515)",
 'hyper_all': 'predicates(0.644643233777078)',
 'hyper_quine': 'impressions(0.55739400834)'}

In [10]:
# Print the stimuli of every model for every term at ranks 2, 5 and 50.
# The order of the stimuli was randomized in the experiment, and only stimuli
# for terms in the relatedness terms list were used.
for (term, freq), rank in itertools.product(term_freq_dict.items(), [2, 5, 50]):
    print("-----------")
    print("term: {}    rank : {}      freq : {}".format(term, rank, freq))
    print(get_stimuli(term, rank))

-----------
term: objects    rank : 2      freq : 1985
{'n2v_additive': "('time', 0.9999998807907104)", 'n2v_standard': "('time', 0.9999998807907104)", 'n2v_consistency': "('observation_sentences', 0.7791314125061035)", 'w2v': "('generalization', 0.6695494055747986)", 'hyper_all': 'numbers__xx(0.5614315542633023)', 'hyper_quine': 'physical(0.642044816049)'}
-----------
term: objects    rank : 5      freq : 1985
{'n2v_additive': "[('meaninglessness', 0.9999992847442627)]", 'n2v_standard': "[('intensional_abstraction', 0.9999990463256836)]", 'n2v_consistency': "[('variables', 0.5780197978019714)]", 'w2v': "('relented', 0.6466934680938721)", 'hyper_all': 'predicates(0.5153655475019328)', 'hyper_quine': 'mobile(0.492253318375)'}
-----------
term: objects    rank : 50      freq : 1985
{'n2v_additive': "[('learning', 0.9949288964271545)]", 'n2v_standard': "[('specious_present', 0.993363618850708)]", 'n2v_consistency': "[('urelements', 0.48002809286117554)]", 'w2v': "('given', 0.5951076149940

In [11]:
# All target words: print each term with a running number (starting at 1) and
# its frequency, and collect the terms in the same order in `wordlist`.
wordlist = []
for position, (term, freq) in enumerate(term_freq_dict.items(), start=1):
    print(position, term, freq)
    wordlist.append(term)

1 objects 1985
2 quantification 1671
3 about 1408
4 meaning 1366
5 object 1287
6 time 1017
7 translation 676
8 attributes 545
9 ideas 444
10 paradox 438
11 observation_sentences 422
12 construction 377
13 application 357
14 description 350
15 information 275
16 parts 272
17 truth_functions 267
18 reduction 217
19 reality 207
20 ordered_pair 191
21 ambiguity 188
22 conditionals 141
23 propositional_attitudes 138
24 nominalism 123
25 prediction 113
26 dispositions 112
27 pronouns 111
28 mentalistic 98
29 conditioning 91
30 utterances 84
31 adjectives 71
32 memory 55
33 confirmation 49
34 posit 47
35 subtraction 42
36 meaninglessness 40
37 nouns 38
38 intension 37
39 logical_particles 33
40 relative_terms 29
41 mental_states 23
42 transparency 18
43 truth_vehicles 14
44 intensional_abstraction 9
45 sense_datum 7


In [12]:
# Terms that the listing code below prints under the heading
# "terms synonym detection task:". The order matters: that listing labels the
# first 15 entries "high frequency" and the remaining ones "low frequency".
relatedness_terms = ['objects','quantification','about', 'meaning','object', 'time', 'translation', 'attributes', 'ideas', 
                     'paradox','observation_sentences', 'construction', 'application', 'description', 'information',
                     'utterances', 'adjectives', 'memory', 'confirmation','posit','subtraction','meaninglessness','nouns',
                    'intension','logical_particles','relative_terms','mental_states','transparency','truth_vehicles',
                     'intensional_abstraction','sense_datum']

# Nine of the terms above, listed separately. Not referenced by the code in
# this part of the notebook -- presumably terms added in a later revision of
# the stimulus list; TODO confirm.
new_words = ['construction', 'application', 'description', 'information',
                     'utterances', 'adjectives', 'memory', 'confirmation','posit',]
# Terms used in the relatedness task, numbered from 1 together with their
# frequency. The list is split by position: the "low frequency:" heading is
# emitted right before entry 16.
print("terms synonym detection task:")
print()
print("high frequency:")
for position, word in enumerate(relatedness_terms, start=1):
    if position == 16:
        print()
        print("low frequency:")
    print(str(position) + ",", word + ",", term_freq_dict[word])

terms synonym detection task:

high frequency:
1, objects, 1985
2, quantification, 1671
3, about, 1408
4, meaning, 1366
5, object, 1287
6, time, 1017
7, translation, 676
8, attributes, 545
9, ideas, 444
10, paradox, 438
11, observation_sentences, 422
12, construction, 377
13, application, 357
14, description, 350
15, information, 275

low frequency:
16, utterances, 84
17, adjectives, 71
18, memory, 55
19, confirmation, 49
20, posit, 47
21, subtraction, 42
22, meaninglessness, 40
23, nouns, 38
24, intension, 37
25, logical_particles, 33
26, relative_terms, 29
27, mental_states, 23
28, transparency, 18
29, truth_vehicles, 14
30, intensional_abstraction, 9
31, sense_datum, 7


# 3. Outlier task stimuli

In [13]:
def get_outlier_stimuli(model, term):
    """Print the three outlier-task stimuli that ``model`` stores for ``term``.

    Three lines are written to stdout, labelled ``nn1``, ``nn2`` and
    ``outlier``. Note the offset between label and stored column: the line
    labelled ``nn1`` shows the ``"nn2"`` entry and the line labelled ``nn2``
    shows the ``"nn3"`` entry.
    NOTE(review): presumably the stored ``"nn1"`` entry is skipped on purpose
    (e.g. because it is the term itself) -- confirm.

    Args:
        model: Mapping from term to a dict with at least the string-valued
            keys ``"nn2"``, ``"nn3"`` and ``"outlier"``.
        term: Target term to look up in ``model``.

    Returns:
        None.
    """
    # (printed label, column read from the model entry)
    label_to_column = (("nn1", "nn2"), ("nn2", "nn3"), ("outlier", "outlier"))
    for label, column in label_to_column:
        print(label + ": " + model[term][column])

In [14]:
from operator import itemgetter

# For the outlier detection task we use all words that are not in the synonym
# detection task, plus the first three and the last three entries of wordlist
# (3 high- and 3 low-frequency words).
outlier_terms = [x for x in wordlist if x not in relatedness_terms]
outlier_terms += wordlist[0:3]
outlier_terms += wordlist[-3:]
# Frequencies are stored as strings, so convert before sorting numerically.
outliers = [(x, int(term_freq_dict[x])) for x in outlier_terms]
outliers_sorted = sorted(outliers, key=itemgetter(1))

# 0-based position before which the "high freq:" heading is printed.
# NOTE(review): the previous code special-cased `n < 10` but emitted the
# heading at n == 11, so positions 0-10 (eleven terms) appear under "low freq"
# and the rest under "high freq". That placement is kept here so the printed
# listing does not change; confirm whether an even split (heading at 10) was
# intended.
HIGH_FREQ_HEADING_INDEX = 11

print("terms outlier task")
print("low freq:")
for n, (term, freq) in enumerate(outliers_sorted):
    if n == HIGH_FREQ_HEADING_INDEX:
        print()
        print("high freq:")
    print(str(n)+",", term + ",", freq)

terms outlier task
low freq:
0, sense_datum, 7
1, intensional_abstraction, 9
2, truth_vehicles, 14
3, conditioning, 91
4, mentalistic, 98
5, pronouns, 111
6, dispositions, 112
7, prediction, 113
8, nominalism, 123
9, propositional_attitudes, 138
10, conditionals, 141

high freq:
11, ambiguity, 188
12, ordered_pair, 191
13, reality, 207
14, reduction, 217
15, truth_functions, 267
16, parts, 272
17, about, 1408
18, quantification, 1671
19, objects, 1985


In [15]:
# The three models used in the outlier task, keyed by model name, and the
# list of those names in the same order.
outlier_models = {
    "w2v": w2v,
    "hyper_quine": hyper_quine_refined,
    "n2v_consistency": consistency,
}
outlier_model_names = list(outlier_models)

In [16]:
# Every (term, model name) combination for the outlier task, in shuffled order.
# The shuffle uses its own seeded generator so that Restart & Run All yields
# the same order every time and the global `random` state is left untouched.
# NOTE(review): the order produced by earlier, unseeded runs of this cell
# cannot be recovered with this seed.
STIMULI_SHUFFLE_SEED = 0
models = list(outlier_models)
c = list(itertools.product(outlier_terms, models))
random.Random(STIMULI_SHUFFLE_SEED).shuffle(c)

In [17]:
# Stimuli for the outlier detection task: one numbered entry (starting at 1)
# per shuffled (term, model name) pair, followed by that model's stimuli.
for number, (term, model) in enumerate(c, start=1):
    freq = term_freq_dict[term]
    print("-----------")
    print(number)
    print("term: {}    model : {}      freq : {}".format(term, model, freq))
    get_outlier_stimuli(outlier_models[model], term)

-----------
1
term: conditioning    model : hyper_quine      freq : 91
nn1: intake(0.505824625333)
nn2: direct(0.491588300285)
outlier: (0.18425645339784349, 'inculcated')
-----------
2
term: reduction    model : n2v_consistency      freq : 217
nn1: ('ordered_pair', 0.6487832069396973)
nn2: ('memory', 0.5213330984115601)
outlier: ('truths', 0.23513129353523254)
-----------
3
term: pronouns    model : n2v_consistency      freq : 111
nn1: ('about', 0.6425174474716187)
nn2: ('divided_reference', 0.5624713897705078)
outlier: ('desirable', 0.27970218658447266)
-----------
4
term: quantification    model : hyper_quine      freq : 1671
nn1: existential(0.602559559971)
nn2: universal(0.586911254645)
outlier: (0.18535683353091537, 'logic')
-----------
5
term: objects    model : hyper_quine      freq : 1985
nn1: physical(0.642044816049)
nn2: object(0.549032999713)
outlier: (0.21639245648127881, 'them')
-----------
6
term: ambiguity    model : w2v      freq : 188
nn1: ('instantiation', 0.71488922