# Import

In [48]:
import numpy as np
import random
from scipy.stats import spearmanr, pearsonr
from scipy.optimize import fsolve
import math

#import the custom functions
import MFunctions as mf

In [2]:
# Load the SimVerb-3500 verb-similarity data; the word pairs used below are taken from it.
simdict = mf.verb_similarity(
    "SimVerb-3500.txt"
)

# Initialise vector dictionaries and choose lexical classes

In [81]:
# Configuration for the vector dictionaries built below: three observable sets
# (13, 15 and 28 observables) and five subject/object mixing weightings.

# Each entry is a pair of weights controlling the mixing of the object/subject
# matrices; the first weight takes the values 1, 0.8, 0.5, 0.2 and 0.
vals_of_a = [
    [1, 0],
    [0.8, 0.2],
    [0.5, 0.5],
    [0.2, 0.8],
    [0, 1],
]

# Files defining each observable set, in the order 13, 15 and 28 observables.
obs_set_files = [
    ["Linear.txt", "Quadratic.txt"],
    ["Cubic1.txt", "Quartic1.txt"],
    ["Linear.txt", "Quadratic.txt", "Cubic1.txt", "Quartic1.txt", "Additional1.txt"],
]
obs_sets = list(map(mf.observables, obs_set_files))

# Row labels used when printing the result tables.
# NOTE(review): these look like LaTeX \text{...} labels without the leading
# backslash - confirm whether it is added by hand when pasting.
obs_set_names = ['text{13 obs}', 'text{15 obs}', 'text{28 obs}']

In [15]:
# Build the full grid of vector dictionaries: all_sets[i][j] is built from
# observable set i (obs_sets[i]) and mixing weights j (vals_of_a[j]).
#
# N.B. do not pre-allocate such a grid with [[None] * n_cols] * n_rows: the outer
# multiplication repeats references to one and the same inner list, so assigning
# to grid[0][1] changes column 1 of every row. Building each row as its own list
# (as done here) avoids that aliasing.
all_sets = [
    [
        mf.vector_dictionary(
            [["matrices_1160_arg_obj_context_subj.txt", "matrices_1160_arg_subj_context_obj.txt"], weights],
            observables,
            [1],
        )
        for weights in vals_of_a
    ]
    for observables in obs_sets
]

# The means for each lexical class, observable-deviation with cosine distance

## Antonyms vs None vs Synonyms

In [None]:
# Lexical classes compared in this section (keys of simdict).
lexical_classes = [
    'ANTONYMS',
    'NONE',
    'SYNONYMS',
]

In [20]:
# Means and standard deviations per lexical class for the observable-deviation
# vectors ("std_dev" mode of mf.averaged_product), for every combination of
# observable set and mixing weights. Each per-class result is later printed
# as [0] \pm [1].
n_sets, n_mixings = len(obs_sets), len(vals_of_a)
all_means = [[None] * n_mixings for _ in range(n_sets)]

for set_idx in range(n_sets):
    print('Observable set:', obs_set_names[set_idx], '\n')

    for mix_idx, mixing in enumerate(vals_of_a):
        print('Mixing values:', mixing, '\n')

        per_class = [
            mf.averaged_product(simdict, lex_class, all_sets[set_idx][mix_idx], "std_dev")
            for lex_class in lexical_classes
        ]
        all_means[set_idx][mix_idx] = per_class

        print(per_class, '\n')
        

Observable set: 13 obs 

Mixing values: [1, 0] 

[[0.11686779330026016, 0.4870706285854948], [0.16025470482805165, 0.5329307757565349], [0.29050619231896, 0.4997487791339401]] 

Mixing values: [0.8, 0.2] 

[[0.12149503258801209, 0.4985780975123047], [0.16635270690908777, 0.5286639441995463], [0.3061805670475563, 0.4951254885623443]] 

Mixing values: [0.5, 0.5] 

[[0.1351872399885167, 0.516498268589881], [0.17959127504584851, 0.5319063501305401], [0.3217781486682351, 0.48458313199209463]] 

Mixing values: [0.2, 0.8] 

[[0.1420036844922273, 0.510305680020097], [0.18276384341752816, 0.5395036187163176], [0.32072295580802446, 0.4910727307573661]] 

Mixing values: [0, 1] 

[[0.15018222720102836, 0.4884472407440774], [0.18040667083996495, 0.5389258677200239], [0.3140819649420818, 0.4950256383438749]] 

Observable set: 15 obs 

Mixing values: [1, 0] 

[[0.12206992679076673, 0.4830357121748659], [0.2671421815959444, 0.5598336550096815], [0.33585492378390713, 0.5390383768881372]] 

Mixing value

In [21]:
# Make results ready for pasting into the paper: one table row per mixing
# weighting, one group of rows per observable set.
# The ' \pm ' separator is a raw string: '\p' is not a valid escape sequence
# and triggers a SyntaxWarning on recent Python versions.
# NOTE(review): each row ends with a single backslash, whereas a LaTeX row
# break is a double backslash - confirm whether the second one is added by hand.
for a in range(len(obs_sets)):

    for b in range(len(vals_of_a)):

        # One "value \pm spread" cell per lexical class, rounded to 3 decimals.
        cells = [
            str(round(all_means[a][b][c][0], 3)) + r' \pm ' + str(round(all_means[a][b][c][1], 3))
            for c in range(len(lexical_classes))
        ]

        # The first column is the first mixing weight of the pair.
        print(str(vals_of_a[b][0]) + ' & ' + ' & '.join(cells) + ' \\')

    print('\n')

1 & 0.117 \pm 0.487 & 0.16 \pm 0.533 & 0.291 \pm 0.5 \
0.8 & 0.121 \pm 0.499 & 0.166 \pm 0.529 & 0.306 \pm 0.495 \
0.5 & 0.135 \pm 0.516 & 0.18 \pm 0.532 & 0.322 \pm 0.485 \
0.2 & 0.142 \pm 0.51 & 0.183 \pm 0.54 & 0.321 \pm 0.491 \
0 & 0.15 \pm 0.488 & 0.18 \pm 0.539 & 0.314 \pm 0.495 \


1 & 0.122 \pm 0.483 & 0.267 \pm 0.56 & 0.336 \pm 0.539 \
0.8 & 0.127 \pm 0.473 & 0.265 \pm 0.539 & 0.34 \pm 0.515 \
0.5 & 0.187 \pm 0.467 & 0.333 \pm 0.535 & 0.383 \pm 0.508 \
0.2 & 0.231 \pm 0.555 & 0.414 \pm 0.564 & 0.461 \pm 0.537 \
0 & 0.296 \pm 0.561 & 0.443 \pm 0.564 & 0.478 \pm 0.536 \


1 & 0.087 \pm 0.419 & 0.177 \pm 0.498 & 0.281 \pm 0.473 \
0.8 & 0.093 \pm 0.434 & 0.181 \pm 0.498 & 0.298 \pm 0.467 \
0.5 & 0.129 \pm 0.46 & 0.199 \pm 0.502 & 0.316 \pm 0.463 \
0.2 & 0.135 \pm 0.458 & 0.205 \pm 0.512 & 0.32 \pm 0.472 \
0 & 0.151 \pm 0.443 & 0.204 \pm 0.514 & 0.315 \pm 0.476 \




## Antonyms vs None vs Synonyms Mahalanobis

In [73]:
# Lexical classes compared in this section (keys of simdict).
lexical_classes = [
    'ANTONYMS',
    'NONE',
    'SYNONYMS',
]

In [74]:
# Means and standard deviations per lexical class, this time with the "maha"
# mode of mf.averaged_product (the Mahalanobis variant, per the section
# heading), for every combination of observable set and mixing weights.
# Each per-class result is later printed as [0] \pm [1].
n_sets, n_mixings = len(obs_sets), len(vals_of_a)
all_means = [[None] * n_mixings for _ in range(n_sets)]

for set_idx in range(n_sets):
    print('Observable set:', obs_set_names[set_idx], '\n')

    for mix_idx, mixing in enumerate(vals_of_a):
        print('Mixing values:', mixing, '\n')

        per_class = [
            mf.averaged_product(simdict, lex_class, all_sets[set_idx][mix_idx], "maha")
            for lex_class in lexical_classes
        ]
        all_means[set_idx][mix_idx] = per_class

        print(per_class, '\n')
        

Observable set: 13 obs 

Mixing values: [1, 0] 

[[0.053876079937608595, 0.3044157992509545], [0.09409812865040305, 0.34223133458929456], [0.16551755788424274, 0.3613969681045333]] 

Mixing values: [0.8, 0.2] 

[[0.059709051466106326, 0.2983087122605351], [0.0988979379227174, 0.3513532650138626], [0.18421318967655523, 0.3573100146918827]] 

Mixing values: [0.5, 0.5] 

[[0.07005568833984911, 0.3617018277098897], [0.10930060859964545, 0.3698658894072271], [0.1943485552797929, 0.37992408182335746]] 

Mixing values: [0.2, 0.8] 

[[0.0635812316329008, 0.3618917855670194], [0.10136338452051255, 0.38636719894404886], [0.18299435632491934, 0.3937793774301293]] 

Mixing values: [0, 1] 

[[0.06640862748147096, 0.35611339497821504], [0.0945117194292231, 0.38726667150789035], [0.17833341047733775, 0.3970761956266328]] 

Observable set: 15 obs 

Mixing values: [1, 0] 

[[0.0671363468438966, 0.44454081857727634], [0.2144443416080678, 0.5128357279626186], [0.2856271633302787, 0.4994128732266571]] 

M

In [75]:
# Make results ready for pasting into the paper: one table row per mixing
# weighting, one group of rows per observable set.
# The ' \pm ' separator is a raw string: '\p' is not a valid escape sequence
# and triggers a SyntaxWarning on recent Python versions.
# NOTE(review): each row ends with a single backslash, whereas a LaTeX row
# break is a double backslash - confirm whether the second one is added by hand.
for a in range(len(obs_sets)):

    for b in range(len(vals_of_a)):

        # One "value \pm spread" cell per lexical class, rounded to 3 decimals.
        cells = [
            str(round(all_means[a][b][c][0], 3)) + r' \pm ' + str(round(all_means[a][b][c][1], 3))
            for c in range(len(lexical_classes))
        ]

        # The first column is the first mixing weight of the pair.
        print(str(vals_of_a[b][0]) + ' & ' + ' & '.join(cells) + ' \\')

    print('\n')

1 & 0.054 \pm 0.304 & 0.094 \pm 0.342 & 0.166 \pm 0.361 \
0.8 & 0.06 \pm 0.298 & 0.099 \pm 0.351 & 0.184 \pm 0.357 \
0.5 & 0.07 \pm 0.362 & 0.109 \pm 0.37 & 0.194 \pm 0.38 \
0.2 & 0.064 \pm 0.362 & 0.101 \pm 0.386 & 0.183 \pm 0.394 \
0 & 0.066 \pm 0.356 & 0.095 \pm 0.387 & 0.178 \pm 0.397 \


1 & 0.067 \pm 0.445 & 0.214 \pm 0.513 & 0.286 \pm 0.499 \
0.8 & 0.091 \pm 0.438 & 0.229 \pm 0.497 & 0.306 \pm 0.487 \
0.5 & 0.112 \pm 0.439 & 0.296 \pm 0.495 & 0.334 \pm 0.478 \
0.2 & 0.179 \pm 0.476 & 0.355 \pm 0.515 & 0.389 \pm 0.503 \
0 & 0.213 \pm 0.482 & 0.364 \pm 0.513 & 0.393 \pm 0.501 \


1 & -0.005 \pm 0.238 & 0.075 \pm 0.305 & 0.141 \pm 0.323 \
0.8 & 0.02 \pm 0.235 & 0.083 \pm 0.309 & 0.159 \pm 0.312 \
0.5 & 0.045 \pm 0.291 & 0.097 \pm 0.325 & 0.161 \pm 0.35 \
0.2 & 0.044 \pm 0.292 & 0.09 \pm 0.345 & 0.148 \pm 0.366 \
0 & 0.049 \pm 0.305 & 0.082 \pm 0.35 & 0.136 \pm 0.365 \




## Antonyms vs None vs Synonyms plain product

In [76]:
# Lexical classes compared in this section (keys of simdict).
lexical_classes = [
    'ANTONYMS',
    'NONE',
    'SYNONYMS',
]

In [77]:
# Means and standard deviations per lexical class, this time with the "plain"
# mode of mf.averaged_product (the plain product, per the section heading),
# for every combination of observable set and mixing weights.
# Each per-class result is later printed as [0] \pm [1].
n_sets, n_mixings = len(obs_sets), len(vals_of_a)
all_means = [[None] * n_mixings for _ in range(n_sets)]

for set_idx in range(n_sets):
    print('Observable set:', obs_set_names[set_idx], '\n')

    for mix_idx, mixing in enumerate(vals_of_a):
        print('Mixing values:', mixing, '\n')

        per_class = [
            mf.averaged_product(simdict, lex_class, all_sets[set_idx][mix_idx], "plain")
            for lex_class in lexical_classes
        ]
        all_means[set_idx][mix_idx] = per_class

        print(per_class, '\n')
        

Observable set: 13 obs 

Mixing values: [1, 0] 

[[0.8060558220067201, 0.18861368164743378], [0.8231949202559977, 0.15961817485713023], [0.8185005351827515, 0.17508935573288908]] 

Mixing values: [0.8, 0.2] 

[[0.8139826094909628, 0.188090820966675], [0.8291557566396288, 0.1546765108298057], [0.8331221129258357, 0.1680039041864535]] 

Mixing values: [0.5, 0.5] 

[[0.8184950962749088, 0.1708270164721316], [0.8151906287814297, 0.1589408679440375], [0.8385346379856569, 0.14901898704239072]] 

Mixing values: [0.2, 0.8] 

[[0.8267758094667564, 0.14247303239111417], [0.8121235549933316, 0.15901822252280035], [0.830319358773506, 0.15357102142911763]] 

Mixing values: [0, 1] 

[[0.8424491669411976, 0.13657234705640797], [0.8126555657603333, 0.16326454745647584], [0.83396377143757, 0.15163515337158912]] 

Observable set: 15 obs 

Mixing values: [1, 0] 

[[0.666551221655536, 0.36288405692790765], [0.5519718924199511, 0.41881326367872923], [0.5537120723933104, 0.4478156796132517]] 

Mixing values

In [78]:
# Make results ready for pasting into the paper: one table row per mixing
# weighting, one group of rows per observable set.
# The ' \pm ' separator is a raw string: '\p' is not a valid escape sequence
# and triggers a SyntaxWarning on recent Python versions.
# NOTE(review): each row ends with a single backslash, whereas a LaTeX row
# break is a double backslash - confirm whether the second one is added by hand.
for a in range(len(obs_sets)):

    for b in range(len(vals_of_a)):

        # One "value \pm spread" cell per lexical class, rounded to 3 decimals.
        cells = [
            str(round(all_means[a][b][c][0], 3)) + r' \pm ' + str(round(all_means[a][b][c][1], 3))
            for c in range(len(lexical_classes))
        ]

        # The first column is the first mixing weight of the pair.
        print(str(vals_of_a[b][0]) + ' & ' + ' & '.join(cells) + ' \\')

    print('\n')

1 & 0.806 \pm 0.189 & 0.823 \pm 0.16 & 0.819 \pm 0.175 \
0.8 & 0.814 \pm 0.188 & 0.829 \pm 0.155 & 0.833 \pm 0.168 \
0.5 & 0.818 \pm 0.171 & 0.815 \pm 0.159 & 0.839 \pm 0.149 \
0.2 & 0.827 \pm 0.142 & 0.812 \pm 0.159 & 0.83 \pm 0.154 \
0 & 0.842 \pm 0.137 & 0.813 \pm 0.163 & 0.834 \pm 0.152 \


1 & 0.667 \pm 0.363 & 0.552 \pm 0.419 & 0.554 \pm 0.448 \
0.8 & 0.546 \pm 0.424 & 0.508 \pm 0.438 & 0.493 \pm 0.464 \
0.5 & 0.486 \pm 0.444 & 0.418 \pm 0.456 & 0.49 \pm 0.461 \
0.2 & 0.563 \pm 0.429 & 0.485 \pm 0.437 & 0.504 \pm 0.437 \
0 & 0.654 \pm 0.391 & 0.555 \pm 0.407 & 0.567 \pm 0.431 \


1 & 0.641 \pm 0.366 & 0.592 \pm 0.347 & 0.594 \pm 0.353 \
0.8 & 0.556 \pm 0.361 & 0.606 \pm 0.328 & 0.603 \pm 0.33 \
0.5 & 0.615 \pm 0.343 & 0.589 \pm 0.323 & 0.631 \pm 0.318 \
0.2 & 0.576 \pm 0.377 & 0.59 \pm 0.336 & 0.595 \pm 0.344 \
0 & 0.66 \pm 0.375 & 0.586 \pm 0.345 & 0.615 \pm 0.352 \




## Hyper/Hypo vs CoHypo

In [45]:
# Lexical classes compared in this section (keys of simdict).
lexical_classes = [
    'HYPER/HYPONYMS',
    'COHYPONYMS',
]

In [46]:
# Means and standard deviations per lexical class for the observable-deviation
# vectors ("std_dev" mode of mf.averaged_product), for every combination of
# observable set and mixing weights. Each per-class result is later printed
# as [0] \pm [1].
n_sets, n_mixings = len(obs_sets), len(vals_of_a)
all_means = [[None] * n_mixings for _ in range(n_sets)]

for set_idx in range(n_sets):
    print('Observable set:', obs_set_names[set_idx], '\n')

    for mix_idx, mixing in enumerate(vals_of_a):
        print('Mixing values:', mixing, '\n')

        per_class = [
            mf.averaged_product(simdict, lex_class, all_sets[set_idx][mix_idx], "std_dev")
            for lex_class in lexical_classes
        ]
        all_means[set_idx][mix_idx] = per_class

        print(per_class, '\n')
        

Observable set: 13 obs 

Mixing values: [1, 0] 

[[0.12819513814742128, 0.5071554695289946], [0.18283011891159115, 0.4875770015152735]] 

Mixing values: [0.8, 0.2] 

[[0.13413771448778683, 0.5014386364259339], [0.193787209750406, 0.4717811907126341]] 

Mixing values: [0.5, 0.5] 

[[0.1368454677421456, 0.5070858000104288], [0.2192787931664428, 0.4965412505958847]] 

Mixing values: [0.2, 0.8] 

[[0.12527195503905225, 0.5089138971794549], [0.24019095634601484, 0.5048472830502597]] 

Mixing values: [0, 1] 

[[0.12538794135946613, 0.5024734689765858], [0.240215784303051, 0.5059754138407245]] 

Observable set: 15 obs 

Mixing values: [1, 0] 

[[0.23430703642978412, 0.4918431036851528], [0.31531163054484945, 0.5149285441313115]] 

Mixing values: [0.8, 0.2] 

[[0.23265881542388137, 0.4885868888170528], [0.333440748425028, 0.48336781321665245]] 

Mixing values: [0.5, 0.5] 

[[0.23930849412289, 0.5314410484113232], [0.3392611376129496, 0.5407862731041616]] 

Mixing values: [0.2, 0.8] 

[[0.29843

In [47]:
# Make results ready for pasting into the paper: one table row per mixing
# weighting, one group of rows per observable set.
# The ' \pm ' separator is a raw string: '\p' is not a valid escape sequence
# and triggers a SyntaxWarning on recent Python versions.
# NOTE(review): each row ends with a single backslash, whereas a LaTeX row
# break is a double backslash - confirm whether the second one is added by hand.
for a in range(len(obs_sets)):

    for b in range(len(vals_of_a)):

        # One "value \pm spread" cell per lexical class, rounded to 3 decimals.
        cells = [
            str(round(all_means[a][b][c][0], 3)) + r' \pm ' + str(round(all_means[a][b][c][1], 3))
            for c in range(len(lexical_classes))
        ]

        # The first column is the first mixing weight of the pair.
        print(str(vals_of_a[b][0]) + ' & ' + ' & '.join(cells) + ' \\')

    print('\n')

1 & 0.128 \pm 0.507 & 0.183 \pm 0.488 \
0.8 & 0.134 \pm 0.501 & 0.194 \pm 0.472 \
0.5 & 0.137 \pm 0.507 & 0.219 \pm 0.497 \
0.2 & 0.125 \pm 0.509 & 0.24 \pm 0.505 \
0 & 0.125 \pm 0.502 & 0.24 \pm 0.506 \


1 & 0.234 \pm 0.492 & 0.315 \pm 0.515 \
0.8 & 0.233 \pm 0.489 & 0.333 \pm 0.483 \
0.5 & 0.239 \pm 0.531 & 0.339 \pm 0.541 \
0.2 & 0.298 \pm 0.563 & 0.416 \pm 0.588 \
0 & 0.323 \pm 0.565 & 0.447 \pm 0.562 \


1 & 0.133 \pm 0.459 & 0.204 \pm 0.453 \
0.8 & 0.141 \pm 0.458 & 0.209 \pm 0.452 \
0.5 & 0.145 \pm 0.467 & 0.231 \pm 0.471 \
0.2 & 0.139 \pm 0.473 & 0.254 \pm 0.483 \
0 & 0.139 \pm 0.469 & 0.256 \pm 0.484 \




# Testing the new functions for the balanced accuracy

In [22]:
# Single vector dictionary used to try out the balanced-accuracy helpers:
# first observable set, mixing weights [1, 0].
test = mf.vector_dictionary(
    [
        ["matrices_1160_arg_obj_context_subj.txt", "matrices_1160_arg_subj_context_obj.txt"],
        [1, 0],
    ],
    obs_sets[0],
    [1],
)

In [9]:
# Try the two-class auxiliary subset helper (antonyms vs synonyms).
# NOTE(review): the meaning of the trailing numeric arguments is defined in MFunctions - confirm there.
mf.balanced_accuracy_subsets_aux(
    test, simdict,
    'ANTONYMS', 'SYNONYMS',
    'std_dev',
    117, 70, 200,
)

0.5449838932351587

In [7]:
# Two-class balanced accuracy (antonyms vs synonyms) for the try-out dictionary.
mf.balanced_accuracy(
    test, simdict,
    'ANTONYMS', 'SYNONYMS',
    'std_dev',
)

0.5602808691043986

In [23]:
# A list of class names can be passed in place of a single class; here
# ANTONYMS and NONE are bunched together against SYNONYMS.
mf.balanced_accuracy(
    test, simdict,
    ['ANTONYMS', 'NONE'], 'SYNONYMS',
    'std_dev',
)

0.5506936289337272

In [8]:
# Try the two-class subset helper (antonyms vs synonyms); it returns a two-element array.
# NOTE(review): the meaning of the trailing numeric arguments is defined in MFunctions - confirm there.
mf.balanced_accuracy_subsets(
    test, simdict,
    'ANTONYMS', 'SYNONYMS',
    'std_dev',
    3000, 70, 200,
)

array([0.5633226 , 0.03448822])

In [13]:
# Three-class balanced accuracy (antonyms vs none vs synonyms) for the try-out dictionary.
mf.balanced_accuracy3(
    test, simdict,
    'ANTONYMS', 'NONE', 'SYNONYMS',
    'std_dev',
)

0.35955434932417024

In [14]:
# Try the three-class auxiliary subset helper (antonyms vs none vs synonyms).
# NOTE(review): the meaning of the trailing numeric arguments is defined in MFunctions - confirm there.
mf.balanced_accuracy_subsets3_aux(
    test, simdict,
    'ANTONYMS', 'NONE', 'SYNONYMS',
    'std_dev',
    7, 70, 1600, 200,
)

0.3391093968729882

In [12]:
# Try the three-class subset helper (antonyms vs none vs synonyms); it returns a two-element array.
# NOTE(review): the meaning of the trailing numeric arguments is defined in MFunctions - confirm there.
mf.balanced_accuracy_subsets3(
    test, simdict,
    'ANTONYMS', 'NONE', 'SYNONYMS',
    'std_dev',
    3000, 70, 1600, 200,
)

array([0.36462051, 0.0250972 ])

In [51]:
# For every lexical class in simdict, show the class name, the number of
# entries, and 65% of that number rounded down. These rounded sizes are the
# hard-coded numbers passed to the subset helpers further down.
class_names = list(simdict.keys())
class_sizes = [len(simdict[name]) for name in class_names]

print(simdict.keys())
print(class_sizes)
print([math.floor(size * 65 / 100) for size in class_sizes])

dict_keys(['SYNONYMS', 'COHYPONYMS', 'ANTONYMS', 'HYPER/HYPONYMS', 'NONE'])
[306, 190, 111, 800, 2093]
[198, 123, 72, 520, 1360]


# Computing the balanced accuracy

## Syn vs Ant

In [40]:
# Balanced accuracy, antonyms vs synonyms ("std_dev" mode), for every
# combination of observable set (rows) and mixing weights (columns).
all_syn_vs_ant = [
    [
        mf.balanced_accuracy(all_sets[set_idx][mix_idx], simdict, 'ANTONYMS', 'SYNONYMS', 'std_dev')
        for mix_idx in range(len(vals_of_a))
    ]
    for set_idx in range(len(obs_sets))
]

In [82]:
# Make results ready for pasting into the paper: one table row per observable
# set, one column per mixing weighting, values rounded to 3 decimals.
# NOTE(review): each row ends with a single backslash, whereas a LaTeX row
# break is a double backslash - confirm whether the second one is added by hand.
last_col = len(vals_of_a) - 1

for set_idx in range(len(obs_sets)):
    row = obs_set_names[set_idx] + ' & '

    for col in range(len(vals_of_a)):
        # Column separator between cells, row terminator after the last one.
        trailer = ' & ' if col < last_col else ' \\'
        row += str(round(all_syn_vs_ant[set_idx][col], 3)) + trailer

    print(row)

text{13 obs} & 0.56 & 0.557 & 0.575 & 0.53 & 0.523 \
text{15 obs} & 0.579 & 0.574 & 0.564 & 0.571 & 0.554 \
text{28 obs} & 0.555 & 0.578 & 0.587 & 0.575 & 0.564 \


In [52]:
# Subset-based balanced accuracy, antonyms vs synonyms ("std_dev" mode), for
# every combination of observable set (rows) and mixing weights (columns).
# 72 and 198 are 65% of the ANTONYMS (111) and SYNONYMS (306) entry counts,
# as printed by the "Find 65% of each key" cell.
# NOTE(review): the leading 7 is presumably the number of subsets drawn -
# confirm against MFunctions.
n_ant_65, n_syn_65 = 72, 198

all_syn_vs_ant_sub = [
    [
        mf.balanced_accuracy_subsets(
            all_sets[set_idx][mix_idx], simdict, 'ANTONYMS', 'SYNONYMS', 'std_dev', 7, n_ant_65, n_syn_65
        )
        for mix_idx in range(len(vals_of_a))
    ]
    for set_idx in range(len(obs_sets))
]

In [83]:
# Make results ready for pasting into the paper: one table row per observable
# set, one "value \pm spread" cell per mixing weighting (rounded to 3 decimals).
# The ' \pm ' separator is a raw string: '\p' is not a valid escape sequence
# and triggers a SyntaxWarning on recent Python versions.
# NOTE(review): each row ends with a single backslash, whereas a LaTeX row
# break is a double backslash - confirm whether the second one is added by hand.
for a in range(len(obs_sets)):

    cells = [
        str(round(all_syn_vs_ant_sub[a][b][0], 3)) + r' \pm ' + str(round(all_syn_vs_ant_sub[a][b][1], 3))
        for b in range(len(vals_of_a))
    ]

    print(obs_set_names[a] + ' & ' + ' & '.join(cells) + ' \\')

text{13 obs} & 0.569 \pm 0.023 & 0.564 \pm 0.041 & 0.576 \pm 0.037 & 0.548 \pm 0.039 & 0.543 \pm 0.036 \
text{15 obs} & 0.577 \pm 0.039 & 0.573 \pm 0.031 & 0.58 \pm 0.041 & 0.581 \pm 0.039 & 0.559 \pm 0.034 \
text{28 obs} & 0.561 \pm 0.033 & 0.579 \pm 0.037 & 0.597 \pm 0.032 & 0.591 \pm 0.035 & 0.577 \pm 0.045 \


In [58]:
# Spot check: second component of the result for the first observable set and
# the first mixing weighting, rounded as in the table.
first_entry = all_syn_vs_ant_sub[0][0]
round(first_entry[1], 3)

0.023

## Syn vs NonSyn

In [35]:
# Balanced accuracy, synonyms vs non-synonyms (ANTONYMS and NONE bunched
# together), "std_dev" mode, for every combination of observable set (rows)
# and mixing weights (columns).
all_syn_vs_nonsyn = [
    [
        mf.balanced_accuracy(all_sets[set_idx][mix_idx], simdict, ['ANTONYMS', 'NONE'], 'SYNONYMS', 'std_dev')
        for mix_idx in range(len(vals_of_a))
    ]
    for set_idx in range(len(obs_sets))
]

In [84]:
# Make results ready for pasting into the paper: one table row per observable
# set, one column per mixing weighting, values rounded to 3 decimals.
# NOTE(review): each row ends with a single backslash, whereas a LaTeX row
# break is a double backslash - confirm whether the second one is added by hand.
last_col = len(vals_of_a) - 1

for set_idx in range(len(obs_sets)):
    row = obs_set_names[set_idx] + ' & '

    for col in range(len(vals_of_a)):
        # Column separator between cells, row terminator after the last one.
        trailer = ' & ' if col < last_col else ' \\'
        row += str(round(all_syn_vs_nonsyn[set_idx][col], 3)) + trailer

    print(row)

text{13 obs} & 0.551 & 0.568 & 0.571 & 0.555 & 0.55 \
text{15 obs} & 0.522 & 0.528 & 0.524 & 0.524 & 0.508 \
text{28 obs} & 0.54 & 0.556 & 0.554 & 0.557 & 0.554 \


In [61]:
# Subset-based balanced accuracy, synonyms vs non-synonyms (ANTONYMS and NONE
# bunched together), "std_dev" mode, for every combination of observable set
# (rows) and mixing weights (columns).
# 72, 1360 and 198 are 65% of the ANTONYMS (111), NONE (2093) and SYNONYMS (306)
# entry counts, as printed by the "Find 65% of each key" cell.
# NOTE(review): the leading 7 is presumably the number of subsets drawn -
# confirm against MFunctions.
n_nonsyn_65 = 72 + 1360
n_syn_65 = 198

all_syn_vs_nonsyn_sub = [
    [
        mf.balanced_accuracy_subsets(
            all_sets[set_idx][mix_idx], simdict, ['ANTONYMS', 'NONE'], 'SYNONYMS', 'std_dev', 7, n_nonsyn_65, n_syn_65
        )
        for mix_idx in range(len(vals_of_a))
    ]
    for set_idx in range(len(obs_sets))
]

In [85]:
# Make results ready for pasting into the paper: one table row per observable
# set, one "value \pm spread" cell per mixing weighting (rounded to 3 decimals).
# The ' \pm ' separator is a raw string: '\p' is not a valid escape sequence
# and triggers a SyntaxWarning on recent Python versions.
# NOTE(review): each row ends with a single backslash, whereas a LaTeX row
# break is a double backslash - confirm whether the second one is added by hand.
for a in range(len(obs_sets)):

    cells = [
        str(round(all_syn_vs_nonsyn_sub[a][b][0], 3)) + r' \pm ' + str(round(all_syn_vs_nonsyn_sub[a][b][1], 3))
        for b in range(len(vals_of_a))
    ]

    print(obs_set_names[a] + ' & ' + ' & '.join(cells) + ' \\')

text{13 obs} & 0.55 \pm 0.016 & 0.57 \pm 0.015 & 0.57 \pm 0.018 & 0.558 \pm 0.018 & 0.557 \pm 0.017 \
text{15 obs} & 0.527 \pm 0.024 & 0.528 \pm 0.021 & 0.527 \pm 0.02 & 0.529 \pm 0.025 & 0.514 \pm 0.023 \
text{28 obs} & 0.54 \pm 0.019 & 0.556 \pm 0.016 & 0.556 \pm 0.019 & 0.561 \pm 0.019 & 0.56 \pm 0.018 \


## Ant vs NonAnt

In [38]:
# Balanced accuracy, antonyms vs non-antonyms (NONE and SYNONYMS bunched
# together), "std_dev" mode, for every combination of observable set (rows)
# and mixing weights (columns).
all_ant_vs_nonant = [
    [
        mf.balanced_accuracy(all_sets[set_idx][mix_idx], simdict, 'ANTONYMS', ['NONE', 'SYNONYMS'], 'std_dev')
        for mix_idx in range(len(vals_of_a))
    ]
    for set_idx in range(len(obs_sets))
]

In [86]:
# Make results ready for pasting into the paper: one table row per observable
# set, one column per mixing weighting, values rounded to 3 decimals.
# NOTE(review): each row ends with a single backslash, whereas a LaTeX row
# break is a double backslash - confirm whether the second one is added by hand.
last_col = len(vals_of_a) - 1

for set_idx in range(len(obs_sets)):
    row = obs_set_names[set_idx] + ' & '

    for col in range(len(vals_of_a)):
        # Column separator between cells, row terminator after the last one.
        trailer = ' & ' if col < last_col else ' \\'
        row += str(round(all_ant_vs_nonant[set_idx][col], 3)) + trailer

    print(row)

text{13 obs} & 0.5 & 0.503 & 0.512 & 0.485 & 0.473 \
text{15 obs} & 0.556 & 0.535 & 0.54 & 0.559 & 0.552 \
text{28 obs} & 0.523 & 0.531 & 0.529 & 0.52 & 0.511 \


In [64]:
# Subset-based balanced accuracy, antonyms vs non-antonyms (NONE and SYNONYMS
# bunched together), "std_dev" mode, for every combination of observable set
# (rows) and mixing weights (columns).
# 72, 1360 and 198 are 65% of the ANTONYMS (111), NONE (2093) and SYNONYMS (306)
# entry counts, as printed by the "Find 65% of each key" cell.
# NOTE(review): the leading 7 is presumably the number of subsets drawn -
# confirm against MFunctions.
n_ant_65 = 72
n_nonant_65 = 1360 + 198

all_ant_vs_nonant_sub = [
    [
        mf.balanced_accuracy_subsets(
            all_sets[set_idx][mix_idx], simdict, 'ANTONYMS', ['NONE', 'SYNONYMS'], 'std_dev', 7, n_ant_65, n_nonant_65
        )
        for mix_idx in range(len(vals_of_a))
    ]
    for set_idx in range(len(obs_sets))
]

In [87]:
# Make results ready for pasting into the paper: one table row per observable
# set, one "value \pm spread" cell per mixing weighting (rounded to 3 decimals).
# The ' \pm ' separator is a raw string: '\p' is not a valid escape sequence
# and triggers a SyntaxWarning on recent Python versions.
# NOTE(review): each row ends with a single backslash, whereas a LaTeX row
# break is a double backslash - confirm whether the second one is added by hand.
for a in range(len(obs_sets)):

    cells = [
        str(round(all_ant_vs_nonant_sub[a][b][0], 3)) + r' \pm ' + str(round(all_ant_vs_nonant_sub[a][b][1], 3))
        for b in range(len(vals_of_a))
    ]

    print(obs_set_names[a] + ' & ' + ' & '.join(cells) + ' \\')

text{13 obs} & 0.491 \pm 0.038 & 0.494 \pm 0.029 & 0.502 \pm 0.024 & 0.473 \pm 0.031 & 0.468 \pm 0.029 \
text{15 obs} & 0.54 \pm 0.029 & 0.525 \pm 0.035 & 0.527 \pm 0.029 & 0.543 \pm 0.034 & 0.543 \pm 0.029 \
text{28 obs} & 0.501 \pm 0.037 & 0.524 \pm 0.031 & 0.519 \pm 0.027 & 0.505 \pm 0.03 & 0.494 \pm 0.035 \


## Hyper/Hypo vs CoHypo

In [43]:
# Balanced accuracy, hyper/hyponyms vs cohyponyms ("std_dev" mode), for every
# combination of observable set (rows) and mixing weights (columns).
all_hyper_cohypo = [
    [
        mf.balanced_accuracy(all_sets[set_idx][mix_idx], simdict, 'HYPER/HYPONYMS', 'COHYPONYMS', 'std_dev')
        for mix_idx in range(len(vals_of_a))
    ]
    for set_idx in range(len(obs_sets))
]

In [88]:
# Make results ready for pasting into the paper: one table row per observable
# set, one column per mixing weighting, values rounded to 3 decimals.
# NOTE(review): each row ends with a single backslash, whereas a LaTeX row
# break is a double backslash - confirm whether the second one is added by hand.
last_col = len(vals_of_a) - 1

for set_idx in range(len(obs_sets)):
    row = obs_set_names[set_idx] + ' & '

    for col in range(len(vals_of_a)):
        # Column separator between cells, row terminator after the last one.
        trailer = ' & ' if col < last_col else ' \\'
        row += str(round(all_hyper_cohypo[set_idx][col], 3)) + trailer

    print(row)

text{13 obs} & 0.525 & 0.529 & 0.537 & 0.556 & 0.551 \
text{15 obs} & 0.533 & 0.524 & 0.536 & 0.547 & 0.545 \
text{28 obs} & 0.531 & 0.543 & 0.528 & 0.56 & 0.566 \


In [66]:
# Subset-based balanced accuracy, hyper/hyponyms vs cohyponyms ("std_dev"
# mode), for every combination of observable set (rows) and mixing weights
# (columns).
# 520 and 123 are 65% of the HYPER/HYPONYMS (800) and COHYPONYMS (190) entry
# counts, as printed by the "Find 65% of each key" cell.
# NOTE(review): the leading 7 is presumably the number of subsets drawn -
# confirm against MFunctions.
n_hyper_65, n_cohypo_65 = 520, 123

all_hyper_cohypo_sub = [
    [
        mf.balanced_accuracy_subsets(
            all_sets[set_idx][mix_idx], simdict, 'HYPER/HYPONYMS', 'COHYPONYMS', 'std_dev', 7, n_hyper_65, n_cohypo_65
        )
        for mix_idx in range(len(vals_of_a))
    ]
    for set_idx in range(len(obs_sets))
]

In [89]:
# Make results ready for pasting into the paper: one table row per observable
# set, one "value \pm spread" cell per mixing weighting (rounded to 3 decimals).
# The ' \pm ' separator is a raw string: '\p' is not a valid escape sequence
# and triggers a SyntaxWarning on recent Python versions.
# NOTE(review): each row ends with a single backslash, whereas a LaTeX row
# break is a double backslash - confirm whether the second one is added by hand.
for a in range(len(obs_sets)):

    cells = [
        str(round(all_hyper_cohypo_sub[a][b][0], 3)) + r' \pm ' + str(round(all_hyper_cohypo_sub[a][b][1], 3))
        for b in range(len(vals_of_a))
    ]

    print(obs_set_names[a] + ' & ' + ' & '.join(cells) + ' \\')

text{13 obs} & 0.517 \pm 0.023 & 0.52 \pm 0.021 & 0.535 \pm 0.024 & 0.547 \pm 0.02 & 0.545 \pm 0.025 \
text{15 obs} & 0.527 \pm 0.019 & 0.524 \pm 0.023 & 0.529 \pm 0.027 & 0.538 \pm 0.029 & 0.539 \pm 0.029 \
text{28 obs} & 0.527 \pm 0.021 & 0.536 \pm 0.023 & 0.528 \pm 0.025 & 0.56 \pm 0.024 & 0.563 \pm 0.026 \


## Syn vs None vs Ant

In [41]:
# Three-class balanced accuracy, antonyms vs none vs synonyms ("std_dev"
# mode), for every combination of observable set (rows) and mixing weights
# (columns).
all_syn_vs_none_vs_ant = [
    [
        mf.balanced_accuracy3(all_sets[set_idx][mix_idx], simdict, 'ANTONYMS', 'NONE', 'SYNONYMS', 'std_dev')
        for mix_idx in range(len(vals_of_a))
    ]
    for set_idx in range(len(obs_sets))
]

In [90]:
# Make results ready for pasting into the paper: one table row per observable
# set, one column per mixing weighting, values rounded to 3 decimals.
# NOTE(review): each row ends with a single backslash, whereas a LaTeX row
# break is a double backslash - confirm whether the second one is added by hand.
last_col = len(vals_of_a) - 1

for set_idx in range(len(obs_sets)):
    row = obs_set_names[set_idx] + ' & '

    for col in range(len(vals_of_a)):
        # Column separator between cells, row terminator after the last one.
        trailer = ' & ' if col < last_col else ' \\'
        row += str(round(all_syn_vs_none_vs_ant[set_idx][col], 3)) + trailer

    print(row)

text{13 obs} & 0.36 & 0.376 & 0.379 & 0.356 & 0.346 \
text{15 obs} & 0.382 & 0.374 & 0.366 & 0.384 & 0.37 \
text{28 obs} & 0.367 & 0.387 & 0.384 & 0.372 & 0.374 \


In [69]:
# Subset-based three-class balanced accuracy, antonyms vs none vs synonyms
# ("std_dev" mode), for every combination of observable set (rows) and mixing
# weights (columns).
# 72, 1360 and 198 are 65% of the ANTONYMS (111), NONE (2093) and SYNONYMS (306)
# entry counts, as printed by the "Find 65% of each key" cell.
# NOTE(review): the leading 7 is presumably the number of subsets drawn -
# confirm against MFunctions.
n_ant_65, n_none_65, n_syn_65 = 72, 1360, 198

all_syn_vs_none_vs_ant_sub = [
    [
        mf.balanced_accuracy_subsets3(
            all_sets[set_idx][mix_idx], simdict, 'ANTONYMS', 'NONE', 'SYNONYMS', 'std_dev',
            7, n_ant_65, n_none_65, n_syn_65
        )
        for mix_idx in range(len(vals_of_a))
    ]
    for set_idx in range(len(obs_sets))
]

In [91]:
# Make results ready for pasting into the paper: one table row per observable
# set, one "value \pm spread" cell per mixing weighting (rounded to 3 decimals).
# The ' \pm ' separator is a raw string: '\p' is not a valid escape sequence
# and triggers a SyntaxWarning on recent Python versions.
# NOTE(review): each row ends with a single backslash, whereas a LaTeX row
# break is a double backslash - confirm whether the second one is added by hand.
for a in range(len(obs_sets)):

    cells = [
        str(round(all_syn_vs_none_vs_ant_sub[a][b][0], 3)) + r' \pm ' + str(round(all_syn_vs_none_vs_ant_sub[a][b][1], 3))
        for b in range(len(vals_of_a))
    ]

    print(obs_set_names[a] + ' & ' + ' & '.join(cells) + ' \\')

text{13 obs} & 0.369 \pm 0.02 & 0.378 \pm 0.02 & 0.385 \pm 0.026 & 0.364 \pm 0.024 & 0.359 \pm 0.023 \
text{15 obs} & 0.384 \pm 0.027 & 0.375 \pm 0.021 & 0.378 \pm 0.027 & 0.39 \pm 0.025 & 0.374 \pm 0.021 \
text{28 obs} & 0.372 \pm 0.021 & 0.387 \pm 0.021 & 0.391 \pm 0.026 & 0.39 \pm 0.029 & 0.379 \pm 0.028 \
