**NOTE** Evaluate all systems against the crowd and compare with entropy.

In [1]:
import pickle
import json
from collections import defaultdict

In [2]:
crowd_data_loc="crowd_data"
profiler_data_loc="profiler_data"
givens_file="given.pkl"
predicted_file="predicted.pkl"

In [3]:
with open('mappings.tsv', 'r') as f:
    mapping_file=f.readlines()
mappings={}
for line in mapping_file:
    uri, old_label, new_label = line.split('\t')
    mappings[uri]=new_label.strip()

In [4]:
domains={'century':['20', '19', '18', '17', '21'], 
         'lifedur': ['71-80', '81-90', '61-70', '51-60', '91-100', '41-50', 
                      '31-40', '21-30', '101-110', '11-20'],
         'gender': ['Male', 'Female'],
         'politicalparty': ['Democratic Party', 'Republican Party'],
         'occupation': ['politician', 'actor', 'lawyer', 'baseball player', 
                        'American football player', 'singer', 'writer', 
                        'basketball player', 'judge'],
         'birthplace': ['New York City (NY)', 'Chicago (IL)', 'Los Angeles (CA)', 
                        'Philadelphia (PA)', 'Boston (MA)', 'Washington D.C.', 
                        'San Francisco (CA)', 'Detroit (MI)'],
         'deathplace': ['New York City (NY)', 'Chicago (IL)', 'Los Angeles (CA)', 
                        'Philadelphia (PA)', 'Boston (MA)', 'Washington D.C.', 
                        'San Francisco (CA)', 'Santa Monica (CA)'],
         'educatedat': ['Harvard University', 'Columbia University', 'Yale University', 
                        'University of Michigan', 'Stanford University', 
                        'Princeton University', 'University of Wisconsin–Madison', 
                        'University of California, Berkeley', 'Cornell University'],
         'worklocation': ['Washington D.C.', 'New York City (NY)', 
                          'Harrisburg (PA)', 'Sacramento (CA)', 'Austin (TX)', 
                          'Springfield (IL)', 'Tallahassee (FL)', 'Baton Rouge (LA)', 
                          'Montpelier (VT)', 'Phoenix (AZ)'],
         'religion': ['Christianity', 'atheism', 'Judaism', 'Islam']
        }

In [5]:
properties_mapping={'member of political party': 'politicalparty',
                    'educated at': 'educatedat',
                    'occupation': 'occupation',
                    'work location': 'worklocation',
                    'place of birth': 'birthplace',
                    'place of death': 'deathplace',
                    'lifespan': 'lifedur',
                    'sex or gender': 'gender',
                    'religion': 'religion',
                    'century': 'century'
                   }

### 1. Prepare profiler data

#### 1.1. Map URIs to labels

In [6]:
with open('%s/%s' % (profiler_data_loc, givens_file), 'rb') as f:
    old_prof_givens=pickle.load(f)
with open('%s/%s' % (profiler_data_loc, predicted_file), 'rb') as f:
    old_prof_preds=pickle.load(f,encoding='latin1')

In [7]:
def map_givens(old_prof_givens):    
    new_prof_givens=[]
    for row in old_prof_givens:
        new_row={}
        for k,v in row.items():
            new_k=properties_mapping[k]
            new_v = mappings[v]
            new_row[new_k]=new_v
        new_prof_givens.append(new_row)
    return new_prof_givens

In [20]:
def accumulate_and_transpose(new_prof_givens, old_prof_preds):
            
    transposed_prof_predictions=[]
    for x in range(len(new_prof_givens)):
        given_props=new_prof_givens[x].keys()
        needed_props=domains.keys()-given_props
        pred_row={}
        for prop in needed_props:
            pred_row[prop]=new_prof_preds[prop][x]
            pred_row[prop]['None of the above']=max(0,1-sum(pred_row[prop].values()))
        transposed_prof_predictions.append(pred_row)

    return transposed_prof_predictions

In [21]:
new_prof_givens=map_givens(old_prof_givens)

new_prof_preds=defaultdict(list)
for prop, data in old_prof_preds.items():
    prop=properties_mapping[prop]
    for dist in data:
        new_dist={}
        for k,v in dist:
            if k in mappings.keys() and mappings[k] in domains[prop]:
                new_k=mappings[k]
            else:
                new_k='None of the above'
            if new_k not in new_dist.keys():
                new_dist[new_k]=v
            else:
                new_dist[new_k]+=v
        new_prof_preds[prop].append(new_dist)

transposed_prof_predictions=accumulate_and_transpose(new_prof_givens, new_prof_preds)

### 2. Prepare crowd data

In [22]:
with open('%s/%s' % (crowd_data_loc, givens_file), 'rb') as f:
    old_crowd_givens=pickle.load(f)
    new_crowd_givens=old_crowd_givens
with open('%s/%s' % (crowd_data_loc, predicted_file), 'rb') as f:
    old_crowd_preds=pickle.load(f)

In [23]:
undecided='I can not decide'
def transform_undecided(prop, dist):
    if undecided in dist.keys():
        new_dist={}
        undecided_part=dist[undecided]
        split_among=len(domains[prop]) + 1
        add_to_each=undecided_part/split_among
        for value in ['None of the above'] + domains[prop]:
            if value in dist.keys():
                new_dist[value]=dist[value]+add_to_each
            else:
                new_dist[value]=add_to_each
        return new_dist
    else:
        return dist

In [24]:
new_crowd_preds=[]
for row in old_crowd_preds:
    new_row={}
    for prop, dist in row.items():
        new_dist=transform_undecided(prop, dist)
        new_row[prop]=new_dist
    new_crowd_preds.append(new_row)

In [25]:
new_crowd_preds

[{'birthplace': {'Boston (MA)': 0.38222222222222224,
   'Chicago (IL)': 0.05222222222222222,
   'Detroit (MI)': 0.05222222222222222,
   'Los Angeles (CA)': 0.12222222222222223,
   'New York City (NY)': 0.12222222222222223,
   'None of the above': 0.05222222222222222,
   'Philadelphia (PA)': 0.12222222222222223,
   'San Francisco (CA)': 0.05222222222222222,
   'Washington D.C.': 0.05222222222222222},
  'century': {'17': 0.045000000000000005,
   '18': 0.11500000000000002,
   '19': 0.11500000000000002,
   '20': 0.645,
   '21': 0.045000000000000005,
   'None of the above': 0.045000000000000005},
  'deathplace': {'Boston (MA)': 0.1288888888888889,
   'Chicago (IL)': 0.05888888888888889,
   'Los Angeles (CA)': 0.1288888888888889,
   'New York City (NY)': 0.18888888888888888,
   'None of the above': 0.1288888888888889,
   'Philadelphia (PA)': 0.05888888888888889,
   'San Francisco (CA)': 0.05888888888888889,
   'Santa Monica (CA)': 0.05888888888888889,
   'Washington D.C.': 0.1888888888888888

### 3. Prepare for evaluation

In [26]:
# Check if the order is the same in both data pieces
for x in range(len(new_crowd_givens)):
    print(new_crowd_givens[x]==new_prof_givens[x])
print(len(new_crowd_givens))

True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True


In [27]:
def prob_to_list(data, prop_domains):
    size=len(prop_domains)+1
    probs=[0]*size
    
    
    for value, probability in data.items():
        if value=='University of California Berkeley':
            value='University of California, Berkeley'
        if value=='None of the above':
            index=-1
        else:
            index = prop_domains.index(value)
        probs[index]=probability
    return probs

In [28]:
props = list(properties_mapping.values())[::-1]
props

['century',
 'religion',
 'gender',
 'lifedur',
 'deathplace',
 'birthplace',
 'worklocation',
 'occupation',
 'educatedat',
 'politicalparty']

In [29]:
import evaluation
import math
from sklearn.metrics import mutual_info_score
from scipy.spatial.distance import cosine

def compute_divergencies(system, gold, givens, props, metric='js_div'):
    divergencies = defaultdict(list)
    
    divergencies_on_known={}
    
    uncertainties={}
    
    for p in props:
        divergencies_on_known[p]=defaultdict(list)
        uncertainties[p]=defaultdict(list)
    
    for x in range(len(givens)):
        profiler_predictions=system[x]
        crowd_predictions=gold[x]
        
        for prop in crowd_predictions.keys():
            crowd_prop_predictions=crowd_predictions[prop]
            prof_prop_predictions=profiler_predictions[prop]
            crowd_probs=prob_to_list(crowd_prop_predictions, domains[prop])
            prof_probs=prob_to_list(prof_prop_predictions, domains[prop])

            if metric=='js_div':
                div=evaluation.js(crowd_probs, prof_probs)
            elif metric=='js_dist':
                tmp_div=evaluation.js(crowd_probs, prof_probs)
                div=math.sqrt(tmp_div)
            elif metric=='kl_div':
                div=evaluation.kl(crowd_probs, prof_probs)
            elif metric=='kl_max':
                div=evaluation.kl_max(crowd_probs, prof_probs)
            elif metric=='kl_avg':
                div=evaluation.kl_avg(crowd_probs, prof_probs)
            elif metric=='cos':
                div=cosine(crowd_probs, prof_probs)
            divergencies[prop].append(div)
            
            known_props=len(givens[x])
            divergencies_on_known[prop][known_props].append(div)
            
            unc=0.0
            if 'None of the above' in crowd_predictions[prop]:
                unc=crowd_predictions[prop]['None of the above']
            uncertainties[prop][known_props].append(unc)

    return divergencies, divergencies_on_known, uncertainties

In [30]:
def compute_mean_per_prop(data):
    prop_divs={}
    for prop, divs in data.items():
        avg_div=round(sum(divs)/len(divs),2)
#        print(prop, '\t', avg_div)
        prop_divs[prop]=avg_div
    return prop_divs

In [31]:
metrics=['js_div', 'js_dist', 'kl_div', 'kl_max', 'kl_avg', 'cos']
div_per_metric={}
div_array_per_metric={}
js_dist_ae=[]
for metric in metrics:
    divergencies, div_known, uncertainties = compute_divergencies(transposed_prof_predictions, 
                                                                new_crowd_preds, 
                                                                new_crowd_givens, 
                                                                props, metric=metric)
    
    mean_values=compute_mean_per_prop(divergencies)
    if metric in ['js_div', 'js_dist']:
        print(metric, mean_values)
        if metric =='js_dist':
            for prop in props:
                js_dist_ae.append(mean_values[prop])
    div_per_metric[metric]=mean_values
    div_array_per_metric[metric]=list(mean_values.values())

js_div {'century': 0.12, 'religion': 0.06, 'gender': 0.02, 'deathplace': 0.16, 'lifedur': 0.09, 'birthplace': 0.24, 'worklocation': 0.3, 'occupation': 0.32, 'educatedat': 0.23, 'politicalparty': 0.06}
js_dist {'century': 0.32, 'religion': 0.22, 'gender': 0.13, 'deathplace': 0.39, 'lifedur': 0.3, 'birthplace': 0.48, 'worklocation': 0.52, 'occupation': 0.56, 'educatedat': 0.47, 'politicalparty': 0.22}


In [32]:
from scipy.stats import spearmanr, pearsonr

pearson_correlations = [[' '] + metrics]
spearman_correlations = [[' '] + metrics]

for metric1, dist1 in div_array_per_metric.items():
    pearson_row=[metric1]
    spearman_row=[metric1]
    for metric2, dist2 in div_array_per_metric.items():
        sp_corr=spearmanr(dist1, dist2)
        pe_corr=pearsonr(dist1, dist2)
        spearman_row.append(round(sp_corr.correlation, 4))
        pearson_row.append(round(pe_corr[0], 4))
        
    pearson_correlations.append(pearson_row)
    spearman_correlations.append(spearman_row)

In [33]:
def print_matrix(A):
    print('\n'.join(['\t'.join(['{:7}'.format(item) for item in row]) 
      for row in A]))

In [34]:
print_matrix(pearson_correlations)

       	js_div 	js_dist	kl_div 	kl_max 	kl_avg 	cos    
js_div 	    1.0	 0.9855	 0.8529	 0.8948	 0.9332	 0.9955
js_dist	 0.9855	    1.0	 0.8486	  0.883	 0.9184	 0.9878
kl_div 	 0.8529	 0.8486	    1.0	 0.9933	 0.9774	  0.868
kl_max 	 0.8948	  0.883	 0.9933	    1.0	 0.9944	 0.9033
kl_avg 	 0.9332	 0.9184	 0.9774	 0.9944	    1.0	 0.9392
cos    	 0.9955	 0.9878	  0.868	 0.9033	 0.9392	    1.0


In [35]:
print_matrix(spearman_correlations)

       	js_div 	js_dist	kl_div 	kl_max 	kl_avg 	cos    
js_div 	    1.0	    1.0	 0.8936	 0.9119	  0.924	  0.997
js_dist	    1.0	    1.0	 0.8936	 0.9119	  0.924	  0.997
kl_div 	 0.8936	 0.8936	    1.0	 0.9758	 0.9515	  0.903
kl_max 	 0.9119	 0.9119	 0.9758	    1.0	 0.9879	 0.9152
kl_avg 	  0.924	  0.924	 0.9515	 0.9879	    1.0	 0.9273
cos    	  0.997	  0.997	  0.903	 0.9152	 0.9273	    1.0


In [36]:
for prop, data in div_known.items():
    for known_props, dists in data.items():
        uncs=uncertainties[prop][known_props]
        print(prop, '\t', known_props, '\t', round(sum(dists)/len(dists),2), '\t', 
              round(sum(uncs)/len(uncs), 4))

century 	 3 	 0.04 	 0.0299
century 	 4 	 0.06 	 0.0161
century 	 5 	 0.1 	 0.0241
century 	 6 	 0.22 	 0.0045
century 	 7 	 0.5 	 0.0028
century 	 8 	 0.4 	 0.0046
century 	 9 	 1.0 	 0.0
religion 	 3 	 0.04 	 0.0296
religion 	 4 	 0.06 	 0.0237
religion 	 5 	 0.08 	 0.0212
religion 	 6 	 0.06 	 0.0174
religion 	 7 	 0.02 	 0.0125
religion 	 8 	 0.02 	 0.01
gender 	 3 	 0.01 	 0.03
gender 	 4 	 0.01 	 0.026
gender 	 5 	 0.01 	 0.0297
gender 	 6 	 0.0 	 0.013
gender 	 7 	 0.0 	 0.0039
lifedur 	 3 	 0.15 	 0.0276
lifedur 	 4 	 0.16 	 0.0178
lifedur 	 5 	 0.13 	 0.0209
deathplace 	 3 	 0.49 	 0.1102
deathplace 	 4 	 0.22 	 0.125
deathplace 	 5 	 0.15 	 0.0953
deathplace 	 6 	 0.17 	 0.0776
birthplace 	 3 	 0.51 	 0.0997
birthplace 	 4 	 0.44 	 0.0554
birthplace 	 5 	 0.36 	 0.0311
worklocation 	 3 	 0.34 	 0.086
worklocation 	 4 	 0.45 	 0.1609
worklocation 	 5 	 0.54 	 0.1133
worklocation 	 6 	 0.48 	 0.1209
occupation 	 3 	 0.43 	 0.1088
occupation 	 4 	 0.53 	 0.0972
occupation 	 5 	 

### 4. MFV baseline

#### 4.1 Prepare

In [37]:
mfv_data_loc = 'mfv_data'
with open('%s/%s' % (mfv_data_loc, givens_file), 'rb') as f:
    mfv_givens=pickle.load(f)
with open('%s/%s' % (mfv_data_loc, predicted_file), 'rb') as f:
    mfv_preds = pickle.load(f)

In [38]:
new_mfv_givens=map_givens(mfv_givens)

In [39]:
new_mfv_preds=[]
for a_row in mfv_preds:
    new_row={}
    for prop, vals in a_row.items():
        prop=properties_mapping[prop]
        for k,v in vals.items():
            new_k=mappings[k]
            new_row[prop]={new_k: 1.0}
    new_mfv_preds.append(new_row)

#### 4.2 Evaluate

In [40]:
js_dist_mfv=[]
for metric in ['js_div','js_dist']:
    mfv_divs, div_known_mfv, uncertainties = \
        compute_divergencies(new_mfv_preds, new_crowd_preds, mfv_givens, props, metric=metric)
    mean_values=compute_mean_per_prop(mfv_divs)
    print(metric, mean_values)
    if metric == 'js_dist':
        for prop in props:
            js_dist_mfv.append(mean_values[prop])

js_div {'century': 0.13, 'religion': 0.05, 'gender': 0.04, 'deathplace': 0.51, 'lifedur': 0.29, 'birthplace': 0.39, 'worklocation': 0.49, 'occupation': 0.37, 'educatedat': 0.39, 'politicalparty': 0.17}
js_dist {'century': 0.35, 'religion': 0.2, 'gender': 0.17, 'deathplace': 0.71, 'lifedur': 0.53, 'birthplace': 0.6, 'worklocation': 0.68, 'occupation': 0.6, 'educatedat': 0.62, 'politicalparty': 0.39}


### 5. NB baseline

#### 5.1 Prepare

In [41]:
nb_data_loc = 'nb_data'
with open('%s/%s' % (nb_data_loc, predicted_file), 'rb') as f:
    nb_preds = pickle.load(f, encoding='latin1')

In [42]:
nb_givens=new_mfv_givens

In [43]:
new_nb_preds=[]
for x in range(len(new_crowd_givens)):
    new_row={}
    these_givens=new_crowd_givens[x]
    for prop in nb_preds.keys():
        new_prop=properties_mapping[prop]
        new_value=defaultdict(float)
        if new_prop not in these_givens.keys():
            pred=nb_preds[prop][x]
            for k, v in pred.items():
                if k in mappings.keys() and mappings[k] in domains[new_prop]:
                    new_k=mappings[k]
                else:
                    new_k='None of the above'
                new_value[new_k] += v
            new_row[new_prop]=new_value
    new_nb_preds.append(new_row)

In [44]:
transposed_nb_predictions=accumulate_and_transpose(nb_givens, new_nb_preds)

#### 5.2 Evaluate

In [45]:
print(len(transposed_prof_predictions[0]))

7


In [46]:
js_dist_nb=[]
for metric in ['js_div','js_dist']:
    nb_divs, div_known_nb, uncertainties = \
        compute_divergencies(transposed_nb_predictions, new_crowd_preds, 
                             nb_givens, props, metric=metric)
    mean_values=compute_mean_per_prop(nb_divs)
    print(metric, mean_values)
    if metric == 'js_dist':
        for prop in props:
            js_dist_nb.append(mean_values[prop])

js_div {'century': 0.12, 'religion': 0.09, 'gender': 0.02, 'deathplace': 0.2, 'lifedur': 0.09, 'birthplace': 0.26, 'worklocation': 0.28, 'occupation': 0.36, 'educatedat': 0.25, 'politicalparty': 0.06}
js_dist {'century': 0.32, 'religion': 0.26, 'gender': 0.13, 'deathplace': 0.44, 'lifedur': 0.3, 'birthplace': 0.49, 'worklocation': 0.5, 'occupation': 0.59, 'educatedat': 0.49, 'politicalparty': 0.22}


### 6. Compute correlation to entropy

In [47]:
entropy=[0.922, 1.256, 0.701, 2.403, 2.676, 2.477, 2.802, 2.903, 2.907, 0.998]
norm_entropy=[0.397, 0.628, 0.701, 0.801, 0.806, 0.826, 0.844, 0.916, 0.917, 0.998]
#js_dist_ae=[0.32, 0.27, 0.13, 0.44, 0.31, 0.5, 0.5, 0.59, 0.5, 0.23]
#js_dist_mfv=[0.36, 0.19, 0.16, 0.71, 0.53, 0.59, 0.68, 0.6, 0.62, 0.38]
#js_dist_nb=[0.57, 0.19, 0.16, 0.67, 0.58, 0.65, 0.68, 0.68, 0.62, 0.39]

for metric in [spearmanr, pearsonr]:
    print('entropy<->mfv', metric(entropy, js_dist_mfv))
    print('norm_entropy<->mfv', metric(norm_entropy, js_dist_mfv))

    print('entropy<->nb', metric(entropy, js_dist_nb))
    print('norm_entropy<->nb', metric(norm_entropy, js_dist_nb))

    print('entropy<->ae', metric(entropy, js_dist_ae))
    print('norm_entropy<->ae', metric(norm_entropy, js_dist_ae))

entropy<->mfv SpearmanrResult(correlation=0.8389096502784891, pvalue=0.0024139883688156136)
norm_entropy<->mfv SpearmanrResult(correlation=0.5288778230016562, pvalue=0.11599132062584641)
entropy<->nb SpearmanrResult(correlation=0.8571468165888911, pvalue=0.0015278398373375572)
norm_entropy<->nb SpearmanrResult(correlation=0.4012176588288426, pvalue=0.25049600352532764)
entropy<->ae SpearmanrResult(correlation=0.8267515394048879, pvalue=0.0031797140860104155)
norm_entropy<->ae SpearmanrResult(correlation=0.43161293601284584, pvalue=0.21294529187580233)
entropy<->mfv (0.9141402461350389, 0.00021412772985049123)
norm_entropy<->mfv (0.5207227235390267, 0.12277788448332227)
entropy<->nb (0.887903246196056, 0.0006021440093523258)
norm_entropy<->nb (0.35791014872201793, 0.3098910465936598)
entropy<->ae (0.8754719418492624, 0.0009028723003217823)
norm_entropy<->ae (0.36204944106039, 0.3039199826887172)
