**NOTE** This notebook gives us the inter-annotator agreement on the crowd data.

In [1]:
import numpy as np
import krippendorff

In [2]:
def fleiss_kappa(M):
    """
    See `Fleiss' Kappa <https://en.wikipedia.org/wiki/Fleiss%27_kappa>`_.
    :param M: a matrix of shape (:attr:`N`, :attr:`k`) where `N` is the number of subjects and `k` is the number of categories into which assignments are made. `M[i, j]` represent the number of raters who assigned the `i`th subject to the `j`th category.
    :type M: numpy matrix
    """
    N, k = M.shape  # N is # of items, k is # of categories
    n_annotators = float(np.sum(M[0, :]))  # # of annotators

    p = np.sum(M, axis=0) / (N * n_annotators)
    P = (np.sum(M * M, axis=1) - n_annotators) / (n_annotators * (n_annotators - 1))
    Pbar = np.sum(P) / N
    PbarE = np.sum(p * p)

    kappa = (Pbar - PbarE) / (1 - PbarE)

    return kappa
    #end def

In [3]:
import json
from collections import defaultdict
import csv
import pickle

### 1. Setup job ID and load the results JSONL file

In [4]:
domains={'century':['20', '19', '18', '17', '21'], 
         'lifedur': ['71-80', '81-90', '61-70', '51-60', '91-100', '41-50', 
                      '31-40', '21-30', '101-110', '11-20'],
         'gender': ['Male', 'Female'],
         'politicalparty': ['Democratic Party', 'Republican Party'],
         'occupation': ['politician', 'actor', 'lawyer', 'baseball player', 
                        'American football player', 'singer', 'writer', 
                        'basketball player', 'judge'],
         'birthplace': ['New York City (NY)', 'Chicago (IL)', 'Los Angeles (CA)', 
                        'Philadelphia (PA)', 'Boston (MA)', 'Washington D.C.', 
                        'San Francisco (CA)', 'Detroit (MI)'],
         'deathplace': ['New York City (NY)', 'Chicago (IL)', 'Los Angeles (CA)', 
                        'Philadelphia (PA)', 'Boston (MA)', 'Washington D.C.', 
                        'San Francisco (CA)', 'Santa Monica (CA)'],
         'educatedat': ['Harvard University', 'Columbia University', 'Yale University', 
                        'University of Michigan', 'Stanford University', 
                        'Princeton University', 'University of Wisconsin–Madison', 
                        'University of California, Berkeley', 'Cornell University'],
         'worklocation': ['Washington D.C.', 'New York City (NY)', 
                          'Harrisburg (PA)', 'Sacramento (CA)', 'Austin (TX)', 
                          'Springfield (IL)', 'Tallahassee (FL)', 'Baton Rouge (LA)', 
                          'Montpelier (VT)', 'Phoenix (AZ)'],
         'religion': ['Christianity', 'atheism', 'Judaism', 'Islam']
        }

In [5]:
job_ids=["1285337", "1286025"]
data_loc='crowd_data'

results=[]
for jid in job_ids:
    filename='%s/job_%s.json' % (data_loc, jid)
    with open(filename, 'r') as f:
        for line in f:
            results.append(json.loads(line))

len(results)

338

In [6]:
def get_judgements_count(judgements, prop):
    dists=defaultdict(int)
    for j in judgements:
        for value in j['data'][prop]:
            dists[value]+=1
    return dict(dists)

In [7]:
props=['century', 'religion', 'gender', 'deathplace', 'lifedur', 'birthplace',
      'worklocation', 'occupation', 'educatedat', 'politicalparty'] #[::-1]

In [8]:
all_rows=[]
for result in results:
    if result['state'] in ['finalized', 'judgable']:
        this_data=result['data']
        these_judgements=result['results']['judgments']
        this_id=result['id']
        this_row={}
        for k in props:
            v=this_data[k]
            if not v:
                dist=get_judgements_count(these_judgements, 'sel%s' % k)
                this_row[k]=dist
        all_rows.append(this_row)

In [9]:
len(all_rows)

305

In [30]:
data_per_property=defaultdict(list)

data_per_known_and_prop={
                            3: defaultdict(list), 4: defaultdict(list), 5: defaultdict(list), 
                            6: defaultdict(list), 7: defaultdict(list), 8: defaultdict(list), 
                            9: defaultdict(list), 
                        }
#, 7: defaultdict(list), 8: defaultdict(list)}

exclude_undecided=False

for row in all_rows:
    known=len(props)-len(row.keys())
    for prop, distribution in row.items():
        prop_domains=domains[prop]
        if exclude_undecided:
            size=len(prop_domains)+1
        else:
            size=len(prop_domains)+2
        prop_row=[0]*size
        for value, probability in distribution.items():
            if value=='University of California Berkeley':
                value='University of California, Berkeley'
            if value=='I can not decide': 
                if exclude_undecided:
                    continue
                else:
                    index=-2
            elif value=='None of the above':
                index=-1
            else:
                index = prop_domains.index(value)
            prop_row[index]=probability
        data_per_property[prop].append(prop_row)
        if known in data_per_known_and_prop.keys():
            data_per_known_and_prop[known][prop].append(prop_row)

In [31]:
alphas=[]
kappas=[]
for prop, data in data_per_property.items():
    np_data=np.asarray(data)
    kappa = fleiss_kappa(np_data)
    kappas.append(kappa)
    #print(round(kappa,3))
    alpha=krippendorff.alpha(np_data, level_of_measurement='nominal')
    print(round(alpha,3))
    alphas.append(alpha)
    
print('average alpha', sum(alphas)/len(alphas))
#print('average kappa', sum(kappas)/len(kappas))

0.272
0.269
0.282
0.138
0.331
0.102
0.179
0.154
0.149
0.156
average alpha 0.20335850198151068


### 2. Agreement per # known properties

In [32]:
for known, prop_data in data_per_known_and_prop.items():
    alphas=[]
    for prop, data in prop_data.items():
        np_data=np.asarray(data)
        kappa = fleiss_kappa(np_data)
        kappas.append(kappa)
        #print(round(kappa,3))
        alpha=krippendorff.alpha(np_data, level_of_measurement='nominal')
        alphas.append(alpha)
        print(known, prop, round(alpha,3))
    print(known, sum(alphas)/len(alphas))

3 century 0.325
3 religion 0.249
3 gender 0.267
3 deathplace 0.154
3 lifedur 0.319
3 birthplace 0.105
3 worklocation 0.223
3 occupation 0.178
3 educatedat 0.158
3 politicalparty 0.127
3 0.21048403977327923
4 century 0.354
4 religion 0.287
4 gender 0.352
4 deathplace 0.136
4 lifedur 0.313
4 birthplace 0.089
4 worklocation 0.242
4 occupation 0.173
4 educatedat 0.128
4 politicalparty 0.173
4 0.22474281573503224
5 century 0.346
5 religion 0.343
5 gender 0.356
5 deathplace 0.193
5 lifedur 0.43
5 birthplace 0.137
5 worklocation 0.195
5 occupation 0.139
5 educatedat 0.15
5 politicalparty 0.171
5 0.24592902545031992
6 century 0.251
6 religion 0.241
6 gender 0.292
6 deathplace 0.13
6 worklocation 0.149
6 occupation 0.167
6 educatedat 0.172
6 politicalparty 0.141
6 0.19281843404352708
7 century 0.327
7 religion 0.224
7 gender 0.393
7 occupation 0.239
7 educatedat 0.16
7 politicalparty 0.202
7 0.2577623925665629
8 century 0.257
8 religion 0.295
8 educatedat 0.059
8 politicalparty 0.253
8 0.215790

  return 1 - np.sum(o * d) / np.sum(e * d)
