In [1]:
import pickle
import pandas as pd
from tqdm import tqdm
import numpy as np
import json

### Data

In [2]:
# Read the annotation file, which holds one JSON document per line (JSONL).
# The context manager closes the file handle as soon as the lines are read,
# instead of leaving the open file to be reclaimed by the garbage collector.
with open('./model_annotations.aligned.scored.jsonl', 'rt') as jsonl_file:
    lines = list(jsonl_file)

# Parse every line into a Python object.
objs = [json.loads(line) for line in lines]

In [3]:
# Number of parsed records (one per line of the JSONL file).
len(objs)

1700

In [4]:
# Peek at the 'turker_annotations' entry of the first record to see its layout.
objs[0]['turker_annotations']

[{'coherence': 3, 'consistency': 3, 'fluency': 4, 'relevance': 3},
 {'coherence': 3, 'consistency': 3, 'fluency': 4, 'relevance': 3},
 {'coherence': 3, 'consistency': 3, 'fluency': 4, 'relevance': 3},
 {'coherence': 3, 'consistency': 3, 'fluency': 4, 'relevance': 3},
 {'coherence': 3, 'consistency': 3, 'fluency': 4, 'relevance': 3}]

In [5]:
def expert_annotations(x, agg=lambda x: np.mean(list(x.values())), annotations='expert_annotations'):
    """Reduce every annotation stored in record ``x`` to a single score.

    Parameters
    ----------
    x : mapping
        One record. ``x[annotations]`` is iterated and each element is handed
        to ``agg``.
    agg : callable, optional
        Turns one annotation into a scalar. The default takes the mean of the
        annotation dict's values.
    annotations : str, optional
        Key naming the annotation set to read (default
        ``'expert_annotations'``).

    Returns
    -------
    list
        One aggregated score per annotation, in order, or ``[]`` when
        ``annotations`` is not a key of ``x``.
    """
    if annotations in x:
        return [agg(rating) for rating in x[annotations]]
    # Records lacking this annotation set yield no scores.
    return []

# Example: aggregated (default: mean) score of each turker annotation of the record at index 9.
expert_annotations(objs[9], annotations='turker_annotations')

[4.25, 4.75, 2.5, 5.0, 3.75]

### Pooled variances

In [6]:
def pooled_variance(objs, annotations, ddof=0):
    """Pool the per-record score variances, weighting each record by ``n - 1``.

    Computes ``sum((n_i - 1) * var_i) / sum(n_i - 1)`` over the records for
    which ``annotations`` returns a non-empty score list.

    Parameters
    ----------
    objs : iterable
        Records to process.
    annotations : callable
        Maps one record to a list of scores. Records mapped to ``[]`` are
        skipped.
    ddof : int, optional
        Delta degrees of freedom forwarded to ``np.var``. The default ``0``
        keeps the historical behaviour (population variance per record).
        Pass ``1`` to pool unbiased sample variances, which is the estimator
        the ``n - 1`` weights are conventionally paired with. Records with
        ``n <= ddof`` scores have no defined variance and are left out
        (with the default this never excludes anything).

    Returns
    -------
    float
        The pooled variance.

    Notes
    -----
    Prints the number of records that entered the pool as a sanity check.
    When the summed weights are zero (no record, or only single-score
    records) the division is undefined: it raises ``ZeroDivisionError`` or
    yields ``nan``, depending on the numeric types accumulated.
    """
    per_record = []
    for obj in objs:
        scores = annotations(obj)
        if scores != []:
            n = len(scores)
            # np.var divides by (n - ddof); skip records where that is <= 0
            # so a nan can never leak into the weighted sum.
            if n > ddof:
                per_record.append((n, np.var(scores, ddof=ddof)))

    print(len(per_record))

    num = 0
    denom = 0
    for n, var in per_record:
        weight = n - 1
        denom += weight
        num += weight * var

    return num / denom

In [7]:
# Pooled per-record variance of the aggregated expert scores
# (expert_annotations reads the 'expert_annotations' key by default).
pooled_var_expert = pooled_variance(objs, expert_annotations)
pooled_var_expert

1700


0.0859722222222225

In [8]:
# Same computation for the turker annotations; the lambda only switches the
# annotation key that expert_annotations reads.
pooled_var_turker = pooled_variance(objs, lambda x: expert_annotations(x, annotations='turker_annotations'))
pooled_var_turker

1600


0.22594999999999998

### Total variances

In [9]:
# Flatten the aggregated expert scores of all records into one list.
# A nested comprehension is linear in the number of scores, whereas
# sum(list_of_lists, []) copies the growing accumulator on every addition
# (quadratic). The resulting list and its order are the same.
all_scores = [score for obj in objs for score in expert_annotations(obj)]

# Variance over every individual score (np.var default: ddof=0).
total_var_expert = np.var(all_scores)
total_var_expert

0.5144329560745868

In [10]:
# Flatten the aggregated turker scores of all records into one list.
# A nested comprehension is linear in the number of scores, whereas
# sum(list_of_lists, []) copies the growing accumulator on every addition
# (quadratic). The resulting list and its order are the same.
all_scores = [
    score
    for obj in objs
    for score in expert_annotations(obj, annotations='turker_annotations')
]

# Variance over every individual score (np.var default: ddof=0).
total_var_turker = np.var(all_scores)
total_var_turker

0.5551246093750001

# Results

In [11]:
# Print the results as LaTeX table rows:  label & expert & turker \\
#
# The labels are raw strings because '\s', '\V' and '\E' are not valid escape
# sequences: in an ordinary literal they only produce the intended text by
# accident and trigger a DeprecationWarning (SyntaxWarning on newer Pythons).
# The printed text is unchanged.
rows = [
    # Square root of the total variance of all scores.
    (r'$\sqrt{\Var(H(x))}$',
     np.sqrt(total_var_expert),
     np.sqrt(total_var_turker)),
    # Square root of the pooled per-record variance.
    (r'$\sqrt{\Expect[\Var(H(x) | x)]}$',
     np.sqrt(pooled_var_expert),
     np.sqrt(pooled_var_turker)),
    # Square root of the remainder: total variance minus pooled variance.
    (r'$\sqrt{\Var(P(x))}$',
     np.sqrt(total_var_expert - pooled_var_expert),
     np.sqrt(total_var_turker - pooled_var_turker)),
    # Ratio of the total variance to that remainder.
    (r'$\Var(H(x)) / \Var(P(x))$',
     total_var_expert / (total_var_expert - pooled_var_expert),
     total_var_turker / (total_var_turker - pooled_var_turker)),
]

for label, expert_value, turker_value in rows:
    # `l` stays the row buffer so the cell leaves the same name bound as before.
    l = [label, '%.3f' % expert_value, '%.3f' % turker_value]
    print(' & '.join(l) + ' \\\\')

$\sqrt{\Var(H(x))}$ & 0.717 & 0.745 \\
$\sqrt{\Expect[\Var(H(x) | x)]}$ & 0.293 & 0.475 \\
$\sqrt{\Var(P(x))}$ & 0.655 & 0.574 \\
$\Var(H(x)) / \Var(P(x))$ & 1.201 & 1.686 \\
