In [1]:
import csv
from pprint import pprint
from statistics import stdev, mean, median

In [2]:
# Read every row of the batch results CSV into memory as a list of dicts
# (one dict per CSV row, keyed by the header names).
# The `with` block guarantees the file handle is closed once the rows are read.
# errors='ignore' silently drops bytes that are not valid UTF-8.
with open('Batch_3850247_batch_results.csv', 'r', encoding='utf8', errors='ignore') as results_file:
    lines = list(csv.DictReader(results_file))

## General Statistics

In [3]:
print('There are 20 HITS, 10 annotations per HIT, 5 annotators per HIT.\n')

# --- Worker statistics: how many result rows each worker submitted ---
worker_ids = {}
for row in lines:
    worker_ids[row['WorkerId']] = worker_ids.get(row['WorkerId'], 0) + 1
rows_per_worker = list(worker_ids.values())
print('Number of workers:', len(worker_ids))
print('Avg # HITS/worker:', round(mean(rows_per_worker), 2), '+-', round(stdev(rows_per_worker), 1))

# --- Candidate answers per source ---
# Each row carries ten (source, score) slots numbered 1..10.
source_counts = dict.fromkeys(['backtranslation', 'gpt2', 'mhpg', 'narrativeqa'], 0)
for row in lines:
    for slot in range(1, 11):
        source_counts[row['Input.source' + str(slot)]] += 1
# Integer-divide by 5 (the number of annotators per HIT stated in the banner above)
# so that each candidate is counted once instead of once per annotator.
source_counts = {name: total // 5 for name, total in source_counts.items()}
print('\nCandidate answer counts:', source_counts)

# --- Histogram of the 1-5 scores over all rows and slots ---
score_counts = dict.fromkeys(range(1, 6), 0)
for row in lines:
    for slot in range(1, 11):
        score_counts[int(row['Answer.score' + str(slot)])] += 1
# The label text is kept exactly as in the recorded output.
print('\nScorce counts:', score_counts)

# From here on only the ids are needed, so keep just the key view.
worker_ids = worker_ids.keys()

There are 20 HITS, 10 annotations per HIT, 5 annotators per HIT.

Number of workers: 49
Avg # HITS/worker: 2.04 +- 2.1

Candidate answer counts: {'backtranslation': 31, 'gpt2': 132, 'mhpg': 31, 'narrativeqa': 6}

Scorce counts: {1: 391, 2: 118, 3: 128, 4: 187, 5: 176}


## Analysis of Scores

In [48]:
# Group all ratings by candidate-answer id.
# candidate_scores maps a candidate id to a dict holding its context, question,
# source, reference, candidate text, the 'Input.id' of the first row it was seen
# in, and two parallel lists: 'scores' and the 'workers' who gave them.
candidate_scores = {}

for line in lines:
    context = line['Input.context']
    worker_id = line['WorkerId']
    # Row-level value: read once per row rather than once per slot.
    input_id = line['Input.id']
    for i in range(1, 11):
        slot = str(i)
        # Named candidate_id so the builtin id() is not shadowed.
        candidate_id = line['Input.id' + slot]
        question = line['Input.question' + slot]
        reference = line['Input.reference' + slot]
        candidate = line['Input.candidate' + slot]
        source = line['Input.source' + slot]
        score = int(line['Answer.score' + slot])

        # First sighting creates the record; later sightings only append.
        entry = candidate_scores.setdefault(
            candidate_id,
            {'context': context, 'question': question, 'source': source, 'workers': [],
             'reference': reference, 'candidate': candidate, 'input_id': input_id, 'scores': []})
        entry['scores'].append(score)
        entry['workers'].append(worker_id)

# (candidate id, spread of its scores, the scores), ordered from lowest to highest spread.
# Despite the name, the spread is the sample standard deviation (statistics.stdev),
# which needs at least two scores per candidate.
variances = sorted(
    ((key, stdev(entry['scores']), entry['scores']) for key, entry in candidate_scores.items()),
    key=lambda item: item[1],
)

### Candidate answers sorted from low to high variance (standard deviation of scores)

In [49]:
# Print one report per candidate answer, in the order given by `variances`
# (lowest spread of scores first).
for key, var, scores in variances:
    entry = candidate_scores[key]
    print('-' * 20)
    print('Context:', entry['context'], '\n')
    # These four fields share the same "label value" layout.
    for label, field in (('Question:', 'question'), ('Reference:', 'reference'),
                         ('Candidate:', 'candidate'), ('Source:', 'source')):
        print(label, entry[field])
    print('Scores:', scores, 'Variance:', var)
    print('Worker IDs:', entry['workers'])
    print('input_id', entry['input_id'])
    print('\n')

--------------------
Context: "Tom Bailey" is born in the fictitious town of Rivermouth, New Hampshire, but moves to New Orleans with his family when he is 18 months old. In his boyhood, his father wants his to be educated in the North and sent him back to Rivermouth to live with his grandfather, Captain Nutter. Nutter lives with his sister and an Irish servant. There, Tom becomes a member of a boys' club called the Centipedes. Together, the boys become involved in a series of adventures. In one prank, the boys steal an old carriage and push it into a bonfire for the [Fourth of July. During the winter, several boys build a snow fort on Slatter's Hill, inciting rival boys into a battle of snowballs. Later, Tom and three other boys combine their money to buy a boat named Dolphin and sneak away to an island. Tom also befriends a man nicknamed Sailor Ben, whom Tom originally meets on the ship that took him away from New Orleans. Revealed as the long-lost husband of Captain Nutter's Irish s

In [60]:
# Hand-entered list of ids to inspect more closely.
# Some ids appear more than once; the repeats are kept exactly as entered.
ids_to_lookat = [
    '81fa2859dc703f8e9e68ca3addd9e9f1', 'debe63262bfd3568cd7e6dd8fb670133',
    '8702b9ff0756728aa39fd84cc721bd3e', '686566a9615becdde3cb4448479cecc3',
    '8702b9ff0756728aa39fd84cc721bd3e', 'debe63262bfd3568cd7e6dd8fb670133',
    '81fa2859dc703f8e9e68ca3addd9e9f1', 'debe63262bfd3568cd7e6dd8fb670133',
    '8e74a06fe140ffda2bb6815f69770f2f', '123ec02fd35bdc171cba58170d9ec625',
    '2c9128165dc906aac9a07b2c376f300a', '0bf83ebfbdc047c6d7f2647c79d9bbbb',
    '0bf83ebfbdc047c6d7f2647c79d9bbbb', '1032400ca9948fdb874f5c67308aa41b',
    'debe63262bfd3568cd7e6dd8fb670133', '8702b9ff0756728aa39fd84cc721bd3e',
    '100c8301a2f858c888cf52b7f5042fd1', '49bf491b3fe1c4ff5562019a1032092f',
]

### Analysis of Workers

In [64]:
# Workers by deviation from median
# For each worker, average |own score - median of the other annotators' scores|
# over every candidate answer the worker rated. A lower value means the worker
# agrees more closely with the other annotators.
error_totals = {worker: [0, 0] for worker in worker_ids}  # worker -> [summed error, number of ratings]

for worker in worker_ids:
    for key in candidate_scores:
        candidate_workers = candidate_scores[key]['workers']
        scores = candidate_scores[key]['scores']

        if worker not in candidate_workers:
            continue

        # list.index returns the first match, so a worker listed more than once
        # for the same candidate contributes only their first rating.
        index = candidate_workers.index(worker)
        other_scores = scores[:index] + scores[index+1:]
        if not other_scores:
            # Only this worker rated the candidate: there is nothing to compare
            # against, and median([]) would raise StatisticsError.
            continue

        error = abs(scores[index] - median(other_scores))
        error_totals[worker][0] += error
        error_totals[worker][1] += 1

# (worker id, mean absolute deviation), smallest deviation first.
# Workers without any comparable rating are left out instead of dividing by zero.
worker_errors = sorted(
    ((worker, total / count) for worker, (total, count) in error_totals.items() if count),
    key=lambda pair: pair[1],
)

In [65]:
# Display the (worker id, mean absolute deviation from the median) pairs, smallest deviation first.
worker_errors

[('A9JPMRRBFO4NL', 0.25),
 ('APO6KZZ79PO9Q', 0.25),
 ('A31681CCEVDIH3', 0.25),
 ('A2BCNRHZU9V7C4', 0.3),
 ('A3DS5B06ZCD3E3', 0.3),
 ('AZ5ZYUCAQ0XDL', 0.3),
 ('A1J8TVICSRC70W', 0.35),
 ('AKSJ3C5O3V9RB', 0.3625),
 ('A3GM78FCDY293T', 0.36428571428571427),
 ('A2SYTRKH1JWJO5', 0.4),
 ('AZ8JL3QNIPY4U', 0.4),
 ('A1OSESE6RG3NFB', 0.4),
 ('A2FCQ5K0T1PYHS', 0.4),
 ('APGX2WZ59OWDN', 0.425),
 ('ACAJFF4MF5S5X', 0.5),
 ('A34DR0CVUBDL1N', 0.5),
 ('A36A780ODXBDEH', 0.55),
 ('AURYD2FH3FUOQ', 0.5571428571428572),
 ('A2TBXASXZIRNNW', 0.59375),
 ('APGBQQ5XS3Q4P', 0.6),
 ('A1X84T4EFW04GZ', 0.6),
 ('A98E8M4QLI9RS', 0.6),
 ('AKNV05F5X5YAC', 0.6),
 ('A17TKHT8FEVH0R', 0.6),
 ('A320QA9HJFUOZO', 0.6),
 ('A1YSYI926BBOHW', 0.7),
 ('ADLYO655RWJUI', 0.70625),
 ('A2TZIJ0DLY0VNJ', 0.7166666666666667),
 ('A3V1VK1A3NYN17', 0.75),
 ('A31JM9RECQGYEX', 0.75),
 ('A20NITCOBY4775', 0.75),
 ('A6HDSE80LQPR8', 0.75),
 ('A1FYFBIFYWTMJS', 0.75),
 ('A1ET2J1PIP0RGO', 0.7571428571428571),
 ('A5X1KZ9CCHREK', 0.8),
 ('A2VNSNAN1LZBAM', 

### Worker IDs sorted by their average deviation from median scores

In [45]:
# Hand-entered list of worker ids treated as "good".
# NOTE(review): the order follows the deviation ranking shown above, but a few
# workers with comparable deviation (e.g. 'A2BCNRHZU9V7C4') are absent - confirm
# that the omissions are intentional.
good_worker_ids = [
    'A9JPMRRBFO4NL',
    'APO6KZZ79PO9Q',
    'A31681CCEVDIH3',
    'A3DS5B06ZCD3E3',
    'AZ5ZYUCAQ0XDL',
    'A1J8TVICSRC70W',
    'AKSJ3C5O3V9RB',
    'A3GM78FCDY293T',
    'A2SYTRKH1JWJO5',
    'AZ8JL3QNIPY4U',
    'A1OSESE6RG3NFB',
    'A2FCQ5K0T1PYHS',
    'APGX2WZ59OWDN',
    'ACAJFF4MF5S5X',
    'A34DR0CVUBDL1N',
    'A36A780ODXBDEH',
    'AURYD2FH3FUOQ',
    'APGBQQ5XS3Q4P',
    'A98E8M4QLI9RS',
    'AKNV05F5X5YAC',
    'A17TKHT8FEVH0R',
    'A320QA9HJFUOZO',
]

In [46]:
# Display the selected worker ids for inspection.
good_worker_ids

['A9JPMRRBFO4NL',
 'APO6KZZ79PO9Q',
 'A31681CCEVDIH3',
 'A3DS5B06ZCD3E3',
 'AZ5ZYUCAQ0XDL',
 'A1J8TVICSRC70W',
 'AKSJ3C5O3V9RB',
 'A3GM78FCDY293T',
 'A2SYTRKH1JWJO5',
 'AZ8JL3QNIPY4U',
 'A1OSESE6RG3NFB',
 'A2FCQ5K0T1PYHS',
 'APGX2WZ59OWDN',
 'ACAJFF4MF5S5X',
 'A34DR0CVUBDL1N',
 'A36A780ODXBDEH',
 'AURYD2FH3FUOQ',
 'APGBQQ5XS3Q4P',
 'A98E8M4QLI9RS',
 'AKNV05F5X5YAC',
 'A17TKHT8FEVH0R',
 'A320QA9HJFUOZO']