# Inter-annotator agreement analyses


**To do:**

* Test what happens to agreement if we collapse all annotations into a single group.

**Done:**
* Remove pair-annotations if they are contradictory [done]
* Remove workers who fail checks [done]
* Remove workers with high contradictions [done]
* Compare agreement between pairs with contradictory annotations and pairs without (for both cleaned and uncleaned data)
* Remove workers with contradictions and check fails
* Relation evaluation
* Example evaluation



# Weighting annotations

**Ideas**

* 

In [28]:

from load_data import load_experiment_data
from nltk import agreement

import csv
from collections import defaultdict



def load_rel_level_mapping():
    """Read the relation overview CSV and map each relation to its level.

    The file ``../scheme/relation_overview_run3.csv`` is resolved against the
    current working directory and must provide ``relation`` and ``level``
    columns. When a relation occurs in several rows, the last row wins.

    Returns:
        dict: relation -> level, both exactly as stored in the file.
    """
    with open('../scheme/relation_overview_run3.csv') as infile:
        return {
            row['relation']: row['level']
            for row in csv.DictReader(infile)
        }

def get_collapsed_relations(dict_list):
    """Rewrite the ``quid`` of each annotation so relations collapse to levels.

    Every dict in ``dict_list`` must carry ``property``, ``concept`` and
    ``relation``. When the relation is found in the mapping returned by
    ``load_rel_level_mapping()``, the dict's ``quid`` is overwritten in place
    with ``<property>-<concept>-<level>``. Dicts whose relation is not in the
    mapping are left untouched. Nothing is returned.
    """
    relation_to_level = load_rel_level_mapping()
    for annotation in dict_list:
        prop, concept, relation = (
            annotation['property'],
            annotation['concept'],
            annotation['relation'],
        )
        if relation not in relation_to_level:
            continue
        level = relation_to_level[relation]
        annotation['quid'] = f'{prop}-{concept}-{level}'
    

def create_matrix(dict_list):
    """Turn annotation dicts into ``[worker, quid, answer]`` rows.

    Annotations are grouped by their ``quid`` (groups keep first-seen order).
    The ``worker`` entry of a row is the position of the annotation inside
    its group (0, 1, 2, ...), not an identifier read from the annotation.

    Returns:
        list: one ``[worker, quid, answer]`` list per annotation, ordered
        group by group.
    """
    grouped = defaultdict(list)
    for entry in dict_list:
        grouped[entry['quid']].append(entry)

    return [
        [position, quid, entry['answer']]
        for quid, entries in grouped.items()
        for position, entry in enumerate(entries)
    ]

def coder_pairs(n_annotators):
    """Return every unordered pair of annotator indices below ``n_annotators``.

    Each pair appears once, as an ``(i, j)`` tuple with ``i < j``. Fewer than
    two annotators give an empty set.
    """
    return {
        (first, second)
        for first in range(n_annotators)
        for second in range(first + 1, n_annotators)
    }

def proportional_agreement_pairs(matrix):
    """Return the mean pairwise proportional agreement over all units.

    Args:
        matrix: iterable of ``(worker, unit, label)`` triples. The worker
            entry is ignored; only the labels collected per unit matter.

    Returns:
        float: for each unit, the share of judgement pairs that carry the
        same label, averaged over all units. A unit with a single judgement
        has no pairs; it contributes 0 but still counts towards the average.
        An empty ``matrix`` yields 0.0.
    """
    labels_by_unit = defaultdict(list)
    for _worker, unit, label in matrix:
        labels_by_unit[unit].append(label)

    # Without any unit the average below would divide by zero.
    if not labels_by_unit:
        return 0.0

    total = 0.0
    for judgements in labels_by_unit.values():
        pairs = coder_pairs(len(judgements))
        if not pairs:
            # A lone judgement cannot agree with anything: contributes 0.
            continue
        matching = sum(
            1 for i, j in pairs if judgements[i] == judgements[j]
        )
        total += matching / len(pairs)
    return total / len(labels_by_unit)

def get_agreement(dict_list_out, collapse_relations=False, v=True):
    """Compute agreement scores for a list of annotation dicts.

    Args:
        dict_list_out: annotation dicts; each needs the ``quid`` and
            ``answer`` keys read by ``create_matrix`` (and ``property``,
            ``concept`` and ``relation`` when ``collapse_relations`` is set).
        collapse_relations: when true, ``get_collapsed_relations`` is applied
            first. It rewrites the ``quid`` of the dicts in ``dict_list_out``
            in place, so the caller's data is modified as well.
        v: when true, print both scores.

    Returns:
        dict: ``{'Krippendorff': alpha, 'Proportional': prop}``, where
        ``alpha`` comes from ``nltk``'s ``AnnotationTask.alpha()`` and
        ``prop`` from ``proportional_agreement_pairs``.
    """
    if collapse_relations:
        # This branch had no body, which made the whole cell a syntax error.
        get_collapsed_relations(dict_list_out)

    matrix = create_matrix(dict_list_out)
    ratingtask = agreement.AnnotationTask(data=matrix)
    alpha = ratingtask.alpha()
    prop = proportional_agreement_pairs(matrix)
    if v:
        print(f"Krippendorff's alpha: {alpha}")
        print(f"Proportional agreement (pairwise): {prop}")
        print()
    return {'Krippendorff': alpha, 'Proportional': prop}


def main():
    """Load the run 1 annotations of 'experiment1' and report their agreement."""
    run, group = 1, 'experiment1'
    # Both selectors are '*' (presumably a wildcard for all batches and all
    # question counts -- confirm in load_data).
    batch = n_q = '*'
    print(f'--- analyzing run {run} ---')
    annotations = load_experiment_data(run, group, n_q, batch, remove_not_val=True)
    get_agreement(annotations)


if __name__ == '__main__':
    main()


--- analyzing run 1 ---
Discarded 0.0 annotations.
Krippendorff's alpha: 0.2413329996998821
Proportional agreement (pairwise): 0.6204625909422005



In [29]:

# Load the run 3 annotations of 'experiment1' and keep them in the
# notebook-level name dict_list_out for the cells that follow. Agreement is
# not computed in this cell (the call below is disabled).
# NOTE(review): '*' is presumably a wildcard for batch / n_q, and
# remove_not_val presumably drops invalid annotations -- confirm in load_data.
run = 3
group = 'experiment1'
batch = '*'
n_q = '*'
print(f'--- analyzing run {run} ---')
dict_list_out = load_experiment_data(run, group, n_q, batch, remove_not_val = True)
#get_agreement(dict_list_out)
    

--- analyzing run 3 ---
Discarded 655.0 annotations.


In [33]:

# Collapse relations to their levels: rewrites the 'quid' of the loaded
# annotations in place. The helper defined above is get_collapsed_relations;
# no function called collapse_relations exists in this notebook.
get_collapsed_relations(dict_list_out)

