In [4]:
from calculate_iaa import get_alpha
from statistics import stdev
from load_data import load_experiment_data
from utils_analysis import sort_by_key
from utils_analysis import load_analysis

def filter_with_stdv(workers, measure = 'contradiction_poss_contradiction_ratio', n_stds=1):
    
    cont_rate = [float(d[measure]) for d in workers]
    av_cont = sum(cont_rate)/len(cont_rate)
    std_cont = stdev(cont_rate)
    thresh = (n_stds * std_cont) + av_cont
    workers_to_remove = []
    for d in workers:
        cont_rate = float(d[measure])
        if cont_rate > thresh:
            workers_to_remove.append(d['workerid'])
    return workers_to_remove
        
        

def remove_contraditcting_workers(all_annotations, dict_list_workers, unit,  n_stds):
    
    if unit == 'batch':
        annotations_by_unit = sort_by_key(all_annotations, ['filename','completionurl'])
        workers_by_unit = sort_by_key(dict_list_workers, ['filename-url'])
    elif unit == 'pair':
        annotations_by_unit = sort_by_key(all_annotations, ['property','concept'])
        workers_by_unit = sort_by_key(dict_list_workers, ['pair'])
        
    elif unit == 'total':
        annotations_by_unit = dict()
        annotations_by_unit['total'] = all_annotations
        workers_by_unit = dict()
        workers_by_unit['total'] = dict_list_workers
    clean_annotations = []
    
    for unit_id, workers in workers_by_unit.items(): 
        workers_to_remove = filter_with_stdv(workers, 
                         measure = 'contradiction_poss_contradiction_ratio',
                         n_stds = n_stds)
        annotations = annotations_by_unit[unit_id]
        for d in annotations:
            worker = d['workerid']
            if worker not in workers_to_remove:
                clean_annotations.append(d)
    return clean_annotations
        

def main():
    run = '*'
    group = 'experiment*'
    n_q = '*'
    batch = '*'
    
    #load_analysis(analysis_type, run, exp_name, batch)

    n_stds = 3
    all_annotations = load_experiment_data(run, group, n_q, batch, remove_not_val = True)
    print('IAA raw')
    iaa = get_alpha(all_annotations)
    iaa_levels = get_alpha(all_annotations, collapse_relations = 'levels')
    print()    
        
    units = ['total', 'batch', 'pair']
    stds = [0.5, 1, 1.5, 2, 2.5, 3]
    
    
    for unit in units:
        for n_stds in stds:
            if unit == 'total':
                analysis_type = 'workers'
            else:
                analysis_type = f'workers_by_{unit}'
            dict_list_workers = load_analysis(analysis_type, run, group, batch, as_dict = True)
            clean_annotations = remove_contraditcting_workers(all_annotations, dict_list_workers, unit,  n_stds)
            n_total = len(all_annotations)
            n_clean = len(clean_annotations)
            percent_clean = n_clean / n_total
            iaa_alpha = get_alpha(clean_annotations)
            iaa_alpha_levels = get_alpha(clean_annotations, collapse_relations = 'levels')
            print(unit, n_stds)
            print(n_total, n_clean, percent_clean)
            print(iaa_alpha, iaa_alpha_levels)
            print()

 
        
if __name__ == '__main__':
    main()

Discarded 655.0 annotations.
IAA raw

total 0.5
145935 129060 0.8843663274745606
0.343896980301426 0.5437341607591201

total 1
145935 136228 0.9334840853804776
0.3308241201916283 0.5211782660758192

total 1.5
145935 139249 0.9541850823996985
0.323757547416308 0.5074987130824716

total 2
145935 141133 0.967094939527872
0.31895612017584796 0.5009415533783598

total 2.5
145935 143019 0.980018501387604
0.3149913209196331 0.49332623090822636

total 3
145935 144377 0.9893240141158735
0.31125594034682325 0.49015085338093545

batch 0.5
145935 112258 0.769232877651009
0.3562694413206796 0.5994480736861509

batch 1
145935 122988 0.8427587624627403
0.3429972151616486 0.5704012769026419

batch 1.5
145935 130981 0.8975297221365677
0.3332045238037903 0.5525700849665135

batch 2
145935 136769 0.9371912152670709
0.32411946898006283 0.5390460872988624

batch 2.5
145935 141996 0.9730085311953952
0.31442777822025236 0.5198830057613095

batch 3
145935 145884 0.9996505293452564
0.3061935784155362 0.5339594