In [7]:
from load_data import load_experiment_data
from utils_analysis import sort_by_key
from calculate_iaa import get_agreement
from calculate_iaa import load_rel_level_mapping
from clean_annotations import clean_annotations

from collections import Counter
from collections import defaultdict
import pandas as pd
import os 


def get_cont_stats(data_dict_list, aggregated_dicts):
    """Print contradiction statistics and split the aggregated pairs.

    Args:
        data_dict_list: Individual annotations. Only the length is used,
            for the "Total number of annotations" line.
        aggregated_dicts: One dict per pair; each must carry a
            'contradiction' entry ('yes' or 'no').

    Returns:
        Tuple ``(pairs_cont, pairs_no_cont)``: the aggregated dicts whose
        'contradiction' value is 'yes' and 'no' respectively.
    """
    total_annotations = len(data_dict_list)
    total_pairs = len(aggregated_dicts)
    pairs_cont = [d for d in aggregated_dicts if d['contradiction'] == 'yes']
    pairs_no_cont = [d for d in aggregated_dicts if d['contradiction'] == 'no']

    # An empty aggregation (e.g. a filter that removed everything) must not
    # crash the report with a ZeroDivisionError; report 0.0 instead.
    if total_pairs:
        pairs_cont_prop = len(pairs_cont) / total_pairs
        pairs_no_cont_prop = len(pairs_no_cont) / total_pairs
    else:
        pairs_cont_prop = 0.0
        pairs_no_cont_prop = 0.0

    print(f'Total number of annotations: {total_annotations}')
    print(f'Total number of pairs: {total_pairs}')
    print(f'Number of pairs without contradictions: {len(pairs_no_cont)} ({pairs_no_cont_prop})')
    print(f'Number of pairs with contradictions: {len(pairs_cont)} ({pairs_cont_prop})')
    return pairs_cont, pairs_no_cont


def get_cont_no_cont_annotations(data_dict_list, pairs_no_cont, pairs_cont):
    """Split annotations by whether their pair was aggregated as contradiction-free.

    Annotations are grouped per pair with
    ``sort_by_key(data_dict_list, ['property', 'concept'])`` and each group is
    routed as a whole.

    Args:
        data_dict_list: Individual annotation dicts.
        pairs_no_cont: Aggregated pair dicts without a contradiction; their
            'pair' values are matched against the group keys produced by
            ``sort_by_key`` (so they must use the same key format and be
            hashable).
        pairs_cont: Aggregated pair dicts with a contradiction. Kept for
            interface compatibility; it is not consulted, because every pair
            that is not listed in ``pairs_no_cont`` is treated as a
            contradiction pair.

    Returns:
        Tuple ``(annotations_cont, annotations_no_cont)`` of annotation lists.
    """
    # Set membership instead of scanning a list once per pair.
    no_cont = {d['pair'] for d in pairs_no_cont}

    annotations_no_cont = []
    annotations_cont = []
    data_by_pair = sort_by_key(data_dict_list, ['property', 'concept'])
    for pair, data_dicts in data_by_pair.items():
        if pair in no_cont:
            annotations_no_cont.extend(data_dicts)
        else:
            annotations_cont.extend(data_dicts)
    return annotations_cont, annotations_no_cont

def aggregate_labels(data_dict_list):
    """Aggregate the annotations of every property-concept pair into one dict.

    For each pair and each relation in the 'levels' mapping, the share of
    annotations answering 'true' is computed. A relation is a majority label
    when that share is strictly above 0.5.

    Args:
        data_dict_list: Annotation dicts; each needs an 'answer' entry plus
            whatever ``sort_by_key`` needs to group by 'property', 'concept'
            and 'relation'.

    Returns:
        A list with one dict per pair, holding (in this order):
        'pair', 'contradiction' ('yes' only when the levels of the majority
        labels are exactly {'all', 'few'}, otherwise 'no'), 'levels',
        'majority_labels', 'label(s)_most_vostes' and 'proportion_most_votes'.
    """
    contradicting_levels = {'all', 'few'}
    rel_level_mapping = load_rel_level_mapping(mapping = 'levels')
    relations = rel_level_mapping.keys()
    annotations_by_pair = sort_by_key(data_dict_list, ['property', 'concept'])

    summaries = []
    for pair, pair_annotations in annotations_by_pair.items():
        annotations_by_rel = sort_by_key(pair_annotations, ['relation'])
        majority_levels = set()
        majority_relations = []
        # share of 'true' answers -> relations that reached exactly that share
        relations_by_share = defaultdict(list)

        for relation in relations:
            votes = annotations_by_rel[relation]
            true_votes = sum(1 for vote in votes if vote['answer'] == 'true')
            # Without any 'true' vote the share is 0.0; this also covers a
            # relation that has no votes at all.
            share_true = true_votes / len(votes) if true_votes > 0 else 0.0
            if share_true > 0.5:
                majority_relations.append(relation)
                majority_levels.add(rel_level_mapping[relation])
            relations_by_share[share_true].append(relation)

        is_contradiction = majority_levels == contradicting_levels
        summary = {
            'pair': pair,
            'contradiction': 'yes' if is_contradiction else 'no',
            'levels': '-'.join(sorted(majority_levels)),
            'majority_labels': '-'.join(majority_relations),
        }
        top_share = max(relations_by_share)
        # The key spelling below (sic) is kept: it becomes a column name
        # wherever these dicts are exported.
        summary['label(s)_most_vostes'] = '-'.join(relations_by_share[top_share])
        summary['proportion_most_votes'] = top_share
        summaries.append(summary)
    return summaries


def print_iaa(data_dict_list):
    """Report inter-annotator agreement at three granularities.

    Calls ``get_agreement`` on the same annotations three times: without
    collapsing relations, collapsed by 'levels', and collapsed by
    'similar_relations'. Each call is preceded by a heading line; the return
    values of ``get_agreement`` are discarded. A blank line closes the report.

    Args:
        data_dict_list: Annotation dicts handed unchanged to ``get_agreement``.
    """
    reports = (
        ('Total:', {}),
        ('By levels:', {'collapse_relations': 'levels'}),
        ('By similar relations:', {'collapse_relations': 'similar_relations'}),
    )
    for heading, options in reports:
        print(heading)
        get_agreement(data_dict_list, **options)
    print()

def aggregated_data_to_file(run, batch, n_q, group, annotation_filter):
    """Aggregate annotations per pair, write them to CSV and report agreement.

    Steps: load the annotations, optionally filter them, aggregate the
    (filtered) annotations per pair, print contradiction statistics, write the
    aggregated labels to
    ``../aggregated_labels/annotation_filter-<filter>/aggregated_labels-all_annotations.csv``
    and print inter-annotator agreement before (and, if a filter was used,
    after) filtering.

    Args:
        run, batch, n_q, group: Selectors passed through unchanged to
            ``load_experiment_data`` and ``clean_annotations``.
        annotation_filter: Name of the filter handed to ``clean_annotations``;
            the literal string 'none' skips filtering.

    Returns:
        pandas.DataFrame with one row per pair (the content of the CSV).
    """
    print(f"Run {run}:")
    data_dict_list = load_experiment_data(run, group, n_q, batch, remove_not_val = True)
    print(f'Filter applied to annotations: {annotation_filter}')
    print(f'Number of annotations: {len(data_dict_list)}')

    filter_applied = annotation_filter != 'none'
    if filter_applied:
        # The second return value (the removed annotations) is not needed here.
        data_dict_list_clean, _annotations_removed = clean_annotations(
            run, group, n_q, batch, annotation_filter)
    else:
        data_dict_list_clean = data_dict_list
    print(f'Number of annotations after filter: {len(data_dict_list_clean)}')

    # Aggregate the filtered annotations. Aggregating the unfiltered list here
    # would make the CSV in the filter-specific directory (and the statistics
    # printed below) identical to the unfiltered run.
    agg_labels_all = aggregate_labels(data_dict_list_clean)
    # Called for the printed summary; the returned split is not used here.
    get_cont_stats(data_dict_list_clean, agg_labels_all)

    dir_path = f'../aggregated_labels/annotation_filter-{annotation_filter}'
    os.makedirs(dir_path, exist_ok=True)
    filepath = f'{dir_path}/aggregated_labels-all_annotations.csv'
    all_df = pd.DataFrame(agg_labels_all)
    all_df.to_csv(filepath, index=False)
    print()
    print(f'Aggregated labels written to: {filepath}')

    ### IAA ###
    print('IAA before filtering:')
    print_iaa(data_dict_list)

    if filter_applied:
        print('IAA after filtering:')
        print_iaa(data_dict_list_clean)
    else:
        print('No filter applied.')
    print()

    return all_df
  


def main():
    """Run the aggregation once without a filter and once with 'worker_contradictions'.

    All selectors are wildcards ('*' for run, batch and n_q; 'experiment*' for
    the group). The data frames returned by ``aggregated_data_to_file`` are
    not kept.
    """
    run, batch, n_q = '*', '*', '*'
    group = 'experiment*'
    for annotation_filter in ('none', 'worker_contradictions'):
        aggregated_data_to_file(run, batch, n_q, group, annotation_filter)

    
# Entry point: run the full aggregation when this code is executed directly
# (this includes running the cell in a notebook, as the output below shows).
if __name__ == '__main__':
    main()

Run *:
Discarded 655.0 annotations.
Filter applied to annotations: none
Number of annotations: 72049
Number of annotations after filter: 72049
Total number of annotations: 72049
Total number of pairs: 527
Number of pairs without contradictions: 517 (0.9810246679316889)
Number of pairs with contradictions: 10 (0.018975332068311195)

Aggregated labels written to: ../aggregated_labels/annotation_filter-none/aggregated_labels-all_annotations.csv
IAA before filtering:
Total:
Krippendorff's alpha: 0.2378609178333677
Proportional agreement (pairwise): 0.6243037902425451

By levels:
levels
Krippendorff's alpha: 0.36048524149912353
Proportional agreement (pairwise): 0.6220320269738886

By similar relations:
similar_relations
Krippendorff's alpha: 0.3713240612838109
Proportional agreement (pairwise): 0.6201418103436581


No filter applied.

Run *:
Discarded 655.0 annotations.
Filter applied to annotations: worker_contradictions
Number of annotations: 72049
Discarded 655.0 annotations.
Found 1452

In [43]:
get_agreement(all_pairs)
    collapse_relations = 'levels'
    print(f'collapsing {collapse_relations}')
    get_agreement(all_pairs, collapse_relations = collapse_relations)
    
    annotation_filter = 'worker_contradictions'
    agg_labels_all_df = aggregated_data_to_file(run, batch, n_q, group, annotation_filter)
    


roll-pipe implied_category-afforded_unusual-rare
roll-rail afforded_usual-unusual
roll-shovel afforded_unusual-rare-unusual
roll-driveshaft afforded_usual-unusual
roll-donkey afforded_unusual-impossible
roll-hose afforded_unusual-unusual
roll-blade afforded_unusual-rare-unusual-impossible
roll-piston afforded_unusual-rare-unusual
roll-screw implied_category-afforded_unusual-unusual
roll-windshield afforded_unusual-rare-unusual-impossible


In [45]:
# Peek at the first three pairs without a contradiction, one aggregated dict per line.
# NOTE(review): `pairs_no_cont` is not assigned at top level in the visible cells;
# this cell appears to rely on earlier kernel state - confirm it survives
# Restart Kernel -> Run All.
for d in pairs_no_cont[:3]:
    print(d)

{'pair': 'black-pea', 'distribution': 'more_levels', 'levels': 'few-some', 'majority_labels': 'variability_limited-variability_open-rare'}
{'pair': 'hot-fireplace', 'distribution': 'more_levels', 'levels': 'all-some', 'majority_labels': 'affording_activity-typical_of_property-implied_category-typical_of_concept-variability_open'}
{'pair': 'made_of_wood-strake', 'distribution': 'more_levels', 'levels': 'all-some', 'majority_labels': 'affording_activity-variability_open'}
