In [7]:
from load_data import load_experiment_data
from utils_analysis import sort_by_key
from calculate_iaa import get_full_report
from calculate_iaa import load_rel_level_mapping
from clean_annotations import filter_annotations

from collections import Counter
from collections import defaultdict
import pandas as pd
import os 


def get_cont_stats(data_dict_list, aggregated_dicts):
    """Print contradiction statistics and split the aggregated pairs.

    Args:
        data_dict_list: Collection of individual annotations; only its
            length is used (for the printed total).
        aggregated_dicts: Collection of per-pair dicts, each carrying a
            'contradiction' key whose value is 'yes' or 'no'.

    Returns:
        Tuple ``(pairs_cont, pairs_no_cont)``: the dicts whose
        'contradiction' value is 'yes' and 'no' respectively, in input order.
    """
    total_annotations = len(data_dict_list)
    total_pairs = len(aggregated_dicts)
    pairs_cont = [d for d in aggregated_dicts if d['contradiction'] == 'yes']
    pairs_no_cont = [d for d in aggregated_dicts if d['contradiction'] == 'no']
    if total_pairs:
        pairs_cont_prop = len(pairs_cont)/total_pairs
        pairs_no_cont_prop = len(pairs_no_cont)/total_pairs
    else:
        # No pairs at all (e.g. a filter removed every annotation): report
        # zero proportions instead of raising ZeroDivisionError.
        pairs_cont_prop = 0.0
        pairs_no_cont_prop = 0.0
    print(f'Total number of annotations: {total_annotations}')
    print(f'Total number of pairs: {total_pairs}')
    print(f'Number of pairs without contradictions: {len(pairs_no_cont)} ({pairs_no_cont_prop})')
    print(f'Number of pairs with contradictions: {len(pairs_cont)} ({pairs_cont_prop})')
    return pairs_cont, pairs_no_cont


def get_cont_no_cont_annotations(data_dict_list, pairs_no_cont, pairs_cont):
    """Split raw annotations by whether their pair was flagged as contradicting.

    Args:
        data_dict_list: Individual annotation dicts; grouped via
            ``sort_by_key`` on the 'property' and 'concept' keys.
        pairs_no_cont: Aggregated pair dicts (each with a 'pair' key) that
            have no contradiction.
        pairs_cont: Aggregated pair dicts with a contradiction. Kept for
            interface compatibility; the split is driven by
            ``pairs_no_cont`` only.

    Returns:
        Tuple ``(annotations_cont, annotations_no_cont)``. Note the order:
        contradicting annotations come first, although the parameters list
        the non-contradicting pairs first.
    """
    # Set membership keeps the loop linear in the number of pairs.
    no_cont = {d['pair'] for d in pairs_no_cont}

    # NOTE(review): assumes the keys returned by sort_by_key have the same form
    # as the 'pair' values of the aggregated dicts -- confirm in utils_analysis.
    data_by_pair = sort_by_key(data_dict_list, ['property', 'concept'])

    annotations_no_cont = []
    annotations_cont = []
    for p, data_dicts in data_by_pair.items():
        if p in no_cont:
            annotations_no_cont.extend(data_dicts)
        else:
            # Every pair not listed in pairs_no_cont counts as contradicting.
            annotations_cont.extend(data_dicts)
    return annotations_cont, annotations_no_cont

def aggregate_labels(data_dict_list):
    """Aggregate individual annotations into one summary dict per pair.

    Annotations are grouped by ('property', 'concept') and, inside each pair,
    by 'relation'. For every relation in the level mapping, the proportion of
    annotations whose 'answer' is 'true' is computed.

    Args:
        data_dict_list: Annotation dicts with at least the keys 'property',
            'concept', 'relation' and 'answer'.

    Returns:
        List of dicts, one per pair, with the keys:
            'pair': the grouping key returned by ``sort_by_key``.
            'contradiction': 'yes' if the levels of the majority relations
                are exactly {'all', 'few'}, otherwise 'no'.
            'levels': sorted levels of the majority relations, '-'-joined.
            'majority_labels': relations with more than half 'true' answers,
                '-'-joined in mapping order.
            'label(s)_most_vostes': relation(s) with the highest proportion
                of 'true' answers, '-'-joined.
            'proportion_most_votes': that highest proportion.
    """
    contradiction = {'all', 'few'}
    rel_level_mapping = load_rel_level_mapping(mapping = 'levels')
    rel_header = rel_level_mapping.keys()
    data_by_pair = sort_by_key(data_dict_list, ['property', 'concept'])
    aggregated_dicts = []
    for pair, data_dicts in data_by_pair.items():
        pair_d = dict()
        pair_d['pair'] = pair
        data_by_rel = sort_by_key(data_dicts, ['relation'])
        levels = set()
        majority_labels = []
        # Maps a proportion of 'true' answers to the relations that reached it.
        most_votes_labels = defaultdict(list)
        for rel in rel_header:
            rel_dicts = data_by_rel[rel]
            n_annotations = len(rel_dicts)
            n_true = sum(1 for d in rel_dicts if d['answer'] == 'true')
            # n_true > 0 implies n_annotations > 0, so the division is safe.
            prop_true = n_true/n_annotations if n_true > 0 else 0.0
            if prop_true > 0.5:
                majority_labels.append(rel)
                levels.add(rel_level_mapping[rel])
            most_votes_labels[prop_true].append(rel)
        # Only the exact level combination {'all', 'few'} counts as a
        # contradiction; any other set of levels does not.
        pair_d['contradiction'] = 'yes' if levels == contradiction else 'no'
        pair_d['levels'] = '-'.join(sorted(levels))
        pair_d['majority_labels'] = '-'.join(majority_labels)
        n_most_votes = max(most_votes_labels)
        # The misspelled key is kept as-is: it becomes a column name in the CSV output.
        pair_d['label(s)_most_vostes'] = '-'.join(most_votes_labels[n_most_votes])
        pair_d['proportion_most_votes'] = n_most_votes
        aggregated_dicts.append(pair_d)
    return aggregated_dicts




def aggregated_data_to_file(run, batch, n_q, group, annotation_filter, iaa=True):
    """Filter annotations, aggregate labels per pair, write them to CSV and report agreement.

    Args:
        run, batch, n_q, group: Selection arguments handed to
            ``filter_annotations`` unchanged.
        annotation_filter: Name of the filter to apply; also used in the
            output directory name.
        iaa: Forwarded to ``filter_annotations``.

    Returns:
        Tuple ``(all_df, stats_dict)``: the aggregated labels as a DataFrame,
        and a dict with the keys 'filter', 'percent_clean' (a fraction between
        0 and 1 despite its name), 'Alpha_full' and 'Alpha_levels'.

    Side effects:
        Writes ``aggregated_labels-all_annotations.csv`` below
        ``../aggregated_labels/annotation_filter-<annotation_filter>``. The
        path depends only on ``annotation_filter``, so calls that differ in
        run/batch/n_q/group overwrite the same file.
    """
    # Forward the caller's flag; it used to be hard-coded to True here, which
    # made the `iaa` parameter a no-op.
    annotations_clean, annotations_removed = filter_annotations(
        run, group, n_q, batch, annotation_filter, iaa=iaa)
    agg_labels_all = aggregate_labels(annotations_clean)
    pairs_cont, pairs_no_cont = get_cont_stats(annotations_clean, agg_labels_all)
    # The split annotations are computed but not used further in this function.
    annotations_cont, annotations_no_cont = get_cont_no_cont_annotations(
        annotations_clean, pairs_no_cont, pairs_cont)
    dir_path = f'../aggregated_labels/annotation_filter-{annotation_filter}'
    os.makedirs(dir_path, exist_ok=True)
    filepath = f'{dir_path}/aggregated_labels-all_annotations.csv'
    all_df = pd.DataFrame(agg_labels_all)
    all_df.to_csv(filepath, index=False)
    print()
    print(f'Aggregated labels written to: {filepath}')

    iaa_dict = get_full_report(annotations_clean)
    n_total = len(annotations_clean) + len(annotations_removed)
    # Guard against an empty selection instead of raising ZeroDivisionError.
    percent_clean = len(annotations_clean)/n_total if n_total else 0.0
    print(f'Percent of clean annotations: {round(percent_clean*100, 2)}%')

    stats_dict = dict()
    stats_dict['filter'] = annotation_filter
    stats_dict['percent_clean'] = percent_clean
    stats_dict['Alpha_full'] = iaa_dict['full']['Krippendorff']
    stats_dict['Alpha_levels'] = iaa_dict['levels']['Krippendorff']
    return all_df, stats_dict
  


def main():
    """Run the aggregation once per annotation filter for run 4 of experiment2.

    Returns:
        DataFrame with one row of summary statistics per filter (the
        ``stats_dict`` returned by ``aggregated_data_to_file``). It used to be
        built and then discarded.
    """
    run = '4'
    batch = '*'
    n_q = '*'
    group = 'experiment2'

    filters = [
        'contradiction_outliers',
        'worker_contradiction_rate_0',
        'worker_contradiction_rate_above_av',
        'worker_failed_checks_1',
        'none',
    ]
    stats_dicts = []
    for annotation_filter in filters:
        # The aggregated frame is already written to disk by the callee; only
        # the statistics are collected here.
        _, stats_dict = aggregated_data_to_file(run, batch, n_q, group, annotation_filter)
        stats_dicts.append(stats_dict)
    return pd.DataFrame(stats_dicts)
    
   
# Run the per-filter aggregation when this code is executed directly
# (including as a notebook cell); the return value of main() is not used here.
if __name__ == '__main__':
    main()


Filtering out contradiction_outliers

Discarded 0.0 annotations.
----Filter report----
Total number of annotations: 9671
Number of clean annotations: 8795
Number of removed annotations: 876
---------------------

Percent of clean annotations: 90.94%
Total number of annotations: 8795
Total number of pairs: 106
Number of pairs without contradictions: 106 (1.0)
Number of pairs with contradictions: 0 (0.0)

Aggregated labels written to: ../aggregated_labels/annotation_filter-contradiction_outliers/aggregated_labels-all_annotations.csv
--- Full IAA report --- 
Full set:
Krippendorff's alpha: 0.29787291310131847
Proportional agreement (pairwise): 0.6640201835249607

collapsing pos_neg
pos_neg
Krippendorff's alpha: 0.4409390256043312
Proportional agreement (pairwise): 0.525321239606954

collapsing levels
levels
Krippendorff's alpha: 0.43538292767830167
Proportional agreement (pairwise): 0.48127362055933487

collapsing similar_relations
similar_relations
Krippendorff's alpha: 0.41533823078537

In [11]:
# Same per-filter loop as main(), but with '*' for run/batch/n_q and
# 'experiment*' for the group.
# NOTE(review): '*' presumably acts as a wildcard inside filter_annotations -- confirm.
# These names are left at notebook top level; later cells read run, batch,
# n_q, group and stats_df.
run = '*'
batch = '*'
n_q = '*'
group = 'experiment*'

filters = [
    'contradiction_outliers',
    'worker_contradiction_rate_0',
    'worker_contradiction_rate_above_av',
    'worker_failed_checks_1',
    'none', 
]
stats_dicts = []
for annotation_filter in filters:
    # Each call writes to a path that depends only on the filter name, so this
    # overwrites the CSV files produced by main() above.
    all_df, stats_dict = aggregated_data_to_file(run, batch, n_q, group, annotation_filter)
    stats_dicts.append(stats_dict)
# One row per filter: filter name, fraction of clean annotations, and the two alpha values.
stats_df = pd.DataFrame(stats_dicts)


Filtering out contradiction_outliers

Discarded 655.0 annotations.
----Filter report----
Total number of annotations: 73529
Number of clean annotations: 63368
Number of removed annotations: 10161
---------------------

Percent of clean annotations: 86.18%
Total number of annotations: 63368
Total number of pairs: 542
Number of pairs without contradictions: 540 (0.996309963099631)
Number of pairs with contradictions: 2 (0.0036900369003690036)

Aggregated labels written to: ../aggregated_labels/annotation_filter-contradiction_outliers/aggregated_labels-all_annotations.csv
--- Full IAA report --- 
Full set:
Krippendorff's alpha: 0.26244664861157885
Proportional agreement (pairwise): 0.6301511257408425

collapsing pos_neg
pos_neg
Krippendorff's alpha: 0.4093522361478831
Proportional agreement (pairwise): 0.5882438750529204

collapsing levels
levels
Krippendorff's alpha: 0.4297885032383808
Proportional agreement (pairwise): 0.5048420568445594

collapsing similar_relations
similar_relations


In [12]:
# Display the per-filter summary table built in the previous cell.
stats_df

Unnamed: 0,Alpha_full,Alpha_levels,filter,percent_clean
0,0.262447,0.429789,contradiction_outliers,0.86181
1,0.393076,0.693889,worker_contradiction_rate_0,0.2046
2,0.31099,0.476323,worker_contradiction_rate_above_av,0.66246
3,0.247709,0.361023,worker_failed_checks_1,0.980851
4,0.239275,0.356688,none,1.0


In [43]:
# NOTE(review): `get_agreement` and `all_pairs` are not defined in the visible
# cells; this cell relies on kernel state from elsewhere -- confirm before
# running on a fresh kernel.
# All lines are at top-level indentation so the cell parses as regular Python.
get_agreement(all_pairs)
collapse_relations = 'levels'
print(f'collapsing {collapse_relations}')
get_agreement(all_pairs, collapse_relations = collapse_relations)

# NOTE(review): 'worker_contradictions' is not one of the filter names used in
# the cells above -- confirm that filter_annotations accepts it.
annotation_filter = 'worker_contradictions'
# aggregated_data_to_file returns (DataFrame, stats dict); unpack so that
# agg_labels_all_df really is the DataFrame.
agg_labels_all_df, agg_stats_dict = aggregated_data_to_file(run, batch, n_q, group, annotation_filter)
    


roll-pipe implied_category-afforded_unusual-rare
roll-rail afforded_usual-unusual
roll-shovel afforded_unusual-rare-unusual
roll-driveshaft afforded_usual-unusual
roll-donkey afforded_unusual-impossible
roll-hose afforded_unusual-unusual
roll-blade afforded_unusual-rare-unusual-impossible
roll-piston afforded_unusual-rare-unusual
roll-screw implied_category-afforded_unusual-unusual
roll-windshield afforded_unusual-rare-unusual-impossible


In [45]:
# Print the first three pairs without contradictions as raw dicts.
# NOTE(review): `pairs_no_cont` only exists as a local variable inside functions
# in the visible cells; this cell relies on a top-level binding made elsewhere.
for d in pairs_no_cont[:3]:
    # Whole dict on purpose; a tab-separated 'pair' / 'majority_labels' print was sketched here earlier.
    print(d)

{'pair': 'black-pea', 'distribution': 'more_levels', 'levels': 'few-some', 'majority_labels': 'variability_limited-variability_open-rare'}
{'pair': 'hot-fireplace', 'distribution': 'more_levels', 'levels': 'all-some', 'majority_labels': 'affording_activity-typical_of_property-implied_category-typical_of_concept-variability_open'}
{'pair': 'made_of_wood-strake', 'distribution': 'more_levels', 'levels': 'all-some', 'majority_labels': 'affording_activity-variability_open'}
