In [13]:
from load_data import load_expert_data
from utils_analysis import sort_by_key
import pandas as pd
import csv


def prepare_gold_file(data_dict_list):
    """Build one gold-label candidate row per annotated triple.

    The annotations are grouped with ``sort_by_key`` on
    ``['relation', 'property', 'concept']``. For every group, a new row is
    derived from the first annotation of that group: bookkeeping fields and
    all ``disagreement_*`` fields are left out, and ``answer``,
    ``agreement`` and ``workerid`` are overwritten.

    If all annotations of a group carry the same answer, that answer is used
    and ``agreement`` is set to ``'agree'``. Otherwise the placeholders
    ``'ADD LABEL'`` / ``'ADD EXPECTATION'`` are written so the row can be
    completed by hand.

    The input dicts are not modified.

    :param data_dict_list: list of annotation dicts (one per worker answer).
    :return: list of row dicts, one per group.
    """
    data_by_triple = sort_by_key(data_dict_list, ['relation', 'property', 'concept'])

    # Fields that describe a single annotation event and make no sense in a
    # gold file.
    to_remove = {'filename', 'listnumber', 'assignmentid', 'hitid',
                 'origin', 'timestamp', 'partid',
                 'id', 'uuid', 'time_taken_batch', 'reason', 'comment'}

    rows = []
    for data in data_by_triple.values():
        # Annotations without an 'answer' field are skipped here (the
        # overview table treats them as optional as well) instead of
        # aborting the whole export with a KeyError.
        answers = {d['answer'] for d in data if 'answer' in d}

        # Build a fresh dict rather than popping keys from data[0]: the
        # original dict belongs to the caller and must stay intact.
        row = {k: v for k, v in data[0].items()
               if k not in to_remove and not k.startswith('disagreement_')}

        if len(answers) == 1:
            answer = next(iter(answers))
            agreement = 'agree'
        else:
            # No unanimous answer: leave placeholders for manual labelling.
            answer = 'ADD LABEL'
            agreement = 'ADD EXPECTATION'

        row['answer'] = answer
        row['agreement'] = agreement
        row['workerid'] = 'gold labels'
        rows.append(row)
    return rows


def write_gold_candidate_file(gold_rows, name,
                              gold_dir='../gold_labels/candidate_files/'):
    """Write gold-label candidate rows to a CSV file.

    :param gold_rows: iterable of row dicts. Keys missing from a row are
        written as empty cells; a key that is not part of the header makes
        ``csv.DictWriter`` raise ``ValueError``.
    :param name: file name of the CSV file to create inside ``gold_dir``.
    :param gold_dir: target directory; defaults to the candidate-file
        directory that was previously hard-coded.
    """
    import os

    path = os.path.join(gold_dir, name)
    # Each column appears exactly once ('agreement' used to be listed twice,
    # which duplicated the column in the output file).
    header = ['relation', 'property', 'concept', 'answer', 'agreement',
              'workerid', 'questionid', 'quid', 'description', 'exampletrue',
              'examplefalse', 'run', 'sublist', 'completionurl', 'name']
    # newline='' is required by the csv module; without it, extra blank
    # lines appear between rows on platforms that translate '\n'.
    with open(path, 'w', newline='', encoding='utf-8') as outfile:
        writer = csv.DictWriter(outfile, fieldnames=header)
        writer.writeheader()
        writer.writerows(gold_rows)

def get_overview_table(expert_data):
    """Collect the annotations of all workers into one dict per triple.

    The annotations are grouped with ``sort_by_key`` on
    ``['relation', 'concept', 'property']``. Every resulting dict holds the
    group key under ``'triple'`` and, for each worker ``w`` that supplied an
    answer, the entries ``answer-w`` and ``expected_behavior-w`` (the sorted
    names of all ``disagreement_*`` fields whose value is ``'true'``, joined
    with ``'-'``), plus ``reason-w`` / ``comment-w`` when the annotation has
    those fields. Annotations by the worker ``'pia_test1'`` are ignored.

    :param expert_data: list of annotation dicts.
    :return: list of dicts, one per group.
    """
    excluded_workers = {'pia_test1'}
    grouped = sort_by_key(expert_data, ['relation', 'concept', 'property'])

    overview = []
    for triple, annotations in grouped.items():
        entry = {'triple': triple}
        for annotation in annotations:
            worker = annotation['workerid']
            # Skip annotations without an answer and those of test workers.
            if 'answer' not in annotation or worker in excluded_workers:
                continue
            flagged = sorted(
                key for key, value in annotation.items()
                if key.startswith('disagreement_') and value == 'true'
            )
            entry[f'answer-{worker}'] = annotation['answer']
            entry[f'expected_behavior-{worker}'] = '-'.join(flagged)
            # Free-text fields are optional.
            for field in ('reason', 'comment'):
                if field in annotation:
                    entry[f'{field}-{worker}'] = annotation[field]
        overview.append(entry)
    return overview

# --- Driver: export an overview table and a gold-label candidate file ---
# Selection of the annotation data to load. How load_expert_data interprets
# these values is defined in load_data (not visible here); '*' is presumably
# a wildcard for the number of questions -- TODO confirm.
run = 4
n_q = '*'
batch = '1'
group = 'reason_agreement_expert_inspection2'
expert_data = load_expert_data(run, group, n_q, batch)
expert_rows = get_overview_table(expert_data)

# One row per triple, one column group per worker.
# NOTE(review): to_csv is called with its defaults, so the DataFrame index is
# written as an extra unnamed first column.
overview_df = pd.DataFrame(expert_rows)
overview_df.to_csv(f'../analyses/expert_annotations/{group}-overview.csv')



# Candidate gold file derived from the same annotations. Any '*' in the file
# name is replaced by '-all-' so that the name contains no wildcard character.
gold_rows = prepare_gold_file(expert_data)
name = f'run{run}-group_{group}-batch{batch}.csv'.replace('*', '-all-')
write_gold_candidate_file(gold_rows, name)

# Bare expression: displays the table as the notebook cell output.
overview_df

run4-group_reason_agreement_expert_inspection2/qu40-s_qu40-batch1.csv
no summary data


Unnamed: 0,answer-antske,answer-pia_run1,answer-piek,comment-pia_run1,comment-piek,expected_behavior-antske,expected_behavior-pia_run1,expected_behavior-piek,reason-antske,reason-pia_run1,reason-piek,triple
0,False,False,False,,,disagreement_agreement,disagreement_agreement,disagreement_agreement,,,,implied_category-shovel-roll
1,False,False,False,,,disagreement_agreement,disagreement_agreement,disagreement_agreement,,,,typical_of_property-shovel-roll
2,False,False,False,,,disagreement_agreement,disagreement_agreement,disagreement_agreement,,,,typical_of_concept-shovel-roll
3,False,False,False,,,disagreement_agreement,disagreement_agreement,disagreement_agreement,,,,afforded_unusual-shovel-roll
4,False,False,False,,,disagreement_agreement,disagreement_agreement,disagreement_agreement,,,,afforded_usual-shovel-roll
5,False,False,False,,,disagreement_agreement,disagreement_agreement,disagreement_agreement,,,,variability_limited-shovel-roll
6,False,False,False,,,disagreement_agreement,disagreement_odd_triple,disagreement_agreement,,,"""Roll like a shovel for somebody that rolls in...",creative-shovel-roll
7,True,True,True,,,disagreement_agreement-disagreement_imagination,disagreement_imagination,disagreement_imagination,,,"""Roll along the long axis but with difficulty ...",unusual-shovel-roll
8,False,False,False,,,disagreement_agreement,disagreement_agreement,disagreement_agreement,,,"""Other things than freebooter come to mind, fr...",typical_of_property-freebooter-dangerous
9,True,True,True,"""I think this is more a conceptualization issu...",,disagreement_agreement-disagreement_ambiguous_...,disagreement_ambiguous_concept-disagreement_co...,disagreement_agreement,,,,typical_of_concept-freebooter-dangerous
