# Select:


* Pairs with high agreement/few contradictions
* Pairs with low agreement/few contradictions (if possible)
* Pairs with high agreement/many contradictions
* Pairs with low agreement/many contradictions

Try to spread the selection across property types.


# Annotate:

* New run
* Define new experiment group somehow

# Ideas:

* Low agreement, low contradiction rate: knowledge issue (some people know, some don't)
* Low agreement, high contradiction rate: ambiguity



# Implementation plan:

* sort according to agreement
* sort according to contradiction rate
* get top pairs (top 5 or so)
* get intersections:
    * most problematic: low agreement - high contradiction
    * problematic: low agreement - low contradiction
    * interesting: high agreement - high contradiction (multiple interpretations?)
    * clear: high agreement - low contradiction

In [5]:
from load_data import load_experiment_data
from calculate_iaa import get_agreement
from utils_analysis import load_contradiction_pairs
from utils_analysis import collect_contradictions
from utils_analysis import sort_by_key
from utils_analysis import get_annotation_ids
from utils_analysis import load_analysis

from collections import Counter, defaultdict
import pandas as pd
from scipy.stats import spearmanr


def get_agreement_by_property(data_dict_list):
    """Compute agreement separately for every property-concept group.

    The annotation dicts are grouped with ``sort_by_key`` on the
    ``'property'`` and ``'concept'`` keys, and ``get_agreement`` is called
    (with ``v=False``) on each group.

    :param data_dict_list: list of annotation dicts as returned by
        ``load_experiment_data``.
    :return: dict mapping each group key produced by ``sort_by_key`` to the
        result of ``get_agreement`` for that group.
    """
    # NOTE(review): group keys appear to be 'property-concept' strings
    # (e.g. 'fly-arrow'); confirm against sort_by_key.
    grouped = sort_by_key(data_dict_list, ['property', 'concept'])
    return {
        pair: get_agreement(annotations, v=False)
        for pair, annotations in grouped.items()
    }


def get_agreement_contradiction_data(run, group, batch, n_q):
    """Build a table with agreement and contradiction rate per pair.

    Agreement is the ``'Krippendorff'`` entry of the per-pair agreement
    computed from the experiment data; the contradiction rate is the
    ``'contradiction_poss_contradiction_ratio'`` value of the matching row in
    the stored ``'pairs'`` analysis.

    Pairs whose name contains ``'test'`` or ``'check'`` are reported and
    skipped, as are pairs that do not have exactly one row in the analysis.

    :param run: run selector passed on to the loaders.
    :param group: experiment group selector passed on to the loaders.
    :param batch: batch selector passed on to the loaders.
    :param n_q: number-of-questions selector passed to
        ``load_experiment_data``.
    :return: ``pandas.DataFrame`` with the columns ``pair``, ``agreement`` and
        ``contradiction_rate``, ordered by descending agreement.
    """
    data_dict_list = load_experiment_data(run, group, n_q, batch, remove_not_val=True)
    print(run, group, batch, n_q)

    # presumably load_analysis returns a DataFrame with one row per pair;
    # verify against utils_analysis.
    pair_records = load_analysis('pairs', run, group, batch).to_dict('records')
    records_by_pair = sort_by_key(pair_records, ['pair'])

    # A Counter is used only for most_common(), i.e. to iterate the pairs
    # from highest to lowest agreement.
    agreement_dict = Counter({
        pair: scores['Krippendorff']
        for pair, scores in get_agreement_by_property(data_dict_list).items()
    })

    rows = []
    for pair, ag in agreement_dict.most_common():
        if 'test' in pair or 'check' in pair:
            print('test pair:', pair)
            continue
        matches = records_by_pair[pair]
        if len(matches) != 1:
            print('unexpected length:', len(matches), 'for pair', pair)
            continue
        rows.append({
            'pair': pair,
            'agreement': ag,
            'contradiction_rate': matches[0]['contradiction_poss_contradiction_ratio'],
        })
    return pd.DataFrame(rows)

def get_spearman(df_cont_ag):
    """Correlate agreement with contradiction rate (Spearman rank correlation).

    The expectation is a negative correlation: pairs with high agreement
    should show a low contradiction rate.

    :param df_cont_ag: DataFrame with the columns ``'agreement'`` and
        ``'contradiction_rate'``.
    :return: the result object of ``scipy.stats.spearmanr`` (correlation and
        p-value).
    """
    return spearmanr(df_cont_ag['agreement'], df_cont_ag['contradiction_rate'])

def get_pair_sets(df_cont_ag, n_extreme=30, n_high_cont=5):
    """Select pairs by combining agreement and contradiction rate.

    Four sets of pair names are built:

    * ``low_ag_high_cont``: among the ``n_extreme`` lowest-agreement pairs
      and among the ``n_extreme`` highest-contradiction pairs.
    * ``low_ag_low_cont``: among the ``n_extreme`` lowest-agreement pairs
      and with a contradiction rate of exactly 0.0.
    * ``high_ag_low_cont``: among the ``n_extreme`` highest-agreement pairs
      and with a contradiction rate of exactly 0.0.
    * ``high_ag_high_cont``: the ``n_high_cont`` pairs with the highest
      contradiction rate within the ``n_extreme`` highest-agreement pairs.

    :param df_cont_ag: DataFrame with the columns ``'pair'``,
        ``'agreement'`` and ``'contradiction_rate'``.
    :param n_extreme: number of rows taken from each end of a ranking.
    :param n_high_cont: size of the ``high_ag_high_cont`` selection.
    :return: dict mapping the four selection names to sets of pair names.
    """
    by_agreement = df_cont_ag.sort_values('agreement', ascending=False)
    top_ag = by_agreement.head(n_extreme)
    # tail() keeps the very last row; the former slice [-30:-1] silently
    # dropped the pair with the lowest agreement.
    bottom_ag = by_agreement.tail(n_extreme)

    by_contradiction = df_cont_ag.sort_values('contradiction_rate', ascending=False)
    top_cont = by_contradiction.head(n_extreme)
    # "Low contradiction" means no contradictions at all, not a bottom-n cut.
    bottom_cont = df_cont_ag.loc[df_cont_ag['contradiction_rate'] == 0.0]

    low_ag_pairs = set(bottom_ag['pair'])
    high_ag_pairs = set(top_ag['pair'])
    no_cont_pairs = set(bottom_cont['pair'])

    # The top-agreement and top-contradiction rankings need not overlap, so
    # the most contradictory pairs are picked from within the top-agreement
    # rows instead of intersecting the two rankings.
    top_ag_by_cont = top_ag.sort_values('contradiction_rate', ascending=False)

    return {
        'low_ag_high_cont': low_ag_pairs & set(top_cont['pair']),
        'low_ag_low_cont': low_ag_pairs & no_cont_pairs,
        'high_ag_low_cont': high_ag_pairs & no_cont_pairs,
        'high_ag_high_cont': set(top_ag_by_cont.head(n_high_cont)['pair']),
    }

In [11]:
# Selectors handed to the loaders; every field is a wildcard except the
# group, which is restricted to names starting with 'experiment'.
# NOTE(review): presumably these are glob patterns; confirm in load_data.
run, group, batch, n_q = "*", 'experiment*', '*', '*'

# Per-pair agreement and contradiction rate, and how many pairs were kept.
df_cont_ag = get_agreement_contradiction_data(run, group, batch, n_q)
print(len(df_cont_ag))

# Correlation between agreement and contradiction rate.
spr = get_spearman(df_cont_ag)
print(spr)

# Candidate pairs per selection category, each followed by a blank line.
pair_dict = get_pair_sets(df_cont_ag)
for selection, pairs in pair_dict.items():
    print(selection, pairs, '', sep='\n')

Discarded 655.0 annotations.
* experiment* * *
test pair: _check1-_check1
test pair: _check2-_check2
test pair: _test1-_test1
test pair: _test2-_test2
test pair: _test3-_test
test pair: _check3-_check3
test pair: _check4-_check4
test pair: _test4-_test4
test pair: _test3-_test3
test pair: _test4-_test
842
SpearmanrResult(correlation=-0.20838758412754474, pvalue=1.0280527317640294e-09)
low_ag_high_cont
{'square-recliner', 'fly-arrow'}

low_ag_low_cont
{'round-pen', 'yellow-buttercup', 'yellow-leopard', 'red-carrot', 'yellow-blossom', 'yellow-lavandula', 'wheels-sleigh', 'dangerous-freebooter', 'wheels-motor', 'yellow-peanut', 'roll-pin', 'yellow-honey'}

high_ag_low_cont
{'yellow-seaweed', 'made_of_wood-bow', 'black-suit', 'red-rhino', 'round-wheel', 'sweet-loquat', 'round-globe', 'wheels-unicycle', 'hot-oven', 'fly-robin', 'fly-icteridae', 'square-mug', 'wheels-cabriolet', 'yellow-citrus', 'sweet-cherry'}

high_ag_high_cont
{'roll-tire', 'hot-shower', 'sweet-candy', 'wheels-ambulance',

In [12]:
# Inspect the 30 pairs with the lowest agreement.
df_ag_sorted = df_cont_ag.sort_values('agreement', ascending=False)
# tail(30) includes the last row; the former slice [-30:-1] left out the
# pair with the lowest agreement.
bottom_ag = df_ag_sorted.tail(30)
bottom_ag

Unnamed: 0,agreement,contradiction_rate,pair
812,-0.01596,0.066116,lay_eggs-plaice
813,-0.016807,0.226667,fly-arrow
814,-0.0175,0.116667,sweet-fennel
815,-0.01792,0.125,roll-propeller
816,-0.019351,0.08377,roll-bike
817,-0.02,0.033333,lay_eggs-ichthyosaur
818,-0.020021,0.139535,roll-lathe
819,-0.02022,0.066667,sweet-shiitake
820,-0.02022,0.053333,roll-headstock
821,-0.023148,0.0,yellow-leopard


In [2]:
# Hand-picked pairs for the first expert-inspection trial round.
# NOTE(review): 'hot-vineigrette' may be a misspelling of 'hot-vinaigrette';
# confirm against the pair names in the data before changing it.
pairs = ['fly-arrow', 'roll-pin', 'yellow-buttercup', 'hot-vineigrette', 'roll-tire']
# Store the selection, one pair per line.
with open('../analyses/expert_inspection1.txt', 'w') as outfile:
    for p in pairs:
        print(p, file=outfile)


In [3]:
# Hand-picked pairs for the second expert-inspection trial round.
pairs = ['roll-shovel', 'dangerous-freebooter', 'yellow-pineapple', 'red-carrot', 'red-wine']
# Store the selection, one pair per line.
with open('../analyses/expert_inspection2.txt', 'w') as outfile:
    for p in pairs:
        print(p, file=outfile)

In [14]:
# Hand-picked pairs for the third expert-inspection trial round.
pairs = ['square-recliner', 'yellow-leopard', 'round-pen', 'roll-washer', 'lay_eggs-plaice']
# Store the selection, one pair per line.
with open('../analyses/expert_inspection3.txt', 'w') as outfile:
    for p in pairs:
        print(p, file=outfile)