# Basic statistics for RoB annotations <br>
RoB file: https://drive.google.com/file/d/19R9savfPdCHC8XLz2iiMvL_71lPJERWK/view

In [106]:
import requests,json
from requests import get
from requests.auth import HTTPBasicAuth, HTTPDigestAuth
import urllib
import itertools
from collections import defaultdict
from collections import Counter

### Helper functions

In [19]:
def get_combinations(l):
    yield from itertools.product(l, l)

In [20]:
project_name = 'RoB_preliminary_annotation'
owner = 'anjDhr'
users = ['rahel-caliesch', 'roger-annotation', 'martin-annotation', 'katia-giacomino', 'simone-annotation']
all_user_combinations = list(itertools.combinations(users, 2)) 
all_user_combinations = get_combinations(users)
all_user_combinations = list( all_user_combinations )

In [21]:
'''
exact_v1 = Stringent inter-annotator agreement
overlapping_v1 = Relaxed inter-annotator agreement
'''
metrics = ['exact_v1', 'overlapping_v1'] 

In [22]:
# Get all the entities
all_annotations_data = 'https://www.tagtog.net/-api/metrics/v0/search_stats?project=RoB_preliminary_annotation&owner=anjDhr&search=*'
all_annotations_data_response = get(all_annotations_data, auth=('anjDhr', '9J@NiScMhUy9LbR'))
print('The response for query is: ', all_annotations_data_response)
all_annotations_data_response = json.loads(all_annotations_data_response.text)

The response for query is:  <Response [200]>


In [23]:
# Get the IAA metrics
# IAA_hardcoded = 'https://www.tagtog.net/-api/metrics/v0/iaa?project=RoB_preliminary_annotation&owner=anjDhr&member1=rahel-caliesch&member2=roger-annotation&anntaskId=e_109&metric=exact_v1'
IAA = 'https://www.tagtog.net/-api/metrics/v0/iaa?'

In [28]:
global_annot = dict()
user_agreement = dict() # what users have constant high agreement (relaxed agreement) between entities

for eachEntry in all_annotations_data_response:
    
    if 'e_' in eachEntry:

        entry_name =  all_annotations_data_response[eachEntry]['name']
        entity_IAA = {}

        for eachUserPair in all_user_combinations:

            member1 = eachUserPair[0]
            member2 = eachUserPair[1]

            if member1 != member2:

                params = (('project',project_name),('owner', owner), ('member1', member1), ('member2', member2), ('anntaskId', eachEntry), ('metric', metrics[1])) # metrics[1] = overlapping_v1
                parameters = urllib.parse.urlencode(params)
                entire_command = IAA + parameters
                response = get(entire_command, auth=('anjDhr', '9J@NiScMhUy9LbR'))
                my_json_data = json.loads(response.text)

                if eachUserPair not in entity_IAA and tuple(reversed(eachUserPair)) not in entity_IAA:
                    entity_IAA[eachUserPair] = my_json_data['f1']
        global_annot[entry_name] = entity_IAA

--------------------------------------------------------------------
--------------------------------------------------------------------
--------------------------------------------------------------------
--------------------------------------------------------------------
--------------------------------------------------------------------
--------------------------------------------------------------------
--------------------------------------------------------------------
--------------------------------------------------------------------
--------------------------------------------------------------------
--------------------------------------------------------------------
--------------------------------------------------------------------
--------------------------------------------------------------------
--------------------------------------------------------------------
--------------------------------------------------------------------
----------------------------------

In [52]:
unique_user_combinations = set()

for eachUserCombination in all_user_combinations:
    member1 = eachUserCombination[0]
    member2 = eachUserCombination[1]
    
    if member1 != member2:
        if eachUserCombination not in unique_user_combinations and tuple(reversed(eachUserCombination)) not in unique_user_combinations:
            unique_user_combinations.add(eachUserCombination)

### Which two annotators have many non-zero agreements?

In [38]:
annot_pair_non0 = dict()

for key, value in global_annot.items():
    for key_i, value_i in value.items():
        if key_i not in annot_pair_non0:
            annot_pair_non0[key_i] = [value_i]
        elif key_i in annot_pair_non0:
            annot_pair_non0[key_i].append(value_i)

In [69]:
annot_pair_mostnon0 = dict()

for eachUserComb in unique_user_combinations:
    agreement_score = []
    
    for score in annot_pair_non0[eachUserComb]:
        if score != None and score != 0:
            agreement_score.append( score )
            
    #print('Number of non-zero agreements between' , eachUserComb, ' are' , len( agreement_score ))
    annot_pair_mostnon0[eachUserComb] = len( agreement_score )

In [72]:
dict(sorted(annot_pair_mostnon0.items(), key=lambda item: item[1]))

{('roger-annotation', 'simone-annotation'): 1,
 ('rahel-caliesch', 'simone-annotation'): 2,
 ('martin-annotation', 'simone-annotation'): 2,
 ('katia-giacomino', 'simone-annotation'): 2,
 ('rahel-caliesch', 'katia-giacomino'): 6,
 ('rahel-caliesch', 'martin-annotation'): 8,
 ('rahel-caliesch', 'roger-annotation'): 10,
 ('roger-annotation', 'martin-annotation'): 11,
 ('roger-annotation', 'katia-giacomino'): 12,
 ('martin-annotation', 'katia-giacomino'): 15}

### Who has the most non-zero agreements? <br>
martin = 36 <br>
katia = 35 <br>
roger = 34 <br>
rahel = 26 <br>
simone = 7 <br>

### Who has the most LESS CONFIDENCE replies?

In [116]:
replies = dict()

for key, value in global_annot.items():
    for key_i, value_i in value.items():
        if value_i != None and value_i != 0:           
            for eachPerson in key_i:
                decision = key.split( '_' )[2]
                if eachPerson not in replies:
                    replies[eachPerson] = [decision]
                if eachPerson in replies:
                    replies[eachPerson].append(decision)

In [117]:
for key, value in replies.items():
    print(key, ' : ', len( value ))

rahel-caliesch  :  27
roger-annotation  :  35
martin-annotation  :  37
katia-giacomino  :  36
simone-annotation  :  8


In [118]:
most_less_conf = dict()

for key, value in global_annot.items():
    for key_i, value_i in value.items():
        if value_i != None and value_i != 0:           
            for eachPerson in key_i:
                decision = key.split( '_' )[2]
                if eachPerson not in most_less_conf and 'Pro' in decision:
                    most_less_conf[eachPerson] = [decision]
                if eachPerson in most_less_conf and 'Pro' in decision:
                    most_less_conf[eachPerson].append(decision)

In [119]:
# Percentage of 'less confident' annotations out of all the annotations
for key, value in most_less_conf.items():
    print(key, ' : ',  ( len( value ) / len(replies[key]) ) * 100 )

rahel-caliesch  :  25.925925925925924
roger-annotation  :  40.0
martin-annotation  :  29.72972972972973
katia-giacomino  :  36.11111111111111
simone-annotation  :  25.0


### Who has the most HIGH CONFIDENCE replies?

In [120]:
most_high_conf = dict()

for key, value in global_annot.items():
    for key_i, value_i in value.items():
        if value_i != None and value_i != 0:           
            for eachPerson in key_i:
                decision = key.split( '_' )[2]
                if eachPerson not in most_high_conf and 'Pro' not in decision:
                    most_high_conf[eachPerson] = [decision]
                if eachPerson in most_high_conf and 'Pro' not in decision:
                    most_high_conf[eachPerson].append(decision)

In [121]:
# Percentage of 'high confident' annotations out of all the annotations
for key, value in most_high_conf.items():
    print(key, ' : ',  ( len( value ) / len(replies[key]) ) * 100 )

rahel-caliesch  :  77.77777777777779
roger-annotation  :  62.857142857142854
katia-giacomino  :  66.66666666666666
martin-annotation  :  72.97297297297297
simone-annotation  :  87.5


### What "entity" class are annotators least confident about?

In [130]:
all_least_conf_annot =  []
all_high_conf_annot = []

for key, value in global_annot.items():
    for key_i, value_i in value.items():
        if value_i != None and 'Pro' in key:
            all_least_conf_annot.append( "_".join(key.split("_", 3)[:3]) )
        if value_i != None and 'Pro' not in key:
            all_high_conf_annot.append( "_".join(key.split("_", 3)[:3]) )            

In [131]:
Counter(all_least_conf_annot).most_common()

[('5_2_ProbablyNo', 10),
 ('3_1_ProbablyYes', 10),
 ('1_2_ProbablyYes', 10),
 ('4_3_ProbablyNo', 10),
 ('2_7_ProbablyNo', 9),
 ('5_3_ProbablyNo', 9),
 ('2_3_ProbablyNo', 9),
 ('4_2_ProbablyNo', 9),
 ('1_1_ProbablyYes', 8),
 ('2_1_ProbablyYes', 8),
 ('2_2_ProbablyYes', 8),
 ('4_4_ProbablyNo', 7),
 ('4_1_ProbablyNo', 7),
 ('1_3_ProbablyNo', 6),
 ('4_5_ProbablyYes', 6),
 ('5_1_ProbablyYes', 6),
 ('2_2_ProbablyNo', 6),
 ('3_1_ProbablyNo', 6),
 ('3_2_ProbablyYes', 5),
 ('3_2_ProbablyNo', 5),
 ('4_3_ProbablyYes', 5),
 ('2_6_ProbablyYes', 5),
 ('2_3_ProbablyYes', 5),
 ('1_2_ProbablyNo', 4),
 ('3_3_ProbablyNo', 4),
 ('2_1_ProbablyNo', 4),
 ('3_4_ProbablyNo', 4),
 ('3_4_ProbablyYes', 3),
 ('5_3_ProbablyYes', 3),
 ('2_5_ProbablyNo', 3),
 ('4_4_ProbablyYes', 3),
 ('2_6_ProbablyNo', 3),
 ('3_3_ProbablyYes', 3),
 ('2_5_ProbablyYes', 3),
 ('4_5_ProbablyNo', 3),
 ('2_4_ProbablyNo', 3),
 ('5_2_ProbablyYes', 3),
 ('2_4_ProbablyYes', 3),
 ('2_7_ProbablyYes', 2),
 ('5_1_ProbablyNo', 2)]

In [132]:
Counter(all_high_conf_annot).most_common()

[('1_3_No', 16),
 ('4_3_No', 13),
 ('5_1_No', 12),
 ('5_3_No', 12),
 ('2_6_No', 11),
 ('3_1_Yes', 10),
 ('2_1_No', 10),
 ('4_1_No', 10),
 ('1_1_Yes', 10),
 ('4_2_No', 10),
 ('5_2_No', 9),
 ('2_2_Yes', 9),
 ('1_2_Yes', 7),
 ('2_1_Yes', 7),
 ('2_6_Yes', 7),
 ('1_2_No', 7),
 ('3_1_No', 6),
 ('2_2_No', 6),
 ('4_3_Yes', 6),
 ('3_2_No', 6),
 ('3_3_No', 6),
 ('3_4_No', 5),
 ('5_1_Yes', 4),
 ('2_3_No', 4),
 ('3_2_Yes', 3),
 ('2_7_No', 3),
 ('2_5_Yes', 3),
 ('4_5_Yes', 3),
 ('4_4_Yes', 3),
 ('2_7_Yes', 3),
 ('1_1_No', 3),
 ('4_4_No', 2)]