# Interannotator agreement study

## calculate interannotator agreement

In [3]:
import pandas as pd
from collections import defaultdict
import nltk
from nltk.metrics.agreement import AnnotationTask

### Funtion which converts the data frame to the (coder, item, category) format used by NLTK.

In [1]:
def convert_to_triples(dataframe):
    '''Converts the format pandas data frame to the (coder, item, category) format used by NLTK'''
    entity_count = defaultdict(int)
    triple_lst = []
    i = 0
    for row in df.iterrows():
        annotation = row[1].to_dict()
        if annotation['Answer.Label.label'] == 'YES':
            correction = annotation['Input.label']
        else:
            correction = annotation['Answer.Label.label'].split(': ')[1]
        coder = "C" + str(i % 3 + 1)
        entity = annotation['Input.entities']
        if entity_count[entity] > 3:
            j = entity_count[entity] - 1 // 3 
            entity = entity + "_" + str(j)
        triple_lst.append((coder, entity, correction))
        i+=1

    return triple_lst

#### Calculate the agreement on the pilot annoations

In [6]:
df = pd.read_csv('../annotation/Mturks_plot.csv')
triple_lst = convert_to_triples(df)

print(len(triple_lst))

print(df.shape)

163
(163, 37)


In [9]:
annotation_task = AnnotationTask(triple_lst)
print(f"The agreement S is {annotation_task.S()}")
print(f"The agreement PI is {annotation_task.pi()}")
print(f"The agreement kappa is {annotation_task.kappa()}")
print(f"The agreement alpha is {annotation_task.alpha()}")

The agreement S is 0.8554652213188799
The agreement PI is 0.8064135353137913
The agreement kappa is 0.806556915140547
The agreement alpha is 0.7329313285991985


### Read the Mechanical Turk final annotations into pandas data frame and convert to triples fomart.

In [10]:
df = pd.read_csv('../annotation/batch_result_final.csv')
triple_lst = convert_to_triples(df)

print(len(triple_lst))

print(df.shape)

2370
(2370, 38)


### Calculate the agreement score by different metrics.

In [11]:
annotation_task = AnnotationTask(triple_lst)
print(f"The agreement S is {annotation_task.S()}")
print(f"The agreement PI is {annotation_task.pi()}")
print(f"The agreement kappa is {annotation_task.kappa()}")
print(f"The agreement alpha is {annotation_task.alpha()}")

The agreement S is 0.7407407407407407
The agreement PI is 0.6967858444622387
The agreement kappa is 0.6968947263611659
The agreement alpha is 0.6922922074312876


### Calculate the agreement score by Krippendorf's Alpha by scale.

In [12]:
def entity_distance(tag1,tag2):
    if tag1 == tag2:
        return 0
    elif tag1 == "N/A" or tag2 == "N/A":
        return 1
    elif (tag1 == "LOC" and tag2 == "ORG") or (tag2 == "LOC" and tag1 == "ORG"):
        return 1
    elif (tag1 == "PRODUCT" and tag2 == "WORK_OF_ART") or (tag2 == "PRODUCT" and tag1 == "WORK_OF_ART"):
        return 1
    else:
        return 2

In [13]:
annotation_task = AnnotationTask(triple_lst,distance=entity_distance)

In [14]:
annotation_task.alpha()

0.7348131720090019