This notebook calculates interannotator agreement rates for data with categorical labels (using Cohen's $\kappa$) and real values (using Krippendorff's $\alpha$).  We calculate these rates for two tasks: subjectivity/objectivity and suspense.

In [1]:
from nltk.metrics.agreement import AnnotationTask
from nltk.metrics import interval_distance, binary_distance 
import sys

In [2]:
def krippendorff_alpha(annotation_triples):
    """Print Krippendorff's alpha for the given annotations, to three decimals.

    Args:
        annotation_triples: Annotation records passed unchanged to NLTK's
            ``AnnotationTask``. Elsewhere in this notebook they are
            ``(annotator_id, item_index, rating)`` tuples.

    Returns:
        None. The score is only written to standard output.
    """
    # interval_distance is NLTK's label distance for numeric ratings.
    task = AnnotationTask(annotation_triples, distance=interval_distance)
    print("%.3f" % task.alpha())

In [3]:
def cohens_kappa(annotation_triples):
    """Print Cohen's kappa for categorical annotations, to three decimals.

    Args:
        annotation_triples: Annotation records passed unchanged to NLTK's
            ``AnnotationTask``. Elsewhere in this notebook they are
            ``(annotator_id, item_index, label)`` tuples.

    Returns:
        None. The score is only written to standard output.
    """
    # Kappa is a nominal-scale statistic: two labels either match or they do
    # not. binary_distance encodes exactly that, whereas a numeric distance
    # would distort observed agreement for any label set other than 0/1
    # (for 0/1 labels the two distances coincide, so binary results are the
    # same either way).
    task = AnnotationTask(annotation_triples, distance=binary_distance)
    result = task.kappa()
    print("%.3f" % result)

In [4]:
def read_annos(filename):
    """Read a tab-separated annotation file into two parallel lists.

    The first line is treated as a header and discarded. Every following
    line supplies a numeric rating in its first column and a sentence in its
    second. Lines that contain only whitespace (for example a trailing blank
    line at the end of the file) are skipped rather than being passed to
    ``float``, which would raise on the empty string.

    Args:
        filename: Path of the UTF-8 encoded annotation file.

    Returns:
        A tuple ``(annos, sentences)``: the ratings as floats and the
        second-column strings, both in file order.

    Raises:
        ValueError: If a non-blank line's first column is not a valid float.
        IndexError: If a non-blank line has fewer than two columns.
    """
    annos = []
    sentences = []
    with open(filename, encoding="utf-8") as file:
        # Consume the header row; its contents are not used.
        next(file, None)

        for line in file:
            stripped = line.rstrip()
            if not stripped:
                # Blank line: nothing to parse.
                continue
            cols = stripped.split("\t")
            annos.append(float(cols[0]))
            sentences.append(cols[1])
    return annos, sentences

In [None]:
def convert_anno_list(annos, annotator_id):
    """Tag each rating with its annotator and its position in the list.

    Args:
        annos: Iterable of ratings, one per item, in item order.
        annotator_id: Identifier placed first in every resulting tuple.

    Returns:
        A list of ``(annotator_id, index, rating)`` tuples, where ``index``
        is the zero-based position of the rating in ``annos``.
    """
    return [
        (annotator_id, position, rating)
        for position, rating in enumerate(annos)
    ]

In [None]:
# Placeholder: replace with the path to the first annotation file
# (tab-separated with one header line, as parsed by read_annos).
anno1_filename="path to your filename name"

In [None]:
# Placeholder: replace with the path to the second annotation file
# (tab-separated with one header line, as parsed by read_annos).
anno2_filename="path to group annotation file here"

In [None]:
# Ratings from the first file; its sentences are kept for display later.
anno1, sentences=read_annos(anno1_filename)

In [None]:
# Ratings from the second file; its sentence column is discarded.
anno2, _=read_annos(anno2_filename)

In [None]:
# Items are paired by position, so the two files must contain the same
# number of annotations. Both lengths are passed as a single tuple because
# "%" binds more tightly than ",".
if len(anno1) != len(anno2):
    print("Different number of annotations: %s vs. %s" % (len(anno1), len(anno2)))

In [None]:
# Show every item whose two ratings differ by at least 1, as
# "<rating 1>\t<rating 2>\t<sentence>".
for idx, first_rating in enumerate(anno1):
    second_rating = anno2[idx]
    if abs(first_rating - second_rating) >= 1:
        print("%s\t%s\t%s" % (first_rating, second_rating, sentences[idx]))

In [None]:
# Rebind anno1 from plain ratings to (0, item index, rating) triples.
# Not idempotent: re-run the cell that loads anno1 before re-running this one.
anno1=convert_anno_list(anno1, 0)

In [None]:
# Rebind anno2 from plain ratings to (1, item index, rating) triples.
# Not idempotent: re-run the cell that loads anno2 before re-running this one.
anno2=convert_anno_list(anno2, 1)

Objectivity is a binary rating, so use Cohen's $\kappa$.

In [None]:
# Pool both annotators' triples into one list and print kappa for them.
cohens_kappa(anno1 + anno2)

Suspense is a real-valued rating, so use Krippendorff's $\alpha$.

In [None]:
# Pool both annotators' triples into one list and print alpha for them.
krippendorff_alpha(anno1 + anno2)