# Calculate inter-annotator agreement (IAA) using Fleiss' kappa

In [15]:
import pandas as pd
import numpy as np
from collections import Counter

In [57]:
# Load the annotation table. Later cells read three annotator columns and one
# reference ("gold") column from it; every value is indexed as a string of
# characters, one character per tag type.
annotation_result_df = pd.read_csv("iaa_0509.csv")

In [58]:
# Display the loaded table as a quick sanity check.
annotation_result_df

Unnamed: 0,원익,수민,유정,결론
0,2155,2111,2115,2111
1,3331,3341,3362,3341
2,4131,4131,4131,4131
3,2411,4431,4431,4431
4,2151,2151,2131,2151
...,...,...,...,...
634,2131,2133,2131,2133
635,4421,2421,2421,2421
636,2262,2162,2431,2262
637,3345,3355,2155,3345


In [59]:
""" data format
worker1	worker2	worker3
1	1	2
3	1	3
3	3	3
4	1	4
4	4	2
"""
# One (initially empty) record list per character position 0-3 of a code;
# the digit-splitting loop in the following cell fills them.
annotation_results = [[] for _ in range(4)]

# Pull each annotator's column out once so the loop can zip over them.
annotator1, annotator2, annotator3 = (
    annotation_result_df[column] for column in ("원익", "수민", "유정")
)

In [60]:
# Start from empty lists every time this cell runs. Appending into lists that
# were created in another cell made a second run duplicate every item and
# silently distort all downstream statistics.
annotation_results = [[] for _ in range(4)]

for a1, a2, a3 in zip(annotator1, annotator2, annotator3):
    # Each code is read as text; character i is the label for tag type i + 1.
    a1_digits, a2_digits, a3_digits = str(a1), str(a2), str(a3)
    for i in range(4):
        annotation_results[i].append(
        {
            "annotator1": int(a1_digits[i]),
            "annotator2": int(a2_digits[i]),
            "annotator3": int(a3_digits[i]),
        })

In [61]:
def calculate_iaa(annotation_result):
    """Compute Fleiss' kappa for one table of categorical annotations.

    Args:
        annotation_result: pandas DataFrame with one row per annotated item
            and one column per annotator. Cells hold 1-based integer category
            labels; the number of categories is taken to be the largest label
            present in the table.

    Returns:
        Whatever ``fleissKappa`` returns for the item-by-category count matrix
        (presumably the kappa score -- confirm against the ``fleiss`` module).
        This value used to be computed and then dropped by a bare ``return``.
    """
    import numpy as np
    from fleiss import fleissKappa

    result = annotation_result.to_numpy()
    num_classes = int(np.max(result))

    # Turn each row of raw labels into per-category vote counts,
    # e.g. labels [1, 1, 2] with 3 categories -> [2, 1, 0].
    transformed_result = []
    for row in result:
        counts = np.zeros(num_classes)
        for label in row:
            # Labels are 1-based, the count vector is 0-based.
            # NOTE(review): a label of 0 would wrap to the last category here.
            counts[int(label - 1)] += 1
        transformed_result.append(counts.astype(int).tolist())

    # Second argument is the number of raters (columns of the input table).
    kappa = fleissKappa(transformed_result, len(result[0]))
    return kappa

In [62]:
# Report agreement separately for each of the four tag types (1-based in the output).
for tag_type in range(1, 5):
    print(f"Tag Type : {tag_type}")
    tag_records = annotation_results[tag_type - 1]
    calculate_iaa(pd.DataFrame(tag_records))
    print()

Tag Type : 1
#raters =  3 , #subjects =  639 , #categories =  5
PA =  0.7000521648408947
PE = 0.3565035569781836
Fleiss' Kappa = 0.534

Tag Type : 2
#raters =  3 , #subjects =  639 , #categories =  4
PA =  0.7866458007303051
PE = 0.36789220028142344
Fleiss' Kappa = 0.662

Tag Type : 3
#raters =  3 , #subjects =  639 , #categories =  6
PA =  0.7167449139280103
PE = 0.19546359087308487
Fleiss' Kappa = 0.648

Tag Type : 4
#raters =  3 , #subjects =  639 , #categories =  6
PA =  0.7230046948356783
PE = 0.3006488087123176
Fleiss' Kappa = 0.604



# Accuracy per annotator

In [63]:
# gold_labels[pos] holds, for every row, the integer at character position
# `pos` of the reference code -- the same split applied to the annotators.
gold_labels = [
    [int(str(gold)[pos]) for gold in annotation_result_df["결론"]]
    for pos in range(4)
]

In [64]:
# Running sums of the per-tag accuracies; only the commented-out average
# report at the bottom of this cell reads them.
ann1_avg = 0
ann2_avg = 0
ann3_avg = 0
for i in range(4):
    print(f"Tag Type : {i+1}")
    # Build the per-tag table and gold vector once and reuse them for all three annotators.
    tag_frame = pd.DataFrame(annotation_results[i])
    tag_gold = np.array(gold_labels[i])
    n_items = len(gold_labels[i])

    # Accuracy = share of items whose label equals the gold label.
    ann1_acc = sum(tag_frame['annotator1'].to_numpy() == tag_gold) / n_items
    ann2_acc = sum(tag_frame['annotator2'].to_numpy() == tag_gold) / n_items
    ann3_acc = sum(tag_frame['annotator3'].to_numpy() == tag_gold) / n_items

    ann1_avg += ann1_acc
    ann2_avg += ann2_acc
    ann3_avg += ann3_acc

    print(f"원익 : {ann1_acc:.3f}")
    print(f"수민: {ann2_acc:.3f}")
    print(f"유정: {ann3_acc:.3f}")
    print()
# print(f"Average Accuracy")
# print(f"원익 : {ann1_avg/4}")
# print(f"수민: {ann2_avg/4}")
# print(f"유정: {ann3_avg/4}")

Tag Type : 1
원익 : 0.803
수민: 0.889
유정: 0.740

Tag Type : 2
원익 : 0.875
수민: 0.942
유정: 0.818

Tag Type : 3
원익 : 0.806
수민: 0.914
유정: 0.778

Tag Type : 4
원익 : 0.787
수민: 0.886
유정: 0.801



# IAA per label

In [65]:
import copy
# Number of attribute values each label can take, keyed by 1-based label number.
num_categories_per_label = {
    1: 5,
    2: 4,
    3: 6,
    4: 6,
}
for label_no, num_attributes in num_categories_per_label.items():
    print(f"Label : {label_no}")
    # One-vs-rest: score every attribute on its own by collapsing the labels
    # to 1 (this attribute) or 2 (any other attribute).
    for attribute in range(1, num_attributes + 1):
        print(f"Attribute {attribute}")
        # Deep copy so the shared records in annotation_results stay untouched.
        binarized_records = copy.deepcopy(annotation_results[label_no - 1])
        for record in binarized_records:
            for annotator in record:
                record[annotator] = 1 if record[annotator] == attribute else 2
        calculate_iaa(pd.DataFrame(binarized_records))
        print()

Label : 1
Attribute 1
#raters =  3 , #subjects =  639 , #categories =  2
PA =  0.8685446009389659
PE = 0.7602376561577777
Fleiss' Kappa = 0.452

Attribute 2
#raters =  3 , #subjects =  639 , #categories =  2
PA =  0.750652060511213
PE = 0.5051736256523666
Fleiss' Kappa = 0.496

Attribute 3
#raters =  3 , #subjects =  639 , #categories =  2
PA =  0.9144496609285341
PE = 0.7693481354130696
Fleiss' Kappa = 0.629

Attribute 4
#raters =  3 , #subjects =  639 , #categories =  2
PA =  0.9436619718309861
PE = 0.8601644838796492
Fleiss' Kappa = 0.597

Attribute 5
#raters =  3 , #subjects =  639 , #categories =  2
PA =  0.9227960354720918
PE = 0.8180832128535038
Fleiss' Kappa = 0.576

Label : 2
Attribute 1
#raters =  3 , #subjects =  639 , #categories =  2
PA =  0.8309859154929558
PE = 0.5025536825738138
Fleiss' Kappa = 0.66

Attribute 2
#raters =  3 , #subjects =  639 , #categories =  2
PA =  0.9227960354720921
PE = 0.8230956091462898
Fleiss' Kappa = 0.564

Attribute 3
#raters =  3 , #subjects 

# Label Distribution

In [66]:
# Rebind gold_labels from a list of lists to a NumPy array (one row per tag
# type). NOTE(review): this changes the type behind an existing name, so cells
# above will see an ndarray if they are re-run after this one.
gold_labels = np.array(gold_labels)

In [67]:
# For every label, tally how often each attribute value occurs in the gold
# data and print "<count>\t<fraction>" rows ordered by attribute value.
counter_per_label = {}
for label_idx, label_values in enumerate(gold_labels):
    value_counts = Counter(label_values)
    counter_per_label[label_idx] = value_counts
    print(f"Label {label_idx+1}")
    total = len(label_values)
    for _value, attribute_cnt in sorted(value_counts.items()):
        print(f"{attribute_cnt}\t{attribute_cnt/total}")

Label 1
104	0.162754303599374
342	0.5352112676056338
82	0.12832550860719874
49	0.07668231611893583
62	0.09702660406885759
Label 2
345	0.539906103286385
63	0.09859154929577464
81	0.1267605633802817
150	0.2347417840375587
Label 3
55	0.08607198748043818
89	0.13928012519561817
178	0.27856025039123633
61	0.09546165884194054
167	0.26134585289514867
89	0.13928012519561817
Label 4
314	0.49139280125195617
54	0.08450704225352113
21	0.03286384976525822
72	0.11267605633802817
114	0.1784037558685446
64	0.10015649452269171
