In [3]:
import pandas as pd
from sklearn.metrics import cohen_kappa_score
from statsmodels.stats.inter_rater import fleiss_kappa
import numpy as np

# 1. Load the data
df = pd.read_csv("annotations.csv")

# Strip whitespace from columns
df["Label_from_Code"] = df["Label_from_Code"].str.strip()
df["Annotator_1"] = df["Annotator_1"].str.strip()
df["Annotator_2"] = df["Annotator_1"].str.strip()
df["Annotator_3"] = df["Annotator_1"].str.strip()

# Function to calculate Cohen's Kappa for each pair of annotators
def calculate_cohen_kappa(df, annotators):
    kappa_results = {}
    for i in range(len(annotators)):
        for j in range(i + 1, len(annotators)):
            annotator_1 = annotators[i]
            annotator_2 = annotators[j]
            kappa = cohen_kappa_score(df[annotator_1], df[annotator_2])
            kappa_results[f"{annotator_1} vs {annotator_2}"] = kappa
    return kappa_results

# Function to calculate Fleiss' Kappa
def calculate_fleiss_kappa(df, annotators):
    ratings = np.array([[list(annotator).count(label) for label in set(df['Label_from_Code'])] for annotator in df[annotators].values])
    fleiss = fleiss_kappa(ratings, method='fleiss')
    return fleiss

# Annotator columns
annotators = ["Label_from_Code", 'Annotator_1', 'Annotator_2', 'Annotator_3']

# Fleiss' Kappa
fleiss_kappa_result = calculate_fleiss_kappa(df, annotators)

# Agreement percentages for each annotator against the ground truth
agreement_percentages = {}
for annotator in annotators:
    agreement = (df['Label_from_Code'] == df[annotator]).mean() * 100
    agreement_percentages[annotator] = agreement

# Count of labels 'C' and 'D'
label_c_count = df['Label_from_Code'].value_counts().get('C', 0)
label_d_count = df['Label_from_Code'].value_counts().get('D', 0)

print("\nFleiss' Kappa Result:")
print(f"{fleiss_kappa_result:.2f}")

print("\nAgreement Percentages:")
for annotator, percentage in agreement_percentages.items():
    print(f"{annotator}: {percentage:.2f}%")

print("\nLabel Counts:")
print(f"Count of Label C: {label_c_count}")
print(f"Count of Label D: {label_d_count}")



Fleiss' Kappa Result:
0.87

Agreement Percentages:
Label_from_Code: 100.00%
Annotator_1: 87.61%
Annotator_2: 87.61%
Annotator_3: 87.61%

Label Counts:
Count of Label C: 84
Count of Label D: 134
