In [1]:
import json
# !pip install krippendorff
import krippendorff
import numpy as np
def read_annotation(file_path):
    """Load an annotation export and group its labels by annotator and sample.

    The file must contain a JSON list of samples. Each sample is read for its
    ``meta_sample_id`` and its ``annotations`` list; each annotation is read
    for ``annotator``, ``annotator_name`` (optional) and ``label``.

    Args:
        file_path: Path of the JSON annotation file.

    Returns:
        A ``(sample_ids, annotators_records)`` tuple:

        * ``sample_ids`` -- the ``meta_sample_id`` of every sample, in file
          order (one entry per sample record, so duplicates are kept).
        * ``annotators_records`` -- ``{annotator: {sample_id: [label, ...]}}``.
          Labels from several annotations by the same annotator on the same
          sample are concatenated in file order.

    Raises:
        KeyError: If a sample lacks ``meta_sample_id``/``annotations`` or an
            annotation lacks ``label`` (or ``annotator`` when no usable
            ``annotator_name`` is present).
    """
    # Context manager so the handle is closed even if parsing fails;
    # JSON text is UTF-8 by specification, so do not rely on the locale default.
    with open(file_path, encoding='utf-8') as annotation_file:
        data = json.load(annotation_file)

    annotators_records = {}  # annotator: {sample1: [label], sample2: [label], ...}
    sample_ids = []
    for sample in data:
        sample_id = sample['meta_sample_id']
        sample_ids.append(sample_id)
        for annotation in sample['annotations']:
            # Prefer the human-readable name; fall back to the raw annotator
            # field when the name is missing or empty.
            annotator = annotation.get('annotator_name') or annotation['annotator']

            labels = annotation['label']
            if labels is None:
                labels = []
            elif isinstance(labels, str):
                # A bare string would otherwise be extended character by character.
                labels = [labels]

            # Register the (annotator, sample) pair even when there are no
            # labels, so "annotated with nothing" stays distinguishable from
            # "never annotated".
            sample_labels = annotators_records.setdefault(annotator, {}).setdefault(sample_id, [])
            sample_labels.extend(labels)

    return sample_ids, annotators_records

In [3]:
# Load the pilot annotations and group the labels per annotator and sample.
sample_ids, annotators_records = read_annotation('pilot_data/pilot_openai_small_annotation.json')
# sample_ids, annotators_records = read_annotation('pilot_data/pilot_all-mpnet-base-v2_annotation_revised.json')
annotators = list(annotators_records.keys())
# Sort so that every annotator's rating vector uses the same sample order.
sample_ids = sorted(sample_ids)
# Only these annotators are compared; each must be present in the file,
# otherwise the lookup below raises KeyError (deliberately loud on typos).
selected_annotators = ['Amin Ahmad', 'rogger', 'Miaoran', 'forrest']

# One rating vector per annotator, aligned with `sample_ids`:
# annotator 1: [label for sample 1, label for sample 2 ....]
results_binary = {annotator: [] for annotator in selected_annotators} # either consistent or hallucinated
results_3labels = {annotator: [] for annotator in selected_annotators}
results_4labels = {annotator: [] for annotator in selected_annotators} # consistent, questionable, benign, unwanted
# binary_label {1: consistent, 0: hallucinated}
# 3 labels: {2: consistent, 1: questionable, 0: hallucinated (benign or unwanted)}
# 4 labels: {0:unwanted, 1: benign, 2:questionable, 3: consistent}
for sample_id in sample_ids:
    for annotator in selected_annotators:
        if sample_id not in annotators_records[annotator]:
            # No record for this annotator/sample pair is coded as "consistent".
            # NOTE(review): this cannot tell "annotator never saw the sample"
            # apart from "annotator found nothing to flag" -- confirm that the
            # export omits clean samples; otherwise np.nan (missing) may be
            # the intended value here.
            binary_label = 1
            threeLabel = 2
            refinedLabel = 3
        else:
            sample_label = set(annotators_records[annotator][sample_id])
            # The most severe label present decides the code for the sample.
            if 'Unwanted' in sample_label:
                refinedLabel = 0
                threeLabel = 0
                binary_label = 0
            elif 'Benign' in sample_label:
                refinedLabel = 1
                # Benign is a hallucination, so it shares code 0 with Unwanted
                # in the 3-label scheme documented above (previously 1, which
                # merged it with "questionable").
                threeLabel = 0
                binary_label = 0
            else:
                # Anything else -- including an empty label list -- is coded
                # as "questionable".
                refinedLabel = 2
                threeLabel = 1
                binary_label = 0

        results_binary[annotator].append(binary_label)
        results_3labels[annotator].append(threeLabel)
        results_4labels[annotator].append(refinedLabel)
print('annotators:', list(results_binary.keys()))

# print('Number of samples:',len(sample_ids))
# "nominal": for category data without order

# for annotator in results_binary:
#     print(annotator, results_binary[annotator])

# print('Nominal Krippendorff\'s alpha for binary labels:')
# print(krippendorff.alpha(np.array(list(results_binary.values()), dtype=np.dtype(float)), level_of_measurement="nominal", ))
# print()

for annotator in results_3labels:
    print(annotator, results_3labels[annotator])

# Rows = annotators, columns = samples. "interval" treats the 3-label codes as
# equally spaced numeric values.
print('Interval Krippendorff\'s alpha for 3 labels:')
print(krippendorff.alpha(np.array(list(results_3labels.values()), dtype=np.dtype(float)), level_of_measurement="interval"))

# print('Nominal Krippendorff\'s alpha for 4 labels:')
# print(krippendorff.alpha(np.array(list(results_4labels.values()), dtype=np.dtype(float)), level_of_measurement="nominal"))


sample_ids: [72, 223, 309, 384, 391, 406, 551, 750, 821, 1123]
annotators: ['Amin Ahmad', 'rogger', 'Miaoran', 'forrest']
Amin Ahmad [0, nan, 2, nan, 2, 0, 0, 0, nan, nan]
rogger [0, 0, 2, 1, 0, 0, 0, nan, 0, 0]
Miaoran [0, 1, 1, 0, 0, 0, 0, 0, 0, 0]
forrest [0, 0, 2, 0, 1, 1, 0, 0, 0, 0]
Interval Krippendorff's alpha for 3 labels:
0.5394581861012957
