In [1]:
import json
import os
# !pip install krippendorff
import krippendorff
import numpy as np
def read_annotation(file_path):
    """Load an annotation export and group the labels by annotator and sample.

    Args:
        file_path: Path to a JSON file containing a list of sample records.
            Each record is read for ``'meta_sample_id'`` and ``'annotations'``;
            each annotation is read for ``'annotator'``, ``'annotator_name'``
            and ``'label'``.

    Returns:
        A ``(sample_ids, annotators_records)`` tuple.

        * ``sample_ids`` -- the ``meta_sample_id`` of every record, in file
          order (one entry per record, duplicates are kept).
        * ``annotators_records`` -- ``{annotator: {sample_id: [label, ...]}}``.
          The annotator key is the lower-cased first word of
          ``annotator_name`` when that name contains one, otherwise the raw
          ``annotator`` value.

    Raises:
        KeyError: If a record or annotation lacks one of the keys above.
    """
    annotators_records = {}  # annotator: {sample1: [label], sample2: [label], ...}
    sample_ids = []

    # Use a context manager so the handle is closed deterministically, and read
    # as UTF-8 (the JSON default) instead of the platform-dependent encoding.
    with open(file_path, encoding='utf-8') as annotation_file:
        data = json.load(annotation_file)

    for sample in data:
        annotations = sample['annotations']
        sample_id = sample['meta_sample_id']
        sample_ids.append(sample_id)
        for annotation in annotations:
            # An empty, missing-valued (None) or whitespace-only name yields no
            # tokens; fall back to the raw annotator id in that case instead of
            # failing on ``split()[0]``.
            name_tokens = (annotation['annotator_name'] or '').split()
            annotator = name_tokens[0].lower() if name_tokens else annotation['annotator']

            labels = annotation['label']
            if isinstance(labels, str):
                # A bare string would otherwise be extended character by character.
                labels = [labels]

            per_sample = annotators_records.setdefault(annotator, {})
            per_sample.setdefault(sample_id, []).extend(labels)

    return sample_ids, annotators_records

In [2]:
def _label_for(annotator_samples, sample_id, label_map):
    """Return the numeric code one annotator's labels on one sample map to."""
    if sample_id not in annotator_samples:
        # No record from this annotator for the sample is coded as 'consistent'.
        return label_map['consistent']
    sample_label = set(annotator_samples[sample_id])
    # Precedence: 'Unwanted' wins over 'Benign'; anything else is 'questionable'.
    if 'Unwanted' in sample_label:
        return label_map['unwanted']
    if 'Benign' in sample_label:
        return label_map['benign']
    return label_map['questionable']


def compute_interannotator_agreement(file_path, label_map, level_of_measurement='interval', selected_annotators = None):
    """Print and return Krippendorff's alpha for one annotation file.

    Args:
        file_path: Annotation JSON file, loaded with ``read_annotation``.
        label_map: Mapping with the keys ``'consistent'``, ``'unwanted'``,
            ``'benign'`` and ``'questionable'`` giving the numeric code of each
            category. Values are cast to float, so ``np.nan`` is accepted
            (presumably treated as a missing rating by ``krippendorff.alpha``
            -- confirm against the library documentation).
        level_of_measurement: Forwarded unchanged to ``krippendorff.alpha``.
        selected_annotators: Optional iterable of annotator keys to restrict
            the computation to. Names with no records are reported and skipped.
            A falsy value (``None`` or empty) selects every annotator found in
            the file.

    Returns:
        The unrounded alpha as returned by ``krippendorff.alpha``. The value
        rounded to three decimals is also printed, as before.
    """
    sample_ids, annotators_records = read_annotation(file_path)
    # read_annotation merges labels per (annotator, sample_id) but lists the id
    # once per record; deduplicate so a repeated id is not counted as two units.
    sample_ids = sorted(set(sample_ids))
    print(file_path)
    print('selected_annotators:', selected_annotators)

    if selected_annotators:
        annotators = []
        for annotator in selected_annotators:
            if annotator not in annotators_records:
                print(f"No records from annotator {annotator}")
            else:
                annotators.append(annotator)
    else:
        annotators = list(annotators_records.keys())

    # annotator: [code for sample 1, code for sample 2, ...]; every row has
    # len(sample_ids) entries, even if an annotator was selected more than once.
    results = {
        annotator: [
            _label_for(annotators_records[annotator], sample_id, label_map)
            for sample_id in sample_ids
        ]
        for annotator in annotators
    }

    print(f"{level_of_measurement} Krippendorff's alpha for label map\n{label_map}")
    # Rows are annotators, columns are samples.
    reliability_data = np.array(list(results.values()), dtype=np.dtype(float))
    agreement = krippendorff.alpha(reliability_data, level_of_measurement=level_of_measurement)
    print(round(agreement,3))
    return agreement


In [3]:
# Folders holding annotation files. Only after_path is iterated in the loop
# below; before_path is not used in the lines of this cell.
before_path = 'pilot/virgin_data'
after_path = 'pilot/results'
# Alternative numeric encodings of the four label categories. Agreement is
# computed once per encoding. Key order is kept as written because each map is
# printed verbatim next to its alpha value.
label_maps = [
    {'consistent':2, 'questionable':1, 'unwanted':0, 'benign': 0},  # benign grouped with unwanted
    {'consistent':2, 'questionable':1, 'benign': 1, 'unwanted':0},  # benign grouped with questionable
    {'consistent':2, 'benign': 2, 'questionable':1, 'unwanted':0},  # benign grouped with consistent
    {'consistent':2, 'benign': 1, 'questionable':np.nan, 'unwanted':0},  # questionable coded as NaN
    {'consistent':1, 'benign': 0, 'questionable':0, 'unwanted':0},  # binary: consistent vs. everything else
    {'consistent':1, 'benign': np.nan, 'questionable':np.nan, 'unwanted':0},  # consistent vs. unwanted, rest NaN
]
# Annotator subsets to evaluate. None is falsy, so compute_interannotator_agreement
# then uses every annotator found in the file.
selected_annotators = [None, ['amin', 'rogger', 'miaoran', 'forrest'], ['amin', 'rogger', 'miaoran', 'forrest', 'renyi']]
# Embedder names; each one selects a file named pilot_<embedder>_annotation.json.
embedders = ['bge_small', 'openai_small', 'all-mpnet-base-v2']
# Full sweep: folder x annotator subset x embedder x label map. Each call prints
# its own header and alpha; the bare print() separates consecutive results.
for folder in [after_path]:
    for selected_annotator in selected_annotators:
        for embedder in embedders:
            file_path = os.path.join(folder, f"pilot_{embedder}_annotation.json")
            for label_map in label_maps:
                # print(label_map)
            
                compute_interannotator_agreement(file_path=file_path, label_map=label_map, selected_annotators=selected_annotator)
                print()

pilot/results/pilot_bge_small_annotation.json
selected_annotators: None
interval Krippendorff's alpha for label map
{'consistent': 2, 'questionable': 1, 'unwanted': 0, 'benign': 0}
0.268

pilot/results/pilot_bge_small_annotation.json
selected_annotators: None
interval Krippendorff's alpha for label map
{'consistent': 2, 'questionable': 1, 'benign': 1, 'unwanted': 0}
0.248

pilot/results/pilot_bge_small_annotation.json
selected_annotators: None
interval Krippendorff's alpha for label map
{'consistent': 2, 'benign': 2, 'questionable': 1, 'unwanted': 0}
0.162

pilot/results/pilot_bge_small_annotation.json
selected_annotators: None
interval Krippendorff's alpha for label map
{'consistent': 2, 'benign': 1, 'questionable': nan, 'unwanted': 0}
0.267

pilot/results/pilot_bge_small_annotation.json
selected_annotators: None
interval Krippendorff's alpha for label map
{'consistent': 1, 'benign': 0, 'questionable': 0, 'unwanted': 0}
0.273

pilot/results/pilot_bge_small_annotation.json
selected_ann