# Krippendorff's Alpha

In this notebook, we use Krippendorff's alpha, a statistical measure of inter-rater reliability, to assess the agreement between the two raters in each of two sets.

**Imports**

In [16]:
import pandas as pd
import numpy as np
import os
import re
import krippendorff
from json import dumps, loads
from sklearn.metrics import cohen_kappa_score
import math

### Utility Functions for Extracting Upper Triangular Matrix

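This section defines utility functions that load a similarity matrix from a CSV file and extract the entries of its upper triangle (the cells above the diagonal), so that each pair of images is read only once. Pairs whose image names share the same first number are skipped. These functions make it easy to access only the relevant portions of the matrix for the analysis below.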

In [210]:
def sort_key(image_name):
    """Turn an image name into a numeric ``(x, y)`` tuple usable as a sort key.

    The name must begin with ``image_<x>_<y>``, where ``<x>`` and ``<y>`` are
    runs of digits; any text after the second number is ignored.

    Args:
        image_name: The image name to parse.

    Returns:
        ``(x, y)`` as a tuple of ints, or ``None`` when the name does not
        start with the expected pattern.
    """
    parsed = re.match(r'image_(\d+)_(\d+)', image_name)
    if parsed is None:
        # Unrecognised names yield None rather than raising.
        return None
    first, second = parsed.groups()
    return (int(first), int(second))

def concat_lists(dict):
    """Flatten a two-level mapping of lists into a single list.

    Args:
        dict: Mapping whose values are themselves mappings of lists
            (outer key -> inner key -> list). The parameter name shadows the
            built-in ``dict``; it is kept so keyword callers keep working.

    Returns:
        A new list holding every inner list's items, visited in the iteration
        order of the outer mapping and then of each inner mapping. The input
        is not modified.
    """
    merged = []
    for per_category in dict.values():
        for values in per_category.values():
            merged.extend(values)
    return merged

def create_similarity_matrix(file):
    """Read a similarity matrix from a CSV file.

    The CSV is loaded with its first column as the row index.

    Args:
        file: Path (or any input accepted by ``pd.read_csv``) of the CSV file.

    Returns:
        A ``(similarity_matrix, images)`` tuple: ``similarity_matrix`` is a
        list with one list of values per row of the file, and ``images`` is
        the result of ``sort_key`` applied to each whitespace-stripped column
        label, in column order.
    """
    frame = pd.read_csv(file, index_col=0)
    rows = [list(frame.iloc[pos]) for pos in range(frame.shape[0])]
    image_keys = [sort_key(label.strip()) for label in frame.columns.tolist()]
    return rows, image_keys

def get_scores_list(similarity_matrix, images):
    """Collect the above-diagonal scores for pairs with different first keys.

    Args:
        similarity_matrix: Sequence of rows; ``similarity_matrix[i][j]`` is
            the score for the pair (i, j).
        images: Sequence of two-element keys, one per row/column index. Only
            the first element of each key is compared.

    Returns:
        The scores at positions ``(i, j)`` with ``j > i`` whose keys differ in
        their first element, in row-major order.
    """
    kept = []
    for row_idx, row in enumerate(similarity_matrix):
        # Start past the diagonal so each unordered pair is visited once.
        for col_idx in range(row_idx + 1, len(row)):
            group_a, _ = images[row_idx]
            group_b, _ = images[col_idx]
            if group_a != group_b:
                kept.append(row[col_idx])
    return kept

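### Loading Labels

Each file under `<set>/<rater>/<website>/` is read as a similarity matrix and reduced to a flat list of scores, stored per set, rater, website, and category.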

In [211]:
# Nested results: raters[set name][rater name][website][category] -> list of scores.
raters = {
    'Set 1': {'Rater 1': {}, 'Rater 2': {}},
    'Set 2': {'Rater 1': {}, 'Rater 2': {}}
}

# Directories are resolved relative to the current working directory,
# following the layout <set>/<rater>/<website>/<category file>.
for set_name, raters_dict in raters.items():
    for rater in raters_dict.keys():
        rater_dir = os.path.join(set_name, rater)
        websites = {}
        # os.listdir returns entries in arbitrary order. Sorting makes the
        # insertion order of the dicts deterministic and identical for both
        # raters, so the score lists concatenated later line up pair by pair.
        for website in sorted(os.listdir(rater_dir)):
            website_dir = os.path.join(rater_dir, website)
            data = {}
            for category in sorted(os.listdir(website_dir)):
                similarity_matrix, images = create_similarity_matrix(os.path.join(website_dir, category))
                data[category] = get_scores_list(similarity_matrix, images)
            websites[website] = data
        raters[set_name][rater] = websites

In [212]:
# Compute Krippendorff's alpha between the two raters of each set.
for set_name in raters.keys():
    rater_1_scores = concat_lists(raters[set_name]['Rater 1'])
    rater_2_scores = concat_lists(raters[set_name]['Rater 2'])

    # The two lists are compared position by position, so they must describe
    # the same units; a length mismatch means the raters' files differ.
    if len(rater_1_scores) != len(rater_2_scores):
        raise ValueError(
            f'{set_name}: Rater 1 has {len(rater_1_scores)} scores but '
            f'Rater 2 has {len(rater_2_scores)}; the ratings cannot be aligned.'
        )

    # One row per rater, one column per rated pair.
    data = [rater_1_scores, rater_2_scores]
    alpha = krippendorff.alpha(reliability_data=data, level_of_measurement="ordinal", value_domain=[0,1,2,3,4])
    print(f'{set_name} Krippendorff\'s alpha: {alpha}')

Set 1 Krippendorff's alpha: 0.7970879700070952
Set 2 Krippendorff's alpha: 0.8435807417117765
