In [None]:
import numpy as np
import krippendorff
import random
import pandas as pd
import os
from tqdm import tqdm

def bootstrap_krippendorff_alpha(data, level_of_measurement='nominal', n_bootstrap=1000, ci=95, seed=None):
    """
    Bootstrap Krippendorff's alpha over items and summarise the resampled values.

    Items (columns of `data`) are resampled with replacement `n_bootstrap` times.
    Alpha is recomputed on every resample; resamples whose alpha is NaN are
    dropped before the mean and the percentile interval are taken.

    Parameters:
        data (list or np.array): Raters x Items matrix of numeric ratings.
            Use `None` or `np.nan` for missing values.
        level_of_measurement (str): Passed through to `krippendorff.alpha`,
            e.g. 'nominal', 'ordinal', or 'interval'
        n_bootstrap (int): Number of bootstrap samples (at least 1)
        ci (float): Confidence level in percent, strictly between 0 and 100
            (e.g., 95 for 95%)
        seed (int): Seed for a private random generator. The global NumPy and
            `random` generators are left untouched. `None` gives a
            non-reproducible run.

    Returns:
        (alpha_mean, (lower_bound, upper_bound)): Mean of the bootstrap alphas
        (not the alpha of the original data) and the percentile CI tuple.
        All three values are NaN when no resample produced a non-NaN alpha.

    Raises:
        ValueError: If `data` is not a non-empty 2-D matrix, or if
            `n_bootstrap` or `ci` is out of range.
    """
    # A float matrix can represent missing ratings: both None and np.nan become
    # NaN here. The previous cast to int raised on any missing value and
    # truncated non-integer ratings.
    ratings = np.array(data, dtype=float)
    if ratings.ndim != 2 or ratings.shape[1] == 0:
        raise ValueError("data must be a non-empty 2-D raters x items matrix")
    if n_bootstrap < 1:
        raise ValueError("n_bootstrap must be at least 1")
    if not 0 < ci < 100:
        raise ValueError("ci must be strictly between 0 and 100")

    # Private legacy generator: for a given seed it draws the same resamples
    # as seeding the global np.random state, without the global side effect.
    rng = np.random.RandomState(seed)
    n_items = ratings.shape[1]
    alphas = []

    for _ in tqdm(range(n_bootstrap)):
        sample_indices = rng.choice(n_items, size=n_items, replace=True)
        sample = ratings[:, sample_indices]

        alpha = krippendorff.alpha(reliability_data=sample, level_of_measurement=level_of_measurement)
        alphas.append(alpha)

    alphas = np.asarray(alphas, dtype=float)
    alphas = alphas[~np.isnan(alphas)]
    if alphas.size == 0:
        # Nothing to summarise; avoid calling mean/percentile on an empty array.
        nan = float("nan")
        return nan, (nan, nan)

    tail = (100 - ci) / 2
    alpha_mean = alphas.mean()
    lower = np.percentile(alphas, tail)
    upper = np.percentile(alphas, 100 - tail)

    return alpha_mean, (lower, upper)


In [None]:
# Directory holding the score tables. Set the TORTUOSITY_SCORE_TABLE_DIR
# environment variable to run the notebook against a copy stored elsewhere.
SCORE_TABLE_DIR = os.environ.get(
    "TORTUOSITY_SCORE_TABLE_DIR",
    "/media/jackson/backup/dp_data/tortuosity_study/read_csvs/score_tables",
)

# CSV files analysed below, in the order they are reported.
SCORE_TABLE_FILES = [
    "r1_scores_no_duplicates_sufficient_tissue.csv",
    "r2_scores_no_duplicates_sufficient_tissue.csv",
    "r4_scores.csv",
]

paths = [os.path.join(SCORE_TABLE_DIR, file_name) for file_name in SCORE_TABLE_FILES]

In [None]:
# Settings shared by the point estimate and the bootstrap.
level_of_measurement = "ordinal"
n_bootstrap = 1000
ci = 95
seed = 42
for path in paths:
    # Keep only the three rater columns, then transpose so that rows are
    # raters and columns are items, as bootstrap_krippendorff_alpha expects.
    df = pd.read_csv(path)[["Tilak", "Tuomas", "Xavier"]]
    arr = df.to_numpy().T
    # Alpha on the observed ratings: this is the point estimate to report.
    alpha = krippendorff.alpha(reliability_data=arr, level_of_measurement=level_of_measurement)
    # The bootstrap supplies the interval; its mean is shown only for reference.
    mean, bounds = bootstrap_krippendorff_alpha(arr, level_of_measurement=level_of_measurement, n_bootstrap=n_bootstrap, ci=ci, seed=seed)
    print(f"Krippendorff's alpha for {os.path.basename(path)}: {alpha} (bootstrap mean: {mean}), {ci}% CI: {bounds}")
    


In [None]:
# Reload the last file in `paths` with all of its columns. Indexing `paths`
# directly avoids relying on the loop variable `path` left over from the
# analysis cell.
df = pd.read_csv(paths[-1])

In [None]:
# Preview the first rows instead of rendering the whole table.
df.head()

In [None]:
# Alpha for the ratings currently held in `arr`, i.e. whatever the analysis
# loop assigned last (the final entry of `paths` after a full run).
krippendorff.alpha(reliability_data=arr, level_of_measurement=level_of_measurement)