In [None]:
import pandas as pd
import numpy as np
from sklearn.metrics import cohen_kappa_score
from sklearn.metrics import accuracy_score

import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

In [None]:
# Load the annotation CSV into a DataFrame and preview its first two rows.
data = pd.read_csv(filepath_or_buffer='adl-matched.csv')
data.head(n=2)

In [None]:
# Replace the two annotator-named columns with the neutral names 'a1' / 'a2'
# used by the rest of the notebook, then preview the result.
data = data.rename({'Jonathan': 'a1', 'Shaza': 'a2'}, axis='columns')
data.head(n=2)

In [None]:
# Pull the 'a1' and 'a2' columns out of the frame for the agreement metrics.
a1, a2 = data['a1'], data['a2']

In [None]:
# Bootstrapped resampling: the paired ratings are resampled with replacement
# (1000 times by default) and kappa is recomputed on every resample; the 2.5th
# and 97.5th percentiles of those values are the bounds of the 95% confidence
# interval.

def cohens_kappa_w_ci(a, b, n_bootstrap=1000, seed=222):
    """Return Cohen's kappa for two raters plus a bootstrap 95% percentile CI.

    Parameters
    ----------
    a, b : array-like
        Labels assigned by the two raters. Element ``i`` of ``a`` and element
        ``i`` of ``b`` are treated as a pair and are always resampled together.
    n_bootstrap : int, default 1000
        Number of bootstrap resamples.
    seed : int, default 222
        Seed for the random generator, so repeated calls give the same interval.

    Returns
    -------
    kappa : float
        Kappa computed on the full, un-resampled data.
    (ci_lower, ci_upper) : tuple
        2.5th and 97.5th percentiles of the bootstrapped kappas.

    Raises
    ------
    ValueError
        If ``n_bootstrap`` is smaller than 1.
    """
    if n_bootstrap < 1:
        raise ValueError("n_bootstrap must be at least 1")

    # Work on plain NumPy arrays so the sampled indices are always positional.
    # Indexing a pandas Series with an integer array looks the values up as
    # index *labels*, which only coincides with positions for a 0..n-1 index,
    # and a plain list cannot be indexed with an array at all.
    a = np.asarray(a)
    b = np.asarray(b)

    kappa = cohen_kappa_score(a, b)
    n = len(a)

    rng = np.random.default_rng(seed=seed)
    bootstrapped_kappas = []

    for _ in range(n_bootstrap):
        # The same indices are applied to both raters to keep ratings paired.
        sample_indices = rng.choice(n, size=n, replace=True)
        bootstrapped_kappas.append(
            cohen_kappa_score(a[sample_indices], b[sample_indices])
        )

    ci_lower = np.percentile(bootstrapped_kappas, 2.5)
    ci_upper = np.percentile(bootstrapped_kappas, 97.5)

    return kappa, (ci_lower, ci_upper)



In [None]:
kappa, ci = cohens_kappa_w_ci(a1, a2)

In [None]:
print(f"Cohen's Kappa: {kappa:.4f}")
print(f"95% CI: ({ci[0]:.4f}, {ci[1]:.4f})")

In [None]:
def accuracy_w_ci(a, b, n_bootstrap=1000, seed=222):
    """Return the accuracy (raw agreement) of two label sets plus a bootstrap 95% CI.

    Parameters
    ----------
    a, b : array-like
        Paired labels; element ``i`` of ``a`` is compared with element ``i`` of
        ``b`` and the two are always resampled together.
    n_bootstrap : int, default 1000
        Number of bootstrap resamples.
    seed : int, default 222
        Seed for the random generator, so repeated calls give the same interval.

    Returns
    -------
    accuracy : float
        Accuracy computed on the full, un-resampled data.
    (ci_lower, ci_upper) : tuple
        2.5th and 97.5th percentiles of the bootstrapped accuracies.

    Raises
    ------
    ValueError
        If ``n_bootstrap`` is smaller than 1.
    """
    if n_bootstrap < 1:
        raise ValueError("n_bootstrap must be at least 1")

    # Work on plain NumPy arrays so the sampled indices are always positional.
    # Indexing a pandas Series with an integer array looks the values up as
    # index *labels*, which only coincides with positions for a 0..n-1 index,
    # and a plain list cannot be indexed with an array at all.
    a = np.asarray(a)
    b = np.asarray(b)

    accuracy = accuracy_score(a, b)
    n = len(a)

    rng = np.random.default_rng(seed=seed)
    bootstrapped_accuracies = []

    for _ in range(n_bootstrap):
        # The same indices are applied to both inputs to keep labels paired.
        sample_indices = rng.choice(n, size=n, replace=True)
        bootstrapped_accuracies.append(
            accuracy_score(a[sample_indices], b[sample_indices])
        )

    ci_lower = np.percentile(bootstrapped_accuracies, 2.5)
    ci_upper = np.percentile(bootstrapped_accuracies, 97.5)

    return accuracy, (ci_lower, ci_upper)


In [None]:
accuracy, ci = accuracy_w_ci(a1, a2)

In [None]:
print(f"Accuracy: {accuracy:.4f}")
print(f"95% CI: ({ci[0]:.4f}, {ci[1]:.4f})")