# ICC at the Category Level


In [33]:
import pandas as pd
import numpy as np
import pingouin as pg

# Load your dataset (modify path accordingly)
df = pd.read_csv("subjective_results_corrected.csv")

# Aggregate mean score per participant per category
df_category = df.groupby(["participant_id", "stimuli_group"])["score"].mean().reset_index()

# Display sample
df_category

Unnamed: 0,participant_id,stimuli_group,score
0,0686z3qx28ycuvnhfh47s4,A1,1.000000
1,0686z3qx28ycuvnhfh47s4,A3,1.000000
2,0686z3qx28ycuvnhfh47s4,A4,2.000000
3,0686z3qx28ycuvnhfh47s4,A5,1.333333
4,0686z3qx28ycuvnhfh47s4,A6,3.000000
...,...,...,...
2697,ztwx6tnel7ogegc0h9hjqr,E2,4.000000
2698,ztwx6tnel7ogegc0h9hjqr,E3,3.000000
2699,ztwx6tnel7ogegc0h9hjqr,E4,4.500000
2700,ztwx6tnel7ogegc0h9hjqr,E8,3.000000


In [38]:
# Pivot: Participants as rows, Categories as columns
df_wide = df_category.pivot(index="participant_id", columns="stimuli_group", values="score")

# Reset index to keep participant_id as a column
df_wide = df_wide.reset_index()

df_wide.to_csv("subjective_results_wide.csv", index=False)

# Display structure
df_wide

stimuli_group,participant_id,A1,A10,A2,A3,A4,A5,A6,A7,A8,...,E1,E10,E2,E3,E4,E5,E6,E7,E8,E9
0,0686z3qx28ycuvnhfh47s4,1.0,,,1.000000,2.0,1.333333,3.0,1.0,,...,5.0,4.0,5.0,5.0,,,4.0,3.0,,5.00
1,0om1fva4uhily4cvg6368lr,1.0,,2.0,1.000000,2.5,1.333333,,1.0,,...,5.0,3.0,,5.0,5.0,,3.5,3.0,,5.00
2,0ym8eutpogxka159e5ipr,4.0,,5.0,2.000000,2.5,,,4.0,,...,5.0,5.0,,4.5,5.0,5.0,3.0,4.0,,5.00
3,1fgxubpy0xwiv2pej4vr2b9,,,,1.000000,,2.000000,1.0,1.0,,...,5.0,,5.0,5.0,,5.0,,2.5,3.0,5.00
4,1op1nsk5as4g01i0b6df4,1.0,,2.0,1.500000,,,1.5,2.0,,...,,3.0,5.0,5.0,,,,3.5,1.5,5.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
87,x37bofyh0fazkr02jlztlm,,,3.0,3.666667,1.0,1.000000,3.0,,,...,,2.0,,,5.0,5.0,4.0,2.0,3.0,4.00
88,ymxfxn696we9rp1tnnub3f,1.0,,,1.333333,,1.666667,3.0,1.0,3.0,...,,,5.0,5.0,5.0,5.0,3.5,,4.0,5.00
89,ysobwzccpice783avao5c,2.0,,3.0,2.000000,,3.000000,3.0,2.0,,...,5.0,4.0,5.0,,5.0,5.0,,,4.0,4.00
90,yteg94bzq131bqsl7uyqo,2.0,,,3.000000,1.0,1.000000,2.0,3.0,,...,,3.0,5.0,5.0,,,,2.0,,5.00


In [28]:
df_wide.iloc[:, 1:] = df_wide.iloc[:, 1:].fillna(df_wide.iloc[:, 1:].mean()) # replace NaN with mean of the column

In [29]:
df_melt = df_wide.melt(id_vars="participant_id", var_name="stimuli_group", value_name="score")

# Display the structure
df_melt


Unnamed: 0,participant_id,stimuli_group,score
0,0686z3qx28ycuvnhfh47s4,A1,1.000000
1,0om1fva4uhily4cvg6368lr,A1,1.000000
2,0ym8eutpogxka159e5ipr,A1,4.000000
3,1fgxubpy0xwiv2pej4vr2b9,A1,1.934742
4,1op1nsk5as4g01i0b6df4,A1,1.000000
...,...,...,...
4595,x37bofyh0fazkr02jlztlm,E9,4.000000
4596,ymxfxn696we9rp1tnnub3f,E9,5.000000
4597,ysobwzccpice783avao5c,E9,4.000000
4598,yteg94bzq131bqsl7uyqo,E9,5.000000


In [39]:
# Compute ICC
icc_results = pg.intraclass_corr(data=df_melt, 
                                 targets="stimuli_group",  # Categories as "items"
                                 raters="participant_id",  # Participants as "raters"
                                 ratings="score")  # Ratings

# Extract ICC(2,1)
icc_2_1 = icc_results[icc_results["Type"] == "ICC2"].iloc[0]

# Print results
print(f"ICC(2,1): {icc_2_1['ICC']:.3f}, 95% CI [{icc_2_1['CI95%']}]")
print(f"F-value: {icc_2_1['F']:.3f}, p-value: {icc_2_1['pval']:.5f}")


ICC(2,1): 0.681, 95% CI [[0.6  0.77]]
F-value: 208.662, p-value: 0.00000


In [45]:
import krippendorff
import numpy as np

# Convert DataFrame to numpy array (required for Krippendorff’s function)
data_matrix = df_wide.iloc[:, 1:].to_numpy()

# Compute Krippendorff's Alpha
alpha = krippendorff.alpha(reliability_data=data_matrix, level_of_measurement='interval')

print(f"Krippendorff’s Alpha: {alpha:.3f}")


Krippendorff’s Alpha: 0.563


In [51]:
import krippendorff
import numpy as np
from scipy.stats import norm

def bootstrap_krippendorff(data, level_of_measurement='interval', n_bootstrap=1000, ci=0.95):
    """
    Computes Krippendorff's Alpha with bootstrapped confidence intervals.
    
    Parameters:
    - data: NumPy array of ratings (rows: raters, columns: items)
    - level_of_measurement: 'nominal', 'ordinal', or 'interval'
    - n_bootstrap: Number of bootstrap samples
    - ci: Confidence level (default 95%)

    Returns:
    - alpha: Krippendorff’s Alpha estimate
    - ci_lower: Lower bound of confidence interval
    - ci_upper: Upper bound of confidence interval
    """
    bootstrap_alphas = []
    n_raters, n_items = data.shape

    for _ in range(n_bootstrap):
        sampled_indices = np.random.choice(n_items, n_items, replace=True)  # Resampling with replacement
        sampled_data = data[:, sampled_indices]  # Sample new dataset
        alpha_boot = krippendorff.alpha(reliability_data=sampled_data, level_of_measurement=level_of_measurement)
        bootstrap_alphas.append(alpha_boot)

    # Compute confidence intervals
    ci_lower = np.percentile(bootstrap_alphas, (1 - ci) / 2 * 100)
    ci_upper = np.percentile(bootstrap_alphas, (1 + ci) / 2 * 100)

    return np.mean(bootstrap_alphas), ci_lower, ci_upper

# Convert DataFrame to numpy array
data_matrix = df_wide.iloc[:, 1:].to_numpy()

# Compute Krippendorff’s Alpha with bootstrapped CI
alpha, ci_lower, ci_upper = bootstrap_krippendorff(data_matrix)

print(f"Krippendorff’s Alpha: {alpha:.3f}, 95% CI [{ci_lower:.3f}, {ci_upper:.3f}]")


Krippendorff’s Alpha: 0.554, 95% CI [0.399, 0.675]
