In [1]:
import numpy as np
import pandas as pd
import pingouin as pg

def get_eval_list(file_path):
    """Return the ``score`` column of the CSV at ``file_path`` as a plain list.

    Args:
        file_path: Path (or any input accepted by ``pd.read_csv``) of the
            CSV file to load.

    Returns:
        list: The values of the ``score`` column, in file order.

    Raises:
        KeyError: If the CSV has no ``score`` column.
    """
    return pd.read_csv(file_path)["score"].tolist()

# Load each rater's score list; the files are read in rater order (p1, p2, p3).
rater1, rater2, rater3 = map(
    get_eval_list,
    ('evaluation_p1.csv', 'evaluation_p2.csv', 'evaluation_p3.csv'),
)

In [4]:
# Wide table: one column per rater, one row per position in the score lists.
df = pd.DataFrame(dict(rater1=rater1, rater2=rater2, rater3=rater3))

# Reshape to long format -- one (index, rater, score) row per rating -- which
# is the layout handed to pg.intraclass_corr below.
df_long = pd.melt(
    df.reset_index(),
    id_vars='index',
    var_name='rater',
    value_name='score',
)

icc = pg.intraclass_corr(
    data=df_long, targets='index', raters='rater', ratings='score'
)

# print(icc)

# Keep only the estimate and its 95% confidence interval for each ICC type.
icc2_single = icc[icc['Type'].eq('ICC2')][['ICC', 'CI95%']]
icc2_avg = icc[icc['Type'].eq('ICC2k')][['ICC', 'CI95%']]
icc3_avg = icc[icc['Type'].eq('ICC3k')][['ICC', 'CI95%']]

print(f"ICC(2,k) [Average, Absolute]: \n{icc2_avg.to_string(index=False)} \n")
print(f"ICC(3,k) [Average, Consistency]: \n{icc3_avg.to_string(index=False)}")



ICC(2,k) [Average, Absolute]: 
    ICC        CI95%
0.83922 [0.81, 0.87] 

ICC(3,k) [Average, Consistency]: 
     ICC        CI95%
0.839311 [0.81, 0.87]


In [5]:
# Weighted Kappa

import itertools
from sklearn.metrics import cohen_kappa_score

# Pairwise Cohen's kappa (quadratic weights) between every pair of raters,
# followed by the unweighted mean of the pairwise values.
raters = [rater1, rater2, rater3]
names = ['R1', 'R2', 'R3']

kappas = []

# Derive the pairs from the rater list itself rather than a hard-coded count,
# so adding or removing a rater above cannot silently skip comparisons or
# index past the end of the list.
for (i, j) in itertools.combinations(range(len(raters)), 2):
    k = cohen_kappa_score(
        raters[i],
        raters[j],
        weights='quadratic'
    )
    kappas.append(k)
    print(f"{names[i]} vs {names[j]}: {k:.3f}")

print("Average Quadratic Weighted Kappa:", sum(kappas) / len(kappas))


R1 vs R2: 0.628
R1 vs R3: 0.663
R2 vs R3: 0.614
Average Quadratic Weighted Kappa: 0.6352757240106623


In [7]:
import numpy as np
from sklearn.metrics import cohen_kappa_score
from itertools import combinations


# Score lists of the three raters, in rater order; passed to
# calculate_average_weighted_kappa below.
raters_data = [rater1, rater2, rater3]

def calculate_average_weighted_kappa(raters_list, weights_type='quadratic'):
    """Print every pairwise weighted Cohen's kappa and return their mean.

    Args:
        raters_list: Sequence of per-rater score sequences. Every unordered
            pair of entries is compared once, in index order.
        weights_type: Value forwarded as ``weights`` to
            ``cohen_kappa_score``; defaults to ``'quadratic'``.

    Returns:
        The ``np.mean`` of the pairwise kappa values.
    """
    rater_count = len(raters_list)

    print(f"--- {weights_type} weighting ---")

    pairwise_kappas = []
    for first, second in combinations(range(rater_count), 2):
        kappa = cohen_kappa_score(
            raters_list[first],
            raters_list[second],
            weights=weights_type,
        )
        pairwise_kappas.append(kappa)
        # Raters are reported 1-based to match how they are named elsewhere.
        print(f"Rater {first+1} vs Rater {second+1}: {kappa:.4f}")

    return np.mean(pairwise_kappas)

# Average the pairwise kappas using quadratic weights and report the result.
avg_quad_kappa = calculate_average_weighted_kappa(
    raters_data,
    weights_type='quadratic',
)
print(f"\n>> Average Quadratic Weighted Kappa: {avg_quad_kappa:.4f}")

--- quadratic weighting ---
Rater 1 vs Rater 2: 0.6285
Rater 1 vs Rater 3: 0.6629
Rater 2 vs Rater 3: 0.6145

>> Average Quadratic Weighted Kappa: 0.6353
