In [1]:
import tensorflow as tf
import pandas as pd
from pathlib import Path
import numpy as np
from sklearn.metrics import cohen_kappa_score, accuracy_score
import pingouin as pg

In [2]:
# Load the ratings CSV; each row carries two ratings, rating_min and rating_max.
data_table = pd.read_csv('../../DataFolder/ProsodyData/data_valid.csv', index_col=None)

for rating_col in ('rating_min', 'rating_max'):
    # Scale by 4 and round so the rating becomes an integer class label
    # (presumably the raw ratings come in quarter-point steps -- TODO confirm).
    data_table[rating_col] = (data_table[rating_col] * 4).round().astype(int)
    # Collapse back to whole-point classes. The -0.15 offset shifts the rounding
    # threshold: fractional parts .0, .25 and .5 round down and only .75 rounds up,
    # which also avoids round-half-to-even on exact .5 values.
    data_table[f'{rating_col}_4class'] = (data_table[rating_col].astype(float) / 4 - 0.15).round().astype(int)

In [3]:
# Linear-weighted Cohen's kappa between the two rating columns over the whole table,
# first on the integer-scaled ratings ("13-class"), then on the collapsed "_4class" ratings.
cohen_kappa_score_13class, cohen_kappa_score_4class = (
    cohen_kappa_score(
        data_table[f'rating_min{suffix}'],
        data_table[f'rating_max{suffix}'],
        weights='linear',
    )
    for suffix in ('', '_4class')
)

In [4]:
# Per-passage agreement: for every passage_id compute Cohen's kappa between rating_min
# and rating_max with linear (L) and quadratic (Q) weights, on both the collapsed
# 4-class ratings and the integer-scaled ("13-class") ratings.
grouped = data_table.groupby('passage_id')
kappa_scores = []
for passage, passage_ratings in grouped:
    row = {'passage': passage, 'data_points': len(passage_ratings)}
    # Key order (L4, L13, Q4, Q13) determines the column order of kappa_df.
    for prefix, weighting in (('L', 'linear'), ('Q', 'quadratic')):
        for n_classes, suffix in ((4, '_4class'), (13, '')):
            row[f'{prefix}{n_classes}'] = cohen_kappa_score(
                passage_ratings[f'rating_min{suffix}'],
                passage_ratings[f'rating_max{suffix}'],
                weights=weighting,
            )
    kappa_scores.append(row)

# One row per passage.
kappa_df = pd.DataFrame(kappa_scores)

In [5]:
# Display the per-passage kappa table (one row per passage_id).
kappa_df

Unnamed: 0,passage,data_points,L4,L13,Q4,Q13
0,22040,195,0.694517,0.687727,0.825619,0.84525
1,22048,193,0.610101,0.598785,0.760183,0.793215
2,22060,195,0.617139,0.574278,0.767833,0.796769
3,22071,207,0.650213,0.616353,0.797872,0.808456
4,22084,188,0.686962,0.645019,0.815173,0.832777
5,23026,189,0.589019,0.561336,0.758134,0.785566
6,23031,202,0.593959,0.583934,0.738598,0.784979
7,23041,189,0.600693,0.576665,0.758093,0.781392
8,23043,200,0.713775,0.706422,0.825995,0.855976
9,23050,195,0.678506,0.671058,0.823548,0.853591


In [7]:
# Order the passages from highest to lowest value in column L4
# (the linear-weighted kappa on the 4-class ratings).
kappa_df = kappa_df.sort_values('L4', ascending=False)

In [15]:
# Split the passages into two groups with similar L4 scores: taking every other row
# (positions 0, 2, 4, ...) of kappa_df gives group 1, the remaining rows form the
# other group. The groups are only balanced if kappa_df is already sorted by L4.
# Group-1 passage ids are stored in a set, so membership in the set identifies the group.
group1 = kappa_df.iloc[0::2]
passage_set = {*group1['passage']}

In [22]:
# Show the passage ids assigned to group 1 (every other row of kappa_df).
passage_set

{22048,
 22084,
 23041,
 23043,
 32035,
 32038,
 32045,
 32052,
 33006,
 33018,
 33033,
 42086,
 42145,
 43001,
 43017}

In [None]:
# Inspect the ratings table, including the integer-scaled and *_4class rating columns.
data_table

In [None]:
# Calculate ICC for each passage_id
icc_results = []

for passage_id, passage_ratings in data_table.groupby('passage_id'):
    # pingouin's intraclass_corr takes long-format data (one row per target/rater pair),
    # so stack the two rating columns and tag each row with the column it came from.
    ratings_long = passage_ratings.melt(
        id_vars=['filename', 'passage_id'],
        value_vars=['rating_min', 'rating_max'],
        var_name='rater',
        value_name='rating_melt',
    )

    # intraclass_corr returns one row per ICC variant. Note that .round(3) rounds the
    # numeric columns of that table, so the reported p-value is rounded to 3 decimals too.
    icc_table = pg.intraclass_corr(
        data=ratings_long,
        targets='filename',
        raters='rater',
        ratings='rating_melt',
        nan_policy='omit',
    ).round(3)

    # Keep only the ICC2k row, which pingouin describes as "average random raters"
    # (i.e. the averaged-measure variant, not the single-rater one).
    icc2k = icc_table[icc_table['Type'] == 'ICC2k']
    icc_results.append({
        'passage_id': passage_id,
        'ICC': icc2k['ICC'].values[0],
        'CI95%': icc2k['CI95%'].values[0],
        'p-value': icc2k['pval'].values[0],
    })

# Convert the results into a DataFrame (one row per passage_id)
icc_df = pd.DataFrame(icc_results)

In [None]:
# Display the per-passage ICC results (ICC2k value, 95% CI and p-value per passage_id).
icc_df

In [None]:

# Overall ICC across all recordings (not split by passage).
# pingouin's intraclass_corr takes long-format data, so stack the two rating columns
# and tag each row with the column it came from.
long_df = data_table.melt(
    id_vars=['filename'],
    value_vars=['rating_min', 'rating_max'],
    var_name='rater',
    value_name='rating_melt',
)

# intraclass_corr returns one row per ICC variant. Note that .round(3) rounds the
# numeric columns of that table, so the reported p-value is rounded to 3 decimals too.
icc = pg.intraclass_corr(
    data=long_df,
    targets='filename',
    raters='rater',
    ratings='rating_melt',
    nan_policy='omit',
).round(3)

# Keep only the ICC2k row, which pingouin describes as "average random raters"
# (i.e. the averaged-measure variant, not the single-rater one).
icc2k = icc[icc['Type'] == 'ICC2k']
icc_value, ci95, p_value = (icc2k[column].values[0] for column in ('ICC', 'CI95%', 'pval'))

# Display the ICC result
icc_result = {'ICC': icc_value, 'CI95%': ci95, 'p-value': p_value}

icc_result