In [None]:
# Assess inter-reader reliability for global categorical assessment with kappa statistic

import numpy as np
import pandas as pd
from statsmodels.stats import inter_rater as irr
import krippendorff as kd 

import re
import plotly.graph_objects as go
import plotly.subplots as sp
import plotly.express as px


# Round 1

In [2]:
# MARIA round 1

df = pd.read_csv("Inputs/round_1_maria_global.csv", header=0)

# define maria categories inactive = [0], active = [1,2], severe = [>=3]
columns = ['mariaS', 'mariaE']
for column in columns:
    df[column] = df[column].astype(int)
    df[column] = np.where(df[column]>=3, 'severe', df[column])
    df[column] = np.where(df[column]=='0', 'inactive', df[column])
    df[column] = np.where(df[column]=='1', 'active', df[column])
    df[column] = np.where(df[column]=='2', 'active', df[column])


df.drop(columns=['scoreTypeName',], inplace=True)
df['scan'] = df['patientIdentifier']

# MariaS
df_s = df.drop(columns=['mariaE'])
df_s = df_s.pivot(index='scan', columns='scorerEmail', values='mariaS') # change to wide format with scorerEmail as columns, and patientIdentifier as index
agg = irr.aggregate_raters(df_s) # returns a tuple (data, categories)
kappa_r1_s = irr.fleiss_kappa(agg[0], method='fleiss')
print(f'Fleiss Kappa for round 1 MariaS: {kappa_r1_s }')

# Krippendorff's alpha for MariaS
df_s = df_s.replace({'inactive': 0, 'active': 1, 'severe': 2})
df_s = df_s.T
alpha_r1_s = kd.alpha(df_s, level_of_measurement='nominal')
print(f'Krippendorff\'s alpha for round 1 MariaS: {alpha_r1_s}')

# MariaE
df_e = df.drop(columns=['mariaS'])
df_e = df_e.pivot(index='scan', columns='scorerEmail', values='mariaE') # change to wide format with scorerEmail as columns, and patientIdentifier as index
agg = irr.aggregate_raters(df_e) # returns a tuple (data, categories)
kappa_r1_e = irr.fleiss_kappa(agg[0], method='fleiss')
print(f'Fleiss Kappa for round 1 MariaE: {kappa_r1_e}')

# Krippendorff's alpha for MariaE
df_e = df_e.replace({'inactive': 0, 'active': 1, 'severe': 2})
df_e = df_e.T
alpha_r1_e = kd.alpha(df_e, level_of_measurement='nominal')
print(f'Krippendorff\'s alpha for round 1 MariaE: {alpha_r1_e}')

Fleiss Kappa for round 1 MariaS: 0.2656587473002159
Krippendorff's alpha for round 1 MariaS: 0.28401727861771064
Fleiss Kappa for round 1 MariaE: 0.25996204933586325
Krippendorff's alpha for round 1 MariaE: 0.2784629981024668


  df_s = df_s.replace({'inactive': 0, 'active': 1, 'severe': 2})
  df_e = df_e.replace({'inactive': 0, 'active': 1, 'severe': 2})


# Round 2

In [3]:
# MARIA round 2
# Change into Categories

df = pd.read_csv("Inputs/round_2_maria_global.csv", header=0)

# define maria categories inactive = [0], active = [1,2], severe = [>=3]

columns = ['mariaS', 'mariaE']
for column in columns:
    df[column] = df[column].astype(int)
    df[column] = np.where(df[column]>=3, 'severe', df[column])
    df[column] = np.where(df[column]=='0', 'inactive', df[column])
    df[column] = np.where(df[column]=='1', 'active', df[column])
    df[column] = np.where(df[column]=='2', 'active', df[column])

df['scan'] = df['patientIdentifier'] + ' ' + df['procedureName'] # create a new column 'scan' that is the combination of patientIdentifier and procedureName
df
df.drop(columns=['patientIdentifier', 'procedureName', 'scoreTypeName'], inplace=True)

# MariaS
df_s = df.drop(columns=['mariaE'])
df_s = df_s.pivot(index='scan', columns='scorerEmail', values='mariaS') # change to wide format with scorerEmail as columns, and patientIdentifier as index
agg = irr.aggregate_raters(df_s) # returns a tuple (data, categories)
kappa_r2_s = irr.fleiss_kappa(agg[0], method='fleiss')
print(f'Kappa for round 2 MariaS: {kappa_r2_s }')

# Krippendorff's alpha for MariaS
df_s = df_s.replace({'inactive': 0, 'active': 1, 'severe': 2})
df_s = df_s.T
alpha_r2_s = kd.alpha(df_s, level_of_measurement='nominal')
print(f'Krippendorff\'s alpha for round 2 MariaS: {alpha_r2_s}')

# MariaE
df_e = df.drop(columns=['mariaS'])
df_e = df_e.pivot(index='scan', columns='scorerEmail', values='mariaE') # change to wide format with scorerEmail as columns, and patientIdentifier as index
agg = irr.aggregate_raters(df_e) # returns a tuple (data, categories)
kappa_r2_e = irr.fleiss_kappa(agg[0], method='fleiss')
print(f'Kappa for round 2 MariaE: {kappa_r2_e}')

# Krippendorff's alpha for MariaE
df_e = df_e.replace({'inactive': 0, 'active': 1, 'severe': 2})
df_e = df_e.T
alpha_r2_e = kd.alpha(df_e, level_of_measurement='nominal')
print(f'Krippendorff\'s alpha for round 2 MariaE: {alpha_r2_e}')

Kappa for round 2 MariaS: 0.6304591265397538
Krippendorff's alpha for round 2 MariaS: 0.6345651362448674
Kappa for round 2 MariaE: 0.5485553772070626
Krippendorff's alpha for round 2 MariaE: 0.5535714285714286


  df_s = df_s.replace({'inactive': 0, 'active': 1, 'severe': 2})
  df_e = df_e.replace({'inactive': 0, 'active': 1, 'severe': 2})


# Validation

In [4]:
# MARIA round 2

df = pd.read_csv("Inputs/validation_maria_global.csv", header=0)

# define maria categories inactive = [0], active = [1,2], severe = [>=3]

columns = ['mariaS', 'mariaE']
for column in columns:
    df[column] = df[column].astype(int)
    df[column] = np.where(df[column]>=3, 'severe', df[column])
    df[column] = np.where(df[column]=='0', 'inactive', df[column])
    df[column] = np.where(df[column]=='1', 'active', df[column])
    df[column] = np.where(df[column]=='2', 'active', df[column])
    df[column] = df[column].astype(str) # convert to categorical type

df['scan'] = df['patientIdentifier'] + ' ' + df['procedureName'] # create a new column 'scan' that is the combination of patientIdentifier and procedureName

df.drop(columns=['patientIdentifier', 'procedureName', 'scoreTypeName'], inplace=True)

# MariaS
df_s = df.drop(columns=['mariaE'])
# Remove the rows with only one rating
df_s = df_s[df_s['scan'].map(df['scan'].value_counts()) > 1]
n_scans_double = df_s.value_counts('scan').shape
print(f'Number of scans which were at least double-read: {n_scans_double[0]}')
print('Number of raters and number of scans:', df_s['scan'].value_counts().value_counts()) # print how many scans were rated by 5, 4, 3, 2 raters
df_s = df_s.pivot(index='scan', columns='scorerEmail', values='mariaS') # change to wide format with scorerEmail as columns, and patientIdentifier as index
print('Pivoted df_s shape:', df_s.shape)
df_s = df_s.astype(str)
df_s = df_s.T
print('Transposed df_s shape:', df_s.shape)

# change inactive to 0, active to 1, severe to 2, and nan to np.nan
df_s = df_s.replace({'inactive': 0, 'active': 1, 'severe': 2})
df_s = df_s.astype(float)

alpha_val_s = kd.alpha(df_s)
print(f'Krippendorff alpha for validation MariaS: {alpha_val_s }')

# MariaE
df_e = df.drop(columns=['mariaS'])
# Remove the rows with only one rating
df_e = df_e[df_e['scan'].map(df['scan'].value_counts()) > 1]
n_scans_double = df_e.value_counts('scan').shape
print(f'Number of scans which were at least double-read: {n_scans_double[0]}')
print('Number of raters and number of scans:', df_e['scan'].value_counts().value_counts()) # print how many scans were rated by 5, 4, 3, 2 raters
df_e = df_e.pivot(index='scan', columns='scorerEmail', values='mariaE') # change to wide format with scorerEmail as columns, and patientIdentifier as index
print('Pivoted df_e shape:', df_e.shape)
df_e = df_e.astype(str)
df_e = df_e.T
print('Transposed df_e shape:', df_e.shape)
# change inactive to 0, active to 1, severe to 2, and nan to np.nan
df_e = df_e.replace({'inactive': 0, 'active': 1, 'severe': 2})
df_e = df_e.astype(float)
alpha_val_e = kd.alpha(df_e)
print(f'Krippendorff alpha for validation MariaE: {alpha_val_e }')

Number of scans which were at least double-read: 53
Number of raters and number of scans: count
2    51
5     2
Name: count, dtype: int64
Pivoted df_s shape: (53, 5)
Transposed df_s shape: (5, 53)
Krippendorff alpha for validation MariaS: 0.6673858324343761
Number of scans which were at least double-read: 53
Number of raters and number of scans: count
2    51
5     2
Name: count, dtype: int64
Pivoted df_e shape: (53, 5)
Transposed df_e shape: (5, 53)
Krippendorff alpha for validation MariaE: 0.7195430077996265


  df_s = df_s.replace({'inactive': 0, 'active': 1, 'severe': 2})
  df_e = df_e.replace({'inactive': 0, 'active': 1, 'severe': 2})


# Visualise results

#### Combined table

In [5]:
df_results = pd.DataFrame({
    'round': ['Round 1', 'Round 2', 'Validation set'],
    'mariaS': [alpha_r1_s, alpha_r2_s, alpha_val_s],
    'mariaE': [alpha_r1_e, alpha_r2_e, alpha_val_e]
})

df_results

Unnamed: 0,round,mariaS,mariaE
0,Round 1,0.284017,0.278463
1,Round 2,0.634565,0.553571
2,Validation set,0.667386,0.719543


#### Plot results

In [6]:
import re
import plotly.graph_objects as go
import plotly.subplots as sp
import plotly.express as px



def color_to_rgba(color, alpha=1.0):
    if color.startswith('rgba'):
        return color
    if color.startswith('rgb'):
        nums = re.findall(r'\d+', color)
        return f'rgba({nums[0]},{nums[1]},{nums[2]},{alpha})'
    color = color.lstrip('#')
    return f'rgba({int(color[0:2],16)},{int(color[2:4],16)},{int(color[4:6],16)},{alpha})'

color1 = px.colors.qualitative.D3[0]
color2 = px.colors.qualitative.D3[1]

fig = sp.make_subplots(rows=1, cols=2, subplot_titles=("MaRIAs", "MaRIAs-E"))

# Barplot for MaRIAs
fig.add_trace(
    go.Bar(
        x=df_results['round'],
        y=df_results['mariaS'],
        name='MaRIAs',
        marker=dict(
            color=color_to_rgba(color1, 0.55),
            line=dict(color=color1, width=2)
        ),
        opacity=1,
        width=0.6
    ),
    row=1, col=1
)

# Barplot for MaRIAs-E
fig.add_trace(
    go.Bar(
        x=df_results['round'],
        y=df_results['mariaE'],
        name='MaRIAs-E',
        marker=dict(
            color=color_to_rgba(color2, 0.55),
            line=dict(color=color2, width=2)
        ),
        opacity=1,
        width=0.6
    ),
    row=1, col=2
)

# Add ICC annotations for MaRIAs
for i, (x, y) in enumerate(zip(df_results['round'], df_results['mariaS'])):
    fig.add_annotation(
        x=x, y=y,
        text=f"{y:.2f}",
        showarrow=False,
        yshift=10,
        font=dict(size=11),
        row=1, col=1
    )

# Add ICC annotations for MaRIAs-E
for i, (x, y) in enumerate(zip(df_results['round'], df_results['mariaE'])):
    fig.add_annotation(
        x=x, y=y,
        text=f"{y:.2f}",
        showarrow=False,
        yshift=10,
        font=dict(size=11),
        row=1, col=2
    )

# Final layout
fig.update_layout(
    title_text="MaRIA-derived categorical classification",
    title_x=0.5,
    yaxis_title="Krippendorff's alpha",
    barmode='group',
    height=400,
    width=700,
    template="simple_white",
    showlegend=False,
)


fig.update_yaxes(range=[0, 1], showgrid=True)
fig.show()
