In [None]:
# This code calculates reliability (Cohen's Kappa) and confusion matrices for human/dictionary annotation
# It assumes there are two data files: a long-format file (used for Cohen's Kappa)
# and a wide-format file (used for the confusion matrices)

# Dependencies
    # Pandas 
    # sklearn  
    # matplotlib

import matplotlib.pyplot as plt
import pandas as pd
from sklearn.metrics import cohen_kappa_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
from sklearn.preprocessing import LabelEncoder
from sklearn.utils.multiclass import unique_labels

In [None]:
# Load the two reliability data files.
# Both CSVs are expected to be in the same working directory as this notebook.

long_csv = 'reliability_data_long.csv'
wide_csv = 'reliability_data_wide.csv'

data_long = pd.read_csv(long_csv)
data_wide = pd.read_csv(wide_csv)

In [None]:
# Sanity check: did the wide file get read in correctly?
# Show the frame first, then fail fast with a readable message if any column
# that the confusion-matrix cell relies on is absent.

print(data_wide)

expected_wide_columns = [
    "human_self_report", "dictionary_self_report",
    "human_behavior", "dictionary_behavior",
    "human_neurophysiological", "dictionary_neurophysiological",
]
missing_wide_columns = [col for col in expected_wide_columns if col not in data_wide.columns]
assert not missing_wide_columns, (
    "reliability_data_wide.csv is missing expected columns: {}".format(missing_wide_columns)
)

In [None]:
# Sanity check: did the long file get read in correctly?
# Show the frame first, then fail fast with a readable message if either column
# that the Cohen's Kappa cell relies on is absent.

print(data_long)

expected_long_columns = ["human", "dictionary"]
missing_long_columns = [col for col in expected_long_columns if col not in data_long.columns]
assert not missing_long_columns, (
    "reliability_data_long.csv is missing expected columns: {}".format(missing_long_columns)
)

In [None]:
# Cohen's Kappa: agreement between the human and the dictionary annotations
# in the long-format data

human_labels = data_long["human"]
dictionary_labels = data_long["dictionary"]
kappa = cohen_kappa_score(human_labels, dictionary_labels)

print(kappa)

In [None]:
# Make one confusion matrix per annotation category (human = rows, dictionary = columns)
# and draw the three of them side by side in a single figure.

def _confusion_with_display(human, dictionary):
    """Build the confusion matrix and its display object for one annotation pair.

    Parameters
    ----------
    human : array-like
        Human annotations (treated as the "true" labels, i.e. the matrix rows).
    dictionary : array-like
        Dictionary annotations (treated as the "predicted" labels, i.e. the columns).

    Returns
    -------
    (matrix, disp)
        The confusion matrix and a ConfusionMatrixDisplay wrapping it.

    The labels are the sorted values that occur in either input, which is what
    confusion_matrix uses when no labels are given, so the matrix itself is the
    same as a plain confusion_matrix(human, dictionary) call. Handing the same
    labels to the display makes the tick labels show the actual annotation
    values instead of positional indices 0..n-1.
    """
    labels = unique_labels(human, dictionary)
    matrix = confusion_matrix(human, dictionary, labels=labels)
    disp = ConfusionMatrixDisplay(confusion_matrix=matrix, display_labels=labels)
    return matrix, disp


cm_self, disp_self = _confusion_with_display(
    data_wide["human_self_report"], data_wide["dictionary_self_report"]
)
cm_behavior, disp_behavior = _confusion_with_display(
    data_wide["human_behavior"], data_wide["dictionary_behavior"]
)
cm_neurophysiological, disp_neurophysiological = _confusion_with_display(
    data_wide["human_neurophysiological"], data_wide["dictionary_neurophysiological"]
)

# Font sizes used across the figure
title_fontsize = 16
label_fontsize = 14
cell_text_fontsize = 14
colorbar_fontsize = 14

# (subplot title, display object) for each panel, in left-to-right order
panels = [
    ("Self Report", disp_self),
    ("Behavior", disp_behavior),
    ("Neurophysiological", disp_neurophysiological),
]

# Create subplots: one row, one column per panel
fig, axes = plt.subplots(1, len(panels), figsize=(15, 5))

for ax, (title, disp) in zip(axes, panels):
    # Plot the confusion matrix into its own subplot
    disp.plot(ax=ax, cmap="Blues", xticks_rotation='vertical')

    ax.set_title(title, fontsize=title_fontsize)

    # plot() sets its own axis labels; replace them with the annotator names
    ax.set_xlabel('Dictionary Annotation', fontsize=label_fontsize)
    ax.set_ylabel('Human Annotation', fontsize=label_fontsize)

    # Enlarge the per-cell count text
    for text in ax.texts:
        text.set_fontsize(cell_text_fontsize)

    # Enlarge the tick labels on this panel's colorbar
    disp.im_.colorbar.ax.tick_params(labelsize=colorbar_fontsize)

# Adjust the layout
plt.tight_layout()

# Save the plot as a JPEG image (must happen before show(), which may clear the figure)
plt.savefig('confusion.jpg', format='jpeg', dpi=300, bbox_inches='tight')

# Display the combined plot
plt.show()

