# Analyzing annotations of linguistic data from paired building experiment

We ran two separate annotation studies, each with two naive participants from the lab.

In [None]:
import os
import sys
import urllib, io
os.getcwd()
sys.path.append("../../")
sys.path.append("../../utils")
sys.path.append("../../analysis/utils")


import numpy as np
import scipy.stats as stats
import pandas as pd
from sklearn.cluster import AffinityPropagation
from sklearn.manifold import TSNE

import pymongo as pm
from collections import Counter
import json
import re
import ast

from PIL import Image, ImageOps, ImageDraw, ImageFont 

from io import BytesIO
import base64

import  matplotlib
from matplotlib import pylab, mlab, pyplot
%matplotlib inline
from IPython.core.pylabtools import figsize, getfigs
plt = pyplot
import matplotlib as mpl
mpl.rcParams['pdf.fonttype'] = 42

import seaborn as sns
sns.set_context('talk')
sns.set_style('darkgrid')

from IPython.display import clear_output

import warnings
warnings.filterwarnings("ignore", category=DeprecationWarning)
warnings.filterwarnings("ignore", message="numpy.dtype size changed")
warnings.filterwarnings("ignore", message="numpy.ufunc size changed")

# import drawing_utils as drawing
import importlib
import scoring

In [None]:
! pwd

In [None]:
## directory & file hierarchy
# Top-level locations, resolved relative to the notebook's working directory.
proj_dir = os.path.abspath('../..')
analysis_dir = os.path.abspath('../')
datavol_dir = os.path.join(proj_dir, 'data')
results_dir = os.path.join(proj_dir, 'results')
exp_dir = os.path.abspath(os.path.join(proj_dir, 'behavioral_experiments'))
png_dir = os.path.abspath(os.path.join(datavol_dir, 'png'))

# Output folders nested under the results directory.
plot_dir, csv_dir, json_dir = (
    os.path.join(results_dir, subfolder) for subfolder in ('plots', 'csv', 'json')
)

## add helpers to python path
stimuli_dir = os.path.join(proj_dir, 'stimuli')
if stimuli_dir not in sys.path:
    sys.path.append(stimuli_dir)

# Create the output folders that are missing (json_dir is defined but not created here).
for out_dir in (results_dir, plot_dir, csv_dir):
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)

### load annotations and wrangle to match

In [None]:
# first set of annotations (cogsci 2021)
jj_csv_path = '{}/csv/JJ_content.csv'.format(results_dir)
df_jj = pd.read_csv(jj_csv_path)

# second set of annotations (2023)
zc_csv_path = '{}/csv/ref_exp_annotations_2023.csv'.format(results_dir)
df_zc = pd.read_csv(zc_csv_path)

# chat messages with ids, used below to join the two annotation sets
chat_csv_path = '{}/csv/df_chat_ids_cogsci21.csv'.format(results_dir)
df_chat = pd.read_csv(chat_csv_path)

In [None]:
# Duplicate df_chat's identifier columns under the snake_case names used as merge keys below.
for alias, source_col in (('dyad_gameid', 'gameid'),
                          ('turn_num', 'turnNum'),
                          ('trial_num', 'trialNum')):
    df_chat.loc[:, alias] = df_chat[source_col]

In [None]:
# message_num is the turn number halved and truncated to an integer.
df_jj.loc[:, 'message_num'] = (df_jj['turnNum'] / 2).astype(int)

# Columns kept from the first annotation set (names as spelled in the CSV).
jj_keep_columns = [
    'gameid', 'trialNum', 'message_num', 'turnNum', 'message',
    'block_justin', 'toer_justin', 'scene_justin', 'Flagged', 'phrases_justin',
    'block_julia', 'tower_juli', 'scene_juli', 'phrases_julia',
]

# Harmonise identifier names and repair the misspelled annotation columns.
jj_column_renames = {
    'gameid': 'dyad_gameid',
    'trialNum': 'trial_num',
    'turnNum': 'turn_num',
    'toer_justin': 'tower_justin',
    'tower_juli': 'tower_julia',
    'scene_juli': 'scene_julia',
}

df_jj_small = df_jj[jj_keep_columns].copy()
df_jj_small = df_jj_small.rename(columns=jj_column_renames)

# Attach message_id from the chat table via the (game, trial, turn) key.
jj_merge_keys = ['dyad_gameid', 'trial_num', 'turn_num']
df_jj_small = df_jj_small.merge(
    df_chat[jj_merge_keys + ['message_id']],
    on=jj_merge_keys,
    how='left',
)

df_jj_small = (
    df_jj_small
    .sort_values(['dyad_gameid', 'trial_num', 'message_num'])
    .reset_index(drop=True)
)

df_jj_small

In [None]:
# melt and pivot
# Goal: turn the wide '<measure>_<annotator>' columns into one row per message and annotator.
message_id_columns = ['dyad_gameid', 'trial_num', 'turn_num', 'message_id']

# Annotation columns are the ones tagged with an annotator suffix.
suffix_columns = [col for col in df_jj_small.columns
                  if col.endswith(('_justin', '_julia'))]

suffix_df = df_jj_small[message_id_columns + suffix_columns]

# Long format: one row per (message, annotation column).
melted_df = pd.melt(suffix_df,
                    id_vars=message_id_columns,
                    var_name='Type',
                    value_name='Value')

# Split '<measure>_<annotator>' into its two parts, then discard the combined label.
melted_df[['Category', 'Suffix']] = melted_df['Type'].str.split('_', expand=True)
melted_df.drop(columns=['Type'], inplace=True)

# Wide again, but now with one column per measure and one row per annotator.
pivoted_df = melted_df.pivot(index=message_id_columns + ['Suffix'],
                             columns='Category',
                             values='Value').reset_index()

pivoted_df = pivoted_df.rename(columns={'Suffix': 'workerID'})


In [None]:
# adjust a bad annotation: a tower entry recorded as the letter 'L' is replaced by 1
is_mislabeled_tower = pivoted_df['tower'] == 'L'
pivoted_df.loc[is_mislabeled_tower, 'tower'] = 1

In [None]:
# convert to ints
# Missing annotations are treated as a count of 0.
# The columns are replaced by plain assignment: `df.loc[:, col] = values` writes into the
# existing column in place under recent pandas, so the column would keep its original
# dtype and the integer conversion would not take effect.
for count_col in ['block', 'tower', 'scene']:
    pivoted_df[count_col] = pivoted_df[count_col].fillna(0).astype(int)

In [None]:
# Display the per-annotator table for inspection.
pivoted_df

In [None]:
# merge in metadata
# Bring the message text and message number back onto each annotator row.
message_metadata = df_jj_small[['dyad_gameid', 'message_id', 'message_num', 'message']]

pivoted_df_merged = pivoted_df.merge(
    message_metadata,
    on=['message_id', 'dyad_gameid'],
    how='left',
)

pivoted_df_merged

In [None]:
# Build a normalised 'content' column from the annotated phrases: lower-case the text,
# then strip the punctuation characters ~ ( ) , ' : ;
# The removal is one explicit regex character class. Relying on the implicit default of
# `Series.str.replace` is fragile: pandas >= 2.0 treats the pattern as literal text, so
# escaped patterns such as r'\(' would silently stop matching.
pivoted_df_merged['content'] = (
    pivoted_df_merged['phrases']
    .str.lower()
    .str.replace(r"[~(),':;]", '', regex=True)
)

In [None]:
# Columns kept from the second annotation set.
zc_keep_columns = ['workerID', 'message_id', 'dyad_gameid', 'msgNum',
                   'message', 'block', 'tower', 'refExps']
df_zc_small = df_zc[zc_keep_columns].copy()

# Look up each message's trial number in the chat table.
trial_lookup = df_chat[['message_id', 'trialNum']]
df_zc_small = df_zc_small.merge(trial_lookup, how='left', on='message_id')

# Match the column names used for the first annotation set.
df_zc_small = df_zc_small.rename(columns={'trialNum': 'trial_num',
                                          'msgNum': 'message_num'})

# turn_num is the message number doubled (the inverse of the halving applied to df_jj).
df_zc_small.loc[:, 'turn_num'] = (df_zc_small['message_num'] * 2).astype(int)

df_zc_small = (
    df_zc_small
    .sort_values(['workerID', 'dyad_gameid', 'trial_num', 'message_num'])
    .reset_index(drop=True)
)

df_zc_small

In [None]:
# Build a normalised 'content' column from the referring expressions: lower-case the
# text, then strip the punctuation characters ~ ( ) , ' : ;
# The removal is one explicit regex character class. Relying on the implicit default of
# `Series.str.replace` is fragile: pandas >= 2.0 treats the pattern as literal text, so
# escaped patterns such as r'\(' would silently stop matching.
df_zc_small['content'] = (
    df_zc_small['refExps']
    .str.lower()
    .str.replace(r"[~(),':;]", '', regex=True)
)

In [None]:
# Stack both annotation sets into one table.
df_ref_exps = pd.concat([pivoted_df_merged, df_zc_small], ignore_index=True)

# Add the left/right target of each message from the chat table.
target_lookup = df_chat[['message_id', 'leftTarget', 'rightTarget']]
df_ref_exps = df_ref_exps.merge(target_lookup, how='left', on='message_id')

# tower_pair joins the two target names with an underscore.
df_ref_exps.loc[:, 'tower_pair'] = df_ref_exps['leftTarget'] + '_' + df_ref_exps['rightTarget']

# rep is trial_num divided by 3, truncated to an integer, plus one.
df_ref_exps.loc[:, 'rep'] = (df_ref_exps['trial_num'] / 3).astype(int) + 1

In [None]:
# Cast every entry of 'content' to str so the column holds strings only.
df_ref_exps.loc[:,'content'] = df_ref_exps.loc[:,'content'].astype(str)

In [None]:
# Save the combined annotation table as CSV.
# NOTE(review): this writes under analysis_dir ('<analysis_dir>/results/csv'), whereas the
# input CSVs above are read from '<results_dir>/csv' -- confirm the intended output folder.
df_ref_exps.to_csv('{}/results/csv/df_ref_exps.csv'.format(analysis_dir))

# Inter-rater Reliability

In [None]:
# Long format: one row per (annotator, message, expression type), with the count in n_refs.
melt_id_columns = ['workerID', 'dyad_gameid', 'message_id', 'message_num',
                   'trial_num', 'tower_pair', 'rep']

df_ref_exps_melt = (
    df_ref_exps
    .melt(id_vars=melt_id_columns, value_vars=['block', 'tower'], value_name='n_refs')
    .rename(columns={'variable': 'exp_type'})
)
df_ref_exps_melt

In [None]:
# Save the long-format table into csv_dir, the folder the input CSVs are read from and
# that the setup cell creates. The path '<results_dir>/results/csv/...' repeated the
# 'results' component and pointed at a directory this notebook never creates.
df_ref_exps_melt.to_csv(os.path.join(csv_dir, 'df_ref_exps_melt.csv'))

In [None]:
# Wide table: one row per message_id, with a (block|tower, workerID) column per rater.
df_ref_exps_table = df_ref_exps.pivot(index='message_id', columns='workerID', values=['block','tower'])
df_ref_exps_table

In [None]:
# Proportion of messages on which all four raters gave the same block count.
block_ratings = df_ref_exps_table['block']
all_raters_agree_block = (
    (block_ratings['charles'] == block_ratings['julia'])
    & (block_ratings['julia'] == block_ratings['justin'])
    & (block_ratings['justin'] == block_ratings['zoe'])
)
prop_all_agree_block = np.mean(all_raters_agree_block)

print('%.1f' % (prop_all_agree_block*100) + '% total agreement on blocks')

In [None]:
# Proportion of messages on which all four raters gave the same tower count.
tower_ratings = df_ref_exps_table['tower']
all_raters_agree_tower = (
    (tower_ratings['charles'] == tower_ratings['julia'])
    & (tower_ratings['julia'] == tower_ratings['justin'])
    & (tower_ratings['justin'] == tower_ratings['zoe'])
)
prop_all_agree_tower = np.mean(all_raters_agree_tower)

print('%.1f' % (prop_all_agree_tower*100) + '% total agreement on towers')

## calculate inter rater reliability with ICC
https://en.wikipedia.org/wiki/Intraclass_correlation

In [None]:
import pingouin as pg
# https://www.statology.org/intraclass-correlation-coefficient-python/

In [None]:
# Intraclass correlation with messages as targets and annotators as raters, using the
# block and tower rows together.
# NOTE(review): each (message_id, workerID) pair appears once per exp_type in this table --
# confirm how pingouin combines the repeated target/rater entries.
pg.intraclass_corr(data = df_ref_exps_melt, targets="message_id", raters="workerID", ratings="n_refs")

In [None]:
# Intraclass correlation restricted to the block counts.
pg.intraclass_corr(data = df_ref_exps_melt.query('exp_type=="block"'), 
                   targets="message_id", raters="workerID", ratings="n_refs")

In [None]:
# Intraclass correlation restricted to the tower counts.
pg.intraclass_corr(data = df_ref_exps_melt.query('exp_type=="tower"'), 
                   targets="message_id", raters="workerID", ratings="n_refs")

### calculate ICC by hand - something wrong here

In [None]:
# Dimensions of the ratings table used by the hand-computed ICC: one row per message and
# one column per rater. Both are read from the table instead of hard-coding the number
# of raters, so the calculation stays correct if the set of annotators changes.
n_message, n_rater = df_ref_exps_table['block'].shape

In [None]:
# Hand-computed ICC for the block counts: two-way random effects, absolute agreement,
# single rater, i.e. ICC(2,1). Uses n_message and n_rater defined in the cell above.
block_table = df_ref_exps_table['block']

# mean for each rater
block_rater_means = block_table.mean()

# mean for each message
block_message_means = block_table.mean(axis=1)

# overall mean
overall_mean = block_message_means.mean()

# between messages sum of squares
block_message_ss = n_rater * ((block_message_means - overall_mean) ** 2).sum()

# between raters sum of squares
block_rater_ss = n_message * ((block_rater_means - overall_mean) ** 2).sum()

# Residual (SSE): deviation left after removing BOTH the message effect and the rater
# effect. Subtracting only the message mean gives the within-message sum of squares,
# which still contains the rater effect and does not match the (k-1)(n-1) degrees of
# freedom used for MSE below.
SE = (block_table
      .sub(block_message_means, axis=0)
      .sub(block_rater_means, axis=1)
      + overall_mean) ** 2
SSE = SE.sum().sum()

# mean squares for each source of variance

# between messages
MSM = block_message_ss / (n_message - 1)

# between raters
MSR = block_rater_ss / (n_rater - 1)

# Residual (MSE)
MSE = SSE / ((n_rater - 1) * (n_message - 1))

# ICC(2,1) = (MSM - MSE) / (MSM + (k - 1) * MSE + k * (MSR - MSE) / n)
ICC = (MSM - MSE) / (MSM + (n_rater - 1) * MSE + n_rater * (MSR - MSE) / n_message)
ICC


In [None]:
# Hand-computed ICC for the tower counts: two-way random effects, absolute agreement,
# single rater, i.e. ICC(2,1). Uses n_message and n_rater defined in an earlier cell.
tower_table = df_ref_exps_table['tower']

# mean for each rater
tower_rater_means = tower_table.mean()

# mean for each message
tower_message_means = tower_table.mean(axis=1)

# overall mean
overall_mean = tower_message_means.mean()

# between messages sum of squares
tower_message_ss = n_rater * ((tower_message_means - overall_mean) ** 2).sum()

# between raters sum of squares
tower_rater_ss = n_message * ((tower_rater_means - overall_mean) ** 2).sum()

# Residual (SSE): deviation left after removing BOTH the message effect and the rater
# effect. Subtracting only the message mean gives the within-message sum of squares,
# which still contains the rater effect and does not match the (k-1)(n-1) degrees of
# freedom used for MSE below.
SE = (tower_table
      .sub(tower_message_means, axis=0)
      .sub(tower_rater_means, axis=1)
      + overall_mean) ** 2
SSE = SE.sum().sum()

# mean squares for each source of variance

# between messages
MSM = tower_message_ss / (n_message - 1)

# between raters
MSR = tower_rater_ss / (n_rater - 1)

# Residual (MSE)
MSE = SSE / ((n_rater - 1) * (n_message - 1))

# ICC(2,1) = (MSM - MSE) / (MSM + (k - 1) * MSE + k * (MSR - MSE) / n)
ICC = (MSM - MSE) / (MSM + (n_rater - 1) * MSE + n_rater * (MSR - MSE) / n_message)
ICC


## Comparing to baseline distributions

In [None]:
import random

In [None]:
# Example: pull one rater's counts for one expression type.
# workerID and exp_type are not defined anywhere at notebook level, so this cell raised
# NameError on a clean top-to-bottom run; here they are set to the first rater and the
# first expression type found in the data.
workerID = df_ref_exps_melt.workerID.unique()[0]
exp_type = df_ref_exps_melt.exp_type.unique()[0]

counts = list(df_ref_exps_melt.loc[(df_ref_exps_melt.workerID == workerID) &
                                    (df_ref_exps_melt.exp_type == exp_type), 'n_refs'])

In [None]:
random.seed(0)

def shuffle_counts(df, within_exp_type=True, coupled=False):
    '''
    Return a copy of `df` in which each rater's `n_refs` counts are permuted across rows.

    The input frame is not modified. Shuffling is always done separately for every
    value of `workerID`, using the global `random` module (seed it for reproducibility).

    Parameters
    ----------
    df : DataFrame with columns `workerID`, `exp_type` and `n_refs`.
    within_exp_type : bool
        If True, counts are only permuted among rows sharing the same `exp_type`.
        If False, all of a rater's counts are permuted together, regardless of type.
    coupled : bool
        Only meaningful with `within_exp_type=True`. If True, one permutation is drawn
        per rater and applied to every expression type, so counts that sat at the same
        position within each type (e.g. the block and tower count of one message) stay
        together. This relies on each rater's rows appearing in the same order within
        every expression type. If False, each expression type is permuted independently,
        which decouples block and tower counts.

    Returns
    -------
    DataFrame: the shuffled copy, with `n_refs` cast to int.

    Raises
    ------
    ValueError: if `coupled=True` and a rater has a different number of rows for
    different expression types, so that no shared permutation exists.
    '''

    df_shuffled = df.copy()

    if coupled and not within_exp_type:
        # Nothing sensible to do: report it and hand back the unshuffled copy.
        print('does not make sense to ask for coupled block and tower responses across expression type')
        df_shuffled['n_refs'] = df_shuffled['n_refs'].astype(int)
        return df_shuffled

    exp_types = df.exp_type.unique()

    for workerID in df.workerID.unique():

        is_worker = df.workerID == workerID

        if not within_exp_type:
            counts = list(df.loc[is_worker, 'n_refs'])
            random.shuffle(counts)
            df_shuffled.loc[is_worker, 'n_refs'] = counts
            continue

        # Row masks for this rater, one per expression type the rater actually has.
        type_masks = [is_worker & (df.exp_type == exp_type) for exp_type in exp_types]
        type_masks = [mask for mask in type_masks if mask.any()]

        if coupled:
            sizes = {int(mask.sum()) for mask in type_masks}
            if len(sizes) > 1:
                raise ValueError(
                    'coupled shuffling needs the same number of rows per exp_type '
                    'for worker {!r}'.format(workerID))

            # One shared permutation of row positions, reused for every expression type.
            order = list(range(sizes.pop()))
            random.shuffle(order)

            for mask in type_masks:
                counts = list(df.loc[mask, 'n_refs'])
                df_shuffled.loc[mask, 'n_refs'] = [counts[pos] for pos in order]
        else:
            # Independent permutation per expression type.
            for mask in type_masks:
                counts = list(df.loc[mask, 'n_refs'])
                random.shuffle(counts)
                df_shuffled.loc[mask, 'n_refs'] = counts

    df_shuffled['n_refs'] = df_shuffled['n_refs'].astype(int)

    return df_shuffled

In [None]:
# One shuffled copy of the long-format table, produced by shuffle_counts.
df_ref_exps_melt_shuffled = shuffle_counts(df_ref_exps_melt, within_exp_type=True, coupled=True)

In [None]:
# Display the shuffled table for inspection.
df_ref_exps_melt_shuffled

In [None]:
# Reshape the shuffled long table to one row per message_id with (exp_type, workerID) columns.
df_ref_exps_shuffled_table = df_ref_exps_melt_shuffled.pivot(index='message_id', columns=['exp_type', 'workerID'], values=['n_refs'])['n_refs']

In [None]:
def prop_agreement(df_table, level='block', raters=('charles', 'julia', 'justin', 'zoe')):
    '''
    Proportion of rows on which every listed rater gave the same value.

    Parameters
    ----------
    df_table : DataFrame whose columns are (level, rater) pairs, one row per message.
    level : first-level column key to evaluate, e.g. 'block' or 'tower'.
    raters : rater column names to compare; defaults to the four annotators of this study.

    Returns
    -------
    float: mean of the per-row "all raters agree" indicator. Rows containing a missing
    value never count as agreement, because NaN does not compare equal to anything.
    '''
    ratings = df_table[level][list(raters)]

    # Every rater agreeing is the same as every rater matching the first one.
    reference = ratings.iloc[:, 0]
    all_agree = ratings.eq(reference, axis=0).all(axis=1)

    return np.mean(all_agree)

In [None]:
# Full-agreement proportion for block counts in the shuffled table.
prop_agreement(df_ref_exps_shuffled_table, 'block')

In [None]:
# Full-agreement proportion for tower counts in the shuffled table.
prop_agreement(df_ref_exps_shuffled_table, 'tower')

In [None]:
random.seed(0)

# Baseline distributions collected over repeated shuffles of the annotation counts.
agreement_baseline = {'block': [], 'tower': []}
icc_baseline = []

for iteration in range(50):

    # Shuffle the counts and rebuild the per-message table.
    df_ref_exps_melt_shuffled = shuffle_counts(df_ref_exps_melt,
                                               within_exp_type=True,
                                               coupled=True)

    df_ref_exps_shuffled_table = df_ref_exps_melt_shuffled.pivot(
        index='message_id',
        columns=['exp_type', 'workerID'],
        values=['n_refs'])['n_refs']

    # Full-agreement proportion under shuffling, for each expression type.
    for level in ('block', 'tower'):
        agreement_baseline[level].append(
            prop_agreement(df_ref_exps_shuffled_table, level))

    # ICC under shuffling: value in the first row of pingouin's result table.
    shuffled_icc_table = pg.intraclass_corr(data=df_ref_exps_melt_shuffled,
                                            targets="message_id",
                                            raters="workerID",
                                            ratings="n_refs")
    icc_baseline.append(shuffled_icc_table.loc[0, "ICC"])
    

In [None]:
# Observed ICC on the unshuffled data: the value in the first row of pingouin's result
# table, the same row used for the shuffled baseline.
overall_icc = pg.intraclass_corr(data = df_ref_exps_melt, targets="message_id", raters="workerID", ratings="n_refs").loc[0,"ICC"]



In [None]:
# Distribution of ICC values from the shuffled baseline, with the observed ICC marked
# by the dashed red line.
# fig, ax = plt.subplots(figsize=(10,4))
sns.displot(icc_baseline, height=5, aspect=2)
plt.axvline(overall_icc, color='r', linestyle='--')
plt.show()

In [None]:
# Distribution of full-agreement proportions for block counts under shuffling, with the
# observed proportion marked by the dashed red line.
# fig, ax = plt.subplots(figsize=(10,4))
sns.displot(agreement_baseline['block'], height=5, aspect=2)
plt.axvline(prop_all_agree_block, color='r', linestyle='--')
plt.show()

In [None]:
# Distribution of full-agreement proportions for tower counts under shuffling, with the
# observed proportion marked by the dashed red line.
sns.displot(agreement_baseline['tower'], height=5, aspect=2)
plt.axvline(prop_all_agree_tower, color='r', linestyle='--')
# Render explicitly, as the two sibling plot cells do, so the cell does not end by
# echoing the object returned by axvline.
plt.show()

# Chi-squared

Compare word frequency distributions using chi-squared test