In [40]:
# locate the per-rater input files and assign processing directories
from glob import glob

# Input location of the preprocessed rater exports and the output location.
preproc_dir = '../../reliability_data/processed/'
output_dir = './output/'

# Episode to analyse. Columns named in string_vars are dropped when the
# rater files are loaded.
episode = 'AHKJ_S1E2'
string_vars = ['setting']

# Collect every objective-codes export for this episode, in sorted order so
# the file list is stable across runs.
pattern = '{0}{1}/objective_codes*'.format(preproc_dir, episode)
files = glob(pattern)
files.sort()
files

['../../reliability_data/processed/AHKJ_S1E2/objective_codes_timeseries_DB_20210305-100947.csv',
 '../../reliability_data/processed/AHKJ_S1E2/objective_codes_timeseries_NS_20210305-100947.csv',
 '../../reliability_data/processed/AHKJ_S1E2/objective_codes_timeseries_NT_20210305-100948.csv',
 '../../reliability_data/processed/AHKJ_S1E2/objective_codes_timeseries_RK_20210305-100948.csv']

In [44]:
# load each rater's file and combine into two dataframes (native rate and 1 Hz), each with a rater column
import pandas as pd

# Fail early with an actionable message instead of pd.concat's generic
# "No objects to concatenate" when the glob matched nothing.
if not files:
    raise ValueError(
        'No objective_codes files found; check preproc_dir and episode.'
    )

dfs = []       # one frame per rater at the native sampling rate
dfs_1hz = []   # one frame per rater, averaged into 1-second bins
for file in files:
    # File names end in "_<RATER>_<timestamp>.csv". Taking the
    # second-to-last underscore-separated token recovers the rater ID
    # without depending on the exact length of the ID or the timestamp.
    rater = file.rsplit('_', 2)[-2]

    # Drop the string-valued columns first so that the per-second mean
    # below only sees numeric data.
    raw = pd.read_csv(file, index_col=None).drop(string_vars, axis=1)

    # 'time' is interpreted as milliseconds; it stays as a column and is
    # also used to build the datetime index that resample() needs.
    raw.index = pd.to_datetime(raw['time'], unit='ms')
    raw.index.name = 'timeseries'

    # Average each 1-second bin and round back to whole codes.
    # NOTE(review): this also averages the 'time' column, so the 1 Hz
    # 'time' values only line up across raters if every rater has the
    # same sample times within each bin -- confirm for new data.
    per_second = raw.resample('1s').mean().round(0)

    # Tag rows with the rater only after resampling, so the string label
    # is not part of the numeric aggregation.
    raw['rater'] = rater
    per_second['rater'] = rater

    dfs.append(raw)
    dfs_1hz.append(per_second)

# Long-format frames: one row per (time, rater).
data = pd.concat(dfs)
data1hz = pd.concat(dfs_1hz)
data.columns

Index(['time', 'closeup', 'collective', 'has_body', 'has_faces', 'has_words',
       'interacting_nonverbal', 'interacting_verbal', 'num_chars',
       'phys_pain_object', 'phys_pain_other', 'scenenum', 'time_of_day',
       'rater'],
      dtype='object')

In [45]:
# compute ICCs
import pingouin as pg

# Every column except the target identifier ('time') and the rater label is
# a coded variable to score. Excluding them by name keeps this correct even
# if the column order of the input files changes.
variables = data.columns.drop(['time', 'rater'])

# Print the full ICC table (all six ICC types) for each coded variable,
# treating each timestamp as a target and each rater as a judge.
for x in variables:
    print('********************* {0} *********************'.format(x))
    icc = pg.intraclass_corr(data=data, targets='time', raters='rater', ratings=x).round(3)
    print(icc.set_index('Type'))
    print('')

********************* closeup *********************
                   Description    ICC      F    df1    df2  pval         CI95%
Type                                                                          
ICC1    Single raters absolute  0.143  1.670  12829  38490   0.0  [0.13, 0.15]
ICC2      Single random raters  0.180  2.115  12829  38487   0.0  [0.13, 0.23]
ICC3       Single fixed raters  0.218  2.115  12829  38487   0.0  [0.21, 0.23]
ICC1k  Average raters absolute  0.401  1.670  12829  38490   0.0  [0.38, 0.42]
ICC2k    Average random raters  0.468  2.115  12829  38487   0.0  [0.37, 0.54]
ICC3k     Average fixed raters  0.527  2.115  12829  38487   0.0  [0.51, 0.54]

********************* collective *********************
                   Description    ICC       F    df1    df2  pval  \
Type                                                                
ICC1    Single raters absolute  0.860  25.541  12829  38490   0.0   
ICC2      Single random raters  0.860  25.715  12829 

In [46]:
# ICC table per coded variable, computed on data1hz (the ratings averaged
# into 1-second bins) instead of the native-rate frame.
for x in variables:
    banner = '********************* {0} *********************'.format(x)
    print(banner)
    icc = pg.intraclass_corr(
        data=data1hz,
        targets='time',
        raters='rater',
        ratings=x,
    )
    icc = icc.round(3)
    print(icc.set_index('Type'))
    print('')

********************* closeup *********************
                   Description    ICC      F   df1   df2  pval         CI95%
Type                                                                        
ICC1    Single raters absolute  0.143  1.667  1283  3852   0.0  [0.12, 0.17]
ICC2      Single random raters  0.179  2.102  1283  3849   0.0  [0.12, 0.23]
ICC3       Single fixed raters  0.216  2.102  1283  3849   0.0  [0.19, 0.25]
ICC1k  Average raters absolute  0.400  1.667  1283  3852   0.0  [0.34, 0.45]
ICC2k    Average random raters  0.466  2.102  1283  3849   0.0  [0.36, 0.55]
ICC3k     Average fixed raters  0.524  2.102  1283  3849   0.0  [0.48, 0.57]

********************* collective *********************
                   Description    ICC       F   df1   df2  pval         CI95%
Type                                                                         
ICC1    Single raters absolute  0.845  22.802  1283  3852   0.0  [0.83, 0.86]
ICC2      Single random raters  0.845  22.