In [48]:
import pandas as pd
import numpy as np
import os
from utils_dist_corr import *

### Get data from encoding (pos, box, stim)

In [92]:
path_script = '/Users/alouette/Documents/Perceptual_Space_ALS/Python/'
path_pca = '/Users/alouette/Documents/Perceptual_Space_ALS/df_pca/'
pca_name = path_data+'evals_su={}_stim=ALL_ALL_PCA.csv'
stims = ['music','odor','face']

#get encoding config by subj + box coordinates
df_enc = pd.read_csv(path_script+'all_encodage.csv')
df_xy = pd.read_csv(path_script+'df_boxes.csv',index_col=0)
pairs = define_pairs(df_xy['box'])
dist = compute_all_dist(df_xy, pairs, name_col='box',
                           x_col='x', y_col='y')
phys_ = np.concatenate((np.array(pairs,dtype=str),dist[:,np.newaxis]),axis=1)
df_phys = pd.DataFrame(data=phys_, columns=['box1','box2','dist_phys'])
df_phys.to_csv(path_script+'df_phys_space_boxes.csv',index=False)

#select only non-outliers subjects
subjs_sel = pd.read_csv(path_script+'all_evals_clean_n=48.csv')['subject']
subjects = np.unique(subjs_sel)

#create a df with physical and perceptual distances
for su in subjects:
    df_su = df_enc[df['subject']==su][['box_num',stim]]
#    df_su['odor'] = [str(int(r)) for r in df_su['odor']]
    df_su['x'] = [x[0] for x in df_su['box_num'].map(dict_x)]
    df_su['y'] = [y[0] for y in df_su['box_num'].map(dict_y)]
    df_su = df_su.drop_duplicates()
    
    
    print(df_su.head())
    0/0


    box_num music     x     y
0         2   M04 -2.71  1.50
1         1   M08 -5.84  1.10
2         0   M09 -4.33 -1.35
15        8   M03  3.80 -0.60
16        7   M02  5.73 -1.62


ZeroDivisionError: division by zero

In [None]:
dict_x = df_xy[['box','x']].set_index('box').T.to_dict('list')
dict_y = df_xy[['box','y']].set_index('box').T.to_dict('list')

In [None]:
stims = ['music','face','odor']
nb_stim = 18

for stim in stims:
    files_pca = [f for f in os.listdir(path_data + 'df_pca/') if f.endswith(stim+'_PCA.csv')]

    all_dist = np.zeros((nb_pairs(nb_stim),len(files_pca)))
    all_sub = []
    for i, f in enumerate(files_pca):
        df = pd.read_csv(path_data+'df_pca/'+ f, index_col=0)
        pairs = define_pairs(df.stimulus_name)
        dist = compute_all_dist(df, pairs, name_col='stimulus_name',
                           x_col='coord.Dim.1', y_col='coord.Dim.2')
        all_dist[:,i] += dist
        all_sub.append(df.subject.values[0])

    data_df = np.concatenate((np.array(pairs),all_dist),axis=1)
    df_all = pd.DataFrame(data=data_df, columns=['p0','p1']+all_sub)
    df_all.to_csv(path_data+'Python/all_dist_stim='+stim+'.csv')

## Compare perceptual space
Compare the consistency (R) btw space across subject for each sensory modality

### Compute perceptual spaces' consistency across subjects

In [None]:
path_df = path_data+'Python/all_dist_stim={}.csv'
stims = ['music','face','odor']

space_r = []
for stim in stims:
    df = pd.read_csv(path_df.format(stim), index_col=0)
    subj = [c for c in df.columns if c not in ['p0','p1']]
    pairs_su = define_pairs(subj)
    corr = compute_all_R(df, pairs_su, meth='spearman')
    space_r.append(corr) 
space_r = np.transpose(np.vstack(space_r))

df_r_var = pd.DataFrame(space_r, columns=stims)
print(df_r_var.describe())
df_r_var.to_csv(path_data+'Python/btw_subj_consistency_allstims.csv',index=False)

### Compare consistencies across modality (+ paired links)

In [None]:
import statsmodels.api as sm
from statsmodels.formula.api import ols
import pingouin as pg 

df_r_var = pd.read_csv(path_data+'Python/btw_subj_consistency_allstims.csv')

# Plot results 
fig, axes = plt.subplots(nrows=1, ncols=1, figsize=(5,4))
df_r_var.boxplot(column=stims, ax=axes)

# Compute 1-way repeated measures ANOVA
df_r_stats = df_r_var.melt()
df_r_stats.columns = ['stims','btw_su_corr']
df_r_stats['su_pairs'] = np.concatenate([np.arange(0,df_r_var.shape[0],1)]*3)
anova = pg.rm_anova(data=df_r_stats, dv='btw_su_corr', within='stims',
                    subject='su_pairs')
pg.print_table(anova, floatfmt='.3f')

# Bonf-corrected post hocs with Hedges'g effect size
posthoc = pg.pairwise_tests(data=df_r_stats, dv='btw_su_corr', within='stims',
                            subject='su_pairs', padjust='bonf')
pg.print_table(posthoc, floatfmt='.3f')

# Pairwise correlations between sensory modality
pair_r = pg.pairwise_corr(df_r_var, method='pearson')
print(pair_r)

### Compare perceptual spaces' size and dispersion

In [None]:
stims = ['music','face','odor']
cols_to_sel = ['%var_cum','pol_surf','circ_surf','avg_d']

files_pca = [f for f in os.listdir(path_data + 'df_pca/') if f.endswith('_PCA_sum.csv')]
    
all_pca = np.zeros((len(files_pca),4))
infos = np.array([])
for i,f in enumerate(files_pca):
    splits = [sp.split('_') for sp in f.split('=')]
    s_splits = np.array((splits[1][0],splits[2][0]))
    infos = np.vstack((infos,s_splits)) if np.size(infos) else np.array(s_splits)
    
    df = pd.read_csv(path_data+'df_pca/'+f, index_col=0)
    df.columns = ['cos2','%var','%var_cum','pol_surf','circ_surf','x0','y0','avg_d']
    sel = df[cols_to_sel].loc[['comp 2']].values[0]
    all_pca[i,:] += sel

data_df = np.concatenate((infos,all_pca), axis=1)
df_all = pd.DataFrame(data=data_df, columns=['subject','stim']+cols_to_sel)
df_all.to_csv(path_data+'Python/all_spaces_geometry.csv',index=False)

### Statistics and plots - spaces geometry 

In [None]:
import pingouin as pg 

df_geo = pd.read_csv(path_data+'Python/all_spaces_geometry.csv')

# Plot results 
fig, axes = plt.subplots(nrows=1, ncols=4, figsize=(20,4))
for i,c in enumerate(df_geo.columns[2:]):
    print('>> effect of modality on ',c)
    df_geo[[c,'stim']].boxplot(ax=axes[i], by='stim')
    anova = pg.rm_anova(data=df_geo, dv=c, within='stim',
                    subject='subject')
    pg.print_table(anova, floatfmt='.3f')
    
    if anova.iloc[0,4] < 0.05:
        # Bonf-corrected post hocs with Hedges'g effect size
        posthoc = pg.pairwise_tests(data=df_geo, dv=c, within='stim',
                            subject='subject', padjust='bonf')
        pg.print_table(posthoc, floatfmt='.3f')
    