In [None]:
%load_ext blackcellmagic

#### Imports and setup

In [None]:
import deepdish as dd
import pandas as pd
import numpy as np

import statsmodels.formula.api as sm
from statsmodels.stats.multitest import multipletests

In [None]:
from spacestream.core.constants import SUBJECTS, CORE_ROI_NAMES
from spacestream.core.paths import RESULTS_PATH

In [None]:
# Seeds looped over when loading the TDANN and brain-to-brain result files
seeds = [0, 1, 2, 3, 4]
# Hemispheres; "lh" result files carry an "lh_" filename prefix, "rh" files carry none
hemis = ["lh", "rh"]
# Each model type is listed exactly once: the loading loop appends one set of rows
# per entry, so a repeated name would duplicate that model's rows in the long table.
model_types = [
    "MB_RN18",
    "TDANN_Supervised",
    "TDANN_Supervised_0.0",
    "TDANN_SimCLR",
    "TDANN_SimCLR_0.0",
    "MB_RN50_v2",
    "MB_RN50",
]

In [None]:
# Checkpoint tag spliced into the filename of the MB (multi-behavior) results file
checkpoint_mb = "0"
# Checkpoint tag spliced into the filenames of the TDANN unit2voxel files and of the
# brain voxel2voxel files
checkpoint_tdann = "0VALIDATE"

#### Load and format data

In [None]:
# read in data
# Index of each model along the first axis of the stored "thirddist_r" array.
# Dict lookups replace if/elif chains so that an unrecognised model type fails
# loudly with KeyError instead of silently reusing the index left over from the
# previous loop iteration.
tdann_first_idx = {
    "TDANN_Supervised": 5,  # 2.5 (original note; presumably the spatial-weight value -- confirm)
    "TDANN_SimCLR": 2,  # 0.25 (original note; presumably the spatial-weight value -- confirm)
    "TDANN_SimCLR_0.0": 0,
    "TDANN_Supervised_0.0": 0,
}
mb_first_idx = {"MB_RN50_v2": 2, "MB_RN50": 1, "MB_RN18": 0}

# Long-format accumulator: one list per output column, filled in lock-step
long = {
    "model_type": [],
    "hemi": [],
    "subject": [],
    "seed": [],
    "ROIS": [],
    "result": [],
}

# "thirddist_r" arrays already read from disk, keyed by file path. Several model
# types share a file (e.g. all MB models of one hemisphere), so each file is read once.
_thirddist_cache = {}


def _load_thirddist(path):
    """Return the "thirddist_r" entry of the HDF file at `path`, reading each file only once."""
    if path not in _thirddist_cache:
        _thirddist_cache[path] = dd.io.load(path)["thirddist_r"]
    return _thirddist_cache[path]


def _append_rows(mtype, hemi, seed, thirddist_r, first_idx):
    """Append one row per (subject, ROI) pair to `long`.

    The result for subject position `sidx` and ROI position `ridx` is read from
    `thirddist_r[first_idx, sidx, ridx]`.
    """
    for sidx, subj in enumerate(SUBJECTS):
        for ridx, roi in enumerate(CORE_ROI_NAMES):
            long["model_type"].append(mtype)
            long["hemi"].append(hemi)
            long["subject"].append(subj)
            long["seed"].append(seed)
            long["ROIS"].append(roi)
            long["result"].append(thirddist_r[first_idx, sidx, ridx])


for hemi in hemis:
    # left-hemisphere files carry an "lh_" prefix; right-hemisphere files carry none
    hemi_tag = "lh_" if hemi == "lh" else ""

    for mtype in model_types:

        if "TDANN" in mtype:
            sw_idx = tdann_first_idx[mtype]

            # one file per seed; seed 0 has no "_seed" suffix in its filename
            for seed in seeds:
                smooth_path = (
                    RESULTS_PATH
                    + "analyses/spatial/TDANNs/"
                    + "smoothness_calc_by_stream_"
                    + hemi_tag
                    + "unit2voxel"
                    + ("_supervised" if "Supervised" in mtype else "")
                    + (("_seed" + str(seed)) if seed > 0 else "")
                    + "_correlations_by_unit_ckpt"
                    + checkpoint_tdann
                    + ".hdf"
                )
                _append_rows(mtype, hemi, seed, _load_thirddist(smooth_path), sw_idx)

        else:
            midx = mb_first_idx[mtype]
            smooth_path = (
                RESULTS_PATH
                + "analyses/spatial/MBs/"
                + "smoothness_calc_by_stream_"
                + hemi_tag
                + "unit2voxel"
                + "_correlations_by_unit_ckpt"
                + checkpoint_mb
                + ".hdf"
            )
            # the MB filename carries no seed; its rows are recorded under seed 0
            _append_rows(mtype, hemi, 0, _load_thirddist(smooth_path), midx)

In [None]:
# Long-format table built from the column lists collected in `long`
df = pd.DataFrame(long)

# Average across seeds: mean "result" within each model / hemisphere / subject / ROI group
group_keys = ["model_type", "hemi", "subject", "ROIS"]
df_avg = df.groupby(group_keys)["result"].mean().reset_index()

In [None]:
# Brain-to-brain values for every hemisphere and seed. Each file's "thirddist_r" is
# written into an (8, 7, 3) slot; the two trailing axes index (hemisphere, seed).
s2s_data = np.zeros((8, 7, 3, len(hemis), len(seeds)))

for hemi_pos, hemi_name in enumerate(hemis):
    # left-hemisphere files carry an "lh_" prefix; right-hemisphere files carry none
    hemi_tag = "lh_" if hemi_name == "lh" else ""
    for seed_pos, seed_val in enumerate(seeds):
        # seed 0 has no "_seed" suffix in its filename
        seed_tag = ("_seed" + str(seed_val)) if seed_val > 0 else ""
        smooth_path = (
            RESULTS_PATH
            + "analyses/spatial/brains/"
            + "smoothness_calc_by_stream_"
            + hemi_tag
            + "voxel2voxel"
            + seed_tag
            + "_correlations_by_unit_ckpt"
            + checkpoint_tdann
            + ".hdf"
        )
        v2v_smoothness = dd.io.load(smooth_path)
        s2s_data[..., hemi_pos, seed_pos] = v2v_smoothness["thirddist_r"]

# drop the reference to the last loaded file
del v2v_smoothness

# Average over seeds (last axis), then hemispheres (new last axis), then axis 1,
# leaving an (8, 3) array.
s2s = s2s_data.mean(axis=-1).mean(axis=-1)
s2s = s2s.mean(axis=1)

In [None]:
# Reformat data: flatten s2s into long format, one row per (ROI, subject) pair.
# ROIs vary slowest; s2s is indexed as [subject position, ROI position].
rows = [
    {"subject": subject, "ROI": roi, "result": s2s[subj_pos, roi_pos]}
    for roi_pos, roi in enumerate(CORE_ROI_NAMES)
    for subj_pos, subject in enumerate(SUBJECTS)
]
s2s_reformatted = pd.DataFrame(rows)

In [None]:
# Save the dataframes for matlab plotting function
# matlab/F02_B.m

# Directory that receives both CSV exports
out_dir = '/oak/stanford/groups/kalanit/biac2/kgs/projects/Dawn/SpaceStreamPaper/Revision/code/'

# brain-to-brain values, one row per (ROI, subject)
s2s_reformatted.to_csv(out_dir + 'new_Fig2b_noiseCeiling_ckpt0.csv', index=False)
# seed-averaged model results, all model types
df_avg.to_csv(out_dir + 'new_Fig2b_dataFrame_checkpoint0.csv', index=False)

#### Statistics

In [None]:
# Cat & SimCLR Cat are included in the figure for visualization purposes, but the
# statistics are run on the spatial constraints vs. multiple behaviors comparisons,
# i.e. MB v1 RN50, MB v2 RN50, MB v1 RN18, TDANN Supervised, TDANN SimCLR.
# All statistics are collapsed across the two hemispheres given the minimal
# hemispheric differences (and no corresponding hypotheses re hemispheres).
figure_only_models = ["TDANN_Supervised_0.0", "TDANN_SimCLR_0.0"]
keep_rows = ~df["model_type"].isin(figure_only_models)
df = df[keep_rows]

In [None]:
# set up separate models by ROI
# The statistics are meant to exclude the two "_0.0" TDANN variants (they are kept
# only for the figure). df_avg is computed from the unfiltered table and is exported
# in full, so the exclusion has to be applied to df_avg itself before splitting by
# ROI; otherwise both variants enter every model and inflate the number of
# comparisons that are Bonferroni-corrected.
stats_df = df_avg[~df_avg["model_type"].isin(["TDANN_Supervised_0.0", "TDANN_SimCLR_0.0"])]

dorsal_df = stats_df[stats_df["ROIS"] == "Dorsal"]
lateral_df = stats_df[stats_df["ROIS"] == "Lateral"]
ventral_df = stats_df[stats_df["ROIS"] == "Ventral"]

In [None]:
# Dorsal
# Mixed linear model of result on model_type, with observations grouped by subject
dorsal_mod = sm.mixedlm('result~model_type', data=dorsal_df, groups=dorsal_df["subject"]).fit()
print(dorsal_mod.summary())

# Coefficients and uncorrected p-values side by side
res = pd.concat([dorsal_mod.params, dorsal_mod.pvalues], axis=1)
res.columns = ['coefficient', 'pvalues']
print(res)

# Keep only the model_type terms. The explicit .copy() makes `res` an independent
# frame, so adding a column below does not raise pandas' SettingWithCopyWarning.
res = res[res.index.str.contains('model_type')].copy()
# Bonferroni-correct across the retained model_type terms
res['corrected_p'] = multipletests(res['pvalues'], method="bonferroni")[1]
print(res)

In [None]:
# Lateral
# Mixed linear model of result on model_type, with observations grouped by subject
lateral_mod = sm.mixedlm('result~model_type', data=lateral_df, groups=lateral_df["subject"]).fit()
print(lateral_mod.summary())

# Coefficients and uncorrected p-values side by side
res = pd.concat([lateral_mod.params, lateral_mod.pvalues], axis=1)
res.columns = ['coefficient', 'pvalues']
print(res)

# Keep only the model_type terms. The explicit .copy() makes `res` an independent
# frame, so adding a column below does not raise pandas' SettingWithCopyWarning.
res = res[res.index.str.contains('model_type')].copy()

# Bonferroni-correct across the retained model_type terms
res['corrected_p'] = multipletests(res['pvalues'], method="bonferroni")[1]
print(res)

In [None]:
# Ventral
# Mixed linear model of result on model_type, with observations grouped by subject
ventral_mod = sm.mixedlm('result~model_type', data=ventral_df, groups=ventral_df["subject"]).fit()
print(ventral_mod.summary())

# Coefficients and uncorrected p-values side by side
res = pd.concat([ventral_mod.params, ventral_mod.pvalues], axis=1)
res.columns = ['coefficient', 'pvalues']
print(res)

# Keep only the model_type terms. The explicit .copy() makes `res` an independent
# frame, so adding a column below does not raise pandas' SettingWithCopyWarning.
res = res[res.index.str.contains('model_type')].copy()

# Bonferroni-correct across the retained model_type terms
res['corrected_p'] = multipletests(res['pvalues'], method="bonferroni")[1]
print(res)