In [None]:
%load_ext blackcellmagic

#### Imports and setup

In [None]:
import deepdish as dd
import pandas as pd
import numpy as np

from scipy.stats import ttest_1samp

In [None]:
from spacestream.core.constants import SUBJECTS, CORE_ROI_NAMES
from spacestream.core.paths import RESULTS_PATH

In [None]:
# Analysis configuration shared by the cells below.
seeds = list(range(5))  # seed indices 0..4
hemis = ["lh", "rh"]  # hemisphere tags used in result file names
model_types = ["MB_RN50_v2", "MB_RN50", "MB_RN18"]  # model families to load
checkpoint = "checkpoint0"  # checkpoint tag embedded in result file names

In [None]:
# Ordered task list for each model family. The position of a task in its list
# is used later to pick the matching column out of the loaded result arrays.
tasks = {}
tasks["MB_RN50_v2"] = ["categorization", "clip", "detection"]
tasks["MB_RN50"] = ["categorization", "action", "detection"]
tasks["MB_RN18"] = ["categorization", "action", "detection"]

#### Load and format data

In [None]:
# Read in data: build one long-format record per
# (hemisphere, model family, subject, ROI, task) combination.
long = {
    "model_type": [],
    "hemi": [],
    "subject": [],
    "seed": [],
    "ROIS": [],
    "result": [],
}
for hemi in hemis:
    for mtype in model_types:
        # Result files live in a per-architecture directory; derive its name
        # from the model family string.
        if "18" in mtype:
            arch_dir = "RN18"
        elif "50_v2" in mtype:
            arch_dir = "RN50_v2"
        else:
            arch_dir = "RN50"

        load_path = (
            f"{RESULTS_PATH}analyses/spatial/MBs/{arch_dir}"
            f"/full_percent_by_task_{hemi}_{checkpoint}.hdf"
        )
        # The file is keyed by hemisphere; the entry is indexed as
        # [subject index, column index] below.
        data = dd.io.load(load_path)[hemi]

        # Take the task count from the configuration instead of hard-coding 3,
        # so the column arithmetic stays in sync with `tasks`.
        model_tasks = tasks[mtype]
        n_tasks = len(model_tasks)

        for sidx, subj in enumerate(SUBJECTS):
            # Columns are laid out ROI-major / task-minor:
            #   column = roi_index * n_tasks + task_index
            for roi_idx, roi_name in enumerate(CORE_ROI_NAMES):
                for task_idx, task in enumerate(model_tasks):
                    column = roi_idx * n_tasks + task_idx
                    long["model_type"].append(mtype + "_" + task)
                    long["hemi"].append(hemi)
                    long["subject"].append(subj)
                    # Every row is tagged with seed 0; `seeds` is not iterated here.
                    long["seed"].append(0)
                    long["ROIS"].append(roi_name)
                    # Scaled by 100 (presumably fraction -> percent; confirm
                    # against the code that writes the .hdf files).
                    long["result"].append(data[sidx, column] * 100)

In [None]:
# Inspect the dimensions of `data`, i.e. the array loaded on the final
# iteration of the loading loop above (last hemisphere, last model family).
data.shape

In [None]:
df = pd.DataFrame(long)
# Average over everything that is not a grouping key (i.e. across seeds),
# leaving one mean value per (model_type, hemi, subject, ROI) combination.
group_keys = ["model_type", "hemi", "subject", "ROIS"]
df_avg = df.groupby(group_keys)["result"].mean().reset_index()

In [None]:
df_avg

In [None]:
# Load the vox2vox correspondence results for each hemisphere.
subj2subj = {}
for hemi in hemis:
    load_path = f"{RESULTS_PATH}analyses/spatial/brains/vox2vox_correspondence_{hemi}_{checkpoint}.hdf"
    subj2subj[hemi] = dd.io.load(load_path)

# Element-wise mean of the two hemispheres, then scaled by 100.
s2s_hemi_avg = np.mean((subj2subj["lh"], subj2subj["rh"]), axis=0) * 100

In [None]:
# Reformat data: flatten the hemisphere-averaged array, indexed as
# [subject index, ROI index], into long format with one row per (ROI, subject).
rows = [
    {"subject": subject, "ROI": roi, "result": s2s_hemi_avg[subj_idx, roi_idx]}
    for roi_idx, roi in enumerate(CORE_ROI_NAMES)
    for subj_idx, subject in enumerate(SUBJECTS)
]
s2s_reformatted = pd.DataFrame(rows)

In [None]:
# Mean and standard deviation (pandas `Series.std`, i.e. ddof=1) of the
# subject-to-subject values for the Ventral ROI.
ventral_s2s = s2s_reformatted.loc[s2s_reformatted["ROI"] == "Ventral", "result"]
print(ventral_s2s.mean())
print(ventral_s2s.std())

In [None]:
# Save the dataframes for matlab plotting function
# matlab/F03_A.m
#
# The checkpoint tag in each file name comes from `checkpoint`, so the exported
# files are always labelled with the checkpoint that was actually loaded
# (previously "checkpoint0" was hard-coded in the file names).
# NOTE(review): the output directory is an absolute cluster path; consider
# moving it to a configurable location.
output_dir = '/oak/stanford/groups/kalanit/biac2/kgs/projects/Dawn/SpaceStreamPaper/Revision/code/'

s2s_reformatted.to_csv(output_dir + 'new_Fig3a_noiseCeiling_' + checkpoint + '.csv', index=False)
df_avg.to_csv(output_dir + 'new_Fig3a_dataFrame_0420_' + checkpoint + '.csv', index=False)


#### Statistics

In [None]:
# Split the averaged frame into one sub-frame per ROI for the per-stream tests.
roi_labels = df_avg["ROIS"]
dorsal_df = df_avg.loc[roi_labels == "Dorsal"]
lateral_df = df_avg.loc[roi_labels == "Lateral"]
ventral_df = df_avg.loc[roi_labels == "Ventral"]

In [None]:
# Multiple-comparison factor applied to the p-values below:
# 3 hypothesized tasks x 3 streams.
n_hypothesized_tasks = 3
n_streams = 3
corrected_by = n_hypothesized_tasks * n_streams

In [None]:
# Model/task combinations tested against each stream's ROI; names follow the
# "<model family>_<task>" pattern used in the `model_type` column.
dorsal_model_types = ["MB_RN50_v2_detection", "MB_RN50_detection", "MB_RN18_detection"]
lateral_model_types = ["MB_RN50_v2_clip", "MB_RN50_action", "MB_RN18_action"]
ventral_model_types = ["MB_RN50_v2_categorization", "MB_RN50_categorization", "MB_RN18_categorization"]

In [None]:
def _per_subject_means(roi_df, model_names):
    """Return per-subject mean results for each requested model.

    Parameters
    ----------
    roi_df : pandas.DataFrame
        Rows for a single ROI, with "model_type", "subject" and "result" columns.
    model_names : list of str
        Values of "model_type" to extract.

    Returns
    -------
    dict
        Maps each model name to a Series of "result" means, one entry per
        subject (averaged over that subject's remaining rows, e.g. both
        hemispheres), in sorted-subject order with a fresh integer index.
    """
    return {
        name: roi_df[roi_df["model_type"] == name]
        .groupby("subject")["result"]
        .mean()
        .reset_index()["result"]
        for name in model_names
    }


# One helper call per stream replaces three copy-pasted loops.
individual_dorsal_dfs = _per_subject_means(dorsal_df, dorsal_model_types)
individual_lateral_dfs = _per_subject_means(lateral_df, lateral_model_types)
individual_ventral_dfs = _per_subject_means(ventral_df, ventral_model_types)


In [None]:
# Spot check: mean across subjects for the RN50_v2 detection model in the Dorsal ROI.
rn50_v2_dorsal = individual_dorsal_dfs["MB_RN50_v2_detection"]
print(np.mean(rn50_v2_dorsal))

In [None]:
# Exact chance level, in percent, for a three-way split (previously rounded to 33.33).
chance_level = 100 / 3


def _report_stream(stream_name, model_names, per_subject, n_comparisons):
    """Print mean, SD and corrected p-value against chance for each model.

    Parameters
    ----------
    stream_name : str
        Heading printed before the per-model lines.
    model_names : list of str
        Keys of `per_subject` to report, in order.
    per_subject : dict
        Maps model name to a 1-D collection of per-subject values.
    n_comparisons : int
        Bonferroni factor; each p-value is multiplied by it and capped at 1.
    """
    print(stream_name)
    for name in model_names:
        values = per_subject[name]
        print(name)
        print(np.mean(values))
        # NOTE(review): np.std defaults to ddof=0 (population SD), whereas the
        # Series.std() used for the Ventral summary earlier defaults to ddof=1.
        # Confirm which convention is intended.
        print(np.std(values))
        p_uncorrected = ttest_1samp(values, chance_level)[1]
        # Bonferroni correction; a probability cannot exceed 1, so cap it.
        print(min(p_uncorrected * n_comparisons, 1.0))


_report_stream("Dorsal", dorsal_model_types, individual_dorsal_dfs, corrected_by)

print("--------------------")
_report_stream("Lateral", lateral_model_types, individual_lateral_dfs, corrected_by)

print("--------------------")
_report_stream("Ventral", ventral_model_types, individual_ventral_dfs, corrected_by)