In [3]:
import numpy as np
import torch

from tqdm.auto import tqdm
from tqdm.notebook import tqdm_notebook, trange

import itertools

In [7]:
import os
from dotenv import find_dotenv, load_dotenv
from pathlib import Path

load_dotenv(find_dotenv(), verbose=True)

True

# prepare data

In [8]:
from src.utils.ensemble import optimize_val_correlation
from src.config.config import combine_cfgs, get_cfg_defaults
from src.data.datamodule import MyDataModule

In [9]:
# prepare validation data
cfg = combine_cfgs('../src/config/experiments/algonauts2021_i3d_flow.yml')
dm = MyDataModule(cfg)
dm.prepare_data()
dm.setup()

val_indices = dm.val_dataset.indices

cache_path = Path('/data_smr/huze/.cache/val_fmris.pt')

if cache_path.exists():
    val_fmris = torch.load(cache_path)
else:
    val_fmris = [dm.dataset_train_val.__getitem__(i)[1] for i in tqdm(val_indices)]
    val_fmris = np.stack(val_fmris, 0)
    val_fmris = torch.tensor(val_fmris)
    torch.save(val_fmris, cache_path)

In [10]:
# load from notebook 900. (cross-notebook-ref)

In [11]:
import pandas as pd

In [12]:
submission_df = torch.load('./tmp/submission_df.pt')

In [13]:
submission_df

Unnamed: 0,model_sch,he_sch,roi_sch,cross_roi_sch,i_submission,path
0,multi_layer&i3d_rgb,H3,ROIxSMC,croi_ensemble,0,/data/huze/ray_results/algonauts2021/ensemble_...
1,single_layer&i3d_rgb+i3d_flow,H3,ROIxSC,croi_ensemble,1,/data/huze/ray_results/algonauts2021/ensemble_...
2,single_layer&i3d_rgb+i3d_flow,H3,ROI,,2,/data/huze/ray_results/algonauts2021/ensemble_...
3,single_layer&i3d_rgb+i3d_flow,H3,WB,,3,/data/huze/ray_results/algonauts2021/ensemble_...
4,single_layer&i3d_flow,H3,ROIxSMC,croi_ensemble,4,/data/huze/ray_results/algonauts2021/ensemble_...
5,single_layer&i3d_rgb+i3d_flow,H1,ROIxSMC,croi_ensemble,5,/data/huze/ray_results/algonauts2021/ensemble_...
6,single_layer&i3d_rgb+i3d_flow,H3,ROIxLC,croi_ensemble,6,/data/huze/ray_results/algonauts2021/ensemble_...
7,multi_layer&i3d_flow,H3,ROIxSMC,croi_ensemble,7,/data/huze/ray_results/algonauts2021/ensemble_...
8,single_layer&i3d_rgb+i3d_flow,H3,SMC,,8,/data/huze/ray_results/algonauts2021/ensemble_...
9,single_layer&i3d_rgb+i3d_flow,H2,ROIxSMC,croi_ensemble,9,/data/huze/ray_results/algonauts2021/ensemble_...


In [36]:
submission_df.path[:3][1]

PosixPath('/data/huze/ray_results/algonauts2021/ensemble_outputs/he_sch=H3,model_sch=single_layer&i3d_rgb+i3d_flow,roi_sch=ROIxSC,cross_roi_sch=croi_ensemble-prediction.pt')

# validation score for each ROI and kROI

In [24]:
from src.utils.metrics import vectorized_correlation

In [25]:
rois = ['WB', 'V1', 'V2', 'V3', 'V4', 'LOC', 'EBA', 'FFA', 'STS', 'PPA', 'REST', 'SMC1', 'SMC2', 'MC2', 'MC1', 'SC3', 'SC4', 'LC1',
        'LC2', 'LC3', 'LC4', 'LC5']

In [49]:
# validation set score

score_res_dicts = []

for i_row, row in tqdm(list(submission_df.iterrows())):
    # model_sch = row.model_sch
    # he_sch = row.he_sch
    # roi_sch = row.roi_sch
    
    path = row.path
    prediction = torch.load(path)
    scores = vectorized_correlation(prediction[val_indices], val_fmris)
    res_dict = {}
    for roi in rois:
        voxel_indices = torch.load(os.path.join(cfg.DATASET.VOXEL_INDEX_DIR, f'{roi}.pt'))
        score = scores[voxel_indices].mean().item()
        key = f'{roi}_val_score'
        res_dict[key] = f'{score:.3f}'
    score_res_dicts.append(res_dict)
roi_val_score_df = pd.DataFrame(score_res_dicts)

  0%|          | 0/22 [00:00<?, ?it/s]

In [53]:
all_score_df = submission_df.join(roi_val_score_df)

# whole brain submission score

In [67]:
# test set score

In [68]:
from io import StringIO

In [118]:
results = """NUM\tSCORE\tFILENAME\tSUBMISSION\tDATE\tSTATUS\t
91	0.3726401715	i=000,he_sch=H3,model_sch=multi_layer&i3d_rgb,roi_sch=ROIxSMC,cross_roi_sch=croi_ensemble_full_track.zip	02/28/2022 14:03:28	Finished		
92	0.3822759446	i=001,he_sch=H3,model_sch=single_layer&i3d_rgb+i3d_flow,roi_sch=ROIxSC,cross_roi_sch=croi_ensemble_full_track.zip	02/28/2022 14:04:23	Finished		
93	0.3658464045	i=002,he_sch=H3,model_sch=single_layer&i3d_rgb+i3d_flow,roi_sch=ROI_full_track.zip	02/28/2022 14:06:27	Finished		
94	0.3566607576	i=003,he_sch=H3,model_sch=single_layer&i3d_rgb+i3d_flow,roi_sch=WB_full_track.zip	02/28/2022 14:08:08	Finished		
95	0.3371816907	i=004,he_sch=H3,model_sch=single_layer&i3d_flow,roi_sch=ROIxSMC,cross_roi_sch=croi_ensemble_full_track.zip	02/28/2022 14:17:15	Finished		
96	0.3755210745	i=005,he_sch=H1,model_sch=single_layer&i3d_rgb+i3d_flow,roi_sch=ROIxSMC,cross_roi_sch=croi_ensemble_full_track.zip	02/28/2022 14:20:31	Finished		
97	0.3765596377	i=006,he_sch=H3,model_sch=single_layer&i3d_rgb+i3d_flow,roi_sch=ROIxLC,cross_roi_sch=croi_ensemble_full_track.zip	02/28/2022 14:21:09	Finished		
98	0.3409911341	i=007,he_sch=H3,model_sch=multi_layer&i3d_flow,roi_sch=ROIxSMC,cross_roi_sch=croi_ensemble_full_track.zip	02/28/2022 14:23:47	Finished		
99	0.3777393409	i=008,he_sch=H3,model_sch=single_layer&i3d_rgb+i3d_flow,roi_sch=SMC_full_track.zip	02/28/2022 14:24:18	Finished		
100	0.3782319207	i=009,he_sch=H2,model_sch=single_layer&i3d_rgb+i3d_flow,roi_sch=ROIxSMC,cross_roi_sch=croi_ensemble_full_track.zip	02/28/2022 14:25:02	Finished		
101	0.3780098575	i=010,he_sch=H3,model_sch=single_layer&i3d_rgb+i3d_flow,roi_sch=ROIxSMC,cross_roi_sch=voxel_swap_full_track.zip	02/28/2022 14:26:42	Finished		
102	0.3823800812	i=011,he_sch=H3,model_sch=single_layer+multi_layer&i3d_rgb+i3d_flow,roi_sch=ROIxSMC,cross_roi_sch=croi_ensemble_full_track.zip	02/28/2022 14:28:01	Finished		
103	0.3386854364	i=012,he_sch=H3,model_sch=single_layer+multi_layer&i3d_flow,roi_sch=ROIxSMC,cross_roi_sch=croi_ensemble_full_track.zip	02/28/2022 14:28:31	Finished		
104	0.3780980566	i=013,he_sch=H3,model_sch=single_layer+multi_layer&i3d_rgb,roi_sch=ROIxSMC,cross_roi_sch=croi_ensemble_full_track.zip	02/28/2022 14:29:19	Finished		
105	0.3823963473	i=014,he_sch=H3,model_sch=single_layer&i3d_rgb+i3d_flow,roi_sch=ROIxSMC,cross_roi_sch=croi_ensemble_full_track.zip	02/28/2022 14:30:10	Finished		
106	0.3792904924	i=015,he_sch=H3,model_sch=single_layer&i3d_rgb+i3d_flow,roi_sch=ROIxMC,cross_roi_sch=croi_ensemble_full_track.zip	02/28/2022 14:30:54	Finished		
107	0.3779275382	i=016,he_sch=H3,model_sch=single_layer&i3d_rgb,roi_sch=ROIxSMC,cross_roi_sch=croi_ensemble_full_track.zip	02/28/2022 14:31:34	Finished		
108	0.3820716002	i=017,he_sch=H3,model_sch=single_layer&i3d_rgb+i3d_flow,roi_sch=ROIxSMC,cross_roi_sch=croi_swap_full_track.zip	02/28/2022 14:32:14	Finished		
109	0.3710070445	i=018,he_sch=H3,model_sch=single_layer&i3d_rgb+i3d_flow,roi_sch=LC_full_track.zip	02/28/2022 14:33:03	Finished		
110	0.3778493967	i=019,he_sch=H3,model_sch=single_layer&i3d_rgb+i3d_flow,roi_sch=SC_full_track.zip	02/28/2022 14:33:56	Finished		
111	0.3747412699	i=020,he_sch=H3,model_sch=single_layer&i3d_rgb+i3d_flow,roi_sch=MC_full_track.zip	02/28/2022 14:34:33	Finished		
112	0.3771323259	i=021,he_sch=H3,model_sch=multi_layer&i3d_rgb+i3d_flow,roi_sch=ROIxSMC,cross_roi_sch=croi_ensemble_full_track.zip	02/28/2022 14:36:07	Finished	
"""

In [141]:
df = pd.read_csv(StringIO(results), sep="\t")

df['WB_test_score'] = df['SCORE'].apply(lambda x: f'{x:.3f}')

df = df[['WB_test_score']]

In [142]:
all_score_df = all_score_df.drop(columns=['WB_test_score'])

In [143]:
all_score_df = all_score_df.join(df)

# ROI submission score

In [56]:
submission_output_dir = './submissions'

In [63]:
# test set score
import zipfile

score_res_dicts = []

for i in range(1, 22):
    # make sure the first file is `output_file (1).zip`
    with zipfile.ZipFile(os.path.join(submission_output_dir, f'output_file ({i}).zip'), 'r') as zip_ref:
        zip_ref.extractall(submission_output_dir)
    df = pd.read_csv(os.path.join(submission_output_dir, 'scores.txt'), sep=': ', header=None, engine='python')
    
    res_dict = {}
    for j in range(len(df)):
        roi, score = df.iloc[j]
        if roi == 'score': continue
        key = f'{roi}_test_score'
        res_dict[key] = f'{score:.3f}'
    score_res_dicts.append(res_dict)

roi_test_score_df = pd.DataFrame(score_res_dicts)

In [65]:
all_score_df = all_score_df.join(roi_test_score_df)

# preprocess table

In [None]:
print(rois)

['WB', 'V1', 'V2', 'V3', 'V4', 'LOC', 'EBA', 'FFA', 'STS', 'PPA', 'REST', 'SMC1', 'SMC2', 'MC2', 'MC1', 'SC3', 'SC4', 'LC1', 'LC2', 'LC3', 'LC4', 'LC5']


In [152]:
for roi in rois:
    val_key = f'{roi}_val_score'
    test_key = f'{roi}_test_score'
    if test_key in all_score_df.keys():
        all_score_df[roi] = all_score_df[val_key] + '/' + all_score_df[test_key]
    else:
        all_score_df[roi] = all_score_df[val_key] + '/' + '-'

In [155]:
all_score_df.keys()

Index(['model_sch', 'he_sch', 'roi_sch', 'cross_roi_sch', 'i_submission',
       'path', 'WB_val_score', 'V1_val_score', 'V2_val_score', 'V3_val_score',
       'V4_val_score', 'LOC_val_score', 'EBA_val_score', 'FFA_val_score',
       'STS_val_score', 'PPA_val_score', 'REST_val_score', 'SMC1_val_score',
       'SMC2_val_score', 'MC2_val_score', 'MC1_val_score', 'SC3_val_score',
       'SC4_val_score', 'LC1_val_score', 'LC2_val_score', 'LC3_val_score',
       'LC4_val_score', 'LC5_val_score', 'V1_test_score', 'V2_test_score',
       'V3_test_score', 'V4_test_score', 'LOC_test_score', 'EBA_test_score',
       'FFA_test_score', 'STS_test_score', 'PPA_test_score', 'WB_test_score',
       'WB', 'V1', 'V2', 'V3', 'V4', 'LOC', 'EBA', 'FFA', 'STS', 'PPA', 'REST',
       'SMC1', 'SMC2', 'MC2', 'MC1', 'SC3', 'SC4', 'LC1', 'LC2', 'LC3', 'LC4',
       'LC5'],
      dtype='object')

In [156]:
interesting_keys = ['model_sch', 'he_sch', 'roi_sch', 'cross_roi_sch'] + rois

In [177]:
full_table_df = all_score_df[interesting_keys]

# make tables

In [169]:
# load from notebook 900. (cross-notebook-ref)
submission_configs_keys = ['model_sch', 'he_sch', 'roi_sch', 'cross_roi_sch']
table_configs = [
    [
        ('single_layer&i3d_rgb+i3d_flow', 'H3', 'WB', ''),
        ('single_layer&i3d_rgb+i3d_flow', 'H3', 'ROIxSMC', 'croi_ensemble'),
        ('single_layer&i3d_rgb+i3d_flow', 'H3', 'ROIxMC', 'croi_ensemble'),
        ('single_layer&i3d_rgb+i3d_flow', 'H3', 'LC', ''),
        ('single_layer&i3d_rgb+i3d_flow', 'H3', 'ROIxSC', 'croi_ensemble'),
        ('single_layer&i3d_rgb+i3d_flow', 'H3', 'ROIxLC', 'croi_ensemble'),
        ('single_layer&i3d_rgb+i3d_flow', 'H3', 'ROI', ''),
        ('single_layer&i3d_rgb+i3d_flow', 'H3', 'SMC', ''),
        ('single_layer&i3d_rgb+i3d_flow', 'H3', 'SC', ''),
        ('single_layer&i3d_rgb+i3d_flow', 'H3', 'MC', '')
    ]
    
]

In [189]:
from src.utils.misc import my_query_df
from pprint import pprint
for table_config in table_configs:
    # table_config = np.asarray(table_config)
    pprint(table_config)
    print()
    # break
    # dfs = []
    # for config in table_config:
    #     config_dict = {k: v for k, v in zip(submission_configs_keys, config)}
    #     df = my_query_df(full_table_df, equal_dict=config_dict)
    #     dfs.append(df)
    # table_df = pd.concat(dfs)

[('single_layer&i3d_rgb+i3d_flow', 'H3', 'WB', ''),
 ('single_layer&i3d_rgb+i3d_flow', 'H3', 'ROIxSMC', 'croi_ensemble'),
 ('single_layer&i3d_rgb+i3d_flow', 'H3', 'ROIxMC', 'croi_ensemble'),
 ('single_layer&i3d_rgb+i3d_flow', 'H3', 'LC', ''),
 ('single_layer&i3d_rgb+i3d_flow', 'H3', 'ROIxSC', 'croi_ensemble'),
 ('single_layer&i3d_rgb+i3d_flow', 'H3', 'ROIxLC', 'croi_ensemble'),
 ('single_layer&i3d_rgb+i3d_flow', 'H3', 'ROI', ''),
 ('single_layer&i3d_rgb+i3d_flow', 'H3', 'SMC', ''),
 ('single_layer&i3d_rgb+i3d_flow', 'H3', 'SC', ''),
 ('single_layer&i3d_rgb+i3d_flow', 'H3', 'MC', '')]

[('single_layer&i3d_rgb+i3d_flow', 'H3', 'ROIxSMC', 'croi_ensemble'),
 ('single_layer&i3d_rgb+i3d_flow', 'H3', 'ROIxSMC', 'croi_swap'),
 ('single_layer&i3d_rgb+i3d_flow', 'H3', 'ROIxSMC', 'voxel_swap'),
 ('single_layer&i3d_rgb+i3d_flow', 'H3', 'ROI', ''),
 ('single_layer&i3d_rgb+i3d_flow', 'H3', 'SMC', '')]

[('single_layer&i3d_rgb+i3d_flow', 'H3', 'ROIxSMC', 'croi_ensemble'),
 ('single_layer&i3d_rgb+i3d_

In [185]:
table_config[:, 2]

array(['WB', 'ROIxSMC', 'ROIxMC', 'LC', 'ROIxSC', 'ROIxLC', 'ROI', 'SMC',
       'SC', 'MC'], dtype='<U29')

In [184]:
table_config[table_config[:, 2].argsort()]

array([['single_layer&i3d_rgb+i3d_flow', 'H3', 'LC', ''],
       ['single_layer&i3d_rgb+i3d_flow', 'H3', 'MC', ''],
       ['single_layer&i3d_rgb+i3d_flow', 'H3', 'ROI', ''],
       ['single_layer&i3d_rgb+i3d_flow', 'H3', 'ROIxLC', 'croi_ensemble'],
       ['single_layer&i3d_rgb+i3d_flow', 'H3', 'ROIxMC', 'croi_ensemble'],
       ['single_layer&i3d_rgb+i3d_flow', 'H3', 'ROIxSC', 'croi_ensemble'],
       ['single_layer&i3d_rgb+i3d_flow', 'H3', 'ROIxSMC',
        'croi_ensemble'],
       ['single_layer&i3d_rgb+i3d_flow', 'H3', 'SC', ''],
       ['single_layer&i3d_rgb+i3d_flow', 'H3', 'SMC', ''],
       ['single_layer&i3d_rgb+i3d_flow', 'H3', 'WB', '']], dtype='<U29')

In [179]:
table_df

Unnamed: 0,model_sch,he_sch,roi_sch,cross_roi_sch,WB,V1,V2,V3,V4,LOC,...,SMC2,MC2,MC1,SC3,SC4,LC1,LC2,LC3,LC4,LC5
0,multi_layer&i3d_rgb,H3,ROIxSMC,croi_ensemble,0.175/0.373,0.333/0.677,0.322/0.689,0.306/0.679,0.281/0.718,0.355/0.718,...,0.044/-,0.287/-,0.058/-,0.315/-,0.260/-,0.088/-,0.322/-,0.251/-,0.470/-,0.419/-
14,single_layer&i3d_rgb+i3d_flow,H3,ROIxSMC,croi_ensemble,0.180/0.382,0.334/0.662,0.323/0.682,0.308/0.679,0.284/0.724,0.363/0.727,...,0.046/-,0.295/-,0.062/-,0.324/-,0.267/-,0.091/-,0.328/-,0.261/-,0.482/-,0.419/-
16,single_layer&i3d_rgb,H3,ROIxSMC,croi_ensemble,0.178/0.378,0.334/0.660,0.322/0.679,0.305/0.675,0.281/0.718,0.359/0.721,...,0.046/-,0.292/-,0.061/-,0.320/-,0.264/-,0.090/-,0.322/-,0.260/-,0.477/-,0.416/-
13,single_layer+multi_layer&i3d_rgb,H3,ROIxSMC,croi_ensemble,0.179/0.378,0.337/0.679,0.326/0.693,0.309/0.683,0.284/0.722,0.360/0.716,...,0.046/-,0.293/-,0.062/-,0.322/-,0.265/-,0.091/-,0.323/-,0.262/-,0.477/-,0.422/-
7,multi_layer&i3d_flow,H3,ROIxSMC,croi_ensemble,0.151/0.341,0.260/0.540,0.268/0.592,0.272/0.604,0.250/0.652,0.328/0.677,...,0.032/-,0.262/-,0.044/-,0.284/-,0.240/-,0.071/-,0.292/-,0.209/-,0.449/-,0.355/-
11,single_layer+multi_layer&i3d_rgb+i3d_flow,H3,ROIxSMC,croi_ensemble,0.181/0.382,0.338/0.680,0.326/0.695,0.311/0.689,0.287/0.729,0.363/0.725,...,0.047/-,0.295/-,0.062/-,0.323/-,0.267/-,0.092/-,0.329/-,0.264/-,0.483/-,0.423/-
21,multi_layer&i3d_rgb+i3d_flow,H3,ROIxSMC,croi_ensemble,0.176/0.377,,,,,,...,0.045/-,0.290/-,0.059/-,0.319/-,0.262/-,0.088/-,0.326/-,0.253/-,0.477/-,0.421/-
12,single_layer+multi_layer&i3d_flow,H3,ROIxSMC,croi_ensemble,0.158/0.339,0.264/0.545,0.270/0.594,0.275/0.611,0.255/0.649,0.334/0.668,...,0.034/-,0.267/-,0.048/-,0.289/-,0.245/-,0.076/-,0.305/-,0.221/-,0.455/-,0.357/-
4,single_layer&i3d_flow,H3,ROIxSMC,croi_ensemble,0.157/0.337,0.255/0.532,0.262/0.580,0.269/0.602,0.253/0.642,0.332/0.661,...,0.034/-,0.266/-,0.048/-,0.288/-,0.246/-,0.076/-,0.302/-,0.220/-,0.454/-,0.350/-


In [55]:
# drafts

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

for model_sch, he_sch in list(itertools.product(model_schs, he_schs)):
    idf = my_query_df(score_df, {'model_sch': model_sch, 'he_sch': he_sch})
    x = idf.trained_roi_sch.unique()
    y = idf.roi.unique()
    shape = (len(x), len(y))
    scores = idf.score.values.reshape(shape)
    ax, fig = plt.subplots(figsize=(20, 4))
    sns.heatmap(scores, xticklabels=y, yticklabels=x, annot=True, fmt='.4f', cmap='Reds')
    plt.yticks(rotation=0)
    plt.title(f'model_sch={model_sch}\n he_sch={he_sch}')
    plt.show()