In [1]:
import numpy as np
import torch

from tqdm.auto import tqdm
from pathlib import Path

import itertools

import os
from dotenv import find_dotenv, load_dotenv

load_dotenv(find_dotenv(), verbose=True)

True

In [2]:
from src.utils.runs import load_run_df, filter_multi_layer_runs, filter_single_layer_runs, my_query_df

In [3]:
RESULTS_DIR = Path('/data/huze/ray_results/algonauts2021/')

In [4]:
run_df = load_run_df(RESULTS_DIR)

  0%|          | 0/2664 [00:00<?, ?it/s]

In [5]:
# Keep only the runs whose backbone defrost score is below 1 and that were
# trained on the whole-brain ('WB') ROI.
run_df = run_df.loc[
    (run_df['TRAINER.CALLBACKS.BACKBONE.DEFROST_SCORE'] < 1.)
    & (run_df['DATASET.ROI'] == 'WB')
]

In [6]:
len(run_df)

191

# prepare data

In [7]:
from src.utils.ensemble import optimize_val_correlation
from src.config.config import combine_cfgs, get_cfg_defaults
from src.data.datamodule import MyDataModule
from pathlib import Path
import torch
import numpy as np

In [8]:
# Prepare train and validation data.
# The data module is only used here for its train/val split and for the fMRI
# targets (item [1] of every sample).
# NOTE(review): the transform is set to 'i3d_flow' although only the targets are
# read — presumably any valid transform would do; confirm.
cfg = get_cfg_defaults()
cfg.DATASET.TRANSFORM = 'i3d_flow'
dm = MyDataModule(cfg)
dm.prepare_data()
dm.setup()

val_indices = dm.val_dataset.indices
fmris_cache_path = Path('/data/huze/.cache/trainval_fmris.pt')

if fmris_cache_path.exists():
    # Fast path: reuse the tensor stacked by a previous run.
    fmris = torch.load(fmris_cache_path)
else:
    # Slow path: walk the whole train+val dataset once and stack the targets.
    trainval_dataset = dm.dataset_train_val
    fmris = [trainval_dataset[i][1]
             for i in tqdm(range(len(trainval_dataset)))]
    fmris = np.stack(fmris, 0)
    fmris = torch.tensor(fmris)
    # The cache directory may not exist yet on a fresh machine; without this
    # torch.save would fail after the expensive pass above.
    fmris_cache_path.parent.mkdir(parents=True, exist_ok=True)
    torch.save(fmris, fmris_cache_path)

# Targets of the validation split only.
val_fmris = fmris[val_indices]

In [23]:
def get_ensemble_prediction_from_tensor_list(predicions_list, roi_val_fmris, val_indices, opt_verbose=False, tol=1e-2, device=None):
    """Fit ensemble weights on the validation rows and blend all predictions.

    Args:
        predicions_list: list of equally shaped prediction tensors, one per
            ensemble member; they are stacked along a new last axis.
        roi_val_fmris: validation targets handed to ``optimize_val_correlation``.
        val_indices: index selecting the validation rows (first axis) of the
            stacked predictions.
        opt_verbose: forwarded as ``verbose`` to ``optimize_val_correlation``.
        tol: forwarded as ``tol`` to ``optimize_val_correlation``.
        device: device forwarded to ``optimize_val_correlation``. ``None``
            (default) falls back to the notebook-level ``DEVICE`` global, which
            is the original behaviour.

    Returns:
        ``(new_predictions, ws)`` where ``ws`` is whatever
        ``optimize_val_correlation`` returns and ``new_predictions`` is the
        stacked predictions matrix-multiplied by ``ws``.
        NOTE(review): ``ws`` is presumably one weight per ensemble member —
        confirm against ``optimize_val_correlation``.
    """
    if device is None:
        # Resolved at call time so the config cell can be (re)run after this one.
        device = DEVICE
    predictions = torch.stack(predicions_list, -1)
    ws = optimize_val_correlation(predictions[val_indices],
                                  roi_val_fmris,
                                  verbose=opt_verbose,
                                  device=device,
                                  tol=tol)
    # Contract the trailing "member" axis with the fitted weights.
    new_predictions = predictions @ ws
    return new_predictions, ws

def multiply_and_flatten_voxel_embeddings_by_ensemble_weight(voxel_embeddings_list, ws, voxel_embeddings_dims=2, cat_dim=-1):
    """Scale each voxel embedding by its ensemble weight and merge them.

    The tensors in ``voxel_embeddings_list`` are scaled IN PLACE (to avoid an
    extra copy of very large tensors), so the caller's tensors are modified.

    Args:
        voxel_embeddings_list: list of tensors, one per ensemble member.
        ws: indexable weights; ``ws[i]`` scales ``voxel_embeddings_list[i]``.
        voxel_embeddings_dims: rank of a single, not-yet-merged embedding.
        cat_dim: axis along which the embeddings are stacked or concatenated.

    Returns:
        If the first tensor has rank ``voxel_embeddings_dims``, the tensors are
        stacked along a new ``cat_dim`` axis. If it has rank
        ``voxel_embeddings_dims + 1`` (already merged once), they are
        concatenated along ``cat_dim``.

    Raises:
        NotImplementedError: if the first tensor has any other rank. The rank
            is checked before scaling, so inputs are left untouched on failure.
    """
    ndim = len(voxel_embeddings_list[0].shape)
    if ndim not in (voxel_embeddings_dims, voxel_embeddings_dims + 1):
        # The original built this exception without raising it and returned None.
        raise NotImplementedError(
            f'expected embeddings of rank {voxel_embeddings_dims} or '
            f'{voxel_embeddings_dims + 1}, got rank {ndim}'
        )

    for i in range(len(ws)):
        voxel_embeddings_list[i] *= ws[i]

    if ndim == voxel_embeddings_dims:
        return torch.stack(voxel_embeddings_list, cat_dim)
    return torch.cat(voxel_embeddings_list, cat_dim)

In [10]:
run_df.groupby('MODEL.BACKBONE.NAME')['path'].nunique()

MODEL.BACKBONE.NAME
2d_bdcnvgg_warp_3d         17
2d_colorizer_warp_3d       17
2d_densnet_warp_3d         17
2d_moby_swin_warp_3d       17
2d_pyconvsegnet_warp_3d    17
2d_seg_swin_warp_3d        17
2d_simclr_warp_3d          17
3d_swin                    17
audio_vggish               21
i3d_flow                   17
i3d_rgb                    17
Name: path, dtype: int64

In [11]:
# Back-of-envelope size, in GiB, of holding every run's voxel embedding in memory:
# (17 runs x 10 backbones + 21 audio_vggish runs) x 2048 x 162000 x 4 bytes / 1024**3.
# The run counts come from the per-backbone table shown just above.
# NOTE(review): 2048 looks like the per-voxel embedding width, 162000 a rounded
# voxel count and 4 the float32 item size — confirm.
(17*10+21)*2048*162000*4/1024**3

236.0687255859375

In [12]:
# Show the 3d_swin runs, ordered by backbone layers and then SPP levels.
(
    my_query_df(run_df, equal_dict={'MODEL.BACKBONE.NAME': '3d_swin'})
    [['MODEL.BACKBONE.NAME', 'MODEL.BACKBONE.LAYERS', 'MODEL.NECK.SPP_LEVELS', 'DATASET.ROI', 'score']]
    .sort_values(['MODEL.BACKBONE.LAYERS', 'MODEL.NECK.SPP_LEVELS'])
)

Unnamed: 0,MODEL.BACKBONE.NAME,MODEL.BACKBONE.LAYERS,MODEL.NECK.SPP_LEVELS,DATASET.ROI,score
945,3d_swin,"(x1,)","(1,)",WB,0.094153
949,3d_swin,"(x1,)","(2,)",WB,0.098252
956,3d_swin,"(x1,)","(3,)",WB,0.101645
944,3d_swin,"(x1,)","(7,)",WB,0.096773
1635,3d_swin,"(x1, x2, x3, x4)","(1, 2, 3, 7)",WB,0.141382
941,3d_swin,"(x2,)","(1,)",WB,0.103469
952,3d_swin,"(x2,)","(2,)",WB,0.112857
947,3d_swin,"(x2,)","(3,)",WB,0.117669
955,3d_swin,"(x2,)","(7,)",WB,0.105365
943,3d_swin,"(x3,)","(1,)",WB,0.142096


In [13]:
# ROI key: used below to pick the voxel-index file (f'{roi}.pt') and to filter runs.
roi = 'WB'
# Optional: restrict the ensemble to a subset of backbones (disabled).
# backbone_names = ['i3d_rgb', 'i3d_flow']
# run_df = my_query_df(run_df, isin_dict={'MODEL.BACKBONE.NAME': backbone_names})


# verbose: print a progress line for every ensemble level.
verbose=True
# opt_verbose: forwarded as `verbose` to optimize_val_correlation.
opt_verbose=False

# Device handed to optimize_val_correlation via get_ensemble_prediction_from_tensor_list.
DEVICE = 'cuda:1' # the bottleneck is disk I/O, not the device

In [14]:
!free -h

              total        used        free      shared  buff/cache   available
Mem:           251G         38G         74G        144M        139G        211G
Swap:          7.6G          0B        7.6G


In [16]:
from src.utils.misc import my_query_df

# Three-level hierarchical ensemble:
#   level 1 - blend the runs that share (backbone, layers),
#   level 2 - blend the level-1 results of one backbone,
#   level 3 - blend the level-2 results of all backbones.
ORDERED_HIERACHY_KEYS = ['MODEL.BACKBONE.NAME', 'MODEL.BACKBONE.LAYERS', 'MODEL.NECK.SPP_LEVELS']
he_keys = ORDERED_HIERACHY_KEYS

# assert roi == 'WB' # comment this line to subdivide other rois

# Some backbones have different layers, so only (backbone, layers) pairs that
# actually occur in run_df are processed.
avaliable_configs = list(run_df.groupby(he_keys[:2]).groups)

roi_voxel_indices = torch.load(os.path.join(cfg.DATASET.VOXEL_INDEX_DIR, f'{roi}.pt'))
roi_val_fmris = val_fmris[..., roi_voxel_indices]

loaded_backbone_names = []

# Per-level weights, kept for reloading of voxel embeddings (not used here).
level1_config_tasks_ws = []
level2_config_ws = []

oo_predictions_list = []
oo_voxel_embeddings_list = []
oo_ws_list = []
for v11 in run_df[he_keys[0]].unique():
    o_predictions_list = []
    o_voxel_embeddings_list = []
    o_ws_list = []
    for v1 in run_df[he_keys[1]].unique():

        vs = (v11, v1)
        if vs not in avaliable_configs:
            continue
        _l1_df = my_query_df(run_df, equal_dict=dict(zip(he_keys[:2], vs)))
        roi_df = _l1_df.loc[_l1_df['DATASET.ROI'] == roi]
        run_paths = roi_df['path'].values

        predictions_list = [
            torch.tensor(np.load(path.joinpath('prediction.npy'))).float()
            for path in run_paths
        ]

        voxel_embeddings_list = [
            torch.tensor(np.load(path.joinpath('voxel_embedding.npy')))
            for path in run_paths
        ]

        if verbose:
            print('Level 1...\t', roi, v11, v1, '\t')
        new_predictions, ws = get_ensemble_prediction_from_tensor_list(predictions_list, roi_val_fmris, val_indices, opt_verbose=opt_verbose)
        new_voxel_embeddings = multiply_and_flatten_voxel_embeddings_by_ensemble_weight(voxel_embeddings_list, ws)

        # Tensor.numpy() shares memory with `ws`, and `ws` is rescaled in place
        # by the level-2 weights further down. Copy so the recorded level-1
        # weights are not silently overwritten.
        level1_config_tasks_ws.append((vs, run_paths, ws.numpy().copy()))

        o_predictions_list.append(new_predictions)
        o_voxel_embeddings_list.append(new_voxel_embeddings)
        o_ws_list.append(ws)
    if verbose:
        print('Level 2...\t', roi, v11, '\t')
    new_predictions, ws = get_ensemble_prediction_from_tensor_list(o_predictions_list, roi_val_fmris, val_indices, opt_verbose=opt_verbose)
    new_voxel_embeddings = multiply_and_flatten_voxel_embeddings_by_ensemble_weight(o_voxel_embeddings_list, ws)

    level2_config_ws.append((v11, ws.numpy()))

    oo_predictions_list.append(new_predictions)
    oo_voxel_embeddings_list.append(new_voxel_embeddings)

    loaded_backbone_names.append(v11)
    # Fold the level-2 weight into every level-1 weight vector so that
    # oo_ws_list holds one flat weight per individual run of this backbone.
    assert len(ws) == len(o_ws_list)
    for i in range(len(ws)):
        o_ws_list[i] *= ws[i]
    oo_ws_list.append(torch.cat(o_ws_list))
if verbose:
    print('Level 3...\t', roi, '\t')

new_predictions, backbone_ensemble_ws = get_ensemble_prediction_from_tensor_list(oo_predictions_list, roi_val_fmris, val_indices, opt_verbose=opt_verbose)

Level 1...	 WB i3d_rgb ('x4',) 	
Level 1...	 WB i3d_rgb ('x3',) 	
Level 1...	 WB i3d_rgb ('x2',) 	
Level 1...	 WB i3d_rgb ('x1',) 	
Level 1...	 WB i3d_rgb ('x1', 'x2', 'x3', 'x4') 	
Level 2...	 WB i3d_rgb 	
Level 3...	 WB 	


In [None]:
!free -h

              total        used        free      shared  buff/cache   available
Mem:           503G        285G        9.2G        1.8M        208G        214G
Swap:          7.6G        734M        6.9G


In [12]:
# Scratch directory for this notebook's saved arrays, under the results root.
TEMP_DIR = RESULTS_DIR / 'tmp' / 'notebook-010'

In [None]:
TEMP_DIR.mkdir(parents=True, exist_ok=True)

In [None]:
from src.utils.metrics import vectorized_correlation
from einops import rearrange

In [None]:
# Save the level-2 (per-backbone) ensemble outputs: flattened voxel
# embeddings, per-voxel validation scores and the blended predictions.
for backbone_name, voxel_embeddings, predictions in zip(loaded_backbone_names, oo_voxel_embeddings_list, oo_predictions_list):
    # Score against the ROI-sliced targets the ensemble weights were fitted on.
    # The unsliced val_fmris only lines up when the ROI covers every voxel.
    voxel_scores = vectorized_correlation(predictions[val_indices], roi_val_fmris).numpy()
    predictions = predictions.numpy()
    # Merge the embedding and model axes so each voxel gets one flat row.
    voxel_embeddings = rearrange(voxel_embeddings, 'num_voxels d num_models -> num_voxels (d num_models)').numpy()

    np.save(TEMP_DIR / f'{backbone_name}-voxel_embeddings.npy', voxel_embeddings)
    np.save(TEMP_DIR / f'{backbone_name}-voxel_scores.npy', voxel_scores)
    np.save(TEMP_DIR / f'{backbone_name}-predictions.npy', predictions)

In [20]:
TEMP_DIR

PosixPath('/data/huze/ray_results/algonauts2021/tmp/notebook-010')

In [15]:
!ls -lah /data/huze/ray_results/algonauts2021/tmp/notebook-010/

total 146G
drwxrwxr-x 2 huze huze 4.0K 3月  13 00:06 .
drwxrwxr-x 3 huze huze 4.0K 3月  11 23:45 ..
-rw-rw-r-- 1 huze huze 679M 3月  11 23:46 2d_bdcnvgg_warp_3d-predictions.npy
-rw-rw-r-- 1 huze huze  21G 3月  11 23:46 2d_bdcnvgg_warp_3d-voxel_embeddings.npy
-rw-rw-r-- 1 huze huze 631K 3月  11 23:46 2d_bdcnvgg_warp_3d-voxel_scores.npy
-rw-rw-r-- 1 huze huze 679M 3月  11 23:53 2d_colorizer_warp_3d-predictions.npy
-rw-rw-r-- 1 huze huze  21G 3月  11 23:53 2d_colorizer_warp_3d-voxel_embeddings.npy
-rw-rw-r-- 1 huze huze 631K 3月  11 23:53 2d_colorizer_warp_3d-voxel_scores.npy
-rw-rw-r-- 1 huze huze 679M 3月  11 23:45 2d_densnet_warp_3d-predictions.npy
-rw-rw-r-- 1 huze huze  21G 3月  11 23:45 2d_densnet_warp_3d-voxel_embeddings.npy
-rw-rw-r-- 1 huze huze 631K 3月  11 23:45 2d_densnet_warp_3d-voxel_scores.npy
-rw-rw-r-- 1 huze huze 679M 3月  11 23:48 2d_moby_swin_warp_3d-predictions.npy
-rw-rw-r-- 1 huze huze  21G 3月  11 23:48 2d_moby_swin_warp_3d-voxel_embeddings.npy
-rw-rw-r-- 1 huze huze 631K 3月  1

In [22]:
del o_voxel_embeddings_list, o_predictions_list

In [23]:
!free -h

              total        used        free      shared  buff/cache   available
Mem:           503G        259G         26G        1.8M        217G        241G
Swap:          7.6G        750M        6.9G


In [24]:
# Save the level-3 (all backbones) ensemble under the pseudo-backbone name 'all'.
# `new_predictions` must still be the output of the level-3 ensemble call.
backbone_name = 'all'

# Score against the ROI-sliced targets the ensemble weights were fitted on.
# The unsliced val_fmris only lines up when the ROI covers every voxel.
voxel_scores = vectorized_correlation(new_predictions[val_indices], roi_val_fmris).numpy()
np.save(TEMP_DIR / f'{backbone_name}-voxel_scores.npy', voxel_scores)

np.save(TEMP_DIR / f'{backbone_name}-predictions.npy', new_predictions.numpy())

In [25]:
np.save(TEMP_DIR.joinpath(Path(f'{backbone_name}-backbone_ensemble_ws.npy')), backbone_ensemble_ws.numpy())

In [27]:
np.save(TEMP_DIR.joinpath(Path(f'{backbone_name}-loaded_backbone_names.npy')), loaded_backbone_names)

In [30]:
del oo_voxel_embeddings_list

In [13]:
TEMP_DIR

PosixPath('/data/huze/ray_results/algonauts2021/tmp/notebook-010')

In [15]:
ll tmp

total 8
drwxrwxr-x 2 amax 4096 3月  12 13:11 [0m[01;34mnotebook011[0m/
drwxrwxr-x 5 amax 4096 3月  12 17:14 [01;34mnotebook012[0m/


In [16]:
# Create the local copy directory. -p makes the cell safe to re-run, and the
# explicit `!` avoids automagic, which only works in single-line cells.
!mkdir -p tmp/notebook010

In [1]:
ll /data/huze/ray_results/algonauts2021/tmp/notebook-010/

total 156269336
-rw-rw-r-- 1 huze   711125136 3月  11 23:46 2d_bdcnvgg_warp_3d-predictions.npy
-rw-rw-r-- 1 huze 22466904192 3月  11 23:46 2d_bdcnvgg_warp_3d-voxel_embeddings.npy
-rw-rw-r-- 1 huze      645432 3月  11 23:46 2d_bdcnvgg_warp_3d-voxel_scores.npy
-rw-rw-r-- 1 huze   711125136 3月  11 23:53 2d_colorizer_warp_3d-predictions.npy
-rw-rw-r-- 1 huze 22466904192 3月  11 23:53 2d_colorizer_warp_3d-voxel_embeddings.npy
-rw-rw-r-- 1 huze      645432 3月  11 23:53 2d_colorizer_warp_3d-voxel_scores.npy
-rw-rw-r-- 1 huze   711125136 3月  11 23:45 2d_densnet_warp_3d-predictions.npy
-rw-rw-r-- 1 huze 22466904192 3月  11 23:45 2d_densnet_warp_3d-voxel_embeddings.npy
-rw-rw-r-- 1 huze      645432 3月  11 23:45 2d_densnet_warp_3d-voxel_scores.npy
-rw-rw-r-- 1 huze   711125136 3月  11 23:48 2d_moby_swin_warp_3d-predictions.npy
-rw-rw-r-- 1 huze 22466904192 3月  11 23:48 2d_moby_swin_warp_3d-voxel_embeddings.npy
-rw-rw-r-- 1 huze      645432 3月  11 23:48 2d_moby_swin_warp_3d-voxel_scores.npy
-rw-rw-r-- 1

In [17]:
!find /data/huze/ray_results/algonauts2021/tmp/notebook-010/ -name '*-voxel_scores.npy' -exec cp "{}" tmp/notebook010/ \;

In [20]:
!find /data/huze/ray_results/algonauts2021/tmp/notebook-010/ -name '*-loaded_backbone_names.npy' -exec cp "{}" tmp/notebook010/ \;

In [21]:
ll tmp/notebook010

total 7588
-rw-rw-r-- 1 amax 645432 3月  12 17:51 2d_bdcnvgg_warp_3d-voxel_scores.npy
-rw-rw-r-- 1 amax 645432 3月  12 17:51 2d_colorizer_warp_3d-voxel_scores.npy
-rw-rw-r-- 1 amax 645432 3月  12 17:51 2d_densnet_warp_3d-voxel_scores.npy
-rw-rw-r-- 1 amax 645432 3月  12 17:51 2d_moby_swin_warp_3d-voxel_scores.npy
-rw-rw-r-- 1 amax 645432 3月  12 17:51 2d_pyconvsegnet_warp_3d-voxel_scores.npy
-rw-rw-r-- 1 amax 645432 3月  12 17:51 2d_seg_swin_warp_3d-voxel_scores.npy
-rw-rw-r-- 1 amax 645432 3月  12 17:51 2d_simclr_warp_3d-voxel_scores.npy
-rw-rw-r-- 1 amax 645432 3月  12 17:51 3d_swin-voxel_scores.npy
-rw-rw-r-- 1 amax   1140 3月  12 17:52 all-loaded_backbone_names.npy
-rw-rw-r-- 1 amax 645432 3月  12 17:51 all-voxel_scores.npy
-rw-rw-r-- 1 amax 645432 3月  12 17:51 audio_vggish-voxel_scores.npy
-rw-rw-r-- 1 amax 645432 3月  12 17:51 i3d_flow-voxel_scores.npy
-rw-rw-r-- 1 amax 645432 3月  12 17:51 i3d_rgb-voxel_scores.npy
