In [1]:
import numpy as np
import torch

from tqdm.auto import tqdm
from pathlib import Path

import itertools

import os
from dotenv import find_dotenv, load_dotenv

load_dotenv(find_dotenv(), verbose=True)

True

In [2]:
# GPU used by the ensemble-weight optimisation: the CUDA device with index 1.
DEVICE = torch.device('cuda', 1)

In [3]:
# Root folder holding the tuning results. The location is hard-coded here;
# the environment-driven alternative is kept below for reference.
# results_dir = Path(os.getenv('RESULTS_DIR'))
results_dir = Path('/data/huze/ray_results/algonauts2021')
print('results_dir', results_dir)

# A run is treated as finished once a prediction.npy exists somewhere below
# results_dir; the directory containing that file is the run directory.
finished_runs = [
    prediction_file.parent
    for prediction_file in results_dir.glob('**/prediction.npy')
]
# print('finished_runs', len(finished_runs))

results_dir /data/huze/ray_results/algonauts2021


In [4]:
from pprint import pprint

# Show which files the first finished run directory contains.
exapmle_files = [entry.name for entry in finished_runs[0].iterdir()]
pprint(exapmle_files)

['params.pkl',
 'voxel_embedding.npy',
 'result.json',
 'hparams.yaml',
 'events.out.tfevents.1645601235.yfwu-guslab',
 'params.json',
 'progress.csv',
 'prediction.npy',
 'events.out.tfevents.1645601256.yfwu-guslab.3793893.0']


In [5]:
import yaml
from yaml import CLoader
import json
import pandas as pd
from src.config.config import flatten

# Collect one row of metadata per finished run: flattened hyper-parameters,
# the run directory, the best validation correlation and the total run time.
finished_runs = [path.parent for path in results_dir.glob('**/prediction.npy')]

run_meta_infos = []
for run_dir in tqdm(finished_runs):
    # Open files through context managers so the handles are closed right
    # away instead of accumulating over the whole loop.
    # NOTE(review): yaml.load with CLoader can construct arbitrary Python
    # objects; acceptable for files written by our own runs, but do not point
    # this at untrusted YAML.
    with run_dir.joinpath('hparams.yaml').open() as hparams_file:
        hparams = yaml.load(hparams_file, Loader=CLoader)
    run_meta_info = flatten(hparams)
    run_meta_info['path'] = run_dir

    # result.json is parsed line by line, one JSON document per line.
    with run_dir.joinpath('result.json').open() as result_file:
        data = [json.loads(line) for line in result_file]
    ddf = pd.DataFrame(data)
    run_meta_info['score'] = ddf.val_corr.max()
    run_meta_info['time'] = ddf.time_total_s.max()

    run_meta_infos.append(run_meta_info)

run_df = pd.DataFrame(run_meta_infos)

# Lists are unhashable, which breaks groupby()/unique() on these columns;
# convert them to tuples.
run_df['MODEL.BACKBONE.LAYERS'] = run_df['MODEL.BACKBONE.LAYERS'].apply(tuple)
run_df['MODEL.NECK.SPP_LEVELS'] = run_df['MODEL.NECK.SPP_LEVELS'].apply(tuple)

print(f'total GPU time {run_df.time.sum() / 3600:.2f}h')

  0%|          | 0/1496 [00:00<?, ?it/s]

total GPU time 279.05h


# prepare data

In [6]:
from src.utils.ensemble import optimize_val_correlation
from src.config.config import combine_cfgs, get_cfg_defaults
from src.data.datamodule import MyDataModule

In [7]:
# Build the data module once to obtain the validation split, then load the
# validation fMRI targets (from an on-disk cache when one exists).
cfg = combine_cfgs('../src/config/experiments/algonauts2021_i3d_flow.yml')
dm = MyDataModule(cfg)
dm.prepare_data()
dm.setup()

val_indices = dm.val_dataset.indices

cache_path = Path('/data_smr/huze/.cache/val_fmris.pt')

if not cache_path.exists():
    # Cache miss: take element [1] of every validation sample, stack the
    # results along a new leading axis, and store the tensor for next time.
    val_fmris = torch.tensor(
        np.stack([dm.dataset_train_val[i][1] for i in tqdm(val_indices)], 0)
    )
    torch.save(val_fmris, cache_path)
else:
    val_fmris = torch.load(cache_path)

In [159]:
def get_ensemble_prediction_from_tensor_list(predicions_list, roi_val_fmris, val_indices, opt_verbose=False):
    """Combine several prediction tensors into one weighted ensemble prediction.

    Args:
        predicions_list: list of prediction tensors; they are stacked along a
            new trailing axis, so they must all have the same shape.
        roi_val_fmris: target passed to ``optimize_val_correlation`` together
            with the stacked predictions.
        val_indices: accepted for call-site compatibility; not used here.
        opt_verbose: forwarded to ``optimize_val_correlation`` as ``verbose``.

    Returns:
        Tuple ``(new_predictions, ws)``: ``ws`` is the value returned by
        ``optimize_val_correlation`` and ``new_predictions`` is the stacked
        predictions matrix-multiplied by ``ws``.
    """
    stacked = torch.stack(predicions_list, dim=-1)
    weights = optimize_val_correlation(
        stacked,
        roi_val_fmris,
        verbose=opt_verbose,
        device=DEVICE,
    )
    return stacked @ weights, weights

def multiply_and_flatten_voxel_embeddings_by_ensemble_weight(voxel_embeddings_list, ws, voxel_embeddings_dims=2, cat_dim=-1):
    """Scale each voxel-embedding tensor by its ensemble weight and merge them.

    The inputs are left untouched (scaling is done out of place), so calling
    this twice on the same list does not apply the weights twice.

    Args:
        voxel_embeddings_list: non-empty list of tensors, one per ensemble member.
        ws: one weight per entry of ``voxel_embeddings_list``.
        voxel_embeddings_dims: number of dimensions of a single, not yet
            merged embedding tensor.
        cat_dim: dimension along which the scaled tensors are merged.

    Returns:
        If the tensors have ``voxel_embeddings_dims`` dimensions they are
        stacked along a new ``cat_dim``; if they have one dimension more
        (already merged once) they are concatenated along ``cat_dim``.

    Raises:
        ValueError: if the list is empty or its length differs from ``len(ws)``.
        NotImplementedError: if the tensors have any other number of dimensions.
    """
    if len(voxel_embeddings_list) == 0:
        raise ValueError('voxel_embeddings_list must not be empty')
    if len(voxel_embeddings_list) != len(ws):
        raise ValueError(
            f'got {len(voxel_embeddings_list)} voxel embeddings but {len(ws)} ensemble weights'
        )

    scaled = [embedding * weight for embedding, weight in zip(voxel_embeddings_list, ws)]

    n_dims = len(scaled[0].shape)
    if n_dims == voxel_embeddings_dims:
        return torch.stack(scaled, cat_dim)
    if n_dims == voxel_embeddings_dims + 1:
        return torch.cat(scaled, cat_dim)
    # The original built this exception without raising it, which made the
    # function return None for unsupported shapes.
    raise NotImplementedError(
        f'expected tensors with {voxel_embeddings_dims} or {voxel_embeddings_dims + 1} dims, got {n_dims}'
    )
        
def multiply_and_flatten_ensemble_weights(previous_ws_list, ws, ws_dim=1, cat_dim=0):
    """Scale lower-level ensemble weights by the weights of the level above and merge them.

    The previous body referred to ``voxel_embeddings_list`` and
    ``voxel_embeddings_dims``, which are not parameters of this function; it
    either raised ``NameError`` or silently operated on leftover notebook
    globals. It now uses its own arguments.

    NOTE(review): with the default ``ws_dim=1``, 1-D weight vectors are
    *stacked* (so they must have equal length). The hierarchical-ensemble cell
    concatenates 1-D weight vectors instead, which corresponds to
    ``ws_dim=0`` here -- confirm which behaviour is intended before using
    this helper in that cell.

    Args:
        previous_ws_list: non-empty list of weight tensors from the level below.
        ws: one weight per entry of ``previous_ws_list``.
        ws_dim: number of dimensions of a single, not yet merged weight tensor.
        cat_dim: dimension along which the scaled tensors are merged.

    Returns:
        The scaled tensors stacked along a new ``cat_dim`` when they have
        ``ws_dim`` dimensions, or concatenated along ``cat_dim`` when they
        have ``ws_dim + 1`` dimensions.

    Raises:
        ValueError: if the list is empty or its length differs from ``len(ws)``.
        NotImplementedError: if the tensors have any other number of dimensions.
    """
    if len(previous_ws_list) == 0:
        raise ValueError('previous_ws_list must not be empty')
    if len(previous_ws_list) != len(ws):
        raise ValueError(
            f'got {len(previous_ws_list)} weight tensors but {len(ws)} ensemble weights'
        )

    # Out-of-place multiply: the caller's tensors stay unchanged.
    scaled = [previous_ws * weight for previous_ws, weight in zip(previous_ws_list, ws)]

    n_dims = len(scaled[0].shape)
    if n_dims == ws_dim:
        return torch.stack(scaled, cat_dim)
    if n_dims == ws_dim + 1:
        return torch.cat(scaled, cat_dim)
    raise NotImplementedError(
        f'expected tensors with {ws_dim} or {ws_dim + 1} dims, got {n_dims}'
    )

# Three-level hierarchical ensemble

In [160]:
# Options for the hierarchical ensemble below.
roi = 'V1'           # selects the voxel-index file and the runs to ensemble
verbose = True       # print a progress line for every ensemble level
opt_verbose = False  # forwarded to optimize_val_correlation as `verbose`

In [170]:
# Run the ensemble without loading voxel_embedding.npy.
# This part should run on the GPU.

In [168]:
from src.utils.misc import my_query_df

# Three nested ensembles for a single ROI:
#   level 1: all runs sharing (backbone name, backbone layers)
#   level 2: all level-1 ensembles of one backbone
#   level 3: all level-2 ensembles
# `full_ws` ends up holding one combined weight per individual run, in the
# order in which the runs were visited.
ORDERED_HIERACHY_KEYS = ['MODEL.BACKBONE.NAME', 'MODEL.BACKBONE.LAYERS', 'MODEL.NECK.SPP_LEVELS']
he_keys = ORDERED_HIERACHY_KEYS

# assert roi == 'WB' # comment this line to subdivide other rois

# (backbone name, backbone layers) combinations that actually occur in run_df.
avaliable_configs = list(run_df.groupby(he_keys[:2]).groups)

roi_voxel_indices = torch.load(os.path.join(cfg.DATASET.VOXEL_INDEX_DIR, f'{roi}.pt'))
roi_val_fmris = val_fmris[..., roi_voxel_indices]

# Per-level weight records kept for later inspection.
level1_config_tasks_ws = []
level2_config_ws = []
level3_ws = []

oo_predictions_list = []
# oo_voxel_embeddings_list = []
oo_ws_list = []
for v11 in run_df[he_keys[0]].unique():
    o_predictions_list = []
    # o_voxel_embeddings_list = []
    o_ws_list = []
    for v1 in run_df[he_keys[1]].unique():

        vs = (v11, v1)
        if vs not in avaliable_configs:
            continue
        _l1_df = my_query_df(run_df, equal_dict={k: v for k, v in zip(he_keys[:2], vs)})
        roi_df = _l1_df.loc[_l1_df['DATASET.ROI'] == roi]

        # Load each run's predictions as float32 and keep the validation rows.
        predictions_list = [
            torch.tensor(np.load(path.joinpath('prediction.npy'))).float()[val_indices]
            for path in roi_df['path'].values
        ]

        # voxel_embeddings_list = [
        #     torch.tensor(np.load(path.joinpath('voxel_embedding.npy')))
        #     for path in roi_df['path'].values
        # ]

        if verbose:
            print('Level 1...\t', roi, v11, v1, '\t')
        new_predictions, ws = get_ensemble_prediction_from_tensor_list(predictions_list, roi_val_fmris, val_indices, opt_verbose=opt_verbose)
        # new_voxel_embeddings = multiply_and_flatten_voxel_embeddings_by_ensemble_weight(voxel_embeddings_list, ws)

        # ws.numpy() shares memory with `ws`; the record stays correct only
        # because `ws` is never modified in place further down.
        level1_config_tasks_ws.append((vs, roi_df['path'].values, ws.numpy()))

        o_predictions_list.append(new_predictions)
        # o_voxel_embeddings_list.append(new_voxel_embeddings)
        o_ws_list.append(ws)
    if verbose:
        print('Level 2...\t', roi, v11, '\t')
    new_predictions, ws = get_ensemble_prediction_from_tensor_list(o_predictions_list, roi_val_fmris, val_indices, opt_verbose=opt_verbose)
    # new_voxel_embeddings = multiply_and_flatten_voxel_embeddings_by_ensemble_weight(o_voxel_embeddings_list, ws)

    level2_config_ws.append((v11, ws.numpy()))

    oo_predictions_list.append(new_predictions)
    # oo_voxel_embeddings_list.append(new_voxel_embeddings)
    assert len(ws) == len(o_ws_list)
    # Scale out of place. The previous in-place `*=` wrote through to the
    # arrays already stored in level1_config_tasks_ws (shared memory), turning
    # the recorded level-1 weights into level-1 x level-2 products.
    oo_ws_list.append(torch.cat([
        level1_ws * level2_w for level1_ws, level2_w in zip(o_ws_list, ws)
    ]))
if verbose:
    print('Level 3...\t', roi, '\t')
new_predictions, ws = get_ensemble_prediction_from_tensor_list(oo_predictions_list, roi_val_fmris, val_indices, opt_verbose=opt_verbose)
# new_voxel_embeddings = multiply_and_flatten_voxel_embeddings_by_ensemble_weight(oo_voxel_embeddings_list, ws)

level3_ws.append(ws.numpy())

assert len(ws) == len(oo_ws_list)
# In-place scaling is safe here: every entry of oo_ws_list is a fresh
# torch.cat result that nothing else refers to.
for i in range(len(ws)):
    oo_ws_list[i] *= ws[i]
full_ws = torch.cat(oo_ws_list)

# voxel_embeddings_dict = {k: v for k, v in zip(run_df[he_keys[0]].unique(), oo_voxel_embeddings_list)}

# voxel_embeddings_dict['all'] = new_voxel_embeddings

Level 1...	 WB i3d_rgb ('x3',) 	
Level 1...	 WB i3d_rgb ('x2',) 	
Level 1...	 WB i3d_rgb ('x4',) 	
Level 1...	 WB i3d_rgb ('x1',) 	
Level 1...	 WB i3d_rgb ('x1', 'x2', 'x3') 	
Level 1...	 WB i3d_rgb ('x1', 'x2', 'x3', 'x4') 	
Level 1...	 WB i3d_rgb ('x2', 'x3', 'x4') 	
Level 2...	 WB i3d_rgb 	
Level 1...	 WB i3d_flow ('x3',) 	
Level 1...	 WB i3d_flow ('x2',) 	
Level 1...	 WB i3d_flow ('x4',) 	
Level 1...	 WB i3d_flow ('x1',) 	
Level 1...	 WB i3d_flow ('x1', 'x2', 'x3') 	
Level 1...	 WB i3d_flow ('x1', 'x2', 'x3', 'x4') 	
Level 1...	 WB i3d_flow ('x2', 'x3', 'x4') 	
Level 2...	 WB i3d_flow 	
Level 3...	 WB 	


In [171]:
# Persist the combined per-run ensemble weights. Create the target directory
# first so the save cannot fail on a fresh checkout after the whole ensemble
# has already been computed.
os.makedirs('tmp', exist_ok=True)
torch.save(full_ws, 'tmp/ensemble_weights.pt')