In [14]:
%load_ext autoreload
%autoreload 2

import os
import sys
import random
import json
import gc
from typing import Tuple, Optional, Dict
from functools import partial
from pathlib import Path

import numpy as np
import pandas as pd
import torch
from torch import nn
import torch.nn.functional as F
from torch.optim import Adam
from torch.utils.data import DataLoader, Dataset
import h5py
from ipywidgets import interact
import matplotlib.pyplot as plt
from tqdm.notebook import tqdm
import nibabel as nib
import wandb

from torch_experiment import TorchExperiment, KeyDataset, TensorDataset
from metrics import r2_score
from models import DenseNetwork
from loss import CosineSimilarityLoss
from utils import require_dataset, reconstruct_volume

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [2]:
# Read the NSD example arrays fully into memory, then wrap each split as a
# KeyDataset exposing paired 'betas' and 'stimulus' entries.

with h5py.File('./example_data/nsd.hdf5', 'r') as f:
    # Indices and volume shape are reused later by reconstruct_volume
    # (presumably voxel positions inside the full brain volume).
    betas_indices = f['betas_indices'][:]
    volume_shape = f['betas_indices'].attrs['volume_shape']

    # `[:]` copies each HDF5 dataset into a NumPy array before the file closes.
    train_betas, train_stimulus = f['train/betas'][:], f['train/stimulus'][:]
    test_betas, test_stimulus = f['test/betas'][:], f['test/stimulus'][:]

train_dataset = KeyDataset({
    key: TensorDataset(torch.from_numpy(array))
    for key, array in (('betas', train_betas), ('stimulus', train_stimulus))
})
test_dataset = KeyDataset({
    key: TensorDataset(torch.from_numpy(array))
    for key, array in (('betas', test_betas), ('stimulus', test_stimulus))
})

In [5]:
def decoding_experiment(
        train_dataset: Dataset,
        val_dataset: Dataset,
        group: Optional[str] = None,
        max_iterations: int = 10001,
        notes: Optional[str] = None,
        device: Optional[torch.device] = None,
):
    """Train a dense decoder (betas -> stimulus) and log the run to wandb.

    Args:
        train_dataset: Dataset whose samples are mappings with 'betas' and
            'stimulus' entries. The first sample determines the input and
            output layer sizes.
        val_dataset: Dataset passed to ``TorchExperiment`` as ``val_dataset``.
        group: Forwarded to ``wandb.init(group=...)``.
        max_iterations: Forwarded to ``experiment.train_model``.
        notes: Forwarded to ``wandb.init(notes=...)``.
        device: Device to train on. When omitted, CUDA is used if it is
            available and the CPU otherwise.

    Returns:
        The ``TorchExperiment`` instance after ``train_model`` has returned.
    """
    if device is None:
        # Fall back to the CPU instead of failing on machines without a GPU.
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    else:
        device = torch.device(device)

    # The first sample fixes the network's input and output widths.
    sample = train_dataset[0]
    betas_shape = sample['betas'].shape
    stimulus_shape = sample['stimulus'].shape

    model_params = dict(
        layer_sizes=[
            betas_shape[0],
            512,
            stimulus_shape[0],
        ],
        # NOTE(review): 0.9 is very high if this is a drop probability -
        # confirm against DenseNetwork.
        dropout_p=0.9,
    )
    model = DenseNetwork(**model_params)
    model.to(device)

    criterion_params = dict()
    criterion = CosineSimilarityLoss(**criterion_params)

    optimizer_params = dict(lr=1e-4)
    optimizer = Adam(
        params=model.parameters(),
        **optimizer_params,
    )

    training_params = dict(
        batch_size=128,
        evaluation_interval=250,
        evaluation_subset_size=500,
    )
    experiment = TorchExperiment(
        train_dataset=train_dataset,
        val_dataset=val_dataset,
        device=device,
        model=model,
        criterion=criterion,
        optimizer=optimizer,
        mode='decode',
        **training_params
    )

    # Flat run configuration: the objects themselves plus every hyperparameter.
    config = {
        'model': model,
        **model_params,
        'criterion': criterion,
        **criterion_params,
        'optimizer': optimizer,
        **optimizer_params,
        **training_params,
    }
    wandb.init(project='neuro-ml', config=config, group=group, notes=notes)
    # Request both the maximum and the minimum summary for every logged metric.
    wandb.define_metric("*", summary="max")
    wandb.define_metric("*", summary="min")

    experiment.train_model(max_iterations=max_iterations, logger=wandb.log)
    return experiment

In [6]:
# Train the decoder, predict the test-split stimulus, and persist the
# predictions, run metadata and model weights under the wandb run name.
experiment = decoding_experiment(
    train_dataset, 
    test_dataset,
)

# Inference only: no autograd graph is needed for predictions that are saved.
with torch.no_grad():
    _, stimulus_prediction = experiment.run_all(test_dataset)

save_file_path = Path('./example_results/decoding') / wandb.run.name 
save_file_path.mkdir(exist_ok=True, parents=True)

# Store the run configuration alongside identifying wandb metadata.
attributes = dict(wandb.config)
attributes['wandb_run_name'] = wandb.run.name
attributes['wandb_run_url'] = wandb.run.url
attributes['wandb_group'] = wandb.run.group
attributes['wandb_notes'] = wandb.run.notes

# Mode 'a' opens the file read/write and creates it when missing.
with h5py.File(save_file_path / 'results.hdf5', 'a') as f:

    for k, v in attributes.items():
        f.attrs[k] = v
    f.attrs['iteration'] = experiment.iteration
    require_dataset(f, 'test/stimulus_pred', stimulus_prediction.detach().cpu())

    # One dataset per entry of the model's state dict.
    model_group = f.require_group('model')
    for param_name, weights in experiment.model.state_dict().items():
        weights = weights.cpu()
        require_dataset(model_group, param_name, weights)

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mefirdc[0m. Use [1m`wandb login --relogin`[0m to force relogin


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 10001/10001 [00:51<00:00, 192.48it/s]


In [10]:
def encoding_experiment(
        train_dataset: Dataset,
        val_dataset: Dataset,
        group: Optional[str] = None,
        max_iterations: int = 10001,
        notes: Optional[str] = None,
        device: Optional[torch.device] = None,
):
    """Train a dense encoder (stimulus -> betas) and log the run to wandb.

    Args:
        train_dataset: Dataset whose samples are mappings with 'betas' and
            'stimulus' entries. The first sample determines the input and
            output layer sizes.
        val_dataset: Dataset passed to ``TorchExperiment`` as ``val_dataset``.
        group: Forwarded to ``wandb.init(group=...)``.
        max_iterations: Forwarded to ``experiment.train_model``.
        notes: Forwarded to ``wandb.init(notes=...)``.
        device: Device to train on. When omitted, CUDA is used if it is
            available and the CPU otherwise.

    Returns:
        The ``TorchExperiment`` instance after ``train_model`` has returned.
    """
    if device is None:
        # Fall back to the CPU instead of failing on machines without a GPU.
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    else:
        device = torch.device(device)

    # The first sample fixes the network's input and output widths.
    sample = train_dataset[0]
    betas_shape = sample['betas'].shape
    stimulus_shape = sample['stimulus'].shape

    # Only two layer sizes are given: stimulus width in, betas width out.
    model_params = dict(
        layer_sizes=[
            stimulus_shape[0],
            betas_shape[0],
        ],
    )
    model = DenseNetwork(**model_params)
    model.to(device)

    criterion_params = dict()
    criterion = nn.MSELoss(**criterion_params)

    optimizer_params = dict(lr=1e-3)
    optimizer = Adam(
        params=model.parameters(),
        **optimizer_params,
    )

    training_params = dict(
        batch_size=128,
        evaluation_interval=250,
        evaluation_subset_size=500,
    )
    experiment = TorchExperiment(
        train_dataset=train_dataset,
        val_dataset=val_dataset,
        device=device,
        model=model,
        criterion=criterion,
        optimizer=optimizer,
        mode='encode',
        **training_params
    )

    # Flat run configuration: the objects themselves plus every hyperparameter.
    config = {
        'model': model,
        **model_params,
        'criterion': criterion,
        **criterion_params,
        'optimizer': optimizer,
        **optimizer_params,
        **training_params,
    }
    wandb.init(project='neuro-ml', config=config, group=group, notes=notes)
    # Request both the maximum and the minimum summary for every logged metric.
    wandb.define_metric("*", summary="max")
    wandb.define_metric("*", summary="min")

    experiment.train_model(max_iterations=max_iterations, logger=wandb.log)
    return experiment

In [11]:
# Fit the encoding model; the test split is passed as the validation dataset.
experiment = encoding_experiment(train_dataset=train_dataset, val_dataset=test_dataset)

VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.0, max…

100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 10001/10001 [00:41<00:00, 240.05it/s]


In [36]:
# Evaluate the trained encoder on the test split, then persist predictions,
# R^2 scores, run metadata, model weights and a NIfTI volume of the scores.
with torch.no_grad():
    betas, betas_pred = experiment.run_all(test_dataset)

# reduction=None is expected to keep one score per betas column rather than
# averaging them - TODO confirm against metrics.r2_score.
betas_r2 = r2_score(betas, betas_pred, reduction=None)
betas_r2_volume = reconstruct_volume(betas_r2, tuple(volume_shape), torch.from_numpy(betas_indices).long())

save_file_path = Path('./example_results/encoding') / wandb.run.name 
save_file_path.mkdir(exist_ok=True, parents=True)

# Store the run configuration alongside identifying wandb metadata.
attributes = dict(wandb.config)
attributes['wandb_run_name'] = wandb.run.name
attributes['wandb_run_url'] = wandb.run.url
attributes['wandb_group'] = wandb.run.group
attributes['wandb_notes'] = wandb.run.notes

# Mode 'a' opens the file read/write and creates it when missing.
with h5py.File(save_file_path / 'results.hdf5', 'a') as f:

    for k, v in attributes.items():
        f.attrs[k] = v
    f.attrs['iteration'] = experiment.iteration
    require_dataset(f, 'test/betas_pred', betas_pred.detach().cpu())
    require_dataset(f, 'test/betas_r2', betas_r2.detach().cpu())

    # One dataset per entry of the model's state dict.
    model_group = f.require_group('model')
    for param_name, weights in experiment.model.state_dict().items():
        weights = weights.cpu()
        require_dataset(model_group, param_name, weights)

# Reverse the axis order on the NumPy side: `Tensor.T` is deprecated for
# tensors that are not 2-D, while `ndarray.T` reverses all axes for any rank.
image = nib.Nifti1Image(betas_r2_volume.numpy().T, np.eye(4))
nib.save(image, save_file_path / 'betas_r2.nii.gz')