In [1]:
%load_ext autoreload
%autoreload 2

import os
import sys

import numpy as np
import torch
import torchio as tio
import h5py
from ipywidgets import interact
import matplotlib.pyplot as plt

dir2 = os.path.abspath('..')
dir1 = os.path.dirname(dir2)
if not dir1 in sys.path: 
    sys.path.append(dir1)

In [None]:
from research.models.fmri_decoders import ConvolutionalDecoder

model = ConvolutionalDecoder(in_channels=7, 
                             extractor_channels=(100, 200, 400), 
                             decoder_channels=(512, 512, 256, 256, 128),
                             decoder_base_shape=(512, 6, 6),
                             decoder_output_shapes={'visual': (768,), 
                                                    'visual_layer1': (384, 96, 96), 
                                                    'visual_layer2': (768, 48, 48), 
                                                    'visual_layer3': (1536, 24, 24), 
                                                    'visual_layer4': (3072, 12, 12)},)


In [4]:
from research.data.kamitani_2019 import Kamitani2019, RawKamitani2019, Kamitani2019H5
from pathlib import Path

root = "X:\\Datasets\\Deep-Image-Reconstruction\\"
h5_path = Path(root) / "derivatives" / "kamitani2019.hdf5"
features_path = Path(root) / "derivatives" / "RN50x16-features.hdf5"
dataset = Kamitani2019H5(h5_path, 
                         subjects=['sub-02'], 
                         func_sessions=['natural_training', 'natural_test'], 
                         window=(0, 7), 
                         drop_out_of_window_events=False,)
                         #features_path=features_path)
                         #folds=[0, 1, 2, 3], split='train')

In [6]:
@interact(i=(0, len(dataset)-1))
def show_event(i):
    event = dataset[i]
    
    if 'features' in event:
        for k, v in event['features'].items():
            print(k, v.shape, v.numel())
            
    data = event['data']
    print(data.shape)
    
    T, H, W, D = data.shape
    @interact(d=(0, D-1), t=(0, T-2), derivative=False)
    def show_volume(d, t, derivative):
        fig = plt.figure(figsize=(12, 12))
        if derivative:
            x = data[t:t+2, :, :, d]
            #x = (x - event['run_mean']) / event['run_std']
            x = x[0, :, :] - x[1, :, :]
            plt.imshow(x, cmap='bwr', vmin=-3, vmax=3)
        else:
            x = data[t, :, :, d]
            #x = (x - event['run_mean']) / event['run_std']
            plt.imshow(x, cmap='gray', vmin=-1, vmax=3)
        plt.show()
        plt.close(fig)

interactive(children=(IntSlider(value=3599, description='i', max=7199), Output()), _dom_classes=('widget-inter…

In [None]:
x = torch.randn([1, 7, 72, 88, 76])
y = model(x)

In [None]:
for k, v in y.items():
    print(k, v.shape)

In [None]:
from research.models.fmri_decoders import BlurConvTranspose2d
model = BlurConvTranspose2d(10, 60)

x = torch.randn(2, 10, 64, 64)
print(x.mean(), x.std())

with torch.no_grad():
    y = model(x)
print(y.mean(), y.std())

In [None]:
import torchmetrics.functional as TMF
import sklearn.metrics as SKM

N = 4
d = 10000
eps = 1e-7

y = torch.randn(N, d)
#y[y < 0] = 0
#y += torch.randn(N, d) * eps

error = torch.randn(N, d) * 0.25
y_pred = y + error
#y_pred[y < 0] = 0
#y += torch.randn(N, d) * eps

#print(cosine_similarity(y, y_pred, reduction='mean'))
#print(explained_variance(y.T, y_pred.T))

print(TMF.r2_score(y[0], y_pred[0]), SKM.r2_score(y[0], y_pred[0]))


In [None]:
[f.name for f in Path(root).iterdir()]

In [None]:
from pathlib import Path

root = "X:\\Datasets\\Deep-Image-Reconstruction\\"
#file_name = 'sub-02_perceptionNaturalImageTraining_VC_v2.h5'
file_name = 'sub-02_perceptionNaturalImageTraining_VC_v2.h5'

f = h5py.File(Path(root) / "derivatives" / 'kamitani-preprocessed' / file_name, "r")
N, V = f['dataset'].shape

@interact(i=(0, V-1))
def show(i):
    data = f['dataset'][:, i]
    print(data.mean(), data.max(), data.min(), data.std())
    plt.hist(data)

In [None]:
f['dataset'][:, 14468].max()

In [None]:
print(values.size)
np.isnan(values).sum() + (values == 1.).sum()
print(np.unique(values))

In [None]:
f = h5py.File(Path(root) / "derivatives" / 'kamitani-preprocessed' / file_name, "r")
description = f['metadata/description'][:]
keys = f['metadata/key'][:]
values = f['metadata/value'][:]
print(values.shape)
print(np.argwhere(values[71] == 1.))
#print(
[(i, k, v) for i, (k, v) in enumerate(zip(keys, description))]

In [None]:
f = h5py.File('X:\\Downloads\\Subject1.h5', 'r')
data = f['dataSet'][:, :-10]

print(data.shape, data.mean(), data.min(), data.max(), data.std())
low, high = np.percentile(data, [1, 99], axis=0)
print(low.min(), low.max())

In [2]:
from research.data.kamitani_2019 import Kamitani2019H5Preprocessed

root = "X:\\Datasets\\Deep-Image-Reconstruction\\derivatives\\kamitani-preprocessed"
dataset = Kamitani2019H5Preprocessed(root, 
                                     subjects=['sub-02',], 
                                     func_sessions=['natural_training',],
                                     features_path='X:\\Datasets\\Deep-Image-Reconstruction\\derivatives\\RN50x16-features.hdf5',)

In [3]:
X_key = 'ROI_VC'
Y_key = 'visual.layer4.7.bn3'
#Y_key = 'visual'
data = dataset.get_data(brain_keys=[X_key, 'image_index'], feature_keys=[Y_key])

In [4]:
natural_training = data['sub-02']['natural_training']

image_index = natural_training['image_index'].astype(int)[:, 0]
X = natural_training[X_key]
if len(natural_training[Y_key].shape) > 2:
    natural_training[Y_key] = torch.flatten(torch.from_numpy(natural_training[Y_key]), start_dim=1).numpy()
if natural_training[Y_key].shape[1] > 1000:
    natural_training[Y_key] = natural_training[Y_key][:, np.random.choice(1000, size=1000)]
Y = natural_training[Y_key]
N, V, F = *X.shape, Y.shape[1]
print(N, V, F)

6000 14462 1000


In [5]:
from sklearn.model_selection import train_test_split

N = 1200

sorted_indices = np.argsort(image_index)
X = X[sorted_indices]
Y = Y[sorted_indices]
image_index = image_index[sorted_indices]

X = np.stack([x.mean(axis=0) for x in np.split(X, N)])
Y = np.stack([y.mean(axis=0) for y in np.split(Y, N)])

train_indices, _ = train_test_split(np.arange(N), train_size=0.8)
train_mask = np.zeros(N, dtype=bool)
train_mask[train_indices] = True
test_mask = ~train_mask

N_train = train_mask.sum()
N_test = N - N_train
print(N_train, N_test)

960 240


In [None]:
from sklearn.model_selection import train_test_split

unique_indices, inverse_indices = np.unique(image_index, return_inverse=True)
print(image_index.shape, unique_indices.shape)

train_indices, _ = train_test_split(unique_indices, train_size=0.8)

train_indices = set(train_indices)
train_mask = np.array([i in train_indices for i in image_index])
test_mask = ~train_mask

N_train = train_mask.sum()
N_test = N - N_train

print(N_train, N_test)

In [6]:
X_train, Y_train = X[train_mask], Y[train_mask]
X_test, Y_test = X[test_mask], Y[test_mask]

X_train_mean = X_train.mean(axis=0, keepdims=True)
X_train_std = X_train.std(axis=0, keepdims=True)
Y_train_mean = Y_train.mean(axis=0, keepdims=True)
Y_train_std = Y_train.std(axis=0, keepdims=True)

X_train = (X_train - X_train_mean) / X_train_std
Y_train = (Y_train - Y_train_mean) / Y_train_std

X_test = (X_test - X_train_mean) / X_train_std
Y_test = (Y_test - Y_train_mean) / Y_train_std

X_train_norm = X_train / np.linalg.norm(X_train, axis=0, keepdims=True)
Y_train_norm = Y_train / np.linalg.norm(Y_train, axis=0, keepdims=True)

# A (V, F) matrix of pearson correlation coefficients between individual voxels and features
pearson_correlations = X_train_norm.T @ Y_train_norm
print(pearson_correlations.shape)

(14462, 1000)


In [None]:
highest_correlation_indices = np.abs(pearson_correlations).argsort(axis=0)
# print(highest_correlation_indices.max(), highest_correlation_indices.min())
num_voxels = 100
highest_correlation_indices = highest_correlation_indices[-num_voxels:, :]
print(highest_correlation_indices.shape)

In [None]:
from sklearn.cross_decomposition import PLSRegression, CCA

model = CCA(n_components=2)
model.fit(X_train, Y_train)

Y_train_pred = model.predict(X_train)
Y_test_pred = model.predict(X_test)
print('r2', model.score(X_train, Y_train), model.score(X_test, Y_test))

In [41]:
from sklearn.metrics import r2_score
from fastl2lir import FastL2LiR


model = FastL2LiR()
model.fit(X_train, Y_train, alpha=1.0, n_feat=5)

Y_train_pred = model.predict(X_train)
Y_test_pred = model.predict(X_test)

def pearsonr_mat(Y, Y_pred):
    Y_norm = Y / np.linalg.norm(Y, axis=0, keepdims=True)
    Y_pred_norm = Y_pred / np.linalg.norm(Y_pred, axis=0, keepdims=True)
    return (Y_norm * Y_pred_norm).sum(axis=0).mean(axis=0)

print('r2', r2_score(Y_train, Y_train_pred), r2_score(Y_test, Y_test_pred))
print('r', pearsonr_mat(Y_train, Y_train_pred), pearsonr_mat(Y_test, Y_test_pred))

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1000/1000 [00:01<00:00, 985.22it/s]


r2 0.052132344851014124 -0.012755424407262347
r 0.22657621915379134 0.0945683656663706


In [31]:
from tqdm.notebook import tqdm
import slir
from sklearn.linear_model import Ridge, Lasso, LinearRegression
from sklearn.cross_decomposition import PLSRegression
from sklearn.feature_selection import SelectKBest, f_regression, mutual_info_regression
from scipy.stats import pearsonr
from sklearn.ensemble import AdaBoostRegressor

F2 = F // 10

models = []
r2_train = r2_test = r_train = r_test = 0
for i in tqdm(range(F2)):
    
    #selection_indices = highest_correlation_indices[:, i]
    
    #X_train_selection = X_train[:, selection_indices]
    y_train = Y_train[:, i]
   
    #X_test_selection = X_test[:, selection_indices]
    y_test = Y_test[:, i]
    
    #select_k_best = SelectKBest(mutual_info_regression, k=10)
    #X_train_selection = select_k_best.fit_transform(X_train, y_train)
    #X_test_selection = select_k_best.transform(X_test)
    
    select_k_best = SelectKBest(f_regression, k=5)
    X_train_selection = select_k_best.fit_transform(X_train, y_train)
    X_test_selection = select_k_best.transform(X_test)
    
    model = LinearRegression()
    #model = CCA(n_components=20)
    #model = PLSRegression(n_components=4)
    #model = Ridge(alpha=1, fit_intercept=False)
    #model = Lasso(alpha=0.1)
    #model = AdaBoostRegressor(n_estimators=5)
    #model = slir.SparseLinearRegression(n_iter=200, verbose=True, verbose_skip=100)

    model.fit(X_train_selection, y_train)
    y_train_pred = model.predict(X_train_selection)
    y_test_pred = model.predict(X_test_selection)
    r2_train += model.score(X_train_selection, y_train)
    r2_test += model.score(X_test_selection, y_test)
    r_train += pearsonr(y_train, y_train_pred)[0]
    r_test += pearsonr(y_test, y_test_pred)[0]

print(f'r2_train={r2_train / F2:.03}, r2_test={r2_test / F2:.03}')
print(f'r_train={r_train / F2:.03}, r_test={r_test / F2:.03}')

  0%|          | 0/100 [00:00<?, ?it/s]

r2_train=0.0528, r2_test=-0.0176
r_train=0.226, r_test=0.0888


In [None]:
print([(k, v.shape) for k, v in data['sub-02']['natural_training'].items()])

In [None]:
print([(k, v.shape) for k, v in data['sub-03']['natural_test'].items()])

In [None]:
data['sub-03']['natural_test']['stimulus_id']

In [None]:
from pathlib import Path

with h5py.File(Path('X:\\Datasets\\Deep-Image-Reconstruction\\derivatives') / 'stimulus_images.hdf5', 'r') as f:
    keys = list(f.keys())
    for func_session, keymap in dataset.stimulus_info.items():
        for stimulus_id in keymap.values():
            if stimulus_id[0] == 'n':
                wordnet_id, dataset_id = stimulus_id.split('_')
                stimulus_id = f'{int(wordnet_id[1:])}.{int(dataset_id):06}'
            
            if stimulus_id not in keys:
                print(stimulus_id)

In [None]:
import xarray as xr
a = xr.open_dataarray('C:\\Users\\Cefir\\.brainio\\assy_dicarlo_MajajHong2015_public\\assy_dicarlo_MajajHong2015_public.nc', engine='netcdf4')