In [1]:
### Packages import
import os
import gc
import time
# Wall-clock timestamp taken when the notebook starts executing.
# NOTE(review): no cell visible here reads `start_time`; presumably meant for a total-runtime report — confirm or remove.
start_time = time.time()

import numpy as np

import torch
from torchvision import models
from src.cuda_checker import cuda_torch_check, memory_checker

### My modules import
from src.data_loader import argObj, data_loaders_stimuli_fmri
from src import image_preprocessing
from src.feature_extraction import model_loader, fit_pca, pca_batch_calculator, extract_and_pca_features, extract_features_no_pca
from src.encoding import linear_regression, compute_perason_numpy
from src.evaluation_metrics import median_squared_noisenorm_correlation

from sklearn.linear_model import Ridge
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import cross_validate
from sklearn.model_selection import KFold
from sklearn.metrics import make_scorer
from scipy.stats import pearsonr
from src.visualize import histogram, box_plot

### Cuda setup and check
import torch
# Select the device to run the model on
device = 'cuda' #@param ['cpu', 'cuda'] {allow-input: true}
# Check if cuda is available: torch.device('cuda') succeeds even on a machine
# without a usable GPU, and the failure would only surface later when the model
# is moved to the device. Fall back to the CPU explicitly (and say so) instead.
if str(device).startswith('cuda') and not torch.cuda.is_available():
    print(f"Requested device '{device}' but CUDA is not available: falling back to 'cpu'.")
    device = 'cpu'
device = torch.device(device)
# Project helper that prints the torch / CUDA environment report.
cuda_torch_check()

### Parameters definition
# Share of the training images kept for fitting; the remainder forms the validation split.
train_percentage = 90 # X% of the training data will be used for training, (100-X)% for validation
# Image preprocessing handed to the image dataloaders.
transform = image_preprocessing.imagenet_transform_alt

# Batch size used when building the image dataloaders.
batch_size = 64
# Number of PCA components requested when fitting the PCA on the extracted features.
pca_component = 300
# Minimum PCA batch size handed to pca_batch_calculator.
min_pca_batch_size = pca_component + 200 # alternative considered: pca_component * 2

# True: fit a PCA and reduce the extracted features; False: use the extracted features without PCA.
compute_pca = True
# Backbone name and layer passed to model_loader.
feature_model_type = "RetinaNet" #@param ["alexnet", "vgg16", "vgg19_bn", "efficientnetb2", "efficientnet_b5", "efficientnetb2lib", "ZFNet", "DINOv2", "RetinaNet"]
model_layer = "fpn"
# Regression flavour passed to linear_regression.
regression_type = "ridge" #@param ["linear", "ridge"]

# Passed to linear_regression; presumably controls writing the test predictions to disk — verify in src.encoding.
save_predictions = False

# Regularisation strengths for the left / right hemisphere regressions.
# They start as None and are overwritten by the grid-search cells further down.
alpha_l = None
alpha_r = None
# Grid-search switch (see the linear_regression call) — TODO confirm its semantics in src.encoding.
grid_search = False

# Subject identifier passed to argObj and used in the result-dictionary keys.
subj = 1
# Filled later with the per-hemisphere scores under the keys 'lh_<subj>' and 'rh_<subj>'.
noise_norm_corr_dict = {}

### Path definition
# `model_layer` may be a single layer name or a list of layer names; a list is
# collapsed into one '+'-joined token so it can be embedded in file/directory names.
model_layer_full = '+'.join(model_layer) if isinstance(model_layer, list) else model_layer

# Build the run identifier from the normalised layer token. Interpolating the raw
# `model_layer` would put a Python list repr (brackets, quotes, commas) into the
# directory names whenever several layers are selected.
# Note: `alpha_l` is interpolated with whatever value it holds when this cell runs.
submission_name = f'{feature_model_type}_{model_layer_full}-pca_{pca_component}-{regression_type}-alpha_{alpha_l}'

# Data folder definition
data_dir = '../Datasets/Biomedical/algonauts_2023_challenge_data'
# Used to save the prediction of saved model
parent_submission_dir = f'./files/submissions/{submission_name}'
# Used to save the figures produced for this run
images_submission_dir = f"./files/submissions/imgs/{submission_name}"
# Additional dataset folders handed to argObj (noise-ceiling and image-trial data, judging by the names — TODO confirm)
ncsnr_dir = '../Datasets/Biomedical/algonauts_ncsnr'
images_trials_dir = '../Datasets/Biomedical/algonauts_train_images_trials'

Check if GPU is available and if torch is using it ..


Torch Cuda is available?
True
Torch Cuda device count is :
1
Torch Cuda current device is :
0
Torch Cuda device is :
<torch.cuda.device object at 0x0000018F1995ECA0>
NVIDIA GeForce RTX 3070 Laptop GPU
Pytorch version：
1.13.0
CUDA Version: 
11.6
cuDNN version is :
8302




In [2]:
# Display the run identifier built in the configuration cell.
submission_name

'RetinaNet_fpn-pca_300-ridge-alpha_None'

In [3]:
print(submission_name + "\n")
print('############################ Subject: ' + str(subj) + ' ############################ \n')

# Build the object holding the data / submission paths for this subject.
args = argObj(subj, data_dir, parent_submission_dir, ncsnr_dir, images_trials_dir, images_submission_dir)

# Indices of the training, validation and test images, plus the image paths.
(idxs_train,
 idxs_val,
 idxs_test,
 train_imgs_paths,
 test_imgs_paths) = args.images_idx_splitter(train_percentage)

# Data-loader factory for the stimulus images and the fMRI responses.
data_loaders = data_loaders_stimuli_fmri(
    idxs_train,
    idxs_val,
    idxs_test,
    train_imgs_paths,
    test_imgs_paths,
    lh_fmri_path=args.lh_fmri,
    rh_fmri_path=args.rh_fmri,
)

(train_imgs_dataloader,
 val_imgs_dataloader,
 test_imgs_dataloader) = data_loaders.images_dataloader(batch_size, transform)

model, feature_extractor = model_loader(feature_model_type, model_layer, device)

if compute_pca:
    # Work out how many dataloader batches are stacked into one PCA batch, then fit the PCA.
    pca_batch_size, n_stacked_batches = pca_batch_calculator(
        len(idxs_train), batch_size, min_pca_batch_size, pca_component
    )
    pca = fit_pca(
        feature_extractor,
        train_imgs_dataloader,
        pca_component,
        n_stacked_batches,
        pca_batch_size,
        device,
    )
    print("Comulative Explained variance ratio: ", sum(pca.explained_variance_ratio_))
    print("Number of components: ", pca.n_components_)

    print('## Extracting features from training, validation and test data...')
    features_train = extract_and_pca_features(feature_extractor, train_imgs_dataloader, pca, n_stacked_batches, device)
    features_val = extract_and_pca_features(feature_extractor, val_imgs_dataloader, pca, n_stacked_batches, device)
    features_test = extract_and_pca_features(feature_extractor, test_imgs_dataloader, pca, n_stacked_batches, device)

    # The fitted PCA is no longer needed once the features are extracted.
    del pca
else:
    print('## Extracting features from training, validation and test data...')
    features_train = extract_features_no_pca(feature_extractor, train_imgs_dataloader, device)
    features_val = extract_features_no_pca(feature_extractor, val_imgs_dataloader, device)
    features_test = extract_features_no_pca(feature_extractor, test_imgs_dataloader, device)

# Teardown shared by both branches: move the networks to the CPU, drop the
# references to them and to the image dataloaders, then clear the CUDA cache
# and run the garbage collector.
model.to('cpu')
feature_extractor.to('cpu')
del model, feature_extractor, train_imgs_dataloader, val_imgs_dataloader, test_imgs_dataloader
torch.cuda.empty_cache()
gc.collect();  # trailing ';' keeps the collector's return value out of the cell output

RetinaNet_fpn-pca_300-ridge-alpha_None

############################ Subject: 1 ############################ 

## Stimulus Images Loading: Info
Total train images: 9841
Training stimulus images: 8857
Validation stimulus images: 984
Test stimulus images: 159


## Loading feature extraction model...


Feature extractor: RetinaNet, layer: fpn


## Calculating PCA batch size...
Batches size: 64
Total train instances: 8857
PCA components: 300
Minimum pca batch size: 500
Number of stacked batches for pca: 10
PCA batch size (batch_size * n_stacked_batches): 640
Last pca batch size: 537
## Fitting Incremental PCA (300 components) to training data...


100%|██████████| 139/139 [25:39<00:00, 11.08s/it] 


Comulative Explained variance ratio:  0.7038941523115233
Number of components:  300
## Extracting features from training, validation and test data...


100%|██████████| 139/139 [03:38<00:00,  1.58s/it]


Inital features number: 268544, final features number: 300


100%|██████████| 16/16 [00:27<00:00,  1.74s/it]


Inital features number: 268544, final features number: 300


100%|██████████| 3/3 [00:04<00:00,  1.35s/it]


Inital features number: 268544, final features number: 300


In [None]:
# Split the fMRI responses of both hemispheres into training and validation parts.
# NOTE(review): the "Fit the linear model" cell further down performs this same call.
(lh_fmri_train,
 lh_fmri_val,
 rh_fmri_train,
 rh_fmri_val) = data_loaders.fmri_splitter()

# Grid Search

In [4]:
## Fit the linear model ##
print('\n ## Fit Encoder and Predict...')

# Training / validation fMRI responses for the left (lh) and right (rh) hemisphere.
(lh_fmri_train,
 lh_fmri_val,
 rh_fmri_train,
 rh_fmri_val) = data_loaders.fmri_splitter()

# The full array shape is printed, not only the vertex count.
print('LH fMRI number of vertices:', lh_fmri_train.shape)
print('RH fMRI number of vertices:', rh_fmri_train.shape)

# Ridge penalties explored by the grid-search cells.
# Previously tried, wider grid:
#   {'alpha': [0.0001, 0.0002, 0.001, 0.01, 0.1, 1, 10, 100, 1e3, 5e3, 1e4, 2e4, 5e4, 1e5, 1e6]}
# Previously tried, higher range:
#   {'alpha': [1e6, 2e6, 5e6, 1e7, 2e7, 5e7]}
param_grid = dict(alpha=[1, 10, 100, 1e3, 5e3, 1e4, 2e4, 5e4, 1e5, 2e5, 5e5, 1e6])


 ## Fit Encoder and Predict...
LH fMRI number of vertices: (8857, 19004)
RH fMRI number of vertices: (8857, 20544)


In [5]:
# Left hemisphere: 5-fold grid search over the Ridge `alpha` values in `param_grid`.
# Each fit is scored by the median of what compute_perason_numpy returns for
# (y_true, y_pred) — presumably per-vertex Pearson correlations; verify in src.encoding.
grid_search_l = GridSearchCV(
    Ridge(),
    param_grid=param_grid,
    scoring=make_scorer(
        lambda y_true, y_pred: np.median(compute_perason_numpy(y_true, y_pred))
    ),
    cv=5,
    n_jobs=5,
    verbose=1,
)
grid_search_l.fit(X=features_train, y=lh_fmri_train)

print("Best Param: {}".format(grid_search_l.best_params_))
print("Best Score: {}".format(grid_search_l.best_score_))

# Keep the selected penalty for the final left-hemisphere regression.
alpha_l = grid_search_l.best_params_['alpha']

Fitting 5 folds for each of 10 candidates, totalling 50 fits
Best Param: {'alpha': 100000.0}
Best Score: 0.4171876543623486


In [6]:
# Reuse the left-hemisphere alpha for the right hemisphere.
# The right-hemisphere grid-search cell, when run, overwrites this value with its own best alpha.
alpha_r = alpha_l

In [9]:
# Inspect the right-hemisphere alpha currently held by the kernel.
# NOTE(review): the recorded output of this cell (1000000.0) differs from the best
# left-hemisphere alpha printed by the grid-search cell (100000.0), and the execution
# counts are not sequential — the saved outputs look stale; re-run from a fresh kernel.
print(alpha_r)

1000000.0


In [7]:
# Right hemisphere: 5-fold grid search over the Ridge `alpha` values in `param_grid`.
# Each fit is scored by the median of what compute_perason_numpy returns for
# (y_true, y_pred) — presumably per-vertex Pearson correlations; verify in src.encoding.
grid_search_r = GridSearchCV(
    Ridge(),
    param_grid=param_grid,
    scoring=make_scorer(
        lambda y_true, y_pred: np.median(compute_perason_numpy(y_true, y_pred))
    ),
    cv=5,
    n_jobs=5,
    verbose=1,
)
grid_search_r.fit(X=features_train, y=rh_fmri_train)

print("Best Param: {}".format(grid_search_r.best_params_))
print("Best Score: {}".format(grid_search_r.best_score_))

# Keep the selected penalty for the final right-hemisphere regression.
alpha_r = grid_search_r.best_params_['alpha']

Fitting 5 folds for each of 10 candidates, totalling 50 fits


# Predict and evaluate 

In [7]:
# Fit the encoders on the training features and predict the validation / test
# fMRI responses of both hemispheres.
# The `grid_search` flag from the configuration cell is passed through instead of
# a hard-coded False, so the setting is no longer silently ignored; the behaviour
# is unchanged while the flag keeps its default value (False).
(lh_fmri_val_pred,
 lh_fmri_test_pred,
 rh_fmri_val_pred,
 rh_fmri_test_pred) = linear_regression(
    regression_type,
    features_train,
    features_val,
    features_test,
    lh_fmri_train,
    rh_fmri_train,
    save_predictions,
    args.subject_test_submission_dir,
    alpha_l,
    alpha_r,
    grid_search=grid_search,
)

# Score the validation predictions against the measured responses and store the
# per-hemisphere results under the keys 'lh_<subj>' and 'rh_<subj>'.
(noise_norm_corr_dict[f'lh_{subj}'],
 noise_norm_corr_dict[f'rh_{subj}']) = median_squared_noisenorm_correlation(
    lh_fmri_val_pred,
    rh_fmri_val_pred,
    lh_fmri_val,
    rh_fmri_val,
    args.data_dir,
    args.ncsnr_dir,
    args.images_trials_dir,
    idxs_val,
)

# Report the median of each hemisphere's scores as a percentage.
print("\n Score -> Median Noise Normalized Squared Correlation Percentage (LH and RH)")
print("LH subj",subj,"| Score: ",np.median(noise_norm_corr_dict[f'lh_{subj}'])*100)
print("RH subj",subj,"| Score: ",np.median(noise_norm_corr_dict[f'rh_{subj}'])*100)

Fitting ridge regressions on the training data...
Predicting fMRI data on the validation and test data...
Computing the correlation between the predicted and actual fMRI data...


100%|██████████| 19004/19004 [00:01<00:00, 9556.57it/s] 
100%|██████████| 20544/20544 [00:01<00:00, 11662.10it/s]



 Score -> Median Noise Normalized Squared Correlation Percentage (LH and RH)
LH subj 1 | Score:  48.39405012388089
RH subj 1 | Score:  47.737441465482924


# Visualize

In [10]:
# Plot the left / right hemisphere score distributions for this subject.
# NOTE(review): the recorded run of this cell failed with
#   "TypeError: expected str, bytes or os.PathLike object, not bool",
# i.e. a bool reached a path-handling call. Check what type
# src.visualize.histogram expects for `save` and what
# args.subject_images_submission_dir actually holds before relying on this cell.
histogram(args.data_dir, noise_norm_corr_dict[f'lh_{subj}'], 
          noise_norm_corr_dict[f'rh_{subj}'], 
          submission_name, 
          save = args.subject_images_submission_dir)



TypeError: expected str, bytes or os.PathLike object, not bool

In [None]:
# Box plot of the left / right hemisphere scores for this subject.
# `save` receives the subject's image-submission directory from `args`
# (presumably the output location for the figure — verify in src.visualize).
box_plot(
    args.data_dir,
    noise_norm_corr_dict[f'lh_{subj}'],
    noise_norm_corr_dict[f'rh_{subj}'],
    submission_name,
    save=args.subject_images_submission_dir,
)