<a href="https://colab.research.google.com/github/cesar-claros/brain_maps_analysis/blob/master/website/docs/notebooks/brain_maps_CV.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Attach Google Drive to the Colab runtime so the shared data can be copied locally.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive/'
drive.mount(DRIVE_MOUNT_POINT)

Mounted at /content/drive/


In [2]:
!mkdir volume_maps/
!mkdir stiffness_maps/
!mkdir splits/
!cp -r drive/Shareddrives/'Brain Voxels'/'Final Data Used for Paper'/Volume_FINAL/. volume_maps/
!cp -r drive/Shareddrives/'Brain Voxels'/'Final Data Used for Paper'/Stiffness_FINAL/. stiffness_maps/
!cp -r drive/Shareddrives/'Brain Voxels'/splits/. splits/
!cp -r drive/Shareddrives/'Brain Voxels'/labels_final.csv .

In [3]:
!pip install keras-hypetune

Collecting keras-hypetune
  Downloading keras_hypetune-0.1.3-py3-none-any.whl (10 kB)
Installing collected packages: keras-hypetune
Successfully installed keras-hypetune-0.1.3


In [5]:
! git clone https://github.com/cesar-claros/brain_maps_analysis
% cd brain_maps_analysis/src/utils/

/content/brain_maps_analysis/src/utils


In [7]:
# Functions definitions
import utils
import preprocessing
import model
# Libraries
import os
import sklearn
import pandas as pd
import nibabel as nib
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from sklearn.model_selection import KFold
from sklearn.preprocessing import OneHotEncoder
from kerashypetune import KerasRandomSearchCV

In [8]:
# Show the GPUs visible to TensorFlow (an empty list means none is visible).
gpu_devices = tf.config.list_physical_devices(device_type='GPU')
gpu_devices

[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]

In [12]:
def _read_split_ids(csv_path):
    """Read a header-less, comma-separated split file and return its content as a squeezed NumPy array."""
    split_table = pd.read_csv(csv_path, delimiter=',', header=None)
    return split_table.to_numpy().squeeze()


# Subject identifiers of the predefined train / validation / test partitions.
train_id = _read_split_ids('../../../splits/train_split.csv')
val_id = _read_split_ids('../../../splits/val_split.csv')
test_id = _read_split_ids('../../../splits/test_split.csv')

In [17]:
# Data locations, relative to the current working directory (src/utils).
folder_path_input_volume = '../../../volume_maps/'
folder_path_input_stiffness = '../../../stiffness_maps/'
folder_path_labels = '../../../'

# Stiffness maps: read_files returns the maps together with the sex and study
# covariates, the targets and a fifth value (m_*) for every split.
print('Loading stiffness maps training instances')
(X_train_stf, X_train_sex, X_train_study,
 y_train, m_train) = utils.read_files(folder_path_input_stiffness, folder_path_labels, train_id)
print('Loading stiffness maps validation instances')
(X_val_stf, X_val_sex, X_val_study,
 y_val, m_val) = utils.read_files(folder_path_input_stiffness, folder_path_labels, val_id)
print('Loading stiffness maps test instances')
(X_test_stf, X_test_sex, X_test_study,
 y_test, m_test) = utils.read_files(folder_path_input_stiffness, folder_path_labels, test_id)

# Volume maps: only the maps are requested (only_map=True).
volume_requests = [
    ('Loading volume maps training instances', train_id),
    ('Loading volume maps validation instances', val_id),
    ('Loading volume maps test instances', test_id),
]
volume_maps = []
for progress_message, split_ids in volume_requests:
    print(progress_message)
    volume_maps.append(
        utils.read_files(folder_path_input_volume, folder_path_labels, split_ids, only_map=True)
    )
X_train_vol, X_val_vol, X_test_vol = volume_maps

# One-hot encoding of the categorical covariates (sex and study).
#
# Each variable gets its own encoder, fitted exactly once on the data that is
# used for training (train + validation, which are merged right after this
# step). The fitted encoder is then only *applied* to every split. Re-fitting
# per split would make the column layout depend on which categories happen to
# be present in that split, so the same column could mean different categories
# (or the number of columns could differ) between train, validation and test.
#
# handle_unknown='ignore' encodes a category that appears only in the test
# split as an all-zero row instead of raising.
# NOTE: scikit-learn >= 1.2 renames `sparse` to `sparse_output`; adjust the
# keyword if this notebook is run on a recent scikit-learn.
sex_encoder = OneHotEncoder(sparse=False, handle_unknown='ignore')
study_encoder = OneHotEncoder(sparse=False, handle_unknown='ignore')

sex_encoder.fit(np.concatenate((X_train_sex, X_val_sex), axis=0).reshape(-1, 1))
study_encoder.fit(np.concatenate((X_train_study, X_val_study), axis=0).reshape(-1, 1))

# Transform categorical variables with the shared, already fitted encoders
X_train_sex = sex_encoder.transform(X_train_sex.reshape(-1, 1))
X_val_sex = sex_encoder.transform(X_val_sex.reshape(-1, 1))
X_test_sex = sex_encoder.transform(X_test_sex.reshape(-1, 1))
X_train_study = study_encoder.transform(X_train_study.reshape(-1, 1))
X_val_study = study_encoder.transform(X_val_study.reshape(-1, 1))
X_test_study = study_encoder.transform(X_test_study.reshape(-1, 1))

# Fold the validation split into the training split along the sample axis.
# The names are rebound in place, so running this cell a second time would
# append the validation samples again.
X_train_stf = np.concatenate([X_train_stf, X_val_stf], axis=0)
X_train_vol = np.concatenate([X_train_vol, X_val_vol], axis=0)
X_train_sex = np.concatenate([X_train_sex, X_val_sex], axis=0)
X_train_study = np.concatenate([X_train_study, X_val_study], axis=0)
y_train = np.concatenate([y_train, y_val], axis=0)

Loading stiffness maps training instances
Longest common prefix : Stiffness_
Longest common suffix : .nii
Loading stiffness maps validation instances
Longest common prefix : Stiffness_
Longest common suffix : .nii
Loading stiffness maps test instances
Longest common prefix : Stiffness_
Longest common suffix : .nii
Loading volume maps training instances
Longest common prefix : MPRAGE_
Longest common suffix : _struc_GM_to_T.nii
Loading volume maps validation instances
Longest common prefix : MPRAGE_
Longest common suffix : _struc_GM_to_T.nii
Loading volume maps test instances
Longest common prefix : MPRAGE_
Longest common suffix : _struc_GM_to_T.nii


In [21]:
seed = 12345
PREPROC_TYPE = 'std'

# Preprocess both map modalities the same way; each call receives the
# training maps together with the test maps and returns the processed pair.
X_train_stf_pp, X_test_stf_pp = preprocessing.preprocess(
    X_train_stf, X_test_stf, preproc_type=PREPROC_TYPE
)
X_train_vol_pp, X_test_vol_pp = preprocessing.preprocess(
    X_train_vol, X_test_vol, preproc_type=PREPROC_TYPE
)

# Hyper-parameter search space. Every entry lists the candidate values for
# one argument; single-element lists keep that argument fixed.
# Note that the "no categorical input" option is the string 'None'.
param_grid = dict(
    arc_type=[1, 2, 3, 4],
    lr=[1e-2, 1e-3, 1e-4, 1e-5],
    batch_size=[4, 12, 20, 28],
    epochs=[40],
    cat_input_type=['None', 'sex', 'study', 'sex_study'],
    n_maps=[2],
)

# Cross-validated random search over the space above: 5 shuffled folds,
# n_iter=1 sampled configuration, selected by the lowest validation loss.
print("[INFO] initializing model...")
utils.seed_everything(seed)
cv = KFold(n_splits=5, shuffle=True, random_state=seed)
krs = KerasRandomSearchCV(
    model.make_model,
    param_grid,
    cv=cv,
    monitor='val_loss',
    greater_is_better=False,
    n_iter=1,
    sampling_seed=seed,
)


[INFO] initializing model...


In [22]:
print("[INFO] performing random search...")
utils.seed_everything(seed)

# Categorical side input: the one-hot sex and study columns placed side by side.
X_train_cat = np.concatenate([X_train_sex, X_train_study], axis=1)

# The search receives the inputs as a list: stiffness maps, volume maps, categorical block.
trainData = [X_train_stf_pp, X_train_vol_pp, X_train_cat]
trainTarget = y_train
krs.search(trainData, trainTarget)

[INFO] performing random search...

##################
###  Fold 001  ###
##################

1 trials detected for ('arc_type', 'lr', 'batch_size', 'epochs', 'cat_input_type', 'n_maps')

***** (1/1) *****
Search({'arc_type': 4, 'lr': 0.01, 'batch_size': 12, 'epochs': 40, 'cat_input_type': 'None', 'n_maps': 2})
SCORE: 5.94333 at epoch 36

##################
###  Fold 002  ###
##################

1 trials detected for ('arc_type', 'lr', 'batch_size', 'epochs', 'cat_input_type', 'n_maps')

***** (1/1) *****
Search({'arc_type': 4, 'lr': 0.01, 'batch_size': 12, 'epochs': 40, 'cat_input_type': 'None', 'n_maps': 2})
SCORE: 5.16087 at epoch 40

##################
###  Fold 003  ###
##################

1 trials detected for ('arc_type', 'lr', 'batch_size', 'epochs', 'cat_input_type', 'n_maps')

***** (1/1) *****
Search({'arc_type': 4, 'lr': 0.01, 'batch_size': 12, 'epochs': 40, 'cat_input_type': 'None', 'n_maps': 2})
SCORE: 5.25177 at epoch 36

##################
###  Fold 004  ###
###########

<kerashypetune.KerasRandomSearchCV>

In [23]:
# Tabulate the selected parameter set(s) reported by the search.
best_params_table = pd.DataFrame(krs.best_params)
best_params_table

Unnamed: 0,arc_type,lr,batch_size,epochs,cat_input_type,n_maps,steps_per_epoch
0,4,0.01,12,36,,2,15
1,4,0.01,12,40,,2,15
2,4,0.01,12,36,,2,15
3,4,0.01,12,37,,2,15
4,4,0.01,12,33,,2,15


In [24]:
# Same information arranged per fold (one column for each CV fold).
folds_best_params_table = pd.DataFrame(krs.folds_best_params)
folds_best_params_table

Unnamed: 0,fold 1,fold 2,fold 3,fold 4,fold 5
arc_type,4.0,4.0,4.0,4.0,4.0
lr,0.01,0.01,0.01,0.01,0.01
batch_size,12.0,12.0,12.0,12.0,12.0
epochs,36.0,40.0,36.0,37.0,33.0
cat_input_type,,,,,
n_maps,2.0,2.0,2.0,2.0,2.0
steps_per_epoch,15.0,15.0,15.0,15.0,15.0


In [25]:
# Best monitored score (val_loss) reached in each CV fold.
folds_scores_table = pd.DataFrame(krs.folds_scores)
folds_scores_table

Unnamed: 0,fold 1,fold 2,fold 3,fold 4,fold 5
0,5.94333,5.16087,5.25177,7.50409,7.22326
