# Transfer learning Deep Autoencoder
---

In this notebook, we build a deep autoencoder and train it on a dataset that is more extensive than the stroke one: the Human Connectome Project (HCP), which is a publicly available dataset. The pre-trained model is then fine-tuned on the stroke data.

Importing libraries...

In [None]:
# Change the working directory to the sibling "Notebook utilities" folder
# (presumably so that the project-local modules imported in the next cell resolve).
# NOTE(review): both moves are relative, so re-running this cell without restarting
# the kernel ends up in a different folder.
%cd ..
%cd "Notebook utilities"

In [None]:
from pathlib import Path
import numpy as np
from sklearn.model_selection import train_test_split

import glob
import os
import sys
import gc
import inspect

from Datasets import dataset
from Model_utilities import *
from Data_Preprocessing import get_arrays, to_vector, get_HCP, NormalizeData
from ConvAutoencoder_noweigth import ConvAutoEncoder
from callbacks import MetricsCallback, LitProgressBar

In [None]:
import torch
import torch.nn.functional as F
from torch import nn
import torch.optim as optim

import pytorch_lightning as pl
from pytorch_lightning.callbacks import ModelCheckpoint
from pytorch_lightning.callbacks.early_stopping import EarlyStopping 
from pytorch_lightning.metrics.functional import accuracy

import optuna
from optuna.integration import PyTorchLightningPruningCallback

In [None]:
# Relative path of the folder for saved models
MODEL_SAVE_FOLDER = Path("SavedModels")

# Use the GPU when one is visible, otherwise fall back to the CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Training device: {device}")

# Seed PyTorch and NumPy with the same value for reproducible results
SEED = 1234
torch.manual_seed(SEED)
np.random.seed(SEED)

## 1.1 Loading HCP Dataset

---

Similarly to the stroke dataset, the data is contained in a .mat file and is converted into a 3D array containing, for each subject, the corresponding FC matrix. Empty entries are simply not considered.

In [None]:
# Locate the parent of the current working directory and list the HCP files stored under it.
# os.getcwd() replaces inspect.getfile(inspect.currentframe()): a notebook cell has no
# real source file, and its synthetic filename is not guaranteed to resolve inside the
# working directory (recent ipykernel versions place it in a temporary folder).
currentdir = os.getcwd()
parentdir = os.path.dirname(currentdir)
sys.path.insert(0, parentdir)  # make modules located in the parent folder importable
files = glob.glob(parentdir+"/DATA/FC_HCP-20210921T174417Z-001/FC_HCP/*")

In [None]:
# Read the HCP files into a single 3D array of FC matrices
FC_3D = get_HCP(files)

# Turn every matrix into a vector
vect_mat = to_vector(FC_3D)

# Standardise each vector on its own: subtract its mean, divide by its standard deviation
for subject_idx, subject_vector in enumerate(vect_mat):
    vect_mat[subject_idx] = (subject_vector - subject_vector.mean()) / subject_vector.std()

# Hold out 25% of the vectors as the test set ...
vect_train, vect_test = train_test_split(vect_mat, shuffle=True, test_size=0.25)

# ... then carve 20% of the remaining vectors out as the validation set
vect_train, vect_val = train_test_split(vect_train, shuffle=True, test_size=0.2)

In [None]:
# Wrap the train / validation / test splits in the project's `dataset` class
train_dataset, val_dataset, test_dataset = (
    dataset(split) for split in (vect_train, vect_val, vect_test)
)

In [None]:
# Loader settings used for training: small batches, reshuffled at every epoch
params = {'batch_size': 16,
          'shuffle': True,
          'num_workers': 2}

# Evaluation loaders share the same settings but keep a fixed sample order:
# shuffling validation/test data brings no benefit and triggers a Lightning warning.
eval_params = {**params, 'shuffle': False}

#create dataloader
train_dataloader = torch.utils.data.DataLoader(train_dataset, **params)
val_dataloader = torch.utils.data.DataLoader(val_dataset, **eval_params)
test_dataloader = torch.utils.data.DataLoader(test_dataset, **eval_params)

## 1.2 Hyperparameter Optimization for the HCP dataset
---

#### Autoencoder 
A convolutional autoencoder consisting of 3 convolutional layers and 2 linear layers is implemented using the `PyTorch Lightning` module.
The model is defined in `ConvAutoencoder_noweigth.py`. The only difference from the other version is that here the weight decay of the optimizer is fixed.



![ConvAutoencoder.png](attachment:ConvAutoencoder.png)

##### Hyperparameter search


The optimal hyper-parameter values are searched with the `Optuna` library, separately for each latent-space size. This is done by minimizing the mean validation loss.

In [None]:
#---Hyperparameter Optimization with Optuna---#
def objective(trial: optuna.trial.Trial) -> float:
    """Train one ConvAutoEncoder configuration on HCP and return its validation loss.

    The latent size is not sampled. It is read from the module-level variable
    ``encoded_space_dim``, i.e. the loop variable of the cell that calls
    ``study.optimize(objective, ...)``. (The previous code read ``encoded_space``,
    which that loop never defines.)

    Args:
        trial: Optuna trial used to sample the hyper-parameters and to report
            intermediate ``val_loss`` values to the pruner.

    Returns:
        The last ``val_loss`` found in the trainer's callback metrics; this is the
        value the study minimises.
    """
    # Sample the search space (same names, ranges and sampling order as before).
    # NOTE(review): the dropout range includes 1.0; if the model feeds this value to a
    # standard dropout layer, that would zero every activation — consider a lower bound.
    hyper_parameters = {
        'optimizer': trial.suggest_categorical("optimizer", ["Adam"]),
        'learning_rate': trial.suggest_float("learning_rate", 1e-5, 1e-2, log=True),
        'dropout': trial.suggest_float("dropout", 0.0, 1),
        'conv1': trial.suggest_categorical("conv1", [8, 16, 32, 64, 128]),
        'conv2': trial.suggest_categorical("conv2", [8, 16, 32, 64, 128]),
        'conv3': trial.suggest_categorical("conv3", [8, 16, 32, 64, 128]),
        'fc': trial.suggest_categorical("fc", [8, 16, 32, 64, 128]),
    }

    #Define model
    cnn_autoencoder = ConvAutoEncoder(encoded_space_dim, hyper_parameters=hyper_parameters)

    #Define training
    bar = LitProgressBar()
    metrics_callback = MetricsCallback()
    trainer = pl.Trainer(
        logger=True,
        limit_val_batches=1.,  #fraction of validation batches to be used
        checkpoint_callback=False,  #Do not save models during hyperparams opt.
        max_epochs=50,
        gpus=1 if torch.cuda.is_available() else None,
        callbacks=[PyTorchLightningPruningCallback(trial, monitor="val_loss"), metrics_callback, bar]
    )
    # Record the sampled values alongside the run's logs
    trainer.logger.log_hyperparams(hyper_parameters)

    #Train
    trainer.fit(cnn_autoencoder, train_dataloader, val_dataloader)

    return trainer.callback_metrics["val_loss"].item()  #Minimize "val_loss"
    

In [None]:
# Set to True to (re)run the hyper-parameter search; left off because it is slow.
RUN_HCP_SEARCH = False

components = np.arange(10,95,5)
for encoded_space_dim in components:
    # MedianPruner terminates a trial early when its best intermediate result is worse
    # than the median of the previous trials at the same step, so that clearly bad
    # hyper-parameter choices do not waste time.
    pruner = optuna.pruners.MedianPruner(n_startup_trials=5, n_warmup_steps=10)

    # One study (and one SQLite file) per latent size. The name is built from the loop
    # variable; the previous code used `encoded_space`, which this loop never defines.
    study_name = "Study_HCP_"+str(encoded_space_dim)
    study = optuna.create_study(study_name=study_name, storage="sqlite:///"+study_name+".db", direction="minimize", pruner=pruner, load_if_exists=True)

    if RUN_HCP_SEARCH:
        #timeout = stop after this many seconds (set to None to proceed without time limitation)
        study.optimize(objective, n_trials=100, timeout=None)

    #Free RAM
    del study
    torch.cuda.empty_cache()
    gc.collect()
        
        



## 1.3 Training with optimal values

In [None]:
# NOTE(review): this range starts at 55, whereas the search and fine-tuning cells use
# 10..90 — confirm that the checkpoints for 10..50 already exist on disk.
components = np.arange(55,95,5)


for encoded_space_dim in components:
    # Load the finished study for this latent size. load_study needs no pruner, so this
    # cell no longer depends on the `pruner` variable left behind by the search cell.
    study_name = "Study_HCP_"+str(encoded_space_dim)
    study = optuna.load_study(study_name=study_name, storage="sqlite:///"+study_name+".db")
    best_trial = study.best_trial
    best_hyperparameters = best_trial.params

    #Callbacks for the training run
    # No ModelCheckpoint is registered here (the original built one but never passed it
    # to the trainer); the trainer is handed to save_state below instead.
    bar = LitProgressBar()
    metrics_callback = MetricsCallback()
    early_stopping_callback = EarlyStopping(monitor='val_loss', patience=10, verbose=True)

    #Define training (use the GPU only when one is available, as in the search cell)
    trainer = pl.Trainer(gpus=1 if torch.cuda.is_available() else None, max_epochs=200, callbacks=[metrics_callback, bar, early_stopping_callback])
    #Define model
    cnn_autoencoder = ConvAutoEncoder(encoded_space_dim, best_hyperparameters)
    #Train
    trainer.fit(cnn_autoencoder, train_dataloader, val_dataloader)

    #Save model & learning curve under the name later used by the Transfer model
    save_state("cnn_HCP_best_"+str(encoded_space_dim), trainer, metrics_callback.metrics)

    #release RAM
    del study
    torch.cuda.empty_cache()
    gc.collect()


## 1.4 Fine Tuning

#### FC matrices and a behavioural score (language) of 131 stroke patients, contained in a .mat file.
The data is loaded from the .mat and .xlsx files and converted to a 3D array containing, for each patient, the corresponding FC matrix. Not all patients have values: those with empty data are simply removed and not considered. Furthermore, NA values are converted to 0.


##### Vectorizing matrices
Following the paper, we exploit the symmetry of each matrix and convert it into a vector.

In [None]:
# Parent of the current working directory. os.getcwd() replaces
# inspect.getfile(inspect.currentframe()): a notebook cell has no real source file, and
# its synthetic filename is not guaranteed to resolve inside the working directory.
currentdir = os.getcwd()
parentdir = os.path.dirname(currentdir)
sys.path.insert(0, parentdir)

#get data
mat_path = parentdir+'/DATA/FC_Stroke/FCMatrixImage_131subj.mat'
lang_path = parentdir+'/DATA/FC_Stroke/language_score.xlsx'
Normalize = True

fc_3d, language_score, ID = get_arrays(mat_path, lang_path, Normalize)

#vectorizing matrices
vect_mat_stroke = to_vector(fc_3d)

#standardize each stroke vector on its own
# The bound is the length of the stroke data; the previous code used len(vect_mat),
# i.e. the length of the HCP data, which does not match this array.
for i in range(len(vect_mat_stroke)):
    vect_mat_stroke[i] = (vect_mat_stroke[i] - vect_mat_stroke[i].mean())/ vect_mat_stroke[i].std()

In [None]:
#splitting the vectorize data in train-test using the sklearn library
# Hold out 40% of the stroke vectors; the remaining 60% are used for training
vect_train_stroke, vect_test_stroke = train_test_split(vect_mat_stroke, shuffle=True, test_size=0.4)

# Despite their names, these two objects are datasets, not dataloaders
train_dataloader_stroke = dataset(vect_train_stroke)
test_dataloader_stroke = dataset(vect_test_stroke)

params = dict(batch_size=16, shuffle=True, num_workers=2)

# Loaders: the training and full-data loaders use `params`,
# while the held-out loader keeps the DataLoader defaults
train_dataloader_strk = torch.utils.data.DataLoader(train_dataloader_stroke, **params)
test_dataloader_strk = torch.utils.data.DataLoader(test_dataloader_stroke)
total_dataloader = torch.utils.data.DataLoader(dataset(vect_mat_stroke), **params)

## 1.5 Model for the fine tuning

---

We load the convolutional autoencoder trained on the HCP dataset only and freeze all the convolutional weights, so that only the linear layers can learn something new from this dataset.

In [None]:
class Transfer(pl.LightningModule):
    """Fine-tune an HCP-pretrained ConvAutoEncoder with part of its parameters frozen.

    The pretrained network is restored from the checkpoint returned by
    ``checkpoint_path("cnn_HCP_best_<encoded_space_dim>")``. Its ``encoder_cnn`` and
    ``decoder_cnn`` sub-modules are reused as-is; a prefix of the encoder parameters and
    a suffix of the decoder parameters are frozen, and the rest is trained with Adam on
    a mean-squared reconstruction loss.

    Args:
        encoded_space_dim: latent size; selects which pretrained checkpoint is loaded.
        hyper_parameters: dict with at least ``'learning_rate'`` and ``'fc'``. When
            ``None``, defaults (``1e-3`` and ``16``) are used and updated with ``kwargs``.
        *args: accepted for compatibility and ignored.
        **kwargs: overrides for the default hyper-parameters (only used when
            ``hyper_parameters`` is ``None``).
    """

    def __init__(self,encoded_space_dim,  hyper_parameters: dict = None, *args, **kwargs):
        super().__init__()

        if hyper_parameters is None:
            self.hyper_parameters = { #Default values
                'learning_rate' : 1e-3,
                'fc' : 16
            }
            self.hyper_parameters.update(**kwargs)
        else:
            self.hyper_parameters = hyper_parameters

        self.encoded_space_dim = encoded_space_dim
        # NOTE(review): `fc` is stored but never used to build a layer in this class;
        # the layer sizes come from the loaded checkpoint.
        self.fc = self.hyper_parameters['fc']
        self.save_hyperparameters() #store hyper_parameters in checkpoints
        self.autoencoder = ConvAutoEncoder.load_from_checkpoint(checkpoint_path("cnn_HCP_best_"+str(encoded_space_dim)))

        # NOTE(review): Lightning's fit loop is expected to switch the module back to
        # train mode, so this call probably has no lasting effect — TODO confirm.
        self.autoencoder.eval()

        # Freeze the first six parameter tensors of the encoder
        # (presumably weight + bias of the three conv layers — confirm against ConvAutoEncoder).
        for position, param in enumerate(self.autoencoder.encoder_cnn.parameters(), start=1):
            if position < 7:
                param.requires_grad = False

        # Freeze every decoder parameter tensor after the fourth
        # (presumably the first four belong to the linear part — confirm against ConvAutoEncoder).
        for position, param in enumerate(self.autoencoder.decoder_cnn.parameters(), start=1):
            if position > 4:
                param.requires_grad = False

        self.encoder_cnn = self.autoencoder.encoder_cnn
        self.decoder_cnn = self.autoencoder.decoder_cnn

    def forward(self, x : "torch.tensor"):
        """Return the encoder output (the embedding) for `x`."""
        embedding = self.encoder_cnn(x)
        return embedding

    def _reconstruction_loss(self, x):
        """Encode then decode `x` and return the mean-squared error against `x`."""
        x_hat = self.decoder_cnn(self.encoder_cnn(x))
        return F.mse_loss(x_hat, x, reduction='mean')

    def training_step(self, batch, batch_idx):
        """Compute the reconstruction loss of one training batch and log it as 'train_loss'."""
        loss = self._reconstruction_loss(batch)  # the batch holds inputs only, no labels
        self.log('train_loss', loss, on_epoch=True)
        return loss

    def configure_optimizers(self):
        """Return an Adam optimizer using the configured learning rate (no weight decay)."""
        return optim.Adam(self.parameters(), lr=self.hyper_parameters['learning_rate'])

    def validation_step(self, batch, batch_idx, log_name='val_loss'):
        """Compute the reconstruction loss of one batch and log it under `log_name`.

        `log_name` defaults to 'val_loss'; `test_step` reuses this method with
        'test_loss'. (Previously this parameter did not exist, so `test_step`
        raised a TypeError.)
        """
        loss = self._reconstruction_loss(batch)
        self.log(log_name, loss)
        return loss

    def test_step(self, batch, batch_idx):
        """Same computation as `validation_step`, logged as 'test_loss'."""
        return self.validation_step(batch, batch_idx, log_name='test_loss')
    

## 1.6 Hyperparameter optimization for transfer learning

---

The only parameters that we are going to optimize are the size of the fully connected layer and the learning rate, since all the others are frozen.

In [None]:
#---Hyperparameter Optimization with Optuna---#
def objective_TRANSFER(trial: optuna.trial.Trial) -> float:
    """Train one Transfer configuration on the stroke data and return its validation loss.

    The latent size is not sampled. It is read from the module-level variable
    ``encoded_space_dim``, i.e. the loop variable of the cell that calls
    ``study.optimize(objective_TRANSFER, ...)``. (The previous code read
    ``encoded_space``, which that loop never defines.)

    Args:
        trial: Optuna trial used to sample ``learning_rate`` and ``fc`` and to report
            intermediate ``val_loss`` values to the pruner.

    Returns:
        The last ``val_loss`` found in the trainer's callback metrics.
    """
    # NOTE(review): `Transfer`, as defined in this notebook, stores `fc` but does not
    # use it to build any layer, so this choice may not affect the model.
    hyper_parameters = {
        'learning_rate': trial.suggest_float("learning_rate", 1e-5, 1e-2, log=True),
        'fc': trial.suggest_categorical("fc", [8, 16, 32, 64, 128]),
    }

    #Define model
    transfer_autoencoder = Transfer(encoded_space_dim=encoded_space_dim, hyper_parameters=hyper_parameters)

    #Define training
    bar = LitProgressBar()
    metrics_callback = MetricsCallback()
    trainer = pl.Trainer(
        logger=True,
        limit_val_batches=1.,  #fraction of validation batches to be used
        checkpoint_callback=False,  #Do not save models during hyperparams opt.
        max_epochs=50,
        gpus=1 if torch.cuda.is_available() else None,
        callbacks=[PyTorchLightningPruningCallback(trial, monitor="val_loss"), metrics_callback, bar]
    )
    # Record the sampled values alongside the run's logs
    trainer.logger.log_hyperparams(hyper_parameters)

    #Train (the held-out stroke loader is used as the validation loader)
    trainer.fit(transfer_autoencoder, train_dataloader_strk, test_dataloader_strk)

    return trainer.callback_metrics["val_loss"].item()  #Minimize "val_loss"

In [None]:
# Set to True to (re)run the hyper-parameter search; left off because it is slow.
RUN_TRANSFER_SEARCH = False

components = np.arange(10,95,5)
for encoded_space_dim in components:
    # MedianPruner terminates a trial early when its best intermediate result is worse
    # than the median of the previous trials at the same step, so that clearly bad
    # hyper-parameter choices do not waste time.
    pruner = optuna.pruners.MedianPruner(n_startup_trials=5, n_warmup_steps=10)

    # Names are built from the loop variable (the previous code used the undefined
    # `encoded_space`) and carry the trailing underscore used by the training cell that
    # reloads these studies ("Study_transfer_<dim>" / "Study_TRANSFER_<dim>.db").
    dim_tag = str(encoded_space_dim)
    study = optuna.create_study(study_name="Study_transfer_"+dim_tag, storage="sqlite:///Study_TRANSFER_"+dim_tag+".db", direction="minimize", pruner=pruner, load_if_exists=True)

    if RUN_TRANSFER_SEARCH:
        #timeout = stop after this many seconds (set to None to proceed without time limitation)
        study.optimize(objective_TRANSFER, n_trials=100, timeout=None)

    #Free RAM
    del study
    torch.cuda.empty_cache()
    gc.collect()

## 1.7 Training with optimal values

In [None]:
from tqdm import tqdm
components = np.arange(10,95,5)

for encoded_space in tqdm(components):
    # Load the finished study for this latent size. load_study needs no pruner, so this
    # cell no longer depends on the `pruner` variable left behind by the search cell.
    study = optuna.load_study(study_name="Study_transfer_"+str(encoded_space), storage="sqlite:///Study_TRANSFER_"+str(encoded_space)+".db")
    best_trial = study.best_trial
    best_hyperparameters = best_trial.params

    #Callbacks for the training run
    bar = LitProgressBar()
    metrics_callback = MetricsCallback()
    early_stopping_callback = EarlyStopping(monitor='val_loss', patience=5, verbose=True)
    checkpoint_callback = ModelCheckpoint(monitor='val_loss', dirpath='Models/experiments_90', filename='cnn_autoencoder-{epoch:02d}-{val_loss:.2f}', save_top_k=20, mode='min')

    #Define training (use the GPU only when one is available, as in the search cells)
    trainer = pl.Trainer(gpus=1 if torch.cuda.is_available() else None, max_epochs=200, callbacks=[checkpoint_callback, metrics_callback, bar, early_stopping_callback])

    #Define model
    transfer_autoencoder = Transfer(encoded_space_dim = encoded_space, hyper_parameters = best_hyperparameters)
    #Train (the held-out stroke loader is used as the validation loader)
    trainer.fit(transfer_autoencoder, train_dataloader_strk, test_dataloader_strk)

    #Save model & learning curve
    save_state("cnn_autoencoder_best_strk_"+str(encoded_space), trainer, metrics_callback.metrics)

    #plot some samples
    plot_samples(transfer_autoencoder, total_dataloader.dataset, encoded_space)

    #release RAM
    del study
    torch.cuda.empty_cache()
    gc.collect()

## 1.8 Saving values

In [None]:
# np.savetxt does not create missing folders, so make sure the output folder exists
os.makedirs('RESULTS', exist_ok=True)

components = np.arange(10,95,5)
for encoded_space_dim in components:
    print(encoded_space_dim)
    #loading state
    cnn_autoencoder_strk, metrics = load_state(Transfer,"cnn_autoencoder_best_strk_"+str(encoded_space_dim))

    #plotting samples with the lowest MSE
    #idx, re = plot_samples(cnn_autoencoder_strk, total_dataset, encoded_space_dim)

    #compute reconstruction errors over the full stroke dataset and store them as [mse, sd]
    mse, sd = reconstruction_error(cnn_autoencoder_strk,  total_dataloader.dataset, encoded_space_dim,vect_mat_stroke)
    np.savetxt('RESULTS/MSE_'+str(encoded_space_dim), [mse, sd])

    # s_mean, s_sd =  sim_error(cnn_autoencoder_strk,  total_dataloader.dataset, encoded_space_dim,vect_mat_stroke)
    #np.savetxt('RESULTS/SSIM_'+str(encoded_space_dim), [s_mean, s_sd])
