# Predicting Fluid Intelligence with Regression

In [None]:
# Enable IPython's autoreload extension in mode 2, so imported modules are
# reloaded before each cell runs and source edits take effect without
# restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
import os
import pandas as pd
import numpy as np
from collections import OrderedDict
import torch
torch.manual_seed(0)
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision.transforms import ToTensor
from tqdm import tqdm
from abcd.local.paths import output_path
from abcd.data.read_data import get_subjects_events_sf, subject_cols_to_events
import abcd.data.VARS as VARS
from abcd.data.define_splits import SITES, save_restore_sex_fmri_splits
from abcd.data.divide_with_splits import divide_events_by_splits
from abcd.data.var_tailoring.normalization import normalize_var
from abcd.data.pytorch.get_dataset import PandasDataset

# regression-specific imports
from abcd.models.regression.MLPRegressor import MLPRegressor, LinearRegressor, MLPRegressorCustom
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from abcd.training.RegressorTrainer import RegressorTrainer

#plotting
import matplotlib.pyplot as plt
import pygal
from abcd.plotting.pygal.rendering import display_html
from sklearn.metrics import confusion_matrix
import seaborn as sns
import sklearn.metrics as metrics
from sklearn.metrics import confusion_matrix
from abcd.plotting.seaborn.confusion_matrix import plot_confusion_matrix

from datetime import datetime

In [None]:
from abcd.analysis.regression import preprocess, train_model

In [None]:
# Training device. Pinned to the CPU for now; the automatic selection
#   "cuda" if torch.cuda.is_available()
#   else "mps" if torch.backends.mps.is_available() else "cpu"
# is disabled.
# TODO: figure out why training doesn't work with mps
device = "cpu"
print(f"Using {device} device")

In [None]:
# Target definition for the fluid-intelligence regression task.
bucketing_scheme = "fluid intelligence norm"
target_col = "nihtbx_fluidcomp_uncorrected"
normalize_targets = True

# Build the dataloaders and event tables from the fMRI and sMRI features,
# passing site 0 as `ood_site_num`.
(
    dataloaders,
    events_train,
    events_id_test,
    events_ood_test,
    feature_cols,
    thresholds,
) = preprocess(
    target_col,
    ['fmri', 'smri'],
    ood_site_num=0,
    normalize_targets=normalize_targets,
)

# Switch to the "_norm" column only when normalization was requested, so that
# flipping `normalize_targets` to False does not leave `target_col` pointing
# at a suffixed name.
# NOTE(review): assumes preprocess stores normalized targets under
# "<target_col>_norm" only when normalize_targets=True; confirm.
if normalize_targets:
    target_col = target_col + "_norm"

In [None]:
# Display the head of the training events returned by preprocess, as a quick
# sanity check.
events_train.head()

# Linear Regression Baseline

In [None]:
# --- Editable: settings for the linear-regression baseline ---
config = dict(
    target_col=target_col,
    features=['fmri', 'smri'],
    model=['abcd.models.regression.MLPRegressor', 'LinearRegressor'],
    lr=1e-5,
    batch_size=64,
    nr_epochs=1000,
)

# --- Leave unmodified: experiment naming and output location ---
# Runs on the normalized target carry an extra "norm_" marker in their title.
exp_prefix = 'ABCD_fluid_' + (
    'norm_' if target_col == 'nihtbx_fluidcomp_uncorrected_norm' else ''
)
# Title = prefix + model class name + start timestamp (used for saving results).
experiment_title = "{}{}_{}".format(
    exp_prefix,
    config['model'][1],
    datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
)
models_path = os.path.join(output_path, experiment_title, 'models')

In [None]:
# Instantiate the linear baseline (modify here to swap the model) with one
# input per feature column, and place it on the training device.
model = LinearRegressor(
    save_path=models_path,
    input_size=len(feature_cols),
).to(device)
print(model)

# Train quietly, then export the best model together with the config used.
trainer = train_model(
    model,
    device,
    config,
    experiment_title,
    dataloaders,
    verbose=False,
    bucketing_scheme=bucketing_scheme,
)
best_model_details = trainer.export_best_model(config=config)

# Custom MLP

In [None]:
# Settings for the custom MLP regressor.
config = dict(
    target_col=target_col,
    features=['fmri', 'smri'],
    model=['abcd.models.regression.MLPRegressor', 'MLPRegressorCustom'],
    batch_size=64,
    # --- tune the values below ---
    lr=1e-5,
    nr_epochs=500,
    hidden_sizes=[256, 64],
    l2_lambda=0.1,
    dropout=0.0,
)

# --- Editable: experiment naming ---
# Runs on the normalized target carry an extra "norm_" marker in their title.
exp_prefix = 'ABCD_fluid_' + (
    'norm_' if target_col == 'nihtbx_fluidcomp_uncorrected_norm' else ''
)
# Title = prefix + model class name + start timestamp (used for saving results).
experiment_title = "{}{}_{}".format(
    exp_prefix,
    config['model'][1],
    datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
)
print(experiment_title)

# --- Leave unmodified: output location ---
models_path = os.path.join(output_path, experiment_title, 'models')

In [None]:
# Instantiate the custom MLP (modify here to swap the model) from the layer
# sizes and dropout probability in `config`.
model = MLPRegressorCustom(
    save_path=models_path,
    input_size=len(feature_cols),
    hidden_sizes=config['hidden_sizes'],
    dropout_p=config['dropout'],
)

model = model.to(device)
print(model)

# Train verbosely, passing the L2 strength from the config.
trainer = train_model(
    model,
    device,
    config,
    experiment_title,
    dataloaders,
    verbose=True,
    bucketing_scheme=bucketing_scheme,
    l2_lambda=config['l2_lambda'],
)

# Export once and keep the returned details. The original cell called
# export_best_model twice in a row and discarded the first result.
best_model_details = trainer.export_best_model(config=config)

# Hyperparameter Search with Custom MLP

In [None]:
# Search space for the custom-MLP grid search.
#
# Hidden-layer layouts to try. Alternative candidates (currently disabled):
#     (256, 128, 64)
#     (2048, 1028, 512, 256, 128, 64, 32, 16)
hidden_sizes = [(512, 256, 128, 64)]

# Learning rates; the disabled alternative was [1e-5, 1e-7].
learning_rates = [1e-5]

# L2 regularization strengths.
l2_lambdas = [
    0,
    1e-4,
    1e-3,
]

# Dropout probabilities.
dropout_probs = [
    0,
    0.1,
    0.3,
    0.5,
    0.7,
]

In [None]:
from itertools import product

# Exhaustive grid search over learning rate, hidden-layer layout, L2 strength
# and dropout probability for the custom MLP. Every run is stored in
# `experiments`; the run with the lowest validation MSE is tracked separately.
experiments = {}
global_best_val_mse = float('inf')
best_model_experiment_name = None
best_model = None

# product() varies its rightmost argument fastest, so the visiting order and
# the running experiment number match four nested loops in this order.
search_grid = product(learning_rates, hidden_sizes, l2_lambdas, dropout_probs)

for experiment_num, (learning_rate, sizes, l2_lambda, dropout_prob) in enumerate(search_grid):
    # NOTE(review): titles have one-second resolution; two runs starting within
    # the same second would share a title and overwrite each other's entry in
    # `experiments`.
    experiment_title = 'fluid_norm_MLPReg_' + datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    print("experiment", experiment_num, ":", experiment_title)

    config = {
        'target_col': target_col,
        'features': ['fmri', 'smri'],
        'model': ['abcd.models.regression.MLPRegressor', 'MLPRegressorCustom'],
        'batch_size': 64,
        'nr_epochs': 1000,
        'hidden_sizes': sizes,
        'lr': learning_rate,
        'l2_lambda': l2_lambda,
        'dropout': dropout_prob,
    }

    # Define and train the model for this grid point.
    models_path = os.path.join(output_path, experiment_title, 'models')
    model = MLPRegressorCustom(
        save_path=models_path,
        input_size=len(feature_cols),
        hidden_sizes=config['hidden_sizes'],
        dropout_p=config['dropout'],
    )  # modify here to swap the model
    model = model.to(device)
    trainer = train_model(
        model,
        device,
        config,
        experiment_title,
        dataloaders,
        verbose=False,
        bucketing_scheme=bucketing_scheme,
        l2_lambda=config['l2_lambda'],
    )
    details = trainer.export_best_model(config=config)

    # Keep the run with the lowest validation MSE seen so far.
    local_best_val_mse = details['metrics']['val']['MSE']
    if local_best_val_mse < global_best_val_mse:
        global_best_val_mse = local_best_val_mse
        best_model = details
        best_model_experiment_name = experiment_title

    # Record every run, keyed by its title.
    experiments[experiment_title] = details

print("\n\nExperiment over. Best model:", best_model_experiment_name)