# ML testing: experiment #3f- harmonized with covbat

This notebook tests MRI harmonization. It builds models on a mixed dataset (TOP + StrokeMRI, treated as one dataset) after CovBat harmonization and applies them to the SABRE, Insight46, HELIUS and EDIS datasets.

Data: StrokeMRI, TOP, SABRE, Insight46, EDIS, and HELIUS pending

Harmonisation: covbat

Training data: Norment, which is StrokeMRI and TOP together

Testing data: test set from Norment

Further data applied to: SABRE, Insight46, EDIS, and HELIUS pending

Validation method: K-fold, double-stratified

Brain-age algorithms: LR, lasso, extra trees and ElasticNetCV fully tested (but parameters not optimized); additional algorithms partly tested

Outputs: ? what do we want to have here?

### import libraries

In [None]:
import os       # using operating system dependent functionality (folders)
import sys

import glob
from functools import reduce

import pandas as pd # data analysis and manipulation
import numpy as np    # numerical computing (manipulating and performing operations on arrays of data)
import matplotlib.pyplot as plt

# # demo stuff
import ipywidgets as widgets
# import seaborn 

# ml stuff
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
#from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
#from sklearn.linear_model import LogisticRegression
from sklearn.linear_model import LinearRegression
from sklearn import linear_model
from sklearn import tree
from sklearn import metrics
from sklearn.linear_model import SGDRegressor
from sklearn.metrics import confusion_matrix
from sklearn.metrics import mean_absolute_error
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVR
from sklearn.neural_network import MLPRegressor
from sklearn.linear_model import ElasticNetCV
from sklearn.ensemble import ExtraTreesRegressor
from sklearn.model_selection import StratifiedShuffleSplit
from sklearn.model_selection import cross_val_score

import joblib

sys.path.insert(0, '../') # path to functions
import cvasl.seperated as sep
from cvasl.file_handler import Config

### import data

In [None]:
# Original (unharmonized) StrokeMRI / TOP tables; only used to recover participant IDs.
filepath_mri_for_ids = 'our_datasets/StrokeMRI/'
filename_mri_for_ids = os.path.join(filepath_mri_for_ids, 'TrainingDataComplete.csv')

filepath_top_for_ids = 'our_datasets/TOP/'
filename_top_for_ids = os.path.join(filepath_top_for_ids, 'TrainingDataComplete.csv')

# CovBat-harmonized tables. All datasets currently share one results folder
# (e.g. C:\Projects\cvasl\extended_harm_paper\harmonizations\harm_results\covbat),
# but each filename is built from its own path variable so a folder can be
# changed for a single dataset without silently affecting the others.
filepath_topmri = 'harmonizations/harm_results/covbat/'
filename_topmri = os.path.join(filepath_topmri, 'topmri_covbat_5way.csv')

filepath_sabre = 'harmonizations/harm_results/covbat/'
filename_sabre = os.path.join(filepath_sabre, 'sabre_covbat_5way.csv')

filepath_insight46 = 'harmonizations/harm_results/covbat/'
filename_insight46 = os.path.join(filepath_insight46, 'insight_covbat_5way.csv')

filepath_edis = filepath_sabre
filename_edis = os.path.join(filepath_edis, 'edis_covbat_5way.csv')

filepath_helius = filepath_sabre
filename_helius = os.path.join(filepath_helius, 'helius_covbat_5way.csv')

# Read in the harmonized data; the first CSV column is used as the index.
TOPMRI = pd.read_csv(filename_topmri, index_col=0)
SABRE = pd.read_csv(filename_sabre, index_col=0)
Insight46 = pd.read_csv(filename_insight46, index_col=0)
EDIS = pd.read_csv(filename_edis, index_col=0)
HELIUS = pd.read_csv(filename_helius, index_col=0)

In [None]:
# Load the original StrokeMRI and TOP tables; they are used further down to
# tell which participants of the merged TOPMRI table came from which study.
filepath_mri_for_ids = 'our_datasets/StrokeMRI/'
filepath_top_for_ids = 'our_datasets/TOP/'

filename_mri_for_ids = os.path.join(filepath_mri_for_ids, 'TrainingDataComplete.csv')
filename_top_for_ids = os.path.join(filepath_top_for_ids, 'TrainingDataComplete.csv')

IDS_MRI = pd.read_csv(filename_mri_for_ids)
IDS_TOP = pd.read_csv(filename_top_for_ids)

## Make a toggle button to choose the output folder

In [None]:
#give a checkbox for out put folder
loged_feat = widgets.ToggleButton(
    value=False,
    description='Click me if some features logged',
    disabled=False,
    button_style='', # 'success', 'info', 'warning', 'danger' or ''
    tooltip='Description',
    icon='check' # (FontAwesome names without the `fa-` prefix)
)
loged_feat

In [None]:
loged_feat.value

In [None]:
# Choose the output folder from the toggle button: pressed means some
# features were logged, so results go to a separate folder.
output_folder = '3f_loged_outputs' if loged_feat.value else '3f_no_log_outputs'

# Create the folder if needed; an existing folder is not an error.
os.makedirs(output_folder, exist_ok=True)

# Now we need to split the merged dataset back into TOP and StrokeMRI, and format them like the others

In [None]:
# Recover the two source studies from the merged table by participant ID.
set_top_ids = set(IDS_TOP.participant_id)
set_mri_ids = set(IDS_MRI.participant_id)

_from_mri = TOPMRI['participant_id'].isin(set_mri_ids)
_from_top = TOPMRI['participant_id'].isin(set_top_ids)

StrokeMRI = TOPMRI[_from_mri]
TOP = TOPMRI[_from_top]
TOP.head(3)

In [None]:
# Normalise column names to lower case on every dataset (in place).
for _frame in (TOPMRI, SABRE, Insight46, EDIS, HELIUS):
    _frame.columns = _frame.columns.str.lower()

In [None]:
StrokeMRI

In [None]:
#TOP = TOP.assign(sex = TOP.sex.map(sex_mapping))
TOP.head(3)

In [None]:
# check for any duplicated patients between helius and sabre
helis = set(HELIUS.participant_id)
sabs = set(SABRE.participant_id)
z = sabs.intersection(helis)
print(z) 

In [None]:
# check for any duplicated patients between stroke and mri
strokers = set(StrokeMRI.participant_id)
topers = set(TOP.participant_id)
z = strokers.intersection(topers)
print(z) 

In [None]:
# make mixed StrokeMRI and TOP dataset
#mixed_data = pd.concat([TOP, StrokeMRI], sort=False)
mixed_data = TOPMRI

## Build ML models

In [None]:
# Build the matrices for cross-validation. participant_id is deliberately not
# dropped here (#.drop('participant_id', axis=1)); the model calls pass
# X[:, 1:] instead, which skips the first column of X
# (presumably participant_id - TODO confirm column order).
ml_matrix = mixed_data
y = ml_matrix['age'].values.astype('float')
X = ml_matrix.drop(columns='age').values

In [None]:
# Stratified shuffle-split evaluation of plain linear regression; returns the
# per-fold metrics frame, the real/predicted frame and the fitted models.
linr_k_frame, linr_y_frame, models = sep.stratified_cat_and_cont_categories_shuffle_split(
    'linear regression',
    'req_covbatharm_mix_linr',
    LinearRegression(),
    ml_matrix,
    X[:, 1:],
    y,
)

In [None]:
linr_k_frame

In [None]:
linr_k_frame.to_csv(output_folder +'/linr_k_frame_covbat_harm.csv')

In [None]:
avg_linr = sep.avg_k_folds(linr_k_frame)
avg_linr

In [None]:
linr_y_frame.to_csv(output_folder +'/linr_y_frame_covbat_harm.csv')
linr_y_frame

In [None]:
linr_y_frame.to_csv(output_folder +'/linr_y_frame_covbat_harm.csv')

In [None]:
linr = models[0]
linr[0]

In [None]:
# check if model folder exists and if not , then create
# Make sure the folder for (optionally) saved models exists.
# exist_ok=True avoids the separate existence check and the race it implies.
model_folder = '../result_models/'
os.makedirs(model_folder, exist_ok=True)

In [None]:
## optional save models
#joblib.dump(linr[0], ('../result_models/'+ 'unharm_mix_linr0.sav'))
#joblib.dump(linr[1], ('../result_models/'+ 'unharm_mix_linr1.sav'))
#joblib.dump(linr[2], ('../result_models/'+ 'unharm_mix_linr2.sav'))
#joblib.dump(linr[3], ('../result_models/'+ 'unharm_mix_linr3.sav'))
#joblib.dump(linr[4], ('../result_models/'+ 'unharm_mix_linr4.sav'))

In [None]:
# Stratified shuffle-split evaluation of LassoLars.
# The run label names the CovBat-harmonized data (as the linear regression run
# does) instead of 'unharm', since this notebook only uses harmonized input.
llreg_k_frame, llreg_y_frame, models = sep.stratified_cat_and_cont_categories_shuffle_split(
    'lasso regression',
    'covbatharm_mix_llreg',
    linear_model.LassoLars(alpha=0.01),
    ml_matrix,
    X[:, 1:],
    y,
)
llreg_k_frame

In [None]:
llreg_k_frame.to_csv(output_folder +'/llreg_k_frame_covbat_harm.csv')

In [None]:
avg_llreg = sep.avg_k_folds(llreg_k_frame)
avg_llreg

In [None]:
llreg_y_frame

In [None]:
llreg_y_frame.to_csv(output_folder +'/llreg_y_frame_covbat_harm.csv')

In [None]:
llreg = models[0]
llreg[0]

In [None]:
## optional save models
#joblib.dump(llreg[0], ('../result_models/'+ 'unharm_mix_llreg0.sav'))
#joblib.dump(llreg[1], ('../result_models/'+ 'unharm_mix_llreg1.sav'))
#joblib.dump(llreg[2], ('../result_models/'+ 'unharm_mix_llreg2.sav'))
#joblib.dump(llreg[3], ('../result_models/'+ 'unharm_mix_llreg3.sav'))
#joblib.dump(llreg[4], ('../result_models/'+ 'unharm_mix_llreg4.sav'))

In [None]:
# Stratified shuffle-split evaluation of a decision tree regressor.
# The run label names the CovBat-harmonized data (as the linear regression run
# does) instead of 'unharm', since this notebook only uses harmonized input.
dtree_k_frame, dtree_y_frame, models = sep.stratified_cat_and_cont_categories_shuffle_split(
    'decision tree',
    'covbatharm_mix_dtree',
    tree.DecisionTreeRegressor(),
    ml_matrix,
    X[:, 1:],
    y,
)
dtree_k_frame

In [None]:
dtree_k_frame.to_csv(output_folder +'/dtree_k_frame_covbat_harm.csv')

In [None]:
avg_dtree = sep.avg_k_folds(dtree_k_frame)
avg_dtree

In [None]:
dtree_y_frame

In [None]:
dtree_y_frame.to_csv(output_folder +'/dtree_y_frame_covbat_harm.csv')

In [None]:
dtree = models[0]
dtree[0]

In [None]:
# Stratified shuffle-split evaluation of an MLP regressor.
# The run label names the CovBat-harmonized data (as the linear regression run
# does) instead of 'unharm', since this notebook only uses harmonized input.
regr_k_frame, regr_y_frame, models = sep.stratified_cat_and_cont_categories_shuffle_split(
    'MLP regression',
    'covbatharm_mix_regr',
    MLPRegressor(random_state=1, max_iter=700),
    ml_matrix,
    X[:, 1:],
    y,
)
regr_k_frame

In [None]:
regr_k_frame.to_csv(output_folder +'/regr_k_frame_covbat_harm.csv')

In [None]:
avg_regr = sep.avg_k_folds(regr_k_frame)
avg_regr

In [None]:
regr_y_frame

In [None]:
regr_y_frame.to_csv(output_folder +'/regr_y_frame_covbat_harm.csv')

In [None]:
regr = models[0]
regr[0]

In [None]:
# Stratified shuffle-split evaluation of a degree-2 polynomial SVR.
# The run label names the CovBat-harmonized data (as the linear regression run
# does) instead of 'unharm', since this notebook only uses harmonized input.
svrp2_k_frame, svrp2_y_frame, models = sep.stratified_cat_and_cont_categories_shuffle_split(
    'support vector reg poly2',
    'covbatharm_mix_svrp2',
    SVR(C=1.0, kernel='poly', degree=2, epsilon=0.2),
    ml_matrix,
    X[:, 1:],
    y,
)
svrp2_k_frame

In [None]:
svrp2_k_frame.to_csv(output_folder +'/svrp2_k_frame_covbat_harm.csv')

In [None]:
avg_svrp2 = sep.avg_k_folds(svrp2_k_frame)
avg_svrp2

In [None]:
svrp2_y_frame

In [None]:
# Save real vs. predicted ages; the filename suffix follows the same
# '_covbat_harm.csv' pattern as the other y-frame outputs.
svrp2_y_frame.to_csv(output_folder +'/svrp2_y_frame_covbat_harm.csv')

In [None]:
svrp2 = models[0]
svrp2[0]

In [None]:
# Stratified shuffle-split evaluation of ElasticNetCV.
# The run label names the CovBat-harmonized data (as the linear regression run
# does) instead of 'unharm', since this notebook only uses harmonized input.
eregr_k_frame, eregr_y_frame, models = sep.stratified_cat_and_cont_categories_shuffle_split(
    'elasticnetCV',
    'covbatharm_mix_eregr',
    ElasticNetCV(cv=5, random_state=12),
    ml_matrix,
    X[:, 1:],
    y,
)
eregr_k_frame

In [None]:
eregr_k_frame.to_csv(output_folder +'/eregr_k_frame_covbat_harm.csv')

In [None]:
avg_eregr = sep.avg_k_folds(eregr_k_frame)
avg_eregr

In [None]:
eregr_y_frame

In [None]:
eregr_y_frame.to_csv(output_folder +'/eregr_y_frame_covbat_harm.csv')

In [None]:
eregr = models[0]
eregr[0]

In [None]:
# Stratified shuffle-split evaluation of an extra-trees regressor.
# The run label names the CovBat-harmonized data (as the linear regression run
# does) instead of 'unharm', since this notebook only uses harmonized input.
etreg_k_frame, etreg_y_frame, models = sep.stratified_cat_and_cont_categories_shuffle_split(
    'extra trees',
    'covbatharm_mix_etreg',
    ExtraTreesRegressor(n_estimators=100, random_state=0),
    ml_matrix,
    X[:, 1:],
    y,
)
etreg_k_frame

In [None]:
etreg_k_frame.to_csv(output_folder +'/etreg_k_frame_covbat_harm.csv')

In [None]:
avg_etreg = sep.avg_k_folds(etreg_k_frame)
avg_etreg

In [None]:
etreg_y_frame

In [None]:
etreg_y_frame.to_csv(output_folder +'/etreg_y_frame_covbat_harm.csv')

In [None]:
etreg = models[0]
etreg[0]

In [None]:
## optional save models
#joblib.dump(etreg[0], ('../result_models/'+ 'unharm_mix_etreg0.sav'))
#joblib.dump(etreg[1], ('../result_models/'+ 'unharm_mix_etreg1.sav'))
#joblib.dump(etreg[2], ('../result_models/'+ 'unharm_mix_etreg2.sav'))
#joblib.dump(etreg[3], ('../result_models/'+ 'unharm_mix_etreg3.sav'))
#joblib.dump(etreg[4], ('../result_models/'+ 'unharm_mix_etreg4.sav'))

Show results ON AVERAGE for each model

In [None]:
# Stack the fold-averaged metrics of every algorithm into one summary table
# (one block of rows per algorithm, in the order the models were run).
_average_frames = [
    avg_linr,
    avg_llreg,
    avg_dtree,
    avg_regr,
    avg_svrp2,
    avg_eregr,
    avg_etreg,
]
mixed_based_covbat_harmonized_on_testmix = pd.concat(_average_frames, axis=0)
mixed_based_covbat_harmonized_on_testmix

In [None]:
mixed_based_covbat_harmonized_on_testmix.to_csv(output_folder +'/mixed_based_covbat_harmonized_on_testmix.csv')

# Running mixed model over SABRE dataset

## Here we will do an example of running  models made of the entire StrokeMRI and TOP dataset mixed as one

#### Build new models

In [None]:
TOPMRI.head(3)

In [None]:
# Remove the binning helper columns so they are not used as features
# (presumably added during the stratified splits above - TODO confirm).
# errors='ignore' keeps this cell safe to re-run after the columns are gone.
TOPMRI = TOPMRI.drop(columns=['binned', 'fuse_bin'], errors='ignore')

In [None]:
# Feature matrix and age target for training on the whole mixed dataset.
ml_matrix = TOPMRI.drop(columns='participant_id')
y = ml_matrix['age'].values.astype('float')
X = ml_matrix.drop(columns='age').values.astype('float')

In [None]:
# now we decide to do no test train split, rather take all
X_train = X
y_train = y

In [None]:
MIXlinr = LinearRegression()
MIXlinr.fit(X_train, y_train)

In [None]:
MIXllreg = linear_model.LassoLars(alpha=0.01)
MIXllreg.fit(X_train, y_train)

In [None]:
MIXeregr = ElasticNetCV(cv=5, random_state=17)
MIXeregr.fit(X_train, y_train)


In [None]:
MIXetreg = ExtraTreesRegressor(n_estimators=100, random_state=0)
MIXetreg.fit(X_train, y_train)

In [None]:
##  Save these four best models

In [None]:
## optional save models
#joblib.dump(MIXlinr, ('../result_models/'+  'neuro_harm_mix_MIXlinr.sav'))
#joblib.dump(MIXllreg, ('../result_models/'+ 'neuro_harmm_mix_MIXllreg.sav'))
#joblib.dump(MIXeregr, ('../result_models/'+ 'neuro_harm_mix_MIXeregr.sav'))
#joblib.dump(MIXetreg, ('../result_models/'+ 'neuro_harm_mix_MIXetreg.sav'))

# run over SABRE

In [None]:
# Feature matrix and age target for SABRE, both cast to float.
sabre_ml_matrix = SABRE.drop(columns='participant_id')
y_sabre = sabre_ml_matrix['age'].values.astype('float')
X_sabre = sabre_ml_matrix.drop(columns='age').values.astype('float')

In [None]:
X_sabre_test = X_sabre
y_sabre_test = y_sabre

In [None]:
y_sabre_pred = MIXlinr.predict(X_sabre_test)

In [None]:
# One-row summary of the mixed-data linear regression applied to SABRE.
# file_name is only a label here (the in-memory MIXlinr model is scored); it
# uses the 'harm_mix_' prefix like the ElasticNetCV / extra-trees rows so that
# harmonized results are not tagged as unharmonized.
data = [[
    'Linear Reg',
    'harm_mix_linr.sav',
    mean_absolute_error(y_sabre_test, y_sabre_pred),
    MIXlinr.score(X_sabre_test, y_sabre_test),
    metrics.explained_variance_score(y_sabre_test, y_sabre_pred),
]]
linr_results_sabre = pd.DataFrame(
    data, columns=['algorithm', 'file_name', 'mae', 'r2', 'explained_variance'])
linr_results_sabre

In [None]:
data = {'real': y_sabre_test, 'predicted': y_sabre_pred}
y_frame_linr_sabre = pd.DataFrame(data)
y_frame_linr_sabre

In [None]:
y_frame_linr_sabre.to_csv(output_folder +'/y_frame_linr_sabre_covbat_harm.csv')

In [None]:
y_sabre_pred = MIXllreg.predict(X_sabre_test)

In [None]:
# One-row summary of the mixed-data lasso model applied to SABRE.
# file_name is only a label here (the in-memory MIXllreg model is scored); it
# uses the 'harm_mix_' prefix like the ElasticNetCV / extra-trees rows so that
# harmonized results are not tagged as unharmonized.
data = [[
    'Lasso',
    'harm_mix_lassor.sav',
    mean_absolute_error(y_sabre_test, y_sabre_pred),
    MIXllreg.score(X_sabre_test, y_sabre_test),
    metrics.explained_variance_score(y_sabre_test, y_sabre_pred),
]]
llreg_results_sabre = pd.DataFrame(
    data, columns=['algorithm', 'file_name', 'mae', 'r2', 'explained_variance'])
llreg_results_sabre

In [None]:
data = {'real': y_sabre_test, 'predicted': y_sabre_pred}
y_frame_llreg_sabre = pd.DataFrame(data)
y_frame_llreg_sabre

In [None]:
y_frame_llreg_sabre.to_csv(output_folder +'/y_frame_llreg_sabre_covbat_harm.csv')

In [None]:
y_sabre_pred = MIXeregr.predict(X_sabre_test)

In [None]:
data= [[
    'ElasticnetCV',
    'harm_mix_elasticregr.sav',
    mean_absolute_error(y_sabre_test, y_sabre_pred),
    MIXeregr.score(X_sabre_test,y_sabre_test),
    metrics.explained_variance_score(y_sabre_test, y_sabre_pred)]]
eregr_results_sabre = pd.DataFrame(data, columns=['algorithm','file_name','mae', 'r2', 'explained_variance'])
eregr_results_sabre

In [None]:
data = {'real': y_sabre_test, 'predicted': y_sabre_pred}
y_frame_eregr_sabre = pd.DataFrame(data)
y_frame_eregr_sabre

In [None]:
y_frame_eregr_sabre.to_csv(output_folder +'/y_frame_eregr_sabre_covbat_harm.csv')

In [None]:
y_sabre_pred = MIXetreg.predict(X_sabre_test)

In [None]:
data= [[
    'Extra trees',
    'harm_mix_etreg.sav',
    mean_absolute_error(y_sabre_test, y_sabre_pred),
    MIXetreg.score(X_sabre_test,y_sabre_test),
    metrics.explained_variance_score(y_sabre_test, y_sabre_pred)]]
etreg_results_sabre = pd.DataFrame(data, columns=['algorithm','file_name','mae', 'r2', 'explained_variance'])
etreg_results_sabre

In [None]:
data = {'real': y_sabre_test, 'predicted': y_sabre_pred}
y_frame_etregr_sabre = pd.DataFrame(data)
y_frame_etregr_sabre

In [None]:
y_frame_etregr_sabre.to_csv(output_folder +'/y_frame_etregr_sabre_covbat_harm.csv')

In [None]:
mix_based_covbatharmonized_on_sabre =pd.concat([linr_results_sabre,
                   llreg_results_sabre,
                   eregr_results_sabre,
                  etreg_results_sabre],
                  axis=0)
mix_based_covbatharmonized_on_sabre

In [None]:
mix_based_covbatharmonized_on_sabre.to_csv(output_folder +'/mix_based_covbatharmonized_on_sabre.csv')

# Running mixed model over Insight46 dataset

## Here we run the models trained on the entire mixed StrokeMRI + TOP dataset (the MIX models)

In [None]:
# Feature matrix and age target for Insight46, both cast to float.
insight_ml_matrix = Insight46.drop(columns='participant_id')
y_insight = insight_ml_matrix['age'].values.astype('float')
X_insight = insight_ml_matrix.drop(columns='age').values.astype('float')

In [None]:
X_insight_test = X_insight
y_insight_test = y_insight

In [None]:
y_insight_pred = MIXlinr.predict(X_insight_test)

In [None]:
# One-row summary of the mixed-data linear regression applied to Insight46.
# file_name is only a label here (the in-memory MIXlinr model is scored); it
# uses the generic 'harm_mix_' prefix of the other dataset sections rather than
# 'neuroharm', because this notebook uses CovBat-harmonized data.
data = [[
    'Linear Reg',
    'harm_mix_linr.sav',
    mean_absolute_error(y_insight_test, y_insight_pred),
    MIXlinr.score(X_insight_test, y_insight_test),
    metrics.explained_variance_score(y_insight_test, y_insight_pred),
]]
linr_results_insight = pd.DataFrame(
    data, columns=['algorithm', 'file_name', 'mae', 'r2', 'explained_variance'])
linr_results_insight

In [None]:
data = {'real': y_insight_test, 'predicted': y_insight_pred}
y_frame_linr_insight = pd.DataFrame(data)
y_frame_linr_insight

In [None]:
y_frame_linr_insight.to_csv(output_folder +'/y_frame_linr_insight_covbat_harm.csv')

In [None]:
y_insight_pred = MIXllreg.predict(X_insight_test)

In [None]:
# One-row summary of the mixed-data lasso model applied to Insight46.
# file_name is only a label here (the in-memory MIXllreg model is scored); it
# uses the generic 'harm_mix_' prefix of the other dataset sections rather than
# 'neuroharm', because this notebook uses CovBat-harmonized data.
data = [[
    'Lasso',
    'harm_mix_lassor.sav',
    mean_absolute_error(y_insight_test, y_insight_pred),
    MIXllreg.score(X_insight_test, y_insight_test),
    metrics.explained_variance_score(y_insight_test, y_insight_pred),
]]
llreg_results_insight = pd.DataFrame(
    data, columns=['algorithm', 'file_name', 'mae', 'r2', 'explained_variance'])
llreg_results_insight

In [None]:
data = {'real': y_insight_test, 'predicted': y_insight_pred}
y_frame_llreg_insight = pd.DataFrame(data)
y_frame_llreg_insight

In [None]:
y_frame_llreg_insight.to_csv(output_folder +'/y_frame_llreg_insight_covbat_harm.csv')

In [None]:
y_insight_pred = MIXeregr.predict(X_insight_test)

In [None]:
# One-row summary of the mixed-data ElasticNetCV model applied to Insight46.
# file_name is only a label here (the in-memory MIXeregr model is scored); it
# matches the 'harm_mix_elasticregr.sav' label used for the other datasets
# rather than 'neuroharm', because this notebook uses CovBat-harmonized data.
data = [[
    'ElasticnetCV',
    'harm_mix_elasticregr.sav',
    mean_absolute_error(y_insight_test, y_insight_pred),
    MIXeregr.score(X_insight_test, y_insight_test),
    metrics.explained_variance_score(y_insight_test, y_insight_pred),
]]
eregr_results_insight = pd.DataFrame(
    data, columns=['algorithm', 'file_name', 'mae', 'r2', 'explained_variance'])
eregr_results_insight

In [None]:
data = {'real': y_insight_test, 'predicted': y_insight_pred}
y_frame_eregr_insight = pd.DataFrame(data)
y_frame_eregr_insight

In [None]:
# Write into the chosen output folder like every other result file
# (previously this one landed in the current working directory).
y_frame_eregr_insight.to_csv(output_folder +'/y_frame_eregr_insight_covbat_harm.csv')

In [None]:
y_insight_pred = MIXetreg.predict(X_insight_test)

In [None]:
# One-row summary of the mixed-data extra-trees model applied to Insight46.
# file_name is only a label here (the in-memory MIXetreg model is scored); it
# matches the 'harm_mix_etreg.sav' label used for the other datasets rather
# than 'neuroharm', because this notebook uses CovBat-harmonized data.
data = [[
    'Extra trees',
    'harm_mix_etreg.sav',
    mean_absolute_error(y_insight_test, y_insight_pred),
    MIXetreg.score(X_insight_test, y_insight_test),
    metrics.explained_variance_score(y_insight_test, y_insight_pred),
]]
etreg_results_insight = pd.DataFrame(
    data, columns=['algorithm', 'file_name', 'mae', 'r2', 'explained_variance'])
etreg_results_insight

In [None]:
data = {'real': y_insight_test, 'predicted': y_insight_pred}
y_frame_etreg_insight = pd.DataFrame(data)
y_frame_etreg_insight

In [None]:
y_frame_etreg_insight.to_csv(output_folder +'/y_frame_etreg_insight_covbat_harm.csv')

In [None]:
mix_based_covbatharmonized_on_insight =pd.concat([linr_results_insight,
                   llreg_results_insight,
                   eregr_results_insight,
                   etreg_results_insight],
                  axis=0)
mix_based_covbatharmonized_on_insight

In [None]:
mix_based_covbatharmonized_on_insight.to_csv(output_folder +'/mix_based_covbatharmonized_on_insight.csv')

# run over HELIUS

In [None]:
# ml_matrix = TOPMRI.drop('participant_id', axis=1)
# X = ml_matrix.drop('age', axis =1)
# X = X.values
# X = X.astype('float')
# y = ml_matrix['age'].values
# y=y.astype('float')

In [None]:
# # now we decide to do no test train split, rather take all
# X_train = X
# y_train = y

In [None]:
# MIXlinr = LinearRegression()
# MIXlinr.fit(X_train, y_train)

In [None]:
# MIXllreg = linear_model.LassoLars(alpha=0.01)
# MIXllreg.fit(X_train, y_train)

In [None]:
# MIXeregr = ElasticNetCV(cv=5, random_state=17)
# MIXeregr.fit(X_train, y_train)


In [None]:
# MIXetreg = ExtraTreesRegressor(n_estimators=100, random_state=0)
# MIXetreg.fit(X_train, y_train)

In [None]:
##  Save these four best models

In [None]:
## optional save models
#joblib.dump(MIXlinr, ('../result_models/'+  'neuro_harm_mix_MIXlinr.sav'))
#joblib.dump(MIXllreg, ('../result_models/'+ 'neuro_harmm_mix_MIXllreg.sav'))
#joblib.dump(MIXeregr, ('../result_models/'+ 'neuro_harm_mix_MIXeregr.sav'))
#joblib.dump(MIXetreg, ('../result_models/'+ 'neuro_harm_mix_MIXetreg.sav'))

In [None]:
# Feature matrix and age target for HELIUS, both cast to float.
helius_ml_matrix = HELIUS.drop(columns='participant_id')
y_helius = helius_ml_matrix['age'].values.astype('float')
X_helius = helius_ml_matrix.drop(columns='age').values.astype('float')

In [None]:
X_helius_test = X_helius
y_helius_test = y_helius

In [None]:
y_helius_pred = MIXlinr.predict(X_helius_test)

In [None]:
data = {'real': y_helius_test, 'predicted': y_helius_pred}
y_frame_linr_helius = pd.DataFrame(data)
y_frame_linr_helius

In [None]:
# One-row summary of the mixed-data linear regression applied to HELIUS.
# file_name is only a label here (the in-memory MIXlinr model is scored); it
# uses the 'harm_mix_' prefix like the ElasticNetCV / extra-trees rows so that
# harmonized results are not tagged as unharmonized.
data = [[
    'Linear Reg',
    'harm_mix_linr.sav',
    mean_absolute_error(y_helius_test, y_helius_pred),
    MIXlinr.score(X_helius_test, y_helius_test),
    metrics.explained_variance_score(y_helius_test, y_helius_pred),
]]
linr_results_helius = pd.DataFrame(
    data, columns=['algorithm', 'file_name', 'mae', 'r2', 'explained_variance'])
linr_results_helius

In [None]:
y_frame_linr_helius.to_csv(output_folder +'/y_frame_linr_helius_covbat_harm.csv')

In [None]:
y_helius_pred = MIXllreg.predict(X_helius_test)

In [None]:
# One-row summary of the mixed-data lasso model applied to HELIUS.
# file_name is only a label here (the in-memory MIXllreg model is scored); it
# uses the 'harm_mix_' prefix like the ElasticNetCV / extra-trees rows so that
# harmonized results are not tagged as unharmonized.
data = [[
    'Lasso',
    'harm_mix_lassor.sav',
    mean_absolute_error(y_helius_test, y_helius_pred),
    MIXllreg.score(X_helius_test, y_helius_test),
    metrics.explained_variance_score(y_helius_test, y_helius_pred),
]]
llreg_results_helius = pd.DataFrame(
    data, columns=['algorithm', 'file_name', 'mae', 'r2', 'explained_variance'])
llreg_results_helius

In [None]:
data = {'real': y_helius_test, 'predicted': y_helius_pred}
y_frame_llreg_helius = pd.DataFrame(data)
y_frame_llreg_helius

In [None]:
y_frame_llreg_helius.to_csv(output_folder +'/y_frame_llreg_helius_covbat_harm.csv')

In [None]:
y_helius_pred = MIXeregr.predict(X_helius_test)

In [None]:
data= [[
    'ElasticnetCV',
    'harm_mix_elasticregr.sav',
    mean_absolute_error(y_helius_test, y_helius_pred),
    MIXeregr.score(X_helius_test,y_helius_test),
    metrics.explained_variance_score(y_helius_test, y_helius_pred)]]
eregr_results_helius = pd.DataFrame(data, columns=['algorithm','file_name','mae', 'r2', 'explained_variance'])
eregr_results_helius

In [None]:
data = {'real': y_helius_test, 'predicted': y_helius_pred}
y_frame_eregr_helius = pd.DataFrame(data)
y_frame_eregr_helius

In [None]:
y_frame_eregr_helius.to_csv(output_folder +'/y_frame_eregr_helius_covbat_harm.csv')

In [None]:
y_helius_pred = MIXetreg.predict(X_helius_test)

In [None]:
data= [[
    'Extra trees',
    'harm_mix_etreg.sav',
    mean_absolute_error(y_helius_test, y_helius_pred),
    MIXetreg.score(X_helius_test,y_helius_test),
    metrics.explained_variance_score(y_helius_test, y_helius_pred)]]
etreg_results_helius = pd.DataFrame(data, columns=['algorithm','file_name','mae', 'r2', 'explained_variance'])
etreg_results_helius

In [None]:
data = {'real': y_helius_test, 'predicted': y_helius_pred}
y_frame_etregr_helius = pd.DataFrame(data)
y_frame_etregr_helius

In [None]:
y_frame_etregr_helius.to_csv(output_folder +'/y_frame_etregr_helius_covbat_harm.csv')

In [None]:
mix_based_covbatharmonized_on_helius =pd.concat([linr_results_helius,
                   llreg_results_helius,
                   eregr_results_helius,
                  etreg_results_helius],
                  axis=0)
mix_based_covbatharmonized_on_helius

In [None]:
mix_based_covbatharmonized_on_helius.to_csv(output_folder +'/mix_based_covbatharmonized_on_helius.csv')

# run over EDIS

In [None]:
ml_matrix = TOPMRI.drop('participant_id', axis=1)
X = ml_matrix.drop('age', axis =1)
X = X.values
X = X.astype('float')
y = ml_matrix['age'].values
y=y.astype('float')

In [None]:
# now we decide to do no test train split, rather take all
X_train = X
y_train = y

In [None]:
# MIXlinr = LinearRegression()
# MIXlinr.fit(X_train, y_train)

In [None]:
# MIXllreg = linear_model.LassoLars(alpha=0.01)
# MIXllreg.fit(X_train, y_train)

In [None]:
# MIXeregr = ElasticNetCV(cv=5, random_state=17)
# MIXeregr.fit(X_train, y_train)


In [None]:
# MIXetreg = ExtraTreesRegressor(n_estimators=100, random_state=0)
# MIXetreg.fit(X_train, y_train)

In [None]:
##  Save these four best models

In [None]:
## optional save models
#joblib.dump(MIXlinr, ('../result_models/'+  'neuro_harm_mix_MIXlinr.sav'))
#joblib.dump(MIXllreg, ('../result_models/'+ 'neuro_harmm_mix_MIXllreg.sav'))
#joblib.dump(MIXeregr, ('../result_models/'+ 'neuro_harm_mix_MIXeregr.sav'))
#joblib.dump(MIXetreg, ('../result_models/'+ 'neuro_harm_mix_MIXetreg.sav'))

In [None]:
# Feature matrix and age target for EDIS, both cast to float.
edis_ml_matrix = EDIS.drop(columns='participant_id')
y_edis = edis_ml_matrix['age'].values.astype('float')
X_edis = edis_ml_matrix.drop(columns='age').values.astype('float')

In [None]:
X_edis_test = X_edis
y_edis_test = y_edis

In [None]:
y_edis_pred = MIXlinr.predict(X_edis_test)

In [None]:
# One-row summary of the mixed-data linear regression applied to EDIS.
# file_name is only a label here (the in-memory MIXlinr model is scored); it
# uses the 'harm_mix_' prefix like the ElasticNetCV / extra-trees rows so that
# harmonized results are not tagged as unharmonized.
data = [[
    'Linear Reg',
    'harm_mix_linr.sav',
    mean_absolute_error(y_edis_test, y_edis_pred),
    MIXlinr.score(X_edis_test, y_edis_test),
    metrics.explained_variance_score(y_edis_test, y_edis_pred),
]]
linr_results_edis = pd.DataFrame(
    data, columns=['algorithm', 'file_name', 'mae', 'r2', 'explained_variance'])
linr_results_edis

In [None]:
data = {'real': y_edis_test, 'predicted': y_edis_pred}
y_frame_linr_edis = pd.DataFrame(data)
y_frame_linr_edis

In [None]:
y_frame_linr_edis.to_csv(output_folder +'/y_frame_linr_edis_covbat_harm.csv')

In [None]:
y_edis_pred = MIXllreg.predict(X_edis_test)

In [None]:
# One-row summary of the mixed-data lasso model applied to EDIS.
# file_name is only a label here (the in-memory MIXllreg model is scored); it
# uses the 'harm_mix_' prefix like the ElasticNetCV / extra-trees rows so that
# harmonized results are not tagged as unharmonized.
data = [[
    'Lasso',
    'harm_mix_lassor.sav',
    mean_absolute_error(y_edis_test, y_edis_pred),
    MIXllreg.score(X_edis_test, y_edis_test),
    metrics.explained_variance_score(y_edis_test, y_edis_pred),
]]
llreg_results_edis = pd.DataFrame(
    data, columns=['algorithm', 'file_name', 'mae', 'r2', 'explained_variance'])
llreg_results_edis

In [None]:
data = {'real': y_edis_test, 'predicted': y_edis_pred}
y_frame_llreg_edis = pd.DataFrame(data)
y_frame_llreg_edis 

In [None]:
y_frame_llreg_edis.to_csv(output_folder +'/y_frame_llreg_edis_covbat_harm.csv')

In [None]:
y_edis_pred = MIXeregr.predict(X_edis_test)

In [None]:
data= [[
    'ElasticnetCV',
    'harm_mix_elasticregr.sav',
    mean_absolute_error(y_edis_test, y_edis_pred),
    MIXeregr.score(X_edis_test,y_edis_test),
    metrics.explained_variance_score(y_edis_test, y_edis_pred)]]
eregr_results_edis = pd.DataFrame(data, columns=['algorithm','file_name','mae', 'r2', 'explained_variance'])
eregr_results_edis

In [None]:
data = {'real': y_edis_test, 'predicted': y_edis_pred}
y_frame_eregr_edis = pd.DataFrame(data)
y_frame_eregr_edis

In [None]:
y_frame_eregr_edis.to_csv(output_folder +'/y_frame_eregr_edis_covbat_harm.csv')

In [None]:
y_edis_pred = MIXetreg.predict(X_edis_test)

In [None]:
data= [[
    'Extra trees',
    'harm_mix_etreg.sav',
    mean_absolute_error(y_edis_test, y_edis_pred),
    MIXetreg.score(X_edis_test,y_edis_test),
    metrics.explained_variance_score(y_edis_test, y_edis_pred)]]
etreg_results_edis = pd.DataFrame(data, columns=['algorithm','file_name','mae', 'r2', 'explained_variance'])
etreg_results_edis

In [None]:
data = {'real': y_edis_test, 'predicted': y_edis_pred}
y_frame_etregr_edis = pd.DataFrame(data)
y_frame_etregr_edis 

In [None]:
y_frame_etregr_edis.to_csv(output_folder +'/y_frame_etregr_edis_covbat_harm.csv')

In [None]:
mix_based_covbatharmonized_on_edis =pd.concat([linr_results_edis,
                   llreg_results_edis,
                   eregr_results_edis,
                  etreg_results_edis],
                  axis=0)
mix_based_covbatharmonized_on_edis

In [None]:
mix_based_covbatharmonized_on_edis.to_csv(output_folder +'/mix_based_covbatharmonized_on_edis.csv')