# ML testing: experiment #2f- harmonized with autocombat (TOPMRI on insight)

This notebook involves testing for the MRI conference abstract. This notebook shows harmonized StrokeMRI+TOP based models, and how they perform on Insight46 dataset

### import libraries

In [1]:
import os       # using operating system dependent functionality (folders)
import sys
import glob
from functools import reduce

import pandas as pd # data analysis and manipulation
import numpy as np    # numerical computing (manipulating and performing operations on arrays of data)
import matplotlib.pyplot as plt

# # demo stuff
# import ipywidgets as widgets
# import seaborn 

# ml stuff
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from sklearn.linear_model import LogisticRegression
from sklearn.linear_model import LinearRegression
from sklearn import linear_model
from sklearn import tree
from sklearn import metrics
from sklearn.linear_model import SGDRegressor
from sklearn.metrics import confusion_matrix
from sklearn.metrics import mean_absolute_error
#from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPRegressor
from sklearn.svm import SVR
from sklearn.linear_model import ElasticNetCV
from sklearn.ensemble import ExtraTreesRegressor

import joblib

sys.path.insert(0, '../') # path to functions
import cvasl.seperated as sep
from cvasl.file_handler import Config

### import data

In [2]:
filepath_topmri = '../open_work/internal_results/autocombat/' 
filename_topmri = os.path.join(filepath_topmri,'autocom_harm_top_and_mri_against_insight.csv')

filepath_insight = '../open_work/internal_results/autocombat/' 
filename_insight = os.path.join(filepath_insight,'autocom_harm_insight_against_top_and_mri.csv') 

In [3]:
TOPMRI = pd.read_csv(filename_topmri)
Insight = pd.read_csv(filename_insight)

In [4]:
TOPMRI = TOPMRI.drop(TOPMRI.columns[0],axis=1)
Insight = Insight.drop(Insight.columns[0],axis=1)
Insight.head(3)

Unnamed: 0,participant_id,age,sex,gm_vol,wm_vol,csf_vol,gm_ivc_ratio,gmwm_ivc_ratio,wmh_vol,wmh_count,deepwm_b_cov,aca_b_cov,mca_b_cov,pca_b_cov,totalgm_b_cov,deepwm_b,aca_b,mca_b,pca_b,totalgm_b
0,sub-18345613_1_ses-1_run-1,69.263889,1,0.643947,0.54952,0.371105,0.409342,0.767309,9.898875,30.878484,8.800512,3.305479,3.053712,2.536093,2.662789,24.010155,55.234879,48.879508,42.587962,45.115408
1,sub-13929827_1_ses-1_run-1,69.272222,0,0.521343,0.422758,0.290169,0.426999,0.775902,7.324736,26.396947,15.336155,2.35083,2.190774,2.689229,2.530877,33.152151,89.068821,76.044601,51.975169,60.748066
2,sub-11630017_1_ses-1_run-1,69.275,1,0.689975,0.559295,0.414411,0.410962,0.754088,7.699072,31.774792,12.355704,2.785571,3.326262,2.523791,3.018289,21.744253,68.10557,63.616157,51.997487,54.880373


In [5]:
TOPMRI.head(3)

Unnamed: 0,participant_id,age,sex,gm_vol,wm_vol,csf_vol,gm_ivc_ratio,gmwm_ivc_ratio,wmh_vol,wmh_count,deepwm_b_cov,aca_b_cov,mca_b_cov,pca_b_cov,totalgm_b_cov,deepwm_b,aca_b,mca_b,pca_b,totalgm_b
0,sub-0386_1_ses-1_run-1,17.72,1,0.745314,0.510018,0.268325,0.488396,0.822912,-0.349024,18.315523,-1.790197,1.0666,2.068944,2.591794,2.223037,28.334118,92.833214,92.235729,70.76311,78.863105
1,sub-0337_1_ses-1_run-1,18.15,0,0.7103,0.439386,0.179159,0.532017,0.863312,-0.472728,18.315523,4.988828,1.461021,1.79971,1.739099,1.936799,24.530789,97.810782,91.67155,69.827431,83.044208
2,sub-0628_1_ses-1_run-1,18.49,1,0.867973,0.529309,0.22853,0.532607,0.85903,-0.072991,29.682955,4.584775,1.455995,2.033758,1.071352,1.55004,21.364752,84.161887,77.68359,58.879963,71.478485


In [6]:
# # Now we do NOGTneed to flip the sex back to numbers for a correlation
# sex_mapping = {'F':0,'M':1}
# TOPMRI = TOPMRI.assign(sex = TOPMRI.sex.map(sex_mapping))
# TOPMRI.head(3)

In [7]:
# Insight = Insight.assign(sex = Insight.sex.map(sex_mapping))
# Insight.head(3)

## Build ML models based on auto-combat StrokeMRI-TOP mixed set (TOPMRI)

In [8]:
ml_matrix = TOPMRI.drop('participant_id', axis=1)
X = ml_matrix.drop('age', axis =1)
X = X.values
X = X.astype('float')
y = ml_matrix['age'].values
y=y.astype('float')

In [9]:
linr_k_frame, linr_y_frame, models = sep.stratified_one_category_shuffle_split('linear regression', 'auto_harm_topmri_linr', LinearRegression(), ml_matrix, X, y)

In [10]:
linr_k_frame

Unnamed: 0,algorithm,fold,file_name,mae,r2,explained_variance
0,linear regression-0,0,auto_harm_topmri_linr.0,5.816535,0.415968,0.417488
0,linear regression-1,1,auto_harm_topmri_linr.1,5.300496,0.854478,0.854797
0,linear regression-2,2,auto_harm_topmri_linr.2,4.908838,0.866424,0.866426
0,linear regression-3,3,auto_harm_topmri_linr.3,5.063947,0.864634,0.866265
0,linear regression-4,4,auto_harm_topmri_linr.4,5.619162,0.502724,0.503443


In [11]:
linr_k_frame.to_csv('auto_harmonized_topmri_insight_linr_k_frame.csv')

In [12]:
avg_linr = sep.avg_k_folds(linr_k_frame)
avg_linr

Unnamed: 0,algorithm,file_name,mae,r2,explained_variance
0,0 linear regression-0 0 linear regressio...,0 auto_harm_topmri_linr.0 0 auto_harm_to...,5.341795,0.700846,0.701684


In [13]:
linr_y_frame

Unnamed: 0,y_test,y_pred
0,44.700000,48.160356
1,63.769863,62.302195
2,28.820000,21.927351
3,63.695890,62.384944
4,59.632877,53.873038
...,...,...
256,49.340000,44.260355
257,70.456808,71.259943
258,53.390000,62.526992
259,52.495890,51.317150


In [14]:
linr_y_frame.to_csv('auto_harmonized_topmri_insight_linr_y_frame.csv')

In [15]:
linr = models[0]
linr[0]

In [16]:
# check if model folder exists and if not , then create
model_folder = '../result_models/'
if not os.path.exists(model_folder):
    os.makedirs(model_folder)

In [17]:
## optional save models
#joblib.dump(linr[0], ('../result_models/'+ 'auto_harm_topmri_insight_linr0.sav'))
#joblib.dump(linr[1], ('../result_models/'+ 'auto_harm_topmri_insight_linr1.sav'))
#joblib.dump(linr[2], ('../result_models/'+ 'auto_harm_topmri_insight_linr2.sav'))
#joblib.dump(linr[3], ('../result_models/'+ 'auto_harm_topmri_insight_linr3.sav'))
#joblib.dump(linr[4], ('../result_models/'+ 'auto_harm_topmri_insight_linr4.sav'))

In [18]:
llreg_k_frame, llreg_y_frame, models = sep.stratified_one_category_shuffle_split('lasso regression', 'auto_harm_topmri_insight_llreg',  linear_model.LassoLars(alpha=0.01), ml_matrix, X, y)
llreg_k_frame

Unnamed: 0,algorithm,fold,file_name,mae,r2,explained_variance
0,lasso regression-0,0,auto_harm_topmri_insight_llreg.0,5.841231,0.381872,0.383458
0,lasso regression-1,1,auto_harm_topmri_insight_llreg.1,5.339182,0.852806,0.853137
0,lasso regression-2,2,auto_harm_topmri_insight_llreg.2,4.8858,0.867733,0.867735
0,lasso regression-3,3,auto_harm_topmri_insight_llreg.3,5.083264,0.862813,0.864698
0,lasso regression-4,4,auto_harm_topmri_insight_llreg.4,5.625946,0.49002,0.490799


In [19]:
llreg_k_frame.to_csv('auto_harmonized_topmri_insight_llreg_k_frame.csv')

In [20]:
avg_llreg = sep.avg_k_folds(llreg_k_frame)
avg_llreg

Unnamed: 0,algorithm,file_name,mae,r2,explained_variance
0,0 lasso regression-0 0 lasso regression-...,0 auto_harm_topmri_insight_llreg.0 0 aut...,5.355085,0.691049,0.691966


In [21]:
llreg_y_frame

Unnamed: 0,y_test,y_pred
0,44.700000,48.393509
1,63.769863,62.780682
2,28.820000,22.131197
3,63.695890,62.021990
4,59.632877,53.112406
...,...,...
256,49.340000,44.105567
257,70.456808,71.495922
258,53.390000,61.901211
259,52.495890,50.741474


In [22]:
llreg_y_frame.to_csv('auto_harmonized_topmri_inisght_llreg_y_frame.csv')

In [23]:
llreg = models[0]
llreg[0]

In [24]:
## optional save models
#joblib.dump(llreg[0], ('../result_models/'+ 'auto_harm_topmri_insight_llreg0.sav'))
#joblib.dump(llreg[1], ('../result_models/'+ 'auto_harm_topmri_insight_llreg1.sav'))
#joblib.dump(llreg[2], ('../result_models/'+ 'auto_harm_topmri_insight_llreg2.sav'))
#joblib.dump(llreg[3], ('../result_models/'+ 'auto_harm_topmri_insight_llreg3.sav'))
#joblib.dump(llreg[4], ('../result_models/'+ 'auto_harm_topmri_insight_llreg4.sav'))

In [25]:
dtree_k_frame, dtree_y_frame, models = sep.stratified_one_category_shuffle_split('decision tree', 'auto_harm_topmri_insight_dtree',  tree.DecisionTreeRegressor(), ml_matrix, X, y)
dtree_k_frame

Unnamed: 0,algorithm,fold,file_name,mae,r2,explained_variance
0,decision tree-0,0,auto_harm_topmri_insight_dtree.0,6.740742,0.72523,0.725469
0,decision tree-1,1,auto_harm_topmri_insight_dtree.1,6.629355,0.74569,0.746961
0,decision tree-2,2,auto_harm_topmri_insight_dtree.2,6.431192,0.727632,0.727744
0,decision tree-3,3,auto_harm_topmri_insight_dtree.3,6.888785,0.737837,0.738062
0,decision tree-4,4,auto_harm_topmri_insight_dtree.4,6.653458,0.708564,0.709442


In [26]:
dtree_k_frame.to_csv('auto_harmonized_topmri_inisght_dtree_k_frame.csv')

In [27]:
avg_dtree = sep.avg_k_folds(dtree_k_frame)
avg_dtree

Unnamed: 0,algorithm,file_name,mae,r2,explained_variance
0,0 decision tree-0 0 decision tree-1 0 ...,0 auto_harm_topmri_insight_dtree.0 0 aut...,6.668706,0.728991,0.729535


In [28]:
dtree_y_frame

Unnamed: 0,y_test,y_pred
0,44.700000,48.890000
1,63.769863,60.695890
2,28.820000,20.979250
3,63.695890,52.728767
4,59.632877,43.160000
...,...,...
256,49.340000,44.330000
257,70.456808,69.158904
258,53.390000,78.412568
259,52.495890,50.180822


In [29]:
dtree_y_frame.to_csv('auto_harmonized_topmri_insight_dtree_y_frame.csv')

In [30]:
dtree = models[0]
dtree[0]

In [31]:
regr_k_frame, regr_y_frame, models = sep.stratified_one_category_shuffle_split('MLP regression', 'auto_harm_topmri_insight_regr',   MLPRegressor(random_state=1, max_iter=700), ml_matrix, X, y)
regr_k_frame

Unnamed: 0,algorithm,fold,file_name,mae,r2,explained_variance
0,MLP regression-0,0,auto_harm_topmri_insight_regr.0,8.143255,-2.584062,-2.563758
0,MLP regression-1,1,auto_harm_topmri_insight_regr.1,6.577056,0.767709,0.773613
0,MLP regression-2,2,auto_harm_topmri_insight_regr.2,6.33082,0.770245,0.773789
0,MLP regression-3,3,auto_harm_topmri_insight_regr.3,7.021025,0.754417,0.754857
0,MLP regression-4,4,auto_harm_topmri_insight_regr.4,8.27647,-2.355151,-2.353849


In [32]:
regr_k_frame.to_csv('auto_harmonized_topmri_insight_regr_k_frame.csv')

In [33]:
avg_regr = sep.avg_k_folds(regr_k_frame)
avg_regr

Unnamed: 0,algorithm,file_name,mae,r2,explained_variance
0,0 MLP regression-0 0 MLP regression-1 0 ...,0 auto_harm_topmri_insight_regr.0 0 auto...,7.269725,-0.529368,-0.52307


In [34]:
regr_y_frame

Unnamed: 0,y_test,y_pred
0,44.700000,47.744612
1,63.769863,64.932041
2,28.820000,38.357120
3,63.695890,59.862447
4,59.632877,56.946421
...,...,...
256,49.340000,40.163076
257,70.456808,75.414054
258,53.390000,63.316313
259,52.495890,45.658194


In [35]:
regr_y_frame.to_csv('auto_harmonized_topmri_insight_regr_y_frame.csv')

In [36]:
regr = models[0]
regr[0]

### Note I'm not actually saving the mlp or svr models here because they are not the best performers. if neccesary this can be added

In [37]:
svrp2_k_frame, svrp2_y_frame, models = sep.stratified_one_category_shuffle_split('support vector reg poly2', 'auto_harm_topmriInsight_svrp2',   SVR(C=1.0, kernel='poly', degree =2, epsilon=0.2), ml_matrix, X, y)
svrp2_k_frame

Unnamed: 0,algorithm,fold,file_name,mae,r2,explained_variance
0,support vector reg poly2-0,0,auto_harm_topmriInsight_svrp2.0,10.495422,-1.484532,-1.480133
0,support vector reg poly2-1,1,auto_harm_topmriInsight_svrp2.1,9.537922,0.555434,0.555775
0,support vector reg poly2-2,2,auto_harm_topmriInsight_svrp2.2,8.903852,0.558514,0.558525
0,support vector reg poly2-3,3,auto_harm_topmriInsight_svrp2.3,9.910225,0.549676,0.549806
0,support vector reg poly2-4,4,auto_harm_topmriInsight_svrp2.4,10.30467,-2.008502,-1.999603


In [38]:
svrp2_k_frame.to_csv('auto_harmonized_topmri_insight_svrp2_k_frame.csv')

In [39]:
avg_svrp2 = sep.avg_k_folds(svrp2_k_frame)
avg_svrp2

Unnamed: 0,algorithm,file_name,mae,r2,explained_variance
0,0 support vector reg poly2-0 0 support v...,0 auto_harm_topmriInsight_svrp2.0 0 auto...,9.830418,-0.365882,-0.363126


In [40]:
svrp2_y_frame

Unnamed: 0,y_test,y_pred
0,44.700000,47.481194
1,63.769863,52.482419
2,28.820000,39.093661
3,63.695890,59.395867
4,59.632877,37.962900
...,...,...
256,49.340000,49.476035
257,70.456808,62.345512
258,53.390000,52.145440
259,52.495890,51.871661


In [41]:
svrp2_y_frame.to_csv('auto_harmonized_topmri_insight_svrp2_y_frame.csv')

In [42]:
svrp2 = models[0]
svrp2[0]

In [43]:
eregr_k_frame, eregr_y_frame, models = sep.stratified_one_category_shuffle_split('elasticnetCV', 'auto_harm_topmri_insight_eregr',  ElasticNetCV(cv=5, random_state=12), ml_matrix, X, y)
eregr_k_frame

Unnamed: 0,algorithm,fold,file_name,mae,r2,explained_variance
0,elasticnetCV-0,0,auto_harm_topmri_insight_eregr.0,7.125431,0.198259,0.199457
0,elasticnetCV-1,1,auto_harm_topmri_insight_eregr.1,7.850596,0.688607,0.689544
0,elasticnetCV-2,2,auto_harm_topmri_insight_eregr.2,7.09995,0.707425,0.707754
0,elasticnetCV-3,3,auto_harm_topmri_insight_eregr.3,7.606786,0.709282,0.710782
0,elasticnetCV-4,4,auto_harm_topmri_insight_eregr.4,7.371423,0.356008,0.356743


In [44]:
eregr_k_frame.to_csv('auto_harmonized_topmri_insight_eregr_k_frame.csv')

In [45]:
avg_eregr = sep.avg_k_folds(eregr_k_frame)
avg_eregr

Unnamed: 0,algorithm,file_name,mae,r2,explained_variance
0,0 elasticnetCV-0 0 elasticnetCV-1 0 e...,0 auto_harm_topmri_insight_eregr.0 0 aut...,7.410837,0.531916,0.532856


In [46]:
eregr_y_frame

Unnamed: 0,y_test,y_pred
0,44.700000,50.272719
1,63.769863,60.776237
2,28.820000,29.791260
3,63.695890,58.201794
4,59.632877,46.935498
...,...,...
256,49.340000,41.779773
257,70.456808,70.722615
258,53.390000,59.969356
259,52.495890,47.666434


In [47]:
eregr_y_frame.to_csv('auto_harmonized_topmri_insight_eregr_y_frame.csv')

In [48]:
eregr = models[0]
eregr[0]

In [49]:
etreg_k_frame, etreg_y_frame, models = sep.stratified_one_category_shuffle_split('extra trees', 'auto_harm_topmri_insight_etreg',  ExtraTreesRegressor(n_estimators=100, random_state=0), ml_matrix, X, y)
etreg_k_frame

Unnamed: 0,algorithm,fold,file_name,mae,r2,explained_variance
0,extra trees-0,0,auto_harm_topmri_insight_etreg.0,4.740584,0.865067,0.866066
0,extra trees-1,1,auto_harm_topmri_insight_etreg.1,4.626148,0.879141,0.879153
0,extra trees-2,2,auto_harm_topmri_insight_etreg.2,4.506073,0.875167,0.876033
0,extra trees-3,3,auto_harm_topmri_insight_etreg.3,4.683715,0.881201,0.882534
0,extra trees-4,4,auto_harm_topmri_insight_etreg.4,4.880231,0.852699,0.852845


In [50]:
etreg_k_frame.to_csv('auto_haromized_topmri_insight_etreg_k_frame.csv')

In [51]:
avg_etreg = sep.avg_k_folds(etreg_k_frame)
avg_etreg

Unnamed: 0,algorithm,file_name,mae,r2,explained_variance
0,0 extra trees-0 0 extra trees-1 0 ext...,0 auto_harm_topmri_insight_etreg.0 0 aut...,4.68735,0.870655,0.871326


In [52]:
etreg_y_frame

Unnamed: 0,y_test,y_pred
0,44.700000,51.333830
1,63.769863,66.851874
2,28.820000,22.874600
3,63.695890,61.841483
4,59.632877,51.916136
...,...,...
256,49.340000,44.291867
257,70.456808,67.721492
258,53.390000,64.731309
259,52.495890,50.774769


In [53]:
etreg_y_frame.to_csv('auto_harmonized_topmri_insight_etreg_y_frame.csv')

In [54]:
etreg = models[0]
etreg[0]

In [55]:
## optional save models
#joblib.dump(etreg[0], ('../result_models/'+ 'auto_harm_topmri_insight_etreg0.sav'))
#joblib.dump(etreg[1], ('../result_models/'+ 'auto_harm_topmri_insight_etreg1.sav'))
#joblib.dump(etreg[2], ('../result_models/'+ 'auto_harm_topmri_insight_etreg2.sav'))
#joblib.dump(etreg[3], ('../result_models/'+ 'auto_harm_topmri_insight_etreg3.sav'))
#joblib.dump(etreg[4], ('../result_models/'+ 'auto_harm_topmri_insight_etreg4.sav'))

Show results ON AVERAGE for each model

In [56]:
topmri_insight_based_auto_harmonized_on_testtopmri =pd.concat([avg_linr,
                   avg_llreg,
                   avg_dtree,
                   avg_regr,
                   avg_svrp2,
                   avg_eregr,
                   avg_etreg],
                  axis=0)
topmri_insight_based_auto_harmonized_on_testtopmri

Unnamed: 0,algorithm,file_name,mae,r2,explained_variance
0,0 linear regression-0 0 linear regressio...,0 auto_harm_topmri_linr.0 0 auto_harm_to...,5.341795,0.700846,0.701684
0,0 lasso regression-0 0 lasso regression-...,0 auto_harm_topmri_insight_llreg.0 0 aut...,5.355085,0.691049,0.691966
0,0 decision tree-0 0 decision tree-1 0 ...,0 auto_harm_topmri_insight_dtree.0 0 aut...,6.668706,0.728991,0.729535
0,0 MLP regression-0 0 MLP regression-1 0 ...,0 auto_harm_topmri_insight_regr.0 0 auto...,7.269725,-0.529368,-0.52307
0,0 support vector reg poly2-0 0 support v...,0 auto_harm_topmriInsight_svrp2.0 0 auto...,9.830418,-0.365882,-0.363126
0,0 elasticnetCV-0 0 elasticnetCV-1 0 e...,0 auto_harm_topmri_insight_eregr.0 0 aut...,7.410837,0.531916,0.532856
0,0 extra trees-0 0 extra trees-1 0 ext...,0 auto_harm_topmri_insight_etreg.0 0 aut...,4.68735,0.870655,0.871326


In [57]:
topmri_insight_based_auto_harmonized_on_testtopmri.to_csv('topmri_insight_based_auto_harmonized_on_testtopmri_AVERAGES.csv')

## Now we will build  models based on the whole  autocombat harmonized TOPMRI dataset, and apply them to Insight . 

In [58]:
ml_matrix = TOPMRI.drop('participant_id', axis=1)
X = ml_matrix.drop('age', axis =1)
X = X.values
X = X.astype('float')
y = ml_matrix['age'].values
y=y.astype('float')


In [59]:
# now we decide to do no test train split, rather take all
X_train = X
y_train = y

In [60]:
TOPMRIlinr = LinearRegression()
TOPMRIlinr.fit(X_train, y_train)

In [61]:
TOPMRIllreg = linear_model.LassoLars(alpha=0.01)
TOPMRIllreg.fit(X_train, y_train)

In [62]:
TOPMRIeregr = ElasticNetCV(cv=5, random_state=17)
TOPMRIeregr.fit(X_train, y_train)


In [63]:
TOPMRIetreg = ExtraTreesRegressor(n_estimators=100, random_state=0)
TOPMRIetreg.fit(X_train, y_train)

In [64]:
##  Save these four best models

In [65]:

## optional save models
#joblib.dump(TOPMRIlinr, ('../result_models/' + 'auto_harm_topmri_insight_linr.sav'))
#joblib.dump(TOPMRIllreg, ('../result_models/'+ 'auto_harm_topmri_insight_llreg.sav'))
#joblib.dump(TOPMRIeregr, ('../result_models/'+ 'auto_harm_topmri_insight_eregr.sav'))
#joblib.dump(TOPMRIetreg, ('../result_models/'+ 'auto_harm_topmri_insight_etreg.sav'))

# Running whole TOPMRI model over Insight dataset (autoharmonized to each other)

In [66]:
insight_ml_matrix = Insight.drop('participant_id', axis=1)
X_insight = insight_ml_matrix.drop('age', axis =1)
X_insight = X_insight.values
X_insight = X_insight.astype('float')
y_insight = insight_ml_matrix['age'].values
y_insight=y_insight.astype('float')

In [67]:
X_insight_test = X_insight
y_insight_test = y_insight


In [68]:
y_insight_pred = TOPMRIlinr.predict(X_insight_test)

In [69]:
data= [[
    'linear regression',
    'auto_harm_topmri_insight_linr.sav',
    mean_absolute_error(y_insight_test, y_insight_pred),
    TOPMRIlinr.score(X_insight_test,y_insight_test),
    metrics.explained_variance_score(y_insight_test, y_insight_pred)]]
linr_results = pd.DataFrame(data, columns=['algorithm','file_name','mae', 'r2', 'explained_variance'])
#linr_results

In [70]:
linr_y_test = y_insight_test
linr_y_pred = y_insight_pred
linr_compare = pd.DataFrame(
    {'y_test_real_age': linr_y_test,
     'linr_y_pred_age': linr_y_pred,
    })
#linr_compare = linr_compare.reset_index()
linr_compare

Unnamed: 0,y_test_real_age,linr_y_pred_age
0,69.263889,58.429197
1,69.272222,61.523206
2,69.275000,50.817051
3,69.425000,54.794479
4,69.427778,49.967595
...,...,...
277,71.733333,64.444266
278,71.736111,59.832142
279,71.844444,56.943809
280,71.847222,56.568561


In [71]:
linr_compare.to_csv('whole_auto_harm_topmri_insight_linr_compare_on_top.csv')

In [72]:
y_insight_pred = TOPMRIllreg.predict(X_insight_test)

In [73]:
data= [[
    'lasso regression',
    'auto_harm_whole_topmri_insight_llreg.sav',
    mean_absolute_error(y_insight_test, y_insight_pred),
    TOPMRIllreg.score(X_insight_test,y_insight_test),
    metrics.explained_variance_score(y_insight_test, y_insight_pred)]]
llreg_results = pd.DataFrame(data, columns=['algorithm','file_name','mae', 'r2', 'explained_variance'])
llreg_results

Unnamed: 0,algorithm,file_name,mae,r2,explained_variance
0,lasso regression,auto_harm_whole_topmri_insight_llreg.sav,12.587199,-474.629224,-123.211688


In [74]:
llreg_y_test = y_insight_test
llreg_y_pred = y_insight_pred
llreg_compare = pd.DataFrame(
    {'y_test_real_age': llreg_y_test,
     'linr_y_pred_age': llreg_y_pred,
    })
llreg_compare

Unnamed: 0,y_test_real_age,linr_y_pred_age
0,69.263889,57.176128
1,69.272222,61.861612
2,69.275000,49.488108
3,69.425000,52.139704
4,69.427778,47.912488
...,...,...
277,71.733333,61.687534
278,71.736111,56.963877
279,71.844444,54.711996
280,71.847222,55.630984


In [75]:
llreg_compare.to_csv('whole_auto_harm_topmri_insight_llreg_compare_on_top.csv')

In [76]:
y_insight_pred = TOPMRIeregr.predict(X_insight_test)

In [77]:
data= [[
    'elasticnetCV',
    'auto_harm_whole_topmri_insight_linr.sav',
    mean_absolute_error(y_insight_test, y_insight_pred),
    TOPMRIeregr.score(X_insight_test, y_insight_test),
    metrics.explained_variance_score(y_insight_test, y_insight_pred)]]
eregr_results = pd.DataFrame(data, columns=['algorithm','file_name','mae', 'r2', 'explained_variance'])
#eregr_results

In [78]:
eregr_y_test = y_insight_test
eregr_y_pred = y_insight_pred
eregr_compare = pd.DataFrame(
    {'y_test_real_age': eregr_y_test,
     'linr_y_pred_age': eregr_y_pred,
    })
#eregr_compare = eregr_compare.reset_index()
eregr_compare

Unnamed: 0,y_test_real_age,linr_y_pred_age
0,69.263889,51.652235
1,69.272222,48.283550
2,69.275000,42.076876
3,69.425000,35.752881
4,69.427778,32.398613
...,...,...
277,71.733333,45.678644
278,71.736111,43.736485
279,71.844444,37.083267
280,71.847222,40.608551


In [79]:
eregr_compare.to_csv('whole_auto_harm_topmri_insight_eregr_compare_on_top.csv')

In [80]:
y_insight_pred = TOPMRIetreg.predict(X_insight_test)

In [81]:
data= [[
    'extra trees',
    'auto_harm_topmri_insight_linr.sav',
    mean_absolute_error(y_insight_test, y_insight_pred),
    TOPMRIetreg.score(X_insight_test,y_insight_test),
    metrics.explained_variance_score(y_insight_test, y_insight_pred)]]
etreg_results = pd.DataFrame(data, columns=['algorithm','file_name','mae', 'r2', 'explained_variance'])
#etreg_results

In [82]:
etreg_y_test = y_insight_test
etreg_y_pred = y_insight_pred
etreg_compare = pd.DataFrame(
    {'y_test_real_age': etreg_y_test,
     'etreg_y_pred_age': etreg_y_pred,
    })
#etreg_compare = etreg_compare.reset_index()
etreg_compare

Unnamed: 0,y_test_real_age,etreg_y_pred_age
0,69.263889,58.918976
1,69.272222,59.432918
2,69.275000,55.487878
3,69.425000,63.698170
4,69.427778,58.997339
...,...,...
277,71.733333,68.685040
278,71.736111,61.455223
279,71.844444,61.986909
280,71.847222,58.795440


In [83]:
etreg_compare.to_csv('whole_auto_harm_topmri_insight_etreg_compare_on_top.csv')

In [84]:
topmri_based_auto_combat_harmonized_on_insight =pd.concat([linr_results,
                   llreg_results,
                   eregr_results,
                  etreg_results],
                  axis=0)
topmri_based_auto_combat_harmonized_on_insight

Unnamed: 0,algorithm,file_name,mae,r2,explained_variance
0,linear regression,auto_harm_topmri_insight_linr.sav,10.962132,-380.166637,-128.040932
0,lasso regression,auto_harm_whole_topmri_insight_llreg.sav,12.587199,-474.629224,-123.211688
0,elasticnetCV,auto_harm_whole_topmri_insight_linr.sav,26.328632,-1795.768338,-185.814428
0,extra trees,auto_harm_topmri_insight_linr.sav,8.349267,-199.917339,-37.143606


In [85]:
topmri_based_auto_combat_harmonized_on_insight.to_csv('topmri_based_autocombat_harmonized_on_insight.csv')