# ML testing: experiment #3

This notebook contains the model testing for the MRI conference abstract. It builds and evaluates models trained on the mixed dataset (TOP + StrokeMRI).

### import libraries

In [33]:
import os       # using operating system dependent functionality (folders)
import sys

import glob

import pandas as pd # data analysis and manipulation
import numpy as np    # numerical computing (manipulating and performing operations on arrays of data)
import matplotlib.pyplot as plt

# demo stuff
import ipywidgets as widgets
import seaborn 

# ml stuff
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
#from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
#from sklearn.linear_model import LogisticRegression
from sklearn.linear_model import LinearRegression
from sklearn import linear_model
from sklearn import tree
from sklearn import metrics
from sklearn.linear_model import SGDRegressor
from sklearn.metrics import confusion_matrix
from sklearn.metrics import mean_absolute_error
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVR
from sklearn.neural_network import MLPRegressor
from sklearn.linear_model import ElasticNetCV
from sklearn.ensemble import ExtraTreesRegressor

import joblib

sys.path.insert(0, '../') # path to functions
import cvasl.seperated as sep
from cvasl.file_handler import Config

### import data

In [2]:
# Both cohorts' cleaned PVC2 tables are read from the same results folder.
filepath_mri = filepath_top = '../open_work/internal_results/cleaned_pvc2s/'

filename_top = os.path.join(filepath_top, 'TOP_pvc2c.csv')
filename_mri = os.path.join(filepath_mri, 'StrokeMRI_pvc2c.csv')

In [3]:
# Load the two cohort tables in one pass (TOP first, then StrokeMRI).
TOP, StrokeMRI = (pd.read_csv(csv_path) for csv_path in (filename_top, filename_mri))

In [4]:
# Discard the first column of the loaded table.
# Caution: this cell is not idempotent; every re-run removes one more column.
TOP = TOP.drop(columns=TOP.columns[0])

In [5]:
# Discard the first column of the loaded table.
# Caution: this cell is not idempotent; every re-run removes one more column.
StrokeMRI = StrokeMRI.drop(columns=StrokeMRI.columns[0])

In [6]:
# Encode sex as a number (F -> 0, M -> 1) so it can be used numerically.
# Caution: re-running this cell maps the already-numeric values through the
# dictionary again and turns them into NaN.
sex_mapping = dict(F=0, M=1)
TOP = TOP.assign(sex=TOP['sex'].map(sex_mapping))
TOP.head(3)

Unnamed: 0,participant_id,age,sex,gm_vol,wm_vol,csf_vol,gm_ivc_ratio,gmwm_ivc_ratio,wmh_vol,wmh_count,deepwm_b_cov,aca_b_cov,mca_b_cov,pca_b_cov,totalgm_b_cov,deepwm_b,aca_b,mca_b,pca_b,totalgm_b
0,sub-0001_1_ses-1_run-1,43.49,1,0.71736,0.52803,0.31812,0.45881,0.45881,1.743,24.0,8.762,2.2748,2.3205,2.6858,2.4316,22.1022,80.4744,68.3224,52.4614,60.6981
1,sub-0002_1_ses-1_run-1,38.3,0,0.72383,0.62394,0.25673,0.45112,0.45112,1.629,23.0,9.0749,1.7564,2.3989,1.5982,1.9738,23.5401,87.3972,78.0359,63.9932,71.6047
2,sub-0019_1_ses-1_run-1,32.3,1,0.71224,0.53295,0.33594,0.45046,0.45046,0.621,13.0,8.8791,1.985,1.8702,2.1648,2.1723,27.5573,94.0855,86.3816,62.6012,74.0588


In [7]:
# Apply the same F/M -> 0/1 encoding to the StrokeMRI cohort.
# Caution: re-running this cell turns the already-numeric values into NaN.
StrokeMRI = StrokeMRI.assign(sex=StrokeMRI['sex'].map(sex_mapping))
StrokeMRI.head(3)

Unnamed: 0,participant_id,age,sex,gm_vol,wm_vol,csf_vol,gm_ivc_ratio,gmwm_ivc_ratio,wmh_vol,wmh_count,deepwm_b_cov,aca_b_cov,mca_b_cov,pca_b_cov,totalgm_b_cov,deepwm_b,aca_b,mca_b,pca_b,totalgm_b
0,sub-59082_1_ses-1_run-1,43.172603,0,0.64754,0.49441,0.3132,0.445,0.445,1.249,24.0,8.0434,1.7273,1.9776,1.7141,1.914,26.8262,104.3155,90.5047,73.8758,84.4527
1,sub-59083_1_ses-1_run-1,66.367123,0,0.60517,0.48594,0.42304,0.39968,0.39968,14.597,25.0,3.7791,1.9669,1.7154,2.6394,1.8815,24.9597,75.7051,65.3032,65.6771,67.1158
2,sub-59085_1_ses-1_run-1,55.838356,0,0.61724,0.53779,0.33692,0.41371,0.41371,6.341,30.0,5.1248,1.8146,1.9845,2.0827,1.9501,27.7117,81.0966,72.998,60.8317,64.3183


In [8]:
# Participant IDs present in both cohorts; an empty set means no overlap.
strokers = set(StrokeMRI['participant_id'])
topers = set(TOP['participant_id'])
z = strokers & topers
print(z)

set()


In [9]:
# Stack the two cohorts row-wise into one mixed dataset.
# The original row labels of each frame are kept (the index is not reset).
mixed_data = pd.concat(objs=(TOP, StrokeMRI), axis=0, sort=False)


## Build ML models

# Keep the patient ID until right before the model is fed, then use the patient ID as the key to track which rows went where

In [10]:
# participant_id is deliberately left in the feature matrix for now so the
# rows can still be identified after the train/test split.
ml_matrix = mixed_data

# Target: age as float. Features: every remaining column, as a raw array.
y = ml_matrix['age'].to_numpy().astype('float')
X = ml_matrix.drop(columns='age').to_numpy()


In [11]:
# 75/25 row-wise split with a fixed seed.
# NOTE(review): participant_id values carry session/run suffixes, so repeated
# sessions of one subject could land on both sides of the split; confirm this is acceptable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=12,
)

In [12]:
# Column 0 holds participant_id; strip it and cast the rest to float for the models.
X_train_cut = X_train[:, 1:].astype('float')
X_train_cut.shape

(780, 18)

In [13]:
# Column 0 holds participant_id; strip it and cast the rest to float for the models.
X_test_cut = X_test[:, 1:].astype('float')
X_test_cut.shape

(261, 18)

In [14]:
# svr_poly = SVR(kernel="poly", C=100, gamma="auto", degree=2, epsilon=0.1, coef0=1)
# svr_poly.fit(X_train_cut, y_train)

In [15]:
# y_pred = svr_poly.predict(X_test_cut)

In [16]:
# print('R2 score SV polynomial regression: %.3f' % svr_poly.score(X_test_cut,y_test))
# print('Explained variance score: %.3f'  % metrics.explained_variance_score(y_test, y_pred))
# print('MAE: % .3f' % mean_absolute_error(y_test, y_pred))

In [17]:

# Multi-layer perceptron regressor, seeded for repeatability.
# NOTE(review): features are passed in unscaled even though StandardScaler is
# imported; confirm that is intended.
regr = MLPRegressor(max_iter=700, random_state=1)
regr.fit(X=X_train_cut, y=y_train)

In [18]:
# Predict ages for the held-out split with the fitted MLP.
# Note: y_pred is reused (overwritten) by each model's evaluation cell.
y_pred = regr.predict(X_test_cut)

In [19]:
# Hold-out metrics for the MLP: R2, explained variance and mean absolute error.
print(
    'R2 score neural network mlp regression: %.3f' % regr.score(X_test_cut, y_test),
    'Explained variance score: %.3f' % metrics.explained_variance_score(y_test, y_pred),
    'MAE: % .3f' % mean_absolute_error(y_test, y_pred),
    sep='\n',
)

R2 score neural network mlp regression: 0.774
Explained variance score: 0.774
MAE:  6.808


In [20]:
# Ordinary least-squares baseline on the training split.
linr = LinearRegression()
linr.fit(X=X_train_cut, y=y_train)

In [21]:
# Predict ages for the held-out split with the linear regression.
# Note: y_pred is reused (overwritten) by each model's evaluation cell.
y_pred = linr.predict(X_test_cut)

In [22]:
# Hold-out metrics for the linear regression.
print(
    'R2 score Linear regression: %.3f' % linr.score(X_test_cut, y_test),
    'Explained variance score: %.3f' % metrics.explained_variance_score(y_test, y_pred),
    'MAE: % .3f' % mean_absolute_error(y_test, y_pred),
    sep='\n',
)

R2 score Linear regression: 0.861
Explained variance score: 0.861
MAE:  5.253


In [23]:
# Lasso fitted with the LARS algorithm (alpha = 0.01).
llreg = linear_model.LassoLars(alpha=0.01)
llreg.fit(X=X_train_cut, y=y_train)



In [24]:
# Hold-out predictions and metrics for the LassoLars model.
y_pred = llreg.predict(X_test_cut)
print(
    'R2 score Lasso regression: %.3f' % llreg.score(X_test_cut, y_test),
    'Explained variance score: %.3f' % metrics.explained_variance_score(y_test, y_pred),
    'The mean absolute error: %.3f' % mean_absolute_error(y_test, y_pred),
    sep='\n',
)

R2 score Lasso regression: 0.856
Explained variance score: 0.856
The mean absolute error: 5.361


In [25]:
# Decision-tree regressor on the training split.
# A fixed random_state makes the fitted tree (and therefore every score reported
# for it) repeatable across runs, like the other seeded models in this notebook.
dtree = tree.DecisionTreeRegressor(random_state=12)
dtree.fit(X_train_cut, y_train)

In [26]:
# Hold-out predictions and metrics for the decision tree.
y_pred = dtree.predict(X_test_cut)
print(
    'R2 score dtree regression: %.3f' % dtree.score(X_test_cut, y_test),
    'Explained variance score: %.3f' % metrics.explained_variance_score(y_test, y_pred),
    'The mean absolute error: %.3f' % mean_absolute_error(y_test, y_pred),
    sep='\n',
)

R2 score dtree regression: 0.773
Explained variance score: 0.773
The mean absolute error: 6.312


In [28]:
# Support-vector regression with a 2nd-degree polynomial kernel.
# NOTE(review): features are passed in unscaled even though StandardScaler is
# imported; confirm that is intended.
svr_p2 = SVR(kernel='poly', degree=2, C=1.0, epsilon=0.2)
svr_p2.fit(X=X_train_cut, y=y_train)

In [31]:
# Hold-out predictions and metrics for the polynomial SVR.
y_pred = svr_p2.predict(X_test_cut)
print(
    'R2 score SVR 2nd degree poly kernel regression: %.3f' % svr_p2.score(X_test_cut, y_test),
    'Explained variance score: %.3f' % metrics.explained_variance_score(y_test, y_pred),
    'MAE: % .3f' % mean_absolute_error(y_test, y_pred),
    sep='\n',
)

R2 score SVR 2nd degree poly kernel regression: 0.437
Explained variance score: 0.438
MAE:  10.976


In [34]:
# Elastic net with the regularisation strength chosen by 5-fold cross-validation.
eregr = ElasticNetCV(random_state=12, cv=5)
eregr.fit(X=X_train_cut, y=y_train)

In [35]:
# Hold-out predictions and metrics for the cross-validated elastic net.
y_pred = eregr.predict(X_test_cut)
print(
    'R2 score elasticnetcv regression: %.3f' % eregr.score(X_test_cut, y_test),
    'Explained variance score: %.3f' % metrics.explained_variance_score(y_test, y_pred),
    'MAE: % .3f' % mean_absolute_error(y_test, y_pred),
    sep='\n',
)

R2 score elasticnetcv regression: 0.737
Explained variance score: 0.738
MAE:  7.370


In [37]:
# Extra-trees ensemble of 100 trees, seeded for repeatability.
etreg = ExtraTreesRegressor(random_state=0, n_estimators=100)
etreg.fit(X=X_train_cut, y=y_train)

In [38]:
# Hold-out predictions and metrics for the extra-trees ensemble.
y_pred = etreg.predict(X_test_cut)
print(
    'R2 score extra trees regression: %.3f' % etreg.score(X_test_cut, y_test),
    'Explained variance score: %.3f' % metrics.explained_variance_score(y_test, y_pred),
    'MAE: % .3f' % mean_absolute_error(y_test, y_pred),
    sep='\n',
)

R2 score extra trees regression: 0.882
Explained variance score: 0.883
MAE:  4.743


## Save off models

In [39]:
# Make sure the folder for saved models exists.
# exist_ok=True creates it when missing and is a no-op when it is already there,
# so the cell is safe to re-run and has no gap between checking and creating.
model_folder = '../result_models/'
os.makedirs(model_folder, exist_ok=True)

In [41]:
# joblib.dump(linr, ('../result_models/'+ 'unharm_mixed_linr.sav'))
# joblib.dump(llreg, ('../result_models/'+ 'unharm_mixed_lassor.sav'))
# joblib.dump(dtree, ('../result_models/'+ 'unharm_mixed_dtree.sav'))
# joblib.dump(regr, ('../result_models/'+'unharm_mixed_regr.sav'))
# joblib.dump(svr_p2, ('../result_models/'+'unharm_mixed_svrp2.sav'))
# joblib.dump(eregr, ('../result_models/'+ 'unharm_mixed_elasticnet.sav'))
# joblib.dump(etreg, ('../result_models/'+ 'unharm_mixed_extratree.sav'))

['../result_models/unharm_mixed_extratree.sav']

## Run models on other datasets (TOP, StrokeMRI)
but without re-using the rows that were part of the training data

## Here we check that no rows are duplicated once patient IDs were pulled
(if not, we can map them back)

In [42]:
# Wrap the training array in a frame and count fully duplicated rows
# (first occurrences are not counted as duplicates).
X_train_pandas = pd.DataFrame(data=X_train)
X_train_pandas.duplicated(keep='first').sum()

0

top_ml_matrix
needs to be mapped to top rows in X_train,
we will use ese MD5 hashes

now we need to make a dataframe of TOP minus what is in X_train

In [43]:
# Peek at the first three training rows; column 0 is the participant ID.
X_train_pandas.iloc[:3]

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18
0,sub-59168_2_ses-2_run-1,0,0.5728,0.44099,0.32872,0.42666,0.42666,2.694,27.0,5.7466,2.4044,1.9572,2.0226,2.0232,20.4581,62.7407,56.4464,44.0702,50.8079
1,sub-1109_1_ses-1_run-1,0,0.61626,0.4806,0.24631,0.45881,0.45881,2.364,17.0,7.0078,1.7119,1.6905,1.7671,1.6694,21.3399,64.97,56.4941,55.1801,56.3065
2,sub-59292_1_ses-1_run-1,0,0.59074,0.52605,0.34487,0.40416,0.40416,4.423,20.0,6.1642,1.6372,1.9432,1.8627,1.7752,31.6105,114.8866,98.6019,89.3701,92.8638


In [44]:
#X_train_pandas[0]

In [45]:
# IDs of every row that ended up in the training split (column label 0).
trained_subjects = set(X_train_pandas.loc[:, 0])

In [46]:
# All participant IDs in the TOP cohort.
TOP_subjects = set(TOP['participant_id'])

In [47]:
# TOP subjects that were NOT used for training: a plain set difference
# (every element of TOP_subjects that is absent from trained_subjects).
new_top = TOP_subjects - trained_subjects
print(len(new_top))

132


filter down to only top where they are in new_top set

In [48]:
# Rows of TOP whose participant was not seen during training.
is_unseen_top = TOP['participant_id'].isin(new_top)
TOP_new = TOP.loc[is_unseen_top]
TOP_new

Unnamed: 0,participant_id,age,sex,gm_vol,wm_vol,csf_vol,gm_ivc_ratio,gmwm_ivc_ratio,wmh_vol,wmh_count,deepwm_b_cov,aca_b_cov,mca_b_cov,pca_b_cov,totalgm_b_cov,deepwm_b,aca_b,mca_b,pca_b,totalgm_b
3,sub-0020_1_ses-1_run-1,21.97,0,0.67042,0.43458,0.25974,0.49124,0.49124,0.736,23.0,6.2080,1.8657,1.9092,1.6402,1.8734,27.1813,100.3259,90.7345,68.0812,79.1128
10,sub-0033_1_ses-1_run-1,29.21,1,0.59992,0.43360,0.24903,0.46776,0.46776,1.067,14.0,7.9519,1.7067,2.2134,1.9870,1.9942,23.9504,93.1921,86.2951,74.0285,78.9537
12,sub-0035_1_ses-1_run-1,31.25,1,0.70624,0.55290,0.27722,0.45968,0.45968,2.573,21.0,9.3348,1.6970,2.0326,1.8642,1.9049,24.3174,104.1033,95.0303,67.9324,79.9913
13,sub-0036_1_ses-1_run-1,44.57,1,0.78839,0.66709,0.41974,0.42043,0.42043,2.687,27.0,9.4991,1.6870,1.8951,1.9739,1.8552,27.5337,94.3162,89.4557,68.0983,79.6752
14,sub-0037_1_ses-1_run-1,46.06,0,0.65670,0.57055,0.37825,0.40903,0.40903,2.759,25.0,5.6076,2.1001,2.0220,2.5060,2.4160,23.8801,69.8487,60.6050,41.0306,50.2663
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
508,sub-1139_1_ses-1_run-1,44.44,1,0.69288,0.61756,0.30975,0.42765,0.42765,6.074,23.0,8.2863,1.9019,1.8616,1.6780,1.9797,25.6606,94.1454,81.4243,57.0763,70.0432
512,sub-1149_1_ses-1_run-1,31.61,1,0.74811,0.57229,0.27552,0.46876,0.46876,1.642,19.0,9.0315,2.0425,2.0859,1.4591,1.9126,19.7793,70.8518,63.2505,45.3822,55.2508
513,sub-1152_1_ses-1_run-1,29.42,1,0.82354,0.59415,0.36183,0.46279,0.46279,0.867,15.0,8.1299,2.2361,2.3858,1.8466,2.3558,27.6770,118.4624,104.2936,75.2855,93.1837
519,sub-1159_1_ses-1_run-1,36.76,0,0.74380,0.52266,0.30226,0.47414,0.47414,1.144,18.0,7.6305,1.6827,1.9455,1.9445,1.9204,26.7737,90.4672,82.6515,72.8932,76.7546


In [49]:
# Build float feature/target arrays for the unseen TOP rows:
# drop the ID, use age as the target and everything else as features.
top_ml_matrix = TOP_new.drop(columns='participant_id')

y_top = top_ml_matrix['age'].to_numpy().astype('float')
X_top = top_ml_matrix.drop(columns='age').to_numpy().astype('float')

In [50]:
# X_top_train, X_top_test, y_top_train, y_top_test = train_test_split(
#     X_top, y_top, test_size=0.99, random_state=42)

In [51]:
# The whole unseen TOP subset serves as the test set (no further split).
X_top_test, y_top_test = X_top, y_top

In [52]:
# Linear regression evaluated on the unseen TOP rows.
y_top_pred = linr.predict(X_top_test)
print(
    'R2 score Linear regression: %.3f' % linr.score(X_top_test, y_top_test),
    'Explained variance score: %.3f' % metrics.explained_variance_score(y_top_test, y_top_pred),
    'The mean absolute error: %.3f' % mean_absolute_error(y_top_test, y_top_pred),
    sep='\n',
)

R2 score Linear regression: 0.549
Explained variance score: 0.592
The mean absolute error: 5.005


In [53]:
# LassoLars evaluated on the unseen TOP rows.
y_top_pred = llreg.predict(X_top_test)
print(
    'R2 score Lasso linear regression: %.3f' % llreg.score(X_top_test, y_top_test),
    'Explained variance score: %.3f' % metrics.explained_variance_score(y_top_test, y_top_pred),
    'The mean absolute error: %.3f' % mean_absolute_error(y_top_test, y_top_pred),
    sep='\n',
)

R2 score Lasso linear regression: 0.530
Explained variance score: 0.575
The mean absolute error: 5.103


In [54]:
# Decision tree evaluated on the unseen TOP rows.
y_top_pred = dtree.predict(X_top_test)
print(
    'R2 score decision tree regression: %.3f' % dtree.score(X_top_test, y_top_test),
    'Explained variance score: %.3f' % metrics.explained_variance_score(y_top_test, y_top_pred),
    'The mean absolute error: %.3f' % mean_absolute_error(y_top_test, y_top_pred),
    sep='\n',
)

R2 score Lasso linear regression: 0.152
Explained variance score: 0.243
The mean absolute error: 6.593


In [58]:
# Cross-validated elastic net evaluated on the unseen TOP rows.
y_top_pred = eregr.predict(X_top_test)
print(
    'R2 score ElasticnetCV regression: %.3f' % eregr.score(X_top_test, y_top_test),
    'Explained variance score: %.3f' % metrics.explained_variance_score(y_top_test, y_top_pred),
    'The mean absolute error: %.3f' % mean_absolute_error(y_top_test, y_top_pred),
    sep='\n',
)

R2 score Lasso linear regression: 0.085
Explained variance score: 0.171
The mean absolute error: 7.100


In [56]:
# Polynomial SVR evaluated on the unseen TOP rows.
y_top_pred = svr_p2.predict(X_top_test)
print(
    'R2 score SVR poly 2 regression: %.3f' % svr_p2.score(X_top_test, y_top_test),
    'Explained variance score: %.3f' % metrics.explained_variance_score(y_top_test, y_top_pred),
    'The mean absolute error: %.3f' % mean_absolute_error(y_top_test, y_top_pred),
    sep='\n',
)

R2 score SVR poly 2 regression: -0.589
Explained variance score: 0.146
The mean absolute error: 9.514


In [57]:
# Extra-trees ensemble evaluated on the unseen TOP rows.
y_top_pred = etreg.predict(X_top_test)
print(
    'R2 score Extra treen: %.3f' % etreg.score(X_top_test, y_top_test),
    'Explained variance score: %.3f' % metrics.explained_variance_score(y_top_test, y_top_pred),
    'The mean absolute error: %.3f' % mean_absolute_error(y_top_test, y_top_pred),
    sep='\n',
)

R2 score Extra treen: 0.573
Explained variance score: 0.646
The mean absolute error: 4.709


In [60]:
# MLP regressor evaluated on the unseen TOP rows.
y_top_pred = regr.predict(X_top_test)
print(
    'R2 score MLP regression: %.3f' % regr.score(X_top_test, y_top_test),
    'Explained variance score: %.3f' % metrics.explained_variance_score(y_top_test, y_top_pred),
    'The mean absolute error: %.3f' % mean_absolute_error(y_top_test, y_top_pred),
    sep='\n',
)

R2 score MLP regression: 0.252
Explained variance score: 0.317
The mean absolute error: 6.392


In [61]:
# All participant IDs in the StrokeMRI cohort.
StrokeMRI_subjects = set(StrokeMRI['participant_id'])

In [62]:
# StrokeMRI subjects that were NOT used for training: a plain set difference
# (every element of StrokeMRI_subjects that is absent from trained_subjects).
new_mri = StrokeMRI_subjects - trained_subjects
print(len(new_mri))

129


In [63]:
# Rows of StrokeMRI whose participant was not seen during training.
is_unseen_mri = StrokeMRI['participant_id'].isin(new_mri)
StrokeMRI_new = StrokeMRI.loc[is_unseen_mri]
StrokeMRI_new

Unnamed: 0,participant_id,age,sex,gm_vol,wm_vol,csf_vol,gm_ivc_ratio,gmwm_ivc_ratio,wmh_vol,wmh_count,deepwm_b_cov,aca_b_cov,mca_b_cov,pca_b_cov,totalgm_b_cov,deepwm_b,aca_b,mca_b,pca_b,totalgm_b
1,sub-59083_1_ses-1_run-1,66.367123,0,0.60517,0.48594,0.42304,0.39968,0.39968,14.597,25.0,3.7791,1.9669,1.7154,2.6394,1.8815,24.9597,75.7051,65.3032,65.6771,67.1158
6,sub-59089_1_ses-1_run-1,58.419178,0,0.59539,0.42689,0.30742,0.44776,0.44776,10.007,63.0,2.5252,1.8820,1.8123,1.9660,1.8493,31.1810,86.5265,77.1446,66.0136,70.0935
9,sub-59090_2_ses-2_run-1,74.610959,0,0.52956,0.43469,0.44153,0.37670,0.37670,6.586,28.0,3.7965,1.7711,1.8558,1.8400,1.7029,39.2243,121.8465,100.5382,97.1110,98.7511
10,sub-59091_1_ses-1_run-1,74.063014,1,0.57798,0.48552,0.64199,0.33889,0.33889,51.150,18.0,1.6695,1.8533,1.7590,1.9329,1.5357,32.2655,71.2736,68.7675,62.9893,64.8501
11,sub-59092_1_ses-1_run-1,50.668493,0,0.63332,0.50157,0.32473,0.43389,0.43389,3.254,21.0,7.5185,1.8133,2.0189,2.1782,1.9153,21.5882,84.9830,74.6997,57.6342,64.3967
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
487,sub-59421_1_ses-1_run-1,73.868493,1,0.56809,0.53818,0.36818,0.38529,0.38529,5.593,23.0,6.1768,2.0553,2.1493,1.8570,1.9294,22.8499,60.9506,54.3851,44.0679,49.0354
491,sub-59425_1_ses-1_run-1,79.624658,0,0.51422,0.41864,0.43308,0.37646,0.37646,33.736,36.0,1.9466,2.4963,1.9048,1.8218,1.6353,23.5518,60.0265,51.1026,47.2077,49.4722
499,sub-59430_2_ses-2_run-1,80.528767,1,0.60668,0.45482,0.65419,0.35361,0.35361,22.663,70.0,2.8841,2.5654,2.2015,2.5214,2.5211,29.3052,77.2196,72.4349,48.1619,56.8724
501,sub-59431_2_ses-2_run-1,69.221918,0,0.61392,0.49976,0.36144,0.41618,0.41618,4.135,21.0,6.0937,1.9948,1.9879,1.9857,2.1003,24.0503,77.2072,72.9001,54.2003,61.7585


In [64]:
# Build float feature/target arrays for the unseen StrokeMRI rows:
# drop the ID, use age as the target and everything else as features.
strokemri_ml_matrix = StrokeMRI_new.drop(columns='participant_id')

y_mri = strokemri_ml_matrix['age'].to_numpy().astype('float')
X_mri = strokemri_ml_matrix.drop(columns='age').to_numpy().astype('float')

In [65]:
# The whole unseen StrokeMRI subset serves as the test set (no further split).
X_mri_test, y_mri_test = X_mri, y_mri

In [66]:
# Linear regression evaluated on the unseen StrokeMRI rows.
y_mri_pred = linr.predict(X_mri_test)
print(
    'R2 score Linear regression: %.3f' % linr.score(X_mri_test, y_mri_test),
    'Explained variance score: %.3f' % metrics.explained_variance_score(y_mri_test, y_mri_pred),
    'The mean absolute error: %.3f' % mean_absolute_error(y_mri_test, y_mri_pred),
    sep='\n',
)

R2 score Linear regression: 0.792
Explained variance score: 0.812
The mean absolute error: 5.507


In [67]:
# LassoLars evaluated on the unseen StrokeMRI rows.
y_mri_pred = llreg.predict(X_mri_test)
print(
    'R2 score Lasso-linear regression: %.3f' % llreg.score(X_mri_test, y_mri_test),
    'Explained variance score: %.3f' % metrics.explained_variance_score(y_mri_test, y_mri_pred),
    'The mean absolute error: %.3f' % mean_absolute_error(y_mri_test, y_mri_pred),
    sep='\n',
)

R2 score Lasso-linear regression: 0.785
Explained variance score: 0.808
The mean absolute error: 5.626


In [68]:
# Decision tree evaluated on the unseen StrokeMRI rows.
y_mri_pred = dtree.predict(X_mri_test)
print(
    'R2 score decision tree regression: %.3f' % dtree.score(X_mri_test, y_mri_test),
    'Explained variance score: %.3f' % metrics.explained_variance_score(y_mri_test, y_mri_pred),
    'The mean absolute error: %.3f' % mean_absolute_error(y_mri_test, y_mri_pred),
    sep='\n',
)

R2 score decision tree regression: 0.704
Explained variance score: 0.735
The mean absolute error: 6.024


In [69]:
# MLP regressor evaluated on the unseen StrokeMRI rows.
# The printed label names the MLP because `regr` is the MLPRegressor; the
# previous label attributed these scores to the Lasso model.
y_mri_pred = regr.predict(X_mri_test)
print(
    'R2 score MLP regression: %.3f' % regr.score(X_mri_test, y_mri_test),
    'Explained variance score: %.3f' % metrics.explained_variance_score(y_mri_test, y_mri_pred),
    'The mean absolute error: %.3f' % mean_absolute_error(y_mri_test, y_mri_pred),
    sep='\n',
)

R2 score Lasso linear regression: 0.666
Explained variance score: 0.695
The mean absolute error: 7.234


In [70]:
# Polynomial SVR evaluated on the unseen StrokeMRI rows.
y_mri_pred = svr_p2.predict(X_mri_test)
print(
    'R2 score SVR poly2 regression: %.3f' % svr_p2.score(X_mri_test, y_mri_test),
    'Explained variance score: %.3f' % metrics.explained_variance_score(y_mri_test, y_mri_pred),
    'The mean absolute error: %.3f' % mean_absolute_error(y_mri_test, y_mri_pred),
    sep='\n',
)

R2 score SVR poly2 regression: 0.065
Explained variance score: 0.419
The mean absolute error: 12.472


In [71]:
# Extra-trees ensemble evaluated on the unseen StrokeMRI rows.
y_mri_pred = etreg.predict(X_mri_test)
print(
    'R2 score Extra tree regression: %.3f' % etreg.score(X_mri_test, y_mri_test),
    'Explained variance score: %.3f' % metrics.explained_variance_score(y_mri_test, y_mri_pred),
    'The mean absolute error: %.3f' % mean_absolute_error(y_mri_test, y_mri_pred),
    sep='\n',
)

R2 score Extra tree regression: 0.840
Explained variance score: 0.852
The mean absolute error: 4.779
