In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LassoCV, RidgeCV, Lasso
from sklearn.svm import SVR
from sklearn.experimental import enable_iterative_imputer 
from sklearn.impute import IterativeImputer
from sklearn.metrics import mean_absolute_error
from sklearn.preprocessing import *
from random import seed
from scipy import stats
import seaborn as sns
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import cross_val_score
%matplotlib inline

# Seed both global RNGs. `random.seed` alone does not touch NumPy's global
# generator, which scikit-learn falls back to whenever `random_state` is not
# passed (e.g. in `train_test_split`).
seed(888)
np.random.seed(888)

# 1 Load data from "1 Preprocessing"

In [None]:
# Read the four pickles written by "1 Preprocessing" and wrap each one in a
# DataFrame so the cells below can rely on DataFrame indexing.
# NOTE(review): unpickling can execute arbitrary code - only load trusted files.
brain_test = pd.DataFrame(pd.read_pickle("1_brain_test.pkl"))
brain_train = pd.DataFrame(pd.read_pickle("1_brain_train.pkl"))
main_test = pd.DataFrame(pd.read_pickle("1_main_test.pkl"))
main_train = pd.DataFrame(pd.read_pickle("1_main_train.pkl"))

Validation set data

In [None]:
# Validation-set counterparts of brain_test / main_test, loaded the same way.
# NOTE(review): unpickling can execute arbitrary code - only load trusted files.
val_brain_test = pd.DataFrame(pd.read_pickle("brain_test_validate.pkl"))
val_main_test = pd.DataFrame(pd.read_pickle("main_test_validate.pkl"))

# 2 BrainAge with different segmentations

## 2.1 A2009s Segmentation

In [None]:
# Column positions of the A2009s features: every second column in [1657, 2544).
# The last column of each frame is used as the regression target.
a2009s_columns = slice(1657, 2544, 2)

X_train, Y_train = brain_train.iloc[:, a2009s_columns], brain_train.iloc[:, -1]
X_test, Y_test = brain_test.iloc[:, a2009s_columns], brain_test.iloc[:, -1]

# z-score every feature column.
# NOTE(review): train and test are standardized independently, each with its
# own mean/std (the test set is not scaled with the training statistics) -
# confirm this is intended.
X_train, X_test = stats.zscore(X_train), stats.zscore(X_test)

# candidate ridge penalties: 100 values, log-spaced between 1e-3 and 1e5
alpha_parameters = np.power(10, np.linspace(start=-3, stop=5, num=100))

### 2.1.1 Cross-validation

In [None]:
X_CV = X_train
Y_CV = Y_train

# Randomly hold out part of the training data (only training, no PA); the
# predictions on this hold-out are used later to fit the age-bias correction.
# random_state is fixed so the split - and therefore the correction
# coefficients - are identical on every run, as in section 2.4.1.
X_train_cv, X_test_cv, Y_train_cv, Y_test_cv = train_test_split(X_CV, Y_CV, random_state=888)

# define the model
model_ridge = RidgeCV(alphas = alpha_parameters)

# perform 10-fold cross validation on the whole training set
scores_ridge = cross_val_score(model_ridge, X_CV, Y_CV, cv=10, scoring='neg_mean_absolute_error')

# the scorer returns negated MAE values, so flip the sign of the mean
mae_ridge = -1 * scores_ridge.mean()

print("The MAE for cross-validation (Ridge):", mae_ridge)

# for correction: fit on the split's training part, predict its hold-out part
model_ridge.fit(X_train_cv,Y_train_cv)
y_pred_cv = model_ridge.predict(X_test_cv)

### 2.1.2 Prediction

In [None]:
model_Ridge = RidgeCV(alphas = alpha_parameters)

# train model on the full (standardized) training set
model_Ridge.fit(X_train,Y_train)

# get predicted values for test set
y_pred_Ridge = model_Ridge.predict(X_test)

# brain age gap: predicted age minus chronological age
brain_age_delta_Ridge = y_pred_Ridge-Y_test

# get mean absolute error (MAE)
print("The MAE for testing set using Ridge:", mean_absolute_error(Y_test,y_pred_Ridge))

# the 20 coefficients with the largest absolute value (ascending) and their names
coeff = np.asarray(model_Ridge.coef_)
coeffabs = np.abs(coeff)
top_coefficients = np.argsort(coeffabs)[-20:]
print(X_train.iloc[:,top_coefficients].columns)
print(coeff[top_coefficients])

# plot figure with x: actual age, y: predicted age, and a line with slope 1 for reference
plt.figure()
plt.scatter(Y_test, y_pred_Ridge, alpha=0.5, s=10, color = "darkcyan")
plt.axline((60,60), slope=1, color='r')
plt.ylabel('Predicted Age', fontsize = 16)
plt.xlabel('Age', fontsize = 16)
plt.xticks(range(40,90,5), fontsize = 14)
plt.yticks(range(40,90,5), fontsize = 14)
plt.show()

# Least-squares trend of the brain age gap on age. The line is fitted to the
# data instead of being drawn with a hard-coded slope through (65, 0), so it
# always reflects the model that was actually trained above.
age_values = np.asarray(Y_test, dtype=float)
gap_values = np.asarray(brain_age_delta_Ridge, dtype=float)
trend_slope, trend_intercept = np.polyfit(age_values, gap_values, 1)

# plot the brainAGE, its fitted trend against age (blue) and the zero line (black)
plt.figure()
plt.scatter(Y_test,brain_age_delta_Ridge, alpha=0.5, s=10, color = "darkcyan")
plt.axline((65, trend_slope*65 + trend_intercept), slope=trend_slope, color="blue")
plt.axline((60,0),slope=0, color = "black")
plt.ylabel("BrainAGE", fontsize = 16)
plt.xlabel("Age", fontsize = 16)
plt.xticks(fontsize = 14)
plt.yticks(fontsize=14)
plt.show()

### 2.1.3 Coefficients for later plotting

In [None]:
# Coefficients of the ridge model and the names of the features they belong to.
coefficients = np.asarray(model_Ridge.coef_, dtype=float)
names = np.asarray(X_train.columns[:])

# The features are treated as 6 consecutive groups of 74 areas each
# (columns 0-73, 74-147, ..., 370-443).
# NOTE(review): what each group represents (measure / hemisphere) is not
# visible here - confirm against the column names.
n_areas = 74
n_groups = 6
if coefficients.size != n_areas * n_groups:
    raise ValueError(
        f"expected {n_areas * n_groups} coefficients, got {coefficients.size}"
    )

# name/coefficient table, kept only for a better overview during development
plot_coef_array = pd.DataFrame({0: names, 1: coefficients})

# rows = areas, columns = feature groups; kept numeric so abs/argmax operate on
# floats. Column labels 1, 3, ..., 11 are the ones this frame had before.
coef_frame = pd.DataFrame(
    coefficients.reshape(n_groups, n_areas).T,
    columns=list(range(1, 2 * n_groups, 2)),
)

# for every area (in the initial ordering of the areas) keep the coefficient
# with the largest absolute value across the groups, sign preserved
coef_values = coef_frame.to_numpy()
strongest_group = np.abs(coef_values).argmax(axis=1)
final_coef = coef_values[np.arange(n_areas), strongest_group].tolist()


### 2.1.4 Correction

In [None]:
# reformat the held-out cross-validation data into column vectors for the regression
y_pred_cv = np.reshape(np.asarray(y_pred_cv), (-1,1))
Y_test_cv = np.reshape(np.asarray(Y_test_cv), (-1,1))

# age-bias correction following the R code that's online from Cole:
# regress predicted age on chronological age in the hold-out split
reg = LinearRegression().fit(Y_test_cv, y_pred_cv)
# The target is 2-D, so coef_ has shape (1, 1) and intercept_ shape (1,).
# .item() extracts the scalar; float() on arrays with ndim > 0 is deprecated
# in NumPy.
coef = reg.coef_.item()
intercept = reg.intercept_.item()
print("Coefficient and Intercept:", coef, intercept)

# correction by cole: invert the fitted line, applied to the whole array at once
corr_brainage_1 = (y_pred_Ridge - intercept) / coef
corr_brainage_delta_1 = corr_brainage_1-Y_test

# plots corrected
plt.figure()
plt.scatter(Y_test, corr_brainage_1, alpha=0.5, s=10, color="darkcyan")
plt.axline((65,65), slope=coef, color="blue")
plt.ylabel("Predicted Age Corrected", fontsize = 16)
plt.axline((60,60), slope=1, color='r')
plt.xlabel("Age", fontsize = 16)
plt.xticks(range(40,90,5), fontsize = 14)
plt.yticks(range(30,115,10), fontsize = 14)
plt.show()

plt.figure()
plt.scatter(Y_test, corr_brainage_delta_1, alpha=0.5, s=10, color="darkcyan")
plt.axline((60,0),slope=0, color = "black")
plt.ylabel("BrainAGE Corrected", fontsize = 16)
plt.xlabel("Age", fontsize = 16)
plt.xticks(fontsize = 14)
plt.yticks(fontsize=14)
plt.show()

## 2.2 ASEG Segmentation

In [None]:
# Column positions of the Freesurfer ASEG features: every second column of
# three ranges (whole brain, left hemisphere, right hemisphere).
aseg_slices = (slice(27, 70, 2), slice(103, 134, 2), slice(165, 196, 2))

# ASEG features plus age for the training data
whole_brain, left_hemi, right_hemi = (brain_train.iloc[:, cols] for cols in aseg_slices)
age = brain_train["Age"]
ASEG_train = pd.DataFrame(pd.concat([whole_brain, left_hemi, right_hemi, age], axis=1))

# ASEG features plus age for the testing data
whole_brain, left_hemi, right_hemi = (brain_test.iloc[:, cols] for cols in aseg_slices)
age = brain_test["Age"]
ASEG_test = pd.DataFrame(pd.concat([whole_brain, left_hemi, right_hemi, age], axis=1))

# age was appended last, so it is the target and everything before it is a feature
X_train, Y_train = ASEG_train.iloc[:, :-1], ASEG_train.iloc[:, -1]
X_test, Y_test = ASEG_test.iloc[:, :-1], ASEG_test.iloc[:, -1]

# z-score every feature column.
# NOTE(review): train and test are standardized independently, each with its
# own mean/std - confirm this is intended.
X_train, X_test = stats.zscore(X_train), stats.zscore(X_test)

### 2.2.1 Cross-validation

In [None]:
X_CV = X_train
Y_CV = Y_train

# Randomly hold out part of the training data (only training, no PA); the
# predictions on this hold-out are used later to fit the age-bias correction.
# random_state is fixed so the split - and therefore the correction
# coefficients - are identical on every run, as in section 2.4.1.
X_train_cv, X_test_cv, Y_train_cv, Y_test_cv = train_test_split(X_CV, Y_CV, random_state=888)

# define the model
model_ridge = RidgeCV(alphas = alpha_parameters)

# perform 10-fold cross validation on the whole training set
scores_ridge = cross_val_score(model_ridge, X_CV, Y_CV, cv=10, scoring='neg_mean_absolute_error')

# the scorer returns negated MAE values, so flip the sign of the mean
mae_ridge = -1 * scores_ridge.mean()

print("The MAE for cross-validation (Ridge):", mae_ridge)

# for correction: fit on the split's training part, predict its hold-out part
model_ridge.fit(X_train_cv,Y_train_cv)
y_pred_cv = model_ridge.predict(X_test_cv)

### 2.2.2 Prediction

In [None]:
# models to try
model_Ridge = RidgeCV(alphas = alpha_parameters)

# train model on the full (standardized) training set
model_Ridge.fit(X_train,Y_train)

# get predicted values for test set
y_pred_Ridge = model_Ridge.predict(X_test)

# brain age gap: predicted age minus chronological age
brain_age_delta_Ridge = y_pred_Ridge-Y_test

# get mean absolute error (MAE)
print("The MAE for testing set using Ridge:", mean_absolute_error(Y_test,y_pred_Ridge))

# the 20 coefficients with the largest absolute value (ascending) and their names
coeff = np.asarray(model_Ridge.coef_)
coeffabs = np.abs(coeff)
top_coefficients = np.argsort(coeffabs)[-20:]
print(X_train.iloc[:,top_coefficients].columns)
print(coeff[top_coefficients])

# plot figure with x: actual age, y: predicted age, and a line with slope 1 for reference
plt.figure()
plt.scatter(Y_test, y_pred_Ridge, alpha=0.5, s=10, color = "darkcyan")
plt.axline((60,60), slope=1, color='r')
plt.ylabel('Predicted Age', fontsize = 16)
plt.xlabel('Age', fontsize = 16)
plt.xticks(range(40,90,5), fontsize = 14)
plt.yticks(range(40,90,5), fontsize = 14)
plt.show()

# Least-squares trend of the brain age gap on age. The line is fitted to the
# data instead of being drawn with a hard-coded slope through (65, 0), so it
# always reflects the model that was actually trained above.
age_values = np.asarray(Y_test, dtype=float)
gap_values = np.asarray(brain_age_delta_Ridge, dtype=float)
trend_slope, trend_intercept = np.polyfit(age_values, gap_values, 1)

# plot the brainAGE, its fitted trend against age (blue) and the zero line (black)
plt.figure()
plt.scatter(Y_test,brain_age_delta_Ridge, alpha=0.5, s=10, color = "darkcyan")
plt.axline((65, trend_slope*65 + trend_intercept), slope=trend_slope, color="blue")
plt.axline((60,0),slope=0, color = "black")
plt.ylabel("BrainAGE", fontsize = 16)
plt.xlabel("Age", fontsize = 16)
plt.xticks(fontsize = 14)
plt.yticks(fontsize=14)
plt.show()

### 2.2.3 Correction

In [None]:
# reformat the held-out cross-validation data into column vectors for the regression
y_pred_cv = np.reshape(np.asarray(y_pred_cv), (-1,1))
Y_test_cv = np.reshape(np.asarray(Y_test_cv), (-1,1))

# age-bias correction following the R code that's online from Cole:
# regress predicted age on chronological age in the hold-out split
reg = LinearRegression().fit(Y_test_cv, y_pred_cv)
# The target is 2-D, so coef_ has shape (1, 1) and intercept_ shape (1,).
# .item() extracts the scalar; float() on arrays with ndim > 0 is deprecated
# in NumPy.
coef = reg.coef_.item()
intercept = reg.intercept_.item()
print("Coefficient and Intercept:", coef, intercept)

# correction by cole: invert the fitted line, applied to the whole array at once
corr_brainage_2 = (y_pred_Ridge - intercept) / coef
corr_brainage_delta_2 = corr_brainage_2-Y_test

# plots corrected
plt.figure()
plt.scatter(Y_test, corr_brainage_2, alpha=0.5, s=10, color="darkcyan")
plt.axline((65,65), slope=coef, color="blue")
plt.ylabel("Predicted Age Corrected", fontsize = 16)
plt.axline((60,60), slope=1, color='r')
plt.xlabel("Age", fontsize = 16)
plt.xticks(range(40,90,5), fontsize = 14)
plt.yticks(range(30,115,10), fontsize = 14)
plt.show()

plt.figure()
plt.scatter(Y_test, corr_brainage_delta_2, alpha=0.5, s=10, color="darkcyan")
plt.axline((60,0),slope=0, color = "black")
plt.ylabel("BrainAGE Corrected", fontsize = 16)
plt.xlabel("Age", fontsize = 16)
plt.xticks(fontsize = 14)
plt.yticks(fontsize=14)
plt.show()

## 2.3 Desikan Segmentation

In [None]:
# Column positions of this segmentation's features: every second column in
# [441, 844). The last column of each frame is used as the regression target.
desikan_columns = slice(441, 844, 2)

X_train, Y_train = brain_train.iloc[:, desikan_columns], brain_train.iloc[:, -1]
X_test, Y_test = brain_test.iloc[:, desikan_columns], brain_test.iloc[:, -1]

# z-score every feature column.
# NOTE(review): train and test are standardized independently, each with its
# own mean/std - confirm this is intended.
X_train, X_test = stats.zscore(X_train), stats.zscore(X_test)

### 2.3.1 Cross-validation

In [None]:
X_CV = X_train
Y_CV = Y_train

# Randomly hold out part of the training data (only training, no PA); the
# predictions on this hold-out are used later to fit the age-bias correction.
# random_state is fixed so the split - and therefore the correction
# coefficients - are identical on every run, as in section 2.4.1.
X_train_cv, X_test_cv, Y_train_cv, Y_test_cv = train_test_split(X_CV, Y_CV, random_state=888)

# define the model
model_ridge = RidgeCV(alphas = alpha_parameters)

# perform 10-fold cross validation on the whole training set
scores_ridge = cross_val_score(model_ridge, X_CV, Y_CV, cv=10, scoring='neg_mean_absolute_error')

# the scorer returns negated MAE values, so flip the sign of the mean
mae_ridge = -1 * scores_ridge.mean()

print("The MAE for cross-validation (Ridge):", mae_ridge)

# for correction: fit on the split's training part, predict its hold-out part
model_ridge.fit(X_train_cv,Y_train_cv)
y_pred_cv = model_ridge.predict(X_test_cv)

### 2.3.2 Prediction

In [None]:
# models to try
model_Ridge = RidgeCV(alphas = alpha_parameters)

# train model on the full (standardized) training set
model_Ridge.fit(X_train,Y_train)

# get predicted values for test set
y_pred_Ridge = model_Ridge.predict(X_test)

# brain age gap: predicted age minus chronological age
brain_age_delta_Ridge = y_pred_Ridge-Y_test

# get mean absolute error (MAE)
print("The MAE for testing set using Ridge:", mean_absolute_error(Y_test,y_pred_Ridge))

# the 20 coefficients with the largest absolute value (ascending) and their names
coeff = np.asarray(model_Ridge.coef_)
coeffabs = np.abs(coeff)
top_coefficients = np.argsort(coeffabs)[-20:]
print(X_train.iloc[:,top_coefficients].columns)
print(coeff[top_coefficients])

# plot figure with x: actual age, y: predicted age, and a line with slope 1 for reference
plt.figure()
plt.scatter(Y_test, y_pred_Ridge, alpha=0.5, s=10, color = "darkcyan")
plt.axline((60,60), slope=1, color='r')
plt.ylabel('Predicted Age', fontsize = 16)
plt.xlabel('Age', fontsize = 16)
plt.xticks(range(40,90,5), fontsize = 14)
plt.yticks(range(40,90,5), fontsize = 14)
plt.show()

# Least-squares trend of the brain age gap on age. The line is fitted to the
# data instead of being drawn with a hard-coded slope through (65, 0), so it
# always reflects the model that was actually trained above.
age_values = np.asarray(Y_test, dtype=float)
gap_values = np.asarray(brain_age_delta_Ridge, dtype=float)
trend_slope, trend_intercept = np.polyfit(age_values, gap_values, 1)

# plot the brainAGE, its fitted trend against age (blue) and the zero line (black)
plt.figure()
plt.scatter(Y_test,brain_age_delta_Ridge, alpha=0.5, s=10, color = "darkcyan")
plt.axline((65, trend_slope*65 + trend_intercept), slope=trend_slope, color="blue")
plt.axline((60,0),slope=0, color = "black")
plt.ylabel("BrainAGE", fontsize = 16)
plt.xlabel("Age", fontsize = 16)
plt.xticks(fontsize = 14)
plt.yticks(fontsize=14)
plt.show()

### 2.3.3 Correction

In [None]:
# reformat the held-out cross-validation data into column vectors for the regression
y_pred_cv = np.reshape(np.asarray(y_pred_cv), (-1,1))
Y_test_cv = np.reshape(np.asarray(Y_test_cv), (-1,1))

# age-bias correction following the R code that's online from Cole:
# regress predicted age on chronological age in the hold-out split
reg = LinearRegression().fit(Y_test_cv, y_pred_cv)
# The target is 2-D, so coef_ has shape (1, 1) and intercept_ shape (1,).
# .item() extracts the scalar; float() on arrays with ndim > 0 is deprecated
# in NumPy.
coef = reg.coef_.item()
intercept = reg.intercept_.item()
print("Coefficient and Intercept:", coef, intercept)

# correction by cole: invert the fitted line, applied to the whole array at once
corr_brainage_3 = (y_pred_Ridge - intercept) / coef
corr_brainage_delta_3 = corr_brainage_3-Y_test

# plots corrected
plt.figure()
plt.scatter(Y_test, corr_brainage_3, alpha=0.5, s=10, color="darkcyan")
plt.axline((65,65), slope=coef, color="blue")
plt.ylabel("Predicted Age Corrected", fontsize = 16)
plt.axline((60,60), slope=1, color='r')
plt.xlabel("Age", fontsize = 16)
plt.xticks(range(40,90,5), fontsize = 14)
plt.yticks(range(30,115,10), fontsize = 14)
plt.show()

plt.figure()
plt.scatter(Y_test, corr_brainage_delta_3, alpha=0.5, s=10, color="darkcyan")
plt.axline((60,0),slope=0, color = "black")
plt.ylabel("BrainAGE Corrected", fontsize = 16)
plt.xlabel("Age", fontsize = 16)
plt.xticks(fontsize = 14)
plt.yticks(fontsize=14)
plt.show()

## 2.4 Combined ASEG Destrieux Segmentation

In [None]:
# Column positions of the Freesurfer ASEG features (whole brain, left and
# right hemisphere) and of the A2009s features; every second column is taken.
aseg_slices = (slice(27, 70, 2), slice(103, 134, 2), slice(165, 196, 2))
a2009s_columns = slice(1657, 2544, 2)

# ASEG features plus age for the training data
whole_brain, left_hemi, right_hemi = (brain_train.iloc[:, cols] for cols in aseg_slices)
age = brain_train["Age"]
ASEG_train = pd.DataFrame(pd.concat([whole_brain, left_hemi, right_hemi, age], axis=1))

# ASEG features plus age for the testing data
whole_brain, left_hemi, right_hemi = (brain_test.iloc[:, cols] for cols in aseg_slices)
age = brain_test["Age"]
ASEG_test = pd.DataFrame(pd.concat([whole_brain, left_hemi, right_hemi, age], axis=1))

# drop the trailing age column again to keep only the ASEG features
ASEG_X_train = ASEG_train.iloc[:, :-1]
ASEG_X_test = ASEG_test.iloc[:, :-1]

# A2009s features
Des_X_train = brain_train.iloc[:, a2009s_columns]
Des_X_test = brain_test.iloc[:, a2009s_columns]

# target: last column of each frame
Y_train = brain_train.iloc[:, -1]
Y_test = brain_test.iloc[:, -1]

# combined feature matrices: ASEG columns followed by A2009s columns
X_train = pd.concat([ASEG_X_train, Des_X_train], axis=1)
X_test = pd.concat([ASEG_X_test, Des_X_test], axis=1)

# z-score every feature column.
# NOTE(review): train and test are standardized independently, each with its
# own mean/std - confirm this is intended.
X_train, X_test = stats.zscore(X_train), stats.zscore(X_test)

# candidate ridge penalties: 100 values, log-spaced between 1e-3 and 1e5
alpha_parameters = np.power(10, np.linspace(start=-3, stop=5, num=100))

Validation data

In [None]:
# Same column selection as for the combined model, applied to the validation set.
val_aseg_slices = (slice(27, 70, 2), slice(103, 134, 2), slice(165, 196, 2))

# ASEG features plus age
val_whole_brain, val_left_hemi, val_right_hemi = (
    val_brain_test.iloc[:, cols] for cols in val_aseg_slices
)
val_age = val_brain_test["Age"]
val_ASEG_test = pd.DataFrame(
    pd.concat([val_whole_brain, val_left_hemi, val_right_hemi, val_age], axis=1)
)

# drop the trailing age column again to keep only the ASEG features
val_ASEG_X_test = val_ASEG_test.iloc[:, :-1]

# A2009s features
val_Des_X_test = val_brain_test.iloc[:, 1657:2544:2]

# target: last column of the validation frame
val_Y_test = val_brain_test.iloc[:, -1]

# combined feature matrix: ASEG columns followed by A2009s columns
val_X_test = pd.concat([val_ASEG_X_test, val_Des_X_test], axis=1)

# z-score every feature column.
# NOTE(review): the validation set is standardized with its own mean/std, not
# with the training statistics - confirm this is intended.
val_X_test = stats.zscore(val_X_test)

### 2.4.1 Cross-validation

In [None]:
# cross-validation operates on the standardized training data only (no PA)
X_CV, Y_CV = X_train, Y_train

# ridge model that picks its penalty from alpha_parameters
model_ridge = RidgeCV(alphas=alpha_parameters)

# 10-fold cross-validation; the scorer yields negated MAE values, so negate the mean
scores_ridge = cross_val_score(
    model_ridge, X_CV, Y_CV, scoring='neg_mean_absolute_error', cv=10
)
mae_ridge = -scores_ridge.mean()

print("The MAE for cross-validation (Ridge):", mae_ridge)

# fixed-seed random split; the hold-out predictions feed the bias correction
X_train_cv, X_test_cv, Y_train_cv, Y_test_cv = train_test_split(
    X_CV, Y_CV, random_state=888
)
y_pred_cv = model_ridge.fit(X_train_cv, Y_train_cv).predict(X_test_cv)

### 2.4.2 Prediction

In [None]:
model_Ridge = RidgeCV(alphas = alpha_parameters)

# train model on the full (standardized) training set
model_Ridge.fit(X_train,Y_train)

# get predicted values for test set
y_pred_Ridge = model_Ridge.predict(X_test)

# brain age gap: predicted age minus chronological age
brain_age_delta_Ridge = y_pred_Ridge-Y_test

# get mean absolute error (MAE)
print("The MAE for testing set using Ridge:", mean_absolute_error(Y_test,y_pred_Ridge))

# Calculate R^2
r2_score = model_Ridge.score(X_test, Y_test)
print("The R² for the testing set using Ridge:", r2_score)

# the 20 coefficients with the largest absolute value (ascending) and their names
coeff = np.asarray(model_Ridge.coef_)
coeffabs = np.abs(coeff)
top_coefficients = np.argsort(coeffabs)[-20:]
print(X_train.iloc[:,top_coefficients].columns)
print(coeff[top_coefficients])

# plot figure with x: actual age, y: predicted age, and a line with slope 1 for reference
plt.figure(figsize=(7,6))
plt.scatter(Y_test, y_pred_Ridge, alpha=0.5, s=10, color = "darkcyan")
plt.axline((60,60), slope=1, color='r')
plt.ylabel('brain predicted age [years]', fontsize = 16)
plt.xlabel('chronological age [years]', fontsize = 16)
plt.xticks(range(40,90,5), fontsize = 14)
plt.yticks(range(40,95,5), fontsize = 14)
#plt.savefig('mainbrainpredage.png', format='png', dpi=800)
plt.show()

# Least-squares trend of the brain age gap on age. The line is fitted to the
# data instead of being drawn with a hard-coded slope through (65, 0), so it
# always reflects the model that was actually trained above.
age_values = np.asarray(Y_test, dtype=float)
gap_values = np.asarray(brain_age_delta_Ridge, dtype=float)
trend_slope, trend_intercept = np.polyfit(age_values, gap_values, 1)

# plot the brain age gap, its fitted trend against age (blue) and the zero line (black)
plt.figure(figsize=(7,6))
plt.scatter(Y_test,brain_age_delta_Ridge, alpha=0.5, s=10, color = "darkcyan")
plt.axline((65, trend_slope*65 + trend_intercept), slope=trend_slope, color="blue")
plt.axline((60,0),slope=0, color = "black")
plt.ylabel("BrainPAD", fontsize = 16)
plt.xlabel("Age", fontsize = 16)
plt.xticks(fontsize = 14)
plt.yticks(fontsize=14)
plt.show()

Validation Prediction

In [None]:
# apply the model trained on the combined features to the validation set
val_y_pred_Ridge = model_Ridge.predict(val_X_test)

# brain age gap: predicted age minus chronological age
val_brain_age_delta_Ridge = val_y_pred_Ridge - val_Y_test

# get mean absolute error (MAE); the label names the validation set so this
# line cannot be confused with the test-set MAE printed earlier
print("The MAE for validation set using Ridge:", mean_absolute_error(val_Y_test, val_y_pred_Ridge))

# plot figure with x: actual age, y: predicted age, and a line with slope 1 for reference
plt.figure(figsize=(7,6))
plt.scatter(val_Y_test, val_y_pred_Ridge, alpha=0.5, s=10, color = "darkcyan")
plt.axline((60,60), slope=1, color='r')
plt.ylabel('brain predicted age [years]', fontsize = 16)
plt.xlabel('chronological age [years]', fontsize = 16)
plt.xticks(range(40,90,5), fontsize = 14)
plt.yticks(range(40,95,5), fontsize = 14)
#plt.savefig('valbrainpredage.png', format='png', dpi=800)
plt.show()

# Least-squares trend of the validation brain age gap on age. The line is
# fitted to the data instead of being drawn with a hard-coded slope through
# (65, 0).
val_age_values = np.asarray(val_Y_test, dtype=float)
val_gap_values = np.asarray(val_brain_age_delta_Ridge, dtype=float)
val_trend_slope, val_trend_intercept = np.polyfit(val_age_values, val_gap_values, 1)

# plot the brainAGE, its fitted trend against age (blue) and the zero line (black)
plt.figure(figsize=(7,6))
plt.scatter(val_Y_test, val_brain_age_delta_Ridge, alpha=0.5, s=10, color = "darkcyan")
plt.axline((65, val_trend_slope*65 + val_trend_intercept), slope=val_trend_slope, color="blue")
plt.axline((60,0),slope=0, color = "black")
plt.ylabel("BrainAGE", fontsize = 16)
plt.xlabel("Age", fontsize = 16)
plt.xticks(fontsize = 14)
plt.yticks(fontsize=14)
plt.show()

### 2.4.3 Coefficients for later plotting

In [None]:
# NOTE(review): this whole cell is commented out and never runs. It repeats the
# coefficient reshaping of section 2.1.3, whose 6 x 74 layout would not match
# the combined feature set without changes - remove it or adapt it.
# # get the coefficients and names of the destrieux atlas areas in the ridge regression model
# coefficients = np.asarray(model_Ridge.coef_)
# names = np.asarray(X_train.columns[:])

# # change formatting
# plot_coef_array = [names,coefficients]
# plot_coef_array = pd.DataFrame(plot_coef_array)
# plot_coef_array = plot_coef_array.transpose()

# # change formatting
# matrix_coeff = [plot_coef_array.iloc[:74],plot_coef_array.iloc[74:148],plot_coef_array.iloc[148:222],plot_coef_array.iloc[222:296], plot_coef_array.iloc[296:370],plot_coef_array.iloc[370:444]]
# new_matrix = [matrix_coeff[0][0],matrix_coeff[0][1],matrix_coeff[1][0],matrix_coeff[1][1],matrix_coeff[2][0],matrix_coeff[2][1],matrix_coeff[3][0],matrix_coeff[3][1],matrix_coeff[4][0],matrix_coeff[4][1],matrix_coeff[5][0],matrix_coeff[5][1]]

# new_matrix = np.array(new_matrix)
# new_matrix = pd.DataFrame(new_matrix)
# new_matrix = new_matrix.transpose()
# new_matrix = pd.DataFrame(new_matrix)

# # only get the coefficients, not the names (including names was only to have a better overview during inital development)
# coef_frame = new_matrix.iloc[:,[1,3,5,7,9,11]]
# coef_frame = pd.DataFrame(coef_frame)

# # array with the maximum (absolute) coefficient for each area ordered in the inital ordering of the areas
# final_coef = []
# for n in range(74):
#     index = np.argmax(abs(coef_frame.iloc[n]))
#     final_coef.append(coef_frame.iloc[n,index])


### 2.4.4 Correction

In [None]:
# reformat the held-out cross-validation data into column vectors for the regression
y_pred_cv = np.reshape(np.asarray(y_pred_cv), (-1,1))
Y_test_cv = np.reshape(np.asarray(Y_test_cv), (-1,1))

# age-bias correction following the R code that's online from Cole:
# regress predicted age on chronological age in the hold-out split
reg = LinearRegression().fit(Y_test_cv, y_pred_cv)
# The target is 2-D, so coef_ has shape (1, 1) and intercept_ shape (1,).
# .item() extracts the scalar; float() on arrays with ndim > 0 is deprecated
# in NumPy.
coef = reg.coef_.item()
intercept = reg.intercept_.item()
print("Coefficient and Intercept:", coef, intercept)

# correction by cole: invert the fitted line, applied to the whole array at once
corr_brainage_4 = (y_pred_Ridge - intercept) / coef
corr_brainage_delta_4 = corr_brainage_4-Y_test

# uncorrected predictions with the identity line (red) and a line with the
# fitted bias slope (blue)
plt.figure(figsize=(7,6))
plt.scatter(Y_test, y_pred_Ridge, alpha=0.5, s=10, color = "darkcyan")
plt.axline((60,60), slope=1, color='r')
plt.axline((65,65), slope=coef, color="blue")
plt.ylabel('brain predicted age [years]', fontsize = 16)
plt.xlabel('chronological age [years]', fontsize = 16)
plt.xticks(range(40,90,5), fontsize = 14)
plt.yticks(range(40,95,5), fontsize = 14)
plt.savefig('mainbrainpredage.png', format='png', dpi=800)
plt.show()

# plots corrected
plt.figure(figsize=(7,6))
plt.scatter(Y_test, corr_brainage_4, alpha=0.5, s=10, color="darkcyan")
#plt.axline((65,65), slope=coef, color="blue")
plt.ylabel("brain predicted age corrected [years]", fontsize = 16)
plt.axline((60,60), slope=1, color='r')
plt.xlabel("chronological age [years]", fontsize = 16)
plt.xticks(range(40,90,5), fontsize = 14)
plt.yticks(range(30,115,10), fontsize = 14)
plt.savefig('mainbrainpredagecorr.png', format='png', dpi=800)
plt.show()

# corrected brain age gap against age, with the zero line for reference
plt.figure(figsize=(7,6))
plt.scatter(Y_test, corr_brainage_delta_4, alpha=0.5, s=10, color="darkcyan")
plt.axline((60,0),slope=0, color = "black")
plt.ylabel("BrainAGE corrected [years]", fontsize = 16)
plt.xlabel("chronological age [years]", fontsize = 16)
plt.xticks(fontsize = 14)
plt.yticks(fontsize=14)
plt.savefig('mainbrainage.png', format='png', dpi=800)
plt.show()

Validation correction

In [None]:
# reformat the held-out cross-validation data into column vectors for the regression
y_pred_cv = np.reshape(np.asarray(y_pred_cv), (-1,1))
Y_test_cv = np.reshape(np.asarray(Y_test_cv), (-1,1))

# age-bias correction following the R code that's online from Cole. The
# regression is fitted on the training hold-out split (not on validation
# data) and then applied to the validation predictions.
reg = LinearRegression().fit(Y_test_cv, y_pred_cv)
# The target is 2-D, so coef_ has shape (1, 1) and intercept_ shape (1,).
# .item() extracts the scalar; float() on arrays with ndim > 0 is deprecated
# in NumPy.
coef = reg.coef_.item()
intercept = reg.intercept_.item()
print("Coefficient and Intercept:", coef, intercept)

# correction by cole: invert the fitted line, applied to the whole array at once
val_corr_brainage_4 = (val_y_pred_Ridge - intercept) / coef
val_corr_brainage_delta_4 = val_corr_brainage_4 - val_Y_test

# uncorrected predictions with the identity line (red) and a line with the
# fitted bias slope (blue)
plt.figure(figsize=(7,6))
plt.scatter(val_Y_test, val_y_pred_Ridge, alpha=0.5, s=10, color = "darkcyan")
plt.axline((60,60), slope=1, color='r')
plt.axline((65,65), slope=coef, color="blue")
plt.ylabel('brain predicted age [years]', fontsize = 16)
plt.xlabel('chronological age [years]', fontsize = 16)
plt.xticks(range(40,90,5), fontsize = 14)
plt.yticks(range(40,95,5), fontsize = 14)
plt.savefig('valbrainpredage.png', format='png', dpi=800)
plt.show()

# plots corrected
plt.figure(figsize=(7,6))
plt.scatter(val_Y_test, val_corr_brainage_4, alpha=0.5, s=10, color="darkcyan")
#plt.axline((65,65), slope=coef, color="blue")
plt.ylabel("brain predicted age corrected [years]", fontsize = 16)
plt.axline((60,60), slope=1, color='r')
plt.xlabel("chronological age [years]", fontsize = 16)
plt.xticks(range(40,90,5), fontsize = 14)
plt.yticks(range(30,115,10), fontsize = 14)
plt.savefig('valbrainpredagecorr.png', format='png', dpi=800)
plt.show()

# corrected brain age gap against age, with the zero line for reference
plt.figure(figsize=(7,6))
plt.scatter(val_Y_test, val_corr_brainage_delta_4, alpha=0.5, s=10, color="darkcyan")
plt.axline((60,0),slope=0, color = "black")
plt.ylabel("BrainAGE corrected [years]", fontsize = 16)
plt.xlabel("chronological age [years]", fontsize = 16)
plt.xticks(fontsize = 14)
plt.yticks(fontsize=14)
plt.savefig('valbrainage.png', format='png', dpi=800)
plt.show()

# 3 Add BrainAge Deltas to main_test

In [None]:
# attach the bias-corrected brain age gap of every segmentation to main_test,
# one column per segmentation
corrected_deltas = {
    "BADA2009": corr_brainage_delta_1,
    "BADASEG": corr_brainage_delta_2,
    "BADDesikan": corr_brainage_delta_3,
    "BADCombined": corr_brainage_delta_4,
}
for column_name, corrected_delta in corrected_deltas.items():
    main_test[column_name] = corrected_delta

In [None]:
# store the bias-corrected brain age gap of the validation set (computed in
# the "Validation correction" cell) as column "BA" of val_main_test
val_main_test["BA"] = val_corr_brainage_delta_4

# 4 Correlation between deltas of different segmentations

In [None]:
# Spearman rank correlation between the corrected brain age gaps of every pair
# of segmentations. Labels spell the atlas name "Destrieux" consistently.
delta_pairs = [
    ("ASEG x Destrieux", corr_brainage_delta_2, corr_brainage_delta_1),
    ("ASEG x Desikan", corr_brainage_delta_2, corr_brainage_delta_3),
    ("Desikan x Destrieux", corr_brainage_delta_3, corr_brainage_delta_1),
    ("Combined x Destrieux", corr_brainage_delta_1, corr_brainage_delta_4),
    ("Combined x ASEG", corr_brainage_delta_2, corr_brainage_delta_4),
    ("Combined x Desikan", corr_brainage_delta_3, corr_brainage_delta_4),
]
for pair_label, first_delta, second_delta in delta_pairs:
    print(f"Correlation {pair_label}:", stats.spearmanr(first_delta, second_delta))

# 5 Pickle data for next script

In [None]:
# pickle results
# NOTE(review): all three writes below are commented out, so running this
# notebook as-is persists nothing for the next script - re-enable them when
# the outputs should be regenerated.
#main_test.to_pickle("2_main_test.pkl")
#main_train.to_pickle("2_main_train.pkl")

#val_main_test.to_pickle("2_val_test.pkl")

# 6 Plot top coefficients on brain

In [None]:
from nilearn import datasets
from nilearn import plotting
import numpy as np
import matplotlib.cm as cm

# color map for plotting -> red for positive, blue for negative values
# (plt.get_cmap is used because matplotlib.cm.get_cmap was removed in Matplotlib 3.9)
cmap = plt.get_cmap('coolwarm')

# get destrieux atlas and area labels
destrieux_atlas = datasets.fetch_atlas_surf_destrieux()
labels = destrieux_atlas["labels"]

# mask with zeros, to be replaced by coefficient values
mask = np.zeros_like(destrieux_atlas['map_left'], dtype=float)

# Give every vertex of an atlas area the coefficient of that area from 2.1.3.
# Atlas labels 0 and 42 are skipped -> areas not in the UK biobank files, so
# final_coef (74 entries) is offset by 1 below label 42 and by 2 above it.
for atlas_label in range(1, 76):
    if atlas_label == 42:
        continue
    coef_position = atlas_label - 1 if atlas_label < 42 else atlas_label - 2
    mask[destrieux_atlas["map_left"] == atlas_label] = final_coef[coef_position]


# Retrieve fsaverage5 surface dataset for the plotting background
fsaverage = datasets.fetch_surf_fsaverage()

# plot medial and lateral view on inflated surface
plotting.plot_surf_roi(
    fsaverage['infl_left'], roi_map=mask, cmap=cmap, hemi='left', view='medial',
    bg_map=fsaverage['sulc_left'], bg_on_data=True, darkness=.2,
)
plotting.plot_surf_roi(
    fsaverage['infl_left'], roi_map=mask, cmap=cmap, hemi='left', view='lateral',
    bg_map=fsaverage['sulc_left'], bg_on_data=True, darkness=.5,
)

# plot medial and lateral view on pial surface
plotting.plot_surf_roi(
    fsaverage['pial_left'], roi_map=mask, cmap=cmap, hemi='left', view='medial',
    bg_map=fsaverage['sulc_left'], bg_on_data=True, darkness=.2,
)
plotting.plot_surf_roi(
    fsaverage['pial_left'], roi_map=mask, cmap=cmap, hemi='left', view='lateral',
    bg_map=fsaverage['sulc_left'], bg_on_data=True, darkness=.5,
)