In [None]:
#import relevant libraries
import sys; sys.path
import pandas as pd
import numpy as np 
import matplotlib.pyplot as plt
import seaborn as sns
import time

from sklearn.metrics import explained_variance_score, r2_score
from sklearn.linear_model import Ridge
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

In [None]:
#read the subject table (first row holds the column names) as a DataFrame,
#and the functional connectivity file (no header row) as a plain array
T = pd.read_csv('subj_data.csv', header=0)
fc = pd.read_csv('fc.csv', header=None).values

#labels for the cognitive metrics that are evaluated
cognition = [
    'Crystal', 'Fluid', 'Total', 'PicVocab', 'Reading',
    'Flanker', 'CardSort', 'PicSeq', 'ListSort', 'ProcSpeed',
]

#previously optimised hyperparameters, one headerless file per model type
#(m / f / b suffixes), each read into an array
opt_alpha_m, opt_alpha_f, opt_alpha_b = [
    pd.read_csv(alpha_file, header=None).values
    for alpha_file in ('alpha_m.txt', 'alpha_f.txt', 'alpha_b.txt')
]

In [None]:
#pull each `*_AgeAdj` cognitive score column out of the subject table as its own array
crystal = T['CogCrystalComp_AgeAdj'].values
fluid = T['CogFluidComp_AgeAdj'].values
total = T['CogTotalComp_AgeAdj'].values
picvocab = T['PicVocab_AgeAdj'].values
reading = T['ReadEng_AgeAdj'].values
flanker = T['Flanker_AgeAdj'].values
cardsort = T['CardSort_AgeAdj'].values
picseq = T['PicSeq_AgeAdj'].values
listsort = T['ListSort_AgeAdj'].values
procspeed = T['ProcSpeed_AgeAdj'].values

#stack the scores side by side: one row per row of T, one column per score,
#in the same order as the labels in `cognition`
cog_metric = np.column_stack((
    crystal, fluid, total, picvocab, reading,
    flanker, cardsort, picseq, listsort, procspeed,
))

In [None]:
#number of permutations (outer loop; each one also seeds its train/test split)
perm = 250
#number of label shuffles performed within each permutation (inner loop)
iters = 100
#set the proportion of data you want in your training set
train_size = .8
#number of variables to predict = number of labels stored in `cognition`
n_cog = len(cognition)

#set regression model type
#NOTE: the `normalize` argument was deprecated in scikit-learn 1.0 and removed in 1.2,
#so this notebook needs an older scikit-learn release to run as written
regr = Ridge(normalize=True, max_iter=1000000)

#optimised hyperparameters: median over the rows of each alpha file, giving one
#alpha per column, for the both-sex (b), male (m) and female (f) models
alphas_b = np.median(opt_alpha_b,axis=0)
alphas_m = np.median(opt_alpha_m,axis=0)
alphas_f = np.median(opt_alpha_f,axis=0)

#set x data to be the input variable you want to use
#ie fc, sc, or hc
#(the original referenced `fc_392` / `T_392`, which are never defined in this
# notebook; `cog_metric` is built from `T`, so the sex masks must come from `T`
# and X must have one row per row of `T`)
X = fc

#set y to be the cognitive metrics you want to predict
Y = cog_metric

#fail early if the connectivity rows and the subject table are not aligned
assert X.shape[0] == Y.shape[0], 'fc and subj_data must have one row per subject'

#boolean row masks for each sex, as plain arrays so numpy indexing is positional
is_male = (T.Gender=='M').values
is_female = (T.Gender=='F').values

X_m = X[is_male,:]
Y_m = Y[is_male,:]

X_f = X[is_female,:]
Y_f = Y[is_female,:]

In [None]:
#create arrays to store the scores from the different models, one value per
#(permutation, iteration, cognitive metric)
#naming: first letter = group the model was trained on, second = group it was tested on
#  bb: trained on both, tested on both (sex-independent model)
#  bm: trained on both, tested on male (sex-independent model)
#  bf: trained on both, tested on female (sex-independent model)
#  mm: trained on male, tested on male (male-specific model)
#  mf: trained on male, tested on female (male-specific model)
#  ff: trained on female, tested on female (female-specific model)
#  fm: trained on female, tested on male (female-specific model)
score_shape = [perm,iters,n_cog]

#coefficient of determination
r2_bb = np.zeros(score_shape)
r2_bm = np.zeros(score_shape)
r2_bf = np.zeros(score_shape)
r2_mm = np.zeros(score_shape)
r2_mf = np.zeros(score_shape)
r2_ff = np.zeros(score_shape)
r2_fm = np.zeros(score_shape)

#explained variance (written by the model-fitting loop; previously never allocated)
var_bb = np.zeros(score_shape)
var_bm = np.zeros(score_shape)
var_bf = np.zeros(score_shape)
var_mm = np.zeros(score_shape)
var_mf = np.zeros(score_shape)
var_ff = np.zeros(score_shape)
var_fm = np.zeros(score_shape)

#correlation between true and predicted values (written by the model-fitting loop;
#previously never allocated)
corr_bb = np.zeros(score_shape)
corr_bm = np.zeros(score_shape)
corr_bf = np.zeros(score_shape)
corr_mm = np.zeros(score_shape)
corr_mf = np.zeros(score_shape)
corr_ff = np.zeros(score_shape)
corr_fm = np.zeros(score_shape)

#number of held-out rows per group; rounded up to match how train_test_split
#sizes the test set for a fractional test_size
n_test_m = int(np.ceil(X_m.shape[0]*(1-train_size)))
n_test_f = int(np.ceil(X_f.shape[0]*(1-train_size)))
#the combined test set is the male and female test sets concatenated
#(the original summed shape[2], i.e. n_cog, instead of the test-set sizes)
n_test_b = n_test_m + n_test_f

cogtest_m = np.zeros([perm,n_test_m,n_cog])
cogtest_f = np.zeros([perm,n_test_f,n_cog])
cogtest_b = np.zeros([perm,n_test_b,n_cog])

#predictions, indexed as [permutation, test row, iteration, cognitive metric]
preds_bb = np.zeros([perm,n_test_b,iters,n_cog])
preds_bm = np.zeros([perm,n_test_m,iters,n_cog])
preds_bf = np.zeros([perm,n_test_f,iters,n_cog])
preds_mm = np.zeros([perm,n_test_m,iters,n_cog])
preds_mf = np.zeros([perm,n_test_f,iters,n_cog])
preds_fm = np.zeros([perm,n_test_m,iters,n_cog])
preds_ff = np.zeros([perm,n_test_f,iters,n_cog])

In [None]:
#seed the label shuffles so the null distribution can be regenerated exactly;
#change the value to draw a different null sample
np.random.seed(0)

#iterate through permutations
for p in range(perm):
    #print permutation # you're on
    print('Permutation %d' %(p+1))

    #`it` (not `iter`) so the builtin iter() is not shadowed
    for it in range(iters):
        #shuffle every cognitive metric independently within each sex to break the
        #link between connectivity and score. Work on copies: np.random.shuffle
        #operates in place, and shuffling Y_m / Y_f directly would permanently
        #scramble the real labels for anything run after this cell.
        Y_shuffle_m = Y_m.copy()
        for col in range(Y_shuffle_m.shape[1]):
            np.random.shuffle(Y_shuffle_m[:,col])

        Y_shuffle_f = Y_f.copy()
        for col in range(Y_shuffle_f.shape[1]):
            np.random.shuffle(Y_shuffle_f[:,col])

        #split data into train and test sets; random_state=p means every iteration
        #of a given permutation uses the same split and only the labels differ
        x_train_m, x_test_m, cog_train_m, cog_test_m = train_test_split(X_m, Y_shuffle_m, test_size=1-train_size, shuffle=True, random_state=p)
        x_train_f, x_test_f, cog_train_f, cog_test_f = train_test_split(X_f, Y_shuffle_f, test_size=1-train_size, shuffle=True, random_state=p)

        #the both-sex sets are the male rows followed by the female rows
        x_train_b = np.concatenate((x_train_m, x_train_f), axis=0)
        x_test_b = np.concatenate((x_test_m, x_test_f), axis=0)

        cog_train_b = np.concatenate((cog_train_m, cog_train_f), axis=0)
        cog_test_b = np.concatenate((cog_test_m, cog_test_f), axis=0)

        #iterate through the cognitive metrics you want to predict
        #NOTE(review): range(1) models only the first metric (column 0); the other
        #columns of the result arrays keep their initial values. Use range(n_cog)
        #to run every metric - confirm which is intended.
        for cog in range (1):

            #set y values for male and female train and test
            y_train_b = cog_train_b[:,cog]
            y_train_m = cog_train_m[:,cog]
            y_train_f = cog_train_f[:,cog]

            y_test_b = cog_test_b[:,cog]
            y_test_m = cog_test_m[:,cog]
            y_test_f = cog_test_f[:,cog]

            #fit one model per training group using its optimised hyperparameter
            model_b = Ridge(alpha = alphas_b[cog], normalize=True, max_iter=1000000)
            model_b.fit(x_train_b, y_train_b)

            model_m = Ridge(alpha = alphas_m[cog], normalize=True, max_iter=1000000)
            model_m.fit(x_train_m, y_train_m)

            model_f = Ridge(alpha = alphas_f[cog], normalize=True, max_iter=1000000)
            model_f.fit(x_train_f, y_train_f)

            #compute r^2 (coefficient of determination)
            r2_bb[p,it,cog]=model_b.score(x_test_b,y_test_b)
            r2_bm[p,it,cog]=model_b.score(x_test_m,y_test_m)
            r2_bf[p,it,cog]=model_b.score(x_test_f,y_test_f)

            r2_mm[p,it,cog]=model_m.score(x_test_m,y_test_m)
            r2_mf[p,it,cog]=model_m.score(x_test_f,y_test_f)
            r2_fm[p,it,cog]=model_f.score(x_test_m,y_test_m)
            r2_ff[p,it,cog]=model_f.score(x_test_f,y_test_f)

            #generate predictions from model
            preds_bb[p,:,it,cog] = model_b.predict(x_test_b).ravel()
            preds_bm[p,:,it,cog] = model_b.predict(x_test_m).ravel()
            preds_bf[p,:,it,cog] = model_b.predict(x_test_f).ravel()

            preds_mm[p,:,it,cog] = model_m.predict(x_test_m).ravel()
            preds_mf[p,:,it,cog] = model_m.predict(x_test_f).ravel()
            preds_fm[p,:,it,cog] = model_f.predict(x_test_m).ravel()
            preds_ff[p,:,it,cog] = model_f.predict(x_test_f).ravel()

            #compute explained variance
            var_bb[p,it,cog] = explained_variance_score(y_test_b, preds_bb[p,:,it,cog])
            var_bm[p,it,cog] = explained_variance_score(y_test_m, preds_bm[p,:,it,cog])
            var_bf[p,it,cog] = explained_variance_score(y_test_f, preds_bf[p,:,it,cog])

            var_mm[p,it,cog] = explained_variance_score(y_test_m, preds_mm[p,:,it,cog])
            var_mf[p,it,cog] = explained_variance_score(y_test_f, preds_mf[p,:,it,cog])
            var_fm[p,it,cog] = explained_variance_score(y_test_m, preds_fm[p,:,it,cog])
            var_ff[p,it,cog] = explained_variance_score(y_test_f, preds_ff[p,:,it,cog])

            #compute correlation between true and predicted
            corr_bb[p,it,cog] = np.corrcoef(y_test_b, preds_bb[p,:,it,cog])[1,0]
            corr_bm[p,it,cog] = np.corrcoef(y_test_m, preds_bm[p,:,it,cog])[1,0]
            corr_bf[p,it,cog] = np.corrcoef(y_test_f, preds_bf[p,:,it,cog])[1,0]

            corr_mm[p,it,cog] = np.corrcoef(y_test_m, preds_mm[p,:,it,cog])[1,0]
            corr_mf[p,it,cog] = np.corrcoef(y_test_f, preds_mf[p,:,it,cog])[1,0]
            corr_fm[p,it,cog] = np.corrcoef(y_test_m, preds_fm[p,:,it,cog])[1,0]
            corr_ff[p,it,cog] = np.corrcoef(y_test_f, preds_ff[p,:,it,cog])[1,0]

In [None]:
def _collapse_to_2d(scores):
    """Merge every leading axis into one, keeping the last axis as the columns."""
    n_columns = scores.shape[-1]
    return scores.reshape(-1, n_columns)


#reshape every score array to 2D: one row per (permutation, iteration) pair,
#one column per cognitive metric
r2_bb, r2_bm, r2_bf = (_collapse_to_2d(a) for a in (r2_bb, r2_bm, r2_bf))
r2_mm, r2_mf = (_collapse_to_2d(a) for a in (r2_mm, r2_mf))
r2_fm, r2_ff = (_collapse_to_2d(a) for a in (r2_fm, r2_ff))

var_bb, var_bm, var_bf = (_collapse_to_2d(a) for a in (var_bb, var_bm, var_bf))
var_mm, var_mf = (_collapse_to_2d(a) for a in (var_mm, var_mf))
var_fm, var_ff = (_collapse_to_2d(a) for a in (var_fm, var_ff))

corr_bb, corr_bm, corr_bf = (_collapse_to_2d(a) for a in (corr_bb, corr_bm, corr_bf))
corr_mm, corr_mf = (_collapse_to_2d(a) for a in (corr_mm, corr_mf))
corr_fm, corr_ff = (_collapse_to_2d(a) for a in (corr_fm, corr_ff))

In [None]:
#write every null-model score array to its own comma-delimited text file
#once all the models have been run
null_outputs = [
    ('null_r2_bb.txt', r2_bb),
    ('null_var_bb.txt', var_bb),
    ('null_corr_bb.txt', corr_bb),

    ('null_r2_bm.txt', r2_bm),
    ('null_var_bm.txt', var_bm),
    ('null_corr_bm.txt', corr_bm),

    ('null_r2_bf.txt', r2_bf),
    ('null_var_bf.txt', var_bf),
    ('null_corr_bf.txt', corr_bf),

    ('null_r2_mm.txt', r2_mm),
    ('null_var_mm.txt', var_mm),
    ('null_corr_mm.txt', corr_mm),

    ('null_r2_mf.txt', r2_mf),
    ('null_var_mf.txt', var_mf),
    ('null_corr_mf.txt', corr_mf),

    ('null_r2_fm.txt', r2_fm),
    ('null_var_fm.txt', var_fm),
    ('null_corr_fm.txt', corr_fm),

    ('null_r2_ff.txt', r2_ff),
    ('null_var_ff.txt', var_ff),
    ('null_corr_ff.txt', corr_ff),
]

for out_name, scores in null_outputs:
    np.savetxt(out_name, scores, delimiter=',')