In [1]:
#import relevant libraries
import sys; sys.path
import pandas as pd
import numpy as np 
import matplotlib.pyplot as plt
import seaborn as sns
import time

from sklearn.metrics import explained_variance_score, r2_score
from sklearn.linear_model import Ridge
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

In [None]:
#load the functional connectivity matrix (file has no header row) and the
#subject table (first row of the file is used as the header)
fc = pd.read_csv('fc.csv', header=None).values
T = pd.read_csv('subj_data.csv', header=0)

#labels of the cognitive metrics to evaluate; used for progress messages and
#to decide how many metrics are predicted
cognition = [
    'Crystal', 'Fluid', 'Total', 'PicVocab', 'Reading',
    'Flanker', 'CardSort', 'PicSeq', 'ListSort', 'ProcSpeed',
]

#read the stored optimised hyperparameters for the male-specific,
#female-specific and sex-independent ("both") models, in that order
opt_alpha_m, opt_alpha_f, opt_alpha_b = (
    pd.read_csv(alpha_file, header=None).values
    for alpha_file in ('alpha_m.txt', 'alpha_f.txt', 'alpha_b.txt')
)

In [None]:
#pull each age-adjusted cognitive score out of the subject table as an array
crystal = T['CogCrystalComp_AgeAdj'].values
fluid = T['CogFluidComp_AgeAdj'].values
total = T['CogTotalComp_AgeAdj'].values
picvocab = T['PicVocab_AgeAdj'].values
reading = T['ReadEng_AgeAdj'].values
flanker = T['Flanker_AgeAdj'].values
cardsort = T['CardSort_AgeAdj'].values
picseq = T['PicSeq_AgeAdj'].values
listsort = T['ListSort_AgeAdj'].values
procspeed = T['ProcSpeed_AgeAdj'].values

#stack the scores into one array: one row per row of the subject table,
#one column per metric, in the same order as the `cognition` labels
cog_metric = np.asarray([
    crystal, fluid, total, picvocab, reading,
    flanker, cardsort, picseq, listsort, procspeed,
]).T

In [None]:
#number of label permutations to run
perm = 25000
#proportion of each sex's data placed in the training set
train_size = .8
#number of variables to predict = number of labels stored in `cognition`
n_cog = len(cognition)

#regression model type
#NOTE(review): `regr` is not referenced in the visible cells; the loop builds
#its own Ridge instances. The `normalize` argument was removed from Ridge in
#scikit-learn 1.2 -- confirm the pinned scikit-learn version before upgrading.
regr = Ridge(normalize=True, max_iter=1000000)

#hyperparameter used for each metric: the median down each column of the
#stored optimised values, for the both-sex, male and female models
alphas_b = np.median(opt_alpha_b, axis=0)
alphas_m = np.median(opt_alpha_m, axis=0)
alphas_f = np.median(opt_alpha_f, axis=0)

#input data (ie fc, sc, or hc) and the cognitive metrics to predict
X = fc
Y = cog_metric

#boolean row masks selecting each sex from the subject table
is_male = (T.Gender == 'M').values
is_female = (T.Gender == 'F').values

#sex-specific inputs and outputs (boolean indexing returns new arrays)
X_m = fc[is_male, :]
Y_m = cog_metric[is_male, :]

X_f = fc[is_female, :]
Y_f = cog_metric[is_female, :]

In [None]:
#Pre-allocate every result array filled by the permutation loop.
#Naming convention: the first letter is the training data, the second the test
#data (b = both sexes, m = male, f = female); e.g. `bm` is the sex-independent
#model evaluated on the male test set.

#number of held-out subjects per sex and for the pooled test set
n_test_m = int(np.ceil(X_m.shape[0]*(1-train_size)))
n_test_f = int(np.ceil(X_f.shape[0]*(1-train_size)))
n_test_b = n_test_m + n_test_f

#coefficient of determination, one row per permutation and one column per metric
#bb indicates trained on both, tested on both (sex-independent model)
r2_bb = np.zeros([perm,n_cog])

#bm indicates trained on both, tested on male (sex-independent model)
r2_bm = np.zeros([perm,n_cog])

#bf indicates trained on both, tested on female (sex-independent model)
r2_bf = np.zeros([perm,n_cog])

#mm indicates trained on male, tested on male (male-specific model)
r2_mm = np.zeros([perm,n_cog])

#mf indicates trained on male, tested on female (male-specific model)
r2_mf = np.zeros([perm,n_cog])

#ff indicates trained on female, tested on female (female-specific model)
r2_ff = np.zeros([perm,n_cog])

#fm indicates trained on female, tested on male (female-specific model)
r2_fm = np.zeros([perm,n_cog])

#create variables to store explained variance
var_bb = np.zeros([perm,n_cog])
var_bm = np.zeros([perm,n_cog])
var_bf = np.zeros([perm,n_cog])
var_mm = np.zeros([perm,n_cog])
var_mf = np.zeros([perm,n_cog])
var_ff = np.zeros([perm,n_cog])
var_fm = np.zeros([perm,n_cog])

#create variables to store prediction accuracy (correlation between the
#test-set values and the predictions)
corr_bb = np.zeros([perm,n_cog])
corr_bm = np.zeros([perm,n_cog])
corr_bf = np.zeros([perm,n_cog])
corr_mm = np.zeros([perm,n_cog])
corr_mf = np.zeros([perm,n_cog])
corr_ff = np.zeros([perm,n_cog])
corr_fm = np.zeros([perm,n_cog])

#NOTE: opt_alpha_b / opt_alpha_m / opt_alpha_f are intentionally NOT
#re-initialised here. They hold the optimised hyperparameters loaded from disk;
#overwriting them with zeros would make any re-run of the configuration cell
#compute all-zero alphas.

#create variables to store output variables for test set
cogtest_m = np.zeros([perm,n_cog,n_test_m])
cogtest_f = np.zeros([perm,n_cog,n_test_f])
cogtest_b = np.zeros([perm,n_cog,n_test_b])

#create variables to store predictions from the models; the last axis has the
#length of the test set each model is evaluated on
preds_mm = np.zeros([perm,n_cog,n_test_m])
preds_mf = np.zeros([perm,n_cog,n_test_f])
preds_ff = np.zeros([perm,n_cog,n_test_f])
preds_fm = np.zeros([perm,n_cog,n_test_m])
preds_bb = np.zeros([perm,n_cog,n_test_b])
preds_bm = np.zeros([perm,n_cog,n_test_m])
preds_bf = np.zeros([perm,n_cog,n_test_f])

In [None]:
def _shuffle_columns(values):
    """Return a copy of ``values`` with every column permuted independently.

    The input array is left untouched, so the caller's data is not altered
    across permutations or notebook re-runs. Shuffling uses NumPy's global
    random state.
    NOTE(review): no seed is set in the visible cells, so the shuffles are not
    reproducible between runs -- confirm whether that is intended.
    """
    shuffled = values.copy()
    for col in range(shuffled.shape[1]):
        #shuffle the column view in place inside the copy
        np.random.shuffle(shuffled[:, col])
    return shuffled


#iterate through permutations
for p in range(perm):
    #print permutation # you're on
    print('Permutation %d' %(p+1))

    #randomly shuffle all of the cognition data for males
    Y_shuffle_m = _shuffle_columns(Y_m)

    #randomly shuffle all of the cognition data for females
    Y_shuffle_f = _shuffle_columns(Y_f)

    #split male and female data into train and test splits using shuffled cognition data as output variable
    #(the permutation index seeds the split, so the split itself is repeatable)
    x_train_m, x_test_m, cog_train_m, cog_test_m = train_test_split(
        X_m, Y_shuffle_m, test_size=1-train_size, shuffle=True, random_state=p)
    x_train_f, x_test_f, cog_train_f, cog_test_f = train_test_split(
        X_f, Y_shuffle_f, test_size=1-train_size, shuffle=True, random_state=p)

    #concatenate train and test data across the sexes for sex-independent model
    #(males first, then females, for both inputs and outputs)
    x_train_b = np.concatenate((x_train_m, x_train_f), axis=0)
    x_test_b = np.concatenate((x_test_m, x_test_f), axis=0)

    cog_train_b = np.concatenate((cog_train_m, cog_train_f), axis=0)
    cog_test_b = np.concatenate((cog_test_m, cog_test_f), axis=0)

    #iterate through the cognitive metrics to predict
    for cog in range(n_cog):

        #print cognitive metrics being predicted
        print ("Cognition: %s" % cognition[cog])

        #set y values for train and test sets for sex-independent and sex-specific models
        y_train_b = cog_train_b[:,cog]
        y_train_m = cog_train_m[:,cog]
        y_train_f = cog_train_f[:,cog]

        y_test_b = cog_test_b[:,cog]
        y_test_m = cog_test_m[:,cog]
        y_test_f = cog_test_f[:,cog]

        #fit models using the optimised hyperparameter of the matching model type
        model_b = Ridge(alpha=alphas_b[cog], normalize=True, max_iter=1000000)
        model_b.fit(x_train_b, y_train_b)

        model_m = Ridge(alpha=alphas_m[cog], normalize=True, max_iter=1000000)
        model_m.fit(x_train_m, y_train_m)

        #female-specific model uses the female hyperparameters (alphas_f)
        model_f = Ridge(alpha=alphas_f[cog], normalize=True, max_iter=1000000)
        model_f.fit(x_train_f, y_train_f)

        #every (fitted model, test set) pairing together with the arrays that
        #receive its results
        evaluations = (
            #sex-independent model tested on both sexes, on males, on females
            (model_b, x_test_b, y_test_b, r2_bb, preds_bb, var_bb, corr_bb),
            (model_b, x_test_m, y_test_m, r2_bm, preds_bm, var_bm, corr_bm),
            (model_b, x_test_f, y_test_f, r2_bf, preds_bf, var_bf, corr_bf),
            #male-specific model tested on males, on females
            (model_m, x_test_m, y_test_m, r2_mm, preds_mm, var_mm, corr_mm),
            (model_m, x_test_f, y_test_f, r2_mf, preds_mf, var_mf, corr_mf),
            #female-specific model tested on females, on males
            (model_f, x_test_f, y_test_f, r2_ff, preds_ff, var_ff, corr_ff),
            (model_f, x_test_m, y_test_m, r2_fm, preds_fm, var_fm, corr_fm),
        )

        for fitted, x_eval, y_eval, r2_out, preds_out, var_out, corr_out in evaluations:
            #coefficient of determination on the test set
            r2_out[p,cog] = fitted.score(x_eval, y_eval)
            #predictions for the test set
            preds_out[p,cog,:] = fitted.predict(x_eval).ravel()
            #explained variance of the predictions
            var_out[p,cog] = explained_variance_score(y_eval, preds_out[p,cog,:])
            #prediction accuracy: correlation between test values and predictions
            corr_out[p,cog] = np.corrcoef(y_eval, preds_out[p,cog,:])[1,0]


In [None]:
#save all of the outputs for null models once everything is done
#each entry pairs an output file with the result array written to it; files
#are written in the listed order as comma-delimited text
null_outputs = [
    ('null_r2_bb.txt', r2_bb),
    ('null_var_bb.txt', var_bb),
    ('null_corr_bb.txt', corr_bb),

    ('null_r2_bm.txt', r2_bm),
    ('null_var_bm.txt', var_bm),
    ('null_corr_bm.txt', corr_bm),

    ('null_r2_bf.txt', r2_bf),
    ('null_var_bf.txt', var_bf),
    ('null_corr_bf.txt', corr_bf),

    ('null_r2_mm.txt', r2_mm),
    ('null_var_mm.txt', var_mm),
    ('null_corr_mm.txt', corr_mm),

    ('null_r2_mf.txt', r2_mf),
    ('null_var_mf.txt', var_mf),
    ('null_corr_mf.txt', corr_mf),

    ('null_r2_fm.txt', r2_fm),
    ('null_var_fm.txt', var_fm),
    ('null_corr_fm.txt', corr_fm),

    ('null_r2_ff.txt', r2_ff),
    ('null_var_ff.txt', var_ff),
    ('null_corr_ff.txt', corr_ff),
]

for out_path, out_values in null_outputs:
    np.savetxt(out_path, out_values, delimiter=',')