In [None]:
#load in modules and libraries
import pandas as pd
import numpy as np
import time
import matplotlib.pyplot as plt

from sklearn import metrics
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

In [None]:
#read in subj_num and test_holdout indices
#both files are read without a header row and converted straight to numpy arrays
num = pd.read_csv('subj_num.txt', header=None).values
test = pd.read_csv('test_holdout.csv', header=None).values

In [None]:
#load in HE and sex data
#data contains HE for all subjects x ROIs for all atlases
#each filename (i.e. filename1.csv) corresponds to different atlas
#every table is converted to a plain numpy array right after it is read
#NOTE(review): the atlas files are read with header=0 (first row taken as column names) while
#subj_num.txt, test_holdout.csv and subj_sex.csv are read with header=None - confirm the atlas
#files really start with a header row, otherwise their rows will not line up with the subjects

#aal atlas
data_aal = pd.read_csv('filename1.csv', header=0).values
#cc200 atlas
data_cc200 = pd.read_csv('filename2.csv', header=0).values
#cc400 atlas
data_cc400 = pd.read_csv('filename3.csv', header=0).values
#ez atlas
data_ez = pd.read_csv('filename4.csv', header=0).values
#fs86 atlas
data_fs86 = pd.read_csv('filename5.csv', header=0).values
#ho atlas
data_ho = pd.read_csv('filename6.csv', header=0).values
#tt atlas
data_tt = pd.read_csv('filename7.csv', header=0).values

#sex is binary variable - males are '1', females are '0'
sex = pd.read_csv('subj_sex.csv', header=None).values

In [None]:
#split data into training and testing sets using pre-determined indices (generated in HCP_HE_sexclf_traintestsplit notebook)
#this was done this way to make sure all atlases have the exact same train-test split

#membership masks: an entry of num found in test marks a testing subject, anything else a training subject
test_idx = np.isin(num, test)
train_idx = np.isin(num, test, invert=True)

#flatten the masks once so they can be used directly as row selectors
train_rows = train_idx.ravel()
test_rows = test_idx.ravel()

#partition data and sex labels into training and testing subsets (rows selected, all columns kept)
#aal atlas
x_train_aal, x_test_aal = data_aal[train_rows], data_aal[test_rows]
#cc200 atlas
x_train_cc200, x_test_cc200 = data_cc200[train_rows], data_cc200[test_rows]
#cc400 atlas
x_train_cc400, x_test_cc400 = data_cc400[train_rows], data_cc400[test_rows]
#ez atlas
x_train_ez, x_test_ez = data_ez[train_rows], data_ez[test_rows]
#fs86 atlas
x_train_fs86, x_test_fs86 = data_fs86[train_rows], data_fs86[test_rows]
#ho atlas
x_train_ho, x_test_ho = data_ho[train_rows], data_ho[test_rows]
#tt atlas
x_train_tt, x_test_tt = data_tt[train_rows], data_tt[test_rows]

#sex labels
y_train, y_test = sex[train_rows], sex[test_rows]

In [None]:
#normalize all training data and testing data
#one StandardScaler per atlas: fitted on the training rows only, then applied to both
#the training rows and the testing rows

#aal atlas
scaler_aal = StandardScaler().fit(x_train_aal)
x_train_aal_t, x_test_aal_t = scaler_aal.transform(x_train_aal), scaler_aal.transform(x_test_aal)
#cc200 atlas
scaler_cc200 = StandardScaler().fit(x_train_cc200)
x_train_cc200_t, x_test_cc200_t = scaler_cc200.transform(x_train_cc200), scaler_cc200.transform(x_test_cc200)
#cc400 atlas
scaler_cc400 = StandardScaler().fit(x_train_cc400)
x_train_cc400_t, x_test_cc400_t = scaler_cc400.transform(x_train_cc400), scaler_cc400.transform(x_test_cc400)
#ez atlas
scaler_ez = StandardScaler().fit(x_train_ez)
x_train_ez_t, x_test_ez_t = scaler_ez.transform(x_train_ez), scaler_ez.transform(x_test_ez)
#fs86 atlas
scaler_fs86 = StandardScaler().fit(x_train_fs86)
x_train_fs86_t, x_test_fs86_t = scaler_fs86.transform(x_train_fs86), scaler_fs86.transform(x_test_fs86)
#ho atlas
scaler_ho = StandardScaler().fit(x_train_ho)
x_train_ho_t, x_test_ho_t = scaler_ho.transform(x_train_ho), scaler_ho.transform(x_test_ho)
#tt atlas
scaler_tt = StandardScaler().fit(x_train_tt)
x_train_tt_t, x_test_tt_t = scaler_tt.transform(x_train_tt), scaler_tt.transform(x_test_tt)

In [None]:
#set models for each of the atlases: one linear-kernel SVC per atlas, fitted on the scaled training data
#need to specify C values for each model corresponding to a given atlas:
#c_aal, c_cc200, c_cc400, c_ez, c_fs86, c_ho and c_tt are not assigned in this cell and
#must be defined before it runs, otherwise it stops with a NameError

#probability=True makes SVC shuffle the data internally to calibrate its probability estimates;
#a fixed random_state keeps predict_proba (and every ROC curve / AUC derived from it) identical across runs
svc_seed = 0

#labels flattened to a 1-D vector once, shared by every fit below
y_train_flat = y_train.ravel()

#aal atlas
model_aal = SVC(C=c_aal, kernel='linear', probability=True, random_state=svc_seed)
model_aal.fit(x_train_aal_t, y_train_flat);
#cc200 atlas
model_cc200 = SVC(C=c_cc200, kernel='linear', probability=True, random_state=svc_seed)
model_cc200.fit(x_train_cc200_t, y_train_flat);
#cc400 atlas
model_cc400 = SVC(C=c_cc400, kernel='linear', probability=True, random_state=svc_seed)
model_cc400.fit(x_train_cc400_t, y_train_flat);
#ez atlas
model_ez = SVC(C=c_ez, kernel='linear', probability=True, random_state=svc_seed)
model_ez.fit(x_train_ez_t, y_train_flat);
#fs86 atlas
model_fs86 = SVC(C=c_fs86, kernel='linear', probability=True, random_state=svc_seed)
model_fs86.fit(x_train_fs86_t, y_train_flat);
#ho atlas
model_ho = SVC(C=c_ho, kernel='linear', probability=True, random_state=svc_seed)
model_ho.fit(x_train_ho_t, y_train_flat);
#tt atlas
model_tt = SVC(C=c_tt, kernel='linear', probability=True, random_state=svc_seed)
model_tt.fit(x_train_tt_t, y_train_flat);



In [None]:
#generate prediction probabilities for test samples across all atlases
#each pred_* holds the predict_proba output of that atlas' model on its scaled test data
pred_aal = model_aal.predict_proba(x_test_aal_t)
pred_cc200 = model_cc200.predict_proba(x_test_cc200_t)
pred_cc400 = model_cc400.predict_proba(x_test_cc400_t)
pred_ez = model_ez.predict_proba(x_test_ez_t)
pred_fs86 = model_fs86.predict_proba(x_test_fs86_t)
pred_ho = model_ho.predict_proba(x_test_ho_t)
pred_tt = model_tt.predict_proba(x_test_tt_t)


In [None]:
#print model accuracies
#one row per atlas: output format, fitted model, scaled test data
score_rows = [
    ("AAL Score = %1.4f", model_aal, x_test_aal_t),
    ("CC200 Score = %1.4f", model_cc200, x_test_cc200_t),
    ("CC400 Score = %1.4f", model_cc400, x_test_cc400_t),
    ("EZ Score = %1.4f", model_ez, x_test_ez_t),
    ("FS86 Score = %1.4f", model_fs86, x_test_fs86_t),
    ("HO Score = %1.4f", model_ho, x_test_ho_t),
    ("TT Score = %1.4f", model_tt, x_test_tt_t),
]
for score_fmt, fitted_model, x_test_scaled in score_rows:
    #score() is evaluated against the held-out sex labels
    print(score_fmt % fitted_model.score(x_test_scaled, y_test))


In [None]:
#ROC curves for all atlases on one figure, with each atlas' AUC shown in the legend
plt.figure(0).clf()

#legend label format and predicted probabilities per atlas, in plotting order
#(plotting order fixes both the line colors and the legend order)
roc_inputs = [
    ("   AAL     %1.4f", pred_aal),
    (" CC200   %1.4f", pred_cc200),
    (" CC400   %1.4f", pred_cc400),
    ("    EZ      %1.4f", pred_ez),
    ("  FS86    %1.4f", pred_fs86),
    ("    HO     %1.4f", pred_ho),
    ("    TT      %1.4f", pred_tt),
]
for label_fmt, proba in roc_inputs:
    #second column of the probability array is used as the score for both the curve and the AUC
    fpr, tpr, thresh = metrics.roc_curve(y_test, proba[:,1])
    auc = metrics.roc_auc_score(y_test, proba[:,1])
    plt.plot(fpr, tpr, label=label_fmt %(auc), lw=1, alpha=.8)

#dashed diagonal as the chance reference, drawn after the atlas curves
plt.plot([0, 1], [0, 1], linestyle='--', lw=1,
         label='Chance', alpha=.8)

#axes: small margin around the unit square, labels and title
plt.xlim([-0.05, 1.05])
plt.ylim([-0.05, 1.05])
plt.xticks(fontsize=14)
plt.yticks(fontsize=14)
plt.xlabel('False Positive Rate', fontsize=14)
plt.ylabel('True Positive Rate', fontsize=14)
plt.title('Receiver Operating Characteristic Curve', fontsize=14)

plt.legend(loc='best', fontsize=11, ncol=1, frameon=False, title="      Atlas    AUC", title_fontsize=12)

#write the finished figure to disk
plt.savefig('roc_allatlases.svg')