In [1]:
import numpy as np
import os
import pandas as pd
import scipy.io as sio
import seaborn as sns
import matplotlib.pyplot as plt

from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.pipeline import make_pipeline, Pipeline
from sklearn.model_selection import train_test_split
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import learning_curve
from sklearn.linear_model import LogisticRegression
from sklearn.svm import LinearSVC
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier, VotingClassifier, BaggingClassifier, GradientBoostingClassifier
from sklearn.metrics import accuracy_score, f1_score, roc_auc_score, roc_curve, auc
from sklearn.model_selection import GridSearchCV, cross_val_score, KFold
from sklearn.decomposition import SparsePCA

import statsmodels.api as sm
from statsmodels.formula.api import ols
import statsmodels.stats.multicomp

from sklearn.svm import SVC
from sklearn.model_selection import StratifiedKFold
from sklearn.feature_selection import RFECV

In [2]:
def calc_num_components(a):
    num_pairs = len(a)
    num_components = int(np.ceil(np.sqrt(2 * num_pairs - 1)))
    if num_components * (num_components - 1) == 2 * num_pairs:
        return num_components
    else:
        return 0


def return_dfnc_from_vector(a):
    '''This Function returns a dfnc matrix given a vector
        a : the vector of correlation pairs
    '''
    n = calc_num_components(a)
    out = np.zeros((n, n))

    if not n:
        return out
    
    l_indices = np.tril_indices(n, -1)
    u_indices = np.triu_indices(n, 1)

    out[u_indices] = a
    out[l_indices] = out.T[l_indices]
    
    return out

In [3]:
sns.set_style("whitegrid")
cwd = os.path.dirname(os.getcwd())
data_dir = os.path.join(os.path.dirname(os.getcwd()), 'data')
doc_dir = os.path.join(os.path.dirname(os.getcwd()), 'docs')

In [4]:
sfnc_pairs = sio.loadmat(os.path.join(data_dir,'sfnc_pairs.mat'))
sfnc_corr_pairs = sfnc_pairs['fnc_corrs']

In [5]:
demographics = pd.read_excel(os.path.join(data_dir, '20160420_vcalhoun_rest_demography_cag_info_new.xls'))

In [6]:
X = sfnc_corr_pairs
y = demographics.cap_d_group_id2.values

In [11]:
y_diagnosis = demographics["visit_diagnosis_ID"].values
X_controls = X[y==0]
#random subset
control_subset = np.random.randint(0, high=78, size=23)
X_controls_subset = X_controls[control_subset].copy()
X_diagnosed = X[y_diagnosis==1]
X_HD_diagnosed = pd.concat([pd.DataFrame(X_diagnosed), pd.DataFrame(X_controls_subset)], axis=0)
y_HD_diagnosed = np.append(np.ones(23), np.zeros(23))

In [28]:
# create HD_near and Control samples
#demographics["HD_near"] = (demographics.cap_d_score > demographics.cap_d_score.dropna().median())*1
demographics["HD_near"] =  (demographics.cap_d_group=="high")*1
X_near = X[demographics["HD_near"]==1]
X_controls = X[y==0]
y_near = np.ones(np.sum(demographics["HD_near"]))
y_controls = np.zeros(78)
# concatenate HD_near and controls
X_HD_near = pd.concat([pd.DataFrame(X_near), pd.DataFrame(X_controls)], axis=0)
y_HD_near = np.append(y_near, y_controls)

In [12]:
# Hierarchical: Expert informed order of domains: Diagnosed HD patients vs controls: Gradient boosting
X_train, X_test, y_train, y_test = train_test_split(X_HD_diagnosed, y_HD_diagnosed, test_size=0.3, random_state=42)

# Reduce to domains in Training data
sfnc_corr_pairs = np.array(X_train)
X_AUD_2 = np.concatenate((sfnc_corr_pairs[:,0:2], sfnc_corr_pairs[:,45].reshape(-1,1)), axis=1)
X_CB_2 = np.concatenate((sfnc_corr_pairs[:,89:91], sfnc_corr_pairs[:,132].reshape(-1,1)), axis=1)
X_CC_2 = np.concatenate((sfnc_corr_pairs[:,215:221],sfnc_corr_pairs[:,255:260],sfnc_corr_pairs[:,294:298],sfnc_corr_pairs[:,332:335], sfnc_corr_pairs[:,369:371], sfnc_corr_pairs[:,405].reshape(-1,1)), axis=1)
X_DMN_2 = np.concatenate((sfnc_corr_pairs[:,474:488],sfnc_corr_pairs[:,507:520],sfnc_corr_pairs[:,539:551],sfnc_corr_pairs[:,570:581],sfnc_corr_pairs[:,600:610],sfnc_corr_pairs[:,629:638],sfnc_corr_pairs[:,657:665],sfnc_corr_pairs[:,684:691],sfnc_corr_pairs[:,710:716],sfnc_corr_pairs[:,735:740],sfnc_corr_pairs[:,759:763],sfnc_corr_pairs[:,782:785],sfnc_corr_pairs[:,804:806],sfnc_corr_pairs[:,825].reshape(-1,1)), axis=1)
X_SC_2 = sfnc_corr_pairs[:,882:883].copy()
X_VIS_2 = np.concatenate((sfnc_corr_pairs[:,980:990],sfnc_corr_pairs[:,990:999], sfnc_corr_pairs[:,999:1007], sfnc_corr_pairs[:,1007:1014],sfnc_corr_pairs[:,1014:1020],sfnc_corr_pairs[:,1020:1025],sfnc_corr_pairs[:,1025:1029],sfnc_corr_pairs[:,1029:1032], sfnc_corr_pairs[:,1032:1034], sfnc_corr_pairs[:,1034:1035]), axis=1)
X_SM_2 = np.concatenate((sfnc_corr_pairs[:,915:919],sfnc_corr_pairs[:,930:933],sfnc_corr_pairs[:,944:946],sfnc_corr_pairs[:,957:958],), axis=1)           

# Reduce to domains in Training data
sfnc_corr_pairs = np.array(X_test)
X_AUD_2_test = np.concatenate((sfnc_corr_pairs[:,0:2], sfnc_corr_pairs[:,45].reshape(-1,1)), axis=1)
X_CB_2_test = np.concatenate((sfnc_corr_pairs[:,89:91], sfnc_corr_pairs[:,132].reshape(-1,1)), axis=1)
X_CC_2_test = np.concatenate((sfnc_corr_pairs[:,215:221],sfnc_corr_pairs[:,255:260],sfnc_corr_pairs[:,294:298],sfnc_corr_pairs[:,332:335], sfnc_corr_pairs[:,369:371], sfnc_corr_pairs[:,405].reshape(-1,1)), axis=1)
X_DMN_2_test = np.concatenate((sfnc_corr_pairs[:,474:488],sfnc_corr_pairs[:,507:520],sfnc_corr_pairs[:,539:551],sfnc_corr_pairs[:,570:581],sfnc_corr_pairs[:,600:610],sfnc_corr_pairs[:,629:638],sfnc_corr_pairs[:,657:665],sfnc_corr_pairs[:,684:691],sfnc_corr_pairs[:,710:716],sfnc_corr_pairs[:,735:740],sfnc_corr_pairs[:,759:763],sfnc_corr_pairs[:,782:785],sfnc_corr_pairs[:,804:806],sfnc_corr_pairs[:,825].reshape(-1,1)), axis=1)
X_SC_2_test = sfnc_corr_pairs[:,882:883].copy()
X_VIS_2_test = np.concatenate((sfnc_corr_pairs[:,980:990],sfnc_corr_pairs[:,990:999], sfnc_corr_pairs[:,999:1007], sfnc_corr_pairs[:,1007:1014],sfnc_corr_pairs[:,1014:1020],sfnc_corr_pairs[:,1020:1025],sfnc_corr_pairs[:,1025:1029],sfnc_corr_pairs[:,1029:1032], sfnc_corr_pairs[:,1032:1034], sfnc_corr_pairs[:,1034:1035]), axis=1)
X_SM_2_test = np.concatenate((sfnc_corr_pairs[:,915:919],sfnc_corr_pairs[:,930:933],sfnc_corr_pairs[:,944:946],sfnc_corr_pairs[:,957:958],), axis=1)           

y_training = np.zeros((7, np.shape(X_train)[0]))
y_testing = np.zeros((7, np.shape(X_test)[0]))
score = np.zeros((7,np.shape(X_test)[0]))
domains = [X_SC_2, X_SM_2, X_CC_2, X_DMN_2, X_VIS_2, X_CB_2, X_AUD_2, ]
domains_test = [X_SC_2_test, X_SM_2_test, X_CC_2_test, X_DMN_2_test, X_VIS_2_test, X_CB_2_test, X_AUD_2_test, ]

# Fit & Test classifier one domain adter another
for n, d, d_test in zip(range(0,7,1), domains, domains_test):
    if n == 0:
        gbc = GradientBoostingClassifier(learning_rate=0.1,
                                               max_features='log2',
                                         loss = "deviance", max_depth=3, n_estimators=500)
        gbc.fit(d, y_train)
        y_training[n] = gbc.predict(d)
        y_testing[n] = gbc.predict(d_test)
    elif n !=0:
        X_train = np.concatenate((d, y_training[n-1].reshape(-1,1)), axis=1)
        gbc = GradientBoostingClassifier(learning_rate=0.1,
                                               max_features='log2',
                                         loss = "deviance", max_depth=3, n_estimators=500)
        gbc.fit(X_train, y_train)
        y_training[n] = gbc.predict(X_train)
        
        X_test = np.concatenate((d_test, y_testing[n-1].reshape(-1,1)), axis=1)
        y_testing[n] = gbc.predict(X_test)
        score[n] = gbc.score(X_test, y_test)

In [13]:
score

array([[0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        ],
       [0.71428571, 0.71428571, 0.71428571, 0.71428571, 0.71428571,
        0.71428571, 0.71428571, 0.71428571, 0.71428571, 0.71428571,
        0.71428571, 0.71428571, 0.71428571, 0.71428571],
       [0.71428571, 0.71428571, 0.71428571, 0.71428571, 0.71428571,
        0.71428571, 0.71428571, 0.71428571, 0.71428571, 0.71428571,
        0.71428571, 0.71428571, 0.71428571, 0.71428571],
       [0.78571429, 0.78571429, 0.78571429, 0.78571429, 0.78571429,
        0.78571429, 0.78571429, 0.78571429, 0.78571429, 0.78571429,
        0.78571429, 0.78571429, 0.78571429, 0.78571429],
       [0.78571429, 0.78571429, 0.78571429, 0.78571429, 0.78571429,
        0.78571429, 0.78571429, 0.78571429, 0.78571429, 0.78571429,
        0.78571429, 0.78571429, 0.78571429, 0.78571429],
       [0.78571429, 0.78571429, 0.7

In [14]:
# Hierarchical: Expert informed order of domains: Diagnosed HD patients vs controls: Logistic regression
X_train, X_test, y_train, y_test = train_test_split(X_HD_diagnosed, y_HD_diagnosed, test_size=0.3, random_state=42)

# Reduce to domains in Training data
sfnc_corr_pairs = np.array(X_train)
X_AUD_2 = np.concatenate((sfnc_corr_pairs[:,0:2], sfnc_corr_pairs[:,45].reshape(-1,1)), axis=1)
X_CB_2 = np.concatenate((sfnc_corr_pairs[:,89:91], sfnc_corr_pairs[:,132].reshape(-1,1)), axis=1)
X_CC_2 = np.concatenate((sfnc_corr_pairs[:,215:221],sfnc_corr_pairs[:,255:260],sfnc_corr_pairs[:,294:298],sfnc_corr_pairs[:,332:335], sfnc_corr_pairs[:,369:371], sfnc_corr_pairs[:,405].reshape(-1,1)), axis=1)
X_DMN_2 = np.concatenate((sfnc_corr_pairs[:,474:488],sfnc_corr_pairs[:,507:520],sfnc_corr_pairs[:,539:551],sfnc_corr_pairs[:,570:581],sfnc_corr_pairs[:,600:610],sfnc_corr_pairs[:,629:638],sfnc_corr_pairs[:,657:665],sfnc_corr_pairs[:,684:691],sfnc_corr_pairs[:,710:716],sfnc_corr_pairs[:,735:740],sfnc_corr_pairs[:,759:763],sfnc_corr_pairs[:,782:785],sfnc_corr_pairs[:,804:806],sfnc_corr_pairs[:,825].reshape(-1,1)), axis=1)
X_SC_2 = sfnc_corr_pairs[:,882:883].copy()
X_VIS_2 = np.concatenate((sfnc_corr_pairs[:,980:990],sfnc_corr_pairs[:,990:999], sfnc_corr_pairs[:,999:1007], sfnc_corr_pairs[:,1007:1014],sfnc_corr_pairs[:,1014:1020],sfnc_corr_pairs[:,1020:1025],sfnc_corr_pairs[:,1025:1029],sfnc_corr_pairs[:,1029:1032], sfnc_corr_pairs[:,1032:1034], sfnc_corr_pairs[:,1034:1035]), axis=1)
X_SM_2 = np.concatenate((sfnc_corr_pairs[:,915:919],sfnc_corr_pairs[:,930:933],sfnc_corr_pairs[:,944:946],sfnc_corr_pairs[:,957:958],), axis=1)           

# Reduce to domains in Training data
sfnc_corr_pairs = np.array(X_test)
X_AUD_2_test = np.concatenate((sfnc_corr_pairs[:,0:2], sfnc_corr_pairs[:,45].reshape(-1,1)), axis=1)
X_CB_2_test = np.concatenate((sfnc_corr_pairs[:,89:91], sfnc_corr_pairs[:,132].reshape(-1,1)), axis=1)
X_CC_2_test = np.concatenate((sfnc_corr_pairs[:,215:221],sfnc_corr_pairs[:,255:260],sfnc_corr_pairs[:,294:298],sfnc_corr_pairs[:,332:335], sfnc_corr_pairs[:,369:371], sfnc_corr_pairs[:,405].reshape(-1,1)), axis=1)
X_DMN_2_test = np.concatenate((sfnc_corr_pairs[:,474:488],sfnc_corr_pairs[:,507:520],sfnc_corr_pairs[:,539:551],sfnc_corr_pairs[:,570:581],sfnc_corr_pairs[:,600:610],sfnc_corr_pairs[:,629:638],sfnc_corr_pairs[:,657:665],sfnc_corr_pairs[:,684:691],sfnc_corr_pairs[:,710:716],sfnc_corr_pairs[:,735:740],sfnc_corr_pairs[:,759:763],sfnc_corr_pairs[:,782:785],sfnc_corr_pairs[:,804:806],sfnc_corr_pairs[:,825].reshape(-1,1)), axis=1)
X_SC_2_test = sfnc_corr_pairs[:,882:883].copy()
X_VIS_2_test = np.concatenate((sfnc_corr_pairs[:,980:990],sfnc_corr_pairs[:,990:999], sfnc_corr_pairs[:,999:1007], sfnc_corr_pairs[:,1007:1014],sfnc_corr_pairs[:,1014:1020],sfnc_corr_pairs[:,1020:1025],sfnc_corr_pairs[:,1025:1029],sfnc_corr_pairs[:,1029:1032], sfnc_corr_pairs[:,1032:1034], sfnc_corr_pairs[:,1034:1035]), axis=1)
X_SM_2_test = np.concatenate((sfnc_corr_pairs[:,915:919],sfnc_corr_pairs[:,930:933],sfnc_corr_pairs[:,944:946],sfnc_corr_pairs[:,957:958],), axis=1)           

y_training = np.zeros((7, np.shape(X_train)[0]))
y_testing = np.zeros((7, np.shape(X_test)[0]))
score = np.zeros((7,np.shape(X_test)[0]))
domains = [ X_SC_2, X_SM_2, X_CC_2, X_DMN_2, X_VIS_2, X_CB_2, X_AUD_2,]
domains_test = [ X_SC_2_test, X_SM_2_test, X_CC_2_test, X_DMN_2_test, X_VIS_2_test, X_CB_2_test, X_AUD_2_test, ]

# Fit & Test classifier one domain adter another
for n, d, d_test in zip(range(0,7,1), domains, domains_test):
    if n == 0:
        gbc = LogisticRegression(solver="lbfgs")
        gbc.fit(d, y_train)
        y_training[n] = gbc.predict(d)
        y_testing[n] = gbc.predict(d_test)
    elif n !=0:
        X_train = np.concatenate((d, y_training[n-1].reshape(-1,1)), axis=1)
        gbc = LogisticRegression(solver="lbfgs")
        gbc.fit(X_train, y_train)
        y_training[n] = gbc.predict(X_train)
        
        X_test = np.concatenate((d_test, y_testing[n-1].reshape(-1,1)), axis=1)
        y_testing[n] = gbc.predict(X_test)
        score[n] = gbc.score(X_test, y_test)

In [15]:
score

array([[0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        ],
       [0.64285714, 0.64285714, 0.64285714, 0.64285714, 0.64285714,
        0.64285714, 0.64285714, 0.64285714, 0.64285714, 0.64285714,
        0.64285714, 0.64285714, 0.64285714, 0.64285714],
       [0.64285714, 0.64285714, 0.64285714, 0.64285714, 0.64285714,
        0.64285714, 0.64285714, 0.64285714, 0.64285714, 0.64285714,
        0.64285714, 0.64285714, 0.64285714, 0.64285714],
       [0.78571429, 0.78571429, 0.78571429, 0.78571429, 0.78571429,
        0.78571429, 0.78571429, 0.78571429, 0.78571429, 0.78571429,
        0.78571429, 0.78571429, 0.78571429, 0.78571429],
       [0.78571429, 0.78571429, 0.78571429, 0.78571429, 0.78571429,
        0.78571429, 0.78571429, 0.78571429, 0.78571429, 0.78571429,
        0.78571429, 0.78571429, 0.78571429, 0.78571429],
       [0.78571429, 0.78571429, 0.7

In [29]:
# Hierarchical: Expert informed order of domains: Late HD patients vs controls: Gradient boosting
X_train, X_test, y_train, y_test = train_test_split(X_HD_near, y_HD_near, test_size=0.3, random_state=42)

# Reduce to domains in Training data
sfnc_corr_pairs = np.array(X_train)
X_AUD_2 = np.concatenate((sfnc_corr_pairs[:,0:2], sfnc_corr_pairs[:,45].reshape(-1,1)), axis=1)
X_CB_2 = np.concatenate((sfnc_corr_pairs[:,89:91], sfnc_corr_pairs[:,132].reshape(-1,1)), axis=1)
X_CC_2 = np.concatenate((sfnc_corr_pairs[:,215:221],sfnc_corr_pairs[:,255:260],sfnc_corr_pairs[:,294:298],sfnc_corr_pairs[:,332:335], sfnc_corr_pairs[:,369:371], sfnc_corr_pairs[:,405].reshape(-1,1)), axis=1)
X_DMN_2 = np.concatenate((sfnc_corr_pairs[:,474:488],sfnc_corr_pairs[:,507:520],sfnc_corr_pairs[:,539:551],sfnc_corr_pairs[:,570:581],sfnc_corr_pairs[:,600:610],sfnc_corr_pairs[:,629:638],sfnc_corr_pairs[:,657:665],sfnc_corr_pairs[:,684:691],sfnc_corr_pairs[:,710:716],sfnc_corr_pairs[:,735:740],sfnc_corr_pairs[:,759:763],sfnc_corr_pairs[:,782:785],sfnc_corr_pairs[:,804:806],sfnc_corr_pairs[:,825].reshape(-1,1)), axis=1)
X_SC_2 = sfnc_corr_pairs[:,882:883].copy()
X_VIS_2 = np.concatenate((sfnc_corr_pairs[:,980:990],sfnc_corr_pairs[:,990:999], sfnc_corr_pairs[:,999:1007], sfnc_corr_pairs[:,1007:1014],sfnc_corr_pairs[:,1014:1020],sfnc_corr_pairs[:,1020:1025],sfnc_corr_pairs[:,1025:1029],sfnc_corr_pairs[:,1029:1032], sfnc_corr_pairs[:,1032:1034], sfnc_corr_pairs[:,1034:1035]), axis=1)
X_SM_2 = np.concatenate((sfnc_corr_pairs[:,915:919],sfnc_corr_pairs[:,930:933],sfnc_corr_pairs[:,944:946],sfnc_corr_pairs[:,957:958],), axis=1)           

# Reduce to domains in Training data
sfnc_corr_pairs = np.array(X_test)
X_AUD_2_test = np.concatenate((sfnc_corr_pairs[:,0:2], sfnc_corr_pairs[:,45].reshape(-1,1)), axis=1)
X_CB_2_test = np.concatenate((sfnc_corr_pairs[:,89:91], sfnc_corr_pairs[:,132].reshape(-1,1)), axis=1)
X_CC_2_test = np.concatenate((sfnc_corr_pairs[:,215:221],sfnc_corr_pairs[:,255:260],sfnc_corr_pairs[:,294:298],sfnc_corr_pairs[:,332:335], sfnc_corr_pairs[:,369:371], sfnc_corr_pairs[:,405].reshape(-1,1)), axis=1)
X_DMN_2_test = np.concatenate((sfnc_corr_pairs[:,474:488],sfnc_corr_pairs[:,507:520],sfnc_corr_pairs[:,539:551],sfnc_corr_pairs[:,570:581],sfnc_corr_pairs[:,600:610],sfnc_corr_pairs[:,629:638],sfnc_corr_pairs[:,657:665],sfnc_corr_pairs[:,684:691],sfnc_corr_pairs[:,710:716],sfnc_corr_pairs[:,735:740],sfnc_corr_pairs[:,759:763],sfnc_corr_pairs[:,782:785],sfnc_corr_pairs[:,804:806],sfnc_corr_pairs[:,825].reshape(-1,1)), axis=1)
X_SC_2_test = sfnc_corr_pairs[:,882:883].copy()
X_VIS_2_test = np.concatenate((sfnc_corr_pairs[:,980:990],sfnc_corr_pairs[:,990:999], sfnc_corr_pairs[:,999:1007], sfnc_corr_pairs[:,1007:1014],sfnc_corr_pairs[:,1014:1020],sfnc_corr_pairs[:,1020:1025],sfnc_corr_pairs[:,1025:1029],sfnc_corr_pairs[:,1029:1032], sfnc_corr_pairs[:,1032:1034], sfnc_corr_pairs[:,1034:1035]), axis=1)
X_SM_2_test = np.concatenate((sfnc_corr_pairs[:,915:919],sfnc_corr_pairs[:,930:933],sfnc_corr_pairs[:,944:946],sfnc_corr_pairs[:,957:958],), axis=1)           

y_training = np.zeros((7, np.shape(X_train)[0]))
y_testing = np.zeros((7, np.shape(X_test)[0]))
score = np.zeros((7,np.shape(X_test)[0]))
domains = [X_SC_2, X_SM_2, X_CC_2, X_DMN_2, X_VIS_2, X_CB_2, X_AUD_2, ]
domains_test = [X_SC_2_test, X_SM_2_test, X_CC_2_test, X_DMN_2_test, X_VIS_2_test, X_CB_2_test, X_AUD_2_test, ]

# Fit & Test classifier one domain adter another
for n, d, d_test in zip(range(0,7,1), domains, domains_test):
    if n == 0:
        gbc = GradientBoostingClassifier(learning_rate=0.1,
                                               max_features='log2',
                                         loss = "deviance", max_depth=3, n_estimators=500)
        gbc.fit(d, y_train)
        y_training[n] = gbc.predict(d)
        y_testing[n] = gbc.predict(d_test)
    elif n !=0:
        X_train = np.concatenate((d, y_training[n-1].reshape(-1,1)), axis=1)
        gbc = GradientBoostingClassifier(learning_rate=0.1,
                                               max_features='log2',
                                         loss = "deviance", max_depth=3, n_estimators=500)
        gbc.fit(X_train, y_train)
        y_training[n] = gbc.predict(X_train)
        
        X_test = np.concatenate((d_test, y_testing[n-1].reshape(-1,1)), axis=1)
        y_testing[n] = gbc.predict(X_test)
        score[n] = gbc.score(X_test, y_test)

In [30]:
score

array([[0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        ],
       [0.57446809, 0.57446809, 0.57446809, 0.57446809, 0.57446809,
        0.57446809, 0.57446809, 0.57446809, 0.57446809, 0.57446809,
        0.57446809, 0.57446809, 0.57446809, 0.57446809, 0.57446809,
        0.57446809, 0.57446809, 0.57446809, 0.57446809, 0.57446809,
        0.57446809, 0.57446809, 0.57446809, 0.57446809, 0.57446809,
        0.57446

In [31]:
# Hierarchical: Expert informed order of domains: Late HD patients vs controls: Logistic regression
X_train, X_test, y_train, y_test = train_test_split(X_HD_near, y_HD_near, test_size=0.3, random_state=42)


# Reduce to domains in Training data
sfnc_corr_pairs = np.array(X_train)
X_AUD_2 = np.concatenate((sfnc_corr_pairs[:,0:2], sfnc_corr_pairs[:,45].reshape(-1,1)), axis=1)
X_CB_2 = np.concatenate((sfnc_corr_pairs[:,89:91], sfnc_corr_pairs[:,132].reshape(-1,1)), axis=1)
X_CC_2 = np.concatenate((sfnc_corr_pairs[:,215:221],sfnc_corr_pairs[:,255:260],sfnc_corr_pairs[:,294:298],sfnc_corr_pairs[:,332:335], sfnc_corr_pairs[:,369:371], sfnc_corr_pairs[:,405].reshape(-1,1)), axis=1)
X_DMN_2 = np.concatenate((sfnc_corr_pairs[:,474:488],sfnc_corr_pairs[:,507:520],sfnc_corr_pairs[:,539:551],sfnc_corr_pairs[:,570:581],sfnc_corr_pairs[:,600:610],sfnc_corr_pairs[:,629:638],sfnc_corr_pairs[:,657:665],sfnc_corr_pairs[:,684:691],sfnc_corr_pairs[:,710:716],sfnc_corr_pairs[:,735:740],sfnc_corr_pairs[:,759:763],sfnc_corr_pairs[:,782:785],sfnc_corr_pairs[:,804:806],sfnc_corr_pairs[:,825].reshape(-1,1)), axis=1)
X_SC_2 = sfnc_corr_pairs[:,882:883].copy()
X_VIS_2 = np.concatenate((sfnc_corr_pairs[:,980:990],sfnc_corr_pairs[:,990:999], sfnc_corr_pairs[:,999:1007], sfnc_corr_pairs[:,1007:1014],sfnc_corr_pairs[:,1014:1020],sfnc_corr_pairs[:,1020:1025],sfnc_corr_pairs[:,1025:1029],sfnc_corr_pairs[:,1029:1032], sfnc_corr_pairs[:,1032:1034], sfnc_corr_pairs[:,1034:1035]), axis=1)
X_SM_2 = np.concatenate((sfnc_corr_pairs[:,915:919],sfnc_corr_pairs[:,930:933],sfnc_corr_pairs[:,944:946],sfnc_corr_pairs[:,957:958],), axis=1)           

# Reduce to domains in Training data
sfnc_corr_pairs = np.array(X_test)
X_AUD_2_test = np.concatenate((sfnc_corr_pairs[:,0:2], sfnc_corr_pairs[:,45].reshape(-1,1)), axis=1)
X_CB_2_test = np.concatenate((sfnc_corr_pairs[:,89:91], sfnc_corr_pairs[:,132].reshape(-1,1)), axis=1)
X_CC_2_test = np.concatenate((sfnc_corr_pairs[:,215:221],sfnc_corr_pairs[:,255:260],sfnc_corr_pairs[:,294:298],sfnc_corr_pairs[:,332:335], sfnc_corr_pairs[:,369:371], sfnc_corr_pairs[:,405].reshape(-1,1)), axis=1)
X_DMN_2_test = np.concatenate((sfnc_corr_pairs[:,474:488],sfnc_corr_pairs[:,507:520],sfnc_corr_pairs[:,539:551],sfnc_corr_pairs[:,570:581],sfnc_corr_pairs[:,600:610],sfnc_corr_pairs[:,629:638],sfnc_corr_pairs[:,657:665],sfnc_corr_pairs[:,684:691],sfnc_corr_pairs[:,710:716],sfnc_corr_pairs[:,735:740],sfnc_corr_pairs[:,759:763],sfnc_corr_pairs[:,782:785],sfnc_corr_pairs[:,804:806],sfnc_corr_pairs[:,825].reshape(-1,1)), axis=1)
X_SC_2_test = sfnc_corr_pairs[:,882:883].copy()
X_VIS_2_test = np.concatenate((sfnc_corr_pairs[:,980:990],sfnc_corr_pairs[:,990:999], sfnc_corr_pairs[:,999:1007], sfnc_corr_pairs[:,1007:1014],sfnc_corr_pairs[:,1014:1020],sfnc_corr_pairs[:,1020:1025],sfnc_corr_pairs[:,1025:1029],sfnc_corr_pairs[:,1029:1032], sfnc_corr_pairs[:,1032:1034], sfnc_corr_pairs[:,1034:1035]), axis=1)
X_SM_2_test = np.concatenate((sfnc_corr_pairs[:,915:919],sfnc_corr_pairs[:,930:933],sfnc_corr_pairs[:,944:946],sfnc_corr_pairs[:,957:958],), axis=1)           

y_training = np.zeros((7, np.shape(X_train)[0]))
y_testing = np.zeros((7, np.shape(X_test)[0]))
score = np.zeros((7,np.shape(X_test)[0]))
domains = [ X_SC_2, X_SM_2, X_CC_2, X_DMN_2, X_VIS_2, X_CB_2, X_AUD_2,]
domains_test = [ X_SC_2_test, X_SM_2_test, X_CC_2_test, X_DMN_2_test, X_VIS_2_test, X_CB_2_test, X_AUD_2_test, ]

# Fit & Test classifier one domain adter another
for n, d, d_test in zip(range(0,7,1), domains, domains_test):
    if n == 0:
        gbc = LogisticRegression(solver="lbfgs")
        gbc.fit(d, y_train)
        y_training[n] = gbc.predict(d)
        y_testing[n] = gbc.predict(d_test)
    elif n !=0:
        X_train = np.concatenate((d, y_training[n-1].reshape(-1,1)), axis=1)
        gbc = LogisticRegression(solver="lbfgs")
        gbc.fit(X_train, y_train)
        y_training[n] = gbc.predict(X_train)
        
        X_test = np.concatenate((d_test, y_testing[n-1].reshape(-1,1)), axis=1)
        y_testing[n] = gbc.predict(X_test)
        score[n] = gbc.score(X_test, y_test)

In [32]:
score

array([[0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        ],
       [0.57446809, 0.57446809, 0.57446809, 0.57446809, 0.57446809,
        0.57446809, 0.57446809, 0.57446809, 0.57446809, 0.57446809,
        0.57446809, 0.57446809, 0.57446809, 0.57446809, 0.57446809,
        0.57446809, 0.57446809, 0.57446809, 0.57446809, 0.57446809,
        0.57446809, 0.57446809, 0.57446809, 0.57446809, 0.57446809,
        0.57446