In [None]:
#import relevant libraries
import os
import sys

import numpy as np
import pandas as pd
import sys; sys.path
import matplotlib.pyplot as plt
import seaborn as sns
from statsmodels.stats.multitest import multipletests as fdr
from matplotlib import colors
from scipy import stats

from sklearn.metrics import explained_variance_score, r2_score, classification_report
from sklearn.linear_model import Ridge, RidgeClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split, GridSearchCV, cross_val_score, GroupKFold, GroupShuffleSplit, StratifiedKFold
from sklearn.svm import SVC
from scipy import stats
from sklearn.utils import shuffle
from datetime import datetime


import warnings
warnings.filterwarnings('ignore')

In [None]:
# read and clean up ABCD data
# set base directories
ABCD_base_dir   = 'PATH_TO_DIR'
ABCD_results_dir   = 'PATH_TO_DIR'
# Later cells read and write through `results_dir`, which was never assigned in
# this notebook; define it here so a fresh kernel can run top to bottom.
# NOTE(review): assumed to be the same directory as ABCD_results_dir - confirm.
results_dir = ABCD_results_dir

#load subj fc data
#ABCD_fc= pd.read_csv(os.path.join(ABCD_base_dir, 'ABCD_rsfc_withsubcort_5260subj.csv'), header=None)
#ABCD_fc = ABCD_fc.T
# one subject key per line, loaded as the single column 's';
# `names` has to be list-like (a bare string is rejected by current pandas)
ABCD_subj = pd.read_csv(os.path.join(ABCD_base_dir, 'subjects_5260.txt'), header=None, names=['s'])
#ABCD_fc.insert(0, "subjectkey", ABCD_subj, True)
#ABCD_fc = ABCD_fc.sort_values(by='subjectkey', ascending=True)

# load subj demo and clinical data (parent-report and youth-report gender tables)
ABCD_gender_p = pd.read_csv(os.path.join(ABCD_base_dir, 'abcd_pgi01.csv'), header=0)
ABCD_gender_y = pd.read_csv(os.path.join(ABCD_base_dir, 'abcd_ygi01.csv'), header=0)

#drop duplicate header rows (the first data row, index label 0, repeats the header)
header_row = 0
ABCD_gender_p = ABCD_gender_p.drop(header_row)
ABCD_gender_y = ABCD_gender_y.drop(header_row)

In [None]:
#keep only the rows whose subjectkey appears in the FC subject list
has_fc_p = ABCD_gender_p['subjectkey'].isin(ABCD_subj['s'])
has_fc_y = ABCD_gender_y['subjectkey'].isin(ABCD_subj['s'])
ABCD_gender_p = ABCD_gender_p.loc[has_fc_p]
ABCD_gender_y = ABCD_gender_y.loc[has_fc_y]

In [None]:
#order both tables by ascending subjectkey so they share the same row order
ABCD_gender_p, ABCD_gender_y = (
    table.sort_values('subjectkey')
    for table in (ABCD_gender_p, ABCD_gender_y)
)

In [None]:
#select variables of interest
id_cols = ['subjectkey', 'sex', 'eventname']

#youth (self-report) items, kept in the same column order as before
youth_items = ['gish_m1_y', 'gish_m2_y',
               'gish_f1_y', 'gish_f2_y',
               'gish_m3_y', 'gish_m4_y',
               'gish_f3_y', 'gish_f4_y']
ABCD_gender_y = ABCD_gender_y[id_cols + youth_items]

#parent-report items: gish_m1_p..gish_m14_p followed by gish_f1_p..gish_f14_p
parent_items = [f'gish_{prefix}{item}_p'
                for prefix in ('m', 'f')
                for item in range(1, 15)]
ABCD_gender_p = ABCD_gender_p[id_cols + parent_items]


In [None]:
#keep the 1-year follow-up rows of the parent-report and self-report tables
is_followup_p = ABCD_gender_p['eventname'].eq('1_year_follow_up_y_arm_1')
is_followup_y = ABCD_gender_y['eventname'].eq('1_year_follow_up_y_arm_1')
ABCD_gender_p_f1 = ABCD_gender_p.loc[is_followup_p]
ABCD_gender_y_f1 = ABCD_gender_y.loc[is_followup_y]

In [None]:
#separate data by assigned sex ('F' / 'M' in the `sex` column)
ABCD_gender_p_f1_f = ABCD_gender_p_f1.loc[ABCD_gender_p_f1['sex'].eq('F')]
ABCD_gender_y_f1_f = ABCD_gender_y_f1.loc[ABCD_gender_y_f1['sex'].eq('F')]
ABCD_gender_p_f1_m = ABCD_gender_p_f1.loc[ABCD_gender_p_f1['sex'].eq('M')]
ABCD_gender_y_f1_m = ABCD_gender_y_f1.loc[ABCD_gender_y_f1['sex'].eq('M')]


In [None]:
#organize all data
#subject keys are taken from the self-report tables
subj_m = ABCD_gender_y_f1_m['subjectkey']
subj_f = ABCD_gender_y_f1_f['subjectkey']

#self-report: items 1-4 of the sex-specific set
youth_item_ids = range(1, 5)
gender_y_f1_m = ABCD_gender_y_f1_m[[f'gish_m{i}_y' for i in youth_item_ids]]
gender_y_f1_f = ABCD_gender_y_f1_f[[f'gish_f{i}_y' for i in youth_item_ids]]

#parent-report: items 1-8, 10 and 12-14 of the sex-specific set (9 and 11 are left out)
parent_item_ids = [1, 2, 3, 4, 5, 6, 7, 8, 10, 12, 13, 14]
gender_p_f1_m = ABCD_gender_p_f1_m[[f'gish_m{i}_p' for i in parent_item_ids]]
gender_p_f1_f = ABCD_gender_p_f1_f[[f'gish_f{i}_p' for i in parent_item_ids]]



In [None]:
#remove subjects with missing data in any self-report item
#the row mask is computed from the self-report table and then applied positionally to the
#parent-report table and to the subject keys
#NOTE(review): this assumes the self- and parent-report tables have the same subjects in the
#same row order - confirm, otherwise the mask length/alignment is wrong
nanmask = np.isnan(np.double(gender_y_f1_m)).any(axis=1)
gender_y_m = gender_y_f1_m[~nanmask]
gender_p_m = gender_p_f1_m[~nanmask]
subj_m = subj_m[~nanmask]

#same procedure for the female tables (nanmask is reused)
nanmask = np.isnan(np.double(gender_y_f1_f)).any(axis=1)
gender_y_f = gender_y_f1_f[~nanmask]
gender_p_f = gender_p_f1_f[~nanmask]
subj_f = subj_f[~nanmask]




In [None]:
#remove subjects who did not answer the questions
#step 1: drop rows with a missing value in any parent-report item; from here on the
#item tables are plain float arrays (np.double), while subj_* stay pandas Series
nanmask = np.isnan(np.double(gender_p_m)).any(axis=1)
gender_y_m = np.double(gender_y_m[~nanmask])
gender_p_m = np.double(gender_p_m[~nanmask])
subj_m = subj_m[~nanmask]


nanmask = np.isnan(np.double(gender_p_f)).any(axis=1)
gender_y_f = np.double(gender_y_f[~nanmask])
gender_p_f = np.double(gender_p_f[~nanmask])
subj_f = subj_f[~nanmask]


#step 2: drop rows where any parent-report item is >= 777
#presumably 777 and above are non-response codes - confirm against the data dictionary
#NOTE(review): only the parent-report items are screened here, not the self-report items
mask777 = (gender_p_m>=777)
mask777 = mask777.any(axis=1)
gender_y_m_clean = gender_y_m[~mask777]
gender_p_m_clean = gender_p_m[~mask777]
subj_m = subj_m[~mask777]


mask777 = (gender_p_f>=777)
mask777 = mask777.any(axis=1)
gender_y_f_clean = gender_y_f[~mask777]
gender_p_f_clean = gender_p_f[~mask777]
subj_f = subj_f[~mask777]




In [None]:
#sum the items of each subject (row) to get summary self- and parent-report scores
gender_y_m_sum = gender_y_m_clean.sum(axis=1)
gender_y_f_sum = gender_y_f_clean.sum(axis=1)
gender_p_m_sum = gender_p_m_clean.sum(axis=1)
gender_p_f_sum = gender_p_f_clean.sum(axis=1)

In [None]:
#load data on site (the duplicated header row is dropped as for the gender tables)
ABCD_site = pd.read_csv(os.path.join(ABCD_base_dir, 'abcd_lt01.csv'), header=0).drop(header_row)

#site of each subject at the baseline event, ordered by subjectkey
#NOTE(review): the `_f1` suffix mirrors the follow-up gender tables, but the rows
#selected here are the baseline ones - confirm this is intended
ABCD_site_f1 = (
    ABCD_site.loc[ABCD_site['eventname'].eq('baseline_year_1_arm_1')]
    .sort_values('subjectkey')
    .loc[:, ['subjectkey', 'site_id_l']]
    .reset_index()
)

#site labels for the retained male / female subjects
site_m = ABCD_site_f1.loc[ABCD_site_f1['subjectkey'].isin(subj_m), 'site_id_l']
site_f = ABCD_site_f1.loc[ABCD_site_f1['subjectkey'].isin(subj_f), 'site_id_l']





In [None]:
#site names, used as y tick labels of the per-site violin plot
#NOTE(review): the plot orders sites by np.unique of the site ids, so this list is
#presumably written in that same order with one name per remaining site - confirm
sites = ['Colorado Boulder',
         'Florida International',
         'Laureate Institute',
         'Medical University of South Carolina',
         'Oregon Health and Science University',
         'University of Rochester',
         'Stanford Research Institute International',
         'University of California - Los Angeles',
         'University of California - San Diego',
         'University of Florida',
         'University of Maryland Baltimore',
         'University of Michigan',
         'University of Minnesota',
         'University of Pittsburgh',
         'University of Utah',
         'University of Wisconsin-Milwaukee',
         'Washington University St.Louis',
         'Yale']

In [None]:
#remove data from specific sites with fewer than 10 subjects
excluded_sites = ['site22', 'site19']

#males: one keep-mask applied to the site labels, subject keys and both score vectors
site_mask = ~np.isin(site_m.values, excluded_sites)
site_m = site_m[site_mask]
subj_m = subj_m[site_mask]
gender_y_m_sum = gender_y_m_sum[site_mask]
gender_p_m_sum = gender_p_m_sum[site_mask]

#females: same procedure
site_mask = ~np.isin(site_f.values, excluded_sites)
site_f = site_f[site_mask]
subj_f = subj_f[site_mask]
gender_y_f_sum = gender_y_f_sum[site_mask]
gender_p_f_sum = gender_p_f_sum[site_mask]



In [None]:
#plot gender data by site/sex: split violins (one half per sex) for every site,
#self-report on the left panel and parent-report on the right panel
nrows, ncols = 1, 2
dpi = 900
tight = True

fig, ax = plt.subplots(nrows=nrows, ncols=ncols, sharey=True, sharex=False,
                       figsize=[15, 10], dpi=dpi, constrained_layout=tight)
sns.set(font_scale=1.5, style='white')

#violin settings shared by both panels
scale = 'width'
orient = 'h'
split = True
hue = 'Sex'
order = np.unique(site_f)
inner = 'quartile'
cut = 0

#long-format tables, one per sex/report combination
data_f_p = pd.DataFrame({'Gender': gender_p_f_sum, 'Sex': 'F', 'Report': 'Parent', 'Site': site_f.values})
data_f_y = pd.DataFrame({'Gender': gender_y_f_sum, 'Sex': 'F', 'Report': 'Self', 'Site': site_f.values})
data_m_p = pd.DataFrame({'Gender': gender_p_m_sum, 'Sex': 'M', 'Report': 'Parent', 'Site': site_m.values})
data_m_y = pd.DataFrame({'Gender': gender_y_m_sum, 'Sex': 'M', 'Report': 'Self', 'Site': site_m.values})

#stack females and males per report type
data_p = pd.concat([data_f_p, data_m_p])
data_y = pd.concat([data_f_y, data_m_y])

violin_kws = dict(x='Gender', y='Site', hue=hue, scale=scale, orient=orient, palette='Set1',
                  split=split, inner=inner, cut=cut, order=order)
ax[0] = sns.violinplot(ax=ax[0], data=data_y, **violin_kws)
ax[1] = sns.violinplot(ax=ax[1], data=data_p, **violin_kws)

fontsize = 18
lw = 1
color = 'k'

#the y axis is shared, so tick labels and tick density are set on the first panel only
ax[0].set_yticklabels(sites, fontsize=fontsize)
ax[0].locator_params(axis='x', nbins=5)

for i, panel_title in enumerate(['Self-Report', 'Parent-Report']):
    ax[i].set_title(panel_title, fontsize=fontsize, weight='bold')
    ax[i].set_ylabel('Site', fontsize=fontsize, weight='bold')
    ax[i].set_xlabel('Gender', fontsize=fontsize, weight='bold')

    #dotted separators between neighbouring sites, at y = 0.5, 1.5, ..., 16.5
    for boundary in range(17):
        ax[i].axhline(boundary + 0.5, linewidth=lw, color=color, dashes=(1, 4))

    ax[i].get_legend().remove()

#plt.savefig((results_dir + '/gender_dist_site.png'), dpi=900, bbox_inches="tight") 

    
    
        


In [None]:
#plot gender data by sex: one violin per assigned sex,
#self-report on the left panel and parent-report on the right panel
#(uses the data_y / data_p tables built for the per-site plot)
nrows = 1
ncols = 2
dpi = 900
tight = True


fig, ax = plt.subplots(nrows=nrows, ncols=ncols, sharey=True, sharex=False, 
                       figsize=[15, 3], dpi=dpi, constrained_layout=tight)
sns.set(font_scale=1.5, style='white')

scale = 'width'
orient = 'h'
split = False
#set explicitly instead of relying on the value left behind by the previous plotting
#cell: a later cell rebinds `hue`, which would break this cell when it is re-run
hue = 'Sex'
inner = 'quartile'
cut = 0


ax[0] = sns.violinplot(ax=ax[0], x='Gender', y='Sex', hue=hue, scale=scale, orient=orient, palette='Set1', 
                    split=split, data=data_y, inner=inner, cut=cut)
ax[1] = sns.violinplot(ax=ax[1], x='Gender', y='Sex', hue=hue, scale=scale, orient=orient, palette='Set1', 
                    split=split, data=data_p, inner=inner, cut=cut)


fontsize=18


ax[0].set_title('Self-Report', fontsize=fontsize, weight='bold')
ax[1].set_title('Parent-Report', fontsize=fontsize, weight='bold')

ax[0].set_ylabel('Sex', fontsize=fontsize, weight='bold')
ax[1].set_ylabel('Sex', fontsize=fontsize, weight='bold')


ax[0].set_xlabel('Gender', fontsize=fontsize, weight='bold')
ax[1].set_xlabel('Gender', fontsize=fontsize, weight='bold')


#the y axis is shared, so the tick labels are set on the first panel only;
#females are concatenated first in data_y / data_p, hence AFAB is the first label
sexes = ['AFAB', 'AMAB']
ax[0].set_yticklabels(sexes, fontsize=fontsize)
ax[0].locator_params(axis='x', nbins=5)

lw = 1
color = 'k'

for i in range(ncols):
    #dotted separator between the two sexes
    ax[i].axhline(0.5,  linewidth=lw, color=color, dashes=(1, 4))
    ax[i].get_legend().remove()

#plt.savefig((results_dir + '/gender_dist.png'), dpi=900, bbox_inches="tight") 


In [None]:
#Spearman correlation between the parent-report and self-report summary scores,
#computed separately for males and females
corr_m = stats.spearmanr(gender_p_m_sum, gender_y_m_sum)
corr_f = stats.spearmanr(gender_p_f_sum, gender_y_f_sum)

for spearman_result in (corr_m, corr_f):
    print(spearman_result)

In [None]:
#compute sex differences in gender scores (Mann-Whitney U, males vs. females)
#`alternative` is passed explicitly: its default differs between SciPy releases, and
#the deprecation warning that signals this is silenced by the warnings filter above
diff_p = stats.mannwhitneyu(gender_p_m_sum, gender_p_f_sum, alternative='two-sided')
diff_s = stats.mannwhitneyu(gender_y_m_sum, gender_y_f_sum, alternative='two-sided')

print(diff_p)
print(diff_s)

In [None]:
#normalization class to shift the midpoint of a colorbar
import matplotlib as mpl
class MidpointNormalize(mpl.colors.Normalize):
    """Piecewise-linear normalization that maps `midpoint` to 0.5.

    Values are interpolated linearly through the three anchor points
    (vmin, midpoint, vmax). The shorter side of the midpoint is mapped to a
    proportionally shorter part of the [0, 1] range, so both sides use the
    same slope.

    Parameters
    ----------
    vmin, vmax : float
        Data limits of the colormap.
    midpoint : float, default 0
        Data value that is mapped to 0.5.
    clip : bool, default False
        Forwarded to the base class; `__call__` does not apply clipping itself.
    """

    def __init__(self, vmin, vmax, midpoint=0, clip=False):
        self.midpoint = midpoint
        mpl.colors.Normalize.__init__(self, vmin, vmax, clip)

    @staticmethod
    def _span_ratio(numerator, denominator):
        """Return |numerator / denominator|, or infinity for a zero denominator."""
        if denominator == 0:
            return np.inf
        return abs(numerator / denominator)

    def __call__(self, value, clip=None):
        """Normalize `value` (scalar or array) and return it as a masked array.

        The mask of a masked-array input is carried over to the result.
        """
        below = self.midpoint - self.vmin
        above = self.vmax - self.midpoint
        # a midpoint equal to vmin or vmax used to divide by zero; the infinite
        # ratio makes max()/min() fall back to the 0 and 1 limits instead
        normalized_min = max(0, 1 / 2 * (1 - self._span_ratio(below, above)))
        normalized_max = min(1, 1 / 2 * (1 + self._span_ratio(above, below)))
        normalized_mid = 0.5
        x, y = [self.vmin, self.midpoint, self.vmax], [normalized_min, normalized_mid, normalized_max]
        return np.ma.masked_array(np.interp(value, x, y), mask=np.ma.getmask(value))
    
def set_diag(self, values): 
    """Overwrite the main diagonal of a DataFrame in place.

    Parameters
    ----------
    self : pandas.DataFrame
        Frame to modify; it does not have to be square, the first
        min(n_rows, n_columns) diagonal cells are written.
    values : scalar or sequence
        A single value for every diagonal cell, or one value per cell.

    Raises
    ------
    ValueError
        If `values` cannot be broadcast to the diagonal length.
    """
    n = min(len(self.index), len(self.columns))
    diag_values = np.broadcast_to(values, (n,))
    # Write cell by cell through the positional accessor: indexing `.values` with a
    # list of index arrays is interpreted as a single array index by current NumPy
    # (whole rows get overwritten), and `.values` is not guaranteed to be a writable view.
    for pos in range(n):
        self.iat[pos, pos] = diag_values[pos]
# expose the helper as a DataFrame method
pd.DataFrame.set_diag = set_diag

In [None]:
#functions to compute p values for significance against a null distribution and for differences between pairs of models
def get_null_p(x,null):
    """Return the fraction of null values that `x` does not reach.

    Computed as one minus the share of entries for which x - null >= 0, so a
    null value equal to `x` counts in favour of `x`. The result can be exactly 0
    when `x` is at least as large as every null value.

    Parameters
    ----------
    x : float or array-like
        Observed statistic(s).
    null : array-like
        Statistics obtained from the null models.
    """
    reaches_null = np.subtract(x, null) >= 0
    return 1 - np.mean(reaches_null)

def get_exact_p(x,y):
    """Two-sided p value for the difference between paired samples `x` and `y`.

    Takes twice the smaller of the two tail fractions, mean(x - y >= 0) and
    mean(x - y <= 0). Ties (x == y) are counted in both tails, which could push
    the doubled value above 1, so the result is capped at 1.

    Parameters
    ----------
    x, y : array-like
        Paired values (e.g. performance of two models), broadcastable
        against each other.

    Returns
    -------
    float
        p value in [0, 1].
    """
    diff = np.subtract(x, y)
    pval = 2*np.min([np.mean(diff>=0), np.mean(diff<=0)])
    
    return np.minimum(pval, 1.0)

In [None]:
#load in results of the sex-specific models
#naming: first letter = sex (m/f), second letter = report (p = parent, s = self);
#`corr` / `var` follow the fc_corr_* / fc_var_* file names, `_null` files hold the null models

#parent-report models
pred_name = 'parent_report'

pred_sex = 'm'
mp_corr = np.load(f'{results_dir}/fc_corr_{pred_name}{pred_sex}.npy')
mp_corr_null = np.load(f'{results_dir}/fc_corr_{pred_name}{pred_sex}_null.npy')
mp_var = np.load(f'{results_dir}/fc_var_{pred_name}{pred_sex}.npy')
mp_var_null = np.load(f'{results_dir}/fc_var_{pred_name}{pred_sex}_null.npy')

pred_sex = 'f'
fp_corr = np.load(f'{results_dir}/fc_corr_{pred_name}{pred_sex}.npy')
fp_corr_null = np.load(f'{results_dir}/fc_corr_{pred_name}{pred_sex}_null.npy')
fp_var = np.load(f'{results_dir}/fc_var_{pred_name}{pred_sex}.npy')
fp_var_null = np.load(f'{results_dir}/fc_var_{pred_name}{pred_sex}_null.npy')

#self-report models
pred_name = 'self_report'

pred_sex = 'm'
ms_corr = np.load(f'{results_dir}/fc_corr_{pred_name}{pred_sex}.npy')
ms_corr_null = np.load(f'{results_dir}/fc_corr_{pred_name}{pred_sex}_null.npy')
ms_var = np.load(f'{results_dir}/fc_var_{pred_name}{pred_sex}.npy')
ms_var_null = np.load(f'{results_dir}/fc_var_{pred_name}{pred_sex}_null.npy')

pred_sex = 'f'
fs_corr = np.load(f'{results_dir}/fc_corr_{pred_name}{pred_sex}.npy')
fs_corr_null = np.load(f'{results_dir}/fc_corr_{pred_name}{pred_sex}_null.npy')
fs_var = np.load(f'{results_dir}/fc_var_{pred_name}{pred_sex}.npy')
fs_var_null = np.load(f'{results_dir}/fc_var_{pred_name}{pred_sex}_null.npy')




In [None]:
#compute p values by comparing the mean of each true model to its null models
#every list is ordered (female, male)
corr_pvals_p = [get_null_p(np.mean(true_scores), null_scores)
                for true_scores, null_scores in ((fp_corr, fp_corr_null), (mp_corr, mp_corr_null))]

corr_pvals_s = [get_null_p(np.mean(true_scores), null_scores)
                for true_scores, null_scores in ((fs_corr, fs_corr_null), (ms_corr, ms_corr_null))]

var_pvals_p = [get_null_p(np.mean(true_scores), null_scores)
               for true_scores, null_scores in ((fp_var, fp_var_null), (mp_var, mp_var_null))]

var_pvals_s = [get_null_p(np.mean(true_scores), null_scores)
               for true_scores, null_scores in ((fs_var, fs_var_null), (ms_var, ms_var_null))]



In [None]:
#print Benjamini-Hochberg corrected p values; the correction is applied
#separately to each (female, male) pair
fdr_kws = dict(alpha=0.05, method='fdr_bh', is_sorted=False, returnsorted=False)

print("Correlation")
for header, pvals in (("Corrected p values parent (female, male)", corr_pvals_p),
                      ("Corrected p values self (female, male)", corr_pvals_s)):
    print(header)
    #element [1] of the multipletests result holds the corrected p values
    print(fdr(pvals, **fdr_kws)[1])

print("")
print("Explained Variance")
for header, pvals in (("Corrected p values parent (female, male)", var_pvals_p),
                      ("Corrected p values self (female, male)", var_pvals_s)):
    print(header)
    print(fdr(pvals, **fdr_kws)[1])


In [None]:
#create dataframe with all the results data from true models
#one labelled table per sex/report combination; Train and Test carry the same report label
corr_self_m = pd.DataFrame(ms_corr, columns=['Accuracy']).assign(Sex='M', Train='Self', Test='Self')
corr_self_f = pd.DataFrame(fs_corr, columns=['Accuracy']).assign(Sex='F', Train='Self', Test='Self')
corr_parent_m = pd.DataFrame(mp_corr, columns=['Accuracy']).assign(Sex='M', Train='Parent', Test='Parent')
corr_parent_f = pd.DataFrame(fp_corr, columns=['Accuracy']).assign(Sex='F', Train='Parent', Test='Parent')

#stack the four tables and tag them as coming from the true models
data_acc = pd.concat([corr_self_f, corr_self_m, corr_parent_f, corr_parent_m]).assign(Model='True')

In [None]:
#create dataframe with all the results data from null models
#one labelled table per sex/report combination; Train and Test carry the same report label
corr_self_m_null = pd.DataFrame(ms_corr_null, columns=['Accuracy']).assign(Sex='M', Train='Self', Test='Self')
corr_self_f_null = pd.DataFrame(fs_corr_null, columns=['Accuracy']).assign(Sex='F', Train='Self', Test='Self')
corr_parent_m_null = pd.DataFrame(mp_corr_null, columns=['Accuracy']).assign(Sex='M', Train='Parent', Test='Parent')
corr_parent_f_null = pd.DataFrame(fp_corr_null, columns=['Accuracy']).assign(Sex='F', Train='Parent', Test='Parent')

#stack the four tables and tag them as coming from the null models
data_acc_null = pd.concat(
    [corr_self_f_null, corr_self_m_null, corr_parent_f_null, corr_parent_m_null]
).assign(Model='Null')


In [None]:
#create dataframe with all the results data from true models
#explained variance is multiplied by 100 (plotted later as a percentage)
var_self_m = pd.DataFrame(ms_var*100, columns=['Explained Variance']).assign(Sex='M', Train='Self', Test='Self')
var_self_f = pd.DataFrame(fs_var*100, columns=['Explained Variance']).assign(Sex='F', Train='Self', Test='Self')
var_parent_m = pd.DataFrame(mp_var*100, columns=['Explained Variance']).assign(Sex='M', Train='Parent', Test='Parent')
var_parent_f = pd.DataFrame(fp_var*100, columns=['Explained Variance']).assign(Sex='F', Train='Parent', Test='Parent')

#stack the four tables and tag them as coming from the true models
data_var = pd.concat([var_self_f, var_self_m, var_parent_f, var_parent_m]).assign(Model='True')


In [None]:
#create dataframe with all the results data from null models
#explained variance is multiplied by 100, as for the true models
var_self_m_null = pd.DataFrame(ms_var_null*100, columns=['Explained Variance']).assign(Sex='M', Train='Self', Test='Self')
var_self_f_null = pd.DataFrame(fs_var_null*100, columns=['Explained Variance']).assign(Sex='F', Train='Self', Test='Self')
var_parent_m_null = pd.DataFrame(mp_var_null*100, columns=['Explained Variance']).assign(Sex='M', Train='Parent', Test='Parent')
var_parent_f_null = pd.DataFrame(fp_var_null*100, columns=['Explained Variance']).assign(Sex='F', Train='Parent', Test='Parent')

#stack the four tables and tag them as coming from the null models
data_var_null = pd.concat(
    [var_self_f_null, var_self_m_null, var_parent_f_null, var_parent_m_null]
).assign(Model='Null')

In [None]:
#dataframes with all relevant results
#only the true-model tables are included; each is an independent copy
data_acc_all = data_acc.copy()
data_var_all = data_var.copy()


In [None]:
#plot explained variance and prediction accuracy results for gender prediction
#rows: AFAB (top) / AMAB (bottom); columns: explained variance (left) / accuracy (right)
nrows = 2
ncols = 2
dpi = 900
tight = True


fig, ax = plt.subplots(nrows=nrows, ncols=ncols, sharey=True, sharex=False, figsize=[15, 4], dpi=dpi, constrained_layout=tight)
sns.set(font_scale=1.5, style='white')

scale = 'width'
orient = 'h'
split = False
hue = 'Model'  #currently not passed to the violinplot calls below
inner = 'quartile'
cut = 0


ax[0,0] = sns.violinplot(ax=ax[0,0], x='Explained Variance', y='Train', scale=scale, orient=orient, palette='tab10', 
                    split=split, data=data_var_all[data_var_all.Sex=='F'], inner=inner, cut=cut)
ax[0,1] = sns.violinplot(ax=ax[0,1], x='Accuracy', y='Train', scale=scale, orient=orient, palette='tab10', 
                    split=split, data=data_acc_all[data_acc_all.Sex=='F'], inner=inner, cut=cut)

ax[1,0] = sns.violinplot(ax=ax[1,0], x='Explained Variance', y='Train', scale=scale, orient=orient, palette='tab10', 
                    split=split, data=data_var_all[data_var_all.Sex=='M'], inner=inner, cut=cut)
ax[1,1] = sns.violinplot(ax=ax[1,1], x='Accuracy', y='Train', scale=scale, orient=orient, palette='tab10', 
                    split=split, data=data_acc_all[data_acc_all.Sex=='M'], inner=inner, cut=cut)


fontsize=18


ax[0,0].set_ylabel('AFAB', fontsize=fontsize, weight='bold')
ax[0,1].set_ylabel('AFAB', fontsize=fontsize, weight='bold')


ax[1,0].set_ylabel('AMAB', fontsize=fontsize, weight='bold')
ax[1,1].set_ylabel('AMAB', fontsize=fontsize, weight='bold')

ax[0,1].set_xlabel('Prediction Accuracy', fontsize=fontsize, weight='bold')
ax[0,0].set_xlabel('Explained Variance (%)', fontsize=fontsize, weight='bold')

ax[1,1].set_xlabel('Prediction Accuracy', fontsize=fontsize, weight='bold')
ax[1,0].set_xlabel('Explained Variance (%)', fontsize=fontsize, weight='bold')


#the y axis is shared, so the tick labels are set on the first column only
measures = ['Self-Report', 'Parent-Report']
ax[0,0].set_yticklabels(measures)
ax[1,0].set_yticklabels(measures)


lw = 1
color = 'k'

#`ax` has shape (nrows, ncols): the first index runs over rows, the second over columns
#(the loop bounds used to be swapped, which only worked because the grid is square)
for i in range(nrows):
    for j in range(ncols):
        #dotted separator between the two reports and a solid reference line at zero
        ax[i,j].axhline(0.5,  linewidth=lw, color=color, dashes=(1, 4), zorder=0)
        ax[i,j].axvline(0,  linewidth=lw, color=color, dashes=(1, 0), zorder=0)
    
        ax[i,j].locator_params(axis='x', nbins=5)
        #ax[i,j].get_legend().remove()
        
#fixed x limits per column
ax[0,0].axis(xmin=-5,xmax=3)
ax[1,0].axis(xmin=-5,xmax=3)

ax[0,1].axis(xmin=-0.25,xmax=0.25)
ax[1,1].axis(xmin=-0.25,xmax=0.25)

    


#plt.savefig((results_dir + '/preds.png'), dpi=900, bbox_inches="tight") 





In [None]:
#load in results about feature weights (files named fc_featimp_haufe_*), as absolute values
#sex model and the two gender models fitted across all subjects
pred_sex = 'all'

pred_name = 'sex'
featimp_sex = np.abs(np.load(f'{results_dir}/fc_featimp_haufe_{pred_name}{pred_sex}.npy'))

pred_name = 'self_report'
featimp_gs = np.abs(np.load(f'{results_dir}/fc_featimp_haufe_{pred_name}{pred_sex}.npy'))

pred_name = 'parent_report'
featimp_gp = np.abs(np.load(f'{results_dir}/fc_featimp_haufe_{pred_name}{pred_sex}.npy'))

#sex-specific self-report models
pred_name = 'self_report'
pred_sex = 'm'
featimp_ms = np.abs(np.load(f'{results_dir}/fc_featimp_haufe_{pred_name}{pred_sex}.npy'))
pred_sex = 'f'
featimp_fs = np.abs(np.load(f'{results_dir}/fc_featimp_haufe_{pred_name}{pred_sex}.npy'))

#sex-specific parent-report models
pred_name = 'parent_report'
pred_sex = 'm'
featimp_mp = np.abs(np.load(f'{results_dir}/fc_featimp_haufe_{pred_name}{pred_sex}.npy'))
pred_sex = 'f'
featimp_fp = np.abs(np.load(f'{results_dir}/fc_featimp_haufe_{pred_name}{pred_sex}.npy'))

#average each model over its first axis and stack one row per model, in the order
#sex, self (all), parent (all), self (f), parent (f), self (m), parent (m)
featimp_all = np.vstack([
    np.mean(model_weights, axis=0).T
    for model_weights in (featimp_sex,
                          featimp_gs, featimp_gp,
                          featimp_fs, featimp_fp,
                          featimp_ms, featimp_mp)
])
#pairwise correlation between the rows (models)
corr_featimp = np.corrcoef(featimp_all)


In [None]:
#plot correlation between feature weights across models
vmin = 0
vmax = 1
cmap = 'Reds'
annot = True
dpi = 300
xticklabels = True
ticks = [0, 0.2, 0.4, 0.6, 0.8, 1.0]
cbar = True
tight = True
cbar_kws = {"orientation": "vertical", "ticks": ticks}

#Tick labels must follow the row order of featimp_all / corr_featimp:
#sex, self (all), parent (all), self (f), parent (f), self (m), parent (m).
#The 4th and 5th entries used to be swapped, mislabelling the
#Parent-Report AFAB and Self-Report AMAB rows and columns.
labels_figure_x = ['Sex', 'Self-Report,\nAll', 'Parent-Report,\nAll',
                 'Self-Report,\nAFAB',  'Parent-Report,\nAFAB', 
                 'Self-Report,\nAMAB', 'Parent-Report,\nAMAB']


labels_figure_y = ['Sex', 'Self-Report, All', 'Parent-Report, All',
                 'Self-Report, AFAB',  'Parent-Report, AFAB', 
                 'Self-Report, AMAB', 'Parent-Report, AMAB']


data = corr_featimp
data = pd.DataFrame(data, index=labels_figure_y, columns=labels_figure_y)


fig, ax = plt.subplots(nrows=1, ncols=1, figsize=[15, 5], dpi=dpi, constrained_layout=tight)
sns.set(font_scale=1.5, style="white")

#the color scale is centred on a correlation of 0.7
ax = sns.heatmap(data,  cbar=cbar, cmap=cmap, xticklabels=labels_figure_x, annot=annot, fmt=".2f", 
                 yticklabels=labels_figure_y, norm=MidpointNormalize(vmin=vmin, vmax=vmax, midpoint=0.7), 
                 cbar_kws=cbar_kws)

fontsize=18
#ax.set_title('Feature Importance Correlations', fontsize=fontsize)
#ax.set_ylabel('Model', fontsize=fontsize)
#ax.set_xlabel('Model', fontsize=fontsize)
colorbar = ax.collections[0].colorbar
colorbar.ax.tick_params(labelsize=fontsize, which='major')
colorbar.set_label("Correlation",fontsize=fontsize)
colorbar.outline.set_linewidth(1)

lw = 1
color = 'k'

#thick outer border at 0 and 7, thin dividers after rows/columns 1, 3 and 5 that
#group the models as: sex | all subjects | AFAB | AMAB
for position, width in ((0, 2), (1, lw), (3, lw), (5, lw), (7, 2)):
    ax.axhline(position,  linewidth=width, color=color, dashes=(1, 0))
    ax.axvline(position,  linewidth=width, color=color, dashes=(1, 0))



#plt.savefig((results_dir + '/featimp_correlations.png'), dpi=900, bbox_inches="tight") 




In [None]:
#load in results for sex/gender predictions across all subjects
#`corr` / `var` follow the fc_corr_* / fc_var_* file names, `_null` files hold the null models
pred_sex = 'all'

#sex model
pred_name = 'sex'
sex_corr = np.load(f'{results_dir}/fc_corr_{pred_name}{pred_sex}.npy')
sex_corr_null = np.load(f'{results_dir}/fc_corr_{pred_name}{pred_sex}_null.npy')
sex_var = np.load(f'{results_dir}/fc_var_{pred_name}{pred_sex}.npy')
sex_var_null = np.load(f'{results_dir}/fc_var_{pred_name}{pred_sex}_null.npy')

#self-report gender model
pred_name = 'self_report'
gs_corr = np.load(f'{results_dir}/fc_corr_{pred_name}{pred_sex}.npy')
gs_corr_null = np.load(f'{results_dir}/fc_corr_{pred_name}{pred_sex}_null.npy')
gs_var = np.load(f'{results_dir}/fc_var_{pred_name}{pred_sex}.npy')
gs_var_null = np.load(f'{results_dir}/fc_var_{pred_name}{pred_sex}_null.npy')

#parent-report gender model
pred_name = 'parent_report'
gp_corr = np.load(f'{results_dir}/fc_corr_{pred_name}{pred_sex}.npy')
gp_corr_null = np.load(f'{results_dir}/fc_corr_{pred_name}{pred_sex}_null.npy')
gp_var = np.load(f'{results_dir}/fc_var_{pred_name}{pred_sex}.npy')
gp_var_null = np.load(f'{results_dir}/fc_var_{pred_name}{pred_sex}_null.npy')



In [None]:
#evaluate model significance: mean of each true model against its null models
#(each result is kept as a one-element list)

#sex model
sex_corr_mean, sex_var_mean = np.mean(sex_corr), np.mean(sex_var)
corr_pvals_sex = [get_null_p(sex_corr_mean, sex_corr_null)]
var_pvals_sex = [get_null_p(sex_var_mean, sex_var_null)]
print(corr_pvals_sex, var_pvals_sex)

#self-report gender model
gs_corr_mean, gs_var_mean = np.mean(gs_corr), np.mean(gs_var)
corr_pvals_gs = [get_null_p(gs_corr_mean, gs_corr_null)]
var_pvals_gs = [get_null_p(gs_var_mean, gs_var_null)]
print(corr_pvals_gs, var_pvals_gs)

#parent-report gender model
gp_corr_mean, gp_var_mean = np.mean(gp_corr), np.mean(gp_var)
corr_pvals_gp = [get_null_p(gp_corr_mean, gp_corr_null)]
var_pvals_gp = [get_null_p(gp_var_mean, gp_var_null)]
print(corr_pvals_gp, var_pvals_gp)

In [None]:
#save featimp data as csv
#np.savetxt((results_dir + '/featimp_sex.csv'), featimp_sex, delimiter=',') 
#np.savetxt((results_dir + '/featimp_ms.csv'), featimp_ms, delimiter=',') 
#np.savetxt((results_dir + '/featimp_mp.csv'), featimp_mp, delimiter=',') 
#np.savetxt((results_dir + '/featimp_fs.csv'), featimp_fs, delimiter=',') 
#np.savetxt((results_dir + '/featimp_fp.csv'), featimp_fp, delimiter=',') 
#np.savetxt((results_dir + '/featimp_gs.csv'), featimp_gs, delimiter=',') 
#np.savetxt((results_dir + '/featimp_gp.csv'), featimp_gp, delimiter=',') 



In [None]:
#read featimp data as matrices (converted in matlab); the files have no header row
featimp_sex_mat = pd.read_csv(f'{results_dir}/featimp_mat_sex.csv', header=None).values
featimp_ms_mat = pd.read_csv(f'{results_dir}/featimp_mat_ms.csv', header=None).values
featimp_mp_mat = pd.read_csv(f'{results_dir}/featimp_mat_mp.csv', header=None).values
featimp_fs_mat = pd.read_csv(f'{results_dir}/featimp_mat_fs.csv', header=None).values
featimp_fp_mat = pd.read_csv(f'{results_dir}/featimp_mat_fp.csv', header=None).values
featimp_gs_mat = pd.read_csv(f'{results_dir}/featimp_mat_gs.csv', header=None).values
featimp_gp_mat = pd.read_csv(f'{results_dir}/featimp_mat_gp.csv', header=None).values




In [None]:
#read in network mapping labels as a flat 1-D array (the file has no header row)
network_mapping_path = os.path.join(results_dir, 'network_mapping.csv')
network_mapping = pd.read_csv(network_mapping_path, header=None).values.ravel()


In [None]:
#summarize regional feature weights to network-level weights
#the largest label in network_mapping is used as the number of networks (labels start at 1)
n_networks = np.max(np.unique(network_mapping))


def _network_block_means(region_mat, mapping, n_nets):
    """Average a region-by-region matrix within every pair of networks.

    Entry [j, k] of the result is the mean of the rows labelled j+1 and the
    columns labelled k+1 in `mapping`.
    """
    block_means = np.zeros([n_nets, n_nets])
    for row_net in range(n_nets):
        in_row_net = mapping == (row_net + 1)
        for col_net in range(n_nets):
            in_col_net = mapping == (col_net + 1)
            block_means[row_net, col_net] = np.mean(region_mat[in_row_net, :][:, in_col_net])
    return block_means


net_feat_sex = _network_block_means(featimp_sex_mat, network_mapping, n_networks)
net_feat_ms = _network_block_means(featimp_ms_mat, network_mapping, n_networks)
net_feat_mp = _network_block_means(featimp_mp_mat, network_mapping, n_networks)
net_feat_fs = _network_block_means(featimp_fs_mat, network_mapping, n_networks)
net_feat_fp = _network_block_means(featimp_fp_mat, network_mapping, n_networks)
net_feat_gs = _network_block_means(featimp_gs_mat, network_mapping, n_networks)
net_feat_gp = _network_block_means(featimp_gp_mat, network_mapping, n_networks)


In [None]:
#network names, used as tick labels of the network-level heatmap (one entry per row/column)
#NOTE(review): presumably entry i names the network labelled i+1 in network_mapping,
#with repeated names marking consecutive sub-networks - confirm
networks = ['Temporal Parietal', 'Default', 'Default', 'Default', 'Control',
           'Control', 'Control', 'Limbic', 'Limbic', 'Ventral Attention',
           'Ventral Attention', 'Dorsal Attention', 'Dorsal Attention', 'Somatomotor', 
           'Somatomotor', 'Visual', 'Visual', 'Subcortical']


In [None]:
#plot featimp data (network-level) for sex predictions
# Figure layout: one high-resolution panel.
nrows, ncols = 1, 1
dpi = 900
tight = True

sns.set(font_scale=1.5, style="white")
fig, ax = plt.subplots(nrows=nrows, ncols=ncols, sharey=True, sharex=True,
                       figsize=[7.5, 6.5], dpi=dpi, constrained_layout=tight)

# Colour scale settings for the heatmap and its colourbar.
vmin, vmax = 0, 1
cmap_pos = 'turbo'
ticks_pos = [0, .2, .4, .6, .8, 1]
cbar_kws_pos = {"orientation": "vertical", "ticks": ticks_pos}

# Display switches. xticklabels/yticklabels are defined here but the heatmap
# call below passes the network names as tick labels instead.
annot = False
xticklabels = True
yticklabels = True
cbar = True

# Divide by the largest entry so the maximum plotted value is 1.
data = net_feat_sex
data = data/np.max(data)

color_norm = MidpointNormalize(vmin=vmin, vmax=vmax, midpoint=0.50)
ax = sns.heatmap(data, ax=ax, cbar=cbar, cmap=cmap_pos, annot=annot,
                 xticklabels=networks, yticklabels=networks,
                 norm=color_norm, cbar_kws=cbar_kws_pos)

# Title and axis labels share one text style.
fontsize=18
label_style = dict(fontsize=fontsize, weight='bold')
ax.set_title('Sex', **label_style)
ax.set_ylabel('Network', **label_style)
ax.set_xlabel('Network', **label_style)

# Colourbar cosmetics.
cbar1 = ax.collections[0].colorbar
cbar1.ax.tick_params(labelsize=fontsize, which='major')
cbar1.set_label("Network-Level Associations",fontsize=fontsize)
cbar1.outline.set_linewidth(1)

# Guide lines at the positions where the label in `networks` changes:
# solid on the outer border (0 and 18), dotted in between.
lw = 1
color = 'k'
solid_dashes, dotted_dashes = (1, 0), (1, 5)
for edge in (0, 1, 4, 7, 9, 11, 13, 15, 17, 18):
    edge_dashes = solid_dashes if edge in (0, 18) else dotted_dashes
    ax.axhline(edge,  linewidth=lw, color=color, dashes=edge_dashes)
    ax.axvline(edge,  linewidth=lw, color=color, dashes=edge_dashes)

plt.savefig((results_dir + '/featimp_sex.png'), dpi=900, bbox_inches="tight")

    



In [None]:
#plot featimp data (network-level) for self report gender predictions (AFAB)
# Figure layout: one high-resolution panel.
nrows, ncols = 1, 1
dpi = 900
tight = True

sns.set(font_scale=1.5, style="white")
fig, ax = plt.subplots(nrows=nrows, ncols=ncols, sharey=True, sharex=True,
                       figsize=[7.5, 6.5], dpi=dpi, constrained_layout=tight)

# Colour scale settings for the heatmap and its colourbar.
vmin, vmax = 0, 1
cmap_pos = 'turbo'
ticks_pos = [0, .2, .4, .6, .8, 1]
cbar_kws_pos = {"orientation": "vertical", "ticks": ticks_pos}

# Display switches. xticklabels/yticklabels are defined here but the heatmap
# call below passes the network names as tick labels instead.
annot = False
xticklabels = True
yticklabels = True
cbar = True

# Divide by the largest entry so the maximum plotted value is 1.
data = net_feat_fs
data = data/np.max(data)

color_norm = MidpointNormalize(vmin=vmin, vmax=vmax, midpoint=0.50)
ax = sns.heatmap(data, ax=ax, cbar=cbar, cmap=cmap_pos, annot=annot,
                 xticklabels=networks, yticklabels=networks,
                 norm=color_norm, cbar_kws=cbar_kws_pos)

# Title and axis labels share one text style.
fontsize=18
label_style = dict(fontsize=fontsize, weight='bold')
ax.set_title('Self-Report, AFAB', **label_style)
ax.set_ylabel('Network', **label_style)
ax.set_xlabel('Network', **label_style)

# Colourbar cosmetics.
cbar1 = ax.collections[0].colorbar
cbar1.ax.tick_params(labelsize=fontsize, which='major')
cbar1.set_label("Network-Level Associations",fontsize=fontsize)
cbar1.outline.set_linewidth(1)

# Guide lines at the positions where the label in `networks` changes:
# solid on the outer border (0 and 18), dotted in between.
lw = 1
color = 'k'
solid_dashes, dotted_dashes = (1, 0), (1, 5)
for edge in (0, 1, 4, 7, 9, 11, 13, 15, 17, 18):
    edge_dashes = solid_dashes if edge in (0, 18) else dotted_dashes
    ax.axhline(edge,  linewidth=lw, color=color, dashes=edge_dashes)
    ax.axvline(edge,  linewidth=lw, color=color, dashes=edge_dashes)

plt.savefig((results_dir + '/featimp_fs.png'), dpi=900, bbox_inches="tight")

    



In [None]:
#plot featimp data (network-level) for parent report gender predictions (AFAB)
# Figure layout: one high-resolution panel.
nrows, ncols = 1, 1
dpi = 900
tight = True

sns.set(font_scale=1.5, style="white")
fig, ax = plt.subplots(nrows=nrows, ncols=ncols, sharey=True, sharex=True,
                       figsize=[7.5, 6.5], dpi=dpi, constrained_layout=tight)

# Colour scale settings for the heatmap and its colourbar.
vmin, vmax = 0, 1
cmap_pos = 'turbo'
ticks_pos = [0, .2, .4, .6, .8, 1]
cbar_kws_pos = {"orientation": "vertical", "ticks": ticks_pos}

# Display switches. xticklabels/yticklabels are defined here but the heatmap
# call below passes the network names as tick labels instead.
annot = False
xticklabels = True
yticklabels = True
cbar = True

# Divide by the largest entry so the maximum plotted value is 1.
data = net_feat_fp
data = data/np.max(data)

color_norm = MidpointNormalize(vmin=vmin, vmax=vmax, midpoint=0.50)
ax = sns.heatmap(data, ax=ax, cbar=cbar, cmap=cmap_pos, annot=annot,
                 xticklabels=networks, yticklabels=networks,
                 norm=color_norm, cbar_kws=cbar_kws_pos)

# Title and axis labels share one text style.
fontsize=18
label_style = dict(fontsize=fontsize, weight='bold')
ax.set_title('Parent-Report, AFAB', **label_style)
ax.set_ylabel('Network', **label_style)
ax.set_xlabel('Network', **label_style)

# Colourbar cosmetics.
cbar1 = ax.collections[0].colorbar
cbar1.ax.tick_params(labelsize=fontsize, which='major')
cbar1.set_label("Network-Level Associations",fontsize=fontsize)
cbar1.outline.set_linewidth(1)

# Guide lines at the positions where the label in `networks` changes:
# solid on the outer border (0 and 18), dotted in between.
lw = 1
color = 'k'
solid_dashes, dotted_dashes = (1, 0), (1, 5)
for edge in (0, 1, 4, 7, 9, 11, 13, 15, 17, 18):
    edge_dashes = solid_dashes if edge in (0, 18) else dotted_dashes
    ax.axhline(edge,  linewidth=lw, color=color, dashes=edge_dashes)
    ax.axvline(edge,  linewidth=lw, color=color, dashes=edge_dashes)

plt.savefig((results_dir + '/featimp_fp.png'), dpi=900, bbox_inches="tight")

    



In [None]:
#plot featimp data (network-level) for self report gender predictions (AMAB)
# Figure layout: one high-resolution panel.
nrows, ncols = 1, 1
dpi = 900
tight = True

sns.set(font_scale=1.5, style="white")
fig, ax = plt.subplots(nrows=nrows, ncols=ncols, sharey=True, sharex=True,
                       figsize=[7.5, 6.5], dpi=dpi, constrained_layout=tight)

# Colour scale settings for the heatmap and its colourbar.
vmin, vmax = 0, 1
cmap_pos = 'turbo'
ticks_pos = [0, .2, .4, .6, .8, 1]
cbar_kws_pos = {"orientation": "vertical", "ticks": ticks_pos}

# Display switches. xticklabels/yticklabels are defined here but the heatmap
# call below passes the network names as tick labels instead.
annot = False
xticklabels = True
yticklabels = True
cbar = True

# Divide by the largest entry so the maximum plotted value is 1.
data = net_feat_ms
data = data/np.max(data)

color_norm = MidpointNormalize(vmin=vmin, vmax=vmax, midpoint=0.50)
ax = sns.heatmap(data, ax=ax, cbar=cbar, cmap=cmap_pos, annot=annot,
                 xticklabels=networks, yticklabels=networks,
                 norm=color_norm, cbar_kws=cbar_kws_pos)

# Title and axis labels share one text style.
fontsize=18
label_style = dict(fontsize=fontsize, weight='bold')
ax.set_title('Self-Report, AMAB', **label_style)
ax.set_ylabel('Network', **label_style)
ax.set_xlabel('Network', **label_style)

# Colourbar cosmetics.
cbar1 = ax.collections[0].colorbar
cbar1.ax.tick_params(labelsize=fontsize, which='major')
cbar1.set_label("Network-Level Associations",fontsize=fontsize)
cbar1.outline.set_linewidth(1)

# Guide lines at the positions where the label in `networks` changes:
# solid on the outer border (0 and 18), dotted in between.
lw = 1
color = 'k'
solid_dashes, dotted_dashes = (1, 0), (1, 5)
for edge in (0, 1, 4, 7, 9, 11, 13, 15, 17, 18):
    edge_dashes = solid_dashes if edge in (0, 18) else dotted_dashes
    ax.axhline(edge,  linewidth=lw, color=color, dashes=edge_dashes)
    ax.axvline(edge,  linewidth=lw, color=color, dashes=edge_dashes)

plt.savefig((results_dir + '/featimp_ms.png'), dpi=900, bbox_inches="tight")

    



In [None]:
#plot featimp data (network-level) for parent report gender predictions (AMAB)
# Figure layout: one high-resolution panel.
nrows, ncols = 1, 1
dpi = 900
tight = True

sns.set(font_scale=1.5, style="white")
fig, ax = plt.subplots(nrows=nrows, ncols=ncols, sharey=True, sharex=True,
                       figsize=[7.5, 6.5], dpi=dpi, constrained_layout=tight)

# Colour scale settings for the heatmap and its colourbar.
vmin, vmax = 0, 1
cmap_pos = 'turbo'
ticks_pos = [0, .2, .4, .6, .8, 1]
cbar_kws_pos = {"orientation": "vertical", "ticks": ticks_pos}

# Display switches. xticklabels/yticklabels are defined here but the heatmap
# call below passes the network names as tick labels instead.
annot = False
xticklabels = True
yticklabels = True
cbar = True

# Divide by the largest entry so the maximum plotted value is 1.
data = net_feat_mp
data = data/np.max(data)

color_norm = MidpointNormalize(vmin=vmin, vmax=vmax, midpoint=0.50)
ax = sns.heatmap(data, ax=ax, cbar=cbar, cmap=cmap_pos, annot=annot,
                 xticklabels=networks, yticklabels=networks,
                 norm=color_norm, cbar_kws=cbar_kws_pos)

# Title and axis labels share one text style.
fontsize=18
label_style = dict(fontsize=fontsize, weight='bold')
ax.set_title('Parent-Report, AMAB', **label_style)
ax.set_ylabel('Network', **label_style)
ax.set_xlabel('Network', **label_style)

# Colourbar cosmetics.
cbar1 = ax.collections[0].colorbar
cbar1.ax.tick_params(labelsize=fontsize, which='major')
cbar1.set_label("Network-Level Associations",fontsize=fontsize)
cbar1.outline.set_linewidth(1)

# Guide lines at the positions where the label in `networks` changes:
# solid on the outer border (0 and 18), dotted in between.
lw = 1
color = 'k'
solid_dashes, dotted_dashes = (1, 0), (1, 5)
for edge in (0, 1, 4, 7, 9, 11, 13, 15, 17, 18):
    edge_dashes = solid_dashes if edge in (0, 18) else dotted_dashes
    ax.axhline(edge,  linewidth=lw, color=color, dashes=edge_dashes)
    ax.axvline(edge,  linewidth=lw, color=color, dashes=edge_dashes)

plt.savefig((results_dir + '/featimp_mp.png'), dpi=900, bbox_inches="tight")

    



In [None]:
#plot featimp data (network-level) for self report gender predictions across all individuals
# Figure layout: one high-resolution panel.
nrows, ncols = 1, 1
dpi = 900
tight = True

sns.set(font_scale=1.5, style="white")
fig, ax = plt.subplots(nrows=nrows, ncols=ncols, sharey=True, sharex=True,
                       figsize=[7.5, 6.5], dpi=dpi, constrained_layout=tight)

# Colour scale settings for the heatmap and its colourbar.
vmin, vmax = 0, 1
cmap_pos = 'turbo'
ticks_pos = [0, .2, .4, .6, .8, 1]
cbar_kws_pos = {"orientation": "vertical", "ticks": ticks_pos}

# Display switches. xticklabels/yticklabels are defined here but the heatmap
# call below passes the network names as tick labels instead.
annot = False
xticklabels = True
yticklabels = True
cbar = True

# Divide by the largest entry so the maximum plotted value is 1.
data = net_feat_gs
data = data/np.max(data)

color_norm = MidpointNormalize(vmin=vmin, vmax=vmax, midpoint=0.50)
ax = sns.heatmap(data, ax=ax, cbar=cbar, cmap=cmap_pos, annot=annot,
                 xticklabels=networks, yticklabels=networks,
                 norm=color_norm, cbar_kws=cbar_kws_pos)

# Title and axis labels share one text style.
fontsize=18
label_style = dict(fontsize=fontsize, weight='bold')
ax.set_title('Self-Report, All', **label_style)
ax.set_ylabel('Network', **label_style)
ax.set_xlabel('Network', **label_style)

# Colourbar cosmetics.
cbar1 = ax.collections[0].colorbar
cbar1.ax.tick_params(labelsize=fontsize, which='major')
cbar1.set_label("Network-Level Associations",fontsize=fontsize)
cbar1.outline.set_linewidth(1)

# Guide lines at the positions where the label in `networks` changes:
# solid on the outer border (0 and 18), dotted in between.
lw = 1
color = 'k'
solid_dashes, dotted_dashes = (1, 0), (1, 5)
for edge in (0, 1, 4, 7, 9, 11, 13, 15, 17, 18):
    edge_dashes = solid_dashes if edge in (0, 18) else dotted_dashes
    ax.axhline(edge,  linewidth=lw, color=color, dashes=edge_dashes)
    ax.axvline(edge,  linewidth=lw, color=color, dashes=edge_dashes)

plt.savefig((results_dir + '/featimp_gs.png'), dpi=900, bbox_inches="tight")

    



In [None]:
#plot featimp data (network-level) for parent report gender predictions across all individuals
# Figure layout: one high-resolution panel.
nrows, ncols = 1, 1
dpi = 900
tight = True

sns.set(font_scale=1.5, style="white")
fig, ax = plt.subplots(nrows=nrows, ncols=ncols, sharey=True, sharex=True,
                       figsize=[7.5, 6.5], dpi=dpi, constrained_layout=tight)

# Colour scale settings for the heatmap and its colourbar.
vmin, vmax = 0, 1
cmap_pos = 'turbo'
ticks_pos = [0, .2, .4, .6, .8, 1]
cbar_kws_pos = {"orientation": "vertical", "ticks": ticks_pos}

# Display switches. xticklabels/yticklabels are defined here but the heatmap
# call below passes the network names as tick labels instead.
annot = False
xticklabels = True
yticklabels = True
cbar = True

# Divide by the largest entry so the maximum plotted value is 1.
data = net_feat_gp
data = data/np.max(data)

color_norm = MidpointNormalize(vmin=vmin, vmax=vmax, midpoint=0.50)
ax = sns.heatmap(data, ax=ax, cbar=cbar, cmap=cmap_pos, annot=annot,
                 xticklabels=networks, yticklabels=networks,
                 norm=color_norm, cbar_kws=cbar_kws_pos)

# Title and axis labels share one text style.
fontsize=18
label_style = dict(fontsize=fontsize, weight='bold')
ax.set_title('Parent-Report, All', **label_style)
ax.set_ylabel('Network', **label_style)
ax.set_xlabel('Network', **label_style)

# Colourbar cosmetics.
cbar1 = ax.collections[0].colorbar
cbar1.ax.tick_params(labelsize=fontsize, which='major')
cbar1.set_label("Network-Level Associations",fontsize=fontsize)
cbar1.outline.set_linewidth(1)

# Guide lines at the positions where the label in `networks` changes:
# solid on the outer border (0 and 18), dotted in between.
lw = 1
color = 'k'
solid_dashes, dotted_dashes = (1, 0), (1, 5)
for edge in (0, 1, 4, 7, 9, 11, 13, 15, 17, 18):
    edge_dashes = solid_dashes if edge in (0, 18) else dotted_dashes
    ax.axhline(edge,  linewidth=lw, color=color, dashes=edge_dashes)
    ax.axvline(edge,  linewidth=lw, color=color, dashes=edge_dashes)

plt.savefig((results_dir + '/featimp_gp.png'), dpi=900, bbox_inches="tight")

    



In [None]:
#organize results data for sex predictions (true and null models)
# Each frame holds one metric column plus two tag columns:
#   'Train' - the label the model was trained on (always 'Sex' in this cell)
#   'Model' - 'True' for the fitted model, 'Null' for the null model
sex_train_label = 'Sex'

# Prediction accuracy, true model.
corr_sex = pd.DataFrame(sex_corr)
corr_sex.columns = ['Accuracy']
corr_sex = corr_sex.assign(Train=sex_train_label, Model='True')

# Explained variance, true model; multiplied by 100 (plotted later as a percentage).
var_sex = pd.DataFrame(sex_var*100)
var_sex.columns = ['Explained Variance']
var_sex = var_sex.assign(Train=sex_train_label, Model='True')

# Prediction accuracy, null model.
# Bug fix: the original line ended in `'Null'8`, a SyntaxError that stopped
# the whole cell from running.
corr_sex_null = pd.DataFrame(sex_corr_null)
corr_sex_null.columns = ['Accuracy']
corr_sex_null = corr_sex_null.assign(Train=sex_train_label, Model='Null')

# Explained variance, null model; multiplied by 100 like the true-model values.
var_sex_null = pd.DataFrame(sex_var_null*100)
var_sex_null.columns = ['Explained Variance']
var_sex_null = var_sex_null.assign(Train=sex_train_label, Model='Null')

# Only the true-model frames go into the "*_all" tables; the null frames
# built above are not concatenated here.
corr_sex_all = pd.concat([corr_sex])
var_sex_all = pd.concat([var_sex])



In [None]:
#organize results data for self-report gender predictions across all individuals
# Each frame holds one metric column plus 'Train' and 'Model' tag columns.
gs_train_label = 'Gender, Self-Report'

# Prediction accuracy, true model.
corr_gs = pd.DataFrame(gs_corr)
corr_gs.columns = ['Accuracy']
corr_gs = corr_gs.assign(Train=gs_train_label, Model='True')

# Explained variance, true model; multiplied by 100.
var_gs = pd.DataFrame(gs_var*100)
var_gs.columns = ['Explained Variance']
var_gs = var_gs.assign(Train=gs_train_label, Model='True')

# Prediction accuracy, null model.
corr_gs_null = pd.DataFrame(gs_corr_null)
corr_gs_null.columns = ['Accuracy']
corr_gs_null = corr_gs_null.assign(Train=gs_train_label, Model='Null')

# Explained variance, null model; multiplied by 100.
var_gs_null = pd.DataFrame(gs_var_null*100)
var_gs_null.columns = ['Explained Variance']
var_gs_null = var_gs_null.assign(Train=gs_train_label, Model='Null')

# Only the true-model frames are collected into the "*_all" tables.
corr_gs_all = pd.concat([corr_gs])
var_gs_all = pd.concat([var_gs])



In [None]:
#organize results data for parent-report gender predictions across all individuals
# Each frame holds one metric column plus 'Train' and 'Model' tag columns.
gp_train_label = 'Gender, Parent-Report'

# Prediction accuracy, true model.
corr_gp = pd.DataFrame(gp_corr)
corr_gp.columns = ['Accuracy']
corr_gp = corr_gp.assign(Train=gp_train_label, Model='True')

# Explained variance, true model; multiplied by 100.
var_gp = pd.DataFrame(gp_var*100)
var_gp.columns = ['Explained Variance']
var_gp = var_gp.assign(Train=gp_train_label, Model='True')

# Prediction accuracy, null model.
corr_gp_null = pd.DataFrame(gp_corr_null)
corr_gp_null.columns = ['Accuracy']
corr_gp_null = corr_gp_null.assign(Train=gp_train_label, Model='Null')

# Explained variance, null model; multiplied by 100.
var_gp_null = pd.DataFrame(gp_var_null*100)
var_gp_null.columns = ['Explained Variance']
var_gp_null = var_gp_null.assign(Train=gp_train_label, Model='Null')

# Only the true-model frames are collected into the "*_all" tables.
corr_gp_all = pd.concat([corr_gp])
var_gp_all = pd.concat([var_gp])


In [None]:
#organize results data for predictions across all individuals
# Stack the true-model frames for sex, self-report gender and parent-report
# gender (in that order) into one long table per metric.
accuracy_frames = [corr_sex, corr_gs, corr_gp]
variance_frames = [var_sex, var_gs, var_gp]

corr_sg_all = pd.concat(accuracy_frames)
var_sg_all = pd.concat(variance_frames)

In [None]:
#plot explained variance and prediction accuracy results for predictions across all individuals
# Figure layout: two side-by-side panels sharing the y axis.
nrows, ncols = 1, 2
dpi = 900
tight = True

fig, ax = plt.subplots(nrows=nrows, ncols=ncols, sharex=False, sharey=True,
                       figsize=[15, 3], dpi=dpi, constrained_layout=tight)
sns.set(font_scale=1.5, style='white')

# Violin plot settings. `hue` is defined here but not passed to the plots below.
scale = 'width'
orient = 'h'
split = False
hue = 'Model'
inner = 'quartile'
cut = 0
cmap='Dark2'

# Keyword arguments common to both panels.
violin_kws = dict(y='Train', scale=scale, orient=orient, palette=cmap,
                  split=split, inner=inner, cut=cut)

# Left panel: explained variance; right panel: prediction accuracy.
ax[0] = sns.violinplot(ax=ax[0], x='Explained Variance', data=var_sg_all, **violin_kws)
ax[1] = sns.violinplot(ax=ax[1], x='Accuracy', data=corr_sg_all, **violin_kws)

fontsize=18
label_style = dict(fontsize=fontsize, weight='bold')

# Row labels, one per 'Train' category.
measures = ['Sex', 'Gender, Self-Report', 'Gender, Parent-Report']
ax[0].set_yticklabels(measures)

ax[0].set_xlabel('Explained Variance (%)', **label_style)
ax[1].set_xlabel('Prediction Accuracy', **label_style)

# Blank out the y-axis titles on both panels.
for panel in ax:
    panel.set_ylabel('', **label_style)

lw = 1
color = 'black'

for panel in ax:
    # Solid reference line at x = 0, drawn behind the violins.
    panel.axvline(0,  linewidth=lw, color=color, dashes=(1, 0), zorder=0)
    # Dotted separators between the three rows.
    for row_boundary in (0.5, 1.5):
        panel.axhline(row_boundary,  linewidth=lw, color=color, dashes=(1, 4), zorder=0)
    panel.locator_params(axis='x', nbins=4)

# Saving is currently disabled; uncomment to write the figure to results_dir.
#plt.savefig((results_dir + '/preds_sexgenderall.png'), dpi=900, bbox_inches="tight") 

