In [1]:
import os
import glob
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from ipywidgets import interact, interactive, fixed, interact_manual
import ipywidgets as widgets
import monai
from monai.transforms import AddChannel, Compose, RandRotate90, Resize, ScaleIntensity, Flip, ToTensor
from monai.data import ImageDataset
import nibabel as nib

ModuleNotFoundError: No module named 'ipywidgets'

In [2]:
# Locations of the image data and phenotype tables for the two cohorts.
# ABCD_data maps a preprocessing-pipeline name ('fmriprep' / 'freesurfer') to its image directory.
ABCD_data = {'fmriprep':'/scratch/connectome/3DCNN/data/1.ABCD/1.sMRI_fmriprep/preprocessed_masked',
             'freesurfer':'/scratch/connectome/3DCNN/data/1.ABCD/2.sMRI_freesurfer'}
# NOTE: despite the `_dir` suffix, the *_phenotype_dir names hold paths to CSV files, not directories.
ABCD_phenotype_dir = '/scratch/connectome/3DCNN/data/1.ABCD/4.demo_qc/ABCD_phenotype_total.csv'

# UKB has a single image directory (no per-pipeline mapping).
UKB_data_dir = '/scratch/connectome/3DCNN/data/2.UKB/1.sMRI_fs_cropped/'
UKB_phenotype_dir = '/scratch/connectome/3DCNN/data/2.UKB/2.demo_qc/UKB_phenotype.csv'

def loading_images(image_dir, args):
    """Collect the image files stored directly under ``image_dir``.

    Files matching ``*.npy`` or ``*.nii.gz`` are gathered (non-recursively)
    and returned as one lexicographically sorted Series of paths.

    Parameters
    ----------
    image_dir : str
        Directory to search.
    args : object
        Unused; kept so existing callers keep working.

    Returns
    -------
    pandas.Series
        Sorted file paths with a unique 0..n-1 index. When nothing matches,
        an empty Series of dtype ``object`` is returned.
    """
    patterns = ('*.npy', '*.nii.gz')
    paths = [path
             for pattern in patterns
             for path in glob.glob(f'{image_dir}/{pattern}')]
    # Sorting the plain list and building the Series once avoids the duplicate
    # index labels that concatenating two separately indexed Series produces.
    # dtype=object keeps the empty case a string-like Series.
    return pd.Series(sorted(paths), dtype=object)

def loading_phenotype(phenotype_dir, target_list, args):
    """Read the phenotype CSV and return it ordered by the subject-ID column.

    Parameters
    ----------
    phenotype_dir : str
        Path passed straight to ``pd.read_csv``.
    target_list, args : object
        Not used by this function.

    Returns
    -------
    pandas.DataFrame
        All rows of the CSV, sorted by the column named by the module-level
        ``subjectkey`` variable, with a fresh 0..n-1 index.
    """
    # Rows containing NA values are kept: no dropna step is applied here.
    return (
        pd.read_csv(phenotype_dir)
        .sort_values(by=subjectkey)
        .reset_index(drop=True)  # discard the pre-sort row labels
    )

def combining_image_target(subject_data, image_files, target_list): # revising
    """Attach each subject's image path to its phenotype row.

    The subject ID of an image is taken from its file name: everything before
    the first ``.`` of the base name. IDs are parsed as ``int`` or ``str`` so
    they compare equal to the values in the phenotype key column, whose name
    is given by the module-level ``subjectkey`` variable.

    Parameters
    ----------
    subject_data : pandas.DataFrame
        Phenotype table containing the ``subjectkey`` column.
    image_files : pandas.Series
        Image file paths.
    target_list : object
        Not used by this function.

    Returns
    -------
    pandas.DataFrame
        Inner join of ``subject_data`` with an ``image_files`` column; subjects
        without an image, and images without a phenotype row, are dropped.

    Raises
    ------
    TypeError
        If the key column holds neither strings nor integers.
    """
    keys = subject_data[subjectkey]

    # Pick the parser for the file-name stems. The dtype is checked first so
    # the decision does not depend on the frame's index labels; for object
    # columns the first value (by position) is inspected. An empty frame falls
    # back to string IDs, because the inner join is empty in that case anyway.
    if pd.api.types.is_integer_dtype(keys.dtype):
        cast = int
    elif len(keys) == 0 or isinstance(keys.iloc[0], str):
        cast = str
    elif pd.api.types.is_integer(keys.iloc[0]):
        cast = int
    else:
        raise TypeError(
            f"Unsupported type for column '{subjectkey}': "
            f"{type(keys.iloc[0]).__name__}; expected str or int."
        )

    image_subjectkeys = image_files.map(
        lambda path: cast(os.path.basename(path).split('.')[0])
    )

    image_list = pd.DataFrame({subjectkey: image_subjectkeys, 'image_files': image_files})
    return pd.merge(subject_data, image_list, how='inner', on=subjectkey)

In [3]:
# Run configuration: which cohort and which preprocessing pipeline to load.
args = None
target_list = None
dataset = 'ABCD' # 'ABCD'; any other value selects the UKB paths
data = 'freesurfer'

# Resolve the cohort-specific locations and the name of the subject-ID column.
# `subjectkey` is a module-level name that loading_phenotype and
# combining_image_target read directly, so it must be set before they run.
if dataset == 'ABCD':
    image_dir = ABCD_data[data]
    phenotype_dir = ABCD_phenotype_dir
    subjectkey = 'subjectkey'
else:
    image_dir = UKB_data_dir
    phenotype_dir = UKB_phenotype_dir
    subjectkey = 'eid'

image_files = loading_images(image_dir, args)
subject_data = loading_phenotype(phenotype_dir, target_list, args)

# Inner-join the phenotype rows with the image paths on the subject key.
imageFiles_labels = combining_image_target(subject_data, image_files, target_list)

  # Remove the CWD from sys.path while we load stuff.


In [4]:
# Display the merged table: phenotype columns plus the `image_files` path column.
imageFiles_labels

Unnamed: 0,subjectkey,age,sex,race.ethnicity,high.educ,income,married,abcd_site,height,weight,...,Other.Drugs.Use.Disorder.x,Sedative.Hypnotic.or.Anxiolytic.Use.Disorder.x,Other.Specified.Trauma.and.Stressor.Related.Disorder.x,Post.Traumatic.Stress.Disorder.x,HOMICIDAL.IDEATION.AND.BEHAVIOR.x,totalBIS,totalBAS,PPSP_frequency,PPSP_distress,image_files
0,NDARINV003RTV85,131.0,2.0,1.0,13.0,8.0,1.0,6.0,56.500,93.000000,...,0.0,0.0,0.0,0.0,0.0,2.0,17.0,0,0,/scratch/connectome/3DCNN/data/1.ABCD/2.sMRI_f...
1,NDARINV007W6H7B,126.0,1.0,1.0,19.0,10.0,1.0,22.0,56.500,82.800000,...,0.0,0.0,0.0,0.0,0.0,6.0,7.0,7,18,/scratch/connectome/3DCNN/data/1.ABCD/2.sMRI_f...
2,NDARINV00BD7VDC,112.0,1.0,1.0,20.0,10.0,1.0,7.0,57.500,76.800000,...,0.0,0.0,0.0,0.0,0.0,11.0,13.0,0,0,/scratch/connectome/3DCNN/data/1.ABCD/2.sMRI_f...
3,NDARINV00CY2MDM,130.0,1.0,1.0,15.0,6.0,4.0,20.0,56.500,91.500000,...,0.0,0.0,0.0,0.0,0.0,5.0,10.0,0,0,/scratch/connectome/3DCNN/data/1.ABCD/2.sMRI_f...
4,NDARINV00HEV6HB,124.0,1.0,2.0,13.0,,1.0,12.0,57.300,70.866667,...,0.0,0.0,0.0,0.0,0.0,4.0,24.0,1,0,/scratch/connectome/3DCNN/data/1.ABCD/2.sMRI_f...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11368,NDARINVZZNX6W2P,131.0,1.0,1.0,18.0,9.0,1.0,14.0,56.000,73.000000,...,0.0,0.0,0.0,0.0,0.0,10.0,25.0,0,0,/scratch/connectome/3DCNN/data/1.ABCD/2.sMRI_f...
11369,NDARINVZZPKBDAC,113.0,2.0,1.0,19.0,10.0,1.0,12.0,58.875,90.000000,...,0.0,0.0,0.0,0.0,0.0,12.0,21.0,3,2,/scratch/connectome/3DCNN/data/1.ABCD/2.sMRI_f...
11370,NDARINVZZZ2ALR6,120.0,2.0,5.0,21.0,10.0,1.0,8.0,54.700,59.500000,...,0.0,0.0,0.0,0.0,0.0,9.0,12.0,0,0,/scratch/connectome/3DCNN/data/1.ABCD/2.sMRI_f...
11371,NDARINVZZZNB0XC,108.0,2.0,3.0,19.0,3.0,1.0,3.0,49.000,63.000000,...,0.0,0.0,0.0,0.0,0.0,11.0,18.0,14,24,/scratch/connectome/3DCNN/data/1.ABCD/2.sMRI_f...


In [8]:
# Summary statistics of the UKB `age` column. Only that column is parsed,
# instead of reading every column of the phenotype CSV.
pd.read_csv(UKB_phenotype_dir, usecols=['age'])['age'].describe()

count    502485.000000
mean         56.528657
std           8.095190
min          37.000000
25%          50.000000
50%          58.000000
75%          63.000000
max          73.000000
Name: age, dtype: float64

In [9]:

def plot_brain(sag, cor, hor):
    """Show three orthogonal slices of the module-level ``img_array`` side by side.

    Parameters
    ----------
    sag : int
        Index along axis 0 (panel titled 'Sagittal view').
    cor : int
        Index along axis 2 (panel titled 'Coronal view').
    hor : int
        Index along axis 1 (panel titled 'Horizontal view').

    The volume is read from the global ``img_array``, not passed in, so the
    figure reflects whatever that name is bound to at call time.
    NOTE(review): the anatomical names are taken from the panel titles; whether
    they match the data's actual orientation cannot be confirmed from here.
    """
    fig, axes = plt.subplots(nrows=1, ncols=3, figsize=(12,6))

    # (title, index into the volume, whether to rotate the slice by -90 degrees)
    keep = slice(None)
    panels = (
        ('Sagittal view', (sag, keep, keep), False),
        ('Coronal view', (keep, keep, cor), True),
        ('Horizontal view', (keep, hor, keep), True),
    )
    for ax, (title, index, rotate) in zip(axes, panels):
        plane = img_array[index]
        if rotate:
            plane = np.rot90(plane, -1)
        ax.imshow(plane, cmap='gray')
        ax.set_title(title)

    plt.show()
    
# Load the first subject's volume once (the file used to be read from disk twice)
# and keep an independent, untouched copy in `img`.
img_dir = imageFiles_labels['image_files'].iloc[0]
img_array = np.load(img_dir)
img = img_array.copy()
s, h, c = img_array.shape

# Slider ranges follow the axis lengths: sag -> axis 0, hor -> axis 1, cor -> axis 2.
interact_manual(plot_brain, sag=(0,s-1,1), cor=(0,c-1,1), hor=(0,h-1,1));


interactive(children=(IntSlider(value=75, description='sag', max=151), IntSlider(value=89, description='cor', …

In [11]:
# Find the first image whose first axis has the target length.
# On a match the loop stops and `img_dir` (plus ss, hh, cc) refer to that image.
TARGET_FIRST_AXIS = 142
for img_dir in imageFiles_labels['image_files']:
    # mmap_mode='r' exposes the shape without loading the voxel data into memory.
    ss, hh, cc = np.load(img_dir, mmap_mode='r').shape
    if ss == TARGET_FIRST_AXIS:
        break
else:
    # Make the "nothing matched" case visible instead of silently carrying on.
    print(f'No image with first-axis length {TARGET_FIRST_AXIS} was found; img_dir does not point to a match.')

In [13]:
# Build a MONAI pipeline, apply it to the volume currently bound to img_array, and browse the result.
# NOTE(review): no random seed is set, so the RandAffine translation may differ between runs — confirm this is intended.
# NOTE(review): AddChannel appears to be deprecated in newer MONAI releases — verify against the installed version.
monai_transform = Compose([ScaleIntensity(),
                           AddChannel(),
                           monai.transforms.RandAffine(prob=1,translate_range=(0,40),padding_mode='zeros'),
                           monai.transforms.RandFlip(prob=1, spatial_axis=0),
                           Resize((160,160,160))])
# [0] selects the first entry along the leading axis (presumably the channel axis added by AddChannel).
transformed_img = monai_transform(img_array)[0]
# Keep a reference to the pre-transform volume.
img = img_array
print(transformed_img.shape)
s, h, c = transformed_img.shape
# plot_brain reads the global img_array, so it is rebound to the transformed volume here.
# Consequence: re-running this cell transforms the already-transformed volume.
img_array = transformed_img
interact_manual(plot_brain, sag=(0,s-1,1), cor=(0,c-1,1), hor=(0,h-1,1));

(160, 160, 160)


interactive(children=(IntSlider(value=79, description='sag', max=159), IntSlider(value=79, description='cor', …

In [22]:
# Load the volume at img_dir and browse it; the shape is taken from the
# array already in memory instead of reading the file a second time.
img_array = np.load(img_dir)
s, h, c = img_array.shape

interact_manual(plot_brain, sag=(0,s-1,1), cor=(0,c-1,1), hor=(0,h-1,1));

interactive(children=(IntSlider(value=70, description='sag', max=141), IntSlider(value=87, description='cor', …

In [23]:
# Axis lengths (axis 0, axis 1, axis 2) last unpacked from a volume's .shape.
s,h,c

(142, 157, 176)

In [2]:
# Load the ABCD_ADHD.csv phenotype table.
adhd=pd.read_csv('/scratch/connectome/3DCNN/data/1.ABCD/4.demo_qc/ABCD_ADHD.csv')

In [29]:
# Display the ADHD table.
adhd

Unnamed: 0,subjectkey,age,sex,race.ethnicity,high.educ,income,married,abcd_site,height,weight,...,Substance.Use.Disorder.x,Other.Drugs.Use.Disorder.x,Sedative.Hypnotic.or.Anxiolytic.Use.Disorder.x,Other.Specified.Trauma.and.Stressor.Related.Disorder.x,Post.Traumatic.Stress.Disorder.x,HOMICIDAL.IDEATION.AND.BEHAVIOR.x,totalBIS,totalBAS,PPSP_frequency,PPSP_distress
0,NDARINV00BD7VDC,112.0,1.0,1.0,20.0,10.0,1.0,7.0,57.50,76.800000,...,0,0,0,0,0,0,11,13,0,0
1,NDARINV00CY2MDM,130.0,1.0,1.0,15.0,6.0,4.0,20.0,56.50,91.500000,...,0,0,0,0,0,0,5,10,0,0
2,NDARINV00LJVZK2,121.0,1.0,5.0,11.0,1.0,5.0,11.0,53.50,81.400000,...,0,0,0,0,0,0,17,18,2,0
3,NDARINV00U4FTRU,130.0,2.0,5.0,12.0,6.0,6.0,4.0,52.50,96.966667,...,0,0,0,1,0,0,18,30,17,83
4,NDARINV0182J779,111.0,2.0,1.0,21.0,9.0,1.0,6.0,54.00,74.000000,...,0,0,0,0,0,0,10,15,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2501,NDARINVZCGL10MP,131.0,1.0,2.0,16.0,4.0,5.0,14.0,56.25,72.250000,...,0,0,0,0,0,0,11,19,0,0
2502,NDARINVZE3C6X7E,120.0,1.0,5.0,18.0,9.0,1.0,14.0,58.00,109.500000,...,0,0,0,0,0,0,8,24,0,0
2503,NDARINVZEB043HN,128.0,2.0,1.0,19.0,9.0,5.0,13.0,40.75,72.000000,...,0,0,0,0,0,0,8,28,0,0
2504,NDARINVZEKA1L7D,116.0,1.0,1.0,18.0,7.0,1.0,13.0,,67.000000,...,0,0,0,0,0,0,10,19,4,8


In [12]:
# List every column name of the ADHD table (the default frame display elides the middle columns).
list(adhd.columns)

['subjectkey',
 'age',
 'sex',
 'race.ethnicity',
 'high.educ',
 'income',
 'married',
 'abcd_site',
 'height',
 'weight',
 'BMI',
 'vol',
 'nihtbx_totalcomp_uncorrected',
 'nihtbx_fluidcomp_uncorrected',
 'nihtbx_pattern_uncorrected',
 'nihtbx_picture_uncorrected',
 'nihtbx_list_uncorrected',
 'nihtbx_flanker_uncorrected',
 'nihtbx_cardsort_uncorrected',
 'nihtbx_cryst_uncorrected',
 'nihtbx_reading_uncorrected',
 'nihtbx_picvocab_uncorrected',
 'poa_total_6month_follow_up',
 'friends',
 'close_friends',
 'upps_negative_urgency',
 'upps_lack_of_planning',
 'upps_sensation_seeking',
 'upps_positive_urgency',
 'upps_lack_of_perseverance',
 'upps_total',
 'AnxDep.CBCL',
 'WithDep.CBCL',
 'Somatic.CBCL',
 'Social.CBCL',
 'Thought.CBCL',
 'Attention.CBCL',
 'RuleBreak.CBCL',
 'Aggressive.CBCL',
 'Internal.CBCL',
 'External.CBCL',
 'TotProb.CBCL',
 'Depress.CBCL',
 'AnxDisord.CBCL',
 'SomaticPr.CBCL',
 'ADHD.CBCL',
 'Opposit.CBCL',
 'Conduct.CBCL',
 'Sluggish.Cognitive.Tempo..SCT..CBCL',
 '

In [18]:
# Summary statistics of the ADHD diagnosis column, to check which label values occur in this table.
adhd[ 'Attention.Deficit.Hyperactivity.Disorder.x'].describe()

count    2506.0
mean        1.0
std         0.0
min         1.0
25%         1.0
50%         1.0
75%         1.0
max         1.0
Name: Attention.Deficit.Hyperactivity.Disorder.x, dtype: float64

In [26]:
# Load the full ABCD phenotype table via the shared path constant instead of repeating the literal path.
tot=pd.read_csv(ABCD_phenotype_dir)

In [27]:
# Summary statistics of the full ABCD phenotype table.
tot.describe()

Unnamed: 0,age,sex,race.ethnicity,high.educ,income,married,abcd_site,height,weight,BMI,...,Substance.Use.Disorder.x,Other.Drugs.Use.Disorder.x,Sedative.Hypnotic.or.Anxiolytic.Use.Disorder.x,Other.Specified.Trauma.and.Stressor.Related.Disorder.x,Post.Traumatic.Stress.Disorder.x,HOMICIDAL.IDEATION.AND.BEHAVIOR.x,totalBIS,totalBAS,PPSP_frequency,PPSP_distress
count,11536.0,11531.0,11519.0,11517.0,10555.0,11445.0,11536.0,11526.0,11524.0,11522.0,...,11702.0,11702.0,11866.0,11683.0,11683.0,11702.0,11869.0,11869.0,11875.0,11875.0
mean,118.95319,1.475934,2.035246,16.606842,7.230886,2.092442,11.445995,55.265927,82.538557,18.811432,...,8.5e-05,8.5e-05,8.4e-05,0.030386,0.019772,0.005982,9.493218,20.810262,2.630147,4.637895
std,7.464702,0.499442,1.323308,2.759331,2.412037,1.719346,6.080634,3.209198,23.621102,4.225132,...,0.009244,0.009244,0.00918,0.171654,0.139223,0.077114,3.771319,6.933897,3.558714,9.089124
min,108.0,1.0,1.0,1.0,1.0,1.0,1.0,32.65,23.9,5.266641,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,112.0,1.0,1.0,15.0,6.0,1.0,6.0,53.0,66.1,15.939709,...,0.0,0.0,0.0,0.0,0.0,0.0,7.0,16.0,0.0,0.0
50%,119.0,1.0,1.0,18.0,8.0,1.0,12.0,55.1,76.5,17.649024,...,0.0,0.0,0.0,0.0,0.0,0.0,9.0,20.0,1.0,0.0
75%,125.0,2.0,3.0,19.0,9.0,3.0,16.0,57.2,93.0,20.62595,...,0.0,0.0,0.0,0.0,0.0,0.0,12.0,25.0,4.0,6.0
max,131.0,2.0,5.0,21.0,10.0,6.0,22.0,82.0,272.0,54.992926,...,1.0,1.0,1.0,1.0,1.0,1.0,21.0,39.0,21.0,94.0


In [19]:
# Load the ABCD_suicide_control.csv table.
cont=pd.read_csv('/scratch/connectome/3DCNN/data/1.ABCD/4.demo_qc/ABCD_suicide_control.csv')

In [28]:
# List every column name of the control table.
list(cont.columns)

['subjectkey',
 'Bipolar.I.Disorder.x',
 'Bipolar.II.Disorder.x',
 'Unspecified.Bipolar.and.Related.Disorder.x',
 'Major.Depressive.Disorder.x',
 'Persistent.Depressive.Disorder.x',
 'Unspecified.Depressive.Disorder.x',
 'Social.Anxiety.Disorder.x',
 'Generalized.Anxiety.Disorder.x',
 'SelfInjuriousBehaviorwithoutsuicidalintent.x',
 'SuicidalideationPassive.x',
 'SuicidalideationActive.x',
 'PreparatoryActionstowardimminentSuicidalbehavior.x',
 'InterruptedAttempt.x',
 'AbortedAttempt.x',
 'SuicideAttempt.x',
 'SLEEP.PROBLEMS.x',
 'Unspecified.Schizophrenia.Spectrum.and.Other.Psychotic.Disorder.x',
 'Hallucinations.x',
 'Delusions.x',
 'Associated.Psychotic.Symptoms.x',
 'Panic.Disorder.x',
 'Separation.Anxiety.Disorder.x',
 'Specific.Phobia.x',
 'Obsessive.Compulsive.Disorder.x',
 'Other.Specified.Obsessive.Compulsive.and.Related.Disorder.x',
 'Binge.Eating.Disorder.x',
 'Anorexia.Nervosa.x',
 'Bulimia.Nervosa.x',
 'Other.Specified.Feeding.or.Eating.Disorder.Bulimia.Nervosa.x',
 'Unsp

In [25]:
# Print summary statistics for every column of the control table.
# The loop variable is deliberately not named `c`: other cells use `c` for an
# axis length of the image volume, and this loop used to overwrite it.
for column in cont:
    print(cont[column].describe())

count                3108
unique               3108
top       NDARINV005V6D2C
freq                    1
Name: subjectkey, dtype: object
count    3108.0
mean        0.0
std         0.0
min         0.0
25%         0.0
50%         0.0
75%         0.0
max         0.0
Name: Bipolar.I.Disorder.x, dtype: float64
count    3108.0
mean        0.0
std         0.0
min         0.0
25%         0.0
50%         0.0
75%         0.0
max         0.0
Name: Bipolar.II.Disorder.x, dtype: float64
count    3108.0
mean        0.0
std         0.0
min         0.0
25%         0.0
50%         0.0
75%         0.0
max         0.0
Name: Unspecified.Bipolar.and.Related.Disorder.x, dtype: float64
count    3108.0
mean        0.0
std         0.0
min         0.0
25%         0.0
50%         0.0
75%         0.0
max         0.0
Name: Major.Depressive.Disorder.x, dtype: float64
count    3108.0
mean        0.0
std         0.0
min         0.0
25%         0.0
50%         0.0
75%         0.0
max         0.0
Name: Persistent.Depre