# Notebook for creating and saving Subject objects from datasets

In [1]:
%load_ext autoreload
%autoreload 2

import os
import pickle
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from subprocess import call

import sys
sys.path.append('../src/features')

from subject import Subject

# Resolve the project directories relative to the notebook's working directory.
cur_dir = os.getcwd()
# Parent of the working directory; used throughout as the project root.
asd_diag_dir = os.path.dirname(cur_dir)
# The `abide` folder is expected to sit next to the project root.
abide_dir = os.path.dirname(asd_diag_dir) + '/abide/'
# NOTE: abide_dir already ends with '/', so the resulting path contains a doubled slash.
roi_200_dir = abide_dir + '/data/ABIDEI_preprocessed/Outputs/cpac/filt_noglobal/rois_cc200/'
# Folder the pickled ABIDE I Subject objects are loaded from (and saved to) below.
subjects_dir = os.path.dirname(cur_dir) + '/data/ABIDEI_subjects/'
# Last expression of the cell: displays whether the ROI directory exists.
os.path.exists(roi_200_dir)

True

In [2]:
# Time step (TR) dictionaries, keyed by site identifier, one per dataset.
# Values are presumably in seconds — TODO confirm against the scanning-parameter tables.
# Every value is numeric: 'KKI' was previously the string '2.5', which would have
# handed a str TR to Subject for KKI subjects while every other site got a number.
site_trs = {
    'CALTECH': 2, 'CMU': 2, 'KKI': 2.5, 'LEUVEN_1': 1.656, 'LEUVEN_2': 1.656, 'MAX_MUN': 3,
    'NYU': 2, 'OHSU': 2.5, 'OLIN': 2.5, 'PITT': 1.5, 'SBL': 2.2, 'SDSU': 2, 'STANFORD': 2,
    'TRINITY': 2, 'UCLA_1': 3, 'UCLA_2': 3, 'UM_1': 2, 'UM_2': 2, 'USM': 2, 'YALE': 2,
}
# ABIDE II: keys are the part of SITE_ID after the dash (e.g. 'BNI_1').
site_trs_2 = {
    'BNI_1': 3, 'EMC_1': 2, 'ETH_1': 2, 'GU_1': 2, 'IU_1': .813, 'IP_1': 2.7, 'KKI_1': 2.5, 'KKI_3': 2.5,
    'NYU_1': 2, 'NYU_2': 2, 'OHSU_1': 2.5, 'ONRC_1': .475, 'SDSU_1': 2, 'TCD_1': 2, 'UCD_1': 2,
    'UCLA_1': 3, 'USM_1': 2, 'UCLA_Long': 3, 'UPSM_Long': 1.5,
}
# ADHD200: keyed by the integer value of the phenotypic 'Site' column.
adhd200_trs = {1: 2, 2: 2, 3: 2.34, 4: 1.96, 5: 2, 6: 2.5, 7: 1.5, 8: 1.5}
# ACPI: keyed by the lower-cased 'DATASET' column.
acpi_trs = {'mta_1': 2.17, 'nyu_1': 2}
# HBN sites (not referenced by the cells visible in this notebook).
hbn_trs = {'staten': 1.45, 'rut': .8, 'cornell': .8}

# Load previously saved subjects for modification

In [3]:
# Load ABIDEI preprocessed rois by loading subjects
def open_pickle(f):
    """Load and return the object stored in the pickle file at path ``f``.

    The file is opened in a ``with`` block so the handle is closed even when
    unpickling raises (the previous version leaked the handle in that case).

    NOTE: ``pickle.load`` can execute arbitrary code, so only call this on
    files from a trusted source.
    """
    with open(f, 'rb') as file:
        return pickle.load(file)

def load_subjects_d(subject_folder):
    """Return a ``{sub_id: subject}`` dict built from every entry of ``subject_folder``.

    Each directory entry is unpickled with ``open_pickle`` and stored under its
    ``_sub_id`` attribute; a later entry with the same id replaces an earlier one.
    """
    loaded = (
        open_pickle(os.path.join(subject_folder, entry))
        for entry in os.listdir(subject_folder)
    )
    return {subject._sub_id: subject for subject in loaded}

# Unpickle every saved ABIDE I subject into a dict keyed by subject id.
# Later cells (add_data, add_cpac_data, the save loop) expect `subjects_d` to exist.
subjects_d = load_subjects_d(subjects_dir)

KeyboardInterrupt: 

## Subjects from ABIDE I preprocessed

In [11]:
# Phenotypic table for ABIDE I; its rows are iterated in the next cell.
phen_file = abide_dir + 'Phenotypic_V1_0b_preprocessed1.csv'
phen_df = pd.read_csv(phen_file)
# Dataset tag passed to every Subject created in the next cell.
dataset = 'ABIDEI'

In [12]:
# Build one Subject per phenotypic row that has a CC200 ROI time-series file.
subjects = list()
# List the ROI directory once and keep it as a set: the previous version called
# os.listdir() for every phenotypic row and searched the resulting list linearly.
roi_files = set(os.listdir(roi_200_dir))
for index, row in phen_df.iterrows():
    file = row['FILE_ID'] + '_rois_cc200.1D'
    if file not in roi_files:
        # No ROI file for this phenotypic row; nothing to build.
        continue
    with open(str(os.path.join(roi_200_dir, file)), newline='') as f:
        # The first line is skipped (presumably a column header); each remaining
        # line is one tab-separated row of values.
        Lines = f.readlines()
    # Rows are sized by their own column count instead of a fixed
    # np.empty((200,)) buffer, which left uninitialised values in short rows.
    data = [np.array([float(v) for v in l.split("\t")]) for l in Lines[1:]]
    data_dict = {'filt_noglobal_roi_200_Cradd' : np.array(data)}
    label_dict = {'dx_group': row['DX_GROUP']}
    tr = site_trs[row['SITE_ID']]
    s = Subject(row['SUB_ID'],dataset ,row['SITE_ID'], tr, row['SEX'], row['AGE_AT_SCAN'], data_dict, label_dict)
    subjects.append(s)
# Every file in the ROI directory should have produced exactly one Subject.
assert len(subjects) == len(roi_files)

In [13]:
# Save subjects
# Same location as `subjects_dir` computed in the first cell.
save_dir = os.path.dirname(cur_dir) + '/data/ABIDEI_subjects/'
for s in subjects:
    # _save_subject comes from the Subject class imported from the `subject` module.
    s._save_subject(save_dir)

# Add ABIDE I preprocessed data

In [17]:
def open_1d_rois(file):
    """Read a tab-separated ``.1D`` ROI time-series file into a 2-D float array.

    The first line of the file is skipped (it is treated as a header). Every
    remaining non-blank line is split on tabs and parsed as floats.

    Rows are sized by their own column count. The previous implementation wrote
    into a fixed ``np.empty((200,))`` buffer, so a file with fewer than 200
    columns silently kept uninitialised values and a wider file raised
    ``IndexError``. Files with exactly 200 columns produce the same result as
    before.

    Parameters
    ----------
    file : str or path-like
        Path of the ``.1D`` file to read.

    Returns
    -------
    numpy.ndarray
        ``float64`` array with one row per data line; an empty array when the
        file has no data lines.

    Raises
    ------
    ValueError
        If a field cannot be parsed as a float.
    """
    with open(file, newline='') as f:
        data_lines = f.readlines()[1:]
    rows = [
        [float(value) for value in line.split("\t")]
        for line in data_lines
        if line.strip()  # ignore blank lines such as a trailing newline
    ]
    return np.array(rows, dtype=float)

def add_data(subjects_d, data_dir, data_name):
    """Attach the ROI time series of every file in ``data_dir`` to its subject.

    The subject id is the integer value of the last ``_``-separated token of the
    file name that starts with ``'005'``. The parsed array is stored under
    ``data_name`` in the matching subject's ``_data_dict``.

    Files without such a token are reported and skipped. Ids that are not in
    ``subjects_d`` are now reported and skipped as well (the same message
    ``add_cpac_data`` prints) instead of aborting the whole run with ``KeyError``.

    Parameters
    ----------
    subjects_d : dict
        Mapping of integer subject id to subject object; modified in place.
    data_dir : str
        Directory containing the ``.1D`` ROI files.
    data_name : str
        Key under which the array is stored in each ``_data_dict``.
    """
    # List the directory once and reuse it for both the count and the loop.
    file_names = os.listdir(data_dir)
    print(f'Number of files in data dir {len(file_names)}')
    for f in file_names:
        sub_id = None
        for i in f.split('_'):
            if(i[0:3] == '005'):
                sub_id = int(i)
        if(sub_id is None):
            print(f'error getting sub_id from {f}')
        elif sub_id not in subjects_d:
            # Checked before reading the file so unknown subjects cost no I/O.
            print(f'{sub_id} not in subjects dict')
        else:
            data = open_1d_rois(os.path.join(data_dir, f))
            subjects_d[sub_id]._data_dict[data_name] = data
        
# Preprocessing variants: exactly one data_dir/data_name pair is active at a time.
# Uncomment a different pair (and comment out the active one) to add that variant instead.
# data_dir = abide_dir + '/data/ABIDEI_preprocessed/Outputs/cpac/nofilt_noglobal/rois_cc200/'
# data_name = 'nofilt_noglobal_roi_200_Cradd'
# data_dir = abide_dir + '/data/ABIDEI_preprocessed/Outputs/cpac/nofilt_global/rois_cc200/'
# data_name = 'nofilt_global_roi_200_Cradd'
# data_dir = abide_dir + '/data/ABIDEI_preprocessed/Outputs/cpac/filt_global/rois_cc200/'
# data_name = 'filt_global_roi_200_Cradd'
data_dir = abide_dir + '/data/ABIDEI_preprocessed/Outputs/cpac/filt_noglobal/rois_cc200/'
data_name = 'filt_noglobal_roi_200_Cradd'
# Requires `subjects_d` from the "load subjects" cell; it is modified in place.
add_data(subjects_d, data_dir, data_name)

Number of files in data dir 884


In [4]:
# Count how many loaded subjects already carry the variant named below.
data_name = 'filt_noglobal_roi_200_Cradd'
g_c = sum(1 for s in subjects_d.values() if data_name in s._data_dict)
print(g_c)

NameError: name 'subjects_d' is not defined

# Add CPAC preprocessed data

In [3]:
# Data folder under the project root.
data_dir = asd_diag_dir + '/data/'
# Earlier C-PAC output locations, kept for reference; `cpac_output_dir` is assigned in a later cell.
# cpac_output_dir = data_dir + 'cpac_output/output/pipeline_abide_cpac_run_freq-filter_nuisance/'
# cpac_output_dir = data_dir + 'cpac_output_cmu_a/output/pipeline_abide_cpac_run/'
# cpac_output_dir = data_dir + 'cpac_output_t/output/pipeline_abide_cpac_run/'

In [6]:
# for filter no global
# filt_noglobal = 'roi_timeseries/_scan_rest_run-1/_selector_CSF-2mmE-M_aC-CSF+WM-2mm-DPC5_M-SDB_P-2_BP-B0.01-T0.1/_mask_CC200_mask_file_..cpac_templates..CC200.nii.gz'
filt_noglobal = 'roi_timeseries/_scan_rest_run-1/_selector_CSF-2mm-M_aC-CSF+WM-2mm-DPC5_M-SDB_P-2_BP-B0.01-T0.1/_mask_CC200_mask_file_..cpac_templates..CC200.nii.gz'
def add_cpac_data(subjects_d, data_dir, data_name):
    """Attach C-PAC ROI time series found under ``data_dir`` to known subjects.

    ``data_dir`` is expected to contain one folder per subject named like
    ``sub-<id>_...``; the integer ``<id>`` is the subject key. For each folder
    the array ``arr_0`` is read from ``<folder>/<filt_noglobal>/roi_stats.npz``,
    its first five rows are dropped, and the result is stored under
    ``data_name`` in the matching subject's ``_data_dict``.

    Changes from the previous version:
    * the ``data_dir`` argument is used; before, the function ignored it and
      read the module-level ``cpac_output_dir``;
    * each subject's archive is read once instead of twice;
    * the archive is read with ``np.load`` directly instead of shelling out to
      ``unzip`` (so ``arr_0.npy`` is no longer extracted next to the archive);
    * entries whose name does not yield an integer id are reported and skipped.

    Parameters
    ----------
    subjects_d : dict
        Mapping of integer subject id to subject object; modified in place.
    data_dir : str
        Directory holding the per-subject C-PAC output folders.
    data_name : str
        Key under which the array is stored in each ``_data_dict``.
    """
    def get_roi(sub_folder):
        """Return the ROI array for one subject folder, or None if it is missing."""
        npz_path = os.path.join(sub_folder, filt_noglobal, 'roi_stats.npz')
        if not os.path.exists(npz_path):
            print(f"Couldn't get rois for {sub_folder}")
            return None
        # allow_pickle=True is kept from the original call; it permits pickled
        # object arrays, so only load archives from a trusted source.
        with np.load(npz_path, allow_pickle=True) as archive:
            # Original note: "first row is nans and skip first 5".
            return archive['arr_0'][5:]

    cpac_sub_roi_d = {}
    for sub_f in os.listdir(data_dir):
        try:
            sub_id = int(sub_f.split('-')[1].split('_')[0])
        except (IndexError, ValueError):
            print(f"Couldn't parse a subject id from {sub_f}")
            continue
        roi = get_roi(os.path.join(data_dir, sub_f))
        if roi is not None:
            cpac_sub_roi_d[sub_id] = roi
    print(f'Retrieved rois for {len(cpac_sub_roi_d)} subjects')
    for sub_id, roi in cpac_sub_roi_d.items():
        if sub_id in subjects_d:
            subjects_d[sub_id]._data_dict[data_name] = roi
        else:
            print(f'{sub_id} not in subjects dict')

In [10]:
# Folder holding the per-subject C-PAC outputs to attach.
cpac_output_dir = asd_diag_dir + '/data/aws_output/all_subjects/'
# Requires `subjects_d` from the "load subjects" cell; arrays are stored under 'cpac_aws_cc200'.
add_cpac_data(subjects_d, cpac_output_dir, 'cpac_aws_cc200')

Couldn't get rois for /home/cparish/Capstone/asd_diagnosis_fmri/data/aws_output/all_subjects/sub-0051473_ses-1
Couldn't get rois for /home/cparish/Capstone/asd_diagnosis_fmri/data/aws_output/all_subjects/sub-0051474_ses-1
Couldn't get rois for /home/cparish/Capstone/asd_diagnosis_fmri/data/aws_output/all_subjects/sub-0051171_ses-1
Couldn't get rois for /home/cparish/Capstone/asd_diagnosis_fmri/data/aws_output/all_subjects/sub-0050961_ses-1
Couldn't get rois for /home/cparish/Capstone/asd_diagnosis_fmri/data/aws_output/all_subjects/sub-0051172_ses-1
Couldn't get rois for /home/cparish/Capstone/asd_diagnosis_fmri/data/aws_output/all_subjects/sub-0050960_ses-1
Retrieved rois for 36 subjects
50952 not in subjects dict
51166 not in subjects dict
51161 not in subjects dict
50953 not in subjects dict


In [9]:
# for s in subjects:
#     if s._sub_id in cpac_sub_roi_d:
#         s._data_dict['cpac_filt_noglobal_t'] = cpac_sub_roi_d[s._sub_id]

h
h
h
h
h
h
h
h
h
h


In [12]:
# Save subjects
# Writes the (possibly modified) subjects back to the folder they were loaded from.
save_dir = os.path.dirname(cur_dir) + '/data/ABIDEI_subjects/'
for sub_id, s in subjects_d.items():
    s._save_subject(save_dir)

# Create ABIDEII subjects

In [3]:
# Phenotypic table for ABIDE II. NOTE: this rebinds `phen_file`, `phen_df` and
# `dataset`, which the ABIDE I cells above also use.
phen_file = abide_dir + 'ABIDEII_Composite_Phenotypic.csv'
phen_df = pd.read_csv(phen_file)
dataset = 'ABIDEII'
# Root folder of the per-site ABIDE II C-PAC outputs.
ABIDEII_preprocessed_dir = asd_diag_dir + '/data/ABIDEII_preprocessed/'

In [4]:
filt_noglobal = 'roi_timeseries/_scan_rest_run-1/_selector_CSF-2mm-M_aC-CSF+WM-2mm-DPC5_M-SDB_P-2_BP-B0.01-T0.1/_mask_CC200_mask_file_..cpac_templates..CC200.nii.gz'
def get_roi(sub_folder, skip=0):
    """Return the ROI array stored for one C-PAC subject folder.

    Reads ``arr_0`` from ``<sub_folder>/<filt_noglobal>/roi_stats.npz`` and drops
    the first ``skip`` rows. The archive is read with ``np.load`` directly
    instead of shelling out to ``unzip``, so no external tool is required and
    ``arr_0.npy`` is no longer extracted next to the archive.

    Parameters
    ----------
    sub_folder : str
        Path of the subject's C-PAC output folder.
    skip : int, optional
        Number of leading rows to drop (default 0, i.e. keep every row).

    Returns
    -------
    numpy.ndarray or None
        The sliced array, or ``None`` (after printing a message) when the
        archive does not exist.
    """
    npz_path = os.path.join(sub_folder, filt_noglobal, 'roi_stats.npz')
    if not os.path.exists(npz_path):
        print(f"Couldn't get rois for {sub_folder}")
        return None
    # allow_pickle=True is kept from the original call; it permits pickled
    # object arrays, so only load archives from a trusted source.
    with np.load(npz_path, allow_pickle=True) as archive:
        return archive['arr_0'][skip:]



In [5]:
# Build one Subject per ABIDE II phenotypic row whose C-PAC ROI output exists.
ab2_subjects = []
for _, row in phen_df.iterrows():
    # SITE_ID is split on '-' and the second piece is used as the site key.
    site_id = row['SITE_ID'].split('-')[1]
    sub_folder = ABIDEII_preprocessed_dir + f"abideII_output_{site_id}/output/pipeline_abide_cpac_run/sub-{row['SUB_ID']}_ses-1/"
    data = get_roi(sub_folder)
    if data is None:
        print(f"Couldn't retrieve roi data for {row['SUB_ID']}")
        continue
    data_dict = {'filt_noglobal_roi_200_Cradd' : data}
    label_dict = {'dx_group': row['DX_GROUP']}
    tr = site_trs_2[site_id]
    # The 'AGE_AT_SCAN ' column name carries a trailing space.
    subject = Subject(
        row['SUB_ID'], dataset, row['SITE_ID'], tr, row['SEX'], row['AGE_AT_SCAN '],
        data_dict, label_dict,
    )
    ab2_subjects.append(subject)
print(len(ab2_subjects))

(121, 200)
(121, 200)
(121, 200)
(121, 200)
(121, 200)
(121, 200)
(121, 200)
(121, 200)
(121, 200)
(121, 200)
(121, 200)
(121, 200)
(121, 200)
(121, 200)
(121, 200)
(121, 200)
(121, 200)
(121, 200)
(121, 200)
(121, 200)
(121, 200)
(121, 200)
(121, 200)
(121, 200)
(121, 200)
(121, 200)
(121, 200)
(120, 200)
(120, 200)
(121, 200)
(121, 200)
(121, 200)
(121, 200)
(121, 200)
(121, 200)
(121, 200)
(121, 200)
(121, 200)
(121, 200)
(121, 200)
(121, 200)
(121, 200)
(121, 200)
(121, 200)
Couldn't get rois for /home/cparish/Capstone/asd_diagnosis_fmri/data/ABIDEII_preprocessed/abideII_output_BNI_1/output/pipeline_abide_cpac_run/sub-29038_ses-1/
Couldn't retrieve roi data for 29038
(121, 200)
(121, 200)
(121, 200)
(121, 200)
(121, 200)
(121, 200)
(121, 200)
(121, 200)
(121, 200)
(121, 200)
(121, 200)
(121, 200)
(121, 200)
(161, 200)
(161, 200)
(161, 200)
(161, 200)
(161, 200)
(161, 200)
(161, 200)
(161, 200)
(161, 200)
(161, 200)
(161, 200)
(161, 200)
(161, 200)
(161, 200)
(161, 200)
(161, 200)
(

(153, 200)
(153, 200)
(153, 200)
(153, 200)
(153, 200)
(153, 200)
(153, 200)
(153, 200)
(153, 200)
(153, 200)
(153, 200)
(153, 200)
(153, 200)
(153, 200)
(153, 200)
(153, 200)
(153, 200)
(153, 200)
(153, 200)
(153, 200)
(153, 200)
(153, 200)
(153, 200)
Couldn't get rois for /home/cparish/Capstone/asd_diagnosis_fmri/data/ABIDEII_preprocessed/abideII_output_IP_1/output/pipeline_abide_cpac_run/sub-29584_ses-1/
Couldn't retrieve roi data for 29584
Couldn't get rois for /home/cparish/Capstone/asd_diagnosis_fmri/data/ABIDEII_preprocessed/abideII_output_IP_1/output/pipeline_abide_cpac_run/sub-29587_ses-1/
Couldn't retrieve roi data for 29587
Couldn't get rois for /home/cparish/Capstone/asd_diagnosis_fmri/data/ABIDEII_preprocessed/abideII_output_IP_1/output/pipeline_abide_cpac_run/sub-29588_ses-1/
Couldn't retrieve roi data for 29588
Couldn't get rois for /home/cparish/Capstone/asd_diagnosis_fmri/data/ABIDEII_preprocessed/abideII_output_IP_1/output/pipeline_abide_cpac_run/sub-29591_ses-1/
Coul

Couldn't get rois for /home/cparish/Capstone/asd_diagnosis_fmri/data/ABIDEII_preprocessed/abideII_output_KKI_1/output/pipeline_abide_cpac_run/sub-29476_ses-1/
Couldn't retrieve roi data for 29476
Couldn't get rois for /home/cparish/Capstone/asd_diagnosis_fmri/data/ABIDEII_preprocessed/abideII_output_KKI_1/output/pipeline_abide_cpac_run/sub-29478_ses-1/
Couldn't retrieve roi data for 29478
Couldn't get rois for /home/cparish/Capstone/asd_diagnosis_fmri/data/ABIDEII_preprocessed/abideII_output_KKI_1/output/pipeline_abide_cpac_run/sub-29480_ses-1/
Couldn't retrieve roi data for 29480
Couldn't get rois for /home/cparish/Capstone/asd_diagnosis_fmri/data/ABIDEII_preprocessed/abideII_output_KKI_1/output/pipeline_abide_cpac_run/sub-29483_ses-1/
Couldn't retrieve roi data for 29483
Couldn't get rois for /home/cparish/Capstone/asd_diagnosis_fmri/data/ABIDEII_preprocessed/abideII_output_KKI_1/output/pipeline_abide_cpac_run/sub-29485_ses-1/
Couldn't retrieve roi data for 29485
Couldn't get rois fo

Couldn't get rois for /home/cparish/Capstone/asd_diagnosis_fmri/data/ABIDEII_preprocessed/abideII_output_NYU_2/output/pipeline_abide_cpac_run/sub-29165_ses-1/
Couldn't retrieve roi data for 29165
Couldn't get rois for /home/cparish/Capstone/asd_diagnosis_fmri/data/ABIDEII_preprocessed/abideII_output_NYU_2/output/pipeline_abide_cpac_run/sub-29166_ses-1/
Couldn't retrieve roi data for 29166
Couldn't get rois for /home/cparish/Capstone/asd_diagnosis_fmri/data/ABIDEII_preprocessed/abideII_output_NYU_2/output/pipeline_abide_cpac_run/sub-29167_ses-1/
Couldn't retrieve roi data for 29167
Couldn't get rois for /home/cparish/Capstone/asd_diagnosis_fmri/data/ABIDEII_preprocessed/abideII_output_NYU_2/output/pipeline_abide_cpac_run/sub-29168_ses-1/
Couldn't retrieve roi data for 29168
Couldn't get rois for /home/cparish/Capstone/asd_diagnosis_fmri/data/ABIDEII_preprocessed/abideII_output_NYU_2/output/pipeline_abide_cpac_run/sub-29169_ses-1/
Couldn't retrieve roi data for 29169
Couldn't get rois fo

(181, 200)
(181, 200)
(181, 200)
(181, 200)
(181, 200)
(181, 200)
(181, 200)
(181, 200)
(181, 200)
(181, 200)
(181, 200)
(181, 200)
(181, 200)
(181, 200)
(181, 200)
(181, 200)
(181, 200)
(181, 200)
(181, 200)
(181, 200)
(181, 200)
(181, 200)
(181, 200)
(181, 200)
(181, 200)
(181, 200)
(181, 200)
(181, 200)
(181, 200)
(181, 200)
(181, 200)
(181, 200)
(181, 200)
(181, 200)
(181, 200)
(181, 200)
(181, 200)
(181, 200)
(181, 200)
(181, 200)
(181, 200)
(181, 200)
(181, 200)
(181, 200)
(181, 200)
(181, 200)
(181, 200)
(181, 200)
(181, 200)
(181, 200)
(181, 200)
(181, 200)
(181, 200)
(181, 200)
(181, 200)
(181, 200)
Couldn't get rois for /home/cparish/Capstone/asd_diagnosis_fmri/data/ABIDEII_preprocessed/abideII_output_SU_2/output/pipeline_abide_cpac_run/sub-30174_ses-1/
Couldn't retrieve roi data for 30174
Couldn't get rois for /home/cparish/Capstone/asd_diagnosis_fmri/data/ABIDEII_preprocessed/abideII_output_SU_2/output/pipeline_abide_cpac_run/sub-30178_ses-1/
Couldn't retrieve roi data for 

(211, 200)
(211, 200)
(211, 200)
(211, 200)
(211, 200)
(211, 200)
(161, 200)
(211, 200)
(211, 200)
(211, 200)
(211, 200)
(211, 200)
(211, 200)
(211, 200)
(211, 200)
(211, 200)
(211, 200)
(211, 200)
(211, 200)
(211, 200)
(211, 200)
(211, 200)
(211, 200)
(211, 200)
(211, 200)
(211, 200)
(211, 200)
(211, 200)
(211, 200)
(211, 200)
(211, 200)
(211, 200)
(211, 200)
(211, 200)
(211, 200)
(211, 200)
(211, 200)
(211, 200)
(211, 200)
(211, 200)
(152, 200)
(152, 200)
(152, 200)
(152, 200)
(152, 200)
(152, 200)
(152, 200)
(152, 200)
(152, 200)
(152, 200)
(152, 200)
Couldn't get rois for /home/cparish/Capstone/asd_diagnosis_fmri/data/ABIDEII_preprocessed/abideII_output_UCD_1/output/pipeline_abide_cpac_run/sub-30017_ses-1/
Couldn't retrieve roi data for 30017
(152, 200)
(152, 200)
(152, 200)
(152, 200)
(152, 200)
(152, 200)
(152, 200)
(152, 200)
(152, 200)
(152, 200)
(152, 200)
(152, 200)
(152, 200)
(152, 200)
(152, 200)
(152, 200)
(152, 200)
(152, 200)
(152, 200)
(152, 200)
Couldn't get rois for /

(241, 200)
Couldn't get rois for /home/cparish/Capstone/asd_diagnosis_fmri/data/ABIDEII_preprocessed/abideII_output_USM_1/output/pipeline_abide_cpac_run/sub-29506_ses-1/
Couldn't retrieve roi data for 29506
(241, 200)
Couldn't get rois for /home/cparish/Capstone/asd_diagnosis_fmri/data/ABIDEII_preprocessed/abideII_output_USM_1/output/pipeline_abide_cpac_run/sub-29508_ses-1/
Couldn't retrieve roi data for 29508
(241, 200)
(241, 200)
(241, 200)
(241, 200)
(241, 200)
(241, 200)
(241, 200)
(241, 200)
(241, 200)
(241, 200)
(241, 200)
Couldn't get rois for /home/cparish/Capstone/asd_diagnosis_fmri/data/ABIDEII_preprocessed/abideII_output_USM_1/output/pipeline_abide_cpac_run/sub-29504_ses-1/
Couldn't retrieve roi data for 29504
(241, 200)
(241, 200)
(241, 200)
(241, 200)
(241, 200)
(241, 200)
(241, 200)
(241, 200)
(241, 200)
Couldn't get rois for /home/cparish/Capstone/asd_diagnosis_fmri/data/ABIDEII_preprocessed/abideII_output_USM_1/output/pipeline_abide_cpac_run/sub-29525_ses-1/
Couldn't re

In [6]:
# Save subjects
# Each ABIDE II Subject is written to its own dataset folder via Subject._save_subject.
save_dir = os.path.dirname(cur_dir) + '/data/ABIDEII_subjects/'
for s in ab2_subjects:
    s._save_subject(save_dir)

# Create ADHD200 subjects

In [7]:
# Two ADHD200 phenotypic tables: a comma-separated one and a tab-separated one.
# Only `phen_df` (the TSV) is iterated when building subjects below.
phen_file_t = asd_diag_dir + '/data/dicts/allSubs_testSet_phenotypic_dx.csv'
phen_file = asd_diag_dir + '/data/dicts/adhd200_preprocessed_phenotypics.tsv'
phen_df_t = pd.read_csv(phen_file_t)
phen_df = pd.read_csv(phen_file, sep='\t')
dataset = 'ADHD200'
ADHD200_preprocessed_dir = asd_diag_dir + '/data/PCP/ADHD200/'
# Constant value stored as the 'dx_group' label of every subject built below.
dx_group = -1

In [8]:
# Subject id sets from the two phenotypic tables.
# NOTE(review): neither set is referenced again in the visible cells.
ids_t = set(phen_df_t['ID'])
ids_2 = set(phen_df['ScanDir ID'])

In [9]:
# Number of entries in the ADHD200 preprocessed folder (displayed as the cell output).
len(os.listdir(ADHD200_preprocessed_dir))

163

In [10]:
filt_noglobal = "roi_timeseries/_scan_rest_1/_csf_threshold_0.96/_gm_threshold_0.7/_wm_threshold_0.96/_compcor_ncomponents_5_selector_pc10.linear1.wm0.global0.motion1.quadratic1.gm0.compcor1.csf0/_bandpass_freqs_0.01.0.1/_mask_CC200/"
def get_roi(sub_folder, skip=0):
    """Return the ROI array stored for one ADHD200 subject folder.

    Reads ``roi_data`` from ``<sub_folder>/<filt_noglobal>/roi_CC200.npz``, swaps
    its first two axes, and drops the first ``skip`` rows of the result. The
    archive is read with ``np.load`` directly instead of shelling out to
    ``unzip``, so no external tool is required and ``roi_data.npy`` is no longer
    extracted next to the archive.

    Parameters
    ----------
    sub_folder : str
        Path of the subject's preprocessed output folder.
    skip : int, optional
        Number of leading rows (after the axis swap) to drop; default 0.

    Returns
    -------
    numpy.ndarray or None
        The transposed, sliced array, or ``None`` (after printing a message)
        when the archive does not exist.
    """
    npz_path = os.path.join(sub_folder, filt_noglobal, 'roi_CC200.npz')
    if not os.path.exists(npz_path):
        print(f"Couldn't get rois for {sub_folder}")
        return None
    # allow_pickle=True is kept from the original call; it permits pickled
    # object arrays, so only load archives from a trusted source.
    with np.load(npz_path, allow_pickle=True) as archive:
        roi = archive['roi_data']
    return np.swapaxes(roi, 0, 1)[skip:]

In [12]:
# Build one Subject per ADHD200 phenotypic row whose ROI archive exists.
adhd200_subjects = []
for _, row in phen_df.iterrows():
    # Folder names use the scan id left-padded with zeros to seven characters.
    sub_id = str(row['ScanDir ID']).rjust(7, '0')
    sub_folder = ADHD200_preprocessed_dir + f"{sub_id}_session_1/"
    data = get_roi(sub_folder)
    if data is None:
        print(f"Couldn't retrieve roi data for {row['ScanDir ID']}")
        continue
    print(data.shape)
    data_dict = {'filt_noglobal_roi_200_Cradd' : data}
    label_dict = {'dx_group': dx_group}
    site_id = row['Site']
    tr = adhd200_trs[site_id]
    # Boolean flag: True when the 'Gender' column equals 1.
    sex = row['Gender'] == 1
    subject = Subject(
        row['ScanDir ID'], dataset, site_id, tr, sex, row['Age'],
        data_dict, label_dict,
    )
    adhd200_subjects.append(subject)
print(len(adhd200_subjects))

Couldn't get rois for /home/cparish/Capstone/asd_diagnosis_fmri/data/PCP/ADHD200/2371032_session_1/
Couldn't retrieve roi data for 2371032
Couldn't get rois for /home/cparish/Capstone/asd_diagnosis_fmri/data/PCP/ADHD200/2026113_session_1/
Couldn't retrieve roi data for 2026113
Couldn't get rois for /home/cparish/Capstone/asd_diagnosis_fmri/data/PCP/ADHD200/3434578_session_1/
Couldn't retrieve roi data for 3434578
(152, 200)
Couldn't get rois for /home/cparish/Capstone/asd_diagnosis_fmri/data/PCP/ADHD200/1623716_session_1/
Couldn't retrieve roi data for 1623716
(124, 200)
Couldn't get rois for /home/cparish/Capstone/asd_diagnosis_fmri/data/PCP/ADHD200/2930625_session_1/
Couldn't retrieve roi data for 2930625
Couldn't get rois for /home/cparish/Capstone/asd_diagnosis_fmri/data/PCP/ADHD200/3154996_session_1/
Couldn't retrieve roi data for 3154996
Couldn't get rois for /home/cparish/Capstone/asd_diagnosis_fmri/data/PCP/ADHD200/3160561_session_1/
Couldn't retrieve roi data for 3160561
Could

(261, 200)
Couldn't get rois for /home/cparish/Capstone/asd_diagnosis_fmri/data/PCP/ADHD200/2574674_session_1/
Couldn't retrieve roi data for 2574674
Couldn't get rois for /home/cparish/Capstone/asd_diagnosis_fmri/data/PCP/ADHD200/2671604_session_1/
Couldn't retrieve roi data for 2671604
(261, 200)
Couldn't get rois for /home/cparish/Capstone/asd_diagnosis_fmri/data/PCP/ADHD200/2876903_session_1/
Couldn't retrieve roi data for 2876903
(261, 200)
Couldn't get rois for /home/cparish/Capstone/asd_diagnosis_fmri/data/PCP/ADHD200/3007585_session_1/
Couldn't retrieve roi data for 3007585
Couldn't get rois for /home/cparish/Capstone/asd_diagnosis_fmri/data/PCP/ADHD200/3048588_session_1/
Couldn't retrieve roi data for 3048588
Couldn't get rois for /home/cparish/Capstone/asd_diagnosis_fmri/data/PCP/ADHD200/3082137_session_1/
Couldn't retrieve roi data for 3082137
Couldn't get rois for /home/cparish/Capstone/asd_diagnosis_fmri/data/PCP/ADHD200/3108222_session_1/
Couldn't retrieve roi data for 31

Couldn't get rois for /home/cparish/Capstone/asd_diagnosis_fmri/data/PCP/ADHD200/3518345_session_1/
Couldn't retrieve roi data for 3518345
Couldn't get rois for /home/cparish/Capstone/asd_diagnosis_fmri/data/PCP/ADHD200/3542588_session_1/
Couldn't retrieve roi data for 3542588
Couldn't get rois for /home/cparish/Capstone/asd_diagnosis_fmri/data/PCP/ADHD200/3601861_session_1/
Couldn't retrieve roi data for 3601861
Couldn't get rois for /home/cparish/Capstone/asd_diagnosis_fmri/data/PCP/ADHD200/3619797_session_1/
Couldn't retrieve roi data for 3619797
Couldn't get rois for /home/cparish/Capstone/asd_diagnosis_fmri/data/PCP/ADHD200/3650634_session_1/
Couldn't retrieve roi data for 3650634
Couldn't get rois for /home/cparish/Capstone/asd_diagnosis_fmri/data/PCP/ADHD200/3653737_session_1/
Couldn't retrieve roi data for 3653737
Couldn't get rois for /home/cparish/Capstone/asd_diagnosis_fmri/data/PCP/ADHD200/3662296_session_1/
Couldn't retrieve roi data for 3662296
Couldn't get rois for /home

(236, 200)
(236, 200)
(236, 200)
Couldn't get rois for /home/cparish/Capstone/asd_diagnosis_fmri/data/PCP/ADHD200/1912810_session_1/
Couldn't retrieve roi data for 1912810
(236, 200)
(236, 200)
(236, 200)
Couldn't get rois for /home/cparish/Capstone/asd_diagnosis_fmri/data/PCP/ADHD200/2123983_session_1/
Couldn't retrieve roi data for 2123983
(236, 200)
Couldn't get rois for /home/cparish/Capstone/asd_diagnosis_fmri/data/PCP/ADHD200/2196753_session_1/
Couldn't retrieve roi data for 2196753
Couldn't get rois for /home/cparish/Capstone/asd_diagnosis_fmri/data/PCP/ADHD200/2240562_session_1/
Couldn't retrieve roi data for 2240562
Couldn't get rois for /home/cparish/Capstone/asd_diagnosis_fmri/data/PCP/ADHD200/2249443_session_1/
Couldn't retrieve roi data for 2249443
Couldn't get rois for /home/cparish/Capstone/asd_diagnosis_fmri/data/PCP/ADHD200/2266806_session_1/
Couldn't retrieve roi data for 2266806
Couldn't get rois for /home/cparish/Capstone/asd_diagnosis_fmri/data/PCP/ADHD200/2367157_

(236, 200)
Couldn't get rois for /home/cparish/Capstone/asd_diagnosis_fmri/data/PCP/ADHD200/1562298_session_1/
Couldn't retrieve roi data for 1562298
Couldn't get rois for /home/cparish/Capstone/asd_diagnosis_fmri/data/PCP/ADHD200/1093743_session_1/
Couldn't retrieve roi data for 1093743
Couldn't get rois for /home/cparish/Capstone/asd_diagnosis_fmri/data/PCP/ADHD200/1177160_session_1/
Couldn't retrieve roi data for 1177160
(236, 200)
Couldn't get rois for /home/cparish/Capstone/asd_diagnosis_fmri/data/PCP/ADHD200/1494102_session_1/
Couldn't retrieve roi data for 1494102
(236, 200)
Couldn't get rois for /home/cparish/Capstone/asd_diagnosis_fmri/data/PCP/ADHD200/1875013_session_1/
Couldn't retrieve roi data for 1875013
(236, 200)
Couldn't get rois for /home/cparish/Capstone/asd_diagnosis_fmri/data/PCP/ADHD200/3993793_session_1/
Couldn't retrieve roi data for 3993793
Couldn't get rois for /home/cparish/Capstone/asd_diagnosis_fmri/data/PCP/ADHD200/2310449_session_1/
Couldn't retrieve roi 

(236, 200)
(236, 200)
(236, 200)
Couldn't get rois for /home/cparish/Capstone/asd_diagnosis_fmri/data/PCP/ADHD200/8191384_session_1/
Couldn't retrieve roi data for 8191384
Couldn't get rois for /home/cparish/Capstone/asd_diagnosis_fmri/data/PCP/ADHD200/0016058_session_1/
Couldn't retrieve roi data for 16058
Couldn't get rois for /home/cparish/Capstone/asd_diagnosis_fmri/data/PCP/ADHD200/0016046_session_1/
Couldn't retrieve roi data for 16046
Couldn't get rois for /home/cparish/Capstone/asd_diagnosis_fmri/data/PCP/ADHD200/0016060_session_1/
Couldn't retrieve roi data for 16060
(196, 200)
Couldn't get rois for /home/cparish/Capstone/asd_diagnosis_fmri/data/PCP/ADHD200/0016029_session_1/
Couldn't retrieve roi data for 16029
Couldn't get rois for /home/cparish/Capstone/asd_diagnosis_fmri/data/PCP/ADHD200/0016089_session_1/
Couldn't retrieve roi data for 16089
Couldn't get rois for /home/cparish/Capstone/asd_diagnosis_fmri/data/PCP/ADHD200/0016004_session_1/
Couldn't retrieve roi data for 1

(235, 200)
Couldn't get rois for /home/cparish/Capstone/asd_diagnosis_fmri/data/PCP/ADHD200/1581470_session_1/
Couldn't retrieve roi data for 1581470
(235, 200)
Couldn't get rois for /home/cparish/Capstone/asd_diagnosis_fmri/data/PCP/ADHD200/1849382_session_1/
Couldn't retrieve roi data for 1849382
Couldn't get rois for /home/cparish/Capstone/asd_diagnosis_fmri/data/PCP/ADHD200/1854691_session_1/
Couldn't retrieve roi data for 1854691
(235, 200)
Couldn't get rois for /home/cparish/Capstone/asd_diagnosis_fmri/data/PCP/ADHD200/1951511_session_1/
Couldn't retrieve roi data for 1951511
(235, 200)
Couldn't get rois for /home/cparish/Capstone/asd_diagnosis_fmri/data/PCP/ADHD200/2024999_session_1/
Couldn't retrieve roi data for 2024999
Couldn't get rois for /home/cparish/Capstone/asd_diagnosis_fmri/data/PCP/ADHD200/2051479_session_1/
Couldn't retrieve roi data for 2051479
Couldn't get rois for /home/cparish/Capstone/asd_diagnosis_fmri/data/PCP/ADHD200/2101067_session_1/
Couldn't retrieve roi 

(77, 200)
(77, 200)
(77, 200)
(77, 200)
(77, 200)
(77, 200)
(77, 200)
Couldn't get rois for /home/cparish/Capstone/asd_diagnosis_fmri/data/PCP/ADHD200/0023026_session_1/
Couldn't retrieve roi data for 23026
(77, 200)
(77, 200)
Couldn't get rois for /home/cparish/Capstone/asd_diagnosis_fmri/data/PCP/ADHD200/0023030_session_1/
Couldn't retrieve roi data for 23030
Couldn't get rois for /home/cparish/Capstone/asd_diagnosis_fmri/data/PCP/ADHD200/0023031_session_1/
Couldn't retrieve roi data for 23031
Couldn't get rois for /home/cparish/Capstone/asd_diagnosis_fmri/data/PCP/ADHD200/0023033_session_1/
Couldn't retrieve roi data for 23033
Couldn't get rois for /home/cparish/Capstone/asd_diagnosis_fmri/data/PCP/ADHD200/0023035_session_1/
Couldn't retrieve roi data for 23035
Couldn't get rois for /home/cparish/Capstone/asd_diagnosis_fmri/data/PCP/ADHD200/0023036_session_1/
Couldn't retrieve roi data for 23036
Couldn't get rois for /home/cparish/Capstone/asd_diagnosis_fmri/data/PCP/ADHD200/0023037

In [13]:
# Save subjects
# Each ADHD200 Subject is written to its own dataset folder via Subject._save_subject.
save_dir = os.path.dirname(cur_dir) + '/data/ADHD200_subjects/'
for s in adhd200_subjects:
    s._save_subject(save_dir)

# Create ACPI Subjects

In [14]:
# Phenotypic table for ACPI. NOTE: this rebinds `phen_file`, `phen_df`, `dataset`
# and `dx_group`, which earlier dataset sections also use.
phen_file = asd_diag_dir + '/data/dicts/acpi_aggregated_phenotypic_data.csv'
phen_df = pd.read_csv(phen_file)
dataset = 'ACPI'
ACPI_preprocessed_dir = asd_diag_dir + '/data/PCP/ACPI/'
# Constant value stored as the 'dx_group' label of every subject built below.
dx_group = -1

In [18]:
# filt_noglobal = "roi_timeseries/_scan_rest_1/_csf_threshold_0.96/_gm_threshold_0.7/_wm_threshold_0.96/_compcor_ncomponents_5_selector_pc10.linear1.wm0.global0.motion1.quadratic1.gm0.compcor1.csf0/_bandpass_freqs_0.01.0.1/_mask_CC200/"
def get_roi(sub_folder, skip=0):
    """Return the ROI array parsed from ``<sub_folder>/ts_cc200_rois.csv``.

    The first line of the CSV is skipped; every remaining line is split on
    commas and converted to ``np.float32``. The resulting 2-D array has its two
    axes swapped and its first ``skip`` rows dropped.

    Returns ``None`` (after printing a message) when the CSV does not exist.
    """
    csv_path = os.path.join(sub_folder, 'ts_cc200_rois.csv')
    if not os.path.exists(csv_path):
        print(f"Couldn't get rois for {sub_folder}")
        return None

    with open(str(csv_path), newline='') as handle:
        body = handle.readlines()[1:]

    # One float32 vector per CSV line, stacked into a 2-D array.
    parsed = [
        np.array([np.float32(cell) for cell in record.split(",")])
        for record in body
    ]
    return np.swapaxes(np.array(parsed), 0, 1)[skip:]

In [19]:
# Build one Subject per ACPI phenotypic row whose ROI CSV exists.
acpi_subjects = []
for _, row in phen_df.iterrows():
    # Folder names use the id left-padded with zeros to seven characters.
    sub_id = str(row['ID']).rjust(7, '0')
    site_id = row['DATASET'].lower()
    sub_folder = ACPI_preprocessed_dir + f"{site_id}_ts_cc200_rois/{sub_id}-session_1/"
    data = get_roi(sub_folder)
    if data is None:
        print(f"Couldn't retrieve roi data for {row['ID']}")
        continue
    data_dict = {'filt_noglobal_roi_200_Cradd' : data}
    label_dict = {'dx_group': dx_group}
    tr = acpi_trs[site_id]
    # Boolean flag: True when the 'SEX' column equals 1.
    sex = row['SEX'] == 1
    subject = Subject(
        row['ID'], dataset, site_id, tr, sex, row['AGE_AT_SCAN_1'],
        data_dict, label_dict,
    )
    acpi_subjects.append(subject)
print(len(acpi_subjects))

Couldn't get rois for /home/cparish/Capstone/asd_diagnosis_fmri/data/PCP/ACPI/mta_1_ts_cc200_rois/0028040-session_1/
Couldn't retrieve roi data for 28040
Couldn't get rois for /home/cparish/Capstone/asd_diagnosis_fmri/data/PCP/ACPI/mta_1_ts_cc200_rois/0028050-session_1/
Couldn't retrieve roi data for 28050
Couldn't get rois for /home/cparish/Capstone/asd_diagnosis_fmri/data/PCP/ACPI/mta_1_ts_cc200_rois/0028119-session_1/
Couldn't retrieve roi data for 28119
155


In [20]:
# `data` still holds whatever get_roi returned for the last row of the loop above;
# this displays its shape as a quick sanity check.
data.shape

(1288, 200)

In [21]:
# Save subjects
# Each ACPI Subject is written to its own dataset folder via Subject._save_subject.
save_dir = os.path.dirname(cur_dir) + '/data/ACPI_subjects/'
for s in acpi_subjects:
    s._save_subject(save_dir)

# Site TRs can be found on the ABIDE website or in the following references:
* https://www.researchgate.net/figure/SCANNING-PARAMETERS-OF-DIFFERENT-SITES-OF-ABIDE-1_tbl1_335722272
* https://www.nature.com/articles/sdata201710/tables/4

In [14]:
# Site -> TR lookup tables. NOTE: this cell rebinds `site_trs` and `site_trs_2`
# from the top of the notebook; the ABIDE II keys here use a different naming
# scheme (e.g. 'BNI' rather than 'BNI_1').
# Every value is numeric: 'KKI' was previously the string '2.5', which the JSON
# dump below would have written as a string next to numeric TRs.
site_trs = {
    'CALTECH': 2, 'CMU': 2, 'KKI': 2.5, 'LEUVEN_1': 1.656, 'LEUVEN_2': 1.656, 'MAX_MUN': 3,
    'NYU': 2, 'OHSU': 2.5, 'OLIN': 2.5, 'PITT': 1.5, 'SBL': 2.2, 'SDSU': 2, 'STANFORD': 2,
    'TRINITY': 2, 'UCLA_1': 3, 'UCLA_2': 3, 'UM_1': 2, 'UM_2': 2, 'USM': 2, 'YALE': 2,
}
site_trs_2 = {
    'BNI': 3, 'EMC': 2, 'ETH': 2, 'GU': 2, 'IU': .813, 'IP': 2.7, 'KKI_1': 2.5, 'KKI_3': 2.5,
    'NYU_1': 2, 'NYU_2': 2, 'OHSU_1': 2.5, 'ONRC_1': .475, 'SDSU': 2, 'TCD': 2, 'UCD': 2,
    'UCLA': 3, 'USM': 2, 'UCLA_Long': 3, 'UPSM_Long': 1.5,
}

In [24]:
import json

# Persist the ABIDE I site -> TR table as JSON.
# The original chained assignment (`trs_save_file = save_dir = ...`) also rebound
# `save_dir` to this JSON file path, although `save_dir` is used as a subjects
# directory by the save cells; only `trs_save_file` is assigned now.
trs_save_file = os.path.dirname(cur_dir) + '/data/dicts/ABIDEI_site_trs.json'
with open(trs_save_file, 'w') as fp:
    json.dump(site_trs, fp)

In [27]:
# Ad-hoc check of a machine-specific absolute path (displayed as the cell output).
# NOTE(review): hard-coded home-directory path; not portable to other machines.
os.path.exists('/home/cparish/Capstone/abide/data/ABIDEII_preprocessed/')

False