In [1]:
import numpy as np
import scipy as sp
import pandas as pd

In [2]:
# Read the unrestricted behavioral table; Subject is parsed as a string
# and used as the row index.
data = (
    pd.read_csv('../data/unres_behav_data.csv', dtype={'Subject': str})
    .set_index('Subject')
)
print(data.shape)
data.head()

(184, 382)


Unnamed: 0_level_0,Release,Acquisition,Gender,Age,3T_Full_MR_Compl,T1_Count,T2_Count,3T_RS-fMRI_Count,3T_RS-fMRI_PctCompl,3T_Full_Task_fMRI,...,Noise_Comp,Odor_Unadj,Odor_AgeAdj,PainIntens_RawScore,PainInterf_Tscore,Taste_Unadj,Taste_AgeAdj,Mars_Log_Score,Mars_Errs,Mars_Final
Subject,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
100610,S900,Q08,M,26-30,True,2,1,4,100.0,True,...,2.0,122.25,110.45,0,38.6,84.84,85.31,1.92,1,1.88
102311,S500,Q06,F,26-30,True,1,1,4,100.0,True,...,4.4,122.25,110.45,1,52.2,98.31,99.06,1.8,0,1.8
102816,Q3,Q03,F,26-30,True,1,1,4,100.0,True,...,5.2,122.25,111.41,1,52.6,98.31,97.68,1.72,1,1.68
104416,S900,Q09,F,31-35,True,2,2,4,100.0,True,...,2.8,101.12,87.11,0,38.6,95.36,93.48,1.92,1,1.88
105923,MEG2,Q07,F,31-35,True,2,2,4,100.0,True,...,5.2,122.25,111.41,0,38.6,97.26,96.41,1.88,1,1.84


### Get only subjects that have complete data for 7T REST and MOVIE

In [3]:
# Columns used below to select subjects: the movie-completion flag and the
# 7T resting-state percent-complete value.
# Kept as a list so that indexing a DataFrame with it selects several columns.
cols_of_interest = ["fMRI_Movie_Compl", "7T_RS-fMRI_PctCompl"]

In [4]:
# Keep every row but only the columns listed in cols_of_interest.
data = data.loc[:, cols_of_interest]
data.head()

Unnamed: 0_level_0,fMRI_Movie_Compl,7T_RS-fMRI_PctCompl
Subject,Unnamed: 1_level_1,Unnamed: 2_level_1
100610,True,100.0
102311,True,100.0
102816,True,100.0
104416,True,100.0
105923,True,100.0


In [5]:
# Count the subjects whose movie flag is True and whose 7T resting-state
# percent-complete value is exactly 100.
movie_complete = data["fMRI_Movie_Compl"].eq(True)
rest_complete = data["7T_RS-fMRI_PctCompl"].eq(100.0)
print(data.loc[movie_complete & rest_complete].shape)

(176, 2)


In [6]:
# Subject IDs (index labels) of the rows that pass both completeness criteria.
complete_mask = (
    data["fMRI_Movie_Compl"].eq(True)
    & data["7T_RS-fMRI_PctCompl"].eq(100.0)
)
subj_list = data.loc[complete_mask].index.values
print(len(subj_list))
subj_list

176


array(['100610', '102311', '102816', '104416', '105923', '108323',
       '109123', '111514', '114823', '115017', '115825', '116726',
       '118225', '125525', '126426', '126931', '128935', '130114',
       '130518', '131217', '131722', '132118', '134627', '134829',
       '135124', '137128', '140117', '144226', '145834', '146129',
       '146432', '146735', '146937', '148133', '150423', '155938',
       '156334', '157336', '158035', '158136', '159239', '162935',
       '164131', '164636', '165436', '167036', '167440', '169040',
       '169343', '169444', '169747', '171633', '172130', '173334',
       '175237', '176542', '177140', '177645', '177746', '178142',
       '178243', '178647', '180533', '181232', '182436', '182739',
       '185442', '186949', '187345', '191033', '191336', '191841',
       '192439', '192641', '193845', '195041', '196144', '197348',
       '198653', '199655', '200210', '200311', '200614', '201515',
       '203418', '204521', '205220', '209228', '212419', '2140

### Get family structure for these subjects

For this you will need to download the restricted behavioral data from HCP and save it as a .csv in the `../data` directory, alongside the unrestricted behavioral data loaded above (here the file is called "res_behav_data.csv").

In [9]:
# Read the restricted behavioral table; Subject is parsed as a string
# and used as the row index.
res_behav_data = (
    pd.read_csv("../data/res_behav_data.csv", dtype={'Subject': str})
    .set_index("Subject")
)
print(res_behav_data.shape)
# Optional preview: res_behav_data.head()

(184, 200)


In [10]:
# One-column DataFrame holding the Mother_ID of every selected subject,
# with rows in the order given by subj_list.
mothers = res_behav_data.loc[subj_list, ["Mother_ID"]]
# Optional preview: mothers.head()

In [11]:
# Group the selected subjects into families: subjects that share a Mother_ID
# are treated as siblings.

# A subject without a Mother_ID cannot be placed in any family. Fail with a
# clear message instead of letting it surface later as an IndexError on an
# empty family.
if mothers["Mother_ID"].isna().any():
    raise ValueError("Some subjects have no Mother_ID; cannot build families")

# One pass over the table. sort=False keeps families in order of first
# appearance, and members of a family keep their original row order.
families = [
    members.index.values
    for _, members in mothers.groupby("Mother_ID", sort=False)
]

# Families can differ in size, so store them in a 1-D object array filled
# element by element. Calling np.array() on a ragged list raises ValueError
# on NumPy >= 1.24, and on equal-sized families it would build a 2-D array
# instead of an array of per-family arrays.
siblings_list = np.empty(len(families), dtype=object)
for position, members in enumerate(families):
    siblings_list[position] = members

print("There are {} unique families in the dataset".format(len(siblings_list)))

# Set 1 takes the first member of every family; set 2 takes the second member
# of every family that has one. Each set therefore contains at most one
# subject per mother. Both sets are sorted by subject ID.
subj_set1 = sorted(family[0] for family in siblings_list)
subj_set2 = sorted(family[1] for family in siblings_list if len(family) > 1)

print("There are {} unrelated subjects in set 1".format(len(subj_set1)))
print("There are {} unrelated subjects in set 2".format(len(subj_set2)))

There are 90 unique families in the dataset
There are 90 unrelated subjects in set 1
There are 84 unrelated subjects in set 2


In [12]:
# siblings_list

### Save the subject list and the family list

In [48]:
# Write both arrays to .npy files in the current working directory.
# allow_pickle=True lets np.save store object arrays via pickle.
for out_name, out_array in (
    ('subj_list.npy', subj_list),
    ('family_list.npy', siblings_list),
):
    np.save(out_name, out_array, allow_pickle=True)