In [None]:
import os

import pandas as pd

In [None]:
# Root data directory: the input tables are read from here and every result
# is written under '<base_dir>/outputs'.
base_dir = '/path/to/data'

# Read Data
## Read inclusion list

In [None]:
# Load the subject keys recommended for inclusion at each visit; each text
# file is split into one list entry per line.
path_include_bsl = '{}/outputs/lst_subjkey_include_bsl.txt'.format(base_dir)
path_include_y2 = '{}/outputs/lst_subjkey_include_y2.txt'.format(base_dir)

with open(path_include_bsl, 'r') as fh_bsl:
    lst_subjkey_bsl = fh_bsl.read().splitlines()

with open(path_include_y2, 'r') as fh_y2:
    lst_subjkey_y2 = fh_y2.read().splitlines()

In [None]:
# Report how many subject keys each inclusion list holds.
n_include_bsl = len(lst_subjkey_bsl)
n_include_y2 = len(lst_subjkey_y2)
print("Baseline #subj recommended to include: {}".format(n_include_bsl))
print("Year2 #subj recommended to include: {}".format(n_include_y2))

## Read beta weights

### SST beta weights

In [None]:
# Load the SST table. The second line of the file is skipped here; it is the
# row that is read separately further down as the column descriptions.
path_sst = '{}/mrisst02.txt'.format(base_dir)
df_sst = pd.read_table(path_sst, skiprows=[1])
# Show which visits (event names) are present.
print(df_sst['eventname'].unique())

In [None]:
# Split the SST table by visit, then order each subset by subject key and
# interview date.
sst_sort_cols = ['subjectkey', 'interview_date']
is_sst_bsl = df_sst['eventname'] == 'baseline_year_1_arm_1'
is_sst_y2 = df_sst['eventname'] == '2_year_follow_up_y_arm_1'

df_sst_bsl = df_sst.loc[is_sst_bsl].sort_values(by=sst_sort_cols)
df_sst_y2 = df_sst.loc[is_sst_y2].sort_values(by=sst_sort_cols)

In [None]:
# Row counts per visit, next to the number of distinct subject keys, so that
# repeated subjects within a visit become visible.
n_sst_bsl_subj = len(df_sst_bsl['subjectkey'].unique())
n_sst_y2_subj = len(df_sst_y2['subjectkey'].unique())
print("Length of the whole table: {}".format(len(df_sst)))
print("Length of basline: {}. Number of unique subject key: {}".format(len(df_sst_bsl), n_sst_bsl_subj))
print("Length of year2: {}. Number of unique subject key: {}".format(len(df_sst_y2), n_sst_y2_subj))

In [None]:
# Keep only the rows whose subject key appears in the matching inclusion list.
sst_bsl_included = df_sst_bsl['subjectkey'].isin(lst_subjkey_bsl)
sst_y2_included = df_sst_y2['subjectkey'].isin(lst_subjkey_y2)
df_sst_bsl_inc = df_sst_bsl.loc[sst_bsl_included]
df_sst_y2_inc = df_sst_y2.loc[sst_y2_included]
print("Length of inclusion for basline: {}".format(len(df_sst_bsl_inc)))
print("Length of inclusion for year2: {}".format(len(df_sst_y2_inc)))

In [None]:
# Preview the first five included baseline SST rows.
df_sst_bsl_inc.head(n=5)

### Emotional nback beta weights

In [None]:
# Load the emotional n-back table. The second line of the file is skipped
# here; it is the row that is read separately further down as the column
# descriptions.
path_nback = '{}/nback_bwroi02.txt'.format(base_dir)
df_nback = pd.read_table(path_nback, skiprows=[1])
# Show which visits (event names) are present.
print(df_nback['eventname'].unique())

In [None]:
# Split the n-back table by visit, then order each subset by subject key and
# interview date.
nback_sort_cols = ['subjectkey', 'interview_date']
is_nback_bsl = df_nback['eventname'] == 'baseline_year_1_arm_1'
is_nback_y2 = df_nback['eventname'] == '2_year_follow_up_y_arm_1'

df_nback_bsl = df_nback.loc[is_nback_bsl].sort_values(by=nback_sort_cols)
df_nback_y2 = df_nback.loc[is_nback_y2].sort_values(by=nback_sort_cols)

In [None]:
# Row counts per visit, next to the number of distinct subject keys, so that
# repeated subjects within a visit become visible.
n_nback_bsl_subj = len(df_nback_bsl['subjectkey'].unique())
n_nback_y2_subj = len(df_nback_y2['subjectkey'].unique())
print("Length of the whole table: {}".format(len(df_nback)))
print("Length of basline: {}. Number of unique subject key: {}".format(len(df_nback_bsl), n_nback_bsl_subj))
print("Length of year2: {}. Number of unique subject key: {}".format(len(df_nback_y2), n_nback_y2_subj))

In [None]:
# Keep only the rows whose subject key appears in the matching inclusion list.
nback_bsl_included = df_nback_bsl['subjectkey'].isin(lst_subjkey_bsl)
nback_y2_included = df_nback_y2['subjectkey'].isin(lst_subjkey_y2)
df_nback_bsl_inc = df_nback_bsl.loc[nback_bsl_included]
df_nback_y2_inc = df_nback_y2.loc[nback_y2_included]
print("Length of inclusion for basline: {}".format(len(df_nback_bsl_inc)))
print("Length of inclusion for year2: {}".format(len(df_nback_y2_inc)))

In [None]:
# Preview the first five included baseline n-back rows.
df_nback_bsl_inc.head(n=5)

### MID beta weights

In [None]:
# The MID beta weights are split over two files (part 1 and part 2). In both,
# the second line of the file is skipped; it is the row that is read
# separately further down as the column descriptions.
path_mid_p1 = '{}/midaparc03.txt'.format(base_dir)
path_mid_p2 = '{}/midaparcp203.txt'.format(base_dir)
df_mid_p1 = pd.read_table(path_mid_p1, skiprows=[1])
df_mid_p2 = pd.read_table(path_mid_p2, skiprows=[1])
# Show which visits (event names) each part contains.
print(df_mid_p1['eventname'].unique())
print(df_mid_p2['eventname'].unique())

In [None]:
# Split both MID parts by visit, then order every subset by subject key and
# interview date.
mid_sort_cols = ['subjectkey', 'interview_date']
mid_event_bsl = 'baseline_year_1_arm_1'
mid_event_y2 = '2_year_follow_up_y_arm_1'

df_mid_p1_bsl = df_mid_p1.loc[df_mid_p1['eventname'] == mid_event_bsl].sort_values(by=mid_sort_cols)
df_mid_p2_bsl = df_mid_p2.loc[df_mid_p2['eventname'] == mid_event_bsl].sort_values(by=mid_sort_cols)

df_mid_p1_y2 = df_mid_p1.loc[df_mid_p1['eventname'] == mid_event_y2].sort_values(by=mid_sort_cols)
df_mid_p2_y2 = df_mid_p2.loc[df_mid_p2['eventname'] == mid_event_y2].sort_values(by=mid_sort_cols)

In [None]:
# Row counts and distinct-subject counts for both MID parts, per visit.
mid_bsl_counts = (
    len(df_mid_p1_bsl),
    len(df_mid_p2_bsl),
    len(df_mid_p1_bsl['subjectkey'].unique()),
    len(df_mid_p2_bsl['subjectkey'].unique()),
)
mid_y2_counts = (
    len(df_mid_p1_y2),
    len(df_mid_p2_y2),
    len(df_mid_p1_y2['subjectkey'].unique()),
    len(df_mid_p2_y2['subjectkey'].unique()),
)
print("Length of the whole table: part1 {}, part2 {}".format(len(df_mid_p1), len(df_mid_p2)))
print("Length of basline: part1 {}, part2 {}. Number of unique subject key: part1 {}, part2 {}".format(*mid_bsl_counts))
print("Length of year2: part1 {}, part2 {}. Number of unique subject key: part1 {}, part2 {}".format(*mid_y2_counts))

In [None]:
# Join part 1 and part 2 of the MID table for each visit. Rows are matched on
# the shared identification columns below (not on subjectkey alone); any other
# column present in both parts gets a '_p1' / '_p2' suffix, and
# validate='one_to_one' makes pandas raise if the key combination is repeated
# on either side.
mid_merge_keys = ['subjectkey', 'src_subject_id', 'sex', 'interview_date', 'interview_age',
                  'eventname', 'collection_id', 'dataset_id', 'collection_title']
mid_merge_opts = dict(how='inner', on=mid_merge_keys, suffixes=('_p1', '_p2'), validate='one_to_one')

print(len(df_mid_p1_bsl.columns), len(df_mid_p2_bsl.columns))
df_mid_bsl = df_mid_p1_bsl.merge(df_mid_p2_bsl, **mid_merge_opts)

print(len(df_mid_p1_y2.columns), len(df_mid_p2_y2.columns))
df_mid_y2 = df_mid_p1_y2.merge(df_mid_p2_y2, **mid_merge_opts)

In [None]:
# Shape (rows, columns) of the merged MID table, baseline first, then year 2.
for merged_mid in (df_mid_bsl, df_mid_y2):
    print(merged_mid.shape)

In [None]:
# Keep only the rows whose subject key appears in the matching inclusion list.
mid_bsl_included = df_mid_bsl['subjectkey'].isin(lst_subjkey_bsl)
mid_y2_included = df_mid_y2['subjectkey'].isin(lst_subjkey_y2)
df_mid_bsl_inc = df_mid_bsl.loc[mid_bsl_included]
df_mid_y2_inc = df_mid_y2.loc[mid_y2_included]
print("Length of inclusion for basline: {}".format(len(df_mid_bsl_inc)))
print("Length of inclusion for year2: {}".format(len(df_mid_y2_inc)))

In [None]:
# Preview the first five included baseline MID rows.
df_mid_bsl_inc.head(n=5)

# Create Co-Activation Matrix
For each subject, output a correlation matrix and a raw data matrix (#ROI x 12 contrasts)

## Make dictionaries for key conversion
### SST

In [None]:
# Read only the first data row of the SST file: its values are used as the
# human-readable description of each column, its column names as the keys.
path_sst_dict = '{}/mrisst02.txt'.format(base_dir)
dict_sst = pd.read_table(path_sst_dict, nrows=1)
key_sst = dict_sst.columns.tolist()
header_sst = dict_sst.values.tolist()[0]

In [None]:
# Description -> column key lookup for SST, followed by a spot check on one
# known description.
find_key_sst = {desc: col for desc, col in zip(header_sst, key_sst)}
find_key_sst['Mean beta weight for SST correct go versus fixation contrast in ASEG ROI left-amygdala']

### Emotional Nback

In [None]:
# Read only the first data row of the n-back file: its values are used as the
# human-readable description of each column, its column names as the keys.
path_nback_dict = '{}/nback_bwroi02.txt'.format(base_dir)
dict_nback = pd.read_table(path_nback_dict, nrows=1)
key_nback = dict_nback.columns.tolist()
header_nback = dict_nback.values.tolist()[0]

In [None]:
# Description -> column key lookup for n-back, followed by a spot check on one
# known description.
find_key_nback = {desc: col for desc, col in zip(header_nback, key_nback)}
find_key_nback['Mean beta weight for nBack 0 back condition in APARC ROI rh-parstriangularis']

In [None]:
# Print the position of one description in the header list and the position of
# one column key in the key list.
# NOTE(review): presumably the two numbers are expected to be equal - confirm.
nback_probe_desc = 'Mean beta weight for nBack 0 back condition in APARC ROI rh-parstriangularis'
nback_probe_key = 'tfmri_nback_all_323'
print(header_nback.index(nback_probe_desc))
print(key_nback.index(nback_probe_key))

### MID

In [None]:
# Column keys of the merged (part 1 + part 2) baseline MID table, in column order.
key_mid = list(df_mid_bsl_inc.columns)

In [None]:
# Number of columns in the merged MID table.
len(key_mid)

In [None]:
# Read only the first data row of each MID file: its values are used as the
# human-readable description of each column, its column names as the keys.
path_mid_p1_dict = '{}/midaparc03.txt'.format(base_dir)
path_mid_p2_dict = '{}/midaparcp203.txt'.format(base_dir)

dict_mid_p1 = pd.read_table(path_mid_p1_dict, nrows=1)
key_mid_p1 = dict_mid_p1.columns.tolist()
header_mid_p1 = dict_mid_p1.values.tolist()[0]

dict_mid_p2 = pd.read_table(path_mid_p2_dict, nrows=1)
key_mid_p2 = dict_mid_p2.columns.tolist()
header_mid_p2 = dict_mid_p2.values.tolist()[0]

In [None]:
# Column key -> description, separately for MID part 1 and part 2.
dt1 = {col: desc for col, desc in zip(key_mid_p1, header_mid_p1)}
dt2 = {col: desc for col, desc in zip(key_mid_p2, header_mid_p2)}

In [None]:
# Build the description list for the merged MID table, one entry per column
# key and in the same order as key_mid. Part 1 is consulted first, then part 2.
header_mid = []
for key in key_mid:
    if key in dt1:
        header_mid.append(dt1[key])
    elif key in dt2:
        header_mid.append(dt2[key])
    else:
        # A key with no description (e.g. a column renamed with the '_p1' /
        # '_p2' merge suffix) must still occupy its slot: header_mid is zipped
        # position-by-position with key_mid, so skipping it would pair every
        # following description with the wrong column. The key itself is used
        # as the placeholder description.
        print("No match for {}?".format(key))
        header_mid.append(key)

In [None]:
# Description -> column key lookup for the merged MID table, followed by a
# spot check on one known description.
find_key_mid = {desc: col for desc, col in zip(header_mid, key_mid)}
find_key_mid['Beta weight for MID all anticipation of small reward versus neutral contrast in APARC ROI lh-cuneus']

## Determine column and index labels

In [None]:
# The 12 contrasts that form the columns of each data matrix, four per task,
# in the order n-back, MID, SST.
contrasts_nback = [
    'nBack 2 back versus 0 back contrast',
    'nBack face versus place contrast',
    'nBack negative face versus neutral face contrast',
    'nBack positive face versus neutral face contrast',
]
contrasts_mid = [
    'MID all anticipation of loss versus neutral contrast',
    'MID all anticipation of reward versus neutral contrast',
    'MID all loss positive versus negative feedback contrast',
    'MID all reward positive versus negative feedback contrast',
]
contrasts_sst = [
    'SST correct stop versus correct go contrast',
    'SST incorrect stop versus correct go contrast',
    'SST correct stop versus incorrect stop contrast',
    'SST incorrect go versus correct go contrast',
]
lst_contrasts = contrasts_nback + contrasts_mid + contrasts_sst

In [None]:
# Display the contrast list for visual confirmation.
lst_contrasts

In [None]:
# Collect every SST description that mentions the "correct go versus fixation"
# contrast; the ROI names are extracted from these descriptions afterwards.
sst_roi_probe = 'SST correct go versus fixation contrast'
sub_header_sst = list(filter(lambda desc: sst_roi_probe in desc, header_sst))
print(len(sub_header_sst))

In [None]:
# Extract the ROI name from each selected SST description. The ROI is the text
# that follows "<contrast label> in "; the offset is derived from the label
# rather than hard-coded, so it stays correct whatever precedes the label.
roi_contrast_label = 'SST correct go versus fixation contrast'
roi_offset = len(roi_contrast_label) + len(' in ')
lst_rois_all = [x[x.index(roi_contrast_label) + roi_offset:] for x in sub_header_sst]

# ROIs excluded from the analysis: white matter, ventricles, brain stem and CSF.
lst_roi_rm = ['ASEG ROI left-cerebral-white-matter','ASEG ROI left-lateral-ventricle','ASEG ROI left-inf-lat-vent',
              'ASEG ROI left-cerebellum-white-matter','ASEG ROI 3rd-ventricle','ASEG ROI 4th-ventricle','ASEG ROI brain-stem',
              'ASEG ROI csf','ASEG ROI right-cerebral-white-matter','ASEG ROI right-lateral-ventricle','ASEG ROI right-inf-lat-vent',
              'ASEG ROI right-cerebellum-white-matter']
lst_rois = [x for x in lst_rois_all if x not in lst_roi_rm]

In [None]:
# Number of ROIs kept after the exclusions.
len(lst_rois)

In [None]:
# Save the retained ROI names, one per line, in matrix row order.
path_roi_list = '{}/outputs/lst_rois_APARC_ASEG_86ROIs.txt'.format(base_dir)
with open(path_roi_list, 'w') as fh_rois:
    fh_rois.writelines('{}\n'.format(roi_name) for roi_name in lst_rois)

## Make matrix
### Baseline

In [None]:
# Sub-folder of '<base_dir>/outputs' that receives the baseline matrices.
outfolder='coactivation_matrices_bsl_86ROIs'

In [None]:
# Baseline: for every subject in the inclusion list, build the ROI x contrast
# matrix of beta weights and save it; when it is complete, also save the
# ROI x ROI correlation matrix computed across the contrasts.

# (task tag, description list, description -> column key, baseline table).
# The tag is searched for inside the contrast name, in this order.
task_sources = [
    ('nBack', header_nback, find_key_nback, df_nback_bsl_inc),
    ('MID', header_mid, find_key_mid, df_mid_bsl_inc),
    ('SST', header_sst, find_key_sst, df_sst_bsl_inc),
]

# Index every table by subject key once, so a subject's row is a direct lookup
# instead of a full-table scan per value. If a subject has several rows, the
# first one (in the table's current order) is used.
task_lookup = {}
subject_rows = {}
for tag, headers, find_key, table in task_sources:
    task_lookup[tag] = (headers, find_key)
    subject_rows[tag] = table.drop_duplicates(subset='subjectkey', keep='first').set_index('subjectkey')

# Resolve the column key of each (contrast, ROI) pair once: the answer does not
# depend on the subject. None marks a pair without exactly one matching column.
contrast_columns = {}
for contrast in lst_contrasts:
    task = next((tag for tag, _, _, _ in task_sources if tag in contrast), None)
    if task is None:
        print("ERROR: No match for task name?")
        contrast_columns[contrast] = (None, [None] * len(lst_rois))
        continue
    headers, find_key = task_lookup[task]
    lst_keys = []
    for roi in lst_rois:
        sub_str = contrast + ' in ' + roi
        # Non-string descriptions (e.g. an empty cell read as NaN) cannot match.
        lst_hd = [x for x in headers if isinstance(x, str) and sub_str in x]
        if len(lst_hd) != 1:
            print("WARNING: multiple match?? Or no match??")
            lst_keys.append(None)
        else:
            lst_keys.append(find_key[lst_hd[0]])
    contrast_columns[contrast] = (task, lst_keys)

nan = float('nan')
out_dir = '{}/outputs/{}'.format(base_dir, outfolder)
os.makedirs(out_dir, exist_ok=True)  # make sure the output folder exists

lst_usable = []
for subj in lst_subjkey_bsl:
    # One row per task for this subject, or None when the subject is absent.
    subj_rows = {tag: (rows.loc[subj] if subj in rows.index else None)
                 for tag, rows in subject_rows.items()}

    df_tmp = pd.DataFrame(index=lst_rois, columns=lst_contrasts)
    for contrast in lst_contrasts:
        task, lst_keys = contrast_columns[contrast]
        subj_row = subj_rows.get(task)
        if subj_row is None:
            # Unknown task or subject missing from this table: the column stays
            # NaN so the subject is reported below instead of raising.
            df_tmp[contrast] = nan
        else:
            # Unresolved (contrast, ROI) pairs become NaN, which keeps the
            # column length equal to the number of ROIs.
            df_tmp[contrast] = [nan if ky is None else subj_row[ky] for ky in lst_keys]

    # The raw data matrix is always written, even when it contains NaN.
    df_tmp.to_csv('{}/{}_86ROI_12contrasts_data_matrix_bsl.csv'.format(out_dir, subj))
    if df_tmp.isnull().values.any():
        print("{} has NaN in its data matrix".format(subj))
        continue

    # Transposing puts ROIs in the columns, so corr() yields ROI x ROI.
    df_corr = df_tmp.T.corr()
    if df_corr.isnull().values.any():
        print("{} has NaN in its correlation matrix".format(subj))
        continue

    lst_usable.append(subj)
    df_corr.to_csv('{}/{}_86ROI_12contrasts_corr_matrix_bsl.txt'.format(out_dir, subj), sep=' ', header=False, float_format='%.3f', index=False)

In [None]:
# Number of baseline subjects with a NaN-free data and correlation matrix.
len(lst_usable)

In [None]:
# Save the usable baseline subject keys, one per line.
path_usable_bsl = "{}/outputs/{}/lst_usable_bsl.txt".format(base_dir, outfolder)
with open(path_usable_bsl, 'w') as fh_usable:
    fh_usable.writelines("{}\n".format(usable_subj) for usable_subj in lst_usable)

### Year 2

In [None]:
# Sub-folder of '<base_dir>/outputs' that receives the year-2 matrices.
outfolder='coactivation_matrices_y2_86ROIs'

In [None]:
# Year 2: for every subject in the inclusion list, build the ROI x contrast
# matrix of beta weights and save it; when it is complete, also save the
# ROI x ROI correlation matrix computed across the contrasts.

# (task tag, description list, description -> column key, year-2 table).
# The tag is searched for inside the contrast name, in this order.
task_sources = [
    ('nBack', header_nback, find_key_nback, df_nback_y2_inc),
    ('MID', header_mid, find_key_mid, df_mid_y2_inc),
    ('SST', header_sst, find_key_sst, df_sst_y2_inc),
]

# Index every table by subject key once, so a subject's row is a direct lookup
# instead of a full-table scan per value. If a subject has several rows, the
# first one (in the table's current order) is used.
task_lookup = {}
subject_rows = {}
for tag, headers, find_key, table in task_sources:
    task_lookup[tag] = (headers, find_key)
    subject_rows[tag] = table.drop_duplicates(subset='subjectkey', keep='first').set_index('subjectkey')

# Resolve the column key of each (contrast, ROI) pair once: the answer does not
# depend on the subject. None marks a pair without exactly one matching column.
contrast_columns = {}
for contrast in lst_contrasts:
    task = next((tag for tag, _, _, _ in task_sources if tag in contrast), None)
    if task is None:
        print("ERROR: No match for task name?")
        contrast_columns[contrast] = (None, [None] * len(lst_rois))
        continue
    headers, find_key = task_lookup[task]
    lst_keys = []
    for roi in lst_rois:
        sub_str = contrast + ' in ' + roi
        # Non-string descriptions (e.g. an empty cell read as NaN) cannot match.
        lst_hd = [x for x in headers if isinstance(x, str) and sub_str in x]
        if len(lst_hd) != 1:
            print("WARNING: multiple match?? Or no match??")
            lst_keys.append(None)
        else:
            lst_keys.append(find_key[lst_hd[0]])
    contrast_columns[contrast] = (task, lst_keys)

nan = float('nan')
out_dir = '{}/outputs/{}'.format(base_dir, outfolder)
os.makedirs(out_dir, exist_ok=True)  # make sure the output folder exists

lst_usable = []
for subj in lst_subjkey_y2:
    # One row per task for this subject, or None when the subject is absent.
    subj_rows = {tag: (rows.loc[subj] if subj in rows.index else None)
                 for tag, rows in subject_rows.items()}

    df_tmp = pd.DataFrame(index=lst_rois, columns=lst_contrasts)
    for contrast in lst_contrasts:
        task, lst_keys = contrast_columns[contrast]
        subj_row = subj_rows.get(task)
        if subj_row is None:
            # Unknown task or subject missing from this table: the column stays
            # NaN so the subject is reported below instead of raising.
            df_tmp[contrast] = nan
        else:
            # Unresolved (contrast, ROI) pairs become NaN, which keeps the
            # column length equal to the number of ROIs.
            df_tmp[contrast] = [nan if ky is None else subj_row[ky] for ky in lst_keys]

    # The raw data matrix is always written, even when it contains NaN.
    df_tmp.to_csv('{}/{}_86ROI_12contrasts_data_matrix_y2.csv'.format(out_dir, subj))
    if df_tmp.isnull().values.any():
        print("{} has NaN in its data matrix".format(subj))
        continue

    # Transposing puts ROIs in the columns, so corr() yields ROI x ROI.
    df_corr = df_tmp.T.corr()
    if df_corr.isnull().values.any():
        print("{} has NaN in its correlation matrix".format(subj))
        continue

    lst_usable.append(subj)
    df_corr.to_csv('{}/{}_86ROI_12contrasts_corr_matrix_y2.txt'.format(out_dir, subj), sep=' ', header=False, float_format='%.3f', index=False)

In [None]:
# Number of year-2 subjects with a NaN-free data and correlation matrix.
len(lst_usable)

In [None]:
# Save the usable year-2 subject keys, one per line.
path_usable_y2 = "{}/outputs/{}/lst_usable_y2.txt".format(base_dir, outfolder)
with open(path_usable_y2, 'w') as fh_usable:
    fh_usable.writelines("{}\n".format(usable_subj) for usable_subj in lst_usable)