In [1]:
import pandas as pd

### load adni demographics

In [2]:
# Read the ADNI demographics/model table. skipinitialspace=True discards the
# blanks that follow each delimiter in this CSV.
adni_demog_csv = '/home/angela/Desktop/adni_rsfmri/admci_model_multisite_scanner_fd_snr_20151113.csv'
adni_df = pd.read_csv(adni_demog_csv, skipinitialspace=True)

In [3]:
# Column headers may carry stray whitespace; str.strip removes it from both
# ends of every column name.
adni_df = adni_df.rename(columns=str.strip)

In [4]:
# 'Unnamed: 0' is the placeholder pandas gives a header-less column; it holds
# the subject identifier, so call it 'ID'. index=str also converts the row
# labels to strings.
adni_df = adni_df.rename(columns={'Unnamed: 0': 'ID'}, index=str)

In [5]:
# Keep only the rows whose adni2 flag is non-zero.
# The explicit .copy() makes the subset an independent frame, so the column
# assignments and in-place edits performed on adni_df in later cells do not
# trigger SettingWithCopyWarning.
adni_df = adni_df[adni_df['adni2'] != 0].copy()

In [6]:
# Build the short subject ID: drop trailing whitespace, discard the first
# seven characters of the original identifier and prefix the rest with 's'.
# Vectorised string operations replace the former row loop, which fetched the
# value with an integer position on a label-indexed row (a deprecated
# positional fallback in recent pandas) and wrote one cell at a time.
# Missing IDs stay missing instead of raising.
adni_df['ID'] = 's' + adni_df['ID'].str.rstrip().str[7:]

In [7]:
# Discard the columns that are not carried forward into the merged table.
unused_adni_columns = [
    'excluded', 'criugm', 'mnimci', 'ad_criugm', 'criugm_mci',
    'adpd', 'snr_vmpfc', 'fd',
]
adni_df = adni_df.drop(columns=unused_adni_columns)

In [8]:
# Cohort indicator: every row of this frame gets cimaq = 0.
adni_df = adni_df.assign(cimaq=0)

### make dummy variable for female sex

In [9]:
# Dummy-code sex from the 'gender' column: code 1 -> 0, code 2 -> 1.
# Any other code (or a missing value) now becomes NaN. The former row loop had
# no fallback branch, so such a row either raised NameError (first row) or
# silently reused the value computed for the preceding row.
# The float cast keeps the dtype that the per-cell .loc writes used to produce.
gender_to_female = {1: 0, 2: 1}
adni_df['Female'] = adni_df['gender'].map(gender_to_female).astype(float)

### make dummy variables for scanner (Achieva = 5, Gemini = 6, Ingenia = 7, Ingenuity = 8, Intera = 9)

In [10]:
# One indicator column per scanner code found in 'mtladni2sites'
# (5 = Achieva, 6 = Gemini, 7 = Ingenia, 8 = Ingenuity, 9 = Intera).
# A code -> column table plus a vectorised comparison replaces five copy-pasted
# if/else pairs executed row by row. Each column is 1.0 where the code matches
# and 0.0 otherwise (including missing codes), and the columns are created
# even when the frame has no rows. The float cast keeps the dtype produced by
# the former per-cell .loc writes.
scanner_dummy_codes = {
    'adni_achieva': 5,
    'adni_gemini': 6,
    'adni_ingenia': 7,
    'adni_ingenuity': 8,
    'adni_intera': 9,
}
for dummy_name, scanner_code in scanner_dummy_codes.items():
    adni_df[dummy_name] = (adni_df['mtladni2sites'] == scanner_code).astype(float)
            

### make dummy variables for diagnosis

In [11]:
# Indicator columns for the diagnosis codes: 1 -> CN, 2 -> MCI, 3 -> AD.
# The codes are coerced to numbers first so the comparison works whether
# read_csv parsed the column as text ('1') or as numbers (1 / 1.0). The former
# loop compared against the strings '1', '2', '3' only, which yields all-zero
# dummies for a numerically parsed column. Missing or unparseable codes give
# 0.0 in all three columns. The float cast keeps the dtype produced by the
# former per-cell .loc writes.
diagnosis_code = pd.to_numeric(adni_df['diagnosis'], errors='coerce')
for dummy_name, code in (('CN', 1), ('MCI', 2), ('AD', 3)):
    adni_df[dummy_name] = (diagnosis_code == code).astype(float)

In [12]:
# Drop the coded source columns (scanner, gender, diagnosis) now that they
# have been expanded into dummies, along with the three pairwise group columns.
coded_adni_columns = [
    'mtladni2sites', 'gender', 'diagnosis',
    'ctrlvsmci', 'ctrlvsad', 'mcivsad',
]
adni_df = adni_df.drop(columns=coded_adni_columns)

### qc for adni2

In [13]:
# Load the ADNI2 fMRI quality-control table.
adni_qc_csv = '/home/angela/Desktop/adni_rsfmri/adni2_qc_fmri.csv'
adni_qc = pd.read_csv(adni_qc_csv)

In [14]:
# Upper-case the QC column name; index=str also converts row labels to strings.
adni_qc = adni_qc.rename(columns={'qc': 'QC'}, index=str)

### fd for adni1

In [15]:
# Load the group-level scrubbing (motion) QC table; blanks that follow each
# delimiter are skipped while parsing.
adni_fd_csv = '/home/angela/Desktop/adni_rsfmri/qc_group_motion/qc_scrubbing_group.csv'
adni_fd = pd.read_csv(adni_fd_csv, skipinitialspace=True)

In [16]:
# Remove surrounding whitespace from the column names of adni_fd.
adni_fd = adni_fd.rename(columns=str.strip)

In [17]:
# The column whose header is the empty string is renamed to 'session';
# index=str also converts the row labels to strings.
adni_fd = adni_fd.rename(columns={'': 'session'}, index=str)

In [18]:
# Derive the subject ID as the first five characters of the session label.
# A vectorised .str slice replaces the row loop with per-cell .loc writes;
# a missing session label now yields a missing ID instead of raising, and the
# column is created even when the frame has no rows.
adni_fd['ID'] = adni_fd['session'].str[:5]

### merge adni qc and fd to demographics 

In [19]:
# Left join on ID: every demographics row is kept, and rows without a QC
# entry get NaN in 'QC'.
qc_by_subject = adni_qc[['ID', 'QC']]
adni_df = adni_df.merge(qc_by_subject, on='ID', how='left')

In [20]:
# Left join on ID: every demographics row is kept, and rows without a motion
# entry get NaN in 'FD_scrubbed'.
# NOTE(review): adni_fd is keyed by session; if a subject has several sessions
# this join repeats that subject's row once per session - confirm the table
# holds one row per ID.
fd_by_subject = adni_fd[['ID', 'FD_scrubbed']]
adni_df = adni_df.merge(fd_by_subject, on='ID', how='left')

In [None]:
#adni_df.to_csv('/home/angela/Desktop/adni_rsfmri/adni2_demog.csv',index=False)

### load cimaq

In [22]:
# Read the CIMA-Q demographics table.
cimaq_demog_csv = '/home/angela/Desktop/cimaq/cimaq_demog.csv'
cimaq_df = pd.read_csv(cimaq_demog_csv)

### add dummy variable for cimaq

In [23]:
# Cohort indicator: every row of this frame gets cimaq = 1.
cimaq_df = cimaq_df.assign(cimaq=1)

In [24]:
# Give the three CIMA-Q site columns their 'cimaq_'-prefixed names in a single
# rename; index=str also converts the row labels to strings.
cimaq_site_names = {
    'SIEMENS_site': 'cimaq_siemens',
    'CINQ': 'cimaq_cinq',
    'CHUS': 'cimaq_chus',
}
cimaq_df = cimaq_df.rename(index=str, columns=cimaq_site_names)

In [25]:
# Remove every hyphen from the subject IDs (literal match, not a regex).
dehyphenated_ids = cimaq_df['ID'].str.replace('-', '', regex=False)
cimaq_df = cimaq_df.assign(ID=dehyphenated_ids)

In [26]:
# Use the shared column name 'age' for the age field; index=str keeps the row
# labels as strings.
cimaq_df = cimaq_df.rename(columns={'Age_Loris': 'age'}, index=str)

### dummy variables for diagnosis

In [27]:
# Indicator columns for diagnosis, driven by the labels in 'Diagnostic_Loris'.
# Each dummy is 1.0 when the label is one of those listed for it and 0.0
# otherwise (missing labels give 0.0 everywhere). MCI covers two labels.
# A label table plus a vectorised isin() replaces the copy-pasted if/else
# pairs executed row by row, and the columns are created even when the frame
# has no rows. The float cast keeps the dtype produced by the former per-cell
# .loc writes.
diagnosis_labels = {
    'CN': ['cognitivement_sain_(cs)'],
    'SCI': ['troubles_subjectifs_de_cognition'],
    'MCI': ['trouble_cognitif_léger_précoce', 'trouble_cognitif_léger_tardif'],
    'AD': ['démence_de_type_alzheimer-légère'],
}
for dummy_name, labels in diagnosis_labels.items():
    cimaq_df[dummy_name] = cimaq_df['Diagnostic_Loris'].isin(labels).astype(float)

In [28]:
# Show the current column names as a plain list.
list(cimaq_df.columns)

['PSCID',
 'DCCID',
 'Visit_Label',
 'Site_Loris',
 'Site_Mincheader',
 'Manufacturer_Mincheader',
 'Scanner_Model_Mincheader',
 'Gender_Loris',
 'age',
 'Diagnostic_Loris',
 'FD_scrubbed',
 'ID',
 'cimaq_siemens',
 'cimaq_cinq',
 'cimaq_chus',
 'Male',
 'Female',
 'QC',
 'cimaq',
 'CN',
 'SCI',
 'MCI',
 'AD']

In [29]:
# Drop the identifier, visit, site, scanner, gender and diagnosis source
# columns listed below.
cimaq_source_columns = [
    'PSCID', 'DCCID', 'Visit_Label', 'Site_Loris', 'Site_Mincheader',
    'Manufacturer_Mincheader', 'Scanner_Model_Mincheader',
    'Gender_Loris', 'Diagnostic_Loris',
]
cimaq_df = cimaq_df.drop(columns=cimaq_source_columns)

In [31]:
# Set the ADNI2 cohort flag and every ADNI scanner indicator to 0 for all rows
# of the CIMA-Q frame.
for adni_flag in ('adni2', 'adni_achieva', 'adni_gemini',
                  'adni_ingenia', 'adni_ingenuity', 'adni_intera'):
    cimaq_df[adni_flag] = 0

### add dummy variables for cimaq sites in adni2

In [33]:
# Set the CIMA-Q site indicators and the SCI diagnosis dummy to 0 for all rows
# of the ADNI frame.
for cimaq_flag in ('cimaq_siemens', 'cimaq_cinq', 'cimaq_chus', 'SCI'):
    adni_df[cimaq_flag] = 0

In [None]:
# Preview the first five rows of the ADNI frame.
adni_df.head(5)

### reorder columns in both dataframes

In [34]:
# Select the columns of the ADNI frame in the order used for the merged table.
adni_column_order = [
    'ID', 'age', 'Female',
    'CN', 'SCI', 'MCI', 'AD',
    'QC', 'FD_scrubbed',
    'adni2', 'cimaq',
    'adni_achieva', 'adni_gemini', 'adni_ingenia', 'adni_ingenuity', 'adni_intera',
    'cimaq_siemens', 'cimaq_cinq', 'cimaq_chus',
]
adni_df = adni_df.loc[:, adni_column_order]

In [35]:
# Select the columns of the CIMA-Q frame in the order used for the merged table.
cimaq_column_order = [
    'ID', 'age', 'Female',
    'CN', 'SCI', 'MCI', 'AD',
    'QC', 'FD_scrubbed',
    'adni2', 'cimaq',
    'adni_achieva', 'adni_gemini', 'adni_ingenia', 'adni_ingenuity', 'adni_intera',
    'cimaq_siemens', 'cimaq_cinq', 'cimaq_chus',
]
cimaq_df = cimaq_df.loc[:, cimaq_column_order]

### concatenate dataframes 

In [36]:
# Frames to stack: ADNI rows first, then CIMA-Q rows.
frames = [
    adni_df,
    cimaq_df,
]

In [37]:
# Stack the frames row-wise; each frame's own index labels are kept as they are.
master_df = pd.concat(frames, axis=0)

In [38]:
# Write the combined table to disk without the index column.
master_csv = '/home/angela/Desktop/adni_cimaq/adni_cimaq_info.csv'
master_df.to_csv(master_csv, index=False)