## Get list of subjects.

In [None]:
import os, sys, shutil
import numpy as np
import pandas as pd
import subprocess, re
from pandas import read_csv

## Specify directories.
# Root locations used by the cells below.
data_dir = '/space/will/4/users/EMBARC/DATA'
dicom_dir = '/space/will/3/users/EMBARC/DICOMs'
recon_dir = '/space/will/3/users/EMBARC/Recons'
behav_dir = '/space/will/3/users/EMBARC/behavior'
fast_dir = '/space/will/3/users/EMBARC/EMBARC-FAST'

## Read in baseline csv, indexed by ProjectSpecificID.
baseline_csv = os.path.join(behav_dir, 'embarc_baseline_totals.csv')
baseline = read_csv(baseline_csv).set_index('ProjectSpecificID', drop=True)

## Read in QC csv and keep only the session / ANAT / EMO columns.
qc = read_csv('/space/will/4/users/EMBARC/QC/aafzal_9_7_2016_11_37_59_MASTER.csv')

columns = ['Session','ANAT','ANAT_QC','EMO','EMO_QC']
qc = qc[columns]
qc.columns = list(columns)

## Restrict to subjects in baseline_totals csv.
# A session is kept when its first six characters appear in the baseline index.
in_baseline = [s for s in qc.Session if s[:6] in baseline.index]
qc = qc[qc.Session.isin(in_baseline)]
qc = qc.set_index('Session', drop=True)
# NOTE(review): indexing with a boolean frame masks cells rather than dropping
# rows -- confirm whether a dropna() was intended here.
qc = qc[qc.notnull()]

## Restrict QC csv to valid baseline subjects.
# Keep sessions that start with a site prefix, contain 'MR1', and contain
# neither 'PH' nor 'TM'.
# NOTE(review): the '+AF8-' stripping / upper-casing is applied to the candidate
# names only, not to the index they are matched against -- confirm intent.
site_prefixes = ('CU', 'MG', 'TX', 'UM')
valid_sessions = []
for s in qc.index:
    if not s.startswith(site_prefixes):
        continue
    if 'PH' in s or 'TM' in s or 'MR1' not in s:
        continue
    valid_sessions.append(s.replace('+AF8-', '').upper())
qc = qc[qc.index.isin(valid_sessions)]

## Manually identify Failed EMO scans.
# bad_eor1 = ['CU0001CUMR1R1','MG0202MGMR1R1','MG0248MGMR1R1','TX0045TXMR1R1','TX0065TXMR1R1','UM0024UMMR1R1','UM0094UMMR1R1']

# qc = qc[(qc.ANAT_QC != 'FAIL') & (qc.EMO_QC != 'FAIL')]     ## Remove FAIL scans.
# qc = qc[~(qc.index).isin(bad_eor1)]                          ## Remove bad EOR1 scans (manual inspection)

## Check against existing DICOMs.
dicoms = os.listdir(dicom_dir)
data = os.listdir(data_dir)

## Sessions excluded by hand:
##   MG0006MGMR1R1 - No ANAT.
##   CU0089CUMR1R1 - No EMO.
# Filtering (instead of list.remove) keeps the cell from raising ValueError
# when one of these sessions is not present in the QC index.
excluded = ('MG0006MGMR1R1', 'CU0089CUMR1R1')
subjects = [s for s in qc.index if s not in excluded]
# print 'Subjects remaining after QC: %d' %(len(subjects))
print('Subjects: %d' % len(subjects))

# Sets make each membership test O(1); the listings themselves stay lists.
dicom_names = set(dicoms)
data_names = set(data)
n_missing_dicoms = sum(1 for s in subjects if s not in dicom_names)
n_missing_data = sum(1 for s in subjects if s not in data_names)

print('Missing DICOMs: %s' % n_missing_dicoms)
print('Missing DATA: %s' % n_missing_data)

## Load EMO run numbers from QC csv.

In [None]:
# Subjects listed in the missing-subjects csv ('Subject name' column).
missing_csv = '/space/will/3/users/EMBARC/Etkin/EMBARC_missing_subjects.csv'
missing_subs = list(read_csv(missing_csv)['Subject name'])
n_missing = len(missing_subs)
print('Missing: %d' % n_missing)

# Sessions whose six-character prefix appears in the missing-subjects list.
found = [session for session in subjects if session[:6] in missing_subs]

## Create DataFrame to store run numbers.
rundf = pd.DataFrame(columns=['Subject_ID','anat','emo'])
rundf['Subject_ID'] = found
rundf = rundf.set_index('Subject_ID', drop=True)

# Copy the ANAT / EMO run numbers recorded in the QC table.
for subject in found:
    for run_col, qc_col in (('anat', 'ANAT'), ('emo', 'EMO')):
        rundf.loc[subject, run_col] = qc.loc[subject, qc_col]

# Keep only subjects that have both run numbers.
rundf = rundf.dropna()
print('Found EMO runs for %s subjects.' % len(rundf))

## Copy functional files to Etkin Dir.

In [None]:
dest_dir = '/space/will/3/users/EMBARC/Etkin/emo_raw'

# For every subject whose EMO run directory exists, build the source and
# destination paths and report the subject with its run number.
# The copy itself is left disabled below.
for subject in rundf.index:
    emo_run = rundf.loc[subject, 'emo']
    candidate = os.path.join(data_dir, subject, 'bold', '%03d' % emo_run)
    if not os.path.isdir(candidate):
        continue
    run_dir = candidate
    src = os.path.join(run_dir, '%s_bld%03d_rest.nii' % (subject, emo_run))
    dst = os.path.join(dest_dir, '%s_emo_raw.nii' % subject)
    print('%s %s' % (subject, emo_run))
#     shutil.copy(src,dst)

## Copy anatomical files to Etkin Dir.

In [66]:
dest_dir = '/space/will/3/users/EMBARC/Etkin/anat_raw'

# For every subject whose anatomical run directory exists, build the source
# and destination paths and report the subject with its run number.
# The copy itself is left disabled below.
for subject in rundf.index:
    anat_run = rundf.loc[subject, 'anat']
    candidate = os.path.join(data_dir, subject, 'anat', '%03d' % anat_run)
    if not os.path.isdir(candidate):
        continue
    run_dir = candidate
    src = os.path.join(run_dir, '%s_mpr%03d.nii' % (subject, anat_run))
    dst = os.path.join(dest_dir, '%s_anat_raw.nii' % subject)
    print('%s %s' % (subject, anat_run))
#     shutil.copy(src,dst)

CU0001CUMR1R1 3.0
CU0002CUMR1R1 3.0
CU0004CUMR1R1 3.0
CU0007CUMR1R1 3.0
CU0066CUMR1R1 4.0
CU0070CUMR1R1 3.0
CU0090CUMR1R1 3.0
CU0092CUMR1R1 4.0
CU0093CUMR1R1 4.0
CU0094CUMR1R1 4.0
CU0095CUMR1R1 4.0
CU0097CUMR1R1 4.0
CU0100CUMR1R1 4.0
CU0102CUMR1R1 4.0
CU0103CUMR1R1 4.0
CU0104CUMR1R1 4.0
CU0105CUMR1R1 4.0
CU0106CUMR1R1 22.0
CU0108CUMR1R1 4.0
CU0110CUMR1R1 4.0
CU0111CUMR1R1 4.0
CU0113CUMR1R1 4.0
CU0116CUMR1R1 4.0
CU0117CUMR1R1 4.0
CU0119CUMR1R1 4.0
CU0120CUMR1R1 5.0
CU0121CUMR1R1 4.0
CU0125CUMR1R1 4.0
CU0126CUMR1R1 3.0
CU0127CUMR1R1 4.0
CU0128CUMR1R1 4.0
CU0129CUMR1R1 4.0
CU0130CUMR1R1 4.0
CU0131CUMR1R1 4.0
CU0132CUMR1R1 4.0
CU0133CUMR1R1 4.0
CU0134CUMR1R1 4.0
CU0135CUMR1R1 4.0
MG0001MGMR1R1 4.0
MG0002MGMR1R1 4.0
MG0003MGMR1R1 4.0
MG0004MGMR1R1 4.0
MG0005MGMR1R1 4.0
MG0006MGMR1R2 4.0
MG0018MGMR1R1 7.0
MG0172MGMR1R1 4.0
MG0206MGMR1R1 4.0
MG0213MGMR1R1 4.0
MG0218MGMR1R1 4.0
MG0220MGMR1R1 4.0
MG0222MGMR1R1 4.0
MG0228MGMR1R1 4.0
MG0231MGMR1R1 4.0
MG0238MGMR1R1 4.0
MG0239MGMR1R1 4.0
MG0242MGM

## Compile run numbers. Unpack DICOMs.

In [None]:
## Create DataFrame to store run numbers.
rundf = pd.DataFrame(columns=['Subject_ID','anat','emo'])
rundf['Subject_ID'] = found
rundf = rundf.set_index('Subject_ID', drop=True)

## Specify scan names to query by.
anats = ['SAG3DFSPGR11Flip1NEX', 'embarc_struc', 'STRUCFSPGRSAG3DASSET2',
         'embarcstruc', 'MPRAGE2150SENSE', 'T1W_3D_TFESENSE','STRUCFSPGRSAG3DARC','WIPMPRAGE2150SENSE']
bolds = ['ESTROOP3975dummy39sl','FUNCEPIESTROOPV397S39ASSET2','embarcestroop', 'fmri_emotion',
        'WIPfmri_emotionSENSE','ESTROOPfMRI','fmri_emotionSENSE','ESTROOPfMRISENSE']

## Column names for the two unpacklog layouts (series-info.dat uses the dcmunpack one).
dcmunpack_cols = ['Run','TE','TR','Flip Angle','Unknown1','Unknown2','Length','Filepath']
unpacksdcmdir_cols = ['Run','Status','X','Y','Z','Unknown1','Unknown2']


def _read_log(path, names, skiprows=None):
    """Parse a space-delimited unpack log.

    Returns the parsed DataFrame, or None when the file cannot be read
    (IOError) or the parser runs out of lines (StopIteration).
    """
    try:
        # ' +' rather than ' *': a pattern that can match the empty string
        # splits between every character under Python >= 3.7 re.split.
        return read_csv(path, sep=' +', skiprows=skiprows, names=names, engine='python')
    except (IOError, StopIteration):
        return None


def _store_runs(rundf, subject, unpacklog, anats, bolds):
    """Write the first matching anat and EMO run of `subject` into `rundf`.

    Rows are matched by comparing the 'Run' column against the scan-name
    lists; the row index label of the first match is what gets stored.
    NOTE(review): presumably the index holds the run number because the log
    has one more column than `names` -- confirm against a real log.
    """
    scans = unpacklog['Run']

    ## Store anat run.
    # Check for an empty match first; indexing [0] on it would raise IndexError.
    anat_runs = scans[np.in1d(scans, anats)].index
    if len(anat_runs) > 0:
        rundf.loc[subject, 'anat'] = anat_runs[0]
    else:
        print('Cannot find anat run # for %s' % subject)

    ## Store EMO run. Only the first matching run is kept.
    bold_runs = scans[np.in1d(scans, bolds)].index
    if len(bold_runs) > 0:
        rundf.loc[subject, 'emo'] = bold_runs[0]
    else:
        print('Cannot find BOLD run # for %s' % subject)


## Parse run numbers from unpacklog in dicom_dir.
for subject in found:
    log_path = os.path.join(dicom_dir, subject, '%s.unpacklog' % subject)

    ## Try the dcmunpack layout first, then fall back to the unpacksdcmdir layout.
    unpacklog = _read_log(log_path, dcmunpack_cols, skiprows=20)
    if unpacklog is None:
        unpacklog = _read_log(log_path, unpacksdcmdir_cols, skiprows=0)
    if unpacklog is None:
        continue

    # Runs are stored whichever layout parsed; previously this step only ran
    # on the fallback path, so logs that parsed first time were discarded.
    _store_runs(rundf, subject, unpacklog, anats, bolds)

## Parse unpacked data.
for subject in found:
    ## Read in subject's series-info.dat; skip subjects without one.
    unpacklog = _read_log(os.path.join(data_dir, subject, 'series-info.dat'), dcmunpack_cols)
    if unpacklog is None:
        continue

    _store_runs(rundf, subject, unpacklog, anats, bolds)


# Keep only subjects for which both an anat and an EMO run were found.
rundf = rundf.dropna()
print(len(rundf))