1. Cross reference subjects with QC csv. 
2. Of surviving subjects, check how many have existing dicoms/data.
3. Run recons.
4. Transfer data to embarc_fast.

## Remove subjects with failed QC.

In [1]:
import os, sys, shutil
import numpy as np
import pandas as pd
import subprocess, re
from pandas import read_csv

## Specify directories. 

data_dir = '/space/will/4/users/EMBARC/DATA'
dicom_dir = '/space/will/3/users/EMBARC/DICOMs'
recon_dir = '/space/will/3/users/EMBARC/Recons'
behav_dir = '/space/will/3/users/EMBARC/behavior'
fast_dir = '/space/will/3/users/EMBARC/EMBARC-FAST'

## Read in baseline csv. 
baseline = read_csv(os.path.join(behav_dir, 'embarc_baseline_totals.csv'))
baseline = baseline.set_index('ProjectSpecificID', drop=True)

## Read in QC csv. 
qc = read_csv('/space/will/4/users/EMBARC/QC/aafzal_9_7_2016_11_37_59_MASTER.csv')

columns = ['Session','ANAT_QC','EOR1_QC','EMO_QC','EOR2_QC']
qc = qc[columns]
qc.columns = ['Session','ANAT_QC','EOR1_QC','EMO_QC','EOR2_QC']

## Restrict to subjects in baseline_totals csv.
qc = qc[(qc.Session).isin([s for s in qc.Session if s[:6] in baseline.index])]
qc = qc.set_index('Session', drop=True)
qc = qc[qc.notnull()]

## Restrict QC csv to valid baseline subjects. 
qc = qc[(qc.index).isin([(s.replace('+AF8-','')).upper() for s in qc.index if (s.startswith('CU') | s.startswith('MG') 
    | s.startswith('TX')| s.startswith('UM')) and (not 'PH' in s) and (not 'TM' in s) and ('MR1' in s)])]

## Manually identify Failed EOR1 scans.
bad_eor1 = ['CU0001CUMR1R1','MG0202MGMR1R1','MG0248MGMR1R1','TX0045TXMR1R1','TX0065TXMR1R1','UM0024UMMR1R1','UM0094UMMR1R1']

qc = qc[(qc.ANAT_QC != 'FAIL') & (qc.EOR1_QC != 'FAIL')]     ## Remove FAIL scans.
qc = qc[~(qc.index).isin(bad_eor1)]                          ## Remove bad EOR1 scans (manual inspection)

## Check against existing DICOMs.
dicoms = os.listdir(dicom_dir)
data = os.listdir(data_dir)

subjects = [s for s in qc.index]
## Remove MG0006MGMR1R1 from subjects list (no ANAT collected).
subjects.remove('MG0006MGMR1R1')
print 'Subjects remaning after QC: %d' %(len(subjects))

print 'Missing DICOMs for %s subjects.' %(len(subjects)-len([s for s in subjects if s in dicoms]))
print 'Missing DATA for %s subjects.' %(len(subjects)-len([s for s in subjects if s in data]))

Subjects remaning after QC: 251
Missing DICOMs for 0 subjects.
Missing DATA for 0 subjects.


## Compile run numbers. Unpack DICOMs.

In [5]:
## Create DataFrame to store run numbers. 
rundf = pd.DataFrame(columns=['Subject_ID','anat','eor1','eor2'])
rundf['Subject_ID'] = subjects
rundf = rundf.set_index('Subject_ID', drop=True)

## Specify scan names to query by. 
anats = ['SAG3DFSPGR11Flip1NEX', 'embarc_struc', 'STRUCFSPGRSAG3DASSET2', 
         'embarcstruc', 'MPRAGE2150SENSE', 'T1W_3D_TFESENSE']
bolds = ['RESTING1805DUM39sl', 'embarc_resting_1', 'FUNCEPIRestV180S39ASSET2', 
         'embarcresting1', 'fcmrirun1SENSE', 'RESTSTATE1fMRI'] 

## Parse run numbers from unpacklog in dicom_dir. 
parse_unpacklog(subjects)

## If not found in unpacklog: parse run numbers from cfg file in data_dir.
parse_unpacked_data(list(rundf[~rundf['anat'].notnull()].index))
parse_unpacked_data(list(rundf[~rundf['eor1'].notnull()].index))

###~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~###
## If no unpacklog or cfg file exists: Create SUBJECT.unpacklog. 
###~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~###

if len(list(rundf[~rundf['anat'].notnull()].index)) > 0: 
    for subject in list(rundf[~rundf['anat'].notnull()].index): 
        
        ## Call /space/will/3/users/EMBARC/EMBARC-FAST/scripts/unpacklog.csh. 
        ## Usage: 'source unpacklog.csh $subject_id' 
        print 'source /space/will/3/users/EMBARC/EMBARC-FAST/scripts/unpacklog.csh %s' %subject
    
###~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~###
## Unpack subjects not already unpacked.
###~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~###

for subject in is_unpacked(rundf.index):
    
    ## Call /space/will/3/users/EMBARC/EMBARC-FAST/scripts/unpack_runs.csh
    ## Usage: 'source unpack_runs.csh $subject_id $anat_run $eor1_run' 
    print 'source /space/will/3/users/EMBARC/EMBARC-FAST/scripts/unpack_runs.csh %s' %subject

source /space/will/3/users/EMBARC/EMBARC-FAST/scripts/unpack_runs.csh CU0089CUMR1R1


## Recon Subjects.

In [22]:
## Source Freesurfer. 
cmd = 'source /usr/local/freesurfer/nmr-stable53-env'
proc = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE, )
output, _ = proc.communicate()
os.chdir(os.path.join(dicom_dir))

org_files = open(os.path.join(fast_dir, 'scripts', 'recon', 'organize_files.txt'), 'w')
submit_job = open(os.path.join(fast_dir, 'scripts', 'recon', 'submit_job.txt'), 'w')

## Submit recons for missing subjects. 
for subject in rundf.index:
    
    try:
        
        ## Read log file to check for succesful recon.
        with open(os.path.join(recon_dir, subject, 'scripts', 'recon-all.log')) as f:
            last = None
            ## Read last line of log file. 
            for last in (line for line in f if line.rstrip('\n')):
                pass
            ## Check if recon-all completed without errors.
            if not ('recon-all -s %s finished without error' %subject) in last: 
                print 'Error in recon: %s' %subject
    
    except IOError: ## Means subject has not been recon-ed yet. 
        
        ## Submit recons (2-step).
        run_num = '%.03d' %int(rundf.loc[subject].anat)
        mprage = os.path.join(data_dir, subject, 'anat', run_num, '%s_mpr%s.nii' %(subject,run_num))
        
        ## 1. Organize files. 
        org_files.write('pbsubmit -m aafzal -c "recon-all -i %s -subjid %s"\n' %(mprage,subject))
        
        ## 2. Submit job. 
        submit_job.write('pbsubmit -m aafzal -c "recon-all -all -subjid %s"\n' %subject)

org_files.close()
submit_job.close()

## Manually perform the following steps: 

# 1. ssh launchpad
# 2. embarc (or manually set SUBJECTS_DIR to /space/will/3/users/EMBARC/Recons)
# 3. cd /space/will/3/users/EMBARC/EMBARC-FAST/scripts/recon
# 4. source organize_files.txt
# 5. source submit_job.txt

## Transfer files for preprocessing.

In [19]:
###~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~###
## Make subject and run directories in EMBARC_FAST. 
###~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~###

for subject in rundf.index:
    if not os.path.isdir(os.path.join(fast_dir, subject, 'rest', '001')):
        os.makedirs(os.path.join(fast_dir, subject, 'rest', '001'))
        
    ## Create subjectname file. 
    if not os.path.isfile(os.path.join(fast_dir, subject, 'subjectname')):
        with open(os.path.join(fast_dir, subject, 'subjectname'), 'w') as f:
            f.write(subject)

## Transfer EOR1 files.
for subject in rundf.index:
    
    ## Specify anat and eor1 runs. 
    eor1_run = rundf.loc[subject].eor1
    
    try: 
        
        src_dir = os.path.join(data_dir, subject, 'bold', '%.03d' %int(eor1_run)) 
        src_fn = '%s_bld%.03d_rest.nii' %(subject, int(eor1_run))
        
        dst_dir = os.path.join(fast_dir, subject, 'rest', '001')
        dst_fn = '%s.nii.gz' %subject
         
        if not os.path.isfile(os.path.join(dst_dir, dst_fn)): 
            shutil.copy(os.path.join(src_dir, src_fn), os.path.join(dst_dir, dst_fn))

        ## Copy subject.nii.gz and rename to f.nii.gz. 
        shutil.copy(os.path.join(dst_dir, '%s.nii.gz' %subject), os.path.join(dst_dir, 'f.nii.gz'))

    except OSError: 
        print 'Cannot find functional file for %s' %subject

In [3]:
def parse_unpacklog(subjects): 
    
    ###~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~###
    ## Parses Dicom_dir/Subject/subject.unpacklog to store 
    ## anat and eor run numbers in csv. 
    ###~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~###
    
    for subject in subjects:
        
        try:
            ## Read in subject's dcmunpack unpacklog. 
            unpacklog = read_csv(os.path.join(dicom_dir, subject, '%s.unpacklog' %subject), sep=' *', skiprows=20,
                                 names=['Run','TE','TR','Flip Angle','Unknown1','Unknown2','Length','Filepath'], engine='python')
        
        except (IOError, StopIteration): 

            try:
                ## Read in subject's unpacksdcmdir unpacklog.
                unpacklog = read_csv(os.path.join(dicom_dir, subject, '%s.unpacklog' %subject), sep=' *', skiprows=0,
                                    names=['Run','Status','X','Y','Z','Unknown1','Unknown2'], engine='python')
            
            except (IOError, StopIteration): 
                continue

        ## Store anat run. 
        anat = unpacklog.Run[np.in1d(unpacklog['Run'], anats)].index[0]
        if not np.isnan(int(anat)):
            rundf.loc[subject,'anat'] = anat
        else:
            print 'Cannot find anat run # for %s' %subject

        ## Store EOR runs. 
        bold = unpacklog.Run[np.in1d(unpacklog['Run'], bolds)].index
        
        ## If both runs of EOR exist, store both.
        if len(bold) > 1:
            rundf.loc[subject,'eor1'] = bold[0]
            rundf.loc[subject, 'eor2'] = bold[1]
            
        ## Otherwise, just store the run that was found.
        elif not np.isnan(int(bold[0])):
            rundf.loc[subject,'eor1'] = bold[0]
        else:
            print 'Cannot find anat run # for %s' %subject
    
def parse_unpacked_data(subjects):
    
    ###~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~###
    ## Parses data_dir/Subject/scripts/cfg to store 
    ## anat and eor run numbers in csv. 
    ###~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~###
    
    for subject in subjects: 
        
        try: 
            ## Read in subject's cfg file.
            subj_cfg = read_csv(os.path.join(data_dir, subject, 'scripts', '%s_unpack_fsl.cfg' %subject), names=['Run','Dir','Format', 'File'], delimiter='\s', engine='python')
            
            ## Store anat run. 
            rundf.loc[subject, 'anat'] = subj_cfg['Run'][subj_cfg['Dir'] == 'anat'][0]
            
            ## Store EOR runs. 
            n_rest = 0
            for i in np.arange(5):
                if len(subj_cfg['File'].str.findall('rest')[i]) > 0:
                    n_rest += 1
            if n_rest == 4:
                rundf.loc[subject, 'eor1'] = subj_cfg['Run'][1]
                rundf.loc[subject, 'eor2'] = subj_cfg['Run'][4]
            else:
                print 'Could not find all EOR runs for %s' %subject
        
        except (IOError,StopIteration): 
            print 'Cannot read log file for %s' %subject
            
def create_unpacklog(subjects):
    
    ###~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~###
    ## Calls /space/will/3/users/EMBARC/EMBARC-FAST/scripts/unpacklog.csh. 
    ## Usage: 'source unpacklog.csh $subject_id' 
    ###~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~###
    
    for subject in subjects: 
        
        ## Source unpacklog script.
        cmd = 'source %s %s' %(os.path.join(fast_dir, 'scripts', 'unpacklog.csh'), subject)
        print cmd
        proc = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE, )
        output, _ = proc.communicate()

        
def unpack_dicoms(subjects):
    
    ###~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~###
    ## Calls /space/will/3/users/EMBARC/EMBARC-FAST/scripts/unpack_runs.csh
    ## Usage: 'source unpack_runs.csh $subject_id $anat_run $eor1_run' 
    ###~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~###
    
    ## Change to dicom_dir.
    os.chdir(os.path.join(dicom_dir))
    
    for subject in subjects:
        
        ## Specify anat and eor1 runs. 
        anat_run = rundf.loc[subject].anat
        eor1_run = rundf.loc[subject].eor1

        ## Source unpacking script.
        cmd = 'source %s %s %s %s' %(os.path.join(fast_dir, 'scripts', 'unpack_runs.csh'), subject, anat_run, eor1_run)
        print cmd
        proc = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE, )
        output, _ = proc.communicate()
        
def is_unpacked(subjects): 
    
    ###~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~###
    ## Checks data_dir/subject for anat and eor files. 
    ## Returns: list of subjects with missing anat or eor files. 
    ###~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~###
    
    result = []
    
    for subject in subjects:
        
        is_anat = False
        is_eor = False

        ## Check if anat was unpacked.
        anat_dir = '%03d' %int(rundf.loc[subject, 'anat'])
        anat_fn = '%s_mpr%s_reorient.nii.gz' %(subject, anat_dir)
        if os.path.isfile(os.path.join(data_dir, subject, 'anat', anat_dir, anat_fn)):
            is_anat = True

        ## Check if bold was unpacked.
        eor_dir = '%03d' %int(rundf.loc[subject, 'eor1'])
        eor_fn = '%s_bld%s_rest_reorient.nii.gz' %(subject, eor_dir)
        if os.path.isfile(os.path.join(data_dir, subject, 'bold', eor_dir, eor_fn)):
            is_eor = True
        
        if not (is_anat or is_eor):
            result.append(subject)
    return result

def submit_recons(subject):
    
        ## Submit Recons.
        cmd = 'source %s %s' %(os.path.join(fast_dir, 'scripts', 'recons_submit.csh'), subject)
        #print cmd
        proc = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE, )
        output, _ = proc.communicate()
        #print output