In [86]:
"""###################################################################
Modules to be loaded before running the script:
- ANTs/git
- qbatch/git

###################################################################"""
# Single-argument call form: prints the same text under Python 2 and also
# parses under Python 3, where the bare print statement is a syntax error.
print("This box contains notes for myself")

This box contains notes for myself


In [87]:
# ====================================================================
# IMPORT LIBRARIES
# ====================================================================
import os
import subprocess
import sys
import glob #for wildcard matching



In [88]:
# --------------------------------------------------------------------
# Configuration: directories, wildcards and file names
#   * Every path given here has to be ABSOLUTE.
#   * The wildcard syntax used for directory / file name matching is
#     the one documented in the 'glob' manual.
# --------------------------------------------------------------------
#
# Directory hierarchy expected for the input, reference and transform
# files:
#
#   parent_dir
#       |
#       +-- subj_dir_1 (as indicated by the *_dir_wc wildcard)
#       |        |
#       |        +- file_of_interest (as indicated by the *_file_wc wildcard)
#       |
#       +-- subj_dir_2 ...

# ---------------- Input ----------------
# Parent directory that holds the per-subject input directories
in_parent_dir = '/data/chamal/projects/anthony/nmf_parcellation/cortical_tractMap/tract2voxel_probability_labels'
# Wildcard (or literal name) of each subject directory
in_dir_wc = '[0-9][0-9][0-9][0-9][0-9][0-9]'
# Wildcard (or literal name) of the input file inside a subject directory
in_file_wc = '*.nii.gz'

# ---------------- Reference / template ----------------
ref_parent_dir = '/data/chamal/projects/anthony/HCP/raw_files/anthony'
ref_dir_wc = '[0-9][0-9][0-9][0-9][0-9][0-9]'
ref_file_wc = 'HCP100unrelated-fa_template0_x07.nii.gz'

# ---------------- Transforms ----------------
tr_parent_dir = '/data/chamal/projects/anthony/HCP/raw_files/anthony'
tr_dir_wc = '[0-9][0-9][0-9][0-9][0-9][0-9]'
tr_file_wc = 'HCP100unrelated-fa_*_dt_fa*'

# ---------------- Output ----------------
# Still empty placeholders at this point.
out_parent_dir = ''
# NOTE: the output directory name will be the same as the input
#       directory (the one found via in_dir_wc)
out_file_name = ''

# Location for the job documents (empty placeholder as well)
jobDoc_dir = ''

# ---------------- antsApplyTransform related ----------------
# Kept as strings, not numbers.
DIMENSIONALITY = '3'
INTERPOLATION = 'GenericLabel'

In [89]:
# ====================================================================
# Save the filenames into lists
# ====================================================================
# Each pattern is <parent_dir>/<subject dir wildcard>/<file wildcard>.
# glob does not promise any particular order for its matches, so every
# list is sorted to keep the ordering reproducible between runs.

#### Input files
in_file_list = sorted(glob.glob(os.path.join(in_parent_dir, in_dir_wc, in_file_wc)))

#### Reference files
ref_file_list = sorted(glob.glob(os.path.join(ref_parent_dir, ref_dir_wc, ref_file_wc)))

#### Transform files
tr_file_list = sorted(glob.glob(os.path.join(tr_parent_dir, tr_dir_wc, tr_file_wc)))

# Report how many paths matched each pattern.
# Single-argument print(...) produces the same output under Python 2 and
# also parses under Python 3, unlike the bare print statement.
print("File initilization stats:")
print("\tTotal input files: %d" % len(in_file_list))
print("\tTotal reference files: %d" % len(ref_file_list))
print("\tTotal transform files: %d" % len(tr_file_list))


File initilization stats:
	Total input files: 1467
	Total reference files: 163
	Total transform files: 200


In [90]:
# ====================================================================
# Merge paths in lists into dictionaries for each subject
# - One does not need to use the above methods to get to this step;
#       the below processing steps will work as long as this dict
#       is initialized with the correct information
# - Currently, the merging is done by taking the subject ID from the
#       name of the directory that directly contains each file
# ====================================================================

# Get a list of subject IDs (one ID per line in the text file)
subject_list_path = "/data/chamal/projects/anthony/nmf_parcellation/cortical_tractMap/subj_list.txt"
# 'with' closes the file even when reading fails part-way through
with open(subject_list_path, "r") as f:
    # Blank lines are skipped so they do not become an empty subject ID
    IDs = [line.strip() for line in f if line.strip()]

# Initialize the master dictionary to store subject filepaths.
# 'ref' starts as '' so the completeness check further down can test it
# for subjects that have no reference file instead of raising KeyError.
subj_files = {}
for ID in IDs:
    subj_files[ID] = {
        'in': [],   # list of input files
        'ref': '',  # reference file ('' until one is found)
        'tr': [],   # list of transform files
    }

# Subject directories seen on disk that are not in the subject list
unlisted_ids = set()


def _subject_id_of(filepath):
    """Return the name of the directory that directly contains filepath."""
    return os.path.basename(os.path.dirname(filepath))


# Store all of the input files in their corresponding subject
for filepath in in_file_list:
    subj_id = _subject_id_of(filepath)
    if subj_id in subj_files:
        subj_files[subj_id]['in'].append(filepath)
    else:
        unlisted_ids.add(subj_id)

# One reference file per subject; if several match, the last one in
# ref_file_list is the one that is kept
for filepath in ref_file_list:
    subj_id = _subject_id_of(filepath)
    if subj_id in subj_files:
        subj_files[subj_id]['ref'] = filepath
    else:
        unlisted_ids.add(subj_id)

for filepath in tr_file_list:
    subj_id = _subject_id_of(filepath)
    if subj_id in subj_files:
        subj_files[subj_id]['tr'].append(filepath)
    else:
        unlisted_ids.add(subj_id)

# Delete the subjects that do not contain at least one of each.
# The IDs are collected first because entries cannot be removed from a
# dictionary while it is being iterated over.
subj_to_delete = [
    subj_id for subj_id, files in subj_files.items()
    if not files['in'] or not files['ref'] or not files['tr']
]
for subj_id in subj_to_delete:
    del subj_files[subj_id]

#TODO: this dictionary is not sorted!

"""
# subj_files is the dictionary for all subsequent processing
# see below for the variable hierarchy in this dictionary

subj_files <type 'dict'>
    |
    +-- subj_files['100307'] <type 'dict'>
    |        |
    |        +-- subj_files['100307']['in'] <type 'list'>
    |        |        |
    |        |        +-- 100307_input_filepath_1 <type 'str'>
    |        |        +-- 100307_input_filepath_2 <type 'str'>
    |        |        |   ...
    |        |
    |        +-- subj_files['100307']['ref'] <type 'str'> (reference file)
    |        |
    |        +-- subj_files['100307']['tr'] <type 'list'>
    |                 |
    |                 +-- 100307_transform_filepath_1 <type 'str'>
    |                 +-- 100307_transform_filepath_2 <type 'str'>
    |                 |   ...?
    |
    ...
"""

# Print status lines.
# Single-argument print(...) gives the same output under Python 2 and
# also parses under Python 3.
print("Total number of subject ID read: %d" % len(IDs))
print("Subjects deleted due to incomplete file paths: %d" % len(subj_to_delete))
print("Remaining number of subjects stored in subj_files: %d" % len(subj_files))
# Only shown when something was actually skipped, so nothing is dropped silently
if unlisted_ids:
    print("Files ignored because their subject ID is not in the subject list: %s"
          % ", ".join(sorted(unlisted_ids)))

#TODO: initiate dataframe that will store all input and outputs?

Total number of subject ID read: 163
Subjects deleted due to incomplete file paths: 63
Remaining number of subjects stored in subj_files: 100


In [None]:

#Iterate through each subject, in sorted ID order so that the order is
#the same on every run
for subj_id in sorted(subj_files):
    #Iterate through each input file (1 antsApplyTransform per input file)
    #TODO: the per-input-file loop and the remaining command arguments
    #      have not been written yet

    #antsApplyTransform command specifications
    #Built as an argument list; DIMENSIONALITY is the constant assigned
    #in the configuration cell
    cmd_input = ['antsApplyTransforms', '-d', DIMENSIONALITY]