In [1]:
from src import processing, utils

import os
from glob import glob
import tempfile
from pyminc.volumes.factory import volumeFromFile, volumeLikeFile
from tqdm                   import tqdm

import multiprocessing      as mp


In [2]:
import subprocess
import os
import pandas as pd
import numpy as np
from warnings import warn
from random import randint

def execute_R(script, args):
    
    """
    Execute an R script.
    
    Arguments
    ---------
    script: str
        String containing the name of the R script to execute.
    args: dict
        Dictionary of key-value pairs containing command line arguments to pass to the script
    
    Returns
    -------
    None
    """
    
    args = [['--'+str(key), str(val)] for key, val in args.items()]
    args = sum(args, [])
    cmd = ['Rscript']+[script]+args
    subprocess.run(cmd)
    return

In [47]:
def normative_growth_norm(voxels, demographics, outfile, key = 'file', df = 5, combat = False, combat_batch = None, parallel = False, nproc = None):
    
    """
    Calculate human effect sizes using normative growth modelling.
    
    Arguments
    ---------
    voxels: str
        Path to the CSV file containing the voxelwise data.
    demographics: str
        Path to the CSV file containing the human demographics data.
    outfile: str
        Path to the CSV file in which to write the effect size data.
    key: str
        Primary key between voxels and demographics data.
    df: int
        Degrees of freedom to use in natural splines.
    combat: bool
        Option to run ComBat normalization on batch variables prior to normative growth modelling.
    combat_batch: list
        Variables to use in ComBat normalization.
    parallel: bool
        Option to run in parallel.
    nproc: int
        Number of processors to use in parallel.
    
    Returns
    -------
    
    """
  
    #Unpack function args into dictionary
    script_args = locals().copy()
  
    #ComBat normalization options
    if combat:
        script_args['combat'] = 'true'
        if combat_batch is None:
            raise Exception("Argument combat_batch must be specified when combat is True")
        else:
            if type(combat_batch) is str:
                 combat_batch = [combat_batch]
            script_args['combat_batch'] = '-'.join(combat_batch)
            script_args['combat-batch'] = script_args.pop('combat_batch')
    else:
        script_args['combat'] = 'false'
        del script_args['combat_batch']

    #Parallel options
    if parallel:
        script_args['parallel'] = 'true'
        if nproc is None:
            raise ValueError("Set the nproc argument to specify the number of processors to use")
    else:
        script_args['parallel'] = 'false'
    
    #Execute script
    script = 'normative_growth_normalization.R'
    execute_R(script = script, args = script_args)
    return outfile

In [48]:
def propensity_matching_norm(imgdir, demographics, mask, outdir, ncontrols = 10, parallel = False, nproc = None):
    
    """
    Calculate human effect size images using propensity-matching.
    
    Arguments
    ---------
    imgdir: str
        Path to the directory containing the MINC images to use to compute the effect sizes.
    demographics: str
        Path to the CSV file containing the human demographics data.
    mask: str
        Path to the mask MINC file for the images.
    outdir: str
        Path to the directory in which to save the effect size MINC images.
    ncontrols: int
        Number of propensity-matched controls to use when computing the effect sizes.
    parallel: bool
        Option to run in parallel.
    nproc: int
        Number of processors to use in parallel.
    
    Returns
    -------
    outfiles: list
        List of paths to the effect size images.
    """
    
    #Unpack function args into dictionary
    script_args = locals().copy()
    
    #Parallel options
    if parallel:
        script_args['parallel'] = 'true'
        if nproc is None:
            raise ValueError("Set the nproc argument to specify the number of processors to use")
    else:
        script_args['parallel'] = 'false'

    script = 'propensity_matching_normalization.R'
    execute_R(script = script, args = script_args)
    outfiles = [os.path.join(outdir, file) for file in os.listdir(outdir)]
    return outfiles

In [63]:
def vector_to_image(x, outfile, maskfile):
    
    mask_vol = volumeFromFile(maskfile)
    mask = np.array(mask_vol.data)
    mask_vol.closeVolume()

    img = np.zeros_like(mask.flatten())
    img[(mask == 1).flatten()] = x
    img = img.reshape(mask.shape)

    img_vol = volumeLikeFile(likeFilename = maskfile,
                             outputFilename = outfile,
                             labels = False)
    img_vol.data = img
    img_vol.writeFile()
    img_vol.closeVolume()
    

def matrix_to_images(x, outfiles, maskfile):

    #Function to export image
    exporter = lambda i : vector_to_image(x = x[i,], 
                                          outfile = outfiles[i], 
                                          maskfile = maskfile)
    #Error checking
    if type(x) is not np.ndarray:
        raise ValueError("Argument x must have type numpy.ndarray.")
    else:
        if len(x.shape) != 2:
            raise Exception("Argument x must be a 2-dimensional NumPy array.")
    
    if x.shape[0] != len(outfiles):
        raise Exception("Number of rows in x must be equal to the number of entries in outfiles")
    
    #Iterate over number of files
    irange = range(len(outfiles))
    out = list(map(exporter, tqdm(irange)))
        
    return outfiles

In [64]:
# basedir = 'data/human/derivatives/POND_SickKids/'
# isostep = 3.0
# jacobians = ['absolute', 'relative']
# for j, jac in enumerate(jacobians):

#     print("Resampling {} Jacobians...".format(jac))
    
#     #Input directory and input files
#     imgdir = os.path.join(basedir, 'jacobians', jacobians[j], '')
#     imgfiles = glob(imgdir+'*.mnc')

#     #Output directory 
#     imgdir_3mm = os.path.join(basedir, 'jacobians_3mm', jacobians[j])

#     #Resample images
#     imgfiles_3mm = processing.resample_images(infiles = imgfiles,
#                                               isostep = isostep,
#                                               outdir = imgdir_3mm,
#                                               parallel = True,
#                                               nproc = 8)

In [65]:
basedir = 'data/human/derivatives/POND_SickKids/'
isostep = 3.0
jacobians = ['absolute', 'relative']

#Instead of for loop
j = 0
    
#Input directory and input files
imgdir = os.path.join(basedir, 'jacobians', jacobians[j], '')
imgfiles = glob(imgdir+'*.mnc')

#Output directory 
imgdir_3mm = os.path.join(basedir, 'jacobians_3mm', jacobians[j], '')

# #Resample images
# imgfiles_3mm = processing.resample_images(infiles = imgfiles,
#                                           isostep = isostep,
#                                           outdir = imgdir_3mm,
#                                           parallel = True,
#                                           nproc = 8)
imgfiles_3mm = glob(imgdir_3mm+'*.mnc')

Next step will be creating voxel matrices for the absolute and relative Jacobians. Once I have the matrices, I can import them into R and look at using the ComBat normalization.

In [77]:
def calculate_human_effect_sizes(imgdir, demographics, outdir, method, mask, parallel = False, nproc = None, **kwargs):
    
    """
    Calculate human effect size images.
    
    Arguments
    ---------
    imgdir: str
        Path to the directory containing the MINC images to use to compute the effect sizes.
    demographics: str
        Path to the CSV file containing the human demographics data.
    outdir: str
        Path to the directory in which to save the effect size MINC images.
    mask: str
        Path to the mask MINC file for the images.
    method: str
        Method to use to compute effect sizes. 
    parallel: bool
        Option to run in parallel.
    nproc: int
        Number of processors to use in parallel.
        
    Returns
    -------
    outfiles: list
        List of paths to the effect size images.
    """
    
    imgdir = os.path.join(imgdir, '')
    outdir = os.path.join(outdir, '')
    if not os.path.exists(outdir):
        os.makedirs(outdir)
    
    if method == "propensity-matching":
        
        kwargs.update({'imgdir':imgdir,
                       'demographics':demographics,
                       'mask':mask,
                       'outdir':outdir,
                       'parallel':parallel,
                       'nproc':nproc})
        outfiles = propensity_matching_norm(**kwargs)
        
    elif method == "normative-growth":
        
        print("Building voxel matrix...")
  
        imgfiles = glob(imgdir+'*.mnc')
        imgfiles = imgfiles[:50]
        tmpfile = tempfile.mkstemp(dir = outdir, suffix = '.csv')[1]
        df_voxels = processing.build_voxel_matrix(infiles = imgfiles,
                                                  mask = mask,
                                                  sort = True,
                                                  file_col = True,
                                                  parallel = parallel,
                                                  nproc = nproc)
        if 'key' in kwargs.keys():
            key = kwargs['key']
        else:
            key = 'file'
        df_voxels[key] = [os.path.basename(file) for file in df_voxels['file']]
        
        print("Writing out voxel matrix...")
        df_voxels.to_csv(tmpfile, index = False)
        
        
        print("Executive normative growth...")
        outfile = tempfile.mkstemp(dir = outdir, suffix = '.csv')[1]
        kwargs.update({'voxels':tmpfile,
                       'demographics':demographics,
                       'outfile':outfile,
                       'parallel':parallel,
                       'nproc':nproc})
        outfile = normative_growth_norm(**kwargs)
        
        x = fread(outfile, header = True).to_pandas()
        outfiles = x[key].to_list()
        outfiles = [os.path.join(outdir, outfile) for outfile in outfiles]
        x = x.drop(key, axis=1).to_numpy()
        outfiles = matrix_to_images(x = x, 
                                    outfiles = outfiles,
                                    maskfile = maskfile)
        
        os.remove(tmpfile)
        os.remove(outfile)
        
    else:
        raise ValueError("Argument method must be one of ['propensity-matching', 'normative-growth']: {}".format(method))
    
    return outfiles
    

In [78]:
imgdir = imgdir_3mm
demographics = 'data/human/derivatives/POND_SickKids/DBM_input_demo_passedqc.csv'
maskfile = 'data/human/registration/reference_files/mask_3.0mm.mnc'
outdir = 'data/human/derivatives/POND_SickKids/effect_sizes/absolute/tmp/'
parallel = False
nproc = None

In [79]:
calculate_human_effect_sizes(imgdir = imgdir,
                            demographics = demographics,
                            mask = maskfile,
                            outdir = outdir,
                            method = 'normative-growth',
                            combat = False,
                            combat_batch = ['Site', 'Scanner'],
                            parallel = True,
                            nproc = 8)

Building voxel matrix...


100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 50/50 [00:00<00:00, 90.76it/s]


Writing out voxel matrix...
Executive normative growth...


no DISPLAY variable so Tk is not available 




100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 25/25 [00:00<00:00, 34.01it/s]


['data/human/derivatives/POND_SickKids/effect_sizes/absolute/tmp/d8_0004_01.extracted_fwhm_4vox.mnc',
 'data/human/derivatives/POND_SickKids/effect_sizes/absolute/tmp/d8_0012_02.extracted_fwhm_4vox.mnc',
 'data/human/derivatives/POND_SickKids/effect_sizes/absolute/tmp/d8_0028_03.extracted_fwhm_4vox.mnc',
 'data/human/derivatives/POND_SickKids/effect_sizes/absolute/tmp/d8_0030_01.extracted_fwhm_4vox.mnc',
 'data/human/derivatives/POND_SickKids/effect_sizes/absolute/tmp/d8_0032_02.extracted_fwhm_4vox.mnc',
 'data/human/derivatives/POND_SickKids/effect_sizes/absolute/tmp/d8_0207_01.extracted_fwhm_4vox.mnc',
 'data/human/derivatives/POND_SickKids/effect_sizes/absolute/tmp/d8_0233_01.extracted_fwhm_4vox.mnc',
 'data/human/derivatives/POND_SickKids/effect_sizes/absolute/tmp/sub-0880114_ses-01_run-02_T1w.extracted_fwhm_4vox.mnc',
 'data/human/derivatives/POND_SickKids/effect_sizes/absolute/tmp/sub-0880301_ses-01_run-02_T1w.extracted_fwhm_4vox.mnc',
 'data/human/derivatives/POND_SickKids/effec

Iterate over rows of data frame, need a mask. Put the voxel values into the mask locations. Write out. Will need to parallelize. 

In [68]:
from datatable import fread

infile = os.path.join(outdir, 'out.csv')
x = fread(infile, header = True).to_pandas()

In [69]:
outfiles = x['file'].to_list()
outfiles = [os.path.join(outdir, outfile) for outfile in outfiles]
x = x.drop('file', axis=1).to_numpy()

In [70]:
matrix_to_images(x = x, outfiles = outfiles, maskfile = maskfile)

100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 25/25 [00:00<00:00, 34.96it/s]


['data/human/derivatives/POND_SickKids/effect_sizes/absolute/tmp/d8_0004_01.extracted_fwhm_4vox.mnc',
 'data/human/derivatives/POND_SickKids/effect_sizes/absolute/tmp/d8_0012_02.extracted_fwhm_4vox.mnc',
 'data/human/derivatives/POND_SickKids/effect_sizes/absolute/tmp/d8_0028_03.extracted_fwhm_4vox.mnc',
 'data/human/derivatives/POND_SickKids/effect_sizes/absolute/tmp/d8_0030_01.extracted_fwhm_4vox.mnc',
 'data/human/derivatives/POND_SickKids/effect_sizes/absolute/tmp/d8_0032_02.extracted_fwhm_4vox.mnc',
 'data/human/derivatives/POND_SickKids/effect_sizes/absolute/tmp/d8_0207_01.extracted_fwhm_4vox.mnc',
 'data/human/derivatives/POND_SickKids/effect_sizes/absolute/tmp/d8_0233_01.extracted_fwhm_4vox.mnc',
 'data/human/derivatives/POND_SickKids/effect_sizes/absolute/tmp/sub-0880114_ses-01_run-02_T1w.extracted_fwhm_4vox.mnc',
 'data/human/derivatives/POND_SickKids/effect_sizes/absolute/tmp/sub-0880301_ses-01_run-02_T1w.extracted_fwhm_4vox.mnc',
 'data/human/derivatives/POND_SickKids/effec

In [125]:
np.array(mat_voxels[0,])

array([ 0.04186072,  0.05802296, -0.06054713, ...,  0.48946384,
        0.2535964 ,  0.4802016 ])