# Radiomics Processing Functions

## import libraries

In [4]:

# import header from the pyradiomics helloPyradiomics notebook
from __future__ import print_function
import sys
import os
import logging
import six
from radiomics import featureextractor, getFeatureClasses
import radiomics
import pandas as pd
import time


## create_directories

In [5]:
## Function to check/create directory structure for a radiomics analysis/MRP project

# parent_directory (str) - location of parent directory to produce subdirectories in
# paramType, imageType (str) - name of parameter and image to create subdirectories for
# roi_list (array) - array of regions of interest to create subdirectories for
def create_directories(parent_directory, paramType, imageType,roi_list):
    """Create, where missing, the directory skeleton for a radiomics project.

    The following tree is ensured under ``parent_directory``::

        images/
        parameters/
        notebooks/
        roi/
        radiomics_data/<paramType>/<imageType>/<roi>/PyRadiomics_raw_data/
        radiomics_data/<paramType>/<imageType>/<roi>/comparisons/

    Directories that already exist are left untouched, so the function can be
    called repeatedly (e.g. once per parameter/image combination).

    Args:
        parent_directory (str): directory in which the tree is created.
        paramType (str): parameter-set name; becomes a subdirectory of
            ``radiomics_data``.
        imageType (str): image-type name; becomes a subdirectory of
            ``radiomics_data/<paramType>``.
        roi_list (iterable of str): region-of-interest names; each gets its own
            subdirectory with the two analysis folders inside. May be empty,
            in which case the tree stops at ``<imageType>``.

    Raises:
        FileExistsError: if one of the target paths exists but is not a
            directory.
    """
    top_level_dirs = ('images', 'parameters', 'notebooks', 'roi', 'radiomics_data')
    analysis_dirs = ('PyRadiomics_raw_data', 'comparisons')

    # exist_ok=True replaces the racy "check, then create" pattern and also
    # creates any missing intermediate directories in one call.
    for name in top_level_dirs:
        os.makedirs(os.path.join(parent_directory, name), exist_ok=True)

    # Created explicitly so the <paramType>/<imageType> level exists even when
    # roi_list is empty.
    image_root = os.path.join(parent_directory, 'radiomics_data', paramType, imageType)
    os.makedirs(image_root, exist_ok=True)

    for roi in roi_list:
        for analysis in analysis_dirs:
            os.makedirs(os.path.join(image_root, roi, analysis), exist_ok=True)

## imageBatchProcess

In [6]:
# uses batchprocess from YuLabbatchProcessingWithPandas.py
# located in the python_functions folder
from python_functions.YuLabbatchProcessingWithPandas import main as batchprocess

# full_image_directory (str) - path to the image directory to be analyzed
# output_file_name (str) - name for output file
# full_roi_mask_directory (str) - path to directory for all ROIs to be analyzed
# image_type (str) - image type to be analyzed
# roi (str) - specific region of interest to be analyzed
# param_type (str) - specific name of parameter file to use for radiomics analysis
# parameterPath (str) - path and name of .yaml file of the parameter file
# radiomics_data_folder (str) - location to save radiomics data .csv files (default = 'radiomics_data') --- we recommend creating radiomics data files for each animal analyzed

def imageBatchProcess(full_image_directory, output_file_name, full_roi_mask_directory, image_type, roi, param_type, parameterPath, radiomics_data_folder='radiomics_data'):
    """Pair every image in a directory with one ROI mask and run ``batchprocess``.

    The function lists the ``.nii.gz`` images in ``full_image_directory``,
    pairs each one with the mask found in ``<full_roi_mask_directory>/<roi>``,
    writes the pairing to ``fileDataFrame.csv`` in the current working
    directory (overwritten on every call), and passes that file to
    ``batchprocess``.

    Args:
        full_image_directory (str): directory holding the images to analyze.
        output_file_name (str): name for the output file; ``<name>_log`` is
            passed to ``batchprocess`` as well.
        full_roi_mask_directory (str): directory containing one subdirectory
            per region of interest.
        image_type (str): image type being analyzed (forwarded to
            ``batchprocess``).
        roi (str): name of the ROI subdirectory to take the mask from.
        param_type (str): name of the parameter set (forwarded to
            ``batchprocess``).
        parameterPath (str): path to the ``.yaml`` parameter file.
        radiomics_data_folder (str): folder name forwarded to ``batchprocess``
            for saving results. Defaults to ``'radiomics_data'``.

    Raises:
        ValueError: if the ROI directory does not provide exactly one mask per
            image (i.e. it does not contain exactly one file while images are
            present).
    """
    # Match on the suffix, not a substring, so files such as 'x.nii.gz.bak'
    # are not mistaken for images. os.listdir order is arbitrary; sorting
    # keeps the row order of the CSV reproducible between runs.
    imageFiles = sorted(f for f in os.listdir(full_image_directory) if f.endswith('.nii.gz'))
    imagePath = [os.path.join(full_image_directory, f) for f in imageFiles]

    # Every image is paired with the same ROI mask: the mask path list is
    # repeated once per image so it lines up with imagePath row by row.
    roiPath = os.path.join(full_roi_mask_directory, roi)
    roiFiles = os.listdir(roiPath)
    maskPath = [os.path.join(roiPath, r) for r in roiFiles] * len(imagePath)

    # The pairing only lines up when the ROI directory holds exactly one file.
    # Fail with an explicit message instead of pandas' generic length error.
    if len(maskPath) != len(imagePath):
        raise ValueError(
            "Expected exactly one mask file in '{}' to pair with {} image(s), found {}: {}".format(
                roiPath, len(imagePath), len(roiFiles), roiFiles))

    # Table of filename -> image path / mask path, indexed by filename.
    filePaths = {'filename': imageFiles, 'Image': imagePath, 'Mask': maskPath}
    fileDataFrame = pd.DataFrame(data=filePaths).set_index('filename')

    # batchprocess reads its inputs from a CSV; it is written to the directory
    # this function is run from.
    fileDataFrame.to_csv('fileDataFrame.csv')

    # Run the batch extraction (defined in YuLabbatchProcessingWithPandas.py).
    batchprocess('fileDataFrame.csv', output_file_name, roi, output_file_name+'_log', parameterPath, param_type, image_type, radiomics_data_folder)

# Example radiomic directory set up and processing

In [7]:
# Regions of interest: the three right-hemisphere structures, then the same
# three on the left.
roi_list = [
    side + '_' + region
    for side in ('r', 'l')
    for region in ('amygdala', 'hippocampus', 'globus_pallidus_caudate_putamen')
]
# Parameter sets, one per bin width.
param_type = [
    'binWidth_' + width
    for width in ('0_05', '0_075', '0_1', '0_25', '0_5', '1')
]
# Image types to create directories for.
image_type = [
    'NDI',
    'ODI',
    'vISO',
]

In [8]:

## One-off setup: run this once to build a radiomics_data directory, then
## duplicate and rename that directory for each animal type being analyzed.

# the parent directory is one level above the current working directory
new_parent_directory = '..'
for p in param_type:
    for i in image_type:
        create_directories(
            parent_directory=new_parent_directory,
            paramType=p,
            imageType=i,
            roi_list=roi_list,
        )

In [9]:
# Regions of interest to process (right hemisphere first, then left).
roi_list = [
    'r_amygdala',
    'r_hippocampus',
    'r_globus_pallidus_caudate_putamen',
    'l_amygdala',
    'l_hippocampus',
    'l_globus_pallidus_caudate_putamen',
]

# Directory names used when the image and ROI paths are built.
imageDirectory = 'images'
roiDirectory = '../roi'

# Parameter sets (one per bin width) and image types to sweep over.
paramType = ['binWidth_' + width for width in ('0_05', '0_075', '0_1', '0_25', '0_5', '1')]
imageType = [
    'NDI',
    'ODI',
    'vISO',
]

# Experimental groupings.
sexes = ['male']
days = ['P45']
animals = [
    'control',
    'nrxn',
]

# One radiomics data folder name per animal group: 'radiomics_data_<animal>'.
radiomics_folders = ['radiomics_data' + '_' + animal for animal in animals]


In [None]:
# Sweep over every parameter set / image type / sex / day / animal / ROI
# combination, build the image directory path for each, and pass it to
# imageBatchProcess.

# time the whole sweep
tic = time.perf_counter()

# one dict of timing info per imageBatchProcess call
timeDictList = []

for p in paramType:
    # parameter file for this parameter set: ../parameters/Params_<p>.yaml
    paramPath = os.path.join('..', 'parameters', '_'.join(['Params',p])+'.yaml')
    print(p)
    for image in imageType:
        print(image)
        for sex in sexes:
            print(sex)
            for day in days:
                print(day)
                # each animal group is written to its own radiomics data folder
                for animal, folder in zip(animals,radiomics_folders):
                    print(animal)
                    output_file_name  = "_".join([p,image,sex,day,animal])
                    print(output_file_name)
                    full_image_path = os.path.join("..",imageDirectory,image,sex,day, animal)
                    print(full_image_path)
                    print(p)
                    for roi in roi_list:
                        print(roi)
                        # Time this single run with perf_counter, the same
                        # monotonic clock used for tic/toc, so the measured
                        # interval cannot be skewed by system clock changes.
                        t0 = time.perf_counter()
                        imageBatchProcess(full_image_path, output_file_name,
                                          roiDirectory, image, roi, p, paramPath, folder)
                        t1 = time.perf_counter()
                        total = t1-t0

                        print('Runtime: '+str(total/60) + ' minutes')

                        # Record the runtime in seconds for each run; this list
                        # converts easily to a pandas dataframe for a closer
                        # look at processing speed.
                        timeDictList.append({"imageType":image, "parameter":p,'roi':roi,'animal':animal,
                                             'time':total})



toc = time.perf_counter()
