In [2]:
from src import processing, utils

import os
import numpy as np
import pandas as pd
from   glob  import glob
from   re    import sub

In [2]:
x = '-'.join(['aaa', 'bbb'])
print(x)

aaa-bbb


In [4]:
x.split('-')

['aaa', 'bbb']

---

# Specify which cohorts to use

In [3]:
#Create directory to save data subset derivatives
human_basedir = 'data/human/derivatives'
datasets = ['POND', 'SickKids']    
human_basedir = utils.mkdir_from_list(inlist = datasets,
                                      basedir = human_basedir)
human_basedir

'data/human/derivatives/POND_SickKids'

## Filter demographics for cohorts of interest

In [5]:
#Path to the demographics file 
demographics = 'data/human/registration/DBM_input_demo_passedqc.csv'

#Import demographics
df_demographics = pd.read_csv(demographics)

#Filter individuals for data subset
df_demographics = (df_demographics
                   .loc[df_demographics['Dataset'].isin(datasets)]
                   .copy())

#Create a new column for patient image files
df_demographics['file'] = (df_demographics['Extract_ID']
                           .str.replace('.mnc', 
                                        '_fwhm_4vox.mnc', 
                                        regex = True))

#Write out demographics subset to subset directory
demographics = os.path.join(human_basedir, os.path.basename(demographics))
df_demographics.to_csv(demographics, index = False)

## Create symlinks to absolute jacobian images

In [17]:
indir_abs = 'data/human/registration/jacobians/absolute/smooth_minc/'
infiles_abs = glob(indir_abs+'*.mnc')

infiles_abs_dataset = [[f for f in infiles_abs if g in f][0] 
                        for g in df_demographics['file'].to_list()]

outdir_abs = os.path.join(human_basedir, 'jacobians', 'resolution_0.5', 'absolute', '')
infiles_abs_dataset

['data/human/registration/jacobians/absolute/smooth_minc/sub-1050004_ses-01_T1w.extracted_fwhm_4vox.mnc',
 'data/human/registration/jacobians/absolute/smooth_minc/sub-1050027_ses-01_run-03_T1w.extracted_fwhm_4vox.mnc',
 'data/human/registration/jacobians/absolute/smooth_minc/sub-1050032_ses-01_T1w.extracted_fwhm_4vox.mnc',
 'data/human/registration/jacobians/absolute/smooth_minc/sub-1050039_ses-01_T1w.extracted_fwhm_4vox.mnc',
 'data/human/registration/jacobians/absolute/smooth_minc/sub-1050040_ses-01_T1w.extracted_fwhm_4vox.mnc',
 'data/human/registration/jacobians/absolute/smooth_minc/sub-1050051_ses-01_T1w.extracted_fwhm_4vox.mnc',
 'data/human/registration/jacobians/absolute/smooth_minc/sub-1050053_ses-01_T1w.extracted_fwhm_4vox.mnc',
 'data/human/registration/jacobians/absolute/smooth_minc/sub-1050056_ses-01_T1w.extracted_fwhm_4vox.mnc',
 'data/human/registration/jacobians/absolute/smooth_minc/sub-1050065_ses-01_T1w.extracted_fwhm_4vox.mnc',
 'data/human/registration/jacobians/abs

In [19]:
outfile_abs = utils.mk_symlinks(src = infiles_abs_dataset,
                               dst = outdir_abs)
outfile_abs[:5]

['data/human/derivatives/POND_SickKids/jacobians/resolution_0.5/absolute/sub-1050004_ses-01_T1w.extracted_fwhm_4vox.mnc',
 'data/human/derivatives/POND_SickKids/jacobians/resolution_0.5/absolute/sub-1050027_ses-01_run-03_T1w.extracted_fwhm_4vox.mnc',
 'data/human/derivatives/POND_SickKids/jacobians/resolution_0.5/absolute/sub-1050032_ses-01_T1w.extracted_fwhm_4vox.mnc',
 'data/human/derivatives/POND_SickKids/jacobians/resolution_0.5/absolute/sub-1050039_ses-01_T1w.extracted_fwhm_4vox.mnc',
 'data/human/derivatives/POND_SickKids/jacobians/resolution_0.5/absolute/sub-1050040_ses-01_T1w.extracted_fwhm_4vox.mnc']

In [6]:
#Create output directory for absolute jacobian images
imgdir_abs = os.path.join(human_basedir, 'jacobians', 'resolution_0.5', 'absolute', '')

imgfiles_abs = glob(imgdir_abs+'*.mnc')

#Get image file names
imgfiles_abs_dataset = df_demographics['file'].to_list()

#Get image paths for images in the subset
imgfiles_abs_dataset = [[f for f in imgfiles_abs if g in f][0] 
                        for g in imgfiles_abs_dataset]


#Create symbolic links for subset images to the subset directory
imgfiles_abs = utils.mk_symlinks(src = imgfiles_abs_dataset,
                        dst = imgdir_abs)
imgfiles_abs[:5]

IndexError: list index out of range

## Create symlinks to relative jacobian images

In [None]:
#Create output directory for absolute jacobian images
imgdir_rel = os.path.join(human_basedir, 'jacobians', 'resolution_0.5', 'relative', '')

#Get image file names
imgfiles_rel_dataset = df_demographics['file'].to_list()

#Get image paths for images in the subset
imgfiles_rel_dataset = [[f for f in imgfiles_rel if g in f][0] 
                        for g in imgfiles_rel_dataset]

#Create symbolic links for subset images to the subset directory
imgfiles_rel = utils.mk_symlinks(src = imgfiles_rel_dataset,
                                 dst = imgdir_rel)
imgfiles_rel[:5]

---

# Downsample jacobian images

## Downsampling absolute jacobian images to 3.0mm

In [None]:
# Resolution (mm) to which we want to downsample
isostep = 3.0

In [None]:
# Create a separate output directory for downsampled images
imgdir_abs_3mm = sub(r'resolution_0.5',
                    'resolution_{}'.format(isostep),
                    imgdir_abs)

# imgfiles_abs_3mm = processing.resample_images(infiles = imgfiles_abs,
#                                               isostep = isostep,
#                                               outdir = imgdir_abs_3mm,
#                                               parallel = True,
#                                               nproc = 8)

imgfiles_abs_3mm = glob(imgdir_abs_3mm+'*.mnc')
imgfiles_abs_3mm[:5]

## Downsampling relative jacobian images to 3.0mm

In [None]:
# Create a separate output directory for downsampled images
imgdir_rel_3mm = sub(r'resolution_0.5',
                    'resolution_{}'.format(isostep),
                    imgdir_rel)

# imgfiles_rel_3mm = processing.resample_images(infiles = imgfiles_rel,
#                                               isostep = isostep,
#                                               outdir = imgdir_rel_3mm,
#                                               parallel = True,
#                                               nproc = 12)

imgfiles_rel_3mm = glob(imgdir_rel_3mm+'*.mnc')
imgfiles_rel_3mm[:5]

# Downsampling auxiliary imaging files

In [None]:
# Path to the neuroanatomical template image
model = 'data/human/registration/reference_files/model.mnc'

# Path to the mask image
mask = 'data/human/registration/reference_files/mask.mnc'

# # Downsample the template to 3.0 mm
# model_3mm = processing.resample_image(infile = model,
#                                       isostep = 3.0,
#                                       suffix = '_3.0mm')
model_3mm = 'data/human/registration/reference_files/model_3.0mm.mnc'

# # Downsample the mask to 3.0 mm
# mask_3mm = processing.resample_image(infile = mask,
#                                      isostep = 3.0,
#                                      suffix = '_3.0mm')
mask_3mm = 'data/human/registration/reference_files/mask_3.0mm.mnc'

---

# Calculating human participant effect sizes

In [None]:
#Parameters for effect size calculation
method = 'normative-growth'
df = 5
combat = True
combat_batch = ['Site', 'Scanner']
ncontrols = None

## Absolute jacobian effect sizes

In [None]:
es_basedir_abs = os.path.join(human_basedir,
                              'effect_sizes',
                              'resolution_3.0',
                              'absolute',
                              '')


es_params = {'es_method':method,
            'es_df':df,
            'es_combat':combat,
            'es_combat_batch':'-'.join(combat_batch),
            'es_ncontrols':ncontrols}

(es_dir_abs, es_metadata) = utils.mkdir_from_params(params = es_params,
                                                    basedir = es_basedir_abs,
                                                    return_metadata = True)

In [None]:
# es_files_abs = processing.calculate_human_effect_sizes(imgdir = imgdir_abs_3mm,
#                                                        demographics = demographics,
#                                                        mask = mask_3mm,
#                                                        outdir = es_dir_abs,
#                                                        method = method,
#                                                        df = df,
#                                                        combat = combat,
#                                                        combat_batch = combat_batch,
#                                                        parallel = True,
#                                                        nproc = 8)

es_files_abs = glob(es_dir_abs+'*.mnc')
es_files_abs[:5]

## Relative jacobian effect sizes

In [None]:
df_metadata = pd.read_csv(es_metadata, dtype = 'str')
df_metadata

In [None]:
es_params_id = pd.merge(df_metadata, 
                        pd.DataFrame(es_params, index = [0], 
                                     dtype = 'str'), 
                        how = 'inner').id[0]

es_basedir_rel = os.path.join(human_basedir,
                              'effect_sizes',
                              'resolution_3.0',
                              'relative',
                              '')

es_dir_rel = utils.mkdir_from_params(params = es_params,
                                     basedir = es_basedir_rel, 
                                     params_id=es_params_id,
                                     return_metadata = False)

In [None]:
# es_files_rel = processing.calculate_human_effect_sizes(imgdir = imgdir_rel_3mm,
#                                                        demographics = demographics,
#                                                        mask = mask_3mm,
#                                                        outdir = es_dir_rel,
#                                                        method = method,
#                                                        df = df,
#                                                        combat = combat,
#                                                        combat_batch = combat_batch,
#                                                        parallel = True,
#                                                        nproc = 8)

es_files_rel = glob(es_dir_rel+'*.mnc')
es_files_rel[:5]

---

# Clustering the participants

## Creating effect size matrices

### Absolute effect size matrix

In [None]:
# File in which to save the absolute effect size matrix
es_abs_matrix_csv = 'effect_sizes.csv'
es_abs_matrix_csv = os.path.join(es_dir_abs, es_abs_matrix_csv)

# # Compute the absolute effect size matrix
# df_es_abs = processing.build_voxel_matrix(infiles = es_files_abs,
#                                           mask = mask_3mm,
#                                           sort = True,
#                                           file_col = True,
#                                           parallel = False,
#                                           nproc = 2)

# # Extract file base names from paths
# df_es_abs['file'] = [os.path.basename(file) for file in df_es_abs['file']]

# # Write to file
# df_es_abs.to_csv(es_abs_matrix_csv, index = False)

### Relative effect size matrix

In [None]:
# File in which to save the absolute effect size matrix
es_rel_matrix_csv = 'effect_sizes.csv'
es_rel_matrix_csv = os.path.join(es_dir_rel, es_rel_matrix_csv)

# # Compute the absolute effect size matrix
# df_es_rel = processing.build_voxel_matrix(infiles = es_files_rel,
#                                           mask = mask_3mm,
#                                           sort = True,
#                                           file_col = True,
#                                           parallel = False,
#                                           nproc = 2)

# # Extract file base names from paths
# df_es_rel['file'] = [os.path.basename(file) for file in df_es_rel['file']]

# # Write to file
# df_es_rel.to_csv(es_rel_matrix_csv, index = False)

# Clustering the participants

In [None]:
#Cluster parameters
infiles = [es_abs_matrix_csv, es_rel_matrix_csv]
nk_max = 10
metric = 'correlation'
K = 10
sigma = 0.5
t = 20

#Combine cluster params with previous params
cluster_params = es_params.copy()
cluster_params.update({'clust_nk_max':nk_max,
                       'clust_metric':metric,
                       'clust_K':K,
                       'clust_sigma':sigma,
                       'clust_t':t})

#Create cluster params ID based on previous ID
cluster_params_id = '-'.join([es_params_id, utils.random_id(3)])

#Create cluster params directory
cluster_basedir = os.path.join(human_basedir, 'clusters')
(cluster_dir, cluster_metadata) = utils.mkdir_from_params(params = cluster_params,
                                                            basedir = cluster_basedir,
                                                          params_id = cluster_params_id,
                                                            return_metadata = True)

cluster_file = os.path.join(cluster_dir, 'human_clusters.csv')

In [None]:
# df_clusters = processing.cluster_human_data(infiles = infiles,
#                                             rownames = 'file',
#                                             nk_max = nk_max,
#                                             metric = metric,
#                                             K = K,
#                                             sigma = sigma,
#                                             t = t,
#                                             cluster_file = cluster_file)

# df_clusters.head()

In [None]:
df_cluster_metadata = pd.read_csv(cluster_metadata, dtype = 'str')
df_cluster_metadata

**To-do**: Implement W-matrix in clustering

# Create cluster maps

## Create absolute cluster maps

In [None]:
cluster_params_id = pd.merge(df_cluster_metadata, 
                        pd.DataFrame(cluster_params, index = [0], 
                                     dtype = 'str'), 
                        how = 'inner').id[0]

cluster_map_params = cluster_params.copy()
cluster_map_params.update({'clust_map_method':'mean'})

#Create cluster params ID based on previous ID
cluster_map_params_id = '-'.join([cluster_params_id, utils.random_id(3)])

cluster_map_basedir_abs = os.path.join(human_basedir, 'cluster_maps', 'resolution_3.0', 'absolute', '')
(cluster_map_dir_abs, cluster_map_metadata) = utils.mkdir_from_params(params = cluster_map_params,
                                                                     basedir = cluster_map_basedir_abs,
                                                                     params_id = cluster_map_params_id,
                                                                     return_metadata = True)

In [None]:
cluster_maps_abs = processing.create_cluster_maps(clusters = cluster_file,
                                                  imgdir = es_dir_abs,
                                                  outdir = cluster_map_dir_abs,
                                                  method = 'mean',
                                                  mask = mask_3mm)

# cluster_maps_abs = glob(cluster_map_dir_abs+'*.mnc')
cluster_maps_abs[:5]

## Create relative cluster maps

In [None]:
df_cluster_map_metadata = pd.read_csv(cluster_map_metadata, dtype = 'str')
cluster_map_params_id = pd.merge(df_cluster_map_metadata,
                                 pd.DataFrame(cluster_map_params, index = [0], dtype = 'str'),
                                how = 'inner').id[0]

cluster_map_basedir_rel = os.path.join(human_basedir, 'cluster_maps', 'resolution_3.0', 'relative', '')
(cluster_map_dir_rel, cluster_map_metadata) = utils.mkdir_from_params(params = cluster_map_params,
                                                                     basedir = cluster_map_basedir_rel,
                                                                     params_id = cluster_map_params_id,
                                                                     return_metadata = True)

In [None]:
cluster_maps_rel = processing.create_cluster_maps(clusters = cluster_file,
                                                  imgdir = es_dir_rel,
                                                  outdir = cluster_map_dir_rel,
                                                  method = 'mean',
                                                  mask = mask_3mm)

# cluster_maps_rel = glob(cluster_map_dir_rel+'*.mnc')
cluster_maps_rel[:5]

In [None]:
x = '-'.join(['aaa', 'bbb'])
print(x)

---