In [1]:
from src import processing

import os
import numpy as np
from   glob  import glob
from   re    import sub

# Extract NIfTI images from compressed files

## Extract absolute Jacobian images

In [2]:
# Folder holding the compressed absolute Jacobian images.
# Joining with '' guarantees that the path ends with a separator.
input_dir_abs = os.path.join(
    'data/human/registration/jacobians/absolute/smooth/', '')

# Every compressed (.gz) image found in that folder
input_files_abs = glob(os.path.join(input_dir_abs, '*.gz'))

# Decompression call for the absolute Jacobians (currently disabled):
# unzipped_files_abs = processing.gunzip_files(infiles = input_files_abs,
#                                              keep = True,
#                                              parallel = True,
#                                              nproc = 8)

# Collect whatever .nii files are present in the same folder
unzipped_files_abs = glob(os.path.join(input_dir_abs, '*.nii'))
# unzipped_files_abs[:5]

## Extract relative Jacobian images

In [3]:
# Folder holding the compressed relative Jacobian images.
# Joining with '' guarantees that the path ends with a separator.
input_dir_rel = os.path.join(
    'data/human/registration/jacobians/relative/smooth/', '')

# Every compressed (.gz) image found in that folder
input_files_rel = glob(os.path.join(input_dir_rel, '*.gz'))

# Decompression call for the relative Jacobians (currently disabled):
# unzipped_files_rel = processing.gunzip_files(infiles = input_files_rel,
#                                              keep = True,
#                                              parallel = True,
#                                              nproc = 8)

# Collect whatever .nii files are present in the same folder
unzipped_files_rel = glob(os.path.join(input_dir_rel, '*.nii'))
# unzipped_files_rel[:5]

---

# Convert NIfTI images to MINC

## Convert absolute Jacobian images

In [4]:
# Destination folder for the MINC versions of the absolute Jacobian images
outdir_abs = 'data/human/registration/jacobians/absolute/smooth_minc/'

# NIfTI -> MINC conversion call for the absolute Jacobians (currently disabled):
# imgfiles_abs = processing.convert_images(infiles = unzipped_files_abs,
#                                          input_format = 'nifty',
#                                          output_format = 'minc',
#                                          outdir = outdir_abs,
#                                          keep = True,
#                                          parallel = True,
#                                          nproc = 8)

# List the .mnc files found in the destination folder
imgfiles_abs = glob(os.path.join(outdir_abs, '*.mnc'))
# imgfiles_abs[:5]

## Convert relative Jacobian images

In [5]:
# Destination folder for the MINC versions of the relative Jacobian images
outdir_rel = 'data/human/registration/jacobians/relative/smooth_minc/'

# NIfTI -> MINC conversion call for the relative Jacobians (currently disabled):
# imgfiles_rel = processing.convert_images(infiles = unzipped_files_rel,
#                                          input_format = 'nifty',
#                                          output_format = 'minc',
#                                          outdir = outdir_rel,
#                                          keep = True,
#                                          parallel = True,
#                                          nproc = 8)

# List the .mnc files found in the destination folder
imgfiles_rel = glob(os.path.join(outdir_rel, '*.mnc'))
# imgfiles_rel[:5]

---

# Calculating human participant effect sizes

In [6]:
# --- Propensity-matching parameters ---

# Number of controls to use for propensity matching
ncontrols = 10

# Minimum number of matched controls; below this the matching criteria are relaxed
threshold = 5

# Selector for which data to use (the value is also embedded in output names)
dataset = 1

# --- Input files ---

# Mask image to use
maskfile = 'data/human/registration/reference_files/mask.mnc'

# Demographics table (CSV)
demographics = 'data/human/registration/DBM_input_demo_passedqc.csv'

## Absolute Jacobian effect sizes

In [7]:
# Input directory: the absolute Jacobian MINC images
imgdir_abs = outdir_abs

# Output directory, named after the resolution (0.5) and the matching
# parameters; joining with '' appends the trailing separator
es_subdir_abs = ('resolution_{}_dataset_{}_ncontrols_{}_threshold_{}'
                 .format(0.5, dataset, ncontrols, threshold))
es_dir_abs = os.path.join('data/human/effect_sizes/absolute/' + es_subdir_abs, '')

# Effect size computation on the absolute Jacobians (currently disabled):
# es_files_abs = processing.calculate_human_effect_sizes(demographics = demographics,
#                                                        imgdir = imgdir_abs,
#                                                        maskfile = maskfile,
#                                                        outdir = es_dir_abs,
#                                                        ncontrols = ncontrols,
#                                                        threshold = threshold,
#                                                        parallel = True,
#                                                        nproc = 4)

# List the effect size images (.mnc) found in the output directory
es_files_abs = glob(os.path.join(es_dir_abs, '*.mnc'))
# es_files_abs[:5]

## Relative Jacobian effect sizes

In [8]:
# Input directory: the relative Jacobian MINC images
imgdir_rel = outdir_rel

# Output directory, named after the resolution (0.5) and the matching
# parameters; joining with '' appends the trailing separator
es_subdir_rel = ('resolution_{}_dataset_{}_ncontrols_{}_threshold_{}'
                 .format(0.5, dataset, ncontrols, threshold))
es_dir_rel = os.path.join('data/human/effect_sizes/relative/' + es_subdir_rel, '')

# Effect size computation on the relative Jacobians (currently disabled):
# es_files_rel = processing.calculate_human_effect_sizes(demographics = demographics,
#                                                        imgdir = imgdir_rel,
#                                                        maskfile = maskfile,
#                                                        outdir = es_dir_rel,
#                                                        ncontrols = ncontrols,
#                                                        threshold = threshold,
#                                                        parallel = True,
#                                                        nproc = 4)

# List the effect size images (.mnc) found in the output directory
es_files_rel = glob(os.path.join(es_dir_rel, '*.mnc'))
# es_files_rel[:5]

# Downsampling the effect size images

## Downsampling absolute effect size images to 3.0mm

In [9]:
# Resolution (mm) to which we want to downsample.
# The value is also embedded in the names of the downsampled output
# directories and of the clustering output file.
isostep = 3.0

In [10]:
# Output directory: es_dir_abs with its resolution field replaced by the
# target resolution. The dot is escaped so that the pattern matches the
# literal text 'resolution_0.5' only (an unescaped '.' matches any character).
# NOTE: the 0.5 here has to agree with the resolution used to build es_dir_abs.
es_dir_abs_downsampled_3mm = sub(r'resolution_0\.5',
                                 'resolution_{}'.format(isostep),
                                 es_dir_abs)

# Downsampling call for the absolute effect size images (currently disabled):
# es_files_abs_downsampled_3mm = processing.resample_images(infiles = es_files_abs,
#                                                           isostep = isostep,
#                                                           outdir = es_dir_abs_downsampled_3mm,
#                                                           parallel = True,
#                                                           nproc = 4)

# List the downsampled images (.mnc) found in the output directory
es_files_abs_downsampled_3mm = glob(es_dir_abs_downsampled_3mm+'*.mnc')
# es_files_abs_downsampled_3mm[:5]

## Downsampling relative effect size images to 3.0mm

In [11]:
# Output directory: es_dir_rel with its resolution field replaced by the
# target resolution. The dot is escaped so that the pattern matches the
# literal text 'resolution_0.5' only (an unescaped '.' matches any character).
# NOTE: the 0.5 here has to agree with the resolution used to build es_dir_rel.
es_dir_rel_downsampled_3mm = sub(r'resolution_0\.5',
                                 'resolution_{}'.format(isostep),
                                 es_dir_rel)

# Downsampling call for the relative effect size images (currently disabled):
# es_files_rel_downsampled_3mm = processing.resample_images(infiles = es_files_rel,
#                                                           isostep = isostep,
#                                                           outdir = es_dir_rel_downsampled_3mm,
#                                                           parallel = True,
#                                                           nproc = 8)

# List the downsampled images (.mnc) found in the output directory
es_files_rel_downsampled_3mm = glob(es_dir_rel_downsampled_3mm+'*.mnc')
# es_files_rel_downsampled_3mm[:5]

# Downsampling auxiliary imaging files

In [12]:
# Path to the neuroanatomical template image
model = 'data/human/registration/reference_files/model.mnc'

# Path to the mask image
mask = 'data/human/registration/reference_files/mask.mnc'

# Downsample the template and the mask to `isostep` (currently 3.0 mm).
# Using the shared `isostep` setting rather than a repeated literal keeps
# these files at the same resolution as the downsampled effect size images,
# which the mask is later combined with when building the voxel matrices.

# Downsample the template
model_3mm = processing.resample_image(infile = model,
                                      isostep = isostep)

# Downsample the mask
mask_3mm = processing.resample_image(infile = mask,
                                     isostep = isostep)

**TO-DO**: Allow resample_images() to take in a single image, by converting it to a list. 

---

# Clustering the participants

## Creating effect size matrices

### Absolute effect size matrix

In [13]:
# CSV that will hold the absolute effect size matrix, placed inside the
# directory of downsampled absolute effect size images
es_abs_matrix_csv = os.path.join(
    es_dir_abs_downsampled_3mm,
    'ES_data_{}_nc_{}_threshold_{}_3.0mm.csv'.format(dataset, ncontrols, threshold))

# Build the matrix from the downsampled absolute effect size images,
# using the downsampled mask
df_es_abs = processing.build_voxel_matrix(infiles=es_files_abs_downsampled_3mm,
                                          mask=mask_3mm,
                                          sort=True,
                                          file_col=True,
                                          parallel=True,
                                          nproc=2)

# Reduce every entry of the 'file' column to its base name
df_es_abs['file'] = list(map(os.path.basename, df_es_abs['file']))

# Write the matrix to file
df_es_abs.to_csv(es_abs_matrix_csv, index=False)

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 530/530 [00:04<00:00, 112.68it/s]


### Relative effect size matrix

In [14]:
# CSV that will hold the relative effect size matrix, placed inside the
# directory of downsampled relative effect size images
es_rel_matrix_csv = os.path.join(
    es_dir_rel_downsampled_3mm,
    'ES_data_{}_nc_{}_threshold_{}_3.0mm.csv'.format(dataset, ncontrols, threshold))

# Build the matrix from the downsampled relative effect size images,
# using the downsampled mask
df_es_rel = processing.build_voxel_matrix(infiles=es_files_rel_downsampled_3mm,
                                          mask=mask_3mm,
                                          sort=True,
                                          file_col=True,
                                          parallel=True,
                                          nproc=2)

# Reduce every entry of the 'file' column to its base name
df_es_rel['file'] = list(map(os.path.basename, df_es_rel['file']))

# Write the matrix to file
df_es_rel.to_csv(es_rel_matrix_csv, index=False)

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 530/530 [00:04<00:00, 116.53it/s]


## Clustering with similarity network fusion

In [18]:
# Effect size matrices (absolute and relative) to cluster on
infiles = [es_abs_matrix_csv, es_rel_matrix_csv]

# Clustering parameters, passed unchanged to cluster_human_data().
# NOTE(review): nk_max is presumably the largest number of clusters
# considered (the output has columns Group2 .. Group10) -- confirm.
nk_max = 10
metric = 'correlation'
K = 10
sigma = 0.5
t = 20

# Output CSV; its name records every parameter plus the image resolution
outfile_template = ('data/human/clustering/human_clusters_nk_{}_metric_{}_K_{}_sigma_{}_t_{}_{}mm.csv')
outfile = outfile_template.format(nk_max, metric, K, sigma, t, isostep)

# Run the clustering; rows are identified by the 'file' column
df_clusters = processing.cluster_human_data(infiles=infiles,
                                            rownames='file',
                                            nk_max=nk_max,
                                            metric=metric,
                                            K=K,
                                            sigma=sigma,
                                            t=t,
                                            outfile=outfile)

# Preview the first rows of the cluster assignments
df_clusters.head()

Importing data...
Running similarity network fusion...
Assigning clusters...


Unnamed: 0,ID,Group2,Group3,Group4,Group5,Group6,Group7,Group8,Group9,Group10
0,21001_T1.extracted_ES_res_0.5_data_1_nc_10_thr...,1,2,3,3,6,7,1,1,8
1,21002_T1.extracted_ES_res_0.5_data_1_nc_10_thr...,1,2,3,4,4,2,6,3,2
2,21005_T1.extracted_ES_res_0.5_data_1_nc_10_thr...,1,2,3,5,5,5,6,4,7
3,21006_T1.extracted_ES_res_0.5_data_1_nc_10_thr...,1,2,4,5,5,5,4,4,7
4,21007_T1.extracted_ES_res_0.5_data_1_nc_10_thr...,1,2,3,3,6,7,6,7,9
