# Convert data to CLAHE
The preprocessing steps for the 16-bit HiP-CT data are as follows:
* 3D CLAHE is applied to the 16-bit data
* The result is normalised to 8 bit

In [1]:
import sys
sys.path.append('..')
import os
import glob
import natsort
import numpy as np
import skimage.io as skio
from segmentation.preprocessing import preprocessor
import segmentation.preprocessing.helper as helper


In [2]:
# high-resolution 2.58um - 5.2um data conversion
# Locate the 16-bit input cubes and sanity-check the first one before conversion.
input_folder = '/hdd/yang/data/kidney_seg/publish_data/highres_training_16bit_cubes/'
save_folder = '/hdd/yang/data/kidney_seg/high-res_training/highres_training_8bit_clahe/'
cube_paths = natsort.natsorted(glob.glob(os.path.join(input_folder, '*.tif')))
print(f'number of cubes: {len(cube_paths)}')
if not cube_paths:
    # Fail with an explicit message instead of an IndexError on cube_paths[0].
    raise FileNotFoundError(f'no .tif cubes found in {input_folder}')
# Decode the first cube a single time: this both checks that it is readable
# and provides the shape/dtype summary, without re-reading the file per print.
first_cube = skio.imread(cube_paths[0])
print(f'cube shape: {first_cube.shape}')
print(f'cube dtype: {first_cube.dtype}')
del first_cube  # only the printed summary is needed; release the volume

number of cubes: 40
cube shape: (512, 512, 512)
cube dtype: uint16


In [None]:
# NOTE(review): the bare expression below looks like a stray paste (it reads like a
# file name, not code). No name `train_test_split` is bound by the visible cells, so
# this line presumably raises NameError on a fresh kernel — confirm and remove.
train_test_split.txt
# NOTE(review): `hipct_clahe` is not imported in the visible import cell, and
# `clahe_3d()` is called without arguments although the paths prepared earlier
# (input_folder / save_folder) are presumably its inputs — TODO confirm the intended
# module and call signature before re-running this cell.
hipct_clahe.clahe_3d()

Number of files: 40 
 File type: uint16 
 Converting to 8 bit: True 
 Masked: False


  0%|          | 0/40 [00:00<?, ?it/s]

# Generating training patches
After CLAHE and 8-bit conversion, each cube of 512^3 voxels is divided into patches of 128^3 voxels.

In [3]:
# Cut every image cube and its matching label cube into cubic patches.
split_size = 128  # patch edge length
save_dir = '/hdd/yang/data/kidney_seg/high-res_training/training_patches'
label_dir = '/hdd/yang/data/kidney_seg/publish_data/highres_training_16bit_labels'
image_dir = save_folder  # the images are read from the CLAHE step's save folder

preprocessor.generate_training_patches(
    image_dir=image_dir, label_dir=label_dir, save_dir=save_dir, split_size=split_size
)

Number of cube files:  40


Processing cube file: complete_cube_39.tif: 100%|██████████| 40/40 [00:23<00:00,  1.69it/s]


Completed splitting the data patches!
Number of label files:  40


Processing label file: complete_label_39.tif: 100%|██████████| 40/40 [00:24<00:00,  1.64it/s]

Completed splitting the label patches!





# Prepare training and testing patches

In [2]:
# Pick, within each sample, which cubes go to training and which are held out for testing.
helper.train_test_split(save_dir='../data/high-res_training/90-10')

Sample: 5um S-20-28, Total Cubes: 20
Train Indices: [0, 1, 2, 3, 4, 5, 6, 8, 9, 10, 11, 12, 14, 15, 16, 17, 18, 19], total: 18
Test Indices: [7, 13], total: 2
Sample: 5.2um LADAF 2021-17 Left Kidney, Total Cubes: 7
Train Indices: [20, 21, 22, 23, 24, 25], total: 6
Test Indices: [26], total: 1
Sample: 5.2um LADAF 2021-17 Right Kidney, Total Cubes: 9
Train Indices: [27, 29, 30, 31, 32, 33, 34, 35], total: 8
Test Indices: [28], total: 1
Sample: 2.58um LADAF 2020-27 Left Kidney, Total Cubes: 4
Train Indices: [37, 38, 39], total: 3
Test Indices: [36], total: 1


In [4]:
# Build the per-sample patch list from the selected train/test cubes.
# (The 5-fold cross-validation split itself is produced by helper.generate_folds.)
# NOTE(review): presumably this writes the non_zero_patch_list_per_sample.json
# that later steps read from the same output directory — confirm in helper.
helper.generate_patch_list_per_sample(
    output_path='../data/high-res_training/90-10',
    label_dir='../data/high-res_training/labels',
    test_cube_txt='../data/high-res_training/90-10/test_selected_cubes_tr0.9.txt',
    train_cubes_txt='../data/high-res_training/90-10/train_selected_cubes_tr0.9.txt',
)

Working on sample: 5um S-20-28
Total number of valid cubes: 950
Working on sample: 5.2um LADAF 2021-17 Left Kidney
Total number of valid cubes: 197
Working on sample: 5.2um LADAF 2021-17 Right Kidney
Total number of valid cubes: 239
Working on sample: 2.58um LADAF 2020-27 Left Kidney
Total number of valid cubes: 112
Done!


In [2]:
# Split each sample's patch list into 5 train/validation folds.
helper.generate_folds(
    patch_list_per_sample='../data/high-res_training/90-10/non_zero_patch_list_per_sample.json',
    output_json_dir='../data/high-res_training/90-10',
    n_folds=5,
)

Working on sample: 5um S-20-28
Train: 706
Val: 177
Train: 706
Val: 177
Train: 706
Val: 177
Train: 707
Val: 176
Train: 707
Val: 176
Working on sample: 5.2um LADAF 2021-17 Left Kidney
Train: 117
Val: 30
Train: 117
Val: 30
Train: 118
Val: 29
Train: 118
Val: 29
Train: 118
Val: 29
Working on sample: 5.2um LADAF 2021-17 Right Kidney
Train: 164
Val: 41
Train: 164
Val: 41
Train: 164
Val: 41
Train: 164
Val: 41
Train: 164
Val: 41
Working on sample: 2.58um LADAF 2020-27 Left Kidney
Train: 63
Val: 16
Train: 63
Val: 16
Train: 63
Val: 16
Train: 63
Val: 16
Train: 64
Val: 15


In [8]:
# Assemble the nnU-Net raw dataset from the patches listed in the per-sample json file.
helper.processing_nnunet_dataset(
    dataset_name='Dataset001_Glomeruli',
    nnunet_raw_data_dir='../data/nnUNet_raw/',
    patch_dir='../data/high-res_training',
    patch_list_per_sample='../data/high-res_training/90-10/non_zero_patch_list_per_sample.json',
)

Copying training data:   0%|          | 0/1314 [00:00<?, ?it/s]

Copying training data: 100%|██████████| 1314/1314 [00:03<00:00, 382.95it/s]
