In [None]:
Step 1: Download dataset: follow instructions in this link (https://github.com/neheller/kits23/tree/main/kits23)
        
        
Step 2: Download preprocessing python scipts - Git hub contains sample dataset to test the preprocessing pipeline
cd med_preprocessing_pipeline

    
Step 3: Data conversion and background cropping
python data_conversion.py --data_dir ./sample_dataset/ --raw_data_dir /mnt/hdd/sda/ygeo/tmp_dataset_processed/ --crop_dir /mnt/hdd/sda/ygeo/tmp_dataset_processed/cropped/

Step 4: Normalize and Resample: 
python resample_and_normalize.py --spacing_zxy 1. 0.78125 0.78125 --data_dir /mnt/hdd/sda/ygeo/tmp_dataset_processed/cropped/ --num_threads 4 --output_dir /mnt/hdd/sda/ygeo/tmp_dataset_processed/


Step 5: Data loader and augmentations:     
python data_loader.py --patch_size_zxy 128 128 128 --data_dir /mnt/hdd/sda/ygeo/tmp_dataset_processed/resampled0/ --batch_size 4 --num_threads 4 

# Data conversion and background cropping (zero voxels)

In [None]:
from data_conversion import *

data_dir= "./sample_dataset/"
raw_data_dir= "/mnt/hdd/sda/ygeo/tmp_dataset_processed/"
crop_dir= "/mnt/hdd/sda/ygeo/tmp_dataset_processed/cropped/"
num_threads = 4 
create_dataset_json(data_dir, raw_data_dir)

strt = time.time()
background_crop(raw_data_dir, crop_dir, num_threads=num_threads)
print(f'Cropping zero voxels finished in {time.time() - strt} seconds')

strt = time.time()
dataset_properties = analyze_dataset(crop_dir,num_threads)
print(f'Save Dataset Information finished in {time.time() - strt} seconds')

strt = time.time()
dataset_properties = load_pickle(join(crop_dir, "dataset_properties.pkl"))
out_dir = crop_dir
plans = create_plans(dataset_properties, crop_dir, out_dir, data_type="3D")
print(f'Create Training Configs finished in {time.time() - strt} seconds')


# Normalize and Resample

In [None]:
from resample_and_normalize import *
def run_preprocessing(crop_dir, output_dir, num_threads=4):
    with open(crop_dir+"/config.pkl", 'rb') as f:
        plans = pickle.load(f)
    
    
    normalization_schemes = plans['normalization_schemes']
    use_nonzero_mask_for_normalization = plans['use_mask_for_norm']
    intensityproperties = plans['dataset_properties']['intensityproperties']
    
    preprocessor = GenericPreprocessor(normalization_schemes, use_nonzero_mask_for_normalization,
                                         plans['transpose_forward'],
                                          intensityproperties)
    target_spacings = [i["current_spacing"] for i in plans['plans_per_stage'].values()]
    print(target_spacings)
    target_spacings = [[1.     , 0.78125, 0.78125]]
    
    
    
    preprocessor.run(target_spacings, crop_dir, output_dir, "", num_threads)

out_dir = crop_dir
run_preprocessing(crop_dir, output_dir,)

[array([2.37661875, 1.8567334 , 1.8567334 ]), array([1.     , 0.78125, 0.78125])]
Initializing to run preprocessing
npz folder: /mnt/hdd/sda/ygeo/code/kits23/kits23_project/dataset_converted/nnUNet_cropped_data/Task100_KiTS23/
output_folder: /mnt/hdd/sda/ygeo/tmp_kits23/
separate z, order in z is 0 order inplane is 3
separate z, order in z is 0 order inplane is 3


In [None]:
import numpy as np
import matplotlib.pyplot as plt
import glob
w= 6
h= 6
fig=plt.figure(figsize=(50,50))
data_dir = crop_dir + "/stage0/"
filenames = glob.glob(data_dir + '/*.npz')

i = 1
for f in filenames:
    img = np.load(f)['data']
    
    
    fig.add_subplot(w,h,i)
    i +=1
    idx = img.shape[1]//3
    plt.imshow(img[0,idx,:,:],cmap='gray'),plt.axis('off')
    ## Overlaying mask on images
    mask = img[1,idx,:,:]
    rgb_mask = np.repeat(mask[:,:,np.newaxis],3, axis=2)
    
    
    #1(kidney) 461, 2(tumour): 453, 3 (cyst): 230. 
    rgb_mask[mask==1] = [1,0,0]
    rgb_mask[mask==2] = [0,1,0]
    rgb_mask[mask==3] = [0,0,1]
    plt.imshow(rgb_mask, alpha=0.2)
    plt.title(img.shape)
    print(i, img.shape,img.min(),img.max(),np.unique(rgb_mask[:,:,0]))
    if i > w*h:
        break
    
plt.show()

# Data Loader: Cropping and Augmentation

In [None]:
from data_augmentation import *
from data_loader import *
data_dir_npz = "/mnt/hdd/sda/ygeo/tmp_kits23/preprocessing_stage0/"
patch_size = [128,128,128]
batch_size = 2
dataset = load_dataset(data_dir_npz)
unpack_dataset(data_dir_npz)

dl = DataLoader3D(dataset, patch_size, patch_size , batch_size, oversample_foreground_percent=0.33)
#dl2d = DataLoader2D(dataset, (64, 64), np.array(plans['patch_size']).astype(int)[1:], 12, oversample_foreground_percent=0.33)
tr, val = get_default_augmentation(dl, dl, patch_size)

In [None]:
import numpy as np
import matplotlib.pyplot as plt

w=16
h=15
fig=plt.figure(figsize=(50,50))
i=1
for x in tr:
    print(i, x['data'].shape, x['target'].shape)
   
    #print(x['properties'])
    #print(x['keys'])
    #print(x['target'])
    
    for k in range(x['data'].shape[0]):
        for l in range(40,70,5):
            fig.add_subplot(w,h,i)
            i += 1
            plt.imshow(x['data'][k,0,l,:,:],cmap='gray'),plt.axis('off')

            fig.add_subplot(w,h,i)
            i += 1
            plt.imshow(x['target'][k,0,l,:,:], cmap='gray'),plt.axis('off')
            if i > 225:
                break
        if i > 225:
            break
    if i > 225:
        break
plt.show()

# Slicer Visualisation

In [1]:
### DEMO Download Slicer and connect it to notebook

import JupyterNotebooksLib as slicernb
import slicer

# Set viewer size to 50% of screen size
slicernb.AppWindow.setWindowSize(scale=0.5)
# Hide patient information from slice view
slicernb.showSliceViewAnnotations(False)

ModuleNotFoundError: No module named 'JupyterNotebooksLib'

In [None]:
pip install ipywidgets ipyevents ipycanvas
jupyter labextension install @jupyter-widgets/jupyterlab-manager
jupyter labextension install @jupyter-widgets/jupyterlab-manager ipycanvas
jupyter labextension install @jupyter-widgets/jupyterlab-manager ipyevents