In [12]:
from dataset_synapse import *

In [2]:
from glob import glob
import os
import random
import h5py
import numpy as np
import torch
from scipy import ndimage
from scipy.ndimage import zoom
from torch.utils.data import Dataset
import nibabel as nib
import random
import torchvision.transforms as transforms

In [3]:
def normalize_image(image):
    #norm_image = np.empty((image.shape[0], image.shape[1], image.shape[2]), np.float32)
    #for matrix in image:
    norm_image = (image-np.min(image)) / (np.max(image)-np.min(image))
    return norm_image

In [4]:
def zoom_image(image, label):
    original_height, original_width = image.shape[:2]
    new_height = int(original_height * 1.2)
    new_width = int(original_width * 1.2)

    zoom_image = zoom(image, (1.2, 1.2, 1), order=3)
    zoom_label = zoom(label, (1.2, 1.2, 1), order=0)

    start_x = (zoom_image.shape[1] - original_width) // 2
    start_y = (zoom_image.shape[0] - original_height) // 2

    cropped_image = zoom_image[start_y:start_y + original_height, start_x:start_x + original_width]
    cropped_label = zoom_label[start_y:start_y + original_height, start_x:start_x + original_width]

    return cropped_image, cropped_label

In [5]:
def transformations(image_file, label_file):
    # Shape: (*, 512, 512)
    m, s = np.mean(image_file, axis=(0, 1)), np.std(image_file, axis=(0, 1))
    try:
        preprocess_image = transforms.Compose([
            transforms.ToTensor(),
            transforms.Resize((512, 512)),
            transforms.Normalize(mean=m, std=s),
        ])
        #print(image_file.shape)
        img_data = preprocess_image(image_file).squeeze(0)
    except:
        preprocess_image = transforms.Compose([
            transforms.ToTensor(),
            transforms.Resize((512, 512)),
        ])
        #print(image_file.shape)
        img_data = preprocess_image(image_file).squeeze(0)
    #print(img_data.shape)
    preprocess_label = transforms.Compose([
        transforms.ToTensor(),
        transforms.Resize((512, 512)),
    ])
    #print(label_file.shape)
    lbl_data = preprocess_label(label_file).squeeze(0)
    #print(lbl_data.shape)
    return img_data, lbl_data

In [6]:
def nii_to_hdf5(image_file, label_file, output_h5):
    with h5py.File(output_h5+".npy.h5", 'w') as h5f:
        #img_data = np.transpose(img_data, (2, 0, 1))
        #lbl_data = np.transpose(lbl_data, (2, 0, 1))
        
        # Create datasets in the HDF5 file
        h5f.create_dataset('image', data=image_file, dtype='float32')
        h5f.create_dataset('label', data=label_file, dtype='uint8')

        print(f"Saved datasets to {output_h5} successfully.")

In [7]:
def get_trainning_data(nii_file1, nii_file2, output_npz): 
    for slice_num in range(1, nii_file1.shape[0]):
        image_slice = nii_file1.numpy()[slice_num, :, :]
        label_slice = nii_file2.numpy()[slice_num, :, :]
        
        # Save the numpy arrays as a .npz file
        np.savez(output_npz+"_slice"+str(slice_num), image=image_slice, label=label_slice)
    print(f"Saved {output_npz} slices successfully.")

In [8]:
def filenames_with_relative_paths(folder_path, train_folder, test_folder):
    # Loop through all directories and files in the given folder
    datasets = ["Training-Testing", "Training-Training"]
    for dataset in datasets:
        for root, dirs, files in os.walk(os.path.join(folder_path, dataset, "img")):
            if not root.split('/')[-1].startswith("00"):
                continue
            test = True
            random.shuffle(files)
            for file in files:
                print(f"Current {file}")
                image_path = os.path.join(root, file)
                label_path = image_path.replace('img', 'label')
                file_name = "case"+root.split('/')[-1]+'_'+file[4:6]
                # Load the .nii.gz files
                image = nib.load(image_path)
                label = nib.load(label_path)
                image = image.get_fdata()
                image = np.clip(image, -125, 275)
                image = normalize_image(image)
                label = label.get_fdata()

                # crop
                crop_size = 380
                start_row = (image.shape[0] - crop_size)//2
                start_col = (image.shape[0] - crop_size)//2
                image = image[start_row:start_row+crop_size, start_col:start_col+crop_size, :]
                label = label[start_row:start_row+crop_size, start_col:start_col+crop_size, :]
                
                # Transformations
                image, label = transformations(image, label)
                
                if test and dataset == "Training-Testing":
                    nii_to_hdf5(image, label, os.path.join(test_folder, file_name))
                    test = False
                else:
                    get_trainning_data(image, label, os.path.join(train_folder, file_name))
                
    # Original name: /label or img/0079/img0034-0079.nii.gz
    # Target name: case0079_slice034.npz

In [9]:
!rm -rf Synapse/train_npz/*
!rm -rf Synapse/test_vol_h5/*

# create train_npz
train_folder = "Synapse/train_npz"
os.makedirs(train_folder, exist_ok=True)

# create test_vol_h5
test_folder = "Synapse/test_vol_h5"
os.makedirs(test_folder, exist_ok=True)

# Replace with your folder path
folder_path = "Synapse/data_multi_organ/Abdomen/Abdomen/RegData"
filenames_with_relative_paths(folder_path, train_folder, test_folder)

Current img0010-0079.nii.gz
Saved datasets to Synapse/test_vol_h5/case0079_01 successfully.
Current img0040-0079.nii.gz
Saved Synapse/train_npz/case0079_04 slices successfully.
Current img0009-0079.nii.gz
Saved Synapse/train_npz/case0079_00 slices successfully.
Current img0035-0079.nii.gz
Saved Synapse/train_npz/case0079_03 slices successfully.
Current img0039-0079.nii.gz
Saved Synapse/train_npz/case0079_03 slices successfully.
Current img0031-0079.nii.gz
Saved Synapse/train_npz/case0079_03 slices successfully.
Current img0038-0079.nii.gz
Saved Synapse/train_npz/case0079_03 slices successfully.
Current img0024-0079.nii.gz
Saved Synapse/train_npz/case0079_02 slices successfully.
Current img0004-0079.nii.gz
Saved Synapse/train_npz/case0079_00 slices successfully.
Current img0026-0079.nii.gz
Saved Synapse/train_npz/case0079_02 slices successfully.
Current img0030-0079.nii.gz
Saved Synapse/train_npz/case0079_03 slices successfully.
Current img0037-0079.nii.gz
Saved Synapse/train_npz/case00

ImageFileError: Cannot work out file type of "Synapse/data_multi_organ/Abdomen/Abdomen/RegData/Training-Testing/img/0061/Untitled.ipynb"

In [10]:
!rm ../lists/lists_Synapse/train.txt
!rm ../lists/lists_Synapse/test_vol.txt

!.././get_train_data.sh Synapse/train_npz ../lists/lists_Synapse/train.txt train
!.././get_train_data.sh Synapse/test_vol_h5 ../lists/lists_Synapse/test_vol.txt test

train
File names without extensions have been written to: ../lists/lists_Synapse/train.txt
test
File names without extensions have been written to: ../lists/lists_Synapse/test_vol.txt


In [22]:
!cd ../..
!ls
!./train.sh

dataset_synapse.ipynb  Preprocessing.ipynb  README.md
dataset_synapse.py     __pycache__	    Synapse
/bin/bash: line 1: ./train.sh: No such file or directory
