In [1]:
import os
# import pydicom
import numpy as np
# import dicom_numpy
from os import listdir
from scipy.io import loadmat
#from imageio import imread, imresize, imsave
from sklearn.model_selection import train_test_split
import h5py


In [2]:
f = loadmat('Data/structset_3d_channel_all.mat')
d = loadmat('Data/Block_3d_all.mat')
voxel_array = f['structset_3d_channel_all']
Block_array = d['Block_3d_all']
    

In [3]:
voxel_array.shape
#dose_array=np.expand_dims(dose_array,axis=3)
print(voxel_array.shape)
print(Block_array.shape)
Block_array=np.expand_dims(Block_array,axis=4)
test=np.concatenate((voxel_array,Block_array),axis=4)
np.array(test)
print(test.shape)

(21, 256, 256, 160, 2)
(21, 256, 256, 160)
(21, 256, 256, 160, 3)


In [4]:
def get_scan(mat_path):
    # Getting structure set images from path:
    if not os.path.exists(mat_path):
        print('MAT files not exists!')
        return

    f = loadmat('Data/structset_3d_channel_all.mat')
    voxel_array = f['structset_3d_channel_all']
   # voxel_array = np.expand_dims(voxel_array,axis=3)
    return voxel_array


In [5]:
def get_Block_img(images_path):
    # Getting dose image from file
    if not os.path.exists(images_path):
        print('Dose images not exists!')
        return

    f = loadmat('Data/Block_3d_all.mat')
    Block = f['Block_3d_all']
    Block = np.expand_dims(Block,axis=4)
    return Block

In [6]:
def scan_pading(scan, seg_img, section_size):
    # For easly split:
    pad_size = section_size - (scan.shape[-1] % section_size)
    if pad_size != section_size:
        padded_scan = np.pad(scan, ((0,0),(0,0),(0,pad_size)), 'constant')
        try:
            padded_seg_img = np.pad(seg_img, ((0,0),(0,0),(0,pad_size)), 'constant')
        except:
            padded_seg_img = None
    else:
        padded_scan = scan
        padded_seg_img = seg_img
    return padded_scan, padded_seg_img


In [7]:
def split_scans_imgs(scans, seg_img, section_size):
    # Split with sliding window:
    splitted_scans = []
    for i in range(0, scans.shape[-1]-(section_size-1)):
        splitted_scans.append(scans[:,:,:,i:i+section_size])

    splitted_seg_img = []
    for i in range(0, seg_img.shape[-1]-(section_size-1)):
        splitted_seg_img.append(seg_img[:,:,i:i+section_size])

    splitted_scans = np.array(splitted_scans)
    splitted_seg_img = np.array(splitted_seg_img)
    return splitted_scans, splitted_seg_img

In [8]:
def get_dataset(dataset_path, mat_file = 'structset_2d.mat', ground_file = 'doseset_2d.mat', section_size = (256, 256), test_size = 0.2, save_npy = True, dataset_save_path = 'Data/npy_dataset'):
    # Create dateset:
    scans, seg_imgs = [], []
    print ('reading dataset')
    scan = get_scan(dataset_path+'/'+mat_file)
    Block_img = get_Block_img(dataset_path+'/'+ground_file)
    scans = np.array(scan, dtype='float32')
    Block_imgs = np.array(Block_img).astype('float32')

    print('Scan Data Shape: ' + str(scans.shape))
    print('Block Data Shape: ' + str(Block_imgs.shape))

    if save_npy:
        if not os.path.exists(dataset_save_path):
            os.makedirs(dataset_save_path)
        np.save(dataset_save_path+'/scans.npy', scans)
        np.save(dataset_save_path+'/Block.npy', Block_imgs)
        print('NPY dataset saved!')

    X, X_test, Y, Y_test = train_test_split(scans, Block_imgs, test_size=test_size, random_state=42)
    print('training data size'+ str(X.shape))
    print('test data size'+ str(Y.shape))
    return X, X_test, Y, Y_test

In [9]:
def split_npy_dataset(npy_dataset_path, split_npy_dataset_path, test_path, batch_size, test_size):
    X = np.load(npy_dataset_path+'/scans.npy')
    Y = np.load(npy_dataset_path+'/Block.npy')

    if not os.path.exists(split_npy_dataset_path):
        os.makedirs(split_npy_dataset_path)
    if not os.path.exists(test_path):
        os.makedirs(test_path)

    X, X_test, Y, Y_test = train_test_split(X, Y, test_size=test_size, random_state=42)
    print('X_test size',X_test.shape,'Y_test size=',Y_test.shape)
    test_npy = np.concatenate((X_test,Y_test),axis=4)
    test_npy = np.array(test_npy)

   # np.save(test_path+'/test.npy', test_npy)
    for batch_i in range(0, Y_test.shape[0], batch_size):
        batch_npy = np.concatenate((X_test[batch_i:batch_i+batch_size],Y_test[batch_i:batch_i+batch_size]),axis=4)
        batch_npy = np.array(batch_npy)
        np.save(test_path+'/batch_{0}.npy'.format(batch_i), batch_npy)

    for batch_i in range(0, Y.shape[0], batch_size):
        batch_npy = np.concatenate((X[batch_i:batch_i+batch_size],Y[batch_i:batch_i+batch_size]),axis=4)
        batch_npy = np.array(batch_npy)
        np.save(split_npy_dataset_path+'/batch_{0}.npy'.format(batch_i), batch_npy)

        
    print('Splitted NPY Dataset saved!')

In [10]:
def read_npy_dataset(npy_dataset_path, test_size = 0.2):
    X = np.load(npy_dataset_path+'/scans.npy')
    Y = np.load(npy_dataset_path+'/dose.npy')
    X, X_test, Y, Y_test = train_test_split(X, Y, test_size=test_size, random_state=42)
    print('Train Data Shape: ' + str(X.shape[0]))
    print('Test Data Shape: ' + str(X_test.shape[0]))
    return X, X_test, Y, Y_test


In [11]:
if __name__ == '__main__':
    dataset_path = 'Data'
    npy_dataset_path = 'Data/npy_dataset'
    splitted_npy_dataset_path = npy_dataset_path+'/splitted_npy_dataset'
    test_path = npy_dataset_path+'/test_npy'

    X, X_test, Y, Y_test = get_dataset(dataset_path, mat_file = 'structset_3d_channel_all.mat', ground_file = 'Block_3d_all.mat', section_size = (256, 256, 4), test_size = 0.2, save_npy = True, dataset_save_path = npy_dataset_path)
    split_npy_dataset(npy_dataset_path, splitted_npy_dataset_path, test_path, batch_size = 1, test_size = 0.2)

reading dataset
Scan Data Shape: (21, 256, 256, 160, 2)
Block Data Shape: (21, 256, 256, 160, 1)
NPY dataset saved!
training data size(16, 256, 256, 160, 2)
test data size(16, 256, 256, 160, 1)
X_test size (5, 256, 256, 160, 2) Y_test size= (5, 256, 256, 160, 1)
Splitted NPY Dataset saved!


In [None]:
test1 = get_scan('Data/structset.mat')
test2  = get_dose_img('Data/doseset.mat')


In [None]:
X, X_test, Y, Y_test = train_test_split(test1, test2, test_size=0.2, random_state=42)

In [None]:
X_test.shape