This notebook makes dataloaders out of slices and saves them as NumPy arrays for further use.

In [1]:
import os
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.image import load_img, img_to_array

# Root directories of the pre-sliced data, one per split, given relative to
# the notebook's working directory. Each is passed to create_dataset, which
# treats every entry inside it as one scan folder.
data_training = 'sliced/training'
data_test = 'sliced/test'
data_validation = 'sliced/validation'


In [2]:
def load_image(path, target_size=(128, 128)):
    """Read one image file as a grayscale array scaled by 1/255.

    Args:
        path: Location of the image file to read.
        target_size: Size the image is resized to on load; forwarded
            unchanged to Keras ``load_img``.

    Returns:
        The array produced by ``img_to_array`` divided by 255.0 (values in
        ``[0, 1]`` assuming 8-bit input pixels).
    """
    grayscale = load_img(path, color_mode='grayscale', target_size=target_size)
    return img_to_array(grayscale) / 255.0

def load_images(scan_dir):
    """Load every image slice in *scan_dir* together with its segmentation.

    A file counts as an image slice when its name contains ``'slice_'`` but
    not ``'seg'``. The matching segmentation is expected in the same
    directory, under the same name with ``'slice_'`` replaced by
    ``'slice_seg'``.

    Args:
        scan_dir: Directory holding the slice files of one scan.

    Returns:
        A ``(images, segmentations)`` tuple of NumPy arrays whose entries are
        paired by position and ordered by sorted file name. Both arrays are
        empty when the directory contains no matching slice.
    """
    images = []
    segmentations = []
    for filename in sorted(os.listdir(scan_dir)):
        if 'slice_' not in filename or 'seg' in filename:
            continue
        # Derive the mask name from the file name alone: rewriting the joined
        # path would also alter a parent directory that contains 'slice_'.
        seg_filename = filename.replace('slice_', 'slice_seg')
        images.append(load_image(os.path.join(scan_dir, filename)))
        segmentations.append(load_image(os.path.join(scan_dir, seg_filename)))
    return np.array(images), np.array(segmentations)

In [3]:
def create_dataset(data_dir):
    """Stack the slices of every scan folder under *data_dir*.

    Args:
        data_dir: Directory whose sub-directories each hold one scan's slices.

    Returns:
        A ``(image_data, segmentation_data)`` tuple of NumPy arrays, each the
        concatenation along axis 0 of the per-scan arrays returned by
        ``load_images``, in sorted folder order.

    Raises:
        ValueError: If no scan folder under *data_dir* yields any slice.
    """
    image_data = []
    segmentation_data = []
    for scan_folder in sorted(os.listdir(data_dir)):
        scan_dir = os.path.join(data_dir, scan_folder)
        # Stray files (e.g. .DS_Store) are not scans; listing them would raise.
        if not os.path.isdir(scan_dir):
            continue
        images, segmentations = load_images(scan_dir)
        # A scan without slices comes back as a 1-D empty array, which cannot
        # be concatenated with the image stacks of the other scans.
        if len(images) == 0:
            continue
        image_data.append(images)
        segmentation_data.append(segmentations)

    if not image_data:
        raise ValueError(f"No image slices found under {data_dir!r}")

    return (
        np.concatenate(image_data, axis=0),
        np.concatenate(segmentation_data, axis=0),
    )


In [4]:
# Assemble the training split and persist both arrays to disk
# (np.save appends the '.npy' extension to each file name).
images, segmentations = create_dataset(data_dir=data_training)
np.save(file='images_training_dataset', arr=images)
np.save(file='segmentations_training_dataset', arr=segmentations)

In [5]:
# Assemble the test split and persist both arrays to disk
# (np.save appends the '.npy' extension to each file name).
images, segmentations = create_dataset(data_dir=data_test)
np.save(file='images_test_dataset', arr=images)
np.save(file='segmentations_test_dataset', arr=segmentations)

In [6]:
# Assemble the validation split and persist both arrays to disk
# (np.save appends the '.npy' extension to each file name).
images, segmentations = create_dataset(data_dir=data_validation)
np.save(file='images_validation_dataset', arr=images)
np.save(file='segmentations_validation_dataset', arr=segmentations)

In [7]:
def tensorflow_dataset(images, segmentations, batch_size,
                       shuffle_buffer_size=100):
    """Wrap paired arrays in a shuffled, batched, prefetching dataset.

    Args:
        images: Array of inputs, sliced along its first axis.
        segmentations: Array of targets with the same first-axis length.
        batch_size: Number of (image, segmentation) pairs per batch.
        shuffle_buffer_size: Size of the shuffle buffer. The default keeps the
            previously hard-coded value of 100; pass ``len(images)`` for a
            uniform shuffle over the whole dataset, or ``None``/``0`` to keep
            the original element order (e.g. for evaluation).

    Returns:
        A ``tf.data.Dataset`` yielding batches of ``(images, segmentations)``.
    """
    dataset = tf.data.Dataset.from_tensor_slices((images, segmentations))
    if shuffle_buffer_size:
        dataset = dataset.shuffle(buffer_size=shuffle_buffer_size)
    return dataset.batch(batch_size).prefetch(buffer_size=tf.data.AUTOTUNE)

batch_size = 16
# `images` / `segmentations` were last rebound to the validation split by the
# preceding cell, so reload the arrays saved for the training split before
# building the training dataset.
train_images = np.load('images_training_dataset.npy')
train_segmentations = np.load('segmentations_training_dataset.npy')
train_dataset = tensorflow_dataset(train_images, train_segmentations, batch_size)