In [1]:
import numpy as np
import import_ipynb
from skimage.transform import resize
import albumentations

from transformations import ComposeDouble, FunctionWrapperDouble, create_dense_target, normalize_01, AlbuSeg2d
from customdatasets import SegmentationDataSet
from torch.utils.data import DataLoader
from sklearn.model_selection import train_test_split
import pathlib

# dataset root: <current working directory>/dataset/images_train
root = pathlib.Path.cwd().joinpath('dataset', 'images_train')

def get_filenames_of_path(path: pathlib.Path, ext: str = '*') -> list:
    """Return the files inside ``path`` whose names match the glob ``ext``.

    Parameters
    ----------
    path : pathlib.Path or str
        Directory to search. A plain string is converted to ``pathlib.Path``.
    ext : str
        Pattern handed to ``Path.glob`` (e.g. ``'*.png'``). The default
        ``'*'`` matches every entry of the directory.

    Returns
    -------
    list of pathlib.Path
        The matching entries that are regular files (directories are
        skipped), in sorted order. Empty when nothing matches.
    """
    directory = pathlib.Path(path)
    # glob() yields entries in arbitrary order; sort so that callers pairing
    # two listings (images and labels) get a deterministic, aligned order.
    return sorted(file for file in directory.glob(ext) if file.is_file())


# Collect image (*.png) and label (*.tif) paths; both lists are sorted so
# that entries at the same position are meant to belong together.
inputs = sorted(get_filenames_of_path(root / 'data', ext='*.png'))
targets = sorted(get_filenames_of_path(root / 'labels', ext='*.tif'))


# training transforms and augmentations

# (rows, cols) every image and label map is resized to
_RESIZE_SHAPE = (696, 892)


def _preprocessing_steps():
    """Return a fresh list of the preprocessing steps shared by all pipelines.

    A new list with new wrapper instances is built on every call so that the
    pipelines composed from it do not share any objects.
    """
    return [
        # input image: resize, keeping the 3 channels
        FunctionWrapperDouble(resize, input=True, target=False, output_shape=(*_RESIZE_SHAPE, 3)),
        # label map: order=0 (nearest neighbour) without anti-aliasing and with
        # preserve_range=True, so resizing does not invent new label values
        FunctionWrapperDouble(resize, input=False, target=True, output_shape=_RESIZE_SHAPE, order=0, anti_aliasing=False, preserve_range=True),
        # presumably remaps the label values to consecutive class indices -- see transformations notebook
        FunctionWrapperDouble(create_dense_target, input=False, target=True),
        # channels-last -> channels-first for the input image
        FunctionWrapperDouble(np.moveaxis, input=True, target=False, source=-1, destination=0),
        FunctionWrapperDouble(normalize_01)
    ]


# plain preprocessing, no augmentation
transforms = ComposeDouble(_preprocessing_steps())

# training: random horizontal flip first, then the shared preprocessing
transforms_training = ComposeDouble([
    AlbuSeg2d(albumentations.HorizontalFlip(p=0.5)),
    *_preprocessing_steps()
])

# validation: shared preprocessing only
transforms_validation = ComposeDouble(_preprocessing_steps())


# random seed
random_seed = 42

# split dataset into training and validation set
train_size = 0.8  # 80-20 split

# Split inputs and targets in ONE call: train_test_split applies the same
# train/validation indices to every array it is given, so each input stays
# paired with its target, and it raises a ValueError if the two lists differ
# in length instead of silently producing misaligned pairs.
inputs_train, inputs_valid, targets_train, targets_valid = train_test_split(
    inputs,
    targets,
    random_state=random_seed,
    train_size=train_size,
    shuffle=True
)

# datasets: training uses the augmenting pipeline, validation the plain one
dataset_train = SegmentationDataSet(
    inputs=inputs_train,
    targets=targets_train,
    transform=transforms_training,
)
dataset_valid = SegmentationDataSet(
    inputs=inputs_valid,
    targets=targets_valid,
    transform=transforms_validation,
)

# dataloaders: batches of two samples, reshuffled on every pass
dataloader_training = DataLoader(dataset_train, batch_size=2, shuffle=True)
# NOTE(review): the validation loader is shuffled as well -- confirm this is
# intended; a fixed order is the more common choice for validation.
dataloader_validation = DataLoader(dataset_valid, batch_size=2, shuffle=True)

importing Jupyter notebook from transformations.ipynb
# of unique classes = [10 11 12 13 14]
x = shape: (128, 128, 3); type: uint8
x = min: 0; max: 255
x_t = shape: (3, 64, 64); type: float64
x_t = min: 0.0; max: 1.0
y = shape: (128, 128); class: [10 11 12 13 14]
y_t = shape: (64, 64); class: [0 1 2 3 4]
importing Jupyter notebook from customdatasets.ipynb
x = shape: torch.Size([2, 892, 696, 3]); type: torch.float32
x = min: 0.0; max: 255.0
y = shape: torch.Size([2, 892, 696]); class: tensor([0, 1]); type: torch.int64


Since the dataset class provides a `__getitem__` method, its instances can be treated much like a sequence object (e.g. a list) and indexed directly. This is illustrated in the following cell:

In [2]:
# the dataset supports plain indexing: fetch the first training sample
batch = dataset_train[0]

# draw a single batch from the training dataloader
batch_iterator = iter(dataloader_training)
x, y = next(batch_iterator)

# report shape, dtype and value range of the batch
print(f'x = shape: {x.shape}; type: {x.dtype}')
print(f'x = min: {x.min()}; max: {x.max()}')
print(f'y = shape: {y.shape}; class: {y.unique()}; type: {y.dtype}')

# of unique classes = [  0 255]
# of unique classes = [0. 1.]
# of unique classes = [  0 255]
x = shape: torch.Size([2, 3, 696, 892]); type: torch.float32
x = min: 0.0; max: 1.0
y = shape: torch.Size([2, 696, 892]); class: tensor([0, 1]); type: torch.int64


In [4]:
# Open a napari viewer to browse the training dataset.
# import_ipynb is imported before `visualize`; presumably `visualize` is a
# notebook (visualize.ipynb) that needs this import hook -- TODO confirm.
import import_ipynb
from visualize import DatasetViewer

# wrap the training dataset in the viewer and launch napari
dataset_viewer_training = DatasetViewer(dataset_train)
dataset_viewer_training.napari()

# of unique classes = [  0 255]
