From 1ea5efcd6991c94212937e0f2ecc567c1ccf26e8 Mon Sep 17 00:00:00 2001 From: Sajtospoga01 Date: Tue, 23 May 2023 14:10:46 +0100 Subject: [PATCH 01/18] updates image preprocessor by removing squeezing from onehot encode making the function more flexible, providing a preprocessing queue interface and clipping data after its sent through the preprocessing queue --- .../segmentation_utils/ImagePreprocessor.py | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/utilities/segmentation_utils/ImagePreprocessor.py b/utilities/segmentation_utils/ImagePreprocessor.py index ea7665b..1094ef1 100644 --- a/utilities/segmentation_utils/ImagePreprocessor.py +++ b/utilities/segmentation_utils/ImagePreprocessor.py @@ -1,10 +1,21 @@ from dataclasses import dataclass -from typing import Callable, Dict, Optional +from typing import Callable, Dict, Optional, Protocol import numpy as np import tensorflow as tf +class PreprocessorInterface(Protocol): + queue: list[Callable] + arguments: list[Dict] + + def update_seed(self, seed: int) -> None: + ... + + def get_queue_length(self) -> int: + ... 
+ + @dataclass class PreprocessingQueue: """ @@ -98,7 +109,7 @@ def onehot_encode(masks, output_size, num_classes) -> tf.Tensor: """ encoded = np.zeros((masks.shape[0], output_size[0] * output_size[1], num_classes)) for i in range(num_classes): - encoded[:, :, i] = tf.squeeze((masks == i).astype(int)) + encoded[:, :, i] = (masks == i).astype(int) encoded = tf.convert_to_tensor(encoded) return encoded @@ -166,6 +177,9 @@ def augmentation_pipeline( # flattens masks out to the correct output shape if output_size[1] == 1: mask = flatten(mask, output_size, channels=1) + + image = tf.convert_to_tensor(tf.clip_by_value(image, 0, 1)) + return image, mask From 6a37482a896dff8623552960119e7bfc52346ee9 Mon Sep 17 00:00:00 2001 From: Sajtospoga01 Date: Tue, 23 May 2023 14:11:25 +0100 Subject: [PATCH 02/18] adds experimental implementation of the flowreader object changing the tf reliant implementation to a self maintained --- utilities/segmentation_utils/flowreader.py | 263 ++++++++++++++++++++- 1 file changed, 255 insertions(+), 8 deletions(-) diff --git a/utilities/segmentation_utils/flowreader.py b/utilities/segmentation_utils/flowreader.py index 66088a2..05fccb2 100644 --- a/utilities/segmentation_utils/flowreader.py +++ b/utilities/segmentation_utils/flowreader.py @@ -7,7 +7,9 @@ from typing import Optional import numpy as np +import tensorflow as tf from keras.preprocessing.image import ImageDataGenerator +from keras.utils import Sequence from utilities.segmentation_utils import ImagePreprocessor @@ -39,8 +41,6 @@ class FlowGenerator: :bool preprocessing_enabled: whether to apply preprocessing or not :int seed: seed for flow from directory :int preprocessing_seed: seed for preprocessing, defaults to None - :PreprocessingQueue preprocessing_queue_image: preprocessing queue for images - :PreprocessingQueue preprocessing_queue_mask: preprocessing queue for masks Raises ------ @@ -48,6 +48,10 @@ class FlowGenerator: :ValueError: if the output size is not a square matrix or a 
column vector """ + preprocessing_seed = None + preprocessing_queue_image = None + preprocessing_queue_mask = None + def __init__( self, image_path: str, @@ -60,10 +64,6 @@ def __init__( preprocessing_enabled: bool = True, seed: int = 909, preprocessing_seed: Optional[int] = None, - preprocessing_queue_image: Optional[ - ImagePreprocessor.PreprocessingQueue - ] = None, - preprocessing_queue_mask: Optional[ImagePreprocessor.PreprocessingQueue] = None, ): if len(output_size) != 2: raise ValueError("The output size has to be a tuple of length 2") @@ -81,8 +81,6 @@ def __init__( self.shuffle = shuffle self.seed = seed self.preprocessing_enabled = preprocessing_enabled - self.preprocessing_queue_image = preprocessing_queue_image - self.preprocessing_queue_mask = preprocessing_queue_mask self.preprocessing_seed = preprocessing_seed self.__make_generator() print("Reading images from: ", self.image_path) @@ -98,6 +96,22 @@ def get_dataset_size(self) -> int: return len(os.listdir(os.path.join(self.image_path, "img"))) + def set_preprocessing_pipeline( + self, + preprocessing_queue_image: ImagePreprocessor.PreprocessorInterface, + preprocessing_queue_mask: ImagePreprocessor.PreprocessorInterface, + ) -> None: + """ + Sets the preprocessing pipeline + + Parameters + ---------- + :PreprocessingQueue preprocessing_queue_image: preprocessing queue for images + :PreprocessingQueue preprocessing_queue_mask: preprocessing queue for masks + """ + self.preprocessing_queue_image = preprocessing_queue_image + self.preprocessing_queue_mask = preprocessing_queue_mask + def __make_generator(self): """ Creates the generator @@ -195,3 +209,236 @@ def preprocess(self, generator_zip): mask, self.output_size, self.num_classes ) yield (img, mask) + + +class FlowGeneratorExperimental(Sequence): + """ + Initializes the flow generator object, + which can be used to read in images for semantic segmentation. 
+ Additionally, the reader can apply augmentation on the images, + and one-hot encode them on the fly. + + Note: in case the output is a column vector it has to be in the shape (x, 1) + Note: this is an experimental version of the flow generator, which uses a \ + custom implemented dataloader instead of the keras ImageDataGenerator + + Parameters + ---------- + :string image: path to the image directory + :string mask: path to the mask directory + :int batch_size: batch size + :tuple image_size: image size + :tuple output_size: output size + + + :int num_classes: number of classes + + Keyword Arguments + ----------------- + :bool shuffle: whether to shuffle the dataset or not + :int batch_size: batch size + :bool preprocessing_enabled: whether to apply preprocessing or not + :int seed: seed for flow from directory + :int preprocessing_seed: seed for preprocessing, defaults to None + + Raises + ------ + :ValueError: if the output size is not a tuple of length 2 + :ValueError: if the output size is not a square matrix or a column vector + """ + + preprocessing_seed = None + preprocessing_queue_image = None + preprocessing_queue_mask = None + + def __init__( + self, + image_path: str, + mask_path: str, + image_size: tuple[int, int], + output_size: tuple[int, int], + channel_mask: list[bool], + num_classes: int, + shuffle: bool = True, + batch_size: int = 2, + preprocessing_enabled: bool = True, + seed: int = 909, + preprocessing_seed: Optional[int] = None, + ): + if len(output_size) != 2: + raise ValueError("The output size has to be a tuple of length 2") + if output_size[1] != 1 and output_size[0] != output_size[1]: + raise ValueError( + "The output size has to be a square matrix or a column vector" + ) + + self.image_path = image_path + self.mask_path = mask_path + self.batch_size = batch_size + self.mini_batch = batch_size + self.image_size = image_size + self.output_size = output_size + self.channel_mask = np.array(channel_mask) + self.n_channels = 
np.sum(channel_mask) + self.num_classes = num_classes + self.shuffle = shuffle + self.seed = seed + self.preprocessing_enabled = preprocessing_enabled + self.preprocessing_seed = preprocessing_seed + + self.image_filenames = os.listdir(os.path.join(self.image_path, "img")) + self.mask_filenames = os.listdir(os.path.join(self.mask_path, "mask")) + + self.image_batch_store = np.zeros( + (1, self.batch_size, image_size[0], image_size[1], self.n_channels) + ) + self.mask_batch_store = np.zeros((1, self.batch_size, 1, 1, num_classes)) + self.validity_index = 0 + + if self.output_size[1] == 1: + # only enters if the output is a column vector + # such no need to define it otherwise + dimension = math.sqrt(self.output_size[0]) + self.output_reshape = (int(dimension), int(dimension)) + else: + self.output_reshape = None + + print("Reading images from: ", self.image_path) + + def set_preprocessing_pipeline( + self, + preprocessing_queue_image: ImagePreprocessor.PreprocessorInterface, + preprocessing_queue_mask: ImagePreprocessor.PreprocessorInterface, + ) -> None: + """ + Sets the preprocessing pipeline + + Parameters + ---------- + :PreprocessingQueue preprocessing_queue_image: preprocessing queue for images + :PreprocessingQueue preprocessing_queue_mask: preprocessing queue for masks + """ + self.preprocessing_queue_image = preprocessing_queue_image + self.preprocessing_queue_mask = preprocessing_queue_mask + + def set_mini_batch_size(self, batch_size: int) -> None: + """ + Function to set the appropriate minibatch size. Required to allign batch size in the reader with the model.\ + Does not change the batch size of the reader. 
+ + Parameters + ---------- + :int batch_size: the mini batch size + + Raises + ------ + :raises ValueError: if the mini batch size is larger than the batch size + :raises ValueError: if the batch size is not divisible by the mini batch size + """ + if batch_size > self.batch_size: + raise ValueError("The mini batch size cannot be larger than the batch size") + if self.batch_size % batch_size != 0: + raise ValueError("The batch size must be divisible by the mini batch size") + self.mini_batch = batch_size + + def read_batch(self, start: int, end: int) -> None: + # read image batch + batch_image_filenames = self.image_filenames[start:end] + batch_mask_filenames = self.mask_filenames[start:end] + + # calculate number of mini batches in a batch + n = self.batch_size // self.mini_batch + + batch_images = np.zeros( + ( + n, + self.mini_batch, + self.image_size[0], + self.image_size[1], + self.n_channels, + ) + ) + batch_masks = np.zeros( + ( + n, + self.mini_batch, + self.output_size[0], + self.output_size[1], + self.num_classes, + ) + ) + + # preprocess and assign images and masks to the batch + for i in range(n): + for j in range(self.mini_batch): + image = np.load( + os.path.join(self.image_path, "img", batch_image_filenames[j]) + ) + mask = np.load( + os.path.join(self.mask_path, "mask", batch_mask_filenames[j]) + ) + + # for now it is assumed that n is 1 + batch_images[i, j, :, :, :] = image[:, :, self.channel_mask] + + if self.output_size[1] == 1: + batch_masks = batch_masks.reshape((-1, 1)) # or batch_masks[:, np.newaxis] + + if self.preprocessing_enabled: + if self.preprocessing_seed is None: + image_seed = np.random.randint(0, 100000) + else: + state = np.random.RandomState(self.preprocessing_seed) + image_seed = state.randint(0, 100000) + + ( + batch_images[i, j, :, :, :], + mask, + ) = ImagePreprocessor.augmentation_pipeline( + image=batch_images[i, j, :, :, :], + mask=mask, + input_size=self.image_size, + output_size=self.output_size, + 
output_reshape=self.output_reshape, + seed=image_seed, + #!both preprocessing queues are assigned by this time + image_queue=self.preprocessing_queue_image, # type: ignore + mask_queue=self.preprocessing_queue_mask, # type: ignore + ) + + batch_masks[i, j, :, :, :] = ImagePreprocessor.onehot_encode( + mask, self.output_size, self.num_classes + ) + + # chaches the batch + self.image_batch_store = batch_images + self.mask_batch_store = batch_masks + + # required to check when to read the next batch + self.validity_index = end + + def __len__(self): + return int(np.ceil(len(self.image_filenames) / float(self.batch_size))) + + def __getitem__(self, index) -> tuple[np.ndarray, np.ndarray]: + # check if the batch is already cached + if index == self.validity_index: + self.read_batch(index, index + self.batch_size) + + # slices new batch + store_index = (index - self.validity_index) % self.mini_batch + + batch_images = self.image_batch_store[store_index, :, :, :, :] + batch_masks = self.mask_batch_store[store_index, :, :, :, :] + + tf.squeeze(batch_masks, axis=2) + + return np.array(batch_images), np.array(batch_masks) + + def on_epoch_end(self): + # Shuffle image and mask filenames + if self.shuffle: + np.random.seed(self.seed) + np.random.shuffle(self.image_filenames) + np.random.seed(self.seed) + np.random.shuffle(self.mask_filenames) From 3ee77363276a83fa641cfe4205722346e342fcf6 Mon Sep 17 00:00:00 2001 From: Andras Bodrogai Date: Tue, 23 May 2023 16:25:32 +0100 Subject: [PATCH 03/18] updates flowreader experimental to run with training --- .../segmentation_utils/ImagePreprocessor.py | 2 +- utilities/segmentation_utils/flowreader.py | 70 ++++++++++++------- 2 files changed, 47 insertions(+), 25 deletions(-) diff --git a/utilities/segmentation_utils/ImagePreprocessor.py b/utilities/segmentation_utils/ImagePreprocessor.py index 1094ef1..2e00b3b 100644 --- a/utilities/segmentation_utils/ImagePreprocessor.py +++ b/utilities/segmentation_utils/ImagePreprocessor.py @@ 
-109,7 +109,7 @@ def onehot_encode(masks, output_size, num_classes) -> tf.Tensor: """ encoded = np.zeros((masks.shape[0], output_size[0] * output_size[1], num_classes)) for i in range(num_classes): - encoded[:, :, i] = (masks == i).astype(int) + encoded[:, :, i] = tf.squeeze((masks == i)) encoded = tf.convert_to_tensor(encoded) return encoded diff --git a/utilities/segmentation_utils/flowreader.py b/utilities/segmentation_utils/flowreader.py index 05fccb2..173f33d 100644 --- a/utilities/segmentation_utils/flowreader.py +++ b/utilities/segmentation_utils/flowreader.py @@ -8,6 +8,7 @@ import numpy as np import tensorflow as tf +import cv2 from keras.preprocessing.image import ImageDataGenerator from keras.utils import Sequence @@ -286,8 +287,8 @@ def __init__( self.preprocessing_enabled = preprocessing_enabled self.preprocessing_seed = preprocessing_seed - self.image_filenames = os.listdir(os.path.join(self.image_path, "img")) - self.mask_filenames = os.listdir(os.path.join(self.mask_path, "mask")) + self.image_filenames = os.listdir(os.path.join(self.image_path)) + self.mask_filenames = os.listdir(os.path.join(self.mask_path)) self.image_batch_store = np.zeros( (1, self.batch_size, image_size[0], image_size[1], self.n_channels) @@ -358,31 +359,49 @@ def read_batch(self, start: int, end: int) -> None: self.n_channels, ) ) - batch_masks = np.zeros( - ( - n, - self.mini_batch, - self.output_size[0], - self.output_size[1], - self.num_classes, + if self.output_size[1] == 1: + column = True + batch_masks = np.zeros((n, self.mini_batch, self.output_size[0],self.num_classes)) + else: + column = False + batch_masks = np.zeros( + ( + n, + self.mini_batch, + self.output_size[0], + self.output_size[1], + self.num_classes, + ) ) - ) # preprocess and assign images and masks to the batch for i in range(n): for j in range(self.mini_batch): - image = np.load( - os.path.join(self.image_path, "img", batch_image_filenames[j]) + image = cv2.imread( + os.path.join(self.image_path, 
batch_image_filenames[j]), + cv2.IMREAD_COLOR, ) - mask = np.load( - os.path.join(self.mask_path, "mask", batch_mask_filenames[j]) + image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) + image = cv2.resize(image, self.image_size) + image = np.asarray(image) + + mask = cv2.imread( + os.path.join(self.mask_path, batch_mask_filenames[j]), + cv2.IMREAD_GRAYSCALE, ) + mask = cv2.resize(mask, self.output_size) + mask = np.asarray(mask).reshape(self.output_size) + # np.load( + # os.path.join(self.image_path, batch_image_filenames[j]) + # ) + # mask = np.load( + # os.path.join(self.mask_path, batch_mask_filenames[j]) + # ) # for now it is assumed that n is 1 batch_images[i, j, :, :, :] = image[:, :, self.channel_mask] - if self.output_size[1] == 1: - batch_masks = batch_masks.reshape((-1, 1)) # or batch_masks[:, np.newaxis] + if self.preprocessing_enabled: if self.preprocessing_seed is None: @@ -405,10 +424,13 @@ def read_batch(self, start: int, end: int) -> None: image_queue=self.preprocessing_queue_image, # type: ignore mask_queue=self.preprocessing_queue_mask, # type: ignore ) - - batch_masks[i, j, :, :, :] = ImagePreprocessor.onehot_encode( - mask, self.output_size, self.num_classes - ) + + batch_masks[i, j, : , 0] = tf.squeeze(mask) + + batch_masks[i, :,:,:] = ImagePreprocessor.onehot_encode( + batch_masks[i, :,:,0], self.output_size, self.num_classes + ) + # chaches the batch self.image_batch_store = batch_images @@ -426,12 +448,12 @@ def __getitem__(self, index) -> tuple[np.ndarray, np.ndarray]: self.read_batch(index, index + self.batch_size) # slices new batch - store_index = (index - self.validity_index) % self.mini_batch + store_index = (index - (self.validity_index-self.batch_size)) // self.mini_batch - batch_images = self.image_batch_store[store_index, :, :, :, :] - batch_masks = self.mask_batch_store[store_index, :, :, :, :] + batch_images = self.image_batch_store[store_index,...] + batch_masks = self.mask_batch_store[store_index,...] 
- tf.squeeze(batch_masks, axis=2) + return np.array(batch_images), np.array(batch_masks) From 952ed852b2818b5a0a7c88617fd1c52ccbdda31f Mon Sep 17 00:00:00 2001 From: Sajtospoga01 Date: Wed, 24 May 2023 21:03:19 +0100 Subject: [PATCH 04/18] specifies datatypes for the mask store array --- utilities/segmentation_utils/flowreader.py | 40 ++++++++++++---------- 1 file changed, 22 insertions(+), 18 deletions(-) diff --git a/utilities/segmentation_utils/flowreader.py b/utilities/segmentation_utils/flowreader.py index 173f33d..d7180ee 100644 --- a/utilities/segmentation_utils/flowreader.py +++ b/utilities/segmentation_utils/flowreader.py @@ -291,9 +291,11 @@ def __init__( self.mask_filenames = os.listdir(os.path.join(self.mask_path)) self.image_batch_store = np.zeros( - (1, self.batch_size, image_size[0], image_size[1], self.n_channels) + (1, self.batch_size, image_size[0], image_size[1], self.n_channels), + ) - self.mask_batch_store = np.zeros((1, self.batch_size, 1, 1, num_classes)) + self.mask_batch_store = np.zeros((1, self.batch_size, 1, 1, num_classes), + dtype=np.uint8) self.validity_index = 0 if self.output_size[1] == 1: @@ -357,11 +359,15 @@ def read_batch(self, start: int, end: int) -> None: self.image_size[0], self.image_size[1], self.n_channels, - ) + ), + ) if self.output_size[1] == 1: column = True - batch_masks = np.zeros((n, self.mini_batch, self.output_size[0],self.num_classes)) + batch_masks = np.zeros( + (n, self.mini_batch, self.output_size[0], self.num_classes), + dtype=np.uint8, + ) else: column = False batch_masks = np.zeros( @@ -371,7 +377,8 @@ def read_batch(self, start: int, end: int) -> None: self.output_size[0], self.output_size[1], self.num_classes, - ) + ), + dtype=np.uint8, ) # preprocess and assign images and masks to the batch @@ -401,8 +408,6 @@ def read_batch(self, start: int, end: int) -> None: # for now it is assumed that n is 1 batch_images[i, j, :, :, :] = image[:, :, self.channel_mask] - - if self.preprocessing_enabled: if 
self.preprocessing_seed is None: image_seed = np.random.randint(0, 100000) @@ -424,13 +429,12 @@ def read_batch(self, start: int, end: int) -> None: image_queue=self.preprocessing_queue_image, # type: ignore mask_queue=self.preprocessing_queue_mask, # type: ignore ) - - batch_masks[i, j, : , 0] = tf.squeeze(mask) - - batch_masks[i, :,:,:] = ImagePreprocessor.onehot_encode( - batch_masks[i, :,:,0], self.output_size, self.num_classes + + batch_masks[i, j, :, 0] = tf.squeeze(mask) + + batch_masks[i, :, :, :] = ImagePreprocessor.onehot_encode( + batch_masks[i, :, :, 0], self.output_size, self.num_classes ) - # chaches the batch self.image_batch_store = batch_images @@ -448,12 +452,12 @@ def __getitem__(self, index) -> tuple[np.ndarray, np.ndarray]: self.read_batch(index, index + self.batch_size) # slices new batch - store_index = (index - (self.validity_index-self.batch_size)) // self.mini_batch - - batch_images = self.image_batch_store[store_index,...] - batch_masks = self.mask_batch_store[store_index,...] + store_index = ( + index - (self.validity_index - self.batch_size) + ) // self.mini_batch - + batch_images = self.image_batch_store[store_index, ...] + batch_masks = self.mask_batch_store[store_index, ...] 
return np.array(batch_images), np.array(batch_masks) From f913e5038039f88c69261cf615c47a76d185a5d3 Mon Sep 17 00:00:00 2001 From: Andras Bodrogai Date: Wed, 24 May 2023 21:17:38 +0100 Subject: [PATCH 05/18] removes uint declaration --- utilities/segmentation_utils/flowreader.py | 45 +++++++++------------- 1 file changed, 19 insertions(+), 26 deletions(-) diff --git a/utilities/segmentation_utils/flowreader.py b/utilities/segmentation_utils/flowreader.py index d7180ee..78b0018 100644 --- a/utilities/segmentation_utils/flowreader.py +++ b/utilities/segmentation_utils/flowreader.py @@ -291,11 +291,9 @@ def __init__( self.mask_filenames = os.listdir(os.path.join(self.mask_path)) self.image_batch_store = np.zeros( - (1, self.batch_size, image_size[0], image_size[1], self.n_channels), - + (1, self.batch_size, image_size[0], image_size[1], self.n_channels) ) - self.mask_batch_store = np.zeros((1, self.batch_size, 1, 1, num_classes), - dtype=np.uint8) + self.mask_batch_store = np.zeros((1, self.batch_size, 1, 1, num_classes)) self.validity_index = 0 if self.output_size[1] == 1: @@ -347,7 +345,7 @@ def set_mini_batch_size(self, batch_size: int) -> None: def read_batch(self, start: int, end: int) -> None: # read image batch batch_image_filenames = self.image_filenames[start:end] - batch_mask_filenames = self.mask_filenames[start:end] + batch_mask_filenames = batch_image_filenames # calculate number of mini batches in a batch n = self.batch_size // self.mini_batch @@ -359,15 +357,11 @@ def read_batch(self, start: int, end: int) -> None: self.image_size[0], self.image_size[1], self.n_channels, - ), - + ) ) if self.output_size[1] == 1: column = True - batch_masks = np.zeros( - (n, self.mini_batch, self.output_size[0], self.num_classes), - dtype=np.uint8, - ) + batch_masks = np.zeros((n, self.mini_batch, self.output_size[0],self.num_classes)) else: column = False batch_masks = np.zeros( @@ -377,8 +371,7 @@ def read_batch(self, start: int, end: int) -> None: self.output_size[0], 
self.output_size[1], self.num_classes, - ), - dtype=np.uint8, + ) ) # preprocess and assign images and masks to the batch @@ -429,12 +422,13 @@ def read_batch(self, start: int, end: int) -> None: image_queue=self.preprocessing_queue_image, # type: ignore mask_queue=self.preprocessing_queue_mask, # type: ignore ) - - batch_masks[i, j, :, 0] = tf.squeeze(mask) - - batch_masks[i, :, :, :] = ImagePreprocessor.onehot_encode( - batch_masks[i, :, :, 0], self.output_size, self.num_classes + + batch_masks[i, j, : , 0] = tf.squeeze(mask) + + batch_masks[i, :,:,:] = ImagePreprocessor.onehot_encode( + batch_masks[i, :,:,0], self.output_size, self.num_classes ) + # chaches the batch self.image_batch_store = batch_images @@ -448,23 +442,22 @@ def __len__(self): def __getitem__(self, index) -> tuple[np.ndarray, np.ndarray]: # check if the batch is already cached + if index < self.validity_index-self.batch_size: + self.validity_index = 0 + if index == self.validity_index: self.read_batch(index, index + self.batch_size) # slices new batch - store_index = ( - index - (self.validity_index - self.batch_size) - ) // self.mini_batch + store_index = (index - (self.validity_index-self.batch_size)) // self.mini_batch - batch_images = self.image_batch_store[store_index, ...] - batch_masks = self.mask_batch_store[store_index, ...] + batch_images = self.image_batch_store[store_index,...] + batch_masks = self.mask_batch_store[store_index,...] 
return np.array(batch_images), np.array(batch_masks) def on_epoch_end(self): # Shuffle image and mask filenames + if self.shuffle: - np.random.seed(self.seed) np.random.shuffle(self.image_filenames) - np.random.seed(self.seed) - np.random.shuffle(self.mask_filenames) From 7cc5ae3ad0bf7ca33dae84af52a4a0a04651b7b8 Mon Sep 17 00:00:00 2001 From: Andras Bodrogai Date: Thu, 25 May 2023 10:23:48 +0100 Subject: [PATCH 06/18] fixes possible indexing issue and changes default reader library from CV2 to PIL --- .../segmentation_utils/ImagePreprocessor.py | 4 +- utilities/segmentation_utils/flowreader.py | 73 +++++++++++-------- 2 files changed, 43 insertions(+), 34 deletions(-) diff --git a/utilities/segmentation_utils/ImagePreprocessor.py b/utilities/segmentation_utils/ImagePreprocessor.py index 2e00b3b..59ce65c 100644 --- a/utilities/segmentation_utils/ImagePreprocessor.py +++ b/utilities/segmentation_utils/ImagePreprocessor.py @@ -160,7 +160,7 @@ def augmentation_pipeline( ------- :return tuple(tf.Tensor, tf.Tensor): tuple of the processed image and mask """ - + # reshapes masks, such that transforamtions work properly if output_reshape is not None and output_size[1] == 1: mask = tf.reshape(mask, (output_reshape[0], output_reshape[1], 1)) @@ -201,4 +201,4 @@ def flatten(image, input_size, channels=1) -> tf.Tensor: :return tf.Tensor: flattened image """ # the 1 is required to preserve the shape similar to the original - return tf.reshape(image, (input_size[0] * input_size[1], 1, channels)) + return tf.reshape(image, (input_size[0] * input_size[1], channels)) diff --git a/utilities/segmentation_utils/flowreader.py b/utilities/segmentation_utils/flowreader.py index 78b0018..4f1c2bf 100644 --- a/utilities/segmentation_utils/flowreader.py +++ b/utilities/segmentation_utils/flowreader.py @@ -8,7 +8,7 @@ import numpy as np import tensorflow as tf -import cv2 +from PIL import Image from keras.preprocessing.image import ImageDataGenerator from keras.utils import Sequence @@ -361,7 
+361,9 @@ def read_batch(self, start: int, end: int) -> None: ) if self.output_size[1] == 1: column = True - batch_masks = np.zeros((n, self.mini_batch, self.output_size[0],self.num_classes)) + batch_masks = np.zeros( + (n, self.mini_batch, self.output_size[0], self.num_classes) + ) else: column = False batch_masks = np.zeros( @@ -376,21 +378,24 @@ def read_batch(self, start: int, end: int) -> None: # preprocess and assign images and masks to the batch for i in range(n): + raw_masks = np.zeros( + (self.mini_batch, self.output_size[0] * self.output_size[1], 1) + ) for j in range(self.mini_batch): - image = cv2.imread( - os.path.join(self.image_path, batch_image_filenames[j]), - cv2.IMREAD_COLOR, - ) - image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) - image = cv2.resize(image, self.image_size) - image = np.asarray(image) - - mask = cv2.imread( - os.path.join(self.mask_path, batch_mask_filenames[j]), - cv2.IMREAD_GRAYSCALE, - ) - mask = cv2.resize(mask, self.output_size) - mask = np.asarray(mask).reshape(self.output_size) + image = Image.open( + os.path.join(self.image_path, batch_image_filenames[j]) + ).resize(self.image_size) + # image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) + + image = np.array(image) + image = image / 255 + + mask = Image.open( + os.path.join(self.mask_path, batch_mask_filenames[j]) + ).resize(self.output_reshape) + + mask = np.array(mask) + mask = np.reshape(mask, self.output_size) # np.load( # os.path.join(self.image_path, batch_image_filenames[j]) # ) @@ -399,7 +404,10 @@ def read_batch(self, start: int, end: int) -> None: # ) # for now it is assumed that n is 1 - batch_images[i, j, :, :, :] = image[:, :, self.channel_mask] + + image = image[:, :, self.channel_mask] + + batch_images[i, j, :, :, :] = image if self.preprocessing_enabled: if self.preprocessing_seed is None: @@ -422,42 +430,43 @@ def read_batch(self, start: int, end: int) -> None: image_queue=self.preprocessing_queue_image, # type: ignore mask_queue=self.preprocessing_queue_mask, # 
type: ignore ) - - batch_masks[i, j, : , 0] = tf.squeeze(mask) - - batch_masks[i, :,:,:] = ImagePreprocessor.onehot_encode( - batch_masks[i, :,:,0], self.output_size, self.num_classes + + raw_masks[j, :, :] = mask + + batch_masks[i, :, :, :] = ImagePreprocessor.onehot_encode( + raw_masks, self.output_size, self.num_classes ) - # chaches the batch self.image_batch_store = batch_images self.mask_batch_store = batch_masks # required to check when to read the next batch - self.validity_index = end def __len__(self): - return int(np.ceil(len(self.image_filenames) / float(self.batch_size))) + return int(np.floor(len(self.image_filenames) / float(self.batch_size))) def __getitem__(self, index) -> tuple[np.ndarray, np.ndarray]: # check if the batch is already cached - if index < self.validity_index-self.batch_size: + if index < self.validity_index - self.batch_size // self.mini_batch: self.validity_index = 0 if index == self.validity_index: - self.read_batch(index, index + self.batch_size) + self.read_batch(index * self.batch_size, (index + 1) * self.batch_size) + self.validity_index = (self.batch_size // self.mini_batch) + index # slices new batch - store_index = (index - (self.validity_index-self.batch_size)) // self.mini_batch + store_index = ( + index - (self.validity_index - self.batch_size) + ) // self.mini_batch - batch_images = self.image_batch_store[store_index,...] - batch_masks = self.mask_batch_store[store_index,...] + batch_images = self.image_batch_store[store_index, ...] + batch_masks = self.mask_batch_store[store_index, ...] 
- return np.array(batch_images), np.array(batch_masks) + return batch_images, batch_masks def on_epoch_end(self): # Shuffle image and mask filenames - + if self.shuffle: np.random.shuffle(self.image_filenames) From 39fd65b899adaa59989f8ba24ca3955a8fa11056 Mon Sep 17 00:00:00 2001 From: Sajtospoga01 Date: Thu, 25 May 2023 10:40:47 +0100 Subject: [PATCH 07/18] adds bicubic interpolation to the image reader --- utilities/segmentation_utils/flowreader.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/utilities/segmentation_utils/flowreader.py b/utilities/segmentation_utils/flowreader.py index 4f1c2bf..42cd9da 100644 --- a/utilities/segmentation_utils/flowreader.py +++ b/utilities/segmentation_utils/flowreader.py @@ -384,7 +384,8 @@ def read_batch(self, start: int, end: int) -> None: for j in range(self.mini_batch): image = Image.open( os.path.join(self.image_path, batch_image_filenames[j]) - ).resize(self.image_size) + + ).resize(self.image_size, Image.BICUBIC) # image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) image = np.array(image) From 38e16a9c370155565c3f9b445987791730d9cf70 Mon Sep 17 00:00:00 2001 From: Andras Bodrogai Date: Thu, 25 May 2023 22:49:28 +0100 Subject: [PATCH 08/18] removes some unnecessary things --- .../segmentation_utils/ImagePreprocessor.py | 2 +- utilities/segmentation_utils/flowreader.py | 33 ++++++++----------- 2 files changed, 14 insertions(+), 21 deletions(-) diff --git a/utilities/segmentation_utils/ImagePreprocessor.py b/utilities/segmentation_utils/ImagePreprocessor.py index 59ce65c..ded7100 100644 --- a/utilities/segmentation_utils/ImagePreprocessor.py +++ b/utilities/segmentation_utils/ImagePreprocessor.py @@ -178,7 +178,7 @@ def augmentation_pipeline( if output_size[1] == 1: mask = flatten(mask, output_size, channels=1) - image = tf.convert_to_tensor(tf.clip_by_value(image, 0, 1)) + #image = tf.convert_to_tensor(tf.clip_by_value(image, 0, 1)) return image, mask diff --git a/utilities/segmentation_utils/flowreader.py 
b/utilities/segmentation_utils/flowreader.py index 42cd9da..eca315e 100644 --- a/utilities/segmentation_utils/flowreader.py +++ b/utilities/segmentation_utils/flowreader.py @@ -302,7 +302,7 @@ def __init__( dimension = math.sqrt(self.output_size[0]) self.output_reshape = (int(dimension), int(dimension)) else: - self.output_reshape = None + self.output_reshape = self.output_size print("Reading images from: ", self.image_path) @@ -346,7 +346,7 @@ def read_batch(self, start: int, end: int) -> None: # read image batch batch_image_filenames = self.image_filenames[start:end] batch_mask_filenames = batch_image_filenames - + tf.print(batch_image_filenames) # calculate number of mini batches in a batch n = self.batch_size // self.mini_batch @@ -384,9 +384,7 @@ def read_batch(self, start: int, end: int) -> None: for j in range(self.mini_batch): image = Image.open( os.path.join(self.image_path, batch_image_filenames[j]) - - ).resize(self.image_size, Image.BICUBIC) - # image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) + ).resize(self.image_size, Image.ANTIALIAS) image = np.array(image) image = image / 255 @@ -396,16 +394,6 @@ def read_batch(self, start: int, end: int) -> None: ).resize(self.output_reshape) mask = np.array(mask) - mask = np.reshape(mask, self.output_size) - # np.load( - # os.path.join(self.image_path, batch_image_filenames[j]) - # ) - # mask = np.load( - # os.path.join(self.mask_path, batch_mask_filenames[j]) - # ) - - # for now it is assumed that n is 1 - image = image[:, :, self.channel_mask] batch_images[i, j, :, :, :] = image @@ -431,6 +419,8 @@ def read_batch(self, start: int, end: int) -> None: image_queue=self.preprocessing_queue_image, # type: ignore mask_queue=self.preprocessing_queue_mask, # type: ignore ) + + mask = np.reshape(mask, self.output_size) raw_masks[j, :, :] = mask @@ -450,24 +440,27 @@ def __len__(self): def __getitem__(self, index) -> tuple[np.ndarray, np.ndarray]: # check if the batch is already cached if index < self.validity_index - 
self.batch_size // self.mini_batch: + self.validity_index = 0 if index == self.validity_index: + self.read_batch(index * self.batch_size, (index + 1) * self.batch_size) self.validity_index = (self.batch_size // self.mini_batch) + index # slices new batch - store_index = ( - index - (self.validity_index - self.batch_size) - ) // self.mini_batch + store_index = (self.batch_size//self.mini_batch) - (self.validity_index - index) + batch_images = self.image_batch_store[store_index, ...] batch_masks = self.mask_batch_store[store_index, ...] - return batch_images, batch_masks + return tf.convert_to_tensor(batch_images), tf.convert_to_tensor(batch_masks) def on_epoch_end(self): # Shuffle image and mask filenames - + if self.shuffle: + np.random.shuffle(self.image_filenames) + \ No newline at end of file From 10653a92c1596c999bbf8be33ca8068c4ff174d3 Mon Sep 17 00:00:00 2001 From: Andras Bodrogai Date: Thu, 25 May 2023 23:41:16 +0100 Subject: [PATCH 09/18] adds possible fix for image flips not being determinable --- .../segmentation_utils/ImagePreprocessor.py | 46 ++++++++++++++++++- 1 file changed, 44 insertions(+), 2 deletions(-) diff --git a/utilities/segmentation_utils/ImagePreprocessor.py b/utilities/segmentation_utils/ImagePreprocessor.py index ded7100..800a3c9 100644 --- a/utilities/segmentation_utils/ImagePreprocessor.py +++ b/utilities/segmentation_utils/ImagePreprocessor.py @@ -160,7 +160,7 @@ def augmentation_pipeline( ------- :return tuple(tf.Tensor, tf.Tensor): tuple of the processed image and mask """ - + # reshapes masks, such that transforamtions work properly if output_reshape is not None and output_size[1] == 1: mask = tf.reshape(mask, (output_reshape[0], output_reshape[1], 1)) @@ -178,7 +178,7 @@ def augmentation_pipeline( if output_size[1] == 1: mask = flatten(mask, output_size, channels=1) - #image = tf.convert_to_tensor(tf.clip_by_value(image, 0, 1)) + # image = tf.convert_to_tensor(tf.clip_by_value(image, 0, 1)) return image, mask @@ -202,3 +202,45 @@ 
def flatten(image, input_size, channels=1) -> tf.Tensor: """ # the 1 is required to preserve the shape similar to the original return tf.reshape(image, (input_size[0] * input_size[1], channels)) + + +def random_flip_up_down(image, seed=0) -> tf.Tensor: + """ + Function that randomly flips an image up or down + + Parameters + ---------- + :tf.Tensor image: image to be flipped + + Returns + ------- + :return tf.Tensor: flipped image + """ + + state = np.random.RandomState(seed) + flip = state.choice([True, False]) + if flip: + return tf.image.flip_up_down(image) + else: + return image + + +def random_flip_left_right(image, seed=0) -> tf.Tensor: + """ + Function that randomly flips an image left or right + + Parameters + ---------- + :tf.Tensor image: image to be flipped + + Returns + ------- + :return tf.Tensor: flipped image + """ + + state = np.random.RandomState(seed) + flip = state.choice([True, False]) + if flip: + return tf.image.flip_left_right(image) + else: + return image From 0c58719b2106c65dfd723dd8b65b0831aeb5d9ff Mon Sep 17 00:00:00 2001 From: Andras Bodrogai Date: Thu, 25 May 2023 23:44:21 +0100 Subject: [PATCH 10/18] adds function to validate dataset --- utilities/segmentation_utils/flowreader.py | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/utilities/segmentation_utils/flowreader.py b/utilities/segmentation_utils/flowreader.py index eca315e..187d953 100644 --- a/utilities/segmentation_utils/flowreader.py +++ b/utilities/segmentation_utils/flowreader.py @@ -11,6 +11,7 @@ from PIL import Image from keras.preprocessing.image import ImageDataGenerator from keras.utils import Sequence +from tqdm import tqdm from utilities.segmentation_utils import ImagePreprocessor @@ -287,8 +288,16 @@ def __init__( self.preprocessing_enabled = preprocessing_enabled self.preprocessing_seed = preprocessing_seed - self.image_filenames = os.listdir(os.path.join(self.image_path)) - self.mask_filenames = 
os.listdir(os.path.join(self.mask_path)) + self.image_filenames = sorted(os.listdir(os.path.join(self.image_path))) + self.mask_filenames = sorted(os.listdir(os.path.join(self.mask_path))) + + print("Validating dataset...") + + for i_name,m_name in tqdm(zip(self.image_filenames,self.mask_filenames)): + if i_name != m_name: + raise ValueError("The image and mask directories do not match") + + self.image_batch_store = np.zeros( (1, self.batch_size, image_size[0], image_size[1], self.n_channels) @@ -346,7 +355,7 @@ def read_batch(self, start: int, end: int) -> None: # read image batch batch_image_filenames = self.image_filenames[start:end] batch_mask_filenames = batch_image_filenames - tf.print(batch_image_filenames) + # calculate number of mini batches in a batch n = self.batch_size // self.mini_batch @@ -461,6 +470,7 @@ def on_epoch_end(self): # Shuffle image and mask filenames if self.shuffle: - - np.random.shuffle(self.image_filenames) + shuffled_indices = np.random.permutation(len(self.image_filenames)) + self.image_filenames = self.image_filenames[shuffled_indices] + self.mask_filenames = self.mask_filenames[shuffled_indices] \ No newline at end of file From bea1882816610116a9119ec66939e9c727d2a2a0 Mon Sep 17 00:00:00 2001 From: Andras Bodrogai Date: Thu, 1 Jun 2023 18:24:53 +0100 Subject: [PATCH 11/18] updates, flowreader and processing pipeline to be able to read images --- .../segmentation_utils/ImagePreprocessor.py | 18 ++-- utilities/segmentation_utils/flowreader.py | 87 +++++++++++-------- 2 files changed, 66 insertions(+), 39 deletions(-) diff --git a/utilities/segmentation_utils/ImagePreprocessor.py b/utilities/segmentation_utils/ImagePreprocessor.py index 800a3c9..e5ef245 100644 --- a/utilities/segmentation_utils/ImagePreprocessor.py +++ b/utilities/segmentation_utils/ImagePreprocessor.py @@ -107,10 +107,13 @@ def onehot_encode(masks, output_size, num_classes) -> tf.Tensor: ------- :return tf.Tensor: Batch of one-hot encoded masks """ - encoded = 
np.zeros((masks.shape[0], output_size[0] * output_size[1], num_classes)) + encoded = np.zeros((masks.shape[0], output_size[0], output_size[1], num_classes)) for i in range(num_classes): - encoded[:, :, i] = tf.squeeze((masks == i)) - encoded = tf.convert_to_tensor(encoded) + mask = (masks == i).astype(float) + encoded[:, :, :, i] = mask + if output_size[1] == 1: + encoded = encoded.reshape((masks.shape[0],output_size[0] * output_size[1], num_classes)) + return encoded @@ -163,8 +166,10 @@ def augmentation_pipeline( # reshapes masks, such that transforamtions work properly if output_reshape is not None and output_size[1] == 1: - mask = tf.reshape(mask, (output_reshape[0], output_reshape[1], 1)) - + mask = tf.reshape(mask, (output_reshape[0], output_reshape[1])) + + mask = tf.expand_dims(mask,axis=-1) + image_queue.update_seed(seed) mask_queue.update_seed(seed) @@ -177,8 +182,11 @@ def augmentation_pipeline( # flattens masks out to the correct output shape if output_size[1] == 1: mask = flatten(mask, output_size, channels=1) + else: + mask = tf.squeeze(mask, axis=-1) # image = tf.convert_to_tensor(tf.clip_by_value(image, 0, 1)) + return image, mask diff --git a/utilities/segmentation_utils/flowreader.py b/utilities/segmentation_utils/flowreader.py index 187d953..4f0d901 100644 --- a/utilities/segmentation_utils/flowreader.py +++ b/utilities/segmentation_utils/flowreader.py @@ -252,6 +252,7 @@ class FlowGeneratorExperimental(Sequence): preprocessing_seed = None preprocessing_queue_image = None preprocessing_queue_mask = None + shuffle_counter = 0 def __init__( self, @@ -288,21 +289,23 @@ def __init__( self.preprocessing_enabled = preprocessing_enabled self.preprocessing_seed = preprocessing_seed - self.image_filenames = sorted(os.listdir(os.path.join(self.image_path))) - self.mask_filenames = sorted(os.listdir(os.path.join(self.mask_path))) + self.image_filenames = np.array( + sorted(os.listdir(os.path.join(self.image_path))) + ) + self.mask_filenames = 
np.array(sorted(os.listdir(os.path.join(self.mask_path)))) + + self.shuffle_filenames() + + self.dataset_size = self.__len__() print("Validating dataset...") - for i_name,m_name in tqdm(zip(self.image_filenames,self.mask_filenames)): + for i_name, m_name in tqdm(zip(self.image_filenames, self.mask_filenames)): if i_name != m_name: raise ValueError("The image and mask directories do not match") - - - self.image_batch_store = np.zeros( - (1, self.batch_size, image_size[0], image_size[1], self.n_channels) - ) - self.mask_batch_store = np.zeros((1, self.batch_size, 1, 1, num_classes)) + self.image_batch_store = None + self.mask_batch_store = None self.validity_index = 0 if self.output_size[1] == 1: @@ -354,8 +357,11 @@ def set_mini_batch_size(self, batch_size: int) -> None: def read_batch(self, start: int, end: int) -> None: # read image batch batch_image_filenames = self.image_filenames[start:end] - batch_mask_filenames = batch_image_filenames - + batch_mask_filenames = self.mask_filenames[start:end] + for i in range(len(batch_image_filenames)): + if batch_image_filenames[i] != batch_mask_filenames[i]: + raise ValueError("The image and mask directories do not match") + # calculate number of mini batches in a batch n = self.batch_size // self.mini_batch @@ -387,25 +393,30 @@ def read_batch(self, start: int, end: int) -> None: # preprocess and assign images and masks to the batch for i in range(n): - raw_masks = np.zeros( - (self.mini_batch, self.output_size[0] * self.output_size[1], 1) - ) + if column: + raw_masks = np.zeros( + (self.mini_batch, self.output_size[0] * self.output_size[1], 1) + ) + else: + raw_masks = np.zeros( + (self.mini_batch, self.output_size[0], self.output_size[1]) + ) + for j in range(self.mini_batch): + image_index = i * self.mini_batch + j image = Image.open( - os.path.join(self.image_path, batch_image_filenames[j]) + os.path.join(self.image_path, batch_image_filenames[image_index]) ).resize(self.image_size, Image.ANTIALIAS) image = 
np.array(image) image = image / 255 mask = Image.open( - os.path.join(self.mask_path, batch_mask_filenames[j]) + os.path.join(self.mask_path, batch_mask_filenames[image_index]) ).resize(self.output_reshape) - - mask = np.array(mask) - image = image[:, :, self.channel_mask] - batch_images[i, j, :, :, :] = image + mask = np.array(mask) + # image = image[:, :, self.channel_mask] if self.preprocessing_enabled: if self.preprocessing_seed is None: @@ -415,10 +426,10 @@ def read_batch(self, start: int, end: int) -> None: image_seed = state.randint(0, 100000) ( - batch_images[i, j, :, :, :], + image, mask, ) = ImagePreprocessor.augmentation_pipeline( - image=batch_images[i, j, :, :, :], + image, mask=mask, input_size=self.image_size, output_size=self.output_size, @@ -428,12 +439,15 @@ def read_batch(self, start: int, end: int) -> None: image_queue=self.preprocessing_queue_image, # type: ignore mask_queue=self.preprocessing_queue_mask, # type: ignore ) - - mask = np.reshape(mask, self.output_size) + if column: + mask = np.reshape(mask, self.output_size) - raw_masks[j, :, :] = mask + batch_images[i, j, :, :, :] = image + raw_masks[ + j, ... + ] = mask # NOTE: this provides the flexibility required to process both column and matrix vectors - batch_masks[i, :, :, :] = ImagePreprocessor.onehot_encode( + batch_masks[i, ...] 
= ImagePreprocessor.onehot_encode( raw_masks, self.output_size, self.num_classes ) @@ -448,29 +462,34 @@ def __len__(self): def __getitem__(self, index) -> tuple[np.ndarray, np.ndarray]: # check if the batch is already cached + index = index % self.dataset_size + if index < self.validity_index - self.batch_size // self.mini_batch: - self.validity_index = 0 if index == self.validity_index: - self.read_batch(index * self.batch_size, (index + 1) * self.batch_size) self.validity_index = (self.batch_size // self.mini_batch) + index # slices new batch - store_index = (self.batch_size//self.mini_batch) - (self.validity_index - index) - + store_index = (self.batch_size // self.mini_batch) - ( + self.validity_index - index + ) batch_images = self.image_batch_store[store_index, ...] batch_masks = self.mask_batch_store[store_index, ...] - return tf.convert_to_tensor(batch_images), tf.convert_to_tensor(batch_masks) + return batch_images, batch_masks def on_epoch_end(self): # Shuffle image and mask filenames - + self.shuffle_filenames() + + def shuffle_filenames(self): if self.shuffle: - shuffled_indices = np.random.permutation(len(self.image_filenames)) + state = np.random.RandomState(self.seed + self.shuffle_counter) + self.shuffle_counter += 1 + shuffled_indices = state.permutation(len(self.image_filenames)) + shuffled_indices = shuffled_indices.astype(int) self.image_filenames = self.image_filenames[shuffled_indices] self.mask_filenames = self.mask_filenames[shuffled_indices] - \ No newline at end of file From 1068f6160e6f2a376365a8166aa6afcd35875181 Mon Sep 17 00:00:00 2001 From: Sajtospoga01 Date: Sun, 4 Jun 2023 19:09:19 +0100 Subject: [PATCH 12/18] Improves code quality, updates documentation --- .../segmentation_utils/ImagePreprocessor.py | 20 +++--- utilities/segmentation_utils/flowreader.py | 61 ++++++++++--------- 2 files changed, 42 insertions(+), 39 deletions(-) diff --git a/utilities/segmentation_utils/ImagePreprocessor.py 
b/utilities/segmentation_utils/ImagePreprocessor.py index e5ef245..5700bea 100644 --- a/utilities/segmentation_utils/ImagePreprocessor.py +++ b/utilities/segmentation_utils/ImagePreprocessor.py @@ -112,8 +112,10 @@ def onehot_encode(masks, output_size, num_classes) -> tf.Tensor: mask = (masks == i).astype(float) encoded[:, :, :, i] = mask if output_size[1] == 1: - encoded = encoded.reshape((masks.shape[0],output_size[0] * output_size[1], num_classes)) - + encoded = encoded.reshape( + (masks.shape[0], output_size[0] * output_size[1], num_classes) + ) + encoded = tf.convert_to_tensor(encoded, dtype=tf.float32) return encoded @@ -167,9 +169,9 @@ def augmentation_pipeline( # reshapes masks, such that transforamtions work properly if output_reshape is not None and output_size[1] == 1: mask = tf.reshape(mask, (output_reshape[0], output_reshape[1])) - - mask = tf.expand_dims(mask,axis=-1) - + + mask = tf.expand_dims(mask, axis=-1) + image_queue.update_seed(seed) mask_queue.update_seed(seed) @@ -185,8 +187,8 @@ def augmentation_pipeline( else: mask = tf.squeeze(mask, axis=-1) + mask = tf.convert_to_tensor(mask, dtype=tf.float32) # image = tf.convert_to_tensor(tf.clip_by_value(image, 0, 1)) - return image, mask @@ -209,7 +211,7 @@ def flatten(image, input_size, channels=1) -> tf.Tensor: :return tf.Tensor: flattened image """ # the 1 is required to preserve the shape similar to the original - return tf.reshape(image, (input_size[0] * input_size[1], channels)) + return tf.convert_to_tensor(tf.reshape(image, (input_size[0] * input_size[1], channels))) def random_flip_up_down(image, seed=0) -> tf.Tensor: @@ -228,7 +230,7 @@ def random_flip_up_down(image, seed=0) -> tf.Tensor: state = np.random.RandomState(seed) flip = state.choice([True, False]) if flip: - return tf.image.flip_up_down(image) + return tf.convert_to_tensor(tf.image.flip_up_down(image)) else: return image @@ -249,6 +251,6 @@ def random_flip_left_right(image, seed=0) -> tf.Tensor: state = 
np.random.RandomState(seed) flip = state.choice([True, False]) if flip: - return tf.image.flip_left_right(image) + return tf.convert_to_tensor(tf.image.flip_left_right(image)) else: return image diff --git a/utilities/segmentation_utils/flowreader.py b/utilities/segmentation_utils/flowreader.py index 4f0d901..8dd40a8 100644 --- a/utilities/segmentation_utils/flowreader.py +++ b/utilities/segmentation_utils/flowreader.py @@ -7,10 +7,9 @@ from typing import Optional import numpy as np -import tensorflow as tf -from PIL import Image from keras.preprocessing.image import ImageDataGenerator from keras.utils import Sequence +from PIL import Image from tqdm import tqdm from utilities.segmentation_utils import ImagePreprocessor @@ -228,23 +227,23 @@ class FlowGeneratorExperimental(Sequence): ---------- :string image: path to the image directory :string mask: path to the mask directory - :int batch_size: batch size - :tuple image_size: image size - :tuple output_size: output size - - - :int num_classes: number of classes + :int batch_size: + :tuple image_size: specifies the size of the input image + :tuple output_size: specifies the size of the output mask + :list[bool] channel_mask: specifies which channels of the input image to use + :int num_classes: number of classes in the output mask Keyword Arguments ----------------- - :bool shuffle: whether to shuffle the dataset or not - :int batch_size: batch size - :bool preprocessing_enabled: whether to apply preprocessing or not + :bool, optional shuffle: whether to shuffle the dataset or not, defaults to True + :int batch_size: specifies the number of images read in one batch, defaults to 2 + :bool preprocessing_enabled: whether to apply preprocessing or not, defaults to True :int seed: seed for flow from directory :int preprocessing_seed: seed for preprocessing, defaults to None Raises ------ + :ValueError: if the names of the images and masks do not match :ValueError: if the output size is not a tuple of length 2 
:ValueError: if the output size is not a square matrix or a column vector """ @@ -289,17 +288,19 @@ def __init__( self.preprocessing_enabled = preprocessing_enabled self.preprocessing_seed = preprocessing_seed + ( + self.preprocessing_queue_image, + self.preprocessing_queue_mask, + ) = ImagePreprocessor.generate_default_queue() + self.image_filenames = np.array( sorted(os.listdir(os.path.join(self.image_path))) ) self.mask_filenames = np.array(sorted(os.listdir(os.path.join(self.mask_path)))) - - self.shuffle_filenames() - + self.__shuffle_filenames() self.dataset_size = self.__len__() print("Validating dataset...") - for i_name, m_name in tqdm(zip(self.image_filenames, self.mask_filenames)): if i_name != m_name: raise ValueError("The image and mask directories do not match") @@ -336,8 +337,8 @@ def set_preprocessing_pipeline( def set_mini_batch_size(self, batch_size: int) -> None: """ - Function to set the appropriate minibatch size. Required to allign batch size in the reader with the model.\ - Does not change the batch size of the reader. + Function to set the appropriate minibatch size. Required to allign batch size in the \ + reader with the model. Does not change the batch size of the reader. 
Parameters ---------- @@ -354,12 +355,12 @@ def set_mini_batch_size(self, batch_size: int) -> None: raise ValueError("The batch size must be divisible by the mini batch size") self.mini_batch = batch_size - def read_batch(self, start: int, end: int) -> None: + def __read_batch(self, start: int, end: int) -> None: # read image batch batch_image_filenames = self.image_filenames[start:end] batch_mask_filenames = self.mask_filenames[start:end] - for i in range(len(batch_image_filenames)): - if batch_image_filenames[i] != batch_mask_filenames[i]: + for image, mask in zip(batch_image_filenames, batch_mask_filenames): + if image != mask: raise ValueError("The image and mask directories do not match") # calculate number of mini batches in a batch @@ -443,9 +444,9 @@ def read_batch(self, start: int, end: int) -> None: mask = np.reshape(mask, self.output_size) batch_images[i, j, :, :, :] = image - raw_masks[ - j, ... - ] = mask # NOTE: this provides the flexibility required to process both column and matrix vectors + # NOTE: this provides the flexibility required to process both + # column and matrix vectors + raw_masks[j, ...] = mask batch_masks[i, ...] 
= ImagePreprocessor.onehot_encode( raw_masks, self.output_size, self.num_classes @@ -457,7 +458,7 @@ def read_batch(self, start: int, end: int) -> None: # required to check when to read the next batch - def __len__(self): + def __len__(self) -> int: return int(np.floor(len(self.image_filenames) / float(self.batch_size))) def __getitem__(self, index) -> tuple[np.ndarray, np.ndarray]: @@ -468,7 +469,7 @@ def __getitem__(self, index) -> tuple[np.ndarray, np.ndarray]: self.validity_index = 0 if index == self.validity_index: - self.read_batch(index * self.batch_size, (index + 1) * self.batch_size) + self.__read_batch(index * self.batch_size, (index + 1) * self.batch_size) self.validity_index = (self.batch_size // self.mini_batch) + index # slices new batch @@ -476,16 +477,16 @@ def __getitem__(self, index) -> tuple[np.ndarray, np.ndarray]: self.validity_index - index ) - batch_images = self.image_batch_store[store_index, ...] - batch_masks = self.mask_batch_store[store_index, ...] + batch_images = self.image_batch_store[store_index, ...] # type: ignore + batch_masks = self.mask_batch_store[store_index, ...] 
# type: ignore return batch_images, batch_masks - def on_epoch_end(self): + def on_epoch_end(self) -> None: # Shuffle image and mask filenames - self.shuffle_filenames() + self.__shuffle_filenames() - def shuffle_filenames(self): + def __shuffle_filenames(self) -> None: if self.shuffle: state = np.random.RandomState(self.seed + self.shuffle_counter) self.shuffle_counter += 1 From 4ee21bd1ebddf6d36a89d5edef421f879def6007 Mon Sep 17 00:00:00 2001 From: Sajtospoga01 Date: Tue, 6 Jun 2023 11:43:55 +0100 Subject: [PATCH 13/18] update flowgenerator so it reads weights, doesnt return them --- utilities/segmentation_utils/flowreader.py | 26 ++++++++++++++++++---- 1 file changed, 22 insertions(+), 4 deletions(-) diff --git a/utilities/segmentation_utils/flowreader.py b/utilities/segmentation_utils/flowreader.py index 8dd40a8..4771fb3 100644 --- a/utilities/segmentation_utils/flowreader.py +++ b/utilities/segmentation_utils/flowreader.py @@ -11,7 +11,7 @@ from keras.utils import Sequence from PIL import Image from tqdm import tqdm - +import pandas as pd from utilities.segmentation_utils import ImagePreprocessor @@ -222,7 +222,10 @@ class FlowGeneratorExperimental(Sequence): Note: in case the output is a column vector it has to be in the shape (x, 1) Note: this is an experimental version of the flow generator, which uses a \ custom implemented dataloader instead of the keras ImageDataGenerator - + #TODO: Instead of using direct paths, and arguments, reading heads should be used + #TODO: as it reduces the number of arguments, and makes the code more readable and reduces + #TODO: cupling + Parameters ---------- :string image: path to the image directory @@ -240,6 +243,7 @@ class FlowGeneratorExperimental(Sequence): :bool preprocessing_enabled: whether to apply preprocessing or not, defaults to True :int seed: seed for flow from directory :int preprocessing_seed: seed for preprocessing, defaults to None + :bool read_weights: whether to read the weights from the mask directory, 
defaults to False Raises ------ @@ -266,6 +270,8 @@ def __init__( preprocessing_enabled: bool = True, seed: int = 909, preprocessing_seed: Optional[int] = None, + read_weights: bool = False, + weights_path: Optional[str] = None, ): if len(output_size) != 2: raise ValueError("The output size has to be a tuple of length 2") @@ -287,6 +293,8 @@ def __init__( self.seed = seed self.preprocessing_enabled = preprocessing_enabled self.preprocessing_seed = preprocessing_seed + self.read_weights = read_weights + self.weights_path = weights_path ( self.preprocessing_queue_image, @@ -297,6 +305,16 @@ def __init__( sorted(os.listdir(os.path.join(self.image_path))) ) self.mask_filenames = np.array(sorted(os.listdir(os.path.join(self.mask_path)))) + if self.read_weights: + weights_df = pd.read_csv( + os.path.join(self.weights_path, "distribution.csv"), header=None + ) + self.weights = weights_df.to_numpy()[:, 1:] + weight_names = weights_df.to_numpy()[:, 0] + for mask, weight_name in zip(self.mask_filenames, weight_names): + if mask != weight_name: + raise ValueError("The mask and weight directories do not match") + self.linked_data = [self.image_filenames, self.mask_filenames, self.weights] self.__shuffle_filenames() self.dataset_size = self.__len__() @@ -492,5 +510,5 @@ def __shuffle_filenames(self) -> None: self.shuffle_counter += 1 shuffled_indices = state.permutation(len(self.image_filenames)) shuffled_indices = shuffled_indices.astype(int) - self.image_filenames = self.image_filenames[shuffled_indices] - self.mask_filenames = self.mask_filenames[shuffled_indices] + for array in self.linked_data: + array = array[shuffled_indices] From 2eed73a6e0a1ec1f8528dee148576c806519e748 Mon Sep 17 00:00:00 2001 From: Sajtospoga01 Date: Tue, 6 Jun 2023 11:51:24 +0100 Subject: [PATCH 14/18] adds sorting to weight reader --- utilities/segmentation_utils/flowreader.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/utilities/segmentation_utils/flowreader.py 
b/utilities/segmentation_utils/flowreader.py index 4771fb3..24b09e6 100644 --- a/utilities/segmentation_utils/flowreader.py +++ b/utilities/segmentation_utils/flowreader.py @@ -309,8 +309,11 @@ def __init__( weights_df = pd.read_csv( os.path.join(self.weights_path, "distribution.csv"), header=None ) - self.weights = weights_df.to_numpy()[:, 1:] - weight_names = weights_df.to_numpy()[:, 0] + weights_np = weights_df.to_numpy() + weights_np = sorted(weights_np, key=lambda x: x[0]) + + self.weights = weights_np[:, 1:] + weight_names = weights_np[:, 0] for mask, weight_name in zip(self.mask_filenames, weight_names): if mask != weight_name: raise ValueError("The mask and weight directories do not match") From 591c276b8f0d479ad95f274935fa2c48380a9ca7 Mon Sep 17 00:00:00 2001 From: Andras Bodrogai Date: Tue, 6 Jun 2023 12:22:25 +0100 Subject: [PATCH 15/18] fixes dtype errors in image preprocessor casting, and fixes weight reader problems in flowreader --- .../segmentation_utils/ImagePreprocessor.py | 4 +-- utilities/segmentation_utils/flowreader.py | 26 +++++++++++++------ 2 files changed, 20 insertions(+), 10 deletions(-) diff --git a/utilities/segmentation_utils/ImagePreprocessor.py b/utilities/segmentation_utils/ImagePreprocessor.py index 5700bea..8ad3033 100644 --- a/utilities/segmentation_utils/ImagePreprocessor.py +++ b/utilities/segmentation_utils/ImagePreprocessor.py @@ -115,7 +115,7 @@ def onehot_encode(masks, output_size, num_classes) -> tf.Tensor: encoded = encoded.reshape( (masks.shape[0], output_size[0] * output_size[1], num_classes) ) - encoded = tf.convert_to_tensor(encoded, dtype=tf.float32) + encoded = tf.convert_to_tensor(encoded) return encoded @@ -187,7 +187,7 @@ def augmentation_pipeline( else: mask = tf.squeeze(mask, axis=-1) - mask = tf.convert_to_tensor(mask, dtype=tf.float32) + mask = tf.convert_to_tensor(mask) # image = tf.convert_to_tensor(tf.clip_by_value(image, 0, 1)) return image, mask diff --git a/utilities/segmentation_utils/flowreader.py 
b/utilities/segmentation_utils/flowreader.py index 24b09e6..9ab33aa 100644 --- a/utilities/segmentation_utils/flowreader.py +++ b/utilities/segmentation_utils/flowreader.py @@ -302,22 +302,28 @@ def __init__( ) = ImagePreprocessor.generate_default_queue() self.image_filenames = np.array( - sorted(os.listdir(os.path.join(self.image_path))) + sorted(os.listdir(self.image_path)) ) - self.mask_filenames = np.array(sorted(os.listdir(os.path.join(self.mask_path)))) + self.mask_filenames = np.array(sorted(os.listdir(self.mask_path))) if self.read_weights: weights_df = pd.read_csv( - os.path.join(self.weights_path, "distribution.csv"), header=None + self.weights_path, header=None ) weights_np = weights_df.to_numpy() - weights_np = sorted(weights_np, key=lambda x: x[0]) - - self.weights = weights_np[:, 1:] + print(weights_np.shape) + #sort the numpy array by the first column + weights_np = weights_np[weights_np[:,0].argsort()] + + print(weights_np) + self.weights = weights_np[:,1:].astype(np.float64) weight_names = weights_np[:, 0] for mask, weight_name in zip(self.mask_filenames, weight_names): if mask != weight_name: raise ValueError("The mask and weight directories do not match") - self.linked_data = [self.image_filenames, self.mask_filenames, self.weights] + + self.linked_data = [self.image_filenames, self.mask_filenames] + if self.read_weights: + self.linked_data.append(self.weights) self.__shuffle_filenames() self.dataset_size = self.__len__() @@ -500,8 +506,12 @@ def __getitem__(self, index) -> tuple[np.ndarray, np.ndarray]: batch_images = self.image_batch_store[store_index, ...] # type: ignore batch_masks = self.mask_batch_store[store_index, ...] # type: ignore + if self.read_weights: + batch_weights = self.weights[index * self.batch_size : (index + 1) * self.batch_size, ...] 
- return batch_images, batch_masks + return batch_images, batch_masks, batch_weights + else: + return batch_images, batch_masks def on_epoch_end(self) -> None: # Shuffle image and mask filenames From 1dfaa9d666360aa5c9d84cecc53838a056b85fc7 Mon Sep 17 00:00:00 2001 From: Sajtospoga01 Date: Mon, 26 Jun 2023 11:04:33 +0200 Subject: [PATCH 16/18] update tests to fit new changes, add pd to requirements --- requirements.txt | 1 + .../flow_reader_test.py | 8 +- .../image_preprocessor_test.py | 51 ++++---- .../test_flowreader.py | 112 ++++++++++++++++++ .../segmentation_utils/ImagePreprocessor.py | 1 + utilities/segmentation_utils/flowreader.py | 4 +- 6 files changed, 146 insertions(+), 31 deletions(-) create mode 100644 tests/segmentation_utils_tests.py/test_flowreader.py diff --git a/requirements.txt b/requirements.txt index 48da228..cfa2fbc 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,3 +3,4 @@ numpy==1.24.1 rasterio==1.3.6 Pillow==9.4.0 tqdm==4.64.1 +pandas==1.5.1 diff --git a/tests/segmentation_utils_tests.py/flow_reader_test.py b/tests/segmentation_utils_tests.py/flow_reader_test.py index 8b17913..437fc80 100644 --- a/tests/segmentation_utils_tests.py/flow_reader_test.py +++ b/tests/segmentation_utils_tests.py/flow_reader_test.py @@ -1,6 +1,7 @@ import os import numpy as np +import pytest import tensorflow as tf from keras.preprocessing.image import ImageDataGenerator from pytest import MonkeyPatch @@ -67,11 +68,10 @@ def test_makes_flow_generator_with_queue() -> None: # create a copy of the generator args new_generator_args = generator_args.copy() - new_generator_args["preprocessing_queue_image"] = image_queue - new_generator_args["preprocessing_queue_mask"] = mask_queue # create a flow generator - FlowGenerator(**new_generator_args) + generator = FlowGenerator(**new_generator_args) + generator.set_preprocessing_pipeline(image_queue, mask_queue) def test_makes_flow_generator_wrong_shape() -> None: @@ -181,7 +181,7 @@ def test_get_generator() -> None: 
patch.undo() patch.undo() - +@pytest.mark.skip(reason="Deprecated functionality") def test_reader_error_raised() -> None: try: # predifining input variables diff --git a/tests/segmentation_utils_tests.py/image_preprocessor_test.py b/tests/segmentation_utils_tests.py/image_preprocessor_test.py index 94bb086..aa7f18d 100644 --- a/tests/segmentation_utils_tests.py/image_preprocessor_test.py +++ b/tests/segmentation_utils_tests.py/image_preprocessor_test.py @@ -1,10 +1,12 @@ import numpy as np +import pytest import tensorflow as tf from utilities.segmentation_utils import ImagePreprocessor -def test_image_onehot_encoder() -> None: +@pytest.mark.skip(reason="Deprecated functionality") +def test_image_onehot_encoder_column() -> None: # predifining input variables n_classes = 2 batch_size = 1 @@ -24,39 +26,37 @@ def test_image_onehot_encoder() -> None: assert one_hot_image.shape == ( 1, - image_size[0] // 2 * image_size[1] // 2, + output_size[0] * output_size[1], n_classes, ) assert np.array_equal(one_hot_image, onehot_test) -def test_image_augmentation_pipeline_column() -> None: +def test_image_onehot_encoder_squarematrix() -> None: # predifining input variables - image = np.zeros((512, 512, 3)) - mask = np.zeros((256 * 256, 1)) - image = tf.convert_to_tensor(image) - mask = tf.convert_to_tensor(mask) + n_classes = 2 + batch_size = 1 + image_size = (512, 512) + output_size = (256, 256) - input_size = (512, 512) - output_size = (256 * 256, 1) - output_reshape = (256, 256) + # creating a mask with 2 classes + mask = np.zeros((batch_size, output_size[0], output_size[1])) + mask[:, ::2,:] = 1 - # creating dummy queues - image_queue = ImagePreprocessor.PreprocessingQueue( - queue=[lambda x, y, seed: x], arguments=[{"y": 1}] - ) - mask_queue = ImagePreprocessor.PreprocessingQueue( - queue=[lambda x, y, seed: x], arguments=[{"y": 1}] - ) + # creating a onehot mask to compare with the output of the function + onehot_test = np.zeros((batch_size, output_size[0] , output_size[1], 
n_classes)) + onehot_test[:, ::2, :,1] = 1 + onehot_test[:, 1::2,:, 0] = 1 - image_new, mask_new = ImagePreprocessor.augmentation_pipeline( - image, mask, input_size, output_size, image_queue, mask_queue,output_reshape - ) - image_new = image_new.numpy() - mask_new = mask_new.numpy() + one_hot_image = ImagePreprocessor.onehot_encode(mask, output_size, n_classes) - assert np.array(image_new).shape == (512, 512, 3) - assert np.array(mask_new).shape == (256 * 256, 1, 1) + assert one_hot_image.shape == ( + 1, + output_size[0], + output_size[1], + n_classes, + ) + assert np.array_equal(one_hot_image, onehot_test) def test_image_augmentation_pipeline_squarematrix() -> None: @@ -123,5 +123,4 @@ def test_flatten() -> None: image = tf.convert_to_tensor(image) image = ImagePreprocessor.flatten(image, (512, 512), 3) image = image.numpy() - assert image.shape == (512 * 512, 1, 3) - + assert image.shape == (512 * 512, 3) diff --git a/tests/segmentation_utils_tests.py/test_flowreader.py b/tests/segmentation_utils_tests.py/test_flowreader.py new file mode 100644 index 0000000..6923bd7 --- /dev/null +++ b/tests/segmentation_utils_tests.py/test_flowreader.py @@ -0,0 +1,112 @@ +import os + +import numpy as np +import pytest +import tensorflow as tf +from keras.preprocessing.image import ImageDataGenerator +from pytest import MonkeyPatch + +from utilities.segmentation_utils import ImagePreprocessor +from utilities.segmentation_utils.flowreader import FlowGeneratorExperimental + + +def test_can_create_instance() -> None: + patch = MonkeyPatch() + # mock list directory + patch.setattr(os, "listdir", lambda x: ["a", "b", "c"]) + + # create generator instance + generator = FlowGeneratorExperimental( + image_path="tests/segmentation_utils_tests/flow_reader_test", + mask_path="tests/segmentation_utils_tests/flow_reader_test", + image_size=(512, 512), + output_size=(512,512), + num_classes=7, + channel_mask= [True,True,True] + ) + pass + +def test_set_preprocessing_pipeline() -> None: + 
patch = MonkeyPatch() + # mock list directory + patch.setattr(os, "listdir", lambda x: ["a", "b", "c"]) + + # create generator instance + generator = FlowGeneratorExperimental( + image_path="tests/segmentation_utils_tests/flow_reader_test", + mask_path="tests/segmentation_utils_tests/flow_reader_test", + image_size=(512, 512), + output_size=(512,512), + num_classes=7, + channel_mask= [True,True,True] + ) + + image_queue = ImagePreprocessor.PreprocessingQueue(queue=[],arguments=[]) + mask_queue = ImagePreprocessor.PreprocessingQueue(queue=[],arguments=[]) + + generator.set_preprocessing_pipeline( + image_queue,mask_queue + ) + pass + +def test_set_mini_batch_size() -> None: + patch = MonkeyPatch() + # mock list directory + patch.setattr(os, "listdir", lambda x: ["a", "b", "c"]) + + # create generator instance + generator = FlowGeneratorExperimental( + image_path="tests/segmentation_utils_tests/flow_reader_test", + mask_path="tests/segmentation_utils_tests/flow_reader_test", + image_size=(512, 512), + output_size=(512,512), + num_classes=7, + channel_mask= [True,True,True] + ) + + generator.set_mini_batch_size(2) + assert generator.mini_batch == 2 + +def test_set_mini_batch_size_too_large() -> None: + + patch = MonkeyPatch() + # mock list directory + patch.setattr(os, "listdir", lambda x: ["a", "b", "c"]) + + # create generator instance + generator = FlowGeneratorExperimental( + image_path="tests/segmentation_utils_tests/flow_reader_test", + mask_path="tests/segmentation_utils_tests/flow_reader_test", + image_size=(512, 512), + output_size=(512,512), + num_classes=7, + channel_mask= [True,True,True] + ) + with pytest.raises(ValueError) as exc_info: + generator.set_mini_batch_size(5) + + assert exc_info.value.args[0] == "The mini batch size cannot be larger than the batch size" + + +def test_set_mini_batch_size_not_devisable() -> None: + + patch = MonkeyPatch() + # mock list directory + patch.setattr(os, "listdir", lambda x: ["a", "b", "c"]) + + # create generator 
instance + generator = FlowGeneratorExperimental( + image_path="tests/segmentation_utils_tests/flow_reader_test", + mask_path="tests/segmentation_utils_tests/flow_reader_test", + image_size=(512, 512), + output_size=(512,512), + num_classes=7, + channel_mask= [True,True,True], + batch_size=3 + + ) + with pytest.raises(ValueError) as exc_info: + generator.set_mini_batch_size(2) + + assert exc_info.value.args[0] == "The batch size must be divisible by the mini batch size" + diff --git a/utilities/segmentation_utils/ImagePreprocessor.py b/utilities/segmentation_utils/ImagePreprocessor.py index 8ad3033..bf5e773 100644 --- a/utilities/segmentation_utils/ImagePreprocessor.py +++ b/utilities/segmentation_utils/ImagePreprocessor.py @@ -107,6 +107,7 @@ def onehot_encode(masks, output_size, num_classes) -> tf.Tensor: ------- :return tf.Tensor: Batch of one-hot encoded masks """ + #!TODO: add support for 1D masks encoded = np.zeros((masks.shape[0], output_size[0], output_size[1], num_classes)) for i in range(num_classes): mask = (masks == i).astype(float) diff --git a/utilities/segmentation_utils/flowreader.py b/utilities/segmentation_utils/flowreader.py index 9ab33aa..f229bf1 100644 --- a/utilities/segmentation_utils/flowreader.py +++ b/utilities/segmentation_utils/flowreader.py @@ -7,11 +7,12 @@ from typing import Optional import numpy as np +import pandas as pd from keras.preprocessing.image import ImageDataGenerator from keras.utils import Sequence from PIL import Image from tqdm import tqdm -import pandas as pd + from utilities.segmentation_utils import ImagePreprocessor @@ -252,6 +253,7 @@ class FlowGeneratorExperimental(Sequence): :ValueError: if the output size is not a square matrix or a column vector """ + #! 
these are class variables, and should be moved to the constructor to make them instance variables
    preprocessing_seed = None
    preprocessing_queue_image = None
    preprocessing_queue_mask = None

From fad0b630e64779f805c25ae67f7c569b452eda21 Mon Sep 17 00:00:00 2001
From: Sajtospoga01
Date: Mon, 26 Jun 2023 11:19:49 +0200
Subject: [PATCH 17/18] add arguments to image cutting loop so parameters are
 changeable

---
 utilities/transform_utils/image_cutting.py | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/utilities/transform_utils/image_cutting.py b/utilities/transform_utils/image_cutting.py
index 1bdc37b..4389bcc 100644
--- a/utilities/transform_utils/image_cutting.py
+++ b/utilities/transform_utils/image_cutting.py
@@ -320,6 +320,9 @@ def cut_ims_in_directory(
     target_dims: tuple[int, int] = (512, 512),
     mask=False,
     preprocess: bool = False,
+    batch_size: int = 100,
+    format: str = "tiff",
+    preprocess_function=__preprocess_mask_image,
 ) -> None:
     """Finds images at "Path_ims" cuts them into dimension "target_dims",
     and then saves them as png files to "path_target_dir".
@@ -333,7 +336,7 @@ def cut_ims_in_directory(
     :bool, optional mask: If true assumes images are masks. Defaults to False.
     :bool, optional preprocess: If true preprocesses images. Defaults to False.
""" - print("the following files are located at input Path :") + dir_contents = os.listdir(path_ims) dir_contents = sorted(dir_contents) batch_size = 100 @@ -383,7 +386,7 @@ def cut_ims_in_directory( # fill batch array for i, n in enumerate(cut_im): if preprocess: - n = __preprocess_mask_image(n) + n = preprocess_function(n) if mask: batch[counter, i, :, :, 0] = n[:, :] else: @@ -416,7 +419,8 @@ def cut_ims_in_directory( str(target_dims[0]), "x", str(target_dims[1]), - ".tiff", + ".", + format, ] ), ) From f3c59f264ed651a3d009e909e6fac0df11443fbc Mon Sep 17 00:00:00 2001 From: Ayleen Sohaib <2684413S@student.gla.ac.uk> Date: Sun, 30 Jul 2023 17:07:33 +0100 Subject: [PATCH 18/18] in __len__ function added self.mini_batch --- utilities/segmentation_utils/flowreader.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utilities/segmentation_utils/flowreader.py b/utilities/segmentation_utils/flowreader.py index f229bf1..df5e9ec 100644 --- a/utilities/segmentation_utils/flowreader.py +++ b/utilities/segmentation_utils/flowreader.py @@ -488,7 +488,7 @@ def __read_batch(self, start: int, end: int) -> None: # required to check when to read the next batch def __len__(self) -> int: - return int(np.floor(len(self.image_filenames) / float(self.batch_size))) + return int(np.floor(len(self.image_filenames) / float(self.mini_batch))) def __getitem__(self, index) -> tuple[np.ndarray, np.ndarray]: # check if the batch is already cached