From cccf762e47a763375c760a144a78b47b2689caa9 Mon Sep 17 00:00:00 2001 From: Sajtospoga01 Date: Sat, 19 Aug 2023 12:07:13 +0100 Subject: [PATCH 1/3] removes unnecessary reshaping operation, adds it to the end only. adds documentation to new image preprocessor classes --- .../segmentation_utils/ImagePreprocessor.py | 61 ++++++++--------- utilities/segmentation_utils/flowreader.py | 67 +++++++++---------- 2 files changed, 64 insertions(+), 64 deletions(-) diff --git a/utilities/segmentation_utils/ImagePreprocessor.py b/utilities/segmentation_utils/ImagePreprocessor.py index 871e20c..84f4655 100644 --- a/utilities/segmentation_utils/ImagePreprocessor.py +++ b/utilities/segmentation_utils/ImagePreprocessor.py @@ -6,6 +6,13 @@ class IPreprocessor(Protocol): + """ + Interface of the preprocessing queue class + Parameters + ---------- + :queue list: list of functions to be applied + """ + queue: list[Callable] def update_seed(self, seed: int) -> None: @@ -16,6 +23,19 @@ def get_queue_length(self) -> int: class PreFunction: + """ + Class that wraps a function and its arguments to be used in a preprocessing queue + enables function to be defined with their parameters prior to being called. 
+ + To call the function, simply call the PreFunction object with a tf.Tensor as an argument + + Parameters + ---------- + :function Callable: function to be wrapped + :args list: list of arguments to be passed to the function + :kwargs dict: dictionary of keyword arguments to be passed to the function + """ + def __init__(self, function: Callable, *args, **kwargs) -> None: self.function = function self.args = args @@ -25,6 +45,13 @@ def __call__(self, image: tf.Tensor) -> tf.Tensor: return self.function(image, *self.args, **self.kwargs) def set_seed(self, seed: int) -> None: + """ + Changes the seed of the function + + Parameters + ---------- + :seed int: seed to be changed to + """ self.kwargs["seed"] = seed @@ -127,7 +154,7 @@ def generate_default_queue(seed=0) -> tuple[PreprocessingQueue, PreprocessingQue return image_queue, mask_queue -def onehot_encode(masks, output_size, num_classes) -> tf.Tensor: +def onehot_encode(masks, num_classes) -> tf.Tensor: """ Function that one-hot encodes masks @@ -140,14 +167,10 @@ def onehot_encode(masks, output_size, num_classes) -> tf.Tensor: :return tf.Tensor: Batch of one-hot encoded masks """ #!TODO: add support for 1D masks - encoded = np.zeros((masks.shape[0], output_size[0], output_size[1], num_classes)) + encoded = np.zeros((masks.shape[0], masks.shape[1], masks.shape[2], num_classes)) for i in range(num_classes): mask = (masks == i).astype(float) encoded[:, :, :, i] = mask - if output_size[1] == 1: - encoded = encoded.reshape( - (masks.shape[0], output_size[0] * output_size[1], num_classes) - ) encoded = tf.convert_to_tensor(encoded) return encoded @@ -155,12 +178,8 @@ def onehot_encode(masks, output_size, num_classes) -> tf.Tensor: def augmentation_pipeline( image, mask, - input_size: tuple[int, int], - output_size: tuple[int, int], image_queue: PreprocessingQueue, mask_queue: PreprocessingQueue, - output_reshape: Optional[tuple[int, int]] = None, - channels: int = 3, seed: int = 0, ) -> tuple[tf.Tensor, tf.Tensor]: 
""" @@ -172,37 +191,22 @@ def augmentation_pipeline( ---------- :tf.Tensor image: The image to be processed :tf.Tensor mask: The mask to be processed - :tuple(int, int) input_size: Input size of the image - :tuple(int, int) output_size: Output size of the image Keyword Arguments ----------------- - :tuple(int, int), optional output_reshape: In case the image is a column vector, \ - this is the shape it should be reshaped to. Defaults to None. - :PreprocessingQueue, optional mask_queue image_queue: \ Augmentation processing queue for images, defaults to None :PreprocessingQueue, optional mask_queue: Augmentation processing queue \ for masks, defaults to None - :int, optional channels: Number of bands in the image, defaults to 3 \ :int, optional seed: The seed to be used in the pipeline, defaults to 0 - Raises - ------ - :raises ValueError: If only one of the queues is passed - Returns ------- :return tuple(tf.Tensor, tf.Tensor): tuple of the processed image and mask """ - - # reshapes masks, such that transforamtions work properly - if output_reshape is not None and output_size[1] == 1: - mask = tf.reshape(mask, (output_reshape[0], output_reshape[1])) - mask = tf.expand_dims(mask, axis=-1) image_queue.update_seed(seed) @@ -212,12 +216,9 @@ def augmentation_pipeline( image = fun_im(image) mask = fun_mask(mask) - # flattens masks out to the correct output shape - if output_size[1] == 1: - mask = flatten(mask, output_size, channels=1) - else: - mask = tf.squeeze(mask, axis=-1) + + mask = tf.squeeze(mask, axis=-1) # removes the last dimension mask = tf.convert_to_tensor(mask) # image = tf.convert_to_tensor(tf.clip_by_value(image, 0, 1)) diff --git a/utilities/segmentation_utils/flowreader.py b/utilities/segmentation_utils/flowreader.py index 6766803..a254a9f 100644 --- a/utilities/segmentation_utils/flowreader.py +++ b/utilities/segmentation_utils/flowreader.py @@ -307,9 +307,10 @@ def __init__( self.shuffle_counter = shuffle_counter self.image_ordering = image_ordering 
- self.image_filenames = np.array(sorted(os.listdir(self.image_path))) self.mask_filenames = np.array(sorted(os.listdir(self.mask_path))) + + # should be moved out as a strategy if self.read_weights: weights_df = pd.read_csv(self.weights_path, header=None) weights_np = weights_df.to_numpy() @@ -327,6 +328,7 @@ def __init__( self.linked_data = [self.image_filenames, self.mask_filenames] if self.read_weights: self.linked_data.append(self.weights) + self.__shuffle_filenames() self.dataset_size = self.__len__() @@ -344,8 +346,10 @@ def __init__( # such no need to define it otherwise dimension = math.sqrt(self.output_size[0]) self.output_reshape = (int(dimension), int(dimension)) + self.column_vector = True else: self.output_reshape = self.output_size + self.column_vector = False print("Reading images from: ", self.image_path) @@ -405,42 +409,32 @@ def __read_batch(self, start: int, end: int) -> None: self.n_channels, ) ) - if self.output_size[1] == 1: - column = True - batch_masks = np.zeros( - (n, self.mini_batch, self.output_size[0], self.num_classes) - ) - else: - column = False - batch_masks = np.zeros( - ( - n, - self.mini_batch, - self.output_size[0], - self.output_size[1], - self.num_classes, - ) + + batch_masks = np.zeros( + ( + n, + self.mini_batch, + self.output_reshape[0], + self.output_reshape[1], + self.num_classes, ) + ) # preprocess and assign images and masks to the batch for i in range(n): - if column: - raw_masks = np.zeros( - (self.mini_batch, self.output_size[0] * self.output_size[1], 1) - ) - else: - raw_masks = np.zeros( - (self.mini_batch, self.output_size[0], self.output_size[1]) - ) + raw_masks = np.zeros( + (self.mini_batch, self.output_reshape[0], self.output_reshape[1]) + ) for j in range(self.mini_batch): image_index = i * self.mini_batch + j + image = Image.open( os.path.join(self.image_path, batch_image_filenames[image_index]) ).resize(self.image_size, Image.ANTIALIAS) - + image = np.array(image) - image = image / 255 + mask = Image.open( 
os.path.join(self.mask_path, batch_mask_filenames[image_index]) @@ -462,24 +456,19 @@ def __read_batch(self, start: int, end: int) -> None: ) = ImagePreprocessor.augmentation_pipeline( image, mask=mask, - input_size=self.image_size, - output_size=self.output_size, - output_reshape=self.output_reshape, seed=image_seed, #!both preprocessing queues are assigned by this time image_queue=self.preprocessing_queue_image, # type: ignore mask_queue=self.preprocessing_queue_mask, # type: ignore ) - if column: - mask = np.reshape(mask, self.output_size) batch_images[i, j, :, :, :] = image # NOTE: this provides the flexibility required to process both # column and matrix vectors - raw_masks[j, ...] = mask + raw_masks[j, :, :] = mask - batch_masks[i, ...] = ImagePreprocessor.onehot_encode( - raw_masks, self.output_size, self.num_classes + batch_masks[i, :, : , :] = ImagePreprocessor.onehot_encode( + raw_masks, self.num_classes ) # chaches the batch @@ -509,6 +498,16 @@ def __getitem__(self, index) -> tuple[np.ndarray, np.ndarray]: batch_images = self.image_batch_store[store_index, ...] # type: ignore batch_masks = self.mask_batch_store[store_index, ...] 
# type: ignore
+        if self.column_vector:
+            batch_masks = np.reshape(
+                batch_masks,
+                (
+                    self.mini_batch,
+                    batch_masks.shape[1] * batch_masks.shape[2],
+                    self.num_classes,
+                ),
+            )
+
         if self.image_ordering == ImageOrdering.CHANNEL_FIRST:
             batch_images = np.moveaxis(batch_images, -1, 1)
             batch_masks = np.moveaxis(batch_masks, -1, 1)

From 2675dd08b5f4ce8530b6a4dba3486587fda7b1c6 Mon Sep 17 00:00:00 2001
From: Sajtospoga01
Date: Wed, 23 Aug 2023 19:57:59 +0100
Subject: [PATCH 2/3] update tests for new standards

---
 tests/segmentation_utils_tests.py/image_preprocessor_test.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/tests/segmentation_utils_tests.py/image_preprocessor_test.py b/tests/segmentation_utils_tests.py/image_preprocessor_test.py
index c45cbdc..b596aaf 100644
--- a/tests/segmentation_utils_tests.py/image_preprocessor_test.py
+++ b/tests/segmentation_utils_tests.py/image_preprocessor_test.py
@@ -48,7 +48,7 @@ def test_image_onehot_encoder_squarematrix() -> None:
     onehot_test[:, ::2, :,1] = 1
     onehot_test[:, 1::2,:, 0] = 1
 
-    one_hot_image = ImagePreprocessor.onehot_encode(mask, output_size, n_classes)
+    one_hot_image = ImagePreprocessor.onehot_encode(mask, n_classes)
 
     assert one_hot_image.shape == (
         1,
@@ -80,8 +80,6 @@ def test_image_augmentation_pipeline_squarematrix() -> None:
     image_new, mask_new = ImagePreprocessor.augmentation_pipeline(
         image,
         mask,
-        input_size,
-        output_size,
         image_queue=image_queue,
         mask_queue=mask_queue,
     )

From 24c2f69c94698a7b6152be95213b458a23acdee4 Mon Sep 17 00:00:00 2001
From: Sajtospoga01
Date: Wed, 23 Aug 2023 20:08:59 +0100
Subject: [PATCH 3/3] update flow reader to fit linting standards

---
 .../segmentation_utils/ImagePreprocessor.py |  2 +-
 utilities/segmentation_utils/flowreader.py  | 21 ++++++++-----------
 2 files changed, 10 insertions(+), 13 deletions(-)

diff --git a/utilities/segmentation_utils/ImagePreprocessor.py b/utilities/segmentation_utils/ImagePreprocessor.py
index 84f4655..b8493f4 100644
--- 
a/utilities/segmentation_utils/ImagePreprocessor.py +++ b/utilities/segmentation_utils/ImagePreprocessor.py @@ -1,5 +1,5 @@ from dataclasses import dataclass -from typing import Callable, Optional, Protocol +from typing import Callable, Protocol import numpy as np import tensorflow as tf diff --git a/utilities/segmentation_utils/flowreader.py b/utilities/segmentation_utils/flowreader.py index a254a9f..491a7c6 100644 --- a/utilities/segmentation_utils/flowreader.py +++ b/utilities/segmentation_utils/flowreader.py @@ -15,6 +15,7 @@ from utilities.segmentation_utils import ImagePreprocessor from utilities.segmentation_utils.constants import ImageOrdering +from utilities.segmentation_utils.ImagePreprocessor import IPreprocessor class FlowGenerator: @@ -101,8 +102,8 @@ def get_dataset_size(self) -> int: def set_preprocessing_pipeline( self, - preprocessing_queue_image: ImagePreprocessor.IPreprocessor, - preprocessing_queue_mask: ImagePreprocessor.IPreprocessor, + preprocessing_queue_image: IPreprocessor, + preprocessing_queue_mask: IPreprocessor, ) -> None: """ Sets the preprocessing pipeline @@ -200,9 +201,6 @@ def preprocess(self, generator_zip): i_image, i_mask = ImagePreprocessor.augmentation_pipeline( image=i_image, mask=i_mask, - input_size=self.image_size, - output_size=self.output_size, - output_reshape=self.output_reshape, seed=image_seed, #!both preprocessing queues are assigned by this time image_queue=self.preprocessing_queue_image, # type: ignore @@ -273,8 +271,8 @@ def __init__( preprocessing_enabled: bool = True, seed: int = 909, preprocessing_seed: Optional[int] = None, - preprocessing_queue_image: ImagePreprocessor.IPreprocessor = ImagePreprocessor.generate_image_queue(), - preprocessing_queue_mask: ImagePreprocessor.IPreprocessor = ImagePreprocessor.generate_mask_queue(), + preprocessing_queue_image: IPreprocessor = ImagePreprocessor.generate_image_queue(), + preprocessing_queue_mask: IPreprocessor = ImagePreprocessor.generate_mask_queue(), 
read_weights: bool = False, weights_path: Optional[str] = None, shuffle_counter: int = 0, @@ -328,7 +326,7 @@ def __init__( self.linked_data = [self.image_filenames, self.mask_filenames] if self.read_weights: self.linked_data.append(self.weights) - + self.__shuffle_filenames() self.dataset_size = self.__len__() @@ -428,13 +426,12 @@ def __read_batch(self, start: int, end: int) -> None: for j in range(self.mini_batch): image_index = i * self.mini_batch + j - + image = Image.open( os.path.join(self.image_path, batch_image_filenames[image_index]) ).resize(self.image_size, Image.ANTIALIAS) - - image = np.array(image) + image = np.array(image) mask = Image.open( os.path.join(self.mask_path, batch_mask_filenames[image_index]) @@ -467,7 +464,7 @@ def __read_batch(self, start: int, end: int) -> None: # column and matrix vectors raw_masks[j, :, :] = mask - batch_masks[i, :, : , :] = ImagePreprocessor.onehot_encode( + batch_masks[i, :, :, :] = ImagePreprocessor.onehot_encode( raw_masks, self.num_classes )