From 1ea5efcd6991c94212937e0f2ecc567c1ccf26e8 Mon Sep 17 00:00:00 2001 From: Sajtospoga01 Date: Tue, 23 May 2023 14:10:46 +0100 Subject: [PATCH 01/18] updates image preprocessor by removing squeezing from onehot encode making the function more flexible, providing a preprocessing queue interface and clipping data after its sent through the preprocessing queue --- .../segmentation_utils/ImagePreprocessor.py | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/utilities/segmentation_utils/ImagePreprocessor.py b/utilities/segmentation_utils/ImagePreprocessor.py index ea7665b..1094ef1 100644 --- a/utilities/segmentation_utils/ImagePreprocessor.py +++ b/utilities/segmentation_utils/ImagePreprocessor.py @@ -1,10 +1,21 @@ from dataclasses import dataclass -from typing import Callable, Dict, Optional +from typing import Callable, Dict, Optional, Protocol import numpy as np import tensorflow as tf +class PreprocessorInterface(Protocol): + queue: list[Callable] + arguments: list[Dict] + + def update_seed(self, seed: int) -> None: + ... + + def get_queue_length(self) -> int: + ... 
+ + @dataclass class PreprocessingQueue: """ @@ -98,7 +109,7 @@ def onehot_encode(masks, output_size, num_classes) -> tf.Tensor: """ encoded = np.zeros((masks.shape[0], output_size[0] * output_size[1], num_classes)) for i in range(num_classes): - encoded[:, :, i] = tf.squeeze((masks == i).astype(int)) + encoded[:, :, i] = (masks == i).astype(int) encoded = tf.convert_to_tensor(encoded) return encoded @@ -166,6 +177,9 @@ def augmentation_pipeline( # flattens masks out to the correct output shape if output_size[1] == 1: mask = flatten(mask, output_size, channels=1) + + image = tf.convert_to_tensor(tf.clip_by_value(image, 0, 1)) + return image, mask From 6a37482a896dff8623552960119e7bfc52346ee9 Mon Sep 17 00:00:00 2001 From: Sajtospoga01 Date: Tue, 23 May 2023 14:11:25 +0100 Subject: [PATCH 02/18] adds experimental implementation of the flowreader object changing the tf reliant implementation to a self maintained --- utilities/segmentation_utils/flowreader.py | 263 ++++++++++++++++++++- 1 file changed, 255 insertions(+), 8 deletions(-) diff --git a/utilities/segmentation_utils/flowreader.py b/utilities/segmentation_utils/flowreader.py index 66088a2..05fccb2 100644 --- a/utilities/segmentation_utils/flowreader.py +++ b/utilities/segmentation_utils/flowreader.py @@ -7,7 +7,9 @@ from typing import Optional import numpy as np +import tensorflow as tf from keras.preprocessing.image import ImageDataGenerator +from keras.utils import Sequence from utilities.segmentation_utils import ImagePreprocessor @@ -39,8 +41,6 @@ class FlowGenerator: :bool preprocessing_enabled: whether to apply preprocessing or not :int seed: seed for flow from directory :int preprocessing_seed: seed for preprocessing, defaults to None - :PreprocessingQueue preprocessing_queue_image: preprocessing queue for images - :PreprocessingQueue preprocessing_queue_mask: preprocessing queue for masks Raises ------ @@ -48,6 +48,10 @@ class FlowGenerator: :ValueError: if the output size is not a square matrix or a 
column vector """ + preprocessing_seed = None + preprocessing_queue_image = None + preprocessing_queue_mask = None + def __init__( self, image_path: str, @@ -60,10 +64,6 @@ def __init__( preprocessing_enabled: bool = True, seed: int = 909, preprocessing_seed: Optional[int] = None, - preprocessing_queue_image: Optional[ - ImagePreprocessor.PreprocessingQueue - ] = None, - preprocessing_queue_mask: Optional[ImagePreprocessor.PreprocessingQueue] = None, ): if len(output_size) != 2: raise ValueError("The output size has to be a tuple of length 2") @@ -81,8 +81,6 @@ def __init__( self.shuffle = shuffle self.seed = seed self.preprocessing_enabled = preprocessing_enabled - self.preprocessing_queue_image = preprocessing_queue_image - self.preprocessing_queue_mask = preprocessing_queue_mask self.preprocessing_seed = preprocessing_seed self.__make_generator() print("Reading images from: ", self.image_path) @@ -98,6 +96,22 @@ def get_dataset_size(self) -> int: return len(os.listdir(os.path.join(self.image_path, "img"))) + def set_preprocessing_pipeline( + self, + preprocessing_queue_image: ImagePreprocessor.PreprocessorInterface, + preprocessing_queue_mask: ImagePreprocessor.PreprocessorInterface, + ) -> None: + """ + Sets the preprocessing pipeline + + Parameters + ---------- + :PreprocessingQueue preprocessing_queue_image: preprocessing queue for images + :PreprocessingQueue preprocessing_queue_mask: preprocessing queue for masks + """ + self.preprocessing_queue_image = preprocessing_queue_image + self.preprocessing_queue_mask = preprocessing_queue_mask + def __make_generator(self): """ Creates the generator @@ -195,3 +209,236 @@ def preprocess(self, generator_zip): mask, self.output_size, self.num_classes ) yield (img, mask) + + +class FlowGeneratorExperimental(Sequence): + """ + Initializes the flow generator object, + which can be used to read in images for semantic segmentation. 
+ Additionally, the reader can apply augmentation on the images, + and one-hot encode them on the fly. + + Note: in case the output is a column vector it has to be in the shape (x, 1) + Note: this is an experimental version of the flow generator, which uses a \ + custom implemented dataloader instead of the keras ImageDataGenerator + + Parameters + ---------- + :string image: path to the image directory + :string mask: path to the mask directory + :int batch_size: batch size + :tuple image_size: image size + :tuple output_size: output size + + + :int num_classes: number of classes + + Keyword Arguments + ----------------- + :bool shuffle: whether to shuffle the dataset or not + :int batch_size: batch size + :bool preprocessing_enabled: whether to apply preprocessing or not + :int seed: seed for flow from directory + :int preprocessing_seed: seed for preprocessing, defaults to None + + Raises + ------ + :ValueError: if the output size is not a tuple of length 2 + :ValueError: if the output size is not a square matrix or a column vector + """ + + preprocessing_seed = None + preprocessing_queue_image = None + preprocessing_queue_mask = None + + def __init__( + self, + image_path: str, + mask_path: str, + image_size: tuple[int, int], + output_size: tuple[int, int], + channel_mask: list[bool], + num_classes: int, + shuffle: bool = True, + batch_size: int = 2, + preprocessing_enabled: bool = True, + seed: int = 909, + preprocessing_seed: Optional[int] = None, + ): + if len(output_size) != 2: + raise ValueError("The output size has to be a tuple of length 2") + if output_size[1] != 1 and output_size[0] != output_size[1]: + raise ValueError( + "The output size has to be a square matrix or a column vector" + ) + + self.image_path = image_path + self.mask_path = mask_path + self.batch_size = batch_size + self.mini_batch = batch_size + self.image_size = image_size + self.output_size = output_size + self.channel_mask = np.array(channel_mask) + self.n_channels = 
np.sum(channel_mask) + self.num_classes = num_classes + self.shuffle = shuffle + self.seed = seed + self.preprocessing_enabled = preprocessing_enabled + self.preprocessing_seed = preprocessing_seed + + self.image_filenames = os.listdir(os.path.join(self.image_path, "img")) + self.mask_filenames = os.listdir(os.path.join(self.mask_path, "mask")) + + self.image_batch_store = np.zeros( + (1, self.batch_size, image_size[0], image_size[1], self.n_channels) + ) + self.mask_batch_store = np.zeros((1, self.batch_size, 1, 1, num_classes)) + self.validity_index = 0 + + if self.output_size[1] == 1: + # only enters if the output is a column vector + # such no need to define it otherwise + dimension = math.sqrt(self.output_size[0]) + self.output_reshape = (int(dimension), int(dimension)) + else: + self.output_reshape = None + + print("Reading images from: ", self.image_path) + + def set_preprocessing_pipeline( + self, + preprocessing_queue_image: ImagePreprocessor.PreprocessorInterface, + preprocessing_queue_mask: ImagePreprocessor.PreprocessorInterface, + ) -> None: + """ + Sets the preprocessing pipeline + + Parameters + ---------- + :PreprocessingQueue preprocessing_queue_image: preprocessing queue for images + :PreprocessingQueue preprocessing_queue_mask: preprocessing queue for masks + """ + self.preprocessing_queue_image = preprocessing_queue_image + self.preprocessing_queue_mask = preprocessing_queue_mask + + def set_mini_batch_size(self, batch_size: int) -> None: + """ + Function to set the appropriate minibatch size. Required to allign batch size in the reader with the model.\ + Does not change the batch size of the reader. 
+ + Parameters + ---------- + :int batch_size: the mini batch size + + Raises + ------ + :raises ValueError: if the mini batch size is larger than the batch size + :raises ValueError: if the batch size is not divisible by the mini batch size + """ + if batch_size > self.batch_size: + raise ValueError("The mini batch size cannot be larger than the batch size") + if self.batch_size % batch_size != 0: + raise ValueError("The batch size must be divisible by the mini batch size") + self.mini_batch = batch_size + + def read_batch(self, start: int, end: int) -> None: + # read image batch + batch_image_filenames = self.image_filenames[start:end] + batch_mask_filenames = self.mask_filenames[start:end] + + # calculate number of mini batches in a batch + n = self.batch_size // self.mini_batch + + batch_images = np.zeros( + ( + n, + self.mini_batch, + self.image_size[0], + self.image_size[1], + self.n_channels, + ) + ) + batch_masks = np.zeros( + ( + n, + self.mini_batch, + self.output_size[0], + self.output_size[1], + self.num_classes, + ) + ) + + # preprocess and assign images and masks to the batch + for i in range(n): + for j in range(self.mini_batch): + image = np.load( + os.path.join(self.image_path, "img", batch_image_filenames[j]) + ) + mask = np.load( + os.path.join(self.mask_path, "mask", batch_mask_filenames[j]) + ) + + # for now it is assumed that n is 1 + batch_images[i, j, :, :, :] = image[:, :, self.channel_mask] + + if self.output_size[1] == 1: + batch_masks = batch_masks.reshape((-1, 1)) # or batch_masks[:, np.newaxis] + + if self.preprocessing_enabled: + if self.preprocessing_seed is None: + image_seed = np.random.randint(0, 100000) + else: + state = np.random.RandomState(self.preprocessing_seed) + image_seed = state.randint(0, 100000) + + ( + batch_images[i, j, :, :, :], + mask, + ) = ImagePreprocessor.augmentation_pipeline( + image=batch_images[i, j, :, :, :], + mask=mask, + input_size=self.image_size, + output_size=self.output_size, + 
output_reshape=self.output_reshape, + seed=image_seed, + #!both preprocessing queues are assigned by this time + image_queue=self.preprocessing_queue_image, # type: ignore + mask_queue=self.preprocessing_queue_mask, # type: ignore + ) + + batch_masks[i, j, :, :, :] = ImagePreprocessor.onehot_encode( + mask, self.output_size, self.num_classes + ) + + # chaches the batch + self.image_batch_store = batch_images + self.mask_batch_store = batch_masks + + # required to check when to read the next batch + self.validity_index = end + + def __len__(self): + return int(np.ceil(len(self.image_filenames) / float(self.batch_size))) + + def __getitem__(self, index) -> tuple[np.ndarray, np.ndarray]: + # check if the batch is already cached + if index == self.validity_index: + self.read_batch(index, index + self.batch_size) + + # slices new batch + store_index = (index - self.validity_index) % self.mini_batch + + batch_images = self.image_batch_store[store_index, :, :, :, :] + batch_masks = self.mask_batch_store[store_index, :, :, :, :] + + tf.squeeze(batch_masks, axis=2) + + return np.array(batch_images), np.array(batch_masks) + + def on_epoch_end(self): + # Shuffle image and mask filenames + if self.shuffle: + np.random.seed(self.seed) + np.random.shuffle(self.image_filenames) + np.random.seed(self.seed) + np.random.shuffle(self.mask_filenames) From 3ee77363276a83fa641cfe4205722346e342fcf6 Mon Sep 17 00:00:00 2001 From: Andras Bodrogai Date: Tue, 23 May 2023 16:25:32 +0100 Subject: [PATCH 03/18] updates flowreader experimental to run with training --- .../segmentation_utils/ImagePreprocessor.py | 2 +- utilities/segmentation_utils/flowreader.py | 70 ++++++++++++------- 2 files changed, 47 insertions(+), 25 deletions(-) diff --git a/utilities/segmentation_utils/ImagePreprocessor.py b/utilities/segmentation_utils/ImagePreprocessor.py index 1094ef1..2e00b3b 100644 --- a/utilities/segmentation_utils/ImagePreprocessor.py +++ b/utilities/segmentation_utils/ImagePreprocessor.py @@ 
-109,7 +109,7 @@ def onehot_encode(masks, output_size, num_classes) -> tf.Tensor: """ encoded = np.zeros((masks.shape[0], output_size[0] * output_size[1], num_classes)) for i in range(num_classes): - encoded[:, :, i] = (masks == i).astype(int) + encoded[:, :, i] = tf.squeeze((masks == i)) encoded = tf.convert_to_tensor(encoded) return encoded diff --git a/utilities/segmentation_utils/flowreader.py b/utilities/segmentation_utils/flowreader.py index 05fccb2..173f33d 100644 --- a/utilities/segmentation_utils/flowreader.py +++ b/utilities/segmentation_utils/flowreader.py @@ -8,6 +8,7 @@ import numpy as np import tensorflow as tf +import cv2 from keras.preprocessing.image import ImageDataGenerator from keras.utils import Sequence @@ -286,8 +287,8 @@ def __init__( self.preprocessing_enabled = preprocessing_enabled self.preprocessing_seed = preprocessing_seed - self.image_filenames = os.listdir(os.path.join(self.image_path, "img")) - self.mask_filenames = os.listdir(os.path.join(self.mask_path, "mask")) + self.image_filenames = os.listdir(os.path.join(self.image_path)) + self.mask_filenames = os.listdir(os.path.join(self.mask_path)) self.image_batch_store = np.zeros( (1, self.batch_size, image_size[0], image_size[1], self.n_channels) @@ -358,31 +359,49 @@ def read_batch(self, start: int, end: int) -> None: self.n_channels, ) ) - batch_masks = np.zeros( - ( - n, - self.mini_batch, - self.output_size[0], - self.output_size[1], - self.num_classes, + if self.output_size[1] == 1: + column = True + batch_masks = np.zeros((n, self.mini_batch, self.output_size[0],self.num_classes)) + else: + column = False + batch_masks = np.zeros( + ( + n, + self.mini_batch, + self.output_size[0], + self.output_size[1], + self.num_classes, + ) ) - ) # preprocess and assign images and masks to the batch for i in range(n): for j in range(self.mini_batch): - image = np.load( - os.path.join(self.image_path, "img", batch_image_filenames[j]) + image = cv2.imread( + os.path.join(self.image_path, 
batch_image_filenames[j]), + cv2.IMREAD_COLOR, ) - mask = np.load( - os.path.join(self.mask_path, "mask", batch_mask_filenames[j]) + image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) + image = cv2.resize(image, self.image_size) + image = np.asarray(image) + + mask = cv2.imread( + os.path.join(self.mask_path, batch_mask_filenames[j]), + cv2.IMREAD_GRAYSCALE, ) + mask = cv2.resize(mask, self.output_size) + mask = np.asarray(mask).reshape(self.output_size) + # np.load( + # os.path.join(self.image_path, batch_image_filenames[j]) + # ) + # mask = np.load( + # os.path.join(self.mask_path, batch_mask_filenames[j]) + # ) # for now it is assumed that n is 1 batch_images[i, j, :, :, :] = image[:, :, self.channel_mask] - if self.output_size[1] == 1: - batch_masks = batch_masks.reshape((-1, 1)) # or batch_masks[:, np.newaxis] + if self.preprocessing_enabled: if self.preprocessing_seed is None: @@ -405,10 +424,13 @@ def read_batch(self, start: int, end: int) -> None: image_queue=self.preprocessing_queue_image, # type: ignore mask_queue=self.preprocessing_queue_mask, # type: ignore ) - - batch_masks[i, j, :, :, :] = ImagePreprocessor.onehot_encode( - mask, self.output_size, self.num_classes - ) + + batch_masks[i, j, : , 0] = tf.squeeze(mask) + + batch_masks[i, :,:,:] = ImagePreprocessor.onehot_encode( + batch_masks[i, :,:,0], self.output_size, self.num_classes + ) + # chaches the batch self.image_batch_store = batch_images @@ -426,12 +448,12 @@ def __getitem__(self, index) -> tuple[np.ndarray, np.ndarray]: self.read_batch(index, index + self.batch_size) # slices new batch - store_index = (index - self.validity_index) % self.mini_batch + store_index = (index - (self.validity_index-self.batch_size)) // self.mini_batch - batch_images = self.image_batch_store[store_index, :, :, :, :] - batch_masks = self.mask_batch_store[store_index, :, :, :, :] + batch_images = self.image_batch_store[store_index,...] + batch_masks = self.mask_batch_store[store_index,...] 
- tf.squeeze(batch_masks, axis=2) + return np.array(batch_images), np.array(batch_masks) From 952ed852b2818b5a0a7c88617fd1c52ccbdda31f Mon Sep 17 00:00:00 2001 From: Sajtospoga01 Date: Wed, 24 May 2023 21:03:19 +0100 Subject: [PATCH 04/18] specifies datatypes for the mask store array --- utilities/segmentation_utils/flowreader.py | 40 ++++++++++++---------- 1 file changed, 22 insertions(+), 18 deletions(-) diff --git a/utilities/segmentation_utils/flowreader.py b/utilities/segmentation_utils/flowreader.py index 173f33d..d7180ee 100644 --- a/utilities/segmentation_utils/flowreader.py +++ b/utilities/segmentation_utils/flowreader.py @@ -291,9 +291,11 @@ def __init__( self.mask_filenames = os.listdir(os.path.join(self.mask_path)) self.image_batch_store = np.zeros( - (1, self.batch_size, image_size[0], image_size[1], self.n_channels) + (1, self.batch_size, image_size[0], image_size[1], self.n_channels), + ) - self.mask_batch_store = np.zeros((1, self.batch_size, 1, 1, num_classes)) + self.mask_batch_store = np.zeros((1, self.batch_size, 1, 1, num_classes), + dtype=np.uint8) self.validity_index = 0 if self.output_size[1] == 1: @@ -357,11 +359,15 @@ def read_batch(self, start: int, end: int) -> None: self.image_size[0], self.image_size[1], self.n_channels, - ) + ), + ) if self.output_size[1] == 1: column = True - batch_masks = np.zeros((n, self.mini_batch, self.output_size[0],self.num_classes)) + batch_masks = np.zeros( + (n, self.mini_batch, self.output_size[0], self.num_classes), + dtype=np.uint8, + ) else: column = False batch_masks = np.zeros( @@ -371,7 +377,8 @@ def read_batch(self, start: int, end: int) -> None: self.output_size[0], self.output_size[1], self.num_classes, - ) + ), + dtype=np.uint8, ) # preprocess and assign images and masks to the batch @@ -401,8 +408,6 @@ def read_batch(self, start: int, end: int) -> None: # for now it is assumed that n is 1 batch_images[i, j, :, :, :] = image[:, :, self.channel_mask] - - if self.preprocessing_enabled: if 
self.preprocessing_seed is None: image_seed = np.random.randint(0, 100000) @@ -424,13 +429,12 @@ def read_batch(self, start: int, end: int) -> None: image_queue=self.preprocessing_queue_image, # type: ignore mask_queue=self.preprocessing_queue_mask, # type: ignore ) - - batch_masks[i, j, : , 0] = tf.squeeze(mask) - - batch_masks[i, :,:,:] = ImagePreprocessor.onehot_encode( - batch_masks[i, :,:,0], self.output_size, self.num_classes + + batch_masks[i, j, :, 0] = tf.squeeze(mask) + + batch_masks[i, :, :, :] = ImagePreprocessor.onehot_encode( + batch_masks[i, :, :, 0], self.output_size, self.num_classes ) - # chaches the batch self.image_batch_store = batch_images @@ -448,12 +452,12 @@ def __getitem__(self, index) -> tuple[np.ndarray, np.ndarray]: self.read_batch(index, index + self.batch_size) # slices new batch - store_index = (index - (self.validity_index-self.batch_size)) // self.mini_batch - - batch_images = self.image_batch_store[store_index,...] - batch_masks = self.mask_batch_store[store_index,...] + store_index = ( + index - (self.validity_index - self.batch_size) + ) // self.mini_batch - + batch_images = self.image_batch_store[store_index, ...] + batch_masks = self.mask_batch_store[store_index, ...] 
return np.array(batch_images), np.array(batch_masks) From f913e5038039f88c69261cf615c47a76d185a5d3 Mon Sep 17 00:00:00 2001 From: Andras Bodrogai Date: Wed, 24 May 2023 21:17:38 +0100 Subject: [PATCH 05/18] removes uint declaration --- utilities/segmentation_utils/flowreader.py | 45 +++++++++------------- 1 file changed, 19 insertions(+), 26 deletions(-) diff --git a/utilities/segmentation_utils/flowreader.py b/utilities/segmentation_utils/flowreader.py index d7180ee..78b0018 100644 --- a/utilities/segmentation_utils/flowreader.py +++ b/utilities/segmentation_utils/flowreader.py @@ -291,11 +291,9 @@ def __init__( self.mask_filenames = os.listdir(os.path.join(self.mask_path)) self.image_batch_store = np.zeros( - (1, self.batch_size, image_size[0], image_size[1], self.n_channels), - + (1, self.batch_size, image_size[0], image_size[1], self.n_channels) ) - self.mask_batch_store = np.zeros((1, self.batch_size, 1, 1, num_classes), - dtype=np.uint8) + self.mask_batch_store = np.zeros((1, self.batch_size, 1, 1, num_classes)) self.validity_index = 0 if self.output_size[1] == 1: @@ -347,7 +345,7 @@ def set_mini_batch_size(self, batch_size: int) -> None: def read_batch(self, start: int, end: int) -> None: # read image batch batch_image_filenames = self.image_filenames[start:end] - batch_mask_filenames = self.mask_filenames[start:end] + batch_mask_filenames = batch_image_filenames # calculate number of mini batches in a batch n = self.batch_size // self.mini_batch @@ -359,15 +357,11 @@ def read_batch(self, start: int, end: int) -> None: self.image_size[0], self.image_size[1], self.n_channels, - ), - + ) ) if self.output_size[1] == 1: column = True - batch_masks = np.zeros( - (n, self.mini_batch, self.output_size[0], self.num_classes), - dtype=np.uint8, - ) + batch_masks = np.zeros((n, self.mini_batch, self.output_size[0],self.num_classes)) else: column = False batch_masks = np.zeros( @@ -377,8 +371,7 @@ def read_batch(self, start: int, end: int) -> None: self.output_size[0], 
self.output_size[1], self.num_classes, - ), - dtype=np.uint8, + ) ) # preprocess and assign images and masks to the batch @@ -429,12 +422,13 @@ def read_batch(self, start: int, end: int) -> None: image_queue=self.preprocessing_queue_image, # type: ignore mask_queue=self.preprocessing_queue_mask, # type: ignore ) - - batch_masks[i, j, :, 0] = tf.squeeze(mask) - - batch_masks[i, :, :, :] = ImagePreprocessor.onehot_encode( - batch_masks[i, :, :, 0], self.output_size, self.num_classes + + batch_masks[i, j, : , 0] = tf.squeeze(mask) + + batch_masks[i, :,:,:] = ImagePreprocessor.onehot_encode( + batch_masks[i, :,:,0], self.output_size, self.num_classes ) + # chaches the batch self.image_batch_store = batch_images @@ -448,23 +442,22 @@ def __len__(self): def __getitem__(self, index) -> tuple[np.ndarray, np.ndarray]: # check if the batch is already cached + if index < self.validity_index-self.batch_size: + self.validity_index = 0 + if index == self.validity_index: self.read_batch(index, index + self.batch_size) # slices new batch - store_index = ( - index - (self.validity_index - self.batch_size) - ) // self.mini_batch + store_index = (index - (self.validity_index-self.batch_size)) // self.mini_batch - batch_images = self.image_batch_store[store_index, ...] - batch_masks = self.mask_batch_store[store_index, ...] + batch_images = self.image_batch_store[store_index,...] + batch_masks = self.mask_batch_store[store_index,...] 
return np.array(batch_images), np.array(batch_masks) def on_epoch_end(self): # Shuffle image and mask filenames + if self.shuffle: - np.random.seed(self.seed) np.random.shuffle(self.image_filenames) - np.random.seed(self.seed) - np.random.shuffle(self.mask_filenames) From 7cc5ae3ad0bf7ca33dae84af52a4a0a04651b7b8 Mon Sep 17 00:00:00 2001 From: Andras Bodrogai Date: Thu, 25 May 2023 10:23:48 +0100 Subject: [PATCH 06/18] fixes possible indexing issue and changes default reader library from CV2 to PIL --- .../segmentation_utils/ImagePreprocessor.py | 4 +- utilities/segmentation_utils/flowreader.py | 73 +++++++++++-------- 2 files changed, 43 insertions(+), 34 deletions(-) diff --git a/utilities/segmentation_utils/ImagePreprocessor.py b/utilities/segmentation_utils/ImagePreprocessor.py index 2e00b3b..59ce65c 100644 --- a/utilities/segmentation_utils/ImagePreprocessor.py +++ b/utilities/segmentation_utils/ImagePreprocessor.py @@ -160,7 +160,7 @@ def augmentation_pipeline( ------- :return tuple(tf.Tensor, tf.Tensor): tuple of the processed image and mask """ - + # reshapes masks, such that transforamtions work properly if output_reshape is not None and output_size[1] == 1: mask = tf.reshape(mask, (output_reshape[0], output_reshape[1], 1)) @@ -201,4 +201,4 @@ def flatten(image, input_size, channels=1) -> tf.Tensor: :return tf.Tensor: flattened image """ # the 1 is required to preserve the shape similar to the original - return tf.reshape(image, (input_size[0] * input_size[1], 1, channels)) + return tf.reshape(image, (input_size[0] * input_size[1], channels)) diff --git a/utilities/segmentation_utils/flowreader.py b/utilities/segmentation_utils/flowreader.py index 78b0018..4f1c2bf 100644 --- a/utilities/segmentation_utils/flowreader.py +++ b/utilities/segmentation_utils/flowreader.py @@ -8,7 +8,7 @@ import numpy as np import tensorflow as tf -import cv2 +from PIL import Image from keras.preprocessing.image import ImageDataGenerator from keras.utils import Sequence @@ -361,7 
+361,9 @@ def read_batch(self, start: int, end: int) -> None: ) if self.output_size[1] == 1: column = True - batch_masks = np.zeros((n, self.mini_batch, self.output_size[0],self.num_classes)) + batch_masks = np.zeros( + (n, self.mini_batch, self.output_size[0], self.num_classes) + ) else: column = False batch_masks = np.zeros( @@ -376,21 +378,24 @@ def read_batch(self, start: int, end: int) -> None: # preprocess and assign images and masks to the batch for i in range(n): + raw_masks = np.zeros( + (self.mini_batch, self.output_size[0] * self.output_size[1], 1) + ) for j in range(self.mini_batch): - image = cv2.imread( - os.path.join(self.image_path, batch_image_filenames[j]), - cv2.IMREAD_COLOR, - ) - image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) - image = cv2.resize(image, self.image_size) - image = np.asarray(image) - - mask = cv2.imread( - os.path.join(self.mask_path, batch_mask_filenames[j]), - cv2.IMREAD_GRAYSCALE, - ) - mask = cv2.resize(mask, self.output_size) - mask = np.asarray(mask).reshape(self.output_size) + image = Image.open( + os.path.join(self.image_path, batch_image_filenames[j]) + ).resize(self.image_size) + # image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) + + image = np.array(image) + image = image / 255 + + mask = Image.open( + os.path.join(self.mask_path, batch_mask_filenames[j]) + ).resize(self.output_reshape) + + mask = np.array(mask) + mask = np.reshape(mask, self.output_size) # np.load( # os.path.join(self.image_path, batch_image_filenames[j]) # ) @@ -399,7 +404,10 @@ def read_batch(self, start: int, end: int) -> None: # ) # for now it is assumed that n is 1 - batch_images[i, j, :, :, :] = image[:, :, self.channel_mask] + + image = image[:, :, self.channel_mask] + + batch_images[i, j, :, :, :] = image if self.preprocessing_enabled: if self.preprocessing_seed is None: @@ -422,42 +430,43 @@ def read_batch(self, start: int, end: int) -> None: image_queue=self.preprocessing_queue_image, # type: ignore mask_queue=self.preprocessing_queue_mask, # 
type: ignore ) - - batch_masks[i, j, : , 0] = tf.squeeze(mask) - - batch_masks[i, :,:,:] = ImagePreprocessor.onehot_encode( - batch_masks[i, :,:,0], self.output_size, self.num_classes + + raw_masks[j, :, :] = mask + + batch_masks[i, :, :, :] = ImagePreprocessor.onehot_encode( + raw_masks, self.output_size, self.num_classes ) - # chaches the batch self.image_batch_store = batch_images self.mask_batch_store = batch_masks # required to check when to read the next batch - self.validity_index = end def __len__(self): - return int(np.ceil(len(self.image_filenames) / float(self.batch_size))) + return int(np.floor(len(self.image_filenames) / float(self.batch_size))) def __getitem__(self, index) -> tuple[np.ndarray, np.ndarray]: # check if the batch is already cached - if index < self.validity_index-self.batch_size: + if index < self.validity_index - self.batch_size // self.mini_batch: self.validity_index = 0 if index == self.validity_index: - self.read_batch(index, index + self.batch_size) + self.read_batch(index * self.batch_size, (index + 1) * self.batch_size) + self.validity_index = (self.batch_size // self.mini_batch) + index # slices new batch - store_index = (index - (self.validity_index-self.batch_size)) // self.mini_batch + store_index = ( + index - (self.validity_index - self.batch_size) + ) // self.mini_batch - batch_images = self.image_batch_store[store_index,...] - batch_masks = self.mask_batch_store[store_index,...] + batch_images = self.image_batch_store[store_index, ...] + batch_masks = self.mask_batch_store[store_index, ...] 
- return np.array(batch_images), np.array(batch_masks) + return batch_images, batch_masks def on_epoch_end(self): # Shuffle image and mask filenames - + if self.shuffle: np.random.shuffle(self.image_filenames) From 39fd65b899adaa59989f8ba24ca3955a8fa11056 Mon Sep 17 00:00:00 2001 From: Sajtospoga01 Date: Thu, 25 May 2023 10:40:47 +0100 Subject: [PATCH 07/18] adds bicubic interpolation to the image reader --- utilities/segmentation_utils/flowreader.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/utilities/segmentation_utils/flowreader.py b/utilities/segmentation_utils/flowreader.py index 4f1c2bf..42cd9da 100644 --- a/utilities/segmentation_utils/flowreader.py +++ b/utilities/segmentation_utils/flowreader.py @@ -384,7 +384,8 @@ def read_batch(self, start: int, end: int) -> None: for j in range(self.mini_batch): image = Image.open( os.path.join(self.image_path, batch_image_filenames[j]) - ).resize(self.image_size) + + ).resize(self.image_size, Image.BICUBIC) # image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) image = np.array(image) From 38e16a9c370155565c3f9b445987791730d9cf70 Mon Sep 17 00:00:00 2001 From: Andras Bodrogai Date: Thu, 25 May 2023 22:49:28 +0100 Subject: [PATCH 08/18] removes some unnecessary things --- .../segmentation_utils/ImagePreprocessor.py | 2 +- utilities/segmentation_utils/flowreader.py | 33 ++++++++----------- 2 files changed, 14 insertions(+), 21 deletions(-) diff --git a/utilities/segmentation_utils/ImagePreprocessor.py b/utilities/segmentation_utils/ImagePreprocessor.py index 59ce65c..ded7100 100644 --- a/utilities/segmentation_utils/ImagePreprocessor.py +++ b/utilities/segmentation_utils/ImagePreprocessor.py @@ -178,7 +178,7 @@ def augmentation_pipeline( if output_size[1] == 1: mask = flatten(mask, output_size, channels=1) - image = tf.convert_to_tensor(tf.clip_by_value(image, 0, 1)) + #image = tf.convert_to_tensor(tf.clip_by_value(image, 0, 1)) return image, mask diff --git a/utilities/segmentation_utils/flowreader.py 
b/utilities/segmentation_utils/flowreader.py index 42cd9da..eca315e 100644 --- a/utilities/segmentation_utils/flowreader.py +++ b/utilities/segmentation_utils/flowreader.py @@ -302,7 +302,7 @@ def __init__( dimension = math.sqrt(self.output_size[0]) self.output_reshape = (int(dimension), int(dimension)) else: - self.output_reshape = None + self.output_reshape = self.output_size print("Reading images from: ", self.image_path) @@ -346,7 +346,7 @@ def read_batch(self, start: int, end: int) -> None: # read image batch batch_image_filenames = self.image_filenames[start:end] batch_mask_filenames = batch_image_filenames - + tf.print(batch_image_filenames) # calculate number of mini batches in a batch n = self.batch_size // self.mini_batch @@ -384,9 +384,7 @@ def read_batch(self, start: int, end: int) -> None: for j in range(self.mini_batch): image = Image.open( os.path.join(self.image_path, batch_image_filenames[j]) - - ).resize(self.image_size, Image.BICUBIC) - # image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) + ).resize(self.image_size, Image.ANTIALIAS) image = np.array(image) image = image / 255 @@ -396,16 +394,6 @@ def read_batch(self, start: int, end: int) -> None: ).resize(self.output_reshape) mask = np.array(mask) - mask = np.reshape(mask, self.output_size) - # np.load( - # os.path.join(self.image_path, batch_image_filenames[j]) - # ) - # mask = np.load( - # os.path.join(self.mask_path, batch_mask_filenames[j]) - # ) - - # for now it is assumed that n is 1 - image = image[:, :, self.channel_mask] batch_images[i, j, :, :, :] = image @@ -431,6 +419,8 @@ def read_batch(self, start: int, end: int) -> None: image_queue=self.preprocessing_queue_image, # type: ignore mask_queue=self.preprocessing_queue_mask, # type: ignore ) + + mask = np.reshape(mask, self.output_size) raw_masks[j, :, :] = mask @@ -450,24 +440,27 @@ def __len__(self): def __getitem__(self, index) -> tuple[np.ndarray, np.ndarray]: # check if the batch is already cached if index < self.validity_index - 
self.batch_size // self.mini_batch: + self.validity_index = 0 if index == self.validity_index: + self.read_batch(index * self.batch_size, (index + 1) * self.batch_size) self.validity_index = (self.batch_size // self.mini_batch) + index # slices new batch - store_index = ( - index - (self.validity_index - self.batch_size) - ) // self.mini_batch + store_index = (self.batch_size//self.mini_batch) - (self.validity_index - index) + batch_images = self.image_batch_store[store_index, ...] batch_masks = self.mask_batch_store[store_index, ...] - return batch_images, batch_masks + return tf.convert_to_tensor(batch_images), tf.convert_to_tensor(batch_masks) def on_epoch_end(self): # Shuffle image and mask filenames - + if self.shuffle: + np.random.shuffle(self.image_filenames) + \ No newline at end of file From 10653a92c1596c999bbf8be33ca8068c4ff174d3 Mon Sep 17 00:00:00 2001 From: Andras Bodrogai Date: Thu, 25 May 2023 23:41:16 +0100 Subject: [PATCH 09/18] adds possible fix for image flips not being determinable --- .../segmentation_utils/ImagePreprocessor.py | 46 ++++++++++++++++++- 1 file changed, 44 insertions(+), 2 deletions(-) diff --git a/utilities/segmentation_utils/ImagePreprocessor.py b/utilities/segmentation_utils/ImagePreprocessor.py index ded7100..800a3c9 100644 --- a/utilities/segmentation_utils/ImagePreprocessor.py +++ b/utilities/segmentation_utils/ImagePreprocessor.py @@ -160,7 +160,7 @@ def augmentation_pipeline( ------- :return tuple(tf.Tensor, tf.Tensor): tuple of the processed image and mask """ - + # reshapes masks, such that transforamtions work properly if output_reshape is not None and output_size[1] == 1: mask = tf.reshape(mask, (output_reshape[0], output_reshape[1], 1)) @@ -178,7 +178,7 @@ def augmentation_pipeline( if output_size[1] == 1: mask = flatten(mask, output_size, channels=1) - #image = tf.convert_to_tensor(tf.clip_by_value(image, 0, 1)) + # image = tf.convert_to_tensor(tf.clip_by_value(image, 0, 1)) return image, mask @@ -202,3 +202,45 @@ 
def flatten(image, input_size, channels=1) -> tf.Tensor: """ # the 1 is required to preserve the shape similar to the original return tf.reshape(image, (input_size[0] * input_size[1], channels)) + + +def random_flip_up_down(image, seed=0) -> tf.Tensor: + """ + Function that randomly flips an image up or down + + Parameters + ---------- + :tf.Tensor image: image to be flipped + + Returns + ------- + :return tf.Tensor: flipped image + """ + + state = np.random.RandomState(seed) + flip = state.choice([True, False]) + if flip: + return tf.image.flip_up_down(image) + else: + return image + + +def random_flip_left_right(image, seed=0) -> tf.Tensor: + """ + Function that randomly flips an image left or right + + Parameters + ---------- + :tf.Tensor image: image to be flipped + + Returns + ------- + :return tf.Tensor: flipped image + """ + + state = np.random.RandomState(seed) + flip = state.choice([True, False]) + if flip: + return tf.image.flip_left_right(image) + else: + return image From 0c58719b2106c65dfd723dd8b65b0831aeb5d9ff Mon Sep 17 00:00:00 2001 From: Andras Bodrogai Date: Thu, 25 May 2023 23:44:21 +0100 Subject: [PATCH 10/18] adds function to validate dataset --- utilities/segmentation_utils/flowreader.py | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/utilities/segmentation_utils/flowreader.py b/utilities/segmentation_utils/flowreader.py index eca315e..187d953 100644 --- a/utilities/segmentation_utils/flowreader.py +++ b/utilities/segmentation_utils/flowreader.py @@ -11,6 +11,7 @@ from PIL import Image from keras.preprocessing.image import ImageDataGenerator from keras.utils import Sequence +from tqdm import tqdm from utilities.segmentation_utils import ImagePreprocessor @@ -287,8 +288,16 @@ def __init__( self.preprocessing_enabled = preprocessing_enabled self.preprocessing_seed = preprocessing_seed - self.image_filenames = os.listdir(os.path.join(self.image_path)) - self.mask_filenames = 
os.listdir(os.path.join(self.mask_path)) + self.image_filenames = sorted(os.listdir(os.path.join(self.image_path))) + self.mask_filenames = sorted(os.listdir(os.path.join(self.mask_path))) + + print("Validating dataset...") + + for i_name,m_name in tqdm(zip(self.image_filenames,self.mask_filenames)): + if i_name != m_name: + raise ValueError("The image and mask directories do not match") + + self.image_batch_store = np.zeros( (1, self.batch_size, image_size[0], image_size[1], self.n_channels) @@ -346,7 +355,7 @@ def read_batch(self, start: int, end: int) -> None: # read image batch batch_image_filenames = self.image_filenames[start:end] batch_mask_filenames = batch_image_filenames - tf.print(batch_image_filenames) + # calculate number of mini batches in a batch n = self.batch_size // self.mini_batch @@ -461,6 +470,7 @@ def on_epoch_end(self): # Shuffle image and mask filenames if self.shuffle: - - np.random.shuffle(self.image_filenames) + shuffled_indices = np.random.permutation(len(self.image_filenames)) + self.image_filenames = self.image_filenames[shuffled_indices] + self.mask_filenames = self.mask_filenames[shuffled_indices] \ No newline at end of file From bea1882816610116a9119ec66939e9c727d2a2a0 Mon Sep 17 00:00:00 2001 From: Andras Bodrogai Date: Thu, 1 Jun 2023 18:24:53 +0100 Subject: [PATCH 11/18] updates, flowreader and processing pipeline to be able to read images --- .../segmentation_utils/ImagePreprocessor.py | 18 ++-- utilities/segmentation_utils/flowreader.py | 87 +++++++++++-------- 2 files changed, 66 insertions(+), 39 deletions(-) diff --git a/utilities/segmentation_utils/ImagePreprocessor.py b/utilities/segmentation_utils/ImagePreprocessor.py index 800a3c9..e5ef245 100644 --- a/utilities/segmentation_utils/ImagePreprocessor.py +++ b/utilities/segmentation_utils/ImagePreprocessor.py @@ -107,10 +107,13 @@ def onehot_encode(masks, output_size, num_classes) -> tf.Tensor: ------- :return tf.Tensor: Batch of one-hot encoded masks """ - encoded = 
np.zeros((masks.shape[0], output_size[0] * output_size[1], num_classes)) + encoded = np.zeros((masks.shape[0], output_size[0], output_size[1], num_classes)) for i in range(num_classes): - encoded[:, :, i] = tf.squeeze((masks == i)) - encoded = tf.convert_to_tensor(encoded) + mask = (masks == i).astype(float) + encoded[:, :, :, i] = mask + if output_size[1] == 1: + encoded = encoded.reshape((masks.shape[0],output_size[0] * output_size[1], num_classes)) + return encoded @@ -163,8 +166,10 @@ def augmentation_pipeline( # reshapes masks, such that transforamtions work properly if output_reshape is not None and output_size[1] == 1: - mask = tf.reshape(mask, (output_reshape[0], output_reshape[1], 1)) - + mask = tf.reshape(mask, (output_reshape[0], output_reshape[1])) + + mask = tf.expand_dims(mask,axis=-1) + image_queue.update_seed(seed) mask_queue.update_seed(seed) @@ -177,8 +182,11 @@ def augmentation_pipeline( # flattens masks out to the correct output shape if output_size[1] == 1: mask = flatten(mask, output_size, channels=1) + else: + mask = tf.squeeze(mask, axis=-1) # image = tf.convert_to_tensor(tf.clip_by_value(image, 0, 1)) + return image, mask diff --git a/utilities/segmentation_utils/flowreader.py b/utilities/segmentation_utils/flowreader.py index 187d953..4f0d901 100644 --- a/utilities/segmentation_utils/flowreader.py +++ b/utilities/segmentation_utils/flowreader.py @@ -252,6 +252,7 @@ class FlowGeneratorExperimental(Sequence): preprocessing_seed = None preprocessing_queue_image = None preprocessing_queue_mask = None + shuffle_counter = 0 def __init__( self, @@ -288,21 +289,23 @@ def __init__( self.preprocessing_enabled = preprocessing_enabled self.preprocessing_seed = preprocessing_seed - self.image_filenames = sorted(os.listdir(os.path.join(self.image_path))) - self.mask_filenames = sorted(os.listdir(os.path.join(self.mask_path))) + self.image_filenames = np.array( + sorted(os.listdir(os.path.join(self.image_path))) + ) + self.mask_filenames = 
np.array(sorted(os.listdir(os.path.join(self.mask_path)))) + + self.shuffle_filenames() + + self.dataset_size = self.__len__() print("Validating dataset...") - for i_name,m_name in tqdm(zip(self.image_filenames,self.mask_filenames)): + for i_name, m_name in tqdm(zip(self.image_filenames, self.mask_filenames)): if i_name != m_name: raise ValueError("The image and mask directories do not match") - - - self.image_batch_store = np.zeros( - (1, self.batch_size, image_size[0], image_size[1], self.n_channels) - ) - self.mask_batch_store = np.zeros((1, self.batch_size, 1, 1, num_classes)) + self.image_batch_store = None + self.mask_batch_store = None self.validity_index = 0 if self.output_size[1] == 1: @@ -354,8 +357,11 @@ def set_mini_batch_size(self, batch_size: int) -> None: def read_batch(self, start: int, end: int) -> None: # read image batch batch_image_filenames = self.image_filenames[start:end] - batch_mask_filenames = batch_image_filenames - + batch_mask_filenames = self.mask_filenames[start:end] + for i in range(len(batch_image_filenames)): + if batch_image_filenames[i] != batch_mask_filenames[i]: + raise ValueError("The image and mask directories do not match") + # calculate number of mini batches in a batch n = self.batch_size // self.mini_batch @@ -387,25 +393,30 @@ def read_batch(self, start: int, end: int) -> None: # preprocess and assign images and masks to the batch for i in range(n): - raw_masks = np.zeros( - (self.mini_batch, self.output_size[0] * self.output_size[1], 1) - ) + if column: + raw_masks = np.zeros( + (self.mini_batch, self.output_size[0] * self.output_size[1], 1) + ) + else: + raw_masks = np.zeros( + (self.mini_batch, self.output_size[0], self.output_size[1]) + ) + for j in range(self.mini_batch): + image_index = i * self.mini_batch + j image = Image.open( - os.path.join(self.image_path, batch_image_filenames[j]) + os.path.join(self.image_path, batch_image_filenames[image_index]) ).resize(self.image_size, Image.ANTIALIAS) image = 
np.array(image) image = image / 255 mask = Image.open( - os.path.join(self.mask_path, batch_mask_filenames[j]) + os.path.join(self.mask_path, batch_mask_filenames[image_index]) ).resize(self.output_reshape) - - mask = np.array(mask) - image = image[:, :, self.channel_mask] - batch_images[i, j, :, :, :] = image + mask = np.array(mask) + # image = image[:, :, self.channel_mask] if self.preprocessing_enabled: if self.preprocessing_seed is None: @@ -415,10 +426,10 @@ def read_batch(self, start: int, end: int) -> None: image_seed = state.randint(0, 100000) ( - batch_images[i, j, :, :, :], + image, mask, ) = ImagePreprocessor.augmentation_pipeline( - image=batch_images[i, j, :, :, :], + image, mask=mask, input_size=self.image_size, output_size=self.output_size, @@ -428,12 +439,15 @@ def read_batch(self, start: int, end: int) -> None: image_queue=self.preprocessing_queue_image, # type: ignore mask_queue=self.preprocessing_queue_mask, # type: ignore ) - - mask = np.reshape(mask, self.output_size) + if column: + mask = np.reshape(mask, self.output_size) - raw_masks[j, :, :] = mask + batch_images[i, j, :, :, :] = image + raw_masks[ + j, ... + ] = mask # NOTE: this provides the flexibility required to process both column and matrix vectors - batch_masks[i, :, :, :] = ImagePreprocessor.onehot_encode( + batch_masks[i, ...] 
= ImagePreprocessor.onehot_encode( raw_masks, self.output_size, self.num_classes ) @@ -448,29 +462,34 @@ def __len__(self): def __getitem__(self, index) -> tuple[np.ndarray, np.ndarray]: # check if the batch is already cached + index = index % self.dataset_size + if index < self.validity_index - self.batch_size // self.mini_batch: - self.validity_index = 0 if index == self.validity_index: - self.read_batch(index * self.batch_size, (index + 1) * self.batch_size) self.validity_index = (self.batch_size // self.mini_batch) + index # slices new batch - store_index = (self.batch_size//self.mini_batch) - (self.validity_index - index) - + store_index = (self.batch_size // self.mini_batch) - ( + self.validity_index - index + ) batch_images = self.image_batch_store[store_index, ...] batch_masks = self.mask_batch_store[store_index, ...] - return tf.convert_to_tensor(batch_images), tf.convert_to_tensor(batch_masks) + return batch_images, batch_masks def on_epoch_end(self): # Shuffle image and mask filenames - + self.shuffle_filenames() + + def shuffle_filenames(self): if self.shuffle: - shuffled_indices = np.random.permutation(len(self.image_filenames)) + state = np.random.RandomState(self.seed + self.shuffle_counter) + self.shuffle_counter += 1 + shuffled_indices = state.permutation(len(self.image_filenames)) + shuffled_indices = shuffled_indices.astype(int) self.image_filenames = self.image_filenames[shuffled_indices] self.mask_filenames = self.mask_filenames[shuffled_indices] - \ No newline at end of file From 1068f6160e6f2a376365a8166aa6afcd35875181 Mon Sep 17 00:00:00 2001 From: Sajtospoga01 Date: Sun, 4 Jun 2023 19:09:19 +0100 Subject: [PATCH 12/18] Improves code quality, updates documentation --- .../segmentation_utils/ImagePreprocessor.py | 20 +++--- utilities/segmentation_utils/flowreader.py | 61 ++++++++++--------- 2 files changed, 42 insertions(+), 39 deletions(-) diff --git a/utilities/segmentation_utils/ImagePreprocessor.py 
b/utilities/segmentation_utils/ImagePreprocessor.py index e5ef245..5700bea 100644 --- a/utilities/segmentation_utils/ImagePreprocessor.py +++ b/utilities/segmentation_utils/ImagePreprocessor.py @@ -112,8 +112,10 @@ def onehot_encode(masks, output_size, num_classes) -> tf.Tensor: mask = (masks == i).astype(float) encoded[:, :, :, i] = mask if output_size[1] == 1: - encoded = encoded.reshape((masks.shape[0],output_size[0] * output_size[1], num_classes)) - + encoded = encoded.reshape( + (masks.shape[0], output_size[0] * output_size[1], num_classes) + ) + encoded = tf.convert_to_tensor(encoded, dtype=tf.float32) return encoded @@ -167,9 +169,9 @@ def augmentation_pipeline( # reshapes masks, such that transforamtions work properly if output_reshape is not None and output_size[1] == 1: mask = tf.reshape(mask, (output_reshape[0], output_reshape[1])) - - mask = tf.expand_dims(mask,axis=-1) - + + mask = tf.expand_dims(mask, axis=-1) + image_queue.update_seed(seed) mask_queue.update_seed(seed) @@ -185,8 +187,8 @@ def augmentation_pipeline( else: mask = tf.squeeze(mask, axis=-1) + mask = tf.convert_to_tensor(mask, dtype=tf.float32) # image = tf.convert_to_tensor(tf.clip_by_value(image, 0, 1)) - return image, mask @@ -209,7 +211,7 @@ def flatten(image, input_size, channels=1) -> tf.Tensor: :return tf.Tensor: flattened image """ # the 1 is required to preserve the shape similar to the original - return tf.reshape(image, (input_size[0] * input_size[1], channels)) + return tf.convert_to_tensor(tf.reshape(image, (input_size[0] * input_size[1], channels))) def random_flip_up_down(image, seed=0) -> tf.Tensor: @@ -228,7 +230,7 @@ def random_flip_up_down(image, seed=0) -> tf.Tensor: state = np.random.RandomState(seed) flip = state.choice([True, False]) if flip: - return tf.image.flip_up_down(image) + return tf.convert_to_tensor(tf.image.flip_up_down(image)) else: return image @@ -249,6 +251,6 @@ def random_flip_left_right(image, seed=0) -> tf.Tensor: state = 
np.random.RandomState(seed) flip = state.choice([True, False]) if flip: - return tf.image.flip_left_right(image) + return tf.convert_to_tensor(tf.image.flip_left_right(image)) else: return image diff --git a/utilities/segmentation_utils/flowreader.py b/utilities/segmentation_utils/flowreader.py index 4f0d901..8dd40a8 100644 --- a/utilities/segmentation_utils/flowreader.py +++ b/utilities/segmentation_utils/flowreader.py @@ -7,10 +7,9 @@ from typing import Optional import numpy as np -import tensorflow as tf -from PIL import Image from keras.preprocessing.image import ImageDataGenerator from keras.utils import Sequence +from PIL import Image from tqdm import tqdm from utilities.segmentation_utils import ImagePreprocessor @@ -228,23 +227,23 @@ class FlowGeneratorExperimental(Sequence): ---------- :string image: path to the image directory :string mask: path to the mask directory - :int batch_size: batch size - :tuple image_size: image size - :tuple output_size: output size - - - :int num_classes: number of classes + :int batch_size: + :tuple image_size: specifies the size of the input image + :tuple output_size: specifies the size of the output mask + :list[bool] channel_mask: specifies which channels of the input image to use + :int num_classes: number of classes in the output mask Keyword Arguments ----------------- - :bool shuffle: whether to shuffle the dataset or not - :int batch_size: batch size - :bool preprocessing_enabled: whether to apply preprocessing or not + :bool, optional shuffle: whether to shuffle the dataset or not, defaults to True + :int batch_size: specifies the number of images read in one batch, defaults to 2 + :bool preprocessing_enabled: whether to apply preprocessing or not, defaults to True :int seed: seed for flow from directory :int preprocessing_seed: seed for preprocessing, defaults to None Raises ------ + :ValueError: if the names of the images and masks do not match :ValueError: if the output size is not a tuple of length 2 
:ValueError: if the output size is not a square matrix or a column vector """ @@ -289,17 +288,19 @@ def __init__( self.preprocessing_enabled = preprocessing_enabled self.preprocessing_seed = preprocessing_seed + ( + self.preprocessing_queue_image, + self.preprocessing_queue_mask, + ) = ImagePreprocessor.generate_default_queue() + self.image_filenames = np.array( sorted(os.listdir(os.path.join(self.image_path))) ) self.mask_filenames = np.array(sorted(os.listdir(os.path.join(self.mask_path)))) - - self.shuffle_filenames() - + self.__shuffle_filenames() self.dataset_size = self.__len__() print("Validating dataset...") - for i_name, m_name in tqdm(zip(self.image_filenames, self.mask_filenames)): if i_name != m_name: raise ValueError("The image and mask directories do not match") @@ -336,8 +337,8 @@ def set_preprocessing_pipeline( def set_mini_batch_size(self, batch_size: int) -> None: """ - Function to set the appropriate minibatch size. Required to allign batch size in the reader with the model.\ - Does not change the batch size of the reader. + Function to set the appropriate minibatch size. Required to allign batch size in the \ + reader with the model. Does not change the batch size of the reader. 
Parameters ---------- @@ -354,12 +355,12 @@ def set_mini_batch_size(self, batch_size: int) -> None: raise ValueError("The batch size must be divisible by the mini batch size") self.mini_batch = batch_size - def read_batch(self, start: int, end: int) -> None: + def __read_batch(self, start: int, end: int) -> None: # read image batch batch_image_filenames = self.image_filenames[start:end] batch_mask_filenames = self.mask_filenames[start:end] - for i in range(len(batch_image_filenames)): - if batch_image_filenames[i] != batch_mask_filenames[i]: + for image, mask in zip(batch_image_filenames, batch_mask_filenames): + if image != mask: raise ValueError("The image and mask directories do not match") # calculate number of mini batches in a batch @@ -443,9 +444,9 @@ def read_batch(self, start: int, end: int) -> None: mask = np.reshape(mask, self.output_size) batch_images[i, j, :, :, :] = image - raw_masks[ - j, ... - ] = mask # NOTE: this provides the flexibility required to process both column and matrix vectors + # NOTE: this provides the flexibility required to process both + # column and matrix vectors + raw_masks[j, ...] = mask batch_masks[i, ...] 
= ImagePreprocessor.onehot_encode( raw_masks, self.output_size, self.num_classes @@ -457,7 +458,7 @@ def read_batch(self, start: int, end: int) -> None: # required to check when to read the next batch - def __len__(self): + def __len__(self) -> int: return int(np.floor(len(self.image_filenames) / float(self.batch_size))) def __getitem__(self, index) -> tuple[np.ndarray, np.ndarray]: @@ -468,7 +469,7 @@ def __getitem__(self, index) -> tuple[np.ndarray, np.ndarray]: self.validity_index = 0 if index == self.validity_index: - self.read_batch(index * self.batch_size, (index + 1) * self.batch_size) + self.__read_batch(index * self.batch_size, (index + 1) * self.batch_size) self.validity_index = (self.batch_size // self.mini_batch) + index # slices new batch @@ -476,16 +477,16 @@ def __getitem__(self, index) -> tuple[np.ndarray, np.ndarray]: self.validity_index - index ) - batch_images = self.image_batch_store[store_index, ...] - batch_masks = self.mask_batch_store[store_index, ...] + batch_images = self.image_batch_store[store_index, ...] # type: ignore + batch_masks = self.mask_batch_store[store_index, ...] 
# type: ignore return batch_images, batch_masks - def on_epoch_end(self): + def on_epoch_end(self) -> None: # Shuffle image and mask filenames - self.shuffle_filenames() + self.__shuffle_filenames() - def shuffle_filenames(self): + def __shuffle_filenames(self) -> None: if self.shuffle: state = np.random.RandomState(self.seed + self.shuffle_counter) self.shuffle_counter += 1 From 4ee21bd1ebddf6d36a89d5edef421f879def6007 Mon Sep 17 00:00:00 2001 From: Sajtospoga01 Date: Tue, 6 Jun 2023 11:43:55 +0100 Subject: [PATCH 13/18] update flowgenerator so it reads weights, doesnt return them --- utilities/segmentation_utils/flowreader.py | 26 ++++++++++++++++++---- 1 file changed, 22 insertions(+), 4 deletions(-) diff --git a/utilities/segmentation_utils/flowreader.py b/utilities/segmentation_utils/flowreader.py index 8dd40a8..4771fb3 100644 --- a/utilities/segmentation_utils/flowreader.py +++ b/utilities/segmentation_utils/flowreader.py @@ -11,7 +11,7 @@ from keras.utils import Sequence from PIL import Image from tqdm import tqdm - +import pandas as pd from utilities.segmentation_utils import ImagePreprocessor @@ -222,7 +222,10 @@ class FlowGeneratorExperimental(Sequence): Note: in case the output is a column vector it has to be in the shape (x, 1) Note: this is an experimental version of the flow generator, which uses a \ custom implemented dataloader instead of the keras ImageDataGenerator - + #TODO: Instead of using direct paths, and arguments, reading heads should be used + #TODO: as it reduces the number of arguments, and makes the code more readable and reduces + #TODO: cupling + Parameters ---------- :string image: path to the image directory @@ -240,6 +243,7 @@ class FlowGeneratorExperimental(Sequence): :bool preprocessing_enabled: whether to apply preprocessing or not, defaults to True :int seed: seed for flow from directory :int preprocessing_seed: seed for preprocessing, defaults to None + :bool read_weights: whether to read the weights from the mask directory, 
defaults to False Raises ------ @@ -266,6 +270,8 @@ def __init__( preprocessing_enabled: bool = True, seed: int = 909, preprocessing_seed: Optional[int] = None, + read_weights: bool = False, + weights_path: Optional[str] = None, ): if len(output_size) != 2: raise ValueError("The output size has to be a tuple of length 2") @@ -287,6 +293,8 @@ def __init__( self.seed = seed self.preprocessing_enabled = preprocessing_enabled self.preprocessing_seed = preprocessing_seed + self.read_weights = read_weights + self.weights_path = weights_path ( self.preprocessing_queue_image, @@ -297,6 +305,16 @@ def __init__( sorted(os.listdir(os.path.join(self.image_path))) ) self.mask_filenames = np.array(sorted(os.listdir(os.path.join(self.mask_path)))) + if self.read_weights: + weights_df = pd.read_csv( + os.path.join(self.weights_path, "distribution.csv"), header=None + ) + self.weights = weights_df.to_numpy()[:, 1:] + weight_names = weights_df.to_numpy()[:, 0] + for mask, weight_name in zip(self.mask_filenames, weight_names): + if mask != weight_name: + raise ValueError("The mask and weight directories do not match") + self.linked_data = [self.image_filenames, self.mask_filenames, self.weights] self.__shuffle_filenames() self.dataset_size = self.__len__() @@ -492,5 +510,5 @@ def __shuffle_filenames(self) -> None: self.shuffle_counter += 1 shuffled_indices = state.permutation(len(self.image_filenames)) shuffled_indices = shuffled_indices.astype(int) - self.image_filenames = self.image_filenames[shuffled_indices] - self.mask_filenames = self.mask_filenames[shuffled_indices] + for array in self.linked_data: + array = array[shuffled_indices] From 2eed73a6e0a1ec1f8528dee148576c806519e748 Mon Sep 17 00:00:00 2001 From: Sajtospoga01 Date: Tue, 6 Jun 2023 11:51:24 +0100 Subject: [PATCH 14/18] adds sorting to weight reader --- utilities/segmentation_utils/flowreader.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/utilities/segmentation_utils/flowreader.py 
b/utilities/segmentation_utils/flowreader.py index 4771fb3..24b09e6 100644 --- a/utilities/segmentation_utils/flowreader.py +++ b/utilities/segmentation_utils/flowreader.py @@ -309,8 +309,11 @@ def __init__( weights_df = pd.read_csv( os.path.join(self.weights_path, "distribution.csv"), header=None ) - self.weights = weights_df.to_numpy()[:, 1:] - weight_names = weights_df.to_numpy()[:, 0] + weights_np = weights_df.to_numpy() + weights_np = sorted(weights_np, key=lambda x: x[0]) + + self.weights = weights_np[:, 1:] + weight_names = weights_np[:, 0] for mask, weight_name in zip(self.mask_filenames, weight_names): if mask != weight_name: raise ValueError("The mask and weight directories do not match") From 591c276b8f0d479ad95f274935fa2c48380a9ca7 Mon Sep 17 00:00:00 2001 From: Andras Bodrogai Date: Tue, 6 Jun 2023 12:22:25 +0100 Subject: [PATCH 15/18] fixes dtype errors in image preprocessor casting, and fixes weight reader problems in flowreader --- .../segmentation_utils/ImagePreprocessor.py | 4 +-- utilities/segmentation_utils/flowreader.py | 26 +++++++++++++------ 2 files changed, 20 insertions(+), 10 deletions(-) diff --git a/utilities/segmentation_utils/ImagePreprocessor.py b/utilities/segmentation_utils/ImagePreprocessor.py index 5700bea..8ad3033 100644 --- a/utilities/segmentation_utils/ImagePreprocessor.py +++ b/utilities/segmentation_utils/ImagePreprocessor.py @@ -115,7 +115,7 @@ def onehot_encode(masks, output_size, num_classes) -> tf.Tensor: encoded = encoded.reshape( (masks.shape[0], output_size[0] * output_size[1], num_classes) ) - encoded = tf.convert_to_tensor(encoded, dtype=tf.float32) + encoded = tf.convert_to_tensor(encoded) return encoded @@ -187,7 +187,7 @@ def augmentation_pipeline( else: mask = tf.squeeze(mask, axis=-1) - mask = tf.convert_to_tensor(mask, dtype=tf.float32) + mask = tf.convert_to_tensor(mask) # image = tf.convert_to_tensor(tf.clip_by_value(image, 0, 1)) return image, mask diff --git a/utilities/segmentation_utils/flowreader.py 
b/utilities/segmentation_utils/flowreader.py index 24b09e6..9ab33aa 100644 --- a/utilities/segmentation_utils/flowreader.py +++ b/utilities/segmentation_utils/flowreader.py @@ -302,22 +302,28 @@ def __init__( ) = ImagePreprocessor.generate_default_queue() self.image_filenames = np.array( - sorted(os.listdir(os.path.join(self.image_path))) + sorted(os.listdir(self.image_path)) ) - self.mask_filenames = np.array(sorted(os.listdir(os.path.join(self.mask_path)))) + self.mask_filenames = np.array(sorted(os.listdir(self.mask_path))) if self.read_weights: weights_df = pd.read_csv( - os.path.join(self.weights_path, "distribution.csv"), header=None + self.weights_path, header=None ) weights_np = weights_df.to_numpy() - weights_np = sorted(weights_np, key=lambda x: x[0]) - - self.weights = weights_np[:, 1:] + print(weights_np.shape) + #sort the numpy array by the first column + weights_np = weights_np[weights_np[:,0].argsort()] + + print(weights_np) + self.weights = weights_np[:,1:].astype(np.float64) weight_names = weights_np[:, 0] for mask, weight_name in zip(self.mask_filenames, weight_names): if mask != weight_name: raise ValueError("The mask and weight directories do not match") - self.linked_data = [self.image_filenames, self.mask_filenames, self.weights] + + self.linked_data = [self.image_filenames, self.mask_filenames] + if self.read_weights: + self.linked_data.append(self.weights) self.__shuffle_filenames() self.dataset_size = self.__len__() @@ -500,8 +506,12 @@ def __getitem__(self, index) -> tuple[np.ndarray, np.ndarray]: batch_images = self.image_batch_store[store_index, ...] # type: ignore batch_masks = self.mask_batch_store[store_index, ...] # type: ignore + if self.read_weights: + batch_weights = self.weights[index * self.batch_size : (index + 1) * self.batch_size, ...] 
- return batch_images, batch_masks + return batch_images, batch_masks, batch_weights + else: + return batch_images, batch_masks def on_epoch_end(self) -> None: # Shuffle image and mask filenames From 1dfaa9d666360aa5c9d84cecc53838a056b85fc7 Mon Sep 17 00:00:00 2001 From: Sajtospoga01 Date: Mon, 26 Jun 2023 11:04:33 +0200 Subject: [PATCH 16/18] update tests to fit new changes, add pd to requirements --- requirements.txt | 1 + .../flow_reader_test.py | 8 +- .../image_preprocessor_test.py | 51 ++++---- .../test_flowreader.py | 112 ++++++++++++++++++ .../segmentation_utils/ImagePreprocessor.py | 1 + utilities/segmentation_utils/flowreader.py | 4 +- 6 files changed, 146 insertions(+), 31 deletions(-) create mode 100644 tests/segmentation_utils_tests.py/test_flowreader.py diff --git a/requirements.txt b/requirements.txt index 48da228..cfa2fbc 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,3 +3,4 @@ numpy==1.24.1 rasterio==1.3.6 Pillow==9.4.0 tqdm==4.64.1 +pandas==1.5.1 diff --git a/tests/segmentation_utils_tests.py/flow_reader_test.py b/tests/segmentation_utils_tests.py/flow_reader_test.py index 8b17913..437fc80 100644 --- a/tests/segmentation_utils_tests.py/flow_reader_test.py +++ b/tests/segmentation_utils_tests.py/flow_reader_test.py @@ -1,6 +1,7 @@ import os import numpy as np +import pytest import tensorflow as tf from keras.preprocessing.image import ImageDataGenerator from pytest import MonkeyPatch @@ -67,11 +68,10 @@ def test_makes_flow_generator_with_queue() -> None: # create a copy of the generator args new_generator_args = generator_args.copy() - new_generator_args["preprocessing_queue_image"] = image_queue - new_generator_args["preprocessing_queue_mask"] = mask_queue # create a flow generator - FlowGenerator(**new_generator_args) + generator = FlowGenerator(**new_generator_args) + generator.set_preprocessing_pipeline(image_queue, mask_queue) def test_makes_flow_generator_wrong_shape() -> None: @@ -181,7 +181,7 @@ def test_get_generator() -> None: 
patch.undo() patch.undo() - +@pytest.mark.skip(reason="Deprecated functionality") def test_reader_error_raised() -> None: try: # predifining input variables diff --git a/tests/segmentation_utils_tests.py/image_preprocessor_test.py b/tests/segmentation_utils_tests.py/image_preprocessor_test.py index 94bb086..aa7f18d 100644 --- a/tests/segmentation_utils_tests.py/image_preprocessor_test.py +++ b/tests/segmentation_utils_tests.py/image_preprocessor_test.py @@ -1,10 +1,12 @@ import numpy as np +import pytest import tensorflow as tf from utilities.segmentation_utils import ImagePreprocessor -def test_image_onehot_encoder() -> None: +@pytest.mark.skip(reason="Deprecated functionality") +def test_image_onehot_encoder_column() -> None: # predifining input variables n_classes = 2 batch_size = 1 @@ -24,39 +26,37 @@ def test_image_onehot_encoder() -> None: assert one_hot_image.shape == ( 1, - image_size[0] // 2 * image_size[1] // 2, + output_size[0] * output_size[1], n_classes, ) assert np.array_equal(one_hot_image, onehot_test) -def test_image_augmentation_pipeline_column() -> None: +def test_image_onehot_encoder_squarematrix() -> None: # predifining input variables - image = np.zeros((512, 512, 3)) - mask = np.zeros((256 * 256, 1)) - image = tf.convert_to_tensor(image) - mask = tf.convert_to_tensor(mask) + n_classes = 2 + batch_size = 1 + image_size = (512, 512) + output_size = (256, 256) - input_size = (512, 512) - output_size = (256 * 256, 1) - output_reshape = (256, 256) + # creating a mask with 2 classes + mask = np.zeros((batch_size, output_size[0], output_size[1])) + mask[:, ::2,:] = 1 - # creating dummy queues - image_queue = ImagePreprocessor.PreprocessingQueue( - queue=[lambda x, y, seed: x], arguments=[{"y": 1}] - ) - mask_queue = ImagePreprocessor.PreprocessingQueue( - queue=[lambda x, y, seed: x], arguments=[{"y": 1}] - ) + # creating a onehot mask to compare with the output of the function + onehot_test = np.zeros((batch_size, output_size[0] , output_size[1], 
n_classes)) + onehot_test[:, ::2, :,1] = 1 + onehot_test[:, 1::2,:, 0] = 1 - image_new, mask_new = ImagePreprocessor.augmentation_pipeline( - image, mask, input_size, output_size, image_queue, mask_queue,output_reshape - ) - image_new = image_new.numpy() - mask_new = mask_new.numpy() + one_hot_image = ImagePreprocessor.onehot_encode(mask, output_size, n_classes) - assert np.array(image_new).shape == (512, 512, 3) - assert np.array(mask_new).shape == (256 * 256, 1, 1) + assert one_hot_image.shape == ( + 1, + output_size[0], + output_size[1], + n_classes, + ) + assert np.array_equal(one_hot_image, onehot_test) def test_image_augmentation_pipeline_squarematrix() -> None: @@ -123,5 +123,4 @@ def test_flatten() -> None: image = tf.convert_to_tensor(image) image = ImagePreprocessor.flatten(image, (512, 512), 3) image = image.numpy() - assert image.shape == (512 * 512, 1, 3) - + assert image.shape == (512 * 512, 3) diff --git a/tests/segmentation_utils_tests.py/test_flowreader.py b/tests/segmentation_utils_tests.py/test_flowreader.py new file mode 100644 index 0000000..6923bd7 --- /dev/null +++ b/tests/segmentation_utils_tests.py/test_flowreader.py @@ -0,0 +1,112 @@ +import os + +import numpy as np +import pytest +import tensorflow as tf +from keras.preprocessing.image import ImageDataGenerator +from pytest import MonkeyPatch + +from utilities.segmentation_utils import ImagePreprocessor +from utilities.segmentation_utils.flowreader import FlowGeneratorExperimental + + +def test_can_create_instance() -> None: + patch = MonkeyPatch() + # mock list directory + patch.setattr(os, "listdir", lambda x: ["a", "b", "c"]) + + # create generator instance + generator = FlowGeneratorExperimental( + image_path="tests/segmentation_utils_tests/flow_reader_test", + mask_path="tests/segmentation_utils_tests/flow_reader_test", + image_size=(512, 512), + output_size=(512,512), + num_classes=7, + channel_mask= [True,True,True] + ) + pass + +def test_set_preprocessing_pipeline() -> None: + 
patch = MonkeyPatch() + # mock list directory + patch.setattr(os, "listdir", lambda x: ["a", "b", "c"]) + + # create generator instance + generator = FlowGeneratorExperimental( + image_path="tests/segmentation_utils_tests/flow_reader_test", + mask_path="tests/segmentation_utils_tests/flow_reader_test", + image_size=(512, 512), + output_size=(512,512), + num_classes=7, + channel_mask= [True,True,True] + ) + + image_queue = ImagePreprocessor.PreprocessingQueue(queue=[],arguments=[]) + mask_queue = ImagePreprocessor.PreprocessingQueue(queue=[],arguments=[]) + + generator.set_preprocessing_pipeline( + image_queue,mask_queue + ) + pass + +def test_set_mini_batch_size() -> None: + patch = MonkeyPatch() + # mock list directory + patch.setattr(os, "listdir", lambda x: ["a", "b", "c"]) + + # create generator instance + generator = FlowGeneratorExperimental( + image_path="tests/segmentation_utils_tests/flow_reader_test", + mask_path="tests/segmentation_utils_tests/flow_reader_test", + image_size=(512, 512), + output_size=(512,512), + num_classes=7, + channel_mask= [True,True,True] + ) + + generator.set_mini_batch_size(2) + assert generator.mini_batch == 2 + +def test_set_mini_batch_size_too_large() -> None: + + patch = MonkeyPatch() + # mock list directory + patch.setattr(os, "listdir", lambda x: ["a", "b", "c"]) + + # create generator instance + generator = FlowGeneratorExperimental( + image_path="tests/segmentation_utils_tests/flow_reader_test", + mask_path="tests/segmentation_utils_tests/flow_reader_test", + image_size=(512, 512), + output_size=(512,512), + num_classes=7, + channel_mask= [True,True,True] + ) + with pytest.raises(ValueError) as exc_info: + generator.set_mini_batch_size(5) + + assert exc_info.value.args[0] == "The mini batch size cannot be larger than the batch size" + + +def test_set_mini_batch_size_not_devisable() -> None: + + patch = MonkeyPatch() + # mock list directory + patch.setattr(os, "listdir", lambda x: ["a", "b", "c"]) + + # create generator 
instance + generator = FlowGeneratorExperimental( + image_path="tests/segmentation_utils_tests/flow_reader_test", + mask_path="tests/segmentation_utils_tests/flow_reader_test", + image_size=(512, 512), + output_size=(512,512), + num_classes=7, + channel_mask= [True,True,True], + batch_size=3 + + ) + with pytest.raises(ValueError) as exc_info: + generator.set_mini_batch_size(2) + + assert exc_info.value.args[0] == "The batch size must be divisible by the mini batch size" + diff --git a/utilities/segmentation_utils/ImagePreprocessor.py b/utilities/segmentation_utils/ImagePreprocessor.py index 8ad3033..bf5e773 100644 --- a/utilities/segmentation_utils/ImagePreprocessor.py +++ b/utilities/segmentation_utils/ImagePreprocessor.py @@ -107,6 +107,7 @@ def onehot_encode(masks, output_size, num_classes) -> tf.Tensor: ------- :return tf.Tensor: Batch of one-hot encoded masks """ + #!TODO: add support for 1D masks encoded = np.zeros((masks.shape[0], output_size[0], output_size[1], num_classes)) for i in range(num_classes): mask = (masks == i).astype(float) diff --git a/utilities/segmentation_utils/flowreader.py b/utilities/segmentation_utils/flowreader.py index 9ab33aa..f229bf1 100644 --- a/utilities/segmentation_utils/flowreader.py +++ b/utilities/segmentation_utils/flowreader.py @@ -7,11 +7,12 @@ from typing import Optional import numpy as np +import pandas as pd from keras.preprocessing.image import ImageDataGenerator from keras.utils import Sequence from PIL import Image from tqdm import tqdm -import pandas as pd + from utilities.segmentation_utils import ImagePreprocessor @@ -252,6 +253,7 @@ class FlowGeneratorExperimental(Sequence): :ValueError: if the output size is not a square matrix or a column vector """ + #! 
these are class variables, and should be moved to the constructor to make them instance variables
    preprocessing_seed = None
    preprocessing_queue_image = None
    preprocessing_queue_mask = None

From fad0b630e64779f805c25ae67f7c569b452eda21 Mon Sep 17 00:00:00 2001
From: Sajtospoga01
Date: Mon, 26 Jun 2023 11:19:49 +0200
Subject: [PATCH 17/18] add arguments to image cutting loop so parameters are
 changeable

---
 utilities/transform_utils/image_cutting.py | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/utilities/transform_utils/image_cutting.py b/utilities/transform_utils/image_cutting.py
index 1bdc37b..4389bcc 100644
--- a/utilities/transform_utils/image_cutting.py
+++ b/utilities/transform_utils/image_cutting.py
@@ -320,6 +320,9 @@ def cut_ims_in_directory(
     target_dims: tuple[int, int] = (512, 512),
     mask=False,
     preprocess: bool = False,
+    batch_size: int = 100,
+    format: str = "tiff",
+    preprocess_function=__preprocess_mask_image,
 ) -> None:
     """Finds images at "Path_ims" cuts them into dimension "target_dims",
     and then saves them as png files to "path_target_dir".
@@ -333,7 +336,7 @@ def cut_ims_in_directory(
     :bool, optional mask: If true assumes images are masks. Defaults to False.
     :bool, optional preprocess: If true preprocesses images. Defaults to False.
""" - print("the following files are located at input Path :") + dir_contents = os.listdir(path_ims) dir_contents = sorted(dir_contents) batch_size = 100 @@ -383,7 +386,7 @@ def cut_ims_in_directory( # fill batch array for i, n in enumerate(cut_im): if preprocess: - n = __preprocess_mask_image(n) + n = preprocess_function(n) if mask: batch[counter, i, :, :, 0] = n[:, :] else: @@ -416,7 +419,8 @@ def cut_ims_in_directory( str(target_dims[0]), "x", str(target_dims[1]), - ".tiff", + ".", + format, ] ), ) From f3c59f264ed651a3d009e909e6fac0df11443fbc Mon Sep 17 00:00:00 2001 From: Ayleen Sohaib <2684413S@student.gla.ac.uk> Date: Sun, 30 Jul 2023 17:07:33 +0100 Subject: [PATCH 18/18] in __len__ function added self.mini_batch --- utilities/segmentation_utils/flowreader.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utilities/segmentation_utils/flowreader.py b/utilities/segmentation_utils/flowreader.py index f229bf1..df5e9ec 100644 --- a/utilities/segmentation_utils/flowreader.py +++ b/utilities/segmentation_utils/flowreader.py @@ -488,7 +488,7 @@ def __read_batch(self, start: int, end: int) -> None: # required to check when to read the next batch def __len__(self) -> int: - return int(np.floor(len(self.image_filenames) / float(self.batch_size))) + return int(np.floor(len(self.image_filenames) / float(self.mini_batch))) def __getitem__(self, index) -> tuple[np.ndarray, np.ndarray]: # check if the batch is already cached