From 72dc235621d8336ef5d463cad64cf63b58820cf4 Mon Sep 17 00:00:00 2001
From: Ayleen Sohaib <2684413S@student.gla.ac.uk>
Date: Sat, 12 Aug 2023 20:50:24 +0100
Subject: [PATCH 01/75] Added file including strategies for reader

---
 .../segmentation_utils/reading_strategies.py  | 117 ++++++++++++++++++
 1 file changed, 117 insertions(+)
 create mode 100644 utilities/segmentation_utils/reading_strategies.py

diff --git a/utilities/segmentation_utils/reading_strategies.py b/utilities/segmentation_utils/reading_strategies.py
new file mode 100644
index 0000000..8677020
--- /dev/null
+++ b/utilities/segmentation_utils/reading_strategies.py
@@ -0,0 +1,117 @@
+from typing import Protocol, Tuple
+import os
+import numpy as np
+from PIL import Image
+import rasterio
+
+from flowreader import FlowGeneratorExperimental
+from utilities.segmentation_utils import ImagePreprocessor
+
+
+class ReaderInterface(Protocol):
+
+    def read_batch(self, start:int, end: int) -> None:
+        ...
+    
+    def get_dataset_size(self) -> None:
+        ...
+
+class RGB_Image_Strategy:
+
+    def __init__(
+        self,
+        image_path: str,
+        image_size: tuple [int, int],
+        batch_image_filenames: np.ndarray,   
+        mini_batch: int,
+    ):
+        self.image_path = image_path
+        self.image_size = image_size
+        self.mini_batch = mini_batch
+        self.batch_image_files = batch_image_filenames
+
+    def read_batch(self, batch_size, dataset_index) -> np.ndarray:
+        #read images with PIL
+        for i in range(batch_size):
+                image_index = i + dataset_index
+                image = Image.open(
+                    os.path.join(self.image_path, self.batch_image_filenames[image_index])
+                    ).resize(image_size, Image.ANTIALIAS)
+                image = np.array(image)
+                image = image / 255
+        return image
+
+    def get_dataset_size(self) -> int:
+        dataset_size = FlowGeneratorExperimental.__len__
+        return dataset_size
+
+class Mask_Image_Strategy:
+
+    def __init__(
+        self,
+        mask_path: str,
+        batch_mask_filenames: np.ndarray,   
+        output_reshape: tuple[int, int],
+    ):
+    
+    def read_batch(self, batch_size, dataset_index) -> np.ndarray:
+        #read images with PIL
+        for i in range(batch_size):
+            for j in range(mini_batch):
+                image_index = i * mini_batch + j
+                mask = Image.open(
+                    os.path.join(mask_path, batch_mask_filenames[image_index])
+                    ).resize(output_reshape)
+                mask = np.array(mask)
+        return mask
+
+    def get_dataset_size(self) -> int:
+        dataset_size = FlowGeneratorExperimental.__len__
+        return dataset_size
+
+
+
+#should this be a batch with read_batch as the function having all the code in it?
+    # def initialise_batch_img(self, mini_batch, image_size, channel_mask) -> np.ndarray:
+
+    #     num_mini_batches = Reader.calculate_mini_batch
+    #     channel_mask = np.array(channel_mask)
+    #     n_channels = np.sum(channel_mask)
+
+    #     batch_images = np.zeros(
+    #         (
+    #             num_mini_batches,
+    #             mini_batch,
+    #             image_size[0],
+    #             image_size[1],
+    #             n_channels,
+    #         )
+    #     )
+    #     return batch_images
+
+    # #output
+    # def initialise_batch_mask(self, output_size, mini_batch, num_classes) -> Tuple[bool, np.ndarray]:
+    #     #num_mini_batches = Reader.calculate_mini_batch
+
+    #     if self.output_size[1] == 1:
+    #         column = True
+    #         batch_masks = np.zeros(
+    #             (
+    #                 num_mini_batches,
+    #                 mini_batch, output_size[0],
+    #                 num_classes
+    #             )
+    #         )
+    #     else:
+    #         column = False
+    #         batch_masks = np.zeros(
+    #             (
+    #                 num_mini_batches,
+    #                 mini_batch,
+    #                 output_size[0],
+    #                 output_size[1],
+    #                 num_classes,
+    #             )
+    #         )
+
+    #     return column, batch_masks

From b3fad7f19481de532f26ebaf0a912bb991c4512c Mon Sep 17 00:00:00 2001
From: Ayleen Sohaib <2684413S@student.gla.ac.uk>
Date: Sat, 12 Aug 2023 21:41:27 +0100
Subject: [PATCH 02/75] changes to for loops and class constructors

---
 .../segmentation_utils/reading_strategies.py  | 81 ++++---------------
 1 file changed, 17 insertions(+), 64 deletions(-)

diff --git a/utilities/segmentation_utils/reading_strategies.py b/utilities/segmentation_utils/reading_strategies.py
index 8677020..ad7c697 100644
--- a/utilities/segmentation_utils/reading_strategies.py
+++ b/utilities/segmentation_utils/reading_strategies.py
@@ -1,4 +1,4 @@
-from typing import Protocol, Tuple
+from typing import Protocol
 import os
 import numpy as np
 from PIL import Image
@@ -22,23 +22,21 @@ def __init__(
         self,
         image_path: str,
         image_size: tuple [int, int],
-        batch_image_filenames: np.ndarray,   
-        mini_batch: int,
+        batch_image_filenames: np.ndarray,
     ):
         self.image_path = image_path
         self.image_size = image_size
-        self.mini_batch = mini_batch
         self.batch_image_files = batch_image_filenames
 
     def read_batch(self, batch_size, dataset_index) -> np.ndarray:
         #read images with PIL
         for i in range(batch_size):
-                image_index = i + dataset_index
-                image = Image.open(
-                    os.path.join(self.image_path, self.batch_image_filenames[image_index])
-                    ).resize(image_size, Image.ANTIALIAS)
-                image = np.array(image)
-                image = image / 255
+            image_index = i + dataset_index
+            image = Image.open(
+                os.path.join(self.image_path, self.batch_image_filenames[image_index])
+            ).resize(self.image_size, Image.ANTIALIAS)
+            image = np.array(image)
+            image = image / 255
         return image
 
     def get_dataset_size(self) -> int:
@@ -50,68 +48,23 @@ class Mask_Image_Strategy:
     def __init__(
         self,
         mask_path: str,
-        batch_mask_filenames: np.ndarray,   
+        batch_mask_filenames: np.ndarray,
         output_reshape: tuple[int, int],
     ):
+        self.mask_path = mask_path
+        self.batch_mask_filenames = batch_mask_filenames
+        self.output_reshape = output_reshape
     
     def read_batch(self, batch_size, dataset_index) -> np.ndarray:
         #read images with PIL
         for i in range(batch_size):
-            for j in range(mini_batch):
-                image_index = i * mini_batch + j
-                mask = Image.open(
-                    os.path.join(mask_path, batch_mask_filenames[image_index])
-                    ).resize(output_reshape)
-                mask = np.array(mask)
+            image_index = i + dataset_index
+            mask = Image.open(
+                os.path.join(self.mask_path, self.batch_mask_filenames[image_index])
+                ).resize(self.output_reshape)
+            mask = np.array(mask)
         return mask
 
     def get_dataset_size(self) -> int:
         dataset_size = FlowGeneratorExperimental.__len__
         return dataset_size
-
-
-
-#should this be a batch with read_batch as the function having all the code in it?
-    # def initialise_batch_img(self, mini_batch, image_size, channel_mask) -> np.ndarray:
-
-    #     num_mini_batches = Reader.calculate_mini_batch
-    #     channel_mask = np.array(channel_mask)
-    #     n_channels = np.sum(channel_mask)
-
-    #     batch_images = np.zeros(
-    #         (
-    #             num_mini_batches,
-    #             mini_batch,
-    #             image_size[0],
-    #             image_size[1],
-    #             n_channels,
-    #         )
-    #     )
-    #     return batch_images
-
-    # #output
-    # def initialise_batch_mask(self, output_size, mini_batch, num_classes) -> Tuple[bool, np.ndarray]:
-    #     #num_mini_batches = Reader.calculate_mini_batch
-
-    #     if self.output_size[1] == 1:
-    #         column = True
-    #         batch_masks = np.zeros(
-    #             (
-    #                 num_mini_batches,
-    #                 mini_batch, output_size[0],
-    #                 num_classes
-    #             )
-    #         )
-    #     else:
-    #         column = False
-    #         batch_masks = np.zeros(
-    #             (
-    #                 num_mini_batches,
-    #                 mini_batch,
-    #                 output_size[0],
-    #                 output_size[1],
-    #                 num_classes,
-    #             )
-    #         )
-
-    #     return column, batch_masks

From e062bf9d1460272a3a7714a17483cad79bae1cfd Mon Sep 17 00:00:00 2001
From: Ayleen Sohaib <2684413S@student.gla.ac.uk>
Date: Sun, 13 Aug 2023 12:14:36 +0100
Subject: [PATCH 03/75] changes to interface name, get function and constructor

---
 .../segmentation_utils/reading_strategies.py  | 22 +++++++++----------
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/utilities/segmentation_utils/reading_strategies.py b/utilities/segmentation_utils/reading_strategies.py
index ad7c697..ca3b120 100644
--- a/utilities/segmentation_utils/reading_strategies.py
+++ b/utilities/segmentation_utils/reading_strategies.py
@@ -8,7 +8,7 @@
 from utilities.segmentation_utils import ImagePreprocessor
 
 
-class ReaderInterface(Protocol):
+class IReader(Protocol):
 
     def read_batch(self, start:int, end: int) -> None:
         ...
@@ -22,25 +22,25 @@ def __init__(
         self,
         image_path: str,
         image_size: tuple [int, int],
-        batch_image_filenames: np.ndarray,
     ):
         self.image_path = image_path
         self.image_size = image_size
-        self.batch_image_files = batch_image_filenames
 
     def read_batch(self, batch_size, dataset_index) -> np.ndarray:
         #read images with PIL
+        image_filenames = np.array(sorted(os.listdir(self.image_path)))
         for i in range(batch_size):
             image_index = i + dataset_index
             image = Image.open(
-                os.path.join(self.image_path, self.batch_image_filenames[image_index])
+                os.path.join(self.image_path, image_filenames[image_index])
             ).resize(self.image_size, Image.ANTIALIAS)
             image = np.array(image)
             image = image / 255
         return image
 
-    def get_dataset_size(self) -> int:
-        dataset_size = FlowGeneratorExperimental.__len__
+    def get_dataset_size(self, mini_batch) -> int:
+        image_filenames = np.array(sorted(os.listdir(self.image_path)))
+        dataset_size = int(np.floor(len(image_filenames) / float(mini_batch)))
         return dataset_size
 
 class Mask_Image_Strategy:
@@ -48,23 +48,23 @@ class Mask_Image_Strategy:
     def __init__(
         self,
         mask_path: str,
-        batch_mask_filenames: np.ndarray,
         output_reshape: tuple[int, int],
     ):
         self.mask_path = mask_path
-        self.batch_mask_filenames = batch_mask_filenames
         self.output_reshape = output_reshape
     
     def read_batch(self, batch_size, dataset_index) -> np.ndarray:
         #read images with PIL
+        mask_filenames = np.array(sorted(os.listdir(self.mask_path)))
         for i in range(batch_size):
             image_index = i + dataset_index
             mask = Image.open(
-                os.path.join(self.mask_path, self.batch_mask_filenames[image_index])
+                os.path.join(self.mask_path, mask_filenames[image_index])
                 ).resize(self.output_reshape)
             mask = np.array(mask)
         return mask
 
-    def get_dataset_size(self) -> int:
-        dataset_size = FlowGeneratorExperimental.__len__
+    def get_dataset_size(self, mini_batch) -> int:
+        image_filenames = np.array(sorted(os.listdir(self.image_path)))
+        dataset_size = int(np.floor(len(image_filenames) / float(mini_batch)))
         return dataset_size

From f043497030833c31ed7f793aab870bd9409fb022 Mon Sep 17 00:00:00 2001
From: Ayleen Sohaib <2684413S@student.gla.ac.uk>
Date: Tue, 15 Aug 2023 18:16:48 +0100
Subject: [PATCH 04/75] added strategies using rasterio to read images

---
 .../segmentation_utils/reading_strategies.py  | 57 +++++++++++++++++--
 1 file changed, 53 insertions(+), 4 deletions(-)

diff --git a/utilities/segmentation_utils/reading_strategies.py b/utilities/segmentation_utils/reading_strategies.py
index ca3b120..a36365b 100644
--- a/utilities/segmentation_utils/reading_strategies.py
+++ b/utilities/segmentation_utils/reading_strategies.py
@@ -4,10 +4,6 @@
 from PIL import Image
 import rasterio
 
-from flowreader import FlowGeneratorExperimental
-from utilities.segmentation_utils import ImagePreprocessor
-
-
 class IReader(Protocol):
 
     def read_batch(self, start:int, end: int) -> None:
@@ -64,7 +60,60 @@ def read_batch(self, batch_size, dataset_index) -> np.ndarray:
             mask = np.array(mask)
         return mask
 
+    def get_dataset_size(self, mini_batch) -> int:
+        mask_filenames = np.array(sorted(os.listdir(self.mask_path)))
+        dataset_size = int(np.floor(len(mask_filenames) / float(mini_batch)))
+        return dataset_size
+    
+class Hyperspectral_Image_Strategy:
+
+    def __init__(
+        self,
+        image_path:str,
+    ):
+        self.image_path = image_path
+  
+    def read_batch(self, batch_size, dataset_index) -> np.ndarray:
+        #read images with rasterio
+        image_filenames = np.array(sorted(os.listdir(self.image_path)))
+        for i in range(batch_size):
+            image_index = i + dataset_index
+            #open the source raster dataset
+            with rasterio.open(
+                os.path.join(self.image_path, image_filenames[image_index])
+                ) as dataset:
+                 #.read() returns a numpy array that contains the raster cell values in your file.
+                image = dataset.read()
+                image = image / 255
+        return image
+    
     def get_dataset_size(self, mini_batch) -> int:
         image_filenames = np.array(sorted(os.listdir(self.image_path)))
         dataset_size = int(np.floor(len(image_filenames) / float(mini_batch)))
         return dataset_size
+
+class Hyperspectral_Mask_Image_Strategy:
+
+    def __init__(
+        self,
+        mask_path:str,
+    ):
+        self.mask_path = mask_path
+
+    def read_batch(self, batch_size, dataset_index) -> np.ndarray:
+        #read images with rasterio
+        mask_filenames = np.array(sorted(os.listdir(self.mask_path)))
+        for i in range(batch_size):
+            image_index = i + dataset_index
+            #open the source raster dataset
+            with rasterio.open(
+                os.path.join(self.mask_path, mask_filenames[image_index])
+                ) as dataset:
+                #.read() returns a numpy array that contains the raster cell values in your file.
+                mask = dataset.read()
+        return mask
+    
+    def get_dataset_size(self, mini_batch) -> int:
+        mask_filenames = np.array(sorted(os.listdir(self.mask_path)))
+        dataset_size = int(np.floor(len(mask_filenames) / float(mini_batch)))
+        return dataset_size

From 0384f8ace95ad685f9b04d8e629f6b3995fa7115 Mon Sep 17 00:00:00 2001
From: Ayleen Sohaib <2684413S@student.gla.ac.uk>
Date: Tue, 15 Aug 2023 21:48:00 +0100
Subject: [PATCH 05/75] removed mask strategies and added to constructor

---
 .../segmentation_utils/reading_strategies.py  | 64 ++-----------------
 1 file changed, 7 insertions(+), 57 deletions(-)

diff --git a/utilities/segmentation_utils/reading_strategies.py b/utilities/segmentation_utils/reading_strategies.py
index a36365b..9ae5a5d 100644
--- a/utilities/segmentation_utils/reading_strategies.py
+++ b/utilities/segmentation_utils/reading_strategies.py
@@ -12,15 +12,17 @@ def read_batch(self, start:int, end: int) -> None:
     def get_dataset_size(self) -> None:
         ...
 
-class RGB_Image_Strategy:
+class RGBImageStrategy:
 
     def __init__(
         self,
         image_path: str,
         image_size: tuple [int, int],
+        antialias: int = 1,
     ):
         self.image_path = image_path
         self.image_size = image_size
+        self.antialias = antialias
 
     def read_batch(self, batch_size, dataset_index) -> np.ndarray:
         #read images with PIL
@@ -29,7 +31,7 @@ def read_batch(self, batch_size, dataset_index) -> np.ndarray:
             image_index = i + dataset_index
             image = Image.open(
                 os.path.join(self.image_path, image_filenames[image_index])
-            ).resize(self.image_size, Image.ANTIALIAS)
+            ).resize(self.image_size, self.antialias)
             image = np.array(image)
             image = image / 255
         return image
@@ -38,34 +40,8 @@ def get_dataset_size(self, mini_batch) -> int:
         image_filenames = np.array(sorted(os.listdir(self.image_path)))
         dataset_size = int(np.floor(len(image_filenames) / float(mini_batch)))
         return dataset_size
-
-class Mask_Image_Strategy:
-
-    def __init__(
-        self,
-        mask_path: str,
-        output_reshape: tuple[int, int],
-    ):
-        self.mask_path = mask_path
-        self.output_reshape = output_reshape
     
-    def read_batch(self, batch_size, dataset_index) -> np.ndarray:
-        #read images with PIL
-        mask_filenames = np.array(sorted(os.listdir(self.mask_path)))
-        for i in range(batch_size):
-            image_index = i + dataset_index
-            mask = Image.open(
-                os.path.join(self.mask_path, mask_filenames[image_index])
-                ).resize(self.output_reshape)
-            mask = np.array(mask)
-        return mask
-
-    def get_dataset_size(self, mini_batch) -> int:
-        mask_filenames = np.array(sorted(os.listdir(self.mask_path)))
-        dataset_size = int(np.floor(len(mask_filenames) / float(mini_batch)))
-        return dataset_size
-    
-class Hyperspectral_Image_Strategy:
+class HyperspectralImageStrategy:
 
     def __init__(
         self,
@@ -83,37 +59,11 @@ def read_batch(self, batch_size, dataset_index) -> np.ndarray:
                 os.path.join(self.image_path, image_filenames[image_index])
                 ) as dataset:
                  #.read() returns a numpy array that contains the raster cell values in your file.
-                image = dataset.read()
+                image = dataset.read() #!resize using numpy resize function?
                 image = image / 255
         return image
     
     def get_dataset_size(self, mini_batch) -> int:
         image_filenames = np.array(sorted(os.listdir(self.image_path)))
         dataset_size = int(np.floor(len(image_filenames) / float(mini_batch)))
-        return dataset_size
-
-class Hyperspectral_Mask_Image_Strategy:
-
-    def __init__(
-        self,
-        mask_path:str,
-    ):
-        self.mask_path = mask_path
-
-    def read_batch(self, batch_size, dataset_index) -> np.ndarray:
-        #read images with rasterio
-        mask_filenames = np.array(sorted(os.listdir(self.mask_path)))
-        for i in range(batch_size):
-            image_index = i + dataset_index
-            #open the source raster dataset
-            with rasterio.open(
-                os.path.join(self.mask_path, mask_filenames[image_index])
-                ) as dataset:
-                #.read() returns a numpy array that contains the raster cell values in your file.
-                mask = dataset.read()
-        return mask
-    
-    def get_dataset_size(self, mini_batch) -> int:
-        mask_filenames = np.array(sorted(os.listdir(self.mask_path)))
-        dataset_size = int(np.floor(len(mask_filenames) / float(mini_batch)))
-        return dataset_size
+        return dataset_size
\ No newline at end of file

From ba9339bfa34da5d388f30fe1c014702c9ee4b042 Mon Sep 17 00:00:00 2001
From: Ayleen Sohaib <2684413S@student.gla.ac.uk>
Date: Tue, 15 Aug 2023 22:00:18 +0100
Subject: [PATCH 06/75] fixed constructor and set image_resample to default

---
 utilities/segmentation_utils/reading_strategies.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/utilities/segmentation_utils/reading_strategies.py b/utilities/segmentation_utils/reading_strategies.py
index 9ae5a5d..9421b89 100644
--- a/utilities/segmentation_utils/reading_strategies.py
+++ b/utilities/segmentation_utils/reading_strategies.py
@@ -18,11 +18,11 @@ def __init__(
         self,
         image_path: str,
         image_size: tuple [int, int],
-        antialias: int = 1,
+        image_resample: Image.Resampling.NEAREST,
     ):
         self.image_path = image_path
         self.image_size = image_size
-        self.antialias = antialias
+        self.image_resample = image_resample
 
     def read_batch(self, batch_size, dataset_index) -> np.ndarray:
         #read images with PIL
@@ -31,7 +31,7 @@ def read_batch(self, batch_size, dataset_index) -> np.ndarray:
             image_index = i + dataset_index
             image = Image.open(
                 os.path.join(self.image_path, image_filenames[image_index])
-            ).resize(self.image_size, self.antialias)
+            ).resize(self.image_size, self.image_resample)
             image = np.array(image)
             image = image / 255
         return image

From ff018b00199f3f9d3fe56e4a712850cc21a31f65 Mon Sep 17 00:00:00 2001
From: Ayleen Sohaib <2684413S@student.gla.ac.uk>
Date: Thu, 17 Aug 2023 13:42:14 +0100
Subject: [PATCH 07/75] added resizing to hyperspectral strategy

---
 utilities/segmentation_utils/reading_strategies.py | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/utilities/segmentation_utils/reading_strategies.py b/utilities/segmentation_utils/reading_strategies.py
index 9421b89..5c44615 100644
--- a/utilities/segmentation_utils/reading_strategies.py
+++ b/utilities/segmentation_utils/reading_strategies.py
@@ -17,7 +17,7 @@ class RGBImageStrategy:
     def __init__(
         self,
         image_path: str,
-        image_size: tuple [int, int],
+        image_size: tuple[int, int],
         image_resample: Image.Resampling.NEAREST,
     ):
         self.image_path = image_path
@@ -46,8 +46,13 @@ class HyperspectralImageStrategy:
     def __init__(
         self,
         image_path:str,
+        image_resize:tuple[int,int],
+        image_resample: Image.Resampling.NEAREST,
+        
     ):
         self.image_path = image_path
+        self.image_resize = image_resize
+        self.image_resample = image_resample
   
     def read_batch(self, batch_size, dataset_index) -> np.ndarray:
         #read images with rasterio
@@ -59,11 +64,13 @@ def read_batch(self, batch_size, dataset_index) -> np.ndarray:
                 os.path.join(self.image_path, image_filenames[image_index])
                 ) as dataset:
                  #.read() returns a numpy array that contains the raster cell values in your file.
-                image = dataset.read() #!resize using numpy resize function?
+                image = dataset.read()
                 image = image / 255
+                image = np.resize(self.image_resize, self.image_resample)
         return image
     
     def get_dataset_size(self, mini_batch) -> int:
         image_filenames = np.array(sorted(os.listdir(self.image_path)))
         dataset_size = int(np.floor(len(image_filenames) / float(mini_batch)))
-        return dataset_size
\ No newline at end of file
+        return dataset_size
+    
\ No newline at end of file

From cd9cd37659e0b2a6faa8555b425563bac9b5eba4 Mon Sep 17 00:00:00 2001
From: Ayleen Sohaib <2684413S@student.gla.ac.uk>
Date: Thu, 17 Aug 2023 13:43:01 +0100
Subject: [PATCH 08/75] added tests for strategies - they do not yet pass

---
 .../test_strategies.py                        | 48 +++++++++++++++++++
 1 file changed, 48 insertions(+)
 create mode 100644 tests/segmentation_utils_tests.py/test_strategies.py

diff --git a/tests/segmentation_utils_tests.py/test_strategies.py b/tests/segmentation_utils_tests.py/test_strategies.py
new file mode 100644
index 0000000..4466781
--- /dev/null
+++ b/tests/segmentation_utils_tests.py/test_strategies.py
@@ -0,0 +1,48 @@
+import os
+import numpy as np
+from PIL import Image
+from pytest import MonkeyPatch
+from utilities.segmentation_utils.reading_strategies import RGBImageStrategy
+
+def test_read_batch_image_path() -> None:
+    #should check if path is being read in correctly
+    image_strategy = RGBImageStrategy(
+    image_path="tests/segmentation_utils_tests/test_strategies",
+    image_size=(224, 224),
+    image_resample=Image.Resampling.NEAREST,
+)
+
+    batch_size = 2
+    dataset_index = 0
+    image_strategy.read_batch(batch_size, dataset_index)
+
+def test_read_batch_returns_nparray() -> None:
+    #checking if the returned value is a numpy array
+
+    image_strategy = RGBImageStrategy(
+    image_path="tests/segmentation_utils_tests/test_strategies",
+    image_size=(224, 224),
+    image_resample=Image.Resampling.NEAREST,
+)
+
+    result = image_strategy.read_batch(batch_size=2, dataset_index=0)
+    assert isinstance(result, np.ndarray) 
+
+def test_get_dataset_size() -> None:
+    #checking if the calculation is done correctly
+
+    image_strategy = RGBImageStrategy(
+    image_path="tests/segmentation_utils_tests/test_strategies",
+    image_size=(224, 224),
+    image_resample=Image.Resampling.NEAREST,
+)
+    dataset = 100 #if there are 100 images in the specified path
+    mini_batch = 32 #and we want 32 images in each batch
+    expected_value = dataset / mini_batch #number of sets of images we expect
+    
+    dataset_size = image_strategy.get_dataset_size(mini_batch)
+    assert dataset_size == expected_value
+
+
+
+

From 3e05fef84d0b27773f581a49336d3ef46a2c5c69 Mon Sep 17 00:00:00 2001
From: Sajtospoga01 <abodrogai@gmail.com>
Date: Thu, 17 Aug 2023 22:31:39 +0100
Subject: [PATCH 09/75] updated test to pass, updates variable assignment in
 strategy constructor, adds extra comments

---
 .../test_strategies.py                        | 57 +++++++++++--------
 .../segmentation_utils/reading_strategies.py  | 12 ++--
 2 files changed, 41 insertions(+), 28 deletions(-)

diff --git a/tests/segmentation_utils_tests.py/test_strategies.py b/tests/segmentation_utils_tests.py/test_strategies.py
index 4466781..3148105 100644
--- a/tests/segmentation_utils_tests.py/test_strategies.py
+++ b/tests/segmentation_utils_tests.py/test_strategies.py
@@ -1,48 +1,57 @@
 import os
+
 import numpy as np
 from PIL import Image
 from pytest import MonkeyPatch
+
 from utilities.segmentation_utils.reading_strategies import RGBImageStrategy
 
+
 def test_read_batch_image_path() -> None:
-    #should check if path is being read in correctly
+    # should check if path is being read in correctly
+    patch = MonkeyPatch()
+
+    patch.setattr(os, "listdir", lambda x: ["a", "b", "c"])
+
+    patch.setattr(Image, "open", lambda _: Image.fromarray(np.ones((224, 224, 3)).astype(np.uint8)))
+
     image_strategy = RGBImageStrategy(
-    image_path="tests/segmentation_utils_tests/test_strategies",
-    image_size=(224, 224),
-    image_resample=Image.Resampling.NEAREST,
-)
+        image_path="tests/segmentation_utils_tests/test_strategies",
+        image_size=(224, 224),
+        image_resample=Image.Resampling.NEAREST,
+    )
 
     batch_size = 2
     dataset_index = 0
     image_strategy.read_batch(batch_size, dataset_index)
+    patch.undo()
+    patch.undo()
+
 
 def test_read_batch_returns_nparray() -> None:
-    #checking if the returned value is a numpy array
+    # checking if the returned value is a numpy array
 
     image_strategy = RGBImageStrategy(
-    image_path="tests/segmentation_utils_tests/test_strategies",
-    image_size=(224, 224),
-    image_resample=Image.Resampling.NEAREST,
-)
+        image_path="tests/segmentation_utils_tests/test_strategies",
+        image_size=(224, 224),
+        image_resample=Image.Resampling.NEAREST,
+    )
 
     result = image_strategy.read_batch(batch_size=2, dataset_index=0)
-    assert isinstance(result, np.ndarray) 
+    assert isinstance(result, np.ndarray)
+
 
 def test_get_dataset_size() -> None:
-    #checking if the calculation is done correctly
+    # checking if the calculation is done correctly
 
     image_strategy = RGBImageStrategy(
-    image_path="tests/segmentation_utils_tests/test_strategies",
-    image_size=(224, 224),
-    image_resample=Image.Resampling.NEAREST,
-)
-    dataset = 100 #if there are 100 images in the specified path
-    mini_batch = 32 #and we want 32 images in each batch
-    expected_value = dataset / mini_batch #number of sets of images we expect
-    
+        image_path="tests/segmentation_utils_tests/test_strategies",
+        image_size=(224, 224),
+        image_resample=Image.Resampling.NEAREST,
+    )
+    dataset = 100  # if there are 100 images in the specified path
+    mini_batch = 32  # and we want 32 images in each batch
+    expected_value = dataset / mini_batch  # number of sets of images we expect
+
     dataset_size = image_strategy.get_dataset_size(mini_batch)
     assert dataset_size == expected_value
-
-
-
-
diff --git a/utilities/segmentation_utils/reading_strategies.py b/utilities/segmentation_utils/reading_strategies.py
index 5c44615..cec427d 100644
--- a/utilities/segmentation_utils/reading_strategies.py
+++ b/utilities/segmentation_utils/reading_strategies.py
@@ -1,8 +1,10 @@
-from typing import Protocol
 import os
+from typing import Protocol
+
 import numpy as np
-from PIL import Image
 import rasterio
+from PIL import Image
+
 
 class IReader(Protocol):
 
@@ -18,7 +20,7 @@ def __init__(
         self,
         image_path: str,
         image_size: tuple[int, int],
-        image_resample: Image.Resampling.NEAREST,
+        image_resample = Image.Resampling.NEAREST,
     ):
         self.image_path = image_path
         self.image_size = image_size
@@ -26,6 +28,8 @@ def __init__(
 
     def read_batch(self, batch_size, dataset_index) -> np.ndarray:
         #read images with PIL
+
+        #! add this to the intializer
         image_filenames = np.array(sorted(os.listdir(self.image_path)))
         for i in range(batch_size):
             image_index = i + dataset_index
@@ -47,7 +51,7 @@ def __init__(
         self,
         image_path:str,
         image_resize:tuple[int,int],
-        image_resample: Image.Resampling.NEAREST,
+        image_resample = Image.Resampling.NEAREST,
         
     ):
         self.image_path = image_path

From 583fd503be1e26d6953d195bb13395f26cac0b7f Mon Sep 17 00:00:00 2001
From: Ayleen Sohaib <2684413S@student.gla.ac.uk>
Date: Thu, 24 Aug 2023 01:06:55 +0100
Subject: [PATCH 10/75] added variable to initialiser

---
 .../segmentation_utils/reading_strategies.py   | 18 ++++++++----------
 1 file changed, 8 insertions(+), 10 deletions(-)

diff --git a/utilities/segmentation_utils/reading_strategies.py b/utilities/segmentation_utils/reading_strategies.py
index cec427d..9255a9d 100644
--- a/utilities/segmentation_utils/reading_strategies.py
+++ b/utilities/segmentation_utils/reading_strategies.py
@@ -23,30 +23,29 @@ def __init__(
         image_resample = Image.Resampling.NEAREST,
     ):
         self.image_path = image_path
+        self.image_filenames = np.array(sorted(os.listdir(self.image_path))) #!update: added variable to initialiser
         self.image_size = image_size
         self.image_resample = image_resample
+        
 
     def read_batch(self, batch_size, dataset_index) -> np.ndarray:
         #read images with PIL
 
-        #! add this to the intializer
-        image_filenames = np.array(sorted(os.listdir(self.image_path)))
         for i in range(batch_size):
             image_index = i + dataset_index
             image = Image.open(
-                os.path.join(self.image_path, image_filenames[image_index])
+                os.path.join(self.image_path, self.image_filenames[image_index])
             ).resize(self.image_size, self.image_resample)
             image = np.array(image)
             image = image / 255
         return image
 
     def get_dataset_size(self, mini_batch) -> int:
-        image_filenames = np.array(sorted(os.listdir(self.image_path)))
-        dataset_size = int(np.floor(len(image_filenames) / float(mini_batch)))
+        dataset_size = int(np.floor(len(self.image_filenames) / float(mini_batch)))
         return dataset_size
     
 class HyperspectralImageStrategy:
-
+    #read images with rasterio
     def __init__(
         self,
         image_path:str,
@@ -55,17 +54,17 @@ def __init__(
         
     ):
         self.image_path = image_path
+        self.image_filenames = np.array(sorted(os.listdir(self.image_path)))
         self.image_resize = image_resize
         self.image_resample = image_resample
   
     def read_batch(self, batch_size, dataset_index) -> np.ndarray:
         #read images with rasterio
-        image_filenames = np.array(sorted(os.listdir(self.image_path)))
         for i in range(batch_size):
             image_index = i + dataset_index
             #open the source raster dataset
             with rasterio.open(
-                os.path.join(self.image_path, image_filenames[image_index])
+                os.path.join(self.image_path, self.image_filenames[image_index])
                 ) as dataset:
                  #.read() returns a numpy array that contains the raster cell values in your file.
                 image = dataset.read()
@@ -74,7 +73,6 @@ def read_batch(self, batch_size, dataset_index) -> np.ndarray:
         return image
     
     def get_dataset_size(self, mini_batch) -> int:
-        image_filenames = np.array(sorted(os.listdir(self.image_path)))
-        dataset_size = int(np.floor(len(image_filenames) / float(mini_batch)))
+        dataset_size = int(np.floor(len(self.image_filenames) / float(mini_batch)))
         return dataset_size
     
\ No newline at end of file

From c5b32317fef70b20cf9b26e621eba5d81c970f8c Mon Sep 17 00:00:00 2001
From: Ayleen Sohaib <2684413S@student.gla.ac.uk>
Date: Thu, 24 Aug 2023 01:07:46 +0100
Subject: [PATCH 11/75] updated 3 tests to pass, added rasterio but incomplete

---
 .../test_strategies.py                        | 68 +++++++++++++++++--
 1 file changed, 61 insertions(+), 7 deletions(-)

diff --git a/tests/segmentation_utils_tests.py/test_strategies.py b/tests/segmentation_utils_tests.py/test_strategies.py
index 3148105..cabfdf7 100644
--- a/tests/segmentation_utils_tests.py/test_strategies.py
+++ b/tests/segmentation_utils_tests.py/test_strategies.py
@@ -1,17 +1,19 @@
 import os
-
 import numpy as np
 from PIL import Image
+import rasterio
 from pytest import MonkeyPatch
 
 from utilities.segmentation_utils.reading_strategies import RGBImageStrategy
 
 
 def test_read_batch_image_path() -> None:
-    # should check if path is being read in correctly
+    #checking if the file is being opened and read correctly
     patch = MonkeyPatch()
 
-    patch.setattr(os, "listdir", lambda x: ["a", "b", "c"])
+    mock_filenames = ["a", "b", "c"]
+
+    patch.setattr(os, "listdir", lambda x: mock_filenames)
 
     patch.setattr(Image, "open", lambda _: Image.fromarray(np.ones((224, 224, 3)).astype(np.uint8)))
 
@@ -31,27 +33,79 @@ def test_read_batch_image_path() -> None:
 def test_read_batch_returns_nparray() -> None:
     # checking if the returned value is a numpy array
 
+    patch = MonkeyPatch()
+
+    patch.setattr(os, "listdir", lambda x: ["a", "b", "c"])
+
+    patch.setattr(Image, "open", lambda _: Image.fromarray(np.ones((224, 224, 3)).astype(np.uint8)))
+    
     image_strategy = RGBImageStrategy(
         image_path="tests/segmentation_utils_tests/test_strategies",
         image_size=(224, 224),
         image_resample=Image.Resampling.NEAREST,
     )
 
-    result = image_strategy.read_batch(batch_size=2, dataset_index=0)
+    batch_size=2
+    dataset_index=0
+
+    result = image_strategy.read_batch(batch_size, dataset_index)
     assert isinstance(result, np.ndarray)
 
+    patch.undo()
+    patch.undo()
+
 
 def test_get_dataset_size() -> None:
     # checking if the calculation is done correctly
+    patch = MonkeyPatch()
+
+    mock_filenames = ["a", "b", "c"]
+
+    patch.setattr(os, "listdir", lambda x: mock_filenames)
+
+    patch.setattr(Image, "open", lambda _: Image.fromarray(np.ones((224, 224, 3)).astype(np.uint8)))
 
     image_strategy = RGBImageStrategy(
         image_path="tests/segmentation_utils_tests/test_strategies",
         image_size=(224, 224),
         image_resample=Image.Resampling.NEAREST,
     )
-    dataset = 100  # if there are 100 images in the specified path
-    mini_batch = 32  # and we want 32 images in each batch
-    expected_value = dataset / mini_batch  # number of sets of images we expect
+    dataset = len(mock_filenames)  # number of images in the specified path
+    mini_batch = 2  # number of images we want in each batch
+    expected_value = int (np.floor(dataset / float(mini_batch)))  # number of sets of images we expect
 
     dataset_size = image_strategy.get_dataset_size(mini_batch)
     assert dataset_size == expected_value
+    patch.undo()
+    patch.undo()
+
+
+#!to be continued...
+class MockRasterio():
+    # def __init__(self, image_path, image_filenames):
+    #     self.image_path = image_path
+    #     self.image_filenames = image_filenames
+    
+    def __init__(self, func):
+        self.func = func
+
+    def mock_open(self, *args, **kwargs):
+        patch = MonkeyPatch()
+        mock_filenames = ["a", "b", "c"]
+        patch.setattr(os, "listdir", lambda x: mock_filenames)
+
+        image_file = os.path.join(self.image_path, self.image_filenames[image_index])
+        dataset = rasterio.open(image_file)
+        self.func(dataset)
+
+    def mock_join(self):
+        patch = MonkeyPatch()
+        join = lambda x: "image_path"
+        patch.setattr(os.path, "join", join)
+        return join
+
+
+
+
+def process_data(package=MockRasterio):
+    package.open
\ No newline at end of file

From 8437d4748ba1fb5a92760396861793406f24f3f6 Mon Sep 17 00:00:00 2001
From: Ayleen Sohaib <2684413S@student.gla.ac.uk>
Date: Sat, 12 Aug 2023 20:50:24 +0100
Subject: [PATCH 12/75] Added file including strategies for reader

---
 .../segmentation_utils/reading_strategies.py  | 117 ++++++++++++++++++
 1 file changed, 117 insertions(+)
 create mode 100644 utilities/segmentation_utils/reading_strategies.py

diff --git a/utilities/segmentation_utils/reading_strategies.py b/utilities/segmentation_utils/reading_strategies.py
new file mode 100644
index 0000000..8677020
--- /dev/null
+++ b/utilities/segmentation_utils/reading_strategies.py
@@ -0,0 +1,117 @@
+from typing import Protocol, Tuple
+import os
+import numpy as np
+from PIL import Image
+import rasterio
+
+from flowreader import FlowGeneratorExperimental
+from utilities.segmentation_utils import ImagePreprocessor
+
+
+class ReaderInterface(Protocol):
+
+    def read_batch(self, start:int, end: int) -> None:
+        ...
+    
+    def get_dataset_size(self) -> None:
+        ...
+
+class RGB_Image_Strategy:
+
+    def __init__(
+        self,
+        image_path: str,
+        image_size: tuple [int, int],
+        batch_image_filenames: np.ndarray,   
+        mini_batch: int,
+    ):
+        self.image_path = image_path
+        self.image_size = image_size
+        self.mini_batch = mini_batch
+        self.batch_image_files = batch_image_filenames
+
+    def read_batch(self, batch_size, dataset_index) -> np.ndarray:
+        #read images with PIL
+        for i in range(batch_size):
+                image_index = i + dataset_index
+                image = Image.open(
+                    os.path.join(self.image_path, self.batch_image_filenames[image_index])
+                    ).resize(image_size, Image.ANTIALIAS)
+                image = np.array(image)
+                image = image / 255
+        return image
+
+    def get_dataset_size(self) -> int:
+        dataset_size = FlowGeneratorExperimental.__len__
+        return dataset_size
+
+class Mask_Image_Strategy:
+
+    def __init__(
+        self,
+        mask_path: str,
+        batch_mask_filenames: np.ndarray,   
+        output_reshape: tuple[int, int],
+    ):
+    
+    def read_batch(self, batch_size, dataset_index) -> np.ndarray:
+        #read images with PIL
+        for i in range(batch_size):
+            for j in range(mini_batch):
+                image_index = i * mini_batch + j
+                mask = Image.open(
+                    os.path.join(mask_path, batch_mask_filenames[image_index])
+                    ).resize(output_reshape)
+                mask = np.array(mask)
+        return mask
+
+    def get_dataset_size(self) -> int:
+        dataset_size = FlowGeneratorExperimental.__len__
+        return dataset_size
+
+
+
+#should this be a batch with read_batch as the function having all the code in it?
+    # def initialise_batch_img(self, mini_batch, image_size, channel_mask) -> np.ndarray:
+
+    #     num_mini_batches = Reader.calculate_mini_batch
+    #     channel_mask = np.array(channel_mask)
+    #     n_channels = np.sum(channel_mask)
+
+    #     batch_images = np.zeros(
+    #         (
+    #             num_mini_batches,
+    #             mini_batch,
+    #             image_size[0],
+    #             image_size[1],
+    #             n_channels,
+    #         )
+    #     )
+    #     return batch_images
+
+    # #output
+    # def initialise_batch_mask(self, output_size, mini_batch, num_classes) -> Tuple[bool, np.ndarray]:
+    #     #num_mini_batches = Reader.calculate_mini_batch
+
+    #     if self.output_size[1] == 1:
+    #         column = True
+    #         batch_masks = np.zeros(
+    #             (
+    #                 num_mini_batches,
+    #                 mini_batch, output_size[0],
+    #                 num_classes
+    #             )
+    #         )
+    #     else:
+    #         column = False
+    #         batch_masks = np.zeros(
+    #             (
+    #                 num_mini_batches,
+    #                 mini_batch,
+    #                 output_size[0],
+    #                 output_size[1],
+    #                 num_classes,
+    #             )
+    #         )
+
+    #     return column, batch_masks

From b216ee3d6af4028379ea55eebf5bd4bea4853635 Mon Sep 17 00:00:00 2001
From: Ayleen Sohaib <2684413S@student.gla.ac.uk>
Date: Sat, 12 Aug 2023 21:41:27 +0100
Subject: [PATCH 13/75] changes to for loops and class constructors

---
 .../segmentation_utils/reading_strategies.py  | 81 ++++---------------
 1 file changed, 17 insertions(+), 64 deletions(-)

diff --git a/utilities/segmentation_utils/reading_strategies.py b/utilities/segmentation_utils/reading_strategies.py
index 8677020..ad7c697 100644
--- a/utilities/segmentation_utils/reading_strategies.py
+++ b/utilities/segmentation_utils/reading_strategies.py
@@ -1,4 +1,4 @@
-from typing import Protocol, Tuple
+from typing import Protocol
 import os
 import numpy as np
 from PIL import Image
@@ -22,23 +22,21 @@ def __init__(
         self,
         image_path: str,
         image_size: tuple [int, int],
-        batch_image_filenames: np.ndarray,   
-        mini_batch: int,
+        batch_image_filenames: np.ndarray,
     ):
         self.image_path = image_path
         self.image_size = image_size
-        self.mini_batch = mini_batch
         self.batch_image_files = batch_image_filenames
 
     def read_batch(self, batch_size, dataset_index) -> np.ndarray:
         #read images with PIL
         for i in range(batch_size):
-                image_index = i + dataset_index
-                image = Image.open(
-                    os.path.join(self.image_path, self.batch_image_filenames[image_index])
-                    ).resize(image_size, Image.ANTIALIAS)
-                image = np.array(image)
-                image = image / 255
+            image_index = i + dataset_index
+            image = Image.open(
+                os.path.join(self.image_path, self.batch_image_filenames[image_index])
+            ).resize(self.image_size, Image.ANTIALIAS)
+            image = np.array(image)
+            image = image / 255
         return image
 
     def get_dataset_size(self) -> int:
@@ -50,68 +48,23 @@ class Mask_Image_Strategy:
     def __init__(
         self,
         mask_path: str,
-        batch_mask_filenames: np.ndarray,   
+        batch_mask_filenames: np.ndarray,
         output_reshape: tuple[int, int],
     ):
+        self.mask_path = mask_path
+        self.batch_mask_filenames = batch_mask_filenames
+        self.output_reshape = output_reshape
     
     def read_batch(self, batch_size, dataset_index) -> np.ndarray:
         #read images with PIL
         for i in range(batch_size):
-            for j in range(mini_batch):
-                image_index = i * mini_batch + j
-                mask = Image.open(
-                    os.path.join(mask_path, batch_mask_filenames[image_index])
-                    ).resize(output_reshape)
-                mask = np.array(mask)
+            image_index = i + dataset_index
+            mask = Image.open(
+                os.path.join(self.mask_path, self.batch_mask_filenames[image_index])
+                ).resize(self.output_reshape)
+            mask = np.array(mask)
         return mask
 
     def get_dataset_size(self) -> int:
         dataset_size = FlowGeneratorExperimental.__len__
         return dataset_size
-
-
-
-#should this be a batch with read_batch as the function having all the code in it?
-    # def initialise_batch_img(self, mini_batch, image_size, channel_mask) -> np.ndarray:
-
-    #     num_mini_batches = Reader.calculate_mini_batch
-    #     channel_mask = np.array(channel_mask)
-    #     n_channels = np.sum(channel_mask)
-
-    #     batch_images = np.zeros(
-    #         (
-    #             num_mini_batches,
-    #             mini_batch,
-    #             image_size[0],
-    #             image_size[1],
-    #             n_channels,
-    #         )
-    #     )
-    #     return batch_images
-
-    # #output
-    # def initialise_batch_mask(self, output_size, mini_batch, num_classes) -> Tuple[bool, np.ndarray]:
-    #     #num_mini_batches = Reader.calculate_mini_batch
-
-    #     if self.output_size[1] == 1:
-    #         column = True
-    #         batch_masks = np.zeros(
-    #             (
-    #                 num_mini_batches,
-    #                 mini_batch, output_size[0],
-    #                 num_classes
-    #             )
-    #         )
-    #     else:
-    #         column = False
-    #         batch_masks = np.zeros(
-    #             (
-    #                 num_mini_batches,
-    #                 mini_batch,
-    #                 output_size[0],
-    #                 output_size[1],
-    #                 num_classes,
-    #             )
-    #         )
-
-    #     return column, batch_masks

From e3c626a965866f5e78624e358e669d929e8ef3c9 Mon Sep 17 00:00:00 2001
From: Ayleen Sohaib <2684413S@student.gla.ac.uk>
Date: Sun, 13 Aug 2023 12:14:36 +0100
Subject: [PATCH 14/75] changes to interface name, get function and constructor

---
 .../segmentation_utils/reading_strategies.py  | 22 +++++++++----------
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/utilities/segmentation_utils/reading_strategies.py b/utilities/segmentation_utils/reading_strategies.py
index ad7c697..ca3b120 100644
--- a/utilities/segmentation_utils/reading_strategies.py
+++ b/utilities/segmentation_utils/reading_strategies.py
@@ -8,7 +8,7 @@
 from utilities.segmentation_utils import ImagePreprocessor
 
 
-class ReaderInterface(Protocol):
+class IReader(Protocol):
 
     def read_batch(self, start:int, end: int) -> None:
         ...
@@ -22,25 +22,25 @@ def __init__(
         self,
         image_path: str,
         image_size: tuple [int, int],
-        batch_image_filenames: np.ndarray,
     ):
         self.image_path = image_path
         self.image_size = image_size
-        self.batch_image_files = batch_image_filenames
 
     def read_batch(self, batch_size, dataset_index) -> np.ndarray:
         #read images with PIL
+        image_filenames = np.array(sorted(os.listdir(self.image_path)))
         for i in range(batch_size):
             image_index = i + dataset_index
             image = Image.open(
-                os.path.join(self.image_path, self.batch_image_filenames[image_index])
+                os.path.join(self.image_path, image_filenames[image_index])
             ).resize(self.image_size, Image.ANTIALIAS)
             image = np.array(image)
             image = image / 255
         return image
 
-    def get_dataset_size(self) -> int:
-        dataset_size = FlowGeneratorExperimental.__len__
+    def get_dataset_size(self, mini_batch) -> int:
+        image_filenames = np.array(sorted(os.listdir(self.image_path)))
+        dataset_size = int(np.floor(len(image_filenames) / float(mini_batch)))
         return dataset_size
 
 class Mask_Image_Strategy:
@@ -48,23 +48,23 @@ class Mask_Image_Strategy:
     def __init__(
         self,
         mask_path: str,
-        batch_mask_filenames: np.ndarray,
         output_reshape: tuple[int, int],
     ):
         self.mask_path = mask_path
-        self.batch_mask_filenames = batch_mask_filenames
         self.output_reshape = output_reshape
     
     def read_batch(self, batch_size, dataset_index) -> np.ndarray:
         #read images with PIL
+        mask_filenames = np.array(sorted(os.listdir(self.mask_path)))
         for i in range(batch_size):
             image_index = i + dataset_index
             mask = Image.open(
-                os.path.join(self.mask_path, self.batch_mask_filenames[image_index])
+                os.path.join(self.mask_path, mask_filenames[image_index])
                 ).resize(self.output_reshape)
             mask = np.array(mask)
         return mask
 
-    def get_dataset_size(self) -> int:
-        dataset_size = FlowGeneratorExperimental.__len__
+    def get_dataset_size(self, mini_batch) -> int:
+        image_filenames = np.array(sorted(os.listdir(self.image_path)))
+        dataset_size = int(np.floor(len(image_filenames) / float(mini_batch)))
         return dataset_size

From 7f6798b0dac41d17c9ff6066dc3cd87393c6daee Mon Sep 17 00:00:00 2001
From: Ayleen Sohaib <2684413S@student.gla.ac.uk>
Date: Tue, 15 Aug 2023 18:16:48 +0100
Subject: [PATCH 15/75] added strategies using rasterio to read images

---
 .../segmentation_utils/reading_strategies.py  | 57 +++++++++++++++++--
 1 file changed, 53 insertions(+), 4 deletions(-)

diff --git a/utilities/segmentation_utils/reading_strategies.py b/utilities/segmentation_utils/reading_strategies.py
index ca3b120..a36365b 100644
--- a/utilities/segmentation_utils/reading_strategies.py
+++ b/utilities/segmentation_utils/reading_strategies.py
@@ -4,10 +4,6 @@
 from PIL import Image
 import rasterio
 
-from flowreader import FlowGeneratorExperimental
-from utilities.segmentation_utils import ImagePreprocessor
-
-
 class IReader(Protocol):
 
     def read_batch(self, start:int, end: int) -> None:
@@ -64,7 +60,60 @@ def read_batch(self, batch_size, dataset_index) -> np.ndarray:
             mask = np.array(mask)
         return mask
 
+    def get_dataset_size(self, mini_batch) -> int:
+        mask_filenames = np.array(sorted(os.listdir(self.mask_path)))
+        dataset_size = int(np.floor(len(mask_filenames) / float(mini_batch)))
+        return dataset_size
+    
+class Hyperspectral_Image_Strategy:
+
+    def __init__(
+        self,
+        image_path:str,
+    ):
+        self.image_path = image_path
+  
+    def read_batch(self, batch_size, dataset_index) -> np.ndarray:
+        #read images with rasterio
+        image_filenames = np.array(sorted(os.listdir(self.image_path)))
+        for i in range(batch_size):
+            image_index = i + dataset_index
+            #open the source raster dataset
+            with rasterio.open(
+                os.path.join(self.image_path, image_filenames[image_index])
+                ) as dataset:
+                 #.read() returns a numpy array that contains the raster cell values in your file.
+                image = dataset.read()
+                image = image / 255
+        return image
+    
     def get_dataset_size(self, mini_batch) -> int:
         image_filenames = np.array(sorted(os.listdir(self.image_path)))
         dataset_size = int(np.floor(len(image_filenames) / float(mini_batch)))
         return dataset_size
+
+class Hyperspectral_Mask_Image_Strategy:
+
+    def __init__(
+        self,
+        mask_path:str,
+    ):
+        self.mask_path = mask_path
+
+    def read_batch(self, batch_size, dataset_index) -> np.ndarray:
+        #read images with rasterio
+        mask_filenames = np.array(sorted(os.listdir(self.mask_path)))
+        for i in range(batch_size):
+            image_index = i + dataset_index
+            #open the source raster dataset
+            with rasterio.open(
+                os.path.join(self.mask_path, mask_filenames[image_index])
+                ) as dataset:
+                #.read() returns a numpy array that contains the raster cell values in your file.
+                mask = dataset.read()
+        return mask
+    
+    def get_dataset_size(self, mini_batch) -> int:
+        mask_filenames = np.array(sorted(os.listdir(self.mask_path)))
+        dataset_size = int(np.floor(len(mask_filenames) / float(mini_batch)))
+        return dataset_size

From 26d4576ef29e423efb5fe27cc218b22c6db84a58 Mon Sep 17 00:00:00 2001
From: Ayleen Sohaib <2684413S@student.gla.ac.uk>
Date: Tue, 15 Aug 2023 21:48:00 +0100
Subject: [PATCH 16/75] removed mask strategies and added to constructor

---
 .../segmentation_utils/reading_strategies.py  | 64 ++-----------------
 1 file changed, 7 insertions(+), 57 deletions(-)

diff --git a/utilities/segmentation_utils/reading_strategies.py b/utilities/segmentation_utils/reading_strategies.py
index a36365b..9ae5a5d 100644
--- a/utilities/segmentation_utils/reading_strategies.py
+++ b/utilities/segmentation_utils/reading_strategies.py
@@ -12,15 +12,17 @@ def read_batch(self, start:int, end: int) -> None:
     def get_dataset_size(self) -> None:
         ...
 
-class RGB_Image_Strategy:
+class RGBImageStrategy:
 
     def __init__(
         self,
         image_path: str,
         image_size: tuple [int, int],
+        antialias: int = 1,
     ):
         self.image_path = image_path
         self.image_size = image_size
+        self.antialias = antialias
 
     def read_batch(self, batch_size, dataset_index) -> np.ndarray:
         #read images with PIL
@@ -29,7 +31,7 @@ def read_batch(self, batch_size, dataset_index) -> np.ndarray:
             image_index = i + dataset_index
             image = Image.open(
                 os.path.join(self.image_path, image_filenames[image_index])
-            ).resize(self.image_size, Image.ANTIALIAS)
+            ).resize(self.image_size, self.antialias)
             image = np.array(image)
             image = image / 255
         return image
@@ -38,34 +40,8 @@ def get_dataset_size(self, mini_batch) -> int:
         image_filenames = np.array(sorted(os.listdir(self.image_path)))
         dataset_size = int(np.floor(len(image_filenames) / float(mini_batch)))
         return dataset_size
-
-class Mask_Image_Strategy:
-
-    def __init__(
-        self,
-        mask_path: str,
-        output_reshape: tuple[int, int],
-    ):
-        self.mask_path = mask_path
-        self.output_reshape = output_reshape
     
-    def read_batch(self, batch_size, dataset_index) -> np.ndarray:
-        #read images with PIL
-        mask_filenames = np.array(sorted(os.listdir(self.mask_path)))
-        for i in range(batch_size):
-            image_index = i + dataset_index
-            mask = Image.open(
-                os.path.join(self.mask_path, mask_filenames[image_index])
-                ).resize(self.output_reshape)
-            mask = np.array(mask)
-        return mask
-
-    def get_dataset_size(self, mini_batch) -> int:
-        mask_filenames = np.array(sorted(os.listdir(self.mask_path)))
-        dataset_size = int(np.floor(len(mask_filenames) / float(mini_batch)))
-        return dataset_size
-    
-class Hyperspectral_Image_Strategy:
+class HyperspectralImageStrategy:
 
     def __init__(
         self,
@@ -83,37 +59,11 @@ def read_batch(self, batch_size, dataset_index) -> np.ndarray:
                 os.path.join(self.image_path, image_filenames[image_index])
                 ) as dataset:
                  #.read() returns a numpy array that contains the raster cell values in your file.
-                image = dataset.read()
+                image = dataset.read() #!resize using numpy resize function?
                 image = image / 255
         return image
     
     def get_dataset_size(self, mini_batch) -> int:
         image_filenames = np.array(sorted(os.listdir(self.image_path)))
         dataset_size = int(np.floor(len(image_filenames) / float(mini_batch)))
-        return dataset_size
-
-class Hyperspectral_Mask_Image_Strategy:
-
-    def __init__(
-        self,
-        mask_path:str,
-    ):
-        self.mask_path = mask_path
-
-    def read_batch(self, batch_size, dataset_index) -> np.ndarray:
-        #read images with rasterio
-        mask_filenames = np.array(sorted(os.listdir(self.mask_path)))
-        for i in range(batch_size):
-            image_index = i + dataset_index
-            #open the source raster dataset
-            with rasterio.open(
-                os.path.join(self.mask_path, mask_filenames[image_index])
-                ) as dataset:
-                #.read() returns a numpy array that contains the raster cell values in your file.
-                mask = dataset.read()
-        return mask
-    
-    def get_dataset_size(self, mini_batch) -> int:
-        mask_filenames = np.array(sorted(os.listdir(self.mask_path)))
-        dataset_size = int(np.floor(len(mask_filenames) / float(mini_batch)))
-        return dataset_size
+        return dataset_size
\ No newline at end of file

From e3b0fbd96d44551c47132816fae00834a6470b12 Mon Sep 17 00:00:00 2001
From: Ayleen Sohaib <2684413S@student.gla.ac.uk>
Date: Tue, 15 Aug 2023 22:00:18 +0100
Subject: [PATCH 17/75] fixed constructor and set image_resample to default

---
 utilities/segmentation_utils/reading_strategies.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/utilities/segmentation_utils/reading_strategies.py b/utilities/segmentation_utils/reading_strategies.py
index 9ae5a5d..9421b89 100644
--- a/utilities/segmentation_utils/reading_strategies.py
+++ b/utilities/segmentation_utils/reading_strategies.py
@@ -18,11 +18,11 @@ def __init__(
         self,
         image_path: str,
         image_size: tuple [int, int],
-        antialias: int = 1,
+        image_resample: Image.Resampling.NEAREST,
     ):
         self.image_path = image_path
         self.image_size = image_size
-        self.antialias = antialias
+        self.image_resample = image_resample
 
     def read_batch(self, batch_size, dataset_index) -> np.ndarray:
         #read images with PIL
@@ -31,7 +31,7 @@ def read_batch(self, batch_size, dataset_index) -> np.ndarray:
             image_index = i + dataset_index
             image = Image.open(
                 os.path.join(self.image_path, image_filenames[image_index])
-            ).resize(self.image_size, self.antialias)
+            ).resize(self.image_size, self.image_resample)
             image = np.array(image)
             image = image / 255
         return image

From 04483c0579de273d40e5a3f3a182e3d35ff5f247 Mon Sep 17 00:00:00 2001
From: Ayleen Sohaib <2684413S@student.gla.ac.uk>
Date: Thu, 17 Aug 2023 13:42:14 +0100
Subject: [PATCH 18/75] added resizing to hyperspectral strategy

---
 utilities/segmentation_utils/reading_strategies.py | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/utilities/segmentation_utils/reading_strategies.py b/utilities/segmentation_utils/reading_strategies.py
index 9421b89..5c44615 100644
--- a/utilities/segmentation_utils/reading_strategies.py
+++ b/utilities/segmentation_utils/reading_strategies.py
@@ -17,7 +17,7 @@ class RGBImageStrategy:
     def __init__(
         self,
         image_path: str,
-        image_size: tuple [int, int],
+        image_size: tuple[int, int],
         image_resample: Image.Resampling.NEAREST,
     ):
         self.image_path = image_path
@@ -46,8 +46,13 @@ class HyperspectralImageStrategy:
     def __init__(
         self,
         image_path:str,
+        image_resize:tuple[int,int],
+        image_resample: Image.Resampling.NEAREST,
+        
     ):
         self.image_path = image_path
+        self.image_resize = image_resize
+        self.image_resample = image_resample
   
     def read_batch(self, batch_size, dataset_index) -> np.ndarray:
         #read images with rasterio
@@ -59,11 +64,13 @@ def read_batch(self, batch_size, dataset_index) -> np.ndarray:
                 os.path.join(self.image_path, image_filenames[image_index])
                 ) as dataset:
                  #.read() returns a numpy array that contains the raster cell values in your file.
-                image = dataset.read() #!resize using numpy resize function?
+                image = dataset.read()
                 image = image / 255
+                image = np.resize(self.image_resize, self.image_resample)
         return image
     
     def get_dataset_size(self, mini_batch) -> int:
         image_filenames = np.array(sorted(os.listdir(self.image_path)))
         dataset_size = int(np.floor(len(image_filenames) / float(mini_batch)))
-        return dataset_size
\ No newline at end of file
+        return dataset_size
+    
\ No newline at end of file

From b48a4e6aa71ebb74f213064d4975baf153205cef Mon Sep 17 00:00:00 2001
From: Ayleen Sohaib <2684413S@student.gla.ac.uk>
Date: Thu, 17 Aug 2023 13:43:01 +0100
Subject: [PATCH 19/75] added tests for strategies - they do not yet pass

---
 .../test_strategies.py                        | 48 +++++++++++++++++++
 1 file changed, 48 insertions(+)
 create mode 100644 tests/segmentation_utils_tests.py/test_strategies.py

diff --git a/tests/segmentation_utils_tests.py/test_strategies.py b/tests/segmentation_utils_tests.py/test_strategies.py
new file mode 100644
index 0000000..4466781
--- /dev/null
+++ b/tests/segmentation_utils_tests.py/test_strategies.py
@@ -0,0 +1,48 @@
+import os
+import numpy as np
+from PIL import Image
+from pytest import MonkeyPatch
+from utilities.segmentation_utils.reading_strategies import RGBImageStrategy
+
+def test_read_batch_image_path() -> None:
+    #should check if path is being read in correctly
+    image_strategy = RGBImageStrategy(
+    image_path="tests/segmentation_utils_tests/test_strategies",
+    image_size=(224, 224),
+    image_resample=Image.Resampling.NEAREST,
+)
+
+    batch_size = 2
+    dataset_index = 0
+    image_strategy.read_batch(batch_size, dataset_index)
+
+def test_read_batch_returns_nparray() -> None:
+    #checking if the returned value is a numpy array
+
+    image_strategy = RGBImageStrategy(
+    image_path="tests/segmentation_utils_tests/test_strategies",
+    image_size=(224, 224),
+    image_resample=Image.Resampling.NEAREST,
+)
+
+    result = image_strategy.read_batch(batch_size=2, dataset_index=0)
+    assert isinstance(result, np.ndarray) 
+
+def test_get_dataset_size() -> None:
+    #checking if the calculation is done correctly
+
+    image_strategy = RGBImageStrategy(
+    image_path="tests/segmentation_utils_tests/test_strategies",
+    image_size=(224, 224),
+    image_resample=Image.Resampling.NEAREST,
+)
+    dataset = 100 #if there are 100 images in the specified path
+    mini_batch = 32 #and we want 32 images in each batch
+    expected_value = dataset / mini_batch #number of sets of images we expect
+    
+    dataset_size = image_strategy.get_dataset_size(mini_batch)
+    assert dataset_size == expected_value
+
+
+
+

From 11e7aacd41baea628068f75bd302765119998be2 Mon Sep 17 00:00:00 2001
From: Sajtospoga01 <abodrogai@gmail.com>
Date: Thu, 17 Aug 2023 22:31:39 +0100
Subject: [PATCH 20/75] updated test to pass, updates variable assignment in
 strategy constructor, adds extra comments

---
 .../test_strategies.py                        | 57 +++++++++++--------
 .../segmentation_utils/reading_strategies.py  | 12 ++--
 2 files changed, 41 insertions(+), 28 deletions(-)

diff --git a/tests/segmentation_utils_tests.py/test_strategies.py b/tests/segmentation_utils_tests.py/test_strategies.py
index 4466781..3148105 100644
--- a/tests/segmentation_utils_tests.py/test_strategies.py
+++ b/tests/segmentation_utils_tests.py/test_strategies.py
@@ -1,48 +1,57 @@
 import os
+
 import numpy as np
 from PIL import Image
 from pytest import MonkeyPatch
+
 from utilities.segmentation_utils.reading_strategies import RGBImageStrategy
 
+
 def test_read_batch_image_path() -> None:
-    #should check if path is being read in correctly
+    # should check if path is being read in correctly
+    patch = MonkeyPatch()
+
+    patch.setattr(os, "listdir", lambda x: ["a", "b", "c"])
+
+    patch.setattr(Image, "open", lambda _: Image.fromarray(np.ones((224, 224, 3)).astype(np.uint8)))
+
     image_strategy = RGBImageStrategy(
-    image_path="tests/segmentation_utils_tests/test_strategies",
-    image_size=(224, 224),
-    image_resample=Image.Resampling.NEAREST,
-)
+        image_path="tests/segmentation_utils_tests/test_strategies",
+        image_size=(224, 224),
+        image_resample=Image.Resampling.NEAREST,
+    )
 
     batch_size = 2
     dataset_index = 0
     image_strategy.read_batch(batch_size, dataset_index)
+    patch.undo()
+    patch.undo()
+
 
 def test_read_batch_returns_nparray() -> None:
-    #checking if the returned value is a numpy array
+    # checking if the returned value is a numpy array
 
     image_strategy = RGBImageStrategy(
-    image_path="tests/segmentation_utils_tests/test_strategies",
-    image_size=(224, 224),
-    image_resample=Image.Resampling.NEAREST,
-)
+        image_path="tests/segmentation_utils_tests/test_strategies",
+        image_size=(224, 224),
+        image_resample=Image.Resampling.NEAREST,
+    )
 
     result = image_strategy.read_batch(batch_size=2, dataset_index=0)
-    assert isinstance(result, np.ndarray) 
+    assert isinstance(result, np.ndarray)
+
 
 def test_get_dataset_size() -> None:
-    #checking if the calculation is done correctly
+    # checking if the calculation is done correctly
 
     image_strategy = RGBImageStrategy(
-    image_path="tests/segmentation_utils_tests/test_strategies",
-    image_size=(224, 224),
-    image_resample=Image.Resampling.NEAREST,
-)
-    dataset = 100 #if there are 100 images in the specified path
-    mini_batch = 32 #and we want 32 images in each batch
-    expected_value = dataset / mini_batch #number of sets of images we expect
-    
+        image_path="tests/segmentation_utils_tests/test_strategies",
+        image_size=(224, 224),
+        image_resample=Image.Resampling.NEAREST,
+    )
+    dataset = 100  # if there are 100 images in the specified path
+    mini_batch = 32  # and we want 32 images in each batch
+    expected_value = dataset / mini_batch  # number of sets of images we expect
+
     dataset_size = image_strategy.get_dataset_size(mini_batch)
     assert dataset_size == expected_value
-
-
-
-
diff --git a/utilities/segmentation_utils/reading_strategies.py b/utilities/segmentation_utils/reading_strategies.py
index 5c44615..cec427d 100644
--- a/utilities/segmentation_utils/reading_strategies.py
+++ b/utilities/segmentation_utils/reading_strategies.py
@@ -1,8 +1,10 @@
-from typing import Protocol
 import os
+from typing import Protocol
+
 import numpy as np
-from PIL import Image
 import rasterio
+from PIL import Image
+
 
 class IReader(Protocol):
 
@@ -18,7 +20,7 @@ def __init__(
         self,
         image_path: str,
         image_size: tuple[int, int],
-        image_resample: Image.Resampling.NEAREST,
+        image_resample = Image.Resampling.NEAREST,
     ):
         self.image_path = image_path
         self.image_size = image_size
@@ -26,6 +28,8 @@ def __init__(
 
     def read_batch(self, batch_size, dataset_index) -> np.ndarray:
         #read images with PIL
+
+        #! add this to the intializer
         image_filenames = np.array(sorted(os.listdir(self.image_path)))
         for i in range(batch_size):
             image_index = i + dataset_index
@@ -47,7 +51,7 @@ def __init__(
         self,
         image_path:str,
         image_resize:tuple[int,int],
-        image_resample: Image.Resampling.NEAREST,
+        image_resample = Image.Resampling.NEAREST,
         
     ):
         self.image_path = image_path

From a416419040770300247e44450231c5c5d52d8bc0 Mon Sep 17 00:00:00 2001
From: Ayleen Sohaib <2684413S@student.gla.ac.uk>
Date: Thu, 24 Aug 2023 01:06:55 +0100
Subject: [PATCH 21/75] added variable to initialiser

---
 .../segmentation_utils/reading_strategies.py   | 18 ++++++++----------
 1 file changed, 8 insertions(+), 10 deletions(-)

diff --git a/utilities/segmentation_utils/reading_strategies.py b/utilities/segmentation_utils/reading_strategies.py
index cec427d..9255a9d 100644
--- a/utilities/segmentation_utils/reading_strategies.py
+++ b/utilities/segmentation_utils/reading_strategies.py
@@ -23,30 +23,29 @@ def __init__(
         image_resample = Image.Resampling.NEAREST,
     ):
         self.image_path = image_path
+        self.image_filenames = np.array(sorted(os.listdir(self.image_path))) #!update: added variable to initialiser
         self.image_size = image_size
         self.image_resample = image_resample
+        
 
     def read_batch(self, batch_size, dataset_index) -> np.ndarray:
         #read images with PIL
 
-        #! add this to the intializer
-        image_filenames = np.array(sorted(os.listdir(self.image_path)))
         for i in range(batch_size):
             image_index = i + dataset_index
             image = Image.open(
-                os.path.join(self.image_path, image_filenames[image_index])
+                os.path.join(self.image_path, self.image_filenames[image_index])
             ).resize(self.image_size, self.image_resample)
             image = np.array(image)
             image = image / 255
         return image
 
     def get_dataset_size(self, mini_batch) -> int:
-        image_filenames = np.array(sorted(os.listdir(self.image_path)))
-        dataset_size = int(np.floor(len(image_filenames) / float(mini_batch)))
+        dataset_size = int(np.floor(len(self.image_filenames) / float(mini_batch)))
         return dataset_size
     
 class HyperspectralImageStrategy:
-
+    #read images with rasterio
     def __init__(
         self,
         image_path:str,
@@ -55,17 +54,17 @@ def __init__(
         
     ):
         self.image_path = image_path
+        self.image_filenames = np.array(sorted(os.listdir(self.image_path)))
         self.image_resize = image_resize
         self.image_resample = image_resample
   
     def read_batch(self, batch_size, dataset_index) -> np.ndarray:
         #read images with rasterio
-        image_filenames = np.array(sorted(os.listdir(self.image_path)))
         for i in range(batch_size):
             image_index = i + dataset_index
             #open the source raster dataset
             with rasterio.open(
-                os.path.join(self.image_path, image_filenames[image_index])
+                os.path.join(self.image_path, self.image_filenames[image_index])
                 ) as dataset:
                  #.read() returns a numpy array that contains the raster cell values in your file.
                 image = dataset.read()
@@ -74,7 +73,6 @@ def read_batch(self, batch_size, dataset_index) -> np.ndarray:
         return image
     
     def get_dataset_size(self, mini_batch) -> int:
-        image_filenames = np.array(sorted(os.listdir(self.image_path)))
-        dataset_size = int(np.floor(len(image_filenames) / float(mini_batch)))
+        dataset_size = int(np.floor(len(self.image_filenames) / float(mini_batch)))
         return dataset_size
     
\ No newline at end of file

From ef1ffd01a3e874125ff8655b430a65c94e414220 Mon Sep 17 00:00:00 2001
From: Ayleen Sohaib <2684413S@student.gla.ac.uk>
Date: Thu, 24 Aug 2023 01:07:46 +0100
Subject: [PATCH 22/75] updated 3 tests to pass, added rasterio but incomplete

---
 .../test_strategies.py                        | 68 +++++++++++++++++--
 1 file changed, 61 insertions(+), 7 deletions(-)

diff --git a/tests/segmentation_utils_tests.py/test_strategies.py b/tests/segmentation_utils_tests.py/test_strategies.py
index 3148105..cabfdf7 100644
--- a/tests/segmentation_utils_tests.py/test_strategies.py
+++ b/tests/segmentation_utils_tests.py/test_strategies.py
@@ -1,17 +1,19 @@
 import os
-
 import numpy as np
 from PIL import Image
+import rasterio
 from pytest import MonkeyPatch
 
 from utilities.segmentation_utils.reading_strategies import RGBImageStrategy
 
 
 def test_read_batch_image_path() -> None:
-    # should check if path is being read in correctly
+    #checking if the file is being opened and read correctly
     patch = MonkeyPatch()
 
-    patch.setattr(os, "listdir", lambda x: ["a", "b", "c"])
+    mock_filenames = ["a", "b", "c"]
+
+    patch.setattr(os, "listdir", lambda x: mock_filenames)
 
     patch.setattr(Image, "open", lambda _: Image.fromarray(np.ones((224, 224, 3)).astype(np.uint8)))
 
@@ -31,27 +33,79 @@ def test_read_batch_image_path() -> None:
 def test_read_batch_returns_nparray() -> None:
     # checking if the returned value is a numpy array
 
+    patch = MonkeyPatch()
+
+    patch.setattr(os, "listdir", lambda x: ["a", "b", "c"])
+
+    patch.setattr(Image, "open", lambda _: Image.fromarray(np.ones((224, 224, 3)).astype(np.uint8)))
+    
     image_strategy = RGBImageStrategy(
         image_path="tests/segmentation_utils_tests/test_strategies",
         image_size=(224, 224),
         image_resample=Image.Resampling.NEAREST,
     )
 
-    result = image_strategy.read_batch(batch_size=2, dataset_index=0)
+    batch_size=2
+    dataset_index=0
+
+    result = image_strategy.read_batch(batch_size, dataset_index)
     assert isinstance(result, np.ndarray)
 
+    patch.undo()
+    patch.undo()
+
 
 def test_get_dataset_size() -> None:
     # checking if the calculation is done correctly
+    patch = MonkeyPatch()
+
+    mock_filenames = ["a", "b", "c"]
+
+    patch.setattr(os, "listdir", lambda x: mock_filenames)
+
+    patch.setattr(Image, "open", lambda _: Image.fromarray(np.ones((224, 224, 3)).astype(np.uint8)))
 
     image_strategy = RGBImageStrategy(
         image_path="tests/segmentation_utils_tests/test_strategies",
         image_size=(224, 224),
         image_resample=Image.Resampling.NEAREST,
     )
-    dataset = 100  # if there are 100 images in the specified path
-    mini_batch = 32  # and we want 32 images in each batch
-    expected_value = dataset / mini_batch  # number of sets of images we expect
+    dataset = len(mock_filenames)  # number of images in the specified path
+    mini_batch = 2  # number of images we want in each batch
+    expected_value = int (np.floor(dataset / float(mini_batch)))  # number of sets of images we expect
 
     dataset_size = image_strategy.get_dataset_size(mini_batch)
     assert dataset_size == expected_value
+    patch.undo()
+    patch.undo()
+
+
+#!to be continued...
+class MockRasterio():
+    # def __init__(self, image_path, image_filenames):
+    #     self.image_path = image_path
+    #     self.image_filenames = image_filenames
+    
+    def __init__(self, func):
+        self.func = func
+
+    def mock_open(self, *args, **kwargs):
+        patch = MonkeyPatch()
+        mock_filenames = ["a", "b", "c"]
+        patch.setattr(os, "listdir", lambda x: mock_filenames)
+
+        image_file = os.path.join(self.image_path, self.image_filenames[image_index])
+        dataset = rasterio.open(image_file)
+        self.func(dataset)
+
+    def mock_join(self):
+        patch = MonkeyPatch()
+        join = lambda x: "image_path"
+        patch.setattr(os.path, "join", join)
+        return join
+
+
+
+
+def process_data(package=MockRasterio):
+    package.open
\ No newline at end of file

From 13e60d2b8cd6f91ebfdb94b0dc36d7b21a386660 Mon Sep 17 00:00:00 2001
From: Ayleen Sohaib <2684413S@student.gla.ac.uk>
Date: Thu, 24 Aug 2023 13:57:40 +0100
Subject: [PATCH 23/75] updated the test for hyperspectral to pass

---
 .../test_strategies.py                        | 56 +++++++++++--------
 1 file changed, 33 insertions(+), 23 deletions(-)

diff --git a/tests/segmentation_utils_tests.py/test_strategies.py b/tests/segmentation_utils_tests.py/test_strategies.py
index cabfdf7..fa93012 100644
--- a/tests/segmentation_utils_tests.py/test_strategies.py
+++ b/tests/segmentation_utils_tests.py/test_strategies.py
@@ -79,33 +79,43 @@ def test_get_dataset_size() -> None:
     patch.undo()
     patch.undo()
 
-
-#!to be continued...
 class MockRasterio():
-    # def __init__(self, image_path, image_filenames):
-    #     self.image_path = image_path
-    #     self.image_filenames = image_filenames
     
-    def __init__(self, func):
-        self.func = func
+    def __init__(self):
+        self.shape = (224, 224) #dimensions for raster data 
+        self.dtypes = ['int32'] #data type of raster data that would be returned by .open()
+        #a list containing a string representing a data type
+        #32 bit int data type
+
+    def read(self, *args, **kwargs):
+        return np.zeros(self.shape, self.dtypes[0])
+    
+    #these functions are invoked when a 'with' statement is executed
+    def __enter__(self):
+        #called at the beginning of a 'with' block
+        return self #returns instance of MockRasterio class itself
+    
+    def __exit__(self, type, value, traceback):
+        #called at the end of a 'with' block
+        pass
 
-    def mock_open(self, *args, **kwargs):
+def test_hyperspectral_open():
         patch = MonkeyPatch()
         mock_filenames = ["a", "b", "c"]
         patch.setattr(os, "listdir", lambda x: mock_filenames)
 
-        image_file = os.path.join(self.image_path, self.image_filenames[image_index])
-        dataset = rasterio.open(image_file)
-        self.func(dataset)
-
-    def mock_join(self):
-        patch = MonkeyPatch()
-        join = lambda x: "image_path"
-        patch.setattr(os.path, "join", join)
-        return join
-
-
-
-
-def process_data(package=MockRasterio):
-    package.open
\ No newline at end of file
+        def mock_open(*args, **kwargs): #local function to the test
+            #defines behaviour of mock object that replaces rasterio.open()
+            return MockRasterio()
+        
+        patch.setattr(rasterio, "open", mock_open)
+        image_path = "tests/segmentation_utils_tests/test_strategies"
+        dataset_list = []
+
+        for filename in mock_filenames:
+            file_path = os.path.join(image_path, filename)
+            dataset = rasterio.open(file_path)
+            dataset_list.append(dataset)
+    
+            assert dataset.shape == (224, 224)
+            assert np.array_equal (dataset.read(), np.zeros((224, 224), dtype='int32'))

From e99b86995a3e1ec785a7805b5f036c70be8936f1 Mon Sep 17 00:00:00 2001
From: Sajtospoga01 <abodrogai@gmail.com>
Date: Thu, 24 Aug 2023 15:11:27 +0100
Subject: [PATCH 24/75] adds dependency injection to the rasterio strategy for
 better testability

---
 .../segmentation_utils/reading_strategies.py  | 26 ++++++++++++-------
 1 file changed, 17 insertions(+), 9 deletions(-)

diff --git a/utilities/segmentation_utils/reading_strategies.py b/utilities/segmentation_utils/reading_strategies.py
index 9255a9d..42d284c 100644
--- a/utilities/segmentation_utils/reading_strategies.py
+++ b/utilities/segmentation_utils/reading_strategies.py
@@ -51,26 +51,34 @@ def __init__(
         image_path:str,
         image_resize:tuple[int,int],
         image_resample = Image.Resampling.NEAREST,
-        
+        package = rasterio
     ):
         self.image_path = image_path
         self.image_filenames = np.array(sorted(os.listdir(self.image_path)))
         self.image_resize = image_resize
         self.image_resample = image_resample
+        self.package = package
+        #gets the number of bands for the dataset
+        self.bands = package.open(os.path.join(self.image_path, self.image_filenames[0])).count
   
     def read_batch(self, batch_size, dataset_index) -> np.ndarray:
         #read images with rasterio
-        for i in range(batch_size):
-            image_index = i + dataset_index
-            #open the source raster dataset
-            with rasterio.open(
-                os.path.join(self.image_path, self.image_filenames[image_index])
+        batch_filenames = self.image_filenames[dataset_index:dataset_index + batch_size]
+
+        #defines the array that will contain the images
+        images = np.zeros((batch_size, self.bands, self.image_resize[0], self.image_resize[1]))
+        for i,filename in enumerate(batch_filenames):
+            with self.package.open(
+                os.path.join(self.image_path, filename)
                 ) as dataset:
                  #.read() returns a numpy array that contains the raster cell values in your file.
                 image = dataset.read()
-                image = image / 255
-                image = np.resize(self.image_resize, self.image_resample)
-        return image
+            images[i,:,:,:] = np.resize(image,self.image_resize)
+
+        #ensures channel-last orientation for the reader
+        np.moveaxis(images,1,3)
+   
+        return np.array(images)
     
     def get_dataset_size(self, mini_batch) -> int:
         dataset_size = int(np.floor(len(self.image_filenames) / float(mini_batch)))

From 047789adcd167bf797a55197cfb6a7d6b84f36c8 Mon Sep 17 00:00:00 2001
From: Sajtospoga01 <abodrogai@gmail.com>
Date: Thu, 24 Aug 2023 15:59:09 +0100
Subject: [PATCH 25/75] adds rasterio strategy test, with functional mock
 class.

---
 .../test_strategies.py                        |  92 ++++++++--------
 .../segmentation_utils/reading_strategies.py  | 103 ++++++++++++------
 2 files changed, 113 insertions(+), 82 deletions(-)

diff --git a/tests/segmentation_utils_tests.py/test_strategies.py b/tests/segmentation_utils_tests.py/test_strategies.py
index fa93012..c3d9146 100644
--- a/tests/segmentation_utils_tests.py/test_strategies.py
+++ b/tests/segmentation_utils_tests.py/test_strategies.py
@@ -1,21 +1,27 @@
 import os
+
 import numpy as np
-from PIL import Image
 import rasterio
+from PIL import Image
 from pytest import MonkeyPatch
 
-from utilities.segmentation_utils.reading_strategies import RGBImageStrategy
+from utilities.segmentation_utils.reading_strategies import (
+    HyperspectralImageStrategy, MockRasterio, RGBImageStrategy)
 
 
 def test_read_batch_image_path() -> None:
-    #checking if the file is being opened and read correctly
+    # checking if the file is being opened and read correctly
     patch = MonkeyPatch()
 
     mock_filenames = ["a", "b", "c"]
 
     patch.setattr(os, "listdir", lambda x: mock_filenames)
 
-    patch.setattr(Image, "open", lambda _: Image.fromarray(np.ones((224, 224, 3)).astype(np.uint8)))
+    patch.setattr(
+        Image,
+        "open",
+        lambda _: Image.fromarray(np.ones((224, 224, 3)).astype(np.uint8)),
+    )
 
     image_strategy = RGBImageStrategy(
         image_path="tests/segmentation_utils_tests/test_strategies",
@@ -37,16 +43,20 @@ def test_read_batch_returns_nparray() -> None:
 
     patch.setattr(os, "listdir", lambda x: ["a", "b", "c"])
 
-    patch.setattr(Image, "open", lambda _: Image.fromarray(np.ones((224, 224, 3)).astype(np.uint8)))
-    
+    patch.setattr(
+        Image,
+        "open",
+        lambda _: Image.fromarray(np.ones((224, 224, 3)).astype(np.uint8)),
+    )
+
     image_strategy = RGBImageStrategy(
         image_path="tests/segmentation_utils_tests/test_strategies",
         image_size=(224, 224),
         image_resample=Image.Resampling.NEAREST,
     )
 
-    batch_size=2
-    dataset_index=0
+    batch_size = 2
+    dataset_index = 0
 
     result = image_strategy.read_batch(batch_size, dataset_index)
     assert isinstance(result, np.ndarray)
@@ -63,7 +73,12 @@ def test_get_dataset_size() -> None:
 
     patch.setattr(os, "listdir", lambda x: mock_filenames)
 
-    patch.setattr(Image, "open", lambda _: Image.fromarray(np.ones((224, 224, 3)).astype(np.uint8)))
+    #! not needed as you arent reading any image in this function
+    patch.setattr(
+        Image,
+        "open",
+        lambda _: Image.fromarray(np.ones((224, 224, 3)).astype(np.uint8)),
+    )
 
     image_strategy = RGBImageStrategy(
         image_path="tests/segmentation_utils_tests/test_strategies",
@@ -72,50 +87,33 @@ def test_get_dataset_size() -> None:
     )
     dataset = len(mock_filenames)  # number of images in the specified path
     mini_batch = 2  # number of images we want in each batch
-    expected_value = int (np.floor(dataset / float(mini_batch)))  # number of sets of images we expect
+    expected_value = int(
+        np.floor(dataset / float(mini_batch))
+    )  # number of sets of images we expect
 
     dataset_size = image_strategy.get_dataset_size(mini_batch)
     assert dataset_size == expected_value
     patch.undo()
     patch.undo()
 
-class MockRasterio():
-    
-    def __init__(self):
-        self.shape = (224, 224) #dimensions for raster data 
-        self.dtypes = ['int32'] #data type of raster data that would be returned by .open()
-        #a list containing a string representing a data type
-        #32 bit int data type
-
-    def read(self, *args, **kwargs):
-        return np.zeros(self.shape, self.dtypes[0])
-    
-    #these functions are invoked when a 'with' statement is executed
-    def __enter__(self):
-        #called at the beginning of a 'with' block
-        return self #returns instance of MockRasterio class itself
-    
-    def __exit__(self, type, value, traceback):
-        #called at the end of a 'with' block
-        pass
 
 def test_hyperspectral_open():
-        patch = MonkeyPatch()
-        mock_filenames = ["a", "b", "c"]
-        patch.setattr(os, "listdir", lambda x: mock_filenames)
-
-        def mock_open(*args, **kwargs): #local function to the test
-            #defines behaviour of mock object that replaces rasterio.open()
-            return MockRasterio()
-        
-        patch.setattr(rasterio, "open", mock_open)
-        image_path = "tests/segmentation_utils_tests/test_strategies"
-        dataset_list = []
-
-        for filename in mock_filenames:
-            file_path = os.path.join(image_path, filename)
-            dataset = rasterio.open(file_path)
-            dataset_list.append(dataset)
+    patch = MonkeyPatch()
+    mock_filenames = ["a", "b", "c"]
+    patch.setattr(os, "listdir", lambda x: mock_filenames)
+
+    image_path = "tests/segmentation_utils_tests/test_strategies"
     
-            assert dataset.shape == (224, 224)
-            assert np.array_equal (dataset.read(), np.zeros((224, 224), dtype='int32'))
+    mock_data = {
+        "n": 3,
+        "size": (224, 224),
+        "bands": 3,
+        "dtypes": ["uint8"],
+    }
+    strategy = HyperspectralImageStrategy(
+        image_path, (224, 224), package=MockRasterio(**mock_data)
+    )
+
+    read_images = strategy.read_batch(2, 0)
+
+    assert read_images.shape == (2, 224, 224, 3)
diff --git a/utilities/segmentation_utils/reading_strategies.py b/utilities/segmentation_utils/reading_strategies.py
index 42d284c..751bc29 100644
--- a/utilities/segmentation_utils/reading_strategies.py
+++ b/utilities/segmentation_utils/reading_strategies.py
@@ -1,35 +1,63 @@
 import os
-from typing import Protocol
+from types import ModuleType
+from typing import Protocol, Type, Union
 
 import numpy as np
 import rasterio
 from PIL import Image
 
 
-class IReader(Protocol):
+class MockRasterio:
+    def __init__(self, n , size, bands, dtypes):
+        self.n = n
+        self.size = size
+        self.bands = bands
+        self.dtypes = dtypes
+ 
+    def open(self, *args, **kwargs):
+        return self
+
+    @property
+    def count(self) -> int:
+        return self.bands
+
+    def read(self, *args, **kwargs):
+        return np.zeros((self.bands,self.size[0],self.size[1]), self.dtypes[0])
+
+    # these functions are invoked when a 'with' statement is executed
+    def __enter__(self):
+        # called at the beginning of a 'with' block
+        return self  # returns instance of MockRasterio class itself
+
+    def __exit__(self, type, value, traceback):
+        # called at the end of a 'with' block
+        pass
 
-    def read_batch(self, start:int, end: int) -> None:
+
+class IReader(Protocol):
+    def read_batch(self, start: int, end: int) -> None:
         ...
-    
+
     def get_dataset_size(self) -> None:
         ...
 
-class RGBImageStrategy:
 
+class RGBImageStrategy:
     def __init__(
         self,
         image_path: str,
         image_size: tuple[int, int],
-        image_resample = Image.Resampling.NEAREST,
+        image_resample=Image.Resampling.NEAREST,
     ):
         self.image_path = image_path
-        self.image_filenames = np.array(sorted(os.listdir(self.image_path))) #!update: added variable to initialiser
+        self.image_filenames = np.array(
+            sorted(os.listdir(self.image_path))
+        )  #!update: added variable to initialiser
         self.image_size = image_size
         self.image_resample = image_resample
-        
 
     def read_batch(self, batch_size, dataset_index) -> np.ndarray:
-        #read images with PIL
+        # read images with PIL
 
         for i in range(batch_size):
             image_index = i + dataset_index
@@ -43,44 +71,49 @@ def read_batch(self, batch_size, dataset_index) -> np.ndarray:
     def get_dataset_size(self, mini_batch) -> int:
         dataset_size = int(np.floor(len(self.image_filenames) / float(mini_batch)))
         return dataset_size
-    
+
+
 class HyperspectralImageStrategy:
-    #read images with rasterio
+    # read images with rasterio
     def __init__(
         self,
-        image_path:str,
-        image_resize:tuple[int,int],
-        image_resample = Image.Resampling.NEAREST,
-        package = rasterio
+        image_path: str,
+        image_resize: tuple[int, int],
+        image_resample=Image.Resampling.NEAREST,
+        package: Union[MockRasterio, ModuleType] = rasterio,
     ):
         self.image_path = image_path
         self.image_filenames = np.array(sorted(os.listdir(self.image_path)))
         self.image_resize = image_resize
         self.image_resample = image_resample
         self.package = package
-        #gets the number of bands for the dataset
-        self.bands = package.open(os.path.join(self.image_path, self.image_filenames[0])).count
-  
-    def read_batch(self, batch_size, dataset_index) -> np.ndarray:
-        #read images with rasterio
-        batch_filenames = self.image_filenames[dataset_index:dataset_index + batch_size]
-
-        #defines the array that will contain the images
-        images = np.zeros((batch_size, self.bands, self.image_resize[0], self.image_resize[1]))
-        for i,filename in enumerate(batch_filenames):
-            with self.package.open(
-                os.path.join(self.image_path, filename)
-                ) as dataset:
-                 #.read() returns a numpy array that contains the raster cell values in your file.
+        # gets the number of bands for the dataset
+        self.bands = package.open(
+            os.path.join(self.image_path, self.image_filenames[0])
+        ).count
+        print("-----------My very cool bands--------: ",self.bands)
+
+    def read_batch(self, batch_size:int, dataset_index:int) -> np.ndarray:
+        # read images with rasterio
+        batch_filenames = self.image_filenames[
+            dataset_index : dataset_index + batch_size
+        ]
+
+        # defines the array that will contain the images
+        images = np.zeros(
+            (batch_size, self.bands, self.image_resize[0], self.image_resize[1])
+        )
+        for i, filename in enumerate(batch_filenames):
+            with self.package.open(os.path.join(self.image_path, filename)) as dataset:
+                # .read() returns a numpy array that contains the raster cell values in your file.
                 image = dataset.read()
-            images[i,:,:,:] = np.resize(image,self.image_resize)
+            images[i, :, :, :] = np.resize(image, self.image_resize)
+
+        # ensures channel-last orientation for the reader
+        images = np.moveaxis(images, 1, 3)
 
-        #ensures channel-last orientation for the reader
-        np.moveaxis(images,1,3)
-   
         return np.array(images)
-    
+
     def get_dataset_size(self, mini_batch) -> int:
         dataset_size = int(np.floor(len(self.image_filenames) / float(mini_batch)))
         return dataset_size
-    
\ No newline at end of file

From 0f873618c3c4860771b5b4f60a60f5d9a18a8d07 Mon Sep 17 00:00:00 2001
From: Sajtospoga01 <abodrogai@gmail.com>
Date: Thu, 24 Aug 2023 16:20:26 +0100
Subject: [PATCH 26/75] updates strategies to store images in batches for
 proper value return

---
 .../test_strategies.py                        |  5 ++++-
 .../segmentation_utils/reading_strategies.py  | 20 +++++++++++--------
 2 files changed, 16 insertions(+), 9 deletions(-)

diff --git a/tests/segmentation_utils_tests.py/test_strategies.py b/tests/segmentation_utils_tests.py/test_strategies.py
index c3d9146..46b6d1b 100644
--- a/tests/segmentation_utils_tests.py/test_strategies.py
+++ b/tests/segmentation_utils_tests.py/test_strategies.py
@@ -31,7 +31,9 @@ def test_read_batch_image_path() -> None:
 
     batch_size = 2
     dataset_index = 0
-    image_strategy.read_batch(batch_size, dataset_index)
+    result = image_strategy.read_batch(batch_size, dataset_index)
+
+    assert result.shape == (2, 224, 224, 3)
     patch.undo()
     patch.undo()
 
@@ -60,6 +62,7 @@ def test_read_batch_returns_nparray() -> None:
 
     result = image_strategy.read_batch(batch_size, dataset_index)
     assert isinstance(result, np.ndarray)
+    assert result.shape == (2, 224, 224, 3)
 
     patch.undo()
     patch.undo()
diff --git a/utilities/segmentation_utils/reading_strategies.py b/utilities/segmentation_utils/reading_strategies.py
index 751bc29..d962223 100644
--- a/utilities/segmentation_utils/reading_strategies.py
+++ b/utilities/segmentation_utils/reading_strategies.py
@@ -8,12 +8,12 @@
 
 
 class MockRasterio:
-    def __init__(self, n , size, bands, dtypes):
+    def __init__(self, n, size, bands, dtypes):
         self.n = n
         self.size = size
         self.bands = bands
         self.dtypes = dtypes
- 
+
     def open(self, *args, **kwargs):
         return self
 
@@ -22,7 +22,7 @@ def count(self) -> int:
         return self.bands
 
     def read(self, *args, **kwargs):
-        return np.zeros((self.bands,self.size[0],self.size[1]), self.dtypes[0])
+        return np.zeros((self.bands, self.size[0], self.size[1]), self.dtypes[0])
 
     # these functions are invoked when a 'with' statement is executed
     def __enter__(self):
@@ -58,15 +58,19 @@ def __init__(
 
     def read_batch(self, batch_size, dataset_index) -> np.ndarray:
         # read images with PIL
+        batch_filenames = self.image_filenames[
+            dataset_index : dataset_index + batch_size
+        ]
+        images = np.zeros((batch_size, self.image_size[0], self.image_size[1], 3))
 
         for i in range(batch_size):
             image_index = i + dataset_index
             image = Image.open(
-                os.path.join(self.image_path, self.image_filenames[image_index])
+                os.path.join(self.image_path, batch_filenames[i])
             ).resize(self.image_size, self.image_resample)
             image = np.array(image)
-            image = image / 255
-        return image
+            images[i, :, :, :] = image
+        return images
 
     def get_dataset_size(self, mini_batch) -> int:
         dataset_size = int(np.floor(len(self.image_filenames) / float(mini_batch)))
@@ -91,9 +95,9 @@ def __init__(
         self.bands = package.open(
             os.path.join(self.image_path, self.image_filenames[0])
         ).count
-        print("-----------My very cool bands--------: ",self.bands)
+        print("-----------My very cool bands--------: ", self.bands)
 
-    def read_batch(self, batch_size:int, dataset_index:int) -> np.ndarray:
+    def read_batch(self, batch_size: int, dataset_index: int) -> np.ndarray:
         # read images with rasterio
         batch_filenames = self.image_filenames[
             dataset_index : dataset_index + batch_size

From 830828898192f5d7018ba50c57d7e48d97bc69c9 Mon Sep 17 00:00:00 2001
From: Sajtospoga01 <abodrogai@gmail.com>
Date: Thu, 24 Aug 2023 16:33:23 +0100
Subject: [PATCH 27/75] migrate mock class to a mock_classes.py package for
 better modularity

---
 tests/mock_classes.py                         | 28 +++++++++++++++++
 .../segmentation_utils/reading_strategies.py  | 31 ++-----------------
 .../segmentation_utils/tempCodeRunnerFile.py  |  2 ++
 3 files changed, 32 insertions(+), 29 deletions(-)
 create mode 100644 tests/mock_classes.py

diff --git a/tests/mock_classes.py b/tests/mock_classes.py
new file mode 100644
index 0000000..fb83225
--- /dev/null
+++ b/tests/mock_classes.py
@@ -0,0 +1,28 @@
+import numpy as np
+
+
+class MockRasterio:
+    def __init__(self, n, size, bands, dtypes):
+        self.n = n
+        self.size = size
+        self.bands = bands
+        self.dtypes = dtypes
+
+    def open(self, *args, **kwargs):
+        return self
+
+    @property
+    def count(self) -> int:
+        return self.bands
+
+    def read(self, *args, **kwargs):
+        return np.zeros((self.bands, self.size[0], self.size[1]), self.dtypes[0])
+
+    # these functions are invoked when a 'with' statement is executed
+    def __enter__(self):
+        # called at the beginning of a 'with' block
+        return self  # returns instance of MockRasterio class itself
+
+    def __exit__(self, type, value, traceback):
+        # called at the end of a 'with' block
+        pass
\ No newline at end of file
diff --git a/utilities/segmentation_utils/reading_strategies.py b/utilities/segmentation_utils/reading_strategies.py
index d962223..5973e48 100644
--- a/utilities/segmentation_utils/reading_strategies.py
+++ b/utilities/segmentation_utils/reading_strategies.py
@@ -1,37 +1,12 @@
 import os
 from types import ModuleType
-from typing import Protocol, Type, Union
+from typing import Protocol, Union
 
 import numpy as np
 import rasterio
 from PIL import Image
 
-
-class MockRasterio:
-    def __init__(self, n, size, bands, dtypes):
-        self.n = n
-        self.size = size
-        self.bands = bands
-        self.dtypes = dtypes
-
-    def open(self, *args, **kwargs):
-        return self
-
-    @property
-    def count(self) -> int:
-        return self.bands
-
-    def read(self, *args, **kwargs):
-        return np.zeros((self.bands, self.size[0], self.size[1]), self.dtypes[0])
-
-    # these functions are invoked when a 'with' statement is executed
-    def __enter__(self):
-        # called at the beginning of a 'with' block
-        return self  # returns instance of MockRasterio class itself
-
-    def __exit__(self, type, value, traceback):
-        # called at the end of a 'with' block
-        pass
+from tests.mock_classes import MockRasterio
 
 
 class IReader(Protocol):
@@ -64,7 +39,6 @@ def read_batch(self, batch_size, dataset_index) -> np.ndarray:
         images = np.zeros((batch_size, self.image_size[0], self.image_size[1], 3))
 
         for i in range(batch_size):
-            image_index = i + dataset_index
             image = Image.open(
                 os.path.join(self.image_path, batch_filenames[i])
             ).resize(self.image_size, self.image_resample)
@@ -95,7 +69,6 @@ def __init__(
         self.bands = package.open(
             os.path.join(self.image_path, self.image_filenames[0])
         ).count
-        print("-----------My very cool bands--------: ", self.bands)
 
     def read_batch(self, batch_size: int, dataset_index: int) -> np.ndarray:
         # read images with rasterio
diff --git a/utilities/segmentation_utils/tempCodeRunnerFile.py b/utilities/segmentation_utils/tempCodeRunnerFile.py
index f53b566..7870c72 100644
--- a/utilities/segmentation_utils/tempCodeRunnerFile.py
+++ b/utilities/segmentation_utils/tempCodeRunnerFile.py
@@ -1 +1,3 @@
+
+#! I strongly recommend to not use this haha. not the most appropriate way of testing
 flowgenerator.__read_batch(start = start_index, end= end_index)
\ No newline at end of file

From 98364760a69e81b9c3b4479ce20567c24fbf668e Mon Sep 17 00:00:00 2001
From: Sajtospoga01 <abodrogai@gmail.com>
Date: Thu, 24 Aug 2023 17:05:40 +0100
Subject: [PATCH 28/75] remove mock class dependency, for better flexibility,
 adds marker flags for pytest for tests to run in staging or production to
 toml file

---
 pyproject.toml                                |  5 +++
 tests/mock_classes.py                         | 28 ----------------
 .../test_flowreader.py                        |  7 ++++
 .../test_strategies.py                        | 32 +++++++++++++++++--
 .../segmentation_utils/reading_strategies.py  |  7 ++--
 5 files changed, 43 insertions(+), 36 deletions(-)
 delete mode 100644 tests/mock_classes.py

diff --git a/pyproject.toml b/pyproject.toml
index 20b4059..922d766 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -26,6 +26,11 @@ dependencies = [
 [tool.setuptools]
 packages = ["utilities"]
 
+[tool.pytest.ini_options]
+markers = [
+    "staging: Mark a test as part of the staging environment",
+    "production: Mark a test as part of the production environment",
+]
 
 [project.optional-dependencies]
 dev = [
diff --git a/tests/mock_classes.py b/tests/mock_classes.py
deleted file mode 100644
index fb83225..0000000
--- a/tests/mock_classes.py
+++ /dev/null
@@ -1,28 +0,0 @@
-import numpy as np
-
-
-class MockRasterio:
-    def __init__(self, n, size, bands, dtypes):
-        self.n = n
-        self.size = size
-        self.bands = bands
-        self.dtypes = dtypes
-
-    def open(self, *args, **kwargs):
-        return self
-
-    @property
-    def count(self) -> int:
-        return self.bands
-
-    def read(self, *args, **kwargs):
-        return np.zeros((self.bands, self.size[0], self.size[1]), self.dtypes[0])
-
-    # these functions are invoked when a 'with' statement is executed
-    def __enter__(self):
-        # called at the beginning of a 'with' block
-        return self  # returns instance of MockRasterio class itself
-
-    def __exit__(self, type, value, traceback):
-        # called at the end of a 'with' block
-        pass
\ No newline at end of file
diff --git a/tests/segmentation_utils_tests.py/test_flowreader.py b/tests/segmentation_utils_tests.py/test_flowreader.py
index 3676512..53382e0 100644
--- a/tests/segmentation_utils_tests.py/test_flowreader.py
+++ b/tests/segmentation_utils_tests.py/test_flowreader.py
@@ -110,3 +110,10 @@ def test_set_mini_batch_size_not_devisable() -> None:
 
     assert exc_info.value.args[0] == "The batch size must be divisible by the mini batch size"
     
+################
+# Staging tests#
+################
+
+@pytest.mark.staging
+def test_read_batch_staging() -> None:
+    pass
diff --git a/tests/segmentation_utils_tests.py/test_strategies.py b/tests/segmentation_utils_tests.py/test_strategies.py
index 46b6d1b..b2084ae 100644
--- a/tests/segmentation_utils_tests.py/test_strategies.py
+++ b/tests/segmentation_utils_tests.py/test_strategies.py
@@ -1,12 +1,38 @@
 import os
 
 import numpy as np
-import rasterio
 from PIL import Image
 from pytest import MonkeyPatch
 
 from utilities.segmentation_utils.reading_strategies import (
-    HyperspectralImageStrategy, MockRasterio, RGBImageStrategy)
+    HyperspectralImageStrategy, RGBImageStrategy)
+
+
+class MockRasterio:
+    def __init__(self, n, size, bands, dtypes):
+        self.n = n
+        self.size = size
+        self.bands = bands
+        self.dtypes = dtypes
+
+    def open(self, *args, **kwargs):
+        return self
+
+    @property
+    def count(self) -> int:
+        return self.bands
+
+    def read(self, *args, **kwargs):
+        return np.zeros((self.bands, self.size[0], self.size[1]), self.dtypes[0])
+
+    # these functions are invoked when a 'with' statement is executed
+    def __enter__(self):
+        # called at the beginning of a 'with' block
+        return self  # returns instance of MockRasterio class itself
+
+    def __exit__(self, type, value, traceback):
+        # called at the end of a 'with' block
+        pass
 
 
 def test_read_batch_image_path() -> None:
@@ -106,7 +132,7 @@ def test_hyperspectral_open():
     patch.setattr(os, "listdir", lambda x: mock_filenames)
 
     image_path = "tests/segmentation_utils_tests/test_strategies"
-    
+
     mock_data = {
         "n": 3,
         "size": (224, 224),
diff --git a/utilities/segmentation_utils/reading_strategies.py b/utilities/segmentation_utils/reading_strategies.py
index 5973e48..f9a4120 100644
--- a/utilities/segmentation_utils/reading_strategies.py
+++ b/utilities/segmentation_utils/reading_strategies.py
@@ -1,13 +1,10 @@
 import os
-from types import ModuleType
-from typing import Protocol, Union
+from typing import Any, Protocol
 
 import numpy as np
 import rasterio
 from PIL import Image
 
-from tests.mock_classes import MockRasterio
-
 
 class IReader(Protocol):
     def read_batch(self, start: int, end: int) -> None:
@@ -58,7 +55,7 @@ def __init__(
         image_path: str,
         image_resize: tuple[int, int],
         image_resample=Image.Resampling.NEAREST,
-        package: Union[MockRasterio, ModuleType] = rasterio,
+        package: Any = rasterio,
     ):
         self.image_path = image_path
         self.image_filenames = np.array(sorted(os.listdir(self.image_path)))

From 4e4eba6c88be2e53c8a78b8730ac8609c16d902e Mon Sep 17 00:00:00 2001
From: Ayleen Sohaib <2684413S@student.gla.ac.uk>
Date: Thu, 24 Aug 2023 23:47:43 +0100
Subject: [PATCH 29/75] updated the read_batch function to use strategies

---
 utilities/segmentation_utils/flowreader.py | 111 ++++++++-------------
 1 file changed, 42 insertions(+), 69 deletions(-)

diff --git a/utilities/segmentation_utils/flowreader.py b/utilities/segmentation_utils/flowreader.py
index 491a7c6..b7cdec2 100644
--- a/utilities/segmentation_utils/flowreader.py
+++ b/utilities/segmentation_utils/flowreader.py
@@ -16,6 +16,8 @@
 from utilities.segmentation_utils import ImagePreprocessor
 from utilities.segmentation_utils.constants import ImageOrdering
 from utilities.segmentation_utils.ImagePreprocessor import IPreprocessor
+from utilities.segmentation_utils.reading_strategies import IReader
+
 
 
 class FlowGenerator:
@@ -266,6 +268,8 @@ def __init__(
         output_size: tuple[int, int],
         channel_mask: list[bool],
         num_classes: int,
+        input_strategy: IReader,
+        output_strategy: IReader,
         shuffle: bool = True,
         batch_size: int = 2,
         preprocessing_enabled: bool = True,
@@ -277,6 +281,7 @@ def __init__(
         weights_path: Optional[str] = None,
         shuffle_counter: int = 0,
         image_ordering: ImageOrdering = ImageOrdering.CHANNEL_LAST,
+        
     ):
         if len(output_size) != 2:
             raise ValueError("The output size has to be a tuple of length 2")
@@ -308,6 +313,9 @@ def __init__(
         self.image_filenames = np.array(sorted(os.listdir(self.image_path)))
         self.mask_filenames = np.array(sorted(os.listdir(self.mask_path)))
 
+        self.input_strategy = input_strategy
+        self.output_strategy = output_strategy
+
         # should be moved out as a strategy
         if self.read_weights:
             weights_df = pd.read_csv(self.weights_path, header=None)
@@ -387,10 +395,10 @@ def set_mini_batch_size(self, batch_size: int) -> None:
             raise ValueError("The batch size must be divisible by the mini batch size")
         self.mini_batch = batch_size
 
-    def __read_batch(self, start: int, end: int) -> None:
+    def __read_batch(self, dataset_index: int, end: int) -> None:
         # read image batch
-        batch_image_filenames = self.image_filenames[start:end]
-        batch_mask_filenames = self.mask_filenames[start:end]
+        batch_image_filenames = self.image_filenames[dataset_index:end]
+        batch_mask_filenames = self.mask_filenames[dataset_index:end]
         for image, mask in zip(batch_image_filenames, batch_mask_filenames):
             if image != mask:
                 raise ValueError("The image and mask directories do not match")
@@ -398,75 +406,40 @@ def __read_batch(self, start: int, end: int) -> None:
         # calculate number of mini batches in a batch
         n = self.batch_size // self.mini_batch
 
-        batch_images = np.zeros(
-            (
-                n,
-                self.mini_batch,
-                self.image_size[0],
-                self.image_size[1],
-                self.n_channels,
-            )
-        )
-
-        batch_masks = np.zeros(
-            (
-                n,
-                self.mini_batch,
-                self.output_reshape[0],
-                self.output_reshape[1],
-                self.num_classes,
-            )
-        )
+        batch_images = self.input_strategy.read_batch(self.batch_size, dataset_index)
+        batch_masks = self.output_strategy.read_batch(self.batch_size, dataset_index)
 
         # preprocess and assign images and masks to the batch
-        for i in range(n):
-            raw_masks = np.zeros(
-                (self.mini_batch, self.output_reshape[0], self.output_reshape[1])
-            )
-
-            for j in range(self.mini_batch):
-                image_index = i * self.mini_batch + j
-
-                image = Image.open(
-                    os.path.join(self.image_path, batch_image_filenames[image_index])
-                ).resize(self.image_size, Image.ANTIALIAS)
-
-                image = np.array(image)
-
-                mask = Image.open(
-                    os.path.join(self.mask_path, batch_mask_filenames[image_index])
-                ).resize(self.output_reshape)
-
-                mask = np.array(mask)
-                # image = image[:, :, self.channel_mask]
-
-                if self.preprocessing_enabled:
-                    if self.preprocessing_seed is None:
-                        image_seed = np.random.randint(0, 100000)
-                    else:
-                        state = np.random.RandomState(self.preprocessing_seed)
-                        image_seed = state.randint(0, 100000)
-
-                    (
-                        image,
-                        mask,
-                    ) = ImagePreprocessor.augmentation_pipeline(
-                        image,
-                        mask=mask,
-                        seed=image_seed,
-                        #!both preprocessing queues are assigned by this time
-                        image_queue=self.preprocessing_queue_image,  # type: ignore
-                        mask_queue=self.preprocessing_queue_mask,  # type: ignore
-                    )
-
-                batch_images[i, j, :, :, :] = image
-                # NOTE: this provides the flexibility required to process both
-                # column and matrix vectors
-                raw_masks[j, :, :] = mask
+    
+        if self.preprocessing_enabled:
+            for i in range(self.batch_size):
+                image = batch_images[i, ...]
+                mask = batch_masks[i, ...]
+                if self.preprocessing_seed is None:
+                    image_seed = np.random.randint(0, 100000)
+                else:
+                    state = np.random.RandomState(self.preprocessing_seed)
+                    image_seed = state.randint(0, 100000)
+                (
+                    image,
+                    mask,
+                ) = ImagePreprocessor.augmentation_pipeline(
+                    image,
+                    mask=mask,
+                    seed=image_seed,
+                    #!both preprocessing queues are assigned by this time
+                    image_queue=self.preprocessing_queue_image,  # type: ignore
+                    mask_queue=self.preprocessing_queue_mask,  # type: ignore
+                )
+                batch_images[i, ...] = image
+                batch_masks[i, ...] = mask
+
+        batch_masks = ImagePreprocessor.onehot_encode(
+            batch_masks, self.num_classes
+        )
 
-            batch_masks[i, :, :, :] = ImagePreprocessor.onehot_encode(
-                raw_masks, self.num_classes
-            )
+        batch_images = batch_images.reshape(n, self.mini_batch, batch_images.shape[1], batch_images.shape[2], batch_images.shape[3])
+        batch_masks = batch_masks.reshape(n, self.batch_size, batch_images.shape[1], batch_images[2], batch_images[3])
 
         # chaches the batch
         self.image_batch_store = batch_images

From f45d227a3dd549f040c58db49a0221b61ce01076 Mon Sep 17 00:00:00 2001
From: Sajtospoga01 <abodrogai@gmail.com>
Date: Fri, 25 Aug 2023 14:51:41 +0100
Subject: [PATCH 30/75] updates tests for flowgenerator experimental, updates
 flowreader reshaping to use tf as backend, fixes misnaming in IStrategy

---
 .../test_flowreader.py                        | 133 +++++++++++++++++-
 utilities/segmentation_utils/flowreader.py    |  33 +++--
 .../segmentation_utils/reading_strategies.py  |   4 +-
 3 files changed, 153 insertions(+), 17 deletions(-)

diff --git a/tests/segmentation_utils_tests.py/test_flowreader.py b/tests/segmentation_utils_tests.py/test_flowreader.py
index 53382e0..55b732d 100644
--- a/tests/segmentation_utils_tests.py/test_flowreader.py
+++ b/tests/segmentation_utils_tests.py/test_flowreader.py
@@ -10,11 +10,25 @@
 from utilities.segmentation_utils.flowreader import FlowGeneratorExperimental
 
 
+class DummyStrategy:
+    def __init__(self, input_shape=(512, 512, 3)):
+        self.input_shape = input_shape
+
+    def read_batch(self, batch_size: int, dataset_index: int) -> np.ndarray:
+        return np.zeros((batch_size, *self.input_shape))
+    
+    def get_dataset_size(self) -> int:
+        return 10
+
+
 def test_can_create_instance() -> None:
     patch = MonkeyPatch()
     # mock list directory
     patch.setattr(os, "listdir", lambda x: ["a", "b", "c"])
 
+    input_strategy = DummyStrategy()
+    output_strategy = DummyStrategy()
+
     # create generator instance
     generator = FlowGeneratorExperimental(
         image_path="tests/segmentation_utils_tests/flow_reader_test",
@@ -22,7 +36,9 @@ def test_can_create_instance() -> None:
         image_size=(512, 512),
         output_size=(512,512),
         num_classes=7,
-        channel_mask= [True,True,True]
+        channel_mask= [True,True,True],
+        input_strategy=input_strategy,
+        output_strategy=output_strategy,
     )
     pass
 
@@ -31,6 +47,8 @@ def test_set_preprocessing_pipeline() -> None:
     # mock list directory
     patch.setattr(os, "listdir", lambda x: ["a", "b", "c"])
 
+    input_strategy = DummyStrategy()
+    output_strategy = DummyStrategy()
     # create generator instance
     generator = FlowGeneratorExperimental(
         image_path="tests/segmentation_utils_tests/flow_reader_test",
@@ -38,7 +56,9 @@ def test_set_preprocessing_pipeline() -> None:
         image_size=(512, 512),
         output_size=(512,512),
         num_classes=7,
-        channel_mask= [True,True,True]
+        channel_mask= [True,True,True],
+        input_strategy=input_strategy,
+        output_strategy=output_strategy,
     )
 
     image_queue = ImagePreprocessor.PreprocessingQueue(queue=[])
@@ -54,6 +74,9 @@ def test_set_mini_batch_size() -> None:
     # mock list directory
     patch.setattr(os, "listdir", lambda x: ["a", "b", "c"])
 
+    input_strategy = DummyStrategy()
+    output_strategy = DummyStrategy()
+
     # create generator instance
     generator = FlowGeneratorExperimental(
         image_path="tests/segmentation_utils_tests/flow_reader_test",
@@ -61,7 +84,9 @@ def test_set_mini_batch_size() -> None:
         image_size=(512, 512),
         output_size=(512,512),
         num_classes=7,
-        channel_mask= [True,True,True]
+        channel_mask= [True,True,True],
+        input_strategy=input_strategy,
+        output_strategy=output_strategy,
     )
 
     generator.set_mini_batch_size(2)
@@ -73,6 +98,9 @@ def test_set_mini_batch_size_too_large() -> None:
     # mock list directory
     patch.setattr(os, "listdir", lambda x: ["a", "b", "c"])
 
+    input_strategy = DummyStrategy()
+    output_strategy = DummyStrategy()
+
     # create generator instance
     generator = FlowGeneratorExperimental(
         image_path="tests/segmentation_utils_tests/flow_reader_test",
@@ -80,7 +108,9 @@ def test_set_mini_batch_size_too_large() -> None:
         image_size=(512, 512),
         output_size=(512,512),
         num_classes=7,
-        channel_mask= [True,True,True]
+        channel_mask= [True,True,True],
+        input_strategy=input_strategy,
+        output_strategy=output_strategy,
     )
     with pytest.raises(ValueError) as exc_info:
         generator.set_mini_batch_size(5)
@@ -94,6 +124,9 @@ def test_set_mini_batch_size_not_devisable() -> None:
     # mock list directory
     patch.setattr(os, "listdir", lambda x: ["a", "b", "c"])
 
+    input_strategy = DummyStrategy()
+    output_strategy = DummyStrategy()
+
     # create generator instance
     generator = FlowGeneratorExperimental(
         image_path="tests/segmentation_utils_tests/flow_reader_test",
@@ -102,18 +135,104 @@ def test_set_mini_batch_size_not_devisable() -> None:
         output_size=(512,512),
         num_classes=7,
         channel_mask= [True,True,True],
-        batch_size=3
+        batch_size=3,
+        input_strategy=input_strategy,
+        output_strategy=output_strategy,
         
     )
     with pytest.raises(ValueError) as exc_info:
         generator.set_mini_batch_size(2)
 
     assert exc_info.value.args[0] == "The batch size must be divisible by the mini batch size"
-    
+
+def test_read_batch_get_item() -> None:
+    patch = MonkeyPatch()
+    # mock list directory
+    patch.setattr(os, "listdir", lambda x: ["a", "b", "c"])
+
+    input_strategy = DummyStrategy()
+    output_strategy = DummyStrategy(input_shape=(512, 512))
+
+    # create generator instance
+
+    generator = FlowGeneratorExperimental(
+        image_path="tests/segmentation_utils_tests/flow_reader_test",
+        mask_path="tests/segmentation_utils_tests/flow_reader_test",
+        batch_size=2,
+        image_size=(512, 512),
+        output_size=(512,512),
+        num_classes=7,
+        channel_mask= [True,True,True],
+        input_strategy=input_strategy,
+        output_strategy=output_strategy,
+    )
+
+    batch = generator[0]
+
+    assert batch[0].shape == (2, 512, 512, 3)
+    assert batch[1].shape == (2, 512, 512, 7)
+
+def test_read_batch_get_item_expand_dim_fail() -> None:
+    with pytest.raises(ValueError) as exc_info:
+        patch = MonkeyPatch()
+        # mock list directory
+        patch.setattr(os, "listdir", lambda x: ["a", "b", "c"])
+
+        input_strategy = DummyStrategy()
+        output_strategy = DummyStrategy(input_shape=(512, 512, 1))
+
+        # create generator instance
+
+        generator = FlowGeneratorExperimental(
+            image_path="tests/segmentation_utils_tests/flow_reader_test",
+            mask_path="tests/segmentation_utils_tests/flow_reader_test",
+            batch_size=2,
+            image_size=(512, 512),
+            output_size=(512,512),
+            num_classes=7,
+            channel_mask= [True,True,True],
+            input_strategy=input_strategy,
+            output_strategy=output_strategy,
+        )
+
+        batch = generator[0]
+
 ################
 # Staging tests#
 ################
 
 @pytest.mark.staging
 def test_read_batch_staging() -> None:
-    pass
+    classes = 7
+    n_images = 4
+    #prepare test files
+    for i in range(n_images):
+        image = np.random.randint(0,255,(512,512,3))
+        mask = np.random.randint(0,classes,(512,512))
+        np.save(f"tests/segmentation_utils_tests/flow_reader_test/image_{i}",image)
+        np.save(f"tests/segmentation_utils_tests/flow_reader_test/mask_{i}",mask)
+
+
+    dummy_model = tf.keras.models.Sequential(
+        [
+            tf.keras.layers.Conv2D(input_shape=(512, 512, 3), filters = 3, kernel_size=(3,3), padding="same"),
+            tf.keras.layers.Conv2D(classes, kernel_size=(1,1), padding="same"),
+        ]
+    )
+    dummy_model.compile(
+        optimizer="adam",
+        loss=tf.keras.losses.CategoricalCrossentropy(from_logits=True),
+        metrics=["accuracy"],
+    )
+    
+    
+
+    reader = FlowGeneratorExperimental(
+        image_path="tests/segmentation_utils_tests/flow_reader_test",
+        mask_path="tests/segmentation_utils_tests/flow_reader_test",
+        image_size=(512, 512),
+        output_size=(512,512),
+        num_classes=classes,
+        channel_mask= [True,True,True],
+    )
+
diff --git a/utilities/segmentation_utils/flowreader.py b/utilities/segmentation_utils/flowreader.py
index b7cdec2..384c6d3 100644
--- a/utilities/segmentation_utils/flowreader.py
+++ b/utilities/segmentation_utils/flowreader.py
@@ -8,6 +8,7 @@
 
 import numpy as np
 import pandas as pd
+import tensorflow as tf
 from keras.preprocessing.image import ImageDataGenerator
 from keras.utils import Sequence
 from PIL import Image
@@ -19,7 +20,6 @@
 from utilities.segmentation_utils.reading_strategies import IReader
 
 
-
 class FlowGenerator:
     """
     Initializes the flow generator object,
@@ -281,7 +281,6 @@ def __init__(
         weights_path: Optional[str] = None,
         shuffle_counter: int = 0,
         image_ordering: ImageOrdering = ImageOrdering.CHANNEL_LAST,
-        
     ):
         if len(output_size) != 2:
             raise ValueError("The output size has to be a tuple of length 2")
@@ -409,8 +408,10 @@ def __read_batch(self, dataset_index: int, end: int) -> None:
         batch_images = self.input_strategy.read_batch(self.batch_size, dataset_index)
         batch_masks = self.output_strategy.read_batch(self.batch_size, dataset_index)
 
+        print(batch_masks.shape)
+
         # preprocess and assign images and masks to the batch
-    
+
         if self.preprocessing_enabled:
             for i in range(self.batch_size):
                 image = batch_images[i, ...]
@@ -434,12 +435,28 @@ def __read_batch(self, dataset_index: int, end: int) -> None:
                 batch_images[i, ...] = image
                 batch_masks[i, ...] = mask
 
-        batch_masks = ImagePreprocessor.onehot_encode(
-            batch_masks, self.num_classes
-        )
+        batch_masks = ImagePreprocessor.onehot_encode(batch_masks, self.num_classes)
 
-        batch_images = batch_images.reshape(n, self.mini_batch, batch_images.shape[1], batch_images.shape[2], batch_images.shape[3])
-        batch_masks = batch_masks.reshape(n, self.batch_size, batch_images.shape[1], batch_images[2], batch_images[3])
+        batch_images = tf.reshape(
+            batch_images,
+            (
+                n,
+                self.mini_batch,
+                self.image_size[0],
+                self.image_size[1],
+                self.n_channels,
+            ),
+        )
+        batch_masks = tf.reshape(
+            batch_masks,
+            (
+                n,
+                self.mini_batch,
+                self.output_size[0],
+                self.output_size[1],
+                self.num_classes,
+            ),
+        )
 
         # chaches the batch
         self.image_batch_store = batch_images
diff --git a/utilities/segmentation_utils/reading_strategies.py b/utilities/segmentation_utils/reading_strategies.py
index f9a4120..29e27cb 100644
--- a/utilities/segmentation_utils/reading_strategies.py
+++ b/utilities/segmentation_utils/reading_strategies.py
@@ -7,10 +7,10 @@
 
 
 class IReader(Protocol):
-    def read_batch(self, start: int, end: int) -> None:
+    def read_batch(self, batch_size: int, dataset_index: int) -> np.ndarray:
         ...
 
-    def get_dataset_size(self) -> None:
+    def get_dataset_size(self) -> int:
         ...
 
 

From dca7bb013e016ef469037b063a6db7fee923f9c2 Mon Sep 17 00:00:00 2001
From: Sajtospoga01 <abodrogai@gmail.com>
Date: Fri, 25 Aug 2023 14:59:22 +0100
Subject: [PATCH 31/75] update development pipeline to filter staging for now

---
 .github/workflows/development.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/development.yml b/.github/workflows/development.yml
index aa8c832..6eaba5f 100644
--- a/.github/workflows/development.yml
+++ b/.github/workflows/development.yml
@@ -45,7 +45,7 @@ jobs:
         if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
     - name: Test with pytest
       run: |
-        python -m pytest
+        python -m pytest -v -m "not staging"
 
   devops:
     needs: test

From 692bb8980cfbdff9087227fddfec999c99d1dfed Mon Sep 17 00:00:00 2001
From: Sajtospoga01 <abodrogai@gmail.com>
Date: Fri, 25 Aug 2023 15:06:11 +0100
Subject: [PATCH 32/75] remove unnecessary files

---
 utilities/segmentation_utils/tempCodeRunnerFile.py | 3 ---
 1 file changed, 3 deletions(-)
 delete mode 100644 utilities/segmentation_utils/tempCodeRunnerFile.py

diff --git a/utilities/segmentation_utils/tempCodeRunnerFile.py b/utilities/segmentation_utils/tempCodeRunnerFile.py
deleted file mode 100644
index 7870c72..0000000
--- a/utilities/segmentation_utils/tempCodeRunnerFile.py
+++ /dev/null
@@ -1,3 +0,0 @@
-
-#! I strongly recommend to not use this haha. not the most appropriate way of testing
-flowgenerator.__read_batch(start = start_index, end= end_index)
\ No newline at end of file

From a83bb424b5d9989af7deb156d7ce2f3d3c6493e6 Mon Sep 17 00:00:00 2001
From: Sajtospoga01 <abodrogai@gmail.com>
Date: Fri, 25 Aug 2023 15:11:47 +0100
Subject: [PATCH 33/75] update code cov not to include staging

---
 .github/workflows/development.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/development.yml b/.github/workflows/development.yml
index 6eaba5f..8e04cb9 100644
--- a/.github/workflows/development.yml
+++ b/.github/workflows/development.yml
@@ -71,7 +71,7 @@ jobs:
       run: |
         mkdir -p ./coverage
         pip install pytest-cov pytest-mock
-        python -m pytest --cov --cov-report=xml:./coverage/coverage.xml
+        python -m pytest -m "not staging" --cov --cov-report=xml:./coverage/coverage.xml
     
     - name: Upload coverage
       uses: codecov/codecov-action@v3

From cfcef39dc62cf0fd34947cc692dfbc7eafddf3eb Mon Sep 17 00:00:00 2001
From: Sajtospoga01 <abodrogai@gmail.com>
Date: Fri, 25 Aug 2023 15:18:06 +0100
Subject: [PATCH 34/75] adds development markers to image_preprocessor_test,
 and test_flowreader

---
 .../image_preprocessor_test.py                | 10 +--
 .../test_flowreader.py                        | 85 +++++++++++--------
 2 files changed, 54 insertions(+), 41 deletions(-)

diff --git a/tests/segmentation_utils_tests.py/image_preprocessor_test.py b/tests/segmentation_utils_tests.py/image_preprocessor_test.py
index b596aaf..4841892 100644
--- a/tests/segmentation_utils_tests.py/image_preprocessor_test.py
+++ b/tests/segmentation_utils_tests.py/image_preprocessor_test.py
@@ -31,7 +31,7 @@ def test_image_onehot_encoder_column() -> None:
     )
     assert np.array_equal(one_hot_image, onehot_test)
 
-
+@pytest.mark.development
 def test_image_onehot_encoder_squarematrix() -> None:
     # predifining input variables
     n_classes = 2
@@ -58,7 +58,7 @@ def test_image_onehot_encoder_squarematrix() -> None:
     )
     assert np.array_equal(one_hot_image, onehot_test)
 
-
+@pytest.mark.development
 def test_image_augmentation_pipeline_squarematrix() -> None:
     # predifining input variables
     image = np.zeros((512, 512, 3))
@@ -89,7 +89,7 @@ def test_image_augmentation_pipeline_squarematrix() -> None:
     assert image_new.shape == (512, 512, 3)
     assert mask_new.shape == (256, 256, 1)
 
-
+@pytest.mark.development
 def test_processing_queue() -> None:
     # creating dummy queues
     
@@ -102,7 +102,7 @@ def test_processing_queue() -> None:
 
     assert image_queue.queue[0].kwargs["seed"] == new_seed
 
-
+@pytest.mark.development
 def test_generate_default_queue() -> None:
     # creating default queues
     image_queue, mask_queue = ImagePreprocessor.generate_default_queue()
@@ -111,7 +111,7 @@ def test_generate_default_queue() -> None:
     assert image_queue.get_queue_length() == 5
     assert mask_queue.get_queue_length() == 2
 
-
+@pytest.mark.development
 def test_flatten() -> None:
     image = np.zeros((512, 512, 3))
     image = tf.convert_to_tensor(image)
diff --git a/tests/segmentation_utils_tests.py/test_flowreader.py b/tests/segmentation_utils_tests.py/test_flowreader.py
index 55b732d..39cb0ac 100644
--- a/tests/segmentation_utils_tests.py/test_flowreader.py
+++ b/tests/segmentation_utils_tests.py/test_flowreader.py
@@ -16,11 +16,12 @@ def __init__(self, input_shape=(512, 512, 3)):
 
     def read_batch(self, batch_size: int, dataset_index: int) -> np.ndarray:
         return np.zeros((batch_size, *self.input_shape))
-    
+
     def get_dataset_size(self) -> int:
         return 10
 
 
+@pytest.mark.development
 def test_can_create_instance() -> None:
     patch = MonkeyPatch()
     # mock list directory
@@ -34,14 +35,16 @@ def test_can_create_instance() -> None:
         image_path="tests/segmentation_utils_tests/flow_reader_test",
         mask_path="tests/segmentation_utils_tests/flow_reader_test",
         image_size=(512, 512),
-        output_size=(512,512),
+        output_size=(512, 512),
         num_classes=7,
-        channel_mask= [True,True,True],
+        channel_mask=[True, True, True],
         input_strategy=input_strategy,
         output_strategy=output_strategy,
     )
     pass
 
+
+@pytest.mark.development
 def test_set_preprocessing_pipeline() -> None:
     patch = MonkeyPatch()
     # mock list directory
@@ -54,9 +57,9 @@ def test_set_preprocessing_pipeline() -> None:
         image_path="tests/segmentation_utils_tests/flow_reader_test",
         mask_path="tests/segmentation_utils_tests/flow_reader_test",
         image_size=(512, 512),
-        output_size=(512,512),
+        output_size=(512, 512),
         num_classes=7,
-        channel_mask= [True,True,True],
+        channel_mask=[True, True, True],
         input_strategy=input_strategy,
         output_strategy=output_strategy,
     )
@@ -64,11 +67,11 @@ def test_set_preprocessing_pipeline() -> None:
     image_queue = ImagePreprocessor.PreprocessingQueue(queue=[])
     mask_queue = ImagePreprocessor.PreprocessingQueue(queue=[])
 
-    generator.set_preprocessing_pipeline(
-        image_queue,mask_queue
-    )
+    generator.set_preprocessing_pipeline(image_queue, mask_queue)
     pass
 
+
+@pytest.mark.development
 def test_set_mini_batch_size() -> None:
     patch = MonkeyPatch()
     # mock list directory
@@ -82,9 +85,9 @@ def test_set_mini_batch_size() -> None:
         image_path="tests/segmentation_utils_tests/flow_reader_test",
         mask_path="tests/segmentation_utils_tests/flow_reader_test",
         image_size=(512, 512),
-        output_size=(512,512),
+        output_size=(512, 512),
         num_classes=7,
-        channel_mask= [True,True,True],
+        channel_mask=[True, True, True],
         input_strategy=input_strategy,
         output_strategy=output_strategy,
     )
@@ -92,8 +95,9 @@ def test_set_mini_batch_size() -> None:
     generator.set_mini_batch_size(2)
     assert generator.mini_batch == 2
 
-def test_set_mini_batch_size_too_large() -> None:
 
+@pytest.mark.development
+def test_set_mini_batch_size_too_large() -> None:
     patch = MonkeyPatch()
     # mock list directory
     patch.setattr(os, "listdir", lambda x: ["a", "b", "c"])
@@ -106,20 +110,23 @@ def test_set_mini_batch_size_too_large() -> None:
         image_path="tests/segmentation_utils_tests/flow_reader_test",
         mask_path="tests/segmentation_utils_tests/flow_reader_test",
         image_size=(512, 512),
-        output_size=(512,512),
+        output_size=(512, 512),
         num_classes=7,
-        channel_mask= [True,True,True],
+        channel_mask=[True, True, True],
         input_strategy=input_strategy,
         output_strategy=output_strategy,
     )
     with pytest.raises(ValueError) as exc_info:
         generator.set_mini_batch_size(5)
 
-    assert exc_info.value.args[0] == "The mini batch size cannot be larger than the batch size"
+    assert (
+        exc_info.value.args[0]
+        == "The mini batch size cannot be larger than the batch size"
+    )
 
 
+@pytest.mark.development
 def test_set_mini_batch_size_not_devisable() -> None:
-
     patch = MonkeyPatch()
     # mock list directory
     patch.setattr(os, "listdir", lambda x: ["a", "b", "c"])
@@ -132,19 +139,23 @@ def test_set_mini_batch_size_not_devisable() -> None:
         image_path="tests/segmentation_utils_tests/flow_reader_test",
         mask_path="tests/segmentation_utils_tests/flow_reader_test",
         image_size=(512, 512),
-        output_size=(512,512),
+        output_size=(512, 512),
         num_classes=7,
-        channel_mask= [True,True,True],
+        channel_mask=[True, True, True],
         batch_size=3,
         input_strategy=input_strategy,
         output_strategy=output_strategy,
-        
     )
     with pytest.raises(ValueError) as exc_info:
         generator.set_mini_batch_size(2)
 
-    assert exc_info.value.args[0] == "The batch size must be divisible by the mini batch size"
+    assert (
+        exc_info.value.args[0]
+        == "The batch size must be divisible by the mini batch size"
+    )
+
 
+@pytest.mark.development
 def test_read_batch_get_item() -> None:
     patch = MonkeyPatch()
     # mock list directory
@@ -160,9 +171,9 @@ def test_read_batch_get_item() -> None:
         mask_path="tests/segmentation_utils_tests/flow_reader_test",
         batch_size=2,
         image_size=(512, 512),
-        output_size=(512,512),
+        output_size=(512, 512),
         num_classes=7,
-        channel_mask= [True,True,True],
+        channel_mask=[True, True, True],
         input_strategy=input_strategy,
         output_strategy=output_strategy,
     )
@@ -172,6 +183,8 @@ def test_read_batch_get_item() -> None:
     assert batch[0].shape == (2, 512, 512, 3)
     assert batch[1].shape == (2, 512, 512, 7)
 
+
+@pytest.mark.development
 def test_read_batch_get_item_expand_dim_fail() -> None:
     with pytest.raises(ValueError) as exc_info:
         patch = MonkeyPatch()
@@ -188,35 +201,38 @@ def test_read_batch_get_item_expand_dim_fail() -> None:
             mask_path="tests/segmentation_utils_tests/flow_reader_test",
             batch_size=2,
             image_size=(512, 512),
-            output_size=(512,512),
+            output_size=(512, 512),
             num_classes=7,
-            channel_mask= [True,True,True],
+            channel_mask=[True, True, True],
             input_strategy=input_strategy,
             output_strategy=output_strategy,
         )
 
         batch = generator[0]
 
+
 ################
 # Staging tests#
 ################
 
+
 @pytest.mark.staging
 def test_read_batch_staging() -> None:
     classes = 7
     n_images = 4
-    #prepare test files
+    # prepare test files
     for i in range(n_images):
-        image = np.random.randint(0,255,(512,512,3))
-        mask = np.random.randint(0,classes,(512,512))
-        np.save(f"tests/segmentation_utils_tests/flow_reader_test/image_{i}",image)
-        np.save(f"tests/segmentation_utils_tests/flow_reader_test/mask_{i}",mask)
-
+        image = np.random.randint(0, 255, (512, 512, 3))
+        mask = np.random.randint(0, classes, (512, 512))
+        np.save(f"tests/segmentation_utils_tests/flow_reader_test/image_{i}", image)
+        np.save(f"tests/segmentation_utils_tests/flow_reader_test/mask_{i}", mask)
 
     dummy_model = tf.keras.models.Sequential(
         [
-            tf.keras.layers.Conv2D(input_shape=(512, 512, 3), filters = 3, kernel_size=(3,3), padding="same"),
-            tf.keras.layers.Conv2D(classes, kernel_size=(1,1), padding="same"),
+            tf.keras.layers.Conv2D(
+                input_shape=(512, 512, 3), filters=3, kernel_size=(3, 3), padding="same"
+            ),
+            tf.keras.layers.Conv2D(classes, kernel_size=(1, 1), padding="same"),
         ]
     )
     dummy_model.compile(
@@ -224,15 +240,12 @@ def test_read_batch_staging() -> None:
         loss=tf.keras.losses.CategoricalCrossentropy(from_logits=True),
         metrics=["accuracy"],
     )
-    
-    
 
     reader = FlowGeneratorExperimental(
         image_path="tests/segmentation_utils_tests/flow_reader_test",
         mask_path="tests/segmentation_utils_tests/flow_reader_test",
         image_size=(512, 512),
-        output_size=(512,512),
+        output_size=(512, 512),
         num_classes=classes,
-        channel_mask= [True,True,True],
+        channel_mask=[True, True, True],
     )
-

From 522d54f02d1ae9b987effa2fbe828e7d0275d01f Mon Sep 17 00:00:00 2001
From: Sajtospoga01 <abodrogai@gmail.com>
Date: Fri, 25 Aug 2023 15:48:54 +0100
Subject: [PATCH 35/75] updates flowgenerator tests to cover additional cases,
 removes unnecessary print statements from FlowGeneratorExperimental

---
 .../test_flowreader.py                        | 121 ++++++++++++++++++
 utilities/segmentation_utils/flowreader.py    |  11 +-
 2 files changed, 126 insertions(+), 6 deletions(-)

diff --git a/tests/segmentation_utils_tests.py/test_flowreader.py b/tests/segmentation_utils_tests.py/test_flowreader.py
index 39cb0ac..0dae7c9 100644
--- a/tests/segmentation_utils_tests.py/test_flowreader.py
+++ b/tests/segmentation_utils_tests.py/test_flowreader.py
@@ -7,6 +7,7 @@
 from pytest import MonkeyPatch
 
 from utilities.segmentation_utils import ImagePreprocessor
+from utilities.segmentation_utils.constants import ImageOrdering
 from utilities.segmentation_utils.flowreader import FlowGeneratorExperimental
 
 
@@ -183,6 +184,126 @@ def test_read_batch_get_item() -> None:
     assert batch[0].shape == (2, 512, 512, 3)
     assert batch[1].shape == (2, 512, 512, 7)
 
+@pytest.mark.development
+def test_read_batch_get_item_diff_minibatch() -> None:
+    patch = MonkeyPatch()
+    # mock list directory
+    patch.setattr(os, "listdir", lambda x: ["a", "b", "c"])
+
+    input_strategy = DummyStrategy()
+    output_strategy = DummyStrategy(input_shape=(512, 512))
+
+    # create generator instance
+
+    generator = FlowGeneratorExperimental(
+        image_path="tests/segmentation_utils_tests/flow_reader_test",
+        mask_path="tests/segmentation_utils_tests/flow_reader_test",
+        batch_size=2,
+        image_size=(512, 512),
+        output_size=(512, 512),
+        num_classes=7,
+        channel_mask=[True, True, True],
+        input_strategy=input_strategy,
+        output_strategy=output_strategy,
+    )
+
+    generator.set_mini_batch_size(1)
+
+    batch = generator[0]
+    
+
+    assert batch[0].shape == (1, 512, 512, 3)
+    assert batch[1].shape == (1, 512, 512, 7)
+
+
+@pytest.mark.development
+def test_read_batch_get_item_channel_first() -> None:
+    patch = MonkeyPatch()
+    # mock list directory
+    patch.setattr(os, "listdir", lambda x: ["a", "b", "c"])
+
+    input_strategy = DummyStrategy()
+    output_strategy = DummyStrategy(input_shape=(512, 512))
+
+    # create generator instance
+
+    generator = FlowGeneratorExperimental(
+        image_path="tests/segmentation_utils_tests/flow_reader_test",
+        mask_path="tests/segmentation_utils_tests/flow_reader_test",
+        batch_size=2,
+        image_size=(512, 512),
+        output_size=(512, 512),
+        num_classes=7,
+        channel_mask=[True, True, True],
+        input_strategy=input_strategy,
+        output_strategy=output_strategy,
+        image_ordering=ImageOrdering.CHANNEL_FIRST,
+    )
+
+    batch = generator[0]
+
+    assert batch[0].shape == (2, 3, 512, 512)
+    assert batch[1].shape == (2, 7, 512, 512)
+
+
+@pytest.mark.development
+def test_read_batch_get_item_column() -> None:
+    patch = MonkeyPatch()
+    # mock list directory
+    patch.setattr(os, "listdir", lambda x: ["a", "b", "c"])
+
+    input_strategy = DummyStrategy()
+    output_strategy = DummyStrategy(input_shape=(512, 512))
+
+    # create generator instance
+
+    generator = FlowGeneratorExperimental(
+        image_path="tests/segmentation_utils_tests/flow_reader_test",
+        mask_path="tests/segmentation_utils_tests/flow_reader_test",
+        batch_size=2,
+        image_size=(512, 512),
+        output_size=(512 * 512, 1),
+        num_classes=7,
+        channel_mask=[True, True, True],
+        input_strategy=input_strategy,
+        output_strategy=output_strategy,
+    )
+
+    batch = generator[0]
+
+    assert batch[0].shape == (2, 512, 512, 3)
+    assert batch[1].shape == (2, 512 * 512, 7)
+
+
+@pytest.mark.development
+def test_read_batch_get_item_column_channel_first() -> None:
+    patch = MonkeyPatch()
+    # mock list directory
+    patch.setattr(os, "listdir", lambda x: ["a", "b", "c"])
+
+    input_strategy = DummyStrategy()
+    output_strategy = DummyStrategy(input_shape=(512, 512))
+
+    # create generator instance
+
+    generator = FlowGeneratorExperimental(
+        image_path="tests/segmentation_utils_tests/flow_reader_test",
+        mask_path="tests/segmentation_utils_tests/flow_reader_test",
+        batch_size=2,
+        image_size=(512, 512),
+        output_size=(512 * 512, 1),
+        num_classes=7,
+        channel_mask=[True, True, True],
+        input_strategy=input_strategy,
+        output_strategy=output_strategy,
+        image_ordering=ImageOrdering.CHANNEL_FIRST,
+    )
+
+    batch = generator[0]
+
+    assert batch[0].shape == (2, 3, 512, 512)
+    assert batch[1].shape == (2, 7, 512 * 512)
+
 
 @pytest.mark.development
 def test_read_batch_get_item_expand_dim_fail() -> None:
diff --git a/utilities/segmentation_utils/flowreader.py b/utilities/segmentation_utils/flowreader.py
index 384c6d3..a764a6b 100644
--- a/utilities/segmentation_utils/flowreader.py
+++ b/utilities/segmentation_utils/flowreader.py
@@ -408,8 +408,6 @@ def __read_batch(self, dataset_index: int, end: int) -> None:
         batch_images = self.input_strategy.read_batch(self.batch_size, dataset_index)
         batch_masks = self.output_strategy.read_batch(self.batch_size, dataset_index)
 
-        print(batch_masks.shape)
-
         # preprocess and assign images and masks to the batch
 
         if self.preprocessing_enabled:
@@ -485,13 +483,14 @@ def __getitem__(self, index) -> tuple[np.ndarray, np.ndarray]:
 
         batch_images = self.image_batch_store[store_index, ...]  # type: ignore
         batch_masks = self.mask_batch_store[store_index, ...]  # type: ignore
+
         if self.column_vector:
-            batch_masks = np.reshape(
+            batch_masks = tf.reshape(
                 batch_masks,
                 (
-                    self.mini_batch,
-                    batch_masks.shape[1] * batch_masks[2],
-                    self.num_classes,
+                    batch_masks.shape[0],
+                    batch_masks.shape[1] * batch_masks.shape[2],
+                    batch_masks.shape[3],
                 ),
             )
 

From 4bf77e9842ce08a262c64ed739cc0079d9f0d5a0 Mon Sep 17 00:00:00 2001
From: Sajtospoga01 <abodrogai@gmail.com>
Date: Fri, 25 Aug 2023 16:47:47 +0100
Subject: [PATCH 36/75] update protocol of strategies

---
 utilities/segmentation_utils/reading_strategies.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/utilities/segmentation_utils/reading_strategies.py b/utilities/segmentation_utils/reading_strategies.py
index 29e27cb..08723c4 100644
--- a/utilities/segmentation_utils/reading_strategies.py
+++ b/utilities/segmentation_utils/reading_strategies.py
@@ -10,7 +10,7 @@ class IReader(Protocol):
     def read_batch(self, batch_size: int, dataset_index: int) -> np.ndarray:
         ...
 
-    def get_dataset_size(self) -> int:
+    def get_dataset_size(self, minibatch:int) -> int:
         ...
 
 

From dfd0f1f8b733c3b78128febac49e9a46fb3bec7d Mon Sep 17 00:00:00 2001
From: Ayleen Sohaib <2684413S@student.gla.ac.uk>
Date: Fri, 25 Aug 2023 17:06:16 +0100
Subject: [PATCH 37/75] added shuffle method and getter for image_size

---
 .../segmentation_utils/reading_strategies.py  | 31 ++++++++++++++++++-
 1 file changed, 30 insertions(+), 1 deletion(-)

diff --git a/utilities/segmentation_utils/reading_strategies.py b/utilities/segmentation_utils/reading_strategies.py
index 08723c4..9078c4a 100644
--- a/utilities/segmentation_utils/reading_strategies.py
+++ b/utilities/segmentation_utils/reading_strategies.py
@@ -10,9 +10,14 @@ class IReader(Protocol):
     def read_batch(self, batch_size: int, dataset_index: int) -> np.ndarray:
         ...
 
-    def get_dataset_size(self, minibatch:int) -> int:
+    def get_dataset_size(self) -> int:
         ...
 
+    def get_image_size(self) -> int:
+        ...
+
+    def __shuffle_filenames__(self) -> None:
+        ...
 
 class RGBImageStrategy:
     def __init__(
@@ -46,6 +51,18 @@ def read_batch(self, batch_size, dataset_index) -> np.ndarray:
     def get_dataset_size(self, mini_batch) -> int:
         dataset_size = int(np.floor(len(self.image_filenames) / float(mini_batch)))
         return dataset_size
+    
+    def get_image_size(self) -> int:
+        return self.image_size
+    
+    def __shuffle_filenames__(self) -> None:
+        if self.shuffle:
+            state = np.random.RandomState(self.seed + self.shuffle_counter)
+            self.shuffle_counter += 1
+            shuffled_indices = state.permutation(len(self.image_filenames))
+            shuffled_indices = shuffled_indices.astype(int)
+            for array in self.linked_data:
+                array = array[shuffled_indices]
 
 
 class HyperspectralImageStrategy:
@@ -91,3 +108,15 @@ def read_batch(self, batch_size: int, dataset_index: int) -> np.ndarray:
     def get_dataset_size(self, mini_batch) -> int:
         dataset_size = int(np.floor(len(self.image_filenames) / float(mini_batch)))
         return dataset_size
+    
+    def get_image_size(self) -> int:
+        return self.image_size
+    
+    def __shuffle_filenames__(self) -> None:
+        if self.shuffle:
+            state = np.random.RandomState(self.seed + self.shuffle_counter)
+            self.shuffle_counter += 1
+            shuffled_indices = state.permutation(len(self.image_filenames))
+            shuffled_indices = shuffled_indices.astype(int)
+            for array in self.linked_data:
+                array = array[shuffled_indices]

From 5ea374ab484a002a8e5787e2265edc3ef4f45a6f Mon Sep 17 00:00:00 2001
From: Sajtospoga01 <abodrogai@gmail.com>
Date: Fri, 25 Aug 2023 17:22:11 +0100
Subject: [PATCH 38/75] update strategies with shuffle and get-image-size
 methods, update flowgenerator to remove unnecessary arguments, update tests
 accordingly

---
 .../test_flowreader.py                        |  76 +++++------
 utilities/segmentation_utils/flowreader.py    | 120 +++++-------------
 .../segmentation_utils/reading_strategies.py  |  47 +++----
 3 files changed, 81 insertions(+), 162 deletions(-)

diff --git a/tests/segmentation_utils_tests.py/test_flowreader.py b/tests/segmentation_utils_tests.py/test_flowreader.py
index 0dae7c9..5bdcd36 100644
--- a/tests/segmentation_utils_tests.py/test_flowreader.py
+++ b/tests/segmentation_utils_tests.py/test_flowreader.py
@@ -18,9 +18,15 @@ def __init__(self, input_shape=(512, 512, 3)):
     def read_batch(self, batch_size: int, dataset_index: int) -> np.ndarray:
         return np.zeros((batch_size, *self.input_shape))
 
-    def get_dataset_size(self) -> int:
+    def get_dataset_size(self,minibatch) -> int:
         return 10
 
+    def get_image_size(self) -> tuple[int, int]:
+        return self.input_shape[:2]
+
+    def shuffle_filenames(self, seed: int) -> None:
+        pass
+
 
 @pytest.mark.development
 def test_can_create_instance() -> None:
@@ -33,10 +39,6 @@ def test_can_create_instance() -> None:
 
     # create generator instance
     generator = FlowGeneratorExperimental(
-        image_path="tests/segmentation_utils_tests/flow_reader_test",
-        mask_path="tests/segmentation_utils_tests/flow_reader_test",
-        image_size=(512, 512),
-        output_size=(512, 512),
         num_classes=7,
         channel_mask=[True, True, True],
         input_strategy=input_strategy,
@@ -55,10 +57,6 @@ def test_set_preprocessing_pipeline() -> None:
     output_strategy = DummyStrategy()
     # create generator instance
     generator = FlowGeneratorExperimental(
-        image_path="tests/segmentation_utils_tests/flow_reader_test",
-        mask_path="tests/segmentation_utils_tests/flow_reader_test",
-        image_size=(512, 512),
-        output_size=(512, 512),
         num_classes=7,
         channel_mask=[True, True, True],
         input_strategy=input_strategy,
@@ -83,10 +81,6 @@ def test_set_mini_batch_size() -> None:
 
     # create generator instance
     generator = FlowGeneratorExperimental(
-        image_path="tests/segmentation_utils_tests/flow_reader_test",
-        mask_path="tests/segmentation_utils_tests/flow_reader_test",
-        image_size=(512, 512),
-        output_size=(512, 512),
         num_classes=7,
         channel_mask=[True, True, True],
         input_strategy=input_strategy,
@@ -108,10 +102,6 @@ def test_set_mini_batch_size_too_large() -> None:
 
     # create generator instance
     generator = FlowGeneratorExperimental(
-        image_path="tests/segmentation_utils_tests/flow_reader_test",
-        mask_path="tests/segmentation_utils_tests/flow_reader_test",
-        image_size=(512, 512),
-        output_size=(512, 512),
         num_classes=7,
         channel_mask=[True, True, True],
         input_strategy=input_strategy,
@@ -137,10 +127,6 @@ def test_set_mini_batch_size_not_devisable() -> None:
 
     # create generator instance
     generator = FlowGeneratorExperimental(
-        image_path="tests/segmentation_utils_tests/flow_reader_test",
-        mask_path="tests/segmentation_utils_tests/flow_reader_test",
-        image_size=(512, 512),
-        output_size=(512, 512),
         num_classes=7,
         channel_mask=[True, True, True],
         batch_size=3,
@@ -168,11 +154,7 @@ def test_read_batch_get_item() -> None:
     # create generator instance
 
     generator = FlowGeneratorExperimental(
-        image_path="tests/segmentation_utils_tests/flow_reader_test",
-        mask_path="tests/segmentation_utils_tests/flow_reader_test",
         batch_size=2,
-        image_size=(512, 512),
-        output_size=(512, 512),
         num_classes=7,
         channel_mask=[True, True, True],
         input_strategy=input_strategy,
@@ -184,6 +166,7 @@ def test_read_batch_get_item() -> None:
     assert batch[0].shape == (2, 512, 512, 3)
     assert batch[1].shape == (2, 512, 512, 7)
 
+
 @pytest.mark.development
 def test_read_batch_get_item_diff_minibatch() -> None:
     patch = MonkeyPatch()
@@ -196,11 +179,7 @@ def test_read_batch_get_item_diff_minibatch() -> None:
     # create generator instance
 
     generator = FlowGeneratorExperimental(
-        image_path="tests/segmentation_utils_tests/flow_reader_test",
-        mask_path="tests/segmentation_utils_tests/flow_reader_test",
         batch_size=2,
-        image_size=(512, 512),
-        output_size=(512, 512),
         num_classes=7,
         channel_mask=[True, True, True],
         input_strategy=input_strategy,
@@ -210,7 +189,6 @@ def test_read_batch_get_item_diff_minibatch() -> None:
     generator.set_mini_batch_size(1)
 
     batch = generator[0]
-    
 
     assert batch[0].shape == (1, 512, 512, 3)
     assert batch[1].shape == (1, 512, 512, 7)
@@ -228,11 +206,7 @@ def test_read_batch_get_item_channel_first() -> None:
     # create generator instance
 
     generator = FlowGeneratorExperimental(
-        image_path="tests/segmentation_utils_tests/flow_reader_test",
-        mask_path="tests/segmentation_utils_tests/flow_reader_test",
         batch_size=2,
-        image_size=(512, 512),
-        output_size=(512, 512),
         num_classes=7,
         channel_mask=[True, True, True],
         input_strategy=input_strategy,
@@ -258,15 +232,12 @@ def test_read_batch_get_item_column() -> None:
     # create generator instance
 
     generator = FlowGeneratorExperimental(
-        image_path="tests/segmentation_utils_tests/flow_reader_test",
-        mask_path="tests/segmentation_utils_tests/flow_reader_test",
         batch_size=2,
-        image_size=(512, 512),
-        output_size=(512 * 512, 1),
         num_classes=7,
         channel_mask=[True, True, True],
         input_strategy=input_strategy,
         output_strategy=output_strategy,
+        is_column=True,
     )
 
     batch = generator[0]
@@ -287,16 +258,13 @@ def test_read_batch_get_item_column_channel_first() -> None:
     # create generator instance
 
     generator = FlowGeneratorExperimental(
-        image_path="tests/segmentation_utils_tests/flow_reader_test",
-        mask_path="tests/segmentation_utils_tests/flow_reader_test",
         batch_size=2,
-        image_size=(512, 512),
-        output_size=(512 * 512, 1),
         num_classes=7,
         channel_mask=[True, True, True],
         input_strategy=input_strategy,
         output_strategy=output_strategy,
         image_ordering=ImageOrdering.CHANNEL_FIRST,
+        is_column=True,
     )
 
     batch = generator[0]
@@ -318,11 +286,7 @@ def test_read_batch_get_item_expand_dim_fail() -> None:
         # create generator instance
 
         generator = FlowGeneratorExperimental(
-            image_path="tests/segmentation_utils_tests/flow_reader_test",
-            mask_path="tests/segmentation_utils_tests/flow_reader_test",
             batch_size=2,
-            image_size=(512, 512),
-            output_size=(512, 512),
             num_classes=7,
             channel_mask=[True, True, True],
             input_strategy=input_strategy,
@@ -332,6 +296,26 @@ def test_read_batch_get_item_expand_dim_fail() -> None:
         batch = generator[0]
 
 
+def test_raises_error_not_compatible_shape() -> None:
+    with pytest.raises(ValueError) as exc_info:
+        patch = MonkeyPatch()
+        # mock list directory
+        patch.setattr(os, "listdir", lambda x: ["a", "b", "c"])
+
+        input_strategy = DummyStrategy()
+        output_strategy = DummyStrategy(input_shape=(512, 200, 1))
+
+        # create generator instance
+
+        generator = FlowGeneratorExperimental(
+            batch_size=2,
+            num_classes=7,
+            channel_mask=[True, True, True],
+            input_strategy=input_strategy,
+            output_strategy=output_strategy,
+        )
+
+
 ################
 # Staging tests#
 ################
diff --git a/utilities/segmentation_utils/flowreader.py b/utilities/segmentation_utils/flowreader.py
index a764a6b..b1fcb66 100644
--- a/utilities/segmentation_utils/flowreader.py
+++ b/utilities/segmentation_utils/flowreader.py
@@ -255,21 +255,16 @@ class FlowGeneratorExperimental(Sequence):
     
     Raises
     ------
-    :ValueError: if the names of the images and masks do not match
     :ValueError: if the output size is not a tuple of length 2
     :ValueError: if the output size is not a square matrix or a column vector
     """
 
     def __init__(
         self,
-        image_path: str,
-        mask_path: str,
-        image_size: tuple[int, int],
-        output_size: tuple[int, int],
-        channel_mask: list[bool],
-        num_classes: int,
         input_strategy: IReader,
         output_strategy: IReader,
+        channel_mask: list[bool],
+        num_classes: int,
         shuffle: bool = True,
         batch_size: int = 2,
         preprocessing_enabled: bool = True,
@@ -277,24 +272,16 @@ def __init__(
         preprocessing_seed: Optional[int] = None,
         preprocessing_queue_image: IPreprocessor = ImagePreprocessor.generate_image_queue(),
         preprocessing_queue_mask: IPreprocessor = ImagePreprocessor.generate_mask_queue(),
-        read_weights: bool = False,
-        weights_path: Optional[str] = None,
-        shuffle_counter: int = 0,
         image_ordering: ImageOrdering = ImageOrdering.CHANNEL_LAST,
+        is_column:bool = False,
     ):
-        if len(output_size) != 2:
-            raise ValueError("The output size has to be a tuple of length 2")
-        if output_size[1] != 1 and output_size[0] != output_size[1]:
-            raise ValueError(
-                "The output size has to be a square matrix or a column vector"
-            )
-
-        self.image_path = image_path
-        self.mask_path = mask_path
+        
+        self.input_strategy = input_strategy
+        self.output_strategy = output_strategy
         self.batch_size = batch_size
         self.mini_batch = batch_size
-        self.image_size = image_size
-        self.output_size = output_size
+        self.image_size = input_strategy.get_image_size()
+        self.output_size = output_strategy.get_image_size()
         self.channel_mask = np.array(channel_mask)
         self.n_channels = np.sum(channel_mask)
         self.num_classes = num_classes
@@ -302,61 +289,28 @@ def __init__(
         self.seed = seed
         self.preprocessing_enabled = preprocessing_enabled
         self.preprocessing_seed = preprocessing_seed
-        self.read_weights = read_weights
-        self.weights_path = weights_path
+
         self.preprocessing_queue_image = preprocessing_queue_image
         self.preprocessing_queue_mask = preprocessing_queue_mask
-        self.shuffle_counter = shuffle_counter
+        
         self.image_ordering = image_ordering
+        self.is_column = is_column
 
-        self.image_filenames = np.array(sorted(os.listdir(self.image_path)))
-        self.mask_filenames = np.array(sorted(os.listdir(self.mask_path)))
-
-        self.input_strategy = input_strategy
-        self.output_strategy = output_strategy
-
-        # should be moved out as a strategy
-        if self.read_weights:
-            weights_df = pd.read_csv(self.weights_path, header=None)
-            weights_np = weights_df.to_numpy()
-            print(weights_np.shape)
-            # sort the numpy array by the first column
-            weights_np = weights_np[weights_np[:, 0].argsort()]
-
-            print(weights_np)
-            self.weights = weights_np[:, 1:].astype(np.float64)
-            weight_names = weights_np[:, 0]
-            for mask, weight_name in zip(self.mask_filenames, weight_names):
-                if mask != weight_name:
-                    raise ValueError("The mask and weight directories do not match")
-
-        self.linked_data = [self.image_filenames, self.mask_filenames]
-        if self.read_weights:
-            self.linked_data.append(self.weights)
-
-        self.__shuffle_filenames()
-        self.dataset_size = self.__len__()
-
-        print("Validating dataset...")
-        for i_name, m_name in tqdm(zip(self.image_filenames, self.mask_filenames)):
-            if i_name != m_name:
-                raise ValueError("The image and mask directories do not match")
+        
 
         self.image_batch_store = None
         self.mask_batch_store = None
         self.validity_index = 0
+        self.shuffle_counter = 0
 
-        if self.output_size[1] == 1:
-            # only enters if the output is a column vector
-            # such no need to define it otherwise
-            dimension = math.sqrt(self.output_size[0])
-            self.output_reshape = (int(dimension), int(dimension))
-            self.column_vector = True
-        else:
-            self.output_reshape = self.output_size
-            self.column_vector = False
+        self.__shuffle_filenames()
 
-        print("Reading images from: ", self.image_path)
+        if len(self.output_size) != 2:
+            raise ValueError("The output size has to be a tuple of length 2")
+        if self.output_size[1] != 1 and self.output_size[0] != self.output_size[1]:
+            raise ValueError(
+                "The output size has to be a square matrix or a column vector"
+            )
 
     def set_preprocessing_pipeline(
         self,
@@ -394,13 +348,8 @@ def set_mini_batch_size(self, batch_size: int) -> None:
             raise ValueError("The batch size must be divisible by the mini batch size")
         self.mini_batch = batch_size
 
-    def __read_batch(self, dataset_index: int, end: int) -> None:
+    def __read_batch(self, dataset_index: int) -> None:
         # read image batch
-        batch_image_filenames = self.image_filenames[dataset_index:end]
-        batch_mask_filenames = self.mask_filenames[dataset_index:end]
-        for image, mask in zip(batch_image_filenames, batch_mask_filenames):
-            if image != mask:
-                raise ValueError("The image and mask directories do not match")
 
         # calculate number of mini batches in a batch
         n = self.batch_size // self.mini_batch
@@ -463,17 +412,17 @@ def __read_batch(self, dataset_index: int, end: int) -> None:
         # required to check when to read the next batch
 
     def __len__(self) -> int:
-        return int(np.floor(len(self.image_filenames) / float(self.mini_batch)))
+        return self.input_strategy.get_dataset_size(self.mini_batch)
 
     def __getitem__(self, index) -> tuple[np.ndarray, np.ndarray]:
         # check if the batch is already cached
-        index = index % self.dataset_size
+        index = index % self.__len__()
 
         if index < self.validity_index - self.batch_size // self.mini_batch:
             self.validity_index = 0
 
         if index == self.validity_index:
-            self.__read_batch(index * self.batch_size, (index + 1) * self.batch_size)
+            self.__read_batch(index * self.batch_size)
             self.validity_index = (self.batch_size // self.mini_batch) + index
 
         # slices new batch
@@ -484,7 +433,7 @@ def __getitem__(self, index) -> tuple[np.ndarray, np.ndarray]:
         batch_images = self.image_batch_store[store_index, ...]  # type: ignore
         batch_masks = self.mask_batch_store[store_index, ...]  # type: ignore
 
-        if self.column_vector:
+        if self.is_column:
             batch_masks = tf.reshape(
                 batch_masks,
                 (
@@ -498,24 +447,15 @@ def __getitem__(self, index) -> tuple[np.ndarray, np.ndarray]:
             batch_images = np.moveaxis(batch_images, -1, 1)
             batch_masks = np.moveaxis(batch_masks, -1, 1)
 
-        if self.read_weights:
-            batch_weights = self.weights[
-                index * self.batch_size : (index + 1) * self.batch_size, ...
-            ]
 
-            return batch_images, batch_masks, batch_weights
-        else:
-            return batch_images, batch_masks
+        return batch_images, batch_masks
 
     def on_epoch_end(self) -> None:
         # Shuffle image and mask filenames
         self.__shuffle_filenames()
 
     def __shuffle_filenames(self) -> None:
-        if self.shuffle:
-            state = np.random.RandomState(self.seed + self.shuffle_counter)
-            self.shuffle_counter += 1
-            shuffled_indices = state.permutation(len(self.image_filenames))
-            shuffled_indices = shuffled_indices.astype(int)
-            for array in self.linked_data:
-                array = array[shuffled_indices]
+        new_seed = self.seed + self.shuffle_counter
+        self.input_strategy.shuffle_filenames(new_seed)
+        self.output_strategy.shuffle_filenames(new_seed)
+        self.shuffle_counter += 1
diff --git a/utilities/segmentation_utils/reading_strategies.py b/utilities/segmentation_utils/reading_strategies.py
index 9078c4a..7451fd5 100644
--- a/utilities/segmentation_utils/reading_strategies.py
+++ b/utilities/segmentation_utils/reading_strategies.py
@@ -10,15 +10,16 @@ class IReader(Protocol):
     def read_batch(self, batch_size: int, dataset_index: int) -> np.ndarray:
         ...
 
-    def get_dataset_size(self) -> int:
+    def get_dataset_size(self,minibatch:int) -> int:
         ...
 
-    def get_image_size(self) -> int:
+    def get_image_size(self) -> tuple[int,int]:
         ...
 
-    def __shuffle_filenames__(self) -> None:
+    def shuffle_filenames(self,seed:int) -> None:
         ...
 
+
 class RGBImageStrategy:
     def __init__(
         self,
@@ -51,18 +52,15 @@ def read_batch(self, batch_size, dataset_index) -> np.ndarray:
     def get_dataset_size(self, mini_batch) -> int:
         dataset_size = int(np.floor(len(self.image_filenames) / float(mini_batch)))
         return dataset_size
-    
-    def get_image_size(self) -> int:
+
+    def get_image_size(self) -> tuple[int,int]:
         return self.image_size
-    
-    def __shuffle_filenames__(self) -> None:
-        if self.shuffle:
-            state = np.random.RandomState(self.seed + self.shuffle_counter)
-            self.shuffle_counter += 1
-            shuffled_indices = state.permutation(len(self.image_filenames))
-            shuffled_indices = shuffled_indices.astype(int)
-            for array in self.linked_data:
-                array = array[shuffled_indices]
+
+    def shuffle_filenames(self,seed:int) -> None:
+        state = np.random.RandomState(seed)
+        shuffled_indices = state.permutation(len(self.image_filenames))
+        shuffled_indices = shuffled_indices.astype(int)
+        self.image_filenames = self.image_filenames[shuffled_indices]
 
 
 class HyperspectralImageStrategy:
@@ -108,15 +106,12 @@ def read_batch(self, batch_size: int, dataset_index: int) -> np.ndarray:
     def get_dataset_size(self, mini_batch) -> int:
         dataset_size = int(np.floor(len(self.image_filenames) / float(mini_batch)))
         return dataset_size
-    
-    def get_image_size(self) -> int:
-        return self.image_size
-    
-    def __shuffle_filenames__(self) -> None:
-        if self.shuffle:
-            state = np.random.RandomState(self.seed + self.shuffle_counter)
-            self.shuffle_counter += 1
-            shuffled_indices = state.permutation(len(self.image_filenames))
-            shuffled_indices = shuffled_indices.astype(int)
-            for array in self.linked_data:
-                array = array[shuffled_indices]
+
+    def get_image_size(self) -> tuple[int,int]:
+        return self.image_resize
+
+    def shuffle_filenames(self,seed:int) -> None:
+        state = np.random.RandomState(seed)
+        shuffled_indices = state.permutation(len(self.image_filenames))
+        shuffled_indices = shuffled_indices.astype(int)
+        self.image_filenames = self.image_filenames[shuffled_indices]

From 51900d8ad612619504b72dcff8c2079c2301fccf Mon Sep 17 00:00:00 2001
From: Ayleen Sohaib <2684413S@student.gla.ac.uk>
Date: Fri, 25 Aug 2023 18:22:08 +0100
Subject: [PATCH 39/75] added more tests, renamed image_resize to image_size in
 HyperspectralImageStrategy, added development marker to pyproject.toml

---
 pyproject.toml                                |   1 +
 .../test_strategies.py                        | 222 +++++++++++++++++-
 .../segmentation_utils/reading_strategies.py  |  10 +-
 3 files changed, 216 insertions(+), 17 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index 922d766..77f08e4 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -30,6 +30,7 @@ packages = ["utilities"]
 markers = [
     "staging: Mark a test as part of the staging environment",
     "production: Mark a test as part of the production environment",
+    "development: Mark a test as part of the development environment",
 ]
 
 [project.optional-dependencies]
diff --git a/tests/segmentation_utils_tests.py/test_strategies.py b/tests/segmentation_utils_tests.py/test_strategies.py
index b2084ae..209c3bf 100644
--- a/tests/segmentation_utils_tests.py/test_strategies.py
+++ b/tests/segmentation_utils_tests.py/test_strategies.py
@@ -2,6 +2,7 @@
 
 import numpy as np
 from PIL import Image
+import pytest
 from pytest import MonkeyPatch
 
 from utilities.segmentation_utils.reading_strategies import (
@@ -34,7 +35,7 @@ def __exit__(self, type, value, traceback):
         # called at the end of a 'with' block
         pass
 
-
+@pytest.mark.development
 def test_read_batch_image_path() -> None:
     # checking if the file is being opened and read correctly
     patch = MonkeyPatch()
@@ -63,7 +64,7 @@ def test_read_batch_image_path() -> None:
     patch.undo()
     patch.undo()
 
-
+@pytest.mark.development
 def test_read_batch_returns_nparray() -> None:
     # checking if the returned value is a numpy array
 
@@ -93,8 +94,8 @@ def test_read_batch_returns_nparray() -> None:
     patch.undo()
     patch.undo()
 
-
-def test_get_dataset_size() -> None:
+@pytest.mark.development
+def test_RGB_get_dataset_size() -> None:
     # checking if the calculation is done correctly
     patch = MonkeyPatch()
 
@@ -102,20 +103,13 @@ def test_get_dataset_size() -> None:
 
     patch.setattr(os, "listdir", lambda x: mock_filenames)
 
-    #! not needed as you arent reading any image in this function
-    patch.setattr(
-        Image,
-        "open",
-        lambda _: Image.fromarray(np.ones((224, 224, 3)).astype(np.uint8)),
-    )
-
     image_strategy = RGBImageStrategy(
         image_path="tests/segmentation_utils_tests/test_strategies",
         image_size=(224, 224),
         image_resample=Image.Resampling.NEAREST,
     )
     dataset = len(mock_filenames)  # number of images in the specified path
-    mini_batch = 2  # number of images we want in each batch
+    mini_batch = 2  # number of images we want in each batch 
     expected_value = int(
         np.floor(dataset / float(mini_batch))
     )  # number of sets of images we expect
@@ -125,7 +119,33 @@ def test_get_dataset_size() -> None:
     patch.undo()
     patch.undo()
 
+@pytest.mark.development
+def test_Hyperspectral_get_dataset_size() -> None:
+    # checking if the calculation is done correctly
+    patch = MonkeyPatch()
+
+    mock_filenames = ["a", "b", "c"]
+
+    patch.setattr(os, "listdir", lambda x: mock_filenames)
+
+    image_strategy = HyperspectralImageStrategy(
+        image_path="tests/segmentation_utils_tests/test_strategies",
+        image_size=(224, 224),
+        package=MockRasterio(n=3, size=(224, 224), bands=3, dtypes=["uint8"]),
+    )
 
+    dataset = len(mock_filenames)  # number of images in the specified path
+    mini_batch = 2  # number of images we want in each batch 
+    expected_value = int(
+        np.floor(dataset / float(mini_batch))
+    )  # number of sets of images we expect
+
+    dataset_size = image_strategy.get_dataset_size(mini_batch)
+    assert dataset_size == expected_value
+    patch.undo()
+    patch.undo()
+
+@pytest.mark.development
 def test_hyperspectral_open():
     patch = MonkeyPatch()
     mock_filenames = ["a", "b", "c"]
@@ -146,3 +166,181 @@ def test_hyperspectral_open():
     read_images = strategy.read_batch(2, 0)
 
     assert read_images.shape == (2, 224, 224, 3)
+
+@pytest.mark.development
+def test_empty_batch():
+    patch = MonkeyPatch()
+
+    mock_filenames = ["a", "b", "c"]
+
+    patch.setattr(os, "listdir", lambda x: mock_filenames)
+
+    patch.setattr(
+        Image,
+        "open",
+        lambda _: Image.fromarray(np.ones((224, 224, 3)).astype(np.uint8)),
+    )
+
+    image_strategy = RGBImageStrategy(
+        image_path="tests/segmentation_utils_tests/test_strategies",
+        image_size=(224, 224),
+        image_resample=Image.Resampling.NEAREST,
+    )
+
+    batch_size = 0
+    dataset_index = 0
+    result = image_strategy.read_batch(batch_size, dataset_index)
+
+    assert result.shape == (0, 224, 224, 3) #0 indicates there are no images in the batch
+    patch.undo()
+    patch.undo()
+
+@pytest.mark.development
+def test_out_of_bounds_index():
+    patch = MonkeyPatch()
+
+    mock_filenames = ["a", "b", "c"]
+
+    patch.setattr(os, "listdir", lambda x: mock_filenames)
+
+    patch.setattr(
+        Image,
+        "open",
+        lambda _: Image.fromarray(np.ones((224, 224, 3)).astype(np.uint8)),
+    )
+
+    image_strategy = RGBImageStrategy(
+        image_path="tests/segmentation_utils_tests/test_strategies",
+        image_size=(224, 224),
+        image_resample=Image.Resampling.NEAREST,
+    )
+
+    batch_size = 2 #not an empty batch
+    dataset_index = len(image_strategy.image_filenames) #out of bounds index 
+
+    try:
+        result = image_strategy.read_batch(batch_size, dataset_index)
+        assert True
+    
+    except IndexError:
+        pass
+    patch.undo()
+    patch.undo()
+
+@pytest.mark.development
+def test_batch_slicing():
+    patch = MonkeyPatch()
+
+    mock_filenames = ["a" for _ in range(20)]
+
+    patch.setattr(os, "listdir", lambda x: mock_filenames)
+
+    patch.setattr(
+        Image,
+        "open",
+        lambda _: Image.fromarray(np.ones((224, 224, 3)).astype(np.uint8)),
+    )
+
+    image_strategy = RGBImageStrategy(
+        image_path="tests/segmentation_utils_tests/test_strategies",
+        image_size=(224, 224),
+        image_resample=Image.Resampling.NEAREST,
+    )
+
+    batch_size = 10
+    dataset_index = 2
+    result = image_strategy.read_batch(batch_size, dataset_index) 
+    assert result.shape[0] == batch_size #compare the size of returned data with batch_size 
+    patch.undo()
+    patch.undo()
+
+@pytest.mark.development
+def test_RGB_get_image_index():
+    patch = MonkeyPatch()
+
+    mock_filenames = ["a" for _ in range(20)]
+
+    patch.setattr(os, "listdir", lambda x: mock_filenames)
+
+    image_strategy = RGBImageStrategy(
+        image_path="tests/segmentation_utils_tests/test_strategies",
+        image_size=(224, 224),
+        image_resample=Image.Resampling.NEAREST,
+    )
+
+    result = image_strategy.get_image_size(
+    )
+    assert result == (224,224)
+
+@pytest.mark.development
+def test_HyperSpectral_get_image_index():
+    patch = MonkeyPatch()
+
+    mock_filenames = ["a" for _ in range(20)]
+
+    patch.setattr(os, "listdir", lambda x: mock_filenames)
+
+    image_strategy = HyperspectralImageStrategy(
+        image_path="tests/segmentation_utils_tests/test_strategies",
+        image_size=(224, 224),
+        package=MockRasterio(n=3, size=(224, 224), bands=3, dtypes=["uint8"])
+    )
+
+    result = image_strategy.get_image_size(
+    )
+    assert result == (224,224)
+
+
+def test_RGB_shuffle():
+    patch = MonkeyPatch()
+
+    mock_filenames = [str(i) for i in range(20)]
+
+    patch.setattr(os, "listdir", lambda x: mock_filenames)
+
+    image_strategy_1 = RGBImageStrategy(
+        image_path="tests/segmentation_utils_tests/test_strategies",
+        image_size=(224, 224),
+        image_resample=Image.Resampling.NEAREST,
+    )
+
+    image_strategy_2 = RGBImageStrategy(
+        image_path="tests/segmentation_utils_tests/test_strategies",
+        image_size=(224, 224),
+        image_resample=Image.Resampling.NEAREST,
+    )
+
+    n = 100
+
+    for i in range(n):
+        image_strategy_1.shuffle_filenames(i)
+        image_strategy_2.shuffle_filenames(i)
+
+    assert np.array_equal(image_strategy_1.image_filenames, image_strategy_2.image_filenames)
+
+def test_Hyperspectral_shuffle():
+    patch = MonkeyPatch()
+
+    mock_filenames = [str(i) for i in range(20)]
+
+    patch.setattr(os, "listdir", lambda x: mock_filenames)
+
+    image_strategy_1 = HyperspectralImageStrategy(
+        image_path="tests/segmentation_utils_tests/test_strategies",
+        image_size=(224, 224),
+        package=MockRasterio(n=3, size=(224, 224), bands=3, dtypes=["uint8"])
+    )
+
+    image_strategy_2 = HyperspectralImageStrategy(
+        image_path="tests/segmentation_utils_tests/test_strategies",
+        image_size=(224, 224),
+        package=MockRasterio(n=3, size=(224, 224), bands=3, dtypes=["uint8"])
+    )
+
+    n = 100
+
+    for i in range(n):
+        image_strategy_1.shuffle_filenames(i)
+        image_strategy_2.shuffle_filenames(i)
+
+    assert np.array_equal(image_strategy_1.image_filenames, image_strategy_2.image_filenames)
\ No newline at end of file
diff --git a/utilities/segmentation_utils/reading_strategies.py b/utilities/segmentation_utils/reading_strategies.py
index 7451fd5..586af7d 100644
--- a/utilities/segmentation_utils/reading_strategies.py
+++ b/utilities/segmentation_utils/reading_strategies.py
@@ -68,13 +68,13 @@ class HyperspectralImageStrategy:
     def __init__(
         self,
         image_path: str,
-        image_resize: tuple[int, int],
+        image_size: tuple[int, int],
         image_resample=Image.Resampling.NEAREST,
         package: Any = rasterio,
     ):
         self.image_path = image_path
         self.image_filenames = np.array(sorted(os.listdir(self.image_path)))
-        self.image_resize = image_resize
+        self.image_size = image_size
         self.image_resample = image_resample
         self.package = package
         # gets the number of bands for the dataset
@@ -90,13 +90,13 @@ def read_batch(self, batch_size: int, dataset_index: int) -> np.ndarray:
 
         # defines the array that will contain the images
         images = np.zeros(
-            (batch_size, self.bands, self.image_resize[0], self.image_resize[1])
+            (batch_size, self.bands, self.image_size[0], self.image_size[1])
         )
         for i, filename in enumerate(batch_filenames):
             with self.package.open(os.path.join(self.image_path, filename)) as dataset:
                 # .read() returns a numpy array that contains the raster cell values in your file.
                 image = dataset.read()
-            images[i, :, :, :] = np.resize(image, self.image_resize)
+            images[i, :, :, :] = np.resize(image, self.image_size)
 
         # ensures channel-last orientation for the reader
         images = np.moveaxis(images, 1, 3)
@@ -108,7 +108,7 @@ def get_dataset_size(self, mini_batch) -> int:
         return dataset_size
 
     def get_image_size(self) -> tuple[int,int]:
-        return self.image_resize
+        return self.image_size
 
     def shuffle_filenames(self,seed:int) -> None:
         state = np.random.RandomState(seed)

From 579891817b179bb05207d37c68830ca031660661 Mon Sep 17 00:00:00 2001
From: Sajtospoga01 <abodrogai@gmail.com>
Date: Fri, 25 Aug 2023 19:10:56 +0100
Subject: [PATCH 40/75] adds grayscale detection to RGBImageStrategy.read_batch

---
 utilities/segmentation_utils/reading_strategies.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/utilities/segmentation_utils/reading_strategies.py b/utilities/segmentation_utils/reading_strategies.py
index 586af7d..d1aa2d0 100644
--- a/utilities/segmentation_utils/reading_strategies.py
+++ b/utilities/segmentation_utils/reading_strategies.py
@@ -33,6 +33,7 @@ def __init__(
         )  #!update: added variable to initialiser
         self.image_size = image_size
         self.image_resample = image_resample
+        self.is_color = True
 
     def read_batch(self, batch_size, dataset_index) -> np.ndarray:
         # read images with PIL
@@ -40,12 +41,15 @@ def read_batch(self, batch_size, dataset_index) -> np.ndarray:
             dataset_index : dataset_index + batch_size
         ]
         images = np.zeros((batch_size, self.image_size[0], self.image_size[1], 3))
-
+        
         for i in range(batch_size):
             image = Image.open(
                 os.path.join(self.image_path, batch_filenames[i])
             ).resize(self.image_size, self.image_resample)
             image = np.array(image)
+            if len(image.shape) == 2 and self.is_color:
+                images = np.zeros((batch_size, self.image_size[0], self.image_size[1]))
+                is_color = False
             images[i, :, :, :] = image
         return images
 

From ea85f3f54bbadc56846a523c121bc5c85cd7d944 Mon Sep 17 00:00:00 2001
From: Sajtospoga01 <abodrogai@gmail.com>
Date: Fri, 25 Aug 2023 19:17:11 +0100
Subject: [PATCH 41/75] fixes possible bug in read_batch

---
 utilities/segmentation_utils/reading_strategies.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/utilities/segmentation_utils/reading_strategies.py b/utilities/segmentation_utils/reading_strategies.py
index d1aa2d0..59c76f2 100644
--- a/utilities/segmentation_utils/reading_strategies.py
+++ b/utilities/segmentation_utils/reading_strategies.py
@@ -49,7 +49,7 @@ def read_batch(self, batch_size, dataset_index) -> np.ndarray:
             image = np.array(image)
             if len(image.shape) == 2 and self.is_color:
                 images = np.zeros((batch_size, self.image_size[0], self.image_size[1]))
-                is_color = False
+                self.is_color = False
             images[i, :, :, :] = image
         return images
 

From 1dc5a8c6eb896f836218ac39ab1b4248eb50307a Mon Sep 17 00:00:00 2001
From: Sajtospoga01 <abodrogai@gmail.com>
Date: Fri, 25 Aug 2023 19:32:54 +0100
Subject: [PATCH 42/75] fixes bug where too many indices were used to index the image batch

---
 utilities/segmentation_utils/reading_strategies.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/utilities/segmentation_utils/reading_strategies.py b/utilities/segmentation_utils/reading_strategies.py
index 59c76f2..83100de 100644
--- a/utilities/segmentation_utils/reading_strategies.py
+++ b/utilities/segmentation_utils/reading_strategies.py
@@ -50,7 +50,7 @@ def read_batch(self, batch_size, dataset_index) -> np.ndarray:
             if len(image.shape) == 2 and self.is_color:
                 images = np.zeros((batch_size, self.image_size[0], self.image_size[1]))
                 self.is_color = False
-            images[i, :, :, :] = image
+            images[i, ...] = image
         return images
 
     def get_dataset_size(self, mini_batch) -> int:

From a2f5ac9266c8b5f243af5c558c62bfcc5541fbbb Mon Sep 17 00:00:00 2001
From: Sajtospoga01 <abodrogai@gmail.com>
Date: Fri, 25 Aug 2023 19:38:44 +0100
Subject: [PATCH 43/75] add a print for testing

---
 utilities/segmentation_utils/reading_strategies.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/utilities/segmentation_utils/reading_strategies.py b/utilities/segmentation_utils/reading_strategies.py
index 83100de..289b46f 100644
--- a/utilities/segmentation_utils/reading_strategies.py
+++ b/utilities/segmentation_utils/reading_strategies.py
@@ -4,6 +4,7 @@
 import numpy as np
 import rasterio
 from PIL import Image
+import tensorflow as tf
 
 
 class IReader(Protocol):
@@ -43,6 +44,7 @@ def read_batch(self, batch_size, dataset_index) -> np.ndarray:
         images = np.zeros((batch_size, self.image_size[0], self.image_size[1], 3))
         
         for i in range(batch_size):
+            tf.print("Reading image: ", batch_filenames[i]")
             image = Image.open(
                 os.path.join(self.image_path, batch_filenames[i])
             ).resize(self.image_size, self.image_resample)

From f366fb751f6f1ffac1c3412f2c7dac33efa08ddc Mon Sep 17 00:00:00 2001
From: Sajtospoga01 <abodrogai@gmail.com>
Date: Fri, 25 Aug 2023 19:40:11 +0100
Subject: [PATCH 44/75] removes stray quotation mark from tf.print call

---
 utilities/segmentation_utils/reading_strategies.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/utilities/segmentation_utils/reading_strategies.py b/utilities/segmentation_utils/reading_strategies.py
index 289b46f..99ddb7f 100644
--- a/utilities/segmentation_utils/reading_strategies.py
+++ b/utilities/segmentation_utils/reading_strategies.py
@@ -3,8 +3,8 @@
 
 import numpy as np
 import rasterio
-from PIL import Image
 import tensorflow as tf
+from PIL import Image
 
 
 class IReader(Protocol):
@@ -44,7 +44,7 @@ def read_batch(self, batch_size, dataset_index) -> np.ndarray:
         images = np.zeros((batch_size, self.image_size[0], self.image_size[1], 3))
         
         for i in range(batch_size):
-            tf.print("Reading image: ", batch_filenames[i]")
+            tf.print("Reading image: ", batch_filenames[i])
             image = Image.open(
                 os.path.join(self.image_path, batch_filenames[i])
             ).resize(self.image_size, self.image_resample)

From 4fa965d35a0630311971fa2a16a65322cf5c22cd Mon Sep 17 00:00:00 2001
From: Sajtospoga01 <abodrogai@gmail.com>
Date: Fri, 25 Aug 2023 19:46:12 +0100
Subject: [PATCH 45/75] adds debugging print statements

---
 utilities/segmentation_utils/flowreader.py         |  2 ++
 utilities/segmentation_utils/reading_strategies.py | 12 ++++++++----
 2 files changed, 10 insertions(+), 4 deletions(-)

diff --git a/utilities/segmentation_utils/flowreader.py b/utilities/segmentation_utils/flowreader.py
index b1fcb66..51b47ce 100644
--- a/utilities/segmentation_utils/flowreader.py
+++ b/utilities/segmentation_utils/flowreader.py
@@ -420,8 +420,10 @@ def __getitem__(self, index) -> tuple[np.ndarray, np.ndarray]:
 
         if index < self.validity_index - self.batch_size // self.mini_batch:
             self.validity_index = 0
+            tf.print("Resetting validity index")
 
         if index == self.validity_index:
+            tf.print("Reading new batch")
             self.__read_batch(index * self.batch_size)
             self.validity_index = (self.batch_size // self.mini_batch) + index
 
diff --git a/utilities/segmentation_utils/reading_strategies.py b/utilities/segmentation_utils/reading_strategies.py
index 99ddb7f..173c681 100644
--- a/utilities/segmentation_utils/reading_strategies.py
+++ b/utilities/segmentation_utils/reading_strategies.py
@@ -34,24 +34,28 @@ def __init__(
         )  #!update: added variable to initialiser
         self.image_size = image_size
         self.image_resample = image_resample
-        self.is_color = True
+        
 
     def read_batch(self, batch_size, dataset_index) -> np.ndarray:
+        tf.print("Reading batch: ", dataset_index)
         # read images with PIL
         batch_filenames = self.image_filenames[
             dataset_index : dataset_index + batch_size
         ]
+
+        tf.print("Extracted filenames")
+
         images = np.zeros((batch_size, self.image_size[0], self.image_size[1], 3))
-        
+        is_color = True
         for i in range(batch_size):
             tf.print("Reading image: ", batch_filenames[i])
             image = Image.open(
                 os.path.join(self.image_path, batch_filenames[i])
             ).resize(self.image_size, self.image_resample)
             image = np.array(image)
-            if len(image.shape) == 2 and self.is_color:
+            if len(image.shape) == 2 and is_color:
                 images = np.zeros((batch_size, self.image_size[0], self.image_size[1]))
-                self.is_color = False
+                is_color = False
             images[i, ...] = image
         return images
 

From f6de5cff5f82c5606f233d124a84d6a989671f87 Mon Sep 17 00:00:00 2001
From: Sajtospoga01 <abodrogai@gmail.com>
Date: Fri, 25 Aug 2023 19:51:15 +0100
Subject: [PATCH 46/75] adds more debug info

---
 utilities/segmentation_utils/flowreader.py         | 2 +-
 utilities/segmentation_utils/reading_strategies.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/utilities/segmentation_utils/flowreader.py b/utilities/segmentation_utils/flowreader.py
index 51b47ce..73876b3 100644
--- a/utilities/segmentation_utils/flowreader.py
+++ b/utilities/segmentation_utils/flowreader.py
@@ -423,7 +423,7 @@ def __getitem__(self, index) -> tuple[np.ndarray, np.ndarray]:
             tf.print("Resetting validity index")
 
         if index == self.validity_index:
-            tf.print("Reading new batch")
+            tf.print("Reading new batch at index: ", index)
             self.__read_batch(index * self.batch_size)
             self.validity_index = (self.batch_size // self.mini_batch) + index
 
diff --git a/utilities/segmentation_utils/reading_strategies.py b/utilities/segmentation_utils/reading_strategies.py
index 173c681..1187b29 100644
--- a/utilities/segmentation_utils/reading_strategies.py
+++ b/utilities/segmentation_utils/reading_strategies.py
@@ -43,7 +43,7 @@ def read_batch(self, batch_size, dataset_index) -> np.ndarray:
             dataset_index : dataset_index + batch_size
         ]
 
-        tf.print("Extracted filenames")
+        tf.print("Extracted filenames: ", batch_filenames.shape)
 
         images = np.zeros((batch_size, self.image_size[0], self.image_size[1], 3))
         is_color = True

From 01e9e81f04cf4c8b4d86bfbd59bfc1faece09db1 Mon Sep 17 00:00:00 2001
From: Sajtospoga01 <abodrogai@gmail.com>
Date: Fri, 25 Aug 2023 20:00:14 +0100
Subject: [PATCH 47/75] fixes possible problem with dataset indexing

---
 utilities/segmentation_utils/flowreader.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/utilities/segmentation_utils/flowreader.py b/utilities/segmentation_utils/flowreader.py
index 73876b3..0d0d544 100644
--- a/utilities/segmentation_utils/flowreader.py
+++ b/utilities/segmentation_utils/flowreader.py
@@ -424,7 +424,7 @@ def __getitem__(self, index) -> tuple[np.ndarray, np.ndarray]:
 
         if index == self.validity_index:
             tf.print("Reading new batch at index: ", index)
-            self.__read_batch(index * self.batch_size)
+            self.__read_batch(index * self.mini_batch)
             self.validity_index = (self.batch_size // self.mini_batch) + index
 
         # slices new batch

From 1f010bac615ffc4831d42272d76b3e9e7085a4ad Mon Sep 17 00:00:00 2001
From: Sajtospoga01 <abodrogai@gmail.com>
Date: Fri, 25 Aug 2023 20:05:23 +0100
Subject: [PATCH 48/75] removes print statements for debugging

---
 utilities/segmentation_utils/flowreader.py         | 4 ++--
 utilities/segmentation_utils/reading_strategies.py | 6 +++---
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/utilities/segmentation_utils/flowreader.py b/utilities/segmentation_utils/flowreader.py
index 0d0d544..5ac8a3a 100644
--- a/utilities/segmentation_utils/flowreader.py
+++ b/utilities/segmentation_utils/flowreader.py
@@ -420,10 +420,10 @@ def __getitem__(self, index) -> tuple[np.ndarray, np.ndarray]:
 
         if index < self.validity_index - self.batch_size // self.mini_batch:
             self.validity_index = 0
-            tf.print("Resetting validity index")
+    
 
         if index == self.validity_index:
-            tf.print("Reading new batch at index: ", index)
+ 
             self.__read_batch(index * self.mini_batch)
             self.validity_index = (self.batch_size // self.mini_batch) + index
 
diff --git a/utilities/segmentation_utils/reading_strategies.py b/utilities/segmentation_utils/reading_strategies.py
index 1187b29..ca12fdf 100644
--- a/utilities/segmentation_utils/reading_strategies.py
+++ b/utilities/segmentation_utils/reading_strategies.py
@@ -37,18 +37,18 @@ def __init__(
         
 
     def read_batch(self, batch_size, dataset_index) -> np.ndarray:
-        tf.print("Reading batch: ", dataset_index)
+
         # read images with PIL
         batch_filenames = self.image_filenames[
             dataset_index : dataset_index + batch_size
         ]
 
-        tf.print("Extracted filenames: ", batch_filenames.shape)
+
 
         images = np.zeros((batch_size, self.image_size[0], self.image_size[1], 3))
         is_color = True
         for i in range(batch_size):
-            tf.print("Reading image: ", batch_filenames[i])
+
             image = Image.open(
                 os.path.join(self.image_path, batch_filenames[i])
             ).resize(self.image_size, self.image_resample)

From cab20a8755d4b1e9bf333a6ce771b4ece351ecee Mon Sep 17 00:00:00 2001
From: Sajtospoga01 <abodrogai@gmail.com>
Date: Fri, 25 Aug 2023 20:13:30 +0100
Subject: [PATCH 49/75] re-adds debug logs

---
 utilities/segmentation_utils/flowreader.py         | 10 +++++-----
 utilities/segmentation_utils/reading_strategies.py |  6 +++---
 2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/utilities/segmentation_utils/flowreader.py b/utilities/segmentation_utils/flowreader.py
index 5ac8a3a..3e47c5d 100644
--- a/utilities/segmentation_utils/flowreader.py
+++ b/utilities/segmentation_utils/flowreader.py
@@ -420,18 +420,18 @@ def __getitem__(self, index) -> tuple[np.ndarray, np.ndarray]:
 
         if index < self.validity_index - self.batch_size // self.mini_batch:
             self.validity_index = 0
-    
+            tf.print("Resetting validity index")
 
         if index == self.validity_index:
- 
-            self.__read_batch(index * self.mini_batch)
+            tf.print("Reading new batch at index: ", index)
+            self.__read_batch(index * self.batch_size)
             self.validity_index = (self.batch_size // self.mini_batch) + index
-
+        tf.print("batch read at index: ", index)
         # slices new batch
         store_index = (self.batch_size // self.mini_batch) - (
             self.validity_index - index
         )
-
+        tf.print("store index: ", store_index)
         batch_images = self.image_batch_store[store_index, ...]  # type: ignore
         batch_masks = self.mask_batch_store[store_index, ...]  # type: ignore
 
diff --git a/utilities/segmentation_utils/reading_strategies.py b/utilities/segmentation_utils/reading_strategies.py
index ca12fdf..1187b29 100644
--- a/utilities/segmentation_utils/reading_strategies.py
+++ b/utilities/segmentation_utils/reading_strategies.py
@@ -37,18 +37,18 @@ def __init__(
         
 
     def read_batch(self, batch_size, dataset_index) -> np.ndarray:
-
+        tf.print("Reading batch: ", dataset_index)
         # read images with PIL
         batch_filenames = self.image_filenames[
             dataset_index : dataset_index + batch_size
         ]
 
-
+        tf.print("Extracted filenames: ", batch_filenames.shape)
 
         images = np.zeros((batch_size, self.image_size[0], self.image_size[1], 3))
         is_color = True
         for i in range(batch_size):
-
+            tf.print("Reading image: ", batch_filenames[i])
             image = Image.open(
                 os.path.join(self.image_path, batch_filenames[i])
             ).resize(self.image_size, self.image_resample)

From b74b2ccaa0f230262506f9b42e9db3bd282e79a3 Mon Sep 17 00:00:00 2001
From: Sajtospoga01 <abodrogai@gmail.com>
Date: Sat, 26 Aug 2023 09:26:29 +0100
Subject: [PATCH 50/75] removes print statements

---
 utilities/segmentation_utils/flowreader.py         | 8 ++++----
 utilities/segmentation_utils/reading_strategies.py | 6 +++---
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/utilities/segmentation_utils/flowreader.py b/utilities/segmentation_utils/flowreader.py
index 3e47c5d..3da5c77 100644
--- a/utilities/segmentation_utils/flowreader.py
+++ b/utilities/segmentation_utils/flowreader.py
@@ -420,18 +420,18 @@ def __getitem__(self, index) -> tuple[np.ndarray, np.ndarray]:
 
         if index < self.validity_index - self.batch_size // self.mini_batch:
             self.validity_index = 0
-            tf.print("Resetting validity index")
+         
 
         if index == self.validity_index:
-            tf.print("Reading new batch at index: ", index)
+       
             self.__read_batch(index * self.batch_size)
             self.validity_index = (self.batch_size // self.mini_batch) + index
-        tf.print("batch read at index: ", index)
+       
         # slices new batch
         store_index = (self.batch_size // self.mini_batch) - (
             self.validity_index - index
         )
-        tf.print("store index: ", store_index)
+      
         batch_images = self.image_batch_store[store_index, ...]  # type: ignore
         batch_masks = self.mask_batch_store[store_index, ...]  # type: ignore
 
diff --git a/utilities/segmentation_utils/reading_strategies.py b/utilities/segmentation_utils/reading_strategies.py
index 1187b29..2fb8f55 100644
--- a/utilities/segmentation_utils/reading_strategies.py
+++ b/utilities/segmentation_utils/reading_strategies.py
@@ -37,18 +37,18 @@ def __init__(
         
 
     def read_batch(self, batch_size, dataset_index) -> np.ndarray:
-        tf.print("Reading batch: ", dataset_index)
+
         # read images with PIL
         batch_filenames = self.image_filenames[
             dataset_index : dataset_index + batch_size
         ]
 
-        tf.print("Extracted filenames: ", batch_filenames.shape)
+      
 
         images = np.zeros((batch_size, self.image_size[0], self.image_size[1], 3))
         is_color = True
         for i in range(batch_size):
-            tf.print("Reading image: ", batch_filenames[i])
+        
             image = Image.open(
                 os.path.join(self.image_path, batch_filenames[i])
             ).resize(self.image_size, self.image_resample)

From a3c8070c16f7f57d21edbcc5c94562b0e847b9e1 Mon Sep 17 00:00:00 2001
From: Sajtospoga01 <abodrogai@gmail.com>
Date: Sat, 26 Aug 2023 09:58:35 +0100
Subject: [PATCH 51/75] push fix to indexing error

---
 utilities/segmentation_utils/flowreader.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/utilities/segmentation_utils/flowreader.py b/utilities/segmentation_utils/flowreader.py
index 3da5c77..20e9ca8 100644
--- a/utilities/segmentation_utils/flowreader.py
+++ b/utilities/segmentation_utils/flowreader.py
@@ -424,7 +424,7 @@ def __getitem__(self, index) -> tuple[np.ndarray, np.ndarray]:
 
         if index == self.validity_index:
        
-            self.__read_batch(index * self.batch_size)
+            self.__read_batch(index * self.mini_batch)
             self.validity_index = (self.batch_size // self.mini_batch) + index
        
         # slices new batch

From a88497eb3145e8fcc813c92973353c01340f386a Mon Sep 17 00:00:00 2001
From: Sajtospoga01 <abodrogai@gmail.com>
Date: Sat, 26 Aug 2023 10:05:06 +0100
Subject: [PATCH 52/75] fixing linting issues, removing unused imports

---
 utilities/segmentation_utils/flowreader.py    | 17 ++++------------
 .../segmentation_utils/reading_strategies.py  | 20 +++++++------------
 2 files changed, 11 insertions(+), 26 deletions(-)

diff --git a/utilities/segmentation_utils/flowreader.py b/utilities/segmentation_utils/flowreader.py
index 20e9ca8..6d20be5 100644
--- a/utilities/segmentation_utils/flowreader.py
+++ b/utilities/segmentation_utils/flowreader.py
@@ -7,12 +7,9 @@
 from typing import Optional
 
 import numpy as np
-import pandas as pd
 import tensorflow as tf
 from keras.preprocessing.image import ImageDataGenerator
 from keras.utils import Sequence
-from PIL import Image
-from tqdm import tqdm
 
 from utilities.segmentation_utils import ImagePreprocessor
 from utilities.segmentation_utils.constants import ImageOrdering
@@ -273,9 +270,8 @@ def __init__(
         preprocessing_queue_image: IPreprocessor = ImagePreprocessor.generate_image_queue(),
         preprocessing_queue_mask: IPreprocessor = ImagePreprocessor.generate_mask_queue(),
         image_ordering: ImageOrdering = ImageOrdering.CHANNEL_LAST,
-        is_column:bool = False,
+        is_column: bool = False,
     ):
-        
         self.input_strategy = input_strategy
         self.output_strategy = output_strategy
         self.batch_size = batch_size
@@ -292,12 +288,10 @@ def __init__(
 
         self.preprocessing_queue_image = preprocessing_queue_image
         self.preprocessing_queue_mask = preprocessing_queue_mask
-        
+
         self.image_ordering = image_ordering
         self.is_column = is_column
 
-        
-
         self.image_batch_store = None
         self.mask_batch_store = None
         self.validity_index = 0
@@ -420,18 +414,16 @@ def __getitem__(self, index) -> tuple[np.ndarray, np.ndarray]:
 
         if index < self.validity_index - self.batch_size // self.mini_batch:
             self.validity_index = 0
-         
 
         if index == self.validity_index:
-       
             self.__read_batch(index * self.mini_batch)
             self.validity_index = (self.batch_size // self.mini_batch) + index
-       
+
         # slices new batch
         store_index = (self.batch_size // self.mini_batch) - (
             self.validity_index - index
         )
-      
+
         batch_images = self.image_batch_store[store_index, ...]  # type: ignore
         batch_masks = self.mask_batch_store[store_index, ...]  # type: ignore
 
@@ -449,7 +441,6 @@ def __getitem__(self, index) -> tuple[np.ndarray, np.ndarray]:
             batch_images = np.moveaxis(batch_images, -1, 1)
             batch_masks = np.moveaxis(batch_masks, -1, 1)
 
-
         return batch_images, batch_masks
 
     def on_epoch_end(self) -> None:
diff --git a/utilities/segmentation_utils/reading_strategies.py b/utilities/segmentation_utils/reading_strategies.py
index 2fb8f55..30cd6e7 100644
--- a/utilities/segmentation_utils/reading_strategies.py
+++ b/utilities/segmentation_utils/reading_strategies.py
@@ -3,7 +3,6 @@
 
 import numpy as np
 import rasterio
-import tensorflow as tf
 from PIL import Image
 
 
@@ -11,13 +10,13 @@ class IReader(Protocol):
     def read_batch(self, batch_size: int, dataset_index: int) -> np.ndarray:
         ...
 
-    def get_dataset_size(self,minibatch:int) -> int:
+    def get_dataset_size(self, minibatch: int) -> int:
         ...
 
-    def get_image_size(self) -> tuple[int,int]:
+    def get_image_size(self) -> tuple[int, int]:
         ...
 
-    def shuffle_filenames(self,seed:int) -> None:
+    def shuffle_filenames(self, seed: int) -> None:
         ...
 
 
@@ -34,21 +33,16 @@ def __init__(
         )  #!update: added variable to initialiser
         self.image_size = image_size
         self.image_resample = image_resample
-        
 
     def read_batch(self, batch_size, dataset_index) -> np.ndarray:
-
         # read images with PIL
         batch_filenames = self.image_filenames[
             dataset_index : dataset_index + batch_size
         ]
 
-      
-
         images = np.zeros((batch_size, self.image_size[0], self.image_size[1], 3))
         is_color = True
         for i in range(batch_size):
-        
             image = Image.open(
                 os.path.join(self.image_path, batch_filenames[i])
             ).resize(self.image_size, self.image_resample)
@@ -63,10 +57,10 @@ def get_dataset_size(self, mini_batch) -> int:
         dataset_size = int(np.floor(len(self.image_filenames) / float(mini_batch)))
         return dataset_size
 
-    def get_image_size(self) -> tuple[int,int]:
+    def get_image_size(self) -> tuple[int, int]:
         return self.image_size
 
-    def shuffle_filenames(self,seed:int) -> None:
+    def shuffle_filenames(self, seed: int) -> None:
         state = np.random.RandomState(seed)
         shuffled_indices = state.permutation(len(self.image_filenames))
         shuffled_indices = shuffled_indices.astype(int)
@@ -117,10 +111,10 @@ def get_dataset_size(self, mini_batch) -> int:
         dataset_size = int(np.floor(len(self.image_filenames) / float(mini_batch)))
         return dataset_size
 
-    def get_image_size(self) -> tuple[int,int]:
+    def get_image_size(self) -> tuple[int, int]:
         return self.image_size
 
-    def shuffle_filenames(self,seed:int) -> None:
+    def shuffle_filenames(self, seed: int) -> None:
         state = np.random.RandomState(seed)
         shuffled_indices = state.permutation(len(self.image_filenames))
         shuffled_indices = shuffled_indices.astype(int)

From 841ee0ff41db57de4f0f4c25ae6657f347b768b7 Mon Sep 17 00:00:00 2001
From: Ayleen Sohaib <2684413S@student.gla.ac.uk>
Date: Sat, 26 Aug 2023 14:44:02 +0100
Subject: [PATCH 53/75] added partial_dataset and adjusted_batch_size to
 read_batch - to allow loading partial batches

---
 utilities/segmentation_utils/flowreader.py | 20 +++++++++++++-------
 1 file changed, 13 insertions(+), 7 deletions(-)

diff --git a/utilities/segmentation_utils/flowreader.py b/utilities/segmentation_utils/flowreader.py
index 6d20be5..8076a42 100644
--- a/utilities/segmentation_utils/flowreader.py
+++ b/utilities/segmentation_utils/flowreader.py
@@ -343,13 +343,19 @@ def set_mini_batch_size(self, batch_size: int) -> None:
         self.mini_batch = batch_size
 
     def __read_batch(self, dataset_index: int) -> None:
-        # read image batch
-
-        # calculate number of mini batches in a batch
-        n = self.batch_size // self.mini_batch
-
-        batch_images = self.input_strategy.read_batch(self.batch_size, dataset_index)
-        batch_masks = self.output_strategy.read_batch(self.batch_size, dataset_index)
+        
+        #!adjust the batch size as it is passed to the function
+        #calculates remaining images in a dataset and scales it down by multiplying with minibatch
+        partial_dataset = self.dataset_size * self.mini_batch - dataset_index 
+
+        #compare and choose the smaller value, to avoid making a larger batch_size
+        adjusted_batch_size = min(self.batch_size, partial_dataset)
+            
+        #calculate number of mini batches in a batch
+        n = adjusted_batch_size // self.mini_batch
+
+        batch_images = self.input_strategy.read_batch(adjusted_batch_size, dataset_index)
+        batch_masks = self.output_strategy.read_batch(adjusted_batch_size, dataset_index)
 
         # preprocess and assign images and masks to the batch
 

From 8ae3d0eab5a771fa832604dcb330983e160982a3 Mon Sep 17 00:00:00 2001
From: Sajtospoga01 <abodrogai@gmail.com>
Date: Sat, 26 Aug 2023 14:49:26 +0100
Subject: [PATCH 54/75] adds dataset size to flowgenerator_exp, adds
 multi-threaded version of the rasterio strategy

---
 .../test_strategies.py                        | 172 +++++++++++++++---
 utilities/segmentation_utils/flowreader.py    |   2 +
 .../segmentation_utils/reading_strategies.py  |  75 ++++++++
 3 files changed, 225 insertions(+), 24 deletions(-)

diff --git a/tests/segmentation_utils_tests.py/test_strategies.py b/tests/segmentation_utils_tests.py/test_strategies.py
index 209c3bf..aad5a4b 100644
--- a/tests/segmentation_utils_tests.py/test_strategies.py
+++ b/tests/segmentation_utils_tests.py/test_strategies.py
@@ -1,12 +1,13 @@
 import os
 
 import numpy as np
-from PIL import Image
 import pytest
+from PIL import Image
 from pytest import MonkeyPatch
 
 from utilities.segmentation_utils.reading_strategies import (
-    HyperspectralImageStrategy, RGBImageStrategy)
+    HyperspectralImageStrategy, HyperspectralImageStrategyMultiThread,
+    RGBImageStrategy)
 
 
 class MockRasterio:
@@ -15,6 +16,7 @@ def __init__(self, n, size, bands, dtypes):
         self.size = size
         self.bands = bands
         self.dtypes = dtypes
+        self.call_count = 0
 
     def open(self, *args, **kwargs):
         return self
@@ -24,7 +26,10 @@ def count(self) -> int:
         return self.bands
 
     def read(self, *args, **kwargs):
-        return np.zeros((self.bands, self.size[0], self.size[1]), self.dtypes[0])
+        self.call_count += 1
+        return np.full(
+            (self.bands, self.size[0], self.size[1]), self.call_count, self.dtypes[0]
+        )
 
     # these functions are invoked when a 'with' statement is executed
     def __enter__(self):
@@ -35,6 +40,10 @@ def __exit__(self, type, value, traceback):
         # called at the end of a 'with' block
         pass
 
+    def get_count(self):
+        return self.call_count
+
+
 @pytest.mark.development
 def test_read_batch_image_path() -> None:
     # checking if the file is being opened and read correctly
@@ -64,6 +73,7 @@ def test_read_batch_image_path() -> None:
     patch.undo()
     patch.undo()
 
+
 @pytest.mark.development
 def test_read_batch_returns_nparray() -> None:
     # checking if the returned value is a numpy array
@@ -94,6 +104,7 @@ def test_read_batch_returns_nparray() -> None:
     patch.undo()
     patch.undo()
 
+
 @pytest.mark.development
 def test_RGB_get_dataset_size() -> None:
     # checking if the calculation is done correctly
@@ -109,7 +120,7 @@ def test_RGB_get_dataset_size() -> None:
         image_resample=Image.Resampling.NEAREST,
     )
     dataset = len(mock_filenames)  # number of images in the specified path
-    mini_batch = 2  # number of images we want in each batch 
+    mini_batch = 2  # number of images we want in each batch
     expected_value = int(
         np.floor(dataset / float(mini_batch))
     )  # number of sets of images we expect
@@ -119,6 +130,7 @@ def test_RGB_get_dataset_size() -> None:
     patch.undo()
     patch.undo()
 
+
 @pytest.mark.development
 def test_Hyperspectral_get_dataset_size() -> None:
     # checking if the calculation is done correctly
@@ -135,7 +147,7 @@ def test_Hyperspectral_get_dataset_size() -> None:
     )
 
     dataset = len(mock_filenames)  # number of images in the specified path
-    mini_batch = 2  # number of images we want in each batch 
+    mini_batch = 2  # number of images we want in each batch
     expected_value = int(
         np.floor(dataset / float(mini_batch))
     )  # number of sets of images we expect
@@ -145,6 +157,7 @@ def test_Hyperspectral_get_dataset_size() -> None:
     patch.undo()
     patch.undo()
 
+
 @pytest.mark.development
 def test_hyperspectral_open():
     patch = MonkeyPatch()
@@ -167,6 +180,30 @@ def test_hyperspectral_open():
 
     assert read_images.shape == (2, 224, 224, 3)
 
+
+@pytest.mark.development
+def test_hyperspectral_mt_open():
+    patch = MonkeyPatch()
+    mock_filenames = ["a", "b", "c"]
+    patch.setattr(os, "listdir", lambda x: mock_filenames)
+
+    image_path = "tests/segmentation_utils_tests/test_strategies"
+
+    mock_data = {
+        "n": 3,
+        "size": (224, 224),
+        "bands": 3,
+        "dtypes": ["uint8"],
+    }
+    strategy = HyperspectralImageStrategyMultiThread(
+        image_path, (224, 224), package=MockRasterio(**mock_data)
+    )
+
+    read_images = strategy.read_batch(2, 0)
+
+    assert read_images.shape == (2, 224, 224, 3)
+
+
 @pytest.mark.development
 def test_empty_batch():
     patch = MonkeyPatch()
@@ -191,10 +228,16 @@ def test_empty_batch():
     dataset_index = 0
     result = image_strategy.read_batch(batch_size, dataset_index)
 
-    assert result.shape == (0, 224, 224, 3) #0 indicates there are no images in the batch
+    assert result.shape == (
+        0,
+        224,
+        224,
+        3,
+    )  # 0 indicates there are no images in the batch
     patch.undo()
     patch.undo()
 
+
 @pytest.mark.development
 def test_out_of_bounds_index():
     patch = MonkeyPatch()
@@ -215,18 +258,19 @@ def test_out_of_bounds_index():
         image_resample=Image.Resampling.NEAREST,
     )
 
-    batch_size = 2 #not an empty batch
-    dataset_index = len(image_strategy.image_filenames) #out of bounds index 
+    batch_size = 2  # not an empty batch
+    dataset_index = len(image_strategy.image_filenames)  # out of bounds index
 
     try:
-        result = image_strategy.read_batch(batch_size, dataset_index)
+        image_strategy.read_batch(batch_size, dataset_index)
         assert True
-    
+
     except IndexError:
         pass
     patch.undo()
     patch.undo()
 
+
 @pytest.mark.development
 def test_batch_slicing():
     patch = MonkeyPatch()
@@ -249,13 +293,16 @@ def test_batch_slicing():
 
     batch_size = 10
     dataset_index = 2
-    result = image_strategy.read_batch(batch_size, dataset_index) 
-    assert result.shape[0] == batch_size #compare the size of returned data with batch_size 
+    result = image_strategy.read_batch(batch_size, dataset_index)
+    assert (
+        result.shape[0] == batch_size
+    )  # compare the size of returned data with batch_size
     patch.undo()
     patch.undo()
 
+
 @pytest.mark.development
-def test_RGB_get_image_index():
+def test_RGB_get_image_size():
     patch = MonkeyPatch()
 
     mock_filenames = ["a" for _ in range(20)]
@@ -268,12 +315,12 @@ def test_RGB_get_image_index():
         image_resample=Image.Resampling.NEAREST,
     )
 
-    result = image_strategy.get_image_size(
-    )
-    assert result == (224,224)
+    result = image_strategy.get_image_size()
+    assert result == (224, 224)
+
 
 @pytest.mark.development
-def test_HyperSpectral_get_image_index():
+def test_HyperSpectral_get_image_size():
     patch = MonkeyPatch()
 
     mock_filenames = ["a" for _ in range(20)]
@@ -283,14 +330,31 @@ def test_HyperSpectral_get_image_index():
     image_strategy = HyperspectralImageStrategy(
         image_path="tests/segmentation_utils_tests/test_strategies",
         image_size=(224, 224),
-        package=MockRasterio(n=3, size=(224, 224), bands=3, dtypes=["uint8"])
+        package=MockRasterio(n=3, size=(224, 224), bands=3, dtypes=["uint8"]),
     )
 
-    result = image_strategy.get_image_size(
+    result = image_strategy.get_image_size()
+    assert result == (224, 224)
+
+
+@pytest.mark.development
+def test_HyperSpectral_MT_get_image_size():
+    patch = MonkeyPatch()
+
+    mock_filenames = ["a" for _ in range(20)]
+
+    patch.setattr(os, "listdir", lambda x: mock_filenames)
+
+    image_strategy = HyperspectralImageStrategyMultiThread(
+        image_path="tests/segmentation_utils_tests/test_strategies",
+        image_size=(224, 224),
+        package=MockRasterio(n=3, size=(224, 224), bands=3, dtypes=["uint8"]),
     )
-    assert result == (224,224)
 
+    result = image_strategy.get_image_size()
+    assert result == (224, 224)
 
+@pytest.mark.development
 def test_RGB_shuffle():
     patch = MonkeyPatch()
 
@@ -316,8 +380,11 @@ def test_RGB_shuffle():
         image_strategy_1.shuffle_filenames(i)
         image_strategy_2.shuffle_filenames(i)
 
-    assert np.array_equal(image_strategy_1.image_filenames, image_strategy_2.image_filenames)
+    assert np.array_equal(
+        image_strategy_1.image_filenames, image_strategy_2.image_filenames
+    )
 
+@pytest.mark.development
 def test_Hyperspectral_shuffle():
     patch = MonkeyPatch()
 
@@ -328,13 +395,43 @@ def test_Hyperspectral_shuffle():
     image_strategy_1 = HyperspectralImageStrategy(
         image_path="tests/segmentation_utils_tests/test_strategies",
         image_size=(224, 224),
-        package=MockRasterio(n=3, size=(224, 224), bands=3, dtypes=["uint8"])
+        package=MockRasterio(n=3, size=(224, 224), bands=3, dtypes=["uint8"]),
+    )
+
+    image_strategy_2 = HyperspectralImageStrategy(
+        image_path="tests/segmentation_utils_tests/test_strategies",
+        image_size=(224, 224),
+        package=MockRasterio(n=3, size=(224, 224), bands=3, dtypes=["uint8"]),
+    )
+
+    n = 100
+
+    for i in range(n):
+        image_strategy_1.shuffle_filenames(i)
+        image_strategy_2.shuffle_filenames(i)
+
+    assert np.array_equal(
+        image_strategy_1.image_filenames, image_strategy_2.image_filenames
+    )
+
+@pytest.mark.development
+def test_Hyperspectral_mt_shuffle():
+    patch = MonkeyPatch()
+
+    mock_filenames = [str(i) for i in range(20)]
+
+    patch.setattr(os, "listdir", lambda x: mock_filenames)
+
+    image_strategy_1 = HyperspectralImageStrategyMultiThread(
+        image_path="tests/segmentation_utils_tests/test_strategies",
+        image_size=(224, 224),
+        package=MockRasterio(n=3, size=(224, 224), bands=3, dtypes=["uint8"]),
     )
 
     image_strategy_2 = HyperspectralImageStrategy(
         image_path="tests/segmentation_utils_tests/test_strategies",
         image_size=(224, 224),
-        package=MockRasterio(n=3, size=(224, 224), bands=3, dtypes=["uint8"])
+        package=MockRasterio(n=3, size=(224, 224), bands=3, dtypes=["uint8"]),
     )
 
     n = 100
@@ -343,4 +440,31 @@ def test_Hyperspectral_shuffle():
         image_strategy_1.shuffle_filenames(i)
         image_strategy_2.shuffle_filenames(i)
 
-    assert np.array_equal(image_strategy_1.image_filenames, image_strategy_2.image_filenames)
\ No newline at end of file
+    assert np.array_equal(
+        image_strategy_1.image_filenames, image_strategy_2.image_filenames
+    )
+
+@pytest.mark.development
+def test_Hyperspectral_mt_image_in_order():
+    patch = MonkeyPatch()
+
+    mock_filenames = [str(i) for i in range(20)]
+
+    patch.setattr(os, "listdir", lambda x: mock_filenames)
+    mock_package = MockRasterio(n=3, size=(224, 224), bands=3, dtypes=["uint8"])
+    image_strategy = HyperspectralImageStrategyMultiThread(
+        image_path="tests/segmentation_utils_tests/test_strategies",
+        image_size=(224, 224),
+        package=mock_package,
+    )
+
+    batch_size = 10
+
+    call_count = mock_package.get_count()
+
+    result = image_strategy.read_batch(batch_size, 0)
+
+    for i in range(call_count, call_count + batch_size):
+        assert np.array_equal(
+            result[i - call_count, :, :, :], np.full((224, 224, 3), i + 1)
+        )
diff --git a/utilities/segmentation_utils/flowreader.py b/utilities/segmentation_utils/flowreader.py
index 8076a42..1b96d65 100644
--- a/utilities/segmentation_utils/flowreader.py
+++ b/utilities/segmentation_utils/flowreader.py
@@ -297,6 +297,8 @@ def __init__(
         self.validity_index = 0
         self.shuffle_counter = 0
 
+        self.dataset_size = self.__len__()
+
         self.__shuffle_filenames()
 
         if len(self.output_size) != 2:
diff --git a/utilities/segmentation_utils/reading_strategies.py b/utilities/segmentation_utils/reading_strategies.py
index 30cd6e7..7c743a2 100644
--- a/utilities/segmentation_utils/reading_strategies.py
+++ b/utilities/segmentation_utils/reading_strategies.py
@@ -1,4 +1,6 @@
 import os
+from concurrent import futures
+from concurrent.futures import ThreadPoolExecutor
 from typing import Any, Protocol
 
 import numpy as np
@@ -119,3 +121,76 @@ def shuffle_filenames(self, seed: int) -> None:
         shuffled_indices = state.permutation(len(self.image_filenames))
         shuffled_indices = shuffled_indices.astype(int)
         self.image_filenames = self.image_filenames[shuffled_indices]
+
+
+class HyperspectralImageStrategyMultiThread:
+    # read images with rasterio
+    def __init__(
+        self,
+        image_path: str,
+        image_size: tuple[int, int],
+        image_resample=Image.Resampling.NEAREST,
+        max_workers: int = 8,
+        package: Any = rasterio,
+    ):
+        self.image_path = image_path
+        self.image_filenames = np.array(sorted(os.listdir(self.image_path)))
+        self.image_size = image_size
+        self.image_resample = image_resample
+        self.package = package
+        self.max_workers = max_workers
+        # gets the number of bands for the dataset
+        self.bands = package.open(
+            os.path.join(self.image_path, self.image_filenames[0])
+        ).count
+
+    def __read_single_image(
+        self, filename: str, image_path: str, package: Any, image_size: tuple[int, int]
+    ):
+        with package.open(os.path.join(image_path, filename)) as dataset:
+            image = dataset.read()
+        resized_image = np.resize(image, image_size)
+        return resized_image
+
+    def read_batch(self, batch_size: int, dataset_index: int) -> np.ndarray:
+        batch_filenames = self.image_filenames[
+            dataset_index : dataset_index + batch_size
+        ]
+
+        # defines the array that will contain the images
+        images = np.zeros(
+            (batch_size, self.bands, self.image_size[0], self.image_size[1])
+        )
+
+        with ThreadPoolExecutor(max_workers=self.max_workers) as executor:
+            future_to_index = {
+                executor.submit(
+                    self.__read_single_image,
+                    filename,
+                    self.image_path,
+                    self.package,
+                    self.image_size,
+                ): i
+                for i, filename in enumerate(batch_filenames)
+            }
+            for future in futures.as_completed(future_to_index):
+                i = future_to_index[future]
+                images[i, :, :, :] = future.result()
+
+        # ensures channel-last orientation for the reader
+        images = np.moveaxis(images, 1, 3)
+
+        return np.array(images)
+
+    def get_dataset_size(self, mini_batch) -> int:
+        dataset_size = int(np.floor(len(self.image_filenames) / float(mini_batch)))
+        return dataset_size
+
+    def get_image_size(self) -> tuple[int, int]:
+        return self.image_size
+
+    def shuffle_filenames(self, seed: int) -> None:
+        state = np.random.RandomState(seed)
+        shuffled_indices = state.permutation(len(self.image_filenames))
+        shuffled_indices = shuffled_indices.astype(int)
+        self.image_filenames = self.image_filenames[shuffled_indices]

From 97d291324247e636ac1ca4e5833b3c9961b57e63 Mon Sep 17 00:00:00 2001
From: Sajtospoga01 <abodrogai@gmail.com>
Date: Sat, 26 Aug 2023 15:03:32 +0100
Subject: [PATCH 55/75] update preprocessing to use adjusted_batch

---
 utilities/segmentation_utils/flowreader.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/utilities/segmentation_utils/flowreader.py b/utilities/segmentation_utils/flowreader.py
index 1b96d65..c6a5db1 100644
--- a/utilities/segmentation_utils/flowreader.py
+++ b/utilities/segmentation_utils/flowreader.py
@@ -362,7 +362,7 @@ def __read_batch(self, dataset_index: int) -> None:
         # preprocess and assign images and masks to the batch
 
         if self.preprocessing_enabled:
-            for i in range(self.batch_size):
+            for i in range(adjusted_batch_size):
                 image = batch_images[i, ...]
                 mask = batch_masks[i, ...]
                 if self.preprocessing_seed is None:
@@ -418,7 +418,7 @@ def __len__(self) -> int:
 
     def __getitem__(self, index) -> tuple[np.ndarray, np.ndarray]:
         # check if the batch is already cached
-        index = index % self.__len__()
+        
 
         if index < self.validity_index - self.batch_size // self.mini_batch:
             self.validity_index = 0

From d2c8ecd7bf036b0af7eb122236fdfe7598bfd183 Mon Sep 17 00:00:00 2001
From: Sajtospoga01 <abodrogai@gmail.com>
Date: Sat, 26 Aug 2023 15:08:06 +0100
Subject: [PATCH 56/75] test if it works without batch size adjustment

---
 utilities/segmentation_utils/flowreader.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/utilities/segmentation_utils/flowreader.py b/utilities/segmentation_utils/flowreader.py
index c6a5db1..bdbb46a 100644
--- a/utilities/segmentation_utils/flowreader.py
+++ b/utilities/segmentation_utils/flowreader.py
@@ -351,7 +351,7 @@ def __read_batch(self, dataset_index: int) -> None:
         partial_dataset = self.dataset_size * self.mini_batch - dataset_index 
 
         #compare and choose the smaller value, to avoid making a larger batch_size
-        adjusted_batch_size = min(self.batch_size, partial_dataset)
+        adjusted_batch_size = self.batch_size
             
         #calculate number of mini batches in a batch
         n = adjusted_batch_size // self.mini_batch

From e20f19c41c6c7b525b9ea22f58efb57839c8ce49 Mon Sep 17 00:00:00 2001
From: Sajtospoga01 <abodrogai@gmail.com>
Date: Sat, 26 Aug 2023 15:26:55 +0100
Subject: [PATCH 57/75] adds print statements for debugging

---
 utilities/segmentation_utils/flowreader.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/utilities/segmentation_utils/flowreader.py b/utilities/segmentation_utils/flowreader.py
index bdbb46a..d1fb6d7 100644
--- a/utilities/segmentation_utils/flowreader.py
+++ b/utilities/segmentation_utils/flowreader.py
@@ -350,9 +350,12 @@ def __read_batch(self, dataset_index: int) -> None:
         #calculates remaining images in a dataset and scales it down by multiplying with minibatch
         partial_dataset = self.dataset_size * self.mini_batch - dataset_index 
 
+        tf.print("partial_dataset: ", partial_dataset)
         #compare and choose the smaller value, to avoid making a larger batch_size
-        adjusted_batch_size = self.batch_size
-            
+        adjusted_batch_size = min(self.batch_size, partial_dataset)
+        
+        tf.print("adjusted_batch_size: ", adjusted_batch_size)
+
         #calculate number of mini batches in a batch
         n = adjusted_batch_size // self.mini_batch
 

From e06f47f337c67b1ccc8d8371ca4352c90d1bdce8 Mon Sep 17 00:00:00 2001
From: Sajtospoga01 <abodrogai@gmail.com>
Date: Sat, 26 Aug 2023 15:31:33 +0100
Subject: [PATCH 58/75] adds more debugging statements

---
 utilities/segmentation_utils/flowreader.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/utilities/segmentation_utils/flowreader.py b/utilities/segmentation_utils/flowreader.py
index d1fb6d7..ba38d2c 100644
--- a/utilities/segmentation_utils/flowreader.py
+++ b/utilities/segmentation_utils/flowreader.py
@@ -345,7 +345,8 @@ def set_mini_batch_size(self, batch_size: int) -> None:
         self.mini_batch = batch_size
 
     def __read_batch(self, dataset_index: int) -> None:
-        
+        tf.print("dataset_index: ", dataset_index)
+        tf.print("dataset_size: ", self.dataset_size)
         #!adjust the batch size as it is passed to the function
         #calculates remaining images in a dataset and scales it down by multiplying with minibatch
         partial_dataset = self.dataset_size * self.mini_batch - dataset_index 

From f2a9cef2c559f6b7b72eb314bca4f264d1538299 Mon Sep 17 00:00:00 2001
From: Sajtospoga01 <abodrogai@gmail.com>
Date: Sat, 26 Aug 2023 15:44:23 +0100
Subject: [PATCH 59/75] adds even more debugging print statements

---
 utilities/segmentation_utils/flowreader.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/utilities/segmentation_utils/flowreader.py b/utilities/segmentation_utils/flowreader.py
index ba38d2c..294ffa1 100644
--- a/utilities/segmentation_utils/flowreader.py
+++ b/utilities/segmentation_utils/flowreader.py
@@ -298,6 +298,7 @@ def __init__(
         self.shuffle_counter = 0
 
         self.dataset_size = self.__len__()
+        tf.print("dataset_size: ", self.dataset_size)
 
         self.__shuffle_filenames()
 

From 095cfe95181c2397f0cc52c0d44206006eadb90d Mon Sep 17 00:00:00 2001
From: Sajtospoga01 <abodrogai@gmail.com>
Date: Sat, 26 Aug 2023 15:46:34 +0100
Subject: [PATCH 60/75] possibly fixes error with incorrect dataset size
 calculation in initializer

---
 utilities/segmentation_utils/flowreader.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/utilities/segmentation_utils/flowreader.py b/utilities/segmentation_utils/flowreader.py
index 294ffa1..02d6a20 100644
--- a/utilities/segmentation_utils/flowreader.py
+++ b/utilities/segmentation_utils/flowreader.py
@@ -297,7 +297,7 @@ def __init__(
         self.validity_index = 0
         self.shuffle_counter = 0
 
-        self.dataset_size = self.__len__()
+        self.dataset_size = self.input_strategy.get_dataset_size(self.mini_batch)
         tf.print("dataset_size: ", self.dataset_size)
 
         self.__shuffle_filenames()

From 7ef8fc95956539e62df1794950fa543da5fd9e42 Mon Sep 17 00:00:00 2001
From: Sajtospoga01 <abodrogai@gmail.com>
Date: Sat, 26 Aug 2023 15:50:26 +0100
Subject: [PATCH 61/75] adds debugging print statement to __len__

---
 utilities/segmentation_utils/flowreader.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/utilities/segmentation_utils/flowreader.py b/utilities/segmentation_utils/flowreader.py
index 02d6a20..a121a9d 100644
--- a/utilities/segmentation_utils/flowreader.py
+++ b/utilities/segmentation_utils/flowreader.py
@@ -419,6 +419,7 @@ def __read_batch(self, dataset_index: int) -> None:
         # required to check when to read the next batch
 
     def __len__(self) -> int:
+        tf.print("len called")
         return self.input_strategy.get_dataset_size(self.mini_batch)
 
     def __getitem__(self, index) -> tuple[np.ndarray, np.ndarray]:

From 603bc036d42f599d0ca5a4a0bac474f11c7ec0d5 Mon Sep 17 00:00:00 2001
From: Sajtospoga01 <abodrogai@gmail.com>
Date: Sat, 26 Aug 2023 15:55:56 +0100
Subject: [PATCH 62/75] adds possible fix for incorrect dataset size
 calculation

---
 utilities/segmentation_utils/flowreader.py | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/utilities/segmentation_utils/flowreader.py b/utilities/segmentation_utils/flowreader.py
index a121a9d..6e40a47 100644
--- a/utilities/segmentation_utils/flowreader.py
+++ b/utilities/segmentation_utils/flowreader.py
@@ -297,8 +297,7 @@ def __init__(
         self.validity_index = 0
         self.shuffle_counter = 0
 
-        self.dataset_size = self.input_strategy.get_dataset_size(self.mini_batch)
-        tf.print("dataset_size: ", self.dataset_size)
+        self.__update_dataset_size()
 
         self.__shuffle_filenames()
 
@@ -344,6 +343,11 @@ def set_mini_batch_size(self, batch_size: int) -> None:
         if self.batch_size % batch_size != 0:
             raise ValueError("The batch size must be divisible by the mini batch size")
         self.mini_batch = batch_size
+        self.__update_dataset_size()
+    
+    def __update_dataset_size(self) -> None:
+        self.dataset_size = self.input_strategy.get_dataset_size(self.mini_batch)
+
 
     def __read_batch(self, dataset_index: int) -> None:
         tf.print("dataset_index: ", dataset_index)

From f23bc4d8187feb0abdd2253766c589000b2d060d Mon Sep 17 00:00:00 2001
From: Sajtospoga01 <abodrogai@gmail.com>
Date: Sat, 26 Aug 2023 16:17:23 +0100
Subject: [PATCH 63/75] pil image loader strategy multi threaded version

---
 .../segmentation_utils/reading_strategies.py  | 69 +++++++++++++++++++
 1 file changed, 69 insertions(+)

diff --git a/utilities/segmentation_utils/reading_strategies.py b/utilities/segmentation_utils/reading_strategies.py
index 7c743a2..b24ceaa 100644
--- a/utilities/segmentation_utils/reading_strategies.py
+++ b/utilities/segmentation_utils/reading_strategies.py
@@ -69,6 +69,75 @@ def shuffle_filenames(self, seed: int) -> None:
         self.image_filenames = self.image_filenames[shuffled_indices]
 
 
+class RGBImageStrategyMultiThread:
+    def __init__(
+        self,
+        image_path: str,
+        image_size: tuple[int, int],
+        image_resample=Image.Resampling.NEAREST,
+        max_workers: int = 8,
+    ):
+        self.image_path = image_path
+        self.image_filenames = np.array(
+            sorted(os.listdir(self.image_path))
+        )  #!update: added variable to initialiser
+        self.image_size = image_size
+        self.image_resample = image_resample
+        self.max_workers = max_workers
+
+    def __read_single_image_pil(self, filename, image_path, image_size, image_resample):
+        image = Image.open(os.path.join(image_path, filename)).resize(
+            image_size, image_resample
+        )
+        return np.array(image)
+
+    def read_batch(self, batch_size: int, dataset_index: int) -> np.ndarray:
+        batch_filenames = self.image_filenames[
+            dataset_index : dataset_index + batch_size
+        ]
+
+        images = np.zeros((batch_size, self.image_size[0], self.image_size[1], 3))
+        is_color = True
+
+        with ThreadPoolExecutor(max_workers=self.max_workers) as executor:
+            future_to_index = {
+                executor.submit(
+                    self.__read_single_image_pil,
+                    filename,
+                    self.image_path,
+                    self.image_size,
+                    self.image_resample,
+                ): i
+                for i, filename in enumerate(batch_filenames)
+            }
+            for future in futures.as_completed(future_to_index):
+                i = future_to_index[future]
+                image = future.result()
+
+                if len(image.shape) == 2 and is_color:
+                    images = np.zeros(
+                        (batch_size, self.image_size[0], self.image_size[1])
+                    )
+                    is_color = False
+
+                images[i, ...] = image
+
+        return images
+
+    def get_dataset_size(self, mini_batch) -> int:
+        dataset_size = int(np.floor(len(self.image_filenames) / float(mini_batch)))
+        return dataset_size
+
+    def get_image_size(self) -> tuple[int, int]:
+        return self.image_size
+
+    def shuffle_filenames(self, seed: int) -> None:
+        state = np.random.RandomState(seed)
+        shuffled_indices = state.permutation(len(self.image_filenames))
+        shuffled_indices = shuffled_indices.astype(int)
+        self.image_filenames = self.image_filenames[shuffled_indices]
+
+
 class HyperspectralImageStrategy:
     # read images with rasterio
     def __init__(

From a777cc0224fe438941a3882df5d465b541b98c71 Mon Sep 17 00:00:00 2001
From: Sajtospoga01 <abodrogai@gmail.com>
Date: Sat, 26 Aug 2023 16:42:14 +0100
Subject: [PATCH 64/75] removes debugging print statements from flowgenerator,
 optimizes multithreaded workloads in strategies

---
 utilities/segmentation_utils/flowreader.py    | 26 +++++------
 .../segmentation_utils/reading_strategies.py  | 44 +++++++++----------
 2 files changed, 33 insertions(+), 37 deletions(-)

diff --git a/utilities/segmentation_utils/flowreader.py b/utilities/segmentation_utils/flowreader.py
index 6e40a47..1282a6b 100644
--- a/utilities/segmentation_utils/flowreader.py
+++ b/utilities/segmentation_utils/flowreader.py
@@ -344,29 +344,27 @@ def set_mini_batch_size(self, batch_size: int) -> None:
             raise ValueError("The batch size must be divisible by the mini batch size")
         self.mini_batch = batch_size
         self.__update_dataset_size()
-    
+
     def __update_dataset_size(self) -> None:
         self.dataset_size = self.input_strategy.get_dataset_size(self.mini_batch)
 
-
     def __read_batch(self, dataset_index: int) -> None:
-        tf.print("dataset_index: ", dataset_index)
-        tf.print("dataset_size: ", self.dataset_size)
         #!adjust the batch size as it is passed to the function
-        #calculates remaining images in a dataset and scales it down by multiplying with minibatch
-        partial_dataset = self.dataset_size * self.mini_batch - dataset_index 
+        # calculates remaining images in a dataset and scales it down by multiplying with minibatch
+        partial_dataset = self.dataset_size * self.mini_batch - dataset_index
 
-        tf.print("partial_dataset: ", partial_dataset)
-        #compare and choose the smaller value, to avoid making a larger batch_size
+        # compare and choose the smaller value, to avoid making a larger batch_size
         adjusted_batch_size = min(self.batch_size, partial_dataset)
-        
-        tf.print("adjusted_batch_size: ", adjusted_batch_size)
 
-        #calculate number of mini batches in a batch
+        # calculate number of mini batches in a batch
         n = adjusted_batch_size // self.mini_batch
 
-        batch_images = self.input_strategy.read_batch(adjusted_batch_size, dataset_index)
-        batch_masks = self.output_strategy.read_batch(adjusted_batch_size, dataset_index)
+        batch_images = self.input_strategy.read_batch(
+            adjusted_batch_size, dataset_index
+        )
+        batch_masks = self.output_strategy.read_batch(
+            adjusted_batch_size, dataset_index
+        )
 
         # preprocess and assign images and masks to the batch
 
@@ -423,12 +421,10 @@ def __read_batch(self, dataset_index: int) -> None:
         # required to check when to read the next batch
 
     def __len__(self) -> int:
-        tf.print("len called")
         return self.input_strategy.get_dataset_size(self.mini_batch)
 
     def __getitem__(self, index) -> tuple[np.ndarray, np.ndarray]:
         # check if the batch is already cached
-        
 
         if index < self.validity_index - self.batch_size // self.mini_batch:
             self.validity_index = 0
diff --git a/utilities/segmentation_utils/reading_strategies.py b/utilities/segmentation_utils/reading_strategies.py
index b24ceaa..c570ef0 100644
--- a/utilities/segmentation_utils/reading_strategies.py
+++ b/utilities/segmentation_utils/reading_strategies.py
@@ -171,7 +171,7 @@ def read_batch(self, batch_size: int, dataset_index: int) -> np.ndarray:
             with self.package.open(os.path.join(self.image_path, filename)) as dataset:
                 # .read() returns a numpy array that contains the raster cell values in your file.
                 image = dataset.read()
-            images[i, :, :, :] = np.resize(image, self.image_size)
+            images[i, :, :, :] = np.resize(image, (self.bands, *self.image_size))
 
         # ensures channel-last orientation for the reader
         images = np.moveaxis(images, 1, 3)
@@ -214,42 +214,42 @@ def __init__(
         ).count
 
     def __read_single_image(
-        self, filename: str, image_path: str, package: Any, image_size: tuple[int, int]
-    ):
-        with package.open(os.path.join(image_path, filename)) as dataset:
+        self, filename: str, package: Any, image_size: tuple[int, int, int]
+    ) -> np.ndarray:
+        with package.open(filename) as dataset:
             image = dataset.read()
         resized_image = np.resize(image, image_size)
         return resized_image
 
     def read_batch(self, batch_size: int, dataset_index: int) -> np.ndarray:
-        batch_filenames = self.image_filenames[
-            dataset_index : dataset_index + batch_size
+        batch_filenames = [
+            os.path.join(self.image_path, filename)
+            for filename in self.image_filenames[
+                dataset_index : dataset_index + batch_size
+            ]
         ]
 
-        # defines the array that will contain the images
+        # Pre-allocate memory
         images = np.zeros(
             (batch_size, self.bands, self.image_size[0], self.image_size[1])
         )
 
+        # Use ThreadPoolExecutor.map for more efficient multi-threading
         with ThreadPoolExecutor(max_workers=self.max_workers) as executor:
-            future_to_index = {
-                executor.submit(
+            for i, image in enumerate(
+                executor.map(
                     self.__read_single_image,
-                    filename,
-                    self.image_path,
-                    self.package,
-                    self.image_size,
-                ): i
-                for i, filename in enumerate(batch_filenames)
-            }
-            for future in futures.as_completed(future_to_index):
-                i = future_to_index[future]
-                images[i, :, :, :] = future.result()
-
-        # ensures channel-last orientation for the reader
+                    batch_filenames,
+                    [self.package] * batch_size,
+                    [(self.bands, *self.image_size)] * batch_size,
+                )
+            ):
+                images[i, :, :, :] = image
+
+        # Ensure channel-last orientation
         images = np.moveaxis(images, 1, 3)
 
-        return np.array(images)
+        return images
 
     def get_dataset_size(self, mini_batch) -> int:
         dataset_size = int(np.floor(len(self.image_filenames) / float(mini_batch)))

From 14ad5a1761fd4bfb043077451ae98f6f62773c9d Mon Sep 17 00:00:00 2001
From: Sajtospoga01 <abodrogai@gmail.com>
Date: Sun, 27 Aug 2023 15:39:36 +0100
Subject: [PATCH 65/75] renames previous HyperspectralImageStrategy to
 RasterImageStrategy, adds new HSImageStrategy based on Spectral Python

---
 pyproject.toml                                |  1 +
 .../test_strategies.py                        | 86 +++++++++++++++----
 .../segmentation_utils/reading_strategies.py  | 73 +++++++++++++++-
 3 files changed, 139 insertions(+), 21 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index 77f08e4..6f05b5b 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -21,6 +21,7 @@ dependencies = [
     "Pillow >= 9.4.0",
     "tensorflow >= 2.10",
     "toml >= 0.10.2",
+    "spectral >= 0.23.1",
 ]
 
 [tool.setuptools]
diff --git a/tests/segmentation_utils_tests.py/test_strategies.py b/tests/segmentation_utils_tests.py/test_strategies.py
index aad5a4b..0632f63 100644
--- a/tests/segmentation_utils_tests.py/test_strategies.py
+++ b/tests/segmentation_utils_tests.py/test_strategies.py
@@ -6,7 +6,7 @@
 from pytest import MonkeyPatch
 
 from utilities.segmentation_utils.reading_strategies import (
-    HyperspectralImageStrategy, HyperspectralImageStrategyMultiThread,
+    HSImageStrategy, RasterImageStrategy, RasterImageStrategyMultiThread,
     RGBImageStrategy)
 
 
@@ -43,6 +43,26 @@ def __exit__(self, type, value, traceback):
     def get_count(self):
         return self.call_count
 
+class SPyMock:
+    def __init__(self,n,size,bands) -> None:
+        self.n = n
+        self.size = size
+        self.bands = bands
+        self.call_count = 0
+
+    @property
+    def shape(self):
+        return (self.size[0],self.size[1],self.bands)
+
+    def open_image(self,*args,**kwargs):
+        return self
+    
+    def load(self,*args,**kwargs):
+        self.call_count += 1
+        return np.full((self.size[0],self.size[1],self.bands),self.call_count,np.uint8)
+
+
+
 
 @pytest.mark.development
 def test_read_batch_image_path() -> None:
@@ -132,7 +152,7 @@ def test_RGB_get_dataset_size() -> None:
 
 
 @pytest.mark.development
-def test_Hyperspectral_get_dataset_size() -> None:
+def test_raster_get_dataset_size() -> None:
     # checking if the calculation is done correctly
     patch = MonkeyPatch()
 
@@ -140,7 +160,7 @@ def test_Hyperspectral_get_dataset_size() -> None:
 
     patch.setattr(os, "listdir", lambda x: mock_filenames)
 
-    image_strategy = HyperspectralImageStrategy(
+    image_strategy = RasterImageStrategy(
         image_path="tests/segmentation_utils_tests/test_strategies",
         image_size=(224, 224),
         package=MockRasterio(n=3, size=(224, 224), bands=3, dtypes=["uint8"]),
@@ -159,7 +179,7 @@ def test_Hyperspectral_get_dataset_size() -> None:
 
 
 @pytest.mark.development
-def test_hyperspectral_open():
+def test_raster_open():
     patch = MonkeyPatch()
     mock_filenames = ["a", "b", "c"]
     patch.setattr(os, "listdir", lambda x: mock_filenames)
@@ -172,7 +192,7 @@ def test_hyperspectral_open():
         "bands": 3,
         "dtypes": ["uint8"],
     }
-    strategy = HyperspectralImageStrategy(
+    strategy = RasterImageStrategy(
         image_path, (224, 224), package=MockRasterio(**mock_data)
     )
 
@@ -181,8 +201,10 @@ def test_hyperspectral_open():
     assert read_images.shape == (2, 224, 224, 3)
 
 
+
+
 @pytest.mark.development
-def test_hyperspectral_mt_open():
+def test_raster_mt_open():
     patch = MonkeyPatch()
     mock_filenames = ["a", "b", "c"]
     patch.setattr(os, "listdir", lambda x: mock_filenames)
@@ -195,7 +217,7 @@ def test_hyperspectral_mt_open():
         "bands": 3,
         "dtypes": ["uint8"],
     }
-    strategy = HyperspectralImageStrategyMultiThread(
+    strategy = RasterImageStrategyMultiThread(
         image_path, (224, 224), package=MockRasterio(**mock_data)
     )
 
@@ -204,6 +226,28 @@ def test_hyperspectral_mt_open():
     assert read_images.shape == (2, 224, 224, 3)
 
 
+@pytest.mark.development
+def test_hyperspectral_open():
+    patch = MonkeyPatch()
+    mock_filenames = ["a", "b", "c"]
+    patch.setattr(os, "listdir", lambda x: mock_filenames)
+
+    image_path = "tests/segmentation_utils_tests/test_strategies"
+
+    mock_data = {
+        "n": 3,
+        "size": (224, 224),
+        "bands": 3,
+    }
+    strategy = HSImageStrategy(
+        image_path, (224, 224), package=SPyMock(**mock_data)
+    )
+
+    read_images = strategy.read_batch(2, 0)
+    
+    assert read_images.shape == (2, 224, 224, 3)
+
+
 @pytest.mark.development
 def test_empty_batch():
     patch = MonkeyPatch()
@@ -320,14 +364,14 @@ def test_RGB_get_image_size():
 
 
 @pytest.mark.development
-def test_HyperSpectral_get_image_size():
+def test_raster_get_image_size():
     patch = MonkeyPatch()
 
     mock_filenames = ["a" for _ in range(20)]
 
     patch.setattr(os, "listdir", lambda x: mock_filenames)
 
-    image_strategy = HyperspectralImageStrategy(
+    image_strategy = RasterImageStrategy(
         image_path="tests/segmentation_utils_tests/test_strategies",
         image_size=(224, 224),
         package=MockRasterio(n=3, size=(224, 224), bands=3, dtypes=["uint8"]),
@@ -338,14 +382,14 @@ def test_HyperSpectral_get_image_size():
 
 
 @pytest.mark.development
-def test_HyperSpectral_MT_get_image_size():
+def test_raster_MT_get_image_size():
     patch = MonkeyPatch()
 
     mock_filenames = ["a" for _ in range(20)]
 
     patch.setattr(os, "listdir", lambda x: mock_filenames)
 
-    image_strategy = HyperspectralImageStrategyMultiThread(
+    image_strategy = RasterImageStrategyMultiThread(
         image_path="tests/segmentation_utils_tests/test_strategies",
         image_size=(224, 224),
         package=MockRasterio(n=3, size=(224, 224), bands=3, dtypes=["uint8"]),
@@ -354,6 +398,7 @@ def test_HyperSpectral_MT_get_image_size():
     result = image_strategy.get_image_size()
     assert result == (224, 224)
 
+
 @pytest.mark.development
 def test_RGB_shuffle():
     patch = MonkeyPatch()
@@ -384,21 +429,22 @@ def test_RGB_shuffle():
         image_strategy_1.image_filenames, image_strategy_2.image_filenames
     )
 
+
 @pytest.mark.development
-def test_Hyperspectral_shuffle():
+def test_raster_shuffle():
     patch = MonkeyPatch()
 
     mock_filenames = [str(i) for i in range(20)]
 
     patch.setattr(os, "listdir", lambda x: mock_filenames)
 
-    image_strategy_1 = HyperspectralImageStrategy(
+    image_strategy_1 = RasterImageStrategy(
         image_path="tests/segmentation_utils_tests/test_strategies",
         image_size=(224, 224),
         package=MockRasterio(n=3, size=(224, 224), bands=3, dtypes=["uint8"]),
     )
 
-    image_strategy_2 = HyperspectralImageStrategy(
+    image_strategy_2 = RasterImageStrategy(
         image_path="tests/segmentation_utils_tests/test_strategies",
         image_size=(224, 224),
         package=MockRasterio(n=3, size=(224, 224), bands=3, dtypes=["uint8"]),
@@ -414,21 +460,22 @@ def test_Hyperspectral_shuffle():
         image_strategy_1.image_filenames, image_strategy_2.image_filenames
     )
 
+
 @pytest.mark.development
-def test_Hyperspectral_mt_shuffle():
+def test_raster_mt_shuffle():
     patch = MonkeyPatch()
 
     mock_filenames = [str(i) for i in range(20)]
 
     patch.setattr(os, "listdir", lambda x: mock_filenames)
 
-    image_strategy_1 = HyperspectralImageStrategyMultiThread(
+    image_strategy_1 = RasterImageStrategyMultiThread(
         image_path="tests/segmentation_utils_tests/test_strategies",
         image_size=(224, 224),
         package=MockRasterio(n=3, size=(224, 224), bands=3, dtypes=["uint8"]),
     )
 
-    image_strategy_2 = HyperspectralImageStrategy(
+    image_strategy_2 = RasterImageStrategy(
         image_path="tests/segmentation_utils_tests/test_strategies",
         image_size=(224, 224),
         package=MockRasterio(n=3, size=(224, 224), bands=3, dtypes=["uint8"]),
@@ -444,15 +491,16 @@ def test_Hyperspectral_mt_shuffle():
         image_strategy_1.image_filenames, image_strategy_2.image_filenames
     )
 
+
 @pytest.mark.development
-def test_Hyperspectral_mt_image_in_order():
+def test_raster_mt_image_in_order():
     patch = MonkeyPatch()
 
     mock_filenames = [str(i) for i in range(20)]
 
     patch.setattr(os, "listdir", lambda x: mock_filenames)
     mock_package = MockRasterio(n=3, size=(224, 224), bands=3, dtypes=["uint8"])
-    image_strategy = HyperspectralImageStrategyMultiThread(
+    image_strategy = RasterImageStrategyMultiThread(
         image_path="tests/segmentation_utils_tests/test_strategies",
         image_size=(224, 224),
         package=mock_package,
diff --git a/utilities/segmentation_utils/reading_strategies.py b/utilities/segmentation_utils/reading_strategies.py
index c570ef0..59d1616 100644
--- a/utilities/segmentation_utils/reading_strategies.py
+++ b/utilities/segmentation_utils/reading_strategies.py
@@ -5,7 +5,9 @@
 
 import numpy as np
 import rasterio
+import spectral
 from PIL import Image
+from scipy.ndimage import zoom
 
 
 class IReader(Protocol):
@@ -138,7 +140,74 @@ def shuffle_filenames(self, seed: int) -> None:
         self.image_filenames = self.image_filenames[shuffled_indices]
 
 
-class HyperspectralImageStrategy:
+class HSImageStrategy:
+    """
+    Reads hyperspectral imagedata using Spectral Python
+    """
+
+    def __init__(
+        self, image_path: str, image_size: tuple[int, int], package: Any = spectral
+    ) -> None:
+        self.image_path = image_path
+        self.image_filenames = np.array(sorted(os.listdir(self.image_path)))
+        self.image_size = image_size
+        self.package = package
+        self.bands = self.__get_channels()
+
+    def __get_channels(self) -> int:
+        # Open the first image to determine the number of channels
+        first_image = self.package.open_image(
+            os.path.join(self.image_path, self.image_filenames[0])
+        )
+        return first_image.shape[-1] if len(first_image.shape) == 3 else 1
+
+    def read_batch(self, batch_size, dataset_index) -> np.ndarray:
+        # read images with Spectral Python
+        batch_filenames = self.image_filenames[
+            dataset_index : dataset_index + batch_size
+        ]
+
+        images = np.zeros(
+            (batch_size, self.image_size[0], self.image_size[1], self.bands)
+        )
+        is_color = True
+        for i in range(batch_size):
+            image = self.package.open_image(
+                os.path.join(self.image_path, batch_filenames[i])
+            )
+            image_data = image.load()
+
+            # Calculate the zoom factor for resizing
+            zoom_factor = (
+                self.image_size[0] / image_data.shape[0],
+                self.image_size[1] / image_data.shape[1],
+                1,
+            )
+
+            # Resize the image using scipy's zoom function
+            resized_image = zoom(image_data, zoom_factor, order=1)
+
+            if len(resized_image.shape) == 2 and is_color:
+                images = np.zeros((batch_size, self.image_size[0], self.image_size[1]))
+                is_color = False
+            images[i, ...] = resized_image
+        return images
+    
+    def get_dataset_size(self, mini_batch) -> int:
+        dataset_size = int(np.floor(len(self.image_filenames) / float(mini_batch)))
+        return dataset_size
+    
+    def get_image_size(self) -> tuple[int, int]:
+        return self.image_size
+    
+    def shuffle_filenames(self, seed: int) -> None:
+        state = np.random.RandomState(seed)
+        shuffled_indices = state.permutation(len(self.image_filenames))
+        shuffled_indices = shuffled_indices.astype(int)
+        self.image_filenames = self.image_filenames[shuffled_indices]
+
+
+class RasterImageStrategy:
     # read images with rasterio
     def __init__(
         self,
@@ -192,7 +261,7 @@ def shuffle_filenames(self, seed: int) -> None:
         self.image_filenames = self.image_filenames[shuffled_indices]
 
 
-class HyperspectralImageStrategyMultiThread:
+class RasterImageStrategyMultiThread:
     # read images with rasterio
     def __init__(
         self,

From a14348c9c4a321d67a402dc9aa87ef12c928e5d3 Mon Sep 17 00:00:00 2001
From: Sajtospoga01 <abodrogai@gmail.com>
Date: Sun, 27 Aug 2023 15:46:58 +0100
Subject: [PATCH 66/75] adds scipy to the dependency list

---
 pyproject.toml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/pyproject.toml b/pyproject.toml
index 6f05b5b..332e653 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -22,6 +22,7 @@ dependencies = [
     "tensorflow >= 2.10",
     "toml >= 0.10.2",
     "spectral >= 0.23.1",
+    "scipy >= 1.10.0",
 ]
 
 [tool.setuptools]

From 65513288e98d738e6ad1e049d3e04ccb760ee533 Mon Sep 17 00:00:00 2001
From: Sajtospoga01 <abodrogai@gmail.com>
Date: Sun, 27 Aug 2023 15:50:47 +0100
Subject: [PATCH 67/75] update pipeline to use toml file for dependency install

---
 .github/workflows/development.yml | 2 +-
 pyproject.toml                    | 2 ++
 requirements.txt                  | 7 -------
 3 files changed, 3 insertions(+), 8 deletions(-)
 delete mode 100644 requirements.txt

diff --git a/.github/workflows/development.yml b/.github/workflows/development.yml
index 8e04cb9..0488813 100644
--- a/.github/workflows/development.yml
+++ b/.github/workflows/development.yml
@@ -62,7 +62,7 @@ jobs:
       run: |
         python -m pip install --upgrade pip
         pip install pre-commit
-        if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
+        pip install .[dev]
     - name: Lint with pylint
       run: |
         pip install pylint
diff --git a/pyproject.toml b/pyproject.toml
index 332e653..1f1081b 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -23,6 +23,8 @@ dependencies = [
     "toml >= 0.10.2",
     "spectral >= 0.23.1",
     "scipy >= 1.10.0",
+    "tqdm >= 4.64.1",
+    "pandas >= 1.5.1",
 ]
 
 [tool.setuptools]
diff --git a/requirements.txt b/requirements.txt
deleted file mode 100644
index 1200270..0000000
--- a/requirements.txt
+++ /dev/null
@@ -1,7 +0,0 @@
-tensorflow==2.10
-numpy==1.24.1
-rasterio==1.3.6
-Pillow==9.4.0
-tqdm==4.64.1
-pandas==1.5.1
-toml==0.10.2

From 04f976bd7a8f92b6094ad1ac425b91e9b3e4b4da Mon Sep 17 00:00:00 2001
From: Sajtospoga01 <abodrogai@gmail.com>
Date: Sun, 27 Aug 2023 15:53:04 +0100
Subject: [PATCH 68/75] update all stages to install dependencies from toml
 config

---
 .github/workflows/development.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/development.yml b/.github/workflows/development.yml
index 0488813..467af14 100644
--- a/.github/workflows/development.yml
+++ b/.github/workflows/development.yml
@@ -26,7 +26,7 @@ jobs:
       run: |
         python -m pip install --upgrade pip
         pip install pre-commit
-        if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
+        pip install .[dev]
 
   test:
     needs: dependency-install
@@ -42,7 +42,7 @@ jobs:
       run: |
         python -m pip install --upgrade pip
         pip install pytest pytest-mock
-        if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
+        pip install .[dev]
     - name: Test with pytest
       run: |
         python -m pytest -v -m "not staging"

From 58d6d2cc7a9ee39683b2603215e91996843887f2 Mon Sep 17 00:00:00 2001
From: Sajtospoga01 <abodrogai@gmail.com>
Date: Sun, 27 Aug 2023 16:27:27 +0100
Subject: [PATCH 69/75] adds experimental hsi strategy based on cv2

---
 .../test_strategies.py                        | 41 ++++-----------
 .../segmentation_utils/reading_strategies.py  | 50 +++++++++----------
 2 files changed, 32 insertions(+), 59 deletions(-)

diff --git a/tests/segmentation_utils_tests.py/test_strategies.py b/tests/segmentation_utils_tests.py/test_strategies.py
index 0632f63..da5d553 100644
--- a/tests/segmentation_utils_tests.py/test_strategies.py
+++ b/tests/segmentation_utils_tests.py/test_strategies.py
@@ -43,8 +43,9 @@ def __exit__(self, type, value, traceback):
     def get_count(self):
         return self.call_count
 
+
 class SPyMock:
-    def __init__(self,n,size,bands) -> None:
+    def __init__(self, n, size, bands) -> None:
         self.n = n
         self.size = size
         self.bands = bands
@@ -52,16 +53,16 @@ def __init__(self,n,size,bands) -> None:
 
     @property
     def shape(self):
-        return (self.size[0],self.size[1],self.bands)
+        return (self.size[0], self.size[1], self.bands)
 
-    def open_image(self,*args,**kwargs):
+    def open_image(self, *args, **kwargs):
         return self
-    
-    def load(self,*args,**kwargs):
-        self.call_count += 1
-        return np.full((self.size[0],self.size[1],self.bands),self.call_count,np.uint8)
-
 
+    def load(self, *args, **kwargs):
+        self.call_count += 1
+        return np.full(
+            (self.size[0], self.size[1], self.bands), self.call_count, np.uint8
+        )
 
 
 @pytest.mark.development
@@ -201,8 +202,6 @@ def test_raster_open():
     assert read_images.shape == (2, 224, 224, 3)
 
 
-
-
 @pytest.mark.development
 def test_raster_mt_open():
     patch = MonkeyPatch()
@@ -226,28 +225,6 @@ def test_raster_mt_open():
     assert read_images.shape == (2, 224, 224, 3)
 
 
-@pytest.mark.development
-def test_hyperspectral_open():
-    patch = MonkeyPatch()
-    mock_filenames = ["a", "b", "c"]
-    patch.setattr(os, "listdir", lambda x: mock_filenames)
-
-    image_path = "tests/segmentation_utils_tests/test_strategies"
-
-    mock_data = {
-        "n": 3,
-        "size": (224, 224),
-        "bands": 3,
-    }
-    strategy = HSImageStrategy(
-        image_path, (224, 224), package=SPyMock(**mock_data)
-    )
-
-    read_images = strategy.read_batch(2, 0)
-    
-    assert read_images.shape == (2, 224, 224, 3)
-
-
 @pytest.mark.development
 def test_empty_batch():
     patch = MonkeyPatch()
diff --git a/utilities/segmentation_utils/reading_strategies.py b/utilities/segmentation_utils/reading_strategies.py
index 59d1616..84960c6 100644
--- a/utilities/segmentation_utils/reading_strategies.py
+++ b/utilities/segmentation_utils/reading_strategies.py
@@ -3,11 +3,10 @@
 from concurrent.futures import ThreadPoolExecutor
 from typing import Any, Protocol
 
+import cv2
 import numpy as np
 import rasterio
-import spectral
 from PIL import Image
-from scipy.ndimage import zoom
 
 
 class IReader(Protocol):
@@ -142,11 +141,11 @@ def shuffle_filenames(self, seed: int) -> None:
 
 class HSImageStrategy:
     """
-    Reads hyperspectral imagedata using Spectral Python
+    Reads hyperspectral optimized images with OpenCV
     """
 
     def __init__(
-        self, image_path: str, image_size: tuple[int, int], package: Any = spectral
+        self, image_path: str, image_size: tuple[int, int], package: Any = cv2
     ) -> None:
         self.image_path = image_path
         self.image_filenames = np.array(sorted(os.listdir(self.image_path)))
@@ -162,35 +161,32 @@ def __get_channels(self) -> int:
         return first_image.shape[-1] if len(first_image.shape) == 3 else 1
 
     def read_batch(self, batch_size, dataset_index) -> np.ndarray:
-        # read images with Spectral Python
+        # Read a sample image to determine the number of bands
+        sample_image_path = os.path.join(self.image_path, self.image_filenames[0])
+        sample_image = self.package.imread(sample_image_path, self.package.IMREAD_UNCHANGED)
+        num_bands = sample_image.shape[2] if len(sample_image.shape) == 3 else 1
+
+        # Initialize images array
+        images = np.zeros((batch_size, self.image_size[1], self.image_size[0], num_bands))
+
+        # Read images with OpenCV
         batch_filenames = self.image_filenames[
             dataset_index : dataset_index + batch_size
         ]
 
-        images = np.zeros(
-            (batch_size, self.image_size[0], self.image_size[1], self.bands)
-        )
-        is_color = True
         for i in range(batch_size):
-            image = self.package.open_image(
-                os.path.join(self.image_path, batch_filenames[i])
-            )
-            image_data = image.load()
-
-            # Calculate the zoom factor for resizing
-            zoom_factor = (
-                self.image_size[0] / image_data.shape[0],
-                self.image_size[1] / image_data.shape[1],
-                1,
-            )
-
-            # Resize the image using scipy's zoom function
-            resized_image = zoom(image_data, zoom_factor, order=1)
+            image_path = os.path.join(self.image_path, batch_filenames[i])
+            image = self.package.imread(image_path, self.package.IMREAD_UNCHANGED)
+            
+            # Resize the image
+            image = self.package.resize(image, self.image_size)
+            
+            # If the image is color, convert BGR to RGB
+            if len(image.shape) == 3 and image.shape[2] == 3:
+                image = self.package.cvtColor(image, self.package.COLOR_BGR2RGB)
+            
+            images[i, ...] = image
 
-            if len(resized_image.shape) == 2 and is_color:
-                images = np.zeros((batch_size, self.image_size[0], self.image_size[1]))
-                is_color = False
-            images[i, ...] = resized_image
         return images
     
     def get_dataset_size(self, mini_batch) -> int:

From 872916622ce61cea2532b2385529590518c08137 Mon Sep 17 00:00:00 2001
From: Sajtospoga01 <abodrogai@gmail.com>
Date: Sun, 27 Aug 2023 16:29:53 +0100
Subject: [PATCH 70/75] removes scipy and SPy

---
 pyproject.toml | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index 1f1081b..6305a5d 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -21,10 +21,9 @@ dependencies = [
     "Pillow >= 9.4.0",
     "tensorflow >= 2.10",
     "toml >= 0.10.2",
-    "spectral >= 0.23.1",
-    "scipy >= 1.10.0",
     "tqdm >= 4.64.1",
     "pandas >= 1.5.1",
+    "opencv-python >= 4.7.0.68"
 ]
 
 [tool.setuptools]

From 8d055b284844b0680e9e2c9d141f523d41653f3a Mon Sep 17 00:00:00 2001
From: Sajtospoga01 <abodrogai@gmail.com>
Date: Sun, 27 Aug 2023 16:53:46 +0100
Subject: [PATCH 71/75] updates __get_channels in HSI strategy to use opencv,
 updates opencv package to headless version

---
 pyproject.toml                                     |  2 +-
 utilities/segmentation_utils/reading_strategies.py | 13 +++++--------
 2 files changed, 6 insertions(+), 9 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index 6305a5d..d2be2d6 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -23,7 +23,7 @@ dependencies = [
     "toml >= 0.10.2",
     "tqdm >= 4.64.1",
     "pandas >= 1.5.1",
-    "opencv-python >= 4.7.0.68"
+    "opencv-python-headless >= 4.8.0.76"
 ]
 
 [tool.setuptools]
diff --git a/utilities/segmentation_utils/reading_strategies.py b/utilities/segmentation_utils/reading_strategies.py
index 84960c6..c091a21 100644
--- a/utilities/segmentation_utils/reading_strategies.py
+++ b/utilities/segmentation_utils/reading_strategies.py
@@ -155,19 +155,16 @@ def __init__(
 
     def __get_channels(self) -> int:
         # Open the first image to determine the number of channels
-        first_image = self.package.open_image(
-            os.path.join(self.image_path, self.image_filenames[0])
-        )
-        return first_image.shape[-1] if len(first_image.shape) == 3 else 1
+        sample_image_path = os.path.join(self.image_path, self.image_filenames[0])
+        sample_image = self.package.imread(sample_image_path, self.package.IMREAD_UNCHANGED)
+        return sample_image.shape[2] if len(sample_image.shape) == 3 else 1
 
     def read_batch(self, batch_size, dataset_index) -> np.ndarray:
         # Read a sample image to determine the number of bands
-        sample_image_path = os.path.join(self.image_path, self.image_filenames[0])
-        sample_image = self.package.imread(sample_image_path, self.package.IMREAD_UNCHANGED)
-        num_bands = sample_image.shape[2] if len(sample_image.shape) == 3 else 1
+
 
         # Initialize images array
-        images = np.zeros((batch_size, self.image_size[1], self.image_size[0], num_bands))
+        images = np.zeros((batch_size, self.image_size[1], self.image_size[0], self.bands))
 
         # Read images with OpenCV
         batch_filenames = self.image_filenames[

From 118371c56d45e377c1f6d0db12b75d542532f44b Mon Sep 17 00:00:00 2001
From: Sajtospoga01 <abodrogai@gmail.com>
Date: Sun, 27 Aug 2023 17:34:41 +0100
Subject: [PATCH 72/75] adds multi threaded hsi strategy

---
 .../test_strategies.py                        | 287 +++++++++++++++++-
 .../segmentation_utils/reading_strategies.py  | 115 ++++++-
 2 files changed, 379 insertions(+), 23 deletions(-)

diff --git a/tests/segmentation_utils_tests.py/test_strategies.py b/tests/segmentation_utils_tests.py/test_strategies.py
index da5d553..88ff6c3 100644
--- a/tests/segmentation_utils_tests.py/test_strategies.py
+++ b/tests/segmentation_utils_tests.py/test_strategies.py
@@ -6,8 +6,8 @@
 from pytest import MonkeyPatch
 
 from utilities.segmentation_utils.reading_strategies import (
-    HSImageStrategy, RasterImageStrategy, RasterImageStrategyMultiThread,
-    RGBImageStrategy)
+    HSImageStrategy, HSImageStrategyMultiThread, RasterImageStrategy,
+    RasterImageStrategyMultiThread, RGBImageStrategy)
 
 
 class MockRasterio:
@@ -44,26 +44,35 @@ def get_count(self):
         return self.call_count
 
 
-class SPyMock:
+class CV2Mock:
+    IMREAD_UNCHANGED = 1
+    COLOR_BGR2RGB = 1
+
     def __init__(self, n, size, bands) -> None:
         self.n = n
         self.size = size
         self.bands = bands
         self.call_count = 0
 
-    @property
-    def shape(self):
-        return (self.size[0], self.size[1], self.bands)
-
-    def open_image(self, *args, **kwargs):
-        return self
-
-    def load(self, *args, **kwargs):
+    def imread(self, *args, **kwargs):
         self.call_count += 1
         return np.full(
             (self.size[0], self.size[1], self.bands), self.call_count, np.uint8
         )
 
+    def resize(self, *args, **kwargs):
+        img = args[0]
+        size = args[1]
+        return np.full((size[0], size[1], self.bands), img[0,0,0], np.uint8)
+
+    def cvtColor(self, *args, **kwargs):
+        img = args[0]
+        return np.full(
+            (self.size[0], self.size[1], self.bands), img[0,0,0], np.uint8
+        )
+
+    def get_count(self):
+        return self.call_count
 
 @pytest.mark.development
 def test_read_batch_image_path() -> None:
@@ -179,6 +188,59 @@ def test_raster_get_dataset_size() -> None:
     patch.undo()
 
 
+@pytest.mark.development
+def test_hsi_get_dataset_size() -> None:
+    # checking if the calculation is done correctly
+    patch = MonkeyPatch()
+
+    mock_filenames = ["a", "b", "c"]
+
+    patch.setattr(os, "listdir", lambda x: mock_filenames)
+
+    image_strategy = HSImageStrategy(
+        image_path="tests/segmentation_utils_tests/test_strategies",
+        image_size=(224, 224),
+        package=CV2Mock(n=3, size=(224, 224), bands=3),
+    )
+
+    dataset = len(mock_filenames)  # number of images in the specified path
+    mini_batch = 2  # number of images we want in each batch
+    expected_value = int(
+        np.floor(dataset / float(mini_batch))
+    )  # number of sets of images we expect
+
+    dataset_size = image_strategy.get_dataset_size(mini_batch)
+    assert dataset_size == expected_value
+    patch.undo()
+    patch.undo()
+
+@pytest.mark.development
+def test_hsi_mt_get_dataset_size() -> None:
+    # checking if the calculation is done correctly
+    patch = MonkeyPatch()
+
+    mock_filenames = ["a", "b", "c"]
+
+    patch.setattr(os, "listdir", lambda x: mock_filenames)
+
+    image_strategy = HSImageStrategyMultiThread(
+        image_path="tests/segmentation_utils_tests/test_strategies",
+        image_size=(224, 224),
+        package=CV2Mock(n=3, size=(224, 224), bands=3),
+    )
+
+    dataset = len(mock_filenames)  # number of images in the specified path
+    mini_batch = 2  # number of images we want in each batch
+    expected_value = int(
+        np.floor(dataset / float(mini_batch))
+    )  # number of sets of images we expect
+
+    dataset_size = image_strategy.get_dataset_size(mini_batch)
+    assert dataset_size == expected_value
+    patch.undo()
+    patch.undo()
+
+
 @pytest.mark.development
 def test_raster_open():
     patch = MonkeyPatch()
@@ -225,6 +287,84 @@ def test_raster_mt_open():
     assert read_images.shape == (2, 224, 224, 3)
 
 
+@pytest.mark.development
+def test_hsi_open():
+    patch = MonkeyPatch()
+    mock_filenames = ["a", "b", "c"]
+    patch.setattr(os, "listdir", lambda x: mock_filenames)
+
+    image_path = "tests/segmentation_utils_tests/test_strategies"
+
+    mock_data = {
+        "n": 3,
+        "size": (224, 224),
+        "bands": 3,
+    }
+    strategy = HSImageStrategy(image_path, (224, 224), package=CV2Mock(**mock_data))
+
+    read_images = strategy.read_batch(2, 0)
+
+    assert read_images.shape == (2, 224, 224, 3)
+
+@pytest.mark.development
+def test_hsi_mt_open():
+    patch = MonkeyPatch()
+    mock_filenames = ["a", "b", "c"]
+    patch.setattr(os, "listdir", lambda x: mock_filenames)
+
+    image_path = "tests/segmentation_utils_tests/test_strategies"
+
+    mock_data = {
+        "n": 3,
+        "size": (224, 224),
+        "bands": 3,
+    }
+    strategy = HSImageStrategyMultiThread(image_path, (224, 224), package=CV2Mock(**mock_data))
+
+    read_images = strategy.read_batch(2, 0)
+
+    assert read_images.shape == (2, 224, 224, 3)
+
+
+@pytest.mark.development
+def test_hsi_get_channels():
+    patch = MonkeyPatch()
+    mock_filenames = ["a", "b", "c"]
+    patch.setattr(os, "listdir", lambda x: mock_filenames)
+
+    image_path = "tests/segmentation_utils_tests/test_strategies"
+
+    mock_data = {
+        "n": 3,
+        "size": (224, 224),
+        "bands": 3,
+    }
+    strategy = HSImageStrategy(image_path, (224, 224), package=CV2Mock(**mock_data))
+
+    channels = strategy._HSImageStrategy__get_channels()
+
+    assert channels == 3
+
+@pytest.mark.development
+def test_hsi_mt_get_channels():
+    patch = MonkeyPatch()
+    mock_filenames = ["a", "b", "c"]
+    patch.setattr(os, "listdir", lambda x: mock_filenames)
+
+    image_path = "tests/segmentation_utils_tests/test_strategies"
+
+    mock_data = {
+        "n": 3,
+        "size": (224, 224),
+        "bands": 3,
+    }
+    strategy = HSImageStrategyMultiThread(image_path, (224, 224), package=CV2Mock(**mock_data))
+
+    channels = strategy._HSImageStrategyMultiThread__get_channels()
+
+    assert channels == 3
+
+
 @pytest.mark.development
 def test_empty_batch():
     patch = MonkeyPatch()
@@ -359,7 +499,42 @@ def test_raster_get_image_size():
 
 
 @pytest.mark.development
-def test_raster_MT_get_image_size():
+def test_hsi_get_image_size():
+    patch = MonkeyPatch()
+
+    mock_filenames = ["a" for _ in range(20)]
+
+    patch.setattr(os, "listdir", lambda x: mock_filenames)
+
+    image_strategy = HSImageStrategy(
+        image_path="tests/segmentation_utils_tests/test_strategies",
+        image_size=(224, 224),
+        package=CV2Mock(n=3, size=(224, 224), bands=3),
+    )
+
+    result = image_strategy.get_image_size()
+    assert result == (224, 224)
+
+@pytest.mark.development
+def test_hsi_mt_get_image_size():
+    patch = MonkeyPatch()
+
+    mock_filenames = ["a" for _ in range(20)]
+
+    patch.setattr(os, "listdir", lambda x: mock_filenames)
+
+    image_strategy = HSImageStrategyMultiThread(
+        image_path="tests/segmentation_utils_tests/test_strategies",
+        image_size=(224, 224),
+        package=CV2Mock(n=3, size=(224, 224), bands=3),
+    )
+
+    result = image_strategy.get_image_size()
+    assert result == (224, 224)
+
+
+@pytest.mark.development
+def test_raster_mt_get_image_size():
     patch = MonkeyPatch()
 
     mock_filenames = ["a" for _ in range(20)]
@@ -377,7 +552,7 @@ def test_raster_MT_get_image_size():
 
 
 @pytest.mark.development
-def test_RGB_shuffle():
+def test_rgb_shuffle():
     patch = MonkeyPatch()
 
     mock_filenames = [str(i) for i in range(20)]
@@ -493,3 +668,89 @@ def test_raster_mt_image_in_order():
         assert np.array_equal(
             result[i - call_count, :, :, :], np.full((224, 224, 3), i + 1)
         )
+
+
+@pytest.mark.development
+def test_hsi_shuffle():
+    patch = MonkeyPatch()
+
+    mock_filenames = [str(i) for i in range(20)]
+
+    patch.setattr(os, "listdir", lambda x: mock_filenames)
+
+    image_strategy_1 = HSImageStrategy(
+        image_path="tests/segmentation_utils_tests/test_strategies",
+        image_size=(224, 224),
+        package=CV2Mock(n=3, size=(224, 224), bands=3),
+    )
+
+    image_strategy_2 = HSImageStrategy(
+        image_path="tests/segmentation_utils_tests/test_strategies",
+        image_size=(224, 224),
+        package=CV2Mock(n=3, size=(224, 224), bands=3),
+    )
+
+    n = 100
+
+    for i in range(n):
+        image_strategy_1.shuffle_filenames(i)
+        image_strategy_2.shuffle_filenames(i)
+
+    assert np.array_equal(
+        image_strategy_1.image_filenames, image_strategy_2.image_filenames
+    )
+
+@pytest.mark.development
+def test_hsi_mt_shuffle():
+    patch = MonkeyPatch()
+
+    mock_filenames = [str(i) for i in range(20)]
+
+    patch.setattr(os, "listdir", lambda x: mock_filenames)
+
+    image_strategy_1 = HSImageStrategyMultiThread(
+        image_path="tests/segmentation_utils_tests/test_strategies",
+        image_size=(224, 224),
+        package=CV2Mock(n=3, size=(224, 224), bands=3),
+    )
+
+    image_strategy_2 = HSImageStrategyMultiThread(
+        image_path="tests/segmentation_utils_tests/test_strategies",
+        image_size=(224, 224),
+        package=CV2Mock(n=3, size=(224, 224), bands=3),
+    )
+
+    n = 100
+
+    for i in range(n):
+        image_strategy_1.shuffle_filenames(i)
+        image_strategy_2.shuffle_filenames(i)
+
+    assert np.array_equal(
+        image_strategy_1.image_filenames, image_strategy_2.image_filenames
+    )
+
+@pytest.mark.development
+def test_hsi_mt_image_in_order():
+    patch = MonkeyPatch()
+
+    mock_filenames = [str(i) for i in range(20)]
+
+    patch.setattr(os, "listdir", lambda x: mock_filenames)
+    mock_package = CV2Mock(n=3, size=(224, 224), bands=3)
+    image_strategy = HSImageStrategyMultiThread(
+        image_path="tests/segmentation_utils_tests/test_strategies",
+        image_size=(224, 224),
+        package=mock_package,
+    )
+
+    batch_size = 10
+
+    call_count = mock_package.get_count()
+
+    result = image_strategy.read_batch(batch_size, 0)
+
+    for i in range(call_count, call_count + batch_size):
+        assert np.array_equal(
+            result[i - call_count, :, :, :], np.full((224, 224, 3), i + 1)
+        )
diff --git a/utilities/segmentation_utils/reading_strategies.py b/utilities/segmentation_utils/reading_strategies.py
index c091a21..5e30a14 100644
--- a/utilities/segmentation_utils/reading_strategies.py
+++ b/utilities/segmentation_utils/reading_strategies.py
@@ -24,6 +24,10 @@ def shuffle_filenames(self, seed: int) -> None:
 
 
 class RGBImageStrategy:
+    """
+    Strategy optimized for reading RGB images powered by backend PIL.
+    """
+
     def __init__(
         self,
         image_path: str,
@@ -71,6 +75,11 @@ def shuffle_filenames(self, seed: int) -> None:
 
 
 class RGBImageStrategyMultiThread:
+    """
+    Strategy optimized for reading RGB images powered by backend PIL.
+    Multi threaded version.
+    """
+
     def __init__(
         self,
         image_path: str,
@@ -141,7 +150,7 @@ def shuffle_filenames(self, seed: int) -> None:
 
 class HSImageStrategy:
     """
-    Reads hyperspectral optimized images with OpenCV
+    Strategy optimized for reading hyperspectral images powered by backend OpenCV
     """
 
     def __init__(
@@ -156,15 +165,18 @@ def __init__(
     def __get_channels(self) -> int:
         # Open the first image to determine the number of channels
         sample_image_path = os.path.join(self.image_path, self.image_filenames[0])
-        sample_image = self.package.imread(sample_image_path, self.package.IMREAD_UNCHANGED)
+        sample_image = self.package.imread(
+            sample_image_path, self.package.IMREAD_UNCHANGED
+        )
         return sample_image.shape[2] if len(sample_image.shape) == 3 else 1
 
     def read_batch(self, batch_size, dataset_index) -> np.ndarray:
         # Read a sample image to determine the number of bands
 
-
         # Initialize images array
-        images = np.zeros((batch_size, self.image_size[1], self.image_size[0], self.bands))
+        images = np.zeros(
+            (batch_size, self.image_size[1], self.image_size[0], self.bands)
+        )
 
         # Read images with OpenCV
         batch_filenames = self.image_filenames[
@@ -174,25 +186,99 @@ def read_batch(self, batch_size, dataset_index) -> np.ndarray:
         for i in range(batch_size):
             image_path = os.path.join(self.image_path, batch_filenames[i])
             image = self.package.imread(image_path, self.package.IMREAD_UNCHANGED)
-            
+
             # Resize the image
             image = self.package.resize(image, self.image_size)
-            
+
             # If the image is color, convert BGR to RGB
             if len(image.shape) == 3 and image.shape[2] == 3:
                 image = self.package.cvtColor(image, self.package.COLOR_BGR2RGB)
-            
+
+            images[i, ...] = image
+
+        return images
+
+    def get_dataset_size(self, mini_batch) -> int:
+        dataset_size = int(np.floor(len(self.image_filenames) / float(mini_batch)))
+        return dataset_size
+
+    def get_image_size(self) -> tuple[int, int]:
+        return self.image_size
+
+    def shuffle_filenames(self, seed: int) -> None:
+        state = np.random.RandomState(seed)
+        shuffled_indices = state.permutation(len(self.image_filenames))
+        shuffled_indices = shuffled_indices.astype(int)
+        self.image_filenames = self.image_filenames[shuffled_indices]
+
+
+class HSImageStrategyMultiThread:
+    def __init__(
+        self,
+        image_path: str,
+        image_size: tuple[int, int],
+        package: Any = cv2,
+        max_workers: int = 8,
+    ) -> None:
+        self.image_path = image_path
+        self.image_filenames = np.array(sorted(os.listdir(self.image_path)))
+        self.image_size = image_size
+        self.package = package
+        self.bands = self.__get_channels()
+        self.max_workers = max_workers
+
+    def __get_channels(self) -> int:
+        # Open the first image to determine the number of channels
+        sample_image_path = os.path.join(self.image_path, self.image_filenames[0])
+        sample_image = self.package.imread(
+            sample_image_path, self.package.IMREAD_UNCHANGED
+        )
+        return sample_image.shape[2] if len(sample_image.shape) == 3 else 1
+
+    def __read_single_image(
+        self, filename: str, package: Any, image_size: tuple[int, int, int]
+    ) -> np.ndarray:
+        image = package.imread(filename, package.IMREAD_UNCHANGED)
+        image = package.resize(image, image_size)
+        if len(image.shape) == 3 and image.shape[2] == 3:
+            image = package.cvtColor(image, package.COLOR_BGR2RGB)
+        return image
+
+    def read_batch(self, batch_size, dataset_index) -> np.ndarray:
+        # Initialize images array
+        images = np.zeros(
+            (batch_size, self.image_size[1], self.image_size[0], self.bands)
+        )
+
+        # Read images with OpenCV
+        batch_filenames = self.image_filenames[
+            dataset_index : dataset_index + batch_size
+        ]
+
+        image_paths = [
+            os.path.join(self.image_path, batch_filenames[i]) for i in range(batch_size)
+        ]
+
+        with ThreadPoolExecutor() as executor:
+            results = executor.map(
+                self.__read_single_image,
+                image_paths,
+                [self.package] * batch_size,
+                [self.image_size] * batch_size,
+            )
+
+        for i, image in enumerate(results):
             images[i, ...] = image
 
         return images
-    
+
     def get_dataset_size(self, mini_batch) -> int:
         dataset_size = int(np.floor(len(self.image_filenames) / float(mini_batch)))
         return dataset_size
-    
+
     def get_image_size(self) -> tuple[int, int]:
         return self.image_size
-    
+
     def shuffle_filenames(self, seed: int) -> None:
         state = np.random.RandomState(seed)
         shuffled_indices = state.permutation(len(self.image_filenames))
@@ -201,6 +287,10 @@ def shuffle_filenames(self, seed: int) -> None:
 
 
 class RasterImageStrategy:
+    """
+    Strategy optimized for reading raster images powered by backend rasterio.
+    """
+
     # read images with rasterio
     def __init__(
         self,
@@ -255,6 +345,11 @@ def shuffle_filenames(self, seed: int) -> None:
 
 
 class RasterImageStrategyMultiThread:
+    """
+    Strategy optimized for reading raster images powered by backend rasterio.
+    Multi threaded version.
+    """
+
     # read images with rasterio
     def __init__(
         self,

From 0d9c97f910ff723201a158a99176b010794c96a0 Mon Sep 17 00:00:00 2001
From: Sajtospoga01 <abodrogai@gmail.com>
Date: Sun, 27 Aug 2023 17:35:29 +0100
Subject: [PATCH 73/75] fixed linting problems in strategies test file

---
 .../test_strategies.py                        | 21 +++++++++++++------
 1 file changed, 15 insertions(+), 6 deletions(-)

diff --git a/tests/segmentation_utils_tests.py/test_strategies.py b/tests/segmentation_utils_tests.py/test_strategies.py
index 88ff6c3..47e14a9 100644
--- a/tests/segmentation_utils_tests.py/test_strategies.py
+++ b/tests/segmentation_utils_tests.py/test_strategies.py
@@ -63,17 +63,16 @@ def imread(self, *args, **kwargs):
     def resize(self, *args, **kwargs):
         img = args[0]
         size = args[1]
-        return np.full((size[0], size[1], self.bands), img[0,0,0], np.uint8)
+        return np.full((size[0], size[1], self.bands), img[0, 0, 0], np.uint8)
 
     def cvtColor(self, *args, **kwargs):
         img = args[0]
-        return np.full(
-            (self.size[0], self.size[1], self.bands), img[0,0,0], np.uint8
-        )
+        return np.full((self.size[0], self.size[1], self.bands), img[0, 0, 0], np.uint8)
 
     def get_count(self):
         return self.call_count
 
+
 @pytest.mark.development
 def test_read_batch_image_path() -> None:
     # checking if the file is being opened and read correctly
@@ -214,6 +213,7 @@ def test_hsi_get_dataset_size() -> None:
     patch.undo()
     patch.undo()
 
+
 @pytest.mark.development
 def test_hsi_mt_get_dataset_size() -> None:
     # checking if the calculation is done correctly
@@ -306,6 +306,7 @@ def test_hsi_open():
 
     assert read_images.shape == (2, 224, 224, 3)
 
+
 @pytest.mark.development
 def test_hsi_mt_open():
     patch = MonkeyPatch()
@@ -319,7 +320,9 @@ def test_hsi_mt_open():
         "size": (224, 224),
         "bands": 3,
     }
-    strategy = HSImageStrategyMultiThread(image_path, (224, 224), package=CV2Mock(**mock_data))
+    strategy = HSImageStrategyMultiThread(
+        image_path, (224, 224), package=CV2Mock(**mock_data)
+    )
 
     read_images = strategy.read_batch(2, 0)
 
@@ -345,6 +348,7 @@ def test_hsi_get_channels():
 
     assert channels == 3
 
+
 @pytest.mark.development
 def test_hsi_mt_get_channels():
     patch = MonkeyPatch()
@@ -358,7 +362,9 @@ def test_hsi_mt_get_channels():
         "size": (224, 224),
         "bands": 3,
     }
-    strategy = HSImageStrategyMultiThread(image_path, (224, 224), package=CV2Mock(**mock_data))
+    strategy = HSImageStrategyMultiThread(
+        image_path, (224, 224), package=CV2Mock(**mock_data)
+    )
 
     channels = strategy._HSImageStrategyMultiThread__get_channels()
 
@@ -515,6 +521,7 @@ def test_hsi_get_image_size():
     result = image_strategy.get_image_size()
     assert result == (224, 224)
 
+
 @pytest.mark.development
 def test_hsi_mt_get_image_size():
     patch = MonkeyPatch()
@@ -700,6 +707,7 @@ def test_hsi_shuffle():
         image_strategy_1.image_filenames, image_strategy_2.image_filenames
     )
 
+
 @pytest.mark.development
 def test_hsi_mt_shuffle():
     patch = MonkeyPatch()
@@ -730,6 +738,7 @@ def test_hsi_mt_shuffle():
         image_strategy_1.image_filenames, image_strategy_2.image_filenames
     )
 
+
 @pytest.mark.development
 def test_hsi_mt_image_in_order():
     patch = MonkeyPatch()

From 2defb3e8d85c0565ed7be7a352044eb97e4c499d Mon Sep 17 00:00:00 2001
From: Sajtospoga01 <abodrogai@gmail.com>
Date: Sun, 27 Aug 2023 22:56:09 +0100
Subject: [PATCH 74/75] updates pipelines to install packages from
 pyproject.toml, updates strategy tests to use fixtures for better scalability,
 and adds development markers for the remaining tests

---
 .github/workflows/development.yml             |   2 +-
 .github/workflows/documentation.yml           |   4 +-
 .github/workflows/master.yml                  |  16 +-
 .github/workflows/staging.yml                 |  13 +-
 .../flow_reader_test.py                       |  13 +-
 .../test_flowreader.py                        |   2 +-
 .../test_strategies.py                        | 680 ++++--------------
 .../image_cutting_test.py                     |  12 +
 8 files changed, 167 insertions(+), 575 deletions(-)

diff --git a/.github/workflows/development.yml b/.github/workflows/development.yml
index 467af14..9943143 100644
--- a/.github/workflows/development.yml
+++ b/.github/workflows/development.yml
@@ -45,7 +45,7 @@ jobs:
         pip install .[dev]
     - name: Test with pytest
       run: |
-        python -m pytest -v -m "not staging"
+        python -m pytest -v -m "development"
 
   devops:
     needs: test
diff --git a/.github/workflows/documentation.yml b/.github/workflows/documentation.yml
index bbdd86f..628a486 100644
--- a/.github/workflows/documentation.yml
+++ b/.github/workflows/documentation.yml
@@ -34,15 +34,13 @@ jobs:
     - name: Install dependencies
       run: |
         python -m pip install --upgrade pip
-        if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
+        pip install .[dev]
         pip install -U sphinx
         pip install furo
         
     - name: Build documentation
       run: |
         cd docs
-
-
         sphinx-apidoc -e -M --force -o . ../utilities/
         make html
     - name: Upload build data
diff --git a/.github/workflows/master.yml b/.github/workflows/master.yml
index 4b3ba3c..5979c2d 100644
--- a/.github/workflows/master.yml
+++ b/.github/workflows/master.yml
@@ -26,7 +26,7 @@ jobs:
       run: |
         python -m pip install --upgrade pip
         pip install pre-commit
-        if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
+        pip install .[dev]
 
   test:
     needs: dependency-install
@@ -42,10 +42,16 @@ jobs:
       run: |
         python -m pip install --upgrade pip
         pip install pytest pytest-mock
-        if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
-    - name: Test with pytest
+        pip install .[dev]
+    - name: Test with pytest development
       run: |
-        python -m pytest
+        python -m pytest -v -m "development"
+    - name: Test with pytest staging
+      run: |
+        python -m pytest -v -m "staging"
+    - name: Test with pytest production
+      run: |
+        python -m pytest -v -m "production"
 
   devops:
     needs: test
@@ -62,7 +68,7 @@ jobs:
       run: |
         python -m pip install --upgrade pip
         pip install pre-commit
-        if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
+        pip install .[dev]
     - name: Lint with pylint
       run: |
         pip install pylint
diff --git a/.github/workflows/staging.yml b/.github/workflows/staging.yml
index 72bf0c4..f863dc4 100644
--- a/.github/workflows/staging.yml
+++ b/.github/workflows/staging.yml
@@ -26,7 +26,7 @@ jobs:
       run: |
         python -m pip install --upgrade pip
         pip install pre-commit
-        if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
+        pip install .[dev]
 
   test:
     needs: dependency-install
@@ -42,10 +42,13 @@ jobs:
       run: |
         python -m pip install --upgrade pip
         pip install pytest pytest-mock
-        if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
-    - name: Test with pytest
+        pip install .[dev]
+    - name: Test with pytest development
       run: |
-        python -m pytest
+        python -m pytest -v -m "development"
+    - name: Test with pytest staging
+      run: |
+        python -m pytest -v -m "staging"
 
   devops:
     needs: test
@@ -62,7 +65,7 @@ jobs:
       run: |
         python -m pip install --upgrade pip
         pip install pre-commit
-        if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
+        pip install .[dev]
     - name: Lint with pylint
       run: |
         pip install pylint
diff --git a/tests/segmentation_utils_tests.py/flow_reader_test.py b/tests/segmentation_utils_tests.py/flow_reader_test.py
index af89e19..20d2fb2 100644
--- a/tests/segmentation_utils_tests.py/flow_reader_test.py
+++ b/tests/segmentation_utils_tests.py/flow_reader_test.py
@@ -36,6 +36,7 @@ def flow_from_directory_mock(*args, **kwargs):
 
 
 # tests
+@pytest.mark.development
 def test_makes_flow_generator() -> None:
     patch = MonkeyPatch()
     # mock an imagedatagenerator from keras
@@ -48,7 +49,7 @@ def test_makes_flow_generator() -> None:
     # create a flow generator
     FlowGenerator(**generator_args)
 
-
+@pytest.mark.development
 def test_makes_flow_generator_with_queue() -> None:
     patch = MonkeyPatch()
     # mock an imagedatagenerator from keras
@@ -78,7 +79,7 @@ def test_makes_flow_generator_with_queue() -> None:
     generator = FlowGenerator(**new_generator_args)
     generator.set_preprocessing_pipeline(image_queue, mask_queue)
 
-
+@pytest.mark.development
 def test_makes_flow_generator_wrong_shape() -> None:
     try:
         patch = MonkeyPatch()
@@ -98,7 +99,7 @@ def test_makes_flow_generator_wrong_shape() -> None:
     except ValueError:
         assert True
 
-
+@pytest.mark.development
 def test_makes_flow_generator_wrong_dimension() -> None:
     try:
         patch = MonkeyPatch()
@@ -118,7 +119,7 @@ def test_makes_flow_generator_wrong_dimension() -> None:
     except ValueError:
         assert True
 
-
+@pytest.mark.development
 def test_flow_generator_with_preprocess() -> None:
     patch = MonkeyPatch()
     # mock an imagedatagenerator from keras
@@ -141,7 +142,7 @@ def test_flow_generator_with_preprocess() -> None:
     patch.undo()
     patch.undo()
 
-
+@pytest.mark.development
 def test_get_dataset_size() -> None:
     patch = MonkeyPatch()
     patch.setattr(os, "listdir", lambda x: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10])
@@ -166,7 +167,7 @@ def test_get_dataset_size() -> None:
     patch.undo()
     patch.undo()
 
-
+@pytest.mark.development
 def test_get_generator() -> None:
     patch = MonkeyPatch()
 
diff --git a/tests/segmentation_utils_tests.py/test_flowreader.py b/tests/segmentation_utils_tests.py/test_flowreader.py
index 5bdcd36..4648e19 100644
--- a/tests/segmentation_utils_tests.py/test_flowreader.py
+++ b/tests/segmentation_utils_tests.py/test_flowreader.py
@@ -295,7 +295,7 @@ def test_read_batch_get_item_expand_dim_fail() -> None:
 
         batch = generator[0]
 
-
+@pytest.mark.development
 def test_raises_error_not_compatible_shape() -> None:
     with pytest.raises(ValueError) as exc_info:
         patch = MonkeyPatch()
diff --git a/tests/segmentation_utils_tests.py/test_strategies.py b/tests/segmentation_utils_tests.py/test_strategies.py
index 47e14a9..bbc01b3 100644
--- a/tests/segmentation_utils_tests.py/test_strategies.py
+++ b/tests/segmentation_utils_tests.py/test_strategies.py
@@ -73,256 +73,163 @@ def get_count(self):
         return self.call_count
 
 
-@pytest.mark.development
-def test_read_batch_image_path() -> None:
-    # checking if the file is being opened and read correctly
-    patch = MonkeyPatch()
-
-    mock_filenames = ["a", "b", "c"]
-
-    patch.setattr(os, "listdir", lambda x: mock_filenames)
-
-    patch.setattr(
-        Image,
-        "open",
-        lambda _: Image.fromarray(np.ones((224, 224, 3)).astype(np.uint8)),
-    )
-
-    image_strategy = RGBImageStrategy(
-        image_path="tests/segmentation_utils_tests/test_strategies",
-        image_size=(224, 224),
-        image_resample=Image.Resampling.NEAREST,
-    )
-
-    batch_size = 2
-    dataset_index = 0
-    result = image_strategy.read_batch(batch_size, dataset_index)
+@pytest.fixture
+def rasterio_mock() -> MockRasterio:
+    return MockRasterio(n=3, size=(224, 224), bands=3, dtypes=["uint8"])
 
-    assert result.shape == (2, 224, 224, 3)
-    patch.undo()
-    patch.undo()
 
+@pytest.fixture
+def cv2_mock() -> CV2Mock:
+    return CV2Mock(n=3, size=(224, 224), bands=3)
 
-@pytest.mark.development
-def test_read_batch_returns_nparray() -> None:
-    # checking if the returned value is a numpy array
 
-    patch = MonkeyPatch()
+@pytest.fixture
+def directory_mock(monkeypatch):
+    mock_filenames = [str(i) for i in range(20)]
+    monkeypatch.setattr(os, "listdir", lambda x: mock_filenames)
+    return len(mock_filenames)
 
-    patch.setattr(os, "listdir", lambda x: ["a", "b", "c"])
 
-    patch.setattr(
+@pytest.fixture
+def mock_image_open(monkeypatch):
+    monkeypatch.setattr(
         Image,
         "open",
         lambda _: Image.fromarray(np.ones((224, 224, 3)).astype(np.uint8)),
     )
 
-    image_strategy = RGBImageStrategy(
+
+@pytest.fixture
+def rgb_strategy(mock_image_open) -> RGBImageStrategy:
+    return RGBImageStrategy(
         image_path="tests/segmentation_utils_tests/test_strategies",
         image_size=(224, 224),
         image_resample=Image.Resampling.NEAREST,
     )
 
-    batch_size = 2
-    dataset_index = 0
-
-    result = image_strategy.read_batch(batch_size, dataset_index)
-    assert isinstance(result, np.ndarray)
-    assert result.shape == (2, 224, 224, 3)
-
-    patch.undo()
-    patch.undo()
 
-
-@pytest.mark.development
-def test_RGB_get_dataset_size() -> None:
-    # checking if the calculation is done correctly
-    patch = MonkeyPatch()
-
-    mock_filenames = ["a", "b", "c"]
-
-    patch.setattr(os, "listdir", lambda x: mock_filenames)
-
-    image_strategy = RGBImageStrategy(
+@pytest.fixture
+def raster_strategy(rasterio_mock) -> RasterImageStrategy:
+    return RasterImageStrategy(
         image_path="tests/segmentation_utils_tests/test_strategies",
         image_size=(224, 224),
-        image_resample=Image.Resampling.NEAREST,
+        package=rasterio_mock,
     )
-    dataset = len(mock_filenames)  # number of images in the specified path
-    mini_batch = 2  # number of images we want in each batch
-    expected_value = int(
-        np.floor(dataset / float(mini_batch))
-    )  # number of sets of images we expect
 
-    dataset_size = image_strategy.get_dataset_size(mini_batch)
-    assert dataset_size == expected_value
-    patch.undo()
-    patch.undo()
-
-
-@pytest.mark.development
-def test_raster_get_dataset_size() -> None:
-    # checking if the calculation is done correctly
-    patch = MonkeyPatch()
 
-    mock_filenames = ["a", "b", "c"]
-
-    patch.setattr(os, "listdir", lambda x: mock_filenames)
-
-    image_strategy = RasterImageStrategy(
+@pytest.fixture
+def raster_mt_strategy(rasterio_mock) -> RasterImageStrategyMultiThread:
+    return RasterImageStrategyMultiThread(
         image_path="tests/segmentation_utils_tests/test_strategies",
         image_size=(224, 224),
-        package=MockRasterio(n=3, size=(224, 224), bands=3, dtypes=["uint8"]),
+        package=rasterio_mock,
     )
 
-    dataset = len(mock_filenames)  # number of images in the specified path
-    mini_batch = 2  # number of images we want in each batch
-    expected_value = int(
-        np.floor(dataset / float(mini_batch))
-    )  # number of sets of images we expect
-
-    dataset_size = image_strategy.get_dataset_size(mini_batch)
-    assert dataset_size == expected_value
-    patch.undo()
-    patch.undo()
 
+@pytest.fixture
+def hsi_strategy(cv2_mock) -> HSImageStrategy:
+    return HSImageStrategy(
+        image_path="tests/segmentation_utils_tests/test_strategies",
+        image_size=(224, 224),
+        package=cv2_mock,
+    )
 
-@pytest.mark.development
-def test_hsi_get_dataset_size() -> None:
-    # checking if the calculation is done correctly
-    patch = MonkeyPatch()
-
-    mock_filenames = ["a", "b", "c"]
-
-    patch.setattr(os, "listdir", lambda x: mock_filenames)
 
-    image_strategy = HSImageStrategy(
+@pytest.fixture
+def hsi_mt_strategy(cv2_mock) -> HSImageStrategyMultiThread:
+    return HSImageStrategyMultiThread(
         image_path="tests/segmentation_utils_tests/test_strategies",
         image_size=(224, 224),
-        package=CV2Mock(n=3, size=(224, 224), bands=3),
+        package=cv2_mock,
     )
 
-    dataset = len(mock_filenames)  # number of images in the specified path
-    mini_batch = 2  # number of images we want in each batch
-    expected_value = int(
-        np.floor(dataset / float(mini_batch))
-    )  # number of sets of images we expect
 
-    dataset_size = image_strategy.get_dataset_size(mini_batch)
-    assert dataset_size == expected_value
-    patch.undo()
-    patch.undo()
+FIXTURE_LIST = [
+    "rgb_strategy",
+    "raster_strategy",
+    "raster_mt_strategy",
+    "hsi_strategy",
+    "hsi_mt_strategy",
+]
 
+FIXTURE_LIST_MT = [
+    "raster_mt_strategy",
+    "hsi_mt_strategy",
+]
 
-@pytest.mark.development
-def test_hsi_mt_get_dataset_size() -> None:
-    # checking if the calculation is done correctly
-    patch = MonkeyPatch()
 
-    mock_filenames = ["a", "b", "c"]
+@pytest.fixture(params=FIXTURE_LIST)
+def image_strategy(request, directory_mock):
+    strategy = request.getfixturevalue(request.param)
+    return strategy
 
-    patch.setattr(os, "listdir", lambda x: mock_filenames)
 
-    image_strategy = HSImageStrategyMultiThread(
-        image_path="tests/segmentation_utils_tests/test_strategies",
-        image_size=(224, 224),
-        package=CV2Mock(n=3, size=(224, 224), bands=3),
-    )
+@pytest.fixture(params=FIXTURE_LIST_MT)
+def mt_image_strategy(request, directory_mock):
+    strategy = request.getfixturevalue(request.param)
+    return strategy
 
-    dataset = len(mock_filenames)  # number of images in the specified path
-    mini_batch = 2  # number of images we want in each batch
-    expected_value = int(
-        np.floor(dataset / float(mini_batch))
-    )  # number of sets of images we expect
 
-    dataset_size = image_strategy.get_dataset_size(mini_batch)
-    assert dataset_size == expected_value
-    patch.undo()
-    patch.undo()
+@pytest.fixture(params=FIXTURE_LIST)
+def fixture_factory(request, directory_mock):
+    def make_instance():
+        return request.getfixturevalue(request.param)
 
+    return make_instance
 
-@pytest.mark.development
-def test_raster_open():
-    patch = MonkeyPatch()
-    mock_filenames = ["a", "b", "c"]
-    patch.setattr(os, "listdir", lambda x: mock_filenames)
-
-    image_path = "tests/segmentation_utils_tests/test_strategies"
-
-    mock_data = {
-        "n": 3,
-        "size": (224, 224),
-        "bands": 3,
-        "dtypes": ["uint8"],
-    }
-    strategy = RasterImageStrategy(
-        image_path, (224, 224), package=MockRasterio(**mock_data)
-    )
 
-    read_images = strategy.read_batch(2, 0)
+@pytest.fixture(params=FIXTURE_LIST_MT)
+def mt_fixture_factory(request, directory_mock):
+    def make_instance():
+        return request.getfixturevalue(request.param)
 
-    assert read_images.shape == (2, 224, 224, 3)
+    return make_instance
 
 
 @pytest.mark.development
-def test_raster_mt_open():
-    patch = MonkeyPatch()
-    mock_filenames = ["a", "b", "c"]
-    patch.setattr(os, "listdir", lambda x: mock_filenames)
-
-    image_path = "tests/segmentation_utils_tests/test_strategies"
-
-    mock_data = {
-        "n": 3,
-        "size": (224, 224),
-        "bands": 3,
-        "dtypes": ["uint8"],
-    }
-    strategy = RasterImageStrategyMultiThread(
-        image_path, (224, 224), package=MockRasterio(**mock_data)
-    )
+def test_read_batch_image_path(image_strategy, mock_image_open) -> None:
+    # checking if the file is being opened and read correctly
 
-    read_images = strategy.read_batch(2, 0)
+    strategy = image_strategy
 
-    assert read_images.shape == (2, 224, 224, 3)
+    batch_size = 2
+    dataset_index = 0
+    result = strategy.read_batch(batch_size, dataset_index)
+
+    assert result.shape == (2, 224, 224, 3)
 
 
 @pytest.mark.development
-def test_hsi_open():
-    patch = MonkeyPatch()
-    mock_filenames = ["a", "b", "c"]
-    patch.setattr(os, "listdir", lambda x: mock_filenames)
+def test_read_batch_returns_nparray(image_strategy) -> None:
+    # checking if the returned value is a numpy array
+    strategy = image_strategy
 
-    image_path = "tests/segmentation_utils_tests/test_strategies"
+    batch_size = 2
+    dataset_index = 0
 
-    mock_data = {
-        "n": 3,
-        "size": (224, 224),
-        "bands": 3,
-    }
-    strategy = HSImageStrategy(image_path, (224, 224), package=CV2Mock(**mock_data))
+    result = strategy.read_batch(batch_size, dataset_index)
+    assert isinstance(result, np.ndarray)
+    assert result.shape == (2, 224, 224, 3)
 
-    read_images = strategy.read_batch(2, 0)
 
-    assert read_images.shape == (2, 224, 224, 3)
+@pytest.mark.development
+def test_get_dataset_size(image_strategy, directory_mock) -> None:
+    # checking if the calculation is done correctly
+
+    strategy = image_strategy
+    dataset = directory_mock  # number of images in the specified path
+    mini_batch = 2  # number of images we want in each batch
+    expected_value = int(
+        np.floor(dataset / float(mini_batch))
+    )  # number of sets of images we expect
+
+    dataset_size = strategy.get_dataset_size(mini_batch)
+    assert dataset_size == expected_value
 
 
 @pytest.mark.development
-def test_hsi_mt_open():
-    patch = MonkeyPatch()
-    mock_filenames = ["a", "b", "c"]
-    patch.setattr(os, "listdir", lambda x: mock_filenames)
-
-    image_path = "tests/segmentation_utils_tests/test_strategies"
-
-    mock_data = {
-        "n": 3,
-        "size": (224, 224),
-        "bands": 3,
-    }
-    strategy = HSImageStrategyMultiThread(
-        image_path, (224, 224), package=CV2Mock(**mock_data)
-    )
+def test_open(image_strategy):
+    strategy = image_strategy
 
     read_images = strategy.read_batch(2, 0)
 
@@ -330,19 +237,8 @@ def test_hsi_mt_open():
 
 
 @pytest.mark.development
-def test_hsi_get_channels():
-    patch = MonkeyPatch()
-    mock_filenames = ["a", "b", "c"]
-    patch.setattr(os, "listdir", lambda x: mock_filenames)
-
-    image_path = "tests/segmentation_utils_tests/test_strategies"
-
-    mock_data = {
-        "n": 3,
-        "size": (224, 224),
-        "bands": 3,
-    }
-    strategy = HSImageStrategy(image_path, (224, 224), package=CV2Mock(**mock_data))
+def test_hsi_get_channels(directory_mock, hsi_strategy):
+    strategy = hsi_strategy
 
     channels = strategy._HSImageStrategy__get_channels()
 
@@ -350,21 +246,8 @@ def test_hsi_get_channels():
 
 
 @pytest.mark.development
-def test_hsi_mt_get_channels():
-    patch = MonkeyPatch()
-    mock_filenames = ["a", "b", "c"]
-    patch.setattr(os, "listdir", lambda x: mock_filenames)
-
-    image_path = "tests/segmentation_utils_tests/test_strategies"
-
-    mock_data = {
-        "n": 3,
-        "size": (224, 224),
-        "bands": 3,
-    }
-    strategy = HSImageStrategyMultiThread(
-        image_path, (224, 224), package=CV2Mock(**mock_data)
-    )
+def test_hsi_mt_get_channels(directory_mock, hsi_mt_strategy):
+    strategy = hsi_mt_strategy
 
     channels = strategy._HSImageStrategyMultiThread__get_channels()
 
@@ -372,28 +255,12 @@ def test_hsi_mt_get_channels():
 
 
 @pytest.mark.development
-def test_empty_batch():
-    patch = MonkeyPatch()
-
-    mock_filenames = ["a", "b", "c"]
-
-    patch.setattr(os, "listdir", lambda x: mock_filenames)
-
-    patch.setattr(
-        Image,
-        "open",
-        lambda _: Image.fromarray(np.ones((224, 224, 3)).astype(np.uint8)),
-    )
-
-    image_strategy = RGBImageStrategy(
-        image_path="tests/segmentation_utils_tests/test_strategies",
-        image_size=(224, 224),
-        image_resample=Image.Resampling.NEAREST,
-    )
+def test_empty_batch(image_strategy):
+    strategy = image_strategy
 
     batch_size = 0
     dataset_index = 0
-    result = image_strategy.read_batch(batch_size, dataset_index)
+    result = strategy.read_batch(batch_size, dataset_index)
 
     assert result.shape == (
         0,
@@ -401,363 +268,68 @@ def test_empty_batch():
         224,
         3,
     )  # 0 indicates there are no images in the batch
-    patch.undo()
-    patch.undo()
 
 
 @pytest.mark.development
-def test_out_of_bounds_index():
-    patch = MonkeyPatch()
-
-    mock_filenames = ["a", "b", "c"]
-
-    patch.setattr(os, "listdir", lambda x: mock_filenames)
-
-    patch.setattr(
-        Image,
-        "open",
-        lambda _: Image.fromarray(np.ones((224, 224, 3)).astype(np.uint8)),
-    )
-
-    image_strategy = RGBImageStrategy(
+def test_out_of_bounds_index(image_strategy):
+    strategy = RGBImageStrategy(
         image_path="tests/segmentation_utils_tests/test_strategies",
         image_size=(224, 224),
         image_resample=Image.Resampling.NEAREST,
     )
 
     batch_size = 2  # not an empty batch
-    dataset_index = len(image_strategy.image_filenames)  # out of bounds index
+    dataset_index = len(strategy.image_filenames)  # out of bounds index
 
-    try:
-        image_strategy.read_batch(batch_size, dataset_index)
-        assert True
-
-    except IndexError:
-        pass
-    patch.undo()
-    patch.undo()
+    with pytest.raises(IndexError):
+        strategy.read_batch(batch_size, dataset_index)
 
 
 @pytest.mark.development
-def test_batch_slicing():
-    patch = MonkeyPatch()
-
-    mock_filenames = ["a" for _ in range(20)]
-
-    patch.setattr(os, "listdir", lambda x: mock_filenames)
-
-    patch.setattr(
-        Image,
-        "open",
-        lambda _: Image.fromarray(np.ones((224, 224, 3)).astype(np.uint8)),
-    )
-
-    image_strategy = RGBImageStrategy(
-        image_path="tests/segmentation_utils_tests/test_strategies",
-        image_size=(224, 224),
-        image_resample=Image.Resampling.NEAREST,
-    )
+def test_batch_slicing(image_strategy):
+    strategy = image_strategy
 
     batch_size = 10
     dataset_index = 2
-    result = image_strategy.read_batch(batch_size, dataset_index)
+    result = strategy.read_batch(batch_size, dataset_index)
     assert (
         result.shape[0] == batch_size
     )  # compare the size of returned data with batch_size
-    patch.undo()
-    patch.undo()
-
-
-@pytest.mark.development
-def test_RGB_get_image_size():
-    patch = MonkeyPatch()
-
-    mock_filenames = ["a" for _ in range(20)]
-
-    patch.setattr(os, "listdir", lambda x: mock_filenames)
-
-    image_strategy = RGBImageStrategy(
-        image_path="tests/segmentation_utils_tests/test_strategies",
-        image_size=(224, 224),
-        image_resample=Image.Resampling.NEAREST,
-    )
-
-    result = image_strategy.get_image_size()
-    assert result == (224, 224)
-
-
-@pytest.mark.development
-def test_raster_get_image_size():
-    patch = MonkeyPatch()
-
-    mock_filenames = ["a" for _ in range(20)]
-
-    patch.setattr(os, "listdir", lambda x: mock_filenames)
-
-    image_strategy = RasterImageStrategy(
-        image_path="tests/segmentation_utils_tests/test_strategies",
-        image_size=(224, 224),
-        package=MockRasterio(n=3, size=(224, 224), bands=3, dtypes=["uint8"]),
-    )
-
-    result = image_strategy.get_image_size()
-    assert result == (224, 224)
 
 
 @pytest.mark.development
-def test_hsi_get_image_size():
-    patch = MonkeyPatch()
-
-    mock_filenames = ["a" for _ in range(20)]
+def test_get_image_size(image_strategy):
+    strategy = image_strategy
 
-    patch.setattr(os, "listdir", lambda x: mock_filenames)
-
-    image_strategy = HSImageStrategy(
-        image_path="tests/segmentation_utils_tests/test_strategies",
-        image_size=(224, 224),
-        package=CV2Mock(n=3, size=(224, 224), bands=3),
-    )
-
-    result = image_strategy.get_image_size()
+    result = strategy.get_image_size()
     assert result == (224, 224)
 
 
 @pytest.mark.development
-def test_hsi_mt_get_image_size():
-    patch = MonkeyPatch()
-
-    mock_filenames = ["a" for _ in range(20)]
-
-    patch.setattr(os, "listdir", lambda x: mock_filenames)
-
-    image_strategy = HSImageStrategyMultiThread(
-        image_path="tests/segmentation_utils_tests/test_strategies",
-        image_size=(224, 224),
-        package=CV2Mock(n=3, size=(224, 224), bands=3),
-    )
-
-    result = image_strategy.get_image_size()
-    assert result == (224, 224)
-
-
-@pytest.mark.development
-def test_raster_mt_get_image_size():
-    patch = MonkeyPatch()
-
-    mock_filenames = ["a" for _ in range(20)]
-
-    patch.setattr(os, "listdir", lambda x: mock_filenames)
-
-    image_strategy = RasterImageStrategyMultiThread(
-        image_path="tests/segmentation_utils_tests/test_strategies",
-        image_size=(224, 224),
-        package=MockRasterio(n=3, size=(224, 224), bands=3, dtypes=["uint8"]),
-    )
-
-    result = image_strategy.get_image_size()
-    assert result == (224, 224)
-
-
-@pytest.mark.development
-def test_rgb_shuffle():
-    patch = MonkeyPatch()
-
-    mock_filenames = [str(i) for i in range(20)]
-
-    patch.setattr(os, "listdir", lambda x: mock_filenames)
-
-    image_strategy_1 = RGBImageStrategy(
-        image_path="tests/segmentation_utils_tests/test_strategies",
-        image_size=(224, 224),
-        image_resample=Image.Resampling.NEAREST,
-    )
-
-    image_strategy_2 = RGBImageStrategy(
-        image_path="tests/segmentation_utils_tests/test_strategies",
-        image_size=(224, 224),
-        image_resample=Image.Resampling.NEAREST,
-    )
-
-    n = 100
-
-    for i in range(n):
-        image_strategy_1.shuffle_filenames(i)
-        image_strategy_2.shuffle_filenames(i)
-
-    assert np.array_equal(
-        image_strategy_1.image_filenames, image_strategy_2.image_filenames
-    )
-
-
-@pytest.mark.development
-def test_raster_shuffle():
-    patch = MonkeyPatch()
-
-    mock_filenames = [str(i) for i in range(20)]
-
-    patch.setattr(os, "listdir", lambda x: mock_filenames)
-
-    image_strategy_1 = RasterImageStrategy(
-        image_path="tests/segmentation_utils_tests/test_strategies",
-        image_size=(224, 224),
-        package=MockRasterio(n=3, size=(224, 224), bands=3, dtypes=["uint8"]),
-    )
-
-    image_strategy_2 = RasterImageStrategy(
-        image_path="tests/segmentation_utils_tests/test_strategies",
-        image_size=(224, 224),
-        package=MockRasterio(n=3, size=(224, 224), bands=3, dtypes=["uint8"]),
-    )
-
-    n = 100
-
-    for i in range(n):
-        image_strategy_1.shuffle_filenames(i)
-        image_strategy_2.shuffle_filenames(i)
-
-    assert np.array_equal(
-        image_strategy_1.image_filenames, image_strategy_2.image_filenames
-    )
-
-
-@pytest.mark.development
-def test_raster_mt_shuffle():
-    patch = MonkeyPatch()
-
-    mock_filenames = [str(i) for i in range(20)]
-
-    patch.setattr(os, "listdir", lambda x: mock_filenames)
+def test_shuffle(fixture_factory):
+    strategy_1 = fixture_factory()
 
-    image_strategy_1 = RasterImageStrategyMultiThread(
-        image_path="tests/segmentation_utils_tests/test_strategies",
-        image_size=(224, 224),
-        package=MockRasterio(n=3, size=(224, 224), bands=3, dtypes=["uint8"]),
-    )
-
-    image_strategy_2 = RasterImageStrategy(
-        image_path="tests/segmentation_utils_tests/test_strategies",
-        image_size=(224, 224),
-        package=MockRasterio(n=3, size=(224, 224), bands=3, dtypes=["uint8"]),
-    )
+    strategy_2 = fixture_factory()
 
     n = 100
 
     for i in range(n):
-        image_strategy_1.shuffle_filenames(i)
-        image_strategy_2.shuffle_filenames(i)
+        strategy_1.shuffle_filenames(i)
+        strategy_2.shuffle_filenames(i)
 
-    assert np.array_equal(
-        image_strategy_1.image_filenames, image_strategy_2.image_filenames
-    )
+    assert np.array_equal(strategy_1.image_filenames, strategy_2.image_filenames)
+    assert type(strategy_1) == type(strategy_2)
 
 
 @pytest.mark.development
-def test_raster_mt_image_in_order():
-    patch = MonkeyPatch()
-
-    mock_filenames = [str(i) for i in range(20)]
-
-    patch.setattr(os, "listdir", lambda x: mock_filenames)
-    mock_package = MockRasterio(n=3, size=(224, 224), bands=3, dtypes=["uint8"])
-    image_strategy = RasterImageStrategyMultiThread(
-        image_path="tests/segmentation_utils_tests/test_strategies",
-        image_size=(224, 224),
-        package=mock_package,
-    )
-
-    batch_size = 10
-
-    call_count = mock_package.get_count()
-
-    result = image_strategy.read_batch(batch_size, 0)
-
-    for i in range(call_count, call_count + batch_size):
-        assert np.array_equal(
-            result[i - call_count, :, :, :], np.full((224, 224, 3), i + 1)
-        )
-
-
-@pytest.mark.development
-def test_hsi_shuffle():
-    patch = MonkeyPatch()
-
-    mock_filenames = [str(i) for i in range(20)]
-
-    patch.setattr(os, "listdir", lambda x: mock_filenames)
-
-    image_strategy_1 = HSImageStrategy(
-        image_path="tests/segmentation_utils_tests/test_strategies",
-        image_size=(224, 224),
-        package=CV2Mock(n=3, size=(224, 224), bands=3),
-    )
-
-    image_strategy_2 = HSImageStrategy(
-        image_path="tests/segmentation_utils_tests/test_strategies",
-        image_size=(224, 224),
-        package=CV2Mock(n=3, size=(224, 224), bands=3),
-    )
-
-    n = 100
-
-    for i in range(n):
-        image_strategy_1.shuffle_filenames(i)
-        image_strategy_2.shuffle_filenames(i)
-
-    assert np.array_equal(
-        image_strategy_1.image_filenames, image_strategy_2.image_filenames
-    )
-
-
-@pytest.mark.development
-def test_hsi_mt_shuffle():
-    patch = MonkeyPatch()
-
-    mock_filenames = [str(i) for i in range(20)]
-
-    patch.setattr(os, "listdir", lambda x: mock_filenames)
-
-    image_strategy_1 = HSImageStrategyMultiThread(
-        image_path="tests/segmentation_utils_tests/test_strategies",
-        image_size=(224, 224),
-        package=CV2Mock(n=3, size=(224, 224), bands=3),
-    )
-
-    image_strategy_2 = HSImageStrategyMultiThread(
-        image_path="tests/segmentation_utils_tests/test_strategies",
-        image_size=(224, 224),
-        package=CV2Mock(n=3, size=(224, 224), bands=3),
-    )
-
-    n = 100
-
-    for i in range(n):
-        image_strategy_1.shuffle_filenames(i)
-        image_strategy_2.shuffle_filenames(i)
-
-    assert np.array_equal(
-        image_strategy_1.image_filenames, image_strategy_2.image_filenames
-    )
-
-
-@pytest.mark.development
-def test_hsi_mt_image_in_order():
-    patch = MonkeyPatch()
-
-    mock_filenames = [str(i) for i in range(20)]
-
-    patch.setattr(os, "listdir", lambda x: mock_filenames)
-    mock_package = CV2Mock(n=3, size=(224, 224), bands=3)
-    image_strategy = HSImageStrategyMultiThread(
-        image_path="tests/segmentation_utils_tests/test_strategies",
-        image_size=(224, 224),
-        package=mock_package,
-    )
+def test_mt_image_in_order(mt_image_strategy):
+    strategy = mt_image_strategy
 
     batch_size = 10
 
-    call_count = mock_package.get_count()
+    call_count = strategy.package.get_count()
 
-    result = image_strategy.read_batch(batch_size, 0)
+    result = strategy.read_batch(batch_size, 0)
 
     for i in range(call_count, call_count + batch_size):
         assert np.array_equal(
diff --git a/tests/transform_utils_test.py/image_cutting_test.py b/tests/transform_utils_test.py/image_cutting_test.py
index f3171e1..ef2a800 100644
--- a/tests/transform_utils_test.py/image_cutting_test.py
+++ b/tests/transform_utils_test.py/image_cutting_test.py
@@ -1,4 +1,5 @@
 import numpy as np
+import pytest
 import rasterio
 from PIL import Image
 from pytest import MonkeyPatch
@@ -7,6 +8,7 @@
                                                      image_cut, image_stich)
 
 
+@pytest.mark.development
 def test_image_cut() -> None:
     img = np.zeros((512, 512, 3))
     img[-1, -1, 0] = 1
@@ -29,6 +31,7 @@ def test_image_cut() -> None:
 #         assert True
 
 
+@pytest.mark.development
 def test_image_cut_incorrect_shape_too_many() -> None:
     # does not pass
     try:
@@ -40,6 +43,7 @@ def test_image_cut_incorrect_shape_too_many() -> None:
         assert True
 
 
+@pytest.mark.development
 def test_image_cut_incorrect_band_specified() -> None:
     # passes however the function doesn't rasie a value error
     # when the bands do not match
@@ -52,6 +56,7 @@ def test_image_cut_incorrect_band_specified() -> None:
         assert True
 
 
+@pytest.mark.development
 def test_image_cut_slack_cut() -> None:
     img = np.zeros((513, 513, 3))
     img[-2, -2, 0] = 1
@@ -62,6 +67,7 @@ def test_image_cut_slack_cut() -> None:
     assert cut_ims[-1, -1, -1, 0] == 1
 
 
+@pytest.mark.development
 def test_image_cut_slack_cut_exact() -> None:
     img = np.zeros((512, 512, 3))
     img[-2, -2, 0] = 1
@@ -72,6 +78,7 @@ def test_image_cut_slack_cut_exact() -> None:
     assert cut_ims[-1, -2, -2, 0] == 1
 
 
+@pytest.mark.development
 def test_image_cut_pad() -> None:
     img = np.zeros((511, 511, 3))
     img[-2, -2, 0] = 1
@@ -82,6 +89,7 @@ def test_image_cut_pad() -> None:
     assert cut_ims[-1, -3, -3, 0] == 1
 
 
+@pytest.mark.development
 def test_image_cut_pad_exact() -> None:
     img = np.zeros((512, 512, 3))
     img[-2, -2, 0] = 1
@@ -92,6 +100,7 @@ def test_image_cut_pad_exact() -> None:
     assert cut_ims[-1, -2, -2, 0] == 1
 
 
+@pytest.mark.development
 def test_image_cut_incorrect_band() -> None:
     try:
         img = np.zeros((512, 512))
@@ -102,6 +111,7 @@ def test_image_cut_incorrect_band() -> None:
         assert True
 
 
+@pytest.mark.development
 def test_image_cut_can_add_dimension() -> None:
     img = np.zeros((512, 512))
     img[-1, -1] = 1
@@ -112,6 +122,7 @@ def test_image_cut_can_add_dimension() -> None:
     assert cut_ims[-1, -1, -1, 0] == 1
 
 
+@pytest.mark.development
 def test_image_stich() -> None:
     img1 = np.zeros((256, 256, 3), dtype=np.uint8)
     img2 = np.zeros((256, 256, 3), dtype=np.uint8)
@@ -131,6 +142,7 @@ def test_image_stich() -> None:
     assert stiched_img[-1, -1, 0] == 9
 
 
+@pytest.mark.development
 def test_cut_ims_in_directory(mocker) -> None:
     patch = MonkeyPatch()
 

From f5e312ca5189c0169615017997110a7e4163a71b Mon Sep 17 00:00:00 2001
From: Sajtospoga01 <abodrogai@gmail.com>
Date: Sun, 27 Aug 2023 23:08:41 +0100
Subject: [PATCH 75/75] adds documentation to fixtures

---
 .../test_strategies.py                        | 76 +++++++++++++++++++
 1 file changed, 76 insertions(+)

diff --git a/tests/segmentation_utils_tests.py/test_strategies.py b/tests/segmentation_utils_tests.py/test_strategies.py
index bbc01b3..c094414 100644
--- a/tests/segmentation_utils_tests.py/test_strategies.py
+++ b/tests/segmentation_utils_tests.py/test_strategies.py
@@ -73,18 +73,37 @@ def get_count(self):
         return self.call_count
 
 
+####################################################################################################
+#                                     Package Mocks                                                #
+####################################################################################################
+
+
 @pytest.fixture
 def rasterio_mock() -> MockRasterio:
+    """
+    Creates a mock of the rasterio package
+    """
     return MockRasterio(n=3, size=(224, 224), bands=3, dtypes=["uint8"])
 
 
 @pytest.fixture
 def cv2_mock() -> CV2Mock:
+    """
+    Creates a mock of the cv2 package
+    """
     return CV2Mock(n=3, size=(224, 224), bands=3)
 
 
+####################################################################################################
+#                                        OS Mocks                                                  #
+####################################################################################################
+
+
 @pytest.fixture
 def directory_mock(monkeypatch):
+    """
+    Mocks the os.listdir function to return a list of filenames
+    """
     mock_filenames = [str(i) for i in range(20)]
     monkeypatch.setattr(os, "listdir", lambda x: mock_filenames)
     return len(mock_filenames)
@@ -92,6 +111,9 @@ def directory_mock(monkeypatch):
 
 @pytest.fixture
 def mock_image_open(monkeypatch):
+    """
+    Mocks the Image.open function to return a numpy array
+    """
     monkeypatch.setattr(
         Image,
         "open",
@@ -99,8 +121,18 @@ def mock_image_open(monkeypatch):
     )
 
 
+####################################################################################################
+#                                    Strategy Fixtures                                             #
+####################################################################################################
+
+
 @pytest.fixture
 def rgb_strategy(mock_image_open) -> RGBImageStrategy:
+    """
+    Creates an RGBImageStrategy instance
+
+    Relies on the mock_image_open fixture to mock the Image.open function
+    """
     return RGBImageStrategy(
         image_path="tests/segmentation_utils_tests/test_strategies",
         image_size=(224, 224),
@@ -110,6 +142,11 @@ def rgb_strategy(mock_image_open) -> RGBImageStrategy:
 
 @pytest.fixture
 def raster_strategy(rasterio_mock) -> RasterImageStrategy:
+    """
+    Creates a RasterImageStrategy instance
+
+    Relies on the rasterio_mock fixture to mock the rasterio package
+    """
     return RasterImageStrategy(
         image_path="tests/segmentation_utils_tests/test_strategies",
         image_size=(224, 224),
@@ -119,6 +156,11 @@ def raster_strategy(rasterio_mock) -> RasterImageStrategy:
 
 @pytest.fixture
 def raster_mt_strategy(rasterio_mock) -> RasterImageStrategyMultiThread:
+    """
+    Creates a RasterImageStrategyMultiThread instance
+
+    Relies on the rasterio_mock fixture to mock the rasterio package
+    """
     return RasterImageStrategyMultiThread(
         image_path="tests/segmentation_utils_tests/test_strategies",
         image_size=(224, 224),
@@ -128,6 +170,11 @@ def raster_mt_strategy(rasterio_mock) -> RasterImageStrategyMultiThread:
 
 @pytest.fixture
 def hsi_strategy(cv2_mock) -> HSImageStrategy:
+    """
+    Creates an HSImageStrategy instance
+
+    Relies on the cv2_mock fixture to mock the cv2 package
+    """
     return HSImageStrategy(
         image_path="tests/segmentation_utils_tests/test_strategies",
         image_size=(224, 224),
@@ -137,6 +184,11 @@ def hsi_strategy(cv2_mock) -> HSImageStrategy:
 
 @pytest.fixture
 def hsi_mt_strategy(cv2_mock) -> HSImageStrategyMultiThread:
+    """
+    Creates an HSImageStrategyMultiThread instance
+
+    Relies on the cv2_mock fixture to mock the cv2 package
+    """
     return HSImageStrategyMultiThread(
         image_path="tests/segmentation_utils_tests/test_strategies",
         image_size=(224, 224),
@@ -144,6 +196,10 @@ def hsi_mt_strategy(cv2_mock) -> HSImageStrategyMultiThread:
     )
 
 
+####################################################################################################
+#                                 Test Generators                                                  #
+####################################################################################################
+
 FIXTURE_LIST = [
     "rgb_strategy",
     "raster_strategy",
@@ -160,18 +216,29 @@ def hsi_mt_strategy(cv2_mock) -> HSImageStrategyMultiThread:
 
 @pytest.fixture(params=FIXTURE_LIST)
 def image_strategy(request, directory_mock):
+    """
+    Generates a strategy instance for each strategy type
+    """
     strategy = request.getfixturevalue(request.param)
     return strategy
 
 
 @pytest.fixture(params=FIXTURE_LIST_MT)
 def mt_image_strategy(request, directory_mock):
+    """
+    Generates a strategy instance for each multi-threaded strategy type
+    """
     strategy = request.getfixturevalue(request.param)
     return strategy
 
 
 @pytest.fixture(params=FIXTURE_LIST)
 def fixture_factory(request, directory_mock):
+    """
+    Generates a strategy instance for each strategy type
+
+    Can be used to generate multiple instances of the same strategy type
+    """
     def make_instance():
         return request.getfixturevalue(request.param)
 
@@ -180,12 +247,21 @@ def make_instance():
 
 @pytest.fixture(params=FIXTURE_LIST_MT)
 def mt_fixture_factory(request, directory_mock):
+    """
+    Returns a factory that creates a strategy instance for each multi-threaded strategy type
+
+    Can be used to generate multiple instances of the same strategy type
+    """
     def make_instance():
         return request.getfixturevalue(request.param)
 
     return make_instance
 
 
+####################################################################################################
+#                                 Test Functions                                                   #
+####################################################################################################
+
 @pytest.mark.development
 def test_read_batch_image_path(image_strategy, mock_image_open) -> None:
     # checking if the file is being opened and read correctly