From 72dc235621d8336ef5d463cad64cf63b58820cf4 Mon Sep 17 00:00:00 2001 From: Ayleen Sohaib <2684413S@student.gla.ac.uk> Date: Sat, 12 Aug 2023 20:50:24 +0100 Subject: [PATCH 01/75] Added file including strategies for reader --- .../segmentation_utils/reading_strategies.py | 117 ++++++++++++++++++ 1 file changed, 117 insertions(+) create mode 100644 utilities/segmentation_utils/reading_strategies.py diff --git a/utilities/segmentation_utils/reading_strategies.py b/utilities/segmentation_utils/reading_strategies.py new file mode 100644 index 0000000..8677020 --- /dev/null +++ b/utilities/segmentation_utils/reading_strategies.py @@ -0,0 +1,117 @@ +from typing import Protocol, Tuple +import os +import numpy as np +from PIL import Image +import rasterio + +from flowreader import FlowGeneratorExperimental +from utilities.segmentation_utils import ImagePreprocessor + + +class ReaderInterface(Protocol): + + def read_batch(self, start:int, end: int) -> None: + ... + + def get_dataset_size(self) -> None: + ... + +class RGB_Image_Strategy: + + def __init__( + self, + image_path: str, + image_size: tuple [int, int], + batch_image_filenames: np.ndarray, + mini_batch: int, + ): + self.image_path = image_path + self.image_size = image_size + self.mini_batch = mini_batch + self.batch_image_files = batch_image_filenames + + def read_batch(self, batch_size, dataset_index) -> np.ndarray: + #read images with PIL + for i in range(batch_size): + image_index = i + dataset_index + image = Image.open( + os.path.join(self.image_path, self.batch_image_filenames[image_index]) + ).resize(image_size, Image.ANTIALIAS) + image = np.array(image) + image = image / 255 + return image + + def get_dataset_size(self) -> int: + dataset_size = FlowGeneratorExperimental.__len__ + return dataset_size + +class Mask_Image_Strategy: + + def __init__( + self, + mask_path: str, + batch_mask_filenames: np.ndarray, + output_reshape: tuple[int, int], + ): + + def read_batch(self, batch_size, dataset_index) -> np.ndarray: + #read images with PIL + for i in range(batch_size): + for j in range(mini_batch): + image_index = i * mini_batch + j + mask = Image.open( + os.path.join(mask_path, batch_mask_filenames[image_index]) + ).resize(output_reshape) + mask = np.array(mask) + return mask + + def get_dataset_size(self) -> int: + dataset_size = FlowGeneratorExperimental.__len__ + return dataset_size + + + +#should this be a batch with read_batch as the function having all the code in it? + # def initialise_batch_img(self, mini_batch, image_size, channel_mask) -> np.ndarray: + + # num_mini_batches = Reader.calculate_mini_batch + # channel_mask = np.array(channel_mask) + # n_channels = np.sum(channel_mask) + + # batch_images = np.zeros( + # ( + # num_mini_batches, + # mini_batch, + # image_size[0], + # image_size[1], + # n_channels, + # ) + # ) + # return batch_images + + # #output + # def initialise_batch_mask(self, output_size, mini_batch, num_classes) -> Tuple[bool, np.ndarray]: + # #num_mini_batches = Reader.calculate_mini_batch + + # if self.output_size[1] == 1: + # column = True + # batch_masks = np.zeros( + # ( + # num_mini_batches, + # mini_batch, output_size[0], + # num_classes + # ) + # ) + # else: + # column = False + # batch_masks = np.zeros( + # ( + # num_mini_batches, + # mini_batch, + # output_size[0], + # output_size[1], + # num_classes, + # ) + # ) + + # return column, batch_masks From b3fad7f19481de532f26ebaf0a912bb991c4512c Mon Sep 17 00:00:00 2001 From: Ayleen Sohaib <2684413S@student.gla.ac.uk> Date: Sat, 12 Aug 2023 21:41:27 +0100 Subject: [PATCH 02/75] changes to for loops and class constructors --- .../segmentation_utils/reading_strategies.py | 81 ++++--------------- 1 file changed, 17 insertions(+), 64 deletions(-) diff --git a/utilities/segmentation_utils/reading_strategies.py b/utilities/segmentation_utils/reading_strategies.py index 8677020..ad7c697 100644 --- a/utilities/segmentation_utils/reading_strategies.py +++ b/utilities/segmentation_utils/reading_strategies.py @@ -1,4 +1,4 @@ -from typing import Protocol, Tuple +from typing import Protocol import os import numpy as np from PIL import Image @@ -22,23 +22,21 @@ def __init__( self, image_path: str, image_size: tuple [int, int], - batch_image_filenames: np.ndarray, - mini_batch: int, + batch_image_filenames: np.ndarray, ): self.image_path = image_path self.image_size = image_size - self.mini_batch = mini_batch self.batch_image_files = batch_image_filenames def read_batch(self, batch_size, dataset_index) -> np.ndarray: #read images with PIL for i in range(batch_size): - image_index = i + dataset_index - image = Image.open( - os.path.join(self.image_path, self.batch_image_filenames[image_index]) - ).resize(image_size, Image.ANTIALIAS) - image = np.array(image) - image = image / 255 + image_index = i + dataset_index + image = Image.open( + os.path.join(self.image_path, self.batch_image_filenames[image_index]) + ).resize(self.image_size, Image.ANTIALIAS) + image = np.array(image) + image = image / 255 return image def get_dataset_size(self) -> int: @@ -50,68 +48,23 @@ class Mask_Image_Strategy: def __init__( self, mask_path: str, - batch_mask_filenames: np.ndarray, + batch_mask_filenames: np.ndarray, output_reshape: tuple[int, int], ): + self.mask_path = mask_path + self.batch_mask_filenames = batch_mask_filenames + self.output_reshape = output_reshape def read_batch(self, batch_size, dataset_index) -> np.ndarray: #read images with PIL for i in range(batch_size): - for j in range(mini_batch): - image_index = i * mini_batch + j - mask = Image.open( - os.path.join(mask_path, batch_mask_filenames[image_index]) - ).resize(output_reshape) - mask = np.array(mask) + image_index = i + dataset_index + mask = Image.open( + os.path.join(self.mask_path, self.batch_mask_filenames[image_index]) + ).resize(self.output_reshape) + mask = np.array(mask) return mask def get_dataset_size(self) -> int: dataset_size = FlowGeneratorExperimental.__len__ return dataset_size - - - -#should this be a batch with read_batch as the function having all the code in it? - # def initialise_batch_img(self, mini_batch, image_size, channel_mask) -> np.ndarray: - - # num_mini_batches = Reader.calculate_mini_batch - # channel_mask = np.array(channel_mask) - # n_channels = np.sum(channel_mask) - - # batch_images = np.zeros( - # ( - # num_mini_batches, - # mini_batch, - # image_size[0], - # image_size[1], - # n_channels, - # ) - # ) - # return batch_images - - # #output - # def initialise_batch_mask(self, output_size, mini_batch, num_classes) -> Tuple[bool, np.ndarray]: - # #num_mini_batches = Reader.calculate_mini_batch - - # if self.output_size[1] == 1: - # column = True - # batch_masks = np.zeros( - # ( - # num_mini_batches, - # mini_batch, output_size[0], - # num_classes - # ) - # ) - # else: - # column = False - # batch_masks = np.zeros( - # ( - # num_mini_batches, - # mini_batch, - # output_size[0], - # output_size[1], - # num_classes, - # ) - # ) - - # return column, batch_masks From e062bf9d1460272a3a7714a17483cad79bae1cfd Mon Sep 17 00:00:00 2001 From: Ayleen Sohaib <2684413S@student.gla.ac.uk> Date: Sun, 13 Aug 2023 12:14:36 +0100 Subject: [PATCH 03/75] changes to interface name, get function and constructor --- .../segmentation_utils/reading_strategies.py | 22 +++++++++---------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/utilities/segmentation_utils/reading_strategies.py b/utilities/segmentation_utils/reading_strategies.py index ad7c697..ca3b120 100644 --- a/utilities/segmentation_utils/reading_strategies.py +++ b/utilities/segmentation_utils/reading_strategies.py @@ -8,7 +8,7 @@ from utilities.segmentation_utils import ImagePreprocessor -class ReaderInterface(Protocol): +class IReader(Protocol): def read_batch(self, start:int, end: int) -> None: ... @@ -22,25 +22,25 @@ def __init__( self, image_path: str, image_size: tuple [int, int], - batch_image_filenames: np.ndarray, ): self.image_path = image_path self.image_size = image_size - self.batch_image_files = batch_image_filenames def read_batch(self, batch_size, dataset_index) -> np.ndarray: #read images with PIL + image_filenames = np.array(sorted(os.listdir(self.image_path))) for i in range(batch_size): image_index = i + dataset_index image = Image.open( - os.path.join(self.image_path, self.batch_image_filenames[image_index]) + os.path.join(self.image_path, image_filenames[image_index]) ).resize(self.image_size, Image.ANTIALIAS) image = np.array(image) image = image / 255 return image - def get_dataset_size(self) -> int: - dataset_size = FlowGeneratorExperimental.__len__ + def get_dataset_size(self, mini_batch) -> int: + image_filenames = np.array(sorted(os.listdir(self.image_path))) + dataset_size = int(np.floor(len(image_filenames) / float(mini_batch))) return dataset_size class Mask_Image_Strategy: @@ -48,23 +48,23 @@ class Mask_Image_Strategy: def __init__( self, mask_path: str, - batch_mask_filenames: np.ndarray, output_reshape: tuple[int, int], ): self.mask_path = mask_path - self.batch_mask_filenames = batch_mask_filenames self.output_reshape = output_reshape def read_batch(self, batch_size, dataset_index) -> np.ndarray: #read images with PIL + mask_filenames = np.array(sorted(os.listdir(self.mask_path))) for i in range(batch_size): image_index = i + dataset_index mask = Image.open( - os.path.join(self.mask_path, self.batch_mask_filenames[image_index]) + os.path.join(self.mask_path, mask_filenames[image_index]) ).resize(self.output_reshape) mask = np.array(mask) return mask - def get_dataset_size(self) -> int: - dataset_size = FlowGeneratorExperimental.__len__ + def get_dataset_size(self, mini_batch) -> int: + image_filenames = np.array(sorted(os.listdir(self.image_path))) + dataset_size = int(np.floor(len(image_filenames) / float(mini_batch))) return dataset_size From f043497030833c31ed7f793aab870bd9409fb022 Mon Sep 17 00:00:00 2001 From: Ayleen Sohaib <2684413S@student.gla.ac.uk> Date: Tue, 15 Aug 2023 18:16:48 +0100 Subject: [PATCH 04/75] added strategies using rasterio to read images --- .../segmentation_utils/reading_strategies.py | 57 +++++++++++++++++-- 1 file changed, 53 insertions(+), 4 deletions(-) diff --git a/utilities/segmentation_utils/reading_strategies.py b/utilities/segmentation_utils/reading_strategies.py index ca3b120..a36365b 100644 --- a/utilities/segmentation_utils/reading_strategies.py +++ b/utilities/segmentation_utils/reading_strategies.py @@ -4,10 +4,6 @@ from PIL import Image import rasterio -from flowreader import FlowGeneratorExperimental -from utilities.segmentation_utils import ImagePreprocessor - - class IReader(Protocol): def read_batch(self, start:int, end: int) -> None: @@ -64,7 +60,60 @@ def read_batch(self, batch_size, dataset_index) -> np.ndarray: mask = np.array(mask) return mask + def get_dataset_size(self, mini_batch) -> int: + mask_filenames = np.array(sorted(os.listdir(self.mask_path))) + dataset_size = int(np.floor(len(mask_filenames) / float(mini_batch))) + return dataset_size + +class Hyperspectral_Image_Strategy: + + def __init__( + self, + image_path:str, + ): + self.image_path = image_path + + def read_batch(self, batch_size, dataset_index) -> np.ndarray: + #read images with rasterio + image_filenames = np.array(sorted(os.listdir(self.image_path))) + for i in range(batch_size): + image_index = i + dataset_index + #open the source raster dataset + with rasterio.open( + os.path.join(self.image_path, image_filenames[image_index]) + ) as dataset: + #.read() returns a numpy array that contains the raster cell values in your file. + image = dataset.read() + image = image / 255 + return image + def get_dataset_size(self, mini_batch) -> int: image_filenames = np.array(sorted(os.listdir(self.image_path))) dataset_size = int(np.floor(len(image_filenames) / float(mini_batch))) return dataset_size + +class Hyperspectral_Mask_Image_Strategy: + + def __init__( + self, + mask_path:str, + ): + self.mask_path = mask_path + + def read_batch(self, batch_size, dataset_index) -> np.ndarray: + #read images with rasterio + mask_filenames = np.array(sorted(os.listdir(self.mask_path))) + for i in range(batch_size): + image_index = i + dataset_index + #open the source raster dataset + with rasterio.open( + os.path.join(self.mask_path, mask_filenames[image_index]) + ) as dataset: + #.read() returns a numpy array that contains the raster cell values in your file. + mask = dataset.read() + return mask + + def get_dataset_size(self, mini_batch) -> int: + mask_filenames = np.array(sorted(os.listdir(self.mask_path))) + dataset_size = int(np.floor(len(mask_filenames) / float(mini_batch))) + return dataset_size From 0384f8ace95ad685f9b04d8e629f6b3995fa7115 Mon Sep 17 00:00:00 2001 From: Ayleen Sohaib <2684413S@student.gla.ac.uk> Date: Tue, 15 Aug 2023 21:48:00 +0100 Subject: [PATCH 05/75] removed mask strategies and added to constructor --- .../segmentation_utils/reading_strategies.py | 64 ++----------------- 1 file changed, 7 insertions(+), 57 deletions(-) diff --git a/utilities/segmentation_utils/reading_strategies.py b/utilities/segmentation_utils/reading_strategies.py index a36365b..9ae5a5d 100644 --- a/utilities/segmentation_utils/reading_strategies.py +++ b/utilities/segmentation_utils/reading_strategies.py @@ -12,15 +12,17 @@ def read_batch(self, start:int, end: int) -> None: def get_dataset_size(self) -> None: ... -class RGB_Image_Strategy: +class RGBImageStrategy: def __init__( self, image_path: str, image_size: tuple [int, int], + antialias: int = 1, ): self.image_path = image_path self.image_size = image_size + self.antialias = antialias def read_batch(self, batch_size, dataset_index) -> np.ndarray: #read images with PIL @@ -29,7 +31,7 @@ def read_batch(self, batch_size, dataset_index) -> np.ndarray: image_index = i + dataset_index image = Image.open( os.path.join(self.image_path, image_filenames[image_index]) - ).resize(self.image_size, Image.ANTIALIAS) + ).resize(self.image_size, self.antialias) image = np.array(image) image = image / 255 return image @@ -38,34 +40,8 @@ def get_dataset_size(self, mini_batch) -> int: image_filenames = np.array(sorted(os.listdir(self.image_path))) dataset_size = int(np.floor(len(image_filenames) / float(mini_batch))) return dataset_size - -class Mask_Image_Strategy: - - def __init__( - self, - mask_path: str, - output_reshape: tuple[int, int], - ): - self.mask_path = mask_path - self.output_reshape = output_reshape - def read_batch(self, batch_size, dataset_index) -> np.ndarray: - #read images with PIL - mask_filenames = np.array(sorted(os.listdir(self.mask_path))) - for i in range(batch_size): - image_index = i + dataset_index - mask = Image.open( - os.path.join(self.mask_path, mask_filenames[image_index]) - ).resize(self.output_reshape) - mask = np.array(mask) - return mask - - def get_dataset_size(self, mini_batch) -> int: - mask_filenames = np.array(sorted(os.listdir(self.mask_path))) - dataset_size = int(np.floor(len(mask_filenames) / float(mini_batch))) - return dataset_size - -class Hyperspectral_Image_Strategy: +class HyperspectralImageStrategy: def __init__( self, @@ -83,37 +59,11 @@ def read_batch(self, batch_size, dataset_index) -> np.ndarray: os.path.join(self.image_path, image_filenames[image_index]) ) as dataset: #.read() returns a numpy array that contains the raster cell values in your file. - image = dataset.read() + image = dataset.read() #!resize using numpy resize function? image = image / 255 return image def get_dataset_size(self, mini_batch) -> int: image_filenames = np.array(sorted(os.listdir(self.image_path))) dataset_size = int(np.floor(len(image_filenames) / float(mini_batch))) - return dataset_size - -class Hyperspectral_Mask_Image_Strategy: - - def __init__( - self, - mask_path:str, - ): - self.mask_path = mask_path - - def read_batch(self, batch_size, dataset_index) -> np.ndarray: - #read images with rasterio - mask_filenames = np.array(sorted(os.listdir(self.mask_path))) - for i in range(batch_size): - image_index = i + dataset_index - #open the source raster dataset - with rasterio.open( - os.path.join(self.mask_path, mask_filenames[image_index]) - ) as dataset: - #.read() returns a numpy array that contains the raster cell values in your file. - mask = dataset.read() - return mask - - def get_dataset_size(self, mini_batch) -> int: - mask_filenames = np.array(sorted(os.listdir(self.mask_path))) - dataset_size = int(np.floor(len(mask_filenames) / float(mini_batch))) - return dataset_size + return dataset_size \ No newline at end of file From ba9339bfa34da5d388f30fe1c014702c9ee4b042 Mon Sep 17 00:00:00 2001 From: Ayleen Sohaib <2684413S@student.gla.ac.uk> Date: Tue, 15 Aug 2023 22:00:18 +0100 Subject: [PATCH 06/75] fixed constructor and set image_resample to default --- utilities/segmentation_utils/reading_strategies.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/utilities/segmentation_utils/reading_strategies.py b/utilities/segmentation_utils/reading_strategies.py index 9ae5a5d..9421b89 100644 --- a/utilities/segmentation_utils/reading_strategies.py +++ b/utilities/segmentation_utils/reading_strategies.py @@ -18,11 +18,11 @@ def __init__( self, image_path: str, image_size: tuple [int, int], - antialias: int = 1, + image_resample: Image.Resampling.NEAREST, ): self.image_path = image_path self.image_size = image_size - self.antialias = antialias + self.image_resample = image_resample def read_batch(self, batch_size, dataset_index) -> np.ndarray: #read images with PIL @@ -31,7 +31,7 @@ def read_batch(self, batch_size, dataset_index) -> np.ndarray: image_index = i + dataset_index image = Image.open( os.path.join(self.image_path, image_filenames[image_index]) - ).resize(self.image_size, self.antialias) + ).resize(self.image_size, self.image_resample) image = np.array(image) image = image / 255 return image From ff018b00199f3f9d3fe56e4a712850cc21a31f65 Mon Sep 17 00:00:00 2001 From: Ayleen Sohaib <2684413S@student.gla.ac.uk> Date: Thu, 17 Aug 2023 13:42:14 +0100 Subject: [PATCH 07/75] added resizing to hyperspectral strategy --- utilities/segmentation_utils/reading_strategies.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/utilities/segmentation_utils/reading_strategies.py b/utilities/segmentation_utils/reading_strategies.py index 9421b89..5c44615 100644 --- a/utilities/segmentation_utils/reading_strategies.py +++ b/utilities/segmentation_utils/reading_strategies.py @@ -17,7 +17,7 @@ class RGBImageStrategy: def __init__( self, image_path: str, - image_size: tuple [int, int], + image_size: tuple[int, int], image_resample: Image.Resampling.NEAREST, ): self.image_path = image_path @@ -46,8 +46,13 @@ class HyperspectralImageStrategy: def __init__( self, image_path:str, + image_resize:tuple[int,int], + image_resample: Image.Resampling.NEAREST, + ): self.image_path = image_path + self.image_resize = image_resize + self.image_resample = image_resample def read_batch(self, batch_size, dataset_index) -> np.ndarray: #read images with rasterio @@ -59,11 +64,13 @@ def read_batch(self, batch_size, dataset_index) -> np.ndarray: os.path.join(self.image_path, image_filenames[image_index]) ) as dataset: #.read() returns a numpy array that contains the raster cell values in your file. - image = dataset.read() #!resize using numpy resize function? + image = dataset.read() image = image / 255 + image = np.resize(self.image_resize, self.image_resample) return image def get_dataset_size(self, mini_batch) -> int: image_filenames = np.array(sorted(os.listdir(self.image_path))) dataset_size = int(np.floor(len(image_filenames) / float(mini_batch))) - return dataset_size \ No newline at end of file + return dataset_size + \ No newline at end of file From cd9cd37659e0b2a6faa8555b425563bac9b5eba4 Mon Sep 17 00:00:00 2001 From: Ayleen Sohaib <2684413S@student.gla.ac.uk> Date: Thu, 17 Aug 2023 13:43:01 +0100 Subject: [PATCH 08/75] added tests for strategies - they do not yet pass --- .../test_strategies.py | 48 +++++++++++++++++++ 1 file changed, 48 insertions(+) create mode 100644 tests/segmentation_utils_tests.py/test_strategies.py diff --git a/tests/segmentation_utils_tests.py/test_strategies.py b/tests/segmentation_utils_tests.py/test_strategies.py new file mode 100644 index 0000000..4466781 --- /dev/null +++ b/tests/segmentation_utils_tests.py/test_strategies.py @@ -0,0 +1,48 @@ +import os +import numpy as np +from PIL import Image +from pytest import MonkeyPatch +from utilities.segmentation_utils.reading_strategies import RGBImageStrategy + +def test_read_batch_image_path() -> None: + #should check if path is being read in correctly + image_strategy = RGBImageStrategy( + image_path="tests/segmentation_utils_tests/test_strategies", + image_size=(224, 224), + image_resample=Image.Resampling.NEAREST, +) + + batch_size = 2 + dataset_index = 0 + image_strategy.read_batch(batch_size, dataset_index) + +def test_read_batch_returns_nparray() -> None: + #checking if the returned value is a numpy array + + image_strategy = RGBImageStrategy( + image_path="tests/segmentation_utils_tests/test_strategies", + image_size=(224, 224), + image_resample=Image.Resampling.NEAREST, +) + + result = image_strategy.read_batch(batch_size=2, dataset_index=0) + assert isinstance(result, np.ndarray) + +def test_get_dataset_size() -> None: + #checking if the calculation is done correctly + + image_strategy = RGBImageStrategy( + image_path="tests/segmentation_utils_tests/test_strategies", + image_size=(224, 224), + image_resample=Image.Resampling.NEAREST, +) + dataset = 100 #if there are 100 images in the specified path + mini_batch = 32 #and we want 32 images in each batch + expected_value = dataset / mini_batch #number of sets of images we expect + + dataset_size = image_strategy.get_dataset_size(mini_batch) + assert dataset_size == expected_value + + + + From 3e05fef84d0b27773f581a49336d3ef46a2c5c69 Mon Sep 17 00:00:00 2001 From: Sajtospoga01 Date: Thu, 17 Aug 2023 22:31:39 +0100 Subject: [PATCH 09/75] updated test to pass, updates variable assigment in strategy constructor, adds extra comments --- .../test_strategies.py | 57 +++++++++++-------- .../segmentation_utils/reading_strategies.py | 12 ++-- 2 files changed, 41 insertions(+), 28 deletions(-) diff --git a/tests/segmentation_utils_tests.py/test_strategies.py b/tests/segmentation_utils_tests.py/test_strategies.py index 4466781..3148105 100644 --- a/tests/segmentation_utils_tests.py/test_strategies.py +++ b/tests/segmentation_utils_tests.py/test_strategies.py @@ -1,48 +1,57 @@ import os + import numpy as np from PIL import Image from pytest import MonkeyPatch + from utilities.segmentation_utils.reading_strategies import RGBImageStrategy + def test_read_batch_image_path() -> None: - #should check if path is being read in correctly + # should check if path is being read in correctly + patch = MonkeyPatch() + + patch.setattr(os, "listdir", lambda x: ["a", "b", "c"]) + + patch.setattr(Image, "open", lambda _: Image.fromarray(np.ones((224, 224, 3)).astype(np.uint8))) + image_strategy = RGBImageStrategy( - image_path="tests/segmentation_utils_tests/test_strategies", - image_size=(224, 224), - image_resample=Image.Resampling.NEAREST, -) + image_path="tests/segmentation_utils_tests/test_strategies", + image_size=(224, 224), + image_resample=Image.Resampling.NEAREST, + ) batch_size = 2 dataset_index = 0 image_strategy.read_batch(batch_size, dataset_index) + patch.undo() + patch.undo() + def test_read_batch_returns_nparray() -> None: - #checking if the returned value is a numpy array + # checking if the returned value is a numpy array image_strategy = RGBImageStrategy( - image_path="tests/segmentation_utils_tests/test_strategies", - image_size=(224, 224), - image_resample=Image.Resampling.NEAREST, -) + image_path="tests/segmentation_utils_tests/test_strategies", + image_size=(224, 224), + image_resample=Image.Resampling.NEAREST, + ) result = image_strategy.read_batch(batch_size=2, dataset_index=0) - assert isinstance(result, np.ndarray) + assert isinstance(result, np.ndarray) + def test_get_dataset_size() -> None: - #checking if the calculation is done correctly + # checking if the calculation is done correctly image_strategy = RGBImageStrategy( - image_path="tests/segmentation_utils_tests/test_strategies", - image_size=(224, 224), - image_resample=Image.Resampling.NEAREST, -) - dataset = 100 #if there are 100 images in the specified path - mini_batch = 32 #and we want 32 images in each batch - expected_value = dataset / mini_batch #number of sets of images we expect - + image_path="tests/segmentation_utils_tests/test_strategies", + image_size=(224, 224), + image_resample=Image.Resampling.NEAREST, + ) + dataset = 100 # if there are 100 images in the specified path + mini_batch = 32 # and we want 32 images in each batch + expected_value = dataset / mini_batch # number of sets of images we expect + dataset_size = image_strategy.get_dataset_size(mini_batch) assert dataset_size == expected_value - - - - diff --git a/utilities/segmentation_utils/reading_strategies.py b/utilities/segmentation_utils/reading_strategies.py index 5c44615..cec427d 100644 --- a/utilities/segmentation_utils/reading_strategies.py +++ b/utilities/segmentation_utils/reading_strategies.py @@ -1,8 +1,10 @@ -from typing import Protocol import os +from typing import Protocol + import numpy as np -from PIL import Image import rasterio +from PIL import Image + class IReader(Protocol): @@ -18,7 +20,7 @@ def __init__( self, image_path: str, image_size: tuple[int, int], - image_resample: Image.Resampling.NEAREST, + image_resample = Image.Resampling.NEAREST, ): self.image_path = image_path self.image_size = image_size @@ -26,6 +28,8 @@ def __init__( def read_batch(self, batch_size, dataset_index) -> np.ndarray: #read images with PIL + + #! add this to the intializer image_filenames = np.array(sorted(os.listdir(self.image_path))) for i in range(batch_size): image_index = i + dataset_index @@ -47,7 +51,7 @@ def __init__( self, image_path:str, image_resize:tuple[int,int], - image_resample: Image.Resampling.NEAREST, + image_resample = Image.Resampling.NEAREST, ): self.image_path = image_path From 583fd503be1e26d6953d195bb13395f26cac0b7f Mon Sep 17 00:00:00 2001 From: Ayleen Sohaib <2684413S@student.gla.ac.uk> Date: Thu, 24 Aug 2023 01:06:55 +0100 Subject: [PATCH 10/75] added variable to initialiser --- .../segmentation_utils/reading_strategies.py | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/utilities/segmentation_utils/reading_strategies.py b/utilities/segmentation_utils/reading_strategies.py index cec427d..9255a9d 100644 --- a/utilities/segmentation_utils/reading_strategies.py +++ b/utilities/segmentation_utils/reading_strategies.py @@ -23,30 +23,29 @@ def __init__( image_resample = Image.Resampling.NEAREST, ): self.image_path = image_path + self.image_filenames = np.array(sorted(os.listdir(self.image_path))) #!update: added variable to initialiser self.image_size = image_size self.image_resample = image_resample + def read_batch(self, batch_size, dataset_index) -> np.ndarray: #read images with PIL - #! add this to the intializer - image_filenames = np.array(sorted(os.listdir(self.image_path))) for i in range(batch_size): image_index = i + dataset_index image = Image.open( - os.path.join(self.image_path, image_filenames[image_index]) + os.path.join(self.image_path, self.image_filenames[image_index]) ).resize(self.image_size, self.image_resample) image = np.array(image) image = image / 255 return image def get_dataset_size(self, mini_batch) -> int: - image_filenames = np.array(sorted(os.listdir(self.image_path))) - dataset_size = int(np.floor(len(image_filenames) / float(mini_batch))) + dataset_size = int(np.floor(len(self.image_filenames) / float(mini_batch))) return dataset_size class HyperspectralImageStrategy: - + #read images with rasterio def __init__( self, image_path:str, @@ -55,17 +54,17 @@ def __init__( ): self.image_path = image_path + self.image_filenames = np.array(sorted(os.listdir(self.image_path))) self.image_resize = image_resize self.image_resample = image_resample def read_batch(self, batch_size, dataset_index) -> np.ndarray: #read images with rasterio - image_filenames = np.array(sorted(os.listdir(self.image_path))) for i in range(batch_size): image_index = i + dataset_index #open the source raster dataset with rasterio.open( - os.path.join(self.image_path, image_filenames[image_index]) + os.path.join(self.image_path, self.image_filenames[image_index]) ) as dataset: #.read() returns a numpy array that contains the raster cell values in your file. image = dataset.read() @@ -74,7 +73,6 @@ def read_batch(self, batch_size, dataset_index) -> np.ndarray: return image def get_dataset_size(self, mini_batch) -> int: - image_filenames = np.array(sorted(os.listdir(self.image_path))) - dataset_size = int(np.floor(len(image_filenames) / float(mini_batch))) + dataset_size = int(np.floor(len(self.image_filenames) / float(mini_batch))) return dataset_size \ No newline at end of file From c5b32317fef70b20cf9b26e621eba5d81c970f8c Mon Sep 17 00:00:00 2001 From: Ayleen Sohaib <2684413S@student.gla.ac.uk> Date: Thu, 24 Aug 2023 01:07:46 +0100 Subject: [PATCH 11/75] updated 3 tests to pass, added rasterio but incomplete --- .../test_strategies.py | 68 +++++++++++++++++-- 1 file changed, 61 insertions(+), 7 deletions(-) diff --git a/tests/segmentation_utils_tests.py/test_strategies.py b/tests/segmentation_utils_tests.py/test_strategies.py index 3148105..cabfdf7 100644 --- a/tests/segmentation_utils_tests.py/test_strategies.py +++ b/tests/segmentation_utils_tests.py/test_strategies.py @@ -1,17 +1,19 @@ import os - import numpy as np from PIL import Image +import rasterio from pytest import MonkeyPatch from utilities.segmentation_utils.reading_strategies import RGBImageStrategy def test_read_batch_image_path() -> None: - # should check if path is being read in correctly + #checking if the file is being opened and read correctly patch = MonkeyPatch() - patch.setattr(os, "listdir", lambda x: ["a", "b", "c"]) + mock_filenames = ["a", "b", "c"] + + patch.setattr(os, "listdir", lambda x: mock_filenames) patch.setattr(Image, "open", lambda _: Image.fromarray(np.ones((224, 224, 3)).astype(np.uint8))) @@ -31,27 +33,79 @@ def test_read_batch_image_path() -> None: def test_read_batch_returns_nparray() -> None: # checking if the returned value is a numpy array + patch = MonkeyPatch() + + patch.setattr(os, "listdir", lambda x: ["a", "b", "c"]) + + patch.setattr(Image, "open", lambda _: Image.fromarray(np.ones((224, 224, 3)).astype(np.uint8))) + image_strategy = RGBImageStrategy( image_path="tests/segmentation_utils_tests/test_strategies", image_size=(224, 224), image_resample=Image.Resampling.NEAREST, ) - result = image_strategy.read_batch(batch_size=2, dataset_index=0) + batch_size=2 + dataset_index=0 + + result = image_strategy.read_batch(batch_size, dataset_index) assert isinstance(result, np.ndarray) + patch.undo() + patch.undo() + def test_get_dataset_size() -> None: # checking if the calculation is done correctly + patch = MonkeyPatch() + + mock_filenames = ["a", "b", "c"] + + patch.setattr(os, "listdir", lambda x: mock_filenames) + + patch.setattr(Image, "open", lambda _: Image.fromarray(np.ones((224, 224, 3)).astype(np.uint8))) image_strategy = RGBImageStrategy( image_path="tests/segmentation_utils_tests/test_strategies", image_size=(224, 224), image_resample=Image.Resampling.NEAREST, ) - dataset = 100 # if there are 100 images in the specified path - mini_batch = 32 # and we want 32 images in each batch - expected_value = dataset / mini_batch # number of sets of images we expect + dataset = len(mock_filenames) # number of images in the specified path + mini_batch = 2 # number of images we want in each batch + expected_value = int (np.floor(dataset / float(mini_batch))) # number of sets of images we expect dataset_size = image_strategy.get_dataset_size(mini_batch) assert dataset_size == expected_value + patch.undo() + patch.undo() + + +#!to be continued... +class MockRasterio(): + # def __init__(self, image_path, image_filenames): + # self.image_path = image_path + # self.image_filenames = image_filenames + + def __init__(self, func): + self.func = func + + def mock_open(self, *args, **kwargs): + patch = MonkeyPatch() + mock_filenames = ["a", "b", "c"] + patch.setattr(os, "listdir", lambda x: mock_filenames) + + image_file = os.path.join(self.image_path, self.image_filenames[image_index]) + dataset = rasterio.open(image_file) + self.func(dataset) + + def mock_join(self): + patch = MonkeyPatch() + join = lambda x: "image_path" + patch.setattr(os.path, "join", join) + return join + + + + +def process_data(package=MockRasterio): + package.open \ No newline at end of file From 8437d4748ba1fb5a92760396861793406f24f3f6 Mon Sep 17 00:00:00 2001 From: Ayleen Sohaib <2684413S@student.gla.ac.uk> Date: Sat, 12 Aug 2023 20:50:24 +0100 Subject: [PATCH 12/75] Added file including strategies for reader --- .../segmentation_utils/reading_strategies.py | 117 ++++++++++++++++++ 1 file changed, 117 insertions(+) create mode 100644 utilities/segmentation_utils/reading_strategies.py diff --git a/utilities/segmentation_utils/reading_strategies.py b/utilities/segmentation_utils/reading_strategies.py new file mode 100644 index 0000000..8677020 --- /dev/null +++ b/utilities/segmentation_utils/reading_strategies.py @@ -0,0 +1,117 @@ +from typing import Protocol, Tuple +import os +import numpy as np +from PIL import Image +import rasterio + +from flowreader import FlowGeneratorExperimental +from utilities.segmentation_utils import ImagePreprocessor + + +class ReaderInterface(Protocol): + + def read_batch(self, start:int, end: int) -> None: + ... + + def get_dataset_size(self) -> None: + ... + +class RGB_Image_Strategy: + + def __init__( + self, + image_path: str, + image_size: tuple [int, int], + batch_image_filenames: np.ndarray, + mini_batch: int, + ): + self.image_path = image_path + self.image_size = image_size + self.mini_batch = mini_batch + self.batch_image_files = batch_image_filenames + + def read_batch(self, batch_size, dataset_index) -> np.ndarray: + #read images with PIL + for i in range(batch_size): + image_index = i + dataset_index + image = Image.open( + os.path.join(self.image_path, self.batch_image_filenames[image_index]) + ).resize(image_size, Image.ANTIALIAS) + image = np.array(image) + image = image / 255 + return image + + def get_dataset_size(self) -> int: + dataset_size = FlowGeneratorExperimental.__len__ + return dataset_size + +class Mask_Image_Strategy: + + def __init__( + self, + mask_path: str, + batch_mask_filenames: np.ndarray, + output_reshape: tuple[int, int], + ): + + def read_batch(self, batch_size, dataset_index) -> np.ndarray: + #read images with PIL + for i in range(batch_size): + for j in range(mini_batch): + image_index = i * mini_batch + j + mask = Image.open( + os.path.join(mask_path, batch_mask_filenames[image_index]) + ).resize(output_reshape) + mask = np.array(mask) + return mask + + def get_dataset_size(self) -> int: + dataset_size = FlowGeneratorExperimental.__len__ + return dataset_size + + + +#should this be a batch with read_batch as the function having all the code in it? + # def initialise_batch_img(self, mini_batch, image_size, channel_mask) -> np.ndarray: + + # num_mini_batches = Reader.calculate_mini_batch + # channel_mask = np.array(channel_mask) + # n_channels = np.sum(channel_mask) + + # batch_images = np.zeros( + # ( + # num_mini_batches, + # mini_batch, + # image_size[0], + # image_size[1], + # n_channels, + # ) + # ) + # return batch_images + + # #output + # def initialise_batch_mask(self, output_size, mini_batch, num_classes) -> Tuple[bool, np.ndarray]: + # #num_mini_batches = Reader.calculate_mini_batch + + # if self.output_size[1] == 1: + # column = True + # batch_masks = np.zeros( + # ( + # num_mini_batches, + # mini_batch, output_size[0], + # num_classes + # ) + # ) + # else: + # column = False + # batch_masks = np.zeros( + # ( + # num_mini_batches, + # mini_batch, + # output_size[0], + # output_size[1], + # num_classes, + # ) + # ) + + # return column, batch_masks From b216ee3d6af4028379ea55eebf5bd4bea4853635 Mon Sep 17 00:00:00 2001 From: Ayleen Sohaib <2684413S@student.gla.ac.uk> Date: Sat, 12 Aug 2023 21:41:27 +0100 Subject: [PATCH 13/75] changes to for loops and class constructors --- .../segmentation_utils/reading_strategies.py | 81 ++++--------------- 1 file changed, 17 insertions(+), 64 deletions(-) diff --git a/utilities/segmentation_utils/reading_strategies.py b/utilities/segmentation_utils/reading_strategies.py index 8677020..ad7c697 100644 --- a/utilities/segmentation_utils/reading_strategies.py +++ b/utilities/segmentation_utils/reading_strategies.py @@ -1,4 +1,4 @@ -from typing import Protocol, Tuple +from typing import Protocol import os import numpy as np from PIL import Image @@ -22,23 +22,21 @@ def __init__( self, image_path: str, image_size: tuple [int, int], - batch_image_filenames: np.ndarray, - mini_batch: int, + batch_image_filenames: np.ndarray, ): self.image_path = image_path self.image_size = image_size - self.mini_batch = mini_batch self.batch_image_files = batch_image_filenames def read_batch(self, batch_size, dataset_index) -> np.ndarray: #read images with PIL for i in range(batch_size): - image_index = i + dataset_index - image = Image.open( - os.path.join(self.image_path, self.batch_image_filenames[image_index]) - ).resize(image_size, Image.ANTIALIAS) - image = np.array(image) - image = image / 255 + image_index = i + dataset_index + image = Image.open( + os.path.join(self.image_path, self.batch_image_filenames[image_index]) + ).resize(self.image_size, Image.ANTIALIAS) + image = np.array(image) + image = image / 255 return image def get_dataset_size(self) -> int: @@ -50,68 +48,23 @@ class Mask_Image_Strategy: def __init__( self, mask_path: str, - batch_mask_filenames: np.ndarray, + batch_mask_filenames: np.ndarray, output_reshape: tuple[int, int], ): + self.mask_path = mask_path + self.batch_mask_filenames = batch_mask_filenames + self.output_reshape = output_reshape def read_batch(self, batch_size, dataset_index) -> np.ndarray: #read images with PIL for i in range(batch_size): - for j in range(mini_batch): - image_index = i * mini_batch + j - mask = Image.open( - os.path.join(mask_path, batch_mask_filenames[image_index]) - ).resize(output_reshape) - mask = np.array(mask) + image_index = i + dataset_index + mask = Image.open( + os.path.join(self.mask_path, self.batch_mask_filenames[image_index]) + ).resize(self.output_reshape) + mask = np.array(mask) return mask def get_dataset_size(self) -> int: dataset_size = FlowGeneratorExperimental.__len__ return dataset_size - - - -#should this be a batch with read_batch as the function having all the code in it? - # def initialise_batch_img(self, mini_batch, image_size, channel_mask) -> np.ndarray: - - # num_mini_batches = Reader.calculate_mini_batch - # channel_mask = np.array(channel_mask) - # n_channels = np.sum(channel_mask) - - # batch_images = np.zeros( - # ( - # num_mini_batches, - # mini_batch, - # image_size[0], - # image_size[1], - # n_channels, - # ) - # ) - # return batch_images - - # #output - # def initialise_batch_mask(self, output_size, mini_batch, num_classes) -> Tuple[bool, np.ndarray]: - # #num_mini_batches = Reader.calculate_mini_batch - - # if self.output_size[1] == 1: - # column = True - # batch_masks = np.zeros( - # ( - # num_mini_batches, - # mini_batch, output_size[0], - # num_classes - # ) - # ) - # else: - # column = False - # batch_masks = np.zeros( - # ( - # num_mini_batches, - # mini_batch, - # output_size[0], - # output_size[1], - # num_classes, - # ) - # ) - - # return column, batch_masks From e3c626a965866f5e78624e358e669d929e8ef3c9 Mon Sep 17 00:00:00 2001 From: Ayleen Sohaib <2684413S@student.gla.ac.uk> Date: Sun, 13 Aug 2023 12:14:36 +0100 Subject: [PATCH 14/75] changes to interface name, get function and constructor --- .../segmentation_utils/reading_strategies.py | 22 +++++++++---------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/utilities/segmentation_utils/reading_strategies.py b/utilities/segmentation_utils/reading_strategies.py index ad7c697..ca3b120 100644 --- a/utilities/segmentation_utils/reading_strategies.py +++ b/utilities/segmentation_utils/reading_strategies.py @@ -8,7 +8,7 @@ from utilities.segmentation_utils import ImagePreprocessor -class ReaderInterface(Protocol): +class IReader(Protocol): def read_batch(self, start:int, end: int) -> None: ... @@ -22,25 +22,25 @@ def __init__( self, image_path: str, image_size: tuple [int, int], - batch_image_filenames: np.ndarray, ): self.image_path = image_path self.image_size = image_size - self.batch_image_files = batch_image_filenames def read_batch(self, batch_size, dataset_index) -> np.ndarray: #read images with PIL + image_filenames = np.array(sorted(os.listdir(self.image_path))) for i in range(batch_size): image_index = i + dataset_index image = Image.open( - os.path.join(self.image_path, self.batch_image_filenames[image_index]) + os.path.join(self.image_path, image_filenames[image_index]) ).resize(self.image_size, Image.ANTIALIAS) image = np.array(image) image = image / 255 return image - def get_dataset_size(self) -> int: - dataset_size = FlowGeneratorExperimental.__len__ + def get_dataset_size(self, mini_batch) -> int: + image_filenames = np.array(sorted(os.listdir(self.image_path))) + dataset_size = int(np.floor(len(image_filenames) / float(mini_batch))) return dataset_size class Mask_Image_Strategy: @@ -48,23 +48,23 @@ class Mask_Image_Strategy: def __init__( self, mask_path: str, - batch_mask_filenames: np.ndarray, output_reshape: tuple[int, int], ): self.mask_path = mask_path - self.batch_mask_filenames = batch_mask_filenames self.output_reshape = output_reshape def read_batch(self, batch_size, dataset_index) -> np.ndarray: #read images with PIL + mask_filenames = np.array(sorted(os.listdir(self.mask_path))) for i in range(batch_size): image_index = i + dataset_index mask = Image.open( - os.path.join(self.mask_path, self.batch_mask_filenames[image_index]) + os.path.join(self.mask_path, mask_filenames[image_index]) ).resize(self.output_reshape) mask = np.array(mask) return mask - def get_dataset_size(self) -> int: - dataset_size = FlowGeneratorExperimental.__len__ + def get_dataset_size(self, mini_batch) -> int: + image_filenames = np.array(sorted(os.listdir(self.image_path))) + dataset_size = int(np.floor(len(image_filenames) / float(mini_batch))) return dataset_size From 7f6798b0dac41d17c9ff6066dc3cd87393c6daee Mon Sep 17 00:00:00 2001 From: Ayleen Sohaib <2684413S@student.gla.ac.uk> Date: Tue, 15 Aug 2023 18:16:48 +0100 Subject: [PATCH 15/75] added strategies using rasterio to read images --- .../segmentation_utils/reading_strategies.py | 57 +++++++++++++++++-- 1 file changed, 53 insertions(+), 4 deletions(-) diff --git a/utilities/segmentation_utils/reading_strategies.py b/utilities/segmentation_utils/reading_strategies.py index ca3b120..a36365b 100644 --- a/utilities/segmentation_utils/reading_strategies.py +++ b/utilities/segmentation_utils/reading_strategies.py @@ -4,10 +4,6 @@ from PIL import Image import rasterio -from flowreader import FlowGeneratorExperimental -from utilities.segmentation_utils import ImagePreprocessor - - class IReader(Protocol): def read_batch(self, start:int, end: int) -> None: @@ -64,7 +60,60 @@ def read_batch(self, batch_size, dataset_index) -> np.ndarray: mask = np.array(mask) return mask + def get_dataset_size(self, mini_batch) -> int: + mask_filenames = np.array(sorted(os.listdir(self.mask_path))) + dataset_size = int(np.floor(len(mask_filenames) / float(mini_batch))) + return dataset_size + +class Hyperspectral_Image_Strategy: + + def __init__( + self, + image_path:str, + ): + self.image_path = image_path + + def read_batch(self, batch_size, dataset_index) -> np.ndarray: + #read images with rasterio + image_filenames = np.array(sorted(os.listdir(self.image_path))) + for i in range(batch_size): + image_index = i + dataset_index + #open the source raster dataset + with rasterio.open( + os.path.join(self.image_path, image_filenames[image_index]) + ) as dataset: + #.read() returns a numpy array that contains the raster cell values in your file. + image = dataset.read() + image = image / 255 + return image + def get_dataset_size(self, mini_batch) -> int: image_filenames = np.array(sorted(os.listdir(self.image_path))) dataset_size = int(np.floor(len(image_filenames) / float(mini_batch))) return dataset_size + +class Hyperspectral_Mask_Image_Strategy: + + def __init__( + self, + mask_path:str, + ): + self.mask_path = mask_path + + def read_batch(self, batch_size, dataset_index) -> np.ndarray: + #read images with rasterio + mask_filenames = np.array(sorted(os.listdir(self.mask_path))) + for i in range(batch_size): + image_index = i + dataset_index + #open the source raster dataset + with rasterio.open( + os.path.join(self.mask_path, mask_filenames[image_index]) + ) as dataset: + #.read() returns a numpy array that contains the raster cell values in your file. + mask = dataset.read() + return mask + + def get_dataset_size(self, mini_batch) -> int: + mask_filenames = np.array(sorted(os.listdir(self.mask_path))) + dataset_size = int(np.floor(len(mask_filenames) / float(mini_batch))) + return dataset_size From 26d4576ef29e423efb5fe27cc218b22c6db84a58 Mon Sep 17 00:00:00 2001 From: Ayleen Sohaib <2684413S@student.gla.ac.uk> Date: Tue, 15 Aug 2023 21:48:00 +0100 Subject: [PATCH 16/75] removed mask strategies and added to constructor --- .../segmentation_utils/reading_strategies.py | 64 ++----------------- 1 file changed, 7 insertions(+), 57 deletions(-) diff --git a/utilities/segmentation_utils/reading_strategies.py b/utilities/segmentation_utils/reading_strategies.py index a36365b..9ae5a5d 100644 --- a/utilities/segmentation_utils/reading_strategies.py +++ b/utilities/segmentation_utils/reading_strategies.py @@ -12,15 +12,17 @@ def read_batch(self, start:int, end: int) -> None: def get_dataset_size(self) -> None: ... -class RGB_Image_Strategy: +class RGBImageStrategy: def __init__( self, image_path: str, image_size: tuple [int, int], + antialias: int = 1, ): self.image_path = image_path self.image_size = image_size + self.antialias = antialias def read_batch(self, batch_size, dataset_index) -> np.ndarray: #read images with PIL @@ -29,7 +31,7 @@ def read_batch(self, batch_size, dataset_index) -> np.ndarray: image_index = i + dataset_index image = Image.open( os.path.join(self.image_path, image_filenames[image_index]) - ).resize(self.image_size, Image.ANTIALIAS) + ).resize(self.image_size, self.antialias) image = np.array(image) image = image / 255 return image @@ -38,34 +40,8 @@ def get_dataset_size(self, mini_batch) -> int: image_filenames = np.array(sorted(os.listdir(self.image_path))) dataset_size = int(np.floor(len(image_filenames) / float(mini_batch))) return dataset_size - -class Mask_Image_Strategy: - - def __init__( - self, - mask_path: str, - output_reshape: tuple[int, int], - ): - self.mask_path = mask_path - self.output_reshape = output_reshape - def read_batch(self, batch_size, dataset_index) -> np.ndarray: - #read images with PIL - mask_filenames = np.array(sorted(os.listdir(self.mask_path))) - for i in range(batch_size): - image_index = i + dataset_index - mask = Image.open( - os.path.join(self.mask_path, mask_filenames[image_index]) - ).resize(self.output_reshape) - mask = np.array(mask) - return mask - - def get_dataset_size(self, mini_batch) -> int: - mask_filenames = np.array(sorted(os.listdir(self.mask_path))) - dataset_size = int(np.floor(len(mask_filenames) / float(mini_batch))) - return dataset_size - -class Hyperspectral_Image_Strategy: +class HyperspectralImageStrategy: def __init__( self, @@ -83,37 +59,11 @@ def read_batch(self, batch_size, dataset_index) -> np.ndarray: os.path.join(self.image_path, image_filenames[image_index]) ) as dataset: #.read() returns a numpy array that contains the raster cell values in your file. - image = dataset.read() + image = dataset.read() #!resize using numpy resize function? image = image / 255 return image def get_dataset_size(self, mini_batch) -> int: image_filenames = np.array(sorted(os.listdir(self.image_path))) dataset_size = int(np.floor(len(image_filenames) / float(mini_batch))) - return dataset_size - -class Hyperspectral_Mask_Image_Strategy: - - def __init__( - self, - mask_path:str, - ): - self.mask_path = mask_path - - def read_batch(self, batch_size, dataset_index) -> np.ndarray: - #read images with rasterio - mask_filenames = np.array(sorted(os.listdir(self.mask_path))) - for i in range(batch_size): - image_index = i + dataset_index - #open the source raster dataset - with rasterio.open( - os.path.join(self.mask_path, mask_filenames[image_index]) - ) as dataset: - #.read() returns a numpy array that contains the raster cell values in your file. - mask = dataset.read() - return mask - - def get_dataset_size(self, mini_batch) -> int: - mask_filenames = np.array(sorted(os.listdir(self.mask_path))) - dataset_size = int(np.floor(len(mask_filenames) / float(mini_batch))) - return dataset_size + return dataset_size \ No newline at end of file From e3b0fbd96d44551c47132816fae00834a6470b12 Mon Sep 17 00:00:00 2001 From: Ayleen Sohaib <2684413S@student.gla.ac.uk> Date: Tue, 15 Aug 2023 22:00:18 +0100 Subject: [PATCH 17/75] fixed constructor and set image_resample to default --- utilities/segmentation_utils/reading_strategies.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/utilities/segmentation_utils/reading_strategies.py b/utilities/segmentation_utils/reading_strategies.py index 9ae5a5d..9421b89 100644 --- a/utilities/segmentation_utils/reading_strategies.py +++ b/utilities/segmentation_utils/reading_strategies.py @@ -18,11 +18,11 @@ def __init__( self, image_path: str, image_size: tuple [int, int], - antialias: int = 1, + image_resample: Image.Resampling.NEAREST, ): self.image_path = image_path self.image_size = image_size - self.antialias = antialias + self.image_resample = image_resample def read_batch(self, batch_size, dataset_index) -> np.ndarray: #read images with PIL @@ -31,7 +31,7 @@ def read_batch(self, batch_size, dataset_index) -> np.ndarray: image_index = i + dataset_index image = Image.open( os.path.join(self.image_path, image_filenames[image_index]) - ).resize(self.image_size, self.antialias) + ).resize(self.image_size, self.image_resample) image = np.array(image) image = image / 255 return image From 04483c0579de273d40e5a3f3a182e3d35ff5f247 Mon Sep 17 00:00:00 2001 From: Ayleen Sohaib <2684413S@student.gla.ac.uk> Date: Thu, 17 Aug 2023 13:42:14 +0100 Subject: [PATCH 18/75] added resizing to hyperspectral strategy --- utilities/segmentation_utils/reading_strategies.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/utilities/segmentation_utils/reading_strategies.py b/utilities/segmentation_utils/reading_strategies.py index 9421b89..5c44615 100644 --- a/utilities/segmentation_utils/reading_strategies.py +++ b/utilities/segmentation_utils/reading_strategies.py @@ -17,7 +17,7 @@ class RGBImageStrategy: def __init__( self, image_path: str, - image_size: tuple [int, int], + image_size: tuple[int, int], image_resample: Image.Resampling.NEAREST, ): self.image_path = image_path @@ -46,8 +46,13 @@ class HyperspectralImageStrategy: def __init__( self, image_path:str, + image_resize:tuple[int,int], + image_resample: Image.Resampling.NEAREST, + ): self.image_path = image_path + self.image_resize = image_resize + self.image_resample = image_resample def read_batch(self, batch_size, dataset_index) -> np.ndarray: #read images with rasterio @@ -59,11 +64,13 @@ def read_batch(self, batch_size, dataset_index) -> np.ndarray: os.path.join(self.image_path, image_filenames[image_index]) ) as dataset: #.read() returns a numpy array that contains the raster cell values in your file. - image = dataset.read() #!resize using numpy resize function? + image = dataset.read() image = image / 255 + image = np.resize(self.image_resize, self.image_resample) return image def get_dataset_size(self, mini_batch) -> int: image_filenames = np.array(sorted(os.listdir(self.image_path))) dataset_size = int(np.floor(len(image_filenames) / float(mini_batch))) - return dataset_size \ No newline at end of file + return dataset_size + \ No newline at end of file From b48a4e6aa71ebb74f213064d4975baf153205cef Mon Sep 17 00:00:00 2001 From: Ayleen Sohaib <2684413S@student.gla.ac.uk> Date: Thu, 17 Aug 2023 13:43:01 +0100 Subject: [PATCH 19/75] added tests for strategies - they do not yet pass --- .../test_strategies.py | 48 +++++++++++++++++++ 1 file changed, 48 insertions(+) create mode 100644 tests/segmentation_utils_tests.py/test_strategies.py diff --git a/tests/segmentation_utils_tests.py/test_strategies.py b/tests/segmentation_utils_tests.py/test_strategies.py new file mode 100644 index 0000000..4466781 --- /dev/null +++ b/tests/segmentation_utils_tests.py/test_strategies.py @@ -0,0 +1,48 @@ +import os +import numpy as np +from PIL import Image +from pytest import MonkeyPatch +from utilities.segmentation_utils.reading_strategies import RGBImageStrategy + +def test_read_batch_image_path() -> None: + #should check if path is being read in correctly + image_strategy = RGBImageStrategy( + image_path="tests/segmentation_utils_tests/test_strategies", + image_size=(224, 224), + image_resample=Image.Resampling.NEAREST, +) + + batch_size = 2 + dataset_index = 0 + image_strategy.read_batch(batch_size, dataset_index) + +def test_read_batch_returns_nparray() -> None: + #checking if the returned value is a numpy array + + image_strategy = RGBImageStrategy( + image_path="tests/segmentation_utils_tests/test_strategies", + image_size=(224, 224), + image_resample=Image.Resampling.NEAREST, +) + + result = image_strategy.read_batch(batch_size=2, dataset_index=0) + assert isinstance(result, np.ndarray) + +def test_get_dataset_size() -> None: + #checking if the calculation is done correctly + + image_strategy = RGBImageStrategy( + image_path="tests/segmentation_utils_tests/test_strategies", + image_size=(224, 224), + image_resample=Image.Resampling.NEAREST, +) + dataset = 100 #if there are 100 images in the specified path + mini_batch = 32 #and we want 32 images in each batch + expected_value = dataset / mini_batch #number of sets of images we expect + + dataset_size = image_strategy.get_dataset_size(mini_batch) + assert dataset_size == expected_value + + + + From 11e7aacd41baea628068f75bd302765119998be2 Mon Sep 17 00:00:00 2001 From: Sajtospoga01 Date: Thu, 17 Aug 2023 22:31:39 +0100 Subject: [PATCH 20/75] updated test to pass, updates variable assigment in strategy constructor, adds extra comments --- .../test_strategies.py | 57 +++++++++++-------- .../segmentation_utils/reading_strategies.py | 12 ++-- 2 files changed, 41 insertions(+), 28 deletions(-) diff --git a/tests/segmentation_utils_tests.py/test_strategies.py b/tests/segmentation_utils_tests.py/test_strategies.py index 4466781..3148105 100644 --- a/tests/segmentation_utils_tests.py/test_strategies.py +++ b/tests/segmentation_utils_tests.py/test_strategies.py @@ -1,48 +1,57 @@ import os + import numpy as np from PIL import Image from pytest import MonkeyPatch + from utilities.segmentation_utils.reading_strategies import RGBImageStrategy + def test_read_batch_image_path() -> None: - #should check if path is being read in correctly + # should check if path is being read in correctly + patch = MonkeyPatch() + + patch.setattr(os, "listdir", lambda x: ["a", "b", "c"]) + + patch.setattr(Image, "open", lambda _: Image.fromarray(np.ones((224, 224, 3)).astype(np.uint8))) + image_strategy = RGBImageStrategy( - image_path="tests/segmentation_utils_tests/test_strategies", - image_size=(224, 224), - image_resample=Image.Resampling.NEAREST, -) + image_path="tests/segmentation_utils_tests/test_strategies", + image_size=(224, 224), + image_resample=Image.Resampling.NEAREST, + ) batch_size = 2 dataset_index = 0 image_strategy.read_batch(batch_size, dataset_index) + patch.undo() + patch.undo() + def test_read_batch_returns_nparray() -> None: - #checking if the returned value is a numpy array + # checking if the returned value is a numpy array image_strategy = RGBImageStrategy( - image_path="tests/segmentation_utils_tests/test_strategies", - image_size=(224, 224), - image_resample=Image.Resampling.NEAREST, -) + image_path="tests/segmentation_utils_tests/test_strategies", + image_size=(224, 224), + image_resample=Image.Resampling.NEAREST, + ) result = image_strategy.read_batch(batch_size=2, dataset_index=0) - assert isinstance(result, np.ndarray) + assert isinstance(result, np.ndarray) + def test_get_dataset_size() -> None: - #checking if the calculation is done correctly + # checking if the calculation is done correctly image_strategy = RGBImageStrategy( - image_path="tests/segmentation_utils_tests/test_strategies", - image_size=(224, 224), - image_resample=Image.Resampling.NEAREST, -) - dataset = 100 #if there are 100 images in the specified path - mini_batch = 32 #and we want 32 images in each batch - expected_value = dataset / mini_batch #number of sets of images we expect - + image_path="tests/segmentation_utils_tests/test_strategies", + image_size=(224, 224), + image_resample=Image.Resampling.NEAREST, + ) + dataset = 100 # if there are 100 images in the specified path + mini_batch = 32 # and we want 32 images in each batch + expected_value = dataset / mini_batch # number of sets of images we expect + dataset_size = image_strategy.get_dataset_size(mini_batch) assert dataset_size == expected_value - - - - diff --git a/utilities/segmentation_utils/reading_strategies.py b/utilities/segmentation_utils/reading_strategies.py index 5c44615..cec427d 100644 --- a/utilities/segmentation_utils/reading_strategies.py +++ b/utilities/segmentation_utils/reading_strategies.py @@ -1,8 +1,10 @@ -from typing import Protocol import os +from typing import Protocol + import numpy as np -from PIL import Image import rasterio +from PIL import Image + class IReader(Protocol): @@ -18,7 +20,7 @@ def __init__( self, image_path: str, image_size: tuple[int, int], - image_resample: Image.Resampling.NEAREST, + image_resample = Image.Resampling.NEAREST, ): self.image_path = image_path self.image_size = image_size @@ -26,6 +28,8 @@ def __init__( def read_batch(self, batch_size, dataset_index) -> np.ndarray: #read images with PIL + + #! add this to the intializer image_filenames = np.array(sorted(os.listdir(self.image_path))) for i in range(batch_size): image_index = i + dataset_index @@ -47,7 +51,7 @@ def __init__( self, image_path:str, image_resize:tuple[int,int], - image_resample: Image.Resampling.NEAREST, + image_resample = Image.Resampling.NEAREST, ): self.image_path = image_path From a416419040770300247e44450231c5c5d52d8bc0 Mon Sep 17 00:00:00 2001 From: Ayleen Sohaib <2684413S@student.gla.ac.uk> Date: Thu, 24 Aug 2023 01:06:55 +0100 Subject: [PATCH 21/75] added variable to initialiser --- .../segmentation_utils/reading_strategies.py | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/utilities/segmentation_utils/reading_strategies.py b/utilities/segmentation_utils/reading_strategies.py index cec427d..9255a9d 100644 --- a/utilities/segmentation_utils/reading_strategies.py +++ b/utilities/segmentation_utils/reading_strategies.py @@ -23,30 +23,29 @@ def __init__( image_resample = Image.Resampling.NEAREST, ): self.image_path = image_path + self.image_filenames = np.array(sorted(os.listdir(self.image_path))) #!update: added variable to initialiser self.image_size = image_size self.image_resample = image_resample + def read_batch(self, batch_size, dataset_index) -> np.ndarray: #read images with PIL - #! add this to the intializer - image_filenames = np.array(sorted(os.listdir(self.image_path))) for i in range(batch_size): image_index = i + dataset_index image = Image.open( - os.path.join(self.image_path, image_filenames[image_index]) + os.path.join(self.image_path, self.image_filenames[image_index]) ).resize(self.image_size, self.image_resample) image = np.array(image) image = image / 255 return image def get_dataset_size(self, mini_batch) -> int: - image_filenames = np.array(sorted(os.listdir(self.image_path))) - dataset_size = int(np.floor(len(image_filenames) / float(mini_batch))) + dataset_size = int(np.floor(len(self.image_filenames) / float(mini_batch))) return dataset_size class HyperspectralImageStrategy: - + #read images with rasterio def __init__( self, image_path:str, @@ -55,17 +54,17 @@ def __init__( ): self.image_path = image_path + self.image_filenames = np.array(sorted(os.listdir(self.image_path))) self.image_resize = image_resize self.image_resample = image_resample def read_batch(self, batch_size, dataset_index) -> np.ndarray: #read images with rasterio - image_filenames = np.array(sorted(os.listdir(self.image_path))) for i in range(batch_size): image_index = i + dataset_index #open the source raster dataset with rasterio.open( - os.path.join(self.image_path, image_filenames[image_index]) + os.path.join(self.image_path, self.image_filenames[image_index]) ) as dataset: #.read() returns a numpy array that contains the raster cell values in your file. image = dataset.read() @@ -74,7 +73,6 @@ def read_batch(self, batch_size, dataset_index) -> np.ndarray: return image def get_dataset_size(self, mini_batch) -> int: - image_filenames = np.array(sorted(os.listdir(self.image_path))) - dataset_size = int(np.floor(len(image_filenames) / float(mini_batch))) + dataset_size = int(np.floor(len(self.image_filenames) / float(mini_batch))) return dataset_size \ No newline at end of file From ef1ffd01a3e874125ff8655b430a65c94e414220 Mon Sep 17 00:00:00 2001 From: Ayleen Sohaib <2684413S@student.gla.ac.uk> Date: Thu, 24 Aug 2023 01:07:46 +0100 Subject: [PATCH 22/75] updated 3 tests to pass, added rasterio but incomplete --- .../test_strategies.py | 68 +++++++++++++++++-- 1 file changed, 61 insertions(+), 7 deletions(-) diff --git a/tests/segmentation_utils_tests.py/test_strategies.py b/tests/segmentation_utils_tests.py/test_strategies.py index 3148105..cabfdf7 100644 --- a/tests/segmentation_utils_tests.py/test_strategies.py +++ b/tests/segmentation_utils_tests.py/test_strategies.py @@ -1,17 +1,19 @@ import os - import numpy as np from PIL import Image +import rasterio from pytest import MonkeyPatch from utilities.segmentation_utils.reading_strategies import RGBImageStrategy def test_read_batch_image_path() -> None: - # should check if path is being read in correctly + #checking if the file is being opened and read correctly patch = MonkeyPatch() - patch.setattr(os, "listdir", lambda x: ["a", "b", "c"]) + mock_filenames = ["a", "b", "c"] + + patch.setattr(os, "listdir", lambda x: mock_filenames) patch.setattr(Image, "open", lambda _: Image.fromarray(np.ones((224, 224, 3)).astype(np.uint8))) @@ -31,27 +33,79 @@ def test_read_batch_image_path() -> None: def test_read_batch_returns_nparray() -> None: # checking if the returned value is a numpy array + patch = MonkeyPatch() + + patch.setattr(os, "listdir", lambda x: ["a", "b", "c"]) + + patch.setattr(Image, "open", lambda _: Image.fromarray(np.ones((224, 224, 3)).astype(np.uint8))) + image_strategy = RGBImageStrategy( image_path="tests/segmentation_utils_tests/test_strategies", image_size=(224, 224), image_resample=Image.Resampling.NEAREST, ) - result = image_strategy.read_batch(batch_size=2, dataset_index=0) + batch_size=2 + dataset_index=0 + + result = image_strategy.read_batch(batch_size, dataset_index) assert isinstance(result, np.ndarray) + patch.undo() + patch.undo() + def test_get_dataset_size() -> None: # checking if the calculation is done correctly + patch = MonkeyPatch() + + mock_filenames = ["a", "b", "c"] + + patch.setattr(os, "listdir", lambda x: mock_filenames) + + patch.setattr(Image, "open", lambda _: Image.fromarray(np.ones((224, 224, 3)).astype(np.uint8))) image_strategy = RGBImageStrategy( image_path="tests/segmentation_utils_tests/test_strategies", image_size=(224, 224), image_resample=Image.Resampling.NEAREST, ) - dataset = 100 # if there are 100 images in the specified path - mini_batch = 32 # and we want 32 images in each batch - expected_value = dataset / mini_batch # number of sets of images we expect + dataset = len(mock_filenames) # number of images in the specified path + mini_batch = 2 # number of images we want in each batch + expected_value = int (np.floor(dataset / float(mini_batch))) # number of sets of images we expect dataset_size = image_strategy.get_dataset_size(mini_batch) assert dataset_size == expected_value + patch.undo() + patch.undo() + + +#!to be continued... +class MockRasterio(): + # def __init__(self, image_path, image_filenames): + # self.image_path = image_path + # self.image_filenames = image_filenames + + def __init__(self, func): + self.func = func + + def mock_open(self, *args, **kwargs): + patch = MonkeyPatch() + mock_filenames = ["a", "b", "c"] + patch.setattr(os, "listdir", lambda x: mock_filenames) + + image_file = os.path.join(self.image_path, self.image_filenames[image_index]) + dataset = rasterio.open(image_file) + self.func(dataset) + + def mock_join(self): + patch = MonkeyPatch() + join = lambda x: "image_path" + patch.setattr(os.path, "join", join) + return join + + + + +def process_data(package=MockRasterio): + package.open \ No newline at end of file From 13e60d2b8cd6f91ebfdb94b0dc36d7b21a386660 Mon Sep 17 00:00:00 2001 From: Ayleen Sohaib <2684413S@student.gla.ac.uk> Date: Thu, 24 Aug 2023 13:57:40 +0100 Subject: [PATCH 23/75] updated the test for hyperspectral to pass --- .../test_strategies.py | 56 +++++++++++-------- 1 file changed, 33 insertions(+), 23 deletions(-) diff --git a/tests/segmentation_utils_tests.py/test_strategies.py b/tests/segmentation_utils_tests.py/test_strategies.py index cabfdf7..fa93012 100644 --- a/tests/segmentation_utils_tests.py/test_strategies.py +++ b/tests/segmentation_utils_tests.py/test_strategies.py @@ -79,33 +79,43 @@ def test_get_dataset_size() -> None: patch.undo() patch.undo() - -#!to be continued... class MockRasterio(): - # def __init__(self, image_path, image_filenames): - # self.image_path = image_path - # self.image_filenames = image_filenames - def __init__(self, func): - self.func = func + def __init__(self): + self.shape = (224, 224) #dimensions for raster data + self.dtypes = ['int32'] #data type of raster data that would be returned by .open() + #a list containing a string representing a data type + #32 bit int data type + + def read(self, *args, **kwargs): + return np.zeros(self.shape, self.dtypes[0]) + + #these functions are invoked when a 'with' statement is executed + def __enter__(self): + #called at the beginning of a 'with' block + return self #returns instance of MockRasterio class itself + + def __exit__(self, type, value, traceback): + #called at the end of a 'with' block + pass - def mock_open(self, *args, **kwargs): +def test_hyperspectral_open(): patch = MonkeyPatch() mock_filenames = ["a", "b", "c"] patch.setattr(os, "listdir", lambda x: mock_filenames) - image_file = os.path.join(self.image_path, self.image_filenames[image_index]) - dataset = rasterio.open(image_file) - self.func(dataset) - - def mock_join(self): - patch = MonkeyPatch() - join = lambda x: "image_path" - patch.setattr(os.path, "join", join) - return join - - - - -def process_data(package=MockRasterio): - package.open \ No newline at end of file + def mock_open(*args, **kwargs): #local function to the test + #defines behaviour of mock object that replaces rasterio.open() + return MockRasterio() + + patch.setattr(rasterio, "open", mock_open) + image_path = "tests/segmentation_utils_tests/test_strategies" + dataset_list = [] + + for filename in mock_filenames: + file_path = os.path.join(image_path, filename) + dataset = rasterio.open(file_path) + dataset_list.append(dataset) + + assert dataset.shape == (224, 224) + assert np.array_equal (dataset.read(), np.zeros((224, 224), dtype='int32')) From e99b86995a3e1ec785a7805b5f036c70be8936f1 Mon Sep 17 00:00:00 2001 From: Sajtospoga01 Date: Thu, 24 Aug 2023 15:11:27 +0100 Subject: [PATCH 24/75] adds dependency injection to the rasterio strategy for better testability --- .../segmentation_utils/reading_strategies.py | 26 ++++++++++++------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/utilities/segmentation_utils/reading_strategies.py b/utilities/segmentation_utils/reading_strategies.py index 9255a9d..42d284c 100644 --- a/utilities/segmentation_utils/reading_strategies.py +++ b/utilities/segmentation_utils/reading_strategies.py @@ -51,26 +51,34 @@ def __init__( image_path:str, image_resize:tuple[int,int], image_resample = Image.Resampling.NEAREST, - + package = rasterio ): self.image_path = image_path self.image_filenames = np.array(sorted(os.listdir(self.image_path))) self.image_resize = image_resize self.image_resample = image_resample + self.package = package + #gets the number of bands for the dataset + self.bands = package.open(os.path.join(self.image_path, self.image_filenames[0])).count def read_batch(self, batch_size, dataset_index) -> np.ndarray: #read images with rasterio - for i in range(batch_size): - image_index = i + dataset_index - #open the source raster dataset - with rasterio.open( - os.path.join(self.image_path, self.image_filenames[image_index]) + batch_filenames = self.image_filenames[dataset_index:dataset_index + batch_size] + + #defines the array that will contain the images + images = np.zeros((batch_size, self.bands, self.image_resize[0], self.image_resize[1])) + for i,filename in enumerate(batch_filenames): + with self.package.open( + os.path.join(self.image_path, filename) ) as dataset: #.read() returns a numpy array that contains the raster cell values in your file. image = dataset.read() - image = image / 255 - image = np.resize(self.image_resize, self.image_resample) - return image + images[i,:,:,:] = np.resize(image,self.image_resize) + + #ensures channel-last orientation for the reader + np.moveaxis(images,1,3) + + return np.array(images) def get_dataset_size(self, mini_batch) -> int: dataset_size = int(np.floor(len(self.image_filenames) / float(mini_batch))) From 047789adcd167bf797a55197cfb6a7d6b84f36c8 Mon Sep 17 00:00:00 2001 From: Sajtospoga01 Date: Thu, 24 Aug 2023 15:59:09 +0100 Subject: [PATCH 25/75] adds rasterio strategy test, with functional mock class. --- .../test_strategies.py | 92 ++++++++-------- .../segmentation_utils/reading_strategies.py | 103 ++++++++++++------ 2 files changed, 113 insertions(+), 82 deletions(-) diff --git a/tests/segmentation_utils_tests.py/test_strategies.py b/tests/segmentation_utils_tests.py/test_strategies.py index fa93012..c3d9146 100644 --- a/tests/segmentation_utils_tests.py/test_strategies.py +++ b/tests/segmentation_utils_tests.py/test_strategies.py @@ -1,21 +1,27 @@ import os + import numpy as np -from PIL import Image import rasterio +from PIL import Image from pytest import MonkeyPatch -from utilities.segmentation_utils.reading_strategies import RGBImageStrategy +from utilities.segmentation_utils.reading_strategies import ( + HyperspectralImageStrategy, MockRasterio, RGBImageStrategy) def test_read_batch_image_path() -> None: - #checking if the file is being opened and read correctly + # checking if the file is being opened and read correctly patch = MonkeyPatch() mock_filenames = ["a", "b", "c"] patch.setattr(os, "listdir", lambda x: mock_filenames) - patch.setattr(Image, "open", lambda _: Image.fromarray(np.ones((224, 224, 3)).astype(np.uint8))) + patch.setattr( + Image, + "open", + lambda _: Image.fromarray(np.ones((224, 224, 3)).astype(np.uint8)), + ) image_strategy = RGBImageStrategy( image_path="tests/segmentation_utils_tests/test_strategies", @@ -37,16 +43,20 @@ def test_read_batch_returns_nparray() -> None: patch.setattr(os, "listdir", lambda x: ["a", "b", "c"]) - patch.setattr(Image, "open", lambda _: Image.fromarray(np.ones((224, 224, 3)).astype(np.uint8))) - + patch.setattr( + Image, + "open", + lambda _: Image.fromarray(np.ones((224, 224, 3)).astype(np.uint8)), + ) + image_strategy = RGBImageStrategy( image_path="tests/segmentation_utils_tests/test_strategies", image_size=(224, 224), image_resample=Image.Resampling.NEAREST, ) - batch_size=2 - dataset_index=0 + batch_size = 2 + dataset_index = 0 result = image_strategy.read_batch(batch_size, dataset_index) assert isinstance(result, np.ndarray) @@ -63,7 +73,12 @@ def test_get_dataset_size() -> None: patch.setattr(os, "listdir", lambda x: mock_filenames) - patch.setattr(Image, "open", lambda _: Image.fromarray(np.ones((224, 224, 3)).astype(np.uint8))) + #! not needed as you arent reading any image in this function + patch.setattr( + Image, + "open", + lambda _: Image.fromarray(np.ones((224, 224, 3)).astype(np.uint8)), + ) image_strategy = RGBImageStrategy( image_path="tests/segmentation_utils_tests/test_strategies", @@ -72,50 +87,33 @@ def test_get_dataset_size() -> None: ) dataset = len(mock_filenames) # number of images in the specified path mini_batch = 2 # number of images we want in each batch - expected_value = int (np.floor(dataset / float(mini_batch))) # number of sets of images we expect + expected_value = int( + np.floor(dataset / float(mini_batch)) + ) # number of sets of images we expect dataset_size = image_strategy.get_dataset_size(mini_batch) assert dataset_size == expected_value patch.undo() patch.undo() -class MockRasterio(): - - def __init__(self): - self.shape = (224, 224) #dimensions for raster data - self.dtypes = ['int32'] #data type of raster data that would be returned by .open() - #a list containing a string representing a data type - #32 bit int data type - - def read(self, *args, **kwargs): - return np.zeros(self.shape, self.dtypes[0]) - - #these functions are invoked when a 'with' statement is executed - def __enter__(self): - #called at the beginning of a 'with' block - return self #returns instance of MockRasterio class itself - - def __exit__(self, type, value, traceback): - #called at the end of a 'with' block - pass def test_hyperspectral_open(): - patch = MonkeyPatch() - mock_filenames = ["a", "b", "c"] - patch.setattr(os, "listdir", lambda x: mock_filenames) - - def mock_open(*args, **kwargs): #local function to the test - #defines behaviour of mock object that replaces rasterio.open() - return MockRasterio() - - patch.setattr(rasterio, "open", mock_open) - image_path = "tests/segmentation_utils_tests/test_strategies" - dataset_list = [] - - for filename in mock_filenames: - file_path = os.path.join(image_path, filename) - dataset = rasterio.open(file_path) - dataset_list.append(dataset) + patch = MonkeyPatch() + mock_filenames = ["a", "b", "c"] + patch.setattr(os, "listdir", lambda x: mock_filenames) + + image_path = "tests/segmentation_utils_tests/test_strategies" - assert dataset.shape == (224, 224) - assert np.array_equal (dataset.read(), np.zeros((224, 224), dtype='int32')) + mock_data = { + "n": 3, + "size": (224, 224), + "bands": 3, + "dtypes": ["uint8"], + } + strategy = HyperspectralImageStrategy( + image_path, (224, 224), package=MockRasterio(**mock_data) + ) + + read_images = strategy.read_batch(2, 0) + + assert read_images.shape == (2, 224, 224, 3) diff --git a/utilities/segmentation_utils/reading_strategies.py b/utilities/segmentation_utils/reading_strategies.py index 42d284c..751bc29 100644 --- a/utilities/segmentation_utils/reading_strategies.py +++ b/utilities/segmentation_utils/reading_strategies.py @@ -1,35 +1,63 @@ import os -from typing import Protocol +from types import ModuleType +from typing import Protocol, Type, Union import numpy as np import rasterio from PIL import Image -class IReader(Protocol): +class MockRasterio: + def __init__(self, n , size, bands, dtypes): + self.n = n + self.size = size + self.bands = bands + self.dtypes = dtypes + + def open(self, *args, **kwargs): + return self + + @property + def count(self) -> int: + return self.bands + + def read(self, *args, **kwargs): + return np.zeros((self.bands,self.size[0],self.size[1]), self.dtypes[0]) + + # these functions are invoked when a 'with' statement is executed + def __enter__(self): + # called at the beginning of a 'with' block + return self # returns instance of MockRasterio class itself + + def __exit__(self, type, value, traceback): + # called at the end of a 'with' block + pass - def read_batch(self, start:int, end: int) -> None: + +class IReader(Protocol): + def read_batch(self, start: int, end: int) -> None: ... - + def get_dataset_size(self) -> None: ... -class RGBImageStrategy: +class RGBImageStrategy: def __init__( self, image_path: str, image_size: tuple[int, int], - image_resample = Image.Resampling.NEAREST, + image_resample=Image.Resampling.NEAREST, ): self.image_path = image_path - self.image_filenames = np.array(sorted(os.listdir(self.image_path))) #!update: added variable to initialiser + self.image_filenames = np.array( + sorted(os.listdir(self.image_path)) + ) #!update: added variable to initialiser self.image_size = image_size self.image_resample = image_resample - def read_batch(self, batch_size, dataset_index) -> np.ndarray: - #read images with PIL + # read images with PIL for i in range(batch_size): image_index = i + dataset_index @@ -43,44 +71,49 @@ def read_batch(self, batch_size, dataset_index) -> np.ndarray: def get_dataset_size(self, mini_batch) -> int: dataset_size = int(np.floor(len(self.image_filenames) / float(mini_batch))) return dataset_size - + + class HyperspectralImageStrategy: - #read images with rasterio + # read images with rasterio def __init__( self, - image_path:str, - image_resize:tuple[int,int], - image_resample = Image.Resampling.NEAREST, - package = rasterio + image_path: str, + image_resize: tuple[int, int], + image_resample=Image.Resampling.NEAREST, + package: Union[MockRasterio, ModuleType] = rasterio, ): self.image_path = image_path self.image_filenames = np.array(sorted(os.listdir(self.image_path))) self.image_resize = image_resize self.image_resample = image_resample self.package = package - #gets the number of bands for the dataset - self.bands = package.open(os.path.join(self.image_path, self.image_filenames[0])).count - - def read_batch(self, batch_size, dataset_index) -> np.ndarray: - #read images with rasterio - batch_filenames = self.image_filenames[dataset_index:dataset_index + batch_size] - - #defines the array that will contain the images - images = np.zeros((batch_size, self.bands, self.image_resize[0], self.image_resize[1])) - for i,filename in enumerate(batch_filenames): - with self.package.open( - os.path.join(self.image_path, filename) - ) as dataset: - #.read() returns a numpy array that contains the raster cell values in your file. + # gets the number of bands for the dataset + self.bands = package.open( + os.path.join(self.image_path, self.image_filenames[0]) + ).count + print("-----------My very cool bands--------: ",self.bands) + + def read_batch(self, batch_size:int, dataset_index:int) -> np.ndarray: + # read images with rasterio + batch_filenames = self.image_filenames[ + dataset_index : dataset_index + batch_size + ] + + # defines the array that will contain the images + images = np.zeros( + (batch_size, self.bands, self.image_resize[0], self.image_resize[1]) + ) + for i, filename in enumerate(batch_filenames): + with self.package.open(os.path.join(self.image_path, filename)) as dataset: + # .read() returns a numpy array that contains the raster cell values in your file. image = dataset.read() - images[i,:,:,:] = np.resize(image,self.image_resize) + images[i, :, :, :] = np.resize(image, self.image_resize) + + # ensures channel-last orientation for the reader + images = np.moveaxis(images, 1, 3) - #ensures channel-last orientation for the reader - np.moveaxis(images,1,3) - return np.array(images) - + def get_dataset_size(self, mini_batch) -> int: dataset_size = int(np.floor(len(self.image_filenames) / float(mini_batch))) return dataset_size - \ No newline at end of file From 0f873618c3c4860771b5b4f60a60f5d9a18a8d07 Mon Sep 17 00:00:00 2001 From: Sajtospoga01 Date: Thu, 24 Aug 2023 16:20:26 +0100 Subject: [PATCH 26/75] updates strategies to store images in batches for proper value return --- .../test_strategies.py | 5 ++++- .../segmentation_utils/reading_strategies.py | 20 +++++++++++-------- 2 files changed, 16 insertions(+), 9 deletions(-) diff --git a/tests/segmentation_utils_tests.py/test_strategies.py b/tests/segmentation_utils_tests.py/test_strategies.py index c3d9146..46b6d1b 100644 --- a/tests/segmentation_utils_tests.py/test_strategies.py +++ b/tests/segmentation_utils_tests.py/test_strategies.py @@ -31,7 +31,9 @@ def test_read_batch_image_path() -> None: batch_size = 2 dataset_index = 0 - image_strategy.read_batch(batch_size, dataset_index) + result = image_strategy.read_batch(batch_size, dataset_index) + + assert result.shape == (2, 224, 224, 3) patch.undo() patch.undo() @@ -60,6 +62,7 @@ def test_read_batch_returns_nparray() -> None: result = image_strategy.read_batch(batch_size, dataset_index) assert isinstance(result, np.ndarray) + assert result.shape == (2, 224, 224, 3) patch.undo() patch.undo() diff --git a/utilities/segmentation_utils/reading_strategies.py b/utilities/segmentation_utils/reading_strategies.py index 751bc29..d962223 100644 --- a/utilities/segmentation_utils/reading_strategies.py +++ b/utilities/segmentation_utils/reading_strategies.py @@ -8,12 +8,12 @@ class MockRasterio: - def __init__(self, n , size, bands, dtypes): + def __init__(self, n, size, bands, dtypes): self.n = n self.size = size self.bands = bands self.dtypes = dtypes - + def open(self, *args, **kwargs): return self @@ -22,7 +22,7 @@ def count(self) -> int: return self.bands def read(self, *args, **kwargs): - return np.zeros((self.bands,self.size[0],self.size[1]), self.dtypes[0]) + return np.zeros((self.bands, self.size[0], self.size[1]), self.dtypes[0]) # these functions are invoked when a 'with' statement is executed def __enter__(self): @@ -58,15 +58,19 @@ def __init__( def read_batch(self, batch_size, dataset_index) -> np.ndarray: # read images with PIL + batch_filenames = self.image_filenames[ + dataset_index : dataset_index + batch_size + ] + images = np.zeros((batch_size, self.image_size[0], self.image_size[1], 3)) for i in range(batch_size): image_index = i + dataset_index image = Image.open( - os.path.join(self.image_path, self.image_filenames[image_index]) + os.path.join(self.image_path, batch_filenames[i]) ).resize(self.image_size, self.image_resample) image = np.array(image) - image = image / 255 - return image + images[i, :, :, :] = image + return images def get_dataset_size(self, mini_batch) -> int: dataset_size = int(np.floor(len(self.image_filenames) / float(mini_batch))) @@ -91,9 +95,9 @@ def __init__( self.bands = package.open( os.path.join(self.image_path, self.image_filenames[0]) ).count - print("-----------My very cool bands--------: ",self.bands) + print("-----------My very cool bands--------: ", self.bands) - def read_batch(self, batch_size:int, dataset_index:int) -> np.ndarray: + def read_batch(self, batch_size: int, dataset_index: int) -> np.ndarray: # read images with rasterio batch_filenames = self.image_filenames[ dataset_index : dataset_index + batch_size From 830828898192f5d7018ba50c57d7e48d97bc69c9 Mon Sep 17 00:00:00 2001 From: Sajtospoga01 Date: Thu, 24 Aug 2023 16:33:23 +0100 Subject: [PATCH 27/75] migrate mock class to a mock_classes.py package for better sectionability --- tests/mock_classes.py | 28 +++++++++++++++++ .../segmentation_utils/reading_strategies.py | 31 ++----------------- .../segmentation_utils/tempCodeRunnerFile.py | 2 ++ 3 files changed, 32 insertions(+), 29 deletions(-) create mode 100644 tests/mock_classes.py diff --git a/tests/mock_classes.py b/tests/mock_classes.py new file mode 100644 index 0000000..fb83225 --- /dev/null +++ b/tests/mock_classes.py @@ -0,0 +1,28 @@ +import numpy as np + + +class MockRasterio: + def __init__(self, n, size, bands, dtypes): + self.n = n + self.size = size + self.bands = bands + self.dtypes = dtypes + + def open(self, *args, **kwargs): + return self + + @property + def count(self) -> int: + return self.bands + + def read(self, *args, **kwargs): + return np.zeros((self.bands, self.size[0], self.size[1]), self.dtypes[0]) + + # these functions are invoked when a 'with' statement is executed + def __enter__(self): + # called at the beginning of a 'with' block + return self # returns instance of MockRasterio class itself + + def __exit__(self, type, value, traceback): + # called at the end of a 'with' block + pass \ No newline at end of file diff --git a/utilities/segmentation_utils/reading_strategies.py b/utilities/segmentation_utils/reading_strategies.py index d962223..5973e48 100644 --- a/utilities/segmentation_utils/reading_strategies.py +++ b/utilities/segmentation_utils/reading_strategies.py @@ -1,37 +1,12 @@ import os from types import ModuleType -from typing import Protocol, Type, Union +from typing import Protocol, Union import numpy as np import rasterio from PIL import Image - -class MockRasterio: - def __init__(self, n, size, bands, dtypes): - self.n = n - self.size = size - self.bands = bands - self.dtypes = dtypes - - def open(self, *args, **kwargs): - return self - - @property - def count(self) -> int: - return self.bands - - def read(self, *args, **kwargs): - return np.zeros((self.bands, self.size[0], self.size[1]), self.dtypes[0]) - - # these functions are invoked when a 'with' statement is executed - def __enter__(self): - # called at the beginning of a 'with' block - return self # returns instance of MockRasterio class itself - - def __exit__(self, type, value, traceback): - # called at the end of a 'with' block - pass +from tests.mock_classes import MockRasterio class IReader(Protocol): @@ -64,7 +39,6 @@ def read_batch(self, batch_size, dataset_index) -> np.ndarray: images = np.zeros((batch_size, self.image_size[0], self.image_size[1], 3)) for i in range(batch_size): - image_index = i + dataset_index image = Image.open( os.path.join(self.image_path, batch_filenames[i]) ).resize(self.image_size, self.image_resample) @@ -95,7 +69,6 @@ def __init__( self.bands = package.open( os.path.join(self.image_path, self.image_filenames[0]) ).count - print("-----------My very cool bands--------: ", self.bands) def read_batch(self, batch_size: int, dataset_index: int) -> np.ndarray: # read images with rasterio diff --git a/utilities/segmentation_utils/tempCodeRunnerFile.py b/utilities/segmentation_utils/tempCodeRunnerFile.py index f53b566..7870c72 100644 --- a/utilities/segmentation_utils/tempCodeRunnerFile.py +++ b/utilities/segmentation_utils/tempCodeRunnerFile.py @@ -1 +1,3 @@ + +#! I strongly recommend to not use this haha. not the most appropriate way of testing flowgenerator.__read_batch(start = start_index, end= end_index) \ No newline at end of file From 98364760a69e81b9c3b4479ce20567c24fbf668e Mon Sep 17 00:00:00 2001 From: Sajtospoga01 Date: Thu, 24 Aug 2023 17:05:40 +0100 Subject: [PATCH 28/75] remove mock class dependency, for better felxibility, adds marker flags for pytest for tests to run in staging or production to toml file --- pyproject.toml | 5 +++ tests/mock_classes.py | 28 ---------------- .../test_flowreader.py | 7 ++++ .../test_strategies.py | 32 +++++++++++++++++-- .../segmentation_utils/reading_strategies.py | 7 ++-- 5 files changed, 43 insertions(+), 36 deletions(-) delete mode 100644 tests/mock_classes.py diff --git a/pyproject.toml b/pyproject.toml index 20b4059..922d766 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -26,6 +26,11 @@ dependencies = [ [tool.setuptools] packages = ["utilities"] +[tool.pytest.ini_options] +markers = [ + "staging: Mark a test as part of the staging environment", + "production: Mark a test as part of the production environment", +] [project.optional-dependencies] dev = [ diff --git a/tests/mock_classes.py b/tests/mock_classes.py deleted file mode 100644 index fb83225..0000000 --- a/tests/mock_classes.py +++ /dev/null @@ -1,28 +0,0 @@ -import numpy as np - - -class MockRasterio: - def __init__(self, n, size, bands, dtypes): - self.n = n - self.size = size - self.bands = bands - self.dtypes = dtypes - - def open(self, *args, **kwargs): - return self - - @property - def count(self) -> int: - return self.bands - - def read(self, *args, **kwargs): - return np.zeros((self.bands, self.size[0], self.size[1]), self.dtypes[0]) - - # these functions are invoked when a 'with' statement is executed - def __enter__(self): - # called at the beginning of a 'with' block - return self # returns instance of MockRasterio class itself - - def __exit__(self, type, value, traceback): - # called at the end of a 'with' block - pass \ No newline at end of file diff --git a/tests/segmentation_utils_tests.py/test_flowreader.py b/tests/segmentation_utils_tests.py/test_flowreader.py index 3676512..53382e0 100644 --- a/tests/segmentation_utils_tests.py/test_flowreader.py +++ b/tests/segmentation_utils_tests.py/test_flowreader.py @@ -110,3 +110,10 @@ def test_set_mini_batch_size_not_devisable() -> None: assert exc_info.value.args[0] == "The batch size must be divisible by the mini batch size" +################ +# Staging tests# +################ + +@pytest.mark.staging +def test_read_batch_staging() -> None: + pass diff --git a/tests/segmentation_utils_tests.py/test_strategies.py b/tests/segmentation_utils_tests.py/test_strategies.py index 46b6d1b..b2084ae 100644 --- a/tests/segmentation_utils_tests.py/test_strategies.py +++ b/tests/segmentation_utils_tests.py/test_strategies.py @@ -1,12 +1,38 @@ import os import numpy as np -import rasterio from PIL import Image from pytest import MonkeyPatch from utilities.segmentation_utils.reading_strategies import ( - HyperspectralImageStrategy, MockRasterio, RGBImageStrategy) + HyperspectralImageStrategy, RGBImageStrategy) + + +class MockRasterio: + def __init__(self, n, size, bands, dtypes): + self.n = n + self.size = size + self.bands = bands + self.dtypes = dtypes + + def open(self, *args, **kwargs): + return self + + @property + def count(self) -> int: + return self.bands + + def read(self, *args, **kwargs): + return np.zeros((self.bands, self.size[0], self.size[1]), self.dtypes[0]) + + # these functions are invoked when a 'with' statement is executed + def __enter__(self): + # called at the beginning of a 'with' block + return self # returns instance of MockRasterio class itself + + def __exit__(self, type, value, traceback): + # called at the end of a 'with' block + pass def test_read_batch_image_path() -> None: @@ -106,7 +132,7 @@ def test_hyperspectral_open(): patch.setattr(os, "listdir", lambda x: mock_filenames) image_path = "tests/segmentation_utils_tests/test_strategies" - + mock_data = { "n": 3, "size": (224, 224), diff --git a/utilities/segmentation_utils/reading_strategies.py b/utilities/segmentation_utils/reading_strategies.py index 5973e48..f9a4120 100644 --- a/utilities/segmentation_utils/reading_strategies.py +++ b/utilities/segmentation_utils/reading_strategies.py @@ -1,13 +1,10 @@ import os -from types import ModuleType -from typing import Protocol, Union +from typing import Any, Protocol import numpy as np import rasterio from PIL import Image -from tests.mock_classes import MockRasterio - class IReader(Protocol): def read_batch(self, start: int, end: int) -> None: @@ -58,7 +55,7 @@ def __init__( image_path: str, image_resize: tuple[int, int], image_resample=Image.Resampling.NEAREST, - package: Union[MockRasterio, ModuleType] = rasterio, + package: Any = rasterio, ): self.image_path = image_path self.image_filenames = np.array(sorted(os.listdir(self.image_path))) From 4e4eba6c88be2e53c8a78b8730ac8609c16d902e Mon Sep 17 00:00:00 2001 From: Ayleen Sohaib <2684413S@student.gla.ac.uk> Date: Thu, 24 Aug 2023 23:47:43 +0100 Subject: [PATCH 29/75] updated the read_batch function to use strategies --- utilities/segmentation_utils/flowreader.py | 111 ++++++++------------- 1 file changed, 42 insertions(+), 69 deletions(-) diff --git a/utilities/segmentation_utils/flowreader.py b/utilities/segmentation_utils/flowreader.py index 491a7c6..b7cdec2 100644 --- a/utilities/segmentation_utils/flowreader.py +++ b/utilities/segmentation_utils/flowreader.py @@ -16,6 +16,8 @@ from utilities.segmentation_utils import ImagePreprocessor from utilities.segmentation_utils.constants import ImageOrdering from utilities.segmentation_utils.ImagePreprocessor import IPreprocessor +from utilities.segmentation_utils.reading_strategies import IReader + class FlowGenerator: @@ -266,6 +268,8 @@ def __init__( output_size: tuple[int, int], channel_mask: list[bool], num_classes: int, + input_strategy: IReader, + output_strategy: IReader, shuffle: bool = True, batch_size: int = 2, preprocessing_enabled: bool = True, @@ -277,6 +281,7 @@ def __init__( weights_path: Optional[str] = None, shuffle_counter: int = 0, image_ordering: ImageOrdering = ImageOrdering.CHANNEL_LAST, + ): if len(output_size) != 2: raise ValueError("The output size has to be a tuple of length 2") @@ -308,6 +313,9 @@ def __init__( self.image_filenames = np.array(sorted(os.listdir(self.image_path))) self.mask_filenames = np.array(sorted(os.listdir(self.mask_path))) + self.input_strategy = input_strategy + self.output_strategy = output_strategy + # should be moved out as a strategy if self.read_weights: weights_df = pd.read_csv(self.weights_path, header=None) @@ -387,10 +395,10 @@ def set_mini_batch_size(self, batch_size: int) -> None: raise ValueError("The batch size must be divisible by the mini batch size") self.mini_batch = batch_size - def __read_batch(self, start: int, end: int) -> None: + def __read_batch(self, dataset_index: int, end: int) -> None: # read image batch - batch_image_filenames = self.image_filenames[start:end] - batch_mask_filenames = self.mask_filenames[start:end] + batch_image_filenames = self.image_filenames[dataset_index:end] + batch_mask_filenames = self.mask_filenames[dataset_index:end] for image, mask in zip(batch_image_filenames, batch_mask_filenames): if image != mask: raise ValueError("The image and mask directories do not match") @@ -398,75 +406,40 @@ def __read_batch(self, start: int, end: int) -> None: # calculate number of mini batches in a batch n = self.batch_size // self.mini_batch - batch_images = np.zeros( - ( - n, - self.mini_batch, - self.image_size[0], - self.image_size[1], - self.n_channels, - ) - ) - - batch_masks = np.zeros( - ( - n, - self.mini_batch, - self.output_reshape[0], - self.output_reshape[1], - self.num_classes, - ) - ) + batch_images = self.input_strategy.read_batch(self.batch_size, dataset_index) + batch_masks = self.output_strategy.read_batch(self.batch_size, dataset_index) # preprocess and assign images and masks to the batch - for i in range(n): - raw_masks = np.zeros( - (self.mini_batch, self.output_reshape[0], self.output_reshape[1]) - ) - - for j in range(self.mini_batch): - image_index = i * self.mini_batch + j - - image = Image.open( - os.path.join(self.image_path, batch_image_filenames[image_index]) - ).resize(self.image_size, Image.ANTIALIAS) - - image = np.array(image) - - mask = Image.open( - os.path.join(self.mask_path, batch_mask_filenames[image_index]) - ).resize(self.output_reshape) - - mask = np.array(mask) - # image = image[:, :, self.channel_mask] - - if self.preprocessing_enabled: - if self.preprocessing_seed is None: - image_seed = np.random.randint(0, 100000) - else: - state = np.random.RandomState(self.preprocessing_seed) - image_seed = state.randint(0, 100000) - - ( - image, - mask, - ) = ImagePreprocessor.augmentation_pipeline( - image, - mask=mask, - seed=image_seed, - #!both preprocessing queues are assigned by this time - image_queue=self.preprocessing_queue_image, # type: ignore - mask_queue=self.preprocessing_queue_mask, # type: ignore - ) - - batch_images[i, j, :, :, :] = image - # NOTE: this provides the flexibility required to process both - # column and matrix vectors - raw_masks[j, :, :] = mask + + if self.preprocessing_enabled: + for i in range(self.batch_size): + image = batch_images[i, ...] + mask = batch_masks[i, ...] + if self.preprocessing_seed is None: + image_seed = np.random.randint(0, 100000) + else: + state = np.random.RandomState(self.preprocessing_seed) + image_seed = state.randint(0, 100000) + ( + image, + mask, + ) = ImagePreprocessor.augmentation_pipeline( + image, + mask=mask, + seed=image_seed, + #!both preprocessing queues are assigned by this time + image_queue=self.preprocessing_queue_image, # type: ignore + mask_queue=self.preprocessing_queue_mask, # type: ignore + ) + batch_images[i, ...] = image + batch_masks[i, ...] = mask + + batch_masks = ImagePreprocessor.onehot_encode( + batch_masks, self.num_classes + ) - batch_masks[i, :, :, :] = ImagePreprocessor.onehot_encode( - raw_masks, self.num_classes - ) + batch_images = batch_images.reshape(n, self.mini_batch, batch_images.shape[1], batch_images.shape[2], batch_images.shape[3]) + batch_masks = batch_masks.reshape(n, self.batch_size, batch_images.shape[1], batch_images[2], batch_images[3]) # chaches the batch self.image_batch_store = batch_images From f45d227a3dd549f040c58db49a0221b61ce01076 Mon Sep 17 00:00:00 2001 From: Sajtospoga01 Date: Fri, 25 Aug 2023 14:51:41 +0100 Subject: [PATCH 30/75] updates tests for flowgenerator experimental, updates flowreader reshaping to use tf as backend, fixes miss naming in IStrategy --- .../test_flowreader.py | 133 +++++++++++++++++- utilities/segmentation_utils/flowreader.py | 33 +++-- .../segmentation_utils/reading_strategies.py | 4 +- 3 files changed, 153 insertions(+), 17 deletions(-) diff --git a/tests/segmentation_utils_tests.py/test_flowreader.py b/tests/segmentation_utils_tests.py/test_flowreader.py index 53382e0..55b732d 100644 --- a/tests/segmentation_utils_tests.py/test_flowreader.py +++ b/tests/segmentation_utils_tests.py/test_flowreader.py @@ -10,11 +10,25 @@ from utilities.segmentation_utils.flowreader import FlowGeneratorExperimental +class DummyStrategy: + def __init__(self, input_shape=(512, 512, 3)): + self.input_shape = input_shape + + def read_batch(self, batch_size: int, dataset_index: int) -> np.ndarray: + return np.zeros((batch_size, *self.input_shape)) + + def get_dataset_size(self) -> int: + return 10 + + def test_can_create_instance() -> None: patch = MonkeyPatch() # mock list directory patch.setattr(os, "listdir", lambda x: ["a", "b", "c"]) + input_strategy = DummyStrategy() + output_strategy = DummyStrategy() + # create generator instance generator = FlowGeneratorExperimental( image_path="tests/segmentation_utils_tests/flow_reader_test", @@ -22,7 +36,9 @@ def test_can_create_instance() -> None: image_size=(512, 512), output_size=(512,512), num_classes=7, - channel_mask= [True,True,True] + channel_mask= [True,True,True], + input_strategy=input_strategy, + output_strategy=output_strategy, ) pass @@ -31,6 +47,8 @@ def test_set_preprocessing_pipeline() -> None: # mock list directory patch.setattr(os, "listdir", lambda x: ["a", "b", "c"]) + input_strategy = DummyStrategy() + output_strategy = DummyStrategy() # create generator instance generator = FlowGeneratorExperimental( image_path="tests/segmentation_utils_tests/flow_reader_test", @@ -38,7 +56,9 @@ def test_set_preprocessing_pipeline() -> None: image_size=(512, 512), output_size=(512,512), num_classes=7, - channel_mask= [True,True,True] + channel_mask= [True,True,True], + input_strategy=input_strategy, + output_strategy=output_strategy, ) image_queue = ImagePreprocessor.PreprocessingQueue(queue=[]) @@ -54,6 +74,9 @@ def test_set_mini_batch_size() -> None: # mock list directory patch.setattr(os, "listdir", lambda x: ["a", "b", "c"]) + input_strategy = DummyStrategy() + output_strategy = DummyStrategy() + # create generator instance generator = FlowGeneratorExperimental( image_path="tests/segmentation_utils_tests/flow_reader_test", @@ -61,7 +84,9 @@ def test_set_mini_batch_size() -> None: image_size=(512, 512), output_size=(512,512), num_classes=7, - channel_mask= [True,True,True] + channel_mask= [True,True,True], + input_strategy=input_strategy, + output_strategy=output_strategy, ) generator.set_mini_batch_size(2) @@ -73,6 +98,9 @@ def test_set_mini_batch_size_too_large() -> None: # mock list directory patch.setattr(os, "listdir", lambda x: ["a", "b", "c"]) + input_strategy = DummyStrategy() + output_strategy = DummyStrategy() + # create generator instance generator = FlowGeneratorExperimental( image_path="tests/segmentation_utils_tests/flow_reader_test", @@ -80,7 +108,9 @@ def test_set_mini_batch_size_too_large() -> None: image_size=(512, 512), output_size=(512,512), num_classes=7, - channel_mask= [True,True,True] + channel_mask= [True,True,True], + input_strategy=input_strategy, + output_strategy=output_strategy, ) with pytest.raises(ValueError) as exc_info: generator.set_mini_batch_size(5) @@ -94,6 +124,9 @@ def test_set_mini_batch_size_not_devisable() -> None: # mock list directory patch.setattr(os, "listdir", lambda x: ["a", "b", "c"]) + input_strategy = DummyStrategy() + output_strategy = DummyStrategy() + # create generator instance generator = FlowGeneratorExperimental( image_path="tests/segmentation_utils_tests/flow_reader_test", @@ -102,18 +135,104 @@ def test_set_mini_batch_size_not_devisable() -> None: output_size=(512,512), num_classes=7, channel_mask= [True,True,True], - batch_size=3 + batch_size=3, + input_strategy=input_strategy, + output_strategy=output_strategy, ) with pytest.raises(ValueError) as exc_info: generator.set_mini_batch_size(2) assert exc_info.value.args[0] == "The batch size must be divisible by the mini batch size" - + +def test_read_batch_get_item() -> None: + patch = MonkeyPatch() + # mock list directory + patch.setattr(os, "listdir", lambda x: ["a", "b", "c"]) + + input_strategy = DummyStrategy() + output_strategy = DummyStrategy(input_shape=(512, 512)) + + # create generator instance + + generator = FlowGeneratorExperimental( + image_path="tests/segmentation_utils_tests/flow_reader_test", + mask_path="tests/segmentation_utils_tests/flow_reader_test", + batch_size=2, + image_size=(512, 512), + output_size=(512,512), + num_classes=7, + channel_mask= [True,True,True], + input_strategy=input_strategy, + output_strategy=output_strategy, + ) + + batch = generator[0] + + assert batch[0].shape == (2, 512, 512, 3) + assert batch[1].shape == (2, 512, 512, 7) + +def test_read_batch_get_item_expand_dim_fail() -> None: + with pytest.raises(ValueError) as exc_info: + patch = MonkeyPatch() + # mock list directory + patch.setattr(os, "listdir", lambda x: ["a", "b", "c"]) + + input_strategy = DummyStrategy() + output_strategy = DummyStrategy(input_shape=(512, 512, 1)) + + # create generator instance + + generator = FlowGeneratorExperimental( + image_path="tests/segmentation_utils_tests/flow_reader_test", + mask_path="tests/segmentation_utils_tests/flow_reader_test", + batch_size=2, + image_size=(512, 512), + output_size=(512,512), + num_classes=7, + channel_mask= [True,True,True], + input_strategy=input_strategy, + output_strategy=output_strategy, + ) + + batch = generator[0] + ################ # Staging tests# ################ @pytest.mark.staging def test_read_batch_staging() -> None: - pass + classes = 7 + n_images = 4 + #prepare test files + for i in range(n_images): + image = np.random.randint(0,255,(512,512,3)) + mask = np.random.randint(0,classes,(512,512)) + np.save(f"tests/segmentation_utils_tests/flow_reader_test/image_{i}",image) + np.save(f"tests/segmentation_utils_tests/flow_reader_test/mask_{i}",mask) + + + dummy_model = tf.keras.models.Sequential( + [ + tf.keras.layers.Conv2D(input_shape=(512, 512, 3), filters = 3, kernel_size=(3,3), padding="same"), + tf.keras.layers.Conv2D(classes, kernel_size=(1,1), padding="same"), + ] + ) + dummy_model.compile( + optimizer="adam", + loss=tf.keras.losses.CategoricalCrossentropy(from_logits=True), + metrics=["accuracy"], + ) + + + + reader = FlowGeneratorExperimental( + image_path="tests/segmentation_utils_tests/flow_reader_test", + mask_path="tests/segmentation_utils_tests/flow_reader_test", + image_size=(512, 512), + output_size=(512,512), + num_classes=classes, + channel_mask= [True,True,True], + ) + diff --git a/utilities/segmentation_utils/flowreader.py b/utilities/segmentation_utils/flowreader.py index b7cdec2..384c6d3 100644 --- a/utilities/segmentation_utils/flowreader.py +++ b/utilities/segmentation_utils/flowreader.py @@ -8,6 +8,7 @@ import numpy as np import pandas as pd +import tensorflow as tf from keras.preprocessing.image import ImageDataGenerator from keras.utils import Sequence from PIL import Image @@ -19,7 +20,6 @@ from utilities.segmentation_utils.reading_strategies import IReader - class FlowGenerator: """ Initializes the flow generator object, @@ -281,7 +281,6 @@ def __init__( weights_path: Optional[str] = None, shuffle_counter: int = 0, image_ordering: ImageOrdering = ImageOrdering.CHANNEL_LAST, - ): if len(output_size) != 2: raise ValueError("The output size has to be a tuple of length 2") @@ -409,8 +408,10 @@ def __read_batch(self, dataset_index: int, end: int) -> None: batch_images = self.input_strategy.read_batch(self.batch_size, dataset_index) batch_masks = self.output_strategy.read_batch(self.batch_size, dataset_index) + print(batch_masks.shape) + # preprocess and assign images and masks to the batch - + if self.preprocessing_enabled: for i in range(self.batch_size): image = batch_images[i, ...] @@ -434,12 +435,28 @@ def __read_batch(self, dataset_index: int, end: int) -> None: batch_images[i, ...] = image batch_masks[i, ...] = mask - batch_masks = ImagePreprocessor.onehot_encode( - batch_masks, self.num_classes - ) + batch_masks = ImagePreprocessor.onehot_encode(batch_masks, self.num_classes) - batch_images = batch_images.reshape(n, self.mini_batch, batch_images.shape[1], batch_images.shape[2], batch_images.shape[3]) - batch_masks = batch_masks.reshape(n, self.batch_size, batch_images.shape[1], batch_images[2], batch_images[3]) + batch_images = tf.reshape( + batch_images, + ( + n, + self.mini_batch, + self.image_size[0], + self.image_size[1], + self.n_channels, + ), + ) + batch_masks = tf.reshape( + batch_masks, + ( + n, + self.mini_batch, + self.output_size[0], + self.output_size[1], + self.num_classes, + ), + ) # chaches the batch self.image_batch_store = batch_images diff --git a/utilities/segmentation_utils/reading_strategies.py b/utilities/segmentation_utils/reading_strategies.py index f9a4120..29e27cb 100644 --- a/utilities/segmentation_utils/reading_strategies.py +++ b/utilities/segmentation_utils/reading_strategies.py @@ -7,10 +7,10 @@ class IReader(Protocol): - def read_batch(self, start: int, end: int) -> None: + def read_batch(self, batch_size: int, dataset_index: int) -> np.ndarray: ... - def get_dataset_size(self) -> None: + def get_dataset_size(self) -> int: ... From dca7bb013e016ef469037b063a6db7fee923f9c2 Mon Sep 17 00:00:00 2001 From: Sajtospoga01 Date: Fri, 25 Aug 2023 14:59:22 +0100 Subject: [PATCH 31/75] update development pipeline to filter staging for now --- .github/workflows/development.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/development.yml b/.github/workflows/development.yml index aa8c832..6eaba5f 100644 --- a/.github/workflows/development.yml +++ b/.github/workflows/development.yml @@ -45,7 +45,7 @@ jobs: if [ -f requirements.txt ]; then pip install -r requirements.txt; fi - name: Test with pytest run: | - python -m pytest + python -m pytest -v -m "not staging" devops: needs: test From 692bb8980cfbdff9087227fddfec999c99d1dfed Mon Sep 17 00:00:00 2001 From: Sajtospoga01 Date: Fri, 25 Aug 2023 15:06:11 +0100 Subject: [PATCH 32/75] remove unnecessary files --- utilities/segmentation_utils/tempCodeRunnerFile.py | 3 --- 1 file changed, 3 deletions(-) delete mode 100644 utilities/segmentation_utils/tempCodeRunnerFile.py diff --git a/utilities/segmentation_utils/tempCodeRunnerFile.py b/utilities/segmentation_utils/tempCodeRunnerFile.py deleted file mode 100644 index 7870c72..0000000 --- a/utilities/segmentation_utils/tempCodeRunnerFile.py +++ /dev/null @@ -1,3 +0,0 @@ - -#! I strongly recommend to not use this haha. not the most appropriate way of testing -flowgenerator.__read_batch(start = start_index, end= end_index) \ No newline at end of file From a83bb424b5d9989af7deb156d7ce2f3d3c6493e6 Mon Sep 17 00:00:00 2001 From: Sajtospoga01 Date: Fri, 25 Aug 2023 15:11:47 +0100 Subject: [PATCH 33/75] update code cov not to include staging --- .github/workflows/development.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/development.yml b/.github/workflows/development.yml index 6eaba5f..8e04cb9 100644 --- a/.github/workflows/development.yml +++ b/.github/workflows/development.yml @@ -71,7 +71,7 @@ jobs: run: | mkdir -p ./coverage pip install pytest-cov pytest-mock - python -m pytest --cov --cov-report=xml:./coverage/coverage.xml + python -m pytest -m "not staging" --cov --cov-report=xml:./coverage/coverage.xml - name: Upload coverage uses: codecov/codecov-action@v3 From cfcef39dc62cf0fd34947cc692dfbc7eafddf3eb Mon Sep 17 00:00:00 2001 From: Sajtospoga01 Date: Fri, 25 Aug 2023 15:18:06 +0100 Subject: [PATCH 34/75] adds development markers to image_preprocessor_test, and test_flowreader --- .../image_preprocessor_test.py | 10 +-- .../test_flowreader.py | 85 +++++++++++-------- 2 files changed, 54 insertions(+), 41 deletions(-) diff --git a/tests/segmentation_utils_tests.py/image_preprocessor_test.py b/tests/segmentation_utils_tests.py/image_preprocessor_test.py index b596aaf..4841892 100644 --- a/tests/segmentation_utils_tests.py/image_preprocessor_test.py +++ b/tests/segmentation_utils_tests.py/image_preprocessor_test.py @@ -31,7 +31,7 @@ def test_image_onehot_encoder_column() -> None: ) assert np.array_equal(one_hot_image, onehot_test) - +@pytest.mark.development def test_image_onehot_encoder_squarematrix() -> None: # predifining input variables n_classes = 2 @@ -58,7 +58,7 @@ def test_image_onehot_encoder_squarematrix() -> None: ) assert np.array_equal(one_hot_image, onehot_test) - +@pytest.mark.development def test_image_augmentation_pipeline_squarematrix() -> None: # predifining input variables image = np.zeros((512, 512, 3)) @@ -89,7 +89,7 @@ def test_image_augmentation_pipeline_squarematrix() -> None: assert image_new.shape == (512, 512, 3) assert mask_new.shape == (256, 256, 1) - +@pytest.mark.development def test_processing_queue() -> None: # creating dummy queues @@ -102,7 +102,7 @@ def test_processing_queue() -> None: assert image_queue.queue[0].kwargs["seed"] == new_seed - +@pytest.mark.development def test_generate_default_queue() -> None: # creating default queues image_queue, mask_queue = ImagePreprocessor.generate_default_queue() @@ -111,7 +111,7 @@ def test_generate_default_queue() -> None: assert image_queue.get_queue_length() == 5 assert mask_queue.get_queue_length() == 2 - +@pytest.mark.development def test_flatten() -> None: image = np.zeros((512, 512, 3)) image = tf.convert_to_tensor(image) diff --git a/tests/segmentation_utils_tests.py/test_flowreader.py b/tests/segmentation_utils_tests.py/test_flowreader.py index 55b732d..39cb0ac 100644 --- a/tests/segmentation_utils_tests.py/test_flowreader.py +++ b/tests/segmentation_utils_tests.py/test_flowreader.py @@ -16,11 +16,12 @@ def __init__(self, input_shape=(512, 512, 3)): def read_batch(self, batch_size: int, dataset_index: int) -> np.ndarray: return np.zeros((batch_size, *self.input_shape)) - + def get_dataset_size(self) -> int: return 10 +@pytest.mark.development def test_can_create_instance() -> None: patch = MonkeyPatch() # mock list directory @@ -34,14 +35,16 @@ def test_can_create_instance() -> None: image_path="tests/segmentation_utils_tests/flow_reader_test", mask_path="tests/segmentation_utils_tests/flow_reader_test", image_size=(512, 512), - output_size=(512,512), + output_size=(512, 512), num_classes=7, - channel_mask= [True,True,True], + channel_mask=[True, True, True], input_strategy=input_strategy, output_strategy=output_strategy, ) pass + +@pytest.mark.development def test_set_preprocessing_pipeline() -> None: patch = MonkeyPatch() # mock list directory @@ -54,9 +57,9 @@ def test_set_preprocessing_pipeline() -> None: image_path="tests/segmentation_utils_tests/flow_reader_test", mask_path="tests/segmentation_utils_tests/flow_reader_test", image_size=(512, 512), - output_size=(512,512), + output_size=(512, 512), num_classes=7, - channel_mask= [True,True,True], + channel_mask=[True, True, True], input_strategy=input_strategy, output_strategy=output_strategy, ) @@ -64,11 +67,11 @@ def test_set_preprocessing_pipeline() -> None: image_queue = ImagePreprocessor.PreprocessingQueue(queue=[]) mask_queue = ImagePreprocessor.PreprocessingQueue(queue=[]) - generator.set_preprocessing_pipeline( - image_queue,mask_queue - ) + generator.set_preprocessing_pipeline(image_queue, mask_queue) pass + +@pytest.mark.development def test_set_mini_batch_size() -> None: patch = MonkeyPatch() # mock list directory @@ -82,9 +85,9 @@ def test_set_mini_batch_size() -> None: image_path="tests/segmentation_utils_tests/flow_reader_test", mask_path="tests/segmentation_utils_tests/flow_reader_test", image_size=(512, 512), - output_size=(512,512), + output_size=(512, 512), num_classes=7, - channel_mask= [True,True,True], + channel_mask=[True, True, True], input_strategy=input_strategy, output_strategy=output_strategy, ) @@ -92,8 +95,9 @@ def test_set_mini_batch_size() -> None: generator.set_mini_batch_size(2) assert generator.mini_batch == 2 -def test_set_mini_batch_size_too_large() -> None: +@pytest.mark.development +def test_set_mini_batch_size_too_large() -> None: patch = MonkeyPatch() # mock list directory patch.setattr(os, "listdir", lambda x: ["a", "b", "c"]) @@ -106,20 +110,23 @@ def test_set_mini_batch_size_too_large() -> None: image_path="tests/segmentation_utils_tests/flow_reader_test", mask_path="tests/segmentation_utils_tests/flow_reader_test", image_size=(512, 512), - output_size=(512,512), + output_size=(512, 512), num_classes=7, - channel_mask= [True,True,True], + channel_mask=[True, True, True], input_strategy=input_strategy, output_strategy=output_strategy, ) with pytest.raises(ValueError) as exc_info: generator.set_mini_batch_size(5) - assert exc_info.value.args[0] == "The mini batch size cannot be larger than the batch size" + assert ( + exc_info.value.args[0] + == "The mini batch size cannot be larger than the batch size" + ) +@pytest.mark.development def test_set_mini_batch_size_not_devisable() -> None: - patch = MonkeyPatch() # mock list directory patch.setattr(os, "listdir", lambda x: ["a", "b", "c"]) @@ -132,19 +139,23 @@ def test_set_mini_batch_size_not_devisable() -> None: image_path="tests/segmentation_utils_tests/flow_reader_test", mask_path="tests/segmentation_utils_tests/flow_reader_test", image_size=(512, 512), - output_size=(512,512), + output_size=(512, 512), num_classes=7, - channel_mask= [True,True,True], + channel_mask=[True, True, True], batch_size=3, input_strategy=input_strategy, output_strategy=output_strategy, - ) with pytest.raises(ValueError) as exc_info: generator.set_mini_batch_size(2) - assert exc_info.value.args[0] == "The batch size must be divisible by the mini batch size" + assert ( + exc_info.value.args[0] + == "The batch size must be divisible by the mini batch size" + ) + +@pytest.mark.development def test_read_batch_get_item() -> None: patch = MonkeyPatch() # mock list directory @@ -160,9 +171,9 @@ def test_read_batch_get_item() -> None: mask_path="tests/segmentation_utils_tests/flow_reader_test", batch_size=2, image_size=(512, 512), - output_size=(512,512), + output_size=(512, 512), num_classes=7, - channel_mask= [True,True,True], + channel_mask=[True, True, True], input_strategy=input_strategy, output_strategy=output_strategy, ) @@ -172,6 +183,8 @@ def test_read_batch_get_item() -> None: assert batch[0].shape == (2, 512, 512, 3) assert batch[1].shape == (2, 512, 512, 7) + +@pytest.mark.development def test_read_batch_get_item_expand_dim_fail() -> None: with pytest.raises(ValueError) as exc_info: patch = MonkeyPatch() @@ -188,35 +201,38 @@ def test_read_batch_get_item_expand_dim_fail() -> None: mask_path="tests/segmentation_utils_tests/flow_reader_test", batch_size=2, image_size=(512, 512), - output_size=(512,512), + output_size=(512, 512), num_classes=7, - channel_mask= [True,True,True], + channel_mask=[True, True, True], input_strategy=input_strategy, output_strategy=output_strategy, ) batch = generator[0] + ################ # Staging tests# ################ + @pytest.mark.staging def test_read_batch_staging() -> None: classes = 7 n_images = 4 - #prepare test files + # prepare test files for i in range(n_images): - image = np.random.randint(0,255,(512,512,3)) - mask = np.random.randint(0,classes,(512,512)) - np.save(f"tests/segmentation_utils_tests/flow_reader_test/image_{i}",image) - np.save(f"tests/segmentation_utils_tests/flow_reader_test/mask_{i}",mask) - + image = np.random.randint(0, 255, (512, 512, 3)) + mask = np.random.randint(0, classes, (512, 512)) + np.save(f"tests/segmentation_utils_tests/flow_reader_test/image_{i}", image) + np.save(f"tests/segmentation_utils_tests/flow_reader_test/mask_{i}", mask) dummy_model = tf.keras.models.Sequential( [ - tf.keras.layers.Conv2D(input_shape=(512, 512, 3), filters = 3, kernel_size=(3,3), padding="same"), - tf.keras.layers.Conv2D(classes, kernel_size=(1,1), padding="same"), + tf.keras.layers.Conv2D( + input_shape=(512, 512, 3), filters=3, kernel_size=(3, 3), padding="same" + ), + tf.keras.layers.Conv2D(classes, kernel_size=(1, 1), padding="same"), ] ) dummy_model.compile( @@ -224,15 +240,12 @@ def test_read_batch_staging() -> None: loss=tf.keras.losses.CategoricalCrossentropy(from_logits=True), metrics=["accuracy"], ) - - reader = FlowGeneratorExperimental( image_path="tests/segmentation_utils_tests/flow_reader_test", mask_path="tests/segmentation_utils_tests/flow_reader_test", image_size=(512, 512), - output_size=(512,512), + output_size=(512, 512), num_classes=classes, - channel_mask= [True,True,True], + channel_mask=[True, True, True], ) - From 522d54f02d1ae9b987effa2fbe828e7d0275d01f Mon Sep 17 00:00:00 2001 From: Sajtospoga01 Date: Fri, 25 Aug 2023 15:48:54 +0100 Subject: [PATCH 35/75] updates flowgenerator tests to cover additional cases, removes unnecessary print statements from flowgenerator exp --- .../test_flowreader.py | 121 ++++++++++++++++++ utilities/segmentation_utils/flowreader.py | 11 +- 2 files changed, 126 insertions(+), 6 deletions(-) diff --git a/tests/segmentation_utils_tests.py/test_flowreader.py b/tests/segmentation_utils_tests.py/test_flowreader.py index 39cb0ac..0dae7c9 100644 --- a/tests/segmentation_utils_tests.py/test_flowreader.py +++ b/tests/segmentation_utils_tests.py/test_flowreader.py @@ -7,6 +7,7 @@ from pytest import MonkeyPatch from utilities.segmentation_utils import ImagePreprocessor +from utilities.segmentation_utils.constants import ImageOrdering from utilities.segmentation_utils.flowreader import FlowGeneratorExperimental @@ -183,6 +184,126 @@ def test_read_batch_get_item() -> None: assert batch[0].shape == (2, 512, 512, 3) assert batch[1].shape == (2, 512, 512, 7) +@pytest.mark.development +def test_read_batch_get_item_diff_minibatch() -> None: + patch = MonkeyPatch() + # mock list directory + patch.setattr(os, "listdir", lambda x: ["a", "b", "c"]) + + input_strategy = DummyStrategy() + output_strategy = DummyStrategy(input_shape=(512, 512)) + + # create generator instance + + generator = FlowGeneratorExperimental( + image_path="tests/segmentation_utils_tests/flow_reader_test", + mask_path="tests/segmentation_utils_tests/flow_reader_test", + batch_size=2, + image_size=(512, 512), + output_size=(512, 512), + num_classes=7, + channel_mask=[True, True, True], + input_strategy=input_strategy, + output_strategy=output_strategy, + ) + + generator.set_mini_batch_size(1) + + batch = generator[0] + + + assert batch[0].shape == (1, 512, 512, 3) + assert batch[1].shape == (1, 512, 512, 7) + + +@pytest.mark.development +def test_read_batch_get_item_channel_first() -> None: + patch = MonkeyPatch() + # mock list directory + patch.setattr(os, "listdir", lambda x: ["a", "b", "c"]) + + input_strategy = DummyStrategy() + output_strategy = DummyStrategy(input_shape=(512, 512)) + + # create generator instance + + generator = FlowGeneratorExperimental( + image_path="tests/segmentation_utils_tests/flow_reader_test", + mask_path="tests/segmentation_utils_tests/flow_reader_test", + batch_size=2, + image_size=(512, 512), + output_size=(512, 512), + num_classes=7, + channel_mask=[True, True, True], + input_strategy=input_strategy, + output_strategy=output_strategy, + image_ordering=ImageOrdering.CHANNEL_FIRST, + ) + + batch = generator[0] + + assert batch[0].shape == (2, 3, 512, 512) + assert batch[1].shape == (2, 7, 512, 512) + + +@pytest.mark.development +def test_read_batch_get_item_column() -> None: + patch = MonkeyPatch() + # mock list directory + patch.setattr(os, "listdir", lambda x: ["a", "b", "c"]) + + input_strategy = DummyStrategy() + output_strategy = DummyStrategy(input_shape=(512, 512)) + + # create generator instance + + generator = FlowGeneratorExperimental( + image_path="tests/segmentation_utils_tests/flow_reader_test", + mask_path="tests/segmentation_utils_tests/flow_reader_test", + batch_size=2, + image_size=(512, 512), + output_size=(512 * 512, 1), + num_classes=7, + channel_mask=[True, True, True], + input_strategy=input_strategy, + output_strategy=output_strategy, + ) + + batch = generator[0] + + assert batch[0].shape == (2, 512, 512, 3) + assert batch[1].shape == (2, 512 * 512, 7) + + +@pytest.mark.development +def test_read_batch_get_item_column_channel_first() -> None: + patch = MonkeyPatch() + # mock list directory + patch.setattr(os, "listdir", lambda x: ["a", "b", "c"]) + + input_strategy = DummyStrategy() + output_strategy = DummyStrategy(input_shape=(512, 512)) + + # create generator instance + + generator = FlowGeneratorExperimental( + image_path="tests/segmentation_utils_tests/flow_reader_test", + mask_path="tests/segmentation_utils_tests/flow_reader_test", + batch_size=2, + image_size=(512, 512), + output_size=(512 * 512, 1), + num_classes=7, + channel_mask=[True, True, True], + input_strategy=input_strategy, + output_strategy=output_strategy, + image_ordering=ImageOrdering.CHANNEL_FIRST, + ) + + batch = generator[0] + + assert batch[0].shape == (2, 3, 512, 512) + assert batch[1].shape == (2, 7, 512 * 512) + @pytest.mark.development def test_read_batch_get_item_expand_dim_fail() -> None: diff --git a/utilities/segmentation_utils/flowreader.py b/utilities/segmentation_utils/flowreader.py index 384c6d3..a764a6b 100644 --- a/utilities/segmentation_utils/flowreader.py +++ b/utilities/segmentation_utils/flowreader.py @@ -408,8 +408,6 @@ def __read_batch(self, dataset_index: int, end: int) -> None: batch_images = self.input_strategy.read_batch(self.batch_size, dataset_index) batch_masks = self.output_strategy.read_batch(self.batch_size, dataset_index) - print(batch_masks.shape) - # preprocess and assign images and masks to the batch if self.preprocessing_enabled: @@ -485,13 +483,14 @@ def __getitem__(self, index) -> tuple[np.ndarray, np.ndarray]: batch_images = self.image_batch_store[store_index, ...] # type: ignore batch_masks = self.mask_batch_store[store_index, ...] # type: ignore + if self.column_vector: - batch_masks = np.reshape( + batch_masks = tf.reshape( batch_masks, ( - self.mini_batch, - batch_masks.shape[1] * batch_masks[2], - self.num_classes, + batch_masks.shape[0], + batch_masks.shape[1] * batch_masks.shape[2], + batch_masks.shape[3], ), ) From 4bf77e9842ce08a262c64ed739cc0079d9f0d5a0 Mon Sep 17 00:00:00 2001 From: Sajtospoga01 Date: Fri, 25 Aug 2023 16:47:47 +0100 Subject: [PATCH 36/75] update protocol of strategies --- utilities/segmentation_utils/reading_strategies.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utilities/segmentation_utils/reading_strategies.py b/utilities/segmentation_utils/reading_strategies.py index 29e27cb..08723c4 100644 --- a/utilities/segmentation_utils/reading_strategies.py +++ b/utilities/segmentation_utils/reading_strategies.py @@ -10,7 +10,7 @@ class IReader(Protocol): def read_batch(self, batch_size: int, dataset_index: int) -> np.ndarray: ... - def get_dataset_size(self) -> int: + def get_dataset_size(self, minibatch:int) -> int: ... From dfd0f1f8b733c3b78128febac49e9a46fb3bec7d Mon Sep 17 00:00:00 2001 From: Ayleen Sohaib <2684413S@student.gla.ac.uk> Date: Fri, 25 Aug 2023 17:06:16 +0100 Subject: [PATCH 37/75] added shuffle method and getter for image_size --- .../segmentation_utils/reading_strategies.py | 31 ++++++++++++++++++- 1 file changed, 30 insertions(+), 1 deletion(-) diff --git a/utilities/segmentation_utils/reading_strategies.py b/utilities/segmentation_utils/reading_strategies.py index 08723c4..9078c4a 100644 --- a/utilities/segmentation_utils/reading_strategies.py +++ b/utilities/segmentation_utils/reading_strategies.py @@ -10,9 +10,14 @@ class IReader(Protocol): def read_batch(self, batch_size: int, dataset_index: int) -> np.ndarray: ... - def get_dataset_size(self, minibatch:int) -> int: + def get_dataset_size(self) -> int: ... + def get_image_size(self) -> int: + ... + + def __shuffle_filenames__(self) -> None: + ... class RGBImageStrategy: def __init__( @@ -46,6 +51,18 @@ def read_batch(self, batch_size, dataset_index) -> np.ndarray: def get_dataset_size(self, mini_batch) -> int: dataset_size = int(np.floor(len(self.image_filenames) / float(mini_batch))) return dataset_size + + def get_image_size(self) -> int: + return self.image_size + + def __shuffle_filenames__(self) -> None: + if self.shuffle: + state = np.random.RandomState(self.seed + self.shuffle_counter) + self.shuffle_counter += 1 + shuffled_indices = state.permutation(len(self.image_filenames)) + shuffled_indices = shuffled_indices.astype(int) + for array in self.linked_data: + array = array[shuffled_indices] class HyperspectralImageStrategy: @@ -91,3 +108,15 @@ def read_batch(self, batch_size: int, dataset_index: int) -> np.ndarray: def get_dataset_size(self, mini_batch) -> int: dataset_size = int(np.floor(len(self.image_filenames) / float(mini_batch))) return dataset_size + + def get_image_size(self) -> int: + return self.image_size + + def __shuffle_filenames__(self) -> None: + if self.shuffle: + state = np.random.RandomState(self.seed + self.shuffle_counter) + self.shuffle_counter += 1 + shuffled_indices = state.permutation(len(self.image_filenames)) + shuffled_indices = shuffled_indices.astype(int) + for array in self.linked_data: + array = array[shuffled_indices] From 5ea374ab484a002a8e5787e2265edc3ef4f45a6f Mon Sep 17 00:00:00 2001 From: Sajtospoga01 Date: Fri, 25 Aug 2023 17:22:11 +0100 Subject: [PATCH 38/75] update strategies with shuffle and get image size method, updates flowgenerator removes unnecessary arguments. updates tests accordingly --- .../test_flowreader.py | 76 +++++------ utilities/segmentation_utils/flowreader.py | 120 +++++------------- .../segmentation_utils/reading_strategies.py | 47 +++---- 3 files changed, 81 insertions(+), 162 deletions(-) diff --git a/tests/segmentation_utils_tests.py/test_flowreader.py b/tests/segmentation_utils_tests.py/test_flowreader.py index 0dae7c9..5bdcd36 100644 --- a/tests/segmentation_utils_tests.py/test_flowreader.py +++ b/tests/segmentation_utils_tests.py/test_flowreader.py @@ -18,9 +18,15 @@ def __init__(self, input_shape=(512, 512, 3)): def read_batch(self, batch_size: int, dataset_index: int) -> np.ndarray: return np.zeros((batch_size, *self.input_shape)) - def get_dataset_size(self) -> int: + def get_dataset_size(self,minibatch) -> int: return 10 + def get_image_size(self) -> tuple[int, int]: + return self.input_shape[:2] + + def shuffle_filenames(self, seed: int) -> None: + pass + @pytest.mark.development def test_can_create_instance() -> None: @@ -33,10 +39,6 @@ def test_can_create_instance() -> None: # create generator instance generator = FlowGeneratorExperimental( - image_path="tests/segmentation_utils_tests/flow_reader_test", - mask_path="tests/segmentation_utils_tests/flow_reader_test", - image_size=(512, 512), - output_size=(512, 512), num_classes=7, channel_mask=[True, True, True], input_strategy=input_strategy, @@ -55,10 +57,6 @@ def test_set_preprocessing_pipeline() -> None: output_strategy = DummyStrategy() # create generator instance generator = FlowGeneratorExperimental( - image_path="tests/segmentation_utils_tests/flow_reader_test", - mask_path="tests/segmentation_utils_tests/flow_reader_test", - image_size=(512, 512), - output_size=(512, 512), num_classes=7, channel_mask=[True, True, True], input_strategy=input_strategy, @@ -83,10 +81,6 @@ def test_set_mini_batch_size() -> None: # create generator instance generator = FlowGeneratorExperimental( - image_path="tests/segmentation_utils_tests/flow_reader_test", - mask_path="tests/segmentation_utils_tests/flow_reader_test", - image_size=(512, 512), - output_size=(512, 512), num_classes=7, channel_mask=[True, True, True], input_strategy=input_strategy, @@ -108,10 +102,6 @@ def test_set_mini_batch_size_too_large() -> None: # create generator instance generator = FlowGeneratorExperimental( - image_path="tests/segmentation_utils_tests/flow_reader_test", - mask_path="tests/segmentation_utils_tests/flow_reader_test", - image_size=(512, 512), - output_size=(512, 512), num_classes=7, channel_mask=[True, True, True], input_strategy=input_strategy, @@ -137,10 +127,6 @@ def test_set_mini_batch_size_not_devisable() -> None: # create generator instance generator = FlowGeneratorExperimental( - image_path="tests/segmentation_utils_tests/flow_reader_test", - mask_path="tests/segmentation_utils_tests/flow_reader_test", - image_size=(512, 512), - output_size=(512, 512), num_classes=7, channel_mask=[True, True, True], batch_size=3, @@ -168,11 +154,7 @@ def test_read_batch_get_item() -> None: # create generator instance generator = FlowGeneratorExperimental( - image_path="tests/segmentation_utils_tests/flow_reader_test", - mask_path="tests/segmentation_utils_tests/flow_reader_test", batch_size=2, - image_size=(512, 512), - output_size=(512, 512), num_classes=7, channel_mask=[True, True, True], input_strategy=input_strategy, @@ -184,6 +166,7 @@ def test_read_batch_get_item() -> None: assert batch[0].shape == (2, 512, 512, 3) assert batch[1].shape == (2, 512, 512, 7) + @pytest.mark.development def test_read_batch_get_item_diff_minibatch() -> None: patch = MonkeyPatch() @@ -196,11 +179,7 @@ def test_read_batch_get_item_diff_minibatch() -> None: # create generator instance generator = FlowGeneratorExperimental( - image_path="tests/segmentation_utils_tests/flow_reader_test", - mask_path="tests/segmentation_utils_tests/flow_reader_test", batch_size=2, - image_size=(512, 512), - output_size=(512, 512), num_classes=7, channel_mask=[True, True, True], input_strategy=input_strategy, @@ -210,7 +189,6 @@ def test_read_batch_get_item_diff_minibatch() -> None: generator.set_mini_batch_size(1) batch = generator[0] - assert batch[0].shape == (1, 512, 512, 3) assert batch[1].shape == (1, 512, 512, 7) @@ -228,11 +206,7 @@ def test_read_batch_get_item_channel_first() -> None: # create generator instance generator = FlowGeneratorExperimental( - image_path="tests/segmentation_utils_tests/flow_reader_test", - mask_path="tests/segmentation_utils_tests/flow_reader_test", batch_size=2, - image_size=(512, 512), - output_size=(512, 512), num_classes=7, channel_mask=[True, True, True], input_strategy=input_strategy, @@ -258,15 +232,12 @@ def test_read_batch_get_item_column() -> None: # create generator instance generator = FlowGeneratorExperimental( - image_path="tests/segmentation_utils_tests/flow_reader_test", - mask_path="tests/segmentation_utils_tests/flow_reader_test", batch_size=2, - image_size=(512, 512), - output_size=(512 * 512, 1), num_classes=7, channel_mask=[True, True, True], input_strategy=input_strategy, output_strategy=output_strategy, + is_column=True, ) batch = generator[0] @@ -287,16 +258,13 @@ def test_read_batch_get_item_column_channel_first() -> None: # create generator instance generator = FlowGeneratorExperimental( - image_path="tests/segmentation_utils_tests/flow_reader_test", - mask_path="tests/segmentation_utils_tests/flow_reader_test", batch_size=2, - image_size=(512, 512), - output_size=(512 * 512, 1), num_classes=7, channel_mask=[True, True, True], input_strategy=input_strategy, output_strategy=output_strategy, image_ordering=ImageOrdering.CHANNEL_FIRST, + is_column=True, ) batch = generator[0] @@ -318,11 +286,7 @@ def test_read_batch_get_item_expand_dim_fail() -> None: # create generator instance generator = FlowGeneratorExperimental( - image_path="tests/segmentation_utils_tests/flow_reader_test", - mask_path="tests/segmentation_utils_tests/flow_reader_test", batch_size=2, - image_size=(512, 512), - output_size=(512, 512), num_classes=7, channel_mask=[True, True, True], input_strategy=input_strategy, @@ -332,6 +296,26 @@ def test_read_batch_get_item_expand_dim_fail() -> None: batch = generator[0] +def test_raises_error_not_compatible_shape() -> None: + with pytest.raises(ValueError) as exc_info: + patch = MonkeyPatch() + # mock list directory + patch.setattr(os, "listdir", lambda x: ["a", "b", "c"]) + + input_strategy = DummyStrategy() + output_strategy = DummyStrategy(input_shape=(512, 200, 1)) + + # create generator instance + + generator = FlowGeneratorExperimental( + batch_size=2, + num_classes=7, + channel_mask=[True, True, True], + input_strategy=input_strategy, + output_strategy=output_strategy, + ) + + ################ # Staging tests# ################ diff --git a/utilities/segmentation_utils/flowreader.py b/utilities/segmentation_utils/flowreader.py index a764a6b..b1fcb66 100644 --- a/utilities/segmentation_utils/flowreader.py +++ b/utilities/segmentation_utils/flowreader.py @@ -255,21 +255,16 @@ class FlowGeneratorExperimental(Sequence): Raises ------ - :ValueError: if the names of the images and masks do not match :ValueError: if the output size is not a tuple of length 2 :ValueError: if the output size is not a square matrix or a column vector """ def __init__( self, - image_path: str, - mask_path: str, - image_size: tuple[int, int], - output_size: tuple[int, int], - channel_mask: list[bool], - num_classes: int, input_strategy: IReader, output_strategy: IReader, + channel_mask: list[bool], + num_classes: int, shuffle: bool = True, batch_size: int = 2, preprocessing_enabled: bool = True, @@ -277,24 +272,16 @@ def __init__( preprocessing_seed: Optional[int] = None, preprocessing_queue_image: IPreprocessor = ImagePreprocessor.generate_image_queue(), preprocessing_queue_mask: IPreprocessor = ImagePreprocessor.generate_mask_queue(), - read_weights: bool = False, - weights_path: Optional[str] = None, - shuffle_counter: int = 0, image_ordering: ImageOrdering = ImageOrdering.CHANNEL_LAST, + is_column:bool = False, ): - if len(output_size) != 2: - raise ValueError("The output size has to be a tuple of length 2") - if output_size[1] != 1 and output_size[0] != output_size[1]: - raise ValueError( - "The output size has to be a square matrix or a column vector" - ) - - self.image_path = image_path - self.mask_path = mask_path + + self.input_strategy = input_strategy + self.output_strategy = output_strategy self.batch_size = batch_size self.mini_batch = batch_size - self.image_size = image_size - self.output_size = output_size + self.image_size = input_strategy.get_image_size() + self.output_size = output_strategy.get_image_size() self.channel_mask = np.array(channel_mask) self.n_channels = np.sum(channel_mask) self.num_classes = num_classes @@ -302,61 +289,28 @@ def __init__( self.seed = seed self.preprocessing_enabled = preprocessing_enabled self.preprocessing_seed = preprocessing_seed - self.read_weights = read_weights - self.weights_path = weights_path + self.preprocessing_queue_image = preprocessing_queue_image self.preprocessing_queue_mask = preprocessing_queue_mask - self.shuffle_counter = shuffle_counter + self.image_ordering = image_ordering + self.is_column = is_column - self.image_filenames = np.array(sorted(os.listdir(self.image_path))) - self.mask_filenames = np.array(sorted(os.listdir(self.mask_path))) - - self.input_strategy = input_strategy - self.output_strategy = output_strategy - - # should be moved out as a strategy - if self.read_weights: - weights_df = pd.read_csv(self.weights_path, header=None) - weights_np = weights_df.to_numpy() - print(weights_np.shape) - # sort the numpy array by the first column - weights_np = weights_np[weights_np[:, 0].argsort()] - - print(weights_np) - self.weights = weights_np[:, 1:].astype(np.float64) - weight_names = weights_np[:, 0] - for mask, weight_name in zip(self.mask_filenames, weight_names): - if mask != weight_name: - raise ValueError("The mask and weight directories do not match") - - self.linked_data = [self.image_filenames, self.mask_filenames] - if self.read_weights: - self.linked_data.append(self.weights) - - self.__shuffle_filenames() - self.dataset_size = self.__len__() - - print("Validating dataset...") - for i_name, m_name in tqdm(zip(self.image_filenames, self.mask_filenames)): - if i_name != m_name: - raise ValueError("The image and mask directories do not match") + self.image_batch_store = None self.mask_batch_store = None self.validity_index = 0 + self.shuffle_counter = 0 - if self.output_size[1] == 1: - # only enters if the output is a column vector - # such no need to define it otherwise - dimension = math.sqrt(self.output_size[0]) - self.output_reshape = (int(dimension), int(dimension)) - self.column_vector = True - else: - self.output_reshape = self.output_size - self.column_vector = False + self.__shuffle_filenames() - print("Reading images from: ", self.image_path) + if len(self.output_size) != 2: + raise ValueError("The output size has to be a tuple of length 2") + if self.output_size[1] != 1 and self.output_size[0] != self.output_size[1]: + raise ValueError( + "The output size has to be a square matrix or a column vector" + ) def set_preprocessing_pipeline( self, @@ -394,13 +348,8 @@ def set_mini_batch_size(self, batch_size: int) -> None: raise ValueError("The batch size must be divisible by the mini batch size") self.mini_batch = batch_size - def __read_batch(self, dataset_index: int, end: int) -> None: + def __read_batch(self, dataset_index: int) -> None: # read image batch - batch_image_filenames = self.image_filenames[dataset_index:end] - batch_mask_filenames = self.mask_filenames[dataset_index:end] - for image, mask in zip(batch_image_filenames, batch_mask_filenames): - if image != mask: - raise ValueError("The image and mask directories do not match") # calculate number of mini batches in a batch n = self.batch_size // self.mini_batch @@ -463,17 +412,17 @@ def __read_batch(self, dataset_index: int, end: int) -> None: # required to check when to read the next batch def __len__(self) -> int: - return int(np.floor(len(self.image_filenames) / float(self.mini_batch))) + return self.input_strategy.get_dataset_size(self.mini_batch) def __getitem__(self, index) -> tuple[np.ndarray, np.ndarray]: # check if the batch is already cached - index = index % self.dataset_size + index = index % self.__len__() if index < self.validity_index - self.batch_size // self.mini_batch: self.validity_index = 0 if index == self.validity_index: - self.__read_batch(index * self.batch_size, (index + 1) * self.batch_size) + self.__read_batch(index * self.batch_size) self.validity_index = (self.batch_size // self.mini_batch) + index # slices new batch @@ -484,7 +433,7 @@ def __getitem__(self, index) -> tuple[np.ndarray, np.ndarray]: batch_images = self.image_batch_store[store_index, ...] # type: ignore batch_masks = self.mask_batch_store[store_index, ...] # type: ignore - if self.column_vector: + if self.is_column: batch_masks = tf.reshape( batch_masks, ( @@ -498,24 +447,15 @@ def __getitem__(self, index) -> tuple[np.ndarray, np.ndarray]: batch_images = np.moveaxis(batch_images, -1, 1) batch_masks = np.moveaxis(batch_masks, -1, 1) - if self.read_weights: - batch_weights = self.weights[ - index * self.batch_size : (index + 1) * self.batch_size, ... - ] - return batch_images, batch_masks, batch_weights - else: - return batch_images, batch_masks + return batch_images, batch_masks def on_epoch_end(self) -> None: # Shuffle image and mask filenames self.__shuffle_filenames() def __shuffle_filenames(self) -> None: - if self.shuffle: - state = np.random.RandomState(self.seed + self.shuffle_counter) - self.shuffle_counter += 1 - shuffled_indices = state.permutation(len(self.image_filenames)) - shuffled_indices = shuffled_indices.astype(int) - for array in self.linked_data: - array = array[shuffled_indices] + new_seed = self.seed + self.shuffle_counter + self.input_strategy.shuffle_filenames(new_seed) + self.output_strategy.shuffle_filenames(new_seed) + self.shuffle_counter += 1 diff --git a/utilities/segmentation_utils/reading_strategies.py b/utilities/segmentation_utils/reading_strategies.py index 9078c4a..7451fd5 100644 --- a/utilities/segmentation_utils/reading_strategies.py +++ b/utilities/segmentation_utils/reading_strategies.py @@ -10,15 +10,16 @@ class IReader(Protocol): def read_batch(self, batch_size: int, dataset_index: int) -> np.ndarray: ... - def get_dataset_size(self) -> int: + def get_dataset_size(self,minibatch:int) -> int: ... - def get_image_size(self) -> int: + def get_image_size(self) -> tuple[int,int]: ... - def __shuffle_filenames__(self) -> None: + def shuffle_filenames(self,seed:int) -> None: ... + class RGBImageStrategy: def __init__( self, @@ -51,18 +52,15 @@ def read_batch(self, batch_size, dataset_index) -> np.ndarray: def get_dataset_size(self, mini_batch) -> int: dataset_size = int(np.floor(len(self.image_filenames) / float(mini_batch))) return dataset_size - - def get_image_size(self) -> int: + + def get_image_size(self) -> tuple[int,int]: return self.image_size - - def __shuffle_filenames__(self) -> None: - if self.shuffle: - state = np.random.RandomState(self.seed + self.shuffle_counter) - self.shuffle_counter += 1 - shuffled_indices = state.permutation(len(self.image_filenames)) - shuffled_indices = shuffled_indices.astype(int) - for array in self.linked_data: - array = array[shuffled_indices] + + def shuffle_filenames(self,seed:int) -> None: + state = np.random.RandomState(seed) + shuffled_indices = state.permutation(len(self.image_filenames)) + shuffled_indices = shuffled_indices.astype(int) + self.image_filenames = self.image_filenames[shuffled_indices] class HyperspectralImageStrategy: @@ -108,15 +106,12 @@ def read_batch(self, batch_size: int, dataset_index: int) -> np.ndarray: def get_dataset_size(self, mini_batch) -> int: dataset_size = int(np.floor(len(self.image_filenames) / float(mini_batch))) return dataset_size - - def get_image_size(self) -> int: - return self.image_size - - def __shuffle_filenames__(self) -> None: - if self.shuffle: - state = np.random.RandomState(self.seed + self.shuffle_counter) - self.shuffle_counter += 1 - shuffled_indices = state.permutation(len(self.image_filenames)) - shuffled_indices = shuffled_indices.astype(int) - for array in self.linked_data: - array = array[shuffled_indices] + + def get_image_size(self) -> tuple[int,int]: + return self.image_resize + + def shuffle_filenames(self,seed:int) -> None: + state = np.random.RandomState(seed) + shuffled_indices = state.permutation(len(self.image_filenames)) + shuffled_indices = shuffled_indices.astype(int) + self.image_filenames = self.image_filenames[shuffled_indices] From 51900d8ad612619504b72dcff8c2079c2301fccf Mon Sep 17 00:00:00 2001 From: Ayleen Sohaib <2684413S@student.gla.ac.uk> Date: Fri, 25 Aug 2023 18:22:08 +0100 Subject: [PATCH 39/75] added more tests, changed variable name image_resize to image_size HyperspectralImageStrategy, modified marker development to pyproject.toml --- pyproject.toml | 1 + .../test_strategies.py | 222 +++++++++++++++++- .../segmentation_utils/reading_strategies.py | 10 +- 3 files changed, 216 insertions(+), 17 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 922d766..77f08e4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -30,6 +30,7 @@ packages = ["utilities"] markers = [ "staging: Mark a test as part of the staging environment", "production: Mark a test as part of the production environment", + "development: Mark a test as part of the development environment", ] [project.optional-dependencies] diff --git a/tests/segmentation_utils_tests.py/test_strategies.py b/tests/segmentation_utils_tests.py/test_strategies.py index b2084ae..209c3bf 100644 --- a/tests/segmentation_utils_tests.py/test_strategies.py +++ b/tests/segmentation_utils_tests.py/test_strategies.py @@ -2,6 +2,7 @@ import numpy as np from PIL import Image +import pytest from pytest import MonkeyPatch from utilities.segmentation_utils.reading_strategies import ( @@ -34,7 +35,7 @@ def __exit__(self, type, value, traceback): # called at the end of a 'with' block pass - +@pytest.mark.development def test_read_batch_image_path() -> None: # checking if the file is being opened and read correctly patch = MonkeyPatch() @@ -63,7 +64,7 @@ def test_read_batch_image_path() -> None: patch.undo() patch.undo() - +@pytest.mark.development def test_read_batch_returns_nparray() -> None: # checking if the returned value is a numpy array @@ -93,8 +94,8 @@ def test_read_batch_returns_nparray() -> None: patch.undo() patch.undo() - -def test_get_dataset_size() -> None: +@pytest.mark.development +def test_RGB_get_dataset_size() -> None: # checking if the calculation is done correctly patch = MonkeyPatch() @@ -102,20 +103,13 @@ def test_get_dataset_size() -> None: patch.setattr(os, "listdir", lambda x: mock_filenames) - #! not needed as you arent reading any image in this function - patch.setattr( - Image, - "open", - lambda _: Image.fromarray(np.ones((224, 224, 3)).astype(np.uint8)), - ) - image_strategy = RGBImageStrategy( image_path="tests/segmentation_utils_tests/test_strategies", image_size=(224, 224), image_resample=Image.Resampling.NEAREST, ) dataset = len(mock_filenames) # number of images in the specified path - mini_batch = 2 # number of images we want in each batch + mini_batch = 2 # number of images we want in each batch expected_value = int( np.floor(dataset / float(mini_batch)) ) # number of sets of images we expect @@ -125,7 +119,33 @@ def test_get_dataset_size() -> None: patch.undo() patch.undo() +@pytest.mark.development +def test_Hyperspectral_get_dataset_size() -> None: + # checking if the calculation is done correctly + patch = MonkeyPatch() + + mock_filenames = ["a", "b", "c"] + + patch.setattr(os, "listdir", lambda x: mock_filenames) + + image_strategy = HyperspectralImageStrategy( + image_path="tests/segmentation_utils_tests/test_strategies", + image_size=(224, 224), + package=MockRasterio(n=3, size=(224, 224), bands=3, dtypes=["uint8"]), + ) + dataset = len(mock_filenames) # number of images in the specified path + mini_batch = 2 # number of images we want in each batch + expected_value = int( + np.floor(dataset / float(mini_batch)) + ) # number of sets of images we expect + + dataset_size = image_strategy.get_dataset_size(mini_batch) + assert dataset_size == expected_value + patch.undo() + patch.undo() + +@pytest.mark.development def test_hyperspectral_open(): patch = MonkeyPatch() mock_filenames = ["a", "b", "c"] @@ -146,3 +166,181 @@ def test_hyperspectral_open(): read_images = strategy.read_batch(2, 0) assert read_images.shape == (2, 224, 224, 3) + +@pytest.mark.development +def test_empty_batch(): + patch = MonkeyPatch() + + mock_filenames = ["a", "b", "c"] + + patch.setattr(os, "listdir", lambda x: mock_filenames) + + patch.setattr( + Image, + "open", + lambda _: Image.fromarray(np.ones((224, 224, 3)).astype(np.uint8)), + ) + + image_strategy = RGBImageStrategy( + image_path="tests/segmentation_utils_tests/test_strategies", + image_size=(224, 224), + image_resample=Image.Resampling.NEAREST, + ) + + batch_size = 0 + dataset_index = 0 + result = image_strategy.read_batch(batch_size, dataset_index) + + assert result.shape == (0, 224, 224, 3) #0 indicates there are no images in the batch + patch.undo() + patch.undo() + +@pytest.mark.development +def test_out_of_bounds_index(): + patch = MonkeyPatch() + + mock_filenames = ["a", "b", "c"] + + patch.setattr(os, "listdir", lambda x: mock_filenames) + + patch.setattr( + Image, + "open", + lambda _: Image.fromarray(np.ones((224, 224, 3)).astype(np.uint8)), + ) + + image_strategy = RGBImageStrategy( + image_path="tests/segmentation_utils_tests/test_strategies", + image_size=(224, 224), + image_resample=Image.Resampling.NEAREST, + ) + + batch_size = 2 #not an empty batch + dataset_index = len(image_strategy.image_filenames) #out of bounds index + + try: + result = image_strategy.read_batch(batch_size, dataset_index) + assert True + + except IndexError: + pass + patch.undo() + patch.undo() + +@pytest.mark.development +def test_batch_slicing(): + patch = MonkeyPatch() + + mock_filenames = ["a" for _ in range(20)] + + patch.setattr(os, "listdir", lambda x: mock_filenames) + + patch.setattr( + Image, + "open", + lambda _: Image.fromarray(np.ones((224, 224, 3)).astype(np.uint8)), + ) + + image_strategy = RGBImageStrategy( + image_path="tests/segmentation_utils_tests/test_strategies", + image_size=(224, 224), + image_resample=Image.Resampling.NEAREST, + ) + + batch_size = 10 + dataset_index = 2 + result = image_strategy.read_batch(batch_size, dataset_index) + assert result.shape[0] == batch_size #compare the size of returned data with batch_size + patch.undo() + patch.undo() + +@pytest.mark.development +def test_RGB_get_image_index(): + patch = MonkeyPatch() + + mock_filenames = ["a" for _ in range(20)] + + patch.setattr(os, "listdir", lambda x: mock_filenames) + + image_strategy = RGBImageStrategy( + image_path="tests/segmentation_utils_tests/test_strategies", + image_size=(224, 224), + image_resample=Image.Resampling.NEAREST, + ) + + result = image_strategy.get_image_size( + ) + assert result == (224,224) + +@pytest.mark.development +def test_HyperSpectral_get_image_index(): + patch = MonkeyPatch() + + mock_filenames = ["a" for _ in range(20)] + + patch.setattr(os, "listdir", lambda x: mock_filenames) + + image_strategy = HyperspectralImageStrategy( + image_path="tests/segmentation_utils_tests/test_strategies", + image_size=(224, 224), + package=MockRasterio(n=3, size=(224, 224), bands=3, dtypes=["uint8"]) + ) + + result = image_strategy.get_image_size( + ) + assert result == (224,224) + + +def test_RGB_shuffle(): + patch = MonkeyPatch() + + mock_filenames = [str(i) for i in range(20)] + + patch.setattr(os, "listdir", lambda x: mock_filenames) + + image_strategy_1 = RGBImageStrategy( + image_path="tests/segmentation_utils_tests/test_strategies", + image_size=(224, 224), + image_resample=Image.Resampling.NEAREST, + ) + + image_strategy_2 = RGBImageStrategy( + image_path="tests/segmentation_utils_tests/test_strategies", + image_size=(224, 224), + image_resample=Image.Resampling.NEAREST, + ) + + n = 100 + + for i in range(n): + image_strategy_1.shuffle_filenames(i) + image_strategy_2.shuffle_filenames(i) + + assert np.array_equal(image_strategy_1.image_filenames, image_strategy_2.image_filenames) + +def test_Hyperspectral_shuffle(): + patch = MonkeyPatch() + + mock_filenames = [str(i) for i in range(20)] + + patch.setattr(os, "listdir", lambda x: mock_filenames) + + image_strategy_1 = HyperspectralImageStrategy( + image_path="tests/segmentation_utils_tests/test_strategies", + image_size=(224, 224), + package=MockRasterio(n=3, size=(224, 224), bands=3, dtypes=["uint8"]) + ) + + image_strategy_2 = HyperspectralImageStrategy( + image_path="tests/segmentation_utils_tests/test_strategies", + image_size=(224, 224), + package=MockRasterio(n=3, size=(224, 224), bands=3, dtypes=["uint8"]) + ) + + n = 100 + + for i in range(n): + image_strategy_1.shuffle_filenames(i) + image_strategy_2.shuffle_filenames(i) + + assert np.array_equal(image_strategy_1.image_filenames, image_strategy_2.image_filenames) \ No newline at end of file diff --git a/utilities/segmentation_utils/reading_strategies.py b/utilities/segmentation_utils/reading_strategies.py index 7451fd5..586af7d 100644 --- a/utilities/segmentation_utils/reading_strategies.py +++ b/utilities/segmentation_utils/reading_strategies.py @@ -68,13 +68,13 @@ class HyperspectralImageStrategy: def __init__( self, image_path: str, - image_resize: tuple[int, int], + image_size: tuple[int, int], image_resample=Image.Resampling.NEAREST, package: Any = rasterio, ): self.image_path = image_path self.image_filenames = np.array(sorted(os.listdir(self.image_path))) - self.image_resize = image_resize + self.image_size = image_size self.image_resample = image_resample self.package = package # gets the number of bands for the dataset @@ -90,13 +90,13 @@ def read_batch(self, batch_size: int, dataset_index: int) -> np.ndarray: # defines the array that will contain the images images = np.zeros( - (batch_size, self.bands, self.image_resize[0], self.image_resize[1]) + (batch_size, self.bands, self.image_size[0], self.image_size[1]) ) for i, filename in enumerate(batch_filenames): with self.package.open(os.path.join(self.image_path, filename)) as dataset: # .read() returns a numpy array that contains the raster cell values in your file. image = dataset.read() - images[i, :, :, :] = np.resize(image, self.image_resize) + images[i, :, :, :] = np.resize(image, self.image_size) # ensures channel-last orientation for the reader images = np.moveaxis(images, 1, 3) @@ -108,7 +108,7 @@ def get_dataset_size(self, mini_batch) -> int: return dataset_size def get_image_size(self) -> tuple[int,int]: - return self.image_resize + return self.image_size def shuffle_filenames(self,seed:int) -> None: state = np.random.RandomState(seed) From 579891817b179bb05207d37c68830ca031660661 Mon Sep 17 00:00:00 2001 From: Sajtospoga01 Date: Fri, 25 Aug 2023 19:10:56 +0100 Subject: [PATCH 40/75] adds ability to rgbstrategy readbatch to determine if it is reading grayscale --- utilities/segmentation_utils/reading_strategies.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/utilities/segmentation_utils/reading_strategies.py b/utilities/segmentation_utils/reading_strategies.py index 586af7d..d1aa2d0 100644 --- a/utilities/segmentation_utils/reading_strategies.py +++ b/utilities/segmentation_utils/reading_strategies.py @@ -33,6 +33,7 @@ def __init__( ) #!update: added variable to initialiser self.image_size = image_size self.image_resample = image_resample + self.is_color = True def read_batch(self, batch_size, dataset_index) -> np.ndarray: # read images with PIL @@ -40,12 +41,15 @@ def read_batch(self, batch_size, dataset_index) -> np.ndarray: dataset_index : dataset_index + batch_size ] images = np.zeros((batch_size, self.image_size[0], self.image_size[1], 3)) - + for i in range(batch_size): image = Image.open( os.path.join(self.image_path, batch_filenames[i]) ).resize(self.image_size, self.image_resample) image = np.array(image) + if len(image.shape) == 2 and self.is_color: + images = np.zeros((batch_size, self.image_size[0], self.image_size[1])) + is_color = False images[i, :, :, :] = image return images From ea85f3f54bbadc56846a523c121bc5c85cd7d944 Mon Sep 17 00:00:00 2001 From: Sajtospoga01 Date: Fri, 25 Aug 2023 19:17:11 +0100 Subject: [PATCH 41/75] fixes possible bug in read_batch --- utilities/segmentation_utils/reading_strategies.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utilities/segmentation_utils/reading_strategies.py b/utilities/segmentation_utils/reading_strategies.py index d1aa2d0..59c76f2 100644 --- a/utilities/segmentation_utils/reading_strategies.py +++ b/utilities/segmentation_utils/reading_strategies.py @@ -49,7 +49,7 @@ def read_batch(self, batch_size, dataset_index) -> np.ndarray: image = np.array(image) if len(image.shape) == 2 and self.is_color: images = np.zeros((batch_size, self.image_size[0], self.image_size[1])) - is_color = False + self.is_color = False images[i, :, :, :] = image return images From 1dc5a8c6eb896f836218ac39ab1b4248eb50307a Mon Sep 17 00:00:00 2001 From: Sajtospoga01 Date: Fri, 25 Aug 2023 19:32:54 +0100 Subject: [PATCH 42/75] fixes bug where there are too many indices indexed --- utilities/segmentation_utils/reading_strategies.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utilities/segmentation_utils/reading_strategies.py b/utilities/segmentation_utils/reading_strategies.py index 59c76f2..83100de 100644 --- a/utilities/segmentation_utils/reading_strategies.py +++ b/utilities/segmentation_utils/reading_strategies.py @@ -50,7 +50,7 @@ def read_batch(self, batch_size, dataset_index) -> np.ndarray: if len(image.shape) == 2 and self.is_color: images = np.zeros((batch_size, self.image_size[0], self.image_size[1])) self.is_color = False - images[i, :, :, :] = image + images[i, ...] = image return images def get_dataset_size(self, mini_batch) -> int: From a2f5ac9266c8b5f243af5c558c62bfcc5541fbbb Mon Sep 17 00:00:00 2001 From: Sajtospoga01 Date: Fri, 25 Aug 2023 19:38:44 +0100 Subject: [PATCH 43/75] add a print for testing --- utilities/segmentation_utils/reading_strategies.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/utilities/segmentation_utils/reading_strategies.py b/utilities/segmentation_utils/reading_strategies.py index 83100de..289b46f 100644 --- a/utilities/segmentation_utils/reading_strategies.py +++ b/utilities/segmentation_utils/reading_strategies.py @@ -4,6 +4,7 @@ import numpy as np import rasterio from PIL import Image +import tensorflow as tf class IReader(Protocol): @@ -43,6 +44,7 @@ def read_batch(self, batch_size, dataset_index) -> np.ndarray: images = np.zeros((batch_size, self.image_size[0], self.image_size[1], 3)) for i in range(batch_size): + tf.print("Reading image: ", batch_filenames[i]") image = Image.open( os.path.join(self.image_path, batch_filenames[i]) ).resize(self.image_size, self.image_resample) From f366fb751f6f1ffac1c3412f2c7dac33efa08ddc Mon Sep 17 00:00:00 2001 From: Sajtospoga01 Date: Fri, 25 Aug 2023 19:40:11 +0100 Subject: [PATCH 44/75] removes unnecessary comma --- utilities/segmentation_utils/reading_strategies.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/utilities/segmentation_utils/reading_strategies.py b/utilities/segmentation_utils/reading_strategies.py index 289b46f..99ddb7f 100644 --- a/utilities/segmentation_utils/reading_strategies.py +++ b/utilities/segmentation_utils/reading_strategies.py @@ -3,8 +3,8 @@ import numpy as np import rasterio -from PIL import Image import tensorflow as tf +from PIL import Image class IReader(Protocol): @@ -44,7 +44,7 @@ def read_batch(self, batch_size, dataset_index) -> np.ndarray: images = np.zeros((batch_size, self.image_size[0], self.image_size[1], 3)) for i in range(batch_size): - tf.print("Reading image: ", batch_filenames[i]") + tf.print("Reading image: ", batch_filenames[i]) image = Image.open( os.path.join(self.image_path, batch_filenames[i]) ).resize(self.image_size, self.image_resample) From 4fa965d35a0630311971fa2a16a65322cf5c22cd Mon Sep 17 00:00:00 2001 From: Sajtospoga01 Date: Fri, 25 Aug 2023 19:46:12 +0100 Subject: [PATCH 45/75] adds debugging print statements --- utilities/segmentation_utils/flowreader.py | 2 ++ utilities/segmentation_utils/reading_strategies.py | 12 ++++++++---- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/utilities/segmentation_utils/flowreader.py b/utilities/segmentation_utils/flowreader.py index b1fcb66..51b47ce 100644 --- a/utilities/segmentation_utils/flowreader.py +++ b/utilities/segmentation_utils/flowreader.py @@ -420,8 +420,10 @@ def __getitem__(self, index) -> tuple[np.ndarray, np.ndarray]: if index < self.validity_index - self.batch_size // self.mini_batch: self.validity_index = 0 + tf.print("Resetting validity index") if index == self.validity_index: + tf.print("Reading new batch") self.__read_batch(index * self.batch_size) self.validity_index = (self.batch_size // self.mini_batch) + index diff --git a/utilities/segmentation_utils/reading_strategies.py b/utilities/segmentation_utils/reading_strategies.py index 99ddb7f..173c681 100644 --- a/utilities/segmentation_utils/reading_strategies.py +++ b/utilities/segmentation_utils/reading_strategies.py @@ -34,24 +34,28 @@ def __init__( ) #!update: added variable to initialiser self.image_size = image_size self.image_resample = image_resample - self.is_color = True + def read_batch(self, batch_size, dataset_index) -> np.ndarray: + tf.print("Reading batch: ", dataset_index) # read images with PIL batch_filenames = self.image_filenames[ dataset_index : dataset_index + batch_size ] + + tf.print("Extracted filenames") + images = np.zeros((batch_size, self.image_size[0], self.image_size[1], 3)) - + is_color = True for i in range(batch_size): tf.print("Reading image: ", batch_filenames[i]) image = Image.open( os.path.join(self.image_path, batch_filenames[i]) ).resize(self.image_size, self.image_resample) image = np.array(image) - if len(image.shape) == 2 and self.is_color: + if len(image.shape) == 2 and is_color: images = np.zeros((batch_size, self.image_size[0], self.image_size[1])) - self.is_color = False + is_color = False images[i, ...] = image return images From f6de5cff5f82c5606f233d124a84d6a989671f87 Mon Sep 17 00:00:00 2001 From: Sajtospoga01 Date: Fri, 25 Aug 2023 19:51:15 +0100 Subject: [PATCH 46/75] adds more debug info --- utilities/segmentation_utils/flowreader.py | 2 +- utilities/segmentation_utils/reading_strategies.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/utilities/segmentation_utils/flowreader.py b/utilities/segmentation_utils/flowreader.py index 51b47ce..73876b3 100644 --- a/utilities/segmentation_utils/flowreader.py +++ b/utilities/segmentation_utils/flowreader.py @@ -423,7 +423,7 @@ def __getitem__(self, index) -> tuple[np.ndarray, np.ndarray]: tf.print("Resetting validity index") if index == self.validity_index: - tf.print("Reading new batch") + tf.print("Reading new batch at index: ", index) self.__read_batch(index * self.batch_size) self.validity_index = (self.batch_size // self.mini_batch) + index diff --git a/utilities/segmentation_utils/reading_strategies.py b/utilities/segmentation_utils/reading_strategies.py index 173c681..1187b29 100644 --- a/utilities/segmentation_utils/reading_strategies.py +++ b/utilities/segmentation_utils/reading_strategies.py @@ -43,7 +43,7 @@ def read_batch(self, batch_size, dataset_index) -> np.ndarray: dataset_index : dataset_index + batch_size ] - tf.print("Extracted filenames") + tf.print("Extracted filenames: ", batch_filenames.shape) images = np.zeros((batch_size, self.image_size[0], self.image_size[1], 3)) is_color = True From 01e9e81f04cf4c8b4d86bfbd59bfc1faece09db1 Mon Sep 17 00:00:00 2001 From: Sajtospoga01 Date: Fri, 25 Aug 2023 20:00:14 +0100 Subject: [PATCH 47/75] fixes possible problem with dataset indexing --- utilities/segmentation_utils/flowreader.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utilities/segmentation_utils/flowreader.py b/utilities/segmentation_utils/flowreader.py index 73876b3..0d0d544 100644 --- a/utilities/segmentation_utils/flowreader.py +++ b/utilities/segmentation_utils/flowreader.py @@ -424,7 +424,7 @@ def __getitem__(self, index) -> tuple[np.ndarray, np.ndarray]: if index == self.validity_index: tf.print("Reading new batch at index: ", index) - self.__read_batch(index * self.batch_size) + self.__read_batch(index * self.mini_batch) self.validity_index = (self.batch_size // self.mini_batch) + index # slices new batch From 1f010bac615ffc4831d42272d76b3e9e7085a4ad Mon Sep 17 00:00:00 2001 From: Sajtospoga01 Date: Fri, 25 Aug 2023 20:05:23 +0100 Subject: [PATCH 48/75] removes print statements for debugging --- utilities/segmentation_utils/flowreader.py | 4 ++-- utilities/segmentation_utils/reading_strategies.py | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/utilities/segmentation_utils/flowreader.py b/utilities/segmentation_utils/flowreader.py index 0d0d544..5ac8a3a 100644 --- a/utilities/segmentation_utils/flowreader.py +++ b/utilities/segmentation_utils/flowreader.py @@ -420,10 +420,10 @@ def __getitem__(self, index) -> tuple[np.ndarray, np.ndarray]: if index < self.validity_index - self.batch_size // self.mini_batch: self.validity_index = 0 - tf.print("Resetting validity index") + if index == self.validity_index: - tf.print("Reading new batch at index: ", index) + self.__read_batch(index * self.mini_batch) self.validity_index = (self.batch_size // self.mini_batch) + index diff --git a/utilities/segmentation_utils/reading_strategies.py b/utilities/segmentation_utils/reading_strategies.py index 1187b29..ca12fdf 100644 --- a/utilities/segmentation_utils/reading_strategies.py +++ b/utilities/segmentation_utils/reading_strategies.py @@ -37,18 +37,18 @@ def __init__( def read_batch(self, batch_size, dataset_index) -> np.ndarray: - tf.print("Reading batch: ", dataset_index) + # read images with PIL batch_filenames = self.image_filenames[ dataset_index : dataset_index + batch_size ] - tf.print("Extracted filenames: ", batch_filenames.shape) + images = np.zeros((batch_size, self.image_size[0], self.image_size[1], 3)) is_color = True for i in range(batch_size): - tf.print("Reading image: ", batch_filenames[i]) + image = Image.open( os.path.join(self.image_path, batch_filenames[i]) ).resize(self.image_size, self.image_resample) From cab20a8755d4b1e9bf333a6ce771b4ece351ecee Mon Sep 17 00:00:00 2001 From: Sajtospoga01 Date: Fri, 25 Aug 2023 20:13:30 +0100 Subject: [PATCH 49/75] reads debug logs --- utilities/segmentation_utils/flowreader.py | 10 +++++----- utilities/segmentation_utils/reading_strategies.py | 6 +++--- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/utilities/segmentation_utils/flowreader.py b/utilities/segmentation_utils/flowreader.py index 5ac8a3a..3e47c5d 100644 --- a/utilities/segmentation_utils/flowreader.py +++ b/utilities/segmentation_utils/flowreader.py @@ -420,18 +420,18 @@ def __getitem__(self, index) -> tuple[np.ndarray, np.ndarray]: if index < self.validity_index - self.batch_size // self.mini_batch: self.validity_index = 0 - + tf.print("Resetting validity index") if index == self.validity_index: - - self.__read_batch(index * self.mini_batch) + tf.print("Reading new batch at index: ", index) + self.__read_batch(index * self.batch_size) self.validity_index = (self.batch_size // self.mini_batch) + index - + tf.print("batch read at index: ", index) # slices new batch store_index = (self.batch_size // self.mini_batch) - ( self.validity_index - index ) - + tf.print("store index: ", store_index) batch_images = self.image_batch_store[store_index, ...] # type: ignore batch_masks = self.mask_batch_store[store_index, ...] # type: ignore diff --git a/utilities/segmentation_utils/reading_strategies.py b/utilities/segmentation_utils/reading_strategies.py index ca12fdf..1187b29 100644 --- a/utilities/segmentation_utils/reading_strategies.py +++ b/utilities/segmentation_utils/reading_strategies.py @@ -37,18 +37,18 @@ def __init__( def read_batch(self, batch_size, dataset_index) -> np.ndarray: - + tf.print("Reading batch: ", dataset_index) # read images with PIL batch_filenames = self.image_filenames[ dataset_index : dataset_index + batch_size ] - + tf.print("Extracted filenames: ", batch_filenames.shape) images = np.zeros((batch_size, self.image_size[0], self.image_size[1], 3)) is_color = True for i in range(batch_size): - + tf.print("Reading image: ", batch_filenames[i]) image = Image.open( os.path.join(self.image_path, batch_filenames[i]) ).resize(self.image_size, self.image_resample) From b74b2ccaa0f230262506f9b42e9db3bd282e79a3 Mon Sep 17 00:00:00 2001 From: Sajtospoga01 Date: Sat, 26 Aug 2023 09:26:29 +0100 Subject: [PATCH 50/75] removes print statements --- utilities/segmentation_utils/flowreader.py | 8 ++++---- utilities/segmentation_utils/reading_strategies.py | 6 +++--- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/utilities/segmentation_utils/flowreader.py b/utilities/segmentation_utils/flowreader.py index 3e47c5d..3da5c77 100644 --- a/utilities/segmentation_utils/flowreader.py +++ b/utilities/segmentation_utils/flowreader.py @@ -420,18 +420,18 @@ def __getitem__(self, index) -> tuple[np.ndarray, np.ndarray]: if index < self.validity_index - self.batch_size // self.mini_batch: self.validity_index = 0 - tf.print("Resetting validity index") + if index == self.validity_index: - tf.print("Reading new batch at index: ", index) + self.__read_batch(index * self.batch_size) self.validity_index = (self.batch_size // self.mini_batch) + index - tf.print("batch read at index: ", index) + # slices new batch store_index = (self.batch_size // self.mini_batch) - ( self.validity_index - index ) - tf.print("store index: ", store_index) + batch_images = self.image_batch_store[store_index, ...] # type: ignore batch_masks = self.mask_batch_store[store_index, ...] # type: ignore diff --git a/utilities/segmentation_utils/reading_strategies.py b/utilities/segmentation_utils/reading_strategies.py index 1187b29..2fb8f55 100644 --- a/utilities/segmentation_utils/reading_strategies.py +++ b/utilities/segmentation_utils/reading_strategies.py @@ -37,18 +37,18 @@ def __init__( def read_batch(self, batch_size, dataset_index) -> np.ndarray: - tf.print("Reading batch: ", dataset_index) + # read images with PIL batch_filenames = self.image_filenames[ dataset_index : dataset_index + batch_size ] - tf.print("Extracted filenames: ", batch_filenames.shape) + images = np.zeros((batch_size, self.image_size[0], self.image_size[1], 3)) is_color = True for i in range(batch_size): - tf.print("Reading image: ", batch_filenames[i]) + image = Image.open( os.path.join(self.image_path, batch_filenames[i]) ).resize(self.image_size, self.image_resample) From a3c8070c16f7f57d21edbcc5c94562b0e847b9e1 Mon Sep 17 00:00:00 2001 From: Sajtospoga01 Date: Sat, 26 Aug 2023 09:58:35 +0100 Subject: [PATCH 51/75] push fix to indexing error --- utilities/segmentation_utils/flowreader.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utilities/segmentation_utils/flowreader.py b/utilities/segmentation_utils/flowreader.py index 3da5c77..20e9ca8 100644 --- a/utilities/segmentation_utils/flowreader.py +++ b/utilities/segmentation_utils/flowreader.py @@ -424,7 +424,7 @@ def __getitem__(self, index) -> tuple[np.ndarray, np.ndarray]: if index == self.validity_index: - self.__read_batch(index * self.batch_size) + self.__read_batch(index * self.mini_batch) self.validity_index = (self.batch_size // self.mini_batch) + index # slices new batch From a88497eb3145e8fcc813c92973353c01340f386a Mon Sep 17 00:00:00 2001 From: Sajtospoga01 Date: Sat, 26 Aug 2023 10:05:06 +0100 Subject: [PATCH 52/75] fixing linting issues, removing unused imports --- utilities/segmentation_utils/flowreader.py | 17 ++++------------ .../segmentation_utils/reading_strategies.py | 20 +++++++------------ 2 files changed, 11 insertions(+), 26 deletions(-) diff --git a/utilities/segmentation_utils/flowreader.py b/utilities/segmentation_utils/flowreader.py index 20e9ca8..6d20be5 100644 --- a/utilities/segmentation_utils/flowreader.py +++ b/utilities/segmentation_utils/flowreader.py @@ -7,12 +7,9 @@ from typing import Optional import numpy as np -import pandas as pd import tensorflow as tf from keras.preprocessing.image import ImageDataGenerator from keras.utils import Sequence -from PIL import Image -from tqdm import tqdm from utilities.segmentation_utils import ImagePreprocessor from utilities.segmentation_utils.constants import ImageOrdering @@ -273,9 +270,8 @@ def __init__( preprocessing_queue_image: IPreprocessor = ImagePreprocessor.generate_image_queue(), preprocessing_queue_mask: IPreprocessor = ImagePreprocessor.generate_mask_queue(), image_ordering: ImageOrdering = ImageOrdering.CHANNEL_LAST, - is_column:bool = False, + is_column: bool = False, ): - self.input_strategy = input_strategy self.output_strategy = output_strategy self.batch_size = batch_size @@ -292,12 +288,10 @@ def __init__( self.preprocessing_queue_image = preprocessing_queue_image self.preprocessing_queue_mask = preprocessing_queue_mask - + self.image_ordering = image_ordering self.is_column = is_column - - self.image_batch_store = None self.mask_batch_store = None self.validity_index = 0 @@ -420,18 +414,16 @@ def __getitem__(self, index) -> tuple[np.ndarray, np.ndarray]: if index < self.validity_index - self.batch_size // self.mini_batch: self.validity_index = 0 - if index == self.validity_index: - self.__read_batch(index * self.mini_batch) self.validity_index = (self.batch_size // self.mini_batch) + index - + # slices new batch store_index = (self.batch_size // self.mini_batch) - ( self.validity_index - index ) - + batch_images = self.image_batch_store[store_index, ...] # type: ignore batch_masks = self.mask_batch_store[store_index, ...] # type: ignore @@ -449,7 +441,6 @@ def __getitem__(self, index) -> tuple[np.ndarray, np.ndarray]: batch_images = np.moveaxis(batch_images, -1, 1) batch_masks = np.moveaxis(batch_masks, -1, 1) - return batch_images, batch_masks def on_epoch_end(self) -> None: diff --git a/utilities/segmentation_utils/reading_strategies.py b/utilities/segmentation_utils/reading_strategies.py index 2fb8f55..30cd6e7 100644 --- a/utilities/segmentation_utils/reading_strategies.py +++ b/utilities/segmentation_utils/reading_strategies.py @@ -3,7 +3,6 @@ import numpy as np import rasterio -import tensorflow as tf from PIL import Image @@ -11,13 +10,13 @@ class IReader(Protocol): def read_batch(self, batch_size: int, dataset_index: int) -> np.ndarray: ... - def get_dataset_size(self,minibatch:int) -> int: + def get_dataset_size(self, minibatch: int) -> int: ... - def get_image_size(self) -> tuple[int,int]: + def get_image_size(self) -> tuple[int, int]: ... - def shuffle_filenames(self,seed:int) -> None: + def shuffle_filenames(self, seed: int) -> None: ... @@ -34,21 +33,16 @@ def __init__( ) #!update: added variable to initialiser self.image_size = image_size self.image_resample = image_resample - def read_batch(self, batch_size, dataset_index) -> np.ndarray: - # read images with PIL batch_filenames = self.image_filenames[ dataset_index : dataset_index + batch_size ] - - images = np.zeros((batch_size, self.image_size[0], self.image_size[1], 3)) is_color = True for i in range(batch_size): - image = Image.open( os.path.join(self.image_path, batch_filenames[i]) ).resize(self.image_size, self.image_resample) @@ -63,10 +57,10 @@ def get_dataset_size(self, mini_batch) -> int: dataset_size = int(np.floor(len(self.image_filenames) / float(mini_batch))) return dataset_size - def get_image_size(self) -> tuple[int,int]: + def get_image_size(self) -> tuple[int, int]: return self.image_size - def shuffle_filenames(self,seed:int) -> None: + def shuffle_filenames(self, seed: int) -> None: state = np.random.RandomState(seed) shuffled_indices = state.permutation(len(self.image_filenames)) shuffled_indices = shuffled_indices.astype(int) @@ -117,10 +111,10 @@ def get_dataset_size(self, mini_batch) -> int: dataset_size = int(np.floor(len(self.image_filenames) / float(mini_batch))) return dataset_size - def get_image_size(self) -> tuple[int,int]: + def get_image_size(self) -> tuple[int, int]: return self.image_size - def shuffle_filenames(self,seed:int) -> None: + def shuffle_filenames(self, seed: int) -> None: state = np.random.RandomState(seed) shuffled_indices = state.permutation(len(self.image_filenames)) shuffled_indices = shuffled_indices.astype(int) From 841ee0ff41db57de4f0f4c25ae6657f347b768b7 Mon Sep 17 00:00:00 2001 From: Ayleen Sohaib <2684413S@student.gla.ac.uk> Date: Sat, 26 Aug 2023 14:44:02 +0100 Subject: [PATCH 53/75] added partial_dataset and adjusted_batch_size to read_batch - to allow loading partial batches --- utilities/segmentation_utils/flowreader.py | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/utilities/segmentation_utils/flowreader.py b/utilities/segmentation_utils/flowreader.py index 6d20be5..8076a42 100644 --- a/utilities/segmentation_utils/flowreader.py +++ b/utilities/segmentation_utils/flowreader.py @@ -343,13 +343,19 @@ def set_mini_batch_size(self, batch_size: int) -> None: self.mini_batch = batch_size def __read_batch(self, dataset_index: int) -> None: - # read image batch - - # calculate number of mini batches in a batch - n = self.batch_size // self.mini_batch - - batch_images = self.input_strategy.read_batch(self.batch_size, dataset_index) - batch_masks = self.output_strategy.read_batch(self.batch_size, dataset_index) + + #!adjust the batch size as it is passed to the function + #calculates remaining images in a dataset and scales it down by multiplying with minibatch + partial_dataset = self.dataset_size * self.mini_batch - dataset_index + + #compare and choose the smaller value, to avoid making a larger batch_size + adjusted_batch_size = min(self.batch_size, partial_dataset) + + #calculate number of mini batches in a batch + n = adjusted_batch_size // self.mini_batch + + batch_images = self.input_strategy.read_batch(adjusted_batch_size, dataset_index) + batch_masks = self.output_strategy.read_batch(adjusted_batch_size, dataset_index) # preprocess and assign images and masks to the batch From 8ae3d0eab5a771fa832604dcb330983e160982a3 Mon Sep 17 00:00:00 2001 From: Sajtospoga01 Date: Sat, 26 Aug 2023 14:49:26 +0100 Subject: [PATCH 54/75] adds dataset size to flowgenerator_exp, adds multi threaded version of the rasterio strategy --- .../test_strategies.py | 172 +++++++++++++++--- utilities/segmentation_utils/flowreader.py | 2 + .../segmentation_utils/reading_strategies.py | 75 ++++++++ 3 files changed, 225 insertions(+), 24 deletions(-) diff --git a/tests/segmentation_utils_tests.py/test_strategies.py b/tests/segmentation_utils_tests.py/test_strategies.py index 209c3bf..aad5a4b 100644 --- a/tests/segmentation_utils_tests.py/test_strategies.py +++ b/tests/segmentation_utils_tests.py/test_strategies.py @@ -1,12 +1,13 @@ import os import numpy as np -from PIL import Image import pytest +from PIL import Image from pytest import MonkeyPatch from utilities.segmentation_utils.reading_strategies import ( - HyperspectralImageStrategy, RGBImageStrategy) + HyperspectralImageStrategy, HyperspectralImageStrategyMultiThread, + RGBImageStrategy) class MockRasterio: @@ -15,6 +16,7 @@ def __init__(self, n, size, bands, dtypes): self.size = size self.bands = bands self.dtypes = dtypes + self.call_count = 0 def open(self, *args, **kwargs): return self @@ -24,7 +26,10 @@ def count(self) -> int: return self.bands def read(self, *args, **kwargs): - return np.zeros((self.bands, self.size[0], self.size[1]), self.dtypes[0]) + self.call_count += 1 + return np.full( + (self.bands, self.size[0], self.size[1]), self.call_count, self.dtypes[0] + ) # these functions are invoked when a 'with' statement is executed def __enter__(self): @@ -35,6 +40,10 @@ def __exit__(self, type, value, traceback): # called at the end of a 'with' block pass + def get_count(self): + return self.call_count + + @pytest.mark.development def test_read_batch_image_path() -> None: # checking if the file is being opened and read correctly @@ -64,6 +73,7 @@ def test_read_batch_image_path() -> None: patch.undo() patch.undo() + @pytest.mark.development def test_read_batch_returns_nparray() -> None: # checking if the returned value is a numpy array @@ -94,6 +104,7 @@ def test_read_batch_returns_nparray() -> None: patch.undo() patch.undo() + @pytest.mark.development def test_RGB_get_dataset_size() -> None: # checking if the calculation is done correctly @@ -109,7 +120,7 @@ def test_RGB_get_dataset_size() -> None: image_resample=Image.Resampling.NEAREST, ) dataset = len(mock_filenames) # number of images in the specified path - mini_batch = 2 # number of images we want in each batch + mini_batch = 2 # number of images we want in each batch expected_value = int( np.floor(dataset / float(mini_batch)) ) # number of sets of images we expect @@ -119,6 +130,7 @@ def test_RGB_get_dataset_size() -> None: patch.undo() patch.undo() + @pytest.mark.development def test_Hyperspectral_get_dataset_size() -> None: # checking if the calculation is done correctly @@ -135,7 +147,7 @@ def test_Hyperspectral_get_dataset_size() -> None: ) dataset = len(mock_filenames) # number of images in the specified path - mini_batch = 2 # number of images we want in each batch + mini_batch = 2 # number of images we want in each batch expected_value = int( np.floor(dataset / float(mini_batch)) ) # number of sets of images we expect @@ -145,6 +157,7 @@ def test_Hyperspectral_get_dataset_size() -> None: patch.undo() patch.undo() + @pytest.mark.development def test_hyperspectral_open(): patch = MonkeyPatch() @@ -167,6 +180,30 @@ def test_hyperspectral_open(): assert read_images.shape == (2, 224, 224, 3) + +@pytest.mark.development +def test_hyperspectral_mt_open(): + patch = MonkeyPatch() + mock_filenames = ["a", "b", "c"] + patch.setattr(os, "listdir", lambda x: mock_filenames) + + image_path = "tests/segmentation_utils_tests/test_strategies" + + mock_data = { + "n": 3, + "size": (224, 224), + "bands": 3, + "dtypes": ["uint8"], + } + strategy = HyperspectralImageStrategyMultiThread( + image_path, (224, 224), package=MockRasterio(**mock_data) + ) + + read_images = strategy.read_batch(2, 0) + + assert read_images.shape == (2, 224, 224, 3) + + @pytest.mark.development def test_empty_batch(): patch = MonkeyPatch() @@ -191,10 +228,16 @@ def test_empty_batch(): dataset_index = 0 result = image_strategy.read_batch(batch_size, dataset_index) - assert result.shape == (0, 224, 224, 3) #0 indicates there are no images in the batch + assert result.shape == ( + 0, + 224, + 224, + 3, + ) # 0 indicates there are no images in the batch patch.undo() patch.undo() + @pytest.mark.development def test_out_of_bounds_index(): patch = MonkeyPatch() @@ -215,18 +258,19 @@ def test_out_of_bounds_index(): image_resample=Image.Resampling.NEAREST, ) - batch_size = 2 #not an empty batch - dataset_index = len(image_strategy.image_filenames) #out of bounds index + batch_size = 2 # not an empty batch + dataset_index = len(image_strategy.image_filenames) # out of bounds index try: - result = image_strategy.read_batch(batch_size, dataset_index) + image_strategy.read_batch(batch_size, dataset_index) assert True - + except IndexError: pass patch.undo() patch.undo() + @pytest.mark.development def test_batch_slicing(): patch = MonkeyPatch() @@ -249,13 +293,16 @@ def test_batch_slicing(): batch_size = 10 dataset_index = 2 - result = image_strategy.read_batch(batch_size, dataset_index) - assert result.shape[0] == batch_size #compare the size of returned data with batch_size + result = image_strategy.read_batch(batch_size, dataset_index) + assert ( + result.shape[0] == batch_size + ) # compare the size of returned data with batch_size patch.undo() patch.undo() + @pytest.mark.development -def test_RGB_get_image_index(): +def test_RGB_get_image_size(): patch = MonkeyPatch() mock_filenames = ["a" for _ in range(20)] @@ -268,12 +315,12 @@ def test_RGB_get_image_index(): image_resample=Image.Resampling.NEAREST, ) - result = image_strategy.get_image_size( - ) - assert result == (224,224) + result = image_strategy.get_image_size() + assert result == (224, 224) + @pytest.mark.development -def test_HyperSpectral_get_image_index(): +def test_HyperSpectral_get_image_size(): patch = MonkeyPatch() mock_filenames = ["a" for _ in range(20)] @@ -283,14 +330,31 @@ def test_HyperSpectral_get_image_index(): image_strategy = HyperspectralImageStrategy( image_path="tests/segmentation_utils_tests/test_strategies", image_size=(224, 224), - package=MockRasterio(n=3, size=(224, 224), bands=3, dtypes=["uint8"]) + package=MockRasterio(n=3, size=(224, 224), bands=3, dtypes=["uint8"]), ) - result = image_strategy.get_image_size( + result = image_strategy.get_image_size() + assert result == (224, 224) + + +@pytest.mark.development +def test_HyperSpectral_MT_get_image_size(): + patch = MonkeyPatch() + + mock_filenames = ["a" for _ in range(20)] + + patch.setattr(os, "listdir", lambda x: mock_filenames) + + image_strategy = HyperspectralImageStrategyMultiThread( + image_path="tests/segmentation_utils_tests/test_strategies", + image_size=(224, 224), + package=MockRasterio(n=3, size=(224, 224), bands=3, dtypes=["uint8"]), ) - assert result == (224,224) + result = image_strategy.get_image_size() + assert result == (224, 224) +@pytest.mark.development def test_RGB_shuffle(): patch = MonkeyPatch() @@ -316,8 +380,11 @@ def test_RGB_shuffle(): image_strategy_1.shuffle_filenames(i) image_strategy_2.shuffle_filenames(i) - assert np.array_equal(image_strategy_1.image_filenames, image_strategy_2.image_filenames) + assert np.array_equal( + image_strategy_1.image_filenames, image_strategy_2.image_filenames + ) +@pytest.mark.development def test_Hyperspectral_shuffle(): patch = MonkeyPatch() @@ -328,13 +395,43 @@ def test_Hyperspectral_shuffle(): image_strategy_1 = HyperspectralImageStrategy( image_path="tests/segmentation_utils_tests/test_strategies", image_size=(224, 224), - package=MockRasterio(n=3, size=(224, 224), bands=3, dtypes=["uint8"]) + package=MockRasterio(n=3, size=(224, 224), bands=3, dtypes=["uint8"]), + ) + + image_strategy_2 = HyperspectralImageStrategy( + image_path="tests/segmentation_utils_tests/test_strategies", + image_size=(224, 224), + package=MockRasterio(n=3, size=(224, 224), bands=3, dtypes=["uint8"]), + ) + + n = 100 + + for i in range(n): + image_strategy_1.shuffle_filenames(i) + image_strategy_2.shuffle_filenames(i) + + assert np.array_equal( + image_strategy_1.image_filenames, image_strategy_2.image_filenames + ) + +@pytest.mark.development +def test_Hyperspectral_mt_shuffle(): + patch = MonkeyPatch() + + mock_filenames = [str(i) for i in range(20)] + + patch.setattr(os, "listdir", lambda x: mock_filenames) + + image_strategy_1 = HyperspectralImageStrategyMultiThread( + image_path="tests/segmentation_utils_tests/test_strategies", + image_size=(224, 224), + package=MockRasterio(n=3, size=(224, 224), bands=3, dtypes=["uint8"]), ) image_strategy_2 = HyperspectralImageStrategy( image_path="tests/segmentation_utils_tests/test_strategies", image_size=(224, 224), - package=MockRasterio(n=3, size=(224, 224), bands=3, dtypes=["uint8"]) + package=MockRasterio(n=3, size=(224, 224), bands=3, dtypes=["uint8"]), ) n = 100 @@ -343,4 +440,31 @@ def test_Hyperspectral_shuffle(): image_strategy_1.shuffle_filenames(i) image_strategy_2.shuffle_filenames(i) - assert np.array_equal(image_strategy_1.image_filenames, image_strategy_2.image_filenames) \ No newline at end of file + assert np.array_equal( + image_strategy_1.image_filenames, image_strategy_2.image_filenames + ) + +@pytest.mark.development +def test_Hyperspectral_mt_image_in_order(): + patch = MonkeyPatch() + + mock_filenames = [str(i) for i in range(20)] + + patch.setattr(os, "listdir", lambda x: mock_filenames) + mock_package = MockRasterio(n=3, size=(224, 224), bands=3, dtypes=["uint8"]) + image_strategy = HyperspectralImageStrategyMultiThread( + image_path="tests/segmentation_utils_tests/test_strategies", + image_size=(224, 224), + package=mock_package, + ) + + batch_size = 10 + + call_count = mock_package.get_count() + + result = image_strategy.read_batch(batch_size, 0) + + for i in range(call_count, call_count + batch_size): + assert np.array_equal( + result[i - call_count, :, :, :], np.full((224, 224, 3), i + 1) + ) diff --git a/utilities/segmentation_utils/flowreader.py b/utilities/segmentation_utils/flowreader.py index 8076a42..1b96d65 100644 --- a/utilities/segmentation_utils/flowreader.py +++ b/utilities/segmentation_utils/flowreader.py @@ -297,6 +297,8 @@ def __init__( self.validity_index = 0 self.shuffle_counter = 0 + self.dataset_size = self.__len__() + self.__shuffle_filenames() if len(self.output_size) != 2: diff --git a/utilities/segmentation_utils/reading_strategies.py b/utilities/segmentation_utils/reading_strategies.py index 30cd6e7..7c743a2 100644 --- a/utilities/segmentation_utils/reading_strategies.py +++ b/utilities/segmentation_utils/reading_strategies.py @@ -1,4 +1,6 @@ import os +from concurrent import futures +from concurrent.futures import ThreadPoolExecutor from typing import Any, Protocol import numpy as np @@ -119,3 +121,76 @@ def shuffle_filenames(self, seed: int) -> None: shuffled_indices = state.permutation(len(self.image_filenames)) shuffled_indices = shuffled_indices.astype(int) self.image_filenames = self.image_filenames[shuffled_indices] + + +class HyperspectralImageStrategyMultiThread: + # read images with rasterio + def __init__( + self, + image_path: str, + image_size: tuple[int, int], + image_resample=Image.Resampling.NEAREST, + max_workers: int = 8, + package: Any = rasterio, + ): + self.image_path = image_path + self.image_filenames = np.array(sorted(os.listdir(self.image_path))) + self.image_size = image_size + self.image_resample = image_resample + self.package = package + self.max_workers = max_workers + # gets the number of bands for the dataset + self.bands = package.open( + os.path.join(self.image_path, self.image_filenames[0]) + ).count + + def __read_single_image( + self, filename: str, image_path: str, package: Any, image_size: tuple[int, int] + ): + with package.open(os.path.join(image_path, filename)) as dataset: + image = dataset.read() + resized_image = np.resize(image, image_size) + return resized_image + + def read_batch(self, batch_size: int, dataset_index: int) -> np.ndarray: + batch_filenames = self.image_filenames[ + dataset_index : dataset_index + batch_size + ] + + # defines the array that will contain the images + images = np.zeros( + (batch_size, self.bands, self.image_size[0], self.image_size[1]) + ) + + with ThreadPoolExecutor(max_workers=self.max_workers) as executor: + future_to_index = { + executor.submit( + self.__read_single_image, + filename, + self.image_path, + self.package, + self.image_size, + ): i + for i, filename in enumerate(batch_filenames) + } + for future in futures.as_completed(future_to_index): + i = future_to_index[future] + images[i, :, :, :] = future.result() + + # ensures channel-last orientation for the reader + images = np.moveaxis(images, 1, 3) + + return np.array(images) + + def get_dataset_size(self, mini_batch) -> int: + dataset_size = int(np.floor(len(self.image_filenames) / float(mini_batch))) + return dataset_size + + def get_image_size(self) -> tuple[int, int]: + return self.image_size + + def shuffle_filenames(self, seed: int) -> None: + state = np.random.RandomState(seed) + shuffled_indices = state.permutation(len(self.image_filenames)) + shuffled_indices = shuffled_indices.astype(int) + self.image_filenames = self.image_filenames[shuffled_indices] From 97d291324247e636ac1ca4e5833b3c9961b57e63 Mon Sep 17 00:00:00 2001 From: Sajtospoga01 Date: Sat, 26 Aug 2023 15:03:32 +0100 Subject: [PATCH 55/75] update preprocessing to use adjusted_batch --- utilities/segmentation_utils/flowreader.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/utilities/segmentation_utils/flowreader.py b/utilities/segmentation_utils/flowreader.py index 1b96d65..c6a5db1 100644 --- a/utilities/segmentation_utils/flowreader.py +++ b/utilities/segmentation_utils/flowreader.py @@ -362,7 +362,7 @@ def __read_batch(self, dataset_index: int) -> None: # preprocess and assign images and masks to the batch if self.preprocessing_enabled: - for i in range(self.batch_size): + for i in range(adjusted_batch_size): image = batch_images[i, ...] mask = batch_masks[i, ...] if self.preprocessing_seed is None: @@ -418,7 +418,7 @@ def __len__(self) -> int: def __getitem__(self, index) -> tuple[np.ndarray, np.ndarray]: # check if the batch is already cached - index = index % self.__len__() + if index < self.validity_index - self.batch_size // self.mini_batch: self.validity_index = 0 From d2c8ecd7bf036b0af7eb122236fdfe7598bfd183 Mon Sep 17 00:00:00 2001 From: Sajtospoga01 Date: Sat, 26 Aug 2023 15:08:06 +0100 Subject: [PATCH 56/75] test if works with no adjustment --- utilities/segmentation_utils/flowreader.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utilities/segmentation_utils/flowreader.py b/utilities/segmentation_utils/flowreader.py index c6a5db1..bdbb46a 100644 --- a/utilities/segmentation_utils/flowreader.py +++ b/utilities/segmentation_utils/flowreader.py @@ -351,7 +351,7 @@ def __read_batch(self, dataset_index: int) -> None: partial_dataset = self.dataset_size * self.mini_batch - dataset_index #compare and choose the smaller value, to avoid making a larger batch_size - adjusted_batch_size = min(self.batch_size, partial_dataset) + adjusted_batch_size = self.batch_size #calculate number of mini batches in a batch n = adjusted_batch_size // self.mini_batch From e20f19c41c6c7b525b9ea22f58efb57839c8ce49 Mon Sep 17 00:00:00 2001 From: Sajtospoga01 Date: Sat, 26 Aug 2023 15:26:55 +0100 Subject: [PATCH 57/75] adds printstatements for debugging --- utilities/segmentation_utils/flowreader.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/utilities/segmentation_utils/flowreader.py b/utilities/segmentation_utils/flowreader.py index bdbb46a..d1fb6d7 100644 --- a/utilities/segmentation_utils/flowreader.py +++ b/utilities/segmentation_utils/flowreader.py @@ -350,9 +350,12 @@ def __read_batch(self, dataset_index: int) -> None: #calculates remaining images in a dataset and scales it down by multiplying with minibatch partial_dataset = self.dataset_size * self.mini_batch - dataset_index + tf.print("partial_dataset: ", partial_dataset) #compare and choose the smaller value, to avoid making a larger batch_size - adjusted_batch_size = self.batch_size - + adjusted_batch_size = min(self.batch_size, partial_dataset) + + tf.print("adjusted_batch_size: ", adjusted_batch_size) + #calculate number of mini batches in a batch n = adjusted_batch_size // self.mini_batch From e06f47f337c67b1ccc8d8371ca4352c90d1bdce8 Mon Sep 17 00:00:00 2001 From: Sajtospoga01 Date: Sat, 26 Aug 2023 15:31:33 +0100 Subject: [PATCH 58/75] adds more debugging statements --- utilities/segmentation_utils/flowreader.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/utilities/segmentation_utils/flowreader.py b/utilities/segmentation_utils/flowreader.py index d1fb6d7..ba38d2c 100644 --- a/utilities/segmentation_utils/flowreader.py +++ b/utilities/segmentation_utils/flowreader.py @@ -345,7 +345,8 @@ def set_mini_batch_size(self, batch_size: int) -> None: self.mini_batch = batch_size def __read_batch(self, dataset_index: int) -> None: - + tf.print("dataset_index: ", dataset_index) + tf.print("dataset_size: ", self.dataset_size) #!adjust the batch size as it is passed to the function #calculates remaining images in a dataset and scales it down by multiplying with minibatch partial_dataset = self.dataset_size * self.mini_batch - dataset_index From f2a9cef2c559f6b7b72eb314bca4f264d1538299 Mon Sep 17 00:00:00 2001 From: Sajtospoga01 Date: Sat, 26 Aug 2023 15:44:23 +0100 Subject: [PATCH 59/75] adds even more debugging print --- utilities/segmentation_utils/flowreader.py | 1 + 1 file changed, 1 insertion(+) diff --git a/utilities/segmentation_utils/flowreader.py b/utilities/segmentation_utils/flowreader.py index ba38d2c..294ffa1 100644 --- a/utilities/segmentation_utils/flowreader.py +++ b/utilities/segmentation_utils/flowreader.py @@ -298,6 +298,7 @@ def __init__( self.shuffle_counter = 0 self.dataset_size = self.__len__() + tf.print("dataset_size: ", self.dataset_size) self.__shuffle_filenames() From 095cfe95181c2397f0cc52c0d44206006eadb90d Mon Sep 17 00:00:00 2001 From: Sajtospoga01 Date: Sat, 26 Aug 2023 15:46:34 +0100 Subject: [PATCH 60/75] possibly fixes error with incorrect dataset size calculation in initializer --- utilities/segmentation_utils/flowreader.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utilities/segmentation_utils/flowreader.py b/utilities/segmentation_utils/flowreader.py index 294ffa1..02d6a20 100644 --- a/utilities/segmentation_utils/flowreader.py +++ b/utilities/segmentation_utils/flowreader.py @@ -297,7 +297,7 @@ def __init__( self.validity_index = 0 self.shuffle_counter = 0 - self.dataset_size = self.__len__() + self.dataset_size = self.input_strategy.get_dataset_size(self.mini_batch) tf.print("dataset_size: ", self.dataset_size) self.__shuffle_filenames() From 7ef8fc95956539e62df1794950fa543da5fd9e42 Mon Sep 17 00:00:00 2001 From: Sajtospoga01 Date: Sat, 26 Aug 2023 15:50:26 +0100 Subject: [PATCH 61/75] more print --- utilities/segmentation_utils/flowreader.py | 1 + 1 file changed, 1 insertion(+) diff --git a/utilities/segmentation_utils/flowreader.py b/utilities/segmentation_utils/flowreader.py index 02d6a20..a121a9d 100644 --- a/utilities/segmentation_utils/flowreader.py +++ b/utilities/segmentation_utils/flowreader.py @@ -419,6 +419,7 @@ def __read_batch(self, dataset_index: int) -> None: # required to check when to read the next batch def __len__(self) -> int: + tf.print("len called") return self.input_strategy.get_dataset_size(self.mini_batch) def __getitem__(self, index) -> tuple[np.ndarray, np.ndarray]: From 603bc036d42f599d0ca5a4a0bac474f11c7ec0d5 Mon Sep 17 00:00:00 2001 From: Sajtospoga01 Date: Sat, 26 Aug 2023 15:55:56 +0100 Subject: [PATCH 62/75] adds possible fix for incorrect dataset size calculation --- utilities/segmentation_utils/flowreader.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/utilities/segmentation_utils/flowreader.py b/utilities/segmentation_utils/flowreader.py index a121a9d..6e40a47 100644 --- a/utilities/segmentation_utils/flowreader.py +++ b/utilities/segmentation_utils/flowreader.py @@ -297,8 +297,7 @@ def __init__( self.validity_index = 0 self.shuffle_counter = 0 - self.dataset_size = self.input_strategy.get_dataset_size(self.mini_batch) - tf.print("dataset_size: ", self.dataset_size) + self.__update_dataset_size() self.__shuffle_filenames() @@ -344,6 +343,11 @@ def set_mini_batch_size(self, batch_size: int) -> None: if self.batch_size % batch_size != 0: raise ValueError("The batch size must be divisible by the mini batch size") self.mini_batch = batch_size + self.__update_dataset_size() + + def __update_dataset_size(self) -> None: + self.dataset_size = self.input_strategy.get_dataset_size(self.mini_batch) + def __read_batch(self, dataset_index: int) -> None: tf.print("dataset_index: ", dataset_index) From f23bc4d8187feb0abdd2253766c589000b2d060d Mon Sep 17 00:00:00 2001 From: Sajtospoga01 Date: Sat, 26 Aug 2023 16:17:23 +0100 Subject: [PATCH 63/75] pil image loader strategy multi threaded version --- .../segmentation_utils/reading_strategies.py | 69 +++++++++++++++++++ 1 file changed, 69 insertions(+) diff --git a/utilities/segmentation_utils/reading_strategies.py b/utilities/segmentation_utils/reading_strategies.py index 7c743a2..b24ceaa 100644 --- a/utilities/segmentation_utils/reading_strategies.py +++ b/utilities/segmentation_utils/reading_strategies.py @@ -69,6 +69,75 @@ def shuffle_filenames(self, seed: int) -> None: self.image_filenames = self.image_filenames[shuffled_indices] +class RGBImageStrategyMultiThread: + def __init__( + self, + image_path: str, + image_size: tuple[int, int], + image_resample=Image.Resampling.NEAREST, + max_workers: int = 8, + ): + self.image_path = image_path + self.image_filenames = np.array( + sorted(os.listdir(self.image_path)) + ) #!update: added variable to initialiser + self.image_size = image_size + self.image_resample = image_resample + self.max_workers = max_workers + + def __read_single_image_pil(self, filename, image_path, image_size, image_resample): + image = Image.open(os.path.join(image_path, filename)).resize( + image_size, image_resample + ) + return np.array(image) + + def read_batch(self, batch_size: int, dataset_index: int) -> np.ndarray: + batch_filenames = self.image_filenames[ + dataset_index : dataset_index + batch_size + ] + + images = np.zeros((batch_size, self.image_size[0], self.image_size[1], 3)) + is_color = True + + with ThreadPoolExecutor(max_workers=self.max_workers) as executor: + future_to_index = { + executor.submit( + self.__read_single_image_pil, + filename, + self.image_path, + self.image_size, + self.image_resample, + ): i + for i, filename in enumerate(batch_filenames) + } + for future in futures.as_completed(future_to_index): + i = future_to_index[future] + image = future.result() + + if len(image.shape) == 2 and is_color: + images = np.zeros( + (batch_size, self.image_size[0], self.image_size[1]) + ) + is_color = False + + images[i, ...] = image + + return images + + def get_dataset_size(self, mini_batch) -> int: + dataset_size = int(np.floor(len(self.image_filenames) / float(mini_batch))) + return dataset_size + + def get_image_size(self) -> tuple[int, int]: + return self.image_size + + def shuffle_filenames(self, seed: int) -> None: + state = np.random.RandomState(seed) + shuffled_indices = state.permutation(len(self.image_filenames)) + shuffled_indices = shuffled_indices.astype(int) + self.image_filenames = self.image_filenames[shuffled_indices] + + class HyperspectralImageStrategy: # read images with rasterio def __init__( From a777cc0224fe438941a3882df5d465b541b98c71 Mon Sep 17 00:00:00 2001 From: Sajtospoga01 Date: Sat, 26 Aug 2023 16:42:14 +0100 Subject: [PATCH 64/75] removes debugging print statements from flowgenerator, optimizes multithreaded workloads in strategies --- utilities/segmentation_utils/flowreader.py | 26 +++++------ .../segmentation_utils/reading_strategies.py | 44 +++++++++---------- 2 files changed, 33 insertions(+), 37 deletions(-) diff --git a/utilities/segmentation_utils/flowreader.py b/utilities/segmentation_utils/flowreader.py index 6e40a47..1282a6b 100644 --- a/utilities/segmentation_utils/flowreader.py +++ b/utilities/segmentation_utils/flowreader.py @@ -344,29 +344,27 @@ def set_mini_batch_size(self, batch_size: int) -> None: raise ValueError("The batch size must be divisible by the mini batch size") self.mini_batch = batch_size self.__update_dataset_size() - + def __update_dataset_size(self) -> None: self.dataset_size = self.input_strategy.get_dataset_size(self.mini_batch) - def __read_batch(self, dataset_index: int) -> None: - tf.print("dataset_index: ", dataset_index) - tf.print("dataset_size: ", self.dataset_size) #!adjust the batch size as it is passed to the function - #calculates remaining images in a dataset and scales it down by multiplying with minibatch - partial_dataset = self.dataset_size * self.mini_batch - dataset_index + # calculates remaining images in a dataset and scales it down by multiplying with minibatch + partial_dataset = self.dataset_size * self.mini_batch - dataset_index - tf.print("partial_dataset: ", partial_dataset) - #compare and choose the smaller value, to avoid making a larger batch_size + # compare and choose the smaller value, to avoid making a larger batch_size adjusted_batch_size = min(self.batch_size, partial_dataset) - - tf.print("adjusted_batch_size: ", adjusted_batch_size) - #calculate number of mini batches in a batch + # calculate number of mini batches in a batch n = adjusted_batch_size // self.mini_batch - batch_images = self.input_strategy.read_batch(adjusted_batch_size, dataset_index) - batch_masks = self.output_strategy.read_batch(adjusted_batch_size, dataset_index) + batch_images = self.input_strategy.read_batch( + adjusted_batch_size, dataset_index + ) + batch_masks = self.output_strategy.read_batch( + adjusted_batch_size, dataset_index + ) # preprocess and assign images and masks to the batch @@ -423,12 +421,10 @@ def __read_batch(self, dataset_index: int) -> None: # required to check when to read the next batch def __len__(self) -> int: - tf.print("len called") return self.input_strategy.get_dataset_size(self.mini_batch) def __getitem__(self, index) -> tuple[np.ndarray, np.ndarray]: # check if the batch is already cached - if index < self.validity_index - self.batch_size // self.mini_batch: self.validity_index = 0 diff --git a/utilities/segmentation_utils/reading_strategies.py b/utilities/segmentation_utils/reading_strategies.py index b24ceaa..c570ef0 100644 --- a/utilities/segmentation_utils/reading_strategies.py +++ b/utilities/segmentation_utils/reading_strategies.py @@ -171,7 +171,7 @@ def read_batch(self, batch_size: int, dataset_index: int) -> np.ndarray: with self.package.open(os.path.join(self.image_path, filename)) as dataset: # .read() returns a numpy array that contains the raster cell values in your file. image = dataset.read() - images[i, :, :, :] = np.resize(image, self.image_size) + images[i, :, :, :] = np.resize(image, (self.bands, *self.image_size)) # ensures channel-last orientation for the reader images = np.moveaxis(images, 1, 3) @@ -214,42 +214,42 @@ def __init__( ).count def __read_single_image( - self, filename: str, image_path: str, package: Any, image_size: tuple[int, int] - ): - with package.open(os.path.join(image_path, filename)) as dataset: + self, filename: str, package: Any, image_size: tuple[int, int, int] + ) -> np.ndarray: + with package.open(filename) as dataset: image = dataset.read() resized_image = np.resize(image, image_size) return resized_image def read_batch(self, batch_size: int, dataset_index: int) -> np.ndarray: - batch_filenames = self.image_filenames[ - dataset_index : dataset_index + batch_size + batch_filenames = [ + os.path.join(self.image_path, filename) + for filename in self.image_filenames[ + dataset_index : dataset_index + batch_size + ] ] - # defines the array that will contain the images + # Pre-allocate memory images = np.zeros( (batch_size, self.bands, self.image_size[0], self.image_size[1]) ) + # Use ThreadPoolExecutor.map for more efficient multi-threading with ThreadPoolExecutor(max_workers=self.max_workers) as executor: - future_to_index = { - executor.submit( + for i, image in enumerate( + executor.map( self.__read_single_image, - filename, - self.image_path, - self.package, - self.image_size, - ): i - for i, filename in enumerate(batch_filenames) - } - for future in futures.as_completed(future_to_index): - i = future_to_index[future] - images[i, :, :, :] = future.result() - - # ensures channel-last orientation for the reader + batch_filenames, + [self.package] * batch_size, + [(self.bands, *self.image_size)] * batch_size, + ) + ): + images[i, :, :, :] = image + + # Ensure channel-last orientation images = np.moveaxis(images, 1, 3) - return np.array(images) + return images def get_dataset_size(self, mini_batch) -> int: dataset_size = int(np.floor(len(self.image_filenames) / float(mini_batch))) From 14ad5a1761fd4bfb043077451ae98f6f62773c9d Mon Sep 17 00:00:00 2001 From: Sajtospoga01 Date: Sun, 27 Aug 2023 15:39:36 +0100 Subject: [PATCH 65/75] renames previous hyperspectralstrategy to rasterstrategy, adds new hyperspectral strategy based on spectral python --- pyproject.toml | 1 + .../test_strategies.py | 86 +++++++++++++++---- .../segmentation_utils/reading_strategies.py | 73 +++++++++++++++- 3 files changed, 139 insertions(+), 21 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 77f08e4..6f05b5b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -21,6 +21,7 @@ dependencies = [ "Pillow >= 9.4.0", "tensorflow >= 2.10", "toml >= 0.10.2", + "spectral >= 0.23.1", ] [tool.setuptools] diff --git a/tests/segmentation_utils_tests.py/test_strategies.py b/tests/segmentation_utils_tests.py/test_strategies.py index aad5a4b..0632f63 100644 --- a/tests/segmentation_utils_tests.py/test_strategies.py +++ b/tests/segmentation_utils_tests.py/test_strategies.py @@ -6,7 +6,7 @@ from pytest import MonkeyPatch from utilities.segmentation_utils.reading_strategies import ( - HyperspectralImageStrategy, HyperspectralImageStrategyMultiThread, + HSImageStrategy, RasterImageStrategy, RasterImageStrategyMultiThread, RGBImageStrategy) @@ -43,6 +43,26 @@ def __exit__(self, type, value, traceback): def get_count(self): return self.call_count +class SPyMock: + def __init__(self,n,size,bands) -> None: + self.n = n + self.size = size + self.bands = bands + self.call_count = 0 + + @property + def shape(self): + return (self.size[0],self.size[1],self.bands) + + def open_image(self,*args,**kwargs): + return self + + def load(self,*args,**kwargs): + self.call_count += 1 + return np.full((self.size[0],self.size[1],self.bands),self.call_count,np.uint8) + + + @pytest.mark.development def test_read_batch_image_path() -> None: @@ -132,7 +152,7 @@ def test_RGB_get_dataset_size() -> None: @pytest.mark.development -def test_Hyperspectral_get_dataset_size() -> None: +def test_raster_get_dataset_size() -> None: # checking if the calculation is done correctly patch = MonkeyPatch() @@ -140,7 +160,7 @@ def test_Hyperspectral_get_dataset_size() -> None: patch.setattr(os, "listdir", lambda x: mock_filenames) - image_strategy = HyperspectralImageStrategy( + image_strategy = RasterImageStrategy( image_path="tests/segmentation_utils_tests/test_strategies", image_size=(224, 224), package=MockRasterio(n=3, size=(224, 224), bands=3, dtypes=["uint8"]), @@ -159,7 +179,7 @@ def test_Hyperspectral_get_dataset_size() -> None: @pytest.mark.development -def test_hyperspectral_open(): +def test_raster_open(): patch = MonkeyPatch() mock_filenames = ["a", "b", "c"] patch.setattr(os, "listdir", lambda x: mock_filenames) @@ -172,7 +192,7 @@ def test_hyperspectral_open(): "bands": 3, "dtypes": ["uint8"], } - strategy = HyperspectralImageStrategy( + strategy = RasterImageStrategy( image_path, (224, 224), package=MockRasterio(**mock_data) ) @@ -181,8 +201,10 @@ def test_hyperspectral_open(): assert read_images.shape == (2, 224, 224, 3) + + @pytest.mark.development -def test_hyperspectral_mt_open(): +def test_raster_mt_open(): patch = MonkeyPatch() mock_filenames = ["a", "b", "c"] patch.setattr(os, "listdir", lambda x: mock_filenames) @@ -195,7 +217,7 @@ def test_hyperspectral_mt_open(): "bands": 3, "dtypes": ["uint8"], } - strategy = HyperspectralImageStrategyMultiThread( + strategy = RasterImageStrategyMultiThread( image_path, (224, 224), package=MockRasterio(**mock_data) ) @@ -204,6 +226,28 @@ def test_hyperspectral_mt_open(): assert read_images.shape == (2, 224, 224, 3) +@pytest.mark.development +def test_hyperspectral_open(): + patch = MonkeyPatch() + mock_filenames = ["a", "b", "c"] + patch.setattr(os, "listdir", lambda x: mock_filenames) + + image_path = "tests/segmentation_utils_tests/test_strategies" + + mock_data = { + "n": 3, + "size": (224, 224), + "bands": 3, + } + strategy = HSImageStrategy( + image_path, (224, 224), package=SPyMock(**mock_data) + ) + + read_images = strategy.read_batch(2, 0) + + assert read_images.shape == (2, 224, 224, 3) + + @pytest.mark.development def test_empty_batch(): patch = MonkeyPatch() @@ -320,14 +364,14 @@ def test_RGB_get_image_size(): @pytest.mark.development -def test_HyperSpectral_get_image_size(): +def test_raster_get_image_size(): patch = MonkeyPatch() mock_filenames = ["a" for _ in range(20)] patch.setattr(os, "listdir", lambda x: mock_filenames) - image_strategy = HyperspectralImageStrategy( + image_strategy = RasterImageStrategy( image_path="tests/segmentation_utils_tests/test_strategies", image_size=(224, 224), package=MockRasterio(n=3, size=(224, 224), bands=3, dtypes=["uint8"]), @@ -338,14 +382,14 @@ def test_HyperSpectral_get_image_size(): @pytest.mark.development -def test_HyperSpectral_MT_get_image_size(): +def test_raster_MT_get_image_size(): patch = MonkeyPatch() mock_filenames = ["a" for _ in range(20)] patch.setattr(os, "listdir", lambda x: mock_filenames) - image_strategy = HyperspectralImageStrategyMultiThread( + image_strategy = RasterImageStrategyMultiThread( image_path="tests/segmentation_utils_tests/test_strategies", image_size=(224, 224), package=MockRasterio(n=3, size=(224, 224), bands=3, dtypes=["uint8"]), @@ -354,6 +398,7 @@ def test_HyperSpectral_MT_get_image_size(): result = image_strategy.get_image_size() assert result == (224, 224) + @pytest.mark.development def test_RGB_shuffle(): patch = MonkeyPatch() @@ -384,21 +429,22 @@ def test_RGB_shuffle(): image_strategy_1.image_filenames, image_strategy_2.image_filenames ) + @pytest.mark.development -def test_Hyperspectral_shuffle(): +def test_raster_shuffle(): patch = MonkeyPatch() mock_filenames = [str(i) for i in range(20)] patch.setattr(os, "listdir", lambda x: mock_filenames) - image_strategy_1 = HyperspectralImageStrategy( + image_strategy_1 = RasterImageStrategy( image_path="tests/segmentation_utils_tests/test_strategies", image_size=(224, 224), package=MockRasterio(n=3, size=(224, 224), bands=3, dtypes=["uint8"]), ) - image_strategy_2 = HyperspectralImageStrategy( + image_strategy_2 = RasterImageStrategy( image_path="tests/segmentation_utils_tests/test_strategies", image_size=(224, 224), package=MockRasterio(n=3, size=(224, 224), bands=3, dtypes=["uint8"]), @@ -414,21 +460,22 @@ def test_Hyperspectral_shuffle(): image_strategy_1.image_filenames, image_strategy_2.image_filenames ) + @pytest.mark.development -def test_Hyperspectral_mt_shuffle(): +def test_raster_mt_shuffle(): patch = MonkeyPatch() mock_filenames = [str(i) for i in range(20)] patch.setattr(os, "listdir", lambda x: mock_filenames) - image_strategy_1 = HyperspectralImageStrategyMultiThread( + image_strategy_1 = RasterImageStrategyMultiThread( image_path="tests/segmentation_utils_tests/test_strategies", image_size=(224, 224), package=MockRasterio(n=3, size=(224, 224), bands=3, dtypes=["uint8"]), ) - image_strategy_2 = HyperspectralImageStrategy( + image_strategy_2 = RasterImageStrategy( image_path="tests/segmentation_utils_tests/test_strategies", image_size=(224, 224), package=MockRasterio(n=3, size=(224, 224), bands=3, dtypes=["uint8"]), @@ -444,15 +491,16 @@ def test_Hyperspectral_mt_shuffle(): image_strategy_1.image_filenames, image_strategy_2.image_filenames ) + @pytest.mark.development -def test_Hyperspectral_mt_image_in_order(): +def test_raster_mt_image_in_order(): patch = MonkeyPatch() mock_filenames = [str(i) for i in range(20)] patch.setattr(os, "listdir", lambda x: mock_filenames) mock_package = MockRasterio(n=3, size=(224, 224), bands=3, dtypes=["uint8"]) - image_strategy = HyperspectralImageStrategyMultiThread( + image_strategy = RasterImageStrategyMultiThread( image_path="tests/segmentation_utils_tests/test_strategies", image_size=(224, 224), package=mock_package, diff --git a/utilities/segmentation_utils/reading_strategies.py b/utilities/segmentation_utils/reading_strategies.py index c570ef0..59d1616 100644 --- a/utilities/segmentation_utils/reading_strategies.py +++ b/utilities/segmentation_utils/reading_strategies.py @@ -5,7 +5,9 @@ import numpy as np import rasterio +import spectral from PIL import Image +from scipy.ndimage import zoom class IReader(Protocol): @@ -138,7 +140,74 @@ def shuffle_filenames(self, seed: int) -> None: self.image_filenames = self.image_filenames[shuffled_indices] -class HyperspectralImageStrategy: +class HSImageStrategy: + """ + Reads hyperspectral imagedata using Spectral Python + """ + + def __init__( + self, image_path: str, image_size: tuple[int, int], package: Any = spectral + ) -> None: + self.image_path = image_path + self.image_filenames = np.array(sorted(os.listdir(self.image_path))) + self.image_size = image_size + self.package = package + self.bands = self.__get_channels() + + def __get_channels(self) -> int: + # Open the first image to determine the number of channels + first_image = self.package.open_image( + os.path.join(self.image_path, self.image_filenames[0]) + ) + return first_image.shape[-1] if len(first_image.shape) == 3 else 1 + + def read_batch(self, batch_size, dataset_index) -> np.ndarray: + # read images with Spectral Python + batch_filenames = self.image_filenames[ + dataset_index : dataset_index + batch_size + ] + + images = np.zeros( + (batch_size, self.image_size[0], self.image_size[1], self.bands) + ) + is_color = True + for i in range(batch_size): + image = self.package.open_image( + os.path.join(self.image_path, batch_filenames[i]) + ) + image_data = image.load() + + # Calculate the zoom factor for resizing + zoom_factor = ( + self.image_size[0] / image_data.shape[0], + self.image_size[1] / image_data.shape[1], + 1, + ) + + # Resize the image using scipy's zoom function + resized_image = zoom(image_data, zoom_factor, order=1) + + if len(resized_image.shape) == 2 and is_color: + images = np.zeros((batch_size, self.image_size[0], self.image_size[1])) + is_color = False + images[i, ...] = resized_image + return images + + def get_dataset_size(self, mini_batch) -> int: + dataset_size = int(np.floor(len(self.image_filenames) / float(mini_batch))) + return dataset_size + + def get_image_size(self) -> tuple[int, int]: + return self.image_size + + def shuffle_filenames(self, seed: int) -> None: + state = np.random.RandomState(seed) + shuffled_indices = state.permutation(len(self.image_filenames)) + shuffled_indices = shuffled_indices.astype(int) + self.image_filenames = self.image_filenames[shuffled_indices] + + +class RasterImageStrategy: # read images with rasterio def __init__( self, @@ -192,7 +261,7 @@ def shuffle_filenames(self, seed: int) -> None: self.image_filenames = self.image_filenames[shuffled_indices] -class HyperspectralImageStrategyMultiThread: +class RasterImageStrategyMultiThread: # read images with rasterio def __init__( self, From a14348c9c4a321d67a402dc9aa87ef12c928e5d3 Mon Sep 17 00:00:00 2001 From: Sajtospoga01 Date: Sun, 27 Aug 2023 15:46:58 +0100 Subject: [PATCH 66/75] adds scipy to the dependency list --- pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/pyproject.toml b/pyproject.toml index 6f05b5b..332e653 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -22,6 +22,7 @@ dependencies = [ "tensorflow >= 2.10", "toml >= 0.10.2", "spectral >= 0.23.1", + "scipy >= 1.10.0", ] [tool.setuptools] From 65513288e98d738e6ad1e049d3e04ccb760ee533 Mon Sep 17 00:00:00 2001 From: Sajtospoga01 Date: Sun, 27 Aug 2023 15:50:47 +0100 Subject: [PATCH 67/75] update pipeline to use toml file for dependency install --- .github/workflows/development.yml | 2 +- pyproject.toml | 2 ++ requirements.txt | 7 ------- 3 files changed, 3 insertions(+), 8 deletions(-) delete mode 100644 requirements.txt diff --git a/.github/workflows/development.yml b/.github/workflows/development.yml index 8e04cb9..0488813 100644 --- a/.github/workflows/development.yml +++ b/.github/workflows/development.yml @@ -62,7 +62,7 @@ jobs: run: | python -m pip install --upgrade pip pip install pre-commit - if [ -f requirements.txt ]; then pip install -r requirements.txt; fi + pip install .[dev] - name: Lint with pylint run: | pip install pylint diff --git a/pyproject.toml b/pyproject.toml index 332e653..1f1081b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -23,6 +23,8 @@ dependencies = [ "toml >= 0.10.2", "spectral >= 0.23.1", "scipy >= 1.10.0", + "tqdm >= 4.64.1", + "pandas >= 1.5.1", ] [tool.setuptools] diff --git a/requirements.txt b/requirements.txt deleted file mode 100644 index 1200270..0000000 --- a/requirements.txt +++ /dev/null @@ -1,7 +0,0 @@ -tensorflow==2.10 -numpy==1.24.1 -rasterio==1.3.6 -Pillow==9.4.0 -tqdm==4.64.1 -pandas==1.5.1 -toml==0.10.2 From 04f976bd7a8f92b6094ad1ac425b91e9b3e4b4da Mon Sep 17 00:00:00 2001 From: Sajtospoga01 Date: Sun, 27 Aug 2023 15:53:04 +0100 Subject: [PATCH 68/75] update all stages to install dependencies from toml config --- .github/workflows/development.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/development.yml b/.github/workflows/development.yml index 0488813..467af14 100644 --- a/.github/workflows/development.yml +++ b/.github/workflows/development.yml @@ -26,7 +26,7 @@ jobs: run: | python -m pip install --upgrade pip pip install pre-commit - if [ -f requirements.txt ]; then pip install -r requirements.txt; fi + pip install .[dev] test: needs: dependency-install @@ -42,7 +42,7 @@ jobs: run: | python -m pip install --upgrade pip pip install pytest pytest-mock - if [ -f requirements.txt ]; then pip install -r requirements.txt; fi + pip install .[dev] - name: Test with pytest run: | python -m pytest -v -m "not staging" From 58d6d2cc7a9ee39683b2603215e91996843887f2 Mon Sep 17 00:00:00 2001 From: Sajtospoga01 Date: Sun, 27 Aug 2023 16:27:27 +0100 Subject: [PATCH 69/75] adds experimental hsi strategy based on cv2 --- .../test_strategies.py | 41 ++++----------- .../segmentation_utils/reading_strategies.py | 50 +++++++++---------- 2 files changed, 32 insertions(+), 59 deletions(-) diff --git a/tests/segmentation_utils_tests.py/test_strategies.py b/tests/segmentation_utils_tests.py/test_strategies.py index 0632f63..da5d553 100644 --- a/tests/segmentation_utils_tests.py/test_strategies.py +++ b/tests/segmentation_utils_tests.py/test_strategies.py @@ -43,8 +43,9 @@ def __exit__(self, type, value, traceback): def get_count(self): return self.call_count + class SPyMock: - def __init__(self,n,size,bands) -> None: + def __init__(self, n, size, bands) -> None: self.n = n self.size = size self.bands = bands @@ -52,16 +53,16 @@ def __init__(self,n,size,bands) -> None: @property def shape(self): - return (self.size[0],self.size[1],self.bands) + return (self.size[0], self.size[1], self.bands) - def open_image(self,*args,**kwargs): + def open_image(self, *args, **kwargs): return self - - def load(self,*args,**kwargs): - self.call_count += 1 - return np.full((self.size[0],self.size[1],self.bands),self.call_count,np.uint8) - + def load(self, *args, **kwargs): + self.call_count += 1 + return np.full( + (self.size[0], self.size[1], self.bands), self.call_count, np.uint8 + ) @pytest.mark.development @@ -201,8 +202,6 @@ def test_raster_open(): assert read_images.shape == (2, 224, 224, 3) - - @pytest.mark.development def test_raster_mt_open(): patch = MonkeyPatch() @@ -226,28 +225,6 @@ def test_raster_mt_open(): assert read_images.shape == (2, 224, 224, 3) -@pytest.mark.development -def test_hyperspectral_open(): - patch = MonkeyPatch() - mock_filenames = ["a", "b", "c"] - patch.setattr(os, "listdir", lambda x: mock_filenames) - - image_path = "tests/segmentation_utils_tests/test_strategies" - - mock_data = { - "n": 3, - "size": (224, 224), - "bands": 3, - } - strategy = HSImageStrategy( - image_path, (224, 224), package=SPyMock(**mock_data) - ) - - read_images = strategy.read_batch(2, 0) - - assert read_images.shape == (2, 224, 224, 3) - - @pytest.mark.development def test_empty_batch(): patch = MonkeyPatch() diff --git a/utilities/segmentation_utils/reading_strategies.py b/utilities/segmentation_utils/reading_strategies.py index 59d1616..84960c6 100644 --- a/utilities/segmentation_utils/reading_strategies.py +++ b/utilities/segmentation_utils/reading_strategies.py @@ -3,11 +3,10 @@ from concurrent.futures import ThreadPoolExecutor from typing import Any, Protocol +import cv2 import numpy as np import rasterio -import spectral from PIL import Image -from scipy.ndimage import zoom class IReader(Protocol): @@ -142,11 +141,11 @@ def shuffle_filenames(self, seed: int) -> None: class HSImageStrategy: """ - Reads hyperspectral imagedata using Spectral Python + Reads hyperspectral optimized images with OpenCV """ def __init__( - self, image_path: str, image_size: tuple[int, int], package: Any = spectral + self, image_path: str, image_size: tuple[int, int], package: Any = cv2 ) -> None: self.image_path = image_path self.image_filenames = np.array(sorted(os.listdir(self.image_path))) @@ -162,35 +161,32 @@ def __get_channels(self) -> int: return first_image.shape[-1] if len(first_image.shape) == 3 else 1 def read_batch(self, batch_size, dataset_index) -> np.ndarray: - # read images with Spectral Python + # Read a sample image to determine the number of bands + sample_image_path = os.path.join(self.image_path, self.image_filenames[0]) + sample_image = self.package.imread(sample_image_path, self.package.IMREAD_UNCHANGED) + num_bands = sample_image.shape[2] if len(sample_image.shape) == 3 else 1 + + # Initialize images array + images = np.zeros((batch_size, self.image_size[1], self.image_size[0], num_bands)) + + # Read images with OpenCV batch_filenames = self.image_filenames[ dataset_index : dataset_index + batch_size ] - images = np.zeros( - (batch_size, self.image_size[0], self.image_size[1], self.bands) - ) - is_color = True for i in range(batch_size): - image = self.package.open_image( - os.path.join(self.image_path, batch_filenames[i]) - ) - image_data = image.load() - - # Calculate the zoom factor for resizing - zoom_factor = ( - self.image_size[0] / image_data.shape[0], - self.image_size[1] / image_data.shape[1], - 1, - ) - - # Resize the image using scipy's zoom function - resized_image = zoom(image_data, zoom_factor, order=1) + image_path = os.path.join(self.image_path, batch_filenames[i]) + image = self.package.imread(image_path, self.package.IMREAD_UNCHANGED) + + # Resize the image + image = self.package.resize(image, self.image_size) + + # If the image is color, convert BGR to RGB + if len(image.shape) == 3 and image.shape[2] == 3: + image = self.package.cvtColor(image, self.package.COLOR_BGR2RGB) + + images[i, ...] = image - if len(resized_image.shape) == 2 and is_color: - images = np.zeros((batch_size, self.image_size[0], self.image_size[1])) - is_color = False - images[i, ...] = resized_image return images def get_dataset_size(self, mini_batch) -> int: From 872916622ce61cea2532b2385529590518c08137 Mon Sep 17 00:00:00 2001 From: Sajtospoga01 Date: Sun, 27 Aug 2023 16:29:53 +0100 Subject: [PATCH 70/75] removes scipy and SPy --- pyproject.toml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 1f1081b..6305a5d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -21,10 +21,9 @@ dependencies = [ "Pillow >= 9.4.0", "tensorflow >= 2.10", "toml >= 0.10.2", - "spectral >= 0.23.1", - "scipy >= 1.10.0", "tqdm >= 4.64.1", "pandas >= 1.5.1", + "opencv-python >= 4.7.0.68" ] [tool.setuptools] From 8d055b284844b0680e9e2c9d141f523d41653f3a Mon Sep 17 00:00:00 2001 From: Sajtospoga01 Date: Sun, 27 Aug 2023 16:53:46 +0100 Subject: [PATCH 71/75] update get band function to use opencv in HSI strategy, updates opencv package to headless version --- pyproject.toml | 2 +- utilities/segmentation_utils/reading_strategies.py | 13 +++++-------- 2 files changed, 6 insertions(+), 9 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 6305a5d..d2be2d6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -23,7 +23,7 @@ dependencies = [ "toml >= 0.10.2", "tqdm >= 4.64.1", "pandas >= 1.5.1", - "opencv-python >= 4.7.0.68" + "opencv-python-headless >= 4.8.0.76" ] [tool.setuptools] diff --git a/utilities/segmentation_utils/reading_strategies.py b/utilities/segmentation_utils/reading_strategies.py index 84960c6..c091a21 100644 --- a/utilities/segmentation_utils/reading_strategies.py +++ b/utilities/segmentation_utils/reading_strategies.py @@ -155,19 +155,16 @@ def __init__( def __get_channels(self) -> int: # Open the first image to determine the number of channels - first_image = self.package.open_image( - os.path.join(self.image_path, self.image_filenames[0]) - ) - return first_image.shape[-1] if len(first_image.shape) == 3 else 1 + sample_image_path = os.path.join(self.image_path, self.image_filenames[0]) + sample_image = self.package.imread(sample_image_path, self.package.IMREAD_UNCHANGED) + return sample_image.shape[2] if len(sample_image.shape) == 3 else 1 def read_batch(self, batch_size, dataset_index) -> np.ndarray: # Read a sample image to determine the number of bands - sample_image_path = os.path.join(self.image_path, self.image_filenames[0]) - sample_image = self.package.imread(sample_image_path, self.package.IMREAD_UNCHANGED) - num_bands = sample_image.shape[2] if len(sample_image.shape) == 3 else 1 + # Initialize images array - images = np.zeros((batch_size, self.image_size[1], self.image_size[0], num_bands)) + images = np.zeros((batch_size, self.image_size[1], self.image_size[0], self.bands)) # Read images with OpenCV batch_filenames = self.image_filenames[ From 118371c56d45e377c1f6d0db12b75d542532f44b Mon Sep 17 00:00:00 2001 From: Sajtospoga01 Date: Sun, 27 Aug 2023 17:34:41 +0100 Subject: [PATCH 72/75] adds multi threaded hsi strategy --- .../test_strategies.py | 287 +++++++++++++++++- .../segmentation_utils/reading_strategies.py | 115 ++++++- 2 files changed, 379 insertions(+), 23 deletions(-) diff --git a/tests/segmentation_utils_tests.py/test_strategies.py b/tests/segmentation_utils_tests.py/test_strategies.py index da5d553..88ff6c3 100644 --- a/tests/segmentation_utils_tests.py/test_strategies.py +++ b/tests/segmentation_utils_tests.py/test_strategies.py @@ -6,8 +6,8 @@ from pytest import MonkeyPatch from utilities.segmentation_utils.reading_strategies import ( - HSImageStrategy, RasterImageStrategy, RasterImageStrategyMultiThread, - RGBImageStrategy) + HSImageStrategy, HSImageStrategyMultiThread, RasterImageStrategy, + RasterImageStrategyMultiThread, RGBImageStrategy) class MockRasterio: @@ -44,26 +44,35 @@ def get_count(self): return self.call_count -class SPyMock: +class CV2Mock: + IMREAD_UNCHANGED = 1 + COLOR_BGR2RGB = 1 + def __init__(self, n, size, bands) -> None: self.n = n self.size = size self.bands = bands self.call_count = 0 - @property - def shape(self): - return (self.size[0], self.size[1], self.bands) - - def open_image(self, *args, **kwargs): - return self - - def load(self, *args, **kwargs): + def imread(self, *args, **kwargs): self.call_count += 1 return np.full( (self.size[0], self.size[1], self.bands), self.call_count, np.uint8 ) + def resize(self, *args, **kwargs): + img = args[0] + size = args[1] + return np.full((size[0], size[1], self.bands), img[0,0,0], np.uint8) + + def cvtColor(self, *args, **kwargs): + img = args[0] + return np.full( + (self.size[0], self.size[1], self.bands), img[0,0,0], np.uint8 + ) + + def get_count(self): + return self.call_count @pytest.mark.development def test_read_batch_image_path() -> None: @@ -179,6 +188,59 @@ def test_raster_get_dataset_size() -> None: patch.undo() +@pytest.mark.development +def test_hsi_get_dataset_size() -> None: + # checking if the calculation is done correctly + patch = MonkeyPatch() + + mock_filenames = ["a", "b", "c"] + + patch.setattr(os, "listdir", lambda x: mock_filenames) + + image_strategy = HSImageStrategy( + image_path="tests/segmentation_utils_tests/test_strategies", + image_size=(224, 224), + package=CV2Mock(n=3, size=(224, 224), bands=3), + ) + + dataset = len(mock_filenames) # number of images in the specified path + mini_batch = 2 # number of images we want in each batch + expected_value = int( + np.floor(dataset / float(mini_batch)) + ) # number of sets of images we expect + + dataset_size = image_strategy.get_dataset_size(mini_batch) + assert dataset_size == expected_value + patch.undo() + patch.undo() + +@pytest.mark.development +def test_hsi_mt_get_dataset_size() -> None: + # checking if the calculation is done correctly + patch = MonkeyPatch() + + mock_filenames = ["a", "b", "c"] + + patch.setattr(os, "listdir", lambda x: mock_filenames) + + image_strategy = HSImageStrategyMultiThread( + image_path="tests/segmentation_utils_tests/test_strategies", + image_size=(224, 224), + package=CV2Mock(n=3, size=(224, 224), bands=3), + ) + + dataset = len(mock_filenames) # number of images in the specified path + mini_batch = 2 # number of images we want in each batch + expected_value = int( + np.floor(dataset / float(mini_batch)) + ) # number of sets of images we expect + + dataset_size = image_strategy.get_dataset_size(mini_batch) + assert dataset_size == expected_value + patch.undo() + patch.undo() + + @pytest.mark.development def test_raster_open(): patch = MonkeyPatch() @@ -225,6 +287,84 @@ def test_raster_mt_open(): assert read_images.shape == (2, 224, 224, 3) +@pytest.mark.development +def test_hsi_open(): + patch = MonkeyPatch() + mock_filenames = ["a", "b", "c"] + patch.setattr(os, "listdir", lambda x: mock_filenames) + + image_path = "tests/segmentation_utils_tests/test_strategies" + + mock_data = { + "n": 3, + "size": (224, 224), + "bands": 3, + } + strategy = HSImageStrategy(image_path, (224, 224), package=CV2Mock(**mock_data)) + + read_images = strategy.read_batch(2, 0) + + assert read_images.shape == (2, 224, 224, 3) + +@pytest.mark.development +def test_hsi_mt_open(): + patch = MonkeyPatch() + mock_filenames = ["a", "b", "c"] + patch.setattr(os, "listdir", lambda x: mock_filenames) + + image_path = "tests/segmentation_utils_tests/test_strategies" + + mock_data = { + "n": 3, + "size": (224, 224), + "bands": 3, + } + strategy = HSImageStrategyMultiThread(image_path, (224, 224), package=CV2Mock(**mock_data)) + + read_images = strategy.read_batch(2, 0) + + assert read_images.shape == (2, 224, 224, 3) + + +@pytest.mark.development +def test_hsi_get_channels(): + patch = MonkeyPatch() + mock_filenames = ["a", "b", "c"] + patch.setattr(os, "listdir", lambda x: mock_filenames) + + image_path = "tests/segmentation_utils_tests/test_strategies" + + mock_data = { + "n": 3, + "size": (224, 224), + "bands": 3, + } + strategy = HSImageStrategy(image_path, (224, 224), package=CV2Mock(**mock_data)) + + channels = strategy._HSImageStrategy__get_channels() + + assert channels == 3 + +@pytest.mark.development +def test_hsi_mt_get_channels(): + patch = MonkeyPatch() + mock_filenames = ["a", "b", "c"] + patch.setattr(os, "listdir", lambda x: mock_filenames) + + image_path = "tests/segmentation_utils_tests/test_strategies" + + mock_data = { + "n": 3, + "size": (224, 224), + "bands": 3, + } + strategy = HSImageStrategyMultiThread(image_path, (224, 224), package=CV2Mock(**mock_data)) + + channels = strategy._HSImageStrategyMultiThread__get_channels() + + assert channels == 3 + + @pytest.mark.development def test_empty_batch(): patch = MonkeyPatch() @@ -359,7 +499,42 @@ def test_raster_get_image_size(): @pytest.mark.development -def test_raster_MT_get_image_size(): +def test_hsi_get_image_size(): + patch = MonkeyPatch() + + mock_filenames = ["a" for _ in range(20)] + + patch.setattr(os, "listdir", lambda x: mock_filenames) + + image_strategy = HSImageStrategy( + image_path="tests/segmentation_utils_tests/test_strategies", + image_size=(224, 224), + package=CV2Mock(n=3, size=(224, 224), bands=3), + ) + + result = image_strategy.get_image_size() + assert result == (224, 224) + +@pytest.mark.development +def test_hsi_mt_get_image_size(): + patch = MonkeyPatch() + + mock_filenames = ["a" for _ in range(20)] + + patch.setattr(os, "listdir", lambda x: mock_filenames) + + image_strategy = HSImageStrategyMultiThread( + image_path="tests/segmentation_utils_tests/test_strategies", + image_size=(224, 224), + package=CV2Mock(n=3, size=(224, 224), bands=3), + ) + + result = image_strategy.get_image_size() + assert result == (224, 224) + + +@pytest.mark.development +def test_raster_mt_get_image_size(): patch = MonkeyPatch() mock_filenames = ["a" for _ in range(20)] @@ -377,7 +552,7 @@ def test_raster_MT_get_image_size(): @pytest.mark.development -def test_RGB_shuffle(): +def test_rgb_shuffle(): patch = MonkeyPatch() mock_filenames = [str(i) for i in range(20)] @@ -493,3 +668,89 @@ def test_raster_mt_image_in_order(): assert np.array_equal( result[i - call_count, :, :, :], np.full((224, 224, 3), i + 1) ) + + +@pytest.mark.development +def test_hsi_shuffle(): + patch = MonkeyPatch() + + mock_filenames = [str(i) for i in range(20)] + + patch.setattr(os, "listdir", lambda x: mock_filenames) + + image_strategy_1 = HSImageStrategy( + image_path="tests/segmentation_utils_tests/test_strategies", + image_size=(224, 224), + package=CV2Mock(n=3, size=(224, 224), bands=3), + ) + + image_strategy_2 = HSImageStrategy( + image_path="tests/segmentation_utils_tests/test_strategies", + image_size=(224, 224), + package=CV2Mock(n=3, size=(224, 224), bands=3), + ) + + n = 100 + + for i in range(n): + image_strategy_1.shuffle_filenames(i) + image_strategy_2.shuffle_filenames(i) + + assert np.array_equal( + image_strategy_1.image_filenames, image_strategy_2.image_filenames + ) + +@pytest.mark.development +def test_hsi_mt_shuffle(): + patch = MonkeyPatch() + + mock_filenames = [str(i) for i in range(20)] + + patch.setattr(os, "listdir", lambda x: mock_filenames) + + image_strategy_1 = HSImageStrategyMultiThread( + image_path="tests/segmentation_utils_tests/test_strategies", + image_size=(224, 224), + package=CV2Mock(n=3, size=(224, 224), bands=3), + ) + + image_strategy_2 = HSImageStrategyMultiThread( + image_path="tests/segmentation_utils_tests/test_strategies", + image_size=(224, 224), + package=CV2Mock(n=3, size=(224, 224), bands=3), + ) + + n = 100 + + for i in range(n): + image_strategy_1.shuffle_filenames(i) + image_strategy_2.shuffle_filenames(i) + + assert np.array_equal( + image_strategy_1.image_filenames, image_strategy_2.image_filenames + ) + +@pytest.mark.development +def test_hsi_mt_image_in_order(): + patch = MonkeyPatch() + + mock_filenames = [str(i) for i in range(20)] + + patch.setattr(os, "listdir", lambda x: mock_filenames) + mock_package = CV2Mock(n=3, size=(224, 224), bands=3) + image_strategy = HSImageStrategyMultiThread( + image_path="tests/segmentation_utils_tests/test_strategies", + image_size=(224, 224), + package=mock_package, + ) + + batch_size = 10 + + call_count = mock_package.get_count() + + result = image_strategy.read_batch(batch_size, 0) + + for i in range(call_count, call_count + batch_size): + assert np.array_equal( + result[i - call_count, :, :, :], np.full((224, 224, 3), i + 1) + ) diff --git a/utilities/segmentation_utils/reading_strategies.py b/utilities/segmentation_utils/reading_strategies.py index c091a21..5e30a14 100644 --- a/utilities/segmentation_utils/reading_strategies.py +++ b/utilities/segmentation_utils/reading_strategies.py @@ -24,6 +24,10 @@ def shuffle_filenames(self, seed: int) -> None: class RGBImageStrategy: + """ + Strategy optimized for reading RGB images powered by backend PIL. + """ + def __init__( self, image_path: str, @@ -71,6 +75,11 @@ def shuffle_filenames(self, seed: int) -> None: class RGBImageStrategyMultiThread: + """ + Strategy optimized for reading RGB images powered by backend PIL. + Multi threaded version. + """ + def __init__( self, image_path: str, @@ -141,7 +150,7 @@ def shuffle_filenames(self, seed: int) -> None: class HSImageStrategy: """ - Reads hyperspectral optimized images with OpenCV + Strategy optimized for reading hyperspectral images powered by backend OpenCV """ def __init__( @@ -156,15 +165,18 @@ def __init__( def __get_channels(self) -> int: # Open the first image to determine the number of channels sample_image_path = os.path.join(self.image_path, self.image_filenames[0]) - sample_image = self.package.imread(sample_image_path, self.package.IMREAD_UNCHANGED) + sample_image = self.package.imread( + sample_image_path, self.package.IMREAD_UNCHANGED + ) return sample_image.shape[2] if len(sample_image.shape) == 3 else 1 def read_batch(self, batch_size, dataset_index) -> np.ndarray: # Read a sample image to determine the number of bands - # Initialize images array - images = np.zeros((batch_size, self.image_size[1], self.image_size[0], self.bands)) + images = np.zeros( + (batch_size, self.image_size[1], self.image_size[0], self.bands) + ) # Read images with OpenCV batch_filenames = self.image_filenames[ @@ -174,25 +186,99 @@ def read_batch(self, batch_size, dataset_index) -> np.ndarray: for i in range(batch_size): image_path = os.path.join(self.image_path, batch_filenames[i]) image = self.package.imread(image_path, self.package.IMREAD_UNCHANGED) - + # Resize the image image = self.package.resize(image, self.image_size) - + # If the image is color, convert BGR to RGB if len(image.shape) == 3 and image.shape[2] == 3: image = self.package.cvtColor(image, self.package.COLOR_BGR2RGB) - + + images[i, ...] = image + + return images + + def get_dataset_size(self, mini_batch) -> int: + dataset_size = int(np.floor(len(self.image_filenames) / float(mini_batch))) + return dataset_size + + def get_image_size(self) -> tuple[int, int]: + return self.image_size + + def shuffle_filenames(self, seed: int) -> None: + state = np.random.RandomState(seed) + shuffled_indices = state.permutation(len(self.image_filenames)) + shuffled_indices = shuffled_indices.astype(int) + self.image_filenames = self.image_filenames[shuffled_indices] + + +class HSImageStrategyMultiThread: + def __init__( + self, + image_path: str, + image_size: tuple[int, int], + package: Any = cv2, + max_workers: int = 8, + ) -> None: + self.image_path = image_path + self.image_filenames = np.array(sorted(os.listdir(self.image_path))) + self.image_size = image_size + self.package = package + self.bands = self.__get_channels() + self.max_workers = max_workers + + def __get_channels(self) -> int: + # Open the first image to determine the number of channels + sample_image_path = os.path.join(self.image_path, self.image_filenames[0]) + sample_image = self.package.imread( + sample_image_path, self.package.IMREAD_UNCHANGED + ) + return sample_image.shape[2] if len(sample_image.shape) == 3 else 1 + + def __read_single_image( + self, filename: str, package: Any, image_size: tuple[int, int, int] + ) -> np.ndarray: + image = package.imread(filename, package.IMREAD_UNCHANGED) + image = package.resize(image, image_size) + if len(image.shape) == 3 and image.shape[2] == 3: + image = package.cvtColor(image, package.COLOR_BGR2RGB) + return image + + def read_batch(self, batch_size, dataset_index) -> np.ndarray: + # Initialize images array + images = np.zeros( + (batch_size, self.image_size[1], self.image_size[0], self.bands) + ) + + # Read images with OpenCV + batch_filenames = self.image_filenames[ + dataset_index : dataset_index + batch_size + ] + + image_paths = [ + os.path.join(self.image_path, batch_filenames[i]) for i in range(batch_size) + ] + + with ThreadPoolExecutor() as executor: + results = executor.map( + self.__read_single_image, + image_paths, + [self.package] * batch_size, + [self.image_size] * batch_size, + ) + + for i, image in enumerate(results): images[i, ...] = image return images - + def get_dataset_size(self, mini_batch) -> int: dataset_size = int(np.floor(len(self.image_filenames) / float(mini_batch))) return dataset_size - + def get_image_size(self) -> tuple[int, int]: return self.image_size - + def shuffle_filenames(self, seed: int) -> None: state = np.random.RandomState(seed) shuffled_indices = state.permutation(len(self.image_filenames)) @@ -201,6 +287,10 @@ def shuffle_filenames(self, seed: int) -> None: class RasterImageStrategy: + """ + Strategy optimized for reading raster images powered by backend rasterio. + """ + # read images with rasterio def __init__( self, @@ -255,6 +345,11 @@ def shuffle_filenames(self, seed: int) -> None: class RasterImageStrategyMultiThread: + """ + Strategy optimized for reading raster images powered by backend rasterio. + Multi threaded version. + """ + # read images with rasterio def __init__( self, From 0d9c97f910ff723201a158a99176b010794c96a0 Mon Sep 17 00:00:00 2001 From: Sajtospoga01 Date: Sun, 27 Aug 2023 17:35:29 +0100 Subject: [PATCH 73/75] fixed linting problems in strategies file --- .../test_strategies.py | 21 +++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/tests/segmentation_utils_tests.py/test_strategies.py b/tests/segmentation_utils_tests.py/test_strategies.py index 88ff6c3..47e14a9 100644 --- a/tests/segmentation_utils_tests.py/test_strategies.py +++ b/tests/segmentation_utils_tests.py/test_strategies.py @@ -63,17 +63,16 @@ def imread(self, *args, **kwargs): def resize(self, *args, **kwargs): img = args[0] size = args[1] - return np.full((size[0], size[1], self.bands), img[0,0,0], np.uint8) + return np.full((size[0], size[1], self.bands), img[0, 0, 0], np.uint8) def cvtColor(self, *args, **kwargs): img = args[0] - return np.full( - (self.size[0], self.size[1], self.bands), img[0,0,0], np.uint8 - ) + return np.full((self.size[0], self.size[1], self.bands), img[0, 0, 0], np.uint8) def get_count(self): return self.call_count + @pytest.mark.development def test_read_batch_image_path() -> None: # checking if the file is being opened and read correctly @@ -214,6 +213,7 @@ def test_hsi_get_dataset_size() -> None: patch.undo() patch.undo() + @pytest.mark.development def test_hsi_mt_get_dataset_size() -> None: # checking if the calculation is done correctly @@ -306,6 +306,7 @@ def test_hsi_open(): assert read_images.shape == (2, 224, 224, 3) + @pytest.mark.development def test_hsi_mt_open(): patch = MonkeyPatch() @@ -319,7 +320,9 @@ def test_hsi_mt_open(): "size": (224, 224), "bands": 3, } - strategy = HSImageStrategyMultiThread(image_path, (224, 224), package=CV2Mock(**mock_data)) + strategy = HSImageStrategyMultiThread( + image_path, (224, 224), package=CV2Mock(**mock_data) + ) read_images = strategy.read_batch(2, 0) @@ -345,6 +348,7 @@ def test_hsi_get_channels(): assert channels == 3 + @pytest.mark.development def test_hsi_mt_get_channels(): patch = MonkeyPatch() @@ -358,7 +362,9 @@ def test_hsi_mt_get_channels(): "size": (224, 224), "bands": 3, } - strategy = HSImageStrategyMultiThread(image_path, (224, 224), package=CV2Mock(**mock_data)) + strategy = HSImageStrategyMultiThread( + image_path, (224, 224), package=CV2Mock(**mock_data) + ) channels = strategy._HSImageStrategyMultiThread__get_channels() @@ -515,6 +521,7 @@ def test_hsi_get_image_size(): result = image_strategy.get_image_size() assert result == (224, 224) + @pytest.mark.development def test_hsi_mt_get_image_size(): patch = MonkeyPatch() @@ -700,6 +707,7 @@ def test_hsi_shuffle(): image_strategy_1.image_filenames, image_strategy_2.image_filenames ) + @pytest.mark.development def test_hsi_mt_shuffle(): patch = MonkeyPatch() @@ -730,6 +738,7 @@ def test_hsi_mt_shuffle(): image_strategy_1.image_filenames, image_strategy_2.image_filenames ) + @pytest.mark.development def test_hsi_mt_image_in_order(): patch = MonkeyPatch() From 2defb3e8d85c0565ed7be7a352044eb97e4c499d Mon Sep 17 00:00:00 2001 From: Sajtospoga01 Date: Sun, 27 Aug 2023 22:56:09 +0100 Subject: [PATCH 74/75] updates pipelines to rely on pyproject.toml to install packages. Updates strategy tests to use fixtures for way more scalability. adds development markers for remaining tests --- .github/workflows/development.yml | 2 +- .github/workflows/documentation.yml | 4 +- .github/workflows/master.yml | 16 +- .github/workflows/staging.yml | 13 +- .../flow_reader_test.py | 13 +- .../test_flowreader.py | 2 +- .../test_strategies.py | 680 ++++-------------- .../image_cutting_test.py | 12 + 8 files changed, 167 insertions(+), 575 deletions(-) diff --git a/.github/workflows/development.yml b/.github/workflows/development.yml index 467af14..9943143 100644 --- a/.github/workflows/development.yml +++ b/.github/workflows/development.yml @@ -45,7 +45,7 @@ jobs: pip install .[dev] - name: Test with pytest run: | - python -m pytest -v -m "not staging" + python -m pytest -v -m "development" devops: needs: test diff --git a/.github/workflows/documentation.yml b/.github/workflows/documentation.yml index bbdd86f..628a486 100644 --- a/.github/workflows/documentation.yml +++ b/.github/workflows/documentation.yml @@ -34,15 +34,13 @@ jobs: - name: Install dependencies run: | python -m pip install --upgrade pip - if [ -f requirements.txt ]; then pip install -r requirements.txt; fi + pip install .[dev] pip install -U sphinx pip install furo - name: Build documentation run: | cd docs - - sphinx-apidoc -e -M --force -o . ../utilities/ make html - name: Upload build data diff --git a/.github/workflows/master.yml b/.github/workflows/master.yml index 4b3ba3c..5979c2d 100644 --- a/.github/workflows/master.yml +++ b/.github/workflows/master.yml @@ -26,7 +26,7 @@ jobs: run: | python -m pip install --upgrade pip pip install pre-commit - if [ -f requirements.txt ]; then pip install -r requirements.txt; fi + pip install .[dev] test: needs: dependency-install @@ -42,10 +42,16 @@ jobs: run: | python -m pip install --upgrade pip pip install pytest pytest-mock - if [ -f requirements.txt ]; then pip install -r requirements.txt; fi - - name: Test with pytest + pip install .[dev] + - name: Test with pytest development run: | - python -m pytest + python -m pytest -v -m "development" + - name: Test with pytest staging + run: | + python -m pytest -v -m "staging" + - name: Test with pytest production + run: | + python -m pytest -v -m "production" devops: needs: test @@ -62,7 +68,7 @@ jobs: run: | python -m pip install --upgrade pip pip install pre-commit - if [ -f requirements.txt ]; then pip install -r requirements.txt; fi + pip install .[dev] - name: Lint with pylint run: | pip install pylint diff --git a/.github/workflows/staging.yml b/.github/workflows/staging.yml index 72bf0c4..f863dc4 100644 --- a/.github/workflows/staging.yml +++ b/.github/workflows/staging.yml @@ -26,7 +26,7 @@ jobs: run: | python -m pip install --upgrade pip pip install pre-commit - if [ -f requirements.txt ]; then pip install -r requirements.txt; fi + pip install .[dev] test: needs: dependency-install @@ -42,10 +42,13 @@ jobs: run: | python -m pip install --upgrade pip pip install pytest pytest-mock - if [ -f requirements.txt ]; then pip install -r requirements.txt; fi - - name: Test with pytest + pip install .[dev] + - name: Test with pytest development run: | - python -m pytest + python -m pytest -v -m "development" + - name: Test with pytest staging + run: | + python -m pytest -v -m "staging" devops: needs: test @@ -62,7 +65,7 @@ jobs: run: | python -m pip install --upgrade pip pip install pre-commit - if [ -f requirements.txt ]; then pip install -r requirements.txt; fi + pip install .[dev] - name: Lint with pylint run: | pip install pylint diff --git a/tests/segmentation_utils_tests.py/flow_reader_test.py b/tests/segmentation_utils_tests.py/flow_reader_test.py index af89e19..20d2fb2 100644 --- a/tests/segmentation_utils_tests.py/flow_reader_test.py +++ b/tests/segmentation_utils_tests.py/flow_reader_test.py @@ -36,6 +36,7 @@ def flow_from_directory_mock(*args, **kwargs): # tests +@pytest.mark.development def test_makes_flow_generator() -> None: patch = MonkeyPatch() # mock an imagedatagenerator from keras @@ -48,7 +49,7 @@ def test_makes_flow_generator() -> None: # create a flow generator FlowGenerator(**generator_args) - +@pytest.mark.development def test_makes_flow_generator_with_queue() -> None: patch = MonkeyPatch() # mock an imagedatagenerator from keras @@ -78,7 +79,7 @@ def test_makes_flow_generator_with_queue() -> None: generator = FlowGenerator(**new_generator_args) generator.set_preprocessing_pipeline(image_queue, mask_queue) - +@pytest.mark.development def test_makes_flow_generator_wrong_shape() -> None: try: patch = MonkeyPatch() @@ -98,7 +99,7 @@ def test_makes_flow_generator_wrong_shape() -> None: except ValueError: assert True - +@pytest.mark.development def test_makes_flow_generator_wrong_dimension() -> None: try: patch = MonkeyPatch() @@ -118,7 +119,7 @@ def test_makes_flow_generator_wrong_dimension() -> None: except ValueError: assert True - +@pytest.mark.development def test_flow_generator_with_preprocess() -> None: patch = MonkeyPatch() # mock an imagedatagenerator from keras @@ -141,7 +142,7 @@ def test_flow_generator_with_preprocess() -> None: patch.undo() patch.undo() - +@pytest.mark.development def test_get_dataset_size() -> None: patch = MonkeyPatch() patch.setattr(os, "listdir", lambda x: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]) @@ -166,7 +167,7 @@ def test_get_dataset_size() -> None: patch.undo() patch.undo() - +@pytest.mark.development def test_get_generator() -> None: patch = MonkeyPatch() diff --git a/tests/segmentation_utils_tests.py/test_flowreader.py b/tests/segmentation_utils_tests.py/test_flowreader.py index 5bdcd36..4648e19 100644 --- a/tests/segmentation_utils_tests.py/test_flowreader.py +++ b/tests/segmentation_utils_tests.py/test_flowreader.py @@ -295,7 +295,7 @@ def test_read_batch_get_item_expand_dim_fail() -> None: batch = generator[0] - +@pytest.mark.development def test_raises_error_not_compatible_shape() -> None: with pytest.raises(ValueError) as exc_info: patch = MonkeyPatch() diff --git a/tests/segmentation_utils_tests.py/test_strategies.py b/tests/segmentation_utils_tests.py/test_strategies.py index 47e14a9..bbc01b3 100644 --- a/tests/segmentation_utils_tests.py/test_strategies.py +++ b/tests/segmentation_utils_tests.py/test_strategies.py @@ -73,256 +73,163 @@ def get_count(self): return self.call_count -@pytest.mark.development -def test_read_batch_image_path() -> None: - # checking if the file is being opened and read correctly - patch = MonkeyPatch() - - mock_filenames = ["a", "b", "c"] - - patch.setattr(os, "listdir", lambda x: mock_filenames) - - patch.setattr( - Image, - "open", - lambda _: Image.fromarray(np.ones((224, 224, 3)).astype(np.uint8)), - ) - - image_strategy = RGBImageStrategy( - image_path="tests/segmentation_utils_tests/test_strategies", - image_size=(224, 224), - image_resample=Image.Resampling.NEAREST, - ) - - batch_size = 2 - dataset_index = 0 - result = image_strategy.read_batch(batch_size, dataset_index) +@pytest.fixture +def rasterio_mock() -> MockRasterio: + return MockRasterio(n=3, size=(224, 224), bands=3, dtypes=["uint8"]) - assert result.shape == (2, 224, 224, 3) - patch.undo() - patch.undo() +@pytest.fixture +def cv2_mock() -> CV2Mock: + return CV2Mock(n=3, size=(224, 224), bands=3) -@pytest.mark.development -def test_read_batch_returns_nparray() -> None: - # checking if the returned value is a numpy array - patch = MonkeyPatch() +@pytest.fixture +def directory_mock(monkeypatch): + mock_filenames = [str(i) for i in range(20)] + monkeypatch.setattr(os, "listdir", lambda x: mock_filenames) + return len(mock_filenames) - patch.setattr(os, "listdir", lambda x: ["a", "b", "c"]) - patch.setattr( +@pytest.fixture +def mock_image_open(monkeypatch): + monkeypatch.setattr( Image, "open", lambda _: Image.fromarray(np.ones((224, 224, 3)).astype(np.uint8)), ) - image_strategy = RGBImageStrategy( + +@pytest.fixture +def rgb_strategy(mock_image_open) -> RGBImageStrategy: + return RGBImageStrategy( image_path="tests/segmentation_utils_tests/test_strategies", image_size=(224, 224), image_resample=Image.Resampling.NEAREST, ) - batch_size = 2 - dataset_index = 0 - - result = image_strategy.read_batch(batch_size, dataset_index) - assert isinstance(result, np.ndarray) - assert result.shape == (2, 224, 224, 3) - - patch.undo() - patch.undo() - -@pytest.mark.development -def test_RGB_get_dataset_size() -> None: - # checking if the calculation is done correctly - patch = MonkeyPatch() - - mock_filenames = ["a", "b", "c"] - - patch.setattr(os, "listdir", lambda x: mock_filenames) - - image_strategy = RGBImageStrategy( +@pytest.fixture +def raster_strategy(rasterio_mock) -> RasterImageStrategy: + return RasterImageStrategy( image_path="tests/segmentation_utils_tests/test_strategies", image_size=(224, 224), - image_resample=Image.Resampling.NEAREST, + package=rasterio_mock, ) - dataset = len(mock_filenames) # number of images in the specified path - mini_batch = 2 # number of images we want in each batch - expected_value = int( - np.floor(dataset / float(mini_batch)) - ) # number of sets of images we expect - dataset_size = image_strategy.get_dataset_size(mini_batch) - assert dataset_size == expected_value - patch.undo() - patch.undo() - - -@pytest.mark.development -def test_raster_get_dataset_size() -> None: - # checking if the calculation is done correctly - patch = MonkeyPatch() - mock_filenames = ["a", "b", "c"] - - patch.setattr(os, "listdir", lambda x: mock_filenames) - - image_strategy = RasterImageStrategy( +@pytest.fixture +def raster_mt_strategy(rasterio_mock) -> RasterImageStrategyMultiThread: + return RasterImageStrategyMultiThread( image_path="tests/segmentation_utils_tests/test_strategies", image_size=(224, 224), - package=MockRasterio(n=3, size=(224, 224), bands=3, dtypes=["uint8"]), + package=rasterio_mock, ) - dataset = len(mock_filenames) # number of images in the specified path - mini_batch = 2 # number of images we want in each batch - expected_value = int( - np.floor(dataset / float(mini_batch)) - ) # number of sets of images we expect - - dataset_size = image_strategy.get_dataset_size(mini_batch) - assert dataset_size == expected_value - patch.undo() - patch.undo() +@pytest.fixture +def hsi_strategy(cv2_mock) -> HSImageStrategy: + return HSImageStrategy( + image_path="tests/segmentation_utils_tests/test_strategies", + image_size=(224, 224), + package=cv2_mock, + ) -@pytest.mark.development -def test_hsi_get_dataset_size() -> None: - # checking if the calculation is done correctly - patch = MonkeyPatch() - - mock_filenames = ["a", "b", "c"] - - patch.setattr(os, "listdir", lambda x: mock_filenames) - image_strategy = HSImageStrategy( +@pytest.fixture +def hsi_mt_strategy(cv2_mock) -> HSImageStrategyMultiThread: + return HSImageStrategyMultiThread( image_path="tests/segmentation_utils_tests/test_strategies", image_size=(224, 224), - package=CV2Mock(n=3, size=(224, 224), bands=3), + package=cv2_mock, ) - dataset = len(mock_filenames) # number of images in the specified path - mini_batch = 2 # number of images we want in each batch - expected_value = int( - np.floor(dataset / float(mini_batch)) - ) # number of sets of images we expect - dataset_size = image_strategy.get_dataset_size(mini_batch) - assert dataset_size == expected_value - patch.undo() - patch.undo() +FIXTURE_LIST = [ + "rgb_strategy", + "raster_strategy", + "raster_mt_strategy", + "hsi_strategy", + "hsi_mt_strategy", +] +FIXTURE_LIST_MT = [ + "raster_mt_strategy", + "hsi_mt_strategy", +] -@pytest.mark.development -def test_hsi_mt_get_dataset_size() -> None: - # checking if the calculation is done correctly - patch = MonkeyPatch() - mock_filenames = ["a", "b", "c"] +@pytest.fixture(params=FIXTURE_LIST) +def image_strategy(request, directory_mock): + strategy = request.getfixturevalue(request.param) + return strategy - patch.setattr(os, "listdir", lambda x: mock_filenames) - image_strategy = HSImageStrategyMultiThread( - image_path="tests/segmentation_utils_tests/test_strategies", - image_size=(224, 224), - package=CV2Mock(n=3, size=(224, 224), bands=3), - ) +@pytest.fixture(params=FIXTURE_LIST_MT) +def mt_image_strategy(request, directory_mock): + strategy = request.getfixturevalue(request.param) + return strategy - dataset = len(mock_filenames) # number of images in the specified path - mini_batch = 2 # number of images we want in each batch - expected_value = int( - np.floor(dataset / float(mini_batch)) - ) # number of sets of images we expect - dataset_size = image_strategy.get_dataset_size(mini_batch) - assert dataset_size == expected_value - patch.undo() - patch.undo() +@pytest.fixture(params=FIXTURE_LIST) +def fixture_factory(request, directory_mock): + def make_instance(): + return request.getfixturevalue(request.param) + return make_instance -@pytest.mark.development -def test_raster_open(): - patch = MonkeyPatch() - mock_filenames = ["a", "b", "c"] - patch.setattr(os, "listdir", lambda x: mock_filenames) - - image_path = "tests/segmentation_utils_tests/test_strategies" - - mock_data = { - "n": 3, - "size": (224, 224), - "bands": 3, - "dtypes": ["uint8"], - } - strategy = RasterImageStrategy( - image_path, (224, 224), package=MockRasterio(**mock_data) - ) - read_images = strategy.read_batch(2, 0) +@pytest.fixture(params=FIXTURE_LIST_MT) +def mt_fixture_factory(request, directory_mock): + def make_instance(): + return request.getfixturevalue(request.param) - assert read_images.shape == (2, 224, 224, 3) + return make_instance @pytest.mark.development -def test_raster_mt_open(): - patch = MonkeyPatch() - mock_filenames = ["a", "b", "c"] - patch.setattr(os, "listdir", lambda x: mock_filenames) - - image_path = "tests/segmentation_utils_tests/test_strategies" - - mock_data = { - "n": 3, - "size": (224, 224), - "bands": 3, - "dtypes": ["uint8"], - } - strategy = RasterImageStrategyMultiThread( - image_path, (224, 224), package=MockRasterio(**mock_data) - ) +def test_read_batch_image_path(image_strategy, mock_image_open) -> None: + # checking if the file is being opened and read correctly - read_images = strategy.read_batch(2, 0) + strategy = image_strategy - assert read_images.shape == (2, 224, 224, 3) + batch_size = 2 + dataset_index = 0 + result = strategy.read_batch(batch_size, dataset_index) + + assert result.shape == (2, 224, 224, 3) @pytest.mark.development -def test_hsi_open(): - patch = MonkeyPatch() - mock_filenames = ["a", "b", "c"] - patch.setattr(os, "listdir", lambda x: mock_filenames) +def test_read_batch_returns_nparray(image_strategy) -> None: + # checking if the returned value is a numpy array + strategy = image_strategy - image_path = "tests/segmentation_utils_tests/test_strategies" + batch_size = 2 + dataset_index = 0 - mock_data = { - "n": 3, - "size": (224, 224), - "bands": 3, - } - strategy = HSImageStrategy(image_path, (224, 224), package=CV2Mock(**mock_data)) + result = strategy.read_batch(batch_size, dataset_index) + assert isinstance(result, np.ndarray) + assert result.shape == (2, 224, 224, 3) - read_images = strategy.read_batch(2, 0) - assert read_images.shape == (2, 224, 224, 3) +@pytest.mark.development +def test_get_dataset_size(image_strategy, directory_mock) -> None: + # checking if the calculation is done correctly + + strategy = image_strategy + dataset = directory_mock # number of images in the specified path + mini_batch = 2 # number of images we want in each batch + expected_value = int( + np.floor(dataset / float(mini_batch)) + ) # number of sets of images we expect + + dataset_size = strategy.get_dataset_size(mini_batch) + assert dataset_size == expected_value @pytest.mark.development -def test_hsi_mt_open(): - patch = MonkeyPatch() - mock_filenames = ["a", "b", "c"] - patch.setattr(os, "listdir", lambda x: mock_filenames) - - image_path = "tests/segmentation_utils_tests/test_strategies" - - mock_data = { - "n": 3, - "size": (224, 224), - "bands": 3, - } - strategy = HSImageStrategyMultiThread( - image_path, (224, 224), package=CV2Mock(**mock_data) - ) +def test_open(image_strategy): + strategy = image_strategy read_images = strategy.read_batch(2, 0) @@ -330,19 +237,8 @@ def test_hsi_mt_open(): @pytest.mark.development -def test_hsi_get_channels(): - patch = MonkeyPatch() - mock_filenames = ["a", "b", "c"] - patch.setattr(os, "listdir", lambda x: mock_filenames) - - image_path = "tests/segmentation_utils_tests/test_strategies" - - mock_data = { - "n": 3, - "size": (224, 224), - "bands": 3, - } - strategy = HSImageStrategy(image_path, (224, 224), package=CV2Mock(**mock_data)) +def test_hsi_get_channels(directory_mock, hsi_strategy): + strategy = hsi_strategy channels = strategy._HSImageStrategy__get_channels() @@ -350,21 +246,8 @@ def test_hsi_get_channels(): @pytest.mark.development -def test_hsi_mt_get_channels(): - patch = MonkeyPatch() - mock_filenames = ["a", "b", "c"] - patch.setattr(os, "listdir", lambda x: mock_filenames) - - image_path = "tests/segmentation_utils_tests/test_strategies" - - mock_data = { - "n": 3, - "size": (224, 224), - "bands": 3, - } - strategy = HSImageStrategyMultiThread( - image_path, (224, 224), package=CV2Mock(**mock_data) - ) +def test_hsi_mt_get_channels(directory_mock, hsi_mt_strategy): + strategy = hsi_mt_strategy channels = strategy._HSImageStrategyMultiThread__get_channels() @@ -372,28 +255,12 @@ def test_hsi_mt_get_channels(): @pytest.mark.development -def test_empty_batch(): - patch = MonkeyPatch() - - mock_filenames = ["a", "b", "c"] - - patch.setattr(os, "listdir", lambda x: mock_filenames) - - patch.setattr( - Image, - "open", - lambda _: Image.fromarray(np.ones((224, 224, 3)).astype(np.uint8)), - ) - - image_strategy = RGBImageStrategy( - image_path="tests/segmentation_utils_tests/test_strategies", - image_size=(224, 224), - image_resample=Image.Resampling.NEAREST, - ) +def test_empty_batch(image_strategy): + strategy = image_strategy batch_size = 0 dataset_index = 0 - result = image_strategy.read_batch(batch_size, dataset_index) + result = strategy.read_batch(batch_size, dataset_index) assert result.shape == ( 0, @@ -401,363 +268,68 @@ def test_empty_batch(): 224, 3, ) # 0 indicates there are no images in the batch - patch.undo() - patch.undo() @pytest.mark.development -def test_out_of_bounds_index(): - patch = MonkeyPatch() - - mock_filenames = ["a", "b", "c"] - - patch.setattr(os, "listdir", lambda x: mock_filenames) - - patch.setattr( - Image, - "open", - lambda _: Image.fromarray(np.ones((224, 224, 3)).astype(np.uint8)), - ) - - image_strategy = RGBImageStrategy( +def test_out_of_bounds_index(image_strategy): + strategy = RGBImageStrategy( image_path="tests/segmentation_utils_tests/test_strategies", image_size=(224, 224), image_resample=Image.Resampling.NEAREST, ) batch_size = 2 # not an empty batch - dataset_index = len(image_strategy.image_filenames) # out of bounds index + dataset_index = len(strategy.image_filenames) # out of bounds index - try: - image_strategy.read_batch(batch_size, dataset_index) - assert True - - except IndexError: - pass - patch.undo() - patch.undo() + with pytest.raises(IndexError): + strategy.read_batch(batch_size, dataset_index) @pytest.mark.development -def test_batch_slicing(): - patch = MonkeyPatch() - - mock_filenames = ["a" for _ in range(20)] - - patch.setattr(os, "listdir", lambda x: mock_filenames) - - patch.setattr( - Image, - "open", - lambda _: Image.fromarray(np.ones((224, 224, 3)).astype(np.uint8)), - ) - - image_strategy = RGBImageStrategy( - image_path="tests/segmentation_utils_tests/test_strategies", - image_size=(224, 224), - image_resample=Image.Resampling.NEAREST, - ) +def test_batch_slicing(image_strategy): + strategy = image_strategy batch_size = 10 dataset_index = 2 - result = image_strategy.read_batch(batch_size, dataset_index) + result = strategy.read_batch(batch_size, dataset_index) assert ( result.shape[0] == batch_size ) # compare the size of returned data with batch_size - patch.undo() - patch.undo() - - -@pytest.mark.development -def test_RGB_get_image_size(): - patch = MonkeyPatch() - - mock_filenames = ["a" for _ in range(20)] - - patch.setattr(os, "listdir", lambda x: mock_filenames) - - image_strategy = RGBImageStrategy( - image_path="tests/segmentation_utils_tests/test_strategies", - image_size=(224, 224), - image_resample=Image.Resampling.NEAREST, - ) - - result = image_strategy.get_image_size() - assert result == (224, 224) - - -@pytest.mark.development -def test_raster_get_image_size(): - patch = MonkeyPatch() - - mock_filenames = ["a" for _ in range(20)] - - patch.setattr(os, "listdir", lambda x: mock_filenames) - - image_strategy = RasterImageStrategy( - image_path="tests/segmentation_utils_tests/test_strategies", - image_size=(224, 224), - package=MockRasterio(n=3, size=(224, 224), bands=3, dtypes=["uint8"]), - ) - - result = image_strategy.get_image_size() - assert result == (224, 224) @pytest.mark.development -def test_hsi_get_image_size(): - patch = MonkeyPatch() - - mock_filenames = ["a" for _ in range(20)] +def test_get_image_size(image_strategy): + strategy = image_strategy - patch.setattr(os, "listdir", lambda x: mock_filenames) - - image_strategy = HSImageStrategy( - image_path="tests/segmentation_utils_tests/test_strategies", - image_size=(224, 224), - package=CV2Mock(n=3, size=(224, 224), bands=3), - ) - - result = image_strategy.get_image_size() + result = strategy.get_image_size() assert result == (224, 224) @pytest.mark.development -def test_hsi_mt_get_image_size(): - patch = MonkeyPatch() - - mock_filenames = ["a" for _ in range(20)] - - patch.setattr(os, "listdir", lambda x: mock_filenames) - - image_strategy = HSImageStrategyMultiThread( - image_path="tests/segmentation_utils_tests/test_strategies", - image_size=(224, 224), - package=CV2Mock(n=3, size=(224, 224), bands=3), - ) - - result = image_strategy.get_image_size() - assert result == (224, 224) - - -@pytest.mark.development -def test_raster_mt_get_image_size(): - patch = MonkeyPatch() - - mock_filenames = ["a" for _ in range(20)] - - patch.setattr(os, "listdir", lambda x: mock_filenames) - - image_strategy = RasterImageStrategyMultiThread( - image_path="tests/segmentation_utils_tests/test_strategies", - image_size=(224, 224), - package=MockRasterio(n=3, size=(224, 224), bands=3, dtypes=["uint8"]), - ) - - result = image_strategy.get_image_size() - assert result == (224, 224) - - -@pytest.mark.development -def test_rgb_shuffle(): - patch = MonkeyPatch() - - mock_filenames = [str(i) for i in range(20)] - - patch.setattr(os, "listdir", lambda x: mock_filenames) - - image_strategy_1 = RGBImageStrategy( - image_path="tests/segmentation_utils_tests/test_strategies", - image_size=(224, 224), - image_resample=Image.Resampling.NEAREST, - ) - - image_strategy_2 = RGBImageStrategy( - image_path="tests/segmentation_utils_tests/test_strategies", - image_size=(224, 224), - image_resample=Image.Resampling.NEAREST, - ) - - n = 100 - - for i in range(n): - image_strategy_1.shuffle_filenames(i) - image_strategy_2.shuffle_filenames(i) - - assert np.array_equal( - image_strategy_1.image_filenames, image_strategy_2.image_filenames - ) - - -@pytest.mark.development -def test_raster_shuffle(): - patch = MonkeyPatch() - - mock_filenames = [str(i) for i in range(20)] - - patch.setattr(os, "listdir", lambda x: mock_filenames) - - image_strategy_1 = RasterImageStrategy( - image_path="tests/segmentation_utils_tests/test_strategies", - image_size=(224, 224), - package=MockRasterio(n=3, size=(224, 224), bands=3, dtypes=["uint8"]), - ) - - image_strategy_2 = RasterImageStrategy( - image_path="tests/segmentation_utils_tests/test_strategies", - image_size=(224, 224), - package=MockRasterio(n=3, size=(224, 224), bands=3, dtypes=["uint8"]), - ) - - n = 100 - - for i in range(n): - image_strategy_1.shuffle_filenames(i) - image_strategy_2.shuffle_filenames(i) - - assert np.array_equal( - image_strategy_1.image_filenames, image_strategy_2.image_filenames - ) - - -@pytest.mark.development -def test_raster_mt_shuffle(): - patch = MonkeyPatch() - - mock_filenames = [str(i) for i in range(20)] - - patch.setattr(os, "listdir", lambda x: mock_filenames) +def test_shuffle(fixture_factory): + strategy_1 = fixture_factory() - image_strategy_1 = RasterImageStrategyMultiThread( - image_path="tests/segmentation_utils_tests/test_strategies", - image_size=(224, 224), - package=MockRasterio(n=3, size=(224, 224), bands=3, dtypes=["uint8"]), - ) - - image_strategy_2 = RasterImageStrategy( - image_path="tests/segmentation_utils_tests/test_strategies", - image_size=(224, 224), - package=MockRasterio(n=3, size=(224, 224), bands=3, dtypes=["uint8"]), - ) + strategy_2 = fixture_factory() n = 100 for i in range(n): - image_strategy_1.shuffle_filenames(i) - image_strategy_2.shuffle_filenames(i) + strategy_1.shuffle_filenames(i) + strategy_2.shuffle_filenames(i) - assert np.array_equal( - image_strategy_1.image_filenames, image_strategy_2.image_filenames - ) + assert np.array_equal(strategy_1.image_filenames, strategy_2.image_filenames) + assert type(strategy_1) == type(strategy_2) @pytest.mark.development -def test_raster_mt_image_in_order(): - patch = MonkeyPatch() - - mock_filenames = [str(i) for i in range(20)] - - patch.setattr(os, "listdir", lambda x: mock_filenames) - mock_package = MockRasterio(n=3, size=(224, 224), bands=3, dtypes=["uint8"]) - image_strategy = RasterImageStrategyMultiThread( - image_path="tests/segmentation_utils_tests/test_strategies", - image_size=(224, 224), - package=mock_package, - ) - - batch_size = 10 - - call_count = mock_package.get_count() - - result = image_strategy.read_batch(batch_size, 0) - - for i in range(call_count, call_count + batch_size): - assert np.array_equal( - result[i - call_count, :, :, :], np.full((224, 224, 3), i + 1) - ) - - -@pytest.mark.development -def test_hsi_shuffle(): - patch = MonkeyPatch() - - mock_filenames = [str(i) for i in range(20)] - - patch.setattr(os, "listdir", lambda x: mock_filenames) - - image_strategy_1 = HSImageStrategy( - image_path="tests/segmentation_utils_tests/test_strategies", - image_size=(224, 224), - package=CV2Mock(n=3, size=(224, 224), bands=3), - ) - - image_strategy_2 = HSImageStrategy( - image_path="tests/segmentation_utils_tests/test_strategies", - image_size=(224, 224), - package=CV2Mock(n=3, size=(224, 224), bands=3), - ) - - n = 100 - - for i in range(n): - image_strategy_1.shuffle_filenames(i) - image_strategy_2.shuffle_filenames(i) - - assert np.array_equal( - image_strategy_1.image_filenames, image_strategy_2.image_filenames - ) - - -@pytest.mark.development -def test_hsi_mt_shuffle(): - patch = MonkeyPatch() - - mock_filenames = [str(i) for i in range(20)] - - patch.setattr(os, "listdir", lambda x: mock_filenames) - - image_strategy_1 = HSImageStrategyMultiThread( - image_path="tests/segmentation_utils_tests/test_strategies", - image_size=(224, 224), - package=CV2Mock(n=3, size=(224, 224), bands=3), - ) - - image_strategy_2 = HSImageStrategyMultiThread( - image_path="tests/segmentation_utils_tests/test_strategies", - image_size=(224, 224), - package=CV2Mock(n=3, size=(224, 224), bands=3), - ) - - n = 100 - - for i in range(n): - image_strategy_1.shuffle_filenames(i) - image_strategy_2.shuffle_filenames(i) - - assert np.array_equal( - image_strategy_1.image_filenames, image_strategy_2.image_filenames - ) - - -@pytest.mark.development -def test_hsi_mt_image_in_order(): - patch = MonkeyPatch() - - mock_filenames = [str(i) for i in range(20)] - - patch.setattr(os, "listdir", lambda x: mock_filenames) - mock_package = CV2Mock(n=3, size=(224, 224), bands=3) - image_strategy = HSImageStrategyMultiThread( - image_path="tests/segmentation_utils_tests/test_strategies", - image_size=(224, 224), - package=mock_package, - ) +def test_mt_image_in_order(mt_image_strategy): + strategy = mt_image_strategy batch_size = 10 - call_count = mock_package.get_count() + call_count = strategy.package.get_count() - result = image_strategy.read_batch(batch_size, 0) + result = strategy.read_batch(batch_size, 0) for i in range(call_count, call_count + batch_size): assert np.array_equal( diff --git a/tests/transform_utils_test.py/image_cutting_test.py b/tests/transform_utils_test.py/image_cutting_test.py index f3171e1..ef2a800 100644 --- a/tests/transform_utils_test.py/image_cutting_test.py +++ b/tests/transform_utils_test.py/image_cutting_test.py @@ -1,4 +1,5 @@ import numpy as np +import pytest import rasterio from PIL import Image from pytest import MonkeyPatch @@ -7,6 +8,7 @@ image_cut, image_stich) +@pytest.mark.development def test_image_cut() -> None: img = np.zeros((512, 512, 3)) img[-1, -1, 0] = 1 @@ -29,6 +31,7 @@ def test_image_cut() -> None: # assert True +@pytest.mark.development def test_image_cut_incorrect_shape_too_many() -> None: # does not pass try: @@ -40,6 +43,7 @@ def test_image_cut_incorrect_shape_too_many() -> None: assert True +@pytest.mark.development def test_image_cut_incorrect_band_specified() -> None: # passes however the function doesn't rasie a value error # when the bands do not match @@ -52,6 +56,7 @@ def test_image_cut_incorrect_band_specified() -> None: assert True +@pytest.mark.development def test_image_cut_slack_cut() -> None: img = np.zeros((513, 513, 3)) img[-2, -2, 0] = 1 @@ -62,6 +67,7 @@ def test_image_cut_slack_cut() -> None: assert cut_ims[-1, -1, -1, 0] == 1 +@pytest.mark.development def test_image_cut_slack_cut_exact() -> None: img = np.zeros((512, 512, 3)) img[-2, -2, 0] = 1 @@ -72,6 +78,7 @@ def test_image_cut_slack_cut_exact() -> None: assert cut_ims[-1, -2, -2, 0] == 1 +@pytest.mark.development def test_image_cut_pad() -> None: img = np.zeros((511, 511, 3)) img[-2, -2, 0] = 1 @@ -82,6 +89,7 @@ def test_image_cut_pad() -> None: assert cut_ims[-1, -3, -3, 0] == 1 +@pytest.mark.development def test_image_cut_pad_exact() -> None: img = np.zeros((512, 512, 3)) img[-2, -2, 0] = 1 @@ -92,6 +100,7 @@ def test_image_cut_pad_exact() -> None: assert cut_ims[-1, -2, -2, 0] == 1 +@pytest.mark.development def test_image_cut_incorrect_band() -> None: try: img = np.zeros((512, 512)) @@ -102,6 +111,7 @@ def test_image_cut_incorrect_band() -> None: assert True +@pytest.mark.development def test_image_cut_can_add_dimension() -> None: img = np.zeros((512, 512)) img[-1, -1] = 1 @@ -112,6 +122,7 @@ def test_image_cut_can_add_dimension() -> None: assert cut_ims[-1, -1, -1, 0] == 1 +@pytest.mark.development def test_image_stich() -> None: img1 = np.zeros((256, 256, 3), dtype=np.uint8) img2 = np.zeros((256, 256, 3), dtype=np.uint8) @@ -131,6 +142,7 @@ def test_image_stich() -> None: assert stiched_img[-1, -1, 0] == 9 +@pytest.mark.development def test_cut_ims_in_directory(mocker) -> None: patch = MonkeyPatch() From f5e312ca5189c0169615017997110a7e4163a71b Mon Sep 17 00:00:00 2001 From: Sajtospoga01 Date: Sun, 27 Aug 2023 23:08:41 +0100 Subject: [PATCH 75/75] adds documentation to fixtures --- .../test_strategies.py | 76 +++++++++++++++++++ 1 file changed, 76 insertions(+) diff --git a/tests/segmentation_utils_tests.py/test_strategies.py b/tests/segmentation_utils_tests.py/test_strategies.py index bbc01b3..c094414 100644 --- a/tests/segmentation_utils_tests.py/test_strategies.py +++ b/tests/segmentation_utils_tests.py/test_strategies.py @@ -73,18 +73,37 @@ def get_count(self): return self.call_count +#################################################################################################### +# Package Mocks # +#################################################################################################### + + @pytest.fixture def rasterio_mock() -> MockRasterio: + """ + Creates a mock of the rasterio package + """ return MockRasterio(n=3, size=(224, 224), bands=3, dtypes=["uint8"]) @pytest.fixture def cv2_mock() -> CV2Mock: + """ + Creates a mock of the cv2 package + """ return CV2Mock(n=3, size=(224, 224), bands=3) +#################################################################################################### +# OS mocks # +#################################################################################################### + + @pytest.fixture def directory_mock(monkeypatch): + """ + Mocks the os.listdir function to return a list of filenames + """ mock_filenames = [str(i) for i in range(20)] monkeypatch.setattr(os, "listdir", lambda x: mock_filenames) return len(mock_filenames) @@ -92,6 +111,9 @@ def directory_mock(monkeypatch): @pytest.fixture def mock_image_open(monkeypatch): + """ + Mocks the Image.open function to return a numpy array + """ monkeypatch.setattr( Image, "open", @@ -99,8 +121,18 @@ def mock_image_open(monkeypatch): ) +#################################################################################################### +# Strategy fixtures # +#################################################################################################### + + @pytest.fixture def rgb_strategy(mock_image_open) -> RGBImageStrategy: + """ + Creates a RGBImageStrategy instance + + Relies on the mock_image_open fixture to mock the Image.open function + """ return RGBImageStrategy( image_path="tests/segmentation_utils_tests/test_strategies", image_size=(224, 224), @@ -110,6 +142,11 @@ def rgb_strategy(mock_image_open) -> RGBImageStrategy: @pytest.fixture def raster_strategy(rasterio_mock) -> RasterImageStrategy: + """ + Creates a RasterImageStrategy instance + + Relies on the rasterio_mock fixture to mock the rasterio package + """ return RasterImageStrategy( image_path="tests/segmentation_utils_tests/test_strategies", image_size=(224, 224), @@ -119,6 +156,11 @@ def raster_strategy(rasterio_mock) -> RasterImageStrategy: @pytest.fixture def raster_mt_strategy(rasterio_mock) -> RasterImageStrategyMultiThread: + """ + Creates a RasterImageStrategyMultiThread instance + + Relies on the rasterio_mock fixture to mock the rasterio package + """ return RasterImageStrategyMultiThread( image_path="tests/segmentation_utils_tests/test_strategies", image_size=(224, 224), @@ -128,6 +170,11 @@ def raster_mt_strategy(rasterio_mock) -> RasterImageStrategyMultiThread: @pytest.fixture def hsi_strategy(cv2_mock) -> HSImageStrategy: + """ + Creates a HSImageStrategy instance + + Relies on the cv2_mock fixture to mock the cv2 package + """ return HSImageStrategy( image_path="tests/segmentation_utils_tests/test_strategies", image_size=(224, 224), @@ -137,6 +184,11 @@ def hsi_strategy(cv2_mock) -> HSImageStrategy: @pytest.fixture def hsi_mt_strategy(cv2_mock) -> HSImageStrategyMultiThread: + """ + Creates a HSImageStrategyMultiThread instance + + Relies on the cv2_mock fixture to mock the cv2 package + """ return HSImageStrategyMultiThread( image_path="tests/segmentation_utils_tests/test_strategies", image_size=(224, 224), @@ -144,6 +196,10 @@ def hsi_mt_strategy(cv2_mock) -> HSImageStrategyMultiThread: ) +#################################################################################################### +# Test Generators # +#################################################################################################### + FIXTURE_LIST = [ "rgb_strategy", "raster_strategy", @@ -160,18 +216,29 @@ def hsi_mt_strategy(cv2_mock) -> HSImageStrategyMultiThread: @pytest.fixture(params=FIXTURE_LIST) def image_strategy(request, directory_mock): + """ + Generates a strategy instance for each strategy type + """ strategy = request.getfixturevalue(request.param) return strategy @pytest.fixture(params=FIXTURE_LIST_MT) def mt_image_strategy(request, directory_mock): + """ + Generates a strategy instance for each multi threaded strategy type + """ strategy = request.getfixturevalue(request.param) return strategy @pytest.fixture(params=FIXTURE_LIST) def fixture_factory(request, directory_mock): + """ + Generates a strategy instance for each strategy type + + Can be used to generate multiple instances of the same strategy type + """ def make_instance(): return request.getfixturevalue(request.param) @@ -180,12 +247,21 @@ def make_instance(): @pytest.fixture(params=FIXTURE_LIST_MT) def mt_fixture_factory(request, directory_mock): + """ + Generates a strategy instance for each multi threaded strategy type + + Can be used to generate multiple instances of the same strategy type + """ def make_instance(): return request.getfixturevalue(request.param) return make_instance +#################################################################################################### +# Test Functions # +#################################################################################################### + @pytest.mark.development def test_read_batch_image_path(image_strategy, mock_image_open) -> None: # checking if the file is being opened and read correctly