diff --git a/.github/workflows/development.yml b/.github/workflows/development.yml index aa8c832..9943143 100644 --- a/.github/workflows/development.yml +++ b/.github/workflows/development.yml @@ -26,7 +26,7 @@ jobs: run: | python -m pip install --upgrade pip pip install pre-commit - if [ -f requirements.txt ]; then pip install -r requirements.txt; fi + pip install .[dev] test: needs: dependency-install @@ -42,10 +42,10 @@ jobs: run: | python -m pip install --upgrade pip pip install pytest pytest-mock - if [ -f requirements.txt ]; then pip install -r requirements.txt; fi + pip install .[dev] - name: Test with pytest run: | - python -m pytest + python -m pytest -v -m "development" devops: needs: test @@ -62,7 +62,7 @@ jobs: run: | python -m pip install --upgrade pip pip install pre-commit - if [ -f requirements.txt ]; then pip install -r requirements.txt; fi + pip install .[dev] - name: Lint with pylint run: | pip install pylint @@ -71,7 +71,7 @@ jobs: run: | mkdir -p ./coverage pip install pytest-cov pytest-mock - python -m pytest --cov --cov-report=xml:./coverage/coverage.xml + python -m pytest -m "not staging" --cov --cov-report=xml:./coverage/coverage.xml - name: Upload coverage uses: codecov/codecov-action@v3 diff --git a/.github/workflows/documentation.yml b/.github/workflows/documentation.yml index bbdd86f..628a486 100644 --- a/.github/workflows/documentation.yml +++ b/.github/workflows/documentation.yml @@ -34,15 +34,13 @@ jobs: - name: Install dependencies run: | python -m pip install --upgrade pip - if [ -f requirements.txt ]; then pip install -r requirements.txt; fi + pip install .[dev] pip install -U sphinx pip install furo - name: Build documentation run: | cd docs - - sphinx-apidoc -e -M --force -o . ../utilities/ make html - name: Upload build data diff --git a/.github/workflows/master.yml b/.github/workflows/master.yml index 4b3ba3c..5979c2d 100644 --- a/.github/workflows/master.yml +++ b/.github/workflows/master.yml @@ -26,7 +26,7 @@ jobs: run: | python -m pip install --upgrade pip pip install pre-commit - if [ -f requirements.txt ]; then pip install -r requirements.txt; fi + pip install .[dev] test: needs: dependency-install @@ -42,10 +42,16 @@ jobs: run: | python -m pip install --upgrade pip pip install pytest pytest-mock - if [ -f requirements.txt ]; then pip install -r requirements.txt; fi - - name: Test with pytest + pip install .[dev] + - name: Test with pytest development run: | - python -m pytest + python -m pytest -v -m "development" + - name: Test with pytest staging + run: | + python -m pytest -v -m "staging" + - name: Test with pytest production + run: | + python -m pytest -v -m "production" devops: needs: test @@ -62,7 +68,7 @@ jobs: run: | python -m pip install --upgrade pip pip install pre-commit - if [ -f requirements.txt ]; then pip install -r requirements.txt; fi + pip install .[dev] - name: Lint with pylint run: | pip install pylint diff --git a/.github/workflows/staging.yml b/.github/workflows/staging.yml index 72bf0c4..f863dc4 100644 --- a/.github/workflows/staging.yml +++ b/.github/workflows/staging.yml @@ -26,7 +26,7 @@ jobs: run: | python -m pip install --upgrade pip pip install pre-commit - if [ -f requirements.txt ]; then pip install -r requirements.txt; fi + pip install .[dev] test: needs: dependency-install @@ -42,10 +42,13 @@ jobs: run: | python -m pip install --upgrade pip pip install pytest pytest-mock - if [ -f requirements.txt ]; then pip install -r requirements.txt; fi - - name: Test with pytest + pip install .[dev] + - name: Test with pytest development run: | - python -m pytest + python -m pytest -v -m "development" + - name: Test with pytest staging + run: | + python -m pytest -v -m "staging" devops: needs: test @@ -62,7 +65,7 @@ jobs: run: | python -m pip install --upgrade pip pip install pre-commit - if [ -f requirements.txt ]; then pip install -r requirements.txt; fi + pip install .[dev] - name: Lint with pylint run: | pip install pylint diff --git a/pyproject.toml b/pyproject.toml index 20b4059..d2be2d6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -21,11 +21,20 @@ dependencies = [ "Pillow >= 9.4.0", "tensorflow >= 2.10", "toml >= 0.10.2", + "tqdm >= 4.64.1", + "pandas >= 1.5.1", + "opencv-python-headless >= 4.8.0.76" ] [tool.setuptools] packages = ["utilities"] +[tool.pytest.ini_options] +markers = [ + "staging: Mark a test as part of the staging environment", + "production: Mark a test as part of the production environment", + "development: Mark a test as part of the development environment", +] [project.optional-dependencies] dev = [ diff --git a/requirements.txt b/requirements.txt deleted file mode 100644 index 1200270..0000000 --- a/requirements.txt +++ /dev/null @@ -1,7 +0,0 @@ -tensorflow==2.10 -numpy==1.24.1 -rasterio==1.3.6 -Pillow==9.4.0 -tqdm==4.64.1 -pandas==1.5.1 -toml==0.10.2 diff --git a/tests/segmentation_utils_tests.py/flow_reader_test.py b/tests/segmentation_utils_tests.py/flow_reader_test.py index af89e19..20d2fb2 100644 --- a/tests/segmentation_utils_tests.py/flow_reader_test.py +++ b/tests/segmentation_utils_tests.py/flow_reader_test.py @@ -36,6 +36,7 @@ def flow_from_directory_mock(*args, **kwargs): # tests +@pytest.mark.development def test_makes_flow_generator() -> None: patch = MonkeyPatch() # mock an imagedatagenerator from keras @@ -48,7 +49,7 @@ def test_makes_flow_generator() -> None: # create a flow generator FlowGenerator(**generator_args) - +@pytest.mark.development def test_makes_flow_generator_with_queue() -> None: patch = MonkeyPatch() # mock an imagedatagenerator from keras @@ -78,7 +79,7 @@ def test_makes_flow_generator_with_queue() -> None: generator = FlowGenerator(**new_generator_args) generator.set_preprocessing_pipeline(image_queue, mask_queue) - +@pytest.mark.development def test_makes_flow_generator_wrong_shape() -> None: try: patch = MonkeyPatch() @@ -98,7 +99,7 @@ def test_makes_flow_generator_wrong_shape() -> None: except ValueError: assert True - +@pytest.mark.development def test_makes_flow_generator_wrong_dimension() -> None: try: patch = MonkeyPatch() @@ -118,7 +119,7 @@ def test_makes_flow_generator_wrong_dimension() -> None: except ValueError: assert True - +@pytest.mark.development def test_flow_generator_with_preprocess() -> None: patch = MonkeyPatch() # mock an imagedatagenerator from keras @@ -141,7 +142,7 @@ def test_flow_generator_with_preprocess() -> None: patch.undo() patch.undo() - +@pytest.mark.development def test_get_dataset_size() -> None: patch = MonkeyPatch() patch.setattr(os, "listdir", lambda x: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]) @@ -166,7 +167,7 @@ def test_get_dataset_size() -> None: patch.undo() patch.undo() - +@pytest.mark.development def test_get_generator() -> None: patch = MonkeyPatch() diff --git a/tests/segmentation_utils_tests.py/image_preprocessor_test.py b/tests/segmentation_utils_tests.py/image_preprocessor_test.py index b596aaf..4841892 100644 --- a/tests/segmentation_utils_tests.py/image_preprocessor_test.py +++ b/tests/segmentation_utils_tests.py/image_preprocessor_test.py @@ -31,7 +31,7 @@ def test_image_onehot_encoder_column() -> None: ) assert np.array_equal(one_hot_image, onehot_test) - +@pytest.mark.development def test_image_onehot_encoder_squarematrix() -> None: # predifining input variables n_classes = 2 @@ -58,7 +58,7 @@ def test_image_onehot_encoder_squarematrix() -> None: ) assert np.array_equal(one_hot_image, onehot_test) - +@pytest.mark.development def test_image_augmentation_pipeline_squarematrix() -> None: # predifining input variables image = np.zeros((512, 512, 3)) @@ -89,7 +89,7 @@ def test_image_augmentation_pipeline_squarematrix() -> None: assert image_new.shape == (512, 512, 3) assert mask_new.shape == (256, 256, 1) - +@pytest.mark.development def test_processing_queue() -> None: # creating dummy queues @@ -102,7 +102,7 @@ def test_processing_queue() -> None: assert image_queue.queue[0].kwargs["seed"] == new_seed - +@pytest.mark.development def test_generate_default_queue() -> None: # creating default queues image_queue, mask_queue = ImagePreprocessor.generate_default_queue() @@ -111,7 +111,7 @@ def test_generate_default_queue() -> None: assert image_queue.get_queue_length() == 5 assert mask_queue.get_queue_length() == 2 - +@pytest.mark.development def test_flatten() -> None: image = np.zeros((512, 512, 3)) image = tf.convert_to_tensor(image) diff --git a/tests/segmentation_utils_tests.py/test_flowreader.py b/tests/segmentation_utils_tests.py/test_flowreader.py index 3676512..4648e19 100644 --- a/tests/segmentation_utils_tests.py/test_flowreader.py +++ b/tests/segmentation_utils_tests.py/test_flowreader.py @@ -7,106 +7,350 @@ from pytest import MonkeyPatch from utilities.segmentation_utils import ImagePreprocessor +from utilities.segmentation_utils.constants import ImageOrdering from utilities.segmentation_utils.flowreader import FlowGeneratorExperimental +class DummyStrategy: + def __init__(self, input_shape=(512, 512, 3)): + self.input_shape = input_shape + + def read_batch(self, batch_size: int, dataset_index: int) -> np.ndarray: + return np.zeros((batch_size, *self.input_shape)) + + def get_dataset_size(self,minibatch) -> int: + return 10 + + def get_image_size(self) -> tuple[int, int]: + return self.input_shape[:2] + + def shuffle_filenames(self, seed: int) -> None: + pass + + +@pytest.mark.development def test_can_create_instance() -> None: patch = MonkeyPatch() # mock list directory patch.setattr(os, "listdir", lambda x: ["a", "b", "c"]) + input_strategy = DummyStrategy() + output_strategy = DummyStrategy() + # create generator instance generator = FlowGeneratorExperimental( - image_path="tests/segmentation_utils_tests/flow_reader_test", - mask_path="tests/segmentation_utils_tests/flow_reader_test", - image_size=(512, 512), - output_size=(512,512), num_classes=7, - channel_mask= [True,True,True] + channel_mask=[True, True, True], + input_strategy=input_strategy, + output_strategy=output_strategy, ) pass + +@pytest.mark.development def test_set_preprocessing_pipeline() -> None: patch = MonkeyPatch() # mock list directory patch.setattr(os, "listdir", lambda x: ["a", "b", "c"]) + input_strategy = DummyStrategy() + output_strategy = DummyStrategy() # create generator instance generator = FlowGeneratorExperimental( - image_path="tests/segmentation_utils_tests/flow_reader_test", - mask_path="tests/segmentation_utils_tests/flow_reader_test", - image_size=(512, 512), - output_size=(512,512), num_classes=7, - channel_mask= [True,True,True] + channel_mask=[True, True, True], + input_strategy=input_strategy, + output_strategy=output_strategy, ) image_queue = ImagePreprocessor.PreprocessingQueue(queue=[]) mask_queue = ImagePreprocessor.PreprocessingQueue(queue=[]) - generator.set_preprocessing_pipeline( - image_queue,mask_queue - ) + generator.set_preprocessing_pipeline(image_queue, mask_queue) pass + +@pytest.mark.development def test_set_mini_batch_size() -> None: patch = MonkeyPatch() # mock list directory patch.setattr(os, "listdir", lambda x: ["a", "b", "c"]) + input_strategy = DummyStrategy() + output_strategy = DummyStrategy() + # create generator instance generator = FlowGeneratorExperimental( - image_path="tests/segmentation_utils_tests/flow_reader_test", - mask_path="tests/segmentation_utils_tests/flow_reader_test", - image_size=(512, 512), - output_size=(512,512), num_classes=7, - channel_mask= [True,True,True] + channel_mask=[True, True, True], + input_strategy=input_strategy, + output_strategy=output_strategy, ) generator.set_mini_batch_size(2) assert generator.mini_batch == 2 -def test_set_mini_batch_size_too_large() -> None: +@pytest.mark.development +def test_set_mini_batch_size_too_large() -> None: patch = MonkeyPatch() # mock list directory patch.setattr(os, "listdir", lambda x: ["a", "b", "c"]) + input_strategy = DummyStrategy() + output_strategy = DummyStrategy() + # create generator instance generator = FlowGeneratorExperimental( - image_path="tests/segmentation_utils_tests/flow_reader_test", - mask_path="tests/segmentation_utils_tests/flow_reader_test", - image_size=(512, 512), - output_size=(512,512), num_classes=7, - channel_mask= [True,True,True] + channel_mask=[True, True, True], + input_strategy=input_strategy, + output_strategy=output_strategy, ) with pytest.raises(ValueError) as exc_info: generator.set_mini_batch_size(5) - assert exc_info.value.args[0] == "The mini batch size cannot be larger than the batch size" + assert ( + exc_info.value.args[0] + == "The mini batch size cannot be larger than the batch size" + ) +@pytest.mark.development def test_set_mini_batch_size_not_devisable() -> None: - patch = MonkeyPatch() # mock list directory patch.setattr(os, "listdir", lambda x: ["a", "b", "c"]) + input_strategy = DummyStrategy() + output_strategy = DummyStrategy() + # create generator instance generator = FlowGeneratorExperimental( - image_path="tests/segmentation_utils_tests/flow_reader_test", - mask_path="tests/segmentation_utils_tests/flow_reader_test", - image_size=(512, 512), - output_size=(512,512), num_classes=7, - channel_mask= [True,True,True], - batch_size=3 - + channel_mask=[True, True, True], + batch_size=3, + input_strategy=input_strategy, + output_strategy=output_strategy, ) with pytest.raises(ValueError) as exc_info: generator.set_mini_batch_size(2) - assert exc_info.value.args[0] == "The batch size must be divisible by the mini batch size" - + assert ( + exc_info.value.args[0] + == "The batch size must be divisible by the mini batch size" + ) + + +@pytest.mark.development +def test_read_batch_get_item() -> None: + patch = MonkeyPatch() + # mock list directory + patch.setattr(os, "listdir", lambda x: ["a", "b", "c"]) + + input_strategy = DummyStrategy() + output_strategy = DummyStrategy(input_shape=(512, 512)) + + # create generator instance + + generator = FlowGeneratorExperimental( + batch_size=2, + num_classes=7, + channel_mask=[True, True, True], + input_strategy=input_strategy, + output_strategy=output_strategy, + ) + + batch = generator[0] + + assert batch[0].shape == (2, 512, 512, 3) + assert batch[1].shape == (2, 512, 512, 7) + + +@pytest.mark.development +def test_read_batch_get_item_diff_minibatch() -> None: + patch = MonkeyPatch() + # mock list directory + patch.setattr(os, "listdir", lambda x: ["a", "b", "c"]) + + input_strategy = DummyStrategy() + output_strategy = DummyStrategy(input_shape=(512, 512)) + + # create generator instance + + generator = FlowGeneratorExperimental( + batch_size=2, + num_classes=7, + channel_mask=[True, True, True], + input_strategy=input_strategy, + output_strategy=output_strategy, + ) + + generator.set_mini_batch_size(1) + + batch = generator[0] + + assert batch[0].shape == (1, 512, 512, 3) + assert batch[1].shape == (1, 512, 512, 7) + + +@pytest.mark.development +def test_read_batch_get_item_channel_first() -> None: + patch = MonkeyPatch() + # mock list directory + patch.setattr(os, "listdir", lambda x: ["a", "b", "c"]) + + input_strategy = DummyStrategy() + output_strategy = DummyStrategy(input_shape=(512, 512)) + + # create generator instance + + generator = FlowGeneratorExperimental( + batch_size=2, + num_classes=7, + channel_mask=[True, True, True], + input_strategy=input_strategy, + output_strategy=output_strategy, + image_ordering=ImageOrdering.CHANNEL_FIRST, + ) + + batch = generator[0] + + assert batch[0].shape == (2, 3, 512, 512) + assert batch[1].shape == (2, 7, 512, 512) + + +@pytest.mark.development +def test_read_batch_get_item_column() -> None: + patch = MonkeyPatch() + # mock list directory + patch.setattr(os, "listdir", lambda x: ["a", "b", "c"]) + + input_strategy = DummyStrategy() + output_strategy = DummyStrategy(input_shape=(512, 512)) + + # create generator instance + + generator = FlowGeneratorExperimental( + batch_size=2, + num_classes=7, + channel_mask=[True, True, True], + input_strategy=input_strategy, + output_strategy=output_strategy, + is_column=True, + ) + + batch = generator[0] + + assert batch[0].shape == (2, 512, 512, 3) + assert batch[1].shape == (2, 512 * 512, 7) + + +@pytest.mark.development +def test_read_batch_get_item_column_channel_first() -> None: + patch = MonkeyPatch() + # mock list directory + patch.setattr(os, "listdir", lambda x: ["a", "b", "c"]) + + input_strategy = DummyStrategy() + output_strategy = DummyStrategy(input_shape=(512, 512)) + + # create generator instance + + generator = FlowGeneratorExperimental( + batch_size=2, + num_classes=7, + channel_mask=[True, True, True], + input_strategy=input_strategy, + output_strategy=output_strategy, + image_ordering=ImageOrdering.CHANNEL_FIRST, + is_column=True, + ) + + batch = generator[0] + + assert batch[0].shape == (2, 3, 512, 512) + assert batch[1].shape == (2, 7, 512 * 512) + + +@pytest.mark.development +def test_read_batch_get_item_expand_dim_fail() -> None: + with pytest.raises(ValueError) as exc_info: + patch = MonkeyPatch() + # mock list directory + patch.setattr(os, "listdir", lambda x: ["a", "b", "c"]) + + input_strategy = DummyStrategy() + output_strategy = DummyStrategy(input_shape=(512, 512, 1)) + + # create generator instance + + generator = FlowGeneratorExperimental( + batch_size=2, + num_classes=7, + channel_mask=[True, True, True], + input_strategy=input_strategy, + output_strategy=output_strategy, + ) + + batch = generator[0] + +@pytest.mark.development +def test_raises_error_not_compatible_shape() -> None: + with pytest.raises(ValueError) as exc_info: + patch = MonkeyPatch() + # mock list directory + patch.setattr(os, "listdir", lambda x: ["a", "b", "c"]) + + input_strategy = DummyStrategy() + output_strategy = DummyStrategy(input_shape=(512, 200, 1)) + + # create generator instance + + generator = FlowGeneratorExperimental( + batch_size=2, + num_classes=7, + channel_mask=[True, True, True], + input_strategy=input_strategy, + output_strategy=output_strategy, + ) + + +################ +# Staging tests# +################ + + +@pytest.mark.staging +def test_read_batch_staging() -> None: + classes = 7 + n_images = 4 + # prepare test files + for i in range(n_images): + image = np.random.randint(0, 255, (512, 512, 3)) + mask = np.random.randint(0, classes, (512, 512)) + np.save(f"tests/segmentation_utils_tests/flow_reader_test/image_{i}", image) + np.save(f"tests/segmentation_utils_tests/flow_reader_test/mask_{i}", mask) + + dummy_model = tf.keras.models.Sequential( + [ + tf.keras.layers.Conv2D( + input_shape=(512, 512, 3), filters=3, kernel_size=(3, 3), padding="same" + ), + tf.keras.layers.Conv2D(classes, kernel_size=(1, 1), padding="same"), + ] + ) + dummy_model.compile( + optimizer="adam", + loss=tf.keras.losses.CategoricalCrossentropy(from_logits=True), + metrics=["accuracy"], + ) + + reader = FlowGeneratorExperimental( + image_path="tests/segmentation_utils_tests/flow_reader_test", + mask_path="tests/segmentation_utils_tests/flow_reader_test", + image_size=(512, 512), + output_size=(512, 512), + num_classes=classes, + channel_mask=[True, True, True], + ) diff --git a/tests/segmentation_utils_tests.py/test_strategies.py b/tests/segmentation_utils_tests.py/test_strategies.py new file mode 100644 index 0000000..c094414 --- /dev/null +++ b/tests/segmentation_utils_tests.py/test_strategies.py @@ -0,0 +1,413 @@ +import os + +import numpy as np +import pytest +from PIL import Image +from pytest import MonkeyPatch + +from utilities.segmentation_utils.reading_strategies import ( + HSImageStrategy, HSImageStrategyMultiThread, RasterImageStrategy, + RasterImageStrategyMultiThread, RGBImageStrategy) + + +class MockRasterio: + def __init__(self, n, size, bands, dtypes): + self.n = n + self.size = size + self.bands = bands + self.dtypes = dtypes + self.call_count = 0 + + def open(self, *args, **kwargs): + return self + + @property + def count(self) -> int: + return self.bands + + def read(self, *args, **kwargs): + self.call_count += 1 + return np.full( + (self.bands, self.size[0], self.size[1]), self.call_count, self.dtypes[0] + ) + + # these functions are invoked when a 'with' statement is executed + def __enter__(self): + # called at the beginning of a 'with' block + return self # returns instance of MockRasterio class itself + + def __exit__(self, type, value, traceback): + # called at the end of a 'with' block + pass + + def get_count(self): + return self.call_count + + +class CV2Mock: + IMREAD_UNCHANGED = 1 + COLOR_BGR2RGB = 1 + + def __init__(self, n, size, bands) -> None: + self.n = n + self.size = size + self.bands = bands + self.call_count = 0 + + def imread(self, *args, **kwargs): + self.call_count += 1 + return np.full( + (self.size[0], self.size[1], self.bands), self.call_count, np.uint8 + ) + + def resize(self, *args, **kwargs): + img = args[0] + size = args[1] + return np.full((size[0], size[1], self.bands), img[0, 0, 0], np.uint8) + + def cvtColor(self, *args, **kwargs): + img = args[0] + return np.full((self.size[0], self.size[1], self.bands), img[0, 0, 0], np.uint8) + + def get_count(self): + return self.call_count + + +#################################################################################################### +# Package Mocks # +#################################################################################################### + + +@pytest.fixture +def rasterio_mock() -> MockRasterio: + """ + Creates a mock of the rasterio package + """ + return MockRasterio(n=3, size=(224, 224), bands=3, dtypes=["uint8"]) + + +@pytest.fixture +def cv2_mock() -> CV2Mock: + """ + Creates a mock of the cv2 package + """ + return CV2Mock(n=3, size=(224, 224), bands=3) + + +#################################################################################################### +# OS mocks # +#################################################################################################### + + +@pytest.fixture +def directory_mock(monkeypatch): + """ + Mocks the os.listdir function to return a list of filenames + """ + mock_filenames = [str(i) for i in range(20)] + monkeypatch.setattr(os, "listdir", lambda x: mock_filenames) + return len(mock_filenames) + + +@pytest.fixture +def mock_image_open(monkeypatch): + """ + Mocks the Image.open function to return a numpy array + """ + monkeypatch.setattr( + Image, + "open", + lambda _: Image.fromarray(np.ones((224, 224, 3)).astype(np.uint8)), + ) + + +#################################################################################################### +# Strategy fixtures # +#################################################################################################### + + +@pytest.fixture +def rgb_strategy(mock_image_open) -> RGBImageStrategy: + """ + Creates a RGBImageStrategy instance + + Relies on the mock_image_open fixture to mock the Image.open function + """ + return RGBImageStrategy( + image_path="tests/segmentation_utils_tests/test_strategies", + image_size=(224, 224), + image_resample=Image.Resampling.NEAREST, + ) + + +@pytest.fixture +def raster_strategy(rasterio_mock) -> RasterImageStrategy: + """ + Creates a RasterImageStrategy instance + + Relies on the rasterio_mock fixture to mock the rasterio package + """ + return RasterImageStrategy( + image_path="tests/segmentation_utils_tests/test_strategies", + image_size=(224, 224), + package=rasterio_mock, + ) + + +@pytest.fixture +def raster_mt_strategy(rasterio_mock) -> RasterImageStrategyMultiThread: + """ + Creates a RasterImageStrategyMultiThread instance + + Relies on the rasterio_mock fixture to mock the rasterio package + """ + return RasterImageStrategyMultiThread( + image_path="tests/segmentation_utils_tests/test_strategies", + image_size=(224, 224), + package=rasterio_mock, + ) + + +@pytest.fixture +def hsi_strategy(cv2_mock) -> HSImageStrategy: + """ + Creates a HSImageStrategy instance + + Relies on the cv2_mock fixture to mock the cv2 package + """ + return HSImageStrategy( + image_path="tests/segmentation_utils_tests/test_strategies", + image_size=(224, 224), + package=cv2_mock, + ) + + +@pytest.fixture +def hsi_mt_strategy(cv2_mock) -> HSImageStrategyMultiThread: + """ + Creates a HSImageStrategyMultiThread instance + + Relies on the cv2_mock fixture to mock the cv2 package + """ + return HSImageStrategyMultiThread( + image_path="tests/segmentation_utils_tests/test_strategies", + image_size=(224, 224), + package=cv2_mock, + ) + + +#################################################################################################### +# Test Generators # +#################################################################################################### + +FIXTURE_LIST = [ + "rgb_strategy", + "raster_strategy", + "raster_mt_strategy", + "hsi_strategy", + "hsi_mt_strategy", +] + +FIXTURE_LIST_MT = [ + "raster_mt_strategy", + "hsi_mt_strategy", +] + + +@pytest.fixture(params=FIXTURE_LIST) +def image_strategy(request, directory_mock): + """ + Generates a strategy instance for each strategy type + """ + strategy = request.getfixturevalue(request.param) + return strategy + + +@pytest.fixture(params=FIXTURE_LIST_MT) +def mt_image_strategy(request, directory_mock): + """ + Generates a strategy instance for each multi threaded strategy type + """ + strategy = request.getfixturevalue(request.param) + return strategy + + +@pytest.fixture(params=FIXTURE_LIST) +def fixture_factory(request, directory_mock): + """ + Generates a strategy instance for each strategy type + + Can be used to generate multiple instances of the same strategy type + """ + def make_instance(): + return request.getfixturevalue(request.param) + + return make_instance + + +@pytest.fixture(params=FIXTURE_LIST_MT) +def mt_fixture_factory(request, directory_mock): + """ + Generates a strategy instance for each multi threaded strategy type + + Can be used to generate multiple instances of the same strategy type + """ + def make_instance(): + return request.getfixturevalue(request.param) + + return make_instance + + +#################################################################################################### +# Test Functions # +#################################################################################################### + +@pytest.mark.development +def test_read_batch_image_path(image_strategy, mock_image_open) -> None: + # checking if the file is being opened and read correctly + + strategy = image_strategy + + batch_size = 2 + dataset_index = 0 + result = strategy.read_batch(batch_size, dataset_index) + + assert result.shape == (2, 224, 224, 3) + + +@pytest.mark.development +def test_read_batch_returns_nparray(image_strategy) -> None: + # checking if the returned value is a numpy array + strategy = image_strategy + + batch_size = 2 + dataset_index = 0 + + result = strategy.read_batch(batch_size, dataset_index) + assert isinstance(result, np.ndarray) + assert result.shape == (2, 224, 224, 3) + + +@pytest.mark.development +def test_get_dataset_size(image_strategy, directory_mock) -> None: + # checking if the calculation is done correctly + + strategy = image_strategy + dataset = directory_mock # number of images in the specified path + mini_batch = 2 # number of images we want in each batch + expected_value = int( + np.floor(dataset / float(mini_batch)) + ) # number of sets of images we expect + + dataset_size = strategy.get_dataset_size(mini_batch) + assert dataset_size == expected_value + + +@pytest.mark.development +def test_open(image_strategy): + strategy = image_strategy + + read_images = strategy.read_batch(2, 0) + + assert read_images.shape == (2, 224, 224, 3) + + +@pytest.mark.development +def test_hsi_get_channels(directory_mock, hsi_strategy): + strategy = hsi_strategy + + channels = strategy._HSImageStrategy__get_channels() + + assert channels == 3 + + +@pytest.mark.development +def test_hsi_mt_get_channels(directory_mock, hsi_mt_strategy): + strategy = hsi_mt_strategy + + channels = strategy._HSImageStrategyMultiThread__get_channels() + + assert channels == 3 + + +@pytest.mark.development +def test_empty_batch(image_strategy): + strategy = image_strategy + + batch_size = 0 + dataset_index = 0 + result = strategy.read_batch(batch_size, dataset_index) + + assert result.shape == ( + 0, + 224, + 224, + 3, + ) # 0 indicates there are no images in the batch + + +@pytest.mark.development +def test_out_of_bounds_index(image_strategy): + strategy = RGBImageStrategy( + image_path="tests/segmentation_utils_tests/test_strategies", + image_size=(224, 224), + image_resample=Image.Resampling.NEAREST, + ) + + batch_size = 2 # not an empty batch + dataset_index = len(strategy.image_filenames) # out of bounds index + + with pytest.raises(IndexError): + strategy.read_batch(batch_size, dataset_index) + + +@pytest.mark.development +def test_batch_slicing(image_strategy): + strategy = image_strategy + + batch_size = 10 + dataset_index = 2 + result = strategy.read_batch(batch_size, dataset_index) + assert ( + result.shape[0] == batch_size + ) # compare the size of returned data with batch_size + + +@pytest.mark.development +def test_get_image_size(image_strategy): + strategy = image_strategy + + result = strategy.get_image_size() + assert result == (224, 224) + + +@pytest.mark.development +def test_shuffle(fixture_factory): + strategy_1 = fixture_factory() + + strategy_2 = fixture_factory() + + n = 100 + + for i in range(n): + strategy_1.shuffle_filenames(i) + strategy_2.shuffle_filenames(i) + + assert np.array_equal(strategy_1.image_filenames, strategy_2.image_filenames) + assert type(strategy_1) == type(strategy_2) + + +@pytest.mark.development +def test_mt_image_in_order(mt_image_strategy): + strategy = mt_image_strategy + + batch_size = 10 + + call_count = strategy.package.get_count() + + result = strategy.read_batch(batch_size, 0) + + for i in range(call_count, call_count + batch_size): + assert np.array_equal( + result[i - call_count, :, :, :], np.full((224, 224, 3), i + 1) + ) diff --git a/tests/transform_utils_test.py/image_cutting_test.py b/tests/transform_utils_test.py/image_cutting_test.py index f3171e1..ef2a800 100644 --- a/tests/transform_utils_test.py/image_cutting_test.py +++ b/tests/transform_utils_test.py/image_cutting_test.py @@ -1,4 +1,5 @@ import numpy as np +import pytest import rasterio from PIL import Image from pytest import MonkeyPatch @@ -7,6 +8,7 @@ image_cut, image_stich) +@pytest.mark.development def test_image_cut() -> None: img = np.zeros((512, 512, 3)) img[-1, -1, 0] = 1 @@ -29,6 +31,7 @@ def test_image_cut() -> None: # assert True +@pytest.mark.development def test_image_cut_incorrect_shape_too_many() -> None: # does not pass try: @@ -40,6 +43,7 @@ def test_image_cut_incorrect_shape_too_many() -> None: assert True +@pytest.mark.development def test_image_cut_incorrect_band_specified() -> None: # passes however the function doesn't rasie a value error # when the bands do not match @@ -52,6 +56,7 @@ def test_image_cut_incorrect_band_specified() -> None: assert True +@pytest.mark.development def test_image_cut_slack_cut() -> None: img = np.zeros((513, 513, 3)) img[-2, -2, 0] = 1 @@ -62,6 +67,7 @@ def test_image_cut_slack_cut() -> None: assert cut_ims[-1, -1, -1, 0] == 1 +@pytest.mark.development def test_image_cut_slack_cut_exact() -> None: img = np.zeros((512, 512, 3)) img[-2, -2, 0] = 1 @@ -72,6 +78,7 @@ def test_image_cut_slack_cut_exact() -> None: assert cut_ims[-1, -2, -2, 0] == 1 +@pytest.mark.development def test_image_cut_pad() -> None: img = np.zeros((511, 511, 3)) img[-2, -2, 0] = 1 @@ -82,6 +89,7 @@ def test_image_cut_pad() -> None: assert cut_ims[-1, -3, -3, 0] == 1 +@pytest.mark.development def test_image_cut_pad_exact() -> None: img = np.zeros((512, 512, 3)) img[-2, -2, 0] = 1 @@ -92,6 +100,7 @@ def test_image_cut_pad_exact() -> None: assert cut_ims[-1, -2, -2, 0] == 1 +@pytest.mark.development def test_image_cut_incorrect_band() -> None: try: img = np.zeros((512, 512)) @@ -102,6 +111,7 @@ def test_image_cut_incorrect_band() -> None: assert True +@pytest.mark.development def test_image_cut_can_add_dimension() -> None: img = np.zeros((512, 512)) img[-1, -1] = 1 @@ -112,6 +122,7 @@ def test_image_cut_can_add_dimension() -> None: assert cut_ims[-1, -1, -1, 0] == 1 +@pytest.mark.development def test_image_stich() -> None: img1 = np.zeros((256, 256, 3), dtype=np.uint8) img2 = np.zeros((256, 256, 3), dtype=np.uint8) @@ -131,6 +142,7 @@ def test_image_stich() -> None: assert stiched_img[-1, -1, 0] == 9 +@pytest.mark.development def test_cut_ims_in_directory(mocker) -> None: patch = MonkeyPatch() diff --git a/utilities/segmentation_utils/flowreader.py b/utilities/segmentation_utils/flowreader.py index 491a7c6..1282a6b 100644 --- a/utilities/segmentation_utils/flowreader.py +++ b/utilities/segmentation_utils/flowreader.py @@ -7,15 +7,14 @@ from typing import Optional import numpy as np -import pandas as pd +import tensorflow as tf from keras.preprocessing.image import ImageDataGenerator from keras.utils import Sequence -from PIL import Image -from tqdm import tqdm from utilities.segmentation_utils import ImagePreprocessor from utilities.segmentation_utils.constants import ImageOrdering from utilities.segmentation_utils.ImagePreprocessor import IPreprocessor +from utilities.segmentation_utils.reading_strategies import IReader class FlowGenerator: @@ -253,17 +252,14 @@ class FlowGeneratorExperimental(Sequence): Raises ------ - :ValueError: if the names of the images and masks do not match :ValueError: if the output size is not a tuple of length 2 :ValueError: if the output size is not a square matrix or a column vector """ def __init__( self, - image_path: str, - mask_path: str, - image_size: tuple[int, int], - output_size: tuple[int, int], + input_strategy: IReader, + output_strategy: IReader, channel_mask: list[bool], num_classes: int, shuffle: bool = True, @@ -273,24 +269,15 @@ def __init__( preprocessing_seed: Optional[int] = None, preprocessing_queue_image: IPreprocessor = ImagePreprocessor.generate_image_queue(), preprocessing_queue_mask: IPreprocessor = ImagePreprocessor.generate_mask_queue(), - read_weights: bool = False, - weights_path: Optional[str] = None, - shuffle_counter: int = 0, image_ordering: ImageOrdering = ImageOrdering.CHANNEL_LAST, + is_column: bool = False, ): - if len(output_size) != 2: - raise ValueError("The output size has to be a tuple of length 2") - if output_size[1] != 1 and output_size[0] != output_size[1]: - raise ValueError( - "The output size has to be a square matrix or a column vector" - ) - - self.image_path = image_path - self.mask_path = mask_path + self.input_strategy = input_strategy + self.output_strategy = output_strategy self.batch_size = batch_size self.mini_batch = batch_size - self.image_size = image_size - self.output_size = output_size + self.image_size = input_strategy.get_image_size() + self.output_size = output_strategy.get_image_size() self.channel_mask = np.array(channel_mask) self.n_channels = np.sum(channel_mask) self.num_classes = num_classes @@ -298,58 +285,28 @@ def __init__( self.seed = seed self.preprocessing_enabled = preprocessing_enabled self.preprocessing_seed = preprocessing_seed - self.read_weights = read_weights - self.weights_path = weights_path + self.preprocessing_queue_image = preprocessing_queue_image self.preprocessing_queue_mask = preprocessing_queue_mask - self.shuffle_counter = shuffle_counter - self.image_ordering = image_ordering - - self.image_filenames = np.array(sorted(os.listdir(self.image_path))) - self.mask_filenames = np.array(sorted(os.listdir(self.mask_path))) - # should be moved out as a strategy - if self.read_weights: - weights_df = pd.read_csv(self.weights_path, header=None) - weights_np = weights_df.to_numpy() - print(weights_np.shape) - # sort the numpy array by the first column - weights_np = weights_np[weights_np[:, 0].argsort()] - - print(weights_np) - self.weights = weights_np[:, 1:].astype(np.float64) - weight_names = weights_np[:, 0] - for mask, weight_name in zip(self.mask_filenames, weight_names): - if mask != weight_name: - raise ValueError("The mask and weight directories do not match") - - self.linked_data = [self.image_filenames, self.mask_filenames] - if self.read_weights: - self.linked_data.append(self.weights) - - self.__shuffle_filenames() - self.dataset_size = self.__len__() - - print("Validating dataset...") - for i_name, m_name in tqdm(zip(self.image_filenames, self.mask_filenames)): - if i_name != m_name: - raise ValueError("The image and mask directories do not match") + self.image_ordering = image_ordering + self.is_column = is_column self.image_batch_store = None self.mask_batch_store = None self.validity_index = 0 + self.shuffle_counter = 0 - if self.output_size[1] == 1: - # only enters if the output is a column vector - # such no need to define it otherwise - dimension = math.sqrt(self.output_size[0]) - self.output_reshape = (int(dimension), int(dimension)) - self.column_vector = True - else: - self.output_reshape = self.output_size - self.column_vector = False + self.__update_dataset_size() - print("Reading images from: ", self.image_path) + self.__shuffle_filenames() + + if len(self.output_size) != 2: + raise ValueError("The output size has to be a tuple of length 2") + if self.output_size[1] != 1 and self.output_size[0] != self.output_size[1]: + raise ValueError( + "The output size has to be a square matrix or a column vector" + ) def set_preprocessing_pipeline( self, @@ -386,88 +343,77 @@ def set_mini_batch_size(self, batch_size: int) -> None: if self.batch_size % batch_size != 0: raise ValueError("The batch size must be divisible by the mini batch size") self.mini_batch = batch_size + self.__update_dataset_size() - def __read_batch(self, start: int, end: int) -> None: - # read image batch - batch_image_filenames = self.image_filenames[start:end] - batch_mask_filenames = self.mask_filenames[start:end] - for image, mask in zip(batch_image_filenames, batch_mask_filenames): - if image != mask: - raise ValueError("The image and mask directories do not match") + def __update_dataset_size(self) -> None: + self.dataset_size = self.input_strategy.get_dataset_size(self.mini_batch) + + def __read_batch(self, dataset_index: int) -> None: + #!adjust the batch size as it is passed to the function + # calculates remaining images in a dataset and scales it down by multiplying with minibatch + partial_dataset = self.dataset_size * self.mini_batch - dataset_index + + # compare and choose the smaller value, to avoid making a larger batch_size + adjusted_batch_size = min(self.batch_size, partial_dataset) # calculate number of mini batches in a batch - n = self.batch_size // self.mini_batch + n = adjusted_batch_size // self.mini_batch + + batch_images = self.input_strategy.read_batch( + adjusted_batch_size, dataset_index + ) + batch_masks = self.output_strategy.read_batch( + adjusted_batch_size, dataset_index + ) - batch_images = np.zeros( + # preprocess and assign images and masks to the batch + + if self.preprocessing_enabled: + for i in range(adjusted_batch_size): + image = batch_images[i, ...] + mask = batch_masks[i, ...] + if self.preprocessing_seed is None: + image_seed = np.random.randint(0, 100000) + else: + state = np.random.RandomState(self.preprocessing_seed) + image_seed = state.randint(0, 100000) + ( + image, + mask, + ) = ImagePreprocessor.augmentation_pipeline( + image, + mask=mask, + seed=image_seed, + #!both preprocessing queues are assigned by this time + image_queue=self.preprocessing_queue_image, # type: ignore + mask_queue=self.preprocessing_queue_mask, # type: ignore + ) + batch_images[i, ...] = image + batch_masks[i, ...] = mask + + batch_masks = ImagePreprocessor.onehot_encode(batch_masks, self.num_classes) + + batch_images = tf.reshape( + batch_images, ( n, self.mini_batch, self.image_size[0], self.image_size[1], self.n_channels, - ) + ), ) - - batch_masks = np.zeros( + batch_masks = tf.reshape( + batch_masks, ( n, self.mini_batch, - self.output_reshape[0], - self.output_reshape[1], + self.output_size[0], + self.output_size[1], self.num_classes, - ) + ), ) - # preprocess and assign images and masks to the batch - for i in range(n): - raw_masks = np.zeros( - (self.mini_batch, self.output_reshape[0], self.output_reshape[1]) - ) - - for j in range(self.mini_batch): - image_index = i * self.mini_batch + j - - image = Image.open( - os.path.join(self.image_path, batch_image_filenames[image_index]) - ).resize(self.image_size, Image.ANTIALIAS) - - image = np.array(image) - - mask = Image.open( - os.path.join(self.mask_path, batch_mask_filenames[image_index]) - ).resize(self.output_reshape) - - mask = np.array(mask) - # image = image[:, :, self.channel_mask] - - if self.preprocessing_enabled: - if self.preprocessing_seed is None: - image_seed = np.random.randint(0, 100000) - else: - state = np.random.RandomState(self.preprocessing_seed) - image_seed = state.randint(0, 100000) - - ( - image, - mask, - ) = ImagePreprocessor.augmentation_pipeline( - image, - mask=mask, - seed=image_seed, - #!both preprocessing queues are assigned by this time - image_queue=self.preprocessing_queue_image, # type: ignore - mask_queue=self.preprocessing_queue_mask, # type: ignore - ) - - batch_images[i, j, :, :, :] = image - # NOTE: this provides the flexibility required to process both - # column and matrix vectors - raw_masks[j, :, :] = mask - - batch_masks[i, :, :, :] = ImagePreprocessor.onehot_encode( - raw_masks, self.num_classes - ) - # chaches the batch self.image_batch_store = batch_images self.mask_batch_store = batch_masks @@ -475,17 +421,16 @@ def __read_batch(self, start: int, end: int) -> None: # required to check when to read the next batch def __len__(self) -> int: - return int(np.floor(len(self.image_filenames) / float(self.mini_batch))) + return self.input_strategy.get_dataset_size(self.mini_batch) def __getitem__(self, index) -> tuple[np.ndarray, np.ndarray]: # check if the batch is already cached - index = index % self.dataset_size if index < self.validity_index - self.batch_size // self.mini_batch: self.validity_index = 0 if index == self.validity_index: - self.__read_batch(index * self.batch_size, (index + 1) * self.batch_size) + self.__read_batch(index * self.mini_batch) self.validity_index = (self.batch_size // self.mini_batch) + index # slices new batch @@ -495,13 +440,14 @@ def __getitem__(self, index) -> tuple[np.ndarray, np.ndarray]: batch_images = self.image_batch_store[store_index, ...] # type: ignore batch_masks = self.mask_batch_store[store_index, ...] # type: ignore - if self.column_vector: - batch_masks = np.reshape( + + if self.is_column: + batch_masks = tf.reshape( batch_masks, ( - self.mini_batch, - batch_masks.shape[1] * batch_masks[2], - self.num_classes, + batch_masks.shape[0], + batch_masks.shape[1] * batch_masks.shape[2], + batch_masks.shape[3], ), ) @@ -509,24 +455,14 @@ def __getitem__(self, index) -> tuple[np.ndarray, np.ndarray]: batch_images = np.moveaxis(batch_images, -1, 1) batch_masks = np.moveaxis(batch_masks, -1, 1) - if self.read_weights: - batch_weights = self.weights[ - index * self.batch_size : (index + 1) * self.batch_size, ... - ] - - return batch_images, batch_masks, batch_weights - else: - return batch_images, batch_masks + return batch_images, batch_masks def on_epoch_end(self) -> None: # Shuffle image and mask filenames self.__shuffle_filenames() def __shuffle_filenames(self) -> None: - if self.shuffle: - state = np.random.RandomState(self.seed + self.shuffle_counter) - self.shuffle_counter += 1 - shuffled_indices = state.permutation(len(self.image_filenames)) - shuffled_indices = shuffled_indices.astype(int) - for array in self.linked_data: - array = array[shuffled_indices] + new_seed = self.seed + self.shuffle_counter + self.input_strategy.shuffle_filenames(new_seed) + self.output_strategy.shuffle_filenames(new_seed) + self.shuffle_counter += 1 diff --git a/utilities/segmentation_utils/reading_strategies.py b/utilities/segmentation_utils/reading_strategies.py new file mode 100644 index 0000000..5e30a14 --- /dev/null +++ b/utilities/segmentation_utils/reading_strategies.py @@ -0,0 +1,422 @@ +import os +from concurrent import futures +from concurrent.futures import ThreadPoolExecutor +from typing import Any, Protocol + +import cv2 +import numpy as np +import rasterio +from PIL import Image + + +class IReader(Protocol): + def read_batch(self, batch_size: int, dataset_index: int) -> np.ndarray: + ... + + def get_dataset_size(self, minibatch: int) -> int: + ... + + def get_image_size(self) -> tuple[int, int]: + ... + + def shuffle_filenames(self, seed: int) -> None: + ... + + +class RGBImageStrategy: + """ + Strategy optimized for reading RGB images powered by backend PIL. + """ + + def __init__( + self, + image_path: str, + image_size: tuple[int, int], + image_resample=Image.Resampling.NEAREST, + ): + self.image_path = image_path + self.image_filenames = np.array( + sorted(os.listdir(self.image_path)) + ) #!update: added variable to initialiser + self.image_size = image_size + self.image_resample = image_resample + + def read_batch(self, batch_size, dataset_index) -> np.ndarray: + # read images with PIL + batch_filenames = self.image_filenames[ + dataset_index : dataset_index + batch_size + ] + + images = np.zeros((batch_size, self.image_size[0], self.image_size[1], 3)) + is_color = True + for i in range(batch_size): + image = Image.open( + os.path.join(self.image_path, batch_filenames[i]) + ).resize(self.image_size, self.image_resample) + image = np.array(image) + if len(image.shape) == 2 and is_color: + images = np.zeros((batch_size, self.image_size[0], self.image_size[1])) + is_color = False + images[i, ...] = image + return images + + def get_dataset_size(self, mini_batch) -> int: + dataset_size = int(np.floor(len(self.image_filenames) / float(mini_batch))) + return dataset_size + + def get_image_size(self) -> tuple[int, int]: + return self.image_size + + def shuffle_filenames(self, seed: int) -> None: + state = np.random.RandomState(seed) + shuffled_indices = state.permutation(len(self.image_filenames)) + shuffled_indices = shuffled_indices.astype(int) + self.image_filenames = self.image_filenames[shuffled_indices] + + +class RGBImageStrategyMultiThread: + """ + Strategy optimized for reading RGB images powered by backend PIL. + Multi threaded version. + """ + + def __init__( + self, + image_path: str, + image_size: tuple[int, int], + image_resample=Image.Resampling.NEAREST, + max_workers: int = 8, + ): + self.image_path = image_path + self.image_filenames = np.array( + sorted(os.listdir(self.image_path)) + ) #!update: added variable to initialiser + self.image_size = image_size + self.image_resample = image_resample + self.max_workers = max_workers + + def __read_single_image_pil(self, filename, image_path, image_size, image_resample): + image = Image.open(os.path.join(image_path, filename)).resize( + image_size, image_resample + ) + return np.array(image) + + def read_batch(self, batch_size: int, dataset_index: int) -> np.ndarray: + batch_filenames = self.image_filenames[ + dataset_index : dataset_index + batch_size + ] + + images = np.zeros((batch_size, self.image_size[0], self.image_size[1], 3)) + is_color = True + + with ThreadPoolExecutor(max_workers=self.max_workers) as executor: + future_to_index = { + executor.submit( + self.__read_single_image_pil, + filename, + self.image_path, + self.image_size, + self.image_resample, + ): i + for i, filename in enumerate(batch_filenames) + } + for future in futures.as_completed(future_to_index): + i = future_to_index[future] + image = future.result() + + if len(image.shape) == 2 and is_color: + images = np.zeros( + (batch_size, self.image_size[0], self.image_size[1]) + ) + is_color = False + + images[i, ...] = image + + return images + + def get_dataset_size(self, mini_batch) -> int: + dataset_size = int(np.floor(len(self.image_filenames) / float(mini_batch))) + return dataset_size + + def get_image_size(self) -> tuple[int, int]: + return self.image_size + + def shuffle_filenames(self, seed: int) -> None: + state = np.random.RandomState(seed) + shuffled_indices = state.permutation(len(self.image_filenames)) + shuffled_indices = shuffled_indices.astype(int) + self.image_filenames = self.image_filenames[shuffled_indices] + + +class HSImageStrategy: + """ + Strategy optimized for reading hyperspectral images powered by backend OpenCV + """ + + def __init__( + self, image_path: str, image_size: tuple[int, int], package: Any = cv2 + ) -> None: + self.image_path = image_path + self.image_filenames = np.array(sorted(os.listdir(self.image_path))) + self.image_size = image_size + self.package = package + self.bands = self.__get_channels() + + def __get_channels(self) -> int: + # Open the first image to determine the number of channels + sample_image_path = os.path.join(self.image_path, self.image_filenames[0]) + sample_image = self.package.imread( + sample_image_path, self.package.IMREAD_UNCHANGED + ) + return sample_image.shape[2] if len(sample_image.shape) == 3 else 1 + + def read_batch(self, batch_size, dataset_index) -> np.ndarray: + # Read a sample image to determine the number of bands + + # Initialize images array + images = np.zeros( + (batch_size, self.image_size[1], self.image_size[0], self.bands) + ) + + # Read images with OpenCV + batch_filenames = self.image_filenames[ + dataset_index : dataset_index + batch_size + ] + + for i in range(batch_size): + image_path = os.path.join(self.image_path, batch_filenames[i]) + image = self.package.imread(image_path, self.package.IMREAD_UNCHANGED) + + # Resize the image + image = self.package.resize(image, self.image_size) + + # If the image is color, convert BGR to RGB + if len(image.shape) == 3 and image.shape[2] == 3: + image = self.package.cvtColor(image, self.package.COLOR_BGR2RGB) + + images[i, ...] = image + + return images + + def get_dataset_size(self, mini_batch) -> int: + dataset_size = int(np.floor(len(self.image_filenames) / float(mini_batch))) + return dataset_size + + def get_image_size(self) -> tuple[int, int]: + return self.image_size + + def shuffle_filenames(self, seed: int) -> None: + state = np.random.RandomState(seed) + shuffled_indices = state.permutation(len(self.image_filenames)) + shuffled_indices = shuffled_indices.astype(int) + self.image_filenames = self.image_filenames[shuffled_indices] + + +class HSImageStrategyMultiThread: + def __init__( + self, + image_path: str, + image_size: tuple[int, int], + package: Any = cv2, + max_workers: int = 8, + ) -> None: + self.image_path = image_path + self.image_filenames = np.array(sorted(os.listdir(self.image_path))) + self.image_size = image_size + self.package = package + self.bands = self.__get_channels() + self.max_workers = max_workers + + def __get_channels(self) -> int: + # Open the first image to determine the number of channels + sample_image_path = os.path.join(self.image_path, self.image_filenames[0]) + sample_image = self.package.imread( + sample_image_path, self.package.IMREAD_UNCHANGED + ) + return sample_image.shape[2] if len(sample_image.shape) == 3 else 1 + + def __read_single_image( + self, filename: str, package: Any, image_size: tuple[int, int, int] + ) -> np.ndarray: + image = package.imread(filename, package.IMREAD_UNCHANGED) + image = package.resize(image, image_size) + if len(image.shape) == 3 and image.shape[2] == 3: + image = package.cvtColor(image, package.COLOR_BGR2RGB) + return image + + def read_batch(self, batch_size, dataset_index) -> np.ndarray: + # Initialize images array + images = np.zeros( + (batch_size, self.image_size[1], self.image_size[0], self.bands) + ) + + # Read images with OpenCV + batch_filenames = self.image_filenames[ + dataset_index : dataset_index + batch_size + ] + + image_paths = [ + os.path.join(self.image_path, batch_filenames[i]) for i in range(batch_size) + ] + + with ThreadPoolExecutor() as executor: + results = executor.map( + self.__read_single_image, + image_paths, + [self.package] * batch_size, + [self.image_size] * batch_size, + ) + + for i, image in enumerate(results): + images[i, ...] = image + + return images + + def get_dataset_size(self, mini_batch) -> int: + dataset_size = int(np.floor(len(self.image_filenames) / float(mini_batch))) + return dataset_size + + def get_image_size(self) -> tuple[int, int]: + return self.image_size + + def shuffle_filenames(self, seed: int) -> None: + state = np.random.RandomState(seed) + shuffled_indices = state.permutation(len(self.image_filenames)) + shuffled_indices = shuffled_indices.astype(int) + self.image_filenames = self.image_filenames[shuffled_indices] + + +class RasterImageStrategy: + """ + Strategy optimized for reading raster images powered by backend rasterio. + """ + + # read images with rasterio + def __init__( + self, + image_path: str, + image_size: tuple[int, int], + image_resample=Image.Resampling.NEAREST, + package: Any = rasterio, + ): + self.image_path = image_path + self.image_filenames = np.array(sorted(os.listdir(self.image_path))) + self.image_size = image_size + self.image_resample = image_resample + self.package = package + # gets the number of bands for the dataset + self.bands = package.open( + os.path.join(self.image_path, self.image_filenames[0]) + ).count + + def read_batch(self, batch_size: int, dataset_index: int) -> np.ndarray: + # read images with rasterio + batch_filenames = self.image_filenames[ + dataset_index : dataset_index + batch_size + ] + + # defines the array that will contain the images + images = np.zeros( + (batch_size, self.bands, self.image_size[0], self.image_size[1]) + ) + for i, filename in enumerate(batch_filenames): + with self.package.open(os.path.join(self.image_path, filename)) as dataset: + # .read() returns a numpy array that contains the raster cell values in your file. + image = dataset.read() + images[i, :, :, :] = np.resize(image, (self.bands, *self.image_size)) + + # ensures channel-last orientation for the reader + images = np.moveaxis(images, 1, 3) + + return np.array(images) + + def get_dataset_size(self, mini_batch) -> int: + dataset_size = int(np.floor(len(self.image_filenames) / float(mini_batch))) + return dataset_size + + def get_image_size(self) -> tuple[int, int]: + return self.image_size + + def shuffle_filenames(self, seed: int) -> None: + state = np.random.RandomState(seed) + shuffled_indices = state.permutation(len(self.image_filenames)) + shuffled_indices = shuffled_indices.astype(int) + self.image_filenames = self.image_filenames[shuffled_indices] + + +class RasterImageStrategyMultiThread: + """ + Strategy optimized for reading raster images powered by backend rasterio. + Multi threaded version. + """ + + # read images with rasterio + def __init__( + self, + image_path: str, + image_size: tuple[int, int], + image_resample=Image.Resampling.NEAREST, + max_workers: int = 8, + package: Any = rasterio, + ): + self.image_path = image_path + self.image_filenames = np.array(sorted(os.listdir(self.image_path))) + self.image_size = image_size + self.image_resample = image_resample + self.package = package + self.max_workers = max_workers + # gets the number of bands for the dataset + self.bands = package.open( + os.path.join(self.image_path, self.image_filenames[0]) + ).count + + def __read_single_image( + self, filename: str, package: Any, image_size: tuple[int, int, int] + ) -> np.ndarray: + with package.open(filename) as dataset: + image = dataset.read() + resized_image = np.resize(image, image_size) + return resized_image + + def read_batch(self, batch_size: int, dataset_index: int) -> np.ndarray: + batch_filenames = [ + os.path.join(self.image_path, filename) + for filename in self.image_filenames[ + dataset_index : dataset_index + batch_size + ] + ] + + # Pre-allocate memory + images = np.zeros( + (batch_size, self.bands, self.image_size[0], self.image_size[1]) + ) + + # Use ThreadPoolExecutor.map for more efficient multi-threading + with ThreadPoolExecutor(max_workers=self.max_workers) as executor: + for i, image in enumerate( + executor.map( + self.__read_single_image, + batch_filenames, + [self.package] * batch_size, + [(self.bands, *self.image_size)] * batch_size, + ) + ): + images[i, :, :, :] = image + + # Ensure channel-last orientation + images = np.moveaxis(images, 1, 3) + + return images + + def get_dataset_size(self, mini_batch) -> int: + dataset_size = int(np.floor(len(self.image_filenames) / float(mini_batch))) + return dataset_size + + def get_image_size(self) -> tuple[int, int]: + return self.image_size + + def shuffle_filenames(self, seed: int) -> None: + state = np.random.RandomState(seed) + shuffled_indices = state.permutation(len(self.image_filenames)) + shuffled_indices = shuffled_indices.astype(int) + self.image_filenames = self.image_filenames[shuffled_indices]