# Install and import the required libraries

In [None]:
!pip install -r requirements.txt

## Imports
The `image_process` package, including its `pipeline` and `processes` modules, was developed by the team to provide a dynamic and flexible tool for testing multiple combinations of augmentation techniques, filters, and processes. You can find its source in the current directory.

In [None]:
import logging
import math

import cv2 as cv
import numpy as np
import h5py
import matplotlib.pyplot as plt
import unittest
import os
import zipfile

from image_process.pipeline import PipelineViewer, ProcessingPipeline
from image_process.processes import (
    MorphDilate,
    Rotate,
    Translate,
    Flip,
    BrightnessContrast,
    RandomGaussianBlur,
    GaussianBlur,
    MedianBlur,
    BilateralFilter,
    AdaptiveMeanThresh
)

# Pipeline Execution

## Define auxiliary methods
A set of configuration lines and auxiliary functions was defined to simplify running the pipeline, loading the images, and logging information.

In [None]:
# Configure the root logger: INFO level, each record prefixed with its
# timestamp and severity.
logging.basicConfig(
    format="%(asctime)s - %(levelname)s - %(message)s",
    level=logging.INFO,
)


def load_image(path, color_mode=cv.IMREAD_COLOR):
    """
    Read an image from disk with OpenCV.

    Args:
        path (str): Location of the image file.
        color_mode: OpenCV imread flag; defaults to ``cv.IMREAD_COLOR``.

    Returns:
        The value returned by ``cv.imread`` for ``path``.

    Raises:
        FileNotFoundError: If ``cv.imread`` returns ``None`` for ``path``.
    """
    loaded = cv.imread(path, color_mode)
    # cv.imread signals failure by returning None rather than raising.
    if loaded is not None:
        return loaded
    raise FileNotFoundError(f"Could not load image from {path}")

def display_history(history):
    """
    Plot every image of a pipeline history on a near-square grid of subplots.

    Args:
        history: Sequence of 3-item entries. The first item is the image
            array, the second is the label used as the subplot title, and
            the third is ignored. Images with more than two dimensions are
            converted from BGR to RGB before plotting; 2-D images are shown
            with a gray colormap.

    Returns:
        None. Nothing is drawn when ``history`` is empty.
    """
    n_images = len(history)
    if n_images == 0:
        # Nothing to draw; the grid computation below would divide by zero.
        return

    # Smallest grid that is as close to square as possible and fits all images.
    n_rows = math.ceil(n_images ** 0.5)
    n_cols = math.ceil(n_images / n_rows)
    plt.figure(figsize=(n_cols * 4, n_rows * 4))
    for i, (img, label, _) in enumerate(history):
        plt.subplot(n_rows, n_cols, i + 1)
        if img.ndim == 2:
            # BGR2RGB rejects single-channel input, so plot it as grayscale.
            plt.imshow(img, cmap='gray')
        else:
            plt.imshow(cv.cvtColor(img, cv.COLOR_BGR2RGB))
        plt.title(label)
        plt.axis('off')
    plt.tight_layout()

def save_to_hdf5(
    normalized_imgs_arr,
    normalized_masks_arr,
    cropped_coordinates_arr,
    img_paths,
    filename,
):
    """
    Write processed images, masks and crop coordinates to a new HDF5 file.

    One group named ``image_<idx>_<index>`` is created per processed image,
    where ``index`` is the position of the source path in ``img_paths`` and
    ``idx`` is the position of the image within that source's results. Each
    group stores the image and mask shapes plus the source path as
    attributes, the flattened pixel data as gzip-compressed datasets, and
    the coordinates as an int32 dataset.

    Args:
        normalized_imgs_arr: Per source image, a sequence of image arrays.
        normalized_masks_arr: Per source image, a sequence of mask arrays.
        cropped_coordinates_arr: Per source image, a sequence of coordinates.
        img_paths: Source image paths, aligned by position with the three
            sequences above.
        filename (str): Path of the HDF5 file to create (opened in 'w' mode).
    """
    gzip_max = {'compression': 'gzip', 'compression_opts': 9}
    channel_names = ('image_red', 'image_green', 'image_blue')

    with h5py.File(filename, 'w') as h5_file:
        for index, source_path in enumerate(img_paths):
            samples = zip(
                normalized_imgs_arr[index],
                normalized_masks_arr[index],
                cropped_coordinates_arr[index],
            )
            for idx, (image, mask, coords) in enumerate(samples):
                entry = h5_file.create_group(f'image_{idx}_{index}')
                entry.attrs['image_shape'] = image.shape
                entry.attrs['mask_shape'] = mask.shape
                entry.attrs['original_path'] = source_path

                if image.ndim > 2:
                    # Multi-channel image: channels 0, 1 and 2 are stored as
                    # separate flattened datasets.
                    # NOTE(review): channel 0 is stored as 'image_red', but
                    # images read with cv.imread are BGR-ordered -- confirm
                    # whether a conversion happens upstream.
                    planes = [image[:, :, channel].flatten() for channel in range(3)]
                    for dataset_name, plane in zip(channel_names, planes):
                        entry.create_dataset(dataset_name, data=plane, **gzip_max)
                else:
                    # Single-channel image: one flattened dataset.
                    entry.create_dataset('image_gray', data=image.flatten(), **gzip_max)

                entry.create_dataset('mask', data=mask.flatten(), **gzip_max)
                entry.create_dataset('coordinates', data=np.array(coords), dtype='int32')

def load_from_hdf5(filename):
    """
    Print a summary of the contents of an HDF5 file.

    For every top-level group this prints its name, the 'original_path',
    'image_shape' and 'mask_shape' attributes, and the shape of each dataset
    it contains. Nothing is returned.

    Args:
        filename (str): Path of the HDF5 file to open read-only.
    """
    with h5py.File(filename, 'r') as h5_file:
        print("Datasets in the HDF5 file include:")
        for group_name in h5_file.keys():
            entry = h5_file[group_name]
            original_path = entry.attrs['original_path']
            print(f"{group_name} - Original path: {original_path}")
            image_shape = entry.attrs['image_shape']
            mask_shape = entry.attrs['mask_shape']
            print(f"Image shape: {image_shape}, Mask shape: {mask_shape}")
            for dataset_name in entry.keys():
                dataset_shape = entry[dataset_name].shape
                print(f"{dataset_name}: shape {dataset_shape}")

def compress_hdf5_to_zip(hdf5_filename, output_zip_filename):
    """
    Pack a single HDF5 file into a DEFLATE-compressed ZIP archive.

    The file is stored under its base name only, so no directory components
    of ``hdf5_filename`` end up inside the archive. A confirmation line is
    printed once the file has been added.

    Args:
        hdf5_filename (str): Path to the HDF5 file to compress.
        output_zip_filename (str): Path and name of the output ZIP file
            (opened in 'w' mode).
    """
    with zipfile.ZipFile(
        output_zip_filename, mode='w', compression=zipfile.ZIP_DEFLATED
    ) as archive:
        member_name = os.path.basename(hdf5_filename)
        archive.write(hdf5_filename, arcname=member_name)
        print(f"Compressed {hdf5_filename} into {output_zip_filename}")

## Loading Images

In [None]:
base_img_path = "./test_imgs"
# List the directory once, in sorted order, so that paths, images and masks
# line up index-for-index and the ordering is reproducible between runs.
imgs_paths = [os.path.join(base_img_path, name) for name in sorted(os.listdir(base_img_path))]
imgs = [load_image(path) for path in imgs_paths]
# Each mask is the same file as its image, read in grayscale mode.
masks = [load_image(path, cv.IMREAD_GRAYSCALE) for path in imgs_paths]

## Defining the pipeline parameters
For the initial tests, two lists were established containing filter and augmentation modules.

In [None]:
# Number of augmented images to generate
n_augmented = 5

# Processes handed to the pipeline as filters (see pipeline.add_filters).
filters = [GaussianBlur(), AdaptiveMeanThresh()]
# Processes handed to the pipeline as augmentations (see pipeline.add_augmentations).
augmentations = [
    Rotate(),
    Translate(),
    Flip(),
    BrightnessContrast(),
    RandomGaussianBlur(),
    MedianBlur(),
    BilateralFilter(),
]

## Setting up the pipeline
After defining the pipeline modules, we need to create an instance of the pipeline and add the necessary modules to it.

In [None]:
# Create a processing pipeline
pipeline = ProcessingPipeline()

# Register the filter and augmentation lists with the pipeline
pipeline.add_filters(filters)
pipeline.add_augmentations(augmentations)

# Executing

In [None]:
# One entry per source image, each holding what pipeline.run returned for it.
normalized_imgs_arr, normalized_masks_arr, cropped_coordinates_arr = [], [], []

for idx, img in enumerate(imgs):
    # NOTE(review): the trailing 120 and 10 are passed positionally; their
    # meaning is not visible here -- confirm against ProcessingPipeline.run.
    normalized_imgs, normalized_masks, cropped_coordinates = pipeline.run(
        img, masks[idx], n_augmented, 120, 10
    )
    normalized_imgs_arr.append(normalized_imgs)
    normalized_masks_arr.append(normalized_masks)
    cropped_coordinates_arr.append(cropped_coordinates)

In [None]:
# Total number of images produced across all source images.
count = sum(map(len, normalized_imgs_arr))
print(f"Image count: {count} images")

## Results
Displayed below are the processing results up to the 100th step.
As we have yet to annotate all base image masks, here masks are defined as a copy of the original image.

In [None]:
# Plot at most the first 100 entries recorded in the pipeline history.
display_history(pipeline.history[:100])

## Testing
We need unit tests to ensure that our modules and classes remain reliable after updates.
We added tests for most classes and methods.
The output below shows that all tests return "ok".

In [None]:
# Discover every module matching test_*.py under the tests directory and run
# the resulting suite with per-test (verbosity=2) output.
start_dir = 'tests'
suite = unittest.TestLoader().discover(start_dir=start_dir, pattern='test_*.py')
unittest.TextTestRunner(verbosity=2).run(suite)

## Dataset Generation
After processing the images, we store them in an HDF5 (`.h5`) dataset so that they can be retrieved later. With it, we can rebuild the original images and load them into memory from the stored arrays of pixels.

In [None]:
# Persist all pipeline outputs, together with their source paths, to one HDF5 file.
filename = 'processed_data.h5'
save_to_hdf5(
    normalized_imgs_arr=normalized_imgs_arr,
    normalized_masks_arr=normalized_masks_arr,
    cropped_coordinates_arr=cropped_coordinates_arr,
    img_paths=imgs_paths,
    filename=filename,
)

## Compression
Since the dataset is too large for GitHub to handle (and we are required to upload it to the repository), we need to compress the output dataset into a ZIP file.

In [None]:
zip_file = 'processed_data.zip'
compress_hdf5_to_zip(hdf5_filename=filename, output_zip_filename=zip_file)
# Remove the uncompressed HDF5 file; only the ZIP archive is kept.
os.unlink(filename)