# Iteration 1 - Data Preparation

> **Dataset**: Handwritten Dataset (Client)<br/>
> **Creator**: Shirel

##### *Import Libraries*

In [None]:
import cv2
import os
import matplotlib.pyplot as plt
import random
import math
import numpy as np
from tqdm import tqdm

##### *Global Variables*

In [None]:
# Directory that holds the original (unprocessed) input images.
folder_path = '../data/original'  # Update this path to point to your specific folder containing images

# Base directory under which processed images are written.
output_folder = '../data/processed'  # Update this path to the desired output folder

## Loading Images

In [None]:
def load_images_from_folder(folder_path, extensions=('.png', '.jpg', '.jpeg', '.JPG')):
    """
    Collect the paths of all image files in a folder whose extension matches.

    Matching is case-insensitive, so '.PNG' or '.Jpeg' files are found as well.
    Only regular files directly inside the folder are returned (no recursion),
    and the result is sorted by file name so repeated runs give the same order.

    Parameters:
    folder_path (str): The path to the folder containing the images.
    extensions (str or tuple of str): File extension(s) to filter the images by.
                               Default is ('.png', '.jpg', '.jpeg', '.JPG').

    Returns:
    list: A sorted list of full file paths to images in the folder that match the specified extensions.

    Raises:
    FileNotFoundError: If the specified folder does not exist.
    """

    # Check if the folder exists
    if not os.path.exists(folder_path):
        raise FileNotFoundError(f"The specified folder does not exist: {folder_path}")

    # A bare string is a valid argument for str.endswith; wrap it so the
    # lowercase normalisation below does not split it into single characters.
    if isinstance(extensions, str):
        extensions = (extensions,)
    wanted_suffixes = tuple(ext.lower() for ext in extensions)

    image_paths = []
    # os.listdir returns entries in arbitrary order; sort for reproducibility
    for file_name in sorted(os.listdir(folder_path)):
        if not file_name.lower().endswith(wanted_suffixes):
            continue
        full_path = os.path.join(folder_path, file_name)
        # Skip sub-directories that merely look like image files by name
        if os.path.isfile(full_path):
            image_paths.append(full_path)

    return image_paths

## Preview of the images

In [None]:
def preview_images(image_paths, num_images=10, title="Image Preview"):
    """
    Display a preview of random images from a list of image paths.

    The images are laid out on a grid with 5 columns and as many rows as the
    selection needs, so any number of images can be previewed.

    Parameters:
    image_paths (list of str): A list of file paths to the images that will be previewed.
    num_images (int): The number of images to display in the preview. Default is 10.
    title (str): The title for the preview images. Default is "Image Preview".

    Raises:
    ValueError: If num_images is less than 1 or if image_paths is empty.
    TypeError: If image_paths is not a list or if any element in image_paths is not a string.
    """

    # Validate that image_paths is a list of strings
    if not isinstance(image_paths, list) or not all(isinstance(p, str) for p in image_paths):
        raise TypeError("image_paths must be a list of strings.")

    # Validate that num_images is a positive integer
    if num_images < 1:
        raise ValueError("num_images must be at least 1.")

    # Validate that there are images to preview
    if not image_paths:
        raise ValueError("The image_paths list is empty. There are no images to preview.")

    # Randomly select images to preview, ensuring no more than the available number of images are selected
    selected_images = random.sample(image_paths, min(len(image_paths), num_images))

    # Size the grid from the selection instead of a fixed 2x5 layout, which
    # would fail as soon as an 11th subplot is requested.
    columns = 5
    rows = math.ceil(len(selected_images) / columns)
    plt.figure(figsize=(15, 5 * rows))

    # Loop through the selected images and display each one
    for i, image_path in enumerate(selected_images):
        image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)  # Load image in grayscale
        if image is None:
            # cv2.imread signals an unreadable file by returning None
            print(f"Warning: Could not read image {image_path}. Skipping.")
            continue
        plt.subplot(rows, columns, i + 1)
        plt.imshow(image, cmap='gray')
        plt.title(f"{title} {i+1}")
        plt.axis('off')

    # Show the plot with the selected images
    plt.show()

## Step 1 - Noise Reduction

In [None]:
def noise_reduction(image, method='gaussian'):
    """
    Denoise an image with one of three OpenCV filters.

    Parameters:
    image: Input image.
    method (str): Which filter to use: 'gaussian', 'median' or 'non_local_means'.

    Returns:
    The filtered image produced by the selected OpenCV call.

    Raises:
    ValueError: If method is not one of the supported names.
    """
    if method == 'gaussian':
        # 5x5 Gaussian kernel; sigma 0 is passed through to OpenCV
        kernel_size = (5, 5)
        return cv2.GaussianBlur(image, kernel_size, 0)

    if method == 'median':
        # Median filter with aperture size 5
        return cv2.medianBlur(image, 5)

    if method == 'non_local_means':
        # Positional arguments after the image: dst, h, templateWindowSize, searchWindowSize
        return cv2.fastNlMeansDenoising(image, None, 30, 7, 21)

    raise ValueError(f"Unknown noise reduction method: {method}")


## Step 2 - Histogram Equalization

In [None]:
def histogram_equalization(image, method='standard'):
    """
    Enhance image contrast through histogram equalization.

    Parameters:
    image: Input image.
    method (str): 'standard' for global equalization, 'clahe' for
                  Contrast Limited Adaptive Histogram Equalization.

    Returns:
    The contrast-enhanced image.

    Raises:
    ValueError: If method is not one of the supported names.
    """
    if method == 'standard':
        # Global histogram equalization over the whole image
        return cv2.equalizeHist(image)

    if method == 'clahe':
        # Tile-based equalization with clip limit 2.0 on an 8x8 grid
        equalizer = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
        return equalizer.apply(image)

    raise ValueError(f"Unknown histogram equalization method: {method}")


## Step 3 - Binarization 

In [None]:
def binarization(image, method='otsu'):
    """
    Threshold an image into a two-level (0 / 255) image.

    Parameters:
    image: Input image.
    method (str): 'global' (fixed threshold of 128), 'adaptive'
                  (Gaussian-weighted local threshold) or 'otsu'.

    Returns:
    The thresholded image.

    Raises:
    ValueError: If method is not one of the supported names.
    """
    if method == 'global':
        # cv2.threshold returns (threshold_used, image); only the image is needed
        return cv2.threshold(image, 128, 255, cv2.THRESH_BINARY)[1]

    if method == 'adaptive':
        # Block size 11 and constant 2 are passed as the last two arguments
        return cv2.adaptiveThreshold(
            image, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 11, 2
        )

    if method == 'otsu':
        # The THRESH_OTSU flag is added to THRESH_BINARY; 0 is passed as the threshold value
        otsu_flags = cv2.THRESH_BINARY + cv2.THRESH_OTSU
        return cv2.threshold(image, 0, 255, otsu_flags)[1]

    raise ValueError(f"Unknown binarization method: {method}")


## Step 4 - Morphological Operations 

In [None]:
def morphological_operations(image, method='closing', kernel_size=(3, 3), iterations=1):
    """
    Apply a morphological operation to the image to enhance shapes.

    Parameters:
    image: Input image.
    method (str): The morphological operation ('dilation', 'erosion', 'opening', 'closing').
    kernel_size (tuple of int): Size passed to cv2.getStructuringElement for the
                                rectangular structuring element. Default is (3, 3).
    iterations (int): Number of times the operation is applied. Default is 1.

    Returns:
    Image after applying morphological operation.

    Raises:
    ValueError: If method is not one of the supported operations.
    """
    # Reject unknown operations before doing any OpenCV work
    if method not in ('dilation', 'erosion', 'opening', 'closing'):
        raise ValueError(f"Unknown morphological operation: {method}")

    # Define a rectangular kernel for morphological operations
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, kernel_size)

    if method == 'dilation':
        return cv2.dilate(image, kernel, iterations=iterations)
    if method == 'erosion':
        return cv2.erode(image, kernel, iterations=iterations)

    # Opening = erosion followed by dilation; closing = dilation followed by erosion
    operation = cv2.MORPH_OPEN if method == 'opening' else cv2.MORPH_CLOSE
    return cv2.morphologyEx(image, operation, kernel, iterations=iterations)


## Step 5 - Edge Detection 

In [None]:
def edge_detection(image, method='canny'):
    """
    Detect edges in an image with the chosen operator.

    Parameters:
    image: Input image.
    method (str): 'canny' or 'sobel'.

    Returns:
    For 'canny', the output of cv2.Canny with thresholds 100 and 200.
    For 'sobel', the per-pixel magnitude of the x and y Sobel responses,
    which are computed with CV_64F depth and kernel size 5.

    Raises:
    ValueError: If method is not one of the supported names.
    """
    if method == 'canny':
        first_threshold, second_threshold = 100, 200
        return cv2.Canny(image, first_threshold, second_threshold)

    if method == 'sobel':
        # First-order derivative along x, then along y
        gradient_x = cv2.Sobel(image, cv2.CV_64F, 1, 0, ksize=5)
        gradient_y = cv2.Sobel(image, cv2.CV_64F, 0, 1, ksize=5)
        return cv2.magnitude(gradient_x, gradient_y)

    raise ValueError(f"Unknown edge detection method: {method}")


## Save Processed Images

In [None]:
def save_processed_images(images, folder_path):
    """
    Save the processed images to the specified folder as processed_<index>.png.

    The folder (including missing parent folders) is created when needed.
    A message is printed for every image: a confirmation when the write
    succeeded, a warning when cv2.imwrite reported a failure.

    Parameters:
    images (list): List of processed images.
    folder_path (str): Path to the folder where the images will be saved.
    """
    # exist_ok avoids the check-then-create race of a separate os.path.exists test
    os.makedirs(folder_path, exist_ok=True)

    # Save each image in the folder
    for idx, image in enumerate(images):
        output_path = os.path.join(folder_path, f"processed_{idx}.png")
        # cv2.imwrite returns False when the image could not be written
        if cv2.imwrite(output_path, image):
            print(f"Saved processed image to: {output_path}")
        else:
            print(f"Warning: Could not save processed image to: {output_path}")


## Assessing the quality of the image

In [None]:
def display_image(image, title="Image"):
    """Display a single image using matplotlib.

    Parameters:
    image (numpy.ndarray): The image to be displayed.
    title (str): The title set on the plot (default is "Image").

    The image is drawn with the 'gray' colormap, the axis is turned off for a
    cleaner presentation, and plt.show() is called to display the figure.
    """
    plt.imshow(image, cmap='gray')  # draw the image with the 'gray' colormap
    plt.title(title)
    plt.axis('off')  # hide axis ticks and frame
    plt.show()

In [None]:
def assess_image_quality(image):
    """Compute three simple quality metrics for an image.

    Parameters:
    image (numpy.ndarray): The image to be assessed.

    Returns:
    dict: The metrics, keyed by name:
        - Brightness: Mean of all pixel values.
        - Blurriness: Variance of the Laplacian response (low variance suggests a blurry image).
        - Sharpness: Sum over all pixels of the Sobel gradient magnitude
          (higher suggests a sharper image).
    """
    # Mean pixel intensity
    mean_intensity = np.mean(image)

    # Variance of the Laplacian response, computed with CV_64F depth
    laplacian_variance = cv2.Laplacian(image, cv2.CV_64F).var()

    # Gradient magnitude from the x and y Sobel responses (kernel size 5), summed over the image
    grad_x = cv2.Sobel(image, cv2.CV_64F, 1, 0, ksize=5)
    grad_y = cv2.Sobel(image, cv2.CV_64F, 0, 1, ksize=5)
    gradient_magnitude = np.sqrt(grad_x**2 + grad_y**2)
    total_gradient = gradient_magnitude.sum()

    metrics = {
        "Brightness": mean_intensity,
        "Blurriness": laplacian_variance,
        "Sharpness": total_gradient,
    }
    return metrics


## Main Functions to Process Images 

## Running combinations on all images

In [None]:
def process_images(folder_path, output_folder, noise_method='gaussian', hist_method='clahe',
                   binarize_method='otsu', morph_method='closing', edge_method='canny'):
    """
    Load, process, and save images using the specified methods for each preprocessing step.

    Every image found in folder_path is read in grayscale and passed through
    noise reduction, histogram equalization, binarization, a morphological
    operation and edge detection, in that order. Files that OpenCV cannot
    read are reported and skipped.

    Parameters:
    folder_path (str): Path to the folder containing the images.
    output_folder (str): Path to the folder where processed images will be saved.
    noise_method (str): Method for noise reduction.
    hist_method (str): Method for histogram equalization.
    binarize_method (str): Method for binarization.
    morph_method (str): Method for morphological operations.
    edge_method (str): Method for edge detection.
    """
    # Load images from the folder
    image_paths = load_images_from_folder(folder_path)
    processed_images = []

    # Using tqdm for progress tracking
    for image_path in tqdm(image_paths, desc="Processing Images", unit="image"):
        # Read the image in grayscale
        image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)

        # cv2.imread returns None for unreadable files; skip them instead of
        # crashing in the first filter (same handling as preview_images)
        if image is None:
            print(f"Warning: Could not read image {image_path}. Skipping.")
            continue

        # Apply each preprocessing step in order
        image = noise_reduction(image, method=noise_method)  # Step 1: Noise Reduction
        image = histogram_equalization(image, method=hist_method)  # Step 2: Histogram Equalization
        image = binarization(image, method=binarize_method)  # Step 3: Binarization
        image = morphological_operations(image, method=morph_method)  # Step 4: Morphological Operations
        image = edge_detection(image, method=edge_method)  # Step 5: Edge Detection

        # Add the processed image to the list
        processed_images.append(image)

    # Save the processed images
    save_processed_images(processed_images, output_folder)

## Experimenting with techniques

In [None]:
def main_experiment(image_paths, output_folder, noise_method='gaussian', hist_method='standard', binarize_method='global',
                   morph_method='dilation', edge_method='canny', experiment_step=None):
    """
    Run one preprocessing step (or the whole pipeline) on each image, then display and save the results.

    For every readable image the selected step(s) are applied, each
    intermediate result is shown with display_image, and the quality metrics
    of the final result are printed. All results are then written to
    output_folder with save_processed_images. Unreadable files are reported
    and skipped.

    Parameters:
    image_paths (list of str): Paths of the images to process.
    output_folder (str): Folder where the resulting images are saved.
    noise_method (str): Method for noise reduction.
    hist_method (str): Method for histogram equalization.
    binarize_method (str): Method for binarization.
    morph_method (str): Method for morphological operations.
    edge_method (str): Method for edge detection.
    experiment_step (str or None): 'noise', 'histogram', 'binarization', 'morphology'
                                   or 'edge' to run only that step; None runs all
                                   five steps in order.

    Raises:
    ValueError: If experiment_step is not one of the values listed above.
    """
    # A mistyped step name would otherwise silently skip every step and save
    # the untouched input images.
    valid_steps = (None, 'noise', 'histogram', 'binarization', 'morphology', 'edge')
    if experiment_step not in valid_steps:
        raise ValueError(f"Unknown experiment step: {experiment_step}")

    # (step name, function, chosen method, display title prefix) in pipeline order
    pipeline = (
        ('noise', noise_reduction, noise_method, "After Noise Reduction"),
        ('histogram', histogram_equalization, hist_method, "After Histogram Equalization"),
        ('binarization', binarization, binarize_method, "After Binarization"),
        ('morphology', morphological_operations, morph_method, "After Morphological Operation"),
        ('edge', edge_detection, edge_method, "After Edge Detection"),
    )

    processed_images = []

    # Loop over each image
    for image_path in tqdm(image_paths, desc="Processing Images"):
        image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)

        # cv2.imread returns None for unreadable files
        if image is None:
            print(f"Warning: Could not read image {image_path}. Skipping.")
            continue

        for step_name, apply_step, chosen_method, title_prefix in pipeline:
            if experiment_step is None or experiment_step == step_name:
                image = apply_step(image, method=chosen_method)
                display_image(image, title=f"{title_prefix} ({chosen_method})")

        # Append processed image
        processed_images.append(image)

        # Assess image quality
        quality_metrics = assess_image_quality(image)
        print(f"Image Quality Metrics: {quality_metrics}")

    # Save processed images
    save_processed_images(processed_images, output_folder)


### Saving experiments separately

In [None]:
def construct_output_path(base_folder, technique_category, technique_name):
    """
    Build, create and return the output directory for one technique.

    The directory is <base_folder>/processed/<technique_category>/<technique_name>.

    Parameters:
    base_folder (str): Root folder under which the 'processed' tree is placed.
    technique_category (str): The category of the technique (e.g., 'noiseReduction').
    technique_name (str): The specific technique used (e.g., 'median', 'gaussian').

    Returns:
    str: The full path of the directory, which exists once the function returns.
    """
    path_parts = (base_folder, 'processed', technique_category, technique_name)
    target_dir = os.path.join(*path_parts)

    # exist_ok makes repeated calls for the same technique harmless
    os.makedirs(target_dir, exist_ok=True)
    return target_dir

# Execution of Functions

In [None]:
# Load all image file paths from the specified folder
image_paths_all = load_images_from_folder(folder_path)

# Pick up to 5 images for the experiments. random.sample raises ValueError when
# asked for more items than exist, so the sample size is capped.
image_paths_experiment = random.sample(image_paths_all, min(5, len(image_paths_all)))

## Experimenting

## Preview Images

In [None]:
# Preview the sampled experiment images before any processing is applied
preview_images(image_paths_experiment, num_images=5, title="Original Image Preview")

## A. Noise Reduction Experiment

In [None]:
# Category folder name for the noise-reduction experiments; it is passed to
# construct_output_path as the technique category.
technique = 'Noise_Reduction'

### 1. Gaussian

In [None]:
# Technique name for this experiment; it also names the innermost output folder
# that construct_output_path creates (if missing) and returns.
inner_technique = 'gaussian'
output_folder_experiment = construct_output_path(output_folder, technique, inner_technique)

In [None]:
# Run only the noise-reduction step (experiment_step='noise') on the sampled images,
# with inner_technique as the noise method; results go to output_folder_experiment.
main_experiment(image_paths_experiment, output_folder_experiment,
                noise_method=inner_technique, experiment_step='noise')

### 2. Median

In [None]:
# Technique name for this experiment; it also names the innermost output folder
# that construct_output_path creates (if missing) and returns.
inner_technique = 'median'
output_folder_experiment = construct_output_path(output_folder, technique, inner_technique)

In [None]:
# Run only the noise-reduction step (experiment_step='noise') on the sampled images,
# with inner_technique as the noise method; results go to output_folder_experiment.
main_experiment(image_paths_experiment, output_folder_experiment,
                noise_method=inner_technique, experiment_step='noise')

### 3. Non-Local Means

In [None]:
# Technique name for this experiment; it also names the innermost output folder
# that construct_output_path creates (if missing) and returns.
inner_technique = 'non_local_means'
output_folder_experiment = construct_output_path(output_folder, technique, inner_technique)

In [None]:
# Run only the noise-reduction step (experiment_step='noise') on the sampled images,
# with inner_technique as the noise method; results go to output_folder_experiment.
main_experiment(image_paths_experiment, output_folder_experiment,
                noise_method=inner_technique, experiment_step='noise')


## B. Histogram Equalization Experiment

In [None]:
# Category folder name for the histogram-equalization experiments; it is passed to
# construct_output_path as the technique category.
technique = 'Histogram_Equalization'

### 1. Standard

In [None]:
# Technique name for this experiment; it also names the innermost output folder
# that construct_output_path creates (if missing) and returns.
inner_technique = 'standard'
output_folder_experiment = construct_output_path(output_folder, technique, inner_technique)

In [None]:
# The technique must be passed as hist_method: with experiment_step='histogram'
# only the histogram-equalization step runs, so noise_method would be ignored.
main_experiment(image_paths_experiment, output_folder_experiment,
                hist_method=inner_technique, experiment_step='histogram')

### 2. CLAHE

In [None]:
# Technique name for this experiment; it also names the innermost output folder
# that construct_output_path creates (if missing) and returns.
inner_technique = 'clahe'
output_folder_experiment = construct_output_path(output_folder, technique, inner_technique)

In [None]:
# The technique must be passed as hist_method: with experiment_step='histogram'
# only the histogram-equalization step runs, so noise_method would be ignored
# and the default 'standard' equalization would be used instead of CLAHE.
main_experiment(image_paths_experiment, output_folder_experiment,
                hist_method=inner_technique, experiment_step='histogram')


## C. Binarization Experiment

In [None]:
# Category folder name for the binarization experiments; it is passed to
# construct_output_path as the technique category.
technique = 'Binarization'

### 1. Global

In [None]:
# Technique name for this experiment; it also names the innermost output folder
# that construct_output_path creates (if missing) and returns.
inner_technique = 'global'
output_folder_experiment = construct_output_path(output_folder, technique, inner_technique)

In [None]:
# The technique must be passed as binarize_method: with experiment_step='binarization'
# only the binarization step runs, so noise_method would be ignored.
main_experiment(image_paths_experiment, output_folder_experiment,
                binarize_method=inner_technique, experiment_step='binarization')

### 2. Adaptive

In [None]:
# Technique name for this experiment; it also names the innermost output folder
# that construct_output_path creates (if missing) and returns.
inner_technique = 'adaptive'
output_folder_experiment = construct_output_path(output_folder, technique, inner_technique)

In [None]:
# The technique must be passed as binarize_method: with experiment_step='binarization'
# only the binarization step runs, so noise_method would be ignored and the
# default 'global' threshold would be used instead of the adaptive one.
main_experiment(image_paths_experiment, output_folder_experiment,
                binarize_method=inner_technique, experiment_step='binarization')

### 3. Otsu

In [None]:
# Technique name for this experiment; it also names the innermost output folder
# that construct_output_path creates (if missing) and returns.
inner_technique = 'otsu'
output_folder_experiment = construct_output_path(output_folder, technique, inner_technique)

In [None]:
# The technique must be passed as binarize_method: with experiment_step='binarization'
# only the binarization step runs, so noise_method would be ignored and the
# default 'global' threshold would be used instead of Otsu's method.
main_experiment(image_paths_experiment, output_folder_experiment,
                binarize_method=inner_technique, experiment_step='binarization')


## D. Morphological Operations Experiment

In [None]:
# Category folder name for the morphological-operation experiments; it is passed to
# construct_output_path as the technique category.
technique = 'Morphological_Operations'

### 1. Dilation 

In [None]:
# Technique name for this experiment; it also names the innermost output folder
# that construct_output_path creates (if missing) and returns.
inner_technique = 'dilation'
output_folder_experiment = construct_output_path(output_folder, technique, inner_technique)

In [None]:
# The technique must be passed as morph_method: with experiment_step='morphology'
# only the morphological step runs, so noise_method would be ignored.
main_experiment(image_paths_experiment, output_folder_experiment,
                morph_method=inner_technique, experiment_step='morphology')

### 2. Erosion 

In [None]:
# Technique name for this experiment; it also names the innermost output folder
# that construct_output_path creates (if missing) and returns.
inner_technique = 'erosion'
output_folder_experiment = construct_output_path(output_folder, technique, inner_technique)

In [None]:
# The technique must be passed as morph_method: with experiment_step='morphology'
# only the morphological step runs, so noise_method would be ignored and the
# default 'dilation' would be applied instead of erosion.
main_experiment(image_paths_experiment, output_folder_experiment,
                morph_method=inner_technique, experiment_step='morphology')

### 3. Opening 

In [None]:
# Technique name for this experiment; it also names the innermost output folder
# that construct_output_path creates (if missing) and returns.
inner_technique = 'opening'
output_folder_experiment = construct_output_path(output_folder, technique, inner_technique)

In [None]:
# The technique must be passed as morph_method: with experiment_step='morphology'
# only the morphological step runs, so noise_method would be ignored and the
# default 'dilation' would be applied instead of opening.
main_experiment(image_paths_experiment, output_folder_experiment,
                morph_method=inner_technique, experiment_step='morphology')

### 4. Closing

In [None]:
# Technique name for this experiment; it also names the innermost output folder
# that construct_output_path creates (if missing) and returns.
inner_technique = 'closing'
output_folder_experiment = construct_output_path(output_folder, technique, inner_technique)

In [None]:
# The technique must be passed as morph_method: with experiment_step='morphology'
# only the morphological step runs, so noise_method would be ignored and the
# default 'dilation' would be applied instead of closing.
main_experiment(image_paths_experiment, output_folder_experiment,
                morph_method=inner_technique, experiment_step='morphology')

## E. Edge Detection Experiment

In [None]:
# Category folder name for the edge-detection experiments; it is passed to
# construct_output_path as the technique category.
technique = 'Edge_Detection'

### 1. Canny 

In [None]:
# Technique name for this experiment; it also names the innermost output folder
# that construct_output_path creates (if missing) and returns.
inner_technique = 'canny'
output_folder_experiment = construct_output_path(output_folder, technique, inner_technique)

In [None]:
# The technique must be passed as edge_method: with experiment_step='edge'
# only the edge-detection step runs, so noise_method would be ignored.
main_experiment(image_paths_experiment, output_folder_experiment,
                edge_method=inner_technique, experiment_step='edge')

### 2. Sobel

In [None]:
# Technique name for this experiment; it also names the innermost output folder
# that construct_output_path creates (if missing) and returns.
inner_technique = 'sobel'
output_folder_experiment = construct_output_path(output_folder, technique, inner_technique)

In [None]:
# The technique must be passed as edge_method: with experiment_step='edge'
# only the edge-detection step runs, so noise_method would be ignored and the
# default 'canny' detector would be used instead of Sobel.
main_experiment(image_paths_experiment, output_folder_experiment,
                edge_method=inner_technique, experiment_step='edge')

# Running full prep

## Images Preparation

In [None]:
# process_images(folder_path, output_folder, noise_method='gaussian', hist_method='clahe',
#                binarize_method='otsu', morph_method='closing', edge_method='canny')


## View Processed images

In [None]:
# Preview processed images
processed_image_paths = load_images_from_folder(output_folder)
if processed_image_paths:
    preview_images(processed_image_paths, num_images=5, title="Processed Image Preview")
else:
    # preview_images raises ValueError on an empty list, and output_folder only
    # contains images directly after process_images has been run.
    print(f"No processed images found in {output_folder}. Run process_images first.")

# Trying grid search over combinations of techniques (work in progress: still needs to be completed)

In [None]:
import itertools

# Method names to try for each preprocessing step; optimize_preprocessing
# iterates over the Cartesian product of these lists.
noise_methods = ['gaussian', 'median', 'non_local_means']
histogram_methods = ['standard', 'clahe']
binarization_methods = ['global', 'adaptive', 'otsu']
morphological_methods = ['dilation', 'erosion', 'opening', 'closing']
edge_detection_methods = ['canny', 'sobel']

def evaluate_combination(image, noise_method, hist_method, binarize_method, morph_method, edge_method,
                         metric="Sharpness"):
    """
    Apply one combination of preprocessing steps and score the resulting image.

    The score is a single value taken from assess_image_quality. This is a
    placeholder criterion: replace it with a task-specific measure (e.g. a
    text-detection or model-accuracy score) once one is available.

    Parameters:
    image: Input image.
    noise_method (str): Method for noise reduction.
    hist_method (str): Method for histogram equalization.
    binarize_method (str): Method for binarization.
    morph_method (str): Method for morphological operations.
    edge_method (str): Method for edge detection.
    metric (str): Key of the assess_image_quality result used as the score
                  ('Brightness', 'Blurriness' or 'Sharpness'). Default is 'Sharpness'.

    Returns:
    float: The selected quality metric of the processed image.

    Raises:
    ValueError: If metric is not a key returned by assess_image_quality.
    """
    # Apply preprocessing steps
    image = noise_reduction(image, method=noise_method)
    image = histogram_equalization(image, method=hist_method)
    image = binarization(image, method=binarize_method)
    image = morphological_operations(image, method=morph_method)
    image = edge_detection(image, method=edge_method)

    # assess_image_quality returns a dict; the caller compares scores with '>',
    # so exactly one scalar entry is returned.
    quality_metrics = assess_image_quality(image)
    if metric not in quality_metrics:
        raise ValueError(f"Unknown quality metric: {metric}")
    return float(quality_metrics[metric])

def optimize_preprocessing(image):
    """
    Search every combination of preprocessing methods for the highest-scoring one.

    Each combination drawn from the module-level method lists is scored with
    evaluate_combination; the first combination reaching the highest score is kept.

    Parameters:
    image: Input image passed unchanged to every evaluation.

    Returns:
    tuple: (noise_method, hist_method, binarize_method, morph_method, edge_method)
           of the best combination, or None if no combination scored above -inf.
    """
    method_grid = itertools.product(
        noise_methods,
        histogram_methods,
        binarization_methods,
        morphological_methods,
        edge_detection_methods,
    )

    top_score = -float('inf')
    top_combination = None

    for candidate in method_grid:
        # candidate is already ordered like evaluate_combination's method arguments
        candidate_score = evaluate_combination(image, *candidate)

        # Strict comparison keeps the earliest combination on ties
        if candidate_score > top_score:
            top_score, top_combination = candidate_score, candidate

    print(f"Best combination: {top_combination} with score: {top_score}")
    return top_combination