# Data Preparation - IT2 - Choosing |best techniques per step in the flow 

In [109]:
import cv2
import numpy as np
import pandas as pd
import random
import os
import matplotlib.pyplot as plt
import numpy as np
from tqdm import tqdm

In [110]:
# Define the path to the folder containing the images to be processed
folder_path = '../data/original'  # Update this path to point to your specific folder containing images

# Define the path to the folder where the processed images will be saved
output_folder = '../data/processed'  # Update this path to the desired output folder

## Loading Images and stats

In [111]:
def load_images_from_folder(folder_path, extensions=('.png', '.jpg', '.jpeg', '.JPG')):
    """
    Load all image file paths from a specified folder that match the given file extensions.

    Parameters:
    folder_path (str): The path to the folder containing the images.
    extensions (tuple of str): A tuple of file extensions to filter the images by. 
                               Default is ('.png', '.jpg', '.jpeg', '.JPG').

    Returns:
    list: A list of full file paths to images in the folder that match the specified extensions.
    
    Raises:
    FileNotFoundError: If the specified folder does not exist.
    """

    # Check if the folder exists
    if not os.path.exists(folder_path):
        raise FileNotFoundError(f"The specified folder does not exist: {folder_path}")

    # List comprehension to gather all image paths with the specified extensions
    image_paths = [os.path.join(folder_path, f) for f in os.listdir(folder_path) if f.endswith(extensions)]

    return image_paths

In [112]:
# Function to convert to gray scale
def load_and_preprocess_images(image_paths, resize_dim=(256, 256)):
    images = []
    image_ids = []

    # Initialize tqdm progress bar
    for path in tqdm(image_paths, desc="Loading and preprocessing images", unit="image"):
        img = cv2.imread(path)
        img_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)  # Convert to grayscale
        img_resized = cv2.resize(img_gray, resize_dim)  # Resize for consistency
        images.append(img_resized)
        image_ids.append(f'Image_{len(images)}')  # Assign image ID as Image_1, Image_2, etc.

    return images, image_ids

In [113]:
# Load the CSV file with the image statistics
images_stats_path = "../data-understanding/images_stats.csv"  
images_stats_df = pd.read_csv(images_stats_path)

In [114]:
images_stats_df.columns

Index(['Image', 'Brightness', 'Sharpness', 'Contrast', 'Noise', 'Skew',
       'Line Spacing', 'Tables Detected', 'Resolution', 'Detected Elements',
       'Texture', 'Patterns'],
      dtype='object')

## Functions per step

### Step 1: Noise Reduction Techniques

In [139]:
# Noise Reduction Functions
def apply_gaussian_blur(image, ksize=(5, 5)):
    """Apply Gaussian Blur to reduce noise with the specified kernel size."""
    return cv2.GaussianBlur(image, ksize, 0)

def apply_median_blur(image, ksize=5):
    """Apply Median Blur to reduce salt-and-pepper noise with the specified kernel size."""
    return cv2.medianBlur(image, ksize)

def apply_non_local_means(image, h=10, templateWindowSize=7, searchWindowSize=21):
    """Apply Non-Local Means Denoising with specified parameters."""
    return cv2.fastNlMeansDenoising(image, None, h, templateWindowSize, searchWindowSize)

### Step 2: Histogram Equalization Techniques

In [140]:
# Histogram Equalization Functions
def apply_histogram_equalization(image):
    return cv2.equalizeHist(image)

def apply_clahe(image, clipLimit=2.0, tileGridSize=(8, 8)):
    """Apply CLAHE to enhance image contrast with specified parameters."""
    clahe = cv2.createCLAHE(clipLimit=clipLimit, tileGridSize=tileGridSize)
    return clahe.apply(image)

### Step 3: Binarization Techniques

In [141]:
# Binarization Functions
def apply_global_threshold(image, thresholdValue=127):
    """Apply Global Thresholding with the specified threshold value."""
    _, binary_image = cv2.threshold(image, thresholdValue, 255, cv2.THRESH_BINARY)
    return binary_image

def apply_adaptive_threshold(image, adaptiveMethod=cv2.ADAPTIVE_THRESH_MEAN_C, blockSize=11, C=2):
    """Apply Adaptive Thresholding with the specified method, block size, and constant C."""
    return cv2.adaptiveThreshold(image, 255, adaptiveMethod, cv2.THRESH_BINARY, blockSize, C)

def apply_otsu_threshold(image):
    """Apply Otsu Thresholding."""
    _, binary_image = cv2.threshold(image, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
    return binary_image


### Step 4: Morphological Operations Techniques

In [142]:
# Morphological Operations Functions
def apply_dilation(image, kernel_size=(5, 5)):
    """Apply Dilation with the specified kernel size."""
    kernel = np.ones(kernel_size, np.uint8)
    return cv2.dilate(image, kernel, iterations=1)

def apply_erosion(image, kernel_size=(5, 5)):
    """Apply Erosion with the specified kernel size."""
    kernel = np.ones(kernel_size, np.uint8)
    return cv2.erode(image, kernel, iterations=1)

def apply_opening(image, kernel_size=(5, 5)):
    """Apply Morphological Opening with the specified kernel size."""
    kernel = np.ones(kernel_size, np.uint8)
    return cv2.morphologyEx(image, cv2.MORPH_OPEN, kernel)

def apply_closing(image, kernel_size=(5, 5)):
    """Apply Morphological Closing with the specified kernel size."""
    kernel = np.ones(kernel_size, np.uint8)
    return cv2.morphologyEx(image, cv2.MORPH_CLOSE, kernel)


### Step 5: Edge Detection Techniques

In [143]:
# Edge Detection Functions
def apply_canny_edge(image, threshold1=50, threshold2=150):
    """Apply Canny Edge Detection with specified thresholds."""
    return cv2.Canny(image, threshold1, threshold2)

def apply_sobel_edge(image, ksize=3, scale=1, delta=0, borderType=cv2.BORDER_DEFAULT):
    """Apply Sobel Edge Detection with specified parameters."""
    return cv2.Sobel(image, cv2.CV_64F, 1, 1, ksize=ksize, scale=scale, delta=delta, borderType=borderType)

## Characteristics Calculation for testing

In [120]:
# Image Characteristics Calculation Functions - from data understanding it2
def calculate_brightness(image):
    return np.mean(image)

def calculate_sharpness(image):
    return cv2.Laplacian(image, cv2.CV_64F).var()

def calculate_contrast(image):
    return image.std()

def calculate_noise(image):
    if len(image.shape) == 3:
        image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    blurred = cv2.GaussianBlur(image, (3, 3), 0)
    noise = cv2.absdiff(image, blurred)
    return np.var(noise)

def calculate_skew(image):
    if len(image.shape) != 2:
        raise ValueError("Invalid image format. Image must be a 2D grayscale image.")
    _, binary = cv2.threshold(image, 150, 255, cv2.THRESH_BINARY_INV)
    coords = np.column_stack(np.where(binary > 0))
    if coords.size == 0:
        return 0
    angle = cv2.minAreaRect(coords)[-1]
    if angle < -45:
        angle = -(90 + angle)
    else:
        angle = -angle
    if abs(angle) < 1e-2:
        angle = 0
    return round(angle, 2)

def calculate_line_spacing(image):
    if len(image.shape) != 2:
        raise ValueError("Invalid image format. Image must be a 2D grayscale image.")
    _, binary = cv2.threshold(image, 150, 255, cv2.THRESH_BINARY_INV)
    contours, _ = cv2.findContours(binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    heights = [cv2.boundingRect(contour)[3] for contour in contours]
    if len(heights) > 1:
        line_spacing = np.mean(np.diff(sorted(heights)))
    else:
        line_spacing = 0
    return line_spacing

def detect_tables(image):
    if len(image.shape) != 2:
        raise ValueError("Invalid image format. Image must be a 2D grayscale image.")
    _, binary = cv2.threshold(image, 150, 255, cv2.THRESH_BINARY_INV)
    binary = cv2.adaptiveThreshold(image, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY_INV, 11, 2)
    contours, _ = cv2.findContours(binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    table_contours = [contour for contour in contours if cv2.contourArea(contour) > 1000]
    return len(table_contours)

def calculate_resolution(image):
    height, width = image.shape[:2]
    return height * width

def calculate_elements_detection(image):
    if len(image.shape) != 2:
        raise ValueError("Invalid image format. Image must be a 2D grayscale image.")
    _, binary = cv2.threshold(image, 150, 255, cv2.THRESH_BINARY_INV)
    contours, _ = cv2.findContours(binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    return len(contours)

def calculate_texture(image):
    laplacian = cv2.Laplacian(image, cv2.CV_64F)
    return laplacian.std()

def calculate_patterns(image):
    if len(image.shape) != 2:
        raise ValueError("Invalid image format. Image must be a 2D grayscale image.")
    edges = cv2.Canny(image, 100, 200)
    return np.sum(edges > 0)


## Evaluation per step

The code evaluates multiple techniques in each step of the image processing pipeline and selects the best technique based on a scoring system. Here's how it works:

Technique Evaluation:

Each technique for a specific step (e.g., Gaussian Blur, Median Blur for Noise Reduction) is applied to the test images.
After applying the technique, the characteristics of the processed image are calculated (e.g., Brightness, Sharpness, Noise, etc.).
Scoring Mechanism:

The scoring function is designed to minimize noise while maximizing positive characteristics like sharpness and contrast.
The score calculation takes into account changes in image characteristics compared to the original image.
Noise is treated negatively (-stats["Noise"]), whereas characteristics like Sharpness, Contrast, and other features are treated positively (score += stats[key] - original_stats[key]).
After evaluating all techniques, the one with the highest score is selected as the "best technique" for that step.
Best Technique Selection:

The technique with the highest score is selected and printed, and this is repeated for each step in the pipeline.

### Function of evaluation

In [121]:
# Basic Evaluation Function
# -------------------------
# def basic_evaluation(image, techniques_dict, original_stats):
#     evaluation_results = {}
#     for technique_name, technique_func in techniques_dict.items():
#         processed_image = technique_func(image)
#         stats = {
#             "Brightness": calculate_brightness(processed_image),
#             "Sharpness": calculate_sharpness(processed_image),
#             "Contrast": calculate_contrast(processed_image),
#             "Noise": calculate_noise(processed_image)
#         }
# 
#         # Basic scoring function - prioritizing sharpness, contrast, and minimized noise
#         score = stats["Sharpness"] + stats["Contrast"] - stats["Noise"]
#         evaluation_results[technique_name] = {"Score": score, "Stats": stats}
# 
#     best_technique = max(evaluation_results, key=lambda x: evaluation_results[x]["Score"])
#     return {"Best Technique": best_technique, "Evaluation Results": evaluation_results}


In [122]:
# # Advanced Evaluation Function
# # ----------------------------
# def advanced_evaluation(image, techniques_dict,original_stats):
#     evaluation_results = {}
#     for technique_name, technique_func in techniques_dict.items():
#         processed_image = technique_func(image)
#         stats = {
#             "Brightness": calculate_brightness(processed_image),
#             "Sharpness": calculate_sharpness(processed_image),
#             "Contrast": calculate_contrast(processed_image),
#             "Noise": calculate_noise(processed_image),
#             "Skew": calculate_skew(processed_image),
#             "Line Spacing": calculate_line_spacing(processed_image),
#             "Tables Detected": detect_tables(processed_image),
#             "Resolution": calculate_resolution(processed_image),
#             "Detected Elements": calculate_elements_detection(processed_image),
#             "Texture": calculate_texture(processed_image),
#             "Patterns": calculate_patterns(processed_image)
#         }
# 
#         # Compare with original characteristics and calculate score
#         score = 0
#         score -= abs(stats["Brightness"] - original_stats["Brightness"])  # Closer brightness to original is better
#         score += stats["Sharpness"] - original_stats["Sharpness"]  # Higher sharpness is better, relative improvement
#         score += stats["Contrast"] - original_stats["Contrast"]  # Higher contrast is better, relative improvement
#         score -= abs(stats["Noise"] - original_stats["Noise"])  # Lower noise difference compared to original is better
#         score -= abs(stats["Skew"] - original_stats["Skew"])  # Less skew difference is better
#         score -= abs(stats["Line Spacing"] - original_stats["Line Spacing"])  # Closer line spacing to original is better
#         score += stats["Tables Detected"] - original_stats["Tables Detected"]  # More tables detected is better
#         score += stats["Resolution"] - original_stats["Resolution"]  # Higher resolution is better
#         score += stats["Detected Elements"] - original_stats["Detected Elements"]  # More elements detected is better
#         score += stats["Texture"] - original_stats["Texture"]  # Higher texture complexity is better, relative improvement
#         score += stats["Patterns"] - original_stats["Patterns"]  # More patterns detected is better
# 
#         evaluation_results[technique_name] = {"Score": score, "Stats": stats}
# 
#     best_technique = max(evaluation_results, key=lambda x: evaluation_results[x]["Score"])
#     return {"Best Technique": best_technique, "Evaluation Results": evaluation_results}

In [170]:
def advanced_evaluation(image, techniques_dict, original_stats):
    evaluation_results = {}

    for technique_name, technique_func in techniques_dict.items():
        # Apply the technique
        processed_image = technique_func(image)

        # Calculate characteristics for the processed image
        stats = {
            "Brightness": calculate_brightness(processed_image),
            "Sharpness": calculate_sharpness(processed_image),
            "Contrast": calculate_contrast(processed_image),
            "Noise": calculate_noise(processed_image),
            "Skew": calculate_skew(processed_image),
            "Line Spacing": calculate_line_spacing(processed_image),
            "Tables Detected": detect_tables(processed_image),
            "Resolution": calculate_resolution(processed_image),
            "Detected Elements": calculate_elements_detection(processed_image),
            "Texture": calculate_texture(processed_image),
            "Patterns": calculate_patterns(processed_image)
        }

        # Normalize metrics to comparable ranges (between 0 and 1, roughly)
        stats_normalized = {
            "Brightness": stats["Brightness"] / 255,
            "Sharpness": stats["Sharpness"] / 1000,
            "Contrast": stats["Contrast"] / 255,
            "Noise": stats["Noise"] / 255,
            "Skew": stats["Skew"] / 45,
            "Line Spacing": stats["Line Spacing"] / 100,
            "Tables Detected": stats["Tables Detected"] / 10,
            "Resolution": stats["Resolution"] / (512 * 512),
            "Detected Elements": stats["Detected Elements"] / 100,
            "Texture": stats["Texture"] / 100,
            "Patterns": stats["Patterns"] / 1000
        }

        # Normalize the original stats for comparison
        original_stats_normalized = {
            "Brightness": original_stats["Brightness"] / 255,
            "Sharpness": original_stats["Sharpness"] / 1000,
            "Contrast": original_stats["Contrast"] / 255,
            "Noise": original_stats["Noise"] / 255,
            "Skew": original_stats["Skew"] / 45,
            "Line Spacing": original_stats["Line Spacing"] / 100,
            "Tables Detected": original_stats["Tables Detected"] / 10,
            "Resolution": original_stats["Resolution"] / (512 * 512),
            "Detected Elements": original_stats["Detected Elements"] / 100,
            "Texture": original_stats["Texture"] / 100,
            "Patterns": original_stats["Patterns"] / 1000
        }

        # Weights for each characteristic (to determine their importance)
        weights = {
            "Brightness": -1.0,  # Closer to original is better (penalized if different)
            "Sharpness": 2.0,    # Higher is better (rewarded if improved)
            "Contrast": 1.0,     # Higher is better (rewarded if improved)
            "Noise": -1.5,       # Lower is better (penalized if increased)
            "Skew": -0.5,        # Closer to original is better (penalized if different)
            "Line Spacing": -0.5,  # Closer to original is better (penalized if different)
            "Tables Detected": 1.0,  # More tables detected is better
            "Resolution": 1.0,    # Higher is better
            "Detected Elements": 1.0,  # More elements detected is better
            "Texture": 1.0,       # Higher texture complexity is better
            "Patterns": 1.0       # More patterns detected is better
        }

        # Calculate score using normalized metrics and weights
        score = 0
        for metric, value in stats_normalized.items():
            original_value = original_stats_normalized.get(metric, 0)
            score += weights[metric] * (value - original_value)

        evaluation_results[technique_name] = {"Score": score, "Stats": stats}

    # Determine the best technique based on the highest score
    best_technique = max(evaluation_results, key=lambda x: evaluation_results[x]["Score"])
    return {"Best Technique": best_technique, "Evaluation Results": evaluation_results}


In [123]:
# Function for Each Step testing
def run_step(step_name, techniques_dict, test_images, test_image_ids, best_techniques_list):
    print(f"\nRunning Step: {step_name}\n{'-' * 40}")
    all_results = []
    for img, img_id in zip(test_images, test_image_ids):
        # Retrieve original stats from the dataset
        original_stats = images_stats_df[images_stats_df['Image'] == img_id].iloc[0].to_dict()

        step_result = advanced_evaluation(img, techniques_dict,original_stats)
        all_results.append((img_id, original_stats, step_result))
        print(f"Best Technique for {img_id}: {step_result['Best Technique']}")

    # Generate Comparison Table
    comparison_data = []
    for img_id, original_stats, result in all_results:
        # Add original stats row
        comparison_data.append([img_id, "Original"] + list(original_stats.values())[1:])  # Skip the 'Image' key
        # Add each technique's stats
        for technique, metrics in result["Evaluation Results"].items():
            comparison_data.append([img_id, f"{step_name} - {technique}"] + list(metrics["Stats"].values()))

    comparison_df = pd.DataFrame(comparison_data, columns=["Image_ID", "Technique", "Brightness", "Sharpness", "Contrast", "Noise", "Skew", "Line Spacing", "Tables Detected", "Resolution", "Detected Elements", "Texture", "Patterns"])

    # Generate Recommendation
    recommended_technique_name = max(all_results, key=lambda x: x[2]["Evaluation Results"][x[2]["Best Technique"]]["Score"])[2]["Best Technique"]
    recommended_technique_func = techniques_dict[recommended_technique_name]
    print(f"\nRecommended Technique for {step_name}: {recommended_technique_name}\n")

    # Append both technique name and function for further tuning
    best_techniques_list.append((step_name, recommended_technique_name, recommended_technique_func))

    # Return the comparison DataFrame
    return comparison_df

### Running Different Techniques per Step

In [124]:
# Load all image file paths from the specified folder
image_paths_all = load_images_from_folder(folder_path)

# Load and preprocess all images
total_images, total_image_ids = load_and_preprocess_images(image_paths_all)

# Randomly select 5 images for experimentation
experiment_indices = random.sample(range(len(total_images)), 5)
test_images = [total_images[i] for i in experiment_indices]
test_image_ids = [total_image_ids[i] for i in experiment_indices]


Loading and preprocessing images: 100%|██████████| 698/698 [02:44<00:00,  4.24image/s]


In [171]:
best_techniques_list = []
comparison_tables = []

In [172]:
# Step 1: Noise Reduction
noise_reduction_techniques = {
    "Gaussian Blur": lambda img: cv2.GaussianBlur(img, (5, 5), 0),
    "Median Blur": lambda img: cv2.medianBlur(img, 5),
    "Non-Local Means": lambda img: cv2.fastNlMeansDenoising(img, None, 10, 7, 21)
}
comparison_tables.append(run_step("Noise Reduction", noise_reduction_techniques, test_images, test_image_ids, best_techniques_list))


Running Step: Noise Reduction
----------------------------------------
Best Technique for Image_168: Non-Local Means
Best Technique for Image_642: Non-Local Means
Best Technique for Image_288: Non-Local Means
Best Technique for Image_185: Non-Local Means
Best Technique for Image_372: Non-Local Means

Recommended Technique for Noise Reduction: Non-Local Means


In [173]:
# Step 2: Histogram Equalization
histogram_equalization_techniques = {
    "Histogram Equalization": lambda img: cv2.equalizeHist(img),
    "CLAHE": lambda img: cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8)).apply(img)
}
comparison_tables.append(run_step("Histogram Equalization", histogram_equalization_techniques, test_images, test_image_ids, best_techniques_list))


Running Step: Histogram Equalization
----------------------------------------
Best Technique for Image_168: Histogram Equalization
Best Technique for Image_642: CLAHE
Best Technique for Image_288: Histogram Equalization
Best Technique for Image_185: Histogram Equalization
Best Technique for Image_372: Histogram Equalization

Recommended Technique for Histogram Equalization: Histogram Equalization


In [174]:
# Step 3: Binarization
binarization_techniques = {
    "Global Threshold": lambda img: cv2.threshold(img, 127, 255, cv2.THRESH_BINARY)[1],
    "Adaptive Threshold": lambda img: cv2.adaptiveThreshold(img, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 11, 2),
    "Otsu Threshold": lambda img: cv2.threshold(img, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]
}
comparison_tables.append(run_step("Binarization", binarization_techniques, test_images, test_image_ids, best_techniques_list))


Running Step: Binarization
----------------------------------------
Best Technique for Image_168: Adaptive Threshold
Best Technique for Image_642: Adaptive Threshold
Best Technique for Image_288: Adaptive Threshold
Best Technique for Image_185: Adaptive Threshold
Best Technique for Image_372: Adaptive Threshold

Recommended Technique for Binarization: Adaptive Threshold


In [175]:
# Step 4: Morphological Operations
morphological_operations_techniques = {
    "Dilation": lambda img: cv2.dilate(img, np.ones((5, 5), np.uint8), iterations=1),
    "Erosion": lambda img: cv2.erode(img, np.ones((5, 5), np.uint8), iterations=1),
    "Opening": lambda img: cv2.morphologyEx(img, cv2.MORPH_OPEN, np.ones((5, 5), np.uint8)),
    "Closing": lambda img: cv2.morphologyEx(img, cv2.MORPH_CLOSE, np.ones((5, 5), np.uint8))
}
comparison_tables.append(run_step("Morphological Operations", morphological_operations_techniques, test_images, test_image_ids, best_techniques_list))


Running Step: Morphological Operations
----------------------------------------
Best Technique for Image_168: Erosion
Best Technique for Image_642: Erosion
Best Technique for Image_288: Erosion
Best Technique for Image_185: Erosion
Best Technique for Image_372: Erosion

Recommended Technique for Morphological Operations: Erosion


In [176]:
# Step 5: Edge Detection
edge_detection_techniques = {
    "Canny Edge": lambda img: cv2.Canny(img, 100, 200),
    "Sobel Edge": lambda img: cv2.convertScaleAbs(cv2.Sobel(img, cv2.CV_64F, 1, 1, ksize=3))
}
comparison_tables.append(run_step("Edge Detection", edge_detection_techniques, test_images, test_image_ids, best_techniques_list))


Running Step: Edge Detection
----------------------------------------
Best Technique for Image_168: Canny Edge
Best Technique for Image_642: Canny Edge
Best Technique for Image_288: Canny Edge
Best Technique for Image_185: Canny Edge
Best Technique for Image_372: Canny Edge

Recommended Technique for Edge Detection: Canny Edge


## Final best techniques per step

In [177]:
# Print the list of best techniques for each step
print("\nBest Techniques for Each Step:")
for step, technique_name, technique_func in best_techniques_list:
    print(f"{step}: {technique_name}")


Best Techniques for Each Step:
Noise Reduction: Non-Local Means
Histogram Equalization: Histogram Equalization
Binarization: Adaptive Threshold
Morphological Operations: Erosion
Edge Detection: Canny Edge


In [178]:
for i, comparison_df in enumerate(comparison_tables):
    comparison_df.to_csv(f"comparison tables/comparison_table_step_{i+1}.csv", index=False)

In [179]:
# Display comparison tables within the notebook
for i, comparison_df in enumerate(comparison_tables):
    print(f"Comparison Table for Step {i+1}:")
    display(comparison_df.head())

Comparison Table for Step 1:


Unnamed: 0,Image_ID,Technique,Brightness,Sharpness,Contrast,Noise,Skew,Line Spacing,Tables Detected,Resolution,Detected Elements,Texture,Patterns
0,Image_168,Original,102.072388,978.061044,43.181213,32.850364,-90.0,0.0,1,65536,1,31.273968,3863
1,Image_168,Noise Reduction - Gaussian Blur,102.101547,23.812057,41.874337,1.118163,-90.0,0.0,2,65536,1,4.87976,1103
2,Image_168,Noise Reduction - Median Blur,103.508911,111.742331,42.719517,5.97085,-90.0,0.0,1,65536,1,10.570825,992
3,Image_168,Noise Reduction - Non-Local Means,102.519684,363.663595,42.825398,17.555627,-90.0,0.0,1,65536,1,19.069966,1884
4,Image_642,Original,109.358383,1760.960945,52.015105,56.500502,-90.0,1.59375,2,65536,161,41.963805,4931


Comparison Table for Step 2:


Unnamed: 0,Image_ID,Technique,Brightness,Sharpness,Contrast,Noise,Skew,Line Spacing,Tables Detected,Resolution,Detected Elements,Texture,Patterns
0,Image_168,Original,102.072388,978.061044,43.181213,32.850364,-90.0,0.0,1,65536,1,31.273968,3863
1,Image_168,Histogram Equalization - Histogram Equalization,130.006622,5328.425232,74.273891,138.782599,-90.0,0.605701,3,65536,422,72.996063,8332
2,Image_168,Histogram Equalization - CLAHE,120.380508,2234.628612,48.583296,61.975004,-90.0,0.927273,2,65536,276,47.271859,5905
3,Image_642,Original,109.358383,1760.960945,52.015105,56.500502,-90.0,1.59375,2,65536,161,41.963805,4931
4,Image_642,Histogram Equalization - Histogram Equalization,128.54216,3610.24846,73.686047,107.169441,-90.0,0.861486,2,65536,297,60.085343,6574


Comparison Table for Step 3:


Unnamed: 0,Image_ID,Technique,Brightness,Sharpness,Contrast,Noise,Skew,Line Spacing,Tables Detected,Resolution,Detected Elements,Texture,Patterns
0,Image_168,Original,102.072388,978.061044,43.181213,32.850364,-90.0,0.0,1,65536,1,31.273968,3863
1,Image_168,Binarization - Global Threshold,99.049072,44951.731438,124.285135,1352.177491,-90.0,0.639098,1,65536,400,212.018234,6651
2,Image_168,Binarization - Adaptive Threshold,201.448288,132966.082076,103.864819,2584.828339,-90.0,0.087496,1,65536,2824,364.645145,17617
3,Image_168,Binarization - Otsu Threshold,189.689713,16191.738274,111.304491,577.710957,-90.0,0.620438,2,65536,412,127.246761,3830
4,Image_642,Original,109.358383,1760.960945,52.015105,56.500502,-90.0,1.59375,2,65536,161,41.963805,4931


Comparison Table for Step 4:


Unnamed: 0,Image_ID,Technique,Brightness,Sharpness,Contrast,Noise,Skew,Line Spacing,Tables Detected,Resolution,Detected Elements,Texture,Patterns
0,Image_168,Original,102.072388,978.061044,43.181213,32.850364,-90.0,0.0,1,65536,1,31.273968,3863
1,Image_168,Morphological Operations - Dilation,110.889069,128.098141,41.176608,6.78468,-90.0,0.0,1,65536,1,11.318045,1011
2,Image_168,Morphological Operations - Erosion,84.292847,588.41807,44.491891,24.163212,-90.0,0.0,1,65536,1,24.25733,5870
3,Image_168,Morphological Operations - Opening,97.500473,518.894563,43.276082,22.217615,-90.0,0.0,2,65536,1,22.779257,4182
4,Image_168,Morphological Operations - Closing,106.481705,128.957677,42.070947,6.950182,-90.0,0.0,1,65536,1,11.355953,1010


Comparison Table for Step 5:


Unnamed: 0,Image_ID,Technique,Brightness,Sharpness,Contrast,Noise,Skew,Line Spacing,Tables Detected,Resolution,Detected Elements,Texture,Patterns
0,Image_168,Original,102.072388,978.061044,43.181213,32.850364,-90.0,0.0,1,65536,1,31.273968,3863
1,Image_168,Edge Detection - Canny Edge,15.030899,34312.304874,60.0579,1323.056184,-90.0,0.0,1,65536,1,185.235809,5242
2,Image_168,Edge Detection - Sobel Edge,6.434784,1951.78493,12.957535,50.027596,-90.0,0.0,1,65536,1,44.17901,3702
3,Image_642,Original,109.358383,1760.960945,52.015105,56.500502,-90.0,1.59375,2,65536,161,41.963805,4931
4,Image_642,Edge Detection - Canny Edge,19.186478,43328.501895,67.26389,1583.965897,-90.0,0.0,1,65536,1,208.154995,6268


In [180]:
# Generate Average Comparison Table
average_comparison_data = []
for comparison_df in comparison_tables:
    avg_stats = comparison_df.groupby("Technique").mean().reset_index()
    average_comparison_data.append(avg_stats)

# Combine average stats from all steps
average_comparison_df = pd.concat(average_comparison_data, ignore_index=True)
# Save the average comparison table to a CSV file
average_comparison_df.to_csv("average_comparison_table.csv", index=False)

In [181]:
average_comparison_df

Unnamed: 0,Technique,Brightness,Sharpness,Contrast,Noise,Skew,Line Spacing,Tables Detected,Resolution,Detected Elements,Texture,Patterns
0,Noise Reduction - Gaussian Blur,107.162756,24.68144,41.958559,1.12867,-90.0,8.009615,2.0,65536.0,13.6,4.926009,1221.8
1,Noise Reduction - Median Blur,108.618152,95.788121,42.708486,4.91675,-90.0,11.079617,1.4,65536.0,14.6,9.712208,1176.2
2,Noise Reduction - Non-Local Means,107.536963,482.279009,42.909078,21.262351,-90.0,19.660569,1.6,65536.0,11.8,21.061042,2284.2
3,Original,107.149319,1042.807841,43.318449,34.524881,-90.0,52.28664,1.8,65536.0,75.4,31.465931,3926.0
4,Histogram Equalization - CLAHE,126.506528,2404.716668,47.83054,66.324622,-72.0,0.704711,2.2,65536.0,379.8,48.158238,6535.8
5,Histogram Equalization - Histogram Equalization,129.341,4132.136154,74.116035,118.173999,-90.0,0.801573,2.4,65536.0,331.4,63.944852,7529.8
6,Original,107.149319,1042.807841,43.318449,34.524881,-90.0,52.28664,1.8,65536.0,75.4,31.465931,3926.0
7,Binarization - Adaptive Threshold,196.684158,130853.866491,107.010438,2507.259614,-36.0,0.102801,1.8,65536.0,2512.6,361.378629,18201.2
8,Binarization - Global Threshold,107.262177,37629.083617,125.651187,1189.211578,-90.0,0.779416,1.4,65536.0,340.0,193.481037,5701.0
9,Binarization - Otsu Threshold,195.659271,17345.278201,107.277268,623.156986,-72.0,0.902003,1.8,65536.0,369.6,129.692905,3767.0


## Hyperparameter Tuning

In [182]:
# Hyperparameter Tuning Function
def hyperparameter_tuning(images, best_techniques_list, param_grids, evaluation_function):
    tuned_results = {}
    for step_name, technique_name, best_technique_func in best_techniques_list:
        print(f"\nHyperparameter Tuning for Step: {step_name}\n{'-' * 40}")
        best_params = None
        best_score = -np.inf
        param_grid = param_grids.get(technique_name, [])

        for params in param_grid:
            total_score = 0
            for img, img_id in zip(images, test_image_ids):
                try:
                    # Apply the best technique with the given parameters explicitly based on technique name
                    if technique_name == "Gaussian Blur":
                        processed_image = apply_gaussian_blur(img, **params)
                    elif technique_name == "Median Blur":
                        processed_image = apply_median_blur(img, **params)
                    elif technique_name == "Non-Local Means":
                        processed_image = apply_non_local_means(img, **params)
                    elif technique_name == "CLAHE":
                        processed_image = apply_clahe(img, **params)
                    elif technique_name == "Global Threshold":
                        processed_image = apply_global_threshold(img, **params)
                    elif technique_name == "Adaptive Threshold":
                        processed_image = apply_adaptive_threshold(img, **params)
                    elif technique_name == "Otsu Threshold":
                        processed_image = apply_otsu_threshold(img)
                    elif technique_name == "Dilation":
                        processed_image = apply_dilation(img, **params)
                    elif technique_name == "Erosion":
                        processed_image = apply_erosion(img, **params)
                    elif technique_name == "Morphological Opening":
                        processed_image = apply_opening(img, **params)
                    elif technique_name == "Morphological Closing":
                        processed_image = apply_closing(img, **params)
                    elif technique_name == "Canny Edge":
                        processed_image = apply_canny_edge(img, **params)
                    elif technique_name == "Sobel Edge":
                        processed_image = apply_sobel_edge(img, **params)
                    else:
                        raise ValueError(f"Unknown technique: {technique_name}")

                except TypeError as e:
                    print(f"Skipping parameters {params} due to TypeError: {e}")
                    continue

                # Retrieve original stats for comparison
                original_stats = images_stats_df[images_stats_df['Image'] == img_id].iloc[0].to_dict()
                evaluation_result = evaluation_function(processed_image, {technique_name: best_technique_func}, original_stats)
                step_score = evaluation_result["Evaluation Results"][technique_name]["Score"]
                total_score += step_score

            avg_score = total_score / len(images) if len(images) > 0 else -np.inf

            if avg_score > best_score:
                best_score = avg_score
                best_params = params

            print(f"Parameters: {params}, Score: {avg_score}")

        tuned_results[step_name] = {
            "Best Parameters": best_params,
            "Best Score": best_score
        }
        print(f"Best Parameters for {step_name}: {best_params} with Score: {best_score}\n")

    return tuned_results


#### Explanation of Techniques and Parameters

##### 1. Noise Reduction
###### Gaussian Blur (`ksize`)
- **Parameter**: `ksize` (kernel size)
- **Meaning**: Defines the extent of smoothing applied. A small kernel (e.g., `(3, 3)`) produces minimal blurring, preserving details, while larger kernels (e.g., `(9, 9)`) apply more significant blurring, which is useful for reducing noise but may remove finer details.
- **Range**:
  - `(3, 3)`, `(5, 5)`, `(7, 7)`, `(9, 9)`
  - Smaller sizes preserve more details, larger sizes reduce noise more aggressively.

###### Non-Local Means (`h`, `templateWindowSize`, `searchWindowSize`)
- **Parameters**:
  - `h`: Filtering strength (higher values = stronger filtering).
  - `templateWindowSize`: Size of the patch used for comparison.
  - `searchWindowSize`: Size of the window around the pixel for searching similar patches.
- **Range**:
  - `h`: `5`, `10`, `15`, `20`
  - `templateWindowSize`: `7`, `10`
  - `searchWindowSize`: `21`, `31`
  - Balances noise reduction quality and processing time.

###### Median Blur (`ksize`)
- **Parameter**: `ksize` (kernel size)
- **Meaning**: Reduces "salt-and-pepper" noise by replacing each pixel with the median of neighboring pixels. Larger kernels apply stronger noise reduction, potentially losing details.
- **Range**:
  - `3`, `5`, `7`, `9`
  - Smaller values (`3`, `5`) are useful for mild noise; larger values (`7`, `9`) are effective for more significant noise.

##### 2. Histogram Equalization
###### CLAHE (`clipLimit`, `tileGridSize`)
- **Parameters**:
  - `clipLimit`: Controls contrast enhancement limit.
  - `tileGridSize`: Size of the grid for local histogram equalization.
- **Range**:
  - `clipLimit`: `2.0` to `6.0`
  - `tileGridSize`: `(4, 4)`, `(6, 6)`, `(8, 8)`
  - Lower `clipLimit` values reduce noise amplification, larger `tileGridSize` produces smoother results.

##### 3. Binarization
###### Global Threshold (`thresholdValue`)
- **Parameter**: `thresholdValue`
- **Meaning**: Used to convert grayscale images to binary by comparing pixel values to a threshold. Lower values produce more white areas.
- **Range**:
  - `100`, `127`, `150`, `200`
  - Balances the separation between foreground and background.

###### Adaptive Threshold (`adaptiveMethod`, `blockSize`, `C`)
- **Parameters**:
  - `adaptiveMethod`: The method used for calculating the threshold (`cv2.ADAPTIVE_THRESH_MEAN_C` or `cv2.ADAPTIVE_THRESH_GAUSSIAN_C`).
  - `blockSize`: Size of the local area considered for thresholding.
  - `C`: Constant subtracted from the mean or weighted sum.
- **Range**:
  - `adaptiveMethod`: `cv2.ADAPTIVE_THRESH_MEAN_C` or `cv2.ADAPTIVE_THRESH_GAUSSIAN_C`
  - `blockSize`: `11`, `15`
  - `C`: `2`, `3`
  - Allows adjustment to local image variations for better segmentation.

##### 4. Morphological Operations
###### Operation (`MORPH_OPEN`, `MORPH_CLOSE`, `DILATE`, `ERODE`, `kernel_size`)
- **Parameters**:
  - `operation`: The morphological transformation to apply.
    - `cv2.MORPH_OPEN`: Removes small white noise.
    - `cv2.MORPH_CLOSE`: Fills small black gaps in white areas.
    - `cv2.MORPH_DILATE`: Expands white areas to connect small features.
    - `cv2.MORPH_ERODE`: Shrinks white areas to reduce noise.
  - `kernel_size`: Size of the structuring element.
- **Range**:
  - `kernel_size`: `(3, 3)`, `(5, 5)`, `(7, 7)`, `(9, 9)`
  - Larger kernels apply more aggressive changes for connecting, removing, or shrinking features.

##### 5. Edge Detection
###### Canny Edge Detection (`threshold1`, `threshold2`)
- **Parameters**:
  - `threshold1`: Lower threshold for weak edges.
  - `threshold2`: Upper threshold for strong edges.
- **Range**:
  - `threshold1` and `threshold2`: `(50, 150)`, `(100, 200)`, `(150, 250)`, `(200, 300)`
  - Lower values detect more edges, useful for detailed images; higher values highlight stronger, more defined edges.

###### Sobel Edge Detection (`ksize`, `scale`, `delta`, `borderType`)
- **Parameters**:
  - `ksize`: Kernel size for the Sobel operator.
  - `scale`: Scaling factor for gradients.
  - `delta`: Value added to the result.
  - `borderType`: Border handling for edges.
- **Range**:
  - `ksize`: `3`, `5`, `7`
  - `scale`: `1`, `2`
  - `delta`: `0` (default)
  - `borderType`: `cv2.BORDER_DEFAULT`
  - Adjusts the level of detail and sharpness captured by the filter.


In [185]:
# Define expanded parameter grids for hyperparameter tuning for each technique
technique_param_grids = {
    # Noise Reduction Techniques Parameters
    # Gaussian Blur - kernel size affects the degree of blurring
    "Gaussian Blur": [
        {"ksize": (3, 3)},  # Small blur, preserves more details while reducing minor noise
        {"ksize": (5, 5)},  # Moderate blur, balances noise reduction and detail preservation
        {"ksize": (7, 7)},  # Stronger blur, reduces more noise but may lose more details
        {"ksize": (9, 9)}   # High blur, significant reduction of noise, more detail loss
    ],

    # Median Blur - kernel size affects the reduction of salt-and-pepper noise
    "Median Blur": [
        {"ksize": 3},  # Small kernel, effective for minor salt-and-pepper noise
        {"ksize": 5},  # Moderate kernel, more aggressive noise reduction
        {"ksize": 7},  # Large kernel, used for significant salt-and-pepper noise reduction
        {"ksize": 9}   # Largest kernel, aggressive noise reduction but may lose finer details
    ],

    # Non-Local Means - affects noise reduction strength and quality
    "Non-Local Means": [
        {"h": 5, "templateWindowSize": 7, "searchWindowSize": 21},   # Low filter strength (h), smaller template
        {"h": 10, "templateWindowSize": 7, "searchWindowSize": 21},  # Moderate filter strength (h), balance of denoising and details
        {"h": 15, "templateWindowSize": 7, "searchWindowSize": 21},  # Strong filter strength, more noise reduction but risk of over-smoothing
        {"h": 20, "templateWindowSize": 10, "searchWindowSize": 31}  # Higher strength and larger search windows for stronger denoising
    ],

    # Histogram Equalization Techniques Parameters
    # CLAHE - clip limit controls contrast, tile grid size controls local regions
    "CLAHE": [
        {"clipLimit": 2.0, "tileGridSize": (8, 8)},  # Low clip limit, preserves global contrast, effective for mild contrast enhancement
        {"clipLimit": 3.0, "tileGridSize": (8, 8)},  # Moderate clip limit, better enhancement for darker/lighter regions
        {"clipLimit": 4.0, "tileGridSize": (4, 4)},  # Higher clip limit, can lead to artifacts but increases local contrast
        {"clipLimit": 5.0, "tileGridSize": (6, 6)}   # High clip limit, strong local contrast enhancement
    ],

    # Binarization Techniques Parameters
    # Global Threshold - value for the threshold, used to separate foreground from background
    "Global Threshold": [
        {"thresholdValue": 100},  # Low threshold, makes more areas white, may overexpose
        {"thresholdValue": 127},  # Middle threshold, balance between foreground and background
        {"thresholdValue": 150},  # High threshold, less white, more black areas
        {"thresholdValue": 200}   # Higher threshold, darkest parts retained as foreground
    ],

    # Adaptive Threshold - block size and constant C, used for adaptive thresholding
    "Adaptive Threshold": [
        {"adaptiveMethod": cv2.ADAPTIVE_THRESH_MEAN_C, "blockSize": 11, "C": 2},  # Small block size, captures smaller variations
        {"adaptiveMethod": cv2.ADAPTIVE_THRESH_MEAN_C, "blockSize": 15, "C": 3},  # Larger block size, averages larger areas
        {"adaptiveMethod": cv2.ADAPTIVE_THRESH_GAUSSIAN_C, "blockSize": 11, "C": 2},  # Gaussian weighting, better for uneven lighting
        {"adaptiveMethod": cv2.ADAPTIVE_THRESH_GAUSSIAN_C, "blockSize": 15, "C": 3}   # Larger area, smoother output
    ],

    "Otsu Threshold": [
        {}  # No parameters needed, automatic threshold selection
    ],

    # Morphological Operations Techniques Parameters
    # Dilation - kernel size affects how much an object is expanded, helps to highlight and connect features in the image
    "Dilation": [
        {"kernel_size": (3, 3)},  # Small kernel, slight expansion of features
        {"kernel_size": (5, 5)},  # Medium kernel, moderate expansion, often used to fill small holes
        {"kernel_size": (7, 7)},  # Larger kernel, more significant expansion, fills larger gaps
        {"kernel_size": (9, 9)}   # Largest kernel, aggressive expansion, can connect disjoint parts
    ],

    # Erosion - kernel size affects how much an object is eroded, used to reduce noise by shrinking foreground areas
    "Erosion": [
        {"kernel_size": (3, 3)},  # Small kernel, minimal shrinking of features
        {"kernel_size": (5, 5)},  # Medium kernel, reduces small noise while keeping the main features intact
        {"kernel_size": (7, 7)},  # Larger kernel, removes more fine details, useful for stronger noise reduction
        {"kernel_size": (9, 9)}   # Largest kernel, aggressive erosion, may result in significant information loss
    ],

    # Morphological Opening - kernel size affects noise removal, used for removing small white noise from black backgrounds
    "Morphological Opening": [
        {"kernel_size": (3, 3)},  # Small kernel, removes small white noise but keeps the main structure
        {"kernel_size": (5, 5)},  # Medium kernel, better noise removal, may affect finer details
        {"kernel_size": (7, 7)}   # Larger kernel, stronger noise reduction, potentially removes small features
    ],

    # Morphological Closing - kernel size affects how gaps in foreground objects are filled, used to close small black holes within objects
    "Morphological Closing": [
        {"kernel_size": (3, 3)},  # Small kernel, fills tiny holes, maintains object shape
        {"kernel_size": (5, 5)},  # Medium kernel, closes medium-sized gaps, useful for refining object borders
        {"kernel_size": (7, 7)}   # Larger kernel, aggressively closes gaps, useful for solidifying larger structures
    ],

    # Edge Detection Techniques Parameters
    # Canny Edge Detection - lower and upper thresholds for edge linking
    "Canny Edge": [
        {"threshold1": 50, "threshold2": 150},  # Low thresholds, more edges detected
        {"threshold1": 100, "threshold2": 200},  # Moderate thresholds, balanced edge detection
        {"threshold1": 150, "threshold2": 250},  # High thresholds, only strong edges detected
        {"threshold1": 200, "threshold2": 300}   # Very high thresholds, detects fewer edges, focused on major features
    ],

    # Sobel Edge - kernel size, scale, delta, border type for Sobel edge detection
    "Sobel Edge": [
        {"ksize": 3, "scale": 1, "delta": 0, "borderType": cv2.BORDER_DEFAULT},  # Small kernel, detects finer details
        {"ksize": 5, "scale": 1, "delta": 0, "borderType": cv2.BORDER_DEFAULT},  # Medium kernel, balances detail and noise suppression
        {"ksize": 7, "scale": 1, "delta": 0, "borderType": cv2.BORDER_DEFAULT},  # Larger kernel, captures broader gradients
        {"ksize": 3, "scale": 2, "delta": 0, "borderType": cv2.BORDER_DEFAULT}   # Increased scale, emphasizes detected gradients more strongly
    ]
}

In [186]:
# Run hyperparameter tuning
tuned_results = hyperparameter_tuning(test_images, best_techniques_list, technique_param_grids, advanced_evaluation)
print("\nTuned Results:")

for step_name, result in tuned_results.items():
    print(f"{step_name}: Best Parameters: {result['Best Parameters']}, Best Score: {result['Best Score']}")


Hyperparameter Tuning for Step: Noise Reduction
----------------------------------------
Parameters: {'h': 5, 'templateWindowSize': 7, 'searchWindowSize': 21}, Score: -3.394338295790815
Parameters: {'h': 10, 'templateWindowSize': 7, 'searchWindowSize': 21}, Score: -4.059359032202847
Parameters: {'h': 15, 'templateWindowSize': 7, 'searchWindowSize': 21}, Score: -5.247716101937137
Parameters: {'h': 20, 'templateWindowSize': 10, 'searchWindowSize': 31}, Score: -5.834622915628669
Best Parameters for Noise Reduction: {'h': 5, 'templateWindowSize': 7, 'searchWindowSize': 21} with Score: -3.394338295790815


Hyperparameter Tuning for Step: Histogram Equalization
----------------------------------------
Best Parameters for Histogram Equalization: None with Score: -inf


Hyperparameter Tuning for Step: Binarization
----------------------------------------
Parameters: {'adaptiveMethod': 0, 'blockSize': 11, 'C': 2}, Score: 248.50167325414412
Parameters: {'adaptiveMethod': 0, 'blockSize': 15, 'C'

Interpretations of results:
Let's break down and interpret the hyperparameter tuning results for each step. Here's what each section tells us:

### **1. Noise Reduction: Non-Local Means**
- **Best Parameters**: `{'h': 5, 'templateWindowSize': 7, 'searchWindowSize': 21}`
- **Best Score**: `-3.39`

**Interpretation**:
- Non-Local Means was selected as the best noise reduction technique.
- The negative score (`-3.39`) indicates that this technique resulted in changes that deviated from the original characteristics, but this was the least deviation compared to other parameter combinations. Lower `h` values (`h=5`) seemed to perform better because stronger denoising (higher `h` values) tends to over-smooth the image, resulting in a higher deviation from the original characteristics.

### **2. Histogram Equalization: Histogram Equalization**
- **Best Parameters**: `None`
- **Best Score**: `-inf`

**Interpretation**:
- The hyperparameter tuning for histogram equalization did not find any parameters to improve upon, which suggests that the default parameters might be unsuitable for improving the metrics used in the scoring function. The score of `-inf` means no improvement was achieved, potentially due to the technique either not being applicable or beneficial to the specific image characteristics being targeted.

### **3. Binarization: Adaptive Threshold**
- **Best Parameters**: `{'adaptiveMethod': 1, 'blockSize': 11, 'C': 2}`
- **Best Score**: `286.58`

**Interpretation**:
- The adaptive threshold technique with Gaussian adaptive method (`adaptiveMethod=1`), smaller block size (`blockSize=11`), and constant (`C=2`) yielded the highest score of `286.58`.
- This positive score indicates that the chosen parameter combination effectively enhanced the image in ways the evaluation function rewards, such as increased contrast, sharpness, and detection of elements. Smaller block sizes are likely capturing more local variations, leading to higher scores.

### **4. Morphological Operations: Erosion**
- **Best Parameters**: `{'kernel_size': (3, 3)}`
- **Best Score**: `-1.03`

**Interpretation**:
- Erosion was selected as the best morphological operation.
- A kernel size of `(3, 3)` yielded the least negative score (`-1.03`), indicating that this parameter minimally affected the original characteristics while performing the morphological operation. Larger kernel sizes (`(5, 5)`, `(7, 7)`, `(9, 9)`) resulted in more deviation, which aligns with the idea that stronger erosion could overly reduce relevant structures in the image, negatively affecting its characteristics.

### **5. Edge Detection: Canny Edge**
- **Best Parameters**: `{'threshold1': 50, 'threshold2': 150}`
- **Best Score**: `139.47`

**Interpretation**:
- The Canny edge detection method performed best with thresholds of `50` and `150`, yielding a positive score of `139.47`.
- Lower thresholds (`threshold1=50`, `threshold2=150`) produced a better score because more edges were detected, contributing positively to metrics such as detected elements, texture, and patterns. Increasing the thresholds tended to reduce the number of detected edges, which led to lower scores.

### **Summary & Key Insights**:

1. **Negative Scores**:
   - Negative scores (e.g., Noise Reduction and Morphological Operations) indicate that the processed image deviated from the original characteristics in ways that were detrimental based on the evaluation metrics.
   - The least negative scores represent the best parameters that resulted in minimal detrimental changes.

2. **Positive Scores**:
   - Positive scores (e.g., Binarization and Edge Detection) indicate improvements in metrics such as contrast, sharpness, and detection of elements.
   - Techniques with higher positive scores effectively enhanced the quality of the image by improving certain desired features.

3. **Histogram Equalization Issue**:
   - The score of `-inf` for histogram equalization indicates that the technique wasn't effective, and the parameter space may need to be reconsidered. It might also be that this step doesn't add value for these particular images.

4. **Best Techniques Overview**:
   - For each step, the parameter combination that had the highest score (least negative or most positive) was selected.
   - It is important to note that the magnitude of scores can vary greatly, depending on the evaluation metrics and their weights.

### Recommendations:
- **Non-Local Means (Noise Reduction)**: The parameters suggest that a lighter touch (`h=5`) works better, indicating that preserving details is more beneficial for these images.
- **Adaptive Threshold (Binarization)**: The best results came from using a Gaussian adaptive method with a smaller block size, indicating that finer local adjustments are advantageous.
- **Erosion (Morphological Operations)**: A smaller kernel size `(3, 3)` resulted in the least deviation, suggesting a lighter morphological touch is preferable.
- **Canny Edge Detection**: Lower thresholds were effective in enhancing edge details, resulting in a higher score.

Overall, the scores are consistent with an expected outcome where lower parameters (e.g., less aggressive denoising or erosion) lead to better preservation of original characteristics, while certain techniques like edge detection benefit from lower thresholds for more comprehensive edge enhancement.