In [None]:
import cv2
import numpy as np
import matplotlib.pyplot as plt
import os

def load_image(image_path):
    """Read an image file from disk in colour mode.

    Args:
        image_path: Path to the image file.

    Returns:
        The array produced by ``cv2.imread(image_path, cv2.IMREAD_COLOR)``.

    Raises:
        FileNotFoundError: If ``image_path`` is not an existing file.
        ValueError: If the file exists but OpenCV cannot decode it.
    """
    if not os.path.isfile(image_path):
        raise FileNotFoundError(f"Image file not found: {image_path}")

    image = cv2.imread(image_path, cv2.IMREAD_COLOR)
    # cv2.imread reports failure by returning None rather than raising; fail
    # here so the problem is not deferred to a later, less obvious error.
    if image is None:
        raise ValueError(f"Could not decode image: {image_path}")
    return image

def convert_to_grayscale(image):
    """Convert an image to single-channel grayscale.

    Args:
        image: Image array. Four-channel input is converted as BGRA,
            two-dimensional input is treated as already grayscale, and
            anything else is converted as BGR.

    Returns:
        The grayscale image. Two-dimensional input is returned unchanged
        (the same object, not a copy).
    """
    ndim = getattr(image, "ndim", None)
    if ndim == 2:
        # Already single-channel: COLOR_BGR2GRAY would reject this input,
        # so hand it back untouched.
        return image
    if ndim == 3 and image.shape[2] == 4:
        return cv2.cvtColor(image, cv2.COLOR_BGRA2GRAY)
    return cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

def segment_image(image, patch_size=100):
    """Cut a two-dimensional image into a grid of square-ish tiles.

    Tiles are produced row by row, left to right. Tiles on the right and
    bottom edges are smaller when the image size is not a multiple of
    ``patch_size``.

    Args:
        image: Two-dimensional array (its shape is unpacked as ``h, w``).
        patch_size: Step and maximum side length of each tile.

    Returns:
        List of ``(x, y, patch)`` tuples where ``(x, y)`` is the tile's
        top-left corner and ``patch`` is the corresponding slice of ``image``.
    """
    height, width = image.shape
    return [
        (left, top, image[top:top + patch_size, left:left + patch_size])
        for top in range(0, height, patch_size)
        for left in range(0, width, patch_size)
    ]

def binarize_patches(patches, block_size=21, c=6):
    """Binarize every patch with Gaussian adaptive thresholding.

    Args:
        patches: Iterable of ``(x, y, patch)`` tuples. Each ``patch`` is
            passed straight to ``cv2.adaptiveThreshold``.
        block_size: Side length of the pixel neighbourhood used to compute
            the local threshold. Must be an odd integer >= 3. Defaults to 21.
        c: Constant subtracted from the weighted neighbourhood mean.
            Defaults to 6.

    Returns:
        List of ``(x, y, binarized_patch)`` tuples in the same order as the
        input, with pixel values of 0 or 255.

    Raises:
        ValueError: If ``block_size`` is even or smaller than 3.
    """
    # OpenCV only accepts odd neighbourhood sizes greater than 1; validating
    # here gives a readable error instead of an OpenCV assertion failure.
    if block_size < 3 or block_size % 2 == 0:
        raise ValueError(
            f"block_size must be an odd integer >= 3, got {block_size}"
        )

    return [
        (
            x,
            y,
            cv2.adaptiveThreshold(
                patch,
                255,
                cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                cv2.THRESH_BINARY,
                block_size,
                c,
            ),
        )
        for x, y, patch in patches
    ]

def reconstruct_image(patches, original_shape):
    """Paste tiles back onto a white canvas of the original size.

    Args:
        patches: Iterable of ``(x, y, patch)`` tuples, where ``(x, y)`` is the
            top-left corner at which ``patch`` is written.
        original_shape: ``(height, width)`` of the canvas to build.

    Returns:
        A ``uint8`` array of shape ``original_shape``. Areas not covered by
        any tile keep the initial value 255.
    """
    height, width = original_shape
    canvas = np.full((height, width), 255, dtype=np.uint8)
    for left, top, tile in patches:
        bottom = top + tile.shape[0]
        right = left + tile.shape[1]
        canvas[top:bottom, left:right] = tile
    return canvas

def remove_background_noise(image, min_size=30):
    """Remove small connected components from a binarized image.

    The image is inverted (``255 - image``) before labelling, so every pixel
    that is not 255 counts as foreground. Components are found with
    8-connectivity.

    Args:
        image: Binarized image; the inversion assumes 8-bit values in
            0..255.
        min_size: Components whose pixel area is not strictly greater than
            this value are discarded.

    Returns:
        An array with the same shape and dtype as ``image`` that is 0 on the
        pixels of every retained component and 255 everywhere else.
    """
    _, labels, stats, _ = cv2.connectedComponentsWithStats(
        255 - image, connectivity=8
    )

    # One boolean per label instead of one full-image mask per label: the
    # original loop rescanned the whole image for every component.
    keep = stats[:, cv2.CC_STAT_AREA] > min_size
    keep[0] = False  # label 0 is the background and is never drawn

    cleaned = np.full_like(image, 255)
    cleaned[keep[labels]] = 0
    return cleaned

def enforce_white_background(image):
    """Sharpen the image against its blurred copy and re-threshold it.

    Steps:
        1. Blur the image with a 25x25 Gaussian kernel.
        2. Combine ``1.5 * image - 0.5 * blurred``.
        3. Apply a fixed binary threshold: values above 200 become 255,
           everything else becomes 0.

    Args:
        image: Image array accepted by the OpenCV calls below.

    Returns:
        The thresholded image.
    """
    blurred = cv2.GaussianBlur(image, (25, 25), 0)
    sharpened = cv2.addWeighted(image, 1.5, blurred, -0.5, 0)
    _, binary = cv2.threshold(sharpened, 200, 255, cv2.THRESH_BINARY)
    return binary

def save_image(image, input_path, output_folder):
    """Write ``image`` into ``output_folder`` under the input file's name.

    Args:
        image: Image array to write.
        input_path: Path of the source image; only its base name is used.
        output_folder: Destination directory, created if it does not exist.

    Returns:
        The path of the written file.

    Raises:
        OSError: If ``cv2.imwrite`` reports that the file was not written.
    """
    os.makedirs(output_folder, exist_ok=True)
    output_path = os.path.join(output_folder, os.path.basename(input_path))

    # PNG compression level 0 is passed through unchanged from the original.
    written = cv2.imwrite(output_path, image, [cv2.IMWRITE_PNG_COMPRESSION, 0])
    # cv2.imwrite returns False on failure instead of raising; without this
    # check a failed write would still be reported as processed.
    if not written:
        raise OSError(f"Failed to write image: {output_path}")
    return output_path

def process_all_images(input_root_folder, output_root_folder, patch_size=100):
    """Binarize every PNG under a folder tree and mirror it to an output tree.

    For each directory below ``input_root_folder`` the matching directory is
    created under ``output_root_folder``. Every ``.png`` file (matched
    case-insensitively) is loaded, converted to grayscale, split into tiles,
    thresholded tile by tile, reassembled, cleaned of small components,
    re-thresholded and saved under its original file name.

    Args:
        input_root_folder: Root directory that is walked recursively.
        output_root_folder: Root directory that receives the results.
        patch_size: Tile side length passed to ``segment_image``.
            Defaults to 100.

    Returns:
        None. Progress is reported with ``print``.
    """
    output_root_key = os.path.normcase(os.path.abspath(output_root_folder))

    for root, dirs, files in os.walk(input_root_folder):
        # Do not descend into the output tree when it lives inside the input
        # tree; otherwise a second run would re-process its own results.
        # Sorting makes the traversal order deterministic.
        dirs[:] = sorted(
            d for d in dirs
            if os.path.normcase(os.path.abspath(os.path.join(root, d)))
            != output_root_key
        )

        relative_path = os.path.relpath(root, input_root_folder)
        output_folder = os.path.join(output_root_folder, relative_path)
        os.makedirs(output_folder, exist_ok=True)

        png_files = sorted(
            file for file in files if file.lower().endswith(".png")
        )
        if not png_files:
            print(f"No PNG images found in {root}")
            continue

        for file in png_files:
            input_image_path = os.path.join(root, file)

            # A single unreadable file must not abort the whole batch. Both
            # failure styles are handled: a None return and a raised error.
            try:
                image = load_image(input_image_path)
            except (OSError, ValueError):
                image = None
            if image is None:
                print(f"Skipped unreadable image: {input_image_path}")
                continue

            gray_image = convert_to_grayscale(image)
            image_patches = segment_image(gray_image, patch_size=patch_size)
            binarized_patches = binarize_patches(image_patches)
            binarized_image = reconstruct_image(binarized_patches, gray_image.shape)
            cleaned_background = remove_background_noise(binarized_image)
            final_image = enforce_white_background(cleaned_background)

            save_image(final_image, input_image_path, output_folder)
            print(f"Processed: {input_image_path}")



In [None]:
# Root folder that is walked recursively for PNG files.
input_folder = "img"  
# Root folder for the results; the input tree's sub-directories are mirrored here.
output_folder = "output_2"
# Run the binarization pipeline on every PNG found under input_folder.
process_all_images(input_folder, output_folder)


#print(f"Processing complete. All images saved in: {output_folder}")

No PNG images found in img
Processed: img\RC04844\page_1.png
Processed: img\RC04844\page_2.png
Processed: img\RC04844\page_3.png
Processed: img\RC04844\page_4.png
Processed: img\RC04845\page_1.png
Processed: img\RC04846\page_1.png
Processed: img\RC04846\page_2.png
Processed: img\RC04846\page_3.png
Processed: img\RC04847\page_2.png
Processed: img\RC04848\page_2.png
Processed: img\RC04849\page_1.png
Processed: img\RC04850\page_1.png
Processed: img\RC04851\page_1.png
Processed: img\RC04852\page_1.png
Processed: img\RC04852\page_2.png
Processed: img\RC04853\page_2.png
Processed: img\RC04854\page_2.png
Processed: img\RC04855\page_2.png
Processed: img\RC04856\page_2.png
Processed: img\RC04857\page_2.png
Processed: img\RC04866\page_2.png
Processed: img\RC04866\page_3.png
Processed: img\RC04867\page_2.png
Processed: img\RC04868\page_1.png
Processed: img\RC04868\page_2.png
Processed: img\RC04868\page_3.png
Processed: img\RC04868\page_4.png
Processed: img\RC04869\page_2.png
Processed: img\RC0487