In [1]:
import cv2
import os
import numpy as np


def display(img):
    """Show ``img`` in an OpenCV window named "1", then call ``cv2.waitKey(0)``.

    Args:
        img: Image array handed straight to ``cv2.imshow``.
    """
    window_title = "1"
    cv2.imshow(window_title, img)
    cv2.waitKey(0)


# def clean_background(img_input):
#     hsv = cv2.cvtColor(img_input, cv2.COLOR_BGR2HSV)
#     lower_black = np.array([0, 0, 0])
#     upper_black = np.array([180, 255, 130])
#     mask = cv2.inRange(hsv, lower_black, upper_black)
#     cleaned_img = img_input.copy()
#     cleaned_img[mask == 0] = [255, 255, 255]
#     return cleaned_img


def image_preprocess(img_input):
    """Turn a BGR image into an eroded, inverted binary image for segmentation.

    NOTE(review): ``clean_background`` only appears as commented-out code in
    the visible source, so calling this definition raises ``NameError`` unless
    that helper is defined elsewhere. A later cell re-defines
    ``image_preprocess`` on top of ``adaptive_threshold``.

    Args:
        img_input: BGR image (it is converted with ``cv2.COLOR_BGR2GRAY``).

    Returns:
        tuple: ``(binary, cleaned)`` where ``binary`` is the eroded inverted
        Otsu mask and ``cleaned`` is the output of ``clean_background``.
    """
    background_removed = clean_background(img_input)

    grayscale = cv2.cvtColor(background_removed, cv2.COLOR_BGR2GRAY)
    smoothed = cv2.GaussianBlur(grayscale, (3, 3), 3)

    # First pass lets Otsu choose the threshold; second pass inverts the
    # resulting two-level image.
    otsu_binary = cv2.threshold(smoothed, 0, 255, cv2.THRESH_OTSU)[1]
    inverted = cv2.threshold(otsu_binary, 0, 255, cv2.THRESH_BINARY_INV)[1]

    cross_kernel = cv2.getStructuringElement(cv2.MORPH_CROSS, (3, 3))
    eroded = cv2.erode(inverted, cross_kernel)
    return eroded, background_removed


def get_split_line(img, projection_col, min_width=15):
    """Find column spans ``(start, end)`` that contain ink in a projection profile.

    A span opens at the first column whose projection is positive and closes at
    the next empty column, or at the end of the profile. A span narrower than
    ``min_width`` is not closed: it stays open across the gap, so it either
    absorbs what follows until it is wide enough, or is dropped if the profile
    ends first.

    Args:
        img: Unused; kept so existing callers keep working.
        projection_col: 1-D sequence of per-column sums (0 means an empty column).
        min_width: Minimum ``end - start`` for a span to be emitted. The default
            of 15 matches the previously hard-coded value.

    Returns:
        list[tuple[int, int]]: Spans with an exclusive ``end``, suitable for
        slicing as ``img[:, start:end]``.
    """
    split_line_list = []
    last_index = len(projection_col) - 1
    in_run = False
    start = 0

    for i in range(len(projection_col)):
        has_ink = projection_col[i] > 0

        if not in_run:
            if not has_ink:
                continue
            in_run = True
            start = i

        # Inside a run: keep going while there is ink and columns remain.
        if has_ink and i != last_index:
            continue

        # When the run reaches the final column with ink still present, that
        # column belongs to the span, so the exclusive end is one past it.
        # (Previously the end was i, which silently cut the last column off.)
        end = i + 1 if has_ink else i

        if end - start >= min_width:
            split_line_list.append((start, end))
            in_run = False
        # else: too narrow -> stay in the run and let it grow across the gap.

    return split_line_list


def get_contours(img):
    """Return bounding boxes of the external contours in ``img``.

    Boxes with area below 30, or width/height below 3, are treated as noise and
    discarded. The survivors are passed through
    ``merge_small_boxes_vertically`` before being returned.

    Args:
        img: Image passed to ``cv2.findContours``.

    Returns:
        list[tuple]: ``(x, y, w, h)`` boxes after filtering and merging.
    """
    contours, _ = cv2.findContours(img, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    rects = (cv2.boundingRect(contour) for contour in contours)

    # Keep only boxes large enough to be part of a character.
    kept = [
        (x, y, w, h)
        for (x, y, w, h) in rects
        if w * h >= 30 and w >= 3 and h >= 3
    ]

    # Stack vertically adjacent fragments into a single character box.
    return merge_small_boxes_vertically(kept)


def merge_small_boxes_vertically(boxes, gap_thresh=15, x_align_thresh=18, merged_hw_ratio_thresh=(0.7, 4.8)):
    """Merge boxes that sit directly below one another into a single box.

    Boxes are visited top to bottom. For each unused box, every later unused
    box is tested against the *current accumulated* box and merged when all
    three conditions hold:

    * the vertical gap between them is in ``[0, gap_thresh)``,
    * their left edges differ by less than ``x_align_thresh``,
    * the height/width ratio of the union lies strictly inside
      ``merged_hw_ratio_thresh``.

    Args:
        boxes: Iterable of ``(x, y, w, h)`` tuples.
        gap_thresh: Exclusive upper bound on the vertical gap.
        x_align_thresh: Exclusive upper bound on ``abs(x1 - x2)``.
        merged_hw_ratio_thresh: ``(low, high)`` exclusive bounds on the
            merged box's height/width ratio.

    Returns:
        list[tuple]: Merged ``(x, y, w, h)`` boxes, ordered by the ``y`` of the
        box that started each group. Empty input yields ``[]``.
    """
    if not boxes:
        return []

    ordered = sorted(boxes, key=lambda b: b[1])  # top to bottom
    ratio_low, ratio_high = merged_hw_ratio_thresh
    used = [False] * len(ordered)
    merged = []

    for i, box in enumerate(ordered):
        if used[i]:
            continue
        used[i] = True
        x1, y1, w1, h1 = box

        for j in range(i + 1, len(ordered)):
            if used[j]:
                continue
            x2, y2, w2, h2 = ordered[j]

            vertical_gap = y2 - (y1 + h1)
            x_align = abs(x1 - x2)
            merged_x = min(x1, x2)
            merged_y = min(y1, y2)
            merged_w = max(x1 + w1, x2 + w2) - merged_x
            merged_h = max(y1 + h1, y2 + h2) - merged_y
            # 999 is a sentinel ratio that always fails the range test.
            hw_ratio = merged_h / merged_w if merged_w != 0 else 999

            if (0 <= vertical_gap < gap_thresh
                    and x_align < x_align_thresh
                    and ratio_low < hw_ratio < ratio_high):
                # Grow the accumulated box so later candidates are compared
                # with (and united with) everything merged so far. Without
                # this, a second merge replaced the result with box i + box j
                # only, and the extent of earlier merged boxes was lost even
                # though they had been marked as used.
                x1, y1, w1, h1 = merged_x, merged_y, merged_w, merged_h
                used[j] = True

        merged.append((x1, y1, w1, h1))

    return merged


def sort_merge(contour_row):
    """Sort boxes top to bottom and merge neighbours that overlap vertically.

    Adjacent boxes (after sorting by ``y``) are merged when their normalised
    overlap score ``nmovlp`` is positive. After a merge the enlarged box is
    compared again with its new successor.

    Unlike the previous implementation, the returned list contains only real
    boxes: the ``(-1, -1, -1, -1)`` placeholders that used to pad the list
    after each merge are no longer appended. Merge decisions are unchanged.

    Args:
        contour_row: Iterable of ``(x, y, w, h)`` boxes. Heights are used as
            divisors, so they are assumed to be non-zero.

    Returns:
        list[tuple]: Boxes sorted by ``y`` with overlapping neighbours merged.
        The input is not modified.
    """
    boxes = sorted(contour_row, key=lambda box: box[1])  # top to bottom (vertical layout)

    i = 0
    while i < len(boxes) - 1:
        upper = boxes[i]
        lower = boxes[i + 1]

        # Positive when the upper box's bottom edge reaches past the lower box's top.
        ovlp = upper[1] + upper[3] - lower[1]
        # NOTE(review): the second term is ``y - h/2``, not the upper box's
        # centre (``y + h/2``). It looks like a sign slip, but it is kept as-is
        # because changing it would shift which boxes get merged - confirm intent.
        dist = abs((lower[1] + lower[3] / 2) - (upper[1] - upper[3] / 2))
        bottom_upper = upper[1] + upper[3]
        bottom_lower = lower[1] + lower[3]
        span = max(bottom_lower, bottom_upper) - upper[1]
        nmovlp = (ovlp / upper[3] + ovlp / lower[3]) / 2 - dist / span / 8

        if nmovlp > 0:
            x = min(upper[0], lower[0])
            y = upper[1]
            w = max(upper[0] + upper[2], lower[0] + lower[2]) - x
            h = max(bottom_upper, bottom_lower) - y
            boxes[i] = (x, y, w, h)
            del boxes[i + 1]
            # Stay on index i: the merged box may also overlap the next one.
        else:
            i += 1

    return boxes


def combine_verticalLine(contour_row):
    """Pass-through hook: return ``contour_row`` unchanged.

    Retained as an optional step for the vertical layout, where vertical
    splits are generally not a problem.
    """
    return contour_row


def split_oversizeWidth(contour_row):
    """Split boxes whose height/width ratio exceeds 1.8 into an upper and lower half.

    The list is edited in place. The lower half is inserted right after the
    box it came from, so it is examined on the next step and may be split
    again; the upper half is not re-examined.

    Args:
        contour_row: Mutable list of ``(x, y, w, h)`` boxes.

    Returns:
        list: The same list object, after splitting.
    """
    idx = 0
    while idx < len(contour_row):
        x, y, w, h = contour_row[idx]
        # A tall aspect ratio may indicate two characters merged into one box.
        if h * 1.0 / w > 1.8:
            lower_y = int(y + h / 2 + 1)
            lower_h = y + h - lower_y
            contour_row[idx] = (x, y, w, int(h / 2))
            contour_row.insert(idx + 1, (x, lower_y, w, lower_h))
        idx += 1
    return contour_row


def get_segmentation_result(img, img_input, cleaned_img, save_dir="char_output"):
    """Segment ``img`` into character boxes, save each crop and draw the boxes.

    The binary image is split into column spans from its column-sum projection,
    and the spans are processed from right to left. Within each span, contours
    are extracted, merged and split into character boxes. Each box is padded by
    3 pixels (clipped to the image), cropped from ``cleaned_img``, written to
    ``save_dir`` as ``char_NNN.jpg``, and outlined on ``img_input``.

    Args:
        img: Binary image used for the projection and contour extraction.
        img_input: Image that receives the rectangles (modified in place); its
            shape supplies the clipping bounds.
        cleaned_img: Image the character crops are taken from.
        save_dir: Output directory, created if it does not exist.

    Returns:
        list[list[tuple]]: One list of ``(x, y, w, h)`` boxes per column span,
        in processing order; ``x`` is relative to the span's left edge.
    """
    pad = 3  # Try adjusting from 2 to 5

    if not os.path.exists(save_dir):
        os.makedirs(save_dir)

    # Vertical projection: one sum per image column.
    column_sums = cv2.reduce(img, 0, cv2.REDUCE_SUM, dtype=cv2.CV_32S).flatten()

    # Columns are handled right to left.
    column_spans = sorted(
        get_split_line(img, column_sums),
        key=lambda span: span[0],
        reverse=True,
    )

    img_height, img_width = img_input.shape[:2]
    segmentation_result = []
    char_index = 1

    for span in column_spans:
        col_start, col_end = span[0], span[1]
        boxes = get_contours(img[:, col_start:col_end])
        boxes = sort_merge(boxes)
        boxes = split_oversizeWidth(boxes)
        boxes = combine_verticalLine(boxes)
        segmentation_result.append(boxes)

        for (x, y, w, h) in boxes:
            left = x + col_start  # shift the column-local x into image coordinates
            right = min(left + w, img_width)
            bottom = min(y + h, img_height)

            # Skip boxes that are empty after clipping.
            if left >= right or y >= bottom:
                continue

            # Padded crop window, clipped to the image bounds.
            x0 = max(left - pad, 0)
            y0 = max(y - pad, 0)
            x1 = min(left + w + pad, img_width)
            y1 = min(y + h + pad, img_height)

            crop = cleaned_img[y0:y1, x0:x1]
            if crop is not None and crop.size > 0:
                cv2.imwrite(os.path.join(save_dir, f"char_{char_index:03d}.jpg"), crop)
                char_index += 1

            # Red outline of the padded box for visual inspection.
            cv2.rectangle(img_input, (x0, y0), (x1, y1), (0, 0, 255))

    return segmentation_result


# ========== Main Program ==========

# pic_path = 'J5.png'
# img_input = cv2.imread(pic_path, 1)
# img, cleaned_img = image_preprocess(img_input)
# cleaned_img_copy = cleaned_img.copy()

# segmentation_result = get_segmentation_result(
#     img, cleaned_img_copy, cleaned_img, save_dir="char_output"
# )

# cv2.imwrite("segmented_visual.jpg", cleaned_img_copy)
# cv2.imwrite("cleaned_background.jpg", cleaned_img)

# display(cleaned_img_copy)


In [2]:
import cv2
import numpy as np
import math


def display(img):
    """Show ``img`` in an OpenCV window named "1", then call ``cv2.waitKey(0)``.

    This re-defines the identical ``display`` helper from the first cell.

    Args:
        img: Image array handed straight to ``cv2.imshow``.
    """
    window_title = "1"
    cv2.imshow(window_title, img)
    cv2.waitKey(0)


def adaptive_threshold(image):
    """Whiten the background of a BGR image using thresholds that adapt to its noise.

    To handle low-quality images, the colour-distance thresholds are relaxed as
    the standard deviation of the blurred grayscale image grows. A pixel is set
    to white ``(255, 255, 255)`` when any of the following holds:

    * its Euclidean BGR distance to the most frequent value of each channel
      (the per-channel mode) is below ``qerr1``;
    * its Euclidean BGR distance to pure white is below ``qerr2``;
    * the blurred grayscale value at that position is above 220.

    The work is done with whole-array NumPy operations instead of per-pixel
    Python loops. Differences are computed on a signed integer copy, so the
    result does not depend on how the installed NumPy promotes
    ``uint8 - 255``; under NumPy 2 promotion rules that scalar expression stays
    ``uint8`` and wraps around.

    Args:
        image: 3-channel ``uint8`` BGR image of shape ``(height, width, 3)``.

    Returns:
        numpy.ndarray: Copy of ``image`` with background pixels set to white.
    """
    blurred = cv2.GaussianBlur(image, (5, 5), 0)
    gray = cv2.cvtColor(blurred, cv2.COLOR_BGR2GRAY)

    std_dev = np.std(gray)

    qerr1 = int(30 + (std_dev / 5))
    qerr2 = int(150 + (std_dev / 3))

    print(f"Adaptive Thresholds -> qerr1: {qerr1}, qerr2: {qerr2}, Std Dev: {std_dev:.2f}")

    _, bright_mask = cv2.threshold(gray, 220, 255, cv2.THRESH_BINARY)

    # Most frequent value per channel. argmax returns the lowest value on ties,
    # matching a manual 256-bin histogram scan.
    b_mean, g_mean, r_mean = (
        int(np.argmax(np.bincount(image[:, :, channel].ravel(), minlength=256)))
        for channel in range(3)
    )

    print(f"Detected color mode -> R: {r_mean}, G: {g_mean}, B: {b_mean}")

    # Signed copy: squared differences reach 3 * 255**2, far outside uint8.
    pixels = image.astype(np.int32)
    mode_bgr = np.array([b_mean, g_mean, r_mean], dtype=np.int32)

    dist_sq_to_mode = np.sum((pixels - mode_bgr) ** 2, axis=2)
    dist_sq_to_white = np.sum((pixels - 255) ** 2, axis=2)

    # sqrt(d) < q is equivalent to d < q*q for non-negative integers, so the
    # comparison is done on exact integers without taking a square root.
    background = (
        (dist_sq_to_mode < qerr1 ** 2)
        | (dist_sq_to_white < qerr2 ** 2)
        | (bright_mask == 255)
    )

    new_image = image.copy()
    new_image[background] = 255
    return new_image


def image_preprocess(img_input):
    """Clean the background, then build an eroded, inverted binary image.

    Args:
        img_input: BGR image passed to ``adaptive_threshold``.

    Returns:
        tuple: ``(binary, cleaned)`` where ``binary`` is the eroded inverted
        Otsu mask and ``cleaned`` is the output of ``adaptive_threshold``.
    """
    background_removed = adaptive_threshold(img_input)

    grayscale = cv2.cvtColor(background_removed, cv2.COLOR_BGR2GRAY)
    smoothed = cv2.GaussianBlur(grayscale, (3, 3), 3)

    # First pass lets Otsu choose the threshold; second pass inverts the
    # resulting two-level image.
    otsu_binary = cv2.threshold(smoothed, 0, 255, cv2.THRESH_OTSU)[1]
    inverted = cv2.threshold(otsu_binary, 0, 255, cv2.THRESH_BINARY_INV)[1]

    cross_kernel = cv2.getStructuringElement(cv2.MORPH_CROSS, (3, 3))
    eroded = cv2.erode(inverted, cross_kernel)
    return eroded, background_removed


In [3]:
# Batch driver: segment every image in `input_folder` and write the results to
# one sub-directory of `output_root` per input file.
input_folder = "input_images"
output_root = "processed_image"

# exist_ok=True replaces the separate exists() check, so there is no window
# between checking and creating the directory.
os.makedirs(output_root, exist_ok=True)

# os.listdir returns names in arbitrary order; sorting makes the processing
# order (and therefore the log output) reproducible between runs.
for filename in sorted(os.listdir(input_folder)):
    if not filename.lower().endswith((".png", ".jpg", ".jpeg", ".bmp")):
        continue

    img_path = os.path.join(input_folder, filename)
    img_input = cv2.imread(img_path, 1)

    if img_input is None:
        print(f"Warning: Failed to load {filename}")
        continue

    img, cleaned_img = image_preprocess(img_input)
    # Boxes are drawn on the copy so the crops taken from cleaned_img stay clean.
    cleaned_img_copy = cleaned_img.copy()

    # Output directory is named after the input file (extension removed).
    base_name = os.path.splitext(filename)[0]
    save_dir = os.path.join(output_root, base_name)

    segmentation_result = get_segmentation_result(
        img, cleaned_img_copy, cleaned_img, save_dir=save_dir
    )

    # Optional debugging artefacts; comment these two lines out if not needed.
    cv2.imwrite(os.path.join(save_dir, "segmented_visual.jpg"), cleaned_img_copy)
    cv2.imwrite(os.path.join(save_dir, "cleaned_background.jpg"), cleaned_img)

    print(f"Processed {filename}")


Adaptive Thresholds -> qerr1: 40, qerr2: 167, Std Dev: 52.93
Detected color mode -> R: 224, G: 214, B: 185
Processed J3.png
