In [None]:
Uses color-based (HSV) thresholding for preprocessing and saves all segmented characters in a single folder.

In [1]:
import cv2
import os
import numpy as np


def display(img):
    """Show ``img`` in a window titled "1" and block until a key is pressed.

    Once a key is pressed the HighGUI windows are destroyed, so repeated
    calls (for example from a notebook cell) do not leave stale windows
    behind.

    Args:
        img: Image array accepted by ``cv2.imshow``.
    """
    cv2.imshow("1", img)
    cv2.waitKey(0)
    # Tear the window down explicitly; nothing else in this script closes it.
    cv2.destroyAllWindows()


def clean_background(img_input, max_dark_value=130):
    """
    Convert all non-dark regions to white, keeping black or dark gray strokes.

    A pixel is kept when its HSV value (brightness) channel is at most
    ``max_dark_value``; hue and saturation are left unconstrained, so the
    decision depends on brightness only.

    Args:
        img_input: BGR image (it is passed to ``cv2.cvtColor`` with
            ``COLOR_BGR2HSV``). It is not modified.
        max_dark_value: Largest V value still treated as "black".
            Defaults to 130; raise it (e.g. 140) if faint strokes are lost.

    Returns:
        A copy of ``img_input`` in which every pixel brighter than the
        threshold has been set to white ``[255, 255, 255]``.
    """
    hsv = cv2.cvtColor(img_input, cv2.COLOR_BGR2HSV)

    # Full hue/saturation range; only the V upper bound is selective.
    lower_black = np.array([0, 0, 0])
    upper_black = np.array([180, 255, max_dark_value])

    # mask is 255 where the pixel is dark enough, 0 elsewhere.
    mask = cv2.inRange(hsv, lower_black, upper_black)

    cleaned_img = img_input.copy()
    cleaned_img[mask == 0] = [255, 255, 255]  # Turn non-black areas white

    return cleaned_img


def image_preprocess(img_input):
    """Clean, binarise and thin a BGR image ready for segmentation.

    Args:
        img_input: BGR image handed to ``clean_background``.

    Returns:
        A ``(binary, cleaned)`` pair: ``binary`` is the eroded, inverted
        Otsu mask (dark strokes become 255, everything else 0) and
        ``cleaned`` is the background-whitened colour image.
    """
    background_free = clean_background(img_input)

    blurred = cv2.GaussianBlur(
        cv2.cvtColor(background_free, cv2.COLOR_BGR2GRAY), (3, 3), 3
    )

    # Otsu chooses the threshold and the inverse flag flips the polarity in
    # the same pass, which yields the same mask as thresholding then inverting.
    _, binary = cv2.threshold(
        blurred, 0, 255, cv2.THRESH_BINARY_INV | cv2.THRESH_OTSU
    )

    cross = cv2.getStructuringElement(cv2.MORPH_CROSS, (3, 3))
    return cv2.erode(binary, cross), background_free


def get_split_line(img, projection_row):
    """Find the ``(start, end)`` extents of text lines in a projection profile.

    A run opens at the first positive entry and closes at the next zero
    entry (or at the end of the profile). A run shorter than 15 entries is
    not closed: it stays open and keeps growing until it spans at least 15
    entries, so thin fragments are absorbed into the following extent.

    Args:
        img: Unused; kept so existing callers keep working.
        projection_row: Indexable sequence of per-row sums. Each entry is
            compared with 0, so a plain list or an ``(N, 1)`` array both work.

    Returns:
        List of ``(start, end)`` tuples with ``end`` exclusive, suitable for
        slicing ``img[start:end]``.
    """
    split_line_list = []
    total = len(projection_row)
    in_run = False
    start = 0
    for i in range(total):
        if not in_run:
            if projection_row[i] > 0:
                in_run = True
                start = i
            continue

        is_blank = projection_row[i] == 0
        if not is_blank and i != total - 1:
            continue

        # A run closed by a blank entry ends just before it. A run that is
        # still inked on the final entry must include that entry, hence i + 1
        # (closing at i would silently drop the last row from the slice).
        end = i if is_blank else i + 1
        if end - start >= 15:
            split_line_list.append((start, end))
            in_run = False
        # Otherwise the run is too short: leave it open so it keeps growing.
    return split_line_list


def get_contours(img):
    """Return the bounding box ``(x, y, w, h)`` of every external contour.

    Args:
        img: Single-channel image passed to ``cv2.findContours``.

    Returns:
        List of ``(x, y, w, h)`` tuples, in the order the contours are found.
    """
    outlines, _hierarchy = cv2.findContours(
        img, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
    )
    return [tuple(cv2.boundingRect(outline)) for outline in outlines]


def sort_merge(contour_row):
    """Sort boxes left to right and merge horizontally overlapping neighbours.

    Adjacent boxes are fused when their normalised overlap score is
    positive. After a merge the grown box is compared with its new right
    neighbour again, so chains of overlapping boxes collapse into one.

    Args:
        contour_row: Iterable of ``(x, y, w, h)`` boxes with positive width.
            It is not modified.

    Returns:
        A new list of ``(x, y, w, h)`` boxes sorted by ``x``. The list holds
        only real boxes: no ``(-1, -1, -1, -1)`` padding entries are
        appended, so downstream steps never see placeholders.
    """
    boxes = sorted(contour_row, key=lambda box: box[0])
    i = 0
    while i < len(boxes) - 1:
        rectL = boxes[i]
        rectR = boxes[i + 1]
        right_L = rectL[0] + rectL[2]
        right_R = rectR[0] + rectR[2]

        # Signed horizontal overlap (negative when there is a gap).
        ovlp = right_L - rectR[0]
        # NOTE(review): the left term is x - w/2 rather than the box centre
        # x + w/2. Kept as-is because the score below was presumably tuned
        # against it - confirm before changing.
        dist = abs((rectR[0] + rectR[2] / 2) - (rectL[0] - rectL[2] / 2))
        span = max(right_R, right_L) - rectL[0]
        nmovlp = (ovlp / rectL[2] + ovlp / rectR[2]) / 2 - dist / span / 8

        if nmovlp > 0:
            x = rectL[0]
            y = min(rectL[1], rectR[1])
            w = max(right_R, right_L) - x
            h = max(rectL[1] + rectL[3], rectR[1] + rectR[3]) - y
            boxes[i] = (x, y, w, h)
            # Drop the absorbed box and stay on i to re-test the merged box.
            del boxes[i + 1]
        else:
            i += 1
    return boxes


def combine_verticalLine(contour_row):
    """Attach tall, thin boxes (height / width > 6) to a neighbouring box.

    Such a box is usually a detached vertical stroke. When it has a box on
    both sides it is merged with the left neighbour if that one is at least
    as close and narrower than the right neighbour, otherwise with the right
    neighbour. The first and last boxes are never merged.

    Fixes relative to the previous implementation:
    * "has a right neighbour" is decided from the real boxes only, so the
      last box can no longer be merged with a ``(-1, -1, -1, -1)`` padding
      entry and then dropped.
    * After a merge to the left, the box that slides into the current slot
      is examined too instead of being skipped.
    * Padding entries present in the input are removed.

    Args:
        contour_row: List of ``(x, y, w, h)`` boxes ordered left to right.
            It is updated in place.

    Returns:
        The same list object, holding the combined boxes.
    """
    # Placeholders (x == -1) never describe a real box; discard them up front.
    rects = [rect for rect in contour_row if rect[0] != -1]

    i = 0
    while i < len(rects):
        rect = rects[i]
        is_thin_bar = rect[2] != 0 and rect[3] * 1.0 / rect[2] > 6
        has_both_neighbours = 0 < i < len(rects) - 1
        if not (is_thin_bar and has_both_neighbours):
            i += 1
            continue

        rect_left = rects[i - 1]
        rect_right = rects[i + 1]
        left_dis = rect[0] - rect_left[0] - rect_left[2]
        right_dis = rect_right[0] - rect[0] - rect[2]

        if left_dis <= right_dis and rect_left[2] < rect_right[2]:
            x = rect_left[0]
            y = min(rect_left[1], rect[1])
            w = rect[0] + rect[2] - rect_left[0]
            h = max(rect_left[1] + rect_left[3], rect[1] + rect[3]) - y
            rects[i - 1] = (x, y, w, h)
            del rects[i]
            # The former right neighbour now sits at index i; do not advance
            # so that it gets checked as well.
            continue

        x = rect[0]
        y = min(rect[1], rect_right[1])
        w = rect_right[0] + rect_right[2] - rect[0]
        h = max(rect_right[1] + rect_right[3], rect[1] + rect[3]) - y
        rects[i] = (x, y, w, h)
        del rects[i + 1]
        i += 1

    # Preserve the in-place contract of the original function.
    contour_row[:] = rects
    return contour_row


def split_oversizeWidth(contour_row):
    """Cut boxes whose width exceeds 1.8x their height into two halves.

    The list is edited in place. The right half is inserted directly after
    the left half and is itself examined on the next step, so it may be cut
    again; the left half is not re-examined.

    Args:
        contour_row: List of ``(x, y, w, h)`` boxes with non-zero height.

    Returns:
        The same list object, with wide boxes replaced by their halves.
    """
    idx = 0
    while idx < len(contour_row):
        x, y, w, h = contour_row[idx]
        if w * 1.0 / h > 1.8:
            # One column is left out between the two halves.
            right_x = int(x + w / 2 + 1)
            left_half = (x, y, int(w / 2), h)
            right_half = (right_x, y, x + w - right_x, h)
            contour_row[idx] = left_half
            contour_row.insert(idx + 1, right_half)
        idx += 1
    return contour_row


def get_segmentation_result(img, img_input, cleaned_img, save_dir="char_output"):
    """Segment a binarised page into characters, row by row, and save them.

    The image is split into text rows using the per-row pixel sums, each row
    is split into character boxes via contour detection and the merge/split
    helpers, and every box is cropped from ``cleaned_img`` and written to
    ``save_dir`` as ``char_NNN.jpg``. A red rectangle is drawn on
    ``img_input`` for each saved box.

    Args:
        img: Binary image in which stroke pixels are non-zero.
        img_input: Image that receives the box outlines (modified in place);
            its height and width are used to clamp the crops.
        cleaned_img: Image the character crops are taken from.
        save_dir: Output directory, created if missing.

    Returns:
        One list of ``(x, y, w, h)`` boxes per text row; ``y`` is relative to
        the top of its row. Placeholder or degenerate boxes (negative
        origin, non-positive size) are not included.
    """
    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(save_dir, exist_ok=True)

    projection_row = cv2.reduce(img, 1, cv2.REDUCE_SUM, dtype=cv2.CV_32S)
    split_line_list = get_split_line(img, projection_row)
    segmentation_result = []
    char_index = 1
    h_img, w_img = img_input.shape[:2]

    for row_start, row_end in split_line_list:
        img_row = img[row_start:row_end, :]
        contour_row = get_contours(img_row)
        contour_row = sort_merge(contour_row)
        contour_row = split_oversizeWidth(contour_row)
        contour_row = combine_verticalLine(contour_row)

        # The merge helpers may pad their output with (-1, -1, -1, -1)
        # entries; those are not characters, so keep them out of the result
        # and out of the drawing below.
        contour_row = [
            box for box in contour_row
            if box[0] >= 0 and box[1] >= 0 and box[2] > 0 and box[3] > 0
        ]
        segmentation_result.append(contour_row)

        for (x, y, w, h) in contour_row:
            y_abs = y + row_start  # row-relative -> image coordinates
            x_end = min(x + w, w_img)
            y_end = min(y_abs + h, h_img)
            if x >= x_end or y_abs >= y_end:
                continue  # box lies outside the image

            char_img = cleaned_img[y_abs:y_end, x:x_end]
            if char_img.size > 0:
                save_path = os.path.join(save_dir, f"char_{char_index:03d}.jpg")
                cv2.imwrite(save_path, char_img)
                char_index += 1
            cv2.rectangle(img_input, (x, y_abs), (x + w, y_abs + h), (0, 0, 255))
    return segmentation_result


# ========== Main Program ==========

pic_path = 'J6.png'  # Replace with your image path
img_input = cv2.imread(pic_path, 1)  # 1 = load as a 3-channel colour image
if img_input is None:
    # cv2.imread reports a missing or undecodable file by returning None
    # instead of raising; stop here with a clear message rather than failing
    # later inside the colour conversion.
    raise FileNotFoundError(f"Could not read image: {pic_path!r}")
img, cleaned_img = image_preprocess(img_input)

# Boxes are drawn on a copy so the saved character crops stay unmarked.
cleaned_img_copy = cleaned_img.copy()

segmentation_result = get_segmentation_result(
    img, cleaned_img_copy, cleaned_img, save_dir="char_output"
)

cv2.imwrite("segmented_visual.jpg", cleaned_img_copy)
cv2.imwrite("cleaned_background.jpg", cleaned_img)

display(cleaned_img_copy)


In [None]:
Better accuracy version

In [1]:
import cv2
import os
import numpy as np


def display(img):
    """Show ``img`` in a window titled "1" and block until a key is pressed.

    Once a key is pressed the HighGUI windows are destroyed, so repeated
    calls (for example from a notebook cell) do not leave stale windows
    behind.

    Args:
        img: Image array accepted by ``cv2.imshow``.
    """
    cv2.imshow("1", img)
    cv2.waitKey(0)
    # Tear the window down explicitly; nothing else in this script closes it.
    cv2.destroyAllWindows()


def clean_background(img_input):
    """Whiten every pixel that is not dark, keeping black/dark-gray strokes.

    A pixel is kept when its HSV value channel is at most 130; hue and
    saturation are unconstrained.

    Args:
        img_input: BGR image; it is not modified.

    Returns:
        A copy of ``img_input`` with all brighter pixels set to white.
    """
    dark_mask = cv2.inRange(
        cv2.cvtColor(img_input, cv2.COLOR_BGR2HSV),
        np.array([0, 0, 0]),
        np.array([180, 255, 130]),
    )
    not_dark = dark_mask == 0

    result = img_input.copy()
    result[not_dark] = [255, 255, 255]
    return result


def image_preprocess(img_input):
    """Produce the binary stroke mask and the cleaned colour image.

    Steps: whiten the background, convert to grayscale, blur with a 3x3
    Gaussian, apply an inverted Otsu threshold (dark strokes become 255) and
    erode with a 3x3 cross.

    Args:
        img_input: BGR image handed to ``clean_background``.

    Returns:
        Tuple ``(binary_mask, cleaned_image)``.
    """
    cleaned = clean_background(img_input)

    gray = cv2.cvtColor(cleaned, cv2.COLOR_BGR2GRAY)
    smoothed = cv2.GaussianBlur(gray, (3, 3), 3)

    # One call does both jobs: Otsu selects the threshold, BINARY_INV flips
    # the result - identical to thresholding first and inverting afterwards.
    inverted_otsu = cv2.THRESH_BINARY_INV | cv2.THRESH_OTSU
    _, mask = cv2.threshold(smoothed, 0, 255, inverted_otsu)

    structuring_element = cv2.getStructuringElement(cv2.MORPH_CROSS, (3, 3))
    mask = cv2.erode(mask, structuring_element)
    return mask, cleaned


def get_split_line(img, projection_col):
    """Find the ``(start, end)`` extents of text columns in a projection profile.

    A run opens at the first positive entry and closes at the next zero
    entry (or at the end of the profile). A run shorter than 15 entries is
    not closed: it stays open and keeps growing until it spans at least 15
    entries, so thin fragments are absorbed into the following extent.

    Args:
        img: Unused; kept so existing callers keep working.
        projection_col: Indexable sequence of per-column sums.

    Returns:
        List of ``(start, end)`` tuples with ``end`` exclusive, suitable for
        slicing ``img[:, start:end]``.
    """
    split_line_list = []
    total = len(projection_col)
    in_run = False
    start = 0
    for i in range(total):
        if not in_run:
            if projection_col[i] > 0:
                in_run = True
                start = i
            continue

        is_blank = projection_col[i] == 0
        if not is_blank and i != total - 1:
            continue

        # A run closed by a blank entry ends just before it. A run that is
        # still inked on the final entry must include that entry, hence i + 1
        # (closing at i would silently drop the last column from the slice).
        end = i if is_blank else i + 1
        if end - start >= 15:
            split_line_list.append((start, end))
            in_run = False
        # Otherwise the run is too short: leave it open so it keeps growing.
    return split_line_list


def get_contours(img):
    """Return de-noised, vertically merged bounding boxes of external contours.

    Boxes with an area below 30 pixels, or narrower or shorter than 3 pixels,
    are treated as noise and dropped. The remaining boxes are passed through
    ``merge_small_boxes_vertically``.

    Args:
        img: Single-channel image passed to ``cv2.findContours``.

    Returns:
        List of ``(x, y, w, h)`` tuples.
    """
    outlines, _ = cv2.findContours(img, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    candidate_boxes = (cv2.boundingRect(outline) for outline in outlines)
    kept = [
        (x, y, w, h)
        for x, y, w, h in candidate_boxes
        if w * h >= 30 and w >= 3 and h >= 3  # anything smaller is noise
    ]
    # Merge vertically adjacent small boxes into a single character
    return merge_small_boxes_vertically(kept)


def merge_small_boxes_vertically(boxes, gap_thresh=15, x_align_thresh=18, merged_hw_ratio_thresh=(0.7, 4.8)):
    """Merge boxes that sit closely below one another into single boxes.

    Boxes are visited top to bottom. Each not-yet-used box acts as a seed;
    a later box is merged into it when all three conditions hold, measured
    against the seed box:

    * the vertical gap below the seed is in ``[0, gap_thresh)``,
    * the left edges differ by less than ``x_align_thresh``,
    * the height/width ratio of the pair's bounding box lies strictly
      between the two values of ``merged_hw_ratio_thresh``.

    The result for a seed is the bounding box of the seed and *all* boxes
    merged into it. (Previously only the seed and the last merged box were
    covered, so earlier merged boxes were consumed but their area was lost.)

    Args:
        boxes: Sequence of ``(x, y, w, h)`` tuples.
        gap_thresh: Exclusive upper bound for the vertical gap.
        x_align_thresh: Exclusive upper bound for the left-edge offset.
        merged_hw_ratio_thresh: ``(min, max)`` exclusive bounds for the
            height/width ratio of a merged pair.

    Returns:
        List of ``(x, y, w, h)`` tuples, ordered by the ``y`` of their seed.
    """
    if not boxes:
        return []

    boxes = sorted(boxes, key=lambda b: b[1])  # Sort top to bottom by y
    merged = []
    used = [False] * len(boxes)

    for i, (x1, y1, w1, h1) in enumerate(boxes):
        if used[i]:
            continue
        used[i] = True

        # Running union of the seed and every box accepted so far.
        left, top, right, bottom = x1, y1, x1 + w1, y1 + h1

        for j in range(i + 1, len(boxes)):
            if used[j]:
                continue
            x2, y2, w2, h2 = boxes[j]

            vertical_gap = y2 - (y1 + h1)
            x_align = abs(x1 - x2)
            pair_w = max(x1 + w1, x2 + w2) - min(x1, x2)
            pair_h = max(y1 + h1, y2 + h2) - min(y1, y2)
            hw_ratio = pair_h / pair_w if pair_w != 0 else 999

            # Merge if three conditions are met
            if 0 <= vertical_gap < gap_thresh and x_align < x_align_thresh and \
               merged_hw_ratio_thresh[0] < hw_ratio < merged_hw_ratio_thresh[1]:
                left = min(left, x2)
                top = min(top, y2)
                right = max(right, x2 + w2)
                bottom = max(bottom, y2 + h2)
                used[j] = True

        merged.append((left, top, right - left, bottom - top))

    return merged


def sort_merge(contour_row):
    """Sort boxes top to bottom and merge vertically overlapping neighbours.

    Adjacent boxes are fused when their normalised overlap score is
    positive. After a merge the grown box is compared with its new lower
    neighbour again, so chains of overlapping boxes collapse into one.

    Args:
        contour_row: Iterable of ``(x, y, w, h)`` boxes with positive height.
            It is not modified.

    Returns:
        A new list of ``(x, y, w, h)`` boxes sorted by ``y``. The list holds
        only real boxes: no ``(-1, -1, -1, -1)`` padding entries are
        appended, so downstream steps never see placeholders.
    """
    boxes = sorted(contour_row, key=lambda box: box[1])  # Sort top to bottom (for vertical layout)
    i = 0
    while i < len(boxes) - 1:
        upper = boxes[i]
        lower = boxes[i + 1]
        bottom_upper = upper[1] + upper[3]
        bottom_lower = lower[1] + lower[3]

        # Signed vertical overlap (negative when there is a gap).
        ovlp = bottom_upper - lower[1]
        # NOTE(review): the upper term is y - h/2 rather than the box centre
        # y + h/2. Kept as-is because the score below was presumably tuned
        # against it - confirm before changing.
        dist = abs((lower[1] + lower[3] / 2) - (upper[1] - upper[3] / 2))
        span = max(bottom_lower, bottom_upper) - upper[1]
        nmovlp = (ovlp / upper[3] + ovlp / lower[3]) / 2 - dist / span / 8

        if nmovlp > 0:
            x = min(upper[0], lower[0])
            y = upper[1]
            w = max(upper[0] + upper[2], lower[0] + lower[2]) - x
            h = max(bottom_upper, bottom_lower) - y
            boxes[i] = (x, y, w, h)
            # Drop the absorbed box and stay on i to re-test the merged box.
            del boxes[i + 1]
        else:
            i += 1
    return boxes


def combine_verticalLine(contour_row):
    """Return ``contour_row`` unchanged.

    Optional step retained for the vertical layout; vertical splits are
    generally not a problem there, so no boxes are combined.
    """
    return contour_row


def split_oversizeWidth(contour_row):
    """Cut boxes whose height exceeds 1.8x their width into two halves.

    Despite the name, this variant splits along the vertical axis: a tall
    aspect ratio may indicate merged characters. The list is edited in
    place. The lower half is inserted directly after the upper half and is
    itself examined on the next step, so it may be cut again; the upper half
    is not re-examined.

    Args:
        contour_row: List of ``(x, y, w, h)`` boxes with non-zero width.

    Returns:
        The same list object, with tall boxes replaced by their halves.
    """
    idx = 0
    while idx < len(contour_row):
        x, y, w, h = contour_row[idx]
        if h * 1.0 / w > 1.8:
            # One row is left out between the two halves.
            lower_y = int(y + h / 2 + 1)
            upper_half = (x, y, w, int(h / 2))
            lower_half = (x, lower_y, w, y + h - lower_y)
            contour_row[idx] = upper_half
            contour_row.insert(idx + 1, lower_half)
        idx += 1
    return contour_row


def get_segmentation_result(img, img_input, cleaned_img, save_dir="char_output", pad=3):
    """Segment a binarised vertical-layout page into characters and save them.

    The image is split into text columns using the per-column pixel sums,
    the columns are processed from right to left, and each column is split
    into character boxes via contour detection and the merge/split helpers.
    Every box is cropped from ``cleaned_img`` with ``pad`` pixels of margin
    (clamped to the image) and written to ``save_dir`` as ``char_NNN.jpg``.
    The padded box is outlined in red on ``img_input``.

    Args:
        img: Binary image in which stroke pixels are non-zero.
        img_input: Image that receives the box outlines (modified in place);
            its height and width are used to clamp the crops.
        cleaned_img: Image the character crops are taken from.
        save_dir: Output directory, created if missing.
        pad: Margin in pixels added around each crop. Defaults to 3, the
            previously hard-coded value; values from 2 to 5 are worth trying.

    Returns:
        One list of ``(x, y, w, h)`` boxes per text column, rightmost column
        first; ``x`` is relative to the left edge of its column. Placeholder
        or degenerate boxes (negative origin, non-positive size) are not
        included.
    """
    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(save_dir, exist_ok=True)

    projection_col = cv2.reduce(img, 0, cv2.REDUCE_SUM, dtype=cv2.CV_32S)  # Vertical projection
    projection_col = projection_col.flatten()
    split_line_list = get_split_line(img, projection_col)

    # Process each column from right to left
    split_line_list = sorted(split_line_list, key=lambda x: x[0], reverse=True)

    segmentation_result = []
    char_index = 1
    h_img, w_img = img_input.shape[:2]

    for col_start, col_end in split_line_list:
        img_col = img[:, col_start:col_end]
        contour_col = get_contours(img_col)
        contour_col = sort_merge(contour_col)
        contour_col = split_oversizeWidth(contour_col)
        contour_col = combine_verticalLine(contour_col)

        # The merge helpers may pad their output with (-1, -1, -1, -1)
        # entries; those are not characters, so keep them out of the result.
        contour_col = [
            box for box in contour_col
            if box[0] >= 0 and box[1] >= 0 and box[2] > 0 and box[3] > 0
        ]
        segmentation_result.append(contour_col)

        for (x, y, w, h) in contour_col:
            x_abs = x + col_start  # column-relative -> image coordinates
            x_end = min(x_abs + w, w_img)
            y_end = min(y + h, h_img)
            if x_abs >= x_end or y >= y_end:
                continue  # box lies outside the image

            # Add padding (white space), clamped to the image bounds
            x_pad1 = max(x_abs - pad, 0)
            y_pad1 = max(y - pad, 0)
            x_pad2 = min(x_abs + w + pad, w_img)
            y_pad2 = min(y + h + pad, h_img)

            char_img = cleaned_img[y_pad1:y_pad2, x_pad1:x_pad2]
            if char_img.size > 0:
                save_path = os.path.join(save_dir, f"char_{char_index:03d}.jpg")
                cv2.imwrite(save_path, char_img)
                char_index += 1

            # Draw enlarged red box for visualization
            cv2.rectangle(img_input, (x_pad1, y_pad1), (x_pad2, y_pad2), (0, 0, 255))

    return segmentation_result


# ========== Main Program ==========

pic_path = 'J5.png'
img_input = cv2.imread(pic_path, 1)  # 1 = load as a 3-channel colour image
if img_input is None:
    # cv2.imread reports a missing or undecodable file by returning None
    # instead of raising; stop here with a clear message rather than failing
    # later inside the colour conversion.
    raise FileNotFoundError(f"Could not read image: {pic_path!r}")
img, cleaned_img = image_preprocess(img_input)
# Boxes are drawn on a copy so the saved character crops stay unmarked.
cleaned_img_copy = cleaned_img.copy()

segmentation_result = get_segmentation_result(
    img, cleaned_img_copy, cleaned_img, save_dir="char_output"
)

cv2.imwrite("segmented_visual.jpg", cleaned_img_copy)
cv2.imwrite("cleaned_background.jpg", cleaned_img)

display(cleaned_img_copy)
