In [11]:
import cv2
import numpy as np
from typing import Optional
from matplotlib import pyplot as plt

## Function to show image

In [24]:
def showimage(image, title='Original Image', width=600):
    """
    Display an image in an OpenCV window, resized to a fixed width
    :param image: image array with at least two dimensions (height, width)
    :param title: window title (defaults to the previously hard-coded 'Original Image')
    :param width: display width in pixels; the height follows the aspect ratio
    :raises ValueError: if image is None or has a zero-sized dimension

    Blocks on cv2.waitKey(0) until a key is pressed, then destroys all
    OpenCV windows.
    """
    # cv2.imread returns None instead of raising when a file cannot be read,
    # so fail here with a clear message rather than on `image.shape` below.
    if image is None:
        raise ValueError("showimage: image is None (was the file read correctly?)")

    def resize_with_aspect_ratio(img, width=None, height=None, inter=cv2.INTER_AREA):
        """Resize img to the given width (or height), preserving aspect ratio."""
        h, w = img.shape[:2]

        # Nothing requested: hand the image back untouched
        if width is None and height is None:
            return img
        if h == 0 or w == 0:
            raise ValueError("showimage: image has a zero-sized dimension")

        # Width takes precedence when both are given (same as before)
        if width is None:
            scale = height / float(h)
            dim = (max(int(w * scale), 1), height)
        else:
            scale = width / float(w)
            # Keep at least one pixel so extreme aspect ratios do not
            # truncate to a zero-sized target
            dim = (width, max(int(h * scale), 1))

        return cv2.resize(img, dim, interpolation=inter)

    resized = resize_with_aspect_ratio(image, width=width)

    cv2.imshow(title, resized)
    cv2.waitKey(0)
    cv2.destroyAllWindows()

In [4]:
def threshold_dark_areas(img: np.ndarray, char_length: Optional[float]) -> np.ndarray:
    """
    Threshold image by differentiating areas with light and dark backgrounds
    :param img: image array; it is passed to cv2.adaptiveThreshold, which expects
        a single-channel 8-bit image
    :param char_length: average character length; when None or 0, default kernel,
        blur and margin sizes are used
    :return: threshold image
    :raises ValueError: if img is None
    """
    if img is None:
        raise ValueError("threshold_dark_areas: img is None (was the file read correctly?)")

    # Light smoothing before thresholding
    blur = cv2.GaussianBlur(img, (3, 3), 0)

    # cv2.adaptiveThreshold requires an odd block size greater than 1, so the
    # lower bound is 3 (a bound of 1 made small char_length values crash).
    thresh_kernel = max(int(round(char_length)), 3) if char_length else 21
    if thresh_kernel % 2 == 0:
        thresh_kernel += 1

    # Adaptive threshold of the image, and of the inverted image
    thresh = cv2.adaptiveThreshold(blur, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY_INV, thresh_kernel, 5)
    binary_thresh = cv2.adaptiveThreshold(255 - blur, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY_INV, thresh_kernel, 5)

    # Mask on areas with dark background: heavier blur, then keep values in [0, 100].
    # The blur size is forced odd and capped at 255.
    if char_length:
        blur_size = min(255, max(int(2 * char_length) | 1, 1))
    else:
        blur_size = 11
    blur = cv2.GaussianBlur(img, (blur_size, blur_size), 0)
    mask = cv2.inRange(blur, 0, 100)

    # Get contours of dark areas
    contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    # Loop-invariant values, computed once
    margin = int(char_length) if char_length else 21
    image_area = img.shape[0] * img.shape[1]

    # For each dark area, use the threshold of the inverted image instead of
    # the regular threshold
    for contour in contours:
        x, y, w, h = cv2.boundingRect(contour)

        # Only areas larger than the margin on every side and covering less
        # than 90% of the image are swapped
        if min(w, h) > 2 * margin and w * h / image_area < 0.9:
            # Shrink the bounding box by `margin` on each side
            inner = (slice(y + margin, y + h - margin), slice(x + margin, x + w - margin))
            thresh[inner] = binary_thresh[inner]

    return thresh


In [25]:
# Load the sample image as a single-channel grayscale array
image_path = 'sample_image02.jpeg'
original_image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)

# cv2.imread returns None (it does not raise) when the file is missing or
# cannot be decoded; fail here instead of with an unrelated error downstream.
if original_image is None:
    raise FileNotFoundError(f"Could not read image: {image_path!r}")

showimage(original_image)

In [23]:
# No character-length estimate is supplied, so threshold_dark_areas uses its
# built-in default sizes
char_length = None

# Threshold the grayscale image loaded in the previous cell
thresholded_image = threshold_dark_areas(img=original_image, char_length=char_length)

# Display the thresholded result
showimage(image=thresholded_image)