In [1]:
import cv2
import numpy as np
from matplotlib import pyplot as plt
from scipy.ndimage import interpolation as inter
import os

In [2]:
def correct_skew(image, delta=1, limit=5):
    def determine_score(arr, angle):
        data = inter.rotate(arr, angle, reshape=False, order=0)
        histogram = np.sum(data, axis=1)
        score = np.sum((histogram[1:] - histogram[:-1]) ** 2)
        return histogram, score

    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1] 

    scores = []
    angles = np.arange(-limit, limit + delta, delta)
    for angle in angles:
        histogram, score = determine_score(thresh, angle)
        scores.append(score)

    best_angle = angles[scores.index(max(scores))]

    (h, w) = image.shape[:2]
    center = (w // 2, h // 2)
    M = cv2.getRotationMatrix2D(center, best_angle, 1.0)
    rotated = cv2.warpAffine(image, M, (w, h), flags=cv2.INTER_CUBIC, \
              borderMode=cv2.BORDER_REPLICATE)

    return best_angle, rotated

In [3]:
def preprocess(input_image):
    image = cv2.imread(input_image)
    #Converting the colored image to greyscale
    angle, rotated = correct_skew(image)
    print(angle)
    cv2.imwrite('rotated.jpg', rotated)
    gray = cv2.cvtColor(rotated,cv2.COLOR_BGR2GRAY)
    thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]
    # Remove horizontal lines
    horizontal_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (40,1))
    remove_horizontal = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, horizontal_kernel, iterations=2)
    cnts = cv2.findContours(remove_horizontal, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    cnts = cnts[0] if len(cnts) == 2 else cnts[1]
    for c in cnts:
        cv2.drawContours(rotated, [c], -1, (255,255,255), 5)
        # Remove vertical lines
    vertical_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (1,40))
    remove_vertical = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, vertical_kernel, iterations=2)
    cnts = cv2.findContours(remove_vertical, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    cnts = cnts[0] if len(cnts) == 2 else cnts[1]
    for c in cnts:
        cv2.drawContours(rotated, [c], -1, (255,255,255), 5)
    gray = cv2.cvtColor(rotated,cv2.COLOR_BGR2GRAY)
    #applying median filter for Salt and pepper/impulse noise
    filter1 = cv2.medianBlur(gray,5)
    #applying gaussian blur to smoothen out the image edges
    filter2 = cv2.GaussianBlur(filter1,(5,5),0)
    #applying non-localized means for final Denoising of the image
    dst = cv2.fastNlMeansDenoising(filter2,None,17,9,17)
    #converting the image to binarized form using adaptive thresholding
    th1 = cv2.adaptiveThreshold(dst,255,cv2.ADAPTIVE_THRESH_GAUSSIAN_C,cv2.THRESH_BINARY,11,2)
    return th1


In [4]:
from functools import cmp_to_key
def contour_sort(a, b):

    br_a = cv2.boundingRect(a)
    br_b = cv2.boundingRect(b)

    if abs(br_a[1] - br_b[1]) <= 20:
        return br_a[0] - br_b[0]

    return br_a[1] - br_b[1]


In [5]:
def calculate_contour_distance(contour1, contour2): 
    x1, y1, w1, h1 = cv2.boundingRect(contour1)
    c_x1 = x1 + w1/2
    c_y1 = y1 + h1/2

    x2, y2, w2, h2 = cv2.boundingRect(contour2)
    c_x2 = x2 + w2/2
    c_y2 = y2 + h2/2

    return max(abs(c_x1 - c_x2) - (w1 + w2)/2, abs(c_y1 - c_y2) - (h1 + h2)/2)

def merge_contours(contour1, contour2):
    return np.concatenate((contour1, contour2), axis=0)

def agglomerative_cluster(contours, threshold_distance=0.5):
    current_contours = contours
    while len(current_contours) > 1:
        min_distance = None
        min_coordinate = None

        for x in range(len(current_contours)-1):
            for y in range(x+1, len(current_contours)):
                distance = calculate_contour_distance(current_contours[x], current_contours[y])
                if min_distance is None:
                    min_distance = distance
                    min_coordinate = (x, y)
                elif distance < min_distance:
                    min_distance = distance
                    min_coordinate = (x, y)

        if min_distance < threshold_distance:
            index1, index2 = min_coordinate
            current_contours[index1] = merge_contours(current_contours[index1], current_contours[index2])
            del current_contours[index2]
        else: 
            break

    return current_contours

In [6]:
# segement will take the preprocessed image as input and return the segmented image

def segment(image):
    gray = cv2.GaussianBlur(image, (7, 7), 0)
    ret,thresh1 = cv2.threshold(gray ,127,255,cv2.THRESH_BINARY_INV)
    dilate = cv2.dilate(thresh1, None, iterations=2)
    cnts,_ = cv2.findContours(dilate.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    # sorted_ctrs = sorted(cnts, key=lambda ctr: cv2.boundingRect(ctr)[0] + cv2.boundingRect(ctr)[1] * image.shape[1] )
    sorted_ctrs = sorted(cnts, key=cmp_to_key(contour_sort))
    sorted_ctrs = agglomerative_cluster(sorted_ctrs)
    orig = cv2.merge([image,image,image])
    i = 0
    for cnt in sorted_ctrs:
        # Check the area of contour, if it is very small ignore it
        if(cv2.contourArea(cnt) < 200):
            continue

        # Filtered countours are detected
        x,y,w,h = cv2.boundingRect(cnt)
        
        # Taking ROI of the cotour
        roi = image[y:y+h, x:x+w]
        
        # Mark them on the image if you want
        cv2.rectangle(orig,(x,y),(x+w,y+h),(0,255,0),2)

        copy_img = cv2.merge([image,image,image])
        cv2.rectangle(copy_img,(x,y),(x+w,y+h),(0,255,0),4)
        

        # Save your contours or characters
        cv2.imwrite( r"./individual/"+str(i) + ".png", roi)
        cv2.imwrite( r"./test_individual/"+str(i) + ".png", copy_img)
    
        i = i + 1
    return orig


In [7]:
# empty ./test_individual folder and ./individual folder
for filename in os.listdir('./test_individual'):
    os.remove('./test_individual/'+filename)
for filename in os.listdir('./individual'):
    os.remove('./individual/'+filename)

In [9]:
segmented_image=segment(preprocess('21.jpg'))
# save the segmented image
cv2.imwrite('segmented.jpg', segmented_image)

  data = inter.rotate(arr, angle, reshape=False, order=0)


-4


True