# OCR process flow

In [1]:
import matplotlib.pyplot as plt
%matplotlib notebook

import warnings
warnings.filterwarnings("ignore")

import numpy as np
import random
from sklearn.datasets import make_blobs
from PIL import Image

In [2]:
# Load the label image into an integer array. Opening it through a context
# manager guarantees the file handle is released once the pixels are copied
# (the dtype conversion makes the array independent of the file).
with Image.open('../imgs/experiments/segmentation/etiqueta6.png') as label_file:
    img = np.asarray(label_file, dtype=np.uint32)
#img = np.asarray(Image.open('../imgs/chars/labels/etiqueta6.png'), dtype=np.uint32)

# Collapse axis 2 (the colour channels) into one grayscale plane by averaging.
# NOTE(review): if the PNG carries an alpha channel it is averaged in as well — confirm.
img = np.mean(img, axis=2)
# Invert intensities; presumably so that dark ink becomes the bright foreground
# (assumes 8-bit 0..255 input — TODO confirm).
img = 255 - img
print(img.shape)
plt.figure()
plt.imshow(img, cmap="gray")

(74, 220)


<IPython.core.display.Javascript object>

<matplotlib.image.AxesImage at 0x215107504c8>

## Light correction

### Method 1: Opening residue

In [4]:
import Mathematical_Morphology as morpho

# Light correction with the project's opening-residue helper.
# The two 20s are presumably the structuring-element dimensions —
# TODO confirm against Mathematical_Morphology.openingResidue.
opening_args = (20, 20)
img_op_res = morpho.openingResidue(img, *opening_args)

# Show the corrected image on a fixed 0..255 gray scale.
plt.figure()
plt.imshow(img_op_res, vmin=0, vmax=255, cmap='gray')

<IPython.core.display.Javascript object>

<matplotlib.image.AxesImage at 0x21511d4a3c8>

#### Binarization:

##### Manually setting threshold:

In [5]:
# Binarize the light-corrected image with a hand-picked threshold of 65:
# values <= 65 become 0, values > 65 become 1. Work on a copy so that
# img_op_res stays available for the later cells.
img_op_res_bin = img_op_res.copy()
at_or_below = img_op_res_bin <= 65
above = img_op_res_bin > 65
img_op_res_bin[at_or_below] = 0
img_op_res_bin[above] = 1

plt.figure()
plt.imshow(img_op_res_bin, vmin=0, vmax=1, cmap='gray')

<IPython.core.display.Javascript object>

<matplotlib.image.AxesImage at 0x2151201ef88>

A fixed threshold does not generalize: the right value depends on the font of the letters. Instead, we use Mean shift to find the local maxima of the intensity probability density, and then binarize the image keeping only the pixels that belong to the core of the letters.


##### Using Mean shift to segment the text

In [10]:
import MeanShift as MeanShift

# Flatten the corrected image into a column: one sample per pixel, a single
# feature (its intensity).
X = img_op_res.reshape((-1, 1))
print(X.shape)

# Run the project's mean-shift implementation. `h` is presumably the kernel
# bandwidth — TODO confirm in MeanShift.MeanShift.
mean_shift_options = {"h": 10}
finalPoints = MeanShift.MeanShift(X, **mean_shift_options)

(16280, 1)


In [11]:
# Group the pixels by the point they converged to and give each group a
# display colour.
#
# A cluster is identified by the string form of its converged point
# (e.g. '[6.91056911]'). A later cell parses these keys back into floats with
# float(key[1:-1]), so the key format must stay as it is.
clusterColors = {}   # cluster key -> uint8 RGB colour (array of 3 values)
colores = []         # colour of every pixel, in the same order as finalPoints
cm = list(map(str, finalPoints))
quantities = {}      # cluster key -> number of pixels in that cluster

# A single seeded generator makes the colouring reproducible across runs
# (the original built a new unseeded generator for every cluster).
color_rng = np.random.default_rng(0)

# Later cells identify clusters by their colour, so every colour must be
# unique. Black is reserved because the cluster image starts zero-filled.
used_colors = {(0, 0, 0)}

for c in cm:
    if c not in clusterColors:
        # Redraw until we get a colour no other cluster is using.
        while True:
            newColor = color_rng.integers(0, 256, size=3, dtype=np.uint8)
            color_key = tuple(int(channel) for channel in newColor)
            if color_key not in used_colors:
                break
        used_colors.add(color_key)
        clusterColors[c] = newColor

    quantities[c] = quantities.get(c, 0) + 1
    colores.append(clusterColors[c])


print("Number of clusters generated: " + str(len(clusterColors)))

Number of clusters generated: 77


In [13]:
# Display the per-cluster pixel counts (cluster key -> number of pixels).
quantities

{'[6.91056911]': 13695,
 '[7.15079365]': 216,
 '[24.2729805]': 12,
 '[170.05847953]': 14,
 '[40.42804428]': 19,
 '[108.52991453]': 16,
 '[119.374677]': 283,
 '[180.81976744]': 27,
 '[44.20676692]': 31,
 '[72.62831858]': 22,
 '[99.85625]': 189,
 '[183.66071429]': 25,
 '[53.17573222]': 19,
 '[184.2721519]': 87,
 '[149.25099602]': 100,
 '[118.61640212]': 17,
 '[25.25872093]': 19,
 '[117.95584416]': 149,
 '[26.43661972]': 42,
 '[84.22709163]': 5,
 '[101.63141994]': 26,
 '[64.29047619]': 23,
 '[28.12048193]': 104,
 '[70.98660714]': 30,
 '[47.125]': 23,
 '[59.36199095]': 31,
 '[48.0620155]': 40,
 '[104.42105263]': 47,
 '[144.32089552]': 27,
 '[20.33838384]': 31,
 '[173.23952096]': 10,
 '[35.10126582]': 44,
 '[87.93140794]': 19,
 '[83.66122449]': 29,
 '[81.34765625]': 15,
 '[58.15492958]': 8,
 '[134.25170068]': 42,
 '[23.38504155]': 42,
 '[38.35958904]': 16,
 '[73.75113122]': 12,
 '[164.09139785]': 52,
 '[148.06866953]': 7,
 '[19.30729167]': 10,
 '[137.61347518]': 23,
 '[158.13574661]': 60,
 

In [30]:
# Paint every pixel with the colour of its cluster and, in the same pass,
# record two things used by the next cell:
#   * maxQVal - colour of the most populated cluster (size kept in maxQ)
#   * minQVal - colours of all clusters with fewer than minQ pixels
maxQ = 0
maxQVal = 0
minQ = 10
minQVal = []

n_rows, n_cols = img.shape[0], img.shape[1]
imagen_clusterizada = np.zeros((n_rows, n_cols, 3), dtype=np.uint32)

# Converged value of every pixel, rounded so it can be matched against the
# values parsed back from the cluster keys.
asignaciones2 = np.round(finalPoints.reshape(n_rows, n_cols), 5)

for cluster_key, cluster_color in clusterColors.items():
    # Keys look like '[6.91056911]': strip the brackets and parse the float.
    cluster_value = round(float(cluster_key[1:-1]), 5)
    imagen_clusterizada[asignaciones2 == cluster_value] = cluster_color

    cluster_size = quantities[cluster_key]

    if cluster_size > maxQ:
        maxQ, maxQVal = cluster_size, cluster_color

    if cluster_size < minQ:
        minQVal.append(cluster_color)

In [31]:
# Build the binary text mask from the colour-coded cluster image:
# 1 everywhere except on the most populated cluster (maxQVal) and on the
# very small clusters (minQVal), which are set to 0.
#
# A pixel belongs to a cluster only when ALL three channels match that
# cluster's colour. Comparing channel by channel and then keeping channel 0
# alone (as was done before) erased every cluster that merely shared its red
# value with the background or with a small cluster.
is_background = np.all(imagen_clusterizada == maxQVal, axis=-1)
image_letters = np.where(is_background, 0, 1)

for mv in minQVal:
    is_small_cluster = np.all(imagen_clusterizada == mv, axis=-1)
    image_letters = np.where(is_small_cluster, 0, image_letters)

# image_letters is already 2-D (rows x cols), so no channel slicing is needed.
plt.figure()
plt.imshow(image_letters, cmap='gray', vmin=0, vmax=1)
plt.show()

<IPython.core.display.Javascript object>

### Method 2: Niblack method

In [105]:
import Niblack as niblack

# Alternative binarization with the project's Niblack implementation.
# This overwrites the image_letters produced by Method 1.
# The arguments are presumably the window height, window width and the k
# factor — TODO confirm against Niblack.NiblackBinarization.
niblack_args = (15, 15, 0.3)
image_letters = niblack.NiblackBinarization(img, *niblack_args)

plt.figure()
plt.imshow(image_letters, vmin=0, vmax=1, cmap='gray')
plt.show()

<IPython.core.display.Javascript object>

## Labeling:

In [106]:
from matplotlib.colors import ListedColormap, LinearSegmentedColormap

# Colormap for the labelled regions: 500 colours sampled from a 100-step
# 'rainbow' map, put in random order so nearby labels get different colours.
colormap = plt.get_cmap('rainbow', 100)
sample_positions = np.linspace(0, 1, 500)
newcolors = colormap(sample_positions)
np.random.shuffle(newcolors)

# The first entry is forced to opaque black so the background always
# renders black.
black = np.array([0, 0, 0, 1])
newcolors[0, :] = black

newcolormap = ListedColormap(newcolors)

In [107]:
import LabelingRegions as LabelingRegions

# Label the regions of the binary text mask with the project's helper.
# Judging by its name it uses 8-connectivity — TODO confirm. The second
# returned value is kept in `eq` but not used in the visible cells.
labeling_result = LabelingRegions.LabelingRegionsC8(image_letters)
im_out, eq = labeling_result

# Show the label image with the shuffled colormap.
plt.figure(figsize=(9, 5))
plt.imshow(im_out, cmap=newcolormap)
plt.show()

<IPython.core.display.Javascript object>

In [108]:
# Count how many pixels carry each label, then pair them up as rows of
# (label, pixel count).
flat_labels = im_out.reshape(-1)
unique, counts = np.unique(flat_labels, return_counts=True)
labels = np.asarray([unique, counts]).transpose()

In [109]:
# Cut every sufficiently large labelled region out of the label image as its
# own binary (0/1) crop, with a small margin around its bounding box.
MIN_REGION_PIXELS = 10   # regions with this many pixels or fewer are skipped
CROP_PADDING = 5         # margin, in pixels, kept around each bounding box

labeled_letters = []
img_height, img_width = im_out.shape[0], im_out.shape[1]

for region_number, quantity in labels:
    # Label 0 is never cropped (it is rendered as the black background by the
    # colormap cell); tiny regions are skipped as well.
    if region_number == 0 or quantity <= MIN_REGION_PIXELS:
        continue

    letter1 = np.where(im_out == region_number, 1, 0)
    rows, cols = np.where(letter1 == 1)

    # Use the real extremes of the region. The coordinates come back in
    # row-major order, so the first/last entries of `cols` are just the
    # columns of the first/last pixel, not the leftmost/rightmost ones.
    # The upper bounds get +1 because slice ends are exclusive; without it
    # the far-side margin is one pixel short and a region touching the
    # bottom/right border loses its last row/column.
    y0 = max(rows.min() - CROP_PADDING, 0)
    y1 = min(rows.max() + CROP_PADDING + 1, img_height)
    x0 = max(cols.min() - CROP_PADDING, 0)
    x1 = min(cols.max() + CROP_PADDING + 1, img_width)

    labeled_letters.append(letter1[y0:y1, x0:x1])

In [110]:
# Preview at most the first ten cropped letters, one figure per letter.
preview_options = {"cmap": "gray", "vmin": 0, "vmax": 1}
for letter_img in labeled_letters[:10]:
    plt.figure()
    plt.imshow(letter_img, **preview_options)
    plt.show()

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>