# OCR process flow

In [1]:
import matplotlib.pyplot as plt
%matplotlib notebook

import warnings
warnings.filterwarnings("ignore")

import numpy as np
import random
from sklearn.datasets import make_blobs
from PIL import Image

In [2]:
#img = np.asarray(Image.open('../imgs/experiments/segmentation/etiqueta6.png'), dtype=np.uint32)
img = np.asarray(Image.open('../imgs/experiments/segmentation/etiqueta1.png'), dtype=np.uint32)
#img = np.asarray(Image.open('../imgs/chars/labels/etiqueta1.png'), dtype=np.uint32)
#img = np.asarray(Image.open('../imgs/photos/1.png'), dtype=np.uint32)

img = np.mean(img, axis=2)
img = 255 - img
print(img.shape)
plt.figure()
plt.imshow(img, cmap="gray")

(82, 208)


<IPython.core.display.Javascript object>

<matplotlib.image.AxesImage at 0x26ea07f0548>

## Light correction

### Method 1: Opening residue

In [3]:
import Mathematical_Morphology as morpho

img_op_res = morpho.openingResidue(img, 20, 20)

plt.figure()
plt.imshow(img_op_res, cmap='gray', vmin=0, vmax=255)

<IPython.core.display.Javascript object>

<matplotlib.image.AxesImage at 0x26ea083cf08>

#### Binarization:

##### Manually setting threshold:

In [4]:
img_op_res_bin = img_op_res.copy()
img_op_res_bin[img_op_res_bin <= 65] = 0
img_op_res_bin[img_op_res_bin > 65] = 1

plt.figure()
plt.imshow(img_op_res_bin, cmap='gray', vmin=0, vmax=1)

<IPython.core.display.Javascript object>

<matplotlib.image.AxesImage at 0x26ea087d808>

The threshold will be different depending on the type of the letter font, so we use Mean shift to determine the local maximums of the probability density to be able to binarize keeping only the center of the letters.


##### Using Mean shift to segment the text

In [5]:
import MeanShift as MeanShift

X = img_op_res.reshape(-1, 1)
print(X.shape)
finalPoints = MeanShift.MeanShift(X, h=10)

(17056, 1)


In [6]:
clusterColors = {}
colores = []
cm = list(map(str, finalPoints))
quantities = {}

for c in cm:
    if c in clusterColors:
        colores.append(clusterColors[c])
        quantities[c] = quantities[c] + 1
    else:
        newColor = np.random.default_rng().uniform(0,255,3).astype(np.uint8)
        clusterColors[c] = newColor
        quantities[c] = 1
        colores.append(clusterColors[c])
        
        
print("Number of clusters generated: " + str(len(clusterColors)))

Number of clusters generated: 72


In [7]:
maxQ = 0
maxQVal = 0
minQ = 10
minQVal = []

imagen_clusterizada = np.zeros((img.shape[0], img.shape[1], 3), dtype=np.uint32)
asignaciones2 = np.round(finalPoints.reshape(img.shape[0], img.shape[1]), 5)

for k,v in clusterColors.items():
    imagen_clusterizada[asignaciones2==round(float(k[1:-1]), 5)] = v
    
    if quantities[k] > maxQ:
        maxQ = quantities[k]
        maxQVal = v
        
    if quantities[k] < minQ:
        minQVal.append(v)

In [8]:
image_letters = np.where(imagen_clusterizada!=maxQVal, 1, 0)

for mv in minQVal:
    image_letters = np.where(imagen_clusterizada!=mv, image_letters, 0)

image_letters = image_letters[:, :, 0]
plt.figure()
plt.imshow(image_letters, cmap='gray', vmin=0, vmax=1)
plt.show()

<IPython.core.display.Javascript object>

### Method 2: Niblack method

In [9]:
import Niblack as niblack

image_letters = niblack.NiblackBinarization(img, 15, 15, 0.3)

plt.figure()
plt.imshow(image_letters, cmap='gray', vmin=0, vmax=1)
plt.show()

<IPython.core.display.Javascript object>

## Labeling:

In [10]:
from matplotlib.colors import ListedColormap, LinearSegmentedColormap

## To see the background always as black
colormap = plt.get_cmap('rainbow', 100)
newcolors = colormap(np.linspace(0, 1, 500))
black = np.array([0, 0, 0, 1])
np.random.shuffle(newcolors)
newcolors[0, :] = black
newcolormap = ListedColormap(newcolors)

In [11]:
import LabelingRegions as LabelingRegions

[im_out, eq] = LabelingRegions.LabelingRegionsC8(image_letters)

plt.figure(figsize=(9,5))
plt.imshow(im_out, cmap=newcolormap)
plt.show()

<IPython.core.display.Javascript object>

In [12]:
unique, counts = np.unique(im_out.reshape(-1), return_counts=True)
labels = np.asarray((unique, counts)).T

In [13]:
current_meanY = -1
row = 1

labeled_letters = []
for region_number, quantity in labels:
    if(region_number!=0):
        if(quantity > 10):
            letter1 = np.where(im_out==region_number, 1, 0)
            
            s0 = letter1.shape[0]
            s1 = letter1.shape[1]

            indices = np.where(letter1==1)

            y0 = indices[0][0] - 5
            y1 = indices[0][-1] + 5
            x0 = indices[1][0] - 5
            x1 = indices[1][-1] + 5

            if(y0 < 0):
                y0 = 0

            if(y1 >= s0):
                y1 = s0 - 1

            if(x0 < 0):
                x0 = 0

            if(x1 >= s1):
                x1 = s1 - 1                

            letter1 = letter1[y0:y1, x0:x1]
            
            # mean value x and y for sorting
            in_m0 = int(indices[0].shape[0] / 2)
            in_m1 = int(indices[1].shape[0] / 2)
            meanval0 = indices[0][in_m0]
            meanval1 = indices[1][in_m1]
            
            if (current_meanY == -1):
                current_meanY = meanval0
                
            if (current_meanY < meanval0 - 5):
                current_meanY = meanval0
                row = row + 1
            
            labeled_letters.append([letter1, row, meanval1])
            
print("Lines of text found: " + str(row))

Lines of text found: 5


In [14]:
labeled_letters_sorted = sorted(labeled_letters, key=lambda v: (v[1], v[2]))

In [15]:
for ll, v0, v1 in labeled_letters_sorted[0:10]:
    plt.figure()
    plt.imshow(ll, cmap="gray", vmin=0, vmax=1)
    print(v0, v1)
    plt.show()

<IPython.core.display.Javascript object>

1 15


<IPython.core.display.Javascript object>

1 25


<IPython.core.display.Javascript object>

1 37


<IPython.core.display.Javascript object>

1 50


<IPython.core.display.Javascript object>

1 62


<IPython.core.display.Javascript object>

1 85


<IPython.core.display.Javascript object>

1 97


<IPython.core.display.Javascript object>

1 101


<IPython.core.display.Javascript object>

2 10


<IPython.core.display.Javascript object>

2 23


# Letters classification with the MLP that we have pre-trained

In [16]:
from joblib import load
import LettersNumbersClassification as LetNumClassif

clf = load('MLP.joblib')

In [17]:
ln = ''
    
crow = 1
    
for im, row, meanval1 in labeled_letters_sorted[0:15]:

    im_c = Image.fromarray(im)
    im_c = np.asarray(im_c.resize((32, 21)))
    im_c = np.where(im_c == 1, 0, 1)

    y_pred = clf.predict_proba(im_c.reshape(1, -1))

    clase = y_pred.argmax(axis=1)
        
    lett = LetNumClassif.getCharFromClassID(clase)
        
    if(row != crow):
        ln = ln + '\n'
        crow = row
        
    ln = ln + lett
    
    plt.figure()
    plt.imshow(im_c, cmap="gray", vmin=0, vmax=1)
    plt.show()
        
    print(lett, row, meanval1)

<IPython.core.display.Javascript object>

R 1 15


<IPython.core.display.Javascript object>

R 1 25


<IPython.core.display.Javascript object>

A 1 37


<IPython.core.display.Javascript object>

Q 1 50


<IPython.core.display.Javascript object>

O 1 62


<IPython.core.display.Javascript object>

B 1 85


<IPython.core.display.Javascript object>

R 1 97


<IPython.core.display.Javascript object>

R 1 101


<IPython.core.display.Javascript object>

B 2 10


<IPython.core.display.Javascript object>

E 2 23


<IPython.core.display.Javascript object>

K 2 27


<IPython.core.display.Javascript object>

R 2 37


<IPython.core.display.Javascript object>

B 2 45


<IPython.core.display.Javascript object>

R 2 51


<IPython.core.display.Javascript object>

R 2 59
