# Pre-OCR light correction demonstration

In [33]:
import random
import warnings

# Silence warnings before the heavier imports so import-time warnings stay hidden.
warnings.filterwarnings("ignore")

import matplotlib.pyplot as plt
%matplotlib notebook
import numpy as np
from PIL import Image
from sklearn.datasets import make_blobs

# Project-local modules used by the later sections of this notebook.
import Mathematical_Morphology as morpho
import MeanShift as MeanShift
import Niblack as niblack

In [34]:
# Load the sample image, collapse its channels to one, and invert it.
# The context manager closes the file handle as soon as the pixel data has
# been copied into the array.
with Image.open('../imgs/experiments/segmentation/4box_3.png') as source_image:
    img = np.asarray(source_image, dtype=np.uint32)

# NOTE(review): the mean runs over every channel on axis 2, so an alpha
# channel (if this PNG has one) is averaged in as well -- confirm that is
# intended before changing it, since later thresholds may be tuned to it.
img = np.mean(img, axis=2)
img = 255 - img  # invert intensities: v -> 255 - v
print(img.shape)

plt.figure()
plt.title("Inverted grayscale input")
plt.imshow(img, cmap="gray")
plt.show()

(213, 200)


<IPython.core.display.Javascript object>

<matplotlib.image.AxesImage at 0x169aeb69808>

## Light correction

### Method 1: Opening residue

In [140]:
# Light correction with the opening residue from the project's
# Mathematical_Morphology module (imported as `morpho` in the imports cell).
# NOTE(review): the two 20s are presumably the structuring-element
# dimensions -- confirm against Mathematical_Morphology.openingResidue.
img_op_res = morpho.openingResidue(img, 20, 20)

plt.figure()
plt.title("Opening residue")
plt.imshow(img_op_res, cmap='gray', vmin=0, vmax=255)
plt.show()

<IPython.core.display.Javascript object>

<matplotlib.image.AxesImage at 0x169b722bf88>

#### Binarization:

##### Manually setting threshold:

In [141]:
# Manual global threshold on the opening-residue image.
# The cut-off is named once so the two sides of the split cannot drift apart.
BINARIZATION_THRESHOLD = 65

# Pixels strictly above the threshold become 1, everything else becomes 0.
# The result is a new array with the same dtype as img_op_res; img_op_res
# itself is left untouched.
img_op_res_bin = (img_op_res > BINARIZATION_THRESHOLD).astype(img_op_res.dtype)

plt.figure()
plt.title("Opening residue, manual threshold")
plt.imshow(img_op_res_bin, cmap='gray', vmin=0, vmax=1)
plt.show()

<IPython.core.display.Javascript object>

<matplotlib.image.AxesImage at 0x169b751bfc8>

The right threshold depends on the font of the letters, so a single hand-picked value does not generalize. Instead, we use mean shift to find the local maxima of the probability density of the pixel intensities, which lets us binarize the image while keeping only the center of the letters.


##### Using Mean shift to segment the text

In [46]:
# Run the project's MeanShift (imported in the imports cell) on the pixel
# intensities, treating each pixel as a one-dimensional sample (one row each).
# NOTE(review): this clusters the inverted input `img`, not the
# light-corrected `img_op_res` -- confirm that is the intended input.
X = img.reshape(-1, 1)
print(X.shape)

# NOTE(review): h is presumably the kernel bandwidth -- confirm in MeanShift.
finalPoints = MeanShift.MeanShift(X, h=20)

(42600, 1)


In [70]:
# Give every distinct mean-shift result its own display colour and count how
# many points ended up at each one.
#
# Points are grouped by the string form of their entry in finalPoints, so two
# points belong to the same cluster exactly when they print identically.
COLOR_SEED = 0
# One seeded generator for the whole cell: the colours are the same on every
# run instead of changing with each execution.
color_rng = np.random.default_rng(COLOR_SEED)

clusterColors = {}  # cluster key -> colour (uint8 array of length 3)
quantities = {}     # cluster key -> number of points with that key
colores = []        # colour of every point, in the order of finalPoints
used_colors = set()  # colours already handed out, as plain tuples
cm = list(map(str, finalPoints))

for c in cm:
    if c not in clusterColors:
        # Draw values in 0..254 (same range as the previous uniform draw) and
        # re-draw on the unlikely collision so every cluster stays
        # distinguishable by colour.
        newColor = color_rng.integers(0, 255, size=3, dtype=np.uint8)
        while tuple(newColor.tolist()) in used_colors:
            newColor = color_rng.integers(0, 255, size=3, dtype=np.uint8)
        used_colors.add(tuple(newColor.tolist()))
        clusterColors[c] = newColor
        quantities[c] = 0

    quantities[c] += 1
    colores.append(clusterColors[c])


print("Number of clusters generated: " + str(len(clusterColors)))

Number of clusters generated: 14


In [152]:
# Paint every pixel with the colour of its cluster, and record
#   * maxQVal: the colour of the most populated cluster, and
#   * minQVal: the colours of all clusters with fewer than minQ points.
maxQ = 0
maxQVal = 0
minQ = 250
minQVal = []

imagen_clusterizada = np.zeros((img.shape[0], img.shape[1], 3), dtype=np.uint32)
# Rounded mean-shift values per pixel; no longer used for the masks below but
# still defined in case another cell reads it.
asignaciones2 = np.round(finalPoints.reshape(img.shape[0], img.shape[1]), 5)

# Per-pixel cluster key, laid out like the image. Comparing the keys
# themselves avoids parsing each key back into a float and rounding it, which
# could merge or miss clusters whose values differ only in the last digits.
cluster_keys = np.array(cm).reshape(img.shape[0], img.shape[1])

for k, v in clusterColors.items():
    imagen_clusterizada[cluster_keys == k] = v

    # Strict comparison: on ties the first cluster encountered wins.
    if quantities[k] > maxQ:
        maxQ = quantities[k]
        maxQVal = v

    if quantities[k] < minQ:
        minQVal.append(v)

plt.figure()
plt.title("Mean shift result")
plt.imshow(imagen_clusterizada)
plt.show()

<IPython.core.display.Javascript object>

In [153]:
# Build a 0/1 mask from the clustered image: drop the most populated cluster
# (colour maxQVal) and every small cluster (colours in minQVal), keep the rest.
#
# A pixel matches a cluster only when all three colour channels are equal.
# Comparing channel by channel and then keeping just channel 0 would also
# drop pixels of unrelated clusters that merely share one channel value.
is_largest_cluster = np.all(imagen_clusterizada == maxQVal, axis=-1)
image_letters = np.where(is_largest_cluster, 0, 1)

for mv in minQVal:
    is_small_cluster = np.all(imagen_clusterizada == mv, axis=-1)
    image_letters = np.where(is_small_cluster, 0, image_letters)

plt.figure()
plt.title("Mean shift binarization")
plt.imshow(image_letters, cmap='gray', vmin=0, vmax=1)
plt.show()

<IPython.core.display.Javascript object>

### Method 2: Niblack method

In [154]:
# Binarize the inverted input with the project's Niblack module (imported as
# `niblack` in the imports cell).
# NOTE(review): 20, 20 are presumably the local window dimensions and 1.1 the
# weighting factor -- confirm against Niblack.NiblackBinarization.
img_niblack = niblack.NiblackBinarization(img, 20, 20, 1.1)

plt.figure()
plt.title("Niblack binarization")
plt.imshow(img_niblack, cmap='gray', vmin=0, vmax=1)
plt.show()

<IPython.core.display.Javascript object>

<matplotlib.image.AxesImage at 0x169b638a388>