<h1 align="center">Guide for detecting areas and text in an image</h1> 

Important things for the protocol:

1. The circles on the paper have to be clearly separated from each other; this makes it easy to split them.
2. Letters and lines that are not needed for the processing have to be drawn in a light color. This helps the thresholding (binarization) step.
3. It is important to use thick marks for the relevant lines.
4. Stickers must not cut across the lines; otherwise it will not be possible to separate that section afterwards.
5. The script works better with uppercase letters, provided they are aligned (we also tried numbers and lowercase letters).

<img src= "sources/processing_detect_text.png" />

In [1]:
import numpy as np
import cv2 
import matplotlib.pyplot as plt
%matplotlib qt5

In [12]:
# Open the scanned page and define the working folders used by the later cells.
path = r'scanner/'            # folder holding the scanned pages
pathSave = r'./areas/'        # masked section images are written here
pathNumbers = r'./numbers/'   # isolated label glyphs handed to Tesseract
pathTexts = r'./texts/'       # Tesseract text output
imageName = '29.jpg'

image = cv2.imread(path + imageName)
# cv2.imread does not raise on a missing or unreadable file; it returns None.
# Fail here with a clear message instead of a cryptic error further down.
if image is None:
    raise FileNotFoundError('Could not read image: ' + path + imageName)

# OpenCV stores pixels as BGR while matplotlib expects RGB. Convert only for
# display; `image` itself stays BGR for the cv2 calls in the following cells.
plt.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))

<matplotlib.image.AxesImage at 0x23ebc28ef98>

In [13]:
# Split the scanned page into its top and bottom halves
# (presumably one drawing per half -- confirm against the scan layout).

middle = image.shape[0] // 2   # row index at which the page is cut

image1 = image[:middle]   # top half
image2 = image[middle:]   # bottom half

# OpenCV images are BGR but matplotlib expects RGB, so convert for display
# only; image1/image2 themselves stay BGR for the later cv2 calls.
plt.imshow(cv2.cvtColor(image1, cv2.COLOR_BGR2RGB))

plt.figure()
plt.imshow(cv2.cvtColor(image2, cv2.COLOR_BGR2RGB))



<matplotlib.image.AxesImage at 0x23e8f920390>

In [4]:
# Select which half of the page to process (image1 or image2) and binarize it.

setImage = image1
threshold = 170  # gray levels above this become 255 (white), the rest 0

# The half-page is converted to a single gray channel before thresholding.
grayImage = cv2.cvtColor(setImage, cv2.COLOR_BGR2GRAY)
_, binaryImage = cv2.threshold(grayImage, threshold, 255, cv2.THRESH_BINARY)
plt.imshow(binaryImage)



<matplotlib.image.AxesImage at 0x23ebc22dd30>

In [14]:
# Build a mask of the darkest strokes (the printed numbers) and thicken it so
# that it fully covers them.

threshold = 45
kernel = np.ones((3, 3), dtype=np.uint8)

# THRESH_BINARY_INV marks every pixel at or below the threshold as 255, i.e.
# the dark strokes become the foreground directly.
_, number = cv2.threshold(grayImage, threshold, 255, cv2.THRESH_BINARY_INV)
number = cv2.dilate(number, kernel, iterations=4)

plt.imshow(number)

<matplotlib.image.AxesImage at 0x23ebc27f358>

In [15]:
# Remove the (dilated) number strokes from the inverted line drawing.
#
# Both operands are uint8 images. NumPy's `-` wraps around modulo 256, so
# 0 - 255 evaluates to 1 and leaves stray non-zero pixels wherever the dilated
# number mask covers background. cv2.subtract saturates at 0 instead.

result = cv2.subtract(~binaryImage, number)
plt.imshow(result)


<matplotlib.image.AxesImage at 0x23e93e710f0>

In [16]:
# Find the outermost contours of the line drawing and paint them as filled
# shapes, producing a mask of everything enclosed by the outer outlines.

r, c = grayImage.shape
ext = np.zeros((r, c), dtype=np.uint8)

contour, _ = cv2.findContours(
    result, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

# Contour index -1 selects every contour; thickness -1 fills them.
ext = cv2.drawContours(ext, contour, -1, 255, -1)
plt.imshow(ext)

    

<matplotlib.image.AxesImage at 0x23e8f9263c8>

In [17]:
# Keep only the non-line pixels that lie inside the external contour.
#
# `~result` and `ext` are both uint8 masks. Combining them with `*` wraps
# modulo 256 (255 * 255 -> 1), which gave a 0/1 image whose inverse was almost
# uniformly white in the preview below. A bitwise AND yields a proper 0/255
# mask; the set of non-zero (foreground) pixels is the same for these masks.

imaIn = cv2.bitwise_and(~result, ext)
kernel = np.ones((3, 3), np.uint8)
# splitArea is only displayed in this cell: the separating lines are thickened
# so that they are easy to see.
splitArea = cv2.dilate(~imaIn, kernel, iterations=3)
plt.imshow(splitArea)

<matplotlib.image.AxesImage at 0x23e93e71c50>

In [18]:
# Give every connected region of the mask its own integer label. Label 0 is
# the background, which is why it is left out of the reported count.

sections, labels = cv2.connectedComponents(imaIn)
print('Number of sections: {}'.format(sections - 1))
plt.imshow(labels)

Number of sections: 78


<matplotlib.image.AxesImage at 0x23ebc271080>

In [10]:
# Save every sufficiently large labelled region as its own image: the selected
# half-page masked to that region, named after the region's pixel count.

# Pixel count of every label, computed in one pass over the label image
# instead of one full-image comparison per label.
areas = np.bincount(labels.ravel(), minlength=sections)

for i in range(sections):
    area = areas[i]
    if area <= 200:
        continue  # regions of 200 pixels or fewer are not saved

    print ('Area ', str(area))
    # Build the mask straight from the comparison. The previous in-place
    # rewrite (zero everything that is not label i, then set label i to 255)
    # turned the WHOLE mask white for label 0, because after the zeroing step
    # every pixel equals 0.
    newSection = np.where(labels == i, 255, 0).astype(np.uint8)
    newImage = cv2.bitwise_and(setImage, setImage, mask=newSection)
    # cv2.imwrite reports failure (e.g. a missing folder) by returning False
    # rather than raising, so check it before claiming success.
    if not cv2.imwrite(pathSave + str(area) + '.png', newImage):
        raise IOError('Could not write ' + pathSave + str(area) + '.png')

print('Images saved in folder')



Area  13691528
Area  582623
Area  1852138
Area  156423
Area  227281
Area  162416
Area  177788
Area  357215
Area  186861
Images saved in folder


<h3 align="center">Section for text processing - One image</h3> 

In this part it is possible to load a single image and extract its text.

In [22]:
# Load one of the section images written to pathSave.
imageName = '357215.png'
image = cv2.imread(pathSave + imageName)
# cv2.imread returns None (it does not raise) when the file cannot be read.
if image is None:
    raise FileNotFoundError('Could not read image: ' + pathSave + imageName)
# Convert BGR (OpenCV) to RGB (matplotlib) for display only.
plt.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))

<matplotlib.image.AxesImage at 0x23e93d6bcc0>

In [25]:
# Binarize the section image, thicken its dark marks slightly and label the
# resulting connected blobs.

threshold = 200
kernel = np.ones((2, 2), dtype=np.uint8)

grayImage = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
_, binaryImage = cv2.threshold(grayImage, threshold, 255, cv2.THRESH_BINARY)

# Invert first so that the dark marks are the foreground being dilated.
splitArea = cv2.dilate(cv2.bitwise_not(binaryImage), kernel, iterations=2)
plt.imshow(splitArea)

sections, labels = cv2.connectedComponents(splitArea)
print('Number of sections: {}'.format(sections - 1))
plt.imshow(labels)



Number of sections: 23


<matplotlib.image.AxesImage at 0x23e93d83908>

In [24]:
# Keep only the blobs whose pixel count lies strictly between 200 and 10000
# and save each one inverted (dark glyph on a white background) for OCR.

# One pass over the label image gives the pixel count of every label.
areas = np.bincount(labels.ravel(), minlength=sections)

for i in range(sections):
    area = areas[i]
    if not (200 < area < 10000):
        continue

    print ('Area ', str(area))
    # Mask built directly from the comparison; the former two-step in-place
    # rewrite produced an all-white mask whenever i was 0.
    number = np.where(labels == i, 255, 0).astype(np.uint8)
    # NOTE: every matching blob is written to the same file name, so when
    # several blobs pass the size filter the last one wins.
    cv2.imwrite( pathNumbers + imageName[:-4] + '.png', ~number)

plt.imshow(binaryImage)

Area  8480


<matplotlib.image.AxesImage at 0x23e93d49f28>

In [26]:
# Run the Tesseract command-line tool on the saved glyph image and read back
# the text it recognised.

import os
import subprocess

baseName = imageName[:-4]   # file name without the '.png' extension

# The arguments are passed as a list (no shell involved), so paths containing
# spaces or shell metacharacters are handled safely. check=True raises if
# Tesseract exits with an error instead of silently going on to read a stale
# or missing .txt file.
commandLine = ['tesseract', pathNumbers + baseName + '.png',
               pathTexts + baseName, '-l', 'eng', '--psm', '6']
subprocess.run(commandLine, check=True)

# Tesseract writes its output as UTF-8; decode it explicitly rather than with
# the platform default encoding, and close the file once it has been read.
with open(pathTexts + baseName + '.txt', "r", encoding='utf-8') as f:
    text = f.read()
print('Area ' + text + 'tiene ' + imageName[:-4])

Area E
tiene 357215


<h3 align="center">Section for text processing - One folder with images</h3> 

In this part it is possible to detect the text in a group of images.

In [101]:
import os

# Collect the names of all entries in the pathSave folder (the section images
# written earlier) and show them. os.listdir does not guarantee any order.
files = os.listdir(pathSave)
print (files)


['118411.png', '13672478.png', '2272593.png', '239763.png', '248540.png', '301128.png', '541542.png']


In [102]:
# OCR the label found in every section image listed in `files`.
#
# For each image: binarize, label the dark blobs, keep those whose pixel count
# lies strictly between 400 and 10000, write each one (slightly eroded and
# inverted) to pathNumbers, run Tesseract on it and print the recognised text.

import subprocess

# Loop-invariant settings.
threshold = 200
kernel = np.ones((2, 2), np.uint8)

for imageName in files:
    print ('Processing: ' + imageName)
    baseName = imageName[:-4]   # file name without the '.png' extension

    # The file name is the section's pixel count. Anything of 13,000,000 or
    # more is skipped (presumably the page background -- confirm).
    if int(baseName) >= 13000000:
        continue

    image = cv2.imread(pathSave + imageName)
    grayImage = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    _, binaryImage = cv2.threshold(grayImage, threshold, 255, cv2.THRESH_BINARY)
    splitArea = cv2.dilate(~binaryImage, kernel, iterations=2)
    sections, labels = cv2.connectedComponents(splitArea)

    # Pixel count of every label in a single pass over the label image.
    areas = np.bincount(labels.ravel(), minlength=sections)

    numberFile = pathNumbers + baseName + '.png'
    textBase = pathTexts + baseName

    for i in range(sections):
        area = areas[i]
        if not (400 < area < 10000):
            continue

        # Mask built directly from the comparison; the former two-step
        # in-place rewrite produced an all-white mask whenever i was 0.
        number = np.where(labels == i, 255, 0).astype(np.uint8)
        # Undo part of the earlier dilation before handing the glyph to OCR.
        numberErode = cv2.erode(number, kernel, iterations=1)
        cv2.imwrite(numberFile, ~numberErode)

        # List form avoids the shell; check=True surfaces Tesseract failures
        # instead of reading a stale or missing text file afterwards.
        subprocess.run(
            ['tesseract', numberFile, textBase, '-l', 'eng', '--psm', '6'],
            check=True)

        # Tesseract writes UTF-8. Reading it with the platform default
        # encoding garbles non-ASCII characters such as dashes.
        with open(textBase + '.txt', "r", encoding='utf-8') as f:
            text = f.read()
        print('Area: ' + text + 'Pixeles: ' + imageName[:-4] +'\n')


Processing: 118411.png
Area: 4
Pixeles: 118411

Processing: 13672478.png
Processing: 2272593.png
Processing: 239763.png
Area: 1
Pixeles: 239763

Processing: 248540.png
Area: 2
Pixeles: 248540

Processing: 301128.png
Area: 3
Pixeles: 301128

Processing: 541542.png
Area: o
Pixeles: 541542

Area: ——
Pixeles: 541542



Useful information about Tesseract commands:

tesseract --help-psm
Page segmentation modes:
  0    Orientation and script detection (OSD) only.
  1    Automatic page segmentation with OSD.
  2    Automatic page segmentation, but no OSD, or OCR.
  3    Fully automatic page segmentation, but no OSD. (Default)
  4    Assume a single column of text of variable sizes.
  5    Assume a single uniform block of vertically aligned text.
  6    Assume a single uniform block of text.
  7    Treat the image as a single text line.
  8    Treat the image as a single word.
  9    Treat the image as a single word in a circle.
 10    Treat the image as a single character.
 11    Sparse text. Find as much text as possible in no particular order.
 12    Sparse text with OSD.
 13    Raw line. Treat the image as a single text line,
       bypassing hacks that are Tesseract-specific.
 
 tesseract numero2.png outputbase -l eng --psm 6