<h1 align="center">Guide for detecting areas and text in an image</h1> 

Important things for the protocol:

1. Circles in the paper have to be really separate. It will be easy split both circles.
2. Unncessary letters and lines for the processing have to be in light color. It will help in umbralization process. 
3. It is important to use thick marks for relevant lines
4. Stickers can't cut the lines because after it will not be possible separete this section
5. Script works better with uppercase letters while they are aligned (we tryed with numbers and lowercase letter)

<img src= "sources/processing_detect_text.png" />

In [1]:
import numpy as np
import cv2 
import matplotlib.pyplot as plt
%matplotlib qt5

In [2]:
# Open image 
path = r'scanner/'
pathSave = r'./areas/'
pathNumbers = r'./numbers/'
pathTexts = r'./texts/'
imageName = 'A4.jpg'
image = cv2.imread(path + imageName) 
plt.imshow(image)

<matplotlib.image.AxesImage at 0x233ef53fa20>

In [3]:
# Split images in the same page

middle = int(len(image)/2) 

image1 = image[:middle]
plt.imshow(image1)

image2 = image[middle:]

plt.imshow(image1)



<matplotlib.image.AxesImage at 0x120063be978>

In [4]:
# Select image (image1 or image 2) for processing and binarization 

setImage = image1
threshold = 170
# 170
grayImage = cv2.cvtColor(setImage, cv2.COLOR_BGR2GRAY)
_, binaryImage = cv2.threshold(grayImage, threshold, 255, cv2.THRESH_BINARY)
plt.imshow(binaryImage)



<matplotlib.image.AxesImage at 0x120063e6860>

In [5]:
# Binarization for getting numbers

threshold = 40
# 45
_, number = cv2.threshold(grayImage, threshold, 255, cv2.THRESH_BINARY)
kernel = np.ones((3, 3), np.uint8)
number = cv2.dilate(~number, kernel, iterations=4)

plt.imshow(number)

<matplotlib.image.AxesImage at 0x120717fa0f0>

In [6]:
# Delete number in the image 

result = ~binaryImage - (number)
plt.imshow(result)


<matplotlib.image.AxesImage at 0x120063fd1d0>

In [7]:
# Get exteral contour and separate main circle 

r,c = np.shape(grayImage)
ext = np.zeros((r,c), np.dtype('uint8'))
    
contour,_ = cv2.findContours(result, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
ext = cv2.drawContours(ext, contour, -1, 255, -1)
plt.imshow(ext)

<matplotlib.image.AxesImage at 0x1200643b7b8>

In [9]:
# delete small points in the image
kernel = np.ones((2, 2), np.uint8)
extErode= cv2.erode(ext, kernel, iterations=5)
extDilate = cv2.dilate(extErode, kernel, iterations=5)
plt.imshow(extDilate)

<matplotlib.image.AxesImage at 0x1200d57ba58>

In [10]:
# find where the circle is and make a cropped region
points = np.argwhere(extDilate==255) # find where the black pixels are
points = np.fliplr(points) # store them in x,y coordinates instead of row,col indices
x, y, w, h = cv2.boundingRect(points) # create a rectangle around those points
x, y, w, h = x-10, y-10, w+20, h+20 # make the box a little bigger
crop = ext[y:y+h, x:x+w] # create a cropped region of the gray image
plt.imshow(crop)

<matplotlib.image.AxesImage at 0x1200d5a2fd0>

In [23]:
# Mask with external contour 
resultCropped = result[y:y+h, x:x+w]
imaIn = ~result[y:y+h, x:x+w]*ext[y:y+h, x:x+w]
kernel = np.ones((3, 3), np.uint8)
splitArea = cv2.dilate(~imaIn, kernel, iterations=3)
plt.imshow(splitArea)

<matplotlib.image.AxesImage at 0x28533904588>

In [24]:
# Identify secction and put specific lables for each one

sections, labels = cv2.connectedComponents(imaIn)
print('Number of sections: ' + str(sections-1))
plt.imshow(labels)

Number of sections: 20


<matplotlib.image.AxesImage at 0x285338ba550>

In [25]:
# Get each section, make a mask with original image and save it
for i in range(sections):
    area = np.sum(labels==i)
    if (area > 200):
        print ('Area ', str(area))
        section =  labels.copy()
        section[section != i] = 0
        section[section == i] = 255
        newSection = section.astype(np.uint8)
        newImage = cv2.bitwise_and(setImage[y:y+h, x:x+w], setImage[y:y+h, x:x+w], mask=newSection)
        cv2.imwrite( pathSave + str(area) +'.png', newImage)

print('Images saved in folder')



Area  1401482
Area  3335282
Area  431340
Area  354293
Images saved in folder


<h3 align="center">Secction for text processing - One image</h3> 

In this part is posible to load an image and get the text

In [12]:
# Load letter image  
imageName = '354293.png'
image = cv2.imread(pathSave + imageName) 
plt.imshow(image)

<matplotlib.image.AxesImage at 0x1200d5e2400>

In [16]:
# Separate sections
 
threshold = 150
grayImage = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
_, binaryImage = cv2.threshold(grayImage, threshold, 255, cv2.THRESH_BINARY)
kernel = np.ones((2, 2), np.uint8)
splitArea = cv2.dilate(~binaryImage, kernel, iterations=1)

r,c = np.shape(grayImage)
ext = np.zeros((r,c), np.dtype('uint8'))

contour,_ = cv2.findContours(~splitArea, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
ext = cv2.drawContours(ext, contour, -1, 255, -1)

imaIn = (splitArea*ext)
imaInErode = cv2.erode(imaIn, kernel, iterations=2)
cv2.imwrite( pathNumbers + imageName[:-4] + '.png', ~(imaInErode*255))

plt.imshow(imaInErode)

<matplotlib.image.AxesImage at 0x1200d649b70>

In [17]:
# Use tesseract library for getting text 

import os

pathNumbers = './numbers/'
commandLine = 'tesseract ' + pathNumbers + imageName[:-4] + '.png' + ' ' + pathTexts + imageName[:-4] + ' -l eng --psm 6'
os.system(commandLine)
f = open(pathTexts + imageName[:-4] + '.txt', "r")
text = f.read()
print('Area ' + text + 'tiene ' + imageName[:-4])

Area F4
tiene 354293


Useful information for tesserat commands:

tesseract --help-psm
Page segmentation modes:
  0    Orientation and script detection (OSD) only.
  1    Automatic page segmentation with OSD.
  2    Automatic page segmentation, but no OSD, or OCR.
  3    Fully automatic page segmentation, but no OSD. (Default)
  4    Assume a single column of text of variable sizes.
  5    Assume a single uniform block of vertically aligned text.
  6    Assume a single uniform block of text.
  7    Treat the image as a single text line.
  8    Treat the image as a single word.
  9    Treat the image as a single word in a circle.
 10    Treat the image as a single character.
 11    Sparse text. Find as much text as possible in no particular order.
 12    Sparse text with OSD.
 13    Raw line. Treat the image as a single text line,
       bypassing hacks that are Tesseract-specific.
 
 tesseract numero2.png outputbase -l eng --psm 6