<a href="https://colab.research.google.com/github/Yogender-Singh/Notebooks/blob/main/ocr/pytesseract_improving_ocr_image_processing.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### Installing Tesseract dependencies

In [None]:
%%bash
# refresh the package index first: on a freshly provisioned VM the
# cached lists can be stale, which makes "apt-get install" fail to
# locate or download packages
sudo apt-get update

# text-rendering libraries (ICU, Pango, Cairo)
sudo apt-get install -y libicu-dev libpango1.0-dev libcairo2-dev
# build toolchain plus Leptonica
sudo apt-get install -y automake ca-certificates g++ git libtool libleptonica-dev make pkg-config
# image format libraries
sudo apt-get install -y libpng-dev libjpeg8-dev libtiff5-dev zlib1g-dev

### Compiling and Installing Tesseract `v4.1.1` from source

In [None]:
%%bash
# abort at the first failing step so that a broken configure/make is
# not silently followed by "make install" of a half-built tree
set -e

# remove any source tree left by a previous run; otherwise "unzip"
# prompts about existing files and "mv" nests the new tree *inside*
# the old "tesseract" directory
sudo rm -rf tesseract tesseract-4.1.1

# fetch and unpack the 4.1.1 source archive
wget -O tesseract.zip https://github.com/tesseract-ocr/tesseract/archive/4.1.1.zip
unzip -o tesseract.zip
mv tesseract-4.1.1 tesseract

# configure, build and install, then refresh the shared-library cache
cd tesseract
./autogen.sh
./configure
make
sudo make install
sudo ldconfig

# Python wrapper around the tesseract binary
pip install pytesseract

In [None]:
%%bash
# stop on the first error instead of continuing with a partial setup
set -e

# write each model to an explicit path: this does not depend on a
# successful "cd", and re-running the cell overwrites the file rather
# than leaving an "eng.traineddata.1" copy next to it
TESSDATA_DIR=/usr/local/share/tessdata
sudo mkdir -p "$TESSDATA_DIR"

for lang in eng osd; do
	sudo wget -O "$TESSDATA_DIR/$lang.traineddata" \
		"https://github.com/tesseract-ocr/tessdata_fast/raw/master/$lang.traineddata"
done

### Download the code zip file

In [None]:
!wget qq https://github.com/Yogender-Singh/Notebooks/raw/main/images/ocr-image-processing.zip
!unzip -qq ocr-image-processing.zip

### Import Packages

In [None]:
# import the necessary packages
from matplotlib import pyplot as plt
import numpy as np
import pytesseract
import argparse
import imutils
import cv2

### Function to display images in Jupyter Notebooks and Google Colab

In [None]:
def plt_imshow(title, image):
	"""Show an OpenCV image inline using matplotlib.

	Args:
		title: text placed above the figure.
		image: array as produced by OpenCV -- either a single-channel
			(grayscale / binary mask) image or a BGR / BGRA image.
	"""
	# matplotlib expects RGB channel order. Single-channel images have
	# no channel order to swap, so expand them with GRAY2RGB; the
	# BGR2RGB conversion is only defined for 3- and 4-channel input.
	if image.ndim == 2 or image.shape[2] == 1:
		image = cv2.cvtColor(image, cv2.COLOR_GRAY2RGB)
	else:
		image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

	plt.imshow(image)
	plt.title(title)
	plt.grid(False)
	plt.show()

### Implementing an Image Processing Pipeline for OCR

In [None]:
# The command-line version of this script read the input image path
# from an argparse option ("-i" / "--image"). A notebook has no command
# line, so the equivalent "args" mapping is hard coded here instead.
args = dict(image="challenging_example.png")

In [None]:
# load the input image; cv2.imread signals an unreadable or missing
# file by returning None rather than raising, so check explicitly and
# fail with a message that names the offending path
image = cv2.imread(args["image"])
if image is None:
	raise FileNotFoundError(
		"could not read input image: {!r}".format(args["image"]))

# convert the image to grayscale
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

# threshold the image using Otsu's thresholding method (inverted
# binary output)
thresh = cv2.threshold(gray, 0, 255,
	cv2.THRESH_BINARY_INV | cv2.THRESH_OTSU)[1]
plt_imshow("Otsu", thresh)

In [None]:
# distance transform: every pixel is replaced by its L2 distance to
# the nearest zero pixel of the thresholded image
dist = cv2.distanceTransform(src=thresh, distanceType=cv2.DIST_L2,
	maskSize=5)

# min-max normalize the distances in place to [0, 1], then rescale
# to [0, 255] and cast to unsigned 8-bit so the result can be shown
# and thresholded
dist = cv2.normalize(dist, dist, alpha=0, beta=1.0,
	norm_type=cv2.NORM_MINMAX)
dist = (255 * dist).astype(np.uint8)
plt_imshow("Dist", dist)

# binarize the rescaled distance map with Otsu's method
dist = cv2.threshold(dist, thresh=0, maxval=255,
	type=cv2.THRESH_BINARY | cv2.THRESH_OTSU)[1]
plt_imshow("Dist Otsu", dist)

In [None]:
# run a morphological "opening" with a 7x7 elliptical structuring
# element in order to disconnect components in the image
kernel = cv2.getStructuringElement(shape=cv2.MORPH_ELLIPSE,
	ksize=(7, 7))
opening = cv2.morphologyEx(src=dist, op=cv2.MORPH_OPEN, kernel=kernel)
plt_imshow("Opening", opening)

In [None]:
# extract the external contours of the opened image (working on a
# copy), normalizing the return value across OpenCV versions
cnts = imutils.grab_contours(
	cv2.findContours(opening.copy(), cv2.RETR_EXTERNAL,
		cv2.CHAIN_APPROX_SIMPLE))

# contours that are large enough to be characters we want to OCR
chars = []

for c in cnts:
	# bounding box of the current contour
	(x, y, w, h) = cv2.boundingRect(c)

	# skip anything narrower than 35px or shorter than 100px
	if w < 35 or h < 100:
		continue

	# big enough: keep it as a character
	chars.append(c)

In [None]:
# without at least one character contour there is nothing to build a
# hull from; fail with a clear message instead of an opaque error
# from np.vstack
if len(chars) == 0:
	raise ValueError("no character-sized contours were found; "
		"cannot compute the convex hull")

# stack the points of all character contours into one array and
# compute the convex hull that encloses them
chars = np.vstack(chars)
hull = cv2.convexHull(chars)

# allocate memory for the convex hull mask, draw the filled convex
# hull on it, and then enlarge it via a dilation
mask = np.zeros(image.shape[:2], dtype="uint8")
cv2.drawContours(mask, [hull], -1, 255, -1)
mask = cv2.dilate(mask, None, iterations=2)
plt_imshow("Mask", mask)

# take the bitwise AND of the opening image with itself, restricted
# to the mask, to reveal *just* the characters in the image
final = cv2.bitwise_and(opening, opening, mask=mask)

In [None]:
# Tesseract configuration: page segmentation mode 8 with the
# recognized characters whitelisted to the digits 0-9
options = "--psm 8 -c tessedit_char_whitelist=0123456789"

# run OCR on the masked image and print the recognized text
text = pytesseract.image_to_string(image=final, config=options)
print(text)

# display the image that was handed to Tesseract
plt_imshow("Final", final)