In [1]:
import os
import os.path
import shutil
from os import path
from pathlib import Path

import pytesseract
from PIL import Image

# Locate the tesseract executable without tying the notebook to one machine.
# Resolution order:
#   1. the TESSERACT_CMD environment variable, if it is set;
#   2. a `tesseract` executable found on PATH;
#   3. the default Windows install location (the value originally hard-coded here).
# Example override: TESSERACT_CMD=C:\Program Files (x86)\Tesseract-OCR\tesseract
pytesseract.pytesseract.tesseract_cmd = (
    os.environ.get('TESSERACT_CMD')
    or shutil.which('tesseract')
    or r'C:\Program Files\Tesseract-OCR\tesseract.exe'
)

In [2]:
# Simple image to string.
# `with` closes each file handle as soon as OCR is done instead of leaving it
# open for the lifetime of the kernel.
with Image.open('guy_with_letters.png') as letters_image:
    print(pytesseract.image_to_string(letters_image))

# Open the second image once and reuse it for both the text and the
# per-character box output.
with Image.open('image_text_2.jfif') as text_image:
    print(pytesseract.image_to_string(text_image))
    print(pytesseract.image_to_boxes(text_image))



Every morning
is special and
you will not get
them again. Sle,
: a"

  

“~ Good morning my
dear friend!
E 8 164 16 183 0
v 14 165 23 173 0
e 21 160 29 183 0
r 22 160 45 173 0
y 36 160 44 183 0
m 49 165 68 173 0
o 59 160 67 176 0
r 66 160 74 176 0
n 74 160 82 176 0
i 68 160 104 176 0
n 89 160 97 176 0
g 96 160 104 176 0
i 14 140 17 151 0
s 18 140 24 148 0
s 29 135 33 153 0
p 32 135 39 153 0
e 38 135 47 153 0
c 27 135 66 151 0
i 52 135 59 153 0
a 58 135 64 153 0
l 68 140 98 153 0
a 78 130 84 164 0
n 83 130 90 164 0
d 89 130 101 164 0
y 0 105 10 134 0
o 4 111 25 124 0
u 16 105 25 134 0
w 31 115 43 124 0
i 38 105 44 134 0
l 44 115 47 126 0
l 49 115 57 128 0
n 62 105 71 134 0
o 62 115 78 124 0
t 78 115 83 126 0
g 89 111 94 125 0
e 94 115 102 123 0
t 102 115 108 126 0
t 13 81 23 111 0
h 23 81 31 111 0
e 17 90 46 103 0
m 38 81 46 111 0
a 53 81 61 111 0
g 54 86 69 100 0
a 68 90 76 99 0
i 77 90 80 102 0
n 80 81 86 111 0
. 81 90 91 99 0
S 219 83 247 122 0
l 236 74 246 126 0
e 248 93 263 111 0


In [3]:
# Pre-processing to improve tesseract accuracy
import cv2
import numpy as np


# get grayscale image
def get_grayscale(image):
    """Return a grayscale version of a BGR image.

    Args:
        image: Image in BGR channel order.

    Returns:
        The result of ``cv2.cvtColor`` with ``cv2.COLOR_BGR2GRAY``.
    """
    conversion_code = cv2.COLOR_BGR2GRAY
    gray_image = cv2.cvtColor(image, conversion_code)
    return gray_image

# noise removal
def remove_noise(image, ksize=5):
    """Reduce noise in ``image`` with a median blur.

    Args:
        image: Image to filter.
        ksize: Aperture size passed to ``cv2.medianBlur``. Must be an odd
            integer greater than 1. Defaults to 5, the value that was
            previously hard-coded.

    Returns:
        The blurred image returned by ``cv2.medianBlur``.

    Raises:
        ValueError: If ``ksize`` is even or smaller than 3.
    """
    # Fail early with a readable message instead of an OpenCV assertion error.
    if ksize < 3 or ksize % 2 == 0:
        raise ValueError(f"ksize must be an odd integer >= 3, got {ksize!r}")
    return cv2.medianBlur(image, ksize)
 
#thresholding
def thresholding(image):
    """Binarise ``image`` using binary thresholding combined with Otsu's flag.

    Args:
        image: Image to threshold.

    Returns:
        The second element of the tuple returned by ``cv2.threshold``,
        i.e. the thresholded image; the first element is discarded.
    """
    mode = cv2.THRESH_BINARY + cv2.THRESH_OTSU
    # 0 is passed as the threshold and 255 as the maximum output value.
    _level, binary = cv2.threshold(image, 0, 255, mode)
    return binary

#dilation
def dilate(image, kernel_size=5, iterations=1):
    """Dilate ``image`` with a square all-ones kernel.

    Args:
        image: Image to dilate.
        kernel_size: Side length of the square ``uint8`` structuring
            element. Defaults to 5, the value that was previously hard-coded.
        iterations: Number of times dilation is applied. Defaults to 1.

    Returns:
        The image returned by ``cv2.dilate``.
    """
    kernel = np.ones((kernel_size, kernel_size), np.uint8)
    return cv2.dilate(image, kernel, iterations=iterations)
    
#erosion
def erode(image, kernel_size=5, iterations=1):
    """Erode ``image`` with a square all-ones kernel.

    Args:
        image: Image to erode.
        kernel_size: Side length of the square ``uint8`` structuring
            element. Defaults to 5, the value that was previously hard-coded.
        iterations: Number of times erosion is applied. Defaults to 1.

    Returns:
        The image returned by ``cv2.erode``.
    """
    kernel = np.ones((kernel_size, kernel_size), np.uint8)
    return cv2.erode(image, kernel, iterations=iterations)

#opening - erosion followed by dilation
def opening(image, kernel_size=5):
    """Apply a morphological opening (erosion followed by dilation).

    Args:
        image: Image to process.
        kernel_size: Side length of the square all-ones ``uint8``
            structuring element. Defaults to 5, the value that was
            previously hard-coded.

    Returns:
        The image returned by ``cv2.morphologyEx`` with ``cv2.MORPH_OPEN``.
    """
    kernel = np.ones((kernel_size, kernel_size), np.uint8)
    return cv2.morphologyEx(image, cv2.MORPH_OPEN, kernel)

#canny edge detection
def canny(image, threshold1=100, threshold2=200):
    """Run Canny edge detection on ``image``.

    Args:
        image: Image to process.
        threshold1: First hysteresis threshold passed to ``cv2.Canny``.
            Defaults to 100, the value that was previously hard-coded.
        threshold2: Second hysteresis threshold passed to ``cv2.Canny``.
            Defaults to 200, the value that was previously hard-coded.

    Returns:
        The edge map returned by ``cv2.Canny``.
    """
    return cv2.Canny(image, threshold1, threshold2)

#skew correction
def deskew(image):
    """Rotate ``image`` to undo the skew of its non-zero content.

    The skew is estimated from the minimum-area rectangle around every
    element of ``image`` that is greater than zero, so foreground pixels are
    assumed to be the non-zero ones (TODO confirm against callers; a
    multi-channel image would yield 3-column coordinates here).

    Args:
        image: Image array; its first two dimensions are used as
            (height, width).

    Returns:
        The rotated image, same width and height as the input. If the image
        has no non-zero element it is returned unchanged.
    """
    coords = np.column_stack(np.where(image > 0))
    if coords.size == 0:
        # No foreground to fit a rectangle to; cv2.minAreaRect would fail.
        return image
    angle = cv2.minAreaRect(coords)[-1]
    # The rectangle angle is only meaningful modulo 90 degrees. OpenCV before
    # 4.5.1 reports it in [-90, 0), later releases in (0, 90]. Fold either
    # convention into a correction within [-45, 45]; without the middle
    # branch an upright image on newer OpenCV would be rotated by 90 degrees.
    if angle < -45:
        angle = -(90 + angle)
    elif angle > 45:
        angle = 90 - angle
    else:
        angle = -angle
    (h, w) = image.shape[:2]
    center = (w // 2, h // 2)
    M = cv2.getRotationMatrix2D(center, angle, 1.0)
    # Cubic interpolation; border pixels are replicated to fill the corners
    # uncovered by the rotation.
    rotated = cv2.warpAffine(image, M, (w, h), flags=cv2.INTER_CUBIC, borderMode=cv2.BORDER_REPLICATE)
    return rotated

#template matching
def match_template(image, template):
    """Match ``template`` against ``image`` with ``cv2.TM_CCOEFF_NORMED``.

    Args:
        image: Image to search in.
        template: Patch to search for.

    Returns:
        The result returned by ``cv2.matchTemplate``.
    """
    method = cv2.TM_CCOEFF_NORMED
    scores = cv2.matchTemplate(image, template, method)
    return scores

In [4]:
# Compare OCR output across pre-processing variants of the same image.
image = cv2.imread('image_text_2.jfif')
if image is None:
    # cv2.imread returns None for a missing or unreadable file instead of raising.
    raise FileNotFoundError("Could not read image 'image_text_2.jfif'")
gray = get_grayscale(image)
print(pytesseract.image_to_string(gray))
thresh = thresholding(gray)
print(pytesseract.image_to_string(thresh)) # Seems to work best.
# Results get names that differ from the helper functions: assigning to
# `opening` / `canny` would replace the functions with arrays and make this
# cell fail with "not callable" on any re-run.
opened = opening(gray)
print(pytesseract.image_to_string(opened))
edges = canny(gray)
print(pytesseract.image_to_string(edges))

Every morning
is special and
you will not get
them again.

ge

 

Se.
Fay
Good morning my
dear friend!
Every morning
is special and
you will not get
them again.

pe

 

Se.
fay
Good morning my
dear Friend!
veel gt

(Ao an
a eae
occu) a
Peer ae
i
Demat 7 Sie.

oe ares
aL)
dear Friend


In [5]:
# Plot bounding boxes around each character

import cv2
import pytesseract

# Original image
image = cv2.imread('image_text_2.jfif')
if image is None:
    # cv2.imread returns None for a missing or unreadable file instead of raising.
    raise FileNotFoundError("Could not read image 'image_text_2.jfif'")
# Image to find bounding boxes
gray = get_grayscale(image)
thresh = thresholding(gray)

# Only height/width are needed; slicing avoids an unused channel variable and
# also works for a single-channel image.
h, w = image.shape[:2]
boxes = pytesseract.image_to_boxes(thresh)
for box_line in boxes.splitlines():
    # Each line is "<char> <x1> <y1> <x2> <y2> <page>".
    fields = box_line.split()
    if len(fields) < 5:
        continue  # skip blank or malformed lines
    left, bottom, right, top = (int(value) for value in fields[1:5])
    # The box y values are subtracted from the image height to convert them
    # into OpenCV's top-left-origin coordinates.
    image = cv2.rectangle(image, (left, h - bottom), (right, h - top), (0, 255, 0), 2)

cv2.imshow('img', image)
cv2.waitKey(0)
# Close the window after the key press so it does not linger unresponsive.
cv2.destroyAllWindows()

-1

In [6]:
# Plot bounding box around words
from pytesseract import Output

img = cv2.imread('image_text_2.jfif')
if img is None:
    # cv2.imread returns None for a missing or unreadable file instead of raising.
    raise FileNotFoundError("Could not read image 'image_text_2.jfif'")

gray = get_grayscale(img)
thresh = thresholding(gray)
d = pytesseract.image_to_data(thresh, output_type=Output.DICT)
print(d.keys())

n_boxes = len(d['text'])
for i in range(n_boxes):
    # Depending on the pytesseract/tesseract version, conf can be an int, a
    # float or a decimal string such as '96.28'. int() raises ValueError on
    # the string form; float() accepts all three.
    if float(d['conf'][i]) > 60:
        (x, y, w, h) = (d['left'][i], d['top'][i], d['width'][i], d['height'][i])
        img = cv2.rectangle(img, (x, y), (x + w, y + h), (0, 255, 0), 2)

cv2.imshow('img', img)
cv2.waitKey(0)
# Close the window after the key press so it does not linger unresponsive.
cv2.destroyAllWindows()

dict_keys(['level', 'page_num', 'block_num', 'par_num', 'line_num', 'word_num', 'left', 'top', 'width', 'height', 'conf', 'text'])


-1

In [7]:
# Show the grayscale image and block until a key is pressed.
cv2.imshow('img', gray)
cv2.waitKey(0)
# Close the window after the key press so it does not linger unresponsive.
cv2.destroyAllWindows()

-1