# Reading LOTR

NOTE: The ground-truth text can be extracted from the PDF at the following link:
https://gosafir.com/mag/wp-content/uploads/2019/12/Tolkien-J.-The-lord-of-the-rings-HarperCollins-ebooks-2010.pdf
However, headers, footers, and page numbers will need to be removed from the extracted text.

In [1]:
import cv2
import numpy as np
from matplotlib import pyplot as plt

import pytesseract
from pytesseract import Output

# img = cv2.imread('image.jpg')

# get grayscale image
def get_grayscale(image):
    """Convert a BGR image to grayscale via ``cv2.cvtColor``."""
    grayscale = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    return grayscale

# noise removal
def remove_noise(image):
    """Smooth *image* with a median blur using an aperture size of 5."""
    aperture_size = 5
    blurred = cv2.medianBlur(image, aperture_size)
    return blurred
 
#thresholding
def thresholding(image):
    """Binarize *image* with a binary threshold whose level is chosen by Otsu's method."""
    otsu_binary = cv2.THRESH_BINARY + cv2.THRESH_OTSU
    # cv2.threshold returns a pair; the thresholded image is its second item.
    result = cv2.threshold(image, 0, 255, otsu_binary)
    return result[1]

#dilation
def dilate(image):
    """Dilate *image* once with a 5x5 all-ones kernel."""
    structuring_element = np.ones((5, 5), dtype=np.uint8)
    dilated = cv2.dilate(image, structuring_element, iterations=1)
    return dilated
    
#erosion
def erode(image):
    """Erode *image* once with a 5x5 all-ones kernel."""
    structuring_element = np.ones((5, 5), dtype=np.uint8)
    eroded = cv2.erode(image, structuring_element, iterations=1)
    return eroded

#opening - erosion followed by dilation
def opening(image):
    """Apply a morphological opening to *image* with a 5x5 all-ones kernel."""
    square_kernel = np.ones((5, 5), dtype=np.uint8)
    opened = cv2.morphologyEx(image, cv2.MORPH_OPEN, square_kernel)
    return opened

#canny edge detection
def canny(image):
    """Run Canny edge detection on *image* with thresholds 100 and 200."""
    lower_threshold, upper_threshold = 100, 200
    edges = cv2.Canny(image, lower_threshold, upper_threshold)
    return edges

#skew correction
def deskew(image):
    """Rotate *image* about its center to undo the estimated skew.

    The skew is estimated from the minimum-area rectangle that encloses the
    coordinates of every element of *image* that is greater than zero.

    Args:
        image: Array whose first two dimensions are (height, width).
            NOTE(review): presumably a single-channel image in which the
            text pixels are the non-zero ones -- confirm with callers.

    Returns:
        The rotated image with the same width and height as the input.
        If no element is greater than zero, the skew cannot be estimated
        and an unrotated copy of *image* is returned.
    """
    coords = np.column_stack(np.where(image > 0))
    if coords.size == 0:
        # Nothing to fit a rectangle to; minAreaRect would fail on no points.
        return image.copy()

    angle = cv2.minAreaRect(coords)[-1]
    # The angle range reported by minAreaRect depends on the OpenCV release:
    # older releases use [-90, 0), newer ones (reportedly 4.5.1+) use (0, 90].
    # Both are folded onto the small correcting rotation here, so that a
    # nearly upright page is never rotated by close to 90 degrees.
    if angle < -45:
        angle = -(90 + angle)
    elif angle > 45:
        angle = 90 - angle
    else:
        angle = -angle

    (h, w) = image.shape[:2]
    center = (w // 2, h // 2)
    M = cv2.getRotationMatrix2D(center, angle, 1.0)
    # BORDER_REPLICATE fills the corners exposed by the rotation with the
    # nearest edge pixels rather than a constant colour.
    rotated = cv2.warpAffine(
        image, M, (w, h), flags=cv2.INTER_CUBIC, borderMode=cv2.BORDER_REPLICATE
    )
    return rotated

#template matching
def match_template(image, template):
    """Match *template* against *image* using the ``TM_CCOEFF_NORMED`` method."""
    method = cv2.TM_CCOEFF_NORMED
    match_map = cv2.matchTemplate(image, template, method)
    return match_map


In [2]:
# Load the photographed page and build the preprocessed variants used for OCR.
image_path = 'IMG_6974.jpg'
img = cv2.imread(image_path)
# cv2.imread reports failure by returning None rather than raising, which
# would otherwise surface later as an unrelated error inside cvtColor.
if img is None:
    raise OSError("cv2.imread could not load image: " + image_path)

gray = get_grayscale(img)
thresh = thresholding(gray)
# NOTE(review): the next two assignments rebind `opening` and `canny` from the
# helper functions to their results, so this cell fails if it is run a second
# time without redefining the helpers. The names are kept because a later
# cell reads them.
opening = opening(gray)
canny = canny(gray)

In [3]:
# establishing ground truth
# Read every line of the reference text, join the lines with single spaces,
# then drop the newline characters so the result is one long string.
with open('ground_truth.txt') as truth_file:
    ground_truth = list(truth_file)

ground_truth = " ".join(ground_truth)
ground_truth = ground_truth.replace('\n', '')
print(ground_truth)

hobbit ears to all the soft woodland noises about them. One thing he had noted, that in all the talk the name of Gollum had not once come up. He was glad, though he felt that it was too much to hope that he would never hear it again. He soon became aware also that though they walked alone, there were many men close at hand: not only Damrod and Mablung flitting in and out of the shadows ahead, but others on either side, all making their swift secret way to some appointed place. Once, looking suddenly back, as if some prickle of the skin told him that he was watched from behind, he thought he caught a brief glimpse of a small dark shape slipping behind a tree-trunk. He opened his mouth to speak and shut it again. 'I'm not sure of it,' he said to himself, 'and why should I remind them of the old villain, if they choose to forget him? I wish I could!'  So they passed on, until the woodlands grew thinner and the land began to fall more steeply. Then they turned aside again, to the right, an

In [4]:
# boxing words
# Ask Tesseract for per-detection data (bounding boxes, confidences, text)
# as a dict of parallel lists.
d = pytesseract.image_to_data(img, output_type=Output.DICT)
print(d.keys())

# Draw a green rectangle around every detection whose confidence exceeds 60.
# The confidence is parsed with float() rather than int(): depending on the
# Tesseract / pytesseract versions it can arrive as a decimal string such as
# '96.06', which int() rejects with ValueError.
# NOTE(review): the rectangles are drawn onto `img` itself, and a later cell
# passes `img` to OCR -- consider drawing on a copy instead.
n_boxes = len(d['text'])
for i in range(n_boxes):
    if float(d['conf'][i]) > 60:
        (x, y, w, h) = (d['left'][i], d['top'][i], d['width'][i], d['height'][i])
        img = cv2.rectangle(img, (x, y), (x + w, y + h), (0, 255, 0), 2)


dict_keys(['level', 'page_num', 'block_num', 'par_num', 'line_num', 'word_num', 'left', 'top', 'width', 'height', 'conf', 'text'])


In [6]:
# still need to research if this is the best distance metric to use
from Levenshtein import distance as levenshtein_distance

# OCR every image variant and score it by its edit distance to the ground
# truth. The values are distances, so a LOWER "similarity score" is better.
image_transformations = [img, gray, thresh, opening, canny]
string_results = []
similarity_scores = []
for image in image_transformations:
    ocr_text = pytesseract.image_to_string(image).replace('\n', ' ')
    # just removing the chapter number
    ocr_text = ocr_text.replace('THE WINDOW ON THE WEST 879', '')
    string_results.append(ocr_text)
    similarity_scores.append(levenshtein_distance(ground_truth, ocr_text))

for i, (ocr_text, score) in enumerate(zip(string_results, similarity_scores)):
    print("Image transformation: ", i)
    print("String result: ", ocr_text)
    print("Similarity score: ", score)
    print("\n")

# Distance from the ground truth to an empty string, i.e. the score obtained
# by recognising nothing at all.
print("Baseline similarity: ", levenshtein_distance(ground_truth, ""))

# best score
best_score = min(similarity_scores)
best_index = similarity_scores.index(best_score)
print("Best score: ", best_score)
print("Best score index: ", best_index)

best_reading = string_results[best_index]

Image transformation:  0
String result:    ,obbit ears t0-al the Soft woodland noises about them! One ‘hing he had noted, that in all the talk the name had not once come up. He was glad, though he felt that .- was too much to hope that he would never hear it again. He soon became aware also that though they walked alone, where were many men close at hand: not only Damrod and Mablung flitting in and out of the shadows ahead, but others all making their swift secret way to ap d place.  told him that he was watched from behind, he thought he caught a brief glimpse of a small dark shape slipping behind a tree-trunk. He opened his mouth to speak and shut it again. Tm not sure of it,’ he said to himself, ‘and why should I remind them of the old villain, if they choose to forget him?  I wish I could!’  the land began to fall more steeply. Then they  again, to the right, and quickly to a small river in a  ‘lex and dark box-woods. Looking west they could see, below them in a haze of light, lowl

In [1]:
from playsound import playsound
# Play the audio file "hello.mp3" (path is relative to the working directory).
playsound("hello.mp3")

# Converting the recognized text to speech

In [3]:
import gtts
from playsound import playsound

# Synthesize speech for best_reading, the OCR result with the lowest edit
# distance that an earlier cell selected.
# make request to google to get synthesis
tts = gtts.gTTS(best_reading)

# save the audio file
tts.save("best_reading.mp3")

# play the audio file that was just saved
playsound("best_reading.mp3")