In [1]:
import numpy as np
import pytesseract
import cv2
from east_detector import EASTDetector
import matplotlib.pyplot as plt

In [2]:
def load_image(name):
    """
    Read an image file from disk and return it in RGB channel order.

    The array returned by cv2.imread is converted with COLOR_BGR2RGB so that
    it can be handed directly to matplotlib / pytesseract.

    Parameters
    ----------
    name : path of the image file, passed unchanged to cv2.imread.

    Returns
    -------
    The result of cv2.cvtColor(image, cv2.COLOR_BGR2RGB).

    Raises
    ------
    IOError
        If cv2.imread returns None (missing, unreadable or undecodable file).
    """
    image = cv2.imread(name)
    # imread signals failure by returning None rather than raising; fail here
    # with the offending path instead of letting cvtColor raise an opaque error.
    if image is None:
        raise IOError("Could not read image: {!r}".format(name))
    return cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

In [3]:
def show_images(images, cols=2, titles=None):
    """
    Display a list of images as a grid of subplots in a single figure.

    Parameters
    ----------
    images : sequence of arrays; each must expose ``ndim`` and be accepted by
             ``Axes.imshow``. Two-dimensional arrays are drawn in grayscale.
    cols   : number of columns in the grid. The number of rows is
             ceil(len(images) / cols).
    titles : optional sequence of subplot titles, one per image. Defaults to
             'Image (1)', 'Image (2)', ...

    Raises
    ------
    AssertionError if ``titles`` is given and its length differs from ``images``.
    ValueError if ``cols`` is smaller than 1.
    """
    assert titles is None or len(images) == len(titles)
    n_images = len(images)
    if n_images == 0:
        # Nothing to draw; scaling the figure size by zero would be invalid.
        return
    if cols < 1:
        raise ValueError("cols must be at least 1")
    if titles is None:
        titles = ['Image (%d)' % i for i in range(1, n_images + 1)]

    # add_subplot expects integer (nrows, ncols, index); np.ceil returns a float.
    rows = int(np.ceil(n_images / float(cols)))

    fig = plt.figure()
    for n, (image, title) in enumerate(zip(images, titles)):
        ax = fig.add_subplot(rows, cols, n + 1)
        if image.ndim == 2:
            # Per-axes colormap instead of plt.gray(), which would change the
            # global default for every later plot.
            ax.imshow(image, cmap='gray')
        else:
            ax.imshow(image)
        ax.set_title(title)
        ax.axis('off')
    fig.set_size_inches(np.array(fig.get_size_inches()) * n_images)
    # Lay out only after all axes exist and the final size is known.
    fig.tight_layout()
    plt.show()

In [4]:
def narrow(image, convert_color = False, binarize = True, max_regions = 3):
    """
    Crop an image down to its largest text blocks.

    The image is (optionally) converted to grayscale and binarized, then
    dilated with a wide 5x25 kernel so neighbouring glyphs merge into blocks.
    The bounding box of every external contour is cut out of the *original*
    image, and the largest boxes by area are returned.

    Parameters
    ----------
    image         : input array. When ``binarize`` is True it must be
                    single-channel by the time it reaches cv2.threshold, so
                    pass ``convert_color=True`` for RGB input.
    convert_color : convert RGB to grayscale before further processing.
    binarize      : apply inverse Otsu thresholding, then invert the result if
                    it is mostly white, so the minority (text) pixels end up
                    white for the dilation step.
    max_regions   : maximum number of regions to return (default 3).

    Returns
    -------
    List of at most ``max_regions`` crops of the original image, sorted by
    area in descending order.
    """
    original = image.copy()
    if convert_color:
        image = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)

    if binarize:
        _, image = cv2.threshold(image, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
        if np.mean(image) > 127:
            # The inverted result must replace `image`; assigning it to a
            # separate name left the mostly-white mask in use downstream.
            image = cv2.bitwise_not(image)

    box_kernel = np.ones((5, 25), np.uint8)
    dilation = cv2.dilate(image, box_kernel, iterations = 1)
    # findContours returns (contours, hierarchy) on OpenCV 4.x but
    # (image, contours, hierarchy) on 3.x; [-2] selects contours on both.
    contours = cv2.findContours(dilation, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[-2]

    regions = []
    for cnt in contours:
        x, y, w, h = cv2.boundingRect(cnt)
        regions.append(original[y:y + h, x:x + w])

    # Largest area first; the sort is stable, so ties keep contour order.
    regions.sort(key=lambda r: r.shape[0] * r.shape[1], reverse=True)
    return regions[:max_regions]

def narrow_images(images, convert_color=False, binarize=False):
    """
    Apply ``narrow`` to every image and flatten the results into one list.

    Parameters
    ----------
    images        : iterable of images.
    convert_color : forwarded to ``narrow``.
    binarize      : forwarded to ``narrow``. Note the default here is False,
                    whereas ``narrow`` itself defaults to True.

    Returns
    -------
    A flat list containing, in input order, every region ``narrow`` returned
    for each image.
    """
    narrowed = []
    for img in images:
        narrowed.extend(narrow(img, convert_color, binarize))
    return narrowed

In [5]:
def binarize_images(images, black_on_white=False):
    """
    Convert RGB images to two-level images using Otsu's threshold.

    After thresholding, the majority pixel value is treated as background and
    the polarity is normalised according to ``black_on_white``.

    Parameters
    ----------
    images         : iterable of RGB arrays (converted with COLOR_RGB2GRAY).
    black_on_white : if True, the result has a white background (mean above
                     127) with black foreground; if False (default), a black
                     background with white foreground.

    Returns
    -------
    List of binarized arrays, one per input image, in input order.
    """
    binarized = []
    for img in images:
        gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
        _, binary = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)

        # Invert whenever the current background polarity differs from the
        # requested one. With the default (False) this matches the previous
        # behaviour of inverting mostly-white results.
        mostly_white = bool(np.mean(binary) > 127)
        if mostly_white != bool(black_on_white):
            binary = cv2.bitwise_not(binary)

        binarized.append(binary)
    return binarized

In [6]:
def dilate_images(images, kernel = np.ones((1, 1), np.uint8), iterations=1):
    """
    Dilate every image with the same structuring element.

    Parameters
    ----------
    images     : iterable of images.
    kernel     : structuring element handed to cv2.dilate (default 1x1 ones).
    iterations : number of dilation passes handed to cv2.dilate.

    Returns
    -------
    List with one dilated image per input, in input order.
    """
    return [cv2.dilate(source, kernel, iterations=iterations) for source in images]

In [7]:
def blur_images(images, blur_weight=1):
    """
    Median-blur every image.

    Parameters
    ----------
    images      : iterable of images.
    blur_weight : aperture size passed as the second argument of
                  cv2.medianBlur.

    Returns
    -------
    List with one blurred image per input, in input order.
    """
    return [cv2.medianBlur(picture, blur_weight) for picture in images]

# Text Localization
![Book Stack](Images/books1.jpg)

In [8]:
# Baseline: OCR the full, unprocessed photo. The recorded output below is
# mostly noise, which motivates localizing the text first.
books = load_image("Images/books1.jpg")
print(pytesseract.image_to_string(books))

DR) The Way It [5 — cxsrwour
LONG  WaAtkine In circtes HF

RCA Maca CRC Usa CW ta Sohwxcrceey]


## EAST Text Detection

In [None]:
# Use the Tk GUI backend so figures open in a separate window.
%matplotlib tk
# EASTDetector comes from the project-local east_detector module; get_slices
# presumably returns one cropped image per detected text region — TODO confirm.
detector = EASTDetector()
slices = detector.get_slices(books)

In [None]:
# OCR each detected slice and use the recognized text as its subplot title.
titles = list(map(pytesseract.image_to_string, slices))
show_images(slices, cols=3, titles=titles)

## Frame Narrowing

In [None]:
# Binarize every slice from the third onward, then crop each binarized slice
# down to its largest dilated blocks.
narrowed = narrow_images(binarize_images(slices[2:], black_on_white=False))

In [None]:
# OCR each narrowed region and display it with the recognized text as title.
titles = list(map(pytesseract.image_to_string, narrowed))
show_images(narrowed, cols=3, titles=titles)

# Image Filters
![Book 2](Images/books2.jpg)

In [None]:
# Switch to the second test photo; `books`, `detector` and `slices` are
# rebound, replacing the values from the first photo.
books = load_image("Images/books2.jpg")
%matplotlib tk
# small_overlap is forwarded to the project-local detector; its exact meaning
# is not visible in this notebook — TODO confirm.
detector = EASTDetector(small_overlap=0.75)
slices = detector.get_slices(books)

In [None]:
# OCR each slice of the second photo and show it with the recognized text.
titles = list(map(pytesseract.image_to_string, slices))
show_images(slices, cols=3, titles=titles)

## Binarization

In [None]:
# Otsu-binarize every detected slice. black_on_white=True requests dark text
# on a light background — verify that binarize_images honours the flag.
binarized = binarize_images(slices, black_on_white=True)

In [None]:
# OCR each binarized slice and show it with the recognized text as title.
titles = list(map(pytesseract.image_to_string, binarized))
show_images(binarized, cols=3, titles=titles)

## Blurring

In [None]:
# Median-blur only the last binarized slice, using an aperture size of 3.
blurred = blur_images([binarized[-1]], blur_weight=3)

In [None]:
# OCR the blurred slice and show it with the recognized text as title.
titles = list(map(pytesseract.image_to_string, blurred))
show_images(blurred, cols=3, titles=titles)

## Dilation

In [None]:
# Rebinds `blurred` (previously the blurred last slice) to the first binarized
# slice. An aperture of 1 presumably leaves the image unchanged — TODO confirm.
blurred = blur_images([binarized[0]], blur_weight=1)
# Thicken the white strokes with a 5x5 all-ones structuring element.
dilated = dilate_images(blurred, kernel=np.ones((5, 5), np.uint8))

In [None]:
# OCR the dilated slice and show it with the recognized text as title.
titles = list(map(pytesseract.image_to_string, dilated))
show_images(dilated, cols=3, titles=titles)

# Text Localization using Contouring

In [19]:
# Reload the first photo, re-run the EAST detector with default settings, and
# keep only the last detected slice for the contouring demo.
books = load_image("Images/books1.jpg")
detector = EASTDetector()
slices = detector.get_slices(books)
img = slices[-1]

In [20]:
%matplotlib tk
processed = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)           
_, processed = cv2.threshold(processed, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
if np.mean(processed) > 127:
    processed = cv2.bitwise_not(processed)

box_kernel = np.ones((5, 25), np.uint8)
dilation = cv2.dilate(processed, box_kernel, iterations = 1)
bounds, _ = cv2.findContours(dilation, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

for cnt in bounds:
    x, y, w, h = cv2.boundingRect(cnt)
    cv2.rectangle(img, (x, y), (x+w, y+h), (0, 255, 0), 1)
    
plt.imshow(dilation, cmap='gray')

<matplotlib.image.AxesImage at 0x123ac6bd0>

In [21]:
# Show the slice with the bounding rectangles drawn by the previous cell.
plt.imshow(img)

<matplotlib.image.AxesImage at 0x123365450>