## Release the Kraken!

In [None]:
# The biggest limitation of tesseract is the lack of a layout engine inside of it. Tesseract
# expects to be using fairly clean text, and gets confused if we don't crop out other artifacts.
# It's not bad, but Kraken can help us out by segmenting pages.
# https://pypi.org/project/kraken/

In [None]:
from typing import Optional, Tuple

from PIL import Image
from PIL import ImageDraw
from PIL import ImageChops

import kraken
from kraken import pageseg

import cv2 as cv
import numpy as np

In [None]:
# Load the sample page and show it in the notebook.
im = Image.open("/app/readonly/two_col.png")
display(im)

# The page is converted to PIL mode '1' (bi-level) before it is handed to kraken.
black_and_white = im.convert('1')  # binary mode
# Last expression of the cell, so the notebook shows whatever segment() returns.
pageseg.segment(black_and_white)

In [None]:
# Column detection
# parameter black_colseps : If set to True, kraken will assume that columns will be
# separated by black lines.

def show_boxes(img_rgb: Image.Image) -> Image.Image:
    """
    Find boxes of text on the input image and draw those boxes on the image.

    The image is segmented by kraken on a bi-level (mode '1') copy, with
    ``black_colseps=True``. Every returned box is then outlined in red
    directly on ``img_rgb``.

    :param img_rgb: image to segment and annotate; it is modified in place
    :return: the same ``img_rgb`` object, with the boxes drawn on it
    """
    bounding_boxes = pageseg.segment(
        img_rgb.convert('1'),
        black_colseps=True,
    )['boxes']

    drawing_object = ImageDraw.Draw(img_rgb)
    for box in bounding_boxes:
        # Outline only (no fill) so the text inside the box stays readable.
        drawing_object.rectangle(box, fill=None, outline='red')

    return img_rgb


# show_boxes() draws on the image it receives, so it is given a freshly opened copy.
display(show_boxes(Image.open("/app/readonly/two_col.png")))

In [None]:
# Target : find and draw a column separator
#   we choose a size of at least : 25 pixels wide (1 char) and six lines high
#   question : how many pixels are six lines high ?

def calculate_line_height(img: Image.Image) -> int:
    """
    Calculate the average height of a line from a given image, based on the detected text boxes.

    :param img: image to analyse; a bi-level (mode '1') copy is passed to kraken
    :return: mean height of the detected boxes in pixels, rounded down
    :raises ValueError: if kraken does not detect any text box in the image
    """
    bounding_boxes = pageseg.segment(img.convert('1'))['boxes']
    if len(bounding_boxes) == 0:
        # Without this guard the division below fails with a bare ZeroDivisionError.
        raise ValueError("no text boxes detected; cannot compute a line height")

    # Each box is (left, top, right, bottom); only the vertical extent matters here.
    total_height = sum(bottom - top for (_, top, _, bottom) in bounding_boxes)
    return total_height // len(bounding_boxes)

# And let's test this with the image we have been using
char_width = 25  # gap width in pixels (about one character, see the target above)
line_height = calculate_line_height(Image.open("/app/readonly/two_col.png"))

# Probe box laid out as (left, top, width, height): one character wide, six lines high
gap_box = (0, 0, char_width, line_height * 6)
gap_box

In [None]:
# Determine if there is a block of whitespace  

WHITE_PIXEL = 255


def gap_check(
    img: "Image.Image",
    location: Tuple[int, int],
    box: Optional[Tuple[int, int, int, int]] = None,
) -> bool:
    """
    Tell whether an all-white area of the probe size starts at ``location``.

    :param img: binarized Image; a pixel counts as white when getpixel() returns WHITE_PIXEL
    :param location: (x, y) of the top left corner of the area to test
    :param box: optional (left, top, width, height) probe; only the width and height are
        used. Defaults to the module-level ``gap_box``.
    :return: True if location is the top left corner of an empty area of that size lying
        entirely inside the image, otherwise False
    """
    if box is None:
        box = gap_box
    width, height = box[2], box[3]

    x0, y0 = location
    if x0 < 0 or y0 < 0:
        return False
    # The area spans columns x0 .. x0 + width - 1 and rows y0 .. y0 + height - 1, so it
    # still fits when it ends flush with the right or the bottom edge of the image.
    if x0 + width > img.width or y0 + height > img.height:
        return False

    return all(
        img.getpixel((x, y)) == WHITE_PIXEL
        for x in range(x0, x0 + width)
        for y in range(y0, y0 + height)
    )

In [None]:
def draw_sep(img: Image, location: Tuple[int, int]):
    """
    Mark a separator: a black vertical line through the horizontal middle of the gap box.

    location: (x, y) of the top left corner of the box
    Returns the image that was passed in, after drawing on it.
    """
    left, top = location
    middle_x = left + int(gap_box[2] / 2)
    bottom = top + gap_box[3]

    # Both corners share the same x, which turns the rectangle into a vertical line.
    pen = ImageDraw.Draw(img)
    pen.rectangle((middle_x, top, middle_x, bottom), fill='black', outline='black')
    return img

In [None]:
def add_vertical_bar(img: Image):
    """
    Scan a 2-column text image for empty gaps and draw a vertical bar in each one found.

    Positions are visited column by column, from top to bottom. The image is
    drawn on in place and returned.
    """
    corners = (
        (col, row)
        for col in range(img.width)
        for row in range(img.height)
    )
    for corner in corners:
        if not gap_check(img, corner):
            continue
        draw_sep(img, corner)
    return img

# Let's read in our test image and convert it to 8-bit grayscale (PIL mode "L")
image = Image.open("/app/readonly/two_col.png").convert("L")
# add_vertical_bar() draws on the image it is given and returns that same image
image = add_vertical_bar(image)
display(image)

In [None]:
# Segment the page again, now that the separators have been drawn on it
display(show_boxes(image))

## OpenCV : Comparing Image Data Structures

In [None]:
# OpenCV supports reading of images in most file formats, such as JPEG, PNG, and TIFF. Most image and
# video analysis requires converting images into grayscale first. This simplifies the image and reduces
# noise, allowing for improved analysis.

img = cv.imread('/app/readonly/floyd.jpg')
gray: np.ndarray = cv.cvtColor(img, cv.COLOR_BGR2GRAY)

# The display function doesn't know what to do with this array. So let's convert it
# into a PIL object to render it in the browser.
image = Image.fromarray(gray, "L")

display(image)

In [None]:
print(img.shape)
# The shape has 3 entries: rows (height), columns (width) and colour channels.
# img[0][0] is therefore the pixel at row 0, column 0, with one value per channel.
first_pixel = img[0][0]
first_pixel

In [None]:
# Reshape

print("Original image")
print(gray)

# If we wanted to lay all the pixels out in a single row, we just call reshape.
# Note that the result is still a 2-D array, of shape (1, rows * columns).
print("Reshaped image")
image1d = np.reshape(gray, (1, gray.shape[0] * gray.shape[1]))
print(image1d)

In [None]:
# For instance, remember in the last lecture when we wanted to look for gaps in an image so
# that we could draw lines to feed into kraken? Well, we used PIL to do this, using getpixel()
# to look at individual pixels and see what the luminosity was, then ImageDraw.rectangle to
# actually fill in a black bar separator. This was a nice high level API, and let us write
# routines to do the work we wanted without having to understand too much about how the images
# were being stored. But it was computationally very slow.
#
# Instead, we could write the code to do this using matrix features within numpy. Let's take
# a look.

img = cv.imread('/app/readonly/two_col.png')
gray = cv.cvtColor(img, cv.COLOR_BGR2GRAY)

# Look for a white box: count the non-zero pixels in the 2x2 patch at rows 2-3, columns 1-2.
# (Non-zero only means "not pure black", so this is a quick first check, not a proof of white.)
np.count_nonzero(gray[2:4, 1:3])

In [None]:
# Manually create an image from an ndarray, and change its color

# 6x6 array with every value set to 255
white_matrix = np.full((6, 6), 255, dtype=np.uint8)
display(Image.fromarray(white_matrix, "L"))

# Overwrite the column at index 4 with zeros, in every row
white_matrix[:, 4] = np.full((1,6), 0, dtype=np.uint8)
display(Image.fromarray(white_matrix, "L"))

white_matrix

## OpenCV

In [None]:
# Face detection

# First step is to load the XML-based classifiers
face_cascade = cv.CascadeClassifier('/app/readonly/haarcascade_frontalface_default.xml')
eye_cascade = cv.CascadeClassifier('/app/readonly/haarcascade_eye.xml')

img = cv.imread('/app/readonly/floyd.jpg')
gray = cv.cvtColor(img, cv.COLOR_BGR2GRAY)

# Use the face_cascade classifier
faces = face_cascade.detectMultiScale(gray)
# The resulting rectangles are in the format of (x,y,w,h) where x and y denote the upper
# left hand corner of the rectangle and the width and height give the size of the bounding box.

pil_img = Image.fromarray(gray, mode="L")
drawing = ImageDraw.Draw(pil_img)

# PIL.ImageDraw is looking for (x1, y1, x2, y2)
# Only the first detection is drawn; faces[0] assumes at least one face was found.
x, y, w, h = faces[0]
drawing.rectangle((x, y, x + w, y + h), outline="white")

display(pil_img)

In [None]:
# Draw rectangles around faces

def show_rects(faces, image_path='/app/readonly/msi_recruitment.gif'):
    """
    Display an image with a white rectangle drawn around each detected face.

    :param faces: iterable of (x, y, w, h) rectangles: top left corner plus
        width and height of each bounding box
    :param image_path: file to draw on; defaults to the recruitment picture
        this function originally had hard-coded
    """
    # Open through a context manager so the file handle is released right away;
    # convert() hands back a separate image that stays usable afterwards.
    with Image.open(image_path) as source:
        # the GIF uses mode "P" (custom color palette) -> RGB
        pil_img = source.convert("RGB")

    drawing = ImageDraw.Draw(pil_img)
    for x, y, w, h in faces:
        # PIL.ImageDraw is looking for (x1, y1, x2, y2)
        drawing.rectangle((x, y, x + w, y + h), outline="white")

    display(pil_img)

In [None]:
# Let's use PIL to open our image, since OpenCV can't work with GIF images
pil_img = Image.open('/app/readonly/msi_recruitment.gif')

# Save a grayscale PNG copy that cv.imread() can load
pil_img.convert("L").save("/app/notebooks/msi_recruitment.png")
cv_img = cv.imread("/app/notebooks/msi_recruitment.png")

# let's try and detect faces in that image
faces = face_cascade.detectMultiScale(cv_img)

show_rects(faces)

# false negatives : missed four faces
# false positives : something the machine thought was a face but it wasn't.

In [None]:
# binarize this image

# cv.threshold() returns a pair; the second item (cv_img_bin) is the thresholded image
# that is handed to the detector. 120 is the threshold and 255 the maximum value.
threshold, cv_img_bin = cv.threshold(cv_img, 120, 255, cv.THRESH_BINARY)
faces = face_cascade.detectMultiScale(cv_img_bin)
show_rects(faces)

In [None]:
# detectMultiScale() parameters :
#     the second argument is the scale factor = how much the size of the rectangles which
#     are considered against the model changes from one pass to the next

# Small steps between sizes
faces = face_cascade.detectMultiScale(cv_img,1.05)
show_rects(faces)

# Larger steps between sizes
faces = face_cascade.detectMultiScale(cv_img,1.25)
show_rects(faces)

In [None]:
# Time spent :

%timeit face_cascade.detectMultiScale(cv_img, 1.05)
%timeit face_cascade.detectMultiScale(cv_img, 1.25)
# Bigger scale factor => bigger steps between rectangle sizes => faster

## More Jupyter Widgets

In [None]:
# One of the nice things about using the Jupyter notebook systems is that there is a
# rich set of contributed plugins that seek to extend this system. In this lecture I
# want to introduce you to one such plugin, called ipywebrtc. WebRTC is a fairly new
# protocol for real time communication on the web. Yup, I'm talking about chatting.
# The widget brings this to the Jupyter notebook system. Let's take a look.

from ipywebrtc import CameraStream, ImageRecorder

# The image recorder lets us actually grab images from the camera stream. There are features
# for downloading and using the image as well. We see that the default format is a png file.

In [None]:
# Video-only stream from the camera facing the user, plus a recorder attached to it
camera = CameraStream.facing_user(audio=False)
image_recorder = ImageRecorder(stream=camera)

# Now, the docs are a little unclear how to use this within Jupyter, but if we call the
# download() function it will actually store the results of the camera which is hooked up
# in image_recorder.image. Let's try it out.
# First, let's tell the recorder to start capturing data
image_recorder.recording = True
# Now let's download the image
image_recorder.download()
# Then let's inspect the type of the image
type(image_recorder.image)

In [None]:
# Ok, the object that it stores is an ipywidgets.widgets.widget_media.Image. How do we do
# something useful with this? Well, an inspection of the object shows that there is a handy
# value field which actually holds the bytes behind the image. And we know how to display
# those.

import PIL.Image
import io

# Wrap the raw bytes in an in-memory file object so that PIL can open them like a file
img = PIL.Image.open(io.BytesIO(image_recorder.image.value))
display(img)