In [1]:
import cv2
import numpy as np
import matplotlib.pyplot as plt

<a name='1-1'></a>
### 1.1 - Goal
Use openCV and Deep Learning Model to build a simple autograde system

### 1.2 - How to build
- ***Detection***: Apply image processing methods to determine the position of objects in images, in order to be an input for Classification Step
- ***Classification***: Apply Deep Learning model to classify the determined objects


In [2]:
img = cv2.imread("multiple_choice.jpg", 0)
img = cv2.resize(img, (720, 960), interpolation=cv2.INTER_AREA)
blur = cv2.GaussianBlur(img, (5, 5), 0)
thresh = cv2.adaptiveThreshold(blur, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY_INV, 11, 2)

In [4]:
cv2.imshow("Thresh", thresh)
cv2.waitKey(0)
cv2.destroyAllWindows()
print(img.shape)

(960, 720)


In [3]:
# Define horizontal and vertical lines in the image
horizontal = thresh
vertical = thresh

scale_height = 20  # the bigger this scale is, the more vertical lines will be defined
scale_long = 15

long = int(img.shape[1] / scale_long)
height = int(img.shape[0] / scale_height)

# Opening
# - Get the structure of images
# - Erode
# - And Dilate
horizontal_structure = cv2.getStructuringElement(cv2.MORPH_RECT, (long, 1))
horizontal = cv2.erode(horizontal, horizontal_structure, (-1, -1))
horizontal = cv2.dilate(horizontal, horizontal_structure, (-1, -1))

vertical_structure = cv2.getStructuringElement(cv2.MORPH_RECT, (1, height))
vertical = cv2.erode(vertical, vertical_structure, (-1, -1))
vertical = cv2.dilate(vertical, vertical_structure, (-1, -1))

mask = vertical + horizontal

### => we can successfully get all the horizontal and vertical lines in the image

In [6]:
cv2.imshow("Mask", mask)
cv2.waitKey(0)
cv2.destroyAllWindows()

In [4]:
# Now we detect the position of table in the previous mask
# findContours: help to take position of closed object in an image. It can take many positions of objects (here each line, each box can be a object)
# But we just take the biggest object which is the whole table => that's why we're using max
contours, hierarchy = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

max = -1
for cnt in contours:
    x, y, w, h = cv2.boundingRect(cnt)
    if cv2.contourArea(cnt) > max:
        x_max, y_max, w_max, h_max = x, y, w, h
        max = cv2.contourArea(cnt)

table = img[y_max:y_max + h_max, x_max:x_max + w_max]

In [23]:
cv2.imshow("After finding contours", table)
cv2.waitKey(0)
cv2.destroyAllWindows()

### Now, Get the answer
- Now we'll divide the previous table by specific ratio to have each answer box.
- Inside each answer box, we'll use findContours to define the character that is written in it.
- Because the table has 19 rows, then we'll divide vertically into 19 parts, each part is 1 row
- In each row, we can estimate that question box is approximately equal to 1/2 answer box, so we'll divide horizontally into 6 parts
- Define coordinate for each answer box
- Apply findContours for answer boxes to get the object inside them



In [9]:
cropped_thresh_img = []
cropped_origin_img = []
contours_img = []

NUM_ROWS = 19  # number of rows for this problem
START_ROW = 1

for i in range(START_ROW, NUM_ROWS):
    # Get each answer box in the first half
    # Specifically, get the coordinate of row i, then get the coordinate of corresponding answer box i
    thresh1 = thresh[y_max + round(i * h_max / NUM_ROWS):y_max + round((i + 1) * h_max / NUM_ROWS),
              x_max + round(w_max / 6):x_max + round(w_max / 2)]
    contours_thresh1, hierarchy_thresh1 = cv2.findContours(thresh1, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    origin1 = img[y_max + round(i * h_max / NUM_ROWS):y_max + round((i + 1) * h_max / NUM_ROWS),
              x_max + round(w_max / 6):x_max + round(w_max / 2)]

    cropped_thresh_img.append(thresh1)
    cropped_origin_img.append(origin1)
    contours_img.append(contours_thresh1)

for i in range(START_ROW, NUM_ROWS):
    thresh1 = thresh[y_max + round(i * h_max / NUM_ROWS):y_max + round((i + 1) * h_max / NUM_ROWS),
              x_max + round(2 * w_max / 3):x_max + round(w_max)]
    contours_thresh1, hierarchy_thresh1 = cv2.findContours(thresh1, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    origin1 = img[y_max + round(i * h_max / NUM_ROWS):y_max + round((i + 1) * h_max / NUM_ROWS),
              x_max + round(2 * w_max / 3):x_max + round(w_max)]

    cropped_thresh_img.append(thresh1)
    cropped_origin_img.append(origin1)
    contours_img.append(contours_thresh1)

# With this condition(> 30), we'll only get contours that bigger than 30 (in order to remove noise)
# And with contours we found, in order not to take the rims of answer box, we focus in the center of the box (0.1 * box_length to 0.9 * box_length) (just with this kind of problem)
answers = []
for i, contour_img in enumerate(contours_img):
    for cnt in contour_img:
        if cv2.contourArea(cnt) > 30:
            x, y, w, h = cv2.boundingRect(cnt)
            if cropped_origin_img[i].shape[1] * 0.1 < x < cropped_origin_img[i].shape[1] * 0.9:
                answer = cropped_origin_img[i][y:y + h, x:x + w]
                answer = cv2.threshold(answer, 160, 255, cv2.THRESH_BINARY_INV)[1]
                answer = cv2.resize(answer, (28, 28), interpolation=cv2.INTER_AREA)
                answers.append(answer)

In [13]:
answers = np.array(answers)
print(answers.shape)

(25, 28, 28)


In [16]:
cv2.imshow("Answer", answer)
cv2.waitKey(0)
cv2.destroyAllWindows()

In [19]:
import tensorflow
from keras import models

In [20]:
model = models.load_model('./letter_recognition_model.h5')

In [23]:
test_logits = model.predict(answers)
test_logits = np.argmax(test_logits, axis=-1)
res = [test_logits]
letter = ['A', 'B', 'C', 'D']

result = []
for r in res:
    if len(r) == 0:
        result.append("X")
    elif len(r) > 1:
        result.append("O")
    else:
        result.append(letter[int(r[0])])

print(result)

['O']
