In [1]:
%load_ext nb_black

<IPython.core.display.Javascript object>

In [2]:
import cv2


def read_image(image_file_path):
    """Load the image stored at ``image_file_path`` with OpenCV.

    The result of ``cv2.imread`` is handed back unchanged.
    """
    loaded = cv2.imread(image_file_path)
    return loaded

<IPython.core.display.Javascript object>

In [3]:
def lets_see_it(image):
    """Show ``image`` in an OpenCV window.

    Calls ``cv2.waitKey(0)`` after showing the image and then destroys all
    OpenCV windows.
    """
    window_title = "window name"
    cv2.imshow(window_title, image)
    cv2.waitKey(0)
    cv2.destroyAllWindows()

<IPython.core.display.Javascript object>

In [4]:
def grayscale(image):
    """Convert ``image`` using OpenCV's ``COLOR_BGR2GRAY`` conversion."""
    conversion = cv2.COLOR_BGR2GRAY
    return cv2.cvtColor(image, conversion)

<IPython.core.display.Javascript object>

In [5]:
import imutils


def normalize_dimensions(image, desired_width=20, desired_height=20):
    """Fit ``image`` into a ``desired_width`` x ``desired_height`` canvas.

    The image is first rescaled with ``imutils.resize`` along one side so that
    it fits inside the target box, then surrounded by a constant white border,
    and finally resized once more so that the result has exactly the requested
    dimensions even when the padding could not be split evenly.

    Args:
        image: Array whose first two axes are (height, width).
        desired_width: Width of the returned image in pixels.
        desired_height: Height of the returned image in pixels.

    Returns:
        The padded image resized to ``desired_width`` x ``desired_height``.
    """
    (h, w) = image.shape[:2]
    # Scale along whichever side reaches the target box first. Comparing the
    # aspect ratios (cross-multiplied to stay in integers) instead of ``w > h``
    # keeps both paddings non-negative for non-square targets; for square
    # targets such as the 20x20 default it is the very same test.
    if w * desired_height > h * desired_width:
        image = imutils.resize(image, width=desired_width)
    else:
        image = imutils.resize(image, height=desired_height)

    # Half of the remaining space on each side; an odd remainder leaves the
    # padded image one pixel short, which the final resize below corrects.
    width_padding = (desired_width - image.shape[1]) // 2
    height_padding = (desired_height - image.shape[0]) // 2

    WHITE = [255, 255, 255]
    image_with_border = cv2.copyMakeBorder(
        image,
        height_padding,
        height_padding,
        width_padding,
        width_padding,
        cv2.BORDER_CONSTANT,
        value=WHITE,
    )
    image_with_border_resized = cv2.resize(
        image_with_border, (desired_width, desired_height), interpolation=cv2.INTER_AREA
    )
    return image_with_border_resized

<IPython.core.display.Javascript object>

In [6]:
import numpy as np


def reshape_for_keras(image):
    """Insert a length-1 axis at position 2, e.g. (h, w) -> (h, w, 1)."""
    channel_axis = 2
    with_channel_axis = np.expand_dims(image, channel_axis)
    return with_channel_axis

<IPython.core.display.Javascript object>

In [7]:
from imutils import paths
import os

# Folder that is scanned for character images; the name of each image's parent
# directory is used as its label.
captcha_processing_output_folder = "extracted_character_images"
images, labels = [], []

for image_file_path in paths.list_images(captcha_processing_output_folder):
    # read -> grayscale -> fixed size -> extra trailing axis
    images.append(
        reshape_for_keras(normalize_dimensions(grayscale(read_image(image_file_path))))
    )
    # The label is the second-to-last component of the file path.
    labels.append(image_file_path.split(os.path.sep)[-2])

<IPython.core.display.Javascript object>

In [8]:
# Stack the samples into one float array and divide every value by 255.0.
X = np.divide(np.array(images, dtype="float"), 255.0)
# Labels are converted to an array as well.
labels = np.array(labels)

<IPython.core.display.Javascript object>

In [9]:
# Inspect the dimensions of the stacked image array.
X.shape

(18636, 20, 20, 1)

<IPython.core.display.Javascript object>

In [10]:
from sklearn import preprocessing

# Binarize the labels. The fitted binarizer is kept because it is needed again
# to turn predictions back into labels.
label_binarizer = preprocessing.LabelBinarizer()
y = label_binarizer.fit_transform(labels)

<IPython.core.display.Javascript object>

In [11]:
from tensorflow.keras import backend
from keras.models import Sequential
from keras.layers.convolutional import Conv2D, MaxPooling2D
from keras.layers.core import Flatten, Dense

# One output unit per distinct label.
num_classes = len(set(labels))

# Two convolution + max-pooling stages, then a dense hidden layer and a
# softmax output layer.
CNN_model = Sequential(
    [
        Conv2D(20, (5, 5), padding="same", input_shape=(20, 20, 1), activation="relu"),
        MaxPooling2D(pool_size=(2, 2), strides=(2, 2)),
        Conv2D(50, (5, 5), padding="same", activation="relu"),
        MaxPooling2D(pool_size=(2, 2), strides=(2, 2)),
        Flatten(),
        Dense(512, activation="relu"),
        Dense(num_classes, activation="softmax"),
    ]
)
CNN_model.compile(
    optimizer="adam", loss="categorical_crossentropy", metrics=["accuracy"]
)
CNN_model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_1 (Conv2D)            (None, 20, 20, 20)        520       
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 10, 10, 20)        0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 10, 10, 50)        25050     
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 5, 5, 50)          0         
_________________________________________________________________
flatten_1 (Flatten)          (None, 1250)              0         
_________________________________________________________________
dense_1 (Dense)              (None, 512)               640512    
_________________________________________________________________
dense_2 (Dense)              (None, 32)               

Using TensorFlow backend.


<IPython.core.display.Javascript object>

In [12]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the samples. A fixed random_state makes the shuffled split
# identical on every run, so results can be reproduced after a kernel restart.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, shuffle=True, random_state=42
)

<IPython.core.display.Javascript object>

In [13]:
# Train for 5 epochs in batches of 32; the held-out split is passed as
# validation data.
CNN_model.fit(
    x=X_train,
    y=y_train,
    batch_size=32,
    epochs=5,
    verbose=1,
    validation_data=(X_test, y_test),
)

Train on 14908 samples, validate on 3728 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.callbacks.History at 0x1b59c66ab48>

<IPython.core.display.Javascript object>

In [14]:
# Path of the CAPTCHA image to solve. Built with os.path.join so the directory
# separator matches the host OS instead of being hard-coded to a backslash.
CAPTCHA = os.path.join("captcha_images", "2A5J.png")

<IPython.core.display.Javascript object>

In [15]:
import cv2


def read_CAPTCHA_image(captcha_image_file):
    """Load the CAPTCHA stored at ``captcha_image_file`` with OpenCV.

    The result of ``cv2.imread`` is handed back unchanged.
    """
    captcha = cv2.imread(captcha_image_file)
    return captcha

<IPython.core.display.Javascript object>

In [16]:
def grayscale_CAPTCHA_image(captcha_image):
    """Convert the CAPTCHA image using OpenCV's ``COLOR_BGR2GRAY`` conversion."""
    conversion = cv2.COLOR_BGR2GRAY
    return cv2.cvtColor(captcha_image, conversion)

<IPython.core.display.Javascript object>

In [17]:
def threshold_CAPTCHA_image(captcha_image_grayscaled):
    """Threshold the grayscale CAPTCHA with the inverted-binary and Otsu flags.

    Only the second element of ``cv2.threshold``'s result is returned.
    """
    mode = cv2.THRESH_BINARY_INV | cv2.THRESH_OTSU
    threshold_result = cv2.threshold(captcha_image_grayscaled, 0, 255, mode)
    return threshold_result[1]

<IPython.core.display.Javascript object>

In [18]:
import numpy as np


def dilate_characters(binary_image):
    """Dilate ``binary_image`` once with a 2x2 kernel of ones."""
    structuring_element = np.ones(shape=(2, 2), dtype=np.uint8)
    dilated = cv2.dilate(binary_image, structuring_element, iterations=1)
    return dilated

<IPython.core.display.Javascript object>

In [19]:
def find_CAPTCHA_contours(captcha_image_thresholded):
    """Compute the external contours of the characters in the CAPTCHA image.

    Args:
        captcha_image_thresholded: Binary image passed to ``cv2.findContours``.

    Returns:
        The contour list produced by ``cv2.findContours`` (called with
        ``RETR_EXTERNAL`` and ``CHAIN_APPROX_SIMPLE``).
    """
    found = cv2.findContours(
        captcha_image_thresholded, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
    )
    # OpenCV 3.x returns (image, contours, hierarchy) whereas 2.4 and 4.x
    # return (contours, hierarchy). The contours are always the second-to-last
    # element, so index from the end instead of assuming position 0.
    return found[-2]

<IPython.core.display.Javascript object>

In [20]:
def compute_bounding_rectangles(contours):
    """Return ``cv2.boundingRect`` of every contour, in input order."""
    bounding_rect = cv2.boundingRect
    return [bounding_rect(contour) for contour in contours]

<IPython.core.display.Javascript object>

In [21]:
def split_fat_rectangles(rectangles):
    """Replace every wide rectangle by two side-by-side halves.

    A rectangle ``(x, y, w, h)`` counts as wide when ``w / h`` exceeds 1.25.
    It is then replaced by two rectangles of width ``int(w / 2)`` starting at
    ``x`` and ``x + int(w / 2)``. All other rectangles are passed through
    untouched, and the input order is preserved.
    """
    max_aspect_ratio = 1.25
    result = []
    for rectangle in rectangles:
        (x, y, w, h) = rectangle
        if not w / h > max_aspect_ratio:
            # Narrow enough: keep as is.
            result.append(rectangle)
            continue
        half_width = int(w / 2)
        left_half = (x, y, half_width, h)
        right_half = (x + half_width, y, half_width, h)
        result.extend([left_half, right_half])
    return result

<IPython.core.display.Javascript object>

In [22]:
def get_character_images(rectangles, image):
    """Crop the region of each bounding rectangle, plus a 1-pixel margin.

    Args:
        rectangles: Iterable of ``(x, y, w, h)`` rectangles.
        image: Array indexed as ``image[rows, columns]``.

    Returns:
        A list with one crop of ``image`` per rectangle, in input order.
    """
    char_images = []
    for x, y, w, h in rectangles:
        # Clamp the margin at the top/left edge: a start index of -1 would
        # wrap around to the far side of the image and yield an empty crop.
        # The end indices need no clamping because slicing stops at the edge.
        top = max(y - 1, 0)
        left = max(x - 1, 0)
        char_images.append(image[top : y + h + 1, left : x + w + 1])
    return char_images

<IPython.core.display.Javascript object>

In [23]:
def sort_bounding_rectangles(rects):
    """Return a new list of the rectangles ordered by ascending x coordinate."""

    def left_edge(rect):
        return float(rect[0])

    ordered = list(rects)
    ordered.sort(key=left_edge)
    return ordered

<IPython.core.display.Javascript object>

In [24]:
# Image preparation: load, grayscale, threshold, dilate.
captcha_image = read_CAPTCHA_image(CAPTCHA)
captcha_image_grayscaled = grayscale_CAPTCHA_image(captcha_image)
captcha_image_thresholded = threshold_CAPTCHA_image(captcha_image_grayscaled)
captcha_image_dilated = dilate_characters(captcha_image_thresholded)

# Character localisation: contours -> bounding rectangles -> split the wide
# ones -> order by x coordinate.
captcha_image_contours = find_CAPTCHA_contours(captcha_image_dilated)
character_bounding_rectangles = sort_bounding_rectangles(
    split_fat_rectangles(compute_bounding_rectangles(captcha_image_contours))
)

# The crops are taken from the image as loaded, not from the thresholded one.
character_images = get_character_images(character_bounding_rectangles, captcha_image)

<IPython.core.display.Javascript object>

In [35]:
# Run every character crop through the same grayscale / normalize / reshape
# steps used for the training images. The batch gets its own name (X_captcha)
# so the training matrix X is not overwritten and the earlier split/fit cells
# can still be re-run after this one.
captcha_inputs = []
for image in character_images:
    image_gray = grayscale(image)
    image_normalized = normalize_dimensions(image_gray)
    # NOTE: shows each character in an OpenCV window (calls cv2.waitKey(0)).
    lets_see_it(image_normalized)
    image_reshaped_for_keras = reshape_for_keras(image_normalized)
    captcha_inputs.append(image_reshaped_for_keras)

X_captcha = np.array(captcha_inputs, dtype="float") / 255.0
pred = CNN_model.predict(X_captcha)

<IPython.core.display.Javascript object>

In [36]:
# Convert the model's predictions back into labels with the fitted binarizer.
label_binarizer.inverse_transform(pred)

array(['2', 'A', '5', 'J'], dtype='<U1')

<IPython.core.display.Javascript object>