In [2]:
import os
import os.path
import cv2
import glob
import imutils

import pickle
import numpy as np
from sklearn.preprocessing import LabelBinarizer
from sklearn.model_selection import train_test_split
from keras.models import Sequential
from keras.layers.convolutional import Conv2D, MaxPooling2D
from keras.layers.core import Flatten, Dense
from imutils import paths
from PIL import Image
import tensorflow as tf

import pandas as pd

from keras.models import load_model

import matplotlib.pyplot as plt

In [3]:
def resize_to_fit(image, width, height):
    """
    A helper function to resize an image to fit within a given size
    :param image: image to resize
    :param width: desired width in pixels
    :param height: desired height in pixels
    :return: the resized image
    """

    # grab the dimensions of the image, then initialize the padding values
    (h, w) = image.shape[:2]

    # if the width is greater than the height then resize along the width
    if w > h:
        image = imutils.resize(image, width=width)

    # otherwise, the height is greater than the width so resize along the height
    else:
        image = imutils.resize(image, height=height)

    # determine the padding values for the width and height to obtain the target dimensions
    padW = int((width - image.shape[1]) / 2.0)
    padH = int((height - image.shape[0]) / 2.0)

    # pad the image then apply one more resizing to handle any rounding issues
    image = cv2.copyMakeBorder(image, padH, padH, padW, padW,cv2.BORDER_REPLICATE)
    image = cv2.resize(image, (width, height))

    # return the pre-processed image
    return image

### Split data into train and validation sets

In [5]:
data = []
labels = []

# loop over the input images
for image_file in paths.list_images('kaggle-letters'):
    # Load the image and convert it to grayscale
    image = cv2.imread(image_file)
    image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    # Resize the letter so it fits in a 20x20 pixel box
    image = resize_to_fit(image, 20, 20)

    # Add a third channel dimension to the image to make Keras work
    image = np.expand_dims(image, axis=2)

    # Grab the name of the letter based on the folder it was in
    label = image_file.split(os.path.sep)[-2]

    # Add the letter image and it's label to our training data
    data.append(image)
    labels.append(label)

# data[0].shape -> (20, 20, 1)

# Scale the raw pixel intensities to [0,1] -> improves training
data = np.array(data, dtype='float') / 255.0
labels = np.array(labels)


X_train, X_test, y_train, y_test = train_test_split(data, labels, test_size = 0.3, random_state=0, stratify=labels)
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size = 0.1, random_state=0, stratify=y_train)

# Convert the labels (letters) into one-hot-encodings that Keras can work with
# Used to extend classifiers to multi-class classifiers. It will make the model consisting of "one model per class
labelbin = LabelBinarizer().fit(y_train)
y_train = labelbin.transform(y_train)
y_val = labelbin.transform(y_val)
y_test = labelbin.transform(y_test)

# Save mapping from labels to one-hot-encodings
with open("hard_labels.dat", 'wb') as f:
    pickle.dump(labelbin, f)


### Model definition

In [6]:
model = Sequential()

# 1st convolutional layer with max pooling -> 20 kernels
model.add(Conv2D(20, (5, 5), padding="same", input_shape=(20, 20, 1), activation="relu"))
model.add(MaxPooling2D(pool_size=(2, 2), strides=(2, 2)))

# 2nd convolutional layer with max pooling -> 50 kernels
model.add(Conv2D(50, (5, 5), padding="same", activation="relu"))
model.add(MaxPooling2D(pool_size=(2, 2), strides=(2, 2)))

model.add(Flatten())

# Fully Connected network
# Hidden layer with 500 nodes
model.add(Dense(500, activation = 'relu'))

# Output layer with 32 nodes -> number of possible characters
model.add(Dense(62, activation = 'softmax'))

model.compile(loss = 'categorical_crossentropy', optimizer = 'Adam', metrics =['accuracy'])

2022-11-12 00:35:07.485491: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2 AVX AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2022-11-12 00:35:07.486548: I tensorflow/core/common_runtime/process_util.cc:146] Creating new thread pool with default inter op setting: 2. Tune using inter_op_parallelism_threads for best performance.


In [7]:
# Train the neural network
model.fit(X_train, y_train, validation_data=(X_val, y_val), batch_size = 32, epochs = 10, verbose = 1)

model.save('hard_captchas_solver.hdf5')

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


### Model evaluation

In [8]:
# load label enconder
with open('hard_labels.dat', 'rb') as f:
    lb2 = pickle.load(f)

# load CNN
model = load_model('hard_captchas_solver.hdf5')

In [9]:
results = model.evaluate(X_test, y_test, batch_size=32)
results



[0.09021096676588058, 0.9837002754211426]

### Transfer learning to classify the simpler captcha

#### Update the old model

In [10]:
new_model = Sequential()

# add all but the last two layers of the previous model
for layer in model.layers[:-2]:
    new_model.add(layer)

# stop convolutional layers weights from being updated
for layer in new_model.layers:
    layer.trainable = False

# add two last layers
new_model.add(Dense(500, activation = 'relu'))

# Output layer with 32 nodes -> number of possible characters
new_model.add(Dense(32, activation = 'softmax'))

new_model.compile(loss = 'categorical_crossentropy', optimizer = 'Adam', metrics =['accuracy'])

new_model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 20, 20, 20)        520       
                                                                 
 max_pooling2d (MaxPooling2D  (None, 10, 10, 20)       0         
 )                                                               
                                                                 
 conv2d_1 (Conv2D)           (None, 10, 10, 50)        25050     
                                                                 
 max_pooling2d_1 (MaxPooling  (None, 5, 5, 50)         0         
 2D)                                                             
                                                                 
 flatten (Flatten)           (None, 1250)              0         
                                                                 
 dense_2 (Dense)             (None, 500)              

### Split the data into train, validation and test sets

In [12]:
data = []
labels = []

with open('simple_labels.dat', 'rb') as f:
    lb = pickle.load(f)

# loop over the input images
for image_file in paths.list_images('extracted_letter_images'):
    # Load the image and convert it to grayscale
    image = cv2.imread(image_file)
    image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    # Resize the letter so it fits in a 20x20 pixel box
    image = resize_to_fit(image, 20, 20)

    # Add a third channel dimension to the image to make Keras work
    image = np.expand_dims(image, axis=2)

    # Grab the name of the letter based on the folder it was in
    label = image_file.split(os.path.sep)[-2]

    # Add the letter image and it's label to our training data
    data.append(image)
    labels.append(label)

# data[0].shape -> (20, 20, 1)

# Scale the raw pixel intensities to [0,1] -> improves training
data = np.array(data, dtype='float') / 255.0
labels = np.array(labels)

X_train, X_test_simple, y_train, y_test_simple = train_test_split(data, labels, test_size = 0.30, random_state=0, stratify=labels)

X_train_simple, X_val_simple, y_train_simple, y_val_simple = train_test_split(X_train, y_train, test_size = 0.10, random_state=0, stratify=y_train)

y_train_simple = lb.transform(y_train_simple)
y_val_simple = lb.transform(y_val_simple)
y_test_simple = lb.transform(y_test_simple)

(20, 20, 1)


In [13]:
tf.config.run_functions_eagerly(True)
tf.data.experimental.enable_debug_mode()

### Model training

In [14]:
new_model.fit(X_train_simple, y_train_simple, validation_data=(X_val_simple, y_val_simple), batch_size = 32, epochs = 10, verbose = 1)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7fc98ac21bb0>

In [15]:
new_model.save('transfer_model.hdf5')

### Model evaluation

In [25]:
results = new_model.evaluate(X_test_simple, y_test_simple, batch_size=32)
results



[0.041542135179042816, 0.9947522282600403]

### See the results in action

In [26]:
def simple_captcha_test_preprocessing(image_file):

    # Load image
    img = cv2.imread(image_file)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    img = cv2.copyMakeBorder(img, 20, 20, 20, 20, cv2.BORDER_REPLICATE)

    img_thresh = cv2.threshold(img, 0, 255, cv2.THRESH_BINARY_INV | cv2.THRESH_OTSU)[1]

    img_contours = cv2.findContours(img_thresh.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    img_contours = img_contours[1] if imutils.is_cv3() else img_contours[0]

    letter_image_regions = []

    for contour in img_contours:
        (x, y, w, h) = cv2.boundingRect(contour)

        if w / h > 1.25:
            half_width = int(w/2)
            letter_image_regions.append((x, y, half_width, h))
            letter_image_regions.append((x + half_width, y, half_width, h))
        else:
            letter_image_regions.append((x, y, w, h))

    if len(letter_image_regions) != 4:
        return None

    letter_image_regions = sorted(letter_image_regions, key= lambda x: x[0])

    return (img, letter_image_regions)

In [27]:
# grab images to test against
captcha_image_files = list(paths.list_images('generated_captcha_images'))
captcha_image_files = np.random.choice(captcha_image_files, size=(10,), replace=False)

In [29]:
for image_file in captcha_image_files:

    preprocess_return = simple_captcha_test_preprocessing(image_file)

    if preprocess_return == None:
        continue
    
    (image, letter_image_regions) = preprocess_return

    output = cv2.merge([image] * 3)
    predictions = []

    for letter_bounding_box in letter_image_regions:
        (x, y, w, h) = letter_bounding_box

        letter_image = image[y-2 : y + h + 2, x-2 : x+w+2]

        letter_image = resize_to_fit(letter_image, 20, 20)

        letter_image = np.expand_dims(letter_image, axis = 2)
        letter_image = np.expand_dims(letter_image, axis = 0)
        
        prediction = new_model.predict(letter_image)

        letter = lb.inverse_transform(prediction)[0]
        predictions.append(letter)

        cv2.rectangle(output, (x-2, y-2), (x+w+4, y+h+4), (0, 255, 0), 1)
        cv2.putText(output, letter, (x - 5, y - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.55, (0, 255, 0), 2)

    captcha_text = "".join(predictions)
    print("CAPTCHA text is {}".format(captcha_text))

    cv2.imshow("Output", output)
    cv2.waitKey(0)
    cv2.destroyAllWindows() 


CAPTCHA text is BCUW
CAPTCHA text is 3DY6
CAPTCHA text is P547
CAPTCHA text is XZFJ
CAPTCHA text is SJFT
CAPTCHA text is VP4F
CAPTCHA text is UACZ
CAPTCHA text is QC7L
CAPTCHA text is TLDG
CAPTCHA text is 7K6H
