# Downloading Data

In [None]:
# Install the Kaggle API token so the `kaggle` CLI can authenticate.
# The token is copied from drive/MyDrive (presumably a mounted Google Drive --
# TODO confirm the mount step runs before this cell).
!mkdir -p ~/.kaggle
!cp drive/MyDrive/kaggle.json ~/.kaggle/
# Restrict the token file to owner read/write only.
!chmod 600 ~/.kaggle/kaggle.json

In [None]:
# Download the fanbyprinciple/captcha-images dataset archive with the Kaggle CLI.
!kaggle datasets download fanbyprinciple/captcha-images

Downloading captcha-images.zip to /content
 65% 8.00M/12.4M [00:00<00:00, 82.2MB/s]
100% 12.4M/12.4M [00:00<00:00, 79.5MB/s]


In [None]:
# Extract the archive into the working directory; stdout is discarded to keep
# the cell output short.
!unzip captcha-images.zip -d . > /dev/null

In [None]:
import os

# Sanity check: number of entries in the extracted captcha_images directory.
len(os.listdir("captcha_images"))

9955

# Data Preparation

In [None]:
import os
from collections import defaultdict
import cv2
import random
from sklearn.model_selection import train_test_split
from sklearn.decomposition import PCA
from sklearn.ensemble import RandomForestClassifier
from sklearn.pipeline import Pipeline
from sklearn.metrics import classification_report, accuracy_score

In [None]:
# os.listdir returns entries in arbitrary order, so sort first and shuffle with
# a dedicated seeded generator: the example order is then reproducible across
# runs without touching the global `random` state.
image_names = sorted(os.listdir("captcha_images"))
random.Random(42).shuffle(image_names)

# `data` is a list of (image, captcha_text) pairs.
data = []
for image_name in image_names:
    # The part of the file name before the first dot is the captcha text.
    captcha = image_name.split(".")[0]
    img = cv2.imread(f"captcha_images/{image_name}")

    # cv2.imread returns None (it does not raise) when a file cannot be read
    # or decoded; skip such files so later cells can rely on `img.shape`.
    if img is None:
        continue

    data.append((img, captcha))

In [None]:
# Hold out 20% of whole captchas for testing. Splitting happens at captcha
# level (before cutting into characters) so characters of one captcha never
# end up on both sides. A fixed random_state keeps the split, and therefore
# the reported metrics, reproducible between runs.
train_imgs, test_imgs, train_captchas, test_captchas = train_test_split(
    [img for img, _ in data],
    [captcha for _, captcha in data],
    test_size=0.2,
    random_state=42,
)

In [None]:
def split_captcha_img(img, captcha):
    """Cut a captcha image into equal-width vertical strips, one per character.

    The image is sliced along its second axis (columns) into ``len(captcha)``
    consecutive strips; strip ``i`` is paired with character ``i`` of
    ``captcha``. Only ``img.shape[1]`` is read, so both 2-D (grayscale) and
    3-D (colour) arrays are accepted.

    Args:
        img: Array indexed as ``[row, column, ...]``.
        captcha: The captcha text; its length determines the number of strips.

    Returns:
        A list of ``(char_img, char)`` tuples in left-to-right order. Empty
        when ``captcha`` is empty.
    """
    # Axis 0 is height and axis 1 is width; only the width is needed here.
    width = img.shape[1]
    chars = list(captcha)
    n_chars = len(chars)

    # Integer boundaries idx*width//n .. (idx+1)*width//n tile the full width
    # without gaps or overlap even when width is not divisible by n_chars.
    return [
        (img[:, idx * width // n_chars:(idx + 1) * width // n_chars], char)
        for idx, char in enumerate(chars)
    ]

def prepare_training_data(imgs, captchas, shuffle_flag=True, flatten_flag=False):
    """Expand whole captchas into per-character samples.

    Each image/text pair is passed to ``split_captcha_img`` and the resulting
    ``(char_img, char)`` pairs are pooled into one list.

    Args:
        imgs: Iterable of captcha images.
        captchas: Iterable of captcha texts, aligned with ``imgs``.
        shuffle_flag: When true, shuffle the pooled samples in place with
            ``random.shuffle``.
        flatten_flag: When true, each character image is returned as a 1-D
            array instead of keeping its original shape.

    Returns:
        A tuple ``(char_images, chars)`` of two lists of equal length.
    """
    samples = []
    for captcha_img, captcha_text in zip(imgs, captchas):
        samples += split_captcha_img(captcha_img, captcha_text)

    if shuffle_flag:
        random.shuffle(samples)

    features, targets = [], []
    for char_img, char in samples:
        # Flattening turns each character crop into a single feature vector.
        features.append(char_img.reshape(1, -1)[0] if flatten_flag else char_img)
        targets.append(char)

    return features, targets

# PCA + Random Forest


In [None]:
# Expand the train/test captchas into per-character samples. flatten_flag=True
# yields 1-D vectors per character, the input format used for the PCA +
# random-forest pipeline fitted in the following cell.
X_train, y_train = prepare_training_data(train_imgs, train_captchas, flatten_flag=True)
X_test, y_test = prepare_training_data(test_imgs, test_captchas, flatten_flag=True)

In [None]:
# Baseline model: project the flattened character pixels onto 50 principal
# components, then classify them with a class-balanced random forest.
model = Pipeline(
    steps=[
        ('pca', PCA(n_components=50)),
        (
            'clf',
            RandomForestClassifier(
                n_estimators=51,
                class_weight="balanced",
                n_jobs=-1,
            ),
        ),
    ]
)
model.fit(X_train, y_train)

Pipeline(memory=None,
         steps=[('pca',
                 PCA(copy=True, iterated_power='auto', n_components=50,
                     random_state=None, svd_solver='auto', tol=0.0,
                     whiten=False)),
                ('clf',
                 RandomForestClassifier(bootstrap=True, ccp_alpha=0.0,
                                        class_weight='balanced',
                                        criterion='gini', max_depth=None,
                                        max_features='auto',
                                        max_leaf_nodes=None, max_samples=None,
                                        min_impurity_decrease=0.0,
                                        min_impurity_split=None,
                                        min_samples_leaf=1, min_samples_split=2,
                                        min_weight_fraction_leaf=0.0,
                                        n_estimators=51, n_jobs=-1,
                                        oob_score=Fa

In [None]:
# Predict one character label per held-out character sample.
preds = model.predict(X_test)

In [None]:
# Per-character precision/recall/F1 on the held-out samples.
print(classification_report(y_test, preds))

              precision    recall  f1-score   support

           2       0.91      0.97      0.94       232
           3       0.87      0.91      0.89       265
           4       0.95      0.90      0.92       231
           5       0.84      0.88      0.86       227
           6       0.89      0.92      0.90       228
           7       0.90      0.92      0.91       265
           8       0.90      0.88      0.89       255
           9       0.96      0.98      0.97       264
           A       0.93      0.97      0.95       248
           B       0.91      0.90      0.91       245
           C       0.94      0.96      0.95       247
           D       0.93      0.92      0.92       246
           E       0.90      0.88      0.89       226
           F       0.84      0.88      0.86       240
           G       0.98      0.90      0.94       240
           H       0.89      0.96      0.92       250
           J       0.89      0.93      0.91       247
           K       0.91    

In [None]:
# Estimate whole-captcha accuracy from per-character accuracy.
# NOTE(review): the exponent 4 presumably reflects 4 characters per captcha,
# and the formula assumes character errors are independent -- confirm both.
char_acc = accuracy_score(y_test, preds)
expected_acc = char_acc ** 4
print(round(expected_acc, 2))

0.75


# Neural Network

In [None]:
import os
import cv2
import imutils
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelBinarizer
from sklearn.metrics import classification_report, accuracy_score
from keras.models import Sequential
from keras.layers.convolutional import Conv2D, MaxPooling2D
from keras.layers.core import Flatten, Dense
from keras.callbacks import EarlyStopping

In [None]:
class ImagePreprocessor:
    """Convert captcha image files into per-letter grayscale crops and labels.

    Each captcha is converted to grayscale, padded, thresholded and searched
    for external contours. The bounding boxes of those contours (with overly
    wide boxes split in two) are treated as letters. Captchas that do not yield
    exactly ``CAPTCHA_LENGTH`` boxes are skipped.
    """

    # Number of letter boxes a captcha must yield to be kept.
    CAPTCHA_LENGTH = 4
    # Width in pixels of the replicated border added around the captcha.
    BORDER = 8
    # Extra pixels kept around each letter's bounding box when cropping.
    MARGIN = 2
    # Side length in pixels of every output letter image.
    LETTER_SIZE = 20
    # Boxes whose width/height ratio exceeds this are split into two halves.
    MAX_ASPECT_RATIO = 1.25

    def process(self, image_paths):
        """Extract letter images and labels from the given captcha files.

        Args:
            image_paths: Iterable of paths to captcha image files. The file
                name up to the first dot is used as the captcha text.

        Returns:
            A tuple ``(images, labels)``: ``images`` is a list of arrays of
            shape ``(LETTER_SIZE, LETTER_SIZE, 1)`` and ``labels`` the matching
            list of single characters. Files that cannot be read, or that do
            not produce exactly ``CAPTCHA_LENGTH`` boxes, contribute nothing.
        """
        images, labels = [], []

        for captcha_image_file in image_paths:
            captcha_label = self._get_captcha_label(captcha_image_file)
            gray, letter_bounding_rectangles = self._captcha_to_gray_scale_and_bounding_rectangles(captcha_image_file)

            # Also covers unreadable files, which come back with no boxes.
            if len(letter_bounding_rectangles) != self.CAPTCHA_LENGTH:
                continue

            letter_images, letter_labels = self._crop_bounding_rectangles_and_save_to_file(letter_bounding_rectangles, gray, captcha_label)

            images.extend(letter_images)
            labels.extend(letter_labels)

        return images, labels

    def _preprocess_captcha(self, img):
        """Return ``(gray_with_border, binary)`` for a BGR captcha image.

        The grayscale image gets a replicated border of ``BORDER`` pixels on
        every side; ``binary`` is that image after inverted Otsu thresholding.
        """
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        gray_with_border = cv2.copyMakeBorder(
            gray, self.BORDER, self.BORDER, self.BORDER, self.BORDER, cv2.BORDER_REPLICATE
        )
        preprocessed = cv2.threshold(gray_with_border, 0, 255, cv2.THRESH_BINARY_INV | cv2.THRESH_OTSU)[1]
        return gray_with_border, preprocessed

    def _get_captcha_label(self, path_to_file):
        """Return the file's base name up to (not including) its first dot."""
        filename = os.path.basename(path_to_file)
        label = filename.split(".")[0]
        return label

    def _find_bounding_rectangles_of_contours(self, contours):
        """Return ``(x, y, w, h)`` boxes for the contours.

        A box wider than ``MAX_ASPECT_RATIO`` times its height is split into a
        left and a right half, each recorded as its own box.
        """
        letter_bounding_rectangles = []
        for contour in contours:
            (x, y, w, h) = cv2.boundingRect(contour)
            if w / h > self.MAX_ASPECT_RATIO:
                half_width = int(w / 2)
                letter_bounding_rectangles.append((x, y, half_width, h))
                letter_bounding_rectangles.append((x + half_width, y, half_width, h))
            else:
                letter_bounding_rectangles.append((x, y, w, h))
        return letter_bounding_rectangles

    def _captcha_to_gray_scale_and_bounding_rectangles(self, captcha_image_file):
        """Load a captcha and locate its letter boxes.

        Returns:
            ``(gray, boxes)`` where ``gray`` is the bordered grayscale image
            and ``boxes`` the ``(x, y, w, h)`` tuples sorted by ``x``. If the
            file cannot be read, returns ``(None, [])``.
        """
        image = cv2.imread(captcha_image_file)
        # cv2.imread signals failure by returning None rather than raising.
        if image is None:
            return None, []

        gray, preprocessed = self._preprocess_captcha(image)
        found = cv2.findContours(preprocessed.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
        # OpenCV 3.x returns (image, contours, hierarchy) while 2.x and 4.x
        # return (contours, hierarchy); the contours are always second to last.
        contours = found[-2]
        letter_bounding_rectangles = self._find_bounding_rectangles_of_contours(contours)
        # Sort left to right so boxes line up with the label's characters.
        letter_bounding_rectangles = sorted(letter_bounding_rectangles, key=lambda rect: rect[0])
        return gray, letter_bounding_rectangles

    def _crop_letter(self, gray, letter_bounding_rectangle):
        """Crop one box out of ``gray`` with ``MARGIN`` pixels of padding.

        The top/left bounds are clamped at 0: a negative slice start would
        otherwise wrap around and produce an empty crop. Bottom/right bounds
        past the image edge are truncated by slicing itself.
        """
        x, y, w, h = letter_bounding_rectangle
        top = max(y - self.MARGIN, 0)
        left = max(x - self.MARGIN, 0)
        return gray[top:y + h + self.MARGIN, left:x + w + self.MARGIN]

    def _crop_bounding_rectangles_and_save_to_file(self, letter_bounding_rectangles, gray, captcha_label):
        """Crop, resize and label every letter box of one captcha.

        Despite the name, nothing is written to disk; the crops are returned.

        Returns:
            ``(images, labels)``: arrays of shape
            ``(LETTER_SIZE, LETTER_SIZE, 1)`` and their characters, paired by
            position with ``captcha_label``.
        """
        images, labels = [], []

        for letter_bounding_rectangle, current_letter in zip(letter_bounding_rectangles, captcha_label):
            letter_image = self._crop_letter(gray, letter_bounding_rectangle)
            letter_image = self._resize_image_to_dimensions(letter_image, self.LETTER_SIZE, self.LETTER_SIZE)
            # Add a trailing channel axis: (H, W) -> (H, W, 1).
            letter_image = np.expand_dims(letter_image, axis=2)

            images.append(letter_image)
            labels.append(current_letter)

        return images, labels

    def _resize_image_to_dimensions(self, image, desired_width, desired_height):
        """Resize ``image`` to ``desired_width`` x ``desired_height``.

        The longer side is scaled to its target first, the shorter side is
        padded symmetrically with replicated border pixels, and a final resize
        absorbs any rounding difference left by the integer padding.
        """
        (h, w) = image.shape[:2]
        if w > h:
            image = imutils.resize(image, width=desired_width)
        else:
            image = imutils.resize(image, height=desired_height)
        pad_width = int((desired_width - image.shape[1]) / 2.0)
        pad_height = int((desired_height - image.shape[0]) / 2.0)
        image_with_border = cv2.copyMakeBorder(image, pad_height, pad_height, pad_width, pad_width, cv2.BORDER_REPLICATE)
        image_with_border_resized = cv2.resize(image_with_border, (desired_width, desired_height))
        return image_with_border_resized

In [None]:
# Sort the directory listing: os.listdir returns entries in arbitrary order,
# and the seeded train/test split below is only reproducible if the samples
# arrive in the same order on every run.
captchas = [f"captcha_images/{file_name}" for file_name in sorted(os.listdir("captcha_images"))]

images, labels = ImagePreprocessor().process(captchas)

# Scale 8-bit pixel intensities to [0, 1] and turn the labels into an array.
images = np.array(images, dtype="float") / 255.0
labels = np.array(labels)

In [None]:
# Hold out 20% of the letter samples for testing; random_state fixes the split.
# NOTE(review): this splits individual letters, so letters from one captcha can
# land on both sides of the split (the PCA + Random Forest section splits
# whole captchas instead).
X_train, X_test, y_train, y_test = train_test_split(images, labels, test_size=0.2, random_state=11)

In [None]:
# Fit the label encoder on the training labels only, then encode both splits.
# NOTE(review): y_train / y_test are overwritten in place, so this cell is not
# idempotent -- re-running it fits the binarizer on already-encoded labels.
# Re-run the split cell first if this cell needs to be executed again.
label_binarizer = LabelBinarizer().fit(y_train)
y_train = label_binarizer.transform(y_train)
y_test = label_binarizer.transform(y_test)

In [None]:
# One output unit per distinct character seen by the label binarizer.
num_classes = len(label_binarizer.classes_)

# Small CNN: two conv + max-pool stages, then a dense hidden layer and a
# softmax over the character classes. Input is a 20x20 single-channel image.
NN_model = Sequential(
    [
        Conv2D(20, (5, 5), padding="same", input_shape=(20, 20, 1), activation="relu"),
        MaxPooling2D(pool_size=(2, 2), strides=(2, 2)),
        Conv2D(50, (5, 5), padding="same", activation="relu"),
        MaxPooling2D(pool_size=(2, 2), strides=(2, 2)),
        Flatten(),
        Dense(512, activation="relu"),
        Dense(num_classes, activation="softmax"),
    ]
)
NN_model.compile(loss="categorical_crossentropy", optimizer="adam", metrics=["accuracy"])
NN_model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 20, 20, 20)        520       
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 10, 10, 20)        0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 10, 10, 50)        25050     
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 5, 5, 50)          0         
_________________________________________________________________
flatten (Flatten)            (None, 1250)              0         
_________________________________________________________________
dense (Dense)                (None, 512)               640512    
_________________________________________________________________
dense_1 (Dense)              (None, 32)                1

In [None]:
# Stop once validation loss has not improved for 3 epochs and roll back to the
# best epoch's weights, so the final model is not the one from 3 epochs past
# the optimum.
early_stopping = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)

# Early stopping is driven by a slice of the TRAINING data (validation_split),
# not by the test set: using X_test/y_test for model selection would leak test
# information into training and bias the metrics reported below.
NN_model.fit(
    X_train,
    y_train,
    validation_split=0.1,
    batch_size=16,
    epochs=20,
    verbose=1,
    callbacks=[early_stopping],
)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20


<keras.callbacks.History at 0x7f100193d210>

In [None]:
# Class scores for each held-out letter image (one softmax row per sample).
probs = NN_model.predict(X_test)
# Map each score row back to a character label via the fitted binarizer.
preds = label_binarizer.inverse_transform(probs)

In [None]:
# Decode the encoded test targets back to characters for the report.
# NOTE(review): this rebinds `labels`, which earlier held the labels of the
# full dataset; re-running the train/test split cell after this one will use
# the wrong array. Re-run the preprocessing cell first in that case.
labels = label_binarizer.inverse_transform(y_test)
print(classification_report(labels, preds, digits=4))

              precision    recall  f1-score   support

           2     1.0000    0.9960    0.9980       253
           3     0.9960    1.0000    0.9980       248
           4     0.9957    1.0000    0.9979       233
           5     1.0000    0.9960    0.9980       249
           6     1.0000    1.0000    1.0000       225
           7     1.0000    1.0000    1.0000       247
           8     0.9959    1.0000    0.9979       240
           9     1.0000    0.9960    0.9980       249
           A     1.0000    0.9954    0.9977       217
           B     1.0000    1.0000    1.0000       258
           C     1.0000    1.0000    1.0000       233
           D     1.0000    1.0000    1.0000       239
           E     1.0000    1.0000    1.0000       253
           F     0.9962    1.0000    0.9981       260
           G     1.0000    1.0000    1.0000       232
           H     1.0000    0.9956    0.9978       226
           J     0.9960    1.0000    0.9980       248
           K     1.0000    

In [None]:
# Estimate whole-captcha accuracy from per-character accuracy.
# NOTE(review): the exponent 4 presumably reflects 4 characters per captcha,
# and the formula assumes character errors are independent. Captchas that the
# preprocessor skipped are not counted here -- confirm this is intended.
char_acc = accuracy_score(labels, preds)
expected_acc = char_acc ** 4
print(round(expected_acc, 4))

0.9969
