# Captchas Solver

### Toolset
- Python3
- OpenCV
- Keras
- Tensorflow

In [3]:
import os
import os.path
import cv2
import glob
import imutils

import pickle
import numpy as np
from sklearn.preprocessing import LabelBinarizer
from sklearn.model_selection import train_test_split
from keras.models import Sequential
from keras.layers.convolutional import Conv2D, MaxPooling2D
from keras.layers.core import Flatten, Dense
from imutils import paths

from keras.models import load_model

import matplotlib.pyplot as plt

### Converting sequences of characters to single character images

In [6]:
def captcha_training_samples_processing(input_folder, output_folder, show_edges=False):
    """
    Split every captcha image of a folder into single-character images.

    The expected text of a captcha is read from its file name (without the
    extension). Each image is converted to grayscale, padded, thresholded
    with Otsu's method and segmented through its external contours. The
    resulting regions are ordered left to right, paired with the characters
    of the file name and written to ``output_folder/<character>/<NNNNNN>.png``.

    The per-character numbering restarts at 1 on every call, so running the
    function twice with the same ``output_folder`` overwrites earlier crops.

    :param input_folder: folder whose entries are the captcha images
    :param output_folder: root folder that receives one sub-folder per character
    :param show_edges: debug aid; when True the Canny edge map of every
        captcha is displayed and processing blocks until a key is pressed
    :return: None
    """
    # Get a list of all the captcha images we need to process
    captcha_image_files = glob.glob(os.path.join(input_folder, "*"))
    counts = {}

    for (i, captcha_image_file) in enumerate(captcha_image_files):
        print(f"INFO: processing image {i+1}/{len(captcha_image_files)}")

        # Extract name of the file since it contains the captcha characters
        filename = os.path.basename(captcha_image_file)
        captcha_characters = os.path.splitext(filename)[0]

        # Load image; cv2.imread yields None for entries it cannot decode
        # (sub-folders, hidden files, ...), which would crash cvtColor below
        img = cv2.imread(captcha_image_file)
        if img is None:
            print(f"WARNING: could not read {captcha_image_file}, skipping")
            continue

        # Convert to gray scale and add some extra padding
        img_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        img_gray = cv2.copyMakeBorder(img_gray, 8, 8, 8, 8, cv2.BORDER_REPLICATE)

        if show_edges:
            # Smooth the image, run edge detection and display the result
            blurred = cv2.medianBlur(img_gray, 5)
            edges = cv2.Canny(blurred, 100, 200)
            cv2.imshow('edges', edges)

            # Wait for the user to press any key (this is necessary to avoid
            # the Python kernel crashing), then close all open windows
            cv2.waitKey(0)
            cv2.destroyAllWindows()

        # Threshold image -> convert it to pure black and white
        img_thresh = cv2.threshold(img_gray, 0, 255, cv2.THRESH_BINARY_INV | cv2.THRESH_OTSU)[1]

        img_contours = cv2.findContours(img_thresh.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)

        # Hack for compatibility with different OpenCV versions
        img_contours = img_contours[1] if imutils.is_cv3() else img_contours[0]

        letter_image_regions = []

        # Loop through contours and extract letter inside of each one
        for contour in img_contours:
            # Get the rectangle that contains the contour
            (x, y, w, h) = cv2.boundingRect(contour)

            # Compare height and width of the contour to detect letters that are conjoined in one chunk
            if w / h > 1.25:
                # Contour is way too large to be a single letter: split it in two halves
                half_width = int(w / 2)
                letter_image_regions.append((x, y, half_width, h))
                letter_image_regions.append((x + half_width, y, half_width, h))
            else:
                letter_image_regions.append((x, y, w, h))

        # NOTE: captchas whose number of regions differs from the number of
        # characters are not filtered out; zip() below silently truncates to
        # the shorter of the two sequences.

        # Sort regions from left to right
        letter_image_regions.sort(key=lambda region: region[0])

        # Save each letter on a separate image
        for region, letter in zip(letter_image_regions, captcha_characters):
            x, y, w, h = region

            # Extract the letter with a 2 pixels margin around the edges.
            # The start indices are clamped at 0: a negative start would wrap
            # around to the far side of the image and produce an empty crop.
            # Nothing is drawn onto img_gray here, so later crops stay clean.
            letter_image = img_gray[max(y - 2, 0) : y + h + 2, max(x - 2, 0) : x + w + 2]

            # One output folder per character, created on first use
            save_path = os.path.join(output_folder, letter)
            os.makedirs(save_path, exist_ok=True)

            # For numeration of images
            count = counts.get(letter, 1)

            p = os.path.join(save_path, "{}.png".format(str(count).zfill(6)))
            if not cv2.imwrite(p, letter_image):
                print(f"WARNING: could not write {p}")

            # Update number of times each character was seen
            counts[letter] = count + 1

In [7]:
# Segment the captcha image(s) found in 'kaggle-captchas1' and store the
# per-character crops under 'kaggle-captchas-extracted-letter'
captcha_training_samples_processing('kaggle-captchas1', 'kaggle-captchas-extracted-letter')
# TODO - work on better image preprocessing

INFO: processing image 1/1


In [75]:
# Segment every captcha in 'generated_captcha_images'; the resulting folder
# 'extracted_letter_images' is the one read when building the training data
captcha_training_samples_processing('generated_captcha_images', 'extracted_letter_images')

INFO: processing image 1/9955
INFO: processing image 2/9955
INFO: processing image 3/9955
INFO: processing image 4/9955
INFO: processing image 5/9955
INFO: processing image 6/9955
INFO: processing image 7/9955
INFO: processing image 8/9955
INFO: processing image 9/9955
INFO: processing image 10/9955
INFO: processing image 11/9955
INFO: processing image 12/9955
INFO: processing image 13/9955
INFO: processing image 14/9955
INFO: processing image 15/9955
INFO: processing image 16/9955
INFO: processing image 17/9955
INFO: processing image 18/9955
INFO: processing image 19/9955
INFO: processing image 20/9955
INFO: processing image 21/9955
INFO: processing image 22/9955
INFO: processing image 23/9955
INFO: processing image 24/9955
INFO: processing image 25/9955
INFO: processing image 26/9955
INFO: processing image 27/9955
INFO: processing image 28/9955
INFO: processing image 29/9955
INFO: processing image 30/9955
INFO: processing image 31/9955
INFO: processing image 32/9955
INFO: processing 

### Building and Training the Neural Network

In [None]:
def resize_to_fit(image, width, height):
    """
    A helper function to resize an image to fit within a given size.

    The image is first scaled along its longer side (aspect ratio is kept by
    imutils.resize), then padded symmetrically by replicating its border
    pixels, and finally resized to exactly ``(width, height)`` to absorb any
    rounding left over from the integer padding.

    :param image: image to resize (array whose first two axes are height, width)
    :param width: desired width in pixels
    :param height: desired height in pixels
    :return: the resized image
    """

    # grab the dimensions of the image
    (h, w) = image.shape[:2]

    # if the width is greater than the height then resize along
    # the width
    if w > h:
        image = imutils.resize(image, width=width)

    # otherwise, the height is greater than the width so resize
    # along the height
    else:
        image = imutils.resize(image, height=height)

    # determine the padding values for the width and height to
    # obtain the target dimensions; clamp at 0 because with a
    # non-square target the scaled image can exceed the box on the
    # other axis and copyMakeBorder does not accept negative borders
    padW = max(0, (width - image.shape[1]) // 2)
    padH = max(0, (height - image.shape[0]) // 2)

    # pad the image then apply one more resizing to handle any
    # rounding issues (and any overshoot clamped above)
    image = cv2.copyMakeBorder(image, padH, padH, padW, padW,
        cv2.BORDER_REPLICATE)
    image = cv2.resize(image, (width, height))

    # return the pre-processed image
    return image

- get each image, transform it to grayscale and scale it to 20x20 pixels
- extend each image with a 3rd dimension (the color-channel dimension, which is not used in this case)
- get the label of each image (taken from the name of the folder it is saved in)

- having data and labels:
    - split the data into training and test sets
    - convert labels to one-hot encoding to do multi-class classification
    - save the label encoder that maps labels to one-hot encodings, to later preprocess new observations

In [None]:
# Accumulators for the pre-processed letter images and their class names
data = []
labels = []

# Walk over every extracted letter image
for image_file in paths.list_images('extracted_letter_images'):
    # Read the crop from disk and collapse it to a single gray channel
    gray = cv2.cvtColor(cv2.imread(image_file), cv2.COLOR_BGR2GRAY)

    # Fit the letter into a 20x20 box, then append the channel axis Keras
    # expects, giving an array of shape (20, 20, 1)
    image = np.expand_dims(resize_to_fit(gray, 20, 20), axis=2)

    # The parent folder of the file is named after the letter it contains
    label = image_file.split(os.path.sep)[-2]

    data.append(image)
    labels.append(label)

# Pixel intensities go from [0, 255] to [0, 1] -> improves training
data = np.array(data, dtype='float') / 255.0
labels = np.array(labels)

# Hold out a quarter of the samples; the fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(
    data, labels, test_size=0.25, random_state=0
)

# One-hot encode the letters so Keras can do multi-class classification.
# The encoder is fitted on the training labels only and reused for the test labels.
lb = LabelBinarizer()
lb.fit(y_train)
y_train, y_test = lb.transform(y_train), lb.transform(y_test)

# Persist the label <-> one-hot mapping so predictions can be decoded later
with open("model_labels.dat", 'wb') as f:
    pickle.dump(lb, f)


- Model with 2 convolutional layers (20 filters + 50 filters) with ReLU activation functions

- TODO: check what MaxPooling does (here each pooling layer uses a 2x2 window with a stride of 2)

- Flatten the output of the second convolutional layer to use it as the input of a dense, fully connected layer

- Add 2 more dense layers, the latter as the output layer with 32 neurons (the same number as the possible output characters) and softmax activation

- Compile model with cross-entropy loss and Adam optimizer

- Fit the model to the training data and use test data for validation

- Save the model for later use to classifying new data


In [None]:
# The output layer needs exactly one unit per one-hot label column, so its
# width is read from the encoded training labels instead of being hard-coded
# (32 for the generated captcha data set)
num_classes = y_train.shape[1]

model = Sequential()

# 1st convolutional layer (20 filters of 5x5) with max pooling
model.add(Conv2D(20, (5, 5), padding="same", input_shape=(20, 20, 1), activation="relu"))
model.add(MaxPooling2D(pool_size=(2, 2), strides=(2, 2)))

# 2nd convolutional layer (50 filters of 5x5) with max pooling
model.add(Conv2D(50, (5, 5), padding="same", activation="relu"))
model.add(MaxPooling2D(pool_size=(2, 2), strides=(2, 2)))

# Hidden layer with 500 nodes
model.add(Flatten())
model.add(Dense(500, activation = 'relu'))

# Output layer with one node per possible character
model.add(Dense(num_classes, activation = 'softmax'))

# Ask Keras to build the model with TensorFlow behind the scenes
model.compile(loss = 'categorical_crossentropy', optimizer = 'Adam', metrics =['accuracy'])

# Train the neural network, using the held-out split for validation
model.fit(X_train, y_train, validation_data=(X_test, y_test), batch_size = 32, epochs = 10, verbose = 1)

# Save the trained network so it can be reloaded for classifying new data
model.save('captcha_model.hdf5')

### Using the network on new data

In [None]:
def captcha_testing_sample_preprocessing(image_file):
    """
    Locate the character regions of a captcha image.

    The image is loaded, converted to grayscale, padded by 20 pixels on every
    side, thresholded with Otsu's method and segmented through its external
    contours. Regions wider than 1.25 times their height are split into two
    halves.

    :param image_file: path of the captcha image
    :return: tuple ``(image, regions)`` with the padded grayscale image and
        the ``(x, y, w, h)`` regions sorted from left to right, or None when
        the number of regions found is not exactly 4
    """
    # Grayscale version of the captcha with a replicated 20 px border
    gray = cv2.cvtColor(cv2.imread(image_file), cv2.COLOR_BGR2GRAY)
    gray = cv2.copyMakeBorder(gray, 20, 20, 20, 20, cv2.BORDER_REPLICATE)

    # Pure black and white image (inverted, Otsu threshold)
    _, binary = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV | cv2.THRESH_OTSU)

    found = cv2.findContours(binary.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    # The position of the contour list depends on the OpenCV version
    contours = found[1] if imutils.is_cv3() else found[0]

    boxes = []
    for contour in contours:
        (left, top, box_w, box_h) = cv2.boundingRect(contour)

        if box_w / box_h > 1.25:
            # Too wide for one character: treat it as two conjoined letters
            half = int(box_w / 2)
            boxes.extend([
                (left, top, half, box_h),
                (left + half, top, half, box_h),
            ])
        else:
            boxes.append((left, top, box_w, box_h))

    # Anything other than four regions means the segmentation failed
    if len(boxes) != 4:
        return None

    # Order the regions from left to right
    boxes.sort(key=lambda box: box[0])

    return (gray, boxes)

In [None]:
# Restore the label encoder that was pickled when the training data was built.
# NOTE: pickle must only be used on files from a trusted source.
with open('model_labels.dat', 'rb') as f:
    lb = pickle.load(f)

# Restore the trained CNN
model = load_model('captcha_model.hdf5')

# Draw 10 distinct captcha images at random to test against
captcha_image_files = np.random.choice(
    list(paths.list_images('generated_captcha_images')),
    size=(10,),
    replace=False,
)

In [None]:
# Predict the text of every sampled captcha and display the annotated image
for image_file in captcha_image_files:

    preprocess_return = captcha_testing_sample_preprocessing(image_file)

    # None is returned when the captcha could not be split into 4 regions
    if preprocess_return is None:
        continue

    (image, letter_image_regions) = preprocess_return

    # 3-channel copy of the grayscale image so annotations can be colored
    output = cv2.merge([image] * 3)
    predictions = []

    for letter_bounding_box in letter_image_regions:
        (x, y, w, h) = letter_bounding_box

        # Crop the letter with a 2 pixels margin; the start indices are
        # clamped at 0 because a negative start would wrap around
        letter_image = image[max(y - 2, 0) : y + h + 2, max(x - 2, 0) : x + w + 2]

        letter_image = resize_to_fit(letter_image, 20, 20)

        # Scale to [0, 1] exactly like the training data; feeding raw 0-255
        # intensities to a network trained on [0, 1] inputs skews predictions
        letter_image = letter_image.astype('float') / 255.0

        # Add the channel axis and the batch axis -> shape (1, 20, 20, 1)
        letter_image = np.expand_dims(letter_image, axis = 2)
        letter_image = np.expand_dims(letter_image, axis = 0)

        prediction = model.predict(letter_image)

        # Map the one-hot style network output back to a character
        letter = lb.inverse_transform(prediction)[0]
        predictions.append(letter)

        # Annotate the output image with the region and the predicted letter
        cv2.rectangle(output, (x-2, y-2), (x+w+4, y+h+4), (0, 255, 0), 1)
        cv2.putText(output, letter, (x - 5, y - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.55, (0, 255, 0), 2)

    captcha_text = "".join(predictions)
    print("CAPTCHA text is {}".format(captcha_text))

    # Show the annotated captcha and wait for a key press before continuing
    cv2.imshow("Output", output)
    cv2.waitKey()

# Close the display window once all captchas have been shown
cv2.destroyAllWindows()

### Extending the model to work on new characters

In [None]:
# Start a new network that reuses the trained one as a feature extractor
new_model = Sequential()

# model.summary()
# Add every layer of the trained model except the last one (its output layer).
# The layer objects themselves are passed to add(), not copies of them.
for layer in model.layers[:-1]:
    new_model.add(layer)

# Mark the reused layers as non-trainable so only layers added afterwards learn
# NOTE(review): since the layer objects come from `model`, this presumably
# freezes them there as well - confirm before training `model` again
for layer in new_model.layers:
    layer.trainable = False

# Still missing: a new output layer sized for the new set of characters
# Output layer with 32 nodes -> number of possible characters
# new_model.add(Dense(32, activation = 'softmax'))

In [None]:
# Extract per-character crops from the Kaggle captchas in 'kaggle-captchas1'
# into 'kaggle-captchas-extracted-letter'
captcha_training_samples_processing('kaggle-captchas1', 'kaggle-captchas-extracted-letter')
# TODO - work on better image preprocessing