# Get Data & clear previous data

We will clone my github repository that contains the input data and also the pretrained weights

In [12]:
!rm -rf *
!git clone https://github.com/adityagupta1089/Captcha-Generator-using-cGANs.git

Cloning into 'Captcha-Generator-using-cGANs'...
remote: Enumerating objects: 1079, done.[K
remote: Counting objects:   0% (1/1079)   [Kremote: Counting objects:   1% (11/1079)   [Kremote: Counting objects:   2% (22/1079)   [Kremote: Counting objects:   3% (33/1079)   [Kremote: Counting objects:   4% (44/1079)   [Kremote: Counting objects:   5% (54/1079)   [Kremote: Counting objects:   6% (65/1079)   [Kremote: Counting objects:   7% (76/1079)   [Kremote: Counting objects:   8% (87/1079)   [Kremote: Counting objects:   9% (98/1079)   [Kremote: Counting objects:  10% (108/1079)   [Kremote: Counting objects:  11% (119/1079)   [Kremote: Counting objects:  12% (130/1079)   [Kremote: Counting objects:  13% (141/1079)   [Kremote: Counting objects:  14% (152/1079)   [Kremote: Counting objects:  15% (162/1079)   [Kremote: Counting objects:  16% (173/1079)   [Kremote: Counting objects:  17% (184/1079)   [Kremote: Counting objects:  18% (195/1079)   [Kremote:

We will now move the input folder out of clone repo and remove the repo folder

In [0]:
!mv Captcha-Generator-using-cGANs/input/ input/
!rm -rf Captcha-Generator-using-cGANs

# Read Data

We define three functions that will encode and decode our labels and one-hot enconding back and forth. For this we have assigned each character in 5 character label a one-hot vector of size 36 (26 alphabets and 10 numbers).

*   `index(c)` converts `c`(alphabet or number)  to `0-35`
*   `character` converts `v`(number) to `a-z0-9`
*  `one_hot_label(label)` converts `label` to vector of size $36*5=180$
*  `get_label(one_hot)` converts vector of size `180` to string of size `5`



In [0]:
ALPHABETS = 26
NUMBERS = 10
CHARACTERS = 5
TOTAL = ALPHABETS + NUMBERS

def index(c):
    v = ord(c)
    if 'a' <= c and c <= 'z':
        return v - ord('a')
    elif '0' <= c and c <= '9':
        return ALPHABETS + v - ord('0')
    else:
        return TOTAL + 1
    
def character(v):
    if 0 <= v and v <= ALPHABETS - 1:
        return chr(v + ord('a'))
    elif ALPHABETS <= v and v <= TOTAL - 1:
        return chr(v - ALPHABETS + ord('0'))
    else:
        return '?'

def one_hot_label(label):
    result = []
    for c in label:
        char_result = [0] * TOTAL
        char_result[index(c)] = 1
        result.append(char_result)
    return result

def get_label(one_hot):
    result = ""
    for i in range(0, 180, 36):
        v = np.argmax(one_hot[i:i+36])
        result = result + character(v)
    return result

Now we read all data and create our two input arrays `X` and `Y`

In [31]:
import cv2
from PIL import Image as image
from os import listdir
import numpy as np

def loadData(path):
    images = listdir(path)
    X = []
    Y = []
    for y in images:
        x = cv2.imread(path + y)
        X.append(x)
        Y.append(one_hot_label(y.split('.')[0]))
        
    X = np.array(X)
    Y = np.array(Y)
    return X, Y

path = "input/"

X, Y = loadData(path)

img_shape = X.shape[1:]
print(X.shape)
print(Y.shape)


(1070, 50, 200, 3)
(1070, 5, 36)


#Train Test Split

We split the data into test and train in ratio of 20:80

In [38]:
from sklearn.model_selection import train_test_split

X_train, X_test, Y_train, Y_test = train_test_split(X, Y, 
                                                    test_size=0.2, 
                                                    random_state=0)

print(X_train.shape)
print(X_test.shape)
print(Y_train.shape)
print(Y_test.shape)

(856, 50, 200, 3)
(214, 50, 200, 3)
(856, 5, 36)
(214, 5, 36)


We also split `Y` into `5` different arrays for each output

In [0]:
def split_fun(arr):
    N = arr.shape[0]
    result = []
    for Yi in np.split(arr, CHARACTERS, axis=1):
        result.append(Yi.reshape(N, TOTAL))
    return result

def combine_fun(arr):
    return np.hstack(tuple(arr))

In [39]:
Y_train = split_fun(Y_train)
Y_test = split_fun(Y_test)

print([y.shape for y in Y_train])
print([y.shape for y in Y_test])

[(856, 36), (856, 36), (856, 36), (856, 36), (856, 36)]
[(214, 36), (214, 36), (214, 36), (214, 36), (214, 36)]


# Define Models

## Solver

### Model

Now we define a model similar to VGGnet

In [44]:
from keras.models import Sequential, Model
from keras.layers import (
     Dense, 
     Conv2D,
     Input,
     BatchNormalization, 
     MaxPooling2D, 
     Flatten, 
     Activation, 
     Dropout)
from keras import backend as K
import tensorflow as tf


def my_solver():
    model = Sequential(name="ConvPart")
    
    input_shape = X.shape[1:]
    
    model.add(Conv2D(64, (5, 5), input_shape=input_shape, 
                     activation='relu', padding='same'))
    model.add(Conv2D(64, (5, 5), activation='relu', padding='same'))
    model.add(BatchNormalization())
    model.add(MaxPooling2D(pool_size=(2, 2)))
    
    model.add(Conv2D(64, (5, 5), activation='relu', padding='same'))
    model.add(Conv2D(64, (5, 5), activation='relu', padding='same'))
    model.add(BatchNormalization())
    model.add(MaxPooling2D(pool_size=(2, 2)))
    
    model.add(Conv2D(64, (5, 5), activation='relu', padding='same'))
    model.add(Conv2D(64, (5, 5), activation='relu', padding='same'))
    model.add(BatchNormalization())
    model.add(MaxPooling2D(pool_size=(2, 2)))
    
    model.add(Conv2D(64, (5, 5), activation='relu', padding='same'))
    model.add(Conv2D(64, (5, 5), activation='relu', padding='same'))
    model.add(BatchNormalization())
    model.add(MaxPooling2D(pool_size=(2, 2)))
    
    model.add(Flatten())
    model.add(Dense(1024))
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    model.add(Dropout(0.5))
        
    model_input = Input(shape=input_shape, name="Input")
    
    outputs = []
    
    for i in range(0, CHARACTERS):
        outputs.append(Dense(TOTAL, activation='softmax', name="D" + str(i))(model(model_input)))
        
    complete_model = Model(model_input, outputs)
    
    losses = ['categorical_crossentropy'] * CHARACTERS
    
    complete_model.compile(loss=losses, metrics=['categorical_accuracy'], optimizer='adam')
    
    return complete_model

solver = my_solver()
solver.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
Input (InputLayer)              (None, 50, 200, 3)   0                                            
__________________________________________________________________________________________________
ConvPart (Sequential)           (None, 1024)         3087552     Input[0][0]                      
                                                                 Input[0][0]                      
                                                                 Input[0][0]                      
                                                                 Input[0][0]                      
                                                                 Input[0][0]                      
__________________________________________________________________________________________________
D0 (Dense)

### Training

In [69]:
h = solver.fit(X_train, 
           Y_train, 
           batch_size=8, 
           epochs=150, 
           verbose=1, 
           validation_data=(X_test, Y_test), 
           shuffle=True)

Train on 856 samples, validate on 214 samples
Epoch 1/150
Epoch 2/150
Epoch 3/150
Epoch 4/150
Epoch 5/150
Epoch 6/150
Epoch 7/150
Epoch 8/150
Epoch 9/150
Epoch 10/150
Epoch 11/150
Epoch 12/150
Epoch 13/150
Epoch 14/150
Epoch 15/150
Epoch 16/150
Epoch 17/150
Epoch 18/150
Epoch 19/150
Epoch 20/150
Epoch 21/150
Epoch 22/150
Epoch 23/150
Epoch 24/150
Epoch 25/150
Epoch 26/150
Epoch 27/150
Epoch 28/150
Epoch 29/150
Epoch 30/150
Epoch 31/150
Epoch 32/150
Epoch 33/150
Epoch 34/150
Epoch 35/150
Epoch 36/150
Epoch 37/150
Epoch 38/150
Epoch 39/150
Epoch 40/150
Epoch 41/150
Epoch 42/150
Epoch 43/150
Epoch 44/150
Epoch 45/150
Epoch 46/150
Epoch 47/150
Epoch 48/150
Epoch 49/150
Epoch 50/150
Epoch 51/150
Epoch 52/150
Epoch 53/150
Epoch 54/150
Epoch 55/150
Epoch 56/150
Epoch 57/150
Epoch 58/150
Epoch 59/150
Epoch 60/150
Epoch 61/150
Epoch 62/150
Epoch 63/150
Epoch 64/150
Epoch 65/150
Epoch 66/150
Epoch 67/150
Epoch 68/150
Epoch 69/150
Epoch 70/150
Epoch 71/150
Epoch 72/150
Epoch 73/150
Epoch 74/150
E

We can save weights if we want

In [0]:
solver.save_weights('solver.hdf5')

Or load weights if we want

In [0]:
solver.load_weights('solver.hdf5')

Let us see what the incorrectly labeled captchas look like

In [70]:
solver.evaluate(X_test, 
           Y_test, 
           batch_size=8, 
           verbose=1)

Y_pred = solver.predict(X_test)

def contendors(y):
    top = y.argsort()[-3:][::-1]
    return ', '.join([character(x) + "(" + ("%2.1f%%" % (100.0 * y[x])) + ")" for x in top])

i = 0
for y_test, y_pred in zip(combine_fun(Y_test), combine_fun(Y_pred)):
    label_test = get_label(y_test)
    label_pred = get_label(y_pred)
    if label_test != label_pred:
        i = i + 1        
        print(i, "Wrong: ", label_test, label_pred)
        for j in range(len(label_test)):
            if label_test[j] != label_pred[j]:
                print('\tposition', str(j+1) + ',', contendors(np.split(y_pred, CHARACTERS)[j]))    

1 Wrong:  wyc25 w4yc5
	position 2, 4(46.7%), y(18.4%), f(17.4%)
	position 3, y(99.7%), c(0.3%), e(0.0%)
	position 4, c(73.1%), 2(26.9%), w(0.0%)
2 Wrong:  c6f8g c6f8y
	position 5, y(92.3%), p(4.9%), b(1.1%)
3 Wrong:  bf52c bf5cc
	position 4, c(95.8%), 2(4.2%), 3(0.0%)
4 Wrong:  25257 25557
	position 3, 5(99.9%), 2(0.1%), 3(0.0%)
5 Wrong:  cx3wg ex3wg
	position 1, e(99.6%), x(0.3%), w(0.0%)
6 Wrong:  mmg38 mmgp8
	position 4, p(79.1%), g(10.2%), 3(6.2%)
7 Wrong:  ewcf5 ewc35
	position 4, 3(91.3%), c(5.2%), g(1.4%)
8 Wrong:  d22n7 d22m7
	position 4, m(86.0%), n(10.0%), y(1.4%)
9 Wrong:  mwdf6 mwwd6
	position 3, w(100.0%), x(0.0%), 4(0.0%)
	position 4, d(77.4%), w(22.4%), b(0.2%)
10 Wrong:  mxyxw mxyww
	position 4, w(80.7%), x(10.7%), c(6.9%)
11 Wrong:  m4g8g m44w8
	position 3, 4(91.3%), g(7.7%), x(0.7%)
	position 4, w(81.3%), g(13.9%), 8(2.3%)
	position 5, 8(99.6%), g(0.4%), n(0.0%)
12 Wrong:  w52fn wp5fn
	position 2, p(29.1%), f(25.6%), 5(18.4%)
	position 3, 5(47.0%), p(37.7%), 2(7.0%)
1