In [1]:
%matplotlib inline

In [2]:
import config as cfg
# import os
# os.environ['KERAS_BACKEND'] = 'tensorflow'

In [3]:
import os
import matplotlib.pylab as plt
import matplotlib.image as mpimg
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder


In [4]:
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import Convolution2D, MaxPooling2D, Conv2D
from keras.optimizers import SGD
from keras.models import model_from_json
from keras.utils import np_utils

In [5]:
def preprocess(img_orig, digit=6):
    """Cut an image into `digit` equal-width tiles and divide the pixels by 255.

    The right-hand columns that do not fit into a whole number of tiles are
    cropped away, the remaining pixels are divided by 255, and the image is
    split along its width into `digit` side-by-side tiles.

    Args:
        img_orig: Array-like image of shape (height, width, channels). A 2-D
            (height, width) image is also accepted and is treated as having a
            single channel.
        digit: Number of tiles to cut the image into (default 6).

    Returns:
        A list of `digit` arrays ordered left to right, each of shape
        (height, width // digit, channels).

    Raises:
        ValueError: If `digit` is smaller than 1, if the image is neither 2-D
            nor 3-D, or if the image is narrower than `digit` pixels.
    """
    if digit < 1:
        raise ValueError(f'digit must be a positive integer, got {digit!r}')

    img = np.asarray(img_orig)
    if img.ndim == 2:
        # Grayscale input: add an explicit channel axis so the tiles keep the
        # (rows, cols, channels) layout of colour input.
        img = img[:, :, np.newaxis]
    elif img.ndim != 3:
        raise ValueError(
            f'expected a 2-D or 3-D image, got an array with {img.ndim} dimension(s)')

    height, width, channels = img.shape
    new_width = width // digit
    if new_width == 0:
        raise ValueError(
            f'image width {width} is too small to be split into {digit} tiles')

    # NOTE(review): the division assumes pixel values in the 0..255 range.
    # Some loaders already return PNG data scaled to 0..1 -- confirm against
    # the caller before changing this, since saved weights depend on it.
    scaled = img[:, :new_width * digit, :] / 255

    # Each image row holds `digit` consecutive runs of `new_width` pixels, so
    # a plain reshape exposes the tile index as axis 1.
    tiles = scaled.reshape(height, digit, new_width, channels)
    return [tiles[:, idx, :, :] for idx in range(digit)]

In [None]:
# Directory that is scanned for the training images.
dirname = 'img/'

# Number of tiles every image is cut into (passed to preprocess()).
digit = cfg.DIGIT_CAPTCHA

# Destination files for the model architecture (JSON) and its weights.
fname_model, fname_weights = cfg.PATH_CNN_MODEL, cfg.PATH_CNN_WEIGHTS

In [6]:
# Training samples are the PNG files in `dirname` whose names do NOT start
# with 'img'; the leading `digit` characters of each file name are its label.
# Sorting makes the sample order independent of the filesystem listing order.
files = sorted(
    f for f in os.listdir(dirname)
    if not f.startswith('img') and f.endswith('.png')
)
if not files:
    raise FileNotFoundError(f'no labelled .png files found in {dirname!r}')

y_ = []
X = []
for file in files:
    # One label character per tile: the label length must follow `digit`,
    # the same value that controls how many tiles preprocess() returns.
    suby = list(file[:digit])
    # NOTE(review): the positional 0 lands in imread's `format` parameter
    # (it is not a colour flag); kept as-is because it can influence which
    # decoder and pixel range are used -- confirm it is intentional.
    subx = preprocess(mpimg.imread(os.path.join(dirname, file), 0), digit)
    y_ += suby
    X += subx

# Column vector of integer class labels and the stacked tile array.
y_ = np.array(y_).reshape(len(y_), 1).astype(int)
X = np.array(X)

# One Hot encode the class labels. The categories are pinned to the ten
# digits so the encoding always has 10 columns (matching the 10-unit softmax
# output of the network) even when some digit never occurs in the data.
encoder = OneHotEncoder(sparse=False, categories=[list(range(10))])
y = encoder.fit_transform(y_)

# Split the data for training and testing; the fixed seed makes the split
# (and therefore the reported scores) reproducible across runs.
X_train, X_test, Y_train, Y_test = train_test_split(
    X, y, test_size=0.20, random_state=42)

reference: https://gist.github.com/NiharG15/cd8272c9639941cf8f481a7c4478d525

In [7]:
# Training hyper-parameters.
batch_size = 32
nb_epoch = 50
nb_classes = 10

# Input geometry read from the stacked sample array:
# axis 1 -> rows, axis 2 -> columns, axis 3 -> channels.
img_rows, img_cols, img_channels = (X.shape[axis] for axis in (1, 2, 3))

model = Sequential([
    # Convolution block 1: two 3x3 convolutions with 32 filters each,
    # followed by 2x2 max-pooling and dropout.
    Conv2D(32, (3, 3), padding='same',
           input_shape=(img_rows, img_cols, img_channels)),
    Activation('relu'),
    Conv2D(32, (3, 3)),
    Activation('relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.25),

    # Convolution block 2: same layout with 64 filters.
    Conv2D(64, (3, 3), padding='same'),
    Activation('relu'),
    Conv2D(64, (3, 3)),
    Activation('relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.25),

    # Classifier head: one hidden dense layer, dropout against overfitting,
    # then a softmax over the nb_classes outputs.
    Flatten(),
    Dense(512),
    Activation('relu'),
    Dropout(0.5),
    Dense(nb_classes),
    Activation('softmax'),
])

In [17]:
# Optional warm start: uncomment to continue from a previously saved model
# instead of training the freshly built one.
# if os.path.exists(fname_model) and os.path.exists(fname_weights):
#     with open(fname_model) as model_file:
#         model = model_from_json(model_file.read())
#     model.load_weights(fname_weights)

# Stochastic gradient descent with Nesterov momentum and a small
# learning-rate decay.
optimizer = SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
model.compile(optimizer, loss='categorical_crossentropy', metrics=['accuracy'])

print('Neural Network Model Summary: ')
# summary() prints the table itself and returns None, so wrapping it in
# print() would append a stray "None" line to the output.
model.summary()


Neural Network Model Summary: 
Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_1 (Conv2D)            (None, 28, 22, 32)        896       
_________________________________________________________________
activation_1 (Activation)    (None, 28, 22, 32)        0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 26, 20, 32)        9248      
_________________________________________________________________
activation_2 (Activation)    (None, 26, 20, 32)        0         
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 13, 10, 32)        0         
_________________________________________________________________
dropout_1 (Dropout)          (None, 13, 10, 32)        0         
_________________________________________________________________
conv2d_3 (Conv2D)      

In [9]:
# Train the network; the held-out split is passed as validation data so its
# loss/accuracy are reported after every epoch.
model.fit(
    X_train,
    Y_train,
    validation_data=(X_test, Y_test),
    epochs=nb_epoch,
    batch_size=batch_size,
    shuffle=True,
)

Train on 964 samples, validate on 242 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<keras.callbacks.callbacks.History at 0x7f625c3befd0>

In [14]:
# Evaluate on the held-out split. With metrics=['accuracy'] at compile time,
# the result holds the loss first and the accuracy second.
score = model.evaluate(X_test, Y_test, verbose=0)
test_loss, test_accuracy = score[0], score[1]
print('Test score:', test_loss)
print('Test accuracy:', test_accuracy)

Test score: 0.00018389043318226063
Test accuracy: 1.0


In [16]:
# Persist the trained model: the architecture as JSON in `fname_model`, the
# weights separately in `fname_weights`.
json_string = model.to_json()
# The context manager guarantees the file is flushed and closed even if the
# write fails; a bare open(...).write(...) leaves the handle dangling.
with open(fname_model, 'w') as model_file:
    model_file.write(json_string)
model.save_weights(fname_weights)