# Training a CNN for image classification

## importing libraries

In [1]:
import cv2
import pickle
import os.path
import numpy as np
from imutils import paths
from sklearn.preprocessing import LabelBinarizer
from sklearn.model_selection import train_test_split
from keras.models import Sequential
from keras.layers.convolutional import Conv2D, MaxPooling2D
from keras.layers.core import Flatten, Dense

Using TensorFlow backend.


## Initializing the paths for the number-images folder, the model file and the model labels file

In [2]:
# Root directory that holds the training images and receives the saved
# artifacts. Set the CAPTCHA_SCANNER_DIR environment variable to point the
# notebook at another location; the default keeps the original paths.
BASE_DIR = os.environ.get("CAPTCHA_SCANNER_DIR", "/home/drake/Desktop/captcha scanner")

IMAGES_FOLDER = os.path.join(BASE_DIR, "training")              # images, one sub-folder per label
MODEL_FILE = os.path.join(BASE_DIR, "captcha_model.hdf5")       # where the trained model is saved
MODEL_LABELS_FILE = os.path.join(BASE_DIR, "model_labels.dat")  # where the label binarizer is pickled

## initialize the data and labels

In [3]:
# Accumulators filled while walking the images folder:
# `data` collects the preprocessed images, `labels` the matching label of each one.
data, labels = [], []

## looping over images folder

In [4]:

# Start from empty lists so that re-running this cell does not append every
# image a second time.
data = []
labels = []

for image_file in paths.list_images(IMAGES_FOLDER):

    image = cv2.imread(image_file)                     # read the image
    if image is None:
        # cv2.imread reports an unreadable or corrupt file by returning None
        # instead of raising; skip it rather than crashing in cvtColor below.
        print("Skipping unreadable image:", image_file)
        continue

    image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)    # convert to gray scale
    image = cv2.resize(image, (25, 25))                # resize the number image to 25x25
    _, image = cv2.threshold(image, 127, 255, cv2.THRESH_BINARY)  # binarize the image after resizing

    image = np.expand_dims(image, axis=2)              # add a third (channel) dimension

    # The label is the name of the folder that contains the image,
    # i.e. the path looks like '.../<label>/xyz.png'.
    label = image_file.split(os.path.sep)[-2]

    data.append(image)
    labels.append(label)

## Scaling the pixel values to the range 0 to 1

In [5]:
# Turn the list of images into a single float array, then rescale the
# 0..255 pixel values to 0..1.
data = np.array(data, dtype="float")
data = data / 255.0

# The labels become a NumPy array as well.
labels = np.array(labels)


##  splitting the dataset

In [6]:
# Hold out 25% of the samples. Stratifying on the labels keeps the class
# proportions approximately equal in both parts, which avoids a split where a
# rare class is under-represented in the training part. random_state pins the
# shuffle so the split is reproducible.
(X_train, X_test, Y_train, Y_test) = train_test_split(
    data, labels, test_size=0.25, random_state=0, stratify=labels
)

##  one hot encoding the labels

In [7]:
# Learn the label -> one-hot mapping from the training labels only,
# then encode both label sets with that same mapping.
lb = LabelBinarizer()
lb.fit(Y_train)
Y_train, Y_test = lb.transform(Y_train), lb.transform(Y_test)


## saving the mapping of one hot encoding for prediction decoding

In [8]:
# Persist the fitted label binarizer so that predictions can later be
# decoded back into the original labels.
with open(MODEL_LABELS_FILE, "wb") as labels_file:
    pickle.dump(lb, labels_file)

## MODEL building

In [9]:
# Take the layer sizes from the prepared arrays instead of repeating them as
# literals, so the network always matches the data it is trained on.
input_shape = X_train.shape[1:]   # per-sample shape of the preprocessed images
num_classes = Y_train.shape[1]    # width of the one-hot encoded labels

model = Sequential()  # type of neural network: a linear stack of layers

# First convolutional layer: 25 filters of size 5x5, followed by 2x2 max pooling
model.add(Conv2D(25, (5, 5), padding="same", input_shape=input_shape, activation="relu"))
model.add(MaxPooling2D(pool_size=(2, 2), strides=(2, 2)))

# Second convolutional layer: 50 filters of size 5x5, followed by 2x2 max pooling
model.add(Conv2D(50, (5, 5), padding="same", activation="relu"))
model.add(MaxPooling2D(pool_size=(2, 2), strides=(2, 2)))

# Flatten the feature maps and feed them to a fully connected hidden layer with 450 nodes
model.add(Flatten())
model.add(Dense(450, activation="relu"))

# Output layer with one node per class (one for each number)
model.add(Dense(num_classes, activation="softmax"))

Instructions for updating:
Colocations handled automatically by placer.


## compiling the model

In [10]:
# Configure training: Adam optimizer, categorical cross-entropy loss
# (the labels are one-hot encoded), and accuracy reported as a metric.
model.compile(
    optimizer="adam",
    loss="categorical_crossentropy",
    metrics=["accuracy"],
)

##  training the network

In [11]:
# Train for 20 epochs in batches of 32; the test split doubles as the
# validation set. Keep the returned History object (per-epoch metrics are
# available in history.history) instead of discarding it and leaving only its
# repr as the cell output.
history = model.fit(
    X_train,
    Y_train,
    validation_data=(X_test, Y_test),
    batch_size=32,
    epochs=20,
    verbose=1,
)

Instructions for updating:
Use tf.cast instead.
Train on 3517 samples, validate on 1173 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.History at 0x7f6a5ff1d390>

## saving the model

In [12]:
# Write the trained model to disk at the configured model path.
model.save(filepath=MODEL_FILE)