In [2]:
import struct
import numpy as np
import cv2


def __convert_to_one_hot(vector, num_classes):
    """Return a one-hot matrix with one row per entry of ``vector``.

    The result has shape ``(len(vector), num_classes)`` and the default float
    dtype of ``np.zeros``; row ``i`` holds a 1 in column ``vector[i]`` and
    zeros elsewhere. ``vector`` must contain integer class indices.
    """
    sample_count = len(vector)
    one_hot = np.zeros(shape=[sample_count, num_classes])
    row_indices = np.arange(sample_count)
    # Fancy indexing sets exactly one cell per row.
    one_hot[row_indices, vector] = 1
    return one_hot


def __resize_image(src_image, dst_image_height, dst_image_width):
    """Fit a 2-D image onto a zero-filled canvas of the requested size.

    When the source is taller or wider than the destination it is first scaled
    down uniformly (bicubic interpolation) by the smaller of the two axis
    ratios; otherwise it is used unchanged. The fitted image is then pasted,
    centred, onto a ``uint8`` canvas of shape
    ``(dst_image_height, dst_image_width)``, which is returned.
    """
    src_height, src_width = src_image.shape[0], src_image.shape[1]

    already_fits = src_height <= dst_image_height and src_width <= dst_image_width
    if already_fits:
        fitted = src_image
    else:
        # Same factor on both axes so the aspect ratio is preserved.
        ratio = min(dst_image_height / src_height, dst_image_width / src_width)
        fitted = cv2.resize(src=src_image, dsize=(0, 0), fx=ratio, fy=ratio, interpolation=cv2.INTER_CUBIC)

    fitted_height, fitted_width = fitted.shape[0], fitted.shape[1]

    # Top-left corner that centres the fitted image on the canvas.
    top = (dst_image_height - fitted_height) // 2
    left = (dst_image_width - fitted_width) // 2

    canvas = np.zeros(shape=[dst_image_height, dst_image_width], dtype=np.uint8)
    canvas[top:top + fitted_height, left:left + fitted_width] = fitted
    return canvas


def read_hoda_cdb(file_name):
    """Decode a Hoda ``.cdb`` file into a list of images and a list of labels.

    The file starts with a 1024-byte header followed by ``TotalRec`` records.
    Each record holds a start byte, a label byte, an optional per-record
    width/height pair (only when the header's width or height is zero), a
    16-bit byte count, and the pixel data. Pixel data is either run-length
    encoded rows (image type 0) or raw gray values (any other image type).

    All multi-byte fields are read with the platform's native byte order, as
    in the original implementation.

    Parameters
    ----------
    file_name : str
        Path of the ``.cdb`` file to read.

    Returns
    -------
    tuple(list, list)
        ``images`` is a list of 2-D ``uint8`` arrays (0 = background,
        255 = foreground for binary files) and ``labels`` is the list of the
        corresponding label bytes as Python ints.

    Raises
    ------
    struct.error
        If the file ends before the data announced by the header.
    """
    with open(file_name, 'rb') as binary_file:
        buffer = binary_file.read()

    offset = 0

    # --- Header ---------------------------------------------------------------
    # yy (uint16), m (uint8), d (uint8): present in the header but unused here.
    offset += 2 + 1 + 1

    # Default image height and width; a zero in either means that every record
    # carries its own dimensions.
    H, W = struct.unpack_from('BB', buffer, offset)
    offset += 2

    TotalRec = struct.unpack_from('I', buffer, offset)[0]
    offset += 4

    # LetterCount (128 x uint32): unused.
    offset += 128 * 4

    imgType = struct.unpack_from('B', buffer, offset)[0]  # 0: binary, 1: gray
    offset += 1

    # Comments (256 bytes) and Reserved (245 bytes): unused.
    offset += 256 + 245

    normal = (W > 0) and (H > 0)

    images = []
    labels = []

    # --- Records --------------------------------------------------------------
    for _ in range(TotalRec):
        # StartByte (documented as 0xff) is skipped without validation.
        offset += 1

        label = struct.unpack_from('B', buffer, offset)[0]
        offset += 1

        if not normal:
            # Per-record dimensions are stored width first, then height.
            W, H = struct.unpack_from('BB', buffer, offset)
            offset += 2

        # ByteCount (uint16) is not needed: the decoders below know how many
        # bytes they consume.
        offset += 2

        if imgType == 0:
            # Binary: each row is a sequence of run lengths that alternate
            # background / foreground, always starting with background.
            image = np.zeros(shape=[H, W], dtype=np.uint8)
            for y in range(H):
                foreground = False
                x = 0
                while x < W:
                    run_length = struct.unpack_from('B', buffer, offset)[0]
                    offset += 1
                    if foreground:
                        image[y, x:x + run_length] = 255
                    foreground = not foreground
                    x += run_length
        else:
            # GrayScale mode. The pixels go into their own variable: the
            # original rebound the file buffer here, which made every record
            # after the first one fail with a TypeError.
            pixels = struct.unpack_from('{}B'.format(W * H), buffer, offset)
            offset += W * H
            # NOTE(review): reshape([W, H]).T is kept from the original; it
            # interprets the stream column by column -- confirm against the
            # CDB format specification.
            image = np.asarray(pixels, dtype=np.uint8).reshape([W, H]).T

        images.append(image)
        labels.append(label)

    return images, labels


def read_hoda_dataset(dataset_path, images_height=32, images_width=32, one_hot=False, reshape=True):
    """Load a Hoda ``.cdb`` file as binarised, fixed-size arrays.

    Every image returned by ``read_hoda_cdb`` is fitted onto an
    ``images_height`` x ``images_width`` canvas, scaled to [0, 1] and
    thresholded at 0.5, so the resulting pixels are exactly 0 or 1.

    Parameters
    ----------
    dataset_path : str
        Path of the ``.cdb`` file.
    images_height, images_width : int
        Size of the output images.
    one_hot : bool
        If true, labels are returned one-hot encoded over 10 classes;
        otherwise as a flat vector of class indices.
    reshape : bool
        If true, images are flattened to ``(N, images_height * images_width)``;
        otherwise they are returned as ``(N, images_height, images_width, 1)``.

    Returns
    -------
    tuple(np.ndarray, np.ndarray)
        ``X`` (float32 images) and ``Y`` (float32 labels).
    """
    images, labels = read_hoda_cdb(dataset_path)
    assert len(images) == len(labels)

    X = np.zeros(shape=[len(images), images_height, images_width], dtype=np.float32)
    # ``np.int`` was only an alias of the builtin ``int`` and no longer exists
    # in NumPy >= 1.24; the builtin yields the same dtype on every version.
    Y = np.zeros(shape=[len(labels)], dtype=int)

    for i in range(len(images)):
        # Image resizing.
        image = __resize_image(src_image=images[i], dst_image_height=images_height, dst_image_width=images_width)
        # Image normalization.
        image = image / 255
        # Image binarization.
        image = np.where(image >= 0.5, 1, 0)
        # Image.
        X[i] = image
        # Label.
        Y[i] = labels[i]

    if one_hot:
        Y = __convert_to_one_hot(Y, 10).astype(dtype=np.float32)
    else:
        Y = Y.astype(dtype=np.float32)

    if reshape:
        X = X.reshape(-1, images_height * images_width)
    else:
        X = X.reshape(-1, images_height, images_width, 1)

    return X, Y

In [3]:
# Mount Google Drive at /content/drive so the dataset files stored there can be
# read by the cells below; timeout_ms=3600000 is one hour.
from google.colab import drive
drive.mount('/content/drive' , timeout_ms=3600000)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [4]:
import numpy as np
from matplotlib import pyplot as plt

# One-time setup (left disabled): extract the dataset archive on Drive.
#!unzip "/content/drive/My Drive/DigitDB.zip" -d "/content/drive/My Drive/"
# Decode the raw digit images and their labels; read_hoda_cdb returns two
# Python lists (images as 2-D uint8 arrays, labels as ints).
x_train, y_train = read_hoda_cdb('/content/drive/My Drive/Train 60000.cdb')
x_test, y_test = read_hoda_cdb('/content/drive/My Drive/Test 20000.cdb')

In [5]:
import cv2 

# Stretch every decoded digit to a fixed 30x30 grid (area interpolation) and
# store it as a float64 array. The target size is fixed, so the original aspect
# ratio is not preserved.
X_train = [
  np.array(cv2.resize(digit, (30, 30), interpolation=cv2.INTER_AREA), dtype='float64')
  for digit in x_train
]

X_test = [
  np.array(cv2.resize(digit, (30, 30), interpolation=cv2.INTER_AREA), dtype='float64')
  for digit in x_test
]




In [6]:
import numpy as np
import matplotlib.pyplot as plt
from keras.utils import to_categorical
import keras
from keras.models import Sequential
from keras.layers import Layer, Dense, Activation, Dropout, Flatten,Conv2D, MaxPooling2D
from keras.layers.normalization import BatchNormalization
from keras.optimizers import SGD
from sklearn.metrics import confusion_matrix
from keras.models import load_model

In [7]:
# Preprocessing of train and test data.
# NOTE: this cell rebinds X_train / X_test / y_train / y_test from themselves,
# so it must be run exactly once after the resize cell.

# Stack the per-image 30x30 arrays and add a trailing channel axis. The sample
# count is inferred (-1) instead of being hard-coded to 60000 / 20000.
X_train = np.array(X_train).reshape(-1, 30, 30, 1)
X_test = np.array(X_test).reshape(-1, 30, 30, 1)

# Scale the pixel values (decoded from uint8 images) down by 255.
X_train /= 255
X_test /= 255

# One-hot encode the integer labels over the 10 digit classes.
y_train = to_categorical(np.array(y_train), 10)
y_test = to_categorical(np.array(y_test), 10)


print('X_train shape: \t', X_train.shape )
print('X_test shape: \t', X_test.shape )
# The original printed this line with the label 'y_test shape'.
print('y_train shape: \t', y_train.shape )
print('y_test shape: \t', y_test.shape )




X_train shape: 	 (60000, 30, 30, 1)
X_test shape: 	 (20000, 30, 30, 1)
y_test shape: 	 (60000, 10)
y_test shape: 	 (20000, 10)


In [20]:
def CNN_model():
  """Build and compile the digit-classification CNN.

  Architecture: three stages of (Conv2D 3x3 'same' + ReLU -> BatchNorm) x 2
  -> MaxPool 2x2 -> Dropout 0.25, with 32, 64 and 128 filters respectively,
  followed by Flatten -> Dense(128, ReLU) -> BatchNorm -> Dropout 0.5 ->
  Dense(10, softmax).

  The input shape is taken from the global ``X_train``. The model is compiled
  with categorical cross-entropy, SGD (lr=0.001, momentum=0.9) and the
  accuracy metric.
  """
  model = Sequential()

  for stage_index, filters in enumerate((32, 64, 128)):
    # Only the very first layer of the network declares the input shape.
    first_layer_kwargs = {'input_shape': X_train.shape[1:]} if stage_index == 0 else {}
    model.add(Conv2D(filters, (3, 3), activation = 'relu', padding='same', **first_layer_kwargs))
    model.add(BatchNormalization())
    model.add(Conv2D(filters, (3, 3), activation = 'relu', padding='same'))
    model.add(BatchNormalization())
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.25))

  # Classification head.
  head_layers = [
    Flatten(),
    Dense(128, activation = 'relu'),
    BatchNormalization(),
    Dropout(0.5),
    Dense(10, activation='softmax'),
  ]
  for layer in head_layers:
    model.add(layer)

  optimizer = keras.optimizers.SGD(lr=0.001, momentum=0.9)
  model.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=['accuracy'])
  return model

In [24]:
# Training hyper-parameters consumed by the fit() cell below.
batch_size = 60
epochs = 25

# Build a freshly compiled network and print its layer-by-layer summary.
first_model = CNN_model()
first_model.summary()



Model: "sequential_5"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_30 (Conv2D)           (None, 30, 30, 32)        320       
_________________________________________________________________
batch_normalization_35 (Batc (None, 30, 30, 32)        128       
_________________________________________________________________
conv2d_31 (Conv2D)           (None, 30, 30, 32)        9248      
_________________________________________________________________
batch_normalization_36 (Batc (None, 30, 30, 32)        128       
_________________________________________________________________
max_pooling2d_15 (MaxPooling (None, 15, 15, 32)        0         
_________________________________________________________________
dropout_20 (Dropout)         (None, 15, 15, 32)        0         
_________________________________________________________________
conv2d_32 (Conv2D)           (None, 15, 15, 64)       

In [25]:

# Train the model. The test split is passed as validation data, so the
# per-epoch validation metrics are computed on the same samples that the
# evaluation cell later reports as "test".
first_history = first_model.fit( X_train, y_train, batch_size=batch_size, epochs=epochs, validation_data=(X_test, y_test), shuffle=True)

Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25


In [35]:
# Collapse the softmax outputs and the one-hot targets to class indices.
y_pred = list(np.argmax(first_model.predict(X_test), axis=1))
y_true = list(np.argmax(y_test, axis=1))

# evaluate() returns [loss, accuracy]; index 1 is the accuracy metric.
train_scores = first_model.evaluate(X_train, y_train, verbose = 0)
test_scores = first_model.evaluate(X_test, y_test, verbose = 0)

print('Train accuracy : \t ', train_scores[1])
print('\nTest accuracy : \t  ', test_scores[1])

# Rows are true classes, columns are predicted classes.
cm = confusion_matrix(y_true, y_pred)
print('\n\nConfusion Matrix =  \n\n\n{} ' .format(cm))

Train accuracy : 	  0.9987499713897705

Test accuracy : 	   0.9944000244140625


Confusion Matrix =  


[[1980   11    2    0    0    5    0    2    0    0]
 [   2 1997    0    0    1    0    0    0    0    0]
 [   0    2 1994    1    0    0    0    1    0    2]
 [   0    0   23 1970    5    1    0    0    1    0]
 [   0    0    4    4 1990    0    0    1    0    1]
 [   6    0    3    0    1 1989    0    0    1    0]
 [   0    4    0    0    0    2 1987    0    0    7]
 [   2    4    1    0    0    0    0 1993    0    0]
 [   0    1    0    0    0    0    0    0 1999    0]
 [   1    5    0    0    0    2    3    0    0 1989]] 
