## Dataset Loading

In [0]:
# Fetch the HodaDatasetReader repository and make it the working directory:
# the relative './DigitDB/...' paths and the `HodaDatasetReader` import used
# in the following cell are resolved from there.
!git clone https://github.com/amir-saniyan/HodaDatasetReader.git
%cd /content/HodaDatasetReader/

Cloning into 'HodaDatasetReader'...
remote: Enumerating objects: 24, done.[K
remote: Total 24 (delta 0), reused 0 (delta 0), pack-reused 24[K
Unpacking objects: 100% (24/24), done.
/content/HodaDatasetReader


In [0]:
from HodaDatasetReader import read_hoda_dataset


# Options shared by every split: 32x32 images with one-hot encoded labels.
image_options = dict(images_height=32, images_width=32, one_hot=True)

# Training and test splits are read with reshape=False.
x_train, y_train = read_hoda_dataset('./DigitDB/Train 60000.cdb',
                                     reshape=False,
                                     **image_options)

print('Reading Test 20000.cdb ...')
x_test, y_test = read_hoda_dataset('./DigitDB/Test 20000.cdb',
                                   reshape=False,
                                   **image_options)

print('Reading RemainingSamples.cdb ...')
# NOTE(review): `reshape` is not passed for this split, so the reader's own
# default applies and the array layout may differ from the two splits above.
remaining_images, remaining_labels = read_hoda_dataset(
    './DigitDB/RemainingSamples.cdb',
    **image_options)

Reading Test 20000.cdb ...
Reading RemainingSamples.cdb ...


## Metrics

In [0]:
from keras.optimizers import Adam
from keras import backend as K
import numpy as np
import matplotlib.pyplot as plt

from keras.datasets import mnist
from keras.models import Sequential
from keras.layers.core import Dense, Dropout, Activation
from keras.utils import np_utils, to_categorical

def recall(y_true, y_pred):
    """Recall over the whole batch: true positives / actual positives.

    Both tensors are clipped to [0, 1] and rounded to 0/1 before counting, and
    the counts are summed over every element. K.epsilon() is added to the
    denominator so a batch without positive targets yields 0 instead of a
    division by zero.
    """
    hit_mask = K.round(K.clip(y_true * y_pred, 0, 1))
    target_mask = K.round(K.clip(y_true, 0, 1))
    return K.sum(hit_mask) / (K.sum(target_mask) + K.epsilon())


def precision(y_true, y_pred):
    """Precision over the whole batch: true positives / predicted positives.

    Both tensors are clipped to [0, 1] and rounded to 0/1 before counting, and
    the counts are summed over every element. K.epsilon() is added to the
    denominator so a batch without positive predictions yields 0 instead of a
    division by zero.
    """
    hit_mask = K.round(K.clip(y_true * y_pred, 0, 1))
    predicted_mask = K.round(K.clip(y_pred, 0, 1))
    return K.sum(hit_mask) / (K.sum(predicted_mask) + K.epsilon())


def f1(y_true, y_pred):
    """F1 score: harmonic mean of `precision` and `recall` for the batch.

    Returns 2 * p * r / (p + r), with K.epsilon() added to the denominator.
    Without it, a batch where precision and recall are both 0 divides 0 by 0
    and the metric becomes NaN; `precision` and `recall` guard their own
    denominators the same way.
    """
    p = precision(y_true, y_pred)
    r = recall(y_true, y_pred)
    return 2 * ((p * r) / (p + r + K.epsilon()))

Using TensorFlow backend.


## Train/validation separation

In [0]:
# Hold out the first 6000 training samples for validation; everything after
# them is what the model is actually fitted on.
x_val, partial_x_train = x_train[:6000], x_train[6000:]
y_val, partial_y_train = y_train[:6000], y_train[6000:]

# Displayed as the cell output to confirm the layout of the full training set.
x_train.shape


(60000, 32, 32, 1)

## Model

In [0]:
from keras.layers import Input,Conv2D,MaxPooling2D,Flatten,Dense,Dropout
from keras.models import Model
from keras.preprocessing.image import ImageDataGenerator
from keras.applications import VGG16
from keras import models
from keras import layers
from keras.applications.imagenet_utils import preprocess_input
from keras import optimizers
from keras import metrics

# --- Architecture -----------------------------------------------------------
# Three convolution + max-pooling stages followed by a dense classifier head.
# Each stage holds exactly one convolution: only layers on the path from
# `input_img` to the final output become part of the Model, so every
# convolution must feed the layer that follows it.
input_img = Input(shape=(32,32,1))

conv = Conv2D(32, (3, 3), activation='relu')(input_img)
pooling = MaxPooling2D((2, 2))(conv)

conv = Conv2D(64, (3, 3), activation='relu')(pooling)
pooling = MaxPooling2D((2, 2))(conv)

conv = Conv2D(128, (3, 3), activation='relu')(pooling)
pooling = MaxPooling2D((2, 2))(conv)

flatten = Flatten()(pooling)
dense = Dense(512, activation='relu')(flatten)
dense = Dropout(0.5)(dense)
dense = Dense(256, activation='relu')(dense)
dense = Dropout(0.5)(dense)
dense = Dense(128, activation='relu')(dense)
# Each image carries exactly one of 10 digit labels (one-hot targets), so the
# output is a softmax distribution over the classes rather than 10 independent
# sigmoids.
dense = Dense(10, activation='softmax')(dense)

model = Model(inputs=input_img, outputs=dense)
model.summary()

# --- Training ---------------------------------------------------------------
# categorical_crossentropy pairs with the softmax output. It also makes the
# "accuracy" metric resolve to categorical accuracy (argmax match); with
# binary_crossentropy Keras reports per-element binary accuracy, which
# overstates performance on one-hot targets.
model.compile(loss='categorical_crossentropy', optimizer=optimizers.RMSprop(lr=2e-5),
              metrics=["accuracy"])

history = model.fit(partial_x_train,
                    partial_y_train,
                    epochs=30,
                    batch_size=180,
                    validation_data=(x_val, y_val))

Model: "model_4"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_4 (InputLayer)         (None, 32, 32, 1)         0         
_________________________________________________________________
conv2d_11 (Conv2D)           (None, 30, 30, 32)        320       
_________________________________________________________________
max_pooling2d_10 (MaxPooling (None, 15, 15, 32)        0         
_________________________________________________________________
conv2d_12 (Conv2D)           (None, 13, 13, 64)        18496     
_________________________________________________________________
max_pooling2d_11 (MaxPooling (None, 6, 6, 64)          0         
_________________________________________________________________
conv2d_14 (Conv2D)           (None, 4, 4, 128)         73856     
_________________________________________________________________
max_pooling2d_12 (MaxPooling (None, 2, 2, 128)         0   

KeyboardInterrupt: ignored

In [0]:
# Write the model to '../model.h5', i.e. one directory above the current
# working directory.
model.save('../model.h5')

In [0]:
import matplotlib.pyplot as plt

# The accuracy metric is logged as 'acc' by Keras releases before 2.3 and as
# 'accuracy' from 2.3 on; use whichever key this run produced so the cell
# works on both.
acc_key = 'acc' if 'acc' in history.history else 'accuracy'

acc = history.history[acc_key]
val_acc = history.history['val_' + acc_key]
loss = history.history['loss']
val_loss = history.history['val_loss']

# Epochs are numbered from 1 on the x axis.
epochs = range(1, len(acc) + 1)

# Figure 1: accuracy (dots = training, line = validation).
plt.plot(epochs, acc, 'bo', label='Training acc')
plt.plot(epochs, val_acc, 'b', label='Validation acc')
plt.title('Training and validation accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.legend()

# Start a second figure so the loss curves do not share axes with accuracy.
plt.figure()

plt.plot(epochs, loss, 'bo', label='Training loss')
plt.plot(epochs, val_loss, 'b', label='Validation loss')
plt.title('Training and validation loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()

plt.show()

## Evaluation

In [0]:
# Evaluate on the test split and print every reported value next to the
# metric name the model associates with it.
score = model.evaluate(x_test, y_test)
for metric_name, metric_value in zip(model.metrics_names, score):
  print(str(metric_name)+" : "+ str(metric_value))

## Confusion matrix

In [0]:
from sklearn.metrics import confusion_matrix
import numpy as np
import matplotlib.pyplot as plt


# Despite its name, `predicted_classes` holds the raw model outputs (one score
# per class for each test sample); later cells rely on that.
predicted_classes = model.predict(x_test)

# Reduce the one-hot targets and the score rows to integer class indices
# before tabulating true class (rows) against predicted class (columns).
true_labels = y_test.argmax(axis=1)
predicted_labels = predicted_classes.argmax(axis=1)
matrix = confusion_matrix(true_labels, predicted_labels)
print(matrix)


## Wrong results (largest prediction errors)


In [0]:
# Per-sample error: sum over classes of |model output - one-hot target|.
diff = np.abs(predicted_classes - y_test)
error = np.sum(diff, axis=1)

# Indices of the 10 test samples with the largest error, worst first.
# A stable sort on the negated error keeps the lowest index first among ties
# (the same order repeated argmax would give). Unlike the "argmax then zero
# out" approach, it leaves `error` intact and can never return the same
# sample twice.
wrongs = list(np.argsort(-error, kind='stable')[:10])

In [0]:


# Grid layout for the gallery of worst predictions: rows x columns panels.
columns = 2
rows = 5
fig = plt.figure(figsize=(9, 13))

ax = []

# One panel per selected sample; the slice keeps the loop within the grid
# even if `wrongs` holds a different number of entries.
for i, sample_idx in enumerate(wrongs[:columns * rows]):
    # Drop the trailing channel axis so imshow receives a 2-D image.
    img = x_test[sample_idx].reshape(32, 32)
    ax.append(fig.add_subplot(rows, columns, i + 1))
    ax[-1].set_title("predicted:"+str(predicted_classes[sample_idx].argmax())+" "+"tag:"+str(y_test[sample_idx].argmax()))
    # Draw on the subplot just created instead of relying on pyplot's
    # implicit "current axes".
    ax[-1].imshow(img)

plt.show()