In [1]:
import numpy as np
from tensorflow import keras
import tensorflow as tf
import matplotlib.pyplot as plt
import scipy.io as sio
from keras.utils import np_utils
from keras.models import Model
from keras.layers import Input, Dense, Flatten
import keras.optimizers
from keras import regularizers

import time  # To time each epoch

Using TensorFlow backend.


In [0]:
def load_data(DATA_NAME = 'notMNIST_small.mat'):

    rows, cols = 28, 28
    nb_classes = 10
        
    mat = sio.loadmat(DATA_NAME)

    X = mat['images']
    Y = mat['labels']

    # Move last column to front
    X = np.rollaxis(X, 2)

    # Reshape and format input
    X = X.reshape(X.shape[0], rows, cols, 1)
    X = X.astype('float32')
    X -= np.mean(X,axis=0)
    X /= 255.0

    # Hot encoding
    Y = Y.astype(int)
    Y = np_utils.to_categorical(Y, nb_classes)

    # Divide into test and train sets
    perm = np.random.permutation(X.shape[0])

    train_size = 13000

    X_train = X[perm[:train_size]]
    X_test = X[perm[train_size:]]

    Y_train = Y[perm[:train_size]]
    Y_test = Y[perm[train_size:]]

    return (X_train, Y_train, X_test, Y_test)

In [0]:
train_images, train_labels , test_images, test_labels  = load_data()

batch_size = 128
nb_epoch = 20

In [0]:
from keras.backend import sigmoid
def swish(x, beta = 1):
    return (x * sigmoid(beta * x))
from keras.utils.generic_utils import get_custom_objects
from keras.layers import Activation
get_custom_objects().update({'swish': Activation(swish)})

In [0]:
def get_model(activation='relu', initializer='he_uniform', hidden_size=128,regularizer=None):

    # Model parameters
    rows, cols = 28, 28
    input_shape = (rows, cols, 1)

    nb_classes = 10
    
    inp = Input(shape=input_shape)
    flat = Flatten()(inp)
    hidden_1 = Dense(hidden_size, activation=activation, kernel_initializer=initializer, 
                kernel_regularizer=regularizer)(flat)
    out = Dense(nb_classes, activation='softmax')(hidden_1)

    model = Model(inputs=inp, outputs=out)

    print(model.summary())

    return model

In [0]:
def get_model2(activation='relu', initializer='he_uniform', hidden_size=128,regularizer=None):

    # Model parameters
    rows, cols = 28, 28
    input_shape = (rows, cols, 1)

    nb_classes = 10
    
    inp = Input(shape=input_shape)
    flat = Flatten()(inp)
    hidden_1 = Dense(hidden_size, activation=activation, kernel_initializer=initializer,
                kernel_regularizer=regularizer)(flat)
    hidden_2 = Dense(hidden_size, activation=activation, kernel_initializer=initializer,
                kernel_regularizer=regularizer)(hidden_1)
    out = Dense(nb_classes, activation='softmax')(hidden_2)

    model = Model(inputs=inp, outputs=out)

    print(model.summary())

    return model

With He Initialization, One hidden layer with 32 input node, ReLU, RMSProp, without regularizer

In [7]:
model = get_model(hidden_size=32)

model.compile(loss='categorical_crossentropy', optimizer='rmsprop',
              metrics=['accuracy'])

model.fit(train_images, train_labels, batch_size=batch_size, epochs=nb_epoch,
          verbose=1, validation_data=(test_images, test_labels))

score = model.evaluate(test_images, test_labels, verbose=1)

print("Accuracy:", score[1])	  

Model: "model_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 28, 28, 1)         0         
_________________________________________________________________
flatten_1 (Flatten)          (None, 784)               0         
_________________________________________________________________
dense_1 (Dense)              (None, 32)                25120     
_________________________________________________________________
dense_2 (Dense)              (None, 10)                330       
Total params: 25,450
Trainable params: 25,450
Non-trainable params: 0
_________________________________________________________________
None
Train on 13000 samples, validate on 5724 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoc

With He Initialization, One hidden layer with 32 input node, ReLU, RMSProp, with L1 regularizer

In [8]:
model = get_model(hidden_size=32,regularizer=regularizers.l1(0.01))

model.compile(loss='categorical_crossentropy', optimizer='rmsprop',
              metrics=['accuracy'])

model.fit(train_images, train_labels, batch_size=batch_size, epochs=nb_epoch,
          verbose=1, validation_data=(test_images, test_labels))

score = model.evaluate(test_images, test_labels, verbose=1)

print("Accuracy:", score[1])

Model: "model_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_2 (InputLayer)         (None, 28, 28, 1)         0         
_________________________________________________________________
flatten_2 (Flatten)          (None, 784)               0         
_________________________________________________________________
dense_3 (Dense)              (None, 32)                25120     
_________________________________________________________________
dense_4 (Dense)              (None, 10)                330       
Total params: 25,450
Trainable params: 25,450
Non-trainable params: 0
_________________________________________________________________
None
Train on 13000 samples, validate on 5724 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoc

With He Initialization, One hidden layer with 32 input node, ReLU, RMSProp, with L2 regularizer

In [9]:
model = get_model(hidden_size=32,regularizer=regularizers.l2(0.01))

model.compile(loss='categorical_crossentropy', optimizer='rmsprop',
              metrics=['accuracy'])

model.fit(train_images, train_labels, batch_size=batch_size, epochs=nb_epoch,
          verbose=1, validation_data=(test_images, test_labels))

score = model.evaluate(test_images, test_labels, verbose=1)

print("Accuracy:", score[1])

Model: "model_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_3 (InputLayer)         (None, 28, 28, 1)         0         
_________________________________________________________________
flatten_3 (Flatten)          (None, 784)               0         
_________________________________________________________________
dense_5 (Dense)              (None, 32)                25120     
_________________________________________________________________
dense_6 (Dense)              (None, 10)                330       
Total params: 25,450
Trainable params: 25,450
Non-trainable params: 0
_________________________________________________________________
None
Train on 13000 samples, validate on 5724 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoc

With He Initialization, Two hidden layer with each layer contains 128 input node, ReLU, RMSProp, without regularizer

In [10]:
model = get_model2(hidden_size=128)

model.compile(loss='categorical_crossentropy', optimizer='rmsprop',
              metrics=['accuracy'])

model.fit(train_images, train_labels, batch_size=batch_size, epochs=nb_epoch,
          verbose=1, validation_data=(test_images, test_labels))

score = model.evaluate(test_images, test_labels, verbose=1)

print("Accuracy:", score[1])	  

Model: "model_4"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_4 (InputLayer)         (None, 28, 28, 1)         0         
_________________________________________________________________
flatten_4 (Flatten)          (None, 784)               0         
_________________________________________________________________
dense_7 (Dense)              (None, 128)               100480    
_________________________________________________________________
dense_8 (Dense)              (None, 128)               16512     
_________________________________________________________________
dense_9 (Dense)              (None, 10)                1290      
Total params: 118,282
Trainable params: 118,282
Non-trainable params: 0
_________________________________________________________________
None
Train on 13000 samples, validate on 5724 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20

With He Initialization, Two hidden layer with each layer contains 128 input node, ReLU, RMSProp, with L1 regularizer

In [11]:
model = get_model2(hidden_size=128,regularizer=regularizers.l1(0.01))

model.compile(loss='categorical_crossentropy', optimizer='rmsprop',
              metrics=['accuracy'])

model.fit(train_images, train_labels, batch_size=batch_size, epochs=nb_epoch,
          verbose=1, validation_data=(test_images, test_labels))

score = model.evaluate(test_images, test_labels, verbose=1)

print("Accuracy:", score[1])

Model: "model_5"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_5 (InputLayer)         (None, 28, 28, 1)         0         
_________________________________________________________________
flatten_5 (Flatten)          (None, 784)               0         
_________________________________________________________________
dense_10 (Dense)             (None, 128)               100480    
_________________________________________________________________
dense_11 (Dense)             (None, 128)               16512     
_________________________________________________________________
dense_12 (Dense)             (None, 10)                1290      
Total params: 118,282
Trainable params: 118,282
Non-trainable params: 0
_________________________________________________________________
None
Train on 13000 samples, validate on 5724 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20

With He Initialization, Two hidden layer with each layer contains 128 input node, ReLU, RMSProp, with L2
regularizer

In [12]:
model = get_model2(hidden_size=128,regularizer=regularizers.l2(0.01))

model.compile(loss='categorical_crossentropy', optimizer='rmsprop',
              metrics=['accuracy'])

model.fit(train_images, train_labels, batch_size=batch_size, epochs=nb_epoch,
          verbose=1, validation_data=(test_images, test_labels))

score = model.evaluate(test_images, test_labels, verbose=1)

print("Accuracy:", score[1])

Model: "model_6"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_6 (InputLayer)         (None, 28, 28, 1)         0         
_________________________________________________________________
flatten_6 (Flatten)          (None, 784)               0         
_________________________________________________________________
dense_13 (Dense)             (None, 128)               100480    
_________________________________________________________________
dense_14 (Dense)             (None, 128)               16512     
_________________________________________________________________
dense_15 (Dense)             (None, 10)                1290      
Total params: 118,282
Trainable params: 118,282
Non-trainable params: 0
_________________________________________________________________
None
Train on 13000 samples, validate on 5724 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20

With He Initialization, One hidden layer with 32 input node, Swish, RMSProp, without regularizer

In [13]:
model = get_model(hidden_size=32,activation='swish')

model.compile(loss='categorical_crossentropy', optimizer='rmsprop',
              metrics=['accuracy'])

model.fit(train_images, train_labels, batch_size=batch_size, epochs=nb_epoch,
          verbose=1, validation_data=(test_images, test_labels))

score = model.evaluate(test_images, test_labels, verbose=1)

print("Accuracy:", score[1])	  

Model: "model_7"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_7 (InputLayer)         (None, 28, 28, 1)         0         
_________________________________________________________________
flatten_7 (Flatten)          (None, 784)               0         
_________________________________________________________________
dense_16 (Dense)             (None, 32)                25120     
_________________________________________________________________
dense_17 (Dense)             (None, 10)                330       
Total params: 25,450
Trainable params: 25,450
Non-trainable params: 0
_________________________________________________________________
None
Train on 13000 samples, validate on 5724 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoc

With He Initialization, One hidden layer with 32 input node, Swish, RMSProp, with L1 regularizer

In [14]:
model = get_model(hidden_size=32,activation='swish',regularizer=regularizers.l1(0.01))

model.compile(loss='categorical_crossentropy', optimizer='rmsprop',
              metrics=['accuracy'])

model.fit(train_images, train_labels, batch_size=batch_size, epochs=nb_epoch,
          verbose=1, validation_data=(test_images, test_labels))

score = model.evaluate(test_images, test_labels, verbose=1)

print("Accuracy:", score[1])

Model: "model_8"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_8 (InputLayer)         (None, 28, 28, 1)         0         
_________________________________________________________________
flatten_8 (Flatten)          (None, 784)               0         
_________________________________________________________________
dense_18 (Dense)             (None, 32)                25120     
_________________________________________________________________
dense_19 (Dense)             (None, 10)                330       
Total params: 25,450
Trainable params: 25,450
Non-trainable params: 0
_________________________________________________________________
None
Train on 13000 samples, validate on 5724 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoc

With He Initialization, One hidden layer with 32 input node, Swish, RMSProp, with L2 regularizer

In [15]:
model = get_model(hidden_size=32,activation='swish',regularizer=regularizers.l2(0.01))

model.compile(loss='categorical_crossentropy', optimizer='rmsprop',
              metrics=['accuracy'])

model.fit(train_images, train_labels, batch_size=batch_size, epochs=nb_epoch,
          verbose=1, validation_data=(test_images, test_labels))

score = model.evaluate(test_images, test_labels, verbose=1)

print("Accuracy:", score[1])

Model: "model_9"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_9 (InputLayer)         (None, 28, 28, 1)         0         
_________________________________________________________________
flatten_9 (Flatten)          (None, 784)               0         
_________________________________________________________________
dense_20 (Dense)             (None, 32)                25120     
_________________________________________________________________
dense_21 (Dense)             (None, 10)                330       
Total params: 25,450
Trainable params: 25,450
Non-trainable params: 0
_________________________________________________________________
None
Train on 13000 samples, validate on 5724 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoc

With He Initialization, Two hidden layer with each layer contains 128 input node, Swish, RMSProp, without regularizer

In [16]:
model = get_model2(hidden_size=128,activation='swish')

model.compile(loss='categorical_crossentropy', optimizer='rmsprop',
              metrics=['accuracy'])

model.fit(train_images, train_labels, batch_size=batch_size, epochs=nb_epoch,
          verbose=1, validation_data=(test_images, test_labels))

score = model.evaluate(test_images, test_labels, verbose=1)

print("Accuracy:", score[1])	  

Model: "model_10"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_10 (InputLayer)        (None, 28, 28, 1)         0         
_________________________________________________________________
flatten_10 (Flatten)         (None, 784)               0         
_________________________________________________________________
dense_22 (Dense)             (None, 128)               100480    
_________________________________________________________________
dense_23 (Dense)             (None, 128)               16512     
_________________________________________________________________
dense_24 (Dense)             (None, 10)                1290      
Total params: 118,282
Trainable params: 118,282
Non-trainable params: 0
_________________________________________________________________
None
Train on 13000 samples, validate on 5724 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/2

With He Initialization, Two hidden layer with each layer contains 128 input node, Swish, RMSProp, with L1 regularizer

In [17]:
model = get_model2(hidden_size=128,regularizer=regularizers.l1(0.01))

model.compile(loss='categorical_crossentropy', optimizer='rmsprop',
              metrics=['accuracy'])

model.fit(train_images, train_labels, batch_size=batch_size, epochs=nb_epoch,
          verbose=1, validation_data=(test_images, test_labels))

score = model.evaluate(test_images, test_labels, verbose=1)

print("Accuracy:", score[1])

Model: "model_11"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_11 (InputLayer)        (None, 28, 28, 1)         0         
_________________________________________________________________
flatten_11 (Flatten)         (None, 784)               0         
_________________________________________________________________
dense_25 (Dense)             (None, 128)               100480    
_________________________________________________________________
dense_26 (Dense)             (None, 128)               16512     
_________________________________________________________________
dense_27 (Dense)             (None, 10)                1290      
Total params: 118,282
Trainable params: 118,282
Non-trainable params: 0
_________________________________________________________________
None
Train on 13000 samples, validate on 5724 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/2

With He Initialization, Two hidden layer with each layer contains 128 input node, Swish, RMSProp, with L2
regularizer

In [18]:
model = get_model2(hidden_size=128,regularizer=regularizers.l2(0.01))

model.compile(loss='categorical_crossentropy', optimizer='rmsprop',
              metrics=['accuracy'])

model.fit(train_images, train_labels, batch_size=batch_size, epochs=nb_epoch,
          verbose=1, validation_data=(test_images, test_labels))

score = model.evaluate(test_images, test_labels, verbose=1)

print("Accuracy:", score[1])

Model: "model_12"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_12 (InputLayer)        (None, 28, 28, 1)         0         
_________________________________________________________________
flatten_12 (Flatten)         (None, 784)               0         
_________________________________________________________________
dense_28 (Dense)             (None, 128)               100480    
_________________________________________________________________
dense_29 (Dense)             (None, 128)               16512     
_________________________________________________________________
dense_30 (Dense)             (None, 10)                1290      
Total params: 118,282
Trainable params: 118,282
Non-trainable params: 0
_________________________________________________________________
None
Train on 13000 samples, validate on 5724 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/2

To save the model

In [0]:
model.save("model.h5")

To load the model and run inference

In [20]:
# load and evaluate a saved model
from numpy import loadtxt
from keras.models import load_model

# load model
model = load_model('model.h5')
# summarize model.
model.summary()
# load dataset
_,_,test_images, test_labels  = load_data()

# evaluate the model
score = model.evaluate(test_images, test_labels, verbose=0)
print("%s: %.2f%%" % (model.metrics_names[1], score[1]*100))

Model: "model_12"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_12 (InputLayer)        (None, 28, 28, 1)         0         
_________________________________________________________________
flatten_12 (Flatten)         (None, 784)               0         
_________________________________________________________________
dense_28 (Dense)             (None, 128)               100480    
_________________________________________________________________
dense_29 (Dense)             (None, 128)               16512     
_________________________________________________________________
dense_30 (Dense)             (None, 10)                1290      
Total params: 118,282
Trainable params: 118,282
Non-trainable params: 0
_________________________________________________________________
accuracy: 91.84%


In [0]:
# make a prediction for a new image.
from keras.preprocessing.image import load_img
from keras.preprocessing.image import img_to_array
from keras.models import load_model

# load and prepare the image
def load_image(filename):
	# load the image
	img = load_img(filename, grayscale=True, target_size=(28, 28))
	# convert to array
	img = img_to_array(img)
	# reshape into a single sample with 1 channel
	img = img.reshape(1, 28, 28, 1)
	# prepare pixel data
	img = img.astype('float32')
	img = img / 255.0
	return img

# load an image and predict the class
def run_example():
	# load the image
	img = load_image('sample_image.png')
	# load model
	model = load_model('model.h5')
	# predict the class
	result = model.predict_classes(img)
	print(result[0])

# entry point, run the example
#run_example()