In [1]:
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.datasets import mnist
from keras.utils import to_categorical

import numpy as np

Using TensorFlow backend.


In [2]:
(x_train, y_train), (x_test, y_test) = mnist.load_data()

In [3]:
x_train, x_test = x_train/255, x_test/255
x_train, x_test = x_train.reshape(x_train.shape[0], 28, 28, 1), x_test.reshape(x_test.shape[0],28,28,1)

y_train, y_test = to_categorical(y_train, 10), to_categorical(y_test, 10)

In [6]:
#CNN configuration:

# Convolutional Layer 1.
filter_size1 = 5          # Convolution filters are 5 x 5 pixels.
num_filters1 = 16         # There are 16 of these filters.

# Convolutional Layer 2.
filter_size2 = 5          # Convolution filters are 5 x 5 pixels.
num_filters2 = 36         # There are 36 of these filters.

# Fully-connected layer.
fc_size = 128             # Number of neurons in fully-connected layer.

In [7]:
x = tf.placeholder('float',shape = (None, 28, 28, 1), name="x")
y = tf.placeholder('float', shape = (None), name = "y")

In [83]:
def flatten_layer(layer):
    # Get the shape of the input layer.
    layer_shape = layer.get_shape()

    # The shape of the input layer is assumed to be:
    # layer_shape == [num_images, img_height, img_width, num_channels]

    # The number of features is: img_height * img_width * num_channels
    # We can use a function from TensorFlow to calculate this.
    num_features = layer_shape[1:4].num_elements()
    
    # Reshape the layer to [num_images, num_features].
    # Note that we just set the size of the second dimension
    # to num_features and the size of the first dimension to -1
    # which means the size in that dimension is calculated
    # so the total size of the tensor is unchanged from the reshaping.
    layer_flat = tf.reshape(layer, [-1, num_features])

    # The shape of the flattened layer is now:
    # [num_images, img_height * img_width * num_channels]

    # Return both the flattened layer and the number of features.
    return layer_flat, num_features


def next_batch(num, data, labels):
    '''
    Return a total of `num` random samples and labels. 
    '''
    idx = np.arange(0 , len(data))
    np.random.shuffle(idx)
    idx = idx[:num]
    data_shuffle = [data[i] for i in idx]
    labels_shuffle = [labels[i] for i in idx]

    return np.asarray(data_shuffle), np.asarray(labels_shuffle)

In [78]:
#conv layers:
conv_L1 = {'weights': tf.Variable(tf.truncated_normal([filter_size1, filter_size1, 1, num_filters1], stddev=0.05)),
            'biases': tf.Variable(tf.constant(0.05, shape=[num_filters1]))}

conv_L2 = {'weights': tf.Variable(tf.truncated_normal([filter_size2, filter_size2, num_filters1, num_filters2], stddev=0.05)),
            'biases': tf.Variable(tf.constant(0.05, shape=[num_filters2]))}



L1 = tf.nn.conv2d(input = x, filter = conv_L1["weights"], strides = [1,1,1,1], padding = "SAME",name="mylayer1")
L1 += conv_L1["biases"]
#maxpooling:
L1 = tf.nn.max_pool(L1, ksize = [1,2,2,1], strides = [1,2,2,1], padding = "SAME")

#activation function:
L1 = tf.nn.relu(L1)


#conv layer 2:
L2 = tf.nn.conv2d(input = L1, filter = conv_L2["weights"], strides = [1,1,1,1], padding = "SAME")
L2 += conv_L2["biases"]

L2 = tf.nn.max_pool(L2, ksize = [1,8,8,1], strides = [1,8,8,1], padding = "SAME")

L2 = tf.nn.relu(L2)


#fully connected layer:
layer_flat, num_features = flatten_layer(L2)

full_con_L3 = {'weights': tf.Variable(tf.random_normal([num_features, fc_size])),
                'biases': tf.Variable(tf.random_normal([fc_size]))}

output_layer = {'weights': tf.Variable(tf.random_normal([fc_size, 10])),
                'biases': tf.Variable(tf.random_normal([10]))}



L3 = tf.add(tf.matmul(layer_flat, full_con_L3['weights']), full_con_L3['biases'])
L3 = tf.nn.relu(L3)

logits = tf.add(tf.matmul(L3, output_layer['weights']), output_layer['biases'])
output = tf.nn.softmax(logits)

In [89]:
loss = tf.reduce_mean( tf.nn.softmax_cross_entropy_with_logits_v2(logits=logits, labels=y) )
optimiser = tf.train.AdamOptimizer().minimize(loss)


init = tf.global_variables_initializer()
batch_size = 100

epochs = 5
total_batch_count = 60000//batch_size

In [90]:
import time

with tf.Session() as sess:
    sess.run(init)
    
    for epoch in range(epochs):
        start = time.time()
        avg_loss = 0 
        
        for i in range(total_batch_count):
            
            batch_x, batch_y = next_batch(batch_size, x_train, y_train)
            
            _, c = sess.run([optimiser, loss], feed_dict = {x:batch_x, y:batch_y})
            
            avg_loss +=c/total_batch_count
        
        end = time.time()
        print("epoch = ", epoch + 1, "loss =", avg_loss, "time for epoch =", end - start)
           
        
    preds = output.eval({x:x_test})
    
    sess.close()


epoch =  1 loss = 0.6570533545066916 time for epoch = 44.607166051864624
epoch =  2 loss = 0.12615318010716384 time for epoch = 42.42839193344116
epoch =  3 loss = 0.08760699524311351 time for epoch = 41.908621072769165
epoch =  4 loss = 0.06939670414062375 time for epoch = 42.10570788383484
epoch =  5 loss = 0.05734044778277162 time for epoch = 42.331732988357544


In [91]:
correct = 0
incorrect = []
for i in range(len(preds)):
    pred = np.argmax(preds[i])
    acts = np.argmax([y_test[i]])
    if pred == acts:
        correct+=1
    else:
        incorrect.append(i)
        
print("accuracy on test set: ", correct/len(preds))

accuracy on test set:  0.9788


##### Analysis
model is overfitting, loss of 0.05 is very low and so test set accuracy is only 97.88%. As high as this is, it is still less than what was achieved on keras however to overcome this some regularisation can be added like dropout between the two connected layers.