## MNIST CNN

In [1]:
import tensorflow as tf

  from ._conv import register_converters as _register_converters


In [2]:
from tensorflow.examples.tutorials.mnist import input_data

In [3]:
mnist = input_data.read_data_sets('MNIST_data/', one_hot=True)

Extracting MNIST_data/train-images-idx3-ubyte.gz
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz


### HELPER FUNCTIONS

In [9]:
# INIT WEIGHTS
def init_weights(shape):
    """Create a weight variable of the requested shape.

    The initial values are sampled with ``tf.truncated_normal`` using a
    standard deviation of 0.1.

    Args:
        shape: Shape of the variable to create.

    Returns:
        A ``tf.Variable`` wrapping the sampled initial values.
    """
    return tf.Variable(tf.truncated_normal(shape, stddev=0.1))

In [11]:
# INIT BIAS
def init_bias(shape):
    """Create a bias variable of the requested shape, filled with 0.1.

    Args:
        shape: Shape of the variable to create.

    Returns:
        A ``tf.Variable`` whose initial value is a constant tensor of 0.1.
    """
    return tf.Variable(tf.constant(0.1, shape=shape))

In [12]:
# CONV2D
def conv2d(x, W):
    """Convolve ``x`` with kernel ``W`` using stride 1 and 'SAME' (zero) padding.

    Args:
        x: Input tensor laid out as [batch, height, width, channels].
        W: Kernel tensor laid out as
            [filter_height, filter_width, channels_in, channels_out].

    Returns:
        The tensor produced by ``tf.nn.conv2d``.
    """
    unit_strides = [1, 1, 1, 1]  # move one position at a time along every axis
    return tf.nn.conv2d(x, W, strides=unit_strides, padding='SAME')

In [14]:
# POOLING
def max_pooling_2by2(x):
    """Apply 2x2 max pooling with stride 2 and 'SAME' (zero) padding.

    Pooling acts only on the height and width axes; the batch and channel
    axes use a size and stride of 1 so they are left untouched.

    Args:
        x: Input tensor laid out as [batch, height, width, channels].

    Returns:
        The tensor produced by ``tf.nn.max_pool``.
    """
    window = [1, 2, 2, 1]
    step = [1, 2, 2, 1]
    return tf.nn.max_pool(x, ksize=window, strides=step, padding='SAME')

In [27]:
# CONVOLUTIONAL LAYER
def convolutional_layer(input_x, shape):
    """Build a convolution + bias + ReLU layer.

    Args:
        input_x: Input tensor passed to ``conv2d``.
        shape: Kernel shape; its fourth entry (``shape[3]``) is used as the
            length of the bias vector.

    Returns:
        The ReLU activation of ``conv2d(input_x, W) + b``.
    """
    out_channels = shape[3]
    kernel = init_weights(shape)
    bias = init_bias([out_channels])
    pre_activation = conv2d(input_x, kernel) + bias
    return tf.nn.relu(pre_activation)

In [19]:
# FULLY CONNECTED LAYER
def fully_connected_layer(input_layer, size):
    """Build a dense layer computing ``input_layer @ W + b`` (no activation).

    Args:
        input_layer: Input tensor; its second dimension is read to size the
            weight matrix.
        size: Number of neurons (output units) in this layer.

    Returns:
        The result of ``tf.matmul(input_layer, W) + b``.
    """
    n_inputs = int(input_layer.get_shape()[1])
    weights = init_weights([n_inputs, size])
    biases = init_bias([size])
    return tf.matmul(input_layer, weights) + biases

### BUILDING NEURAL NETWORK

In [25]:
# PLACEHOLDERS
# Input images: one flattened 28x28 image (784 values) per row; batch size is left open.
X = tf.placeholder(dtype=tf.float32, shape=[None, 784])
# Target labels: one row of 10 values per example; batch size is left open.
y = tf.placeholder(dtype=tf.float32, shape=[None, 10])

In [32]:
# LAYERS
# Turn each flat 784-value row back into a 28x28 image with a single channel.
X_image = tf.reshape(X, shape=[-1, 28, 28, 1])

# First convolution block.
# Kernel shape [5, 5, 1, 32]:
#   5, 5 -> patch size
#   1    -> input channels (a single gray channel)
#   32   -> output channels, i.e. features computed for every 5x5 patch
convo_1 = convolutional_layer(X_image, shape=[5, 5, 1, 32])
convo_1_pooling = max_pooling_2by2(convo_1)

# Second convolution block: 32 input channels -> 64 output channels.
convo_2 = convolutional_layer(convo_1_pooling, shape=[5, 5, 32, 64])
convo_2_pooling = max_pooling_2by2(convo_2)

# Flatten the last pooled output so it can feed the fully connected layer.
# Two stride-2 poolings shrink 28x28 to 7x7, with 64 channels: 7 * 7 * 64 = 3136.
convo_2_flat = tf.reshape(convo_2_pooling, shape=[-1, 3136])
full_layer_1 = tf.nn.relu(fully_connected_layer(convo_2_flat, 1024))

# Dropout; the keep probability is fed at run time.
hold_prob = tf.placeholder(dtype=tf.float32)
full_1_dropout = tf.nn.dropout(full_layer_1, keep_prob=hold_prob)

# Output layer: 10 raw scores (logits) per example.
y_pred = fully_connected_layer(full_1_dropout, 10)

In [33]:
# LOSS FUNCTIONS
# Mean softmax cross-entropy between the labels `y` and the logits `y_pred`.
cross_entropy = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=y_pred)
)

In [35]:
# OPTIMIZER
# Adam with a learning rate of 1e-4; `train` is the op that performs one update step.
optimizer = tf.train.AdamOptimizer(learning_rate=1e-4)
train = optimizer.minimize(loss=cross_entropy)

In [39]:
# Build the accuracy ops ONCE, before the session starts.
# Calling tf.equal / tf.argmax / tf.reduce_mean inside the training loop adds
# a fresh copy of these nodes to the default graph on every report, so the
# graph keeps growing for the whole run. Defining them here and only running
# them inside the loop keeps the graph fixed.
matches = tf.equal(tf.argmax(y_pred, 1), tf.argmax(y, 1))
acc = tf.reduce_mean(tf.cast(matches, tf.float32))

# Initialize variables and run the training session.
init = tf.global_variables_initializer()
with tf.Session() as sess:
    sess.run(init)
    steps = 5000
    for i in range(steps):
        # One optimisation step on a mini-batch of 50 training examples,
        # with dropout keeping 50% of the units.
        batch_X, batch_y = mnist.train.next_batch(50)
        sess.run(train, feed_dict={X: batch_X, y: batch_y, hold_prob: 0.5})

        if i % 100 == 0:
            # Report accuracy every 100 steps.
            print("On Step: {}".format(i))
            print("Accuracy: ")
            # Feed the test images and labels with dropout disabled
            # (hold_prob = 1.0) to observe how the model is improving.
            print(sess.run(acc, feed_dict={X: mnist.test.images, y: mnist.test.labels, hold_prob: 1.0}))
            print("\n")

On Step: 0
Accuracy: 


KeyboardInterrupt: 