In [1]:
import tensorflow as tf

In [2]:
from tensorflow.examples.tutorials.mnist import input_data

In [4]:
# Load MNIST from the local "MNIST_data/" directory; labels are one-hot encoded.
mnist = input_data.read_data_sets(
    "MNIST_data/",
    one_hot=True,
)

Extracting MNIST_data/train-images-idx3-ubyte.gz
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz


### Helper Functions

- Weight initialization for FC layer or Conv layers

In [5]:
def init_weights(shape):
    """Create a trainable weight variable for a dense or convolutional layer.

    Args:
        shape: Shape of the weight tensor to create.

    Returns:
        A `tf.Variable` initialised from a truncated normal distribution
        with a standard deviation of 0.1.
    """
    # Wrapped in a Variable so the optimizer can update the weights.
    return tf.Variable(tf.truncated_normal(shape, stddev=0.1))

In [6]:
def init_bias(shape):
    """Create a trainable bias variable whose entries all start at 0.1.

    Args:
        shape: Shape of the bias tensor to create.

    Returns:
        A `tf.Variable` filled with the constant 0.1.
    """
    return tf.Variable(tf.constant(0.1, shape=shape))

### Model Creation

1. Create a 2D-Conv-Layer by using `conv2d` from TF
2. Compute a convolution given 4-D input and filters:
    - Given an input tensor of shape `[batch, in_height, in_width, in_channels]`
and a filter / kernel tensor of shape
`[filter_height, filter_width, in_channels, out_channels]`, this operation
performs the following:
        1. Flatten the filter to a 2-D matrix with shape `[filter_height * filter_width * in_channels, output_channels]`.
        2. Extract the image patches from the input tensor to form a *virtual* tensor of shape `[batch, out_height, out_width, filter_height * filter_width * in_channels]`.
        3. For each patch, right-multiply the filter matrix and the image patch vector.


In [7]:
def conv2d(x, W):
    """Convolve `x` with kernel `W` using unit stride and 'SAME' padding.

    Args:
        x: Input tensor laid out as [batch, height, width, channels],
            e.g. [batch, H, W, 1] for grayscale images.
        W: Kernel tensor laid out as
            [filter_height, filter_width, in_channels, out_channels].

    Returns:
        The result of `tf.nn.conv2d`; with a stride of 1 in every direction
        and 'SAME' padding the spatial size of the input is preserved.
    """
    unit_stride = [1, 1, 1, 1]
    return tf.nn.conv2d(x, W, strides=unit_stride, padding='SAME')

In [8]:
def max_pool_2by2(x):
    """Apply 2x2 max pooling with a stride of 2 and 'SAME' padding.

    Args:
        x: Input tensor laid out as [batch, height, width, channels].

    Returns:
        The pooled tensor. The window and stride only span the height and
        width axes, so those two dimensions are halved (rounded up) while
        batch and channel dimensions are untouched.
    """
    pool_window = [1, 2, 2, 1]   # window size per input dimension
    pool_strides = [1, 2, 2, 1]  # step of the sliding window per dimension
    return tf.nn.max_pool(x, ksize=pool_window, strides=pool_strides, padding='SAME')

In [9]:
def convolutional_layer(input_x, shape):
    """Build a convolution + bias + ReLU layer.

    Args:
        input_x: Input tensor passed to `conv2d`.
        shape: Kernel shape [filter_height, filter_width, in_channels,
            out_channels]; the last entry also sizes the bias vector.

    Returns:
        The ReLU-activated output of the convolution.
    """
    kernel = init_weights(shape)
    bias = init_bias([shape[3]])  # one bias per output channel
    pre_activation = conv2d(input_x, kernel) + bias
    return tf.nn.relu(pre_activation)

In [10]:
def normal_full_layer(input_layer, size):
    """Build a fully connected (dense) layer without an activation.

    Args:
        input_layer: 2-D input tensor; its second dimension must be
            statically known because it sizes the weight matrix.
        size: Number of output units.

    Returns:
        `input_layer @ W + b` as a tensor with `size` columns.
    """
    n_inputs = int(input_layer.get_shape()[1])
    weights = init_weights([n_inputs, size])
    bias = init_bias([size])
    projected = tf.matmul(input_layer, weights)
    return projected + bias

### Placeholders

In [11]:
# Flattened input images: any batch size, 784 values per image.
x = tf.placeholder(tf.float32, shape=[None, 784])

In [12]:
# Ground-truth labels: any batch size, 10 values per example.
y_true = tf.placeholder(tf.float32, shape=[None, 10])

### Layers

In [13]:
# Turn each flattened 784-vector back into a 28x28 image with a single channel;
# -1 lets TensorFlow infer the batch dimension.
x_image = tf.reshape(x, [-1, 28, 28, 1])

In [14]:
# First convolutional block.
# Kernel shape is [patch_size, patch_size, C_in, C_out]:
#   - 6x6 patches (the video used 5x5; the filter size is free to tune),
#   - C_in = 1 because the input images are grayscale,
#   - C_out = 32 is the number of filters; it can be changed, but the next
#     layer's C_in must then be changed to match.
convo_1 = convolutional_layer(x_image, shape=[6, 6, 1, 32])
convo_1_pooling = max_pool_2by2(convo_1)

In [15]:
# Second convolutional block.
# Again 6x6 patches (5x5 in the video). C_in = 32 matches the number of filters
# produced by the first block; C_out = 64 is the number of 6x6 filters learned
# here and can be tuned.
convo_2 = convolutional_layer(convo_1_pooling, shape=[6, 6, 32, 64])
convo_2_pooling = max_pool_2by2(convo_2)

#### Conversion from Convolution to Dense Layer

Reshaped tensor:
- -1 --> Filled automatically
- 7 --> Height and width of each feature map after the two 2x2 pooling layers: (28 / 2) / 2 = 7
- 64 depends on the number of features extracted during the last layer before flattening


In [16]:
# Flatten the pooled feature maps so they can feed a dense layer.
# Two 2x2 poolings shrink the 28x28 image to (28 / 2) / 2 = 7 per side, and the
# last convolution outputs 64 channels, giving 7 * 7 * 64 values per example.
convo_2_flat = tf.reshape(convo_2_pooling, [-1, 7 * 7 * 64])

# Dense layer with 1024 units followed by a ReLU activation.
full_layer_one = tf.nn.relu(normal_full_layer(convo_2_flat, 1024))

In [17]:
# NOTE THE PLACEHOLDER HERE!
# `hold_prob` is the probability of KEEPING a unit; it is fed at run time so
# that training and evaluation can use different values.
hold_prob = tf.placeholder(tf.float32)

# `keep_prob` is deprecated in tf.nn.dropout (see the warning this cell used to
# emit); the replacement argument `rate` is the DROP probability, i.e.
# rate = 1 - keep_prob.
full_one_dropout = tf.nn.dropout(full_layer_one, rate=1 - hold_prob)

Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.


In [18]:
# Output layer: 10 unnormalised scores (logits) per example, no activation.
y_pred = normal_full_layer(full_one_dropout, 10)

### Loss Function

In [19]:
# Mean softmax cross-entropy over the batch.
# Uses the `_v2` op because `softmax_cross_entropy_with_logits` is deprecated
# (see the warning this cell used to emit). `_v2` additionally lets gradients
# flow into `labels`; here the labels are a fed placeholder, so training is
# unaffected.
cross_entropy = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits_v2(labels=y_true, logits=y_pred)
)

Instructions for updating:

Future major versions of TensorFlow will allow gradients to flow
into the labels input on backprop by default.

See `tf.nn.softmax_cross_entropy_with_logits_v2`.



### Optimizer

In [20]:
# Adam optimizer with a learning rate of 1e-4.
optimizer = tf.train.AdamOptimizer(learning_rate=0.0001)

# Op that performs one optimisation step on the cross-entropy loss.
train = optimizer.minimize(cross_entropy)

### Initialization and Session

In [21]:
# Op that initialises every global variable; it must be run inside the session
# before any training step.
init = tf.global_variables_initializer()

In [22]:
steps = 5000

# Build the evaluation ops ONCE, before the loop. Creating them inside the loop
# would add a fresh set of nodes to the default graph on every evaluation,
# growing the graph for no benefit.
matches = tf.equal(tf.argmax(y_pred, 1), tf.argmax(y_true, 1))
acc = tf.reduce_mean(tf.cast(matches, tf.float32))

with tf.Session() as sess:

    sess.run(init)

    for i in range(steps):

        batch_x, batch_y = mnist.train.next_batch(50)

        # One optimisation step; each unit is kept with probability 0.5.
        sess.run(train, feed_dict={x: batch_x, y_true: batch_y, hold_prob: 0.5})

        # PRINT OUT A MESSAGE EVERY 100 STEPS
        if i % 100 == 0:

            print('Currently on step {}'.format(i))
            print('Accuracy is:')
            # Evaluate on the test set with dropout disabled (keep probability 1.0).
            print(sess.run(acc, feed_dict={x: mnist.test.images,
                                           y_true: mnist.test.labels,
                                           hold_prob: 1.0}))
            print('\n')

Currently on step 0
Accuracy is:
0.1371


Currently on step 100
Accuracy is:
0.8591


Currently on step 200
Accuracy is:
0.9143


Currently on step 300
Accuracy is:
0.9356


Currently on step 400
Accuracy is:
0.9367


Currently on step 500
Accuracy is:
0.9482


Currently on step 600
Accuracy is:
0.9517


Currently on step 700
Accuracy is:
0.9573


Currently on step 800
Accuracy is:
0.9565


Currently on step 900
Accuracy is:
0.9612


Currently on step 1000
Accuracy is:
0.9636


Currently on step 1100
Accuracy is:
0.9654


Currently on step 1200
Accuracy is:
0.9684


Currently on step 1300
Accuracy is:
0.9683


Currently on step 1400
Accuracy is:
0.9694


Currently on step 1500
Accuracy is:
0.9729


Currently on step 1600
Accuracy is:
0.9733


Currently on step 1700
Accuracy is:
0.9744


Currently on step 1800
Accuracy is:
0.9765


Currently on step 1900
Accuracy is:
0.9763


Currently on step 2000
Accuracy is:
0.9765


Currently on step 2100
Accuracy is:
0.9766


Currently on step 2200