# MNIST: CNN
Solving the MNIST problem using a Convolutional Neural Network (CNN) instead of a linear model

In [None]:
import numpy as np
import pandas as pd
import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()

import matplotlib.pyplot as plt

from tensorflow.examples.tutorials.mnist import input_data

In [None]:
# load the MNIST data sets from the data/MNIST/ directory with one-hot labels
mnist = input_data.read_data_sets(
    "data/MNIST/",
    one_hot=True,
)

In [22]:
# helper function: create a weight variable with random starting values
def init_weights(shape):
    """Return a ``tf.Variable`` of ``shape`` filled from a truncated normal.

    The initial values are drawn with a standard deviation of 0.1.
    """
    return tf.Variable(tf.truncated_normal(shape, stddev=0.1))

In [23]:
# helper function: create a bias variable with a constant starting value
def init_bias(shape):
    """Return a ``tf.Variable`` of ``shape`` with every element set to 0.1."""
    return tf.Variable(tf.constant(0.1, shape=shape))

In [24]:
# 2D convolution wrapper

def conv2d(x, W):
    """Convolve ``x`` with the filter ``W`` using unit strides and SAME padding.

    x: input laid out as [batch, H, W, channels]
         batch    -> number of images
         H, W     -> image height and width
         channels -> 1 for grayscale, more for colour images (e.g. RGB)
    W: filter laid out as [H, W, channels_in, channels_out]
         H, W     -> filter height and width
         channels -> number of input / output channels
    """
    # move one position at a time along every dimension
    unit_strides = [1, 1, 1, 1]
    # 'SAME' selects the zero-padding algorithm
    return tf.nn.conv2d(x, W, strides=unit_strides, padding='SAME')

In [25]:
# pooling: summarise each patch of a layer by a single value

def max_pool_2by2(x):
    """Apply max pooling with a 2x2 window and a stride of 2 to ``x``.

    x: input laid out as [batch, H, W, channels]
         batch    -> number of images
         H, W     -> image height and width
         channels -> 1 for grayscale, more for colour images (e.g. RGB)
    """
    # the window and the stride only span H and W; batch and channels stay at 1
    window = [1, 2, 2, 1]
    step = [1, 2, 2, 1]
    return tf.nn.max_pool(x, ksize=window, strides=step, padding='SAME')

In [56]:
# convolutional layer: convolution + bias followed by ReLU

def convolutional_layer(input_x, shape):
    """Build a ReLU-activated convolution over ``input_x``.

    ``shape`` is the filter shape passed to ``init_weights``; its fourth
    entry (``shape[3]``) sizes the bias vector.
    """
    W = init_weights(shape)
    out_channels = shape[3]
    b = init_bias([out_channels])
    pre_activation = conv2d(input_x, W) + b
    return tf.nn.relu(pre_activation)

In [57]:
# dense (fully connected) layer, no activation applied

def normal_full_layer(input_layer, size):
    """Return ``input_layer @ W + b`` with ``size`` output units.

    The number of input features is read from the second dimension of
    ``input_layer``'s static shape.
    """
    static_shape = input_layer.get_shape()
    input_size = int(static_shape[1])
    W = init_weights([input_size, size])
    b = init_bias([size])
    projected = tf.matmul(input_layer, W)
    return projected + b

In [58]:
# PLACEHOLDERS (fed at session run time)

# input layer: one flattened 28x28 image per row
x = tf.placeholder(dtype=tf.float32, shape=[None, 28 * 28])

# output layer: one column per digit 0-9
y_true = tf.placeholder(dtype=tf.float32, shape=[None, 10])

In [59]:
# LAYERS

# turn each flat row back into a 28x28 image with a single channel;
# -1 lets the batch dimension be inferred
x_image = tf.reshape(x, shape=[-1, 28, 28, 1])

In [60]:
# height and width of the convolution patch
patch_h = 5
patch_w = 5

# first convolutional layer: 1 input feature -> 32 output features
convo_1 = convolutional_layer(
    x_image,
    shape=[patch_h, patch_w, 1, 32],
)

# first pooling layer
convo_1_pooling = max_pool_2by2(convo_1)

In [61]:
# second convolutional layer:
# takes the 32 features produced by the previous layer
# and produces 64 features for the next one
convo_2 = convolutional_layer(
    convo_1_pooling,
    shape=[patch_h, patch_w, 32, 64],
)

# second pooling layer
convo_2_pooling = max_pool_2by2(convo_2)

In [62]:
# flatten each pooled feature map into a single row of 7*7*64 values
convo_2_flat = tf.reshape(convo_2_pooling, shape=[-1, 7 * 7 * 64])

# first fully connected layer: 1024 units with ReLU activation
full_layer_one = tf.nn.relu(
    normal_full_layer(convo_2_flat, 1024)
)

In [63]:
# dropout: randomly disconnect nodes during training so the network
# does not overfit by leaning too heavily on any single node
# hold_prob is the probability of keeping a node; it is fed at run time
hold_prob = tf.placeholder(dtype=tf.float32)
full_one_dropout = tf.nn.dropout(
    full_layer_one,
    keep_prob=hold_prob,
)

In [64]:
# final layer: 10 outputs, no activation applied here
y_pred = normal_full_layer(
    full_one_dropout,
    10,
)

In [65]:
# loss function: mean softmax cross-entropy between the true labels and the logits
# The *_v2 op replaces the deprecated softmax_cross_entropy_with_logits.
# y_true is a placeholder with no variables behind it, so the training
# result is unchanged.
per_example_loss = tf.nn.softmax_cross_entropy_with_logits_v2(
    labels=y_true,
    logits=y_pred,
)
cross_entropy = tf.reduce_mean(per_example_loss)

In [66]:
# optimizer
# Adam adapts the learning rate as it goes, aiming for both
# - fast training
# - a precise learning rate
optimizer = tf.train.AdamOptimizer(learning_rate=1e-3)

# one call to `train` performs a single optimisation step on the loss
train = optimizer.minimize(loss=cross_entropy)

In [None]:
# Train the network and periodically report its accuracy on the test set.

init = tf.global_variables_initializer()

steps = 5000
train_batch_size = 50
eval_every = 100       # report test accuracy every this many training steps
eval_batch_size = 500  # number of test images fed per evaluation run

# The evaluation ops are built ONCE, before the session starts. Creating them
# inside the training loop would add new nodes to the graph on every report.
matches = tf.equal(tf.argmax(y_pred, 1), tf.argmax(y_true, 1))
num_correct = tf.reduce_sum(tf.cast(matches, tf.float32))

with tf.Session() as sess:
    sess.run(init)

    for i in range(steps):
        batch_x, batch_y = mnist.train.next_batch(train_batch_size)
        # hold_prob 0.5: dropout is active while training
        sess.run(train, feed_dict={x: batch_x, y_true: batch_y, hold_prob: 0.5})

        # sample the accuracy periodically, and once more after the last step
        if i % eval_every == 0 or i == steps - 1:
            test_images = mnist.test.images
            test_labels = mnist.test.labels

            # Feed the test set in small slices and accumulate the number of
            # correct predictions; pushing every test image through the
            # network in a single run needs more memory than some machines have.
            total_correct = 0.0
            for start in range(0, len(test_images), eval_batch_size):
                stop = start + eval_batch_size
                total_correct += float(sess.run(
                    num_correct,
                    feed_dict={
                        x: test_images[start:stop],
                        y_true: test_labels[start:stop],
                        hold_prob: 1.0,  # keep every node when evaluating
                    },
                ))

            print("ON STEP: {}".format(i))
            print("ACCURACY: ")
            print(total_correct / len(test_images))
                                         

In [41]:
# TODO: view the graph from tensorboard
# https://www.tensorflow.org/tensorboard/tensorboard_in_notebooks