# Classifying MNIST Data Using an MLP and a Basic ConvNet

This Jupyter notebook was created using code provided by [Udacity](https://www.udacity.com/)'s [Deep Learning Foundations](https://www.udacity.com/course/deep-learning-nanodegree-foundation--nd101) Nanodegree. I've also added a simple multi-layer perceptron (MLP) using [TensorFlow](https://www.tensorflow.org/) for comparison. The original author of the ConvNet portion of this code is [Aymeric Damien](https://github.com/aymericdamien/TensorFlow-Examples). The TensorFlow tutorials and examples found in the linked GitHub repository are great for beginners; I highly recommend checking them out.

First we'll create a simple MLP with two hidden layers, applying dropout after each hidden layer. Then we'll implement a simple ConvNet with two convolutional layers and two dense layers.

I created this notebook because it gave me a reason to try coding up (really just copying and trying to understand) a simple ConvNet in TensorFlow. I also thought this would be a perfect opportunity to try cloud computing with [FloydHub](https://www.floydhub.com) using a GPU instance - this was _extremely_ painless.

### Import the necessary data/libraries

In [None]:
from tensorflow.examples.tutorials.mnist import input_data
# Load MNIST through the TensorFlow tutorial helper, passing '.' (the current
# directory) as the data directory argument.
# one_hot=True: labels are one-hot vectors, matching the [None, n_classes]
#   label placeholders defined later in this notebook.
# reshape=False: images keep their 28x28x1 layout instead of being flattened,
#   matching the [None, 28, 28, 1] image placeholders.
mnist = input_data.read_data_sets('.', one_hot=True, reshape=False)

In [None]:
import tensorflow as tf

### Classification Using an MLP

In [None]:
# --- Hyperparameters for the MLP run ---

# Problem size: MNIST has ten digit classes.
n_classes = 10

# Optimisation settings.
learning_rate = 1e-5   # step size handed to the gradient-descent optimizer
epochs = 1             # number of full passes over the training set
batch_size = 128       # training examples per optimisation step

# How many validation / test examples are used for each accuracy check.
test_valid_size = 256

# Value fed to the `keep_prob` placeholder during training steps
# (i.e. it is used as a keep probability).
dropout = 0.5

In [None]:
# Trainable parameters of the MLP, all drawn from tf.random_normal.
# Layer sizes: 784 (flattened 28x28 image) -> 250 -> 100 -> n_classes.
weights = dict(
    wd1=tf.Variable(tf.random_normal([784, 250])),        # input    -> hidden 1
    wd2=tf.Variable(tf.random_normal([250, 100])),        # hidden 1 -> hidden 2
    out=tf.Variable(tf.random_normal([100, n_classes])),  # hidden 2 -> output
)

# One bias vector per layer, sized to that layer's number of output units.
biases = dict(
    bd1=tf.Variable(tf.random_normal([250])),
    bd2=tf.Variable(tf.random_normal([100])),
    out=tf.Variable(tf.random_normal([n_classes])),
)

In [None]:
def fully_conn(x, W, b):
    """Apply one fully connected layer followed by a ReLU.

    Args:
        x: Input tensor; multiplied on the right by `W` with tf.matmul.
        W: Weight matrix for the layer.
        b: Bias added to the matmul result.

    Returns:
        The tensor relu(x @ W + b).
    """
    x = tf.add(tf.matmul(x, W), b)
    # The activation must be returned: without this the function yields None
    # and every caller receives None instead of a tensor.
    return tf.nn.relu(x)

def mlp(x, weights, biases, dropout):
    """Build a two-hidden-layer perceptron and return its output logits.

    Args:
        x: Input tensor, multiplied directly by weights['wd1'] (so it must
            already be flattened to match that matrix).
        weights: Dict with keys 'wd1', 'wd2' and 'out' holding the layer
            weight matrices.
        biases: Dict with keys 'bd1', 'bd2' and 'out' holding the layer
            bias vectors.
        dropout: Forwarded as the second argument of tf.nn.dropout after each
            hidden layer; this notebook feeds it from the `keep_prob`
            placeholder.

    Returns:
        The unnormalised class scores (logits) of the output layer.
    """
    fc1 = fully_conn(x, weights['wd1'], biases['bd1'])
    fc1 = tf.nn.dropout(fc1, dropout)

    fc2 = fully_conn(fc1, weights['wd2'], biases['bd2'])
    fc2 = tf.nn.dropout(fc2, dropout)

    # Return raw logits, NOT softmax probabilities: the result is passed to
    # tf.nn.softmax_cross_entropy_with_logits, which applies softmax itself.
    # Applying softmax here as well would normalise twice and distort the
    # loss and its gradients. argmax-based accuracy is unaffected either way.
    out = tf.add(tf.matmul(fc2, weights['out']), biases['out'])
    return out

In [None]:
# Graph inputs: image batch, one-hot labels, and the dropout keep probability.
x = tf.placeholder(tf.float32, [None, 28, 28, 1])
y = tf.placeholder(tf.float32, [None, n_classes])
keep_prob = tf.placeholder(tf.float32)

# Flatten each image so it can be multiplied by weights['wd1'].
# The flattened tensor gets its own name on purpose: `x` must keep referring
# to the placeholder, because the training loop feeds image batches of shape
# [batch, 28, 28, 1] through `feed_dict={x: ...}`. Rebinding `x` to the
# reshape output would make those feeds target a [?, 784] tensor and fail
# with a shape mismatch.
x_flat = tf.reshape(x, [-1, weights['wd1'].get_shape().as_list()[0]])

logits = mlp(x_flat, weights, biases, keep_prob)

# Mean softmax cross-entropy over the batch, minimised with plain SGD.
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=y))
optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate).minimize(cost)

# Accuracy: fraction of examples whose highest-scoring class matches the label.
correct_pred = tf.equal(tf.argmax(logits, 1), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

init = tf.global_variables_initializer()

# Train the MLP, reporting loss and validation accuracy after every batch,
# then report accuracy on the first `test_valid_size` test examples.
with tf.Session() as sess:
    sess.run(init)

    # Values that stay the same for every step are prepared once up front.
    batches_per_epoch = mnist.train.num_examples // batch_size
    validation_feed = {
        x: mnist.validation.images[:test_valid_size],
        y: mnist.validation.labels[:test_valid_size],
        keep_prob: 1.,
    }
    progress_line = ('Epoch {:>2}, Batch {:>3} -'
                     'Loss: {:>10.4f} Validation Accuracy: {:.6f}')

    for epoch_no in range(1, epochs + 1):
        for batch_no in range(1, batches_per_epoch + 1):
            batch_x, batch_y = mnist.train.next_batch(batch_size)

            # One optimisation step with dropout active.
            train_feed = {x: batch_x, y: batch_y, keep_prob: dropout}
            sess.run(optimizer, feed_dict=train_feed)

            # Re-evaluate the loss on the same batch with keep_prob = 1.
            eval_feed = {x: batch_x, y: batch_y, keep_prob: 1.}
            loss = sess.run(cost, feed_dict=eval_feed)

            valid_acc = sess.run(accuracy, feed_dict=validation_feed)

            print(progress_line.format(epoch_no, batch_no, loss, valid_acc))

    test_feed = {
        x: mnist.test.images[:test_valid_size],
        y: mnist.test.labels[:test_valid_size],
        keep_prob: 1.,
    }
    test_acc = sess.run(accuracy, feed_dict=test_feed)

    print('Test Accuracy: {}'.format(test_acc))

### Classification Using a ConvNet

In [None]:
# --- Hyperparameters for the ConvNet run ---

# Problem size: MNIST has ten digit classes.
n_classes = 10

# Optimisation settings.
learning_rate = 1e-5   # step size handed to the gradient-descent optimizer
epochs = 10            # number of full passes over the training set
batch_size = 128       # training examples per optimisation step

# How many validation / test examples are used for each accuracy check.
test_valid_size = 256

# Value fed to the `keep_prob` placeholder during training steps
# (i.e. it is used as a keep probability).
dropout = 0.75

In [None]:
# Trainable parameters of the ConvNet, all drawn from tf.random_normal.
weights = dict(
    # Convolution filters, used as the filter argument of tf.nn.conv2d:
    # 5x5 kernels, 1 -> 32 channels, then 32 -> 64 channels.
    wc1=tf.Variable(tf.random_normal([5, 5, 1, 32])),
    wc2=tf.Variable(tf.random_normal([5, 5, 32, 64])),
    # Dense layer: its first dimension (7*7*64) is the size the network
    # flattens the second pooled feature map to.
    wd1=tf.Variable(tf.random_normal([3136, 1024])),
    # Output layer: 1024 hidden units -> one score per class.
    out=tf.Variable(tf.random_normal([1024, n_classes])),
)

# One bias vector per layer, sized to that layer's number of outputs.
biases = dict(
    bc1=tf.Variable(tf.random_normal([32])),
    bc2=tf.Variable(tf.random_normal([64])),
    bd1=tf.Variable(tf.random_normal([1024])),
    out=tf.Variable(tf.random_normal([n_classes])),
)

In [None]:
def conv2d(x, W, b, strides=1):
    """Convolve `x` with `W`, add bias `b`, and apply a ReLU.

    Args:
        x: Input tensor for tf.nn.conv2d.
        W: Filter tensor for tf.nn.conv2d.
        b: Bias added with tf.nn.bias_add.
        strides: Stride used for both middle dimensions of the stride
            spec [1, strides, strides, 1].

    Returns:
        The activated feature map ('SAME' padding is used).
    """
    stride_spec = [1, strides, strides, 1]
    convolved = tf.nn.conv2d(x, W, strides=stride_spec, padding='SAME')
    biased = tf.nn.bias_add(convolved, b)
    return tf.nn.relu(biased)

def maxpool2d(x, k=2):
    """Max-pool `x` with a k-by-k window and a matching stride of k.

    Args:
        x: Input tensor for tf.nn.max_pool.
        k: Used for both the window size and the stride in the two middle
            dimensions.

    Returns:
        The pooled tensor ('SAME' padding is used).
    """
    window = [1, k, k, 1]
    step = [1, k, k, 1]
    return tf.nn.max_pool(x, ksize=window, strides=step, padding='SAME')

def conv_net(x, weights, biases, dropout):
    """Build the ConvNet: two conv+pool stages, one dense layer, and an output layer.

    Args:
        x: Input image tensor passed to the first convolution.
        weights: Dict with keys 'wc1', 'wc2', 'wd1' and 'out'.
        biases: Dict with keys 'bc1', 'bc2', 'bd1' and 'out'.
        dropout: Forwarded as the second argument of tf.nn.dropout on the
            dense layer; this notebook feeds it from the `keep_prob`
            placeholder.

    Returns:
        The unnormalised class scores (logits); no softmax is applied here.
    """
    # Stage 1 and stage 2: convolution + ReLU, then 2x2 max pooling.
    pooled1 = maxpool2d(conv2d(x, weights['wc1'], biases['bc1']), k=2)
    pooled2 = maxpool2d(conv2d(pooled1, weights['wc2'], biases['bc2']), k=2)

    # Flatten to whatever input width the dense weight matrix expects.
    flat_width = weights['wd1'].get_shape().as_list()[0]
    flat = tf.reshape(pooled2, [-1, flat_width])

    # Dense layer with ReLU, followed by dropout.
    hidden = tf.nn.relu(tf.add(tf.matmul(flat, weights['wd1']), biases['bd1']))
    hidden = tf.nn.dropout(hidden, dropout)

    # Linear output layer.
    return tf.add(tf.matmul(hidden, weights['out']), biases['out'])

In [None]:
# Graph inputs: image batch, one-hot labels, and the dropout keep probability.
x = tf.placeholder(tf.float32, shape=[None, 28, 28, 1])
y = tf.placeholder(tf.float32, shape=[None, n_classes])
keep_prob = tf.placeholder(tf.float32)

# Forward pass: conv_net returns unnormalised class scores.
logits = conv_net(x, weights, biases, keep_prob)

# Loss: mean softmax cross-entropy over the batch, minimised with plain SGD.
per_example_loss = tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=y)
cost = tf.reduce_mean(per_example_loss)
sgd = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
optimizer = sgd.minimize(cost)

# Accuracy: fraction of examples whose highest-scoring class matches the label.
predicted_class = tf.argmax(logits, 1)
true_class = tf.argmax(y, 1)
correct_pred = tf.equal(predicted_class, true_class)
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

init = tf.global_variables_initializer()

# Train the ConvNet, reporting loss and validation accuracy after every batch,
# then report accuracy on the first `test_valid_size` test examples.
with tf.Session() as sess:
    sess.run(init)

    # Values that stay the same for every step are prepared once up front.
    batches_per_epoch = mnist.train.num_examples // batch_size
    validation_feed = {
        x: mnist.validation.images[:test_valid_size],
        y: mnist.validation.labels[:test_valid_size],
        keep_prob: 1.,
    }
    progress_line = ('Epoch {:>2}, Batch {:>3} -'
                     'Loss: {:>10.4f} Validation Accuracy: {:.6f}')

    for epoch_no in range(1, epochs + 1):
        for batch_no in range(1, batches_per_epoch + 1):
            batch_x, batch_y = mnist.train.next_batch(batch_size)

            # One optimisation step with dropout active.
            train_feed = {x: batch_x, y: batch_y, keep_prob: dropout}
            sess.run(optimizer, feed_dict=train_feed)

            # Re-evaluate the loss on the same batch with keep_prob = 1.
            eval_feed = {x: batch_x, y: batch_y, keep_prob: 1.}
            loss = sess.run(cost, feed_dict=eval_feed)

            valid_acc = sess.run(accuracy, feed_dict=validation_feed)

            print(progress_line.format(epoch_no, batch_no, loss, valid_acc))

    test_feed = {
        x: mnist.test.images[:test_valid_size],
        y: mnist.test.labels[:test_valid_size],
        keep_prob: 1.,
    }
    test_acc = sess.run(accuracy, feed_dict=test_feed)

    print('Test Accuracy: {}'.format(test_acc))