In [None]:
from tensorflow.examples.tutorials.mnist import input_data
import matplotlib.pyplot as plt
import numpy as np
import random as ran
import tensorflow as tf

%matplotlib inline

# Load the dataset from the tensorflow library
mnist = input_data.read_data_sets('MNIST_data', one_hot=True)

# Useful functions

def plot_digit(num_sample):
    """Display one training image together with its ground-truth label.

    Reads the module-level ``x_train`` and ``y_train`` arrays, so both must
    be defined before this function is called.

    Args:
        num_sample: Index of the training sample to display.
    """
    # Labels are one-hot encoded, so the position of the maximum is the digit.
    label = y_train[num_sample].argmax(axis=0)
    # Samples are stored flat; restore the 28x28 pixel grid for display.
    img = x_train[num_sample].reshape([28, 28])
    plt.title('Sample: %d  Ground truth label: %d' % (num_sample, label))
    plt.imshow(img, cmap=plt.get_cmap('gray_r'))
    plt.show()
    
def plot_weights():
    """Plot every column of ``W`` as a 28x28 image in a 2x5 grid.

    Reads the module-level ``sess`` and ``W``, so it must be called while
    that session is open and after ``W`` has been initialized.
    """
    # Evaluate the weight matrix once; it cannot change while we are plotting,
    # so there is no need to fetch it again for each of the ten subplots.
    weight_matrix = sess.run(W)
    for i in range(10):
        plt.subplot(2, 5, i + 1)
        plt.title(i)
        plt.imshow(weight_matrix[:, i].reshape([28, 28]),
                   cmap=plt.get_cmap('seismic'))
        # Hide the tick marks: pixel coordinates carry no information here.
        frame1 = plt.gca()
        frame1.axes.get_xaxis().set_visible(False)
        frame1.axes.get_yaxis().set_visible(False)
    plt.show()
    

# Expose the train / test images and labels as plain module-level arrays
x_train = mnist.train.images
y_train = mnist.train.labels
x_test = mnist.test.images
y_test = mnist.test.labels

In [None]:
# Dimensions of the training set
sample_count = x_train.shape[0]
feature_count = x_train.shape[1]
print("Num Samples:", sample_count)
print("Length / Features:", feature_count)

# Preview a randomly chosen digit (index drawn from 0..1000 inclusive)
preview_index = ran.randint(0, 1000)
plot_digit(preview_index)

In [None]:
%matplotlib inline

# Case 1 - Perceptron

#Architecture constructor
X = tf.placeholder(tf.float32, shape=[None, 784])
Y = tf.placeholder(tf.float32, shape=[None, 10])
W = tf.Variable(tf.zeros([784,10]))
b = tf.Variable(tf.zeros([10]))
y = tf.nn.softmax(tf.matmul(X,W) + b)

#Main Parameters
LEARNING_RATE = 0.05
TRAINING_EPOCHS = 1
BATCH_SIZE = 200
DISPLAY_STEP = 50

# Define loss and optimizer
loss_op = tf.reduce_mean(-tf.reduce_sum(Y * tf.log(y), reduction_indices=[1]))
#optimizer = tf.train.AdamOptimizer(learning_rate=LEARNING_RATE)
optimizer = tf.train.GradientDescentOptimizer(LEARNING_RATE)
train_op = optimizer.minimize(loss_op)

#Only for visualization
correct_prediction = tf.equal(tf.argmax(y,1), tf.argmax(Y,1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

init = tf.global_variables_initializer()

# Training
with tf.Session() as sess:
    sess.run(init)
    #Plot weights before training
    plot_weights()
    # Training cycle
    error = []
    for epoch in range(TRAINING_EPOCHS):
        avg_cost = 0.
        total_batch = int(mnist.train.num_examples/BATCH_SIZE)
        # Loop over all batches
        for i in range(total_batch):
            batch_x, batch_y = mnist.train.next_batch(BATCH_SIZE)
            # Run optimization op (backprop) and cost op (to get loss value)
            _, c = sess.run([train_op, loss_op], feed_dict={X: batch_x,
                                                      Y: batch_y})
            # Display logs per epoch step
            error.append(sess.run(loss_op, {X: x_train, Y: y_train}))
            if i % DISPLAY_STEP == 0:
                print('Epoch:' + str(epoch) + ' Training Batch:' + str(i) + '  Accuracy =  ' + str(sess.run(accuracy, feed_dict={X: x_test, Y: y_test})) + '  Loss = ' + str(sess.run(loss_op, {X: x_train, Y: y_train})))
    #Plot weights after training
    plot_weights()


In [None]:
%matplotlib inline
plt.plot(error)
plt.show()

In [None]:
%matplotlib inline

# Case 2 - Multilayer Perceptron

#Main Parameters
LEARNING_RATE = 0.05
TRAINING_EPOCHS = 1
BATCH_SIZE = 200
DISPLAY_STEP = 50
N_HIDDEN_NODES1 = 30
N_HIDDEN_NODES2 = 30

#Architecture constructor
X = tf.placeholder(tf.float32, shape=[None, 784])
Y = tf.placeholder(tf.float32, shape=[None, 10])
weights = {
    'h1': tf.Variable(tf.random_normal([784, N_HIDDEN_NODES1])),
    'h2': tf.Variable(tf.random_normal([N_HIDDEN_NODES1, N_HIDDEN_NODES2])),
    'out': tf.Variable(tf.random_normal([N_HIDDEN_NODES2, 10]))
}
biases = {
    'b1': tf.Variable(tf.random_normal([N_HIDDEN_NODES1])),
    'b2': tf.Variable(tf.random_normal([N_HIDDEN_NODES2])),
    'out': tf.Variable(tf.random_normal([10]))
}

layer_1 = tf.add(tf.matmul(X, weights['h1']), biases['b1'])
layer_2 = tf.add(tf.matmul(layer_1, weights['h2']), biases['b2'])
logits = tf.matmul(layer_2, weights['out']) + biases['out']
y = tf.nn.softmax(logits)

# Define loss and optimizer
loss_op = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(
    logits=logits, labels=Y))
optimizer = tf.train.AdamOptimizer(learning_rate=LEARNING_RATE)
train_op = optimizer.minimize(loss_op)

#Only for visualization
correct_prediction = tf.equal(tf.argmax(y,1), tf.argmax(Y,1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

init = tf.global_variables_initializer()

# Training
with tf.Session() as sess:
    sess.run(init)
    # Training cycle
    error = []
    for epoch in range(TRAINING_EPOCHS):
        avg_cost = 0.
        total_batch = int(mnist.train.num_examples/BATCH_SIZE)
        # Loop over all batches
        for i in range(total_batch):
            batch_x, batch_y = mnist.train.next_batch(BATCH_SIZE)
            # Run optimization op (backprop) and cost op (to get loss value)
            _, c = sess.run([train_op, loss_op], feed_dict={X: batch_x,
                                                      Y: batch_y})
            # Display logs per epoch step
            error.append(sess.run(loss_op, {X: x_train, Y: y_train}))
            if i % DISPLAY_STEP == 0:
                print('Epoch:' + str(epoch) + ' Training Batch:' + str(i) + '  Accuracy =  ' + str(sess.run(accuracy, feed_dict={X: x_test, Y: y_test})) + '  Loss = ' + str(sess.run(loss_op, {X: x_train, Y: y_train})))
    

In [None]:
%matplotlib inline
plt.plot(error)
plt.show()