# Vanilla Feedforward Neural Network

Let's train our first neural network.

In [1]:
import tensorflow as tf
import numpy as np
from tensorflow.examples.tutorials.mnist import input_data
from itertools import izip

## The data set: MNIST
The MNIST data set is a labeled data set consisting in 70,000 handwritten digits (see http://yann.lecun.com/exdb/mnist/ for more information). It is often used to benchmarck classification algorithms.

In [2]:
data = input_data.read_data_sets("MNIST/", one_hot=True, validation_size=0)
x_train, y_train = data.train.images, data.train.labels
x_test, y_test = data.test.images, data.test.labels

Successfully downloaded train-images-idx3-ubyte.gz 9912422 bytes.
Extracting MNIST/train-images-idx3-ubyte.gz
Successfully downloaded train-labels-idx1-ubyte.gz 28881 bytes.
Extracting MNIST/train-labels-idx1-ubyte.gz
Successfully downloaded t10k-images-idx3-ubyte.gz 1648877 bytes.
Extracting MNIST/t10k-images-idx3-ubyte.gz
Successfully downloaded t10k-labels-idx1-ubyte.gz 4542 bytes.
Extracting MNIST/t10k-labels-idx1-ubyte.gz


## Creating the network

Some hyperparameters of the network.

In [3]:
n_input = 784 # size of the input, i.e. dimension of an image 28*28 = 784
hidden_units1 = 128 # number of hidden units for the first layer
hidden_units2 = 128 # and for the second layer
n_classes = 10 # number of classes, i.e. digits from 0 to 9.

In [4]:
# placeholders to feed the data to the network
images = tf.placeholder(tf.float32, shape=[None, n_input])
labels = tf.placeholder(tf.int64, shape=[None, n_classes]) # e.g. digit 6 is encoder as `[0,0,0,0,0,0,1,0,0,0]`

We create a simple network with two hidden layers.

In [5]:
# layer 1
w1 = tf.get_variable('weigths_1', shape=[n_input, hidden_units1],
                     initializer=tf.random_normal_initializer(stddev=1.0))
b1 = tf.get_variable('biases_1', shape=[hidden_units1])
layer1 = tf.matmul(images, w1) + b1
layer1 = tf.nn.relu(layer1) # non linearity

# layer 2
w2 = tf.get_variable('weigths_2', shape=[hidden_units1, hidden_units2],
            initializer=tf.random_normal_initializer(stddev=1.0 / np.sqrt(hidden_units1)))
b2 = tf.get_variable('biases_2', shape=[hidden_units2])
layer2 = tf.matmul(layer1, w2) + b2
layer2 = tf.nn.relu(layer2) # non linearity

# output layer
w3 = tf.get_variable('weigths_3', shape=[hidden_units2, n_classes],
            initializer=tf.random_normal_initializer(stddev=1.0 / np.sqrt(hidden_units2)))
b3 = tf.get_variable('biases_3', shape=[n_classes])
output = tf.matmul(layer2, w3) + b3

Creation of the loss and the optimizer.

In [6]:
logits = tf.nn.softmax_cross_entropy_with_logits(logits=output, labels=labels)
loss = tf.reduce_mean(logits)
optimizer = tf.train.AdamOptimizer().minimize(loss)

# calculate the accuracy
correct_pred = tf.equal(tf.argmax(output, 1), tf.argmax(labels, 1))
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

Helper functions to get random batches of data to train the network on.

In [7]:
def get_next_batch_iterator(batch_size):
    span = range(x_train.shape[0])
    np.random.shuffle(span)
    args = [iter(span)] * batch_size
    return izip(*args)

def get_next_batch(batch_size):
    try:
        indexes = iterator.next()
    except:
        global iterator
        iterator = get_next_batch_iterator(batch_size)
        indexes = iterator.next()
    return np.asarray([x_train[i] for i in indexes]),\
            np.asarray([y_train[i] for i in indexes])

  global iterator


And we finally train the network.

In [8]:
steps_n = 1000
batch_size = 32
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for i in range(1, steps_n + 1):
        images_batch, labels_batch = get_next_batch(batch_size)
        feed_dict = {images: images_batch, labels: labels_batch}
        sess.run([optimizer], feed_dict=feed_dict)
        if i % 50 == 0:
            feed_dict = {images: x_test, labels: y_test}
            loss_, accuracy_ = sess.run([loss, accuracy], feed_dict=feed_dict)
            print('TEST: step %i, loss %.3f, accuracy %.2f' % (i, loss_, accuracy_))

TEST: step 50, loss 0.977, accuracy 0.69
TEST: step 100, loss 0.672, accuracy 0.79
TEST: step 150, loss 0.553, accuracy 0.83
TEST: step 200, loss 0.500, accuracy 0.85
TEST: step 250, loss 0.479, accuracy 0.86
TEST: step 300, loss 0.460, accuracy 0.86
TEST: step 350, loss 0.392, accuracy 0.88
TEST: step 400, loss 0.399, accuracy 0.88
TEST: step 450, loss 0.359, accuracy 0.89
TEST: step 500, loss 0.375, accuracy 0.89
TEST: step 550, loss 0.317, accuracy 0.90
TEST: step 600, loss 0.316, accuracy 0.90
TEST: step 650, loss 0.328, accuracy 0.90
TEST: step 700, loss 0.303, accuracy 0.91
TEST: step 750, loss 0.296, accuracy 0.91
TEST: step 800, loss 0.293, accuracy 0.91
TEST: step 850, loss 0.279, accuracy 0.92
TEST: step 900, loss 0.317, accuracy 0.90
TEST: step 950, loss 0.274, accuracy 0.92
TEST: step 1000, loss 0.248, accuracy 0.93
