# Week-2: Create a simple multi-layer network using pure tensorflow

## Get the input data (only the first 'n' labels) from MNIST

In [30]:
# set the number of labels we will be using
n_labels = 10
n_features = 784
n_hidden = 100

## Tensorflow Variables.

Last week we learned about 'Constants' and 'placeholders' in tensorflows. Remember that constants cannot change in tensorflow, while placeholders are dummy variables, which can be input during a session.run() call. Tensorflow variables, additionally, are a critical piece of the tensorflow library. They are used for weights and biases which need to be updated internally while learning a model.

In [31]:
import tensorflow as tf

weights_hidden = tf.Variable(tf.random_normal([n_features, n_hidden], stddev=1), name="weights_hidden")

weights_out = tf.Variable(tf.random_normal([n_hidden, n_labels], stddev=1), name="weights_out")

biases_hidden = tf.Variable(tf.random_normal([n_hidden], stddev=1), name="biases_hidden")

biases_out = tf.Variable(tf.random_normal([n_labels], stddev=1), name="biases_out")

## Define placeholders for input data

In [32]:
features = tf.placeholder("float",[None, n_features])
labels = tf.placeholder("float",[None, n_labels])

## Now define the operations that will define the algorithm to pursue.

This might be better accomplished via writing a pseudo-code of some sort, before you even write anything. As with anything else, be sure to know the problem you're trying to solve well before attempting to code it up!

### 1. Evaluate the linear mat-vec product along with addition of a bias term
$$ x_h = x.W_{x -> h} + b_{h}$$
$$ x_h = relu (x_h) $$
$$ x_o = x.W_{h -> o} + b_{o}$$

In [33]:
hidden_inputs = tf.add(tf.matmul(features,weights_hidden),biases_hidden)

prediction_hidden = tf.nn.relu(hidden_inputs)

hidden_outputs = tf.add(tf.matmul(prediction_hidden,weights_out),biases_out)

### 2. Evaluate the logits 

$$ z_i = \frac{e^{x_{o,i}}}{\sum_i{e^{x_{o,i}} }}$$

where i is the output from the i-th neuron in the softmax layer

In [44]:
# Softmax
logits = tf.nn.softmax(hidden_outputs)

### 3. Then, evaluate the cross_entropy term
$$ \phi_{k} = - { \sum_{i} y_{k,i} . \log{z_{k,i}}} $$

where k=1,2,...N (Number of training samples), and i= i-th output in softmax layer for a sample **k**

In [37]:
# Cross entropy
# This quantifies how far off the predictions were.
cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logits=prediction, labels=labels)

### 4. Finally evaluate the mean of the entropy term, which will be used as the loss
$$ \psi = \frac{1}{N} \sum_{k}^{N} \phi_k $$

In [38]:
# Training loss
loss = tf.reduce_mean(cross_entropy)

## Now run the model under a session, but first define the optimizer to use

In [45]:
# Gradient Descent
# This is the method used to train the model
optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss)

In [40]:
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("MNIST", one_hot=True)

Extracting MNIST/train-images-idx3-ubyte.gz
Extracting MNIST/train-labels-idx1-ubyte.gz
Extracting MNIST/t10k-images-idx3-ubyte.gz
Extracting MNIST/t10k-labels-idx1-ubyte.gz


In [46]:
#Define some basic model hyper-parameters
num_epochs = 5
batch_size = 32
num_examples = mnist.train.num_examples
iterations_per_epochs = num_examples//batch_size

# Rate at which the weights are changed
learning_rate = 0.001

print("iterations per epochs: ",iterations_per_epochs)

iterations per epochs:  1718


In [47]:
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    
    for epoch in range(num_epochs):
        
        for _ in range(iterations_per_epochs):

            #Get the training x and y values, and run the "optimizer" and "loss" computational nodes

            x_train, y_train = mnist.train.next_batch(batch_size)            
            _ , loss_val = sess.run([optimizer, loss], feed_dict={features: x_train, labels: y_train})

            '''
            IMPORTANT: do NOT run the optimizer here. We ONLY want to evaluate the loss.
            Get the validation x and y values, and run the session against the "loss" computational node
            '''

            x_valid, y_valid = mnist.valid.next_batch(batch_size)
            val_loss = sess.run(loss, feed_dict={features: x_valid, labels: y_valid})
            
        print("epoch: ",epoch, " train_loss: ",loss_val, " val_loss: ",val_loss)

epoch:  0  train_loss:  2.42925  val_loss:  2.39749
epoch:  1  train_loss:  2.45836  val_loss:  2.39455
epoch:  2  train_loss:  2.39761  val_loss:  2.39831
epoch:  3  train_loss:  2.30658  val_loss:  2.29238
epoch:  4  train_loss:  2.36702  val_loss:  2.36449
