## Multilayer Networks in TensorFlow

### Rectified Linear Unit actication function (ReLU)


In [1]:
# Hidden Layer with ReLU activation function
'''
hidden_layer = tf.add(tf.matmul(features, hidden_weights), hidden_biases)
hidden_layer = tf.nn.relu(hidden_layer)

output = tf.add(tf.matmul(hidden_layer, output_weights), output_biases)
'''

import tensorflow as tf

output = None
hidden_layer_weights = [
    [0.1, 0.2, 0.4],
    [0.4, 0.6, 0.6],
    [0.5, 0.9, 0.1],
    [0.8, 0.2, 0.8]]
out_weights = [
    [0.1, 0.6],
    [0.2, 0.1],
    [0.7, 0.9]]

# Weights and biases
weights = [
    tf.Variable(hidden_layer_weights),
    tf.Variable(out_weights)]
biases = [
    tf.Variable(tf.zeros(3)),
    tf.Variable(tf.zeros(2))]

# Input
features = tf.Variable([[1.0, 2.0, 3.0, 4.0], [-1.0, -2.0, -3.0, -4.0], [11.0, 12.0, 13.0, 14.0]])

# TODO: Create Model
hidden_layer = tf.add(tf.matmul(features, weights[0]), biases[0])
hidden_layer = tf.nn.relu(hidden_layer)
logits = tf.add(tf.matmul(hidden_layer, weights[1]), biases[1])

# TODO: Print session results
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    print(sess.run(logits))
    

[[  5.11000013   8.44000053]
 [  0.           0.        ]
 [ 24.01000214  38.23999786]]


### Classify the letters in the MNIST database.

[More TensorFlow examples](https://github.com/aymericdamien/TensorFlow-Examples)

In [5]:
from tensorflow.examples.tutorials.mnist import input_data
# automatic batching and one-hot encodding
mnist = input_data.read_data_sets(".", one_hot=True, reshape=False)

# Parameters
learning_rate = 0.001
training_epochs = 20
batch_size = 128  # Decrease batch size if you don't have enough memory
display_step = 1

n_input = 784  # MNIST data input (img shape: 28*28)
n_classes = 10  # MNIST total classes (0-9 digits)

n_hidden_layer = 256 # width of the layer

# Store layers weight & bias
weights = {
    'hidden_layer': tf.Variable(tf.random_normal([n_input, n_hidden_layer])),
    'out': tf.Variable(tf.random_normal([n_hidden_layer, n_classes]))
}
biases = {
    'hidden_layer': tf.Variable(tf.random_normal([n_hidden_layer])),
    'out': tf.Variable(tf.random_normal([n_classes]))
}

# tf Graph input
x = tf.placeholder("float", [None, 28, 28, 1])
y = tf.placeholder("float", [None, n_classes])
# The MNIST data is made up of 28px by 28px images with a single channel
# Reshaping the 28px by 28px matrices in x into row vectors of 784px:
x_flat = tf.reshape(x, [-1, n_input])

# Hidden layer with RELU activation
layer_1 = tf.add(tf.matmul(x_flat, weights['hidden_layer']),biases['hidden_layer'])
layer_1 = tf.nn.relu(layer_1)
# Output layer with linear activation
logits = tf.add(tf.matmul(layer_1, weights['out']), biases['out'])

# Define loss and optimizer
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=y))
optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate).minimize(cost)

# SESSION
# Initializing the variables
init = tf.global_variables_initializer()


# Launch the graph
with tf.Session() as sess:
    sess.run(init)
    # Training cycle
    for epoch in range(training_epochs):
        total_batch = int(mnist.train.num_examples/batch_size)
        # Loop over all batches
        for i in range(total_batch):
            batch_x, batch_y = mnist.train.next_batch(batch_size)
            # Run optimization op (backprop) and cost op (to get loss value)
            sess.run(optimizer, feed_dict={x: batch_x, y: batch_y})
        # Display logs per epoch step
        if epoch % display_step == 0:
            c = sess.run(cost, feed_dict={x: batch_x, y: batch_y})
            print("Epoch:", '%04d' % (epoch+1), "cost=", \
                "{:.9f}".format(c))
    print("Optimization Finished!")

    # Test model
    correct_prediction = tf.equal(tf.argmax(logits, 1), tf.argmax(y, 1))
    # Calculate accuracy
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
    # Decrease test_size if you don't have enough memory
    test_size = 256
    print("Accuracy:", accuracy.eval({x: mnist.test.images[:test_size], y: mnist.test.labels[:test_size]}))

Extracting ./train-images-idx3-ubyte.gz
Extracting ./train-labels-idx1-ubyte.gz
Extracting ./t10k-images-idx3-ubyte.gz
Extracting ./t10k-labels-idx1-ubyte.gz
('Epoch:', '0001', 'cost=', '39.759468079')
('Epoch:', '0002', 'cost=', '24.574600220')
('Epoch:', '0003', 'cost=', '25.239467621')
('Epoch:', '0004', 'cost=', '13.074935913')
('Epoch:', '0005', 'cost=', '17.439765930')
('Epoch:', '0006', 'cost=', '12.389623642')
('Epoch:', '0007', 'cost=', '12.551672935')
('Epoch:', '0008', 'cost=', '11.512969971')
('Epoch:', '0009', 'cost=', '9.707993507')
('Epoch:', '0010', 'cost=', '8.323940277')
('Epoch:', '0011', 'cost=', '5.698004723')
('Epoch:', '0012', 'cost=', '6.743676662')
('Epoch:', '0013', 'cost=', '8.028894424')
('Epoch:', '0014', 'cost=', '5.158036232')
('Epoch:', '0015', 'cost=', '4.630605698')
('Epoch:', '0016', 'cost=', '7.798291206')
('Epoch:', '0017', 'cost=', '8.058049202')
('Epoch:', '0018', 'cost=', '6.018205166')
('Epoch:', '0019', 'cost=', '6.064187050')
('Epoch:', '0020'

### Save and Restore TensorFlow Models

Save any `tf.Variable` with `tf.train.Saver`

Toy example to save weights and biases:

In [7]:
import tensorflow as tf

# The file path to save the data
save_file = './model.ckpt' # '.ckpt' stands for checkpoint

# Two Tensor Variables: weights and bias
weights = tf.Variable(tf.truncated_normal([2, 3])) # Random values using tf.truncated_normal
bias = tf.Variable(tf.truncated_normal([3]))

# Class used to SAVE and/or restore Tensor Variables
saver = tf.train.Saver()

with tf.Session() as sess:
    # Initialize all the variables
    sess.run(tf.global_variables_initializer())
    
    # Show the values of weights and bias
    print('Weights: ')
    print(sess.run(weights))
    print('Bias: ')
    print(sess.run(bias))
    
    # Save the model
    saver.save(sess, save_file)


Weights: 
[[-0.02379351  0.07406897  0.64788198]
 [ 1.56434417  0.98879695 -0.4301264 ]]
Bias: 
[ 0.01856856 -1.73154092 -0.06346727]


In [8]:
'''
Loading the saved variables
'''

# Remove previous weights and bias
tf.reset_default_graph()

# Still need to create the weights and bias tensors
# 
weights = tf.Variable(tf.truncated_normal([2, 3])) # Random values using tf.truncated_normal
bias = tf.Variable(tf.truncated_normal([3]))

# Class used to save and/or RESTORE Tensor Variables
#
saver = tf.train.Saver()

with tf.Session() as sess:
    # Load the weights and bias
    saver.restore(sess, save_file) # Loads the saved data into weights and bias
    
    # Show the values of the weights and bias
    print('Weights: ')
    print(sess.run(weights))
    print('Bias: ')
    print(sess.run(bias))    

INFO:tensorflow:Restoring parameters from ./model.ckpt
Weights: 
[[-1.07324827  0.40389559 -0.12456368]
 [ 1.41648293  0.47162992  0.7748329 ]]
Bias: 
[ 0.15563829  0.48175469  0.61734837]


In [12]:
'''
Train model and save its weights
'''

# Remove previous tensors and operations
tf.reset_default_graph()

from tensorflow.examples.tutorials.mnist import input_data
import numpy as np

mnist = input_data.read_data_sets('.', one_hot=True)

'''
Start with model
'''

learning_rate = 0.001
n_input = 784
n_classes = 10

# Features and Labels
features = tf.placeholder(tf.float32, [None, n_input])
labels = tf.placeholder(tf.float32, [None, n_classes])

# Weights and Bias
weights = tf.Variable(tf.random_normal([n_input, n_classes]))
bias = tf.Variable(tf.random_normal([n_classes]))

# Logits: xW + b
logits = tf.add(tf.matmul(features, weights), bias)

# Define loss and optimizer
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=labels))
optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate).minimize(cost)

# Calculate accuracy
correct_prediction = tf.equal(tf.argmax(logits, 1), tf.argmax(labels, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))


'''
Train model and save weights
'''

import math

save_file = './train_mode.ckpt'
batch_size = 128
n_epochs = 100

saver = tf.train.Saver()

# Launch the graph

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    
    # Training cycle
    for epoch in range(n_epochs):
        total_batch = int(math.ceil(mnist.train.num_examples / batch_size))
        # loop over all batches
        for i in range(total_batch):
            batch_features, batch_labels = mnist.train.next_batch(batch_size)
            sess.run(optimizer,
                     feed_dict = {features: batch_features, labels: batch_labels})
        
        # Print status for every 10 epochs
        if epoch % 10 == 0:
            valid_accuracy = sess.run(accuracy,
                                      feed_dict = {features: mnist.validation.images,
                                                   labels: mnist.validation.labels})
            print('Epoch {:<3} - Validation Accuracy: {}'.format(epoch, valid_accuracy))
    
    # Save the model
    saver.save(sess, save_file)
    print('Trained Model Saved')
        


Extracting ./train-images-idx3-ubyte.gz
Extracting ./train-labels-idx1-ubyte.gz
Extracting ./t10k-images-idx3-ubyte.gz
Extracting ./t10k-labels-idx1-ubyte.gz
Epoch 0   - Validation Accuracy: 0.112800002098
Epoch 10  - Validation Accuracy: 0.302799999714
Epoch 20  - Validation Accuracy: 0.430999994278
Epoch 30  - Validation Accuracy: 0.521600008011
Epoch 40  - Validation Accuracy: 0.584800004959
Epoch 50  - Validation Accuracy: 0.630400002003
Epoch 60  - Validation Accuracy: 0.661599993706
Epoch 70  - Validation Accuracy: 0.690199971199
Epoch 80  - Validation Accuracy: 0.706399977207
Epoch 90  - Validation Accuracy: 0.722199976444
Trained Model Saved


In [14]:
'''
Load weights and Bias from memory
'''

loader = tf.train.Saver()

# Launch the Graph
with tf.Session() as sess:
    loader.restore(sess, save_file)
    
    test_accuracy = sess.run(accuracy,
                             feed_dict = {features: mnist.test.images, labels: mnist.test.labels})
    
print('Test Accuracy: {}'.format(test_accuracy))

INFO:tensorflow:Restoring parameters from ./train_mode.ckpt
Test Accuracy: 0.732699990273


### Fine tuning

Loading weights and bias into modified models.

The `name` properties for `weights` and `bias` are different than when you saved the model. So "Assign requires shapes of both tensors to match" error is produced. The code saver.restore(sess, save_file) is trying to load weight data into bias and bias data into weights.

Instead of letting TensorFlow set the name property, it has to be done manually:


In [3]:
import tensorflow as tf

tf.reset_default_graph()

save_file = './tune_model.ckpt'

# Two Tensor Variables: weights and bias
weights = tf.Variable(tf.truncated_normal([2, 3]), name='weights_0')
bias = tf.Variable(tf.truncated_normal([3]), name='bias_0')

saver = tf.train.Saver()

# Print the name of Weights and Bias
print('Save Weights: {}'.format(weights.name))
print('Save Bias: {}'.format(bias.name))

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    saver.save(sess, save_file)

Save Weights: weights_0:0
Save Bias: bias_0:0


In [4]:
# Remove the previous weights and bias
tf.reset_default_graph()

# Two Variables: weights and bias
bias = tf.Variable(tf.truncated_normal([3]), name='bias_0')
weights = tf.Variable(tf.truncated_normal([2, 3]) ,name='weights_0')

saver = tf.train.Saver()

# Print the name of Weights and Bias
print('Load Weights: {}'.format(weights.name))
print('Load Bias: {}'.format(bias.name))

with tf.Session() as sess:
    # Load the weights and bias - No Error
    saver.restore(sess, save_file)

print('Loaded Weights and Bias successfully.')

Load Weights: weights_0:0
Load Bias: bias_0:0
INFO:tensorflow:Restoring parameters from ./tune_model.ckpt
Loaded Weights and Bias successfully.


### Regularization

* Early termination given performance on validation set.

* Regulariztion as in applying artificial constraints that implicitly reduces the number of parameters. i.e. penalize large weight son loss function (L2).

* Dropout. Foces network to learn redundant representations. (yielding an approximate to an ensamble)

Dropout in TensorFlow using [`tf.nn.dropout()`](https://www.tensorflow.org/api_docs/python/tf/nn/dropout) function:

It takes two jparmameters:
 * The tensor to which dropout is applied. In the example below `hidden_layer`.
 * The probability of _keeping_ a given unit. In order to compensate for dropped units, `tf.nn.dropout()` multiplies all units that are kept (i.e. not dropped) by `1/keep_prob`. During training, a good starting value for `keep_prob` is **0.5**. During testing, use a `keep_prob` value of **1.0** to keep all units and maximize the power of the model.
 
```
...

keep_prob = tf.placeholder(tf.float32) # probability to keep units

hidden_layer = tf.add(tf.matmul(features, weights[0]), biases[0])
hidden_layer = tf.nn.relu(hidden_layer)
hidden_layer = tf.nn.dropout(hidden_layer, keep_prob)

logits = tf.add(tf.matmul(hidden_layer, weights[1]), biases[1])

...

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())

    for epoch_i in range(epochs):
        for batch_i in range(batches):
            ....

            sess.run(optimizer, feed_dict={
                features: batch_features,
                labels: batch_labels,
                keep_prob: 0.5})

    validation_accuracy = sess.run(accuracy, feed_dict={
        features: test_features,
        labels: test_labels,
        keep_prob: 1.0}) 

```
 

In [3]:

import tensorflow as tf

hidden_layer_weights = [
    [0.1, 0.2, 0.4],
    [0.4, 0.6, 0.6],
    [0.5, 0.9, 0.1],
    [0.8, 0.2, 0.8]]
out_weights = [
    [0.1, 0.6],
    [0.2, 0.1],
    [0.7, 0.9]]

# Weights and biases
weights = [
    tf.Variable(hidden_layer_weights),
    tf.Variable(out_weights)]
biases = [
    tf.Variable(tf.zeros(3)),
    tf.Variable(tf.zeros(2))]

# Input
features = tf.Variable([[0.0, 2.0, 3.0, 4.0], [0.1, 0.2, 0.3, 0.4], [11.0, 12.0, 13.0, 14.0]])

# TODO: Create Model with Dropout
keep_prob = tf.placeholder(tf.float32) # probability to keep units

hidden_layer = tf.add(tf.matmul(features, weights[0]), biases[0])
hidden_layer = tf.nn.relu(hidden_layer)
hidden_layer = tf.nn.dropout(hidden_layer, keep_prob)

logits = tf.add(tf.matmul(hidden_layer, weights[1]), biases[1])

# TODO: Print logits from a session
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())

    print(sess.run(logits, feed_dict={keep_prob: 0.5}))


[[  8.45999908   9.39999866]
 [  0.30800003   0.7700001 ]
 [ 38.4600029   71.69999695]]
