# Deep Neural Network in TensorFlow

In [None]:
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets(".", one_hot=True, reshape=False)

# Learning Parameters
import tensorflow as tf

# Parameters
learning_rate = 0.001
training_epochs = 20
batch_size = 128  # Decrease batch size if you don't have enough memory
display_step = 1

n_input = 784  # MNIST data input (img shape: 28*28)
n_classes = 10  # MNIST total classes (0-9 digits)

# Hidden Layer Parameters
n_hidden_layer = 256 # layer number of features: the size of the hidden layer in the neural network.
#This is also known as the width of a layer.

# Weights and Biases
# Store layers weight & bias
weights = {
    'hidden_layer': tf.Variable(tf.random_normal([n_input, n_hidden_layer])),
    'out': tf.Variable(tf.random_normal([n_hidden_layer, n_classes]))
}
biases = {
    'hidden_layer': tf.Variable(tf.random_normal([n_hidden_layer])),
    'out': tf.Variable(tf.random_normal([n_classes]))
}

# Input
# tf Graph input
x = tf.placeholder("float", [None, 28, 28, 1])
y = tf.placeholder("float", [None, n_classes])

x_flat = tf.reshape(x, [-1, n_input])
'''
The MNIST data is made up of 28px by 28px images with a single channel. 
The tf.reshape() function above reshapes the 28px by 28px matrices in x into row vectors of 784px.
[MY] the None dimension is corresponding to batch
'''

# Multilayer Perceptron

# Hidden layer with RELU activation
layer_1 = tf.add(tf.matmul(x_flat, weights['hidden_layer']),\
    biases['hidden_layer'])
layer_1 = tf.nn.relu(layer_1)
# Output layer with linear activation
logits = tf.add(tf.matmul(layer_1, weights['out']), biases['out'])
'''
You've seen the linear function tf.add(tf.matmul(x_flat, weights['hidden_layer']), biases['hidden_layer']) before, 
also known as xw + b. Combining linear functions together using a ReLU will give you a two layer network.
'''

# Optimizer
# Define loss and optimizer
cost = tf.reduce_mean(\
    tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=y))
optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)\
    .minimize(cost)
    
# Session
# Initializing the variables
init = tf.global_variables_initializer()

# Launch the graph
with tf.Session() as sess:
    sess.run(init)
    # Training cycle
    for epoch in range(training_epochs):
        total_batch = int(mnist.train.num_examples/batch_size)
        # Loop over all batches
        for i in range(total_batch):
            batch_x, batch_y = mnist.train.next_batch(batch_size)
            # Run optimization op (backprop) and cost op (to get loss value)
            sess.run(optimizer, feed_dict={x: batch_x, y: batch_y})
'''
The MNIST library in TensorFlow provides the ability to receive the dataset in batches. 
Calling the mnist.train.next_batch() function returns a subset of the training data.
'''


# TensorFlow Convolution Layer

In [6]:
# Output depth
k_output = 64

# Image Properties
image_width = 10
image_height = 10
color_channels = 3

# Convolution filter
filter_size_width = 5
filter_size_height = 5

# Input/Image
input = tf.placeholder(
    tf.float32,
    shape=[None, image_height, image_width, color_channels])

# Weight and bias
weight = tf.Variable(tf.truncated_normal(
    [filter_size_height, filter_size_width, color_channels, k_output]))
bias = tf.Variable(tf.zeros(k_output))

# Apply Convolution
conv_layer = tf.nn.conv2d(input, weight, strides=[1, 2, 2, 1], padding='SAME')
# Add bias
conv_layer = tf.nn.bias_add(conv_layer, bias)
# Apply activation function
conv_layer = tf.nn.relu(conv_layer)

'''
The code above uses the tf.nn.conv2d() function to compute the convolution with weight as the filter 
and [1, 2, 2, 1] for the strides. 
TensorFlow uses a stride for each input dimension, [batch, input_height, input_width, input_channels]. 
We are generally always going to set the stride for batch and input_channels 
(i.e. the first and fourth element in the strides array) to be 1.

You'll focus on changing input_height and input_width while setting batch and input_channels to 1. 
The input_height and input_width strides are for striding the filter over input. 
This example code uses a stride of 2 with 5x5 filter over input.

The tf.nn.bias_add() function adds a 1-d bias to the last dimension in a matrix.
'''

"\nThe code above uses the tf.nn.conv2d() function to compute the convolution with weight as the filter \nand [1, 2, 2, 1] for the strides. \nTensorFlow uses a stride for each input dimension, [batch, input_height, input_width, input_channels]. \nWe are generally always going to set the stride for batch and input_channels \n(i.e. the first and fourth element in the strides array) to be 1.\n\nYou'll focus on changing input_height and input_width while setting batch and input_channels to 1. \nThe input_height and input_width strides are for striding the filter over input. \nThis example code uses a stride of 2 with 5x5 filter over input.\n\nThe tf.nn.bias_add() function adds a 1-d bias to the last dimension in a matrix.\n"

# Convolutional Network in TensorFlow

In [2]:
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets(".", one_hot=True, reshape=False)

import tensorflow as tf

# Parameters
learning_rate = 0.00001
epochs = 10
batch_size = 128

# Number of samples to calculate validation and accuracy
# Decrease this if you're running out of memory to calculate accuracy
test_valid_size = 256

# Network Parameters
n_classes = 10  # MNIST total classes (0-9 digits)
dropout = 0.75  # Dropout, probability to keep units

# Store layers weight & bias
weights = {
    'wc1': tf.Variable(tf.random_normal([5, 5, 1, 32])),
    'wc2': tf.Variable(tf.random_normal([5, 5, 32, 64])),
    'wd1': tf.Variable(tf.random_normal([7*7*64, 1024])), # 7*7*64 = 3136
    'out': tf.Variable(tf.random_normal([1024, n_classes]))}

biases = {
    'bc1': tf.Variable(tf.random_normal([32])),
    'bc2': tf.Variable(tf.random_normal([64])),
    'bd1': tf.Variable(tf.random_normal([1024])),
    'out': tf.Variable(tf.random_normal([n_classes]))}

def conv2d(x, W, b, strides=1):
    '''
    The tf.nn.conv2d() function computes the convolution against weight W as shown above (Udacity visual).

    In TensorFlow, strides is an array of 4 elements; the first element in this array indicates the stride for batch 
    and last element indicates stride for features. It's good practice to remove the batches or features you want to 
    skip from the data set rather than use a stride to skip them. 
    You can always set the first and last element to 1 in strides in order to use all batches and features.

    The middle two elements are the strides for height and width respectively. 
    I've mentioned stride as one number because you usually have a square stride where height = width. 
    When someone says they are using a stride of 3, they usually mean tf.nn.conv2d(x, W, strides=[1, 3, 3, 1]).

    To make life easier, the code is using tf.nn.bias_add() to add the bias. 
    Using tf.add() doesn't work when the tensors aren't the same shape.
    '''
    x = tf.nn.conv2d(x, W, strides=[1, strides, strides, 1], padding='SAME')
    x = tf.nn.bias_add(x, b)
    return tf.nn.relu(x)

def maxpool2d(x, k=2):
    '''
    The tf.nn.max_pool() function does exactly what you would expect, it performs max pooling with the ksize parameter 
    as the size of the filter.
    
     For a pooling layer, the output depth is the same as the input depth. 
     Additionally, the pooling operation is applied individually for each depth slice.
    '''
    return tf.nn.max_pool(
        x,
        ksize=[1, k, k, 1],
        strides=[1, k, k, 1],
        padding='SAME')

def conv_net(x, weights, biases, dropout):
    '''
    Convolution layer:
        new_height = (input_height - filter_height + 2 * P)/S + 1
        new_width = (input_width - filter_width + 2 * P)/S + 1       
        P = padding, S = Stride
    
    For the 'SAME' padding, the output height and width are computed as:
        out_height = ceil(float(in_height) / float(strides[1]))
        out_width  = ceil(float(in_width) / float(strides[2]))
        
    and the padding on the top and left are computed as:
        pad_along_height = max((out_height - 1) * strides[1] +
                            filter_height - in_height, 0)
        pad_along_width = max((out_width - 1) * strides[2] +
                           filter_width - in_width, 0)
        pad_top = pad_along_height // 2
        pad_bottom = pad_along_height - pad_top
        pad_left = pad_along_width // 2
        pad_right = pad_along_width - pad_left
    
    Note that the division by 2 means that there might be cases when the padding on both sides 
    (top vs bottom, right vs left) are off by one. In this case, the bottom and right sides always 
    get the one additional padded pixel. For example, when pad_along_height is 5, 
    we pad 2 pixels at the top and 3 pixels at the bottom. 
    Note that this is different from existing libraries such as cuDNN and Caffe, 
    which explicitly specify the number of padded pixels and always pad the same number of pixels on both sides.

    For the 'VALID' padding, the output height and width are computed as:
        out_height = ceil(float(in_height - filter_height + 1) / float(strides[1]))
        out_width  = ceil(float(in_width - filter_width + 1) / float(strides[2]))
        
    and the padding values are always zero. The output is then computed as
        output[b, i, j, :] =
            sum_{di, dj} input[b, strides[1] * i + di - pad_top,
                               strides[2] * j + dj - pad_left, ...] *
                         filter[di, dj, ...]
    where any value outside the original input image region are considered zero 
    ( i.e. we pad zero values around the border of the image).
    '''
    # Layer 1 - 28*28*1 to to 28*28*32 to 14*14*32
    conv1 = conv2d(x, weights['wc1'], biases['bc1'])
    conv1 = maxpool2d(conv1, k=2)

    # Layer 2 - 14*14*32 to 14*14*64 to 7*7*64
    conv2 = conv2d(conv1, weights['wc2'], biases['bc2'])
    conv2 = maxpool2d(conv2, k=2)

    # Fully connected layer - 7*7*64 to 1024
    fc1 = tf.reshape(conv2, [-1, weights['wd1'].get_shape().as_list()[0]])
#     print('fc1.shape after reshape', fc1.shape)
    fc1 = tf.add(tf.matmul(fc1, weights['wd1']), biases['bd1'])
#     print('fc1.shape after add', fc1.shape)
#     print('weight.shape', weights['wd1'].shape)
#     print('bias.shape', biases['bd1'].shape)
    fc1 = tf.nn.relu(fc1)
    fc1 = tf.nn.dropout(fc1, dropout)

    # Output Layer - class prediction - 1024 to 10
    out = tf.add(tf.matmul(fc1, weights['out']), biases['out'])
    return out

# Session

# tf Graph input
x = tf.placeholder(tf.float32, [None, 28, 28, 1])
y = tf.placeholder(tf.float32, [None, n_classes])
keep_prob = tf.placeholder(tf.float32)

# Model
logits = conv_net(x, weights, biases, keep_prob)

# Define loss and optimizer
cost = tf.reduce_mean(\
    tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=y))
optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)\
    .minimize(cost)

# Accuracy
correct_pred = tf.equal(tf.argmax(logits, 1), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

# Initializing the variables
init = tf. global_variables_initializer()

# Launch the graph
with tf.Session() as sess:
    sess.run(init)

    for epoch in range(epochs):
        for batch in range(mnist.train.num_examples//batch_size):
            batch_x, batch_y = mnist.train.next_batch(batch_size)
            sess.run(optimizer, feed_dict={
                x: batch_x,
                y: batch_y,
                keep_prob: dropout})

            # Calculate batch loss and accuracy
            loss = sess.run(cost, feed_dict={
                x: batch_x,
                y: batch_y,
                keep_prob: 1.})
            valid_acc = sess.run(accuracy, feed_dict={
                x: mnist.validation.images[:test_valid_size],
                y: mnist.validation.labels[:test_valid_size],
                keep_prob: 1.})

            print('Epoch {:>2}, Batch {:>3} -'
                  'Loss: {:>10.4f} Validation Accuracy: {:.6f}'.format(
                epoch + 1,
                batch + 1,
                loss,
                valid_acc))

    # Calculate Test Accuracy
    test_acc = sess.run(accuracy, feed_dict={
        x: mnist.test.images[:test_valid_size],
        y: mnist.test.labels[:test_valid_size],
        keep_prob: 1.})
    print('Testing Accuracy: {}'.format(test_acc))


Extracting .\train-images-idx3-ubyte.gz
Extracting .\train-labels-idx1-ubyte.gz
Extracting .\t10k-images-idx3-ubyte.gz
Extracting .\t10k-labels-idx1-ubyte.gz
fc1.shape after reshape (?, 3136)
fc1.shape after add (?, 1024)
weight.shape (3136, 1024)
bias.shape (1024,)
Epoch  1, Batch   1 -Loss: 53070.4883 Validation Accuracy: 0.093750
Epoch  1, Batch   2 -Loss: 55641.1016 Validation Accuracy: 0.101562
Epoch  1, Batch   3 -Loss: 50303.0938 Validation Accuracy: 0.085938
Epoch  1, Batch   4 -Loss: 44751.5078 Validation Accuracy: 0.082031
Epoch  1, Batch   5 -Loss: 36617.0078 Validation Accuracy: 0.089844


KeyboardInterrupt: 

# TensorFlow Convolution Layer Quiz

In [15]:
"""
Setup the strides, padding and filter weight/bias such that
the output shape is (1, 2, 2, 3).
"""
import tensorflow as tf
import numpy as np

# `tf.nn.conv2d` requires the input be 4D (batch_size, height, width, depth)
# (1, 4, 4, 1)
x = np.array([
    [0, 1, 0.5, 10],
    [2, 2.5, 1, -8],
    [4, 0, 5, 6],
    [15, 1, 2, 3]], dtype=np.float32).reshape((1, 4, 4, 1))
X = tf.constant(x)


def conv2d(input):
    # Filter (weights and bias)
    # The shape of the filter weight is (height, width, input_depth, output_depth)
    # The shape of the filter bias is (output_depth,)
    # TODO: Define the filter weights `F_W` and filter bias `F_b`.
    # NOTE: Remember to wrap them in `tf.Variable`, they are trainable parameters after all.
    F_W = tf.Variable(tf.random_normal([2,2,1,3]))
    F_b = tf.Variable(tf.random_normal([3]))
    # TODO: Set the stride for each dimension (batch_size, height, width, depth)
    strides = [1,2,2,1]
    # TODO: set the padding, either 'VALID' or 'SAME'.
    padding = 'VALID'
    # https://www.tensorflow.org/versions/r0.11/api_docs/python/nn.html#conv2d
    # `tf.nn.conv2d` does not include the bias computation so we have to add it ourselves after.
    return tf.nn.conv2d(input, F_W, strides, padding) + F_b

out = conv2d(X)

# TensorFlow Pooling Layer Quiz

In [16]:
"""
Set the values to `strides` and `ksize` such that
the output shape after pooling is (1, 2, 2, 1).
"""
import tensorflow as tf
import numpy as np

# `tf.nn.max_pool` requires the input be 4D (batch_size, height, width, depth)
# (1, 4, 4, 1)
x = np.array([
    [0, 1, 0.5, 10],
    [2, 2.5, 1, -8],
    [4, 0, 5, 6],
    [15, 1, 2, 3]], dtype=np.float32).reshape((1, 4, 4, 1))
X = tf.constant(x)

def maxpool(input):
    # TODO: Set the ksize (filter size) for each dimension (batch_size, height, width, depth)
    ksize = [1, 2, 2, 1]
    # TODO: Set the stride for each dimension (batch_size, height, width, depth)
    strides = [1, 2, 2, 1]
    # TODO: set the padding, either 'VALID' or 'SAME'.
    padding = 'VALID'
    # https://www.tensorflow.org/versions/r0.11/api_docs/python/nn.html#max_pool
    return tf.nn.max_pool(input, ksize, strides, padding)
    
out = maxpool(X)

In [17]:
out.shape

TensorShape([Dimension(1), Dimension(2), Dimension(2), Dimension(1)])