# Basic Neural Network in Tensorflow  (MNIST)

### Construction Phase

In [1]:
import tensorflow as tf

# Network dimensions: flattened 28x28 pixel input, two hidden layers, one output per digit class.
n_inputs = 28 * 28
n_hidden1, n_hidden2 = 300, 100
n_outputs = 10

- Since the number of features is 28x28 but we don't know how many samples each training batch will contain, we use (None, n_inputs) as the shape of X.  
- We also know y will be a 1D tensor, but we don't know how many samples a batch will hold, so y's only dimension is left unspecified (None).

In [2]:
# X is fed one mini-batch at a time: an unknown number of rows, n_inputs columns each.
X = tf.placeholder(tf.float32, shape=(None, n_inputs), name="X")
# y is fed the matching integer class labels. The trailing comma matters:
# `(None)` is just `None` (shape entirely unknown, any rank), whereas `(None,)`
# declares a rank-1 tensor of unknown length, which is what the labels are.
y = tf.placeholder(tf.int64, shape=(None,), name="y")

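- X is the input layer (it will be replaced with one training batch at a time)
- Then we have the 2 hidden layers (they differ only in the number of neurons and the inputs they're connected to)
- The output layer uses a softmax activation function (in the code below the output layer produces raw logits, and the softmax is applied as part of the loss computation)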

In [3]:
# Hand-written dense layer, kept commented out for reference only; the network
# below is built with tf.layers.dense() instead.
# NOTE: this code uses `np`, so `import numpy as np` must run before it is re-enabled.
# def neuron_layer(X, n_neurons, name, activation=None):
    
#     # create a name scope using the name of the layer.
#     with tf.name_scope(name):
        
#         # get the number of inputs from the second dimension of the input matrix's shape
#         n_inputs = int(X.get_shape()[1])
        
#         # using this stddev will help increase the efficiency of this particular network
#         stddev = 2 / np.sqrt(n_inputs)
        
#         # We create W, which will hold the weights matrix.  It is a 2D tensor containing all of the connection
#         # weights between each input and each neuron.  It is initialized randomly using a truncated normal (Gaussian)
#         # distribution.  It is important to initialize all connection weights randomly for all hidden layers to avoid
#         # any symmetries that the gradient descent algorithm would be unable to break.
#         init = tf.truncated_normal((n_inputs, n_neurons), stddev=stddev)
#         W = tf.Variable(init, name='weights')
        
#         # create the biases, initialized to 0
#         b = tf.Variable(tf.zeros([n_neurons]), name='biases')
        
#         # create the subgraph that computes z = X . W + b
#         z = tf.matmul(X, W) + b
#         # apply ReLU only when requested; otherwise return the raw linear output
#         if activation == 'relu':
#             return tf.nn.relu(z)
#         else:
#             return z

In [4]:
# creating the network with the hand-written neuron_layer() (kept commented out for reference)

# Each layer must be fed the previous layer's output tensor (hidden1, hidden2),
# not the integer layer sizes (n_hidden1, n_hidden2): neuron_layer() calls
# X.get_shape() on its first argument.
# with tf.name_scope('dnn'):
#     hidden1 = neuron_layer(X, n_hidden1, 'hidden1', activation='relu')
#     hidden2 = neuron_layer(hidden1, n_hidden2, 'hidden2', activation='relu')
#     # logits is the output of the NN before going through the softmax activation function.
#     logits = neuron_layer(hidden2, n_outputs, 'outputs')

- Instead of building our own neuron_layer function, we can use TensorFlow's tf.layers.dense() function to create each fully connected layer.

In [5]:
import numpy as np

with tf.name_scope("dnn"):
    # Two fully connected ReLU layers stacked on the input placeholder.
    hidden1 = tf.layers.dense(
        inputs=X, units=n_hidden1, activation=tf.nn.relu, name="hidden1"
    )
    hidden2 = tf.layers.dense(
        inputs=hidden1, units=n_hidden2, activation=tf.nn.relu, name="hidden2"
    )
    # The output layer has no activation: it yields raw logits, one per class.
    logits = tf.layers.dense(inputs=hidden2, units=n_outputs, name="outputs")

### Define Cost Function used to train

- We will use cross-entropy, which penalizes models that estimate a low probability for the target class.
- __sparse_softmax_cross_entropy_with_logits()__ computes the cross entropy based on the logits (the output of the network before going through the softmax activation function), and it expects labels in the form of ints, ranging from 0 to 9.
- This will give us a 1D tensor containing the cross-entropy for each instance.  We then use TF's __reduce_mean()__ function to compute the mean cross entropy over all instances.
- Note: sparse_softmax_cross_entropy_with_logits is equivalent to applying the softmax function and then computing the cross entropy, but it is more efficient than applying the softmax function earlier in our code.

In [6]:
with tf.name_scope('loss'):
    # One cross-entropy value per instance, computed directly from the raw
    # logits and the integer class labels.
    cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=logits,
        labels=y,
    )
    # Average over the batch to get the scalar that training minimizes.
    loss = tf.reduce_mean(cross_entropy, name='loss')

- Now set up a GradientDescentOptimizer, which will tweak the model parameters to minimize the cost function.

In [7]:
# Step size used by plain gradient descent.
learning_rate = 0.0001

with tf.name_scope('train'):
    optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
    # Running this op performs one gradient descent update on the loss.
    training_op = optimizer.minimize(loss=loss)

### Evaluating
- For each instance, we measure whether the NN prediction is correct by checking whether or not the highest logit corresponds to the target class. ----> use __in_top_k()__
- __in_top_k()__ returns a 1D tensor full of boolean values; we need to cast these booleans to floats and then compute the average, which gives us the overall accuracy.

In [8]:
with tf.name_scope('eval'):
    # True for each instance whose target class has the highest logit.
    correct = tf.nn.in_top_k(predictions=logits, targets=y, k=1)
    # Booleans -> floats, then the mean is the fraction of correct predictions.
    correct_as_float = tf.cast(correct, dtype=tf.float32)
    accuracy = tf.reduce_mean(correct_as_float)

In [9]:
# Saver used to checkpoint the trained variables to disk.
saver = tf.train.Saver()
# Single op that, when run, initializes every variable in the graph.
init = tf.global_variables_initializer()

# Execution Phase

- use tensorflow to fetch data, scale it between 0 and 1, shuffle it

In [10]:
from tensorflow.examples.tutorials.mnist import input_data
# Load the MNIST data sets, using /tmp/data/ as the local data directory.
mnist = input_data.read_data_sets(train_dir='/tmp/data/')

Extracting /tmp/data/train-images-idx3-ubyte.gz
Extracting /tmp/data/train-labels-idx1-ubyte.gz
Extracting /tmp/data/t10k-images-idx3-ubyte.gz
Extracting /tmp/data/t10k-labels-idx1-ubyte.gz


In [11]:
# Training schedule: number of epochs to run, and instances per mini-batch.
n_epochs, batch_size = 100, 50

In [12]:
with tf.Session() as sess:
    # Give every variable its initial value before training starts.
    sess.run(init)

    for epoch in range(n_epochs):
        # Number of full mini-batches that fit in the training set.
        batches_per_epoch = mnist.train.num_examples // batch_size
        for batch_index in range(batches_per_epoch):
            X_batch, y_batch = mnist.train.next_batch(batch_size)
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})

        # "Train accuracy" is measured on the last mini-batch of the epoch only;
        # "Test accuracy" is measured on the whole test set.
        acc_train = sess.run(accuracy, feed_dict={X: X_batch, y: y_batch})
        acc_test = sess.run(
            accuracy,
            feed_dict={X: mnist.test.images, y: mnist.test.labels},
        )
        print(epoch, "Train accuracy:", acc_train, "Test accuracy:", acc_test)

    # Checkpoint the trained parameters once all epochs are done.
    save_path = saver.save(sess, "./my_model_final.ckpt")

0 Train accuracy: 0.18 Test accuracy: 0.1878
1 Train accuracy: 0.32 Test accuracy: 0.3178
2 Train accuracy: 0.36 Test accuracy: 0.4106
3 Train accuracy: 0.5 Test accuracy: 0.4773
4 Train accuracy: 0.52 Test accuracy: 0.5315
5 Train accuracy: 0.5 Test accuracy: 0.5769
6 Train accuracy: 0.68 Test accuracy: 0.6096
7 Train accuracy: 0.68 Test accuracy: 0.6393
8 Train accuracy: 0.64 Test accuracy: 0.6662
9 Train accuracy: 0.68 Test accuracy: 0.6883
10 Train accuracy: 0.66 Test accuracy: 0.71
11 Train accuracy: 0.7 Test accuracy: 0.7284
12 Train accuracy: 0.74 Test accuracy: 0.7436
13 Train accuracy: 0.7 Test accuracy: 0.7546
14 Train accuracy: 0.8 Test accuracy: 0.7646
15 Train accuracy: 0.78 Test accuracy: 0.771
16 Train accuracy: 0.84 Test accuracy: 0.7818
17 Train accuracy: 0.78 Test accuracy: 0.7891
18 Train accuracy: 0.82 Test accuracy: 0.7964
19 Train accuracy: 0.88 Test accuracy: 0.8022
20 Train accuracy: 0.72 Test accuracy: 0.8073
21 Train accuracy: 0.7 Test accuracy: 0.8139
22 Trai

In [13]:
# use code below to restore the model and make predictions

# with tf.Session() as sess:
#     saver.restore(sess, "./my_model_final.ckpt")
#     X_new_scaled = [...] # some new images (scaled from 0 to 1)
#     Z = logits.eval(feed_dict={X: X_new_scaled})
#     y_pred = np.argmax(Z, axis=1