### Import tensorflow and MNIST

In [3]:
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("/tmp/data/", one_hot=True)
print(mnist)

Extracting /tmp/data/train-images-idx3-ubyte.gz
Extracting /tmp/data/train-labels-idx1-ubyte.gz
Extracting /tmp/data/t10k-images-idx3-ubyte.gz
Extracting /tmp/data/t10k-labels-idx1-ubyte.gz
Datasets(train=<tensorflow.contrib.learn.python.learn.datasets.mnist.DataSet object at 0x000002A1B7DB2BE0>, validation=<tensorflow.contrib.learn.python.learn.datasets.mnist.DataSet object at 0x000002A1B7DB2828>, test=<tensorflow.contrib.learn.python.learn.datasets.mnist.DataSet object at 0x000002A1B7DB2A20>)


### Define input/output

In [6]:
# MNIST data input (img shape: 28*28)
num_input = 784
# MNIST total classes (0-9 digits)
num_classes = 10

# placeholders for input and output
X = tf.placeholder(tf.float32, [None, num_input])
Y = tf.placeholder(tf.float32, [None, num_classes])

### Define layers parameters

In [7]:
# Define size of neurons per layer of 4 connected neural layers
# Try change the number of layers and neurons per layer
n_neurons = {1: 512, 2: 256, 3: 256, 4: 128,}

# Define the wieght shape for furture invoke
weights = {
    'h1': tf.Variable(tf.random_normal([num_input, n_neurons[1]])),
    'h2': tf.Variable(tf.random_normal([n_neurons[1], n_neurons[2]])),
    'h3': tf.Variable(tf.random_normal([n_neurons[2], n_neurons[3]])),
    'h4': tf.Variable(tf.random_normal([n_neurons[3], n_neurons[4]]))
}

# Define bias shape
biases = {
    'b1': tf.Variable(tf.random_normal([n_neurons[1]])),
    'b2': tf.Variable(tf.random_normal([n_neurons[2]])),
    'b3': tf.Variable(tf.random_normal([n_neurons[3]])),
    'b4': tf.Variable(tf.random_normal([n_neurons[4]])),
    'out': tf.Variable(tf.random_normal([num_classes]))
}

### Hyperparameters!!

In [13]:
# Parameters and model hypterparameters
learning_rate = 0.01
num_steps = 1000
batch_size = 200
display_step = 100

### Define evaluation method without core neural network yet 

In [19]:
def evaluate(logits):
    prediction = tf.nn.softmax(logits)
    cost_function = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=Y))
    
    # Check out other optimzers, basically compared to GD or SGD, Adam 
    # introduced momentum to (1) more effectively train (2) protect from local
    # minimal better; and adaptively adjust learning speed more effectively to
    # converge faster, than gradient descent
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
    train_op = optimizer.minimize(cost_function)

    # Evaluate model
    correct_pred = tf.equal(tf.argmax(prediction, 1), tf.argmax(Y, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

    # Start training
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        for step in range(1, num_steps):
            batch_x, batch_y = mnist.train.next_batch(batch_size)
            sess.run(train_op, feed_dict={X: batch_x, Y: batch_y})
            if (step % display_step == 0 or step == 1):
                # Calculate batch loss and accuracy
                loss, acc = sess.run([cost_function, accuracy],
                                     feed_dict={X: batch_x, Y: batch_y})
                print("Step " + str(step) \
                      + ", Loss= " + "{:.4f}".format(loss) \
                      + ", Training Accuracy= " + "{:.3f}".format(acc))

        print("Optimization Finished!")

        # Calculate accuracy for MNIST test images
        print("Testing Accuracy:", \
            sess.run(accuracy, feed_dict={X: mnist.test.images,
                                          Y: mnist.test.labels}))


### Valilla NN

In [20]:
def neural_net(x, n_layer):
    print("------Vanilla NN with {0} layers------".format(n_layer))
    layer_1 = tf.add(tf.matmul(x, weights['h1']), biases['b1'])
    layer_2 = tf.add(tf.matmul(layer_1, weights['h2']), biases['b2'])
    layer_3 = tf.add(tf.matmul(layer_2, weights['h3']), biases['b3'])
    layer_4 = tf.add(tf.matmul(layer_3, weights['h4']), biases['b4'])
    hidden_layers = {
        1: layer_1,
        2: layer_2,
        3: layer_3,
        4: layer_4
    }
    out_weight = tf.Variable(tf.random_normal([n_neurons[n_layer], num_classes]))
    out_layer = tf.matmul(hidden_layers[n_layer], out_weight) + biases['out']
    return out_layer

In [21]:
evaluate(neural_net(X, 4))

------Vanilla NN with 4 layers------
Step 1, Loss= 849537.4375, Training Accuracy= 0.245
Step 100, Loss= 31256.0996, Training Accuracy= 0.850
Step 200, Loss= 21036.8301, Training Accuracy= 0.845
Step 300, Loss= 8496.9395, Training Accuracy= 0.875
Step 400, Loss= 9218.2891, Training Accuracy= 0.930
Step 500, Loss= 12085.3975, Training Accuracy= 0.845
Step 600, Loss= 7281.6538, Training Accuracy= 0.875
Step 700, Loss= 11565.0576, Training Accuracy= 0.830
Step 800, Loss= 3287.2581, Training Accuracy= 0.925
Step 900, Loss= 6025.1138, Training Accuracy= 0.900
Optimization Finished!
Testing Accuracy: 0.8733


### NN + Dropout?

In [22]:
dropout_rate = 0.5
def neural_net_dropout(x, n_layer):
    print("------NN + Dropout wiht {0} layers------".format(n_layer))
    layer_1 = tf.add(tf.matmul(x, weights['h1']), biases['b1'])
    dropout_1 = tf.layers.dropout(inputs=layer_1, rate=dropout_rate)

    layer_2 = tf.add(tf.matmul(dropout_1, weights['h2']), biases['b2'])
    dropout_2 = tf.layers.dropout(inputs=layer_2, rate=dropout_rate)

    layer_3 = tf.add(tf.matmul(dropout_2, weights['h3']), biases['b3'])
    dropout_3 = tf.layers.dropout(inputs=layer_3, rate=dropout_rate)
    
    layer_4 = tf.add(tf.matmul(dropout_3, weights['h4']), biases['b4'])
    hidden_layers = {
        1: layer_1,
        2: layer_2,
        3: layer_3,
        4: layer_4
    }
    out_weight = tf.Variable(tf.random_normal([n_neurons[n_layer], num_classes]))
    out_layer = tf.matmul(hidden_layers[n_layer], out_weight) + biases['out']
    return out_layer

In [23]:
evaluate(neural_net_dropout(X, 4))

------NN + Dropout wiht 4 layers------
Step 1, Loss= 559437.5000, Training Accuracy= 0.220
Step 100, Loss= 16792.7754, Training Accuracy= 0.905
Step 200, Loss= 21647.2559, Training Accuracy= 0.895
Step 300, Loss= 11808.0684, Training Accuracy= 0.870
Step 400, Loss= 14638.7139, Training Accuracy= 0.870
Step 500, Loss= 5591.4370, Training Accuracy= 0.885
Step 600, Loss= 9705.9609, Training Accuracy= 0.870
Step 700, Loss= 5876.3823, Training Accuracy= 0.880
Step 800, Loss= 4576.9663, Training Accuracy= 0.910
Step 900, Loss= 6562.9238, Training Accuracy= 0.865
Optimization Finished!
Testing Accuracy: 0.8669


### NN + tanh 

In [24]:
def neural_net_tanh(x, n_layer):
    print("------NN + tanh with {0} layers------".format(n_layer))
    layer_1 = tf.add(tf.matmul(x, weights['h1']), biases['b1'])
    tanh_1 = tf.nn.tanh(layer_1)
    
    layer_2 = tf.add(tf.matmul(tanh_1, weights['h2']), biases['b2'])
    tanh_2 = tf.nn.tanh(layer_2)
    
    layer_3 = tf.add(tf.matmul(tanh_2, weights['h3']), biases['b3'])
    tanh_3 = tf.nn.tanh(layer_3)
    
    layer_4 = tf.add(tf.matmul(tanh_3, weights['h4']), biases['b4'])
    hidden_layers = {
        1: layer_1,
        2: layer_2,
        3: layer_3,
        4: layer_4
    }
    out_weight = tf.Variable(tf.random_normal([n_neurons[n_layer], num_classes]))
    out_layer = tf.matmul(hidden_layers[n_layer], out_weight) + biases['out']
    return out_layer

In [25]:
evaluate(neural_net_tanh(X, 4))

------NN + tanh with 4 layers------
Step 1, Loss= 157.2486, Training Accuracy= 0.210
Step 100, Loss= 11.6365, Training Accuracy= 0.770
Step 200, Loss= 12.4033, Training Accuracy= 0.740
Step 300, Loss= 3.6058, Training Accuracy= 0.795
Step 400, Loss= 4.8351, Training Accuracy= 0.760
Step 500, Loss= 3.2561, Training Accuracy= 0.835
Step 600, Loss= 1.9281, Training Accuracy= 0.850
Step 700, Loss= 2.8700, Training Accuracy= 0.860
Step 800, Loss= 1.9801, Training Accuracy= 0.865
Step 900, Loss= 4.1688, Training Accuracy= 0.750
Optimization Finished!
Testing Accuracy: 0.7645


### NN + ReLU

In [26]:
def neural_net_relu(x, n_layer):
    print("------NN + ReLU with {0} layers------".format(n_layer))
    layer_1 = tf.add(tf.matmul(x, weights['h1']), biases['b1'])
    relu_1 = tf.nn.relu(layer_1)
    
    layer_2 = tf.add(tf.matmul(relu_1, weights['h2']), biases['b2'])
    relu_2 = tf.nn.relu(layer_2)
    
    layer_3 = tf.add(tf.matmul(relu_2, weights['h3']), biases['b3'])
    relu_3 = tf.nn.relu(layer_3)

    layer_4 = tf.add(tf.matmul(relu_3, weights['h4']), biases['b4'])
    hidden_layers = {
        1: layer_1,
        2: layer_2,
        3: layer_3,
        4: layer_4
    }
    out_weight = tf.Variable(tf.random_normal([n_neurons[n_layer], num_classes]))
    out_layer = tf.matmul(hidden_layers[n_layer], out_weight) + biases['out']
    return out_layer

In [27]:
evaluate(neural_net_relu(X, 4))

------NN + ReLU with 4 layers------
Step 1, Loss= 244020.3125, Training Accuracy= 0.160
Step 100, Loss= 4933.3926, Training Accuracy= 0.885
Step 200, Loss= 1684.4766, Training Accuracy= 0.925
Step 300, Loss= 1038.8535, Training Accuracy= 0.935
Step 400, Loss= 868.9738, Training Accuracy= 0.945
Step 500, Loss= 843.2711, Training Accuracy= 0.960
Step 600, Loss= 372.9640, Training Accuracy= 0.970
Step 700, Loss= 208.9264, Training Accuracy= 0.980
Step 800, Loss= 299.4787, Training Accuracy= 0.965
Step 900, Loss= 204.8120, Training Accuracy= 0.990
Optimization Finished!
Testing Accuracy: 0.9411


### NN + ReLU + Dropout 

In [28]:
dropout_rate = 0.5
def neural_net_relu_dropout(x, n_layer):
    print("------NN + ReLU + Dropout with {0} layers------".format(n_layer))
    layer_1 = tf.add(tf.matmul(x, weights['h1']), biases['b1'])
    relu_1 = tf.nn.relu(layer_1)
    dropout_1 = tf.layers.dropout(inputs=relu_1, rate=dropout_rate)

    layer_2 = tf.add(tf.matmul(dropout_1, weights['h2']), biases['b2'])
    relu_2 = tf.nn.relu(layer_2)
    dropout_2 = tf.layers.dropout(inputs=relu_2, rate=dropout_rate)
    
    layer_3 = tf.add(tf.matmul(dropout_2, weights['h3']), biases['b3'])
    relu_3 = tf.nn.relu(layer_3)
    dropout_3 = tf.layers.dropout(inputs=relu_3, rate=dropout_rate)

    layer_4 = tf.add(tf.matmul(dropout_3, weights['h4']), biases['b4'])
    hidden_layers = {
        1: layer_1,
        2: layer_2,
        3: layer_3,
        4: layer_4
    }
    out_weight = tf.Variable(tf.random_normal([n_neurons[n_layer], num_classes]))
    out_layer = tf.matmul(hidden_layers[n_layer], out_weight) + biases['out']
    return out_layer

In [29]:
evaluate(neural_net_relu_dropout(X, 4))

------NN + ReLU + Dropout with 4 layers------
Step 1, Loss= 168270.9531, Training Accuracy= 0.150
Step 100, Loss= 5207.7285, Training Accuracy= 0.880
Step 200, Loss= 2010.2823, Training Accuracy= 0.925
Step 300, Loss= 1804.3119, Training Accuracy= 0.920
Step 400, Loss= 447.4523, Training Accuracy= 0.950
Step 500, Loss= 355.3167, Training Accuracy= 0.975
Step 600, Loss= 183.1521, Training Accuracy= 0.995
Step 700, Loss= 164.2642, Training Accuracy= 0.980
Step 800, Loss= 461.5016, Training Accuracy= 0.940
Step 900, Loss= 125.6042, Training Accuracy= 0.980
Optimization Finished!
Testing Accuracy: 0.9461
