In [None]:
import tensorflow as tf
import math

### Training data placeholder

In [None]:
# Input images: 28x28 pixels with a single (grayscale) channel.
# The leading None dimension indexes the images within a mini-batch.
X = tf.placeholder(dtype=tf.float32, shape=[None, 28, 28, 1])

### Define the number of neurons in each layer

In [None]:
# Three convolutional layers with their channel counts, and a
# fully connected layer (the last layer has 10 softmax neurons)
K = 6  # first convolutional layer output depth
L = 12  # second convolutional layer output depth
M = 24  # third convolutional layer output depth
N = 200  # fully connected layer size

### Configure dropout
#### Tasks
* Define the pkeep placeholder for dropout

In [None]:
# Probability of keeping a node during dropout.
# It is fed through feed_dict next to X and Y_: 1.0 when evaluating
# (training-batch metrics and test set) and 0.75 for the actual training step.
pkeep = #TODO

### Variables for weights and biases
#### Tasks
* Initialize the weights and biases for the 2nd and 3rd convolutional layers.
 * layer2: 5x5 patch, K input channels, L output channels
 * layer3: 4x4 patch, L input channels, M output channels
* Initialize the weights for the 1st fully connected layer

In [None]:
# Convolutional weights are shaped [patch height, patch width, input channels, output channels];
# each bias vector has one entry per output channel / neuron of its layer.
W1 = tf.Variable(tf.truncated_normal([6, 6, 1, K], stddev=0.1))  # 6x6 patch, 1 input channel, K output channels
B1 = tf.Variable(tf.constant(0.1, tf.float32, [K]))
# Exercise: layer 2 uses a 5x5 patch, K input channels, L output channels
W2 = #TODO
B2 = #TODO
# Exercise: layer 3 uses a 4x4 patch, L input channels, M output channels
W3 = #TODO
B3 = #TODO

# Exercise: weights of the first fully connected layer. They are multiplied with the
# flattened third convolution output (7 * 7 * M values per image) and summed with B4.
W4 = #TODO
B4 = tf.Variable(tf.constant(0.1, tf.float32, [N]))
# Output layer: N inputs, 10 outputs (one per class)
W5 = tf.Variable(tf.truncated_normal([N, 10], stddev=0.1))
B5 = tf.Variable(tf.constant(0.1, tf.float32, [10]))

### Define the model
We are using the model defined in mnist_3.0, but include a [dropout layer](https://www.tensorflow.org/api_docs/python/tf/nn/dropout).

#### Tasks
* add the dropout layer(s)
* why don't we add more dropout layers?

In [None]:
# The model: three ReLU convolutional layers, one ReLU fully connected layer
# and a 10-way softmax output layer.
# `stride` is reassigned before each convolution; the trailing comment gives the
# spatial size of that layer's output.
stride = 1  # output is 28x28
Y1 = tf.nn.relu(tf.nn.conv2d(X, W1, strides=[1, stride, stride, 1], padding='SAME') + B1)
stride = 2  # output is 14x14
Y2 = tf.nn.relu(tf.nn.conv2d(Y1, W2, strides=[1, stride, stride, 1], padding='SAME') + B2)
stride = 2  # output is 7x7
Y3 = tf.nn.relu(tf.nn.conv2d(Y2, W3, strides=[1, stride, stride, 1], padding='SAME') + B3)

# reshape the output from the third convolution for the fully connected layer:
# each image becomes one row of 7 * 7 * M values
YY = tf.reshape(Y3, shape=[-1, 7 * 7 * M])

Y4 = tf.nn.relu(tf.matmul(YY, W4) + B4)
# Exercise: add the dropout layer here; its output YY4 feeds the output layer below
YY4 = #TODO
# Ylogits are the raw scores used by the loss; Y are the softmax
# probabilities used to compute the accuracy
Ylogits = tf.matmul(YY4, W5) + B5
Y = tf.nn.softmax(Ylogits)

# correct answers will go here (one row of 10 values per image)
Y_ = tf.placeholder(tf.float32, [None, 10])

# variable learning rate, fed at every training step
lr = tf.placeholder(tf.float32)

### Define the error function

In [None]:
# Cross-entropy loss, -sum(Y_i * log(Y_i)), averaged over the images that are fed
# and multiplied by 100 so that it is normalised for batches of 100 images.
# softmax_cross_entropy_with_logits works on the raw logits, which avoids the
# numerical stability problems of computing log(0) by hand.
cross_entropy = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(labels=Y_, logits=Ylogits)
) * 100

### Concatenate the weights and biases

In [None]:
def _flatten(tensor):
    """Return `tensor` reshaped into a single dimension."""
    return tf.reshape(tensor, [-1])


# All weights, and all biases, of the five layers laid end to end in one vector each
allweights = tf.concat(
    [_flatten(W1), _flatten(W2), _flatten(W3), _flatten(W4), _flatten(W5)],
    axis=0,
)
allbiases = tf.concat(
    [_flatten(B1), _flatten(B2), _flatten(B3), _flatten(B4), _flatten(B5)],
    axis=0,
)

### Keep track of the prediction accuracy

In [None]:
# Accuracy of the model, between 0 (worst) and 1 (best): the fraction of images
# whose most probable predicted class matches the class marked in the label.
correct_prediction = tf.equal(
    tf.argmax(Y, 1),
    tf.argmax(Y_, 1),
)
accuracy = tf.reduce_mean(tf.cast(correct_prediction, dtype=tf.float32))

### Configure the optimizer

In [None]:
# Training step: Adam minimises the cross-entropy; the learning rate is the
# `lr` placeholder, so it can be changed at every iteration.
train_step = tf.train.AdamOptimizer(learning_rate=lr).minimize(loss=cross_entropy)

### Load the training and test data

In [None]:
from tensorflow.contrib.learn.python.learn.datasets.mnist import read_data_sets

# Download the images and labels into the "data" directory and expose them as
# mnist.test (10K images+labels) and mnist.train (60K images+labels).
# Labels are one-hot encoded, images are not reshaped, and no validation set is held out.
mnist = read_data_sets(
    train_dir="data",
    one_hot=True,
    reshape=False,
    validation_size=0,
)

### A training function to iterate over

In [None]:
# Feed dictionary for evaluating on the whole test set (keep probability 1.0)
test_data = {
    X: mnist.test.images,
    Y_: mnist.test.labels,
    pkeep: 1.0,
}

# Build the variable initialiser, open a session and initialise the variables
init = tf.global_variables_initializer()
sess = tf.Session()
sess.run(init)

# You can call this function in a loop to train the model, 100 images at a time
def training_step(i, update_test_data, update_train_data):
    """Run one training iteration on the next mini-batch of 100 training images.

    Args:
        i: iteration index; it drives the learning-rate decay and is printed in the logs.
        update_test_data: when true, print accuracy and loss measured on the test set.
        update_train_data: when true, print accuracy and loss measured on the current
            training batch (evaluated with pkeep = 1.0, before the update is applied).

    Returns:
        None. Metrics are printed and the variables held by `sess` are updated.
    """
    batch_size = 100

    # learning rate decay: starts at max_learning_rate for i = 0 and decays
    # exponentially towards min_learning_rate as exp(-i / decay_speed)
    max_learning_rate = 0.003
    min_learning_rate = 0.0001
    decay_speed = 2000.0
    learning_rate = min_learning_rate + (max_learning_rate - min_learning_rate) * math.exp(-i/decay_speed)

    # training on batches of 100 images with 100 labels
    batch_X, batch_Y = mnist.train.next_batch(batch_size)

    # compute training values (only accuracy and loss are needed for the log line)
    if update_train_data:
        a, c = sess.run([accuracy, cross_entropy], {X: batch_X, Y_: batch_Y, pkeep: 1.0})
        print(str(i) + ": accuracy:" + str(a) + " loss: " + str(c) + " (lr:" + str(learning_rate) + ")")

    # compute test values; the epoch number is derived from the number of images seen so far
    if update_test_data:
        a, c = sess.run([accuracy, cross_entropy], test_data)
        print(str(i) + ": ********* epoch " + str(i*batch_size//mnist.train.images.shape[0]+1) + " ********* test accuracy:" + str(a) + " test loss: " + str(c))

    # the backpropagation training step, with dropout active (pkeep = 0.75)
    sess.run(train_step, {X: batch_X, Y_: batch_Y, lr: learning_rate, pkeep: 0.75})

### Run the training

In [None]:
# 10001 iterations: test metrics are printed every 100 iterations,
# training metrics every 20 iterations
for i in range(10000 + 1):
    training_step(
        i,
        update_test_data=(i % 100 == 0),
        update_train_data=(i % 20 == 0),
    )

# Final accuracy on the test set
print(sess.run(accuracy, test_data))