# Problem 3 (b): Adding dropout 

### Import the packages

In [1]:
import numpy as np
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data


### Define our own convolutional layer, fully connected layer, and pooling helpers (dropout is applied with `tf.nn.dropout` when the network is built)

In [2]:
def get_nb_params_shape(shape):
    """Return the number of scalar entries implied by ``shape``.

    Args:
        shape: Iterable of dimension sizes; every entry is converted with
            ``int`` before being multiplied in.

    Returns:
        The product of all dimension sizes (``1`` when ``shape`` is empty).
    """
    remaining = [int(extent) for extent in shape]
    count = 1
    # Integer multiplication is order-independent, so consume from the end.
    while remaining:
        count *= remaining.pop()
    return count
def count_para():
    """Print the total number of scalars across all trainable variables.

    Walks ``tf.trainable_variables()``, turns each variable's shape
    (e.g. [D, F] or [W, H, C]) into an entry count, and prints the sum.
    Nothing is returned.
    """
    total = sum(
        get_nb_params_shape(variable.get_shape())
        for variable in tf.trainable_variables()
    )
    print ("Total trainable params number:", total)

def conv2d(x, filter_shape, strides, padding, name):
    """2-D convolution followed by a bias add and a ReLU.

    Args:
        x: Input tensor handed to ``tf.nn.conv2d``.
        filter_shape: Shape of the kernel variable; its last entry is also
            used as the length of the bias vector.
        strides: Strides forwarded unchanged to ``tf.nn.conv2d``.
        padding: Either ``'SAME'`` or ``'VALID'``.
        name: Variable scope in which the ``'w'`` and ``'b'`` variables live.

    Returns:
        The ReLU-activated, bias-shifted convolution output.

    Raises:
        AssertionError: If ``padding`` is not one of the two accepted values.
    """
    assert padding in ('SAME', 'VALID')
    with tf.variable_scope(name):
        # Kernel starts from a truncated normal (stddev 0.1); bias starts at zero.
        kernel = tf.get_variable(
            'w',
            shape=filter_shape,
            initializer=tf.truncated_normal_initializer(stddev=0.1),
        )
        bias = tf.get_variable(
            'b',
            shape=[filter_shape[-1]],
            initializer=tf.zeros_initializer(),
        )
        pre_activation = tf.nn.conv2d(x, kernel, strides=strides, padding=padding) + bias
        activated = tf.nn.relu(pre_activation)
    return activated
    

def fc(x, in_size, out_size, name, activation=None):
    """Fully connected layer: ``x @ w + b`` with an optional nonlinearity.

    Args:
        x: Input tensor passed to ``tf.nn.xw_plus_b``.
        in_size: Number of rows of the weight matrix.
        out_size: Number of columns of the weight matrix and length of the bias.
        name: Variable scope in which the ``'w'`` and ``'b'`` variables live.
        activation: ``None`` for a purely affine output, or one of
            ``'relu'``, ``'sigmoid'``, ``'tanh'``.

    Returns:
        The affine output, passed through the requested activation if any.

    Raises:
        AssertionError: If ``activation`` is given but is not a supported name.
    """
    if activation is not None:
        assert activation in ('relu', 'sigmoid', 'tanh'), 'Wrong activation function.'
    with tf.variable_scope(name):
        weights = tf.get_variable(
            'w',
            shape=[in_size, out_size],
            dtype=tf.float32,
            initializer=tf.truncated_normal_initializer(stddev=0.1),
        )
        bias = tf.get_variable(
            'b',
            shape=[out_size],
            dtype=tf.float32,
            initializer=tf.zeros_initializer(),
        )
        affine = tf.nn.xw_plus_b(x, weights, bias)
        # Look the nonlinearity up by name; no entry means "leave it affine".
        nonlinearity = {
            'relu': tf.nn.relu,
            'tanh': tf.nn.tanh,
            'sigmoid': tf.nn.sigmoid,
        }.get(activation)
        return affine if nonlinearity is None else nonlinearity(affine)
        

def max_pooling(x, k_height, k_width, strides_x, strides_y, padding='SAME'):
    """Max-pool ``x`` using a window and stride assembled from scalars.

    Args:
        x: Input tensor handed to ``tf.nn.max_pool``.
        k_height: Second entry of the 4-element window size.
        k_width: Third entry of the 4-element window size.
        strides_x: Second entry of the 4-element stride, i.e. the slot that
            lines up with ``k_height``.
        strides_y: Third entry of the 4-element stride, i.e. the slot that
            lines up with ``k_width``.
        padding: Padding mode forwarded to ``tf.nn.max_pool``.

    Returns:
        The pooled tensor.
    """
    # First and last entries are fixed at 1 for both the window and the stride.
    window = [1, k_height, k_width, 1]
    step = [1, strides_x, strides_y, 1]
    return tf.nn.max_pool(x, window, step, padding)




### Build the network

In [3]:
# Graph inputs: flattened images (784 values per row) and label vectors of
# length 10. The leading dimension is left open so any batch size can be fed.
with tf.name_scope('inputs'):
    X_ = tf.placeholder(tf.float32, [None, 784])
    y_ = tf.placeholder(tf.float32, [None, 10])


# Restore the 28x28 single-channel layout expected by the convolution helper.
X = tf.reshape(X_, [-1, 28, 28, 1])
# Stage 1: 5x5 convolution with 32 output channels, then 2x2 pooling with stride 2.
conv1 = conv2d(X, [5, 5, 1, 32], [1, 1, 1, 1], 'SAME', 'conv1')
h_pool1 = max_pooling(conv1, 2, 2, 2, 2)

# Stage 2: 5x5 convolution from 32 to 64 channels, then 2x2 pooling with stride 2.
conv2 = conv2d(h_pool1, [5, 5, 32, 64], [1, 1, 1, 1], 'SAME', 'conv2')
h_pool2 = max_pooling(conv2, 2, 2, 2, 2)

# Flatten each 7x7x64 feature map (28 -> 14 -> 7 after the two stride-2 pools)
# into a vector so it can feed the fully connected layers.
h_pool2_flat = tf.reshape(h_pool2, [-1, 7*7*64])
h_fc1 = fc(h_pool2_flat, 7*7*64, 1024, 'fc1', 'relu')


# Dropout on the 1024-unit hidden layer; the keep probability is a placeholder
# and therefore has to be supplied through feed_dict on every run.
keep_prob = tf.placeholder(tf.float32)
h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)
# h_fc2 holds the 10 unnormalized class scores (no activation is requested);
# y_conv is their softmax.
h_fc2 = fc(h_fc1_drop, 1024, 10, 'fc2')
y_conv = tf.nn.softmax(h_fc2)

In [4]:
# Run settings, collected in one place so they are easy to find and tune.
LEARNING_RATE = 1e-4
NUM_STEPS = 8000
BATCH_SIZE = 50
LOG_EVERY = 1000
TRAIN_KEEP_PROB = 0.5

mnist = input_data.read_data_sets('MNIST_data', one_hot=True)

# Cross-entropy summed over the batch, computed directly from the logits.
# Taking tf.log of an explicit softmax output yields log(0) = -inf (and then
# NaN loss/gradients) as soon as a probability underflows to zero; the fused
# op below evaluates the same quantity in a numerically stable way.
cross_entropy = tf.reduce_sum(
    tf.nn.softmax_cross_entropy_with_logits_v2(labels=y_, logits=h_fc2))
train_step = tf.train.AdamOptimizer(LEARNING_RATE).minimize(cross_entropy)

# Fraction of rows whose highest-scoring class matches the label's arg-max.
correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(y_, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

# InteractiveSession installs itself as the default session, which is what
# lets the .eval() / .run() calls below work without passing a session.
sess = tf.InteractiveSession()
sess.run(tf.global_variables_initializer())
count_para()

for i in range(NUM_STEPS):
    batch = mnist.train.next_batch(BATCH_SIZE)
    if i % LOG_EVERY == 0:
        # Dropout is switched off (keep_prob = 1.0) when measuring accuracy.
        train_accuracy = accuracy.eval(feed_dict={
            X_: batch[0], y_: batch[1], keep_prob: 1.0})
        print("step %d, training accuracy %g"%(i, train_accuracy))
    train_step.run(feed_dict={
        X_: batch[0], y_: batch[1], keep_prob: TRAIN_KEEP_PROB})

print("test accuracy %g"%accuracy.eval(feed_dict={
    X_: mnist.test.images, y_: mnist.test.labels, keep_prob: 1.0}))


Instructions for updating:
Please use alternatives such as official/mnist/dataset.py from tensorflow/models.
Instructions for updating:
Please write your own downloading logic.
Instructions for updating:
Please use tf.data to implement this functionality.
Extracting MNIST_data/train-images-idx3-ubyte.gz
Instructions for updating:
Please use tf.data to implement this functionality.
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Instructions for updating:
Please use tf.one_hot on tensors.
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz
Instructions for updating:
Please use alternatives such as official/mnist/dataset.py from tensorflow/models.
Total trainable params number: 3274634
step 0, training accuracy 0.12
step 1000, training accuracy 0.98
step 2000, training accuracy 1
step 3000, training accuracy 0.98
step 4000, training accuracy 1
step 5000, training accuracy 1
step 6000, training accuracy 0.96
step 7000, training accuracy 1
test 