# Imports
* Dataset used - MNIST (with one-hot-encoding)
* LOG_DIR = "tmp/"
* PLOT_DIR = './out/plots'

In [None]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt

import utility

# import DataSet
from tensorflow.examples.tutorials.mnist import input_data
# Output locations. LOG_DIR receives the TensorBoard event files written during
# training; PLOT_DIR is not referenced in this notebook's own cells and is
# presumably consumed by the plotting helpers -- TODO confirm.
LOG_DIR = "tmp/"
PLOT_DIR = './out/plots'

# Load MNIST with one-hot labels. reshape=False keeps every image as a
# 28x28x1 array (matching the input placeholder), and validation_size=0
# requests that no examples are split off into a validation set.
mnist = input_data.read_data_sets(
    'MNIST_data',
    one_hot=True,
    reshape=False,
    validation_size=0,
)

## Input and Ground-truth placeholders  
* One image is a 28x28 grayscale image with pixel intensities between 0 and 1  
* Ground-truth is a label from 0-9 (one-hot encoded)  
.  
* Y-underscore (`Y_`) = ground-truth  
* Y = predicted-output

## Hyperparameters and train_step
* learning_rate
* number_of_epochs
* batch_size

In [None]:
# Network input: a batch of 28x28 single-channel (grayscale) images.
X = tf.placeholder(dtype=tf.float32, shape=[None, 28, 28, 1], name='X')
# Targets: one row per image, one-hot encoded over the 10 digit classes.
Y_ = tf.placeholder(dtype=tf.float32, shape=[None, 10], name='Y_')

# Scalar hyper-parameters, supplied through the feed dict on every run.
# keep_prob is passed to tf.nn.dropout as the probability of KEEPING a unit.
keep_prob = tf.placeholder(dtype=tf.float32, shape=[], name='dropout_probability')
learning_rate = tf.placeholder(dtype=tf.float32, shape=[], name='learning_rate')

# Fixed training settings: mini-batch size and number of training iterations
# (the training loop draws one mini-batch per iteration).
batch_size = 100
n_epochs = 2000

# Architecture
* **Input-layer**[28x28x1 images] >> (W1, b1) >> **conv-layer-1**[28x28x4 maps]
* **conv-layer-1**[28x28x4 maps] >> (W2, b2) >> **conv-layer-2**[14x14x8 maps]
* **conv-layer-2**[14x14x8 maps] >> (W3, b3) >> **conv-layer-3**[7x7x12 maps]
* **conv-layer-3**[7x7x12 maps] >> (W4, b4) >> **fcc-layer**[200 neurons]
* **fcc-layer**[200 neurons] >> (W5, b5) >> **Output-layer**[10 neurons]  
.  
* **relu** activation function used in conv-layer-1, **sigmoid** activation function used in the other hidden layers   
* **softmax** activation function used at output layer   
 
### Cost function  
* cost = - sum(Y_ * log(Y)), where Y_ is the one-hot ground truth and Y is the softmax output  
  
### Accuracy function  
* Accuracy = (correct)/(correct + incorrect), reported as a fraction between 0 and 1  

In [None]:
# model
# K, L, M: number of output feature maps of conv layers 1, 2 and 3.
# N: number of neurons in the fully connected hidden layer.
K, L, M = 4, 8, 12
N = 200


def _random_variable(shape, name):
    """Return a tf.Variable drawn from a truncated normal with stddev 0.5."""
    return tf.Variable(tf.truncated_normal(shape, stddev=0.5), name=name)


# Convolution kernels are [height, width, in_channels, out_channels].
W1 = _random_variable([5, 5, 1, K], 'w1')
b1 = _random_variable([K], 'b1')

W2 = _random_variable([5, 5, K, L], 'w2')
b2 = _random_variable([L], 'b2')

W3 = _random_variable([4, 4, L, M], 'w3')
b3 = _random_variable([M], 'b3')

# Dense layers: flattened 7x7xM feature maps -> N hidden units -> 10 classes.
W4 = _random_variable([7*7*M, N], 'w4')
b4 = _random_variable([N], 'b4')

W5 = _random_variable([N, 10], 'w5')
b5 = _random_variable([10], 'b5')

# summaries of weights
# Register one histogram summary per weight/bias tensor, in layer order.
for tag, tensor in (
    ("weights/w1", W1),
    ("biases/b1", b1),
    ("weights/w2", W2),
    ("biases/b2", b2),
    ("weights/w3", W3),
    ("biases/b3", b3),
    ("weights/w4", W4),
    ("biases/b4", b4),
    ("weights/w5", W5),
    ("biases/b5", b5),
):
    tf.summary.histogram(tag, tensor)

# convolution-layers
# Layer 1 uses stride 1; layers 2 and 3 use stride 2. All use 'SAME' padding.
conv1 = tf.nn.conv2d(X, W1, strides=[1, 1, 1, 1], padding='SAME')
Y1 = tf.nn.relu(conv1 + b1, name='conv_layer_1')

conv2 = tf.nn.conv2d(Y1, W2, strides=[1, 2, 2, 1], padding='SAME')
Y2 = tf.nn.sigmoid(conv2 + b2, name='conv_layer_2')

conv3 = tf.nn.conv2d(Y2, W3, strides=[1, 2, 2, 1], padding='SAME')
Y3 = tf.nn.sigmoid(conv3 + b3, name='conv_layer_3')

# flatten the last conv output to one row of 7*7*M values per image,
# then apply dropout before the dense layer
flattened = tf.reshape(Y3, shape=[-1, 7*7*M], name='fcc')
YY = tf.nn.dropout(flattened, keep_prob)

# hidden-layer (fully connected, sigmoid) followed by dropout
hidden = tf.nn.sigmoid(tf.matmul(YY, W4) + b4, name='hidden_layer')
Y4 = tf.nn.dropout(hidden, keep_prob)

# model-output
# Keep the pre-softmax activations (logits) so the loss can be computed from
# them directly; Y remains the softmax probabilities used for prediction.
# NOTE: the op name 'ouput_layer' (sic) is left unchanged because code that
# looks tensors up by name may depend on it.
logits = tf.matmul(Y4, W5) + b5
Y = tf.nn.softmax(logits, name='ouput_layer')

# add weights to collection
tf.add_to_collection('conv_weights', W1)
tf.add_to_collection('conv_weights', W2)
tf.add_to_collection('conv_weights', W3)

# add output to collection
tf.add_to_collection('conv_output', Y1)
tf.add_to_collection('conv_output', Y2)
tf.add_to_collection('conv_output', Y3)

# cost
# Summed cross-entropy over the batch. It is mathematically the same as
# -sum(Y_ * log(Y)), but computing it from the logits avoids log(0) = -inf
# (and the resulting NaN loss/gradients) when a softmax output underflows to 0.
# NOTE: the tensor named 'cross_entropy' is now the (positive) loss itself;
# previously that name was attached to the sum before negation.
cross_entropy = tf.reduce_sum(
    tf.nn.softmax_cross_entropy_with_logits(labels=Y_, logits=logits),
    name='cross_entropy')
tf.summary.scalar("cross_entropy", cross_entropy)

# accuracy
# A prediction is correct when the most probable class equals the class
# marked in the one-hot label; accuracy is the mean of those hits (0..1).
predicted_class = tf.argmax(Y, 1)
true_class = tf.argmax(Y_, 1)
is_correct = tf.equal(predicted_class, true_class)
hits = tf.cast(is_correct, tf.float32)
accuracy = tf.reduce_mean(hits, name='accuracy')
tf.summary.scalar("accuracy", accuracy)

In [None]:
# optimizer
# Adam driven by the learning_rate placeholder, minimising the cross-entropy.
optimizer = tf.train.AdamOptimizer(learning_rate)
train_step = optimizer.minimize(cross_entropy)

# summaries of h_params
# n_epochs and batch_size are Python constants, so these two scalars are the
# same at every step; they are logged so each run records its settings.
tf.summary.scalar("number_of_epochs", n_epochs)
tf.summary.scalar("mini_batch_size", batch_size)
# Log the placeholder directly instead of reaching into the optimizer's
# private `_lr` attribute; it is the same tensor that was passed to Adam.
tf.summary.scalar("learning_rate", learning_rate)

# Training
* Mini-batch method used
* train & test results stored separately

In [None]:
# training
# Values fed for the hyper-parameter placeholders.
LEARNING_RATE = 0.003
TRAIN_KEEP_PROB = 0.7   # dropout active while training
TEST_KEEP_PROB = 1.0    # dropout disabled while evaluating

sess = tf.InteractiveSession()
# variable-initializer
init = tf.global_variables_initializer()
sess.run(init)

# One merged summary op; train and test results go to separate event files.
summaries = tf.summary.merge_all()
train_writer = tf.summary.FileWriter(LOG_DIR + "train", graph=tf.get_default_graph())
test_writer = tf.summary.FileWriter(LOG_DIR + "test")

# The test feed never changes, so build it once instead of on every iteration.
# learning_rate is fed here as well because the merged summaries include the
# learning-rate scalar, which reads that placeholder.
test_data = {X: mnist.test.images,
             Y_: mnist.test.labels,
             learning_rate: LEARNING_RATE,
             keep_prob: TEST_KEEP_PROB
            }

for i in range(n_epochs):
    # feed train-data: one mini-batch per iteration
    batch_X, batch_Y = mnist.train.next_batch(batch_size)
    train_data = {X: batch_X,
                  Y_: batch_Y,
                  learning_rate: LEARNING_RATE,
                  keep_prob: TRAIN_KEEP_PROB
                 }

    summ, _ = sess.run([summaries, train_step], feed_dict=train_data)
    train_writer.add_summary(summ, global_step=i)

    # feed test-data: evaluate on the full test set and log it separately
    summ = sess.run(summaries, feed_dict=test_data)
    test_writer.add_summary(summ, global_step=i)

    # print
    if i%4 == 0:
        print ("Iteration: ", i, end='\r')

# Make sure every buffered summary reaches disk before TensorBoard reads it.
train_writer.flush()
test_writer.flush()

## Visualisation

In [None]:
# visualize weights of all convolutional layers
# Only variables are evaluated here, so no feed dictionary is needed.
weight_tensors = tf.get_collection('conv_weights')
conv_weights = sess.run([weight_tensors])
for layer_idx, kernels in enumerate(conv_weights[0]):
    plot_name = 'conv{}'.format(layer_idx)
    utility.plot_conv_weights(kernels, plot_name)

print ('Weights have been visualized')

In [None]:
# get output of all convolutional layers
# The activations depend on the input, so a single image (the first test
# image) is fed through the network.
sample_image = mnist.test.images[:1]
output_tensors = tf.get_collection('conv_output')
conv_out = sess.run([output_tensors], feed_dict={X: sample_image})
for layer_idx, activations in enumerate(conv_out[0]):
    plot_name = 'conv{}'.format(layer_idx)
    utility.plot_conv_output(activations, plot_name)

print ('Activations have been visualized')