#### Import the libraries

In [None]:
import numpy as np
np.random.seed(42)
import tensorflow as tf
tf.set_random_seed(42)
from tensorflow.keras.datasets import mnist
from tensorflow.keras.utils import to_categorical 

#### Load the MNIST dataset

In [None]:
# Load the MNIST train/test splits through Keras; each split is an
# (images, labels) pair.
train_split, test_split = mnist.load_data()
x_train, y_train = train_split
x_test, y_test = test_split

In [None]:
# Divide every pixel value by 255 (presumably rescaling 0-255 intensities
# into [0, 1] -- confirm against the dataset's dtype).
x_train, x_test = x_train / 255, x_test / 255

In [None]:
# Flatten each image into one float32 feature vector. The number of rows is
# taken from the array itself instead of being hard-coded, so the cell keeps
# working if only a subset of the data is loaded.
x_train = x_train.reshape(x_train.shape[0], -1).astype('float32')
x_test = x_test.reshape(x_test.shape[0], -1).astype('float32')

In [None]:
# One-hot encode the labels of both splits into `classes` columns.
classes = 10
y_train, y_test = (
    to_categorical(labels, num_classes=classes)
    for labels in (y_train, y_test)
)

In [None]:
# Sanity check: shapes of the prepared inputs and labels.
tuple(arr.shape for arr in (x_train, x_test, y_train, y_test))

#### Initializing the variables and defining the epochs and batch size

In [None]:
# Training schedule: number of passes over the data and examples per step.
epochs, batch_size = 10, 128

# A progress line is printed once every `display_progress` batches.
display_progress = 40

# Xavier initializer, passed to tf.get_variable when the weights are created.
wt_init = tf.contrib.layers.xavier_initializer()

#### Defining the layer hyperparameters

In [None]:
# Length of one flattened input image.
n_input = 784

# Convolutional stages, each given as (number of filters, square kernel size).
n_conv_1, k_conv_1 = 32, 3
n_conv_2, k_conv_2 = 64, 3
n_conv_3, k_conv_3 = 128, 3

# Max-pooling window (also used as its stride) and the dropout rate applied
# after each pooling layer.
pool_size, mp_layer_dropout = 2, 0.25

# Width of the fully connected layer and its dropout rate.
n_dense, dense_layer_dropout = 128, 0.5

# Number of output classes.
n_classes = 10

#### Creating the placeholders

In [None]:
# Graph inputs: flattened images and their one-hot labels. The leading
# dimension is None so that a batch of any size can be fed.
x = tf.placeholder(dtype=tf.float32, shape=[None, n_input])
y = tf.placeholder(dtype=tf.float32, shape=[None, n_classes])

#### Defining the dense, conv and maxpooling layers

In [None]:
def dense(x, W, b):
    """Fully connected layer with ReLU activation: relu(matmul(x, W) + b).

    Args:
        x: input tensor to be multiplied by `W`.
        W: weight matrix.
        b: bias added to the matrix product.

    Returns:
        The activated output tensor.
    """
    return tf.nn.relu(tf.add(tf.matmul(x, W), b))

def conv2d(x, W, b, stride_length=1):
    """SAME-padded 2-D convolution followed by a bias add and ReLU.

    Args:
        x: input tensor passed to tf.nn.conv2d.
        W: convolution filter tensor.
        b: bias added to the convolution output via tf.nn.bias_add.
        stride_length: stride used for both middle (spatial) dimensions.

    Returns:
        The activated feature-map tensor.
    """
    strides = [1, stride_length, stride_length, 1]
    convolved = tf.nn.conv2d(x, W, strides=strides, padding='SAME')
    return tf.nn.relu(tf.nn.bias_add(convolved, b))

def maxpooling2d(x, p_size):
    """SAME-padded max pooling whose window and stride are both `p_size`.

    Args:
        x: input tensor passed to tf.nn.max_pool.
        p_size: side length of the pooling window, also used as the stride.

    Returns:
        The pooled tensor.
    """
    window = [1, p_size, p_size, 1]
    return tf.nn.max_pool(x, ksize=window, strides=window, padding='SAME')

#### Defining the network layers

In [None]:
def network(x, weights, biases, n_in, mp_psize, mp_dropout, dense_dropout,
            training=True):
    """Build the convolutional classifier graph and return its logits.

    Architecture: conv -> conv -> pool -> dropout -> conv -> conv -> pool ->
    dropout -> conv -> flatten -> dense -> dropout -> linear output.

    Args:
        x: tensor of flattened square images, one row per example.
        weights: dict with keys 'W_c1'..'W_c5', 'W_d1' and 'W_out'.
        biases: dict with keys 'b_c1'..'b_c5', 'b_d1' and 'b_out'.
        n_in: length of one flattened image; its integer square root is used
            as the image side length.
        mp_psize: max-pooling window size (also the pooling stride).
        mp_dropout: dropout rate applied after each pooling layer.
        dense_dropout: dropout rate applied after the dense layer.
        training: whether dropout is applied. Either a Python bool or a scalar
            boolean tensor (e.g. a placeholder) so that one graph can serve
            both training and evaluation. Defaults to True, which applies
            dropout unconditionally exactly as before this argument existed.

    Returns:
        The un-normalised output scores (logits) of the final linear layer.
    """

    def _dropout(tensor, drop_rate):
        # The second positional argument of tf.nn.dropout is the probability
        # of *keeping* a unit, hence the 1 - rate conversion.
        keep_prob = 1 - drop_rate
        if isinstance(training, (bool, np.bool_)):
            return tf.nn.dropout(tensor, keep_prob) if training else tensor
        # `training` is a tensor: choose the branch when the graph is run.
        return tf.cond(training,
                       lambda: tf.nn.dropout(tensor, keep_prob),
                       lambda: tensor)

    # reshape linear pixel input into a square, single-channel image:
    square_dimensions = int(np.sqrt(n_in))
    square_x = tf.reshape(x, shape=[-1, square_dimensions, square_dimensions, 1])

    # convolutional and max-pooling layers:
    conv_1 = conv2d(square_x, weights['W_c1'], biases['b_c1'])
    conv_2 = conv2d(conv_1, weights['W_c2'], biases['b_c2'])
    pool_1 = maxpooling2d(conv_2, mp_psize)
    pool_1 = _dropout(pool_1, mp_dropout)
    conv_3 = conv2d(pool_1, weights['W_c3'], biases['b_c3'])
    conv_4 = conv2d(conv_3, weights['W_c4'], biases['b_c4'])
    pool_2 = maxpooling2d(conv_4, mp_psize)
    pool_2 = _dropout(pool_2, mp_dropout)
    conv_5 = conv2d(pool_2, weights['W_c5'], biases['b_c5'])

    # dense layer: the flattened width is read from the first dimension of
    # W_d1 so that the reshape always matches the matmul that follows.
    flat_width = weights['W_d1'].get_shape().as_list()[0]
    flat = tf.reshape(conv_5, [-1, flat_width])
    dense_1 = dense(flat, weights['W_d1'], biases['b_d1'])
    dense_1 = _dropout(dense_1, dense_dropout)

    # output layer (no activation: the loss applies softmax itself):
    out_layer_z = tf.add(tf.matmul(dense_1, weights['W_out']), biases['b_out'])

    return out_layer_z

#### Creating the bias and weight dictionaries

In [None]:
# One zero-initialised bias vector per layer, sized to that layer's number of
# output channels / units.
bias_dict = {
    'b_c1': tf.Variable(tf.zeros([n_conv_1])),
    'b_c2': tf.Variable(tf.zeros([n_conv_2])),
    'b_c3': tf.Variable(tf.zeros([n_conv_3])),
    'b_c4': tf.Variable(tf.zeros([n_conv_3])),
    'b_c5': tf.Variable(tf.zeros([n_conv_3])),
    'b_d1': tf.Variable(tf.zeros([n_dense])),
    'b_out': tf.Variable(tf.zeros([n_classes]))
}

# calculate number of inputs to dense layer:
# the convolutions use SAME padding with stride 1 and keep the side length;
# each of the two SAME-padded pooling layers maps a side of n to
# ceil(n / pool_size). Using ceil per stage (instead of truncating
# n / pool_size / pool_size) stays correct when the side is not an exact
# multiple of the pool size.
full_square_length = int(np.sqrt(n_input))
after_first_pool = int(np.ceil(full_square_length / pool_size))
pooled_square_length = int(np.ceil(after_first_pool / pool_size))
dense_inputs = pooled_square_length**2 * n_conv_3

# Trainable weights. Convolution filters are shaped
# [kernel, kernel, in_channels, out_channels]; dense weights are [in, out].
weight_dict = {
    'W_c1': tf.get_variable('W_c1',
                            [k_conv_1, k_conv_1, 1, n_conv_1], initializer=wt_init),
    'W_c2': tf.get_variable('W_c2',
                            [k_conv_2, k_conv_2, n_conv_1, n_conv_2], initializer=wt_init),
    'W_c3': tf.get_variable('W_c3',
                            [k_conv_3, k_conv_3, n_conv_2, n_conv_3], initializer=wt_init),
    'W_c4': tf.get_variable('W_c4',
                            [k_conv_3, k_conv_3, n_conv_3, n_conv_3], initializer=wt_init),
    'W_c5': tf.get_variable('W_c5',
                            [k_conv_3, k_conv_3, n_conv_3, n_conv_3], initializer=wt_init),
    'W_d1': tf.get_variable('W_d1',
                            [dense_inputs, n_dense], initializer=wt_init),
    'W_out': tf.get_variable('W_out',
                             [n_dense, n_classes], initializer=wt_init)
}

#### Calculating cost and accuracy

In [None]:
# Build the model graph; `predictions` holds the un-normalised class scores
# (logits) produced by the final linear layer.
predictions = network(
    x,
    weights=weight_dict,
    biases=bias_dict,
    n_in=n_input,
    mp_psize=pool_size,
    mp_dropout=mp_layer_dropout,
    dense_dropout=dense_layer_dropout,
)

In [None]:
# Static shape of the logits: (?, n_classes), i.e. one score per class for
# each example in the batch.
print(predictions.get_shape())

In [None]:
# Softmax cross-entropy between the logits and the one-hot labels, averaged
# over the batch, and an Adam training step (default settings) that minimises it.
per_example_loss = tf.nn.softmax_cross_entropy_with_logits_v2(
    labels=y, logits=predictions)
cost = tf.reduce_mean(per_example_loss)
optimizer = tf.train.AdamOptimizer().minimize(cost)

In [None]:
# An example counts as correct when the highest-scoring class matches the
# position of the 1 in its one-hot label; accuracy is reported in percent.
predicted_class = tf.argmax(predictions, axis=1)
true_class = tf.argmax(y, axis=1)
correct_prediction = tf.equal(predicted_class, true_class)
accuracy_pct = tf.reduce_mean(tf.cast(correct_prediction, tf.float32)) * 100

In [None]:
# Op that initialises all global variables when run; it is executed once at
# the start of the training session.
initializer_op = tf.global_variables_initializer()

In [None]:
# Index of the highest-scoring class for each example.
predict = tf.argmax(predictions, axis=1)

In [None]:
# Static shape of the predicted-class tensor.
predict.get_shape()

#### Running the model

In [None]:
# Train for `epochs` passes over the training set in consecutive mini-batches,
# printing running statistics, then evaluate once on the test set.
with tf.Session() as session:
    session.run(initializer_op)

    print("Training for", epochs, "epochs.")

    # Only full batches are used; a trailing partial batch is skipped.
    n_samples = x_train.shape[0]
    n_batches = n_samples // batch_size

    # loop over epochs:
    for epoch in range(epochs):

        avg_cost = 0.0 # track cost to monitor performance during training
        avg_accuracy_pct = 0.0

        # loop over all batches of the epoch:
        for i in range(n_batches):

            # to reassure you something's happening!
            if i % display_progress == 0:
                print("Step ", i+1, " of ", n_batches, " in epoch ", epoch+1, ".", sep='')

            # Consecutive, non-overlapping slices. Because i < n_batches the
            # slice never runs past the data, so no modulo is needed. The
            # previous `% (n_samples - batch_size)` wrapped the last batch
            # back to index 0 whenever n_samples was an exact multiple of
            # batch_size, and divided by zero when the two were equal.
            batch_start_idx = i * batch_size
            batch_end_idx = batch_start_idx + batch_size
            batch_X = x_train[batch_start_idx:batch_end_idx]
            batch_Y = y_train[batch_start_idx:batch_end_idx]

            # feed batch data to run optimization and fetching cost and accuracy:
            _, batch_cost, batch_acc = session.run([optimizer, cost, accuracy_pct],
                                                   feed_dict={x: batch_X, y: batch_Y})

            # accumulate mean loss and accuracy over epoch:
            avg_cost += batch_cost / n_batches
            avg_accuracy_pct += batch_acc / n_batches

        # output logs at end of each epoch of training:
        print("Epoch ", '%03d' % (epoch+1),
              ": cost = ", '{:.3f}'.format(avg_cost),
              ", accuracy = ", '{:.2f}'.format(avg_accuracy_pct), "%",
              sep='')

    print("Training Complete. Testing Model.\n")

    # Fetch both metrics from one forward pass so that they describe the same
    # evaluation and the test set is only pushed through the network once.
    # NOTE(review): the dropout layers are built into the graph with fixed
    # keep probabilities, so dropout is still active here and these numbers
    # understate what the trained weights can achieve.
    test_cost, test_accuracy_pct = session.run([cost, accuracy_pct],
                                               feed_dict={x: x_test, y: y_test})

    print("Test Cost:", '{:.3f}'.format(test_cost))
    print("Test Accuracy: ", '{:.2f}'.format(test_accuracy_pct), "%", sep='')