In [3]:
import tensorflow as tf

In [4]:
from tensorflow.examples.tutorials.mnist import input_data
mnist=input_data.read_data_sets("MNIST_data/",one_hot=True)

Extracting MNIST_data/train-images-idx3-ubyte.gz
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz


In [5]:
# images are 784 in size ie 28*28

input_width=28
input_height=28
input_channels=1
input_pixels=784

n_conv1=32
n_conv2=64

# strides
stride_conv1=1
stride_conv2=1

# filter size
conv1_k=5
conv2_k=5
max_pool1_k=2
max_pool2_k=2

n_hidden=1024
n_out=10

input_size_to_hidden=(input_width//(max_pool1_k*max_pool2_k))*(input_height//(max_pool1_k*max_pool2_k))*n_conv2

In [6]:
weights={
    'wc1':tf.Variable(tf.random_normal([conv1_k,conv1_k,input_channels,n_conv1])),
    'wc2':tf.Variable(tf.random_normal([conv2_k,conv2_k,n_conv1,n_conv2])),
    'wh1':tf.Variable(tf.random_normal([input_size_to_hidden,n_hidden])),
    'wo':tf.Variable(tf.random_normal([n_hidden,n_out]))
}

biases={
    'bc1':tf.Variable(tf.random_normal([n_conv1])),
    'bc2':tf.Variable(tf.random_normal([n_conv2])),
    'bh1':tf.Variable(tf.random_normal([n_hidden])),
    'bo':tf.Variable(tf.random_normal([n_out]))
}

In [7]:
# helper functions for forward propagation

def conv(x,weight,bias,stride):
#     see on google to know more about format of strides option
    out=tf.nn.conv2d(x,weight,padding='SAME',strides=[1,stride,stride,1])
    out=tf.nn.bias_add(out,bias)
    out=tf.nn.relu(out)
    return out

def maxpooling(x,k):
    return tf.nn.max_pool(x,padding='SAME',ksize=[1,k,k,1],strides=[1,k,k,1])

In [8]:
# forward propagation code

In [9]:
def cnn(x,weights,biases,keep_prob):
#     here -1 in shape means it will calculate that place from itself (ie by dividing from the rest)
# format- no.of images * height * width * channels
    x=tf.reshape(x,shape=[-1,input_height,input_width,input_channels])
    
    conv1=conv(x,weights['wc1'],biases['bc1'],stride_conv1)
    conv1_pool=maxpooling(conv1,max_pool1_k)
    
    conv2=conv(conv1_pool,weights['wc2'],biases['bc2'],stride_conv2)
    conv2_pool=maxpooling(conv2,max_pool2_k)
    
    hidden_input=tf.reshape(conv2_pool,shape=[-1,input_size_to_hidden])
    hidden_output_before_activation=tf.add(tf.matmul(hidden_input,weights['wh1']),biases['bh1'])
    hidden_output_before_dropout=tf.nn.relu(hidden_output_before_activation)
    hidden_output=tf.nn.dropout(hidden_output_before_dropout,keep_prob)
    
#     applying idenity as activation function (u can apply others as well)
    output=tf.add(tf.matmul(hidden_output,weights['wo']),biases['bo'])
    
    return output

In [12]:
x=tf.placeholder('float',[None,input_pixels])
y=tf.placeholder(tf.int32,[None,n_out])
keep_prob=tf.placeholder('float')
pred=cnn(x,weights,biases,keep_prob)

In [13]:
cost=tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=pred,labels=y))

In [14]:
optimizer=tf.train.AdamOptimizer(learning_rate=0.01)
optimize=optimizer.minimize(cost)

In [15]:
sess=tf.Session()
sess.run(tf.global_variables_initializer())

In [16]:
batch_size=100

for i in range(25):
    num_batches=int(mnist.train.num_examples/batch_size)
    total_cost=0
    for j in range(num_batches):
        batch_x,batch_y=mnist.train.next_batch(batch_size)
#         for training keep_prob is taken 0.8 
        c,_=sess.run([cost,optimize],feed_dict={x:batch_x,y:batch_y,keep_prob:0.8})
        total_cost+=c
    print(total_cost)

851521.9915838242
36193.90754392743
20578.588449995965
13640.591186141968
11829.705231126696
7774.6129785422345
7782.038122375167
7771.670944336582
6200.005128390195
5432.193937313513
4966.132248890268
4998.958568408192
4786.898593409509
3653.6021122206753
3319.6558790360687
3777.6008795282355
3387.1357175214225
2508.531298636606
2606.4013531629025
2801.5644580904896
2515.3985907841943
3021.1233977728075
2690.753678921218
2096.098754011329
2618.3992312649116


In [27]:
# for testing

In [18]:
# here 1 denotes the axis
predictions=tf.arg_max(pred,1)
true_labels=tf.arg_max(y,1)
correct_predictions=tf.equal(predictions,true_labels)
# for testing keep_prob will be taken 1.0 ie no dropout for testing
predictions_eval,labels,correct_pred=sess.run([predictions,true_labels,correct_predictions],feed_dict={x:mnist.test.images,y:mnist.test.labels,keep_prob:1})

correct_pred.sum()

9848