## Mnist With CNN
### Steps to achieve
#### Load the data
#### Create convolution layers
#### Create pooling layers
#### Create dropout layers
#### Create Dense nn and pass the above data

In [1]:
# Lets create
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import tensorflow as tf
import tensorflow_datasets as tfds

## create function to load the mnist data and convert that to numpy.

In [2]:
def mnist_data(train_test):
    mnist=tfds.load('mnist',split=train_test)
    #Convert tensor data set to numpy
    mnist_numpy=tfds.as_numpy(mnist)
    mnist_list=list(mnist_numpy)
    x_data,y_data=[],[]
    for example in mnist_list:
        x_data.append(example['image'])
        y_data.append(example['label'])
    x_data=np.asarray(x_data)
    #resize it to (28,28)
    x_data.resize((len(x_data),784))
    #one hot encoding for y_data
    y_data=tf.one_hot(y_data,depth=10)
    y_data=tfds.as_numpy(y_data)
    return (x_data,y_data)
        
        

In [57]:
#Load Train data and test data
x_train,y_train=mnist_data(tfds.Split.TRAIN)
x_test,y_test=mnist_data(tfds.Split.TEST)

In [58]:
x_train.shape

(60000, 784)

## Pre Process the data

In [61]:
x_train=x_train/255
#x_test=x_test/255

In [62]:
x_train.max()

1.0

In [63]:
#lets plot
#plt.imshow(x_train[654].resize((1,28,28)),cmap='gray')

### Create convolution layers
#### Steps for creating
##### Initialise the weights and bias
##### Create conv2d layer
##### Create Pooling layer
###### ** We will create function for each steps so that it would be easy to debug **

In [64]:
# initialize weights
def init_weights(shape):
    #We will grab weights from random distribution
    #print(shape)
    init_random_dist=tf.compat.v1.truncated_normal(shape,stddev=0.1)
    return tf.Variable(init_random_dist)

In [65]:
# Initialize the bias values
def init_bias(shape):
    init_bias_vals=tf.constant(0.1,dtype=tf.float32,shape=shape)
    return tf.Variable(init_bias_vals)

#### Conv 2d layer creation

In [66]:
def con2d(x,w):
    #x-input tensor [batch,h,w,c]
    #w-[filter height,filter weight,channels in,channels out]
    return tf.nn.conv2d(x,w,strides=[1,1,1,1],padding='SAME')
# We can give values to strides for each batch,height,width,color channels.

### Pooling layer 

In [67]:
def max_pool_2by2(x):
    # x--[batch,height,width,channels]
    # for kzise as we have to do pooling only for height and width so we will make as [1,2,2,1] same for strides
    return tf.nn.max_pool(x,ksize=[1,2,2,1],strides=[1,2,2,1],padding='SAME')

### Create layers

In [68]:
def convolution_layer(input_x,shape):
    w=init_weights(shape)
    b=init_bias([shape[3]])
    return tf.nn.relu(con2d(input_x,w)+b)

## normal layer

In [69]:
def normal_full_layer(input_layer,size):
    input_size=int(input_layer.get_shape()[1])
    w=init_weights([input_size,size])
    b=init_bias([size])
    return tf.matmul(input_layer,w)+b

In [70]:
tf.compat.v1.disable_eager_execution()

In [71]:
#place holders
x=tf.compat.v1.placeholder(tf.float32,shape=[None,784])
y_true=tf.compat.v1.placeholder(tf.float32,shape=[None,10])

In [72]:
#olayers
x_image=tf.reshape(x,[-1,28,28,1])

In [73]:
# Casting is important here to match the data types.
x_image=tf.cast(x_image,dtype=tf.float32)

In [74]:
# for 5 x 5 filter we are extracting 32 features for each 5 x5 patch and 1 is input channel[as of now we have gray chanl]
convo_1=convolution_layer(x_image,shape=[5,5,1,32])
convo_1_pooling=max_pool_2by2(convo_1)

In [75]:
## Why did we gave 32 for the in_channels: Because the output of convo_1 layer is 32 which we are giving as input
convo_2=convolution_layer(convo_1_pooling,shape=[5,5,32,64])
convo_2_pooling=max_pool_2by2(convo_2)

#### Lets flatten the layer to pass to the fully connected nn

In [76]:
convo_2_flat=tf.reshape(convo_2_pooling,shape=[-1,7*7*64])
full_layer_one=tf.nn.relu(normal_full_layer(convo_2_flat,1024))


In [77]:
convo_2_pooling.get_shape()

TensorShape([None, 7, 7, 64])

### Lets add drop out

In [78]:
hold_prob=tf.compat.v1.placeholder(tf.float32)
full_one_dropout=tf.nn.dropout(full_layer_one,rate=hold_prob)

In [79]:
y_pred=normal_full_layer(full_one_dropout,10)

### Loss Function

In [80]:
cross_entropy=tf.reduce_mean(tf.compat.v1.nn.softmax_cross_entropy_with_logits(labels=y_true,logits=y_pred))

In [81]:
optmizer=tf.compat.v1.train.AdamOptimizer(learning_rate=0.001)
train=optmizer.minimize(cross_entropy)

In [82]:
init=tf.compat.v1.global_variables_initializer()

In [83]:
#y_train[rand_int].shape,y_train[rand_int].dtype,

In [84]:
#y_train[rand_int]

In [85]:
steps=500
with tf.compat.v1.Session() as sess:
    sess.run(init)
    for i in range(steps):
        #import pdb; pdb.set_trace()
        rand_int=np.random.randint(len(x_train),size=50)
        #print(sess.run(y_train[rand_int].shape))
        sess.run(train,feed_dict={x:x_train[rand_int],y_true:y_train[rand_int],hold_prob:0.8})
        #For every 100 steps we will report the acuracy on test set.
        if i%100==0:
            print("on step:{}".format(i))
            print('Accuracy: ')
            matches=tf.equal(tf.argmax(y_pred,1),tf.argmax(y_true,1))
            acc=tf.reduce_mean(tf.cast(matches,tf.float32))

            print(sess.run(acc,feed_dict={x:x_test,y_true:y_test,hold_prob:1.0}))
            print("\n")

on step:0
Accuracy: 
0.098


on step:100
Accuracy: 
0.098


on step:200
Accuracy: 
0.098


on step:300
Accuracy: 
0.098


on step:400
Accuracy: 
0.098




In [86]:
rand_int=np.random.randint(len(x_train),size=50)

In [87]:
x_train[rand_int].shape

(50, 784)

In [None]:
x_train[rand_int].shape