<a href="https://colab.research.google.com/github/ariG23498/GrokkingDeepLearning/blob/master/%239_Convolution_NN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Imports

In [1]:
%tensorflow_version 2.x
import tensorflow as tf
from tensorflow.keras.datasets import mnist
import numpy as np
import matplotlib.pyplot as plt

from tqdm import tqdm

TensorFlow 2.x selected.


In [0]:
np.random.seed(1)

# The dataset
The dataset that is being used here is the MNIST dataset, that is readily available in the `keras` module of `tensorflow`.
<br>
There are 70,000 images of `hand-written` digits. 60,000 are for training and 10,000 are for testing. The digits range from `0-9`.

In [3]:
(x_train,y_train),(x_test,y_test) = mnist.load_data()
train_images, train_labels = (x_train.reshape((x_train.shape[0],784))/255,
                  y_train)
test_images, test_labels = (x_test.reshape((x_test.shape[0],784))/255,
                  y_test)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz


## One Hot Encoding

In [0]:
one_hot_train = np.zeros((len(train_labels),10))
one_hot_test = np.zeros((len(test_labels),10))

for i,l in enumerate(train_labels):
  one_hot_train[i][l] = 1
for i,l in enumerate(test_labels):
  one_hot_test[i][l] = 1
train_labels = one_hot_train
test_labels = one_hot_test

In [5]:
print('Shape of train images: {}'.format(train_images.shape))
print('Shape of train labels: {}'.format(train_labels.shape))

print('Shape of test images: {}'.format(test_images.shape))
print('Shape of test labels: {}'.format(test_labels.shape))

Shape of train images: (60000, 784)
Shape of train labels: (60000, 10)
Shape of test images: (10000, 784)
Shape of test labels: (10000, 10)


# Activation Functions
1. tanh
2. softmax

In [0]:
def tanh(x):
  return np.tanh(x)

def softmax(x):
  a = np.exp(x)
  return a/np.sum(a,axis=1,keepdims=True)

def tanh2deriv(output):
  return 1 - (output ** 2)


# Configs useful for learning

In [0]:
alpha, iteration = 2, 100
pixels_per_image, num_labels = 784, 10
batch_size = 128

input_rows = 28
input_cols = 28

kernel_rows = 3
kernel_cols = 3
num_kernels = 16

hidden_size = (input_rows - kernel_rows)*(input_cols - kernel_cols) * num_kernels
# hidden_size --> 10000
kernels = 0.02*np.random.random((kernel_rows*kernel_cols,num_kernels))-0.01
# kernels.shape --> (9, 16)
weights_1_2 = 0.2*np.random.random((hidden_size,num_labels))-0.1
# weights_1_2.shape --> (10000, 10)

# Function to retrieve the subsection of an image

In [0]:
def get_image_section(layer, row_from, row_to, col_from, col_to):
  section = layer[:,row_from:row_to,col_from:col_to]
  return section.reshape((-1,1,row_to-row_from,col_to-col_from))

In [18]:
for j in range(iteration):
  correct_cnt = 0
  # Enters each iteration (epoch)
  for i in range(len(train_images)//batch_size):
    # Enters each batch
    batch_start, batch_end = ((i*batch_size),((i+1)*batch_size))
    layer_0 = train_images[batch_start:batch_end]
    layer_0 = layer_0.reshape((layer_0.shape[0],28,28))
    # layer_0.shape --> (128,28,28)

    sects = list()
    for row_start in range(layer_0.shape[1] - kernel_rows):
    # Each row
      for col_start in range(layer_0.shape[2] - kernel_cols):
      # Each col
        sect = get_image_section(layer_0,
                                  row_start,
                                  row_start + kernel_rows,
                                  col_start,
                                  col_start + kernel_cols)
        sects.append(sect)
        # len(sects) --> 625 after one batch
        # sect.shape --> (128,1,3,3) --> this means each 3,3 piece of 128 images
    
    # FORWARD PROPAGATION:
    expanded_input = np.concatenate(sects,axis=1)
    # expanded_input.shape --> (128,625,3,3)
    es = expanded_input.shape
    flattened_input = expanded_input.reshape((es[0]*es[1],-1))
    # flattened_input.shape --> (80000, 9)

    kernel_output = flattened_input.dot(kernels)
    layer_1 = tanh(kernel_output.reshape((es[0],-1)))
    # layer_1.shape --> (128, 10000)

    dropout_mask = np.random.randint(2,size=layer_1.shape)
    layer_1 *= dropout_mask * 2
    layer_2 = softmax(np.dot(layer_1,weights_1_2))
    # layer_2.shape --> (128,10)

    # FOR ACCURACY CALCULATION
    for k in range(batch_size):
      labelset = train_labels[batch_start+k:batch_start+k+1]
      _inc = int(np.argmax(layer_2[k:k+1]) == np.argmax(labelset)) # int() is used to convert True False into 1 or 0
      correct_cnt += _inc
    
    # BACK PROPAGATION
    layer_2_delta = (train_labels[batch_start:batch_end]-layer_2)/(batch_size*layer_2.shape[0])
    layer_1_delta = layer_2_delta.dot(weights_1_2.T)*tanh2deriv(layer_1)
    layer_1_delta *= dropout_mask
    weights_1_2 += alpha * layer_1.T.dot(layer_2_delta)
    l1d_reshape = layer_1_delta.reshape(kernel_output.shape)
    # l1d_respahe:  (80000, 16)
    k_update = flattened_input.T.dot(l1d_reshape)
    # k_update:  (9, 16)
    kernels -= alpha * k_update

  # TESTING
  test_correct_cnt = 0
  for i in range(len(test_images)):
    layer_0 = test_images[i:i+1]
    layer_0 = layer_0.reshape((layer_0.shape[0],28,28))
    # layer_0.shape --> (1,28,28)

    sects = list()
    for row_start in range(layer_0.shape[1] - kernel_rows):
      # Sweeps the row
      for col_start in range(layer_0.shape[2] - kernel_cols):
        # Sweeps the col
        sect = get_image_section(layer_0,
                                  row_start,
                                  row_start + kernel_rows,
                                  col_start,
                                  col_start + kernel_cols)
        sects.append(sect)
        # sect.shape --> (1,1,3,3) --> this means each 3,3 piece of 1 test image

    # FORWARD PROPAGATION:
    expanded_input = np.concatenate(sects,axis=1)
    # expanded_input.shape --> (1,625,3,3)
    es = expanded_input.shape
    flattened_input = expanded_input.reshape((es[0]*es[1],-1))
    # flattened_input.shape --> (625, 9)

    kernel_output = flattened_input.dot(kernels)
    layer_1 = tanh(kernel_output.reshape((es[0],-1)))
    # layer_1.shape --> (1, 10000)

    dropout_mask = np.random.randint(2,size=layer_1.shape)
    layer_1 *= dropout_mask * 2
    layer_2 = softmax(np.dot(layer_1,weights_1_2))
    # layer_2.shape --> (1,10)

    test_correct_cnt += int(np.argmax(layer_2) == np.argmax(test_labels[i:i+1]))
  if(j % 10 == 0):
    print('Test Acc: {:0.2f}  <--> Train Acc: {:0.2f}'.format((test_correct_cnt/len(test_images)),(correct_cnt/len(train_images))))

Test Acc: 0.59  <--> Train Acc: 0.23
Test Acc: 0.87  <--> Train Acc: 0.86
Test Acc: 0.86  <--> Train Acc: 0.85
Test Acc: 0.88  <--> Train Acc: 0.89
Test Acc: 0.88  <--> Train Acc: 0.89
Test Acc: 0.89  <--> Train Acc: 0.89
Test Acc: 0.91  <--> Train Acc: 0.91
Test Acc: 0.91  <--> Train Acc: 0.91
Test Acc: 0.91  <--> Train Acc: 0.91
Test Acc: 0.91  <--> Train Acc: 0.91
