In [2]:
import numpy as np
import pandas as pd

 ## Loading MNIST Dataset

In [3]:
mnist=pd.read_csv('train_mnist.csv')

In [4]:
mnist.head()

Unnamed: 0,label,pixel0,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,...,pixel774,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783
0,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,4,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


## Getting Target Values

In [5]:
y=mnist.label
y.head()

0    1
1    0
2    1
3    4
4    0
Name: label, dtype: int64

## Target values into Categorical Feature

In [6]:
y_d=pd.get_dummies(y,prefix='Num_')
y_d.head()

Unnamed: 0,Num__0,Num__1,Num__2,Num__3,Num__4,Num__5,Num__6,Num__7,Num__8,Num__9
0,0,1,0,0,0,0,0,0,0,0
1,1,0,0,0,0,0,0,0,0,0
2,0,1,0,0,0,0,0,0,0,0
3,0,0,0,0,1,0,0,0,0,0
4,1,0,0,0,0,0,0,0,0,0


In [7]:
y_d=np.array(y_d)
y_d.shape

(42000, 10)

## Dropping Target values from input

In [8]:
X=mnist.drop('label',axis=1)
X.head()

Unnamed: 0,pixel0,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,pixel9,...,pixel774,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


## Reshaping Input to apply convolution

In [9]:
X=np.array(X)
X=X.reshape(42000,28,28,1)
X.shape

(42000, 28, 28, 1)

## Function for Padding

In [10]:
def zero_pad(X, pad):
    X_pad = np.pad(X, ((0, 0), (pad, pad), (pad, pad), (0, 0)), 'constant', constant_values=0)
    return X_pad

## Single Convolution Step

In [11]:
def conv_single_step(a_slice_prev, W, b):
    s = np.multiply(a_slice_prev, W) + b
    Z = np.sum(s)
    return Z

In [12]:
W = np.random.randn(3, 3)
b = np.random.randn(1)

Z = conv_single_step(X[0,0:3,0:3], W, b)
print("Z =", Z)

Z = -7.337117287895287


## Function for Convolution

In [13]:
def conv_forward(A_prev, W, b, hparameters):
    (m, n_H_prev, n_W_prev, n_C_prev) = A_prev.shape
    (f, f, n_C_prev, n_C) = W.shape
    stride = hparameters['stride']
    pad = hparameters['pad']
    n_H = int((n_H_prev - f + 2 * pad) / stride) + 1
    n_W = int((n_W_prev - f + 2 * pad) / stride) + 1
    Z = np.zeros((m, n_H, n_W, n_C))
    A_prev_pad = zero_pad(A_prev, pad)
    for i in range(m):                                 # loop over the batch of training examples
        a_prev_pad = A_prev_pad[i]                     # Select ith training example's padded activation
        for h in range(n_H):                           # loop over vertical axis of the output volume
            for w in range(n_W):                       # loop over horizontal axis of the output volume
                for c in range(n_C):
                    vert_start = h * stride
                    vert_end = vert_start + f
                    horiz_start = w * stride
                    horiz_end = horiz_start + f
                    a_slice_prev = a_prev_pad[vert_start:vert_end, horiz_start:horiz_end, :]
                    Z[i, h, w, c] = conv_single_step(a_slice_prev, W[...,c], b[...,c])
    assert(Z.shape == (m, n_H, n_W, n_C))
    cache = (A_prev, W, b, hparameters)
    return Z, cache                

## Convolution Backward

In [14]:
def conv_backward(dZ, cache):
    
    (A_prev, W, b, hparameters) = cache
    (m, n_H_prev, n_W_prev, n_C_prev) = A_prev.shape
    (f, f, n_C_prev, n_C) = W.shape
    stride = hparameters["stride"]
    pad = hparameters["pad"]
    (m, n_H, n_W, n_C) = dZ.shape    
    # Initialize dA_prev, dW, db with the correct shapes
    dA_prev = np.zeros((m, n_H_prev, n_W_prev, n_C_prev))                           
    dW = np.zeros((f, f, n_C_prev, n_C))
    db = np.zeros((1, 1, 1, n_C))

    # Pad A_prev and dA_prev
    A_prev_pad = zero_pad(A_prev, pad)
    dA_prev_pad = zero_pad(dA_prev, pad)
    
    for i in range(m):                      
        a_prev_pad = A_prev_pad[i]
        da_prev_pad = dA_prev_pad[i]
        
        for h in range(n_H):                   # loop over vertical axis of the output volume
            for w in range(n_W):               # loop over horizontal axis of the output volume
                for c in range(n_C):           # loop over the channels of the output volume
                    
                    # Find the corners of the current "slice"
                    vert_start = h
                    vert_end = vert_start + f
                    horiz_start = w
                    horiz_end = horiz_start + f
                    
                    # Use the corners to define the slice from a_prev_pad
                    a_slice = a_prev_pad[vert_start:vert_end, horiz_start:horiz_end, :]

                    # Update gradients for the window and the filter's parameters using the code formulas given above
                    da_prev_pad[vert_start:vert_end, horiz_start:horiz_end, :] += W[:,:,:,c] * dZ[i, h, w, c]
                    dW[:,:,:,c] += a_slice * dZ[i, h, w, c]
                    db[:,:,:,c] += dZ[i, h, w, c]
    assert(dA_prev.shape == (m, n_H_prev, n_W_prev, n_C_prev))    
    return dA_prev, dW, db

## ReLU Function

In [15]:
def ReLU(x):
    return np.maximum(0,x)

## Softmax Regression

In [16]:
def softmax(x):
    s=np.exp(x)/np.sum(np.exp(x))
    return s

### This Function takes activation of convolution layer and reshape it into 2d matrices output the probability of   each number after softmax regression 

In [17]:
def ultimate_function(a,w1,b1):
    a=a.reshape(500,196)
    Z2=np.dot(a,w1)+b1
    a2=np.zeros((500,10))
    for i in range(500):
        a2[i]=softmax(Z2[i])
    return a2,Z2

## Cost Function (Cross Entropy)

In [18]:
def Cost(y,a):
    J=np.zeros((500,10))    #
    for i in range(500):
        J= - np.multiply(y[i], np.log(a[i]+0.0000005)).sum()
        return J

## Backpropagation for Softmax Regression

In [19]:
def softmax_backprop(a):
    b=1-a
    return np.multiply(a+0.00000001,b)

## Backpropagation for Cost Function

In [20]:
def cost_back(y_d,a):
    return np.multiply(y_d,1/(a+0.00000001))

### Calculate dL/dW1

In [21]:
def wz_back(X,a):
    dzw=np.dot(X,a)
    return dzw

## Initiating Parameters and Hyperparameters

In [22]:
W = np.random.randn(2, 2, 1, 1)
b = np.random.randn(1, 1, 1, 1)
w1=np.random.rand(196,10)*0.0001
b1=np.random.rand(1,10)
alpha=0.000001
hparameters = {"pad" : 0,
               "stride": 2}
print(b1)

[[0.49776634 0.41479187 0.62161103 0.653632   0.00954677 0.82059191
  0.27075149 0.37120023 0.46784255 0.78595239]]


## Implementing CNN using Numpy (Forward Propagation and Backward Propagation)

In [23]:
for i in range(0,42000,500):
    
    Z, cache_conv = conv_forward(X[i:i+500,:,:,:], W, b, hparameters)   # Convolution Layer
    print("Z's mean =", np.mean(Z))
    a=ReLU(Z)                                       # ReLU Activation Function
    ac=a.reshape(500,196)                   
    a2,Z2=ultimate_function(a,w1,b1)                # Softmax Regression
    print(a2)
    cost_func=Cost(y_d[i:i+500,:],a2)              # Cost Function
    g=softmax_backprop(a2)                          # Softmax Backpropagation
    h=cost_back(y_d[i:i+500],a2)                    # Cost Function Backpropagation
    gh=np.multiply(g,h)                            
    dw1=wz_back(ac.T,gh)
    db1=np.sum(gh)
    w1=w1-alpha/500*dw1
    b1=b1-alpha/500*db1
    dA, dW, db = conv_backward(Z, cache_conv)
    W=W-alpha/500*dW
    b=b-alpha/500*db
    print("dw1_mean =", np.mean(dw1))
    print("w1_mean =", np.mean(w1))
    print("W_mean =", np.mean(W))

Z's mean = -95.86508381298363
[[0.09805675 0.09024894 0.11098456 ... 0.08639936 0.09516599 0.13080824]
 [0.09805675 0.09024894 0.11098456 ... 0.08639936 0.09516599 0.13080824]
 [0.09805675 0.09024894 0.11098456 ... 0.08639936 0.09516599 0.13080824]
 ...
 [0.09805675 0.09024894 0.11098456 ... 0.08639936 0.09516599 0.13080824]
 [0.09805675 0.09024894 0.11098456 ... 0.08639936 0.09516599 0.13080824]
 [0.09805675 0.09024894 0.11098456 ... 0.08639936 0.09516599 0.13080824]]
dw1_mean = 0.0
w1_mean = 4.961373735828896e-05
W_mean = -0.1304127081888542
Z's mean = -19.64535534697052
[[0.09771069 0.08995692 0.11139975 ... 0.08659256 0.09554637 0.13064421]
 [0.09793434 0.09037599 0.11089692 ... 0.08643908 0.09505606 0.130654  ]
 [0.09814524 0.09009858 0.11103895 ... 0.08622401 0.09537095 0.13078939]
 ...
 [0.0978721  0.09043933 0.11125194 ... 0.08631518 0.09444577 0.13111934]
 [0.09817308 0.08983458 0.11174573 ... 0.08675148 0.09494031 0.1304665 ]
 [0.09797472 0.08991273 0.11058644 ... 0.08612799 

dw1_mean = 258.9820809102979
w1_mean = 4.298501649044167e-05
W_mean = 0.010205405120348815
Z's mean = -0.06436738137589228
[[0.09819462 0.09077289 0.11263675 ... 0.08777925 0.09441985 0.130285  ]
 [0.09766354 0.09020119 0.11155309 ... 0.08607947 0.09564807 0.1305769 ]
 [0.09799848 0.09029875 0.11206235 ... 0.08683162 0.09505451 0.13009094]
 ...
 [0.09886537 0.0910597  0.11268771 ... 0.08571404 0.09220104 0.12843883]
 [0.09853213 0.09025325 0.1123161  ... 0.08743123 0.09419095 0.13032889]
 [0.09867935 0.09081834 0.11082677 ... 0.0866491  0.0949524  0.1308338 ]]
dw1_mean = 252.10155869159783
w1_mean = 4.2480813373058474e-05
W_mean = 0.011216681417978339
Z's mean = 0.16245375615094643
[[0.09730025 0.09079324 0.11194458 ... 0.08830776 0.09425017 0.13120332]
 [0.09865307 0.09027919 0.11170718 ... 0.08777682 0.09518949 0.13063571]
 [0.09829307 0.09072463 0.10913823 ... 0.08663851 0.09506796 0.13181059]
 ...
 [0.09670246 0.09042739 0.11080984 ... 0.08652028 0.09451682 0.13171547]
 [0.09812211

dw1_mean = 248.48871547287752
w1_mean = 3.5381237394186075e-05
W_mean = 0.01055654387854664
Z's mean = -0.0595066530501087
[[0.09771672 0.09051755 0.11101937 ... 0.08783837 0.0942396  0.13092942]
 [0.0960529  0.09213421 0.11239993 ... 0.08915845 0.09266711 0.13096164]
 [0.09484227 0.09164724 0.11058706 ... 0.08754698 0.09363359 0.13286493]
 ...
 [0.09759629 0.09115712 0.11052304 ... 0.08623671 0.09523259 0.13060497]
 [0.09876567 0.09198802 0.11243241 ... 0.08673756 0.09372974 0.12701467]
 [0.09722424 0.09156968 0.11125009 ... 0.08822482 0.09467277 0.12987538]]
dw1_mean = 246.34918086949907
w1_mean = 3.4888539032447084e-05
W_mean = 0.010894828619579314
Z's mean = 0.11063696082837086
[[0.09830278 0.09037593 0.1117245  ... 0.08716505 0.09382352 0.1310443 ]
 [0.09763001 0.09038181 0.11110009 ... 0.08679262 0.09531987 0.1296811 ]
 [0.09814163 0.09338022 0.11162055 ... 0.08762433 0.09357865 0.12959281]
 ...
 [0.09863421 0.09113386 0.10877793 ... 0.08625463 0.09558586 0.13110551]
 [0.09815807

dw1_mean = 244.4501361349094
w1_mean = 2.792376019147293e-05
W_mean = 0.009777280409173028
Z's mean = -0.1581252277464334
[[0.09712106 0.09372929 0.11328893 ... 0.08516016 0.09299799 0.1278715 ]
 [0.09855363 0.0915344  0.10963762 ... 0.0865943  0.0959907  0.13094977]
 [0.09895018 0.09168659 0.11132545 ... 0.08606748 0.095468   0.12865276]
 ...
 [0.09672384 0.09254428 0.11170843 ... 0.08626589 0.09474706 0.13191158]
 [0.09828114 0.09283409 0.11258259 ... 0.08758411 0.09479635 0.12859767]
 [0.09440486 0.09449906 0.1112806  ... 0.09145991 0.09107531 0.13843299]]
dw1_mean = 241.5205823893301
w1_mean = 2.744071902669427e-05
W_mean = 0.010345642260481389
Z's mean = -0.02754000962233261
[[0.0996349  0.09334395 0.11081348 ... 0.08474655 0.09561322 0.12751303]
 [0.09498959 0.09236074 0.11047804 ... 0.08681127 0.09418818 0.13248037]
 [0.09936845 0.09254716 0.10774001 ... 0.08681432 0.09509016 0.12953444]
 ...
 [0.09382226 0.09229391 0.1116823  ... 0.090514   0.09103941 0.13608323]
 [0.09899188 0

dw1_mean = 242.18546146154972
w1_mean = 2.053981250913392e-05
W_mean = 0.009291324694532671
Z's mean = -0.12506303793593576
[[0.09480004 0.09466983 0.11173261 ... 0.09022444 0.09019535 0.13849231]
 [0.09773803 0.09308952 0.11027337 ... 0.08699145 0.09312617 0.12985809]
 [0.09273336 0.0943555  0.11041327 ... 0.08607922 0.09589543 0.13006374]
 ...
 [0.09636846 0.09144148 0.11304959 ... 0.08967691 0.09078137 0.13306782]
 [0.09781683 0.09304601 0.10614968 ... 0.08995395 0.09435642 0.1302648 ]
 [0.09436216 0.09143308 0.11266515 ... 0.09174236 0.08933849 0.1348334 ]]
dw1_mean = 244.06851044414785
w1_mean = 2.0051675488245625e-05
W_mean = 0.009561652553414957
Z's mean = -0.1114302257300379
[[0.09312119 0.09393266 0.11157286 ... 0.08970171 0.09190564 0.133961  ]
 [0.09545628 0.09277151 0.10875998 ... 0.08901768 0.09415819 0.13330522]
 [0.09501976 0.09373771 0.10997861 ... 0.08595083 0.09503367 0.13141018]
 ...
 [0.09559639 0.09297844 0.11073139 ... 0.09338841 0.08914042 0.13326061]
 [0.0985162

dw1_mean = 233.6138219571481
w1_mean = 1.3298748527921709e-05
W_mean = 0.009542567719824563
Z's mean = -0.13832398065632495
[[0.10107796 0.0940571  0.11260186 ... 0.08635625 0.09398125 0.12577961]
 [0.09904991 0.09228906 0.11069292 ... 0.08735141 0.09395343 0.13177613]
 [0.09721903 0.09050286 0.1104079  ... 0.08936742 0.09210475 0.13399737]
 ...
 [0.09789    0.09290581 0.10815611 ... 0.08802882 0.09420015 0.13113232]
 [0.09440059 0.09683113 0.11323606 ... 0.08538311 0.09413107 0.13068901]
 [0.08967884 0.0935706  0.11113795 ... 0.08929613 0.09087582 0.13704144]]
dw1_mean = 238.08232952349894
w1_mean = 1.2822583868874712e-05
W_mean = 0.010901496001920724
Z's mean = 0.11749807636010391
[[0.10015533 0.09329414 0.11009092 ... 0.08620286 0.09481755 0.12595194]
 [0.09940864 0.09294504 0.11410448 ... 0.08227497 0.09524176 0.1246178 ]
 [0.09595986 0.09678019 0.11128894 ... 0.08562438 0.09486309 0.13057146]
 ...
 [0.0954408  0.09204529 0.11093126 ... 0.09009551 0.09011006 0.13369764]
 [0.0977016

In [24]:
print(W)

[[[[ 0.21847906]]

  [[-0.68409896]]]


 [[[ 0.03935637]]

  [[ 0.46175016]]]]


In [25]:
print(b)

[[[[-0.35237347]]]]
