In [4]:
import numpy as np
import h5py
import matplotlib.pyplot as plt

In [5]:
  """  Padding with zeros all images of the dataset X.  X -- array of shape (m, n_H, n_W, n_C) representing a batch of m images
    pad -- integer i.e. amount of padding around each image on vertical and horizontal dimensions.  Returns:
    X_pad -- padded image of shape (m, n_H + 2*pad, n_W + 2*pad, n_C)
    """
def zero_pad(X, pad):
    X_pad = np.pad(X, ((0, 0), (pad, pad), (pad, pad), (0, 0)), 'constant', constant_values=0)       
    return X_pad

In [17]:
"""Testing"""
x = np.random.randn(4, 3, 3, 2)
x_pad = zero_pad(x, 2)
print ("x.shape =", x.shape)
print ("x_pad.shape =", x_pad.shape)
print ("x[1, 1] =", x[1, 1])
print ("x_pad[1, 1] =", x_pad[1, 1])

x.shape = (4, 3, 3, 2)
x_pad.shape = (4, 7, 7, 2)
x[1, 1] = [[ 1.40754     0.12910158]
 [ 1.6169496   0.50274088]
 [ 1.55880554  0.1094027 ]]
x_pad[1, 1] = [[ 0.  0.]
 [ 0.  0.]
 [ 0.  0.]
 [ 0.  0.]
 [ 0.  0.]
 [ 0.  0.]
 [ 0.  0.]]


In [8]:
 """  Showing the original image and the padded image using matplotlib plots    """
fig, axarr = plt.subplots(1, 2)
axarr[0].set_title('x')
axarr[0].imshow(x[0,:,:,0])
axarr[1].set_title('x_pad')
axarr[1].imshow(x_pad[0,:,:,0])

<matplotlib.image.AxesImage at 0x1dea306add8>

In [10]:
"""A single slice of output activation of previous layer is taken i.e. a_slice_prev of shape [f,f,nCprev] and one filter defined by parameter W is applied. Here W is matrix of shape [f,f,nCprev] and bias b is [1,1,1]"""
def convolution_singlestep(a_slice_prev, W, b) :
        s = np.multiply(a_slice_prev, W) + b
        Z=np.sum(s)
        return Z

In [16]:
"""Testing"""
a_slice_prev = np.random.randn(4, 4, 3)
W = np.random.randn(4, 4, 3)
b = np.random.randn(1, 1, 1)

Z = convolution_singlestep(a_slice_prev, W, b)
print("Z =", Z)

Z = 54.8155337988


In [24]:
"""This is the forward prop step. The input for this is A_prev which is output activation of prev layer. its shape is [m, nHprev, nWprev, nCprev] for a batch of m inputs.There are F filters/weights denoted  by W.  hparameters -- python dictionary containing "stride" and "pad".  Returns:
    Z -- conv output of shape (m, n_H, n_W, n_C) and cache -- cache of values needed for the conv_backward() function"""
def conv_forward(A_prev, W, b, hparameters):
    # Retrieve dimensions from A_prev's shape 
    (m, nHprev, nWprev, nCprev) = A_prev.shape
    
    # Retrieve dimensions from W's shape 
    (f, f, nCprev, nC) = W.shape
    stride = hparameters['stride']
    pad = hparameters['pad']
    
  
    nH = int((nHprev - f + 2 * pad) / stride) + 1
    nW = int((nWprev - f + 2 * pad) / stride) + 1
    
    # Initialize the output volume Z with zeros.
    Z = np.zeros((m, nH, nW, nC))   
    A_prev_pad = zero_pad(A_prev, pad)
    
    for i in range(m):                                 
        a_prev_pad = A_prev_pad[i]                     # Select ith training example's padded activation
        for h in range(nH):                           # loop over vertical axis of the output volume
            for w in range(nW):                       # loop over horizontal axis of the output volume
                for c in range(nC):                   # loop over channels
                    # Find the corners of the current "slice"
                    vert_start = h * stride
                    vert_end = vert_start + f
                    horiz_start = w * stride
                    horiz_end = horiz_start + f
                    # Use the corners to define the (3D) slice of a_prev_pad
                    a_slice_prev = a_prev_pad[vert_start:vert_end, horiz_start:horiz_end, :]
                   
                    Z[i, h, w, c] = convolution_singlestep(a_slice_prev, W[...,c], b[...,c])
                                        
     # Making sure output shape is correct
    assert(Z.shape == (m, nH, nW, nC))
    
    # Save information in "cache" for the backprop
    cache = (A_prev, W, b, hparameters)
    
    return Z, cache

In [25]:
A_prev = np.random.randn(10, 4, 4, 3)
W = np.random.randn(2, 2, 3, 8)
b = np.random.randn(1, 1, 1, 8)
hparameters = {"pad" : 2,"stride": 1}

Z, cache_conv = conv_forward(A_prev, W, b, hparameters)
print("Z's mean =", np.mean(Z))
print("cache_conv[0][1][2][3] =", cache_conv[0][1][2][3])

Z's mean = 0.155859324889
cache_conv[0][1][2][3] = [-0.20075807  0.18656139  0.41005165]


In [34]:
"""Now implementing max pool and avg pool in same function with help of if-else. There are no parameters but there are hyperparameters such as window size f."""
def pool_forward(A_prev, hparameters, mode = "max"):
     # Retrieve dimensions from the input shape
    (m, nHprev, nWprev, nCprev) = A_prev.shape
    
    # Retrieve hyperparameters from "hparameters"
    f = hparameters["f"]
    stride = hparameters["stride"]
    
    nH = int(1 + (nHprev - f) / stride)
    nW = int(1 + (nWprev - f) / stride)
    nC = nCprev
    
    # Initialize output matrix A
    A = np.zeros((m, nH, nW, nC))              
    
    for i in range(m):                         
        for h in range(nH):                   
            for w in range(nW):               
                for c in range (nC):          
                    
                    vert_start = h * stride
                    vert_end = vert_start + f
                    horiz_start = w * stride
                    horiz_end = horiz_start + f
                    
                    a_prev_slice = A_prev[i, vert_start:vert_end, horiz_start:horiz_end, c]
                    
                    if mode == "max":
                        A[i, h, w, c] = np.max(a_prev_slice)
                    elif mode == "average":
                        A[i, h, w, c] = np.mean(a_prev_slice)
    
 
    cache = (A_prev, hparameters)
    
    assert(A.shape == (m, nH, nW, nC))
    
    return A, cache

In [35]:
"""Testing"""
A_prev = np.random.randn(2, 4, 4, 3)
hparameters = {"stride" : 1, "f": 4}

A, cache = pool_forward(A_prev, hparameters)
print("mode = max")
print("A =", A)
print()
A, cache = pool_forward(A_prev, hparameters, mode = "average")
print("mode = average")
print("A =", A)

mode = max
A = [[[[ 1.80358898  1.92381543  1.16128569]]]


 [[[ 1.1253235   1.63169151  1.81252782]]]]

mode = average
A = [[[[ 0.41217816  0.03753545 -0.25178582]]]


 [[[ 0.15540833 -0.07897916  0.23579956]]]]


In [38]:
"""Now the backprop is done. Input is dZ which is gradient of cost w.r.t. output of conv layer Z. Its is of shape [m,nH,nw,nC]. cache is output of conv_forward. Returns dA_prev w.r.t. input of conv layer A-prev and of shape [m, nHprev,nWprev, nCprev], dW of shape (f, f, n_C_prev, n_C) and db of shape [1,1,1,nC]"""
def conv_backward(dZ, cache):
    (A_prev, W, b, hparameters) = cache
    
    (m, nHprev, nWprev, nCprev) = A_prev.shape      
    (f, f, nCprev, nC) = W.shape
    
    stride = hparameters["stride"]
    pad = hparameters["pad"]    
    
    (m, nH, nW, nC) = dZ.shape
    
    # Initialize dA_prev, dW, db with the correct shapes
    dA_prev = np.zeros((m, nHprev, nWprev, nCprev))                           
    dW = np.zeros((f, f, nCprev, nC))
    db = np.zeros((1, 1, 1, nC))

    # Pad A_prev and dA_prev
    A_prev_pad = zero_pad(A_prev, pad)
    dA_prev_pad = zero_pad(dA_prev, pad)
    
    for i in range(m):                     
        
    
        a_prev_pad = A_prev_pad[i]
        da_prev_pad = dA_prev_pad[i]
        
        for h in range(nH):                
            for w in range(nW):             
                for c in range(nC):          
                    
                    vert_start = h
                    vert_end = vert_start + f
                    horiz_start = w
                    horiz_end = horiz_start + f                    
                   
                    a_slice = a_prev_pad[vert_start:vert_end, horiz_start:horiz_end, :]
                  
                    da_prev_pad[vert_start:vert_end, horiz_start:horiz_end, :] += W[:,:,:,c] * dZ[i, h, w, c]
                    dW[:,:,:,c] += a_slice * dZ[i, h, w, c]
                    db[:,:,:,c] += dZ[i, h, w, c]
                    
        # Set the ith training example's dA_prev to the unpaded da_prev_pad
        dA_prev[i, :, :, :] = da_prev_pad[pad:-pad, pad:-pad, :]
    
  
    assert(dA_prev.shape == (m, nHprev, nWprev, nCprev))

    return dA_prev, dW, db

In [39]:
"""Testing"""
dA, dW, db = conv_backward(Z, cache_conv)
print("dA_mean =", np.mean(dA))
print("dW_mean =", np.mean(dW))
print("db_mean =", np.mean(db))

dA_mean = 9.60899067587
dW_mean = 10.5817412755
db_mean = 76.3710691956


In [45]:
"""Even though a pooling layer has no parameters for backprop to update, you still need to backpropagation the gradient through the pooling layer in order to compute gradients for layers that came before the pooling layer. The helper function create_mask_from_window() creates a "mask" matrix which keeps track of where the maximum of the matrix is. True (1) indicates the position of the maximum in X, the other entries are False (0). """
def create_mask_from_window(x):
  
    mask = x == np.max(x)   
    return mask
        
"""Here mask and input x are arrays of same shape [f,f]"""               
   
"""Testing""" 
x = np.random.randn(2,3)
mask = create_mask_from_window(x)
print('x = ', x)
print("mask = ", mask)

x =  [[ 0.3180143  -0.89027155  0.11133727]
 [-0.01952256 -0.83998891 -2.29820588]]
mask =  [[ True False False]
 [False False False]]


In [46]:
""" In average pooling, every element of the input window has equal influence on the output. So to implement backprop, we will now implement a helper function that reflects this i.e. to equally distribute a value dz through a matrix of dimension shape."""
def distribute_value(dz, shape):
    """shape -- the shape (n_H, n_W) of the output matrix for which we want to distribute the value of dz
    a -- Array of size (n_H, n_W) for which we distributed the value of dz
    """   
   
    (nH, nW) = shape
 
    average = dz / (nH * nW)
    a = np.ones(shape) * average
  
    
    return a
"""Testing"""
a = distribute_value(2, (2,2))
print('distributed value =', a)

distributed value = [[ 0.5  0.5]
 [ 0.5  0.5]]


In [53]:
"""Implementing the pool_backward function in both modes ("max" and "average").  cache -- cache output from the forward pass of the pooling layer, contains the layer's input and hparameters. dA -- gradient of cost with respect to the output of the pooling layer, same shape as A. Returns:
    dA_prev -- gradient of cost with respect to the input of the pooling layer, same shape as A_prev"""
def pool_backward(dA, cache, mode = "max"):
     (A_prev, hparameters) = cache    
   
     stride = hparameters["stride"]
     f = hparameters["f"]
    
   
     m, nHprev, nWprev, nCprev = A_prev.shape
     m, nH, nW, nC = dA.shape    
   
     dA_prev = np.zeros(A_prev.shape)
    
     for i in range(m):                     
      
        a_prev = A_prev[i]
        for h in range(nH):              
            for w in range(nW):              
                for c in range(nC):       
                   
                    vert_start = h
                    vert_end = vert_start + f
                    horiz_start = w
                    horiz_end = horiz_start + f
                    
              
                    if mode == "max":
                     
                        a_prev_slice = a_prev[vert_start:vert_end, horiz_start:horiz_end, c]
                     
                        mask = create_mask_from_window(a_prev_slice)
                        
                        dA_prev[i, vert_start:vert_end, horiz_start:horiz_end, c] += np.multiply(mask, dA[i, h, w, c])
                        
                    elif mode == "average":
                        
                        da = dA[i, h, w, c]
                       
                        shape = (f, f)
                       
                        dA_prev[i, vert_start:vert_end, horiz_start:horiz_end, c] += distribute_value(da, shape)
                        

    
     assert(dA_prev.shape == A_prev.shape)
    
     return dA_prev

In [54]:
"""Testing"""
A_prev = np.random.randn(5, 5, 3, 2)
hparameters = {"stride" : 1, "f": 2}
A, cache = pool_forward(A_prev, hparameters)
dA = np.random.randn(5, 4, 2, 2)

dA_prev = pool_backward(dA, cache, mode = "max")
print("mode = max")
print('mean of dA = ', np.mean(dA))
print('dA_prev[1,1] = ', dA_prev[1,1])  
print()
dA_prev = pool_backward(dA, cache, mode = "average")
print("mode = average")
print('mean of dA = ', np.mean(dA))
print('dA_prev[1,1] = ', dA_prev[1,1])

mode = max
mean of dA =  0.138261821554
dA_prev[1,1] =  [[ 0.          0.        ]
 [ 1.6389616   0.        ]
 [ 1.62439537 -2.10961157]]

mode = average
mean of dA =  0.138261821554
dA_prev[1,1] =  [[ 0.53994345 -0.31677985]
 [ 0.94604229 -0.84418274]
 [ 0.40609884 -0.52740289]]
