In [4]:
import numpy as np
# Seed NumPy's global random generator so np.random draws are repeatable
# (the test functions in this notebook also re-seed before drawing their inputs).
np.random.seed(1)

In [16]:
## Adds constant padding to a vector, matrix, image or batch of images
## dims: 1 for vectors, 2 for matrices, 3 for images, 4 for a batch of images

def padding(X, pad = 1, dims = 3, const = 0):
    """Pad `X` with the constant `const`, `pad` cells on each padded side.

    `dims` selects which axes are padded:
        1 -> vector: (pad, pad) is handed to np.pad as-is
        2 -> matrix: both axes
        3 -> image: the first two axes; the last axis is left unpadded
        4 -> batch of images: the two middle axes; the first and last axes
             are left unpadded

    Returns:
        The padded array produced by np.pad.

    Raises:
        ValueError: if `dims` is not 1, 2, 3 or 4. (Previously the function
            fell through every `if` and silently returned None.)
    """
    edge = (pad, pad)   # pad this axis on both sides
    keep = (0, 0)       # leave this axis untouched
    pad_widths = {
        1: edge,
        2: (edge, edge),
        3: (edge, edge, keep),
        4: (keep, edge, edge, keep),
    }
    if dims not in pad_widths:
        raise ValueError("dims must be 1, 2, 3 or 4, got {!r}".format(dims))
    return np.pad(X, pad_widths[dims], 'constant', constant_values = (const, const))



In [1]:
## Convolution of a slice with (W)eights, return output Z
def conv_slice(slice, W, b):
    """Apply one filter to one input window.

    Multiplies `slice` and `W` element-wise, sums every product and adds the
    bias `b` (converted to a Python float). Returns that single value.
    """
    weighted_total = np.multiply(slice, W).sum()
    return weighted_total + float(b)
    

In [80]:
## Forward propagation with modes conv, maxpool or avepool
def forward(X, W, b, pad, stride, f, mode='conv'):
    """Forward pass of a convolution or pooling layer over a batch.

    Args:
        X: input batch; its shape is unpacked as (m, Xh, Xw, Xc).
        W: filters; shape is unpacked as (f, f, Xc, Zc). Only read when
            mode == 'conv'.
        b: biases, indexed as b[0, 0, 0, c]. Only read when mode == 'conv'.
        pad: number of zero cells added on each side of the two middle
            axes of X before windows are taken.
        stride: step between consecutive windows.
        f: window size for the pooling modes. Ignored for 'conv', where the
            window size is taken from W's shape instead.
        mode: 'conv', 'maxpool' or 'avepool'.

    Returns:
        (Z, cache). Z has shape (m, Zh, Zw, Zc); Zc is the number of filters
        for 'conv' and the number of input channels for pooling. cache is
        (X, W, b, pad, stride) for 'conv' and (X, f, stride) for pooling.

    Raises:
        ValueError: if `mode` is not one of the three supported values.
    """
    if mode not in ('conv', 'maxpool', 'avepool'):
        raise ValueError(
            "mode must be 'conv', 'maxpool' or 'avepool', got {!r}".format(mode))

    # Shape of input batch
    (m, Xh, Xw, Xc) = np.shape(X)

    # The cache is built once, up front, so it exists even when the output
    # volume is empty and the loops below never execute.
    if mode == 'conv':
        # Shape of weights is filter size x filter size x input channels x #filters
        (_, f, _, Zc) = np.shape(W)
        cache = (X, W, b, pad, stride)
    else:
        Zc = Xc
        # NOTE: pad is not stored for pooling, so pool_backward indexes the
        # unpadded X; with pad > 0 the padded zeros also take part in max/mean.
        cache = (X, f, stride)

    # Shape of output volume
    Zh = int((Xh + 2*pad - f) / stride + 1)
    Zw = int((Xw + 2*pad - f) / stride + 1)

    # Initialize output: batch_size, out_height, out_width, #channels
    Z = np.zeros((m, Zh, Zw, Zc))

    # Zero-pad the two spatial axes of the 4-D batch
    X_pad = padding(X, pad, 4, 0)

    for i in range(m):
        Xi = X_pad[i]
        for h in range(Zh):
            vert_start = h * stride
            vert_end = vert_start + f
            for w in range(Zw):
                horiz_start = w * stride
                horiz_end = horiz_start + f

                # The window is the same for every output channel
                Xi_window = Xi[vert_start:vert_end, horiz_start:horiz_end, :]

                for c in range(Zc):
                    if mode == 'conv':
                        Z[i, h, w, c] = conv_slice(Xi_window, W[:, :, :, c], b[0, 0, 0, c])
                    elif mode == 'maxpool':
                        Z[i, h, w, c] = np.max(Xi_window[:, :, c])
                    else:
                        Z[i, h, w, c] = np.mean(Xi_window[:, :, c])

    assert(Z.shape == (m, Zh, Zw, Zc))

    return Z, cache

In [81]:
def backward(dZ, cache):
    """Backward pass of the convolution computed by forward(..., mode='conv').

    Args:
        dZ: gradient with respect to the convolution output; its shape is
            unpacked as (m, Zh, Zw, Zc).
        cache: the (X, W, b, pad, stride) tuple returned by forward in
            'conv' mode.

    Returns:
        (dX, dW, db): gradients with shapes (m, Xh, Xw, Xc), (f, f, Xc, Zc)
        and (1, 1, 1, Zc) respectively.
    """
    (X, W, b, pad, stride) = cache

    # Spatial size of the input, filter size / channel count, output dimensions
    (_, Xh, Xw, _) = np.shape(X)
    (_, f, Xc, _) = np.shape(W)
    (m, Zh, Zw, Zc) = np.shape(dZ)

    # Initialize dX, dW, db
    dX = np.zeros((m, Xh, Xw, Xc))
    dW = np.zeros((f, f, Xc, Zc))
    db = np.zeros((1, 1, 1, Zc))

    # Pad X and dX the same way forward padded its input
    X_pad = padding(X, pad, 4, 0)
    dX_pad = padding(dX, pad, 4, 0)

    for i in range(m):
        # Accumulates the padded gradient for example i
        dXi = dX_pad[i, :, :, :]

        for h in range(Zh):                   # vertical axis of the output volume
            vert_start = stride * h
            vert_end = vert_start + f
            for w in range(Zw):               # horizontal axis of the output volume
                horiz_start = stride * w
                horiz_end = horiz_start + f

                # Input window that produced Z[i, h, w, :]
                Xi_slice = X_pad[i, vert_start:vert_end, horiz_start:horiz_end, :]

                for c in range(Zc):           # channels of the output volume
                    dXi[vert_start:vert_end, horiz_start:horiz_end, :] += W[:, :, :, c] * dZ[i, h, w, c]
                    dW[:, :, :, c] += Xi_slice * dZ[i, h, w, c]
                    db[:, :, :, c] += dZ[i, h, w, c]

        # Strip the padding. Explicit end indices are used because the
        # previous `pad:-pad` slice is empty when pad == 0 (-0 is just 0).
        dX[i, :, :, :] = dXi[pad:pad + Xh, pad:pad + Xw, :]

    # Making sure the output shape is correct
    assert(dX.shape == (m, Xh, Xw, Xc))

    return dX, dW, db

In [82]:
# mask of maximum value of x
def max_mask(x):
    """Return a boolean mask shaped like `x` that is True wherever `x` equals
    its maximum; every tied position is marked."""
    peak = np.max(x)
    return np.equal(x, peak)

In [113]:
# spread value dz to a shape
def distribute_value(dz, shape):
    """Split `dz` evenly over an array of the given 2-D `shape`.

    `shape` is unpacked as (rows, cols); every cell of the returned array
    holds dz / (rows * cols).
    """
    rows, cols = shape
    share = dz / (rows * cols)
    return np.full(shape, share)
    

In [118]:
def pool_backward(dA, cache, mode = 'max'):
    """Backward pass of a pooling layer.

    Args:
        dA: gradient with respect to the pooling output; its shape is
            unpacked as (m, Zh, Zw, Zc).
        cache: the (X, f, stride) tuple returned by forward in a pooling mode.
        mode: 'max' or 'average'. The names used by forward, 'maxpool' and
            'avepool', are accepted as synonyms.

    Returns:
        dX: gradient with respect to X, with the same shape as X.

    Raises:
        ValueError: if `mode` is not recognised. (Previously an unknown mode
            silently produced an all-zero gradient.)

    Note:
        Windows are taken from the unpadded X because the cache carries no
        pad value.
    """
    X, f, stride = cache

    if mode in ('max', 'maxpool'):
        use_max = True
    elif mode in ('average', 'avepool'):
        use_max = False
    else:
        raise ValueError(
            "mode must be 'max' or 'average', got {!r}".format(mode))

    m, Zh, Zw, Zc = np.shape(dA)
    dX = np.zeros(np.shape(X))

    for i in range(m):
        Xi = X[i]

        for h in range(Zh):                   # vertical axis of the output
            vert_start = stride * h
            vert_end = vert_start + f
            for w in range(Zw):               # horizontal axis of the output
                horiz_start = stride * w
                horiz_end = horiz_start + f
                for c in range(Zc):           # channels (depth)
                    if use_max:
                        # Only the position(s) holding the window maximum receive gradient
                        Xi_slice = Xi[vert_start:vert_end, horiz_start:horiz_end, c]
                        mask = max_mask(Xi_slice)
                        dX[i, vert_start:vert_end, horiz_start:horiz_end, c] += mask * dA[i, h, w, c]
                    else:
                        # Every cell of the window receives an equal share
                        dX[i, vert_start:vert_end, horiz_start:horiz_end, c] += distribute_value(dA[i, h, w, c], (f, f))

    # Making sure the output shape is correct
    assert(dX.shape == X.shape)

    return dX

In [120]:
def test_pool_backward():
    """Run pool_backward in both modes on a seeded random batch and print
    a few values for comparison against the recorded output."""
    np.random.seed(1)
    X = np.random.randn(5, 5, 3, 2)
    stride = 1
    f = 2
    # forward(X, W, b, pad, stride, f, mode): W and b are not read when pooling.
    # Uses the local stride/f; the old call referenced an undefined name
    # (`str1`) and only worked with leftover kernel state.
    A, cache = forward(X, None, None, 0, stride, f, mode='maxpool')
    # The upstream gradient must have the pooled output's shape, here (5, 4, 2, 2)
    dA = np.random.randn(*A.shape)

    dA_prev = pool_backward(dA, cache, mode = "max")
    print("mode = max")
    print('mean of dA = ', np.mean(dA))
    print('dA_prev[1,1] = ', dA_prev[1,1])
    print()
    dA_prev = pool_backward(dA, cache, mode = "average")
    print("mode = average")
    print('mean of dA = ', np.mean(dA))
    print('dA_prev[1,1] = ', dA_prev[1,1])
test_pool_backward()

mode = max
mean of dA =  0.145713902729
dA_prev[1,1] =  [[ 0.          0.        ]
 [ 5.05844394 -1.68282702]
 [ 0.          0.        ]]

mode = average
mean of dA =  0.145713902729
dA_prev[1,1] =  [[ 0.08485462  0.2787552 ]
 [ 1.26461098 -0.25749373]
 [ 1.17975636 -0.53624893]]


In [51]:
def test_forward():
    """Print reference values from forward (conv, max-pool, average-pool)
    and from backward on the convolution result."""
    # --- convolution: 10 RGB images of 4x4, 8 RGB filters of 2x2, one bias per filter
    np.random.seed(1)
    images = np.random.randn(10, 4, 4, 3)
    filters = np.random.randn(2, 2, 3, 8)
    biases = np.random.randn(1, 1, 1, 8)
    conv_pad, conv_stride = 2, 2

    # The window-size argument is None: in 'conv' mode forward takes it from W
    Z, cache_conv = forward(images, filters, biases, conv_pad, conv_stride, None)
    print("Z's mean =", np.mean(Z))
    print("Z[3,2,1] =", Z[3,2,1])
    print("cache_conv[0][1][2][3] =", cache_conv[0][1][2][3])

    # --- pooling: 2 images of 4x4x3, 3x3 window, stride 2, no padding
    np.random.seed(1)
    pool_input = np.random.randn(2, 4, 4, 3)
    pool_stride, pool_size = 2, 3

    pooled, _ = forward(pool_input, None, None, 0, pool_stride, pool_size, mode='maxpool')
    print("mode = max")
    print("A =", pooled)
    print()
    pooled, _ = forward(pool_input, None, None, 0, pool_stride, pool_size, mode='avepool')
    print("mode = average")
    print("A =", pooled)

    # --- convolution backward, fed with Z itself as the upstream gradient
    np.random.seed(1)
    grad_X, grad_W, grad_b = backward(Z, cache_conv)
    print("dA_mean =", np.mean(grad_X))
    print("dW_mean =", np.mean(grad_W))
    print("db_mean =", np.mean(grad_b))

test_forward()

Z's mean = 0.0489952035289
Z[3,2,1] = [-0.61490741 -6.7439236  -2.55153897  1.75698377  3.56208902  0.53036437
  5.18531798  8.75898442]
cache_conv[0][1][2][3] = [-0.20075807  0.18656139  0.41005165]
mode = max
A = [[[[ 1.74481176  0.86540763  1.13376944]]]


 [[[ 1.13162939  1.51981682  2.18557541]]]]

mode = average
A = [[[[ 0.02105773 -0.20328806 -0.40389855]]]


 [[[-0.22154621  0.51716526  0.48155844]]]]
dA_mean = 1.45243777754
dW_mean = 1.72699145831
db_mean = 7.83923256462


In [13]:
## 0*0 + 1*1 + 2*0.5 + 4*0.25 + 1 = 4.0
def test_conv_slice():
    """Print the shape of a 2x2 window, then conv_slice applied to it with a
    2x2 filter and a bias of 1."""
    window = [[0, 1], [2, 4]]
    kernel = [[0, 1], [0.5, 0.25]]
    offset = 1

    print(np.shape(window))
    result = conv_slice(window, kernel, offset)
    print(result)

test_conv_slice()

(2, 2)
4.0


In [None]:
def test_padding():
    """Print inputs, padded results and shapes for each `dims` case of padding()."""
    batch = [[[[0,4,8],[1,5,9]],[[2,6,0],[3,7,1]]],[[[0,4,8],[1,5,9]],[[2,6,0],[3,7,1]]]]
    image = [[[0,4,8],[1,5,9]],[[2,6,10],[3,7,11]]]
    # A plain 2-D array; np.matrix is no longer recommended by NumPy
    matrix = np.array([[1, 2, 3], [4, 5, 6]])
    vector = [0,1,2,3]

    # dims = 1: vector
    padded_vector = padding(vector, 1, 1, 0)
    print ("Vector")
    print (vector)
    print (np.shape(vector))
    print (padded_vector)
    print (np.shape(padded_vector))
    print ()

    # dims = 2: matrix
    padded_matrix = padding(matrix, 1, 2, 0)
    print ("Matrix")
    print (matrix)
    print (np.shape(matrix))
    print (padded_matrix)
    print (np.shape(padded_matrix))
    print ()

    # dims = 3: image (only the shapes are printed)
    padded_image = padding(image, 1, 3, 0)
    print ("Padded image and shape")
    print (np.shape(image))
    print (np.shape(padded_image))
    print ()

    # dims = 4: batch of images (only the shapes are printed)
    padded_batch = padding(batch, 1, 4, 0)
    print ("Padded batch and shape")
    print (np.shape(batch))
    print (np.shape(padded_batch))
    print ()