In [2]:
import numpy as np
from scipy.signal import convolve

# Convolution

In [3]:
def get_output_dim(
    input_dim: int,
    kernel_dim: int,
    padding: int,
    stride: int
) -> int:
    """Return how many positions a kernel can take along one axis.

    Evaluates ``floor((input_dim + 2 * padding - kernel_dim) / stride) + 1``
    with integer arithmetic only, so the result stays exact for sizes that a
    float round-trip would round (above 2**53).

    Args:
        input_dim: Size of the unpadded input along the axis.
        kernel_dim: Size of the kernel along the same axis.
        padding: Number of elements added on each side of the axis.
        stride: Step between two consecutive kernel positions.

    Returns:
        The output size along the axis, or 0 when the kernel does not fit
        into the padded input (instead of a negative or truncated value).

    Raises:
        ZeroDivisionError: If ``stride`` is 0.
    """
    free_span = input_dim + 2 * padding - kernel_dim
    # Floor division keeps everything in exact integer arithmetic; the clamp
    # maps every "kernel larger than padded input" case to an empty output.
    return max(0, int(free_span // stride) + 1)

In [4]:
def convolution_layer_forward(
    A_prev: np.ndarray,
    W_curr: np.ndarray,
    b_curr: np.ndarray,
    stride: int,
    padding: int
) -> np.ndarray:
    """Forward pass of a convolutional layer, written with explicit loops.

    Every output element is the sum of the element-wise product between one
    kernel and the matching window of the zero-padded input, plus that
    kernel's bias. The kernel is applied as-is (it is not flipped).

    Args:
        A_prev: Input activations of shape
            (examples, channels, height, width).
        W_curr: Kernels of shape
            (kernels, channels, kernel_height, kernel_width).
        b_curr: Biases, indexed by kernel along the first axis.
        stride: Step between consecutive window positions.
        padding: Number of zeros added on each side of both spatial axes.

    Returns:
        Array of shape (examples, kernels, out_height, out_width), where the
        spatial sizes are given by ``get_output_dim``.

    Raises:
        AssertionError: If the channel counts of ``A_prev`` and ``W_curr``
            differ.
    """
    examples_count, activation_channels, activation_height, activation_width = \
        A_prev.shape
    kernel_count, kernel_channels, kernel_height, kernel_width = \
        W_curr.shape
    assert activation_channels == kernel_channels

    Z_curr_height = get_output_dim(activation_height, kernel_height, padding, stride)
    Z_curr_width = get_output_dim(activation_width, kernel_width, padding, stride)
    Z_curr = np.zeros([examples_count, kernel_count, Z_curr_height, Z_curr_width])

    # Pad only the two spatial axes; examples and channels stay untouched.
    A_prev_padded = np.pad(
        A_prev,
        pad_width=((0, 0), (0, 0), (padding, padding), (padding, padding)),
        mode='constant',
        constant_values=0,
    )

    for i in range(examples_count):
        for k in range(Z_curr_height):
            rows = slice(k * stride, k * stride + kernel_height)
            for l in range(Z_curr_width):
                cols = slice(l * stride, l * stride + kernel_width)
                # The window depends only on (i, k, l), so slice it once and
                # reuse it for every kernel.
                window = A_prev_padded[i, :, rows, cols]
                for j in range(kernel_count):
                    Z_curr[i, j, k, l] = np.sum(window * W_curr[j]) + b_curr[j]

    return Z_curr


In [52]:
# Deterministic fixtures for the forward pass: evenly spaced values reshaped
# into 4-D tensors, plus one bias per kernel.
x_shape, w_shape = (2, 3, 4, 4), (3, 3, 4, 4)
x = np.reshape(np.linspace(-0.1, 0.5, num=np.prod(x_shape)), x_shape)
w = np.reshape(np.linspace(-0.2, 0.3, num=np.prod(w_shape)), w_shape)
b = np.linspace(-0.1, 0.2, num=3)

In [53]:
# Call the forward pass defined above; the previous name
# (convolution_3d_layer_forward) is not defined in this notebook and raises
# NameError on a fresh kernel.
convolution_layer_forward(x, w, b, stride=2, padding=1)

array([[[[-0.08759809, -0.10987781],
         [-0.18387192, -0.2109216 ]],

        [[ 0.21027089,  0.21661097],
         [ 0.22847626,  0.23004637]],

        [[ 0.50813986,  0.54309974],
         [ 0.64082444,  0.67101435]]],


       [[[-0.98053589, -1.03143541],
         [-1.19128892, -1.24695841]],

        [[ 0.69108355,  0.66880383],
         [ 0.59480972,  0.56776003]],

        [[ 2.36270298,  2.36904306],
         [ 2.38090835,  2.38247847]]]])

In [None]:
def conv_backward_naive(dout, cache):
    """
    A naive implementation of the backward pass for a convolutional layer.

    Inputs:
    - dout: Upstream derivatives, of shape (N, F, H_out, W_out).
    - cache: A tuple of (x, w, b, conv_param) where
      - x: Input data of shape (N, C, H, W)
      - w: Filter weights of shape (F, C, HH, WW)
      - b: Biases, indexed by filter along the first axis
      - conv_param: A dictionary with the keys 'stride' and 'pad'

    Returns a tuple of:
    - dx: Gradient with respect to x, same shape as x
    - dw: Gradient with respect to w, same shape as w
    - db: Gradient with respect to b, same shape as b
    """
    x, w, b, conv_param = cache
    stride = conv_param['stride']
    pad = conv_param['pad']
    N, _, H, W = x.shape
    F, _, HH, WW = w.shape
    # Integer floor division keeps the output sizes exact integers.
    H_out = 1 + (H + 2 * pad - HH) // stride
    W_out = 1 + (W + 2 * pad - WW) // stride

    npad = ((0, 0), (0, 0), (pad, pad), (pad, pad))
    x_pad = np.pad(x, pad_width=npad, mode='constant', constant_values=0)

    # Gradients are accumulated against the padded input and cropped at the end.
    dx_pad = np.zeros(x_pad.shape)
    dw = np.zeros(w.shape)
    db = np.zeros(b.shape)

    # The window slices depend only on the output position, so build them once.
    row_windows = [slice(j * stride, j * stride + HH) for j in range(H_out)]
    col_windows = [slice(k * stride, k * stride + WW) for k in range(W_out)]

    for i in range(N):
        for f in range(F):
            for j, rows in enumerate(row_windows):
                for k, cols in enumerate(col_windows):
                    grad = dout[i, f, j, k]
                    # Each output element sees one input window through filter f.
                    dw[f] += x_pad[i, :, rows, cols] * grad
                    dx_pad[i, :, rows, cols] += w[f] * grad
                    db[f] += grad

    # Drop the gradient that flowed into the zero padding.
    dx = dx_pad[:, :, pad:(pad + H), pad:(pad + W)]
    return dx, dw, db


## 2D Naive implementation

In [3]:
# 6x6 test image: three rows of 10 stacked on three rows of 0, i.e. a single
# horizontal edge between rows 2 and 3.
A = np.concatenate([
    np.full((3, 6), 10),
    np.zeros((3, 6), dtype=int),
])

In [5]:
# 3x3 horizontal-edge kernel, built as an outer product of a vertical
# difference [1, 0, -1] and a horizontal smoothing [1, 2, 1]:
#   [[ 1,  2,  1],
#    [ 0,  0,  0],
#    [-1, -2, -1]]
k = np.outer([1, 0, -1], [1, 2, 1])

In [15]:
def simple_conv2d(image: np.array , kernel: np.array) -> float:
    """Return the sum of the element-wise product of ``image`` and ``kernel``.

    This is one step of a sliding-window filter: the kernel is used as given
    (it is not flipped), and both arguments must be broadcast-compatible.
    """
    weighted = np.multiply(image, kernel)
    return weighted.sum()

In [23]:
def calculate_output_dimension(image_dim: int, kernel_dim: int, padding: int = 0, stride: int = 1) -> int:
    """Return how many positions a kernel can take along one image axis.

    Evaluates ``floor((image_dim + 2 * padding - kernel_dim) / stride) + 1``
    with integer arithmetic only, so the result stays exact for sizes that a
    float round-trip would round (above 2**53).

    Args:
        image_dim: Size of the unpadded image along the axis.
        kernel_dim: Size of the kernel along the same axis.
        padding: Number of elements added on each side of the axis.
        stride: Step between two consecutive kernel positions.

    Returns:
        The output size along the axis, or 0 when the kernel does not fit
        into the padded image (instead of a negative or truncated value).

    Raises:
        ZeroDivisionError: If ``stride`` is 0.
    """
    free_span = image_dim + 2 * padding - kernel_dim
    # Floor division keeps everything in exact integer arithmetic; the clamp
    # maps every "kernel larger than padded image" case to an empty output.
    return max(0, int(free_span // stride) + 1)

In [24]:
def crop(image, anchor, shape):
    """Slice a rectangular window out of a 2-D ``image``.

    ``anchor`` holds the (row, column) of the window's top-left corner and
    ``shape`` its (height, width). Standard slicing rules apply, so a window
    that runs past the image border is silently shortened.
    """
    top, left = anchor[0], anchor[1]
    rows = slice(top, top + shape[0])
    cols = slice(left, left + shape[1])
    return image[rows, cols]

In [25]:
def naive_convolution(image, kernel, padding = 0, stride = 1):
    """Slide ``kernel`` over a zero-padded 2-D ``image`` and collect the sums.

    For every output position the matching window is cut out of the padded
    image with ``crop`` and reduced with ``simple_conv2d``; the kernel is
    applied as given (it is not flipped).

    Returns a float array whose side lengths are computed by
    ``calculate_output_dimension`` from the image size, kernel size,
    ``padding`` and ``stride``.
    """
    rows_in, cols_in = image.shape
    rows_kernel, cols_kernel = kernel.shape

    # Allocate the result up front; its shape fixes the positions to visit.
    result = np.zeros((
        calculate_output_dimension(rows_in, rows_kernel, padding, stride),
        calculate_output_dimension(cols_in, cols_kernel, padding, stride),
    ))

    # Surround the image with `padding` zeros on every side.
    padded = np.pad(
        image,
        [(padding, padding), (padding, padding)],
        mode='constant',
        constant_values=0,
    )

    for row, col in np.ndindex(*result.shape):
        window = crop(padded, (row * stride, col * stride), kernel.shape)
        result[row, col] = simple_conv2d(window, kernel)

    return result

In [26]:
# Filter the step image with the edge kernel: no padding, unit stride.
naive_convolution(A, k, padding=0, stride=1)

array([[ 0.,  0.,  0.,  0.],
       [40., 40., 40., 40.],
       [40., 40., 40., 40.],
       [ 0.,  0.,  0.,  0.]])

In [27]:
# Reference result from scipy.signal.convolve. SciPy flips the kernel (true
# convolution) while naive_convolution applies it as given, so for this
# kernel the two results have opposite signs.
convolve(A, k, mode="valid")

array([[  0,   0,   0,   0],
       [-40, -40, -40, -40],
       [-40, -40, -40, -40],
       [  0,   0,   0,   0]])