In [1]:
import numpy as np

np.random.seed(1)

In [2]:
# Random integer test image with entries in {1, 2} (randint's `high` bound is exclusive).
# Axis order as named throughout this notebook: (channels, width, height).
_input_img = np.random.randint(low=1, high=3, size=(2,3,4))  # float alternative: np.random.rand(2,3,4)
# Shape tuple handed to forward_propagation, which uses it to size its output array.
_input_size = _input_img.shape

# Disabled debug aid (presumably to make a single entry stand out) -- left commented out.
#input_img[x][y][z] += 100
_input_img

array([[[2, 2, 1, 1],
        [2, 2, 2, 2],
        [2, 1, 1, 2]],

       [[1, 2, 2, 1],
        [1, 2, 1, 1],
        [1, 2, 1, 1]]])

In [3]:
# Number of kernels; forward_propagation produces one output channel per kernel.
_num_kernels = 3
# Side length of each square kernel.
_kernel_size = 3
# Random integer kernels with entries in {0, 1, 2}, shape (num_kernels, kernel_size, kernel_size).
# Each kernel is 2-D: forward_propagation applies the same kernel to every input channel.
_kernels = np.random.randint(low=0, high=3, size=(_num_kernels,_kernel_size,_kernel_size))  # float alternative: np.random.rand(...)
_kernels

array([[[0, 2, 0],
        [1, 2, 2],
        [0, 1, 1]],

       [[2, 0, 2],
        [1, 1, 1],
        [1, 2, 1]],

       [[1, 0, 0],
        [1, 0, 0],
        [1, 2, 1]]])

In [4]:
def apply_1d_padding(input_img, kernel_size):
    """Zero-pad a 1-D sequence on both ends.

    Args:
        input_img: 1-D array-like to pad.
        kernel_size: Kernel side length; ``kernel_size // 2`` zeros are added
            to each end.

    Returns:
        A new 1-D array of length ``len(input_img) + 2 * (kernel_size // 2)``.
        The zero borders are float64, so integer input comes back as float.
    """
    border = np.zeros(kernel_size // 2)
    return np.concatenate((border, input_img, border))

def apply_2d_padding(input_img, kernel_size):
    """Zero-pad both spatial axes of a 3-D (channels, width, height) array.

    Every channel receives ``kernel_size // 2`` zeros on each side of BOTH
    spatial axes. The previous implementation always added exactly one zero
    row at the top and bottom regardless of ``kernel_size``, which was only
    correct for kernel sizes 2 and 3.

    Args:
        input_img: 3-D array-like; axis 0 is the channel axis and is not padded.
        kernel_size: Side length of the square kernel the padding is meant for.

    Returns:
        A new array of shape ``(C, W + 2 * pad, H + 2 * pad)`` with
        ``pad = kernel_size // 2``. The dtype is promoted to at least float64,
        matching the old behaviour of concatenating with float zero borders.
    """
    pad = kernel_size // 2
    img = np.asarray(input_img)
    # Integer/bool input is promoted to float64; complex input stays complex.
    img = img.astype(np.result_type(img.dtype, np.float64), copy=False)
    # Leave the channel axis untouched, pad the two spatial axes symmetrically.
    return np.pad(img, ((0, 0), (pad, pad), (pad, pad)), mode="constant", constant_values=0)

#_input_pad = apply_2d_padding(_input_img, _kernel_size)
#_input_pad

In [5]:
# Forward pass
def forward_propagation(input_img, kernels, num_kernels, kernel_size, input_size):
    """Naive "same"-size 2-D cross-correlation, summed over input channels.

    Each kernel is 2-D and is applied (without flipping) to every input
    channel; the per-channel results are accumulated into a single output
    channel for that kernel.

    Args:
        input_img: Array of shape (channels, width, height).
        kernels: Array of shape (num_kernels, kernel_size, kernel_size).
        num_kernels: Number of kernels, i.e. number of output channels.
        kernel_size: Side length of each square kernel.
        input_size: Shape tuple of the input; only entries 1 and 2 are read,
            to allocate the output. The loops themselves iterate over
            ``input_img.shape``.

    Returns:
        Float array of shape (num_kernels, input_size[1], input_size[2]).
    """
    output = np.zeros(shape=(num_kernels, input_size[1], input_size[2]))
    input_pad = apply_2d_padding(input_img, kernel_size)
    n_channels, width, height = input_img.shape[0], input_img.shape[1], input_img.shape[2]

    for i in range(n_channels):  # input channels
        for k in range(num_kernels):  # output channels
            for i_w in range(width):  # img width
                for i_h in range(height):  # img height
                    for k_w in range(kernel_size):
                        for k_h in range(kernel_size):
                            # Padded coordinates: (i_w + k_w, i_h + k_h) is the input
                            # element under kernel tap (k_w, k_h) for output (i_w, i_h).
                            output[k][i_w][i_h] += kernels[k][k_w][k_h] * input_pad[i][i_w + k_w][i_h + k_h]
    return output

# Run the forward pass on the test image; the result has one channel per kernel
# and the same width/height as the input.
_output = forward_propagation(_input_img, _kernels, _num_kernels, _kernel_size, _input_size)
_output

array([[[21., 24., 20., 10.],
        [26., 30., 27., 16.],
        [18., 21., 19., 14.]],

       [[17., 24., 22., 14.],
        [24., 33., 32., 20.],
        [14., 20., 22., 11.]],

       [[10., 17., 17., 12.],
        [ 9., 17., 18., 14.],
        [ 0.,  6.,  7.,  5.]]])

In [29]:
# Backward Pass
def backward_propagation_input(input_img, kernels, num_kernels, kernel_size, output, output_grad, learning_rate):
    """Gradient of the loss with respect to the input of ``forward_propagation``.

    Args:
        input_img: Forward-pass input, shape (channels, width, height). Only
            its shape is used.
        kernels: Array of shape (num_kernels, kernel_size, kernel_size).
        num_kernels: Number of kernels / output channels.
        kernel_size: Side length of each square kernel.
        output: Forward-pass output. Unused; kept for interface compatibility.
        output_grad: Upstream gradient dL/d(output), same shape as ``output``.
        learning_rate: Unused; kept for interface compatibility.

    Returns:
        Float array with the shape of ``input_img`` holding dL/d(input).
    """
    output_grad_pad = apply_2d_padding(output_grad, kernel_size)

    # Accumulate in float: zeros_like(input_img) would inherit an integer dtype
    # from integer images and silently truncate fractional gradients.
    input_grad = np.zeros(input_img.shape, dtype=float)

    # Forward: out[k][a][b] += K[k][p][q] * in_pad[i][a + p][b + q], with
    # in_pad shifted by pad = kernel_size // 2. Hence input (u, v) receives
    # gradient from padded-gradient position (u + 2*pad - p, v + 2*pad - q).
    # For odd kernels 2*pad == kernel_size - 1 (the previous hard-coded offset).
    offset = 2 * (kernel_size // 2)

    for i in range(input_img.shape[0]):  # input channels
        for k in range(num_kernels):  # output channels
            for i_w in range(input_img.shape[1]):  # img width
                for i_h in range(input_img.shape[2]):  # img height
                    for k_w in range(kernel_size):
                        for k_h in range(kernel_size):
                            input_grad[i, i_w, i_h] += (
                                output_grad_pad[k][i_w + offset - k_w][i_h + offset - k_h]
                                * kernels[k][k_w][k_h]
                            )

    return input_grad

def backward_propagation_kernels(input_img, kernels, num_kernels, kernel_size, output, output_grad, learning_rate):
    """Gradient of the loss with respect to the kernels of ``forward_propagation``.

    Args:
        input_img: Forward-pass input, shape (channels, width, height).
        kernels: Array of shape (num_kernels, kernel_size, kernel_size). Only
            its shape is used.
        num_kernels: Number of kernels / output channels.
        kernel_size: Side length of each square kernel.
        output: Forward-pass output. Unused; kept for interface compatibility.
        output_grad: Upstream gradient dL/d(output), shape
            (num_kernels, width, height). It is now honoured; previously it
            was overwritten with an all-ones array.
        learning_rate: Unused; kept for interface compatibility.

    Returns:
        Float array with the shape of ``kernels`` holding dL/d(kernels).
    """
    # Accumulate in float: zeros_like(kernels) would inherit an integer dtype
    # from integer kernels and silently truncate fractional gradients.
    kernels_grad = np.zeros(np.shape(kernels), dtype=float)

    input_pad = apply_2d_padding(input_img, kernel_size)

    for i in range(input_img.shape[0]):  # input channels
        for k in range(num_kernels):  # output channels
            for i_w in range(input_img.shape[1]):  # img width
                for i_h in range(input_img.shape[2]):  # img height
                    for k_w in range(kernel_size):
                        for k_h in range(kernel_size):
                            # Forward: out[k][i_w][i_h] += K[k][k_w][k_h] * in_pad[i][i_w+k_w][i_h+k_h],
                            # so the upstream gradient is indexed by OUTPUT position
                            # (i_w, i_h), not by the kernel tap.
                            kernels_grad[k, k_w, k_h] += (
                                input_pad[i][i_w + k_w][i_h + k_h] * output_grad[k][i_w][i_h]
                            )

    return kernels_grad

# Example call (disabled):
#backward_propagation_input(_input_img, _kernels, _num_kernels, _kernel_size, _output, np.ones_like(_output), 0.001)

In [24]:
# Test gradients: numerical check of d(sum(output)) / d(input[x, y, z]).
# Perturb one randomly chosen input entry by +1 and compare the summed outputs.
# The forward pass is linear in the input, so the difference is the exact derivative.
_input_img2 = _input_img.copy()
x = np.random.randint(_input_img.shape[0])
y = np.random.randint(_input_img.shape[1])
z = np.random.randint(_input_img.shape[2])
print((x,y,z))
_input_img2[x][y][z] += 1
#print(_input_img)
#print(_input_img2)

# Forward pass on the original and on the perturbed image.
_output = forward_propagation(_input_img, _kernels, _num_kernels, _kernel_size, _input_size)
_output2 = forward_propagation(_input_img2, _kernels, _num_kernels, _kernel_size, _input_size)

#print(_output - _output2)
# Change in the scalar sum(output); this is the value the analytic input
# gradient at [x][y][z] should reproduce for an all-ones upstream gradient.
s1 = _output.sum()
s2 = _output2.sum()
print(s2-s1)

(1, 1, 2)
26.0


In [None]:
# Upstream gradient of sum(output) with respect to output: all ones.
_output_error = np.ones_like(_output)

# Analytic input gradient; the entry at [x][y][z] is the value to compare with
# the finite-difference result printed by the input-perturbation cell.
# NOTE(review): x, y, z are reassigned by the later kernel-perturbation cell, so
# this cell must run before that one (it has no recorded execution count).
_input_grad = backward_propagation_input(_input_img, _kernels, _num_kernels, _kernel_size, _output, _output_error, 0.001)
_input_grad[x][y][z]

In [32]:
# Numerical check of d(sum(output)) / d(kernels[x, y, z]):
# perturb one randomly chosen kernel entry by +1 and compare the summed outputs.
# NOTE: this reuses (and overwrites) the x, y, z chosen for the input check.
_kernels2 = _kernels.copy()
x = np.random.randint(_kernels.shape[0])
y = np.random.randint(_kernels.shape[1])
z = np.random.randint(_kernels.shape[2])
_kernels2[x][y][z] += 1

# Forward pass with the original and with the perturbed kernels.
_output = forward_propagation(_input_img, _kernels, _num_kernels, _kernel_size, _input_size)
_output2 = forward_propagation(_input_img, _kernels2, _num_kernels, _kernel_size, _input_size)
# The forward pass is linear in the kernels, so this difference is the exact derivative.
s1 = _output.sum()
s2 = _output2.sum()
print(s2-s1)

28.0


In [33]:
# Analytic kernel gradient for the all-ones upstream gradient `_output_error`
# (defined in the input-gradient cell); the entry at [x][y][z] should equal the
# finite-difference value printed by the kernel-perturbation cell.
_kernels_grad = backward_propagation_kernels(_input_img, _kernels, _num_kernels, _kernel_size, _output, _output_error, 0.001)
_kernels_grad[x][y][z]

28