In [1]:
import numpy as np
import itertools
import time

%load_ext autoreload
%aimport convolve
%autoreload 1

# Timing

In [5]:
# Benchmark one forward pass plus both backward passes of the `convolve` module.

# Problem size: square input of side `dim_input`, square kernel of side `dim_W`.
dim_input = 50
dim_W = 5
padding = 1
stride = 1
# Output side length for a square input/kernel with the given padding and stride.
dim_out = (dim_input - dim_W + 2 * padding) // stride + 1
batch_size = 100
input_depth = 6
output_depth = 16

# The two spatial axes are stored flattened into a single trailing axis of
# length dim ** 2.
# NOTE: `input` shadows the Python builtin; the name is kept for consistency
# with the rest of the notebook.
input = np.arange(batch_size * input_depth * dim_input ** 2, dtype=float).reshape((batch_size, input_depth, dim_input ** 2))
W = np.arange(output_depth * input_depth * dim_W ** 2, dtype=float).reshape((output_depth, input_depth, dim_W ** 2))
output = np.zeros((batch_size,  output_depth, dim_out ** 2), dtype=float)

# With the objective J = sum(output * V), the upstream gradient dJ/dout is V.
V = np.random.random(output.shape)
# J = np.sum(output * V)
dJ_dout = V

# Preallocated result buffers, passed as the last argument of each backward call.
dJ_dW = np.zeros(W.shape, dtype=float)
dJ_din = np.zeros(input.shape, dtype=float)

# perf_counter() is monotonic and high-resolution, so the measured interval
# cannot be distorted by system clock adjustments the way time.time() can.
start = time.perf_counter()
convolve.conv_forward(input, W, dim_input, dim_W, padding, stride, output)

convolve.conv_backward_W(dJ_dout, input, dim_input, dim_W, padding, stride, dJ_dW)
convolve.conv_backward_input(dJ_dout, W, dim_input, dim_W, padding, stride, dJ_din)
print('time elapsed:', time.perf_counter() - start)
# Debugging aids: view the flattened arrays in their 2-D spatial layout.
# print(input.reshape((batch_size, input_depth, dim_input, dim_input)))
# print(W.reshape((output_depth, input_depth, dim_W, dim_W)))
# print(output.reshape((batch_size,  output_depth, dim_out, dim_out)))

time elapsed: 3.518523693084717


# Small delta derivative approximation

In [6]:
# Gradient check: compare the analytic gradients from `convolve` against
# forward finite differences of the scalar objective J = sum(output ** 2 * V).

# time.clock() was removed in Python 3.8; perf_counter() is the supported
# replacement for measuring elapsed time (wall-clock, monotonic).
start = time.perf_counter()
dim_input = 100
dim_W = 9
padding = 4
stride = 8
# Output side length for a square input/kernel with the given padding and stride.
dim_out = (dim_input - dim_W + 2 * padding) // stride + 1
batch_size = 3
input_depth = 3
output_depth = 3

# The two spatial axes are stored flattened into a single trailing axis of
# length dim ** 2.
# NOTE: `input` shadows the Python builtin; the name is kept for consistency
# with the rest of the notebook.
input = np.random.random((batch_size, input_depth, dim_input ** 2))
W = np.random.random((output_depth, input_depth, dim_W ** 2))
output = np.zeros((batch_size,  output_depth, dim_out ** 2), dtype=float)

# The forward result is read back from the preallocated `output` buffer.
convolve.conv_forward(input, W, dim_input, dim_W, padding, stride, output)
V = np.random.random(output.shape)
J = np.sum(output * output * V)

print('J:', J)

# Analytic upstream gradient: d/d(output) of sum(output ** 2 * V).
dJ_dout = 2 * output * V

dJ_dW = np.zeros(W.shape, dtype=float)
dJ_din = np.zeros(input.shape, dtype=float)
convolve.conv_backward_W(dJ_dout, input, dim_input, dim_W, padding, stride, dJ_dW)
convolve.conv_backward_input(dJ_dout, W, dim_input, dim_W, padding, stride, dJ_din)

# --- Numerical dJ/dW: perturb one weight at a time and re-run the forward pass.
out_changed = np.zeros(output.shape, dtype=float)
dJ_dW_approx = np.zeros(dJ_dW.shape, dtype=float)
delta = 1e-5
for i, j, k in itertools.product(*(range(x) for x in W.shape)):
    w_original = W[i, j, k]
    W[i, j, k] = w_original + delta
    convolve.conv_forward(input, W, dim_input, dim_W, padding, stride, out_changed)
    J_changed = np.sum(out_changed * out_changed * V)
    # Forward difference; truncation error is first order in delta.
    dJ_dW_approx[i, j, k] = (J_changed - J) / delta
    # Restore the saved value exactly so later perturbations start from the original W.
    W[i, j, k] = w_original

print('dJ_dW magnitude', np.sum(np.abs(dJ_dW)))
print('total error from approximation', np.sum(np.abs(dJ_dW - dJ_dW_approx)))

# A larger step is used for the input check below.
delta = 1e-3

# --- Numerical dJ/dinput: same procedure, one input element at a time.
# This loop runs one forward pass per input element, so it dominates the runtime.
dJ_din_approx = np.zeros(dJ_din.shape, dtype=float)
for i, j, k in itertools.product(*(range(x) for x in input.shape)):
    input_original = input[i, j, k]
    input[i, j, k] = input_original + delta
    convolve.conv_forward(input, W, dim_input, dim_W, padding, stride, out_changed)
    J_changed = np.sum(out_changed * out_changed * V)
    dJ_din_approx[i, j, k] = (J_changed - J) / delta
    # Restore the saved value exactly before moving on to the next element.
    input[i, j, k] = input_original

print('dJ_din magnitude', np.sum(np.abs(dJ_din)))
print('total error from approximation', np.sum(np.abs(dJ_din - dJ_din_approx)))

# Total elapsed seconds for the whole cell.
print(time.perf_counter() - start)

J: 2611400.16623
dJ_dW magnitude 10144552.6092
total error from approximation 0.583365075803
dJ_din magnitude 10414472.5929
total error from approximation 59.2146657999
71.187524
