In [1]:
import numpy as np
import itertools
import time

%load_ext autoreload
%aimport convolve
%autoreload 1

# Timing

In [5]:
# Benchmark: time one forward pass and both backward passes of the
# convolution routines in `convolve` on random data.
dim_input = 50
dim_W = 5
padding = 1
stride = 1
# Side length used for the pre-allocated output buffer below.
dim_out = (dim_input - dim_W + 2 * padding) // stride + 1
batch_size = 100
input_depth = 6
output_depth = 16

input = np.random.random((batch_size, input_depth, dim_input, dim_input))
W = np.random.random((output_depth, input_depth, dim_W, dim_W))
# Result buffers are allocated up front and handed to the convolve calls,
# whose return values are not used (presumably they fill the last argument
# in place -- confirm against convolve's implementation).
output = np.zeros((batch_size,  output_depth, dim_out, dim_out), dtype=float)

# With the scalar objective J = np.sum(output * V), dJ/d(output) is simply V.
V = np.random.random(output.shape)
dJ_dout = V

dJ_dW = np.zeros(W.shape, dtype=float)
dJ_din = np.zeros(input.shape, dtype=float)

# perf_counter() is monotonic and high resolution, so the measured interval
# cannot be distorted by system clock adjustments the way time.time() can.
start = time.perf_counter()
convolve.conv_forward(input, W, padding, stride, output)
convolve.conv_backward_W(dJ_dout, input, padding, stride, dJ_dW)
convolve.conv_backward_input(dJ_dout, W, padding, stride, dJ_din)

print('time elapsed:', time.perf_counter() - start)
# Debugging aids (uncomment to inspect the tensors):
# print(input.reshape((batch_size, input_depth, dim_input, dim_input)))
# print(W.reshape((output_depth, input_depth, dim_W, dim_W)))
# print(output.reshape((batch_size,  output_depth, dim_out, dim_out)))

time elapsed: 4.081375598907471


In [3]:
# Pooling smoke test on a tiny input whose entries are 0, 1, 2, ... so the
# printed forward and backward results can be checked by eye.
batch_size = 1
depth = 1
dim_input = 10
dim_pool = 3
# Ceiling division: a trailing partial window still yields an output cell.
dim_output = (dim_input - 1) // dim_pool + 1

# Build the ramp directly instead of allocating a throwaway zeros array
# just to read its size and shape.
input = np.arange(
    batch_size * depth * dim_input * dim_input, dtype=float
).reshape((batch_size, depth, dim_input, dim_input))
output = np.zeros((batch_size, depth, dim_output, dim_output), dtype=float)

# time.clock() was removed in Python 3.8; perf_counter() is the replacement
# for measuring short intervals.
start = time.perf_counter()
convolve.pool_forward(input, dim_pool, output)
# Arbitrary upstream gradient derived from the pooled values, so each routed
# gradient entry is recognisable in the printout.
dJ_dout = output - 1
dJ_din = np.zeros(input.shape, dtype=float)
convolve.pool_backward(dJ_dout, input, output, dim_pool, dJ_din)
print(input)
print(output)
print(dJ_din)
print(time.perf_counter() - start)

[[[[  0.   1.   2.   3.   4.   5.   6.   7.   8.   9.]
   [ 10.  11.  12.  13.  14.  15.  16.  17.  18.  19.]
   [ 20.  21.  22.  23.  24.  25.  26.  27.  28.  29.]
   [ 30.  31.  32.  33.  34.  35.  36.  37.  38.  39.]
   [ 40.  41.  42.  43.  44.  45.  46.  47.  48.  49.]
   [ 50.  51.  52.  53.  54.  55.  56.  57.  58.  59.]
   [ 60.  61.  62.  63.  64.  65.  66.  67.  68.  69.]
   [ 70.  71.  72.  73.  74.  75.  76.  77.  78.  79.]
   [ 80.  81.  82.  83.  84.  85.  86.  87.  88.  89.]
   [ 90.  91.  92.  93.  94.  95.  96.  97.  98.  99.]]]]
[[[[ 22.  25.  28.  29.]
   [ 52.  55.  58.  59.]
   [ 82.  85.  88.  89.]
   [ 92.  95.  98.  99.]]]]
[[[[  0.   0.   0.   0.   0.   0.   0.   0.   0.   0.]
   [  0.   0.   0.   0.   0.   0.   0.   0.   0.   0.]
   [  0.   0.  21.   0.   0.  24.   0.   0.  27.  28.]
   [  0.   0.   0.   0.   0.   0.   0.   0.   0.   0.]
   [  0.   0.   0.   0.   0.   0.   0.   0.   0.   0.]
   [  0.   0.  51.   0.   0.  54.   0.   0.  57.  58.]
   [  0.   0. 

# Small delta derivative approximation

In [8]:
# Gradient check for the convolution layer: compare the analytic gradients
# from conv_backward_W / conv_backward_input with forward finite differences
# of the scalar objective J = sum(output**2 * V).
#
# time.clock() was removed in Python 3.8; perf_counter() is the replacement
# for measuring elapsed intervals.
start = time.perf_counter()
dim_input = 30
dim_W = 8
padding = 1
stride = 1
dim_out = (dim_input - dim_W + 2 * padding) // stride + 1
batch_size = 3
input_depth = 3
output_depth = 3

input = np.random.random((batch_size, input_depth, dim_input, dim_input))
W = np.random.random((output_depth, input_depth, dim_W, dim_W))
output = np.zeros((batch_size,  output_depth, dim_out, dim_out), dtype=float)

# Baseline forward pass and objective value.
convolve.conv_forward(input, W, padding, stride, output)
V = np.random.random(output.shape)
J = np.sum(output * output * V)

print('J:', J)

# d/d(output) of sum(output**2 * V).
dJ_dout = 2 * output * V

# Analytic gradients under test.
dJ_dW = np.zeros(W.shape, dtype=float)
dJ_din = np.zeros(input.shape, dtype=float)
convolve.conv_backward_W(dJ_dout, input, padding, stride, dJ_dW)
convolve.conv_backward_input(dJ_dout, W, padding, stride, dJ_din)

# Numerical gradient w.r.t. W: bump one weight at a time, rerun the forward
# pass into a scratch buffer, and restore the weight afterwards.
out_changed = np.zeros(output.shape, dtype=float)
dJ_dW_approx = np.zeros(dJ_dW.shape, dtype=float)
delta = 1e-5
for i, j, k, l in itertools.product(*(range(x) for x in W.shape)):
    w_original = W[i, j, k, l]
    W[i, j, k, l] = w_original + delta
    convolve.conv_forward(input, W, padding, stride, out_changed)
    J_changed = np.sum(out_changed * out_changed * V)
    dJ_dW_approx[i, j, k, l] = (J_changed - J) / delta
    W[i, j, k, l] = w_original

print('dJ_dW magnitude', np.sum(np.abs(dJ_dW)))
print('total error from approximation', np.sum(np.abs(dJ_dW - dJ_dW_approx)))

# Numerical gradient w.r.t. the input, same procedure with a larger step.
# Forward differences carry an O(delta) truncation error, so a larger
# absolute error than in the W check is expected here.
delta = 1e-3

dJ_din_approx = np.zeros(dJ_din.shape, dtype=float)
for i, j, k, l in itertools.product(*(range(x) for x in input.shape)):
    input_original = input[i, j, k, l]
    input[i, j, k, l] = input_original + delta
    convolve.conv_forward(input, W, padding, stride, out_changed)
    J_changed = np.sum(out_changed * out_changed * V)
    dJ_din_approx[i, j, k, l] = (J_changed - J) / delta
    input[i, j, k, l] = input_original

print('dJ_din magnitude', np.sum(np.abs(dJ_din)))
print('total error from approximation', np.sum(np.abs(dJ_din - dJ_din_approx)))

print(time.perf_counter() - start)

J: 6367650.55263
dJ_dW magnitude 25275780.3456
total error from approximation 1.75870741299
dJ_din magnitude 25248743.6647
total error from approximation 175.357479516
16.815437999999986


In [6]:
# Gradient check for MaxPoolLayer: compare layer.backward() with forward
# finite differences of the scalar objective J = sum(output**2 * V).
#
# The import comes before the timer so module loading is not measured.
from layers import MaxPoolLayer

# time.clock() was removed in Python 3.8; perf_counter() is the replacement
# for measuring elapsed intervals.
start = time.perf_counter()

dim_input = 30
batch_size = 3
depth = 3
dim_pool = 2

input = np.random.random((batch_size, depth, dim_input, dim_input))
layer = MaxPoolLayer(batch_size, dim_input, depth, dim_pool)

# Baseline forward pass and objective value.
output = layer.forward(input)

V = np.random.random(output.shape)
J = np.sum(output * output * V)
print('J:', J)

# Analytic gradient under test; d/d(output) of sum(output**2 * V) is 2*output*V.
dJ_dout = 2 * output * V
dJ_din = layer.backward(dJ_dout)

# Numerical gradient: bump one input element at a time and restore it after.
delta = 1e-4
dJ_din_approx = np.zeros(dJ_din.shape, dtype=float)
for i, j, k, l in itertools.product(*(range(x) for x in input.shape)):
    input_original = input[i, j, k, l]
    input[i, j, k, l] = input_original + delta
    # Use a separate name so the baseline `output` is not overwritten by
    # the result of a perturbed forward pass.
    out_changed = layer.forward(input)
    J_changed = np.sum(out_changed * out_changed * V)
    dJ_din_approx[i, j, k, l] = (J_changed - J) / delta
    input[i, j, k, l] = input_original

print('dJ_din magnitude', np.sum(np.abs(dJ_din)))
print('total error from approximation', np.sum(np.abs(dJ_din - dJ_din_approx)))

print(time.perf_counter() - start)

J: 667.984895629
dJ_din magnitude 1604.57795535
total error from approximation 0.773093570218
0.8259420000000004
