# Layer Gradient Checks

Here, we use numerical gradient checking to verify that backpropagation is implemented correctly for every layer in the Layers folder. We should expect very small but nonzero error values, because the numerical check only approximates the true gradient.

In [7]:
import numpy as np
from NeuralNetwork import *
from Utils.NumericalGradient import *

from Layers.SoftmaxLossLayer import *
from Layers.AffineLayer import *
from Layers.ReLULayer import *
from Layers.SigmoidLayer import *
from Layers.TanhLayer import *

### Affine Layer
Layers/AffineLayer.py

In [2]:
# Gradient check for AffineLayer: compare the analytic gradients produced by
# backward() against numerical gradients with respect to the input, the
# weights and the biases.
affine = AffineLayer(30, 10, 1e-2)
test_input = np.random.randn(50, 30)
dout = np.random.randn(50, 10)  # upstream gradient fed to backward()

# Numerical gradients: each lambda varies exactly one argument of forward()
# while holding the other two fixed.
dx_num = numerical_gradient_layer(lambda x : affine.forward(x, affine.W, affine.b), test_input, dout)
dW_num = numerical_gradient_layer(lambda w : affine.forward(test_input, w, affine.b), affine.W, dout)
db_num = numerical_gradient_layer(lambda b : affine.forward(test_input, affine.W, b), affine.b, dout)

# Run the forward pass immediately before backward(). The numerical checks
# above call forward() repeatedly with perturbed arguments, so any state the
# layer keeps from its most recent forward call would otherwise correspond to
# a perturbed evaluation rather than to test_input.
_ = affine.forward(test_input)
dx = affine.backward(dout)
print('Affine dx error:', np.max(relative_error(dx, dx_num)))
print('Affine dW error:', np.max(relative_error(affine.dW, dW_num)))
print('Affine db error:', np.max(relative_error(affine.db, db_num)))

Affine dx error: 5.85551142831e-08
Affine dW error: 2.70895779983e-05
Affine db error: 1.263106277e-08


### ReLU (Rectified Linear Unit) Layer
Layers/ReLULayer.py

In [3]:
# Gradient check for ReLULayer: compare backward() against a numerical
# gradient of forward() with respect to the input.
relu = ReLULayer(10)
test_input = np.random.randn(50, 10)
dout = np.random.randn(50, 10)  # upstream gradient fed to backward()

dx_num = numerical_gradient_layer(lambda x : relu.forward(x), test_input, dout)

# Run the forward pass immediately before backward(). The numerical check
# above calls forward() repeatedly with perturbed inputs, so any state the
# layer keeps from its most recent forward call would otherwise correspond to
# a perturbed evaluation rather than to test_input.
_ = relu.forward(test_input)
dx = relu.backward(dout)
print('ReLU dx error:', np.max(relative_error(dx, dx_num)))

ReLU dx error: 3.27563178309e-12


### Sigmoid Layer
Layers/SigmoidLayer.py

In [4]:
# Gradient check for SigmoidLayer: compare backward() against a numerical
# gradient of forward() with respect to the input.
sigmoid = SigmoidLayer(10)
test_input = np.random.randn(50, 10)
dout = np.random.randn(50, 10)  # upstream gradient fed to backward()

dx_num = numerical_gradient_layer(lambda x : sigmoid.forward(x), test_input, dout)

# Run the forward pass immediately before backward(). The numerical check
# above calls forward() repeatedly with perturbed inputs, so any state the
# layer keeps from its most recent forward call would otherwise correspond to
# a perturbed evaluation rather than to test_input.
_ = sigmoid.forward(test_input)
dx = sigmoid.backward(dout)
print('Sigmoid dx error:', np.max(relative_error(dx, dx_num)))

Sigmoid dx error: 5.60067957701e-11


### Softmax Loss Layer
Layers/SoftmaxLossLayer.py

In [5]:
# Gradient check for SoftmaxLossLayer: compare the gradient returned by loss()
# against a numerical gradient of the scalar loss with respect to the scores.
softmax = SoftmaxLossLayer(10)
test_scores = np.random.randn(50, 10)
# np.random.randint excludes its upper bound, so (0, 10) draws labels 0..9 and
# exercises every one of the 10 score columns. The previous (1, 10) range never
# produced class 0 as a target.
# NOTE(review): assumes labels are 0-indexed column indices - confirm against SoftmaxLossLayer.
test_classes = np.random.randint(0, 10, 50)
_, dx = softmax.loss(test_scores, test_classes)
# loss() returns a tuple; element [0] is the value differentiated numerically.
dx_num = numerical_gradient(lambda x : softmax.loss(x, test_classes)[0], test_scores)
print('Softmax backprop error:', np.max(relative_error(dx, dx_num)))

Softmax backprop error: 7.30132573919e-08


### Tanh Layer

Layers/TanhLayer.py

In [8]:
# Gradient check for TanhLayer: compare backward() against a numerical
# gradient of forward() with respect to the input.
tanh = TanhLayer(10)
test_input = np.random.randn(50, 10)
dout = np.random.randn(50, 10)  # upstream gradient fed to backward()

dx_num = numerical_gradient_layer(lambda x : tanh.forward(x), test_input, dout)

# Run the forward pass immediately before backward(). The numerical check
# above calls forward() repeatedly with perturbed inputs, so any state the
# layer keeps from its most recent forward call would otherwise correspond to
# a perturbed evaluation rather than to test_input.
_ = tanh.forward(test_input)
dx = tanh.backward(dout)
print('Tanh dx error:', np.max(relative_error(dx, dx_num)))

Tanh dx error: 6.88055589118e-06


### Two Layer Network
This is a gradient check for a simple example network with the following architecture:
Affine, ReLU, Affine, Softmax

In [6]:
# End-to-end gradient check for a small network: Affine -> ReLU -> Affine -> SoftmaxLoss.
nn = NeuralNetwork(10, data_type=np.float64)
nn.add_layer('Affine', {'neurons':10, 'weight_scale':5e-2})
nn.add_layer('ReLU', {})
nn.add_layer('Affine', {'neurons':10, 'weight_scale':5e-2})
nn.add_layer('SoftmaxLoss', {})
test_scores = np.random.randn(20, 10)
# np.random.randint excludes its upper bound, so (0, 10) draws labels 0..9.
# The previous (1, 10) range never produced class 0 as a target.
# NOTE(review): assumes labels are 0-indexed - confirm against the SoftmaxLoss layer.
test_classes = np.random.randint(0, 10, 20)
loss, dx = nn.backward(test_scores, test_classes)


def check_network_gradients(**backward_kwargs):
    """Print the max relative error of dW/db for both affine layers.

    Extra keyword arguments (e.g. ``reg_param=1.0``) are forwarded unchanged
    to every ``nn.backward`` call, so the numerical and analytic gradients
    are always computed for the same loss.
    """
    # The argument is ignored: numerical_gradient is handed the parameter
    # arrays themselves (nn.layers[k].W / .b), and the loss is re-evaluated
    # through the network that owns them.
    f = lambda _: nn.backward(test_scores, test_classes, **backward_kwargs)[0]

    # (label used in the printout, index into nn.layers); index 1 is the ReLU.
    for number, layer_index in ((1, 0), (2, 2)):
        layer = nn.layers[layer_index]
        d_b_num = numerical_gradient(f, layer.b, accuracy=1e-8)
        d_W_num = numerical_gradient(f, layer.W, accuracy=1e-8)

        # numerical_gradient calls f (and therefore nn.backward) many times,
        # so layer.dW / layer.db currently hold whatever its final evaluation
        # left behind. Refresh them with one more backward pass before
        # comparing against the numerical estimates.
        f(None)
        print('Weight %d error:' % number, np.max(relative_error(layer.dW, d_W_num)))
        print('Bias %d error:' % number, np.max(relative_error(layer.db, d_b_num)))


print('With regularization off:')
check_network_gradients()

print('With regularization at lambda = 1.0:')
check_network_gradients(reg_param=1.0)

With regularization off:
Weight 1 error: 3.81541867848e-05
Bias 1 error: 9.09793829959e-05
Weight 2 error: 0.00193133226872
Bias 2 error: 4.09544225405e-06
With regularization at lambda = 1.0:
Weight 1 error: 4.89860336358e-05
Bias 1 error: 9.09793829959e-05
Weight 2 error: 2.99393089663e-05
Bias 2 error: 4.09544225405e-06
