# Layer Gradient Checks

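Here, we use numerical gradient checking to verify that backpropagation is implemented correctly for every layer in the Layers folder. Each check prints the maximum relative error between the analytic gradient and its numerical estimate. We should expect to see very small but nonzero error values, because the checking process only approximates the gradient numerically.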

In [1]:
%load_ext autoreload
%autoreload 2
import numpy as np
import LearnyMcLearnface as lml

### Affine Layer
Layers/AffineLayer.py

In [2]:
# AffineLayer gradient check: the analytic gradients produced by backward() for the
# input, the weights and the bias are compared against numerical estimates.
affine = lml.layers.AffineLayer(30, 10, 1e-2)
test_input, dout = np.random.randn(50, 30), np.random.randn(50, 10)
_ = affine.forward(test_input)

# Each numerical estimate varies one argument of forward() and holds the other two fixed.
numeric_grad = lml.utils.numerical_gradient_layer
dx_num = numeric_grad(lambda x: affine.forward(x, affine.W, affine.b), test_input, dout)
dW_num = numeric_grad(lambda w: affine.forward(test_input, w, affine.b), affine.W, dout)
db_num = numeric_grad(lambda b: affine.forward(test_input, affine.W, b), affine.b, dout)

# backward() returns dx; the parameter gradients are read from affine.dW and affine.db.
dx = affine.backward(dout)
for label, analytic, numeric in (
    ('Affine dx error:', dx, dx_num),
    ('Affine dW error:', affine.dW, dW_num),
    ('Affine db error:', affine.db, db_num),
):
    print(label, np.max(lml.utils.relative_error(analytic, numeric)))

Affine dx error: 7.17763668045e-08
Affine dW error: 2.70895792014e-05
Affine db error: 1.26310646624e-08


### Dropout Layer

Layers/DropoutLayer.py

In [3]:
# DropoutLayer gradient check: backward() is compared against a numerical estimate
# taken through the training-mode forward pass.
# NOTE(review): the check presumably relies on the fixed seed reproducing the same
# dropout mask on every forward_train call - confirm in DropoutLayer.
dropout = lml.layers.DropoutLayer(10, 0.6, seed=5684)
input_shape = (3, 10)
test_input = np.random.randn(*input_shape)
dout = np.random.randn(*input_shape)
_ = dropout.forward_train(test_input)
dx_num = lml.utils.numerical_gradient_layer(dropout.forward_train, test_input, dout)
dx = dropout.backward(dout)
dropout_error = np.max(lml.utils.relative_error(dx, dx_num))
print('Dropout dx error:', dropout_error)

Dropout dx error: 3.38583654876e-12


### ReLU (Rectified Linear Unit) Layer
Layers/ReLULayer.py

In [4]:
# ReLULayer gradient check: the analytic input gradient from backward() is compared
# against a numerical estimate of the same quantity.
relu = lml.layers.ReLULayer(10)
input_shape = (50, 10)
test_input = np.random.randn(*input_shape)
dout = np.random.randn(*input_shape)
_ = relu.forward(test_input)
dx_num = lml.utils.numerical_gradient_layer(relu.forward, test_input, dout)
dx = relu.backward(dout)
relu_error = np.max(lml.utils.relative_error(dx, dx_num))
print('ReLU dx error:', relu_error)

ReLU dx error: 3.27562981159e-12


### Sigmoid Layer
Layers/SigmoidLayer.py

In [5]:
# SigmoidLayer gradient check: the analytic input gradient from backward() is compared
# against a numerical estimate of the same quantity.
sigmoid = lml.layers.SigmoidLayer(10)
input_shape = (50, 10)
test_input = np.random.randn(*input_shape)
dout = np.random.randn(*input_shape)
_ = sigmoid.forward(test_input)
dx_num = lml.utils.numerical_gradient_layer(sigmoid.forward, test_input, dout)
dx = sigmoid.backward(dout)
sigmoid_error = np.max(lml.utils.relative_error(dx, dx_num))
print('Sigmoid dx error:', sigmoid_error)

Sigmoid dx error: 4.097386703e-11


### Softmax Loss Layer
Layers/SoftmaxLossLayer.py

In [6]:
# SoftmaxLossLayer gradient check: loss() is unpacked as a pair whose first element is
# used as the scalar loss and whose second element is the analytic gradient with
# respect to the scores.
softmax = lml.layers.SoftmaxLossLayer(10)
test_scores = np.random.randn(50, 10)
# One label per row of test_scores. randint's upper bound is exclusive, so (0, 10)
# draws labels 0..9; the previous lower bound of 1 meant class 0 was never exercised.
# Assumes labels are zero-based column indices into the scores - TODO confirm.
test_classes = np.random.randint(0, 10, 50)
_, dx = softmax.loss(test_scores, test_classes)
# Numerical gradient of the scalar loss with respect to the scores.
dx_num = lml.utils.numerical_gradient(lambda x : softmax.loss(x, test_classes)[0], test_scores)
print('Softmax backprop error:', np.max(lml.utils.relative_error(dx, dx_num)))

Softmax backprop error: 9.67306817589e-08


### SVM Loss Layer

Layers/SVMLossLayer.py

In [7]:
# SVMLossLayer gradient check: loss() is unpacked as a pair whose first element is
# used as the scalar loss and whose second element is the analytic gradient with
# respect to the scores.
svm = lml.layers.SVMLossLayer(10)
test_scores = np.random.randn(50, 10)
# One label per row of test_scores. randint's upper bound is exclusive, so (0, 10)
# draws labels 0..9; the previous lower bound of 1 meant class 0 was never exercised.
# Assumes labels are zero-based column indices into the scores - TODO confirm.
test_classes = np.random.randint(0, 10, 50)
_, dx = svm.loss(test_scores, test_classes)
# Numerical gradient of the scalar loss with respect to the scores.
dx_num = lml.utils.numerical_gradient(lambda x : svm.loss(x, test_classes)[0], test_scores)
print('SVM backprop error:', np.max(lml.utils.relative_error(dx, dx_num)))

SVM backprop error: 1.40215660067e-09


### Tanh Layer

Layers/TanhLayer.py

# TanhLayer gradient check: the analytic input gradient from backward() is compared
# against a numerical estimate of the same quantity.
tanh = lml.layers.TanhLayer(10)
input_shape = (50, 10)
test_input = np.random.randn(*input_shape)
dout = np.random.randn(*input_shape)
_ = tanh.forward(test_input)
dx_num = lml.utils.numerical_gradient_layer(tanh.forward, test_input, dout)
dx = tanh.backward(dout)
tanh_error = np.max(lml.utils.relative_error(dx, dx_num))
print('Tanh dx error:', tanh_error)

# Full Model Gradient Checks

### Two Layer Network
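This is a gradient check for a simple example network with the following architecture:

Affine → ReLU → Affine → Softmax loss

The check is run twice: once with regularization off and once with a regularization parameter of 1.0.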

In [8]:
# Full-model gradient check for a two-layer network:
# Affine -> ReLU -> Affine -> SoftmaxLoss.
opts = {
    'input_dim' : 10,
    'data_type' : np.float64
}

nn = lml.NeuralNetwork(opts)
nn.add_layer('Affine', {'neurons':10, 'weight_scale':5e-2})
nn.add_layer('ReLU', {})
nn.add_layer('Affine', {'neurons':10, 'weight_scale':5e-2})
nn.add_layer('SoftmaxLoss', {})
test_scores = np.random.randn(20, 10)
# randint's upper bound is exclusive, so (0, 10) draws labels 0..9; the previous lower
# bound of 1 meant class 0 was never exercised.
# Assumes labels are zero-based class indices - TODO confirm.
test_classes = np.random.randint(0, 10, 20)
loss, dx = nn.backward(test_scores, test_classes)

# The Affine layers sit at indices 0 and 2 of nn.layers (ReLU is at index 1).
# Each configuration is (banner to print, extra keyword arguments for nn.backward).
for banner, backward_kwargs in (
    ('With regularization off:', {}),
    ('With regularization at lambda = 1.0:', {'reg_param': 1.0}),
):
    print(banner)
    # The loss ignores its argument: numerical_gradient is handed the layer's own
    # parameter array, so it presumably perturbs that array in place between
    # evaluations - verify against lml.utils.numerical_gradient.
    f = lambda _: nn.backward(test_scores, test_classes, **backward_kwargs)[0]
    for number, index in enumerate((0, 2), start=1):
        layer = nn.layers[index]
        d_b_num = lml.utils.numerical_gradient(f, layer.b, accuracy=1e-8)
        d_W_num = lml.utils.numerical_gradient(f, layer.W, accuracy=1e-8)
        # Recompute the analytic gradients explicitly with the same settings, rather
        # than comparing against whatever the last loss evaluation inside
        # numerical_gradient happened to leave in layer.dW / layer.db.
        nn.backward(test_scores, test_classes, **backward_kwargs)
        print('Weight %d error:' % number, np.max(lml.utils.relative_error(layer.dW, d_W_num)))
        print('Bias %d error:' % number, np.max(lml.utils.relative_error(layer.db, d_b_num)))

With regularization off:
Weight 1 error: 0.000266447122624
Bias 1 error: 4.32867650991e-06
Weight 2 error: 3.56849989239e-05
Bias 2 error: 2.35135119232e-05
With regularization at lambda = 1.0:
Weight 1 error: 4.27541260165e-05
Bias 1 error: 4.32867650991e-06
Weight 2 error: 3.42603091643e-06
Bias 2 error: 2.35135119232e-05


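### Multilayer Fully Connected Network with Augmentations
This is a gradient check for a deeper network made of three Affine → ReLU → Dropout groups followed by a Softmax loss, using Xavier initialization and a regularization parameter of 0.7.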

In [21]:
# Full-model gradient check for a deeper network: three Affine -> ReLU -> Dropout
# groups followed by a SoftmaxLoss layer, with the 'xavier' init scheme.
opts = {
    'input_dim' : 10,
    'data_type' : np.float64,
    'init_scheme' : 'xavier'
}
nn = lml.NeuralNetwork(opts)
nn.add_layer('Affine', {'neurons':10})
nn.add_layer('ReLU', {})
nn.add_layer('Dropout', {'dropout_param':0.85, 'seed':5684})
nn.add_layer('Affine', {'neurons':10})
nn.add_layer('ReLU', {})
nn.add_layer('Dropout', {'dropout_param':0.90, 'seed':5684})
nn.add_layer('Affine', {'neurons':10})
nn.add_layer('ReLU', {})
nn.add_layer('Dropout', {'dropout_param':0.95, 'seed':5684})
nn.add_layer('SoftmaxLoss', {})
test_scores = np.random.randn(20, 10)
# randint's upper bound is exclusive, so (0, 10) draws labels 0..9; the previous lower
# bound of 1 meant class 0 was never exercised.
# Assumes labels are zero-based class indices - TODO confirm.
test_classes = np.random.randint(0, 10, 20)
loss, dx = nn.backward(test_scores, test_classes)

# The loss ignores its argument: numerical_gradient is handed the layer's own
# parameter array, so it presumably perturbs that array in place between
# evaluations - verify against lml.utils.numerical_gradient.
f = lambda _: nn.backward(test_scores, test_classes, reg_param=0.7)[0]

# Layers were added in groups of three, so the Affine layers are at indices 0, 3 and 6.
for number, index in enumerate((0, 3, 6), start=1):
    layer = nn.layers[index]
    d_b_num = lml.utils.numerical_gradient(f, layer.b, accuracy=1e-8)
    d_W_num = lml.utils.numerical_gradient(f, layer.W, accuracy=1e-8)
    # Recompute the analytic gradients explicitly with the same reg_param, rather than
    # comparing against whatever the last loss evaluation inside numerical_gradient
    # happened to leave in layer.dW / layer.db.
    nn.backward(test_scores, test_classes, reg_param=0.7)
    print('Weight %d error:' % number, np.max(lml.utils.relative_error(layer.dW, d_W_num)))
    print('Bias %d error:' % number, np.max(lml.utils.relative_error(layer.db, d_b_num)))

Weight 1 error: 1.18140579523e-05
Bias 1 error: 1.0823116919e-05
Weight 2 error: 8.68542286154e-05
Bias 2 error: 5.75731489058e-06
Weight 3 error: 1.51101874291e-06
Bias 3 error: 1.79626723799e-06
