In [3]:
# As usual, a bit of setup
import time, os, json
import numpy as np
import matplotlib
# Request the non-interactive 'agg' backend; this has to run before pyplot is
# imported to take effect.
# NOTE(review): when this cell is re-run in a live kernel, pyplot is already
# imported, so this call only produces a "backend has already been chosen"
# warning. The %matplotlib inline magic further down in this cell selects the
# backend used for display anyway -- confirm whether this call is still needed.
matplotlib.use('agg')
import matplotlib.pyplot as plt


from cs231n.gradient_check import eval_numerical_gradient, eval_numerical_gradient_array
from cs231n.temp import *
from cs231n.captioning_solver import CaptioningSolver
from cs231n.classifiers.rnn import CaptioningRNN
from cs231n.coco_utils import load_coco_data, sample_coco_minibatch, decode_captions
from cs231n.image_utils import image_from_url

%matplotlib inline
plt.rcParams['figure.figsize'] = (10.0, 8.0) # set default size of plots
plt.rcParams['image.interpolation'] = 'nearest'
plt.rcParams['image.cmap'] = 'gray'

# for auto-reloading external modules
# see http://stackoverflow.com/questions/1907993/autoreload-of-modules-in-ipython
%load_ext autoreload
%autoreload 2

def rel_error(x, y):
    """Return the largest element-wise relative error between x and y.

    For each element the error is |x - y| / max(1e-8, |x| + |y|). The 1e-8
    floor on the denominator avoids a division by zero where both inputs
    are zero.
    """
    abs_diff = np.abs(x - y)
    scale = np.maximum(1e-8, np.abs(x) + np.abs(y))
    return np.max(abs_diff / scale)

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


because the backend has already been chosen;
matplotlib.use() must be called *before* pylab, matplotlib.pyplot,
or matplotlib.backends is imported for the first time.



In [10]:
from cs231n.temp import lstm_forward, lstm_backward

# Gradient check for lstm_forward / lstm_backward on a small random problem.
# The seed is fixed and the random draws below happen in a fixed order, so the
# printed numbers are reproducible from run to run.
np.random.seed(231)

N, D, T, H = 2, 3, 10, 6

# Random input, initial state and parameters. Shapes: x is (N, T, D),
# h0 is (N, H), Wx is (D, 4H), Wh is (H, 4H) and b is (4H,).
x = np.random.randn(N, T, D)
h0 = np.random.randn(N, H)
Wx = np.random.randn(D, 4 * H)
Wh = np.random.randn(H, 4 * H)
b = np.random.randn(4 * H)

# Forward pass, followed by a random upstream gradient shaped like the output.
out, cache = lstm_forward(x, h0, Wx, Wh, b)
print(out)
dout = np.random.randn(*out.shape)

# Analytic gradients from the backward pass.
dx, dh0, dWx, dWh, db = lstm_backward(dout, cache)


# One single-argument wrapper per quantity being checked. The parameter
# shadows the notebook global of the same name; every other argument is read
# from the globals defined above. Each wrapper returns only the first element
# of lstm_forward's result.
def fx(x):
    return lstm_forward(x, h0, Wx, Wh, b)[0]


def fh0(h0):
    return lstm_forward(x, h0, Wx, Wh, b)[0]


def fWx(Wx):
    return lstm_forward(x, h0, Wx, Wh, b)[0]


def fWh(Wh):
    return lstm_forward(x, h0, Wx, Wh, b)[0]


def fb(b):
    return lstm_forward(x, h0, Wx, Wh, b)[0]


# Numerical gradients for each wrapper, evaluated with the same dout.
dx_num = eval_numerical_gradient_array(fx, x, dout)
dh0_num = eval_numerical_gradient_array(fh0, h0, dout)
dWx_num = eval_numerical_gradient_array(fWx, Wx, dout)
dWh_num = eval_numerical_gradient_array(fWh, Wh, dout)
db_num = eval_numerical_gradient_array(fb, b, dout)

# Report the relative error between numerical and analytic gradients.
for label, numeric, analytic in (
    ('dx error: ', dx_num, dx),
    ('dh0 error: ', dh0_num, dh0),
    ('dWx error: ', dWx_num, dWx),
    ('dWh error: ', dWh_num, dWh),
    ('db error: ', db_num, db),
):
    print(label, rel_error(numeric, analytic))

[[[ -5.26808359e-01  -1.79070098e-01   4.09121321e-02   5.01304274e-05
     2.45547795e-02   1.78968141e-03]
  [  2.44339385e-02   1.50096683e-01   1.92132150e-01   7.39118696e-02
     3.86500179e-01   1.99337867e-01]
  [ -5.98330804e-01   6.00944080e-02   2.80552225e-01   1.27028709e-03
     1.12690791e-01   7.83029566e-01]
  [  3.78531084e-02   4.37886067e-02   4.18588532e-01   3.67929671e-01
     2.69696868e-01   2.67407055e-01]
  [  2.46230836e-02   5.73339438e-03   8.43148599e-02   6.44735548e-01
    -2.97567677e-01   3.26497024e-01]
  [ -1.81764128e-01   2.93656112e-03  -1.43356175e-01   4.92926596e-01
    -5.30514056e-02   1.10006804e-01]
  [ -5.09531464e-01  -5.36215181e-03  -1.97515932e-01   9.59138405e-02
    -6.34063091e-02   3.10509868e-01]
  [ -2.30905687e-02   2.26801933e-03  -2.22164229e-02   5.34910565e-01
    -4.81465326e-02   1.31039161e-01]
  [  6.50621939e-02   4.43988471e-02  -2.90225966e-01   3.69822094e-01
     5.65457953e-03   7.52682654e-02]
  [  6.50088375e-03