In [1]:
import mxnet as mx

In [2]:
# Input dimensions: time steps (T), batch size (N) and channels per step (C).
sequence_length, batch_size, channels = 4, 5, 3

In [3]:
# Random input ordered as (time, batch, channel), matching the layout='TNC'
# that the LSTM is constructed with.
input_shape = (sequence_length, batch_size, channels)
inputs = mx.nd.random.uniform(shape=input_shape)

In [4]:
# Hidden size of each LSTM direction; passed as hidden_size when the LSTM is built
# and used as the last dimension of the initial states.
hid_units = 6

#### Part 0: Create Stacked Bi-directional LSTM

In [5]:
# Number of stacked LSTM layers (passed as num_layers).
hid_layers = 2
# Run every layer in both the forward and the backward direction.
bidirectional = True

In [6]:
# Build the stacked bi-directional LSTM from the settings chosen above, then
# initialize its parameters so it can be called on data.
stack_bidir_lstm = mx.gluon.rnn.LSTM(
    hidden_size=hid_units,
    num_layers=hid_layers,
    bidirectional=bidirectional,
    layout='TNC',
)
stack_bidir_lstm.initialize()

In [7]:
# Each layer keeps one state per direction, so the leading dimension of the
# initial states is num_directions * hid_layers. Deriving it from the
# `bidirectional` flag (instead of a literal 2) keeps this cell correct if the
# flag is changed above.
num_directions = 2 if bidirectional else 1
state_shape = (num_directions * hid_layers, batch_size, hid_units)
hid_init_h = mx.nd.random.uniform(shape=state_shape)  # initial hidden state h
hid_init_c = mx.nd.random.uniform(shape=state_shape)  # initial cell state c
hid_init = [hid_init_h, hid_init_c]
# Passing initial states makes the layer return (outputs, final states).
outputs, hid_states = stack_bidir_lstm(inputs, hid_init)

#### Part 1: Show equality between:

A. the first `hid_units` (6) channels of the output at the last time step

B. the final hidden state of the forward LSTM in the last layer

In [8]:
# Output at the last time step (layout is 'TNC', so axis 0 indexes time).
final_output = outputs[-1]
# Take the first hid_units channels. Using hid_units instead of a literal 6
# keeps the slice aligned with the LSTM's hidden size if it is changed.
final_output_slice = final_output[:, :hid_units]
final_output_slice


[[ 0.00280006  0.00131526  0.02154032 -0.00737004 -0.00082009  0.02088503]
 [ 0.01521961  0.01984186  0.02294893  0.01894835  0.01945606  0.01656748]
 [ 0.0041332   0.01020547  0.0035636   0.01056547  0.0121406   0.01093003]
 [ 0.01794787  0.02096532  0.03129815 -0.00915075  0.01891881  0.01848382]
 [-0.00269367  0.01555081  0.00915592  0.01482278  0.00926654  0.02081262]]
<NDArray 5x6 @cpu(0)>

In [9]:
# hid_states mirrors hid_init ([h, c]); index 0 is the hidden state h.
final_h = hid_states[0]
# The last two entries along axis 0 belong to the last layer.
hid_last_layer = final_h[-2:]
# Entry 0 of that pair is compared below against the forward half of the output.
hid_last_layer_forw = hid_last_layer[0]
hid_last_layer_forw


[[ 0.00280006  0.00131526  0.02154032 -0.00737004 -0.00082009  0.02088503]
 [ 0.01521961  0.01984186  0.02294893  0.01894835  0.01945606  0.01656748]
 [ 0.0041332   0.01020547  0.0035636   0.01056547  0.0121406   0.01093003]
 [ 0.01794787  0.02096532  0.03129815 -0.00915075  0.01891881  0.01848382]
 [-0.00269367  0.01555081  0.00915592  0.01482278  0.00926654  0.02081262]]
<NDArray 5x6 @cpu(0)>

In [10]:
# Element-wise comparison: 1 where the two arrays agree, 0 elsewhere.
final_output_slice == hid_last_layer_forw


[[ 1.  1.  1.  1.  1.  1.]
 [ 1.  1.  1.  1.  1.  1.]
 [ 1.  1.  1.  1.  1.  1.]
 [ 1.  1.  1.  1.  1.  1.]
 [ 1.  1.  1.  1.  1.  1.]]
<NDArray 5x6 @cpu(0)>

#### Part 2: Show equality between:

A. the last `hid_units` (6) channels of the output at the first time step

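B. the final hidden state of the backward LSTM in the last layer (the backward LSTM reads the sequence in reverse, so its final state is produced at the first time step)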

In [11]:
# Output at the first time step (layout is 'TNC', so axis 0 indexes time).
first_output = outputs[0]
# Take the channels after the first hid_units. Using hid_units instead of a
# literal 6 keeps the slice aligned with the LSTM's hidden size if it is changed.
first_output_slice = first_output[:, hid_units:]
first_output_slice


[[ 0.02874257  0.00842749  0.01191495  0.02792466  0.01095112  0.00856404]
 [ 0.02944191 -0.00884817  0.00251383  0.03231068  0.00370614  0.01290814]
 [ 0.01387642 -0.00066042  0.00942638  0.00545881  0.01227141  0.01775667]
 [ 0.02688004  0.01613665  0.01236364  0.01626807  0.00266924  0.02035296]
 [ 0.03239724 -0.00368664 -0.00626667  0.0165406  -0.00502696  0.00558209]]
<NDArray 5x6 @cpu(0)>

In [12]:
# hid_states mirrors hid_init ([h, c]); index 0 is the hidden state h.
final_h = hid_states[0]
# The last two entries along axis 0 belong to the last layer.
hid_last_layer = final_h[-2:]
# Entry 1 of that pair is compared below against the backward half of the output.
hid_last_layer_back = hid_last_layer[1]
hid_last_layer_back


[[ 0.02874257  0.00842749  0.01191495  0.02792466  0.01095112  0.00856404]
 [ 0.02944191 -0.00884817  0.00251383  0.03231068  0.00370614  0.01290814]
 [ 0.01387642 -0.00066042  0.00942638  0.00545881  0.01227141  0.01775667]
 [ 0.02688004  0.01613665  0.01236364  0.01626807  0.00266924  0.02035296]
 [ 0.03239724 -0.00368664 -0.00626667  0.0165406  -0.00502696  0.00558209]]
<NDArray 5x6 @cpu(0)>

In [13]:
# Element-wise comparison: 1 where the two arrays agree, 0 elsewhere.
first_output_slice == hid_last_layer_back


[[ 1.  1.  1.  1.  1.  1.]
 [ 1.  1.  1.  1.  1.  1.]
 [ 1.  1.  1.  1.  1.  1.]
 [ 1.  1.  1.  1.  1.  1.]
 [ 1.  1.  1.  1.  1.  1.]]
<NDArray 5x6 @cpu(0)>