- Add LSTM baseline model
- Generate many of the memory sequences built in the "Generate training data" section (currently only a single sequence is used) and train on batches

In [1]:
import logging
import numpy as np
import tensorflow as tf
from model import DNC
from trainer import trainer

# Raise the TensorFlow logger's threshold to ERROR so that lower-severity
# messages are not emitted while the models below are built and trained.
logger = tf.get_logger()
logger.setLevel(logging.ERROR)

### Generate training data

In [62]:
# Dimensions of a batched dataset: `n_train` sequences, each made of `rows`
# time steps with `cols` features.
n_train = 100
rows, cols = 6, 4

# One random column index per (sequence, step) pair, kept as a flat vector.
ones = np.random.randint(low=0, high=cols, size=n_train * rows)
print(ones.shape)

# All-zero array with one (rows, cols) slot per sequence.
seq = np.zeros(shape=(n_train, rows, cols))
print(seq.shape)

(600,)
(100, 6, 4)


In [None]:
# Build `n_train` sequences at once. Every sequence consists of `rows` one-hot
# vectors of width `cols`; the position of the 1 in each vector comes from
# `ones`, which holds one column index per (sequence, step) pair.
seq = np.zeros((n_train, rows, cols))
hot_cols = ones.reshape(n_train, rows)   # (n_train, rows) column indices
seq_idx = np.arange(n_train)[:, None]    # (n_train, 1), broadcasts over steps
step_idx = np.arange(rows)[None, :]      # (1, rows), broadcasts over sequences
seq[seq_idx, step_idx, hot_cols] = 1
zer = np.zeros(seq.shape)

# Input: the sequence followed by zeros. Target: zeros followed by the sequence.
# Axis 0 is the batch axis, so the two halves are joined along axis 1 (time).
X = np.concatenate((seq, zer), axis=1).astype(np.float32)
y = np.concatenate((zer, seq), axis=1).astype(np.float32)

# Sanity checks: every step is one-hot, and the first half of each input
# equals the second half of the corresponding target.
assert (seq.sum(axis=-1) == 1).all()
assert (X[:, :rows] == y[:, rows:]).all()

# The arrays already carry a leading batch axis, so they are used as they are.
X_train = X
y_train = y


In [2]:
# A single training example: `rows` random one-hot vectors of width `cols`.
rows, cols = 6, 4
ones = np.random.randint(0, cols, size=rows)

# Comparing each drawn index against 0..cols-1 gives the one-hot rows directly.
seq = (np.arange(cols) == ones[:, None]).astype(float)
zer = np.zeros(seq.shape)

# Input: the sequence followed by zeros. Target: zeros followed by the sequence.
X = np.concatenate([seq, zer]).astype(np.float32)
y = np.concatenate([zer, seq]).astype(np.float32)

# The first half of the input must reappear as the second half of the target.
assert (X[:rows] == y[rows:]).all()

# Prepend a batch axis of size 1.
X_train = X[np.newaxis, ...]
y_train = y[np.newaxis, ...]

### Initialize and train DNC model

Initialize:

In [3]:
# DNC configured for this task: output width equal to `cols`, a memory matrix
# of shape (10, 4) and a single read head.
dnc = DNC(output_dim=cols, memory_shape=(10, 4), n_read=1)

Train:

In [15]:
# Gather the training settings in one place, then pass them to `trainer`.
train_kwargs = dict(
    model=dnc,
    loss_fn=tf.keras.losses.mse,   # mean squared error against `y_train`
    X_train=X_train,
    y_train=y_train,
    epochs=500,
    batch_size=1,
    verbose=False,
)
trainer(**train_kwargs)

Predict on `X`:

In [16]:
# Run the DNC on `X` and convert the returned tensor to a NumPy array.
# NOTE(review): training used the batched `X_train`, while this call passes `X`
# without a batch axis -- confirm that `DNC` is meant to accept both layouts.
y_pred = dnc(X).numpy()

Check if the predictions are almost the same as the ground truth `y`:

In [18]:
# Display the DNC predictions.
y_pred

array([[ 9.2724264e-03, -7.6735392e-03, -2.6266277e-03,  4.6815872e-03],
       [-2.8955340e-03, -5.4877438e-03, -5.0735623e-03,  8.3521605e-03],
       [-2.9538572e-03, -1.0655727e-03, -7.1721077e-03,  3.8100034e-03],
       [-1.8956363e-03, -5.6152157e-03, -4.3858513e-03,  6.1591566e-03],
       [-3.0968189e-03, -4.2329580e-03, -2.0351112e-03,  3.3300817e-03],
       [-3.7959218e-03,  1.4320649e-03,  1.9368529e-04,  8.7299049e-03],
       [-1.0986090e-02, -7.9876557e-04, -3.9141476e-03,  1.0055457e+00],
       [-4.5063496e-03,  1.0020233e+00,  5.6702495e-03,  1.9157469e-02],
       [ 4.8452616e-04,  2.8419420e-03, -4.2665452e-03,  1.0040584e+00],
       [ 9.9847925e-01, -3.1949431e-03,  7.7199042e-03,  2.0938396e-02],
       [ 9.5604062e-03,  3.0630007e-03, -2.5590062e-03,  9.9297392e-01],
       [ 9.9689913e-01,  3.5163760e-03, -3.5902262e-03, -1.1046052e-02]],
      dtype=float32)

In [23]:
# Element-wise check of the DNC predictions against the ground truth.
# `assert_almost_equal(..., decimal=2)` only tolerates 1.5e-2, which is tighter
# than the ~2.1e-2 deviations visible in the predictions displayed in this
# notebook, so an explicit absolute tolerance of 5e-2 is used instead.
# rtol=0 because the targets are exactly 0 or 1.
np.testing.assert_allclose(y_pred, y, rtol=0, atol=5e-2)

In [24]:
# Display the DNC predictions again, for comparison with the ground truth `y`.
y_pred

array([[ 9.2724264e-03, -7.6735392e-03, -2.6266277e-03,  4.6815872e-03],
       [-2.8955340e-03, -5.4877438e-03, -5.0735623e-03,  8.3521605e-03],
       [-2.9538572e-03, -1.0655727e-03, -7.1721077e-03,  3.8100034e-03],
       [-1.8956363e-03, -5.6152157e-03, -4.3858513e-03,  6.1591566e-03],
       [-3.0968189e-03, -4.2329580e-03, -2.0351112e-03,  3.3300817e-03],
       [-3.7959218e-03,  1.4320649e-03,  1.9368529e-04,  8.7299049e-03],
       [-1.0986090e-02, -7.9876557e-04, -3.9141476e-03,  1.0055457e+00],
       [-4.5063496e-03,  1.0020233e+00,  5.6702495e-03,  1.9157469e-02],
       [ 4.8452616e-04,  2.8419420e-03, -4.2665452e-03,  1.0040584e+00],
       [ 9.9847925e-01, -3.1949431e-03,  7.7199042e-03,  2.0938396e-02],
       [ 9.5604062e-03,  3.0630007e-03, -2.5590062e-03,  9.9297392e-01],
       [ 9.9689913e-01,  3.5163760e-03, -3.5902262e-03, -1.1046052e-02]],
      dtype=float32)

In [25]:
# Display the ground-truth targets.
y

array([[0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 1.],
       [0., 1., 0., 0.],
       [0., 0., 0., 1.],
       [1., 0., 0., 0.],
       [0., 0., 0., 1.],
       [1., 0., 0., 0.]], dtype=float32)

### Compare with LSTM baseline model

In [30]:
from tensorflow.keras.layers import Dense, InputLayer, LSTM
from tensorflow.keras.models import Sequential

In [53]:
# LSTM baseline trained on the same data as the DNC.
model = Sequential([
    # Per-example input shape: the shape of `X_train` without its batch axis,
    # so the layer always matches the tensor that is passed to `fit`.
    InputLayer(input_shape=X_train.shape[1:]),
    # return_sequences=True keeps one output per time step.
    LSTM(20, return_sequences=True),
    # Linear output layer with one unit per feature column (`cols`), the same
    # width that is given to the DNC as `output_dim`.
    Dense(cols, activation=None)
])

# Mean squared error, i.e. the same loss that is used for the DNC.
model.compile(optimizer='adam', loss='mse')
model.fit(X_train, y_train, epochs=1000, verbose=0)

<tensorflow.python.keras.callbacks.History at 0x7fda720cbd30>

In [54]:
# Run the LSTM baseline on the batched input and convert the result to a
# NumPy array; the leading batch axis is kept.
y_ = model(X_train).numpy()

In [55]:
# Display the LSTM predictions for the first (and only) sequence in the batch.
y_[0]

array([[ 1.6807001e-03,  2.1896362e-03, -1.0609522e-03,  3.7924740e-03],
       [ 4.6399739e-03,  4.8826039e-03,  4.7537964e-05,  4.6107713e-03],
       [-4.9219131e-03, -7.0456713e-03,  3.7104478e-03, -6.5649655e-03],
       [-3.0669514e-03, -2.7648509e-03, -3.1739206e-03, -2.7667452e-03],
       [ 2.5546309e-03,  3.6815256e-03, -1.5622881e-03,  3.0858051e-03],
       [ 1.3735946e-03,  7.8260154e-04,  1.9159850e-03,  9.3661807e-04],
       [-1.1108257e-03, -1.1548921e-03,  5.2596210e-05,  9.9886554e-01],
       [ 5.1144511e-05,  1.0001471e+00, -2.2563455e-04,  1.7821975e-04],
       [ 1.5990250e-04,  1.5917420e-04, -1.7116597e-04,  1.0001961e+00],
       [ 9.9986362e-01, -2.0995736e-04,  2.6696926e-04, -1.5820377e-04],
       [ 1.1468120e-04,  7.4259937e-05,  9.7734155e-05,  1.0000833e+00],
       [ 9.9991417e-01, -2.9057264e-06, -2.2532936e-04, -8.9298934e-05]],
      dtype=float32)

In [56]:
# Display the ground-truth targets for comparison with the LSTM predictions.
y

array([[0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 1.],
       [0., 1., 0., 0.],
       [0., 0., 0., 1.],
       [1., 0., 0., 0.],
       [0., 0., 0., 1.],
       [1., 0., 0., 0.]], dtype=float32)