In [1]:
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, LSTM

In [2]:
import numpy as np

# LSTM

In [3]:
data = np.array([0.1, 0.2, 0.3]).reshape((1, 3, 1))

In [4]:
data

array([[[0.1],
        [0.2],
        [0.3]]])

In [5]:
data.shape

(1, 3, 1)

In [6]:
input = Input(shape=(3, 1))

In [7]:
output = LSTM(1)(input)

In [8]:
model = Model(input, output)

In [9]:
result = model.predict(data)

In [10]:
result

array([[0.05929545]], dtype=float32)

In [11]:
result.shape

(1, 1)

In [12]:
data = np.array([0.1, 0.2, 0.3, 0.1, 0.2, 0.3]).reshape((2, 3, 1))

In [13]:
data.shape

(2, 3, 1)

In [14]:
result = model.predict(data)

In [15]:
result.shape

(2, 1)

In [16]:
def try_return_sequences(data):
    input = Input(shape=(3, 1))
    lstm = LSTM(1, return_sequences=True)(input)
    model = Model(input, lstm)

    result = model.predict(data)

    print(result)
    print(result.shape)

In [17]:
try_return_sequences(data)

[[[-0.02106037]
  [-0.05732434]
  [-0.10306585]]

 [[-0.02106037]
  [-0.05732434]
  [-0.10306585]]]
(2, 3, 1)


全てのtimestepでhを出力

In [18]:
def try_return_state(data):
    input = Input(shape=(3, 1))
    lstm, state_h, state_c = LSTM(1, return_state=True)(input)

    model = Model(input, [lstm, state_h, state_c])

    result = model.predict(data)
    print(result)
    print(result[0].shape)
    print(result[1].shape)
    print(result[2].shape)

In [19]:
try_return_sequences(data)

[[[-0.02487313]
  [-0.06296439]
  [-0.10499674]]

 [[-0.02487313]
  [-0.06296439]
  [-0.10499674]]]
(2, 3, 1)


最終的な出力と、最終的なhとcが出力。当然、最終的な出力=最終的なh

In [20]:
def try_return_sequences_n_state(data):
    input = Input(shape=(3, 1))
    lstm, state_h, state_c = LSTM(
        1, return_sequences=True, return_state=True)(input)

    model = Model(input, [lstm, state_h, state_c])

    result = model.predict(data)
    print(result)
    print(result[0].shape)
    print(result[1].shape)
    print(result[2].shape)

In [21]:
try_return_sequences_n_state(data)

[array([[[0.00963533],
        [0.0237227 ],
        [0.03860088]],

       [[0.00963533],
        [0.0237227 ],
        [0.03860088]]], dtype=float32), array([[0.03860088],
       [0.03860088]], dtype=float32), array([[0.08601475],
       [0.08601475]], dtype=float32)]
(2, 3, 1)
(2, 1)
(2, 1)


全てのtimestepにおけるhと、最終的なhとcが出力

# Embedding & Masking

In [22]:
from tensorflow.keras.layers import Embedding, Masking

In [23]:
data = np.array([[1, 2, 3], [2, 3, 1], [3, 1, 2]])

In [24]:
def simple_embedding(data):
    # flattenとかだとデコボコの時エラー
    max_idx = max([max(d) for d in data])
    # kerasのreferenceにあるように、+1しないと以下のエラー
    # InvalidArgumentError: indices[0,2] = 3 is not in [0, 3) [Op:ResourceGather]
    embedding_layer = Embedding(max_idx+1, 2)
    result = embedding_layer(data)

    print(result)

In [25]:
simple_embedding(data)

tf.Tensor(
[[[ 0.00284185 -0.01055091]
  [-0.03061467 -0.03516297]
  [ 0.03403563 -0.0297626 ]]

 [[-0.03061467 -0.03516297]
  [ 0.03403563 -0.0297626 ]
  [ 0.00284185 -0.01055091]]

 [[ 0.03403563 -0.0297626 ]
  [ 0.00284185 -0.01055091]
  [-0.03061467 -0.03516297]]], shape=(3, 3, 2), dtype=float32)


In [26]:
bump_data = np.array(
    [*map(lambda x: np.array(x), [[1, 2, 3, 4], [2, 3], [3, 1, 2]])]
)

  bump_data = np.array(


In [27]:
simple_embedding(bump_data)

ValueError: Failed to convert a NumPy array to a Tensor (Unsupported object type numpy.ndarray).

凸凹のままでは不可

In [28]:
from tensorflow.keras.preprocessing.sequence import pad_sequences

In [29]:
padded_data = pad_sequences(bump_data, padding='post')

In [30]:
padded_data

array([[1, 2, 3, 4],
       [2, 3, 0, 0],
       [3, 1, 2, 0]], dtype=int32)

In [31]:
def mask_embedding(data):
    # flattenとかだとデコボコの時エラー
    max_idx = max([max(d) for d in data])
    # kerasのreferenceにあるように、+1しないと以下のエラー
    # InvalidArgumentError: indices[0,2] = 3 is not in [0, 3) [Op:ResourceGather]
    embedding_layer = Embedding(max_idx+1, 2, mask_zero=True)
    result = embedding_layer(data)

    print(result)
    print(result._keras_mask)

In [32]:
mask_embedding(padded_data)

tf.Tensor(
[[[ 0.03018922 -0.00889117]
  [-0.01069335 -0.03257515]
  [-0.02328345  0.02120855]
  [-0.01925821  0.04810247]]

 [[-0.01069335 -0.03257515]
  [-0.02328345  0.02120855]
  [ 0.02446326 -0.01218178]
  [ 0.02446326 -0.01218178]]

 [[-0.02328345  0.02120855]
  [ 0.03018922 -0.00889117]
  [-0.01069335 -0.03257515]
  [ 0.02446326 -0.01218178]]], shape=(3, 4, 2), dtype=float32)
tf.Tensor(
[[ True  True  True  True]
 [ True  True False False]
 [ True  True  True False]], shape=(3, 4), dtype=bool)


Masking layerでも可。それらlayerのcompute_maskを使い、結果をLSTMの__call__にmaskで渡しても可

# Embedding + LSTM

In [75]:
data = np.array([1, 2, 3, 4, 5, 6, 0, 0]).reshape((8,))

In [76]:
data

array([1, 2, 3, 4, 5, 6, 0, 0])

In [77]:
input = Input(shape=(8,))

In [78]:
embedding = Embedding(6+1, 2, mask_zero=True)

In [79]:
lstm = LSTM(6, return_state=True)

In [80]:
x = embedding(input)

In [81]:
x.shape

TensorShape([None, 8, 2])

In [82]:
output = lstm(x)

In [84]:
len(output)

3

In [85]:
output[0].shape

TensorShape([None, 6])

In [86]:
output[1].shape

TensorShape([None, 6])

In [87]:
output[2].shape

TensorShape([None, 6])

In [89]:
input = Input(shape=(1,))

In [90]:
x = embedding(input)

In [91]:
x.shape

TensorShape([None, 1, 2])

In [92]:
output = lstm(x)