In [1]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import Model, layers, optimizers, datasets

In [2]:
# Random dummy batch drawn from a normal distribution (mean 0, stddev 1) with
# shape (32, 20, 100); it is only used to probe LSTM output shapes below.
x = tf.random.normal(shape=(32, 20, 100), mean=0., stddev=1.)

In [9]:
# LSTM layer with 10 units; return_sequences / return_state are not set here.
lstm = layers.LSTM(units=10)

In [10]:
# Check the input tensor's shape before passing it through the layer.
x.shape

TensorShape([32, 20, 100])

In [11]:
# Run the layer on the whole batch and display the shape of what it returns.
out = lstm(x)
out.shape

TensorShape([32, 10])

In [8]:
lstm2 = layers.LSTM(units=10, return_sequences=True, return_state=True) # input layout: [batch_size, timesteps, features]
# return_sequences: return only the last output of the output sequence (default=False)
#                   or the full sequence (True).
# return_state: whether to also return the last state (hidden_state, cell_state)
#               in addition to the output.

In [13]:
# lstm2 was built with return_sequences=True and return_state=True, so the call
# is unpacked into three tensors: the output sequence plus the last hidden and
# cell states.
output2, final_hidden_state, final_cell_state = lstm2(x)

In [14]:
# Shape of the sequence output returned by lstm2.
output2.shape

TensorShape([32, 20, 10])

In [15]:
# Shape of the last hidden state returned by lstm2.
final_hidden_state.shape

TensorShape([32, 10])

In [16]:
# Shape of the last cell state returned by lstm2.
final_cell_state.shape

TensorShape([32, 10])

In [17]:
class LSTM(Model):
    """Two-layer stacked LSTM classifier.

    The network is a single `Sequential` stack:
    LSTM (returns the full sequence) -> BatchNormalization ->
    LSTM (returns the last output only) -> BatchNormalization ->
    Dense softmax over `num_classes`.

    Args:
        units1: Number of units in the first LSTM layer.
        units2: Number of units in the second LSTM layer.
        num_classes: Width of the final softmax layer.
    """

    def __init__(self, units1, units2, num_classes):
        super().__init__()
        self.sequential = tf.keras.Sequential([
            # The first LSTM must emit every timestep so the second LSTM
            # still receives a sequence.
            layers.LSTM(units1, return_sequences=True),
            layers.BatchNormalization(),
            layers.LSTM(units2),
            layers.BatchNormalization(),
            layers.Dense(num_classes, activation=tf.nn.softmax)
        ])

    def call(self, x, training=None):
        """Run the stack on a batch and return class probabilities.

        Args:
            x: Input batch, laid out as [batch_size, timesteps, features].
            training: Optional training/inference flag. It is forwarded
                explicitly so the BatchNormalization layers do not depend on
                implicit propagation of the mode. `None` lets Keras decide,
                which matches calling the model without the argument.

        Returns:
            The softmax output of the final Dense layer.
        """
        return self.sequential(x, training=training)

In [18]:
# Load MNIST through keras.datasets as (train, test) pairs of (inputs, labels).
# The first call downloads the archive (see the output below).
(x_train, y_train), (x_test, y_test) = datasets.mnist.load_data()

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz


In [19]:
# Rescale both input arrays by 1/255 (assumes raw intensities in 0-255 -- TODO confirm).
# NOTE: x_train / x_test are rebound from themselves, so re-running this cell
# rescales already-scaled data; re-run the loading cell first.
x_train, x_test = x_train / 255., x_test / 255.

In [20]:
# Hyper-parameters shared by both models in this notebook.
units_1, units_2, num_classes = 128, 256, 10

# Unidirectional stacked-LSTM classifier.
model = LSTM(units1=units_1, units2=units_2, num_classes=num_classes)

In [21]:
# SGD optimizer with sparse categorical cross-entropy; accuracy is tracked as a metric.
model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer='sgd',
    metrics=['accuracy'],
)

In [22]:
# Training settings; both names are reused by the bidirectional run further down.
batch_size, epochs = 128, 3

# Train on the training split and evaluate on the test split after every epoch.
model.fit(
    x=x_train,
    y=y_train,
    batch_size=batch_size,
    epochs=epochs,
    validation_data=(x_test, y_test),
)

Epoch 1/3
Epoch 2/3
Epoch 3/3


<keras.callbacks.History at 0x7ff2a203e190>

In [23]:
class Bi_LSTM(Model):
    """Two-layer stacked bidirectional LSTM classifier.

    The network is a single `Sequential` stack:
    Bidirectional(LSTM returning the full sequence) -> BatchNormalization ->
    Bidirectional(LSTM returning the last output only) -> BatchNormalization ->
    Dense softmax over `num_classes`.

    Args:
        units1: Number of units of the LSTM wrapped by the first
            Bidirectional layer.
        units2: Number of units of the LSTM wrapped by the second
            Bidirectional layer.
        num_classes: Width of the final softmax layer.
    """

    def __init__(self, units1, units2, num_classes):
        super().__init__()
        self.sequential = tf.keras.Sequential([
            # The first recurrent layer must emit every timestep so the
            # second one still receives a sequence.
            layers.Bidirectional(layers.LSTM(units1, return_sequences=True)),
            layers.BatchNormalization(),
            layers.Bidirectional(layers.LSTM(units2)),
            layers.BatchNormalization(),
            layers.Dense(num_classes, activation=tf.nn.softmax)
        ])

    def call(self, x, training=None):
        """Run the stack on a batch and return class probabilities.

        Args:
            x: Input batch, laid out as [batch_size, timesteps, features].
            training: Optional training/inference flag. It is forwarded
                explicitly so the BatchNormalization layers do not depend on
                implicit propagation of the mode. `None` lets Keras decide,
                which matches calling the model without the argument.

        Returns:
            The softmax output of the final Dense layer.
        """
        return self.sequential(x, training=training)

In [24]:
# Bidirectional counterpart, built with the same hyper-parameters as `model`.
model2 = Bi_LSTM(units1=units_1, units2=units_2, num_classes=num_classes)

In [25]:
# Same optimizer, loss and metric as the unidirectional model.
model2.compile(
    loss='sparse_categorical_crossentropy',
    optimizer='sgd',
    metrics=['accuracy'],
)

In [26]:
# Train the bidirectional model with the batch_size / epochs defined earlier.
model2.fit(
    x=x_train,
    y=y_train,
    batch_size=batch_size,
    epochs=epochs,
    validation_data=(x_test, y_test),
)

Epoch 1/3
Epoch 2/3
Epoch 3/3


<keras.callbacks.History at 0x7ff2a1e97c10>