<a href="https://colab.research.google.com/github/dunliangyang2010/Deep-Learning-practice/blob/master/RNN_basic.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import tensorflow as tf
from tensorflow.keras import layers, models

In [2]:
# Toy input batch shared by every example cell below.
#   batch (sample) size : 1
#   sequence length     : 5  (number of time steps)
#   feature dimension   : 8  (length of the vector at each time step)

inputs = tf.random.normal(shape=(1, 5, 8))
inputs.shape

TensorShape([1, 5, 8])

# tf.keras.layers.LSTM

In [4]:
# Default LSTM: only the hidden state h of the last time step is returned (length 3).
last_step_lstm = layers.LSTM(units=3)
output = last_step_lstm(inputs)

# return_sequences=True: the hidden state h of every time step is returned.
per_step_lstm = layers.LSTM(units=3, return_sequences=True)
output2 = per_step_lstm(inputs)

# First shape is (batch size, feature dim);
# second shape is (batch size, seq length, feature dim).
for tensor in (output, output2):
    print(tensor.shape)

(1, 3)
(1, 5, 3)


In [5]:
# Ask a single LSTM for both the per-step outputs and its final states.
# With return_state=True the call returns three tensors:
#   the output sequence, the final hidden state h, and the final cell state c.
lstm_with_state = layers.LSTM(units=3, return_sequences=True, return_state=True)
whole_seq_output, final_hiden_state, final_cell_state = lstm_with_state(inputs)

# Print each tensor's shape followed by its values. In the printed result the
# last time step of the sequence output matches the final hidden state h.
for label, tensor in (
    ('whole_seq_output: ', whole_seq_output),
    ('final_hiden_state (h): ', final_hiden_state),
    ('final_cell_state (c): ', final_cell_state),
):
    print(label, tensor.shape, tensor)

whole_seq_output:  (1, 5, 3) tf.Tensor(
[[[ 0.06528592 -0.03278505 -0.07613759]
  [-0.13571326 -0.1907154  -0.09549402]
  [-0.57082224 -0.3330478   0.09159414]
  [-0.06113085 -0.1965439   0.05639385]
  [ 0.14942108 -0.08224882 -0.00061382]]], shape=(1, 5, 3), dtype=float32)
final_hiden_state (h):  (1, 3) tf.Tensor([[ 0.14942108 -0.08224882 -0.00061382]], shape=(1, 3), dtype=float32)
final_cell_state (c):  (1, 3) tf.Tensor([[ 0.40483788 -0.21497783 -0.00160905]], shape=(1, 3), dtype=float32)


# tf.keras.layers.GRU

In [6]:
# Default GRU: only the output of the last time step is returned (length 3).
gru_layer = layers.GRU(units=3)
output = gru_layer(inputs)
print(output.shape)

(1, 3)


In [7]:
# A GRU with return_state=True returns two tensors: the output sequence and
# one final state (there is no separate cell state as in the LSTM example).
gru_with_state = layers.GRU(units=3, return_sequences=True, return_state=True)
whole_sequence_output, final_state = gru_with_state(inputs)

# Observation: the GRU's final state is identical to its last output step,
# which differs from the LSTM, where a distinct cell state c is also returned.
for label, tensor in (
    ('whole_seq_output: ', whole_sequence_output),
    ('final_state (h): ', final_state),
):
    print(label, tensor.shape, tensor)

whole_seq_output:  (1, 5, 3) tf.Tensor(
[[[-0.2137111   0.04040337 -0.14240587]
  [-0.01702046 -0.0473718  -0.27545074]
  [-0.01935483  0.00455482 -0.39267024]
  [ 0.12052972  0.18415612  0.10434673]
  [ 0.25861856  0.5208134  -0.03838231]]], shape=(1, 5, 3), dtype=float32)
final_state (h):  (1, 3) tf.Tensor([[ 0.25861856  0.5208134  -0.03838231]], shape=(1, 3), dtype=float32)


# tf.keras.layers.Bidirectional

In [8]:
# Wrap an LSTM so the sequence is processed in both directions.
# The default merge_mode is 'concat', so the forward and backward outputs
# (length 3 each) are joined into a vector of length 3 + 3 = 6.
bidirectional_lstm = layers.Bidirectional(layers.LSTM(3))
output = bidirectional_lstm(inputs)
print(output.shape)

(1, 6)


In [9]:
# merge_mode: 'sum' or 'concat'
# With 'sum' the forward and backward outputs are added instead of
# concatenated, so the feature length stays at 3.
summed_bidirectional = layers.Bidirectional(layers.LSTM(3), merge_mode='sum')
output = summed_bidirectional(inputs)
print(output.shape)

(1, 3)


In [10]:
# return_sequences: True
# Every time step now yields the concatenated forward/backward output,
# giving a (batch size, seq length, 3 + 3) tensor.
sequence_lstm = layers.LSTM(3, return_sequences=True)
output = layers.Bidirectional(sequence_lstm)(inputs)
print(output.shape)

(1, 5, 6)


In [11]:
# return_sequences: True, return_state: True
# A bidirectional LSTM with return_state=True returns five tensors:
# the merged output sequence, then (h, c) for the forward direction and
# (h, c) for the backward direction.
bidirectional_with_state = layers.Bidirectional(
    layers.LSTM(3, return_sequences=True, return_state=True)
)
output, forward_h, forward_c, backward_h, backward_c = bidirectional_with_state(inputs)

# In the printed result, forward_h matches the first three features of the
# last time step, while backward_h matches the last three features of the
# first time step (the backward pass finishes at the start of the sequence).
for label, tensor in (
    ('output : ', output),
    ('forward_h : ', forward_h),
    ('forward_c : ', forward_c),
    ('backward_h : ', backward_h),
    ('backward_c : ', backward_c),
):
    print(label, tensor.shape, tensor)

output :  (1, 5, 6) tf.Tensor(
[[[ 0.20590037 -0.13478883 -0.10357321  0.23417853 -0.5115745
    0.16535993]
  [ 0.51269567  0.04870303  0.04200366  0.27141201 -0.3168338
    0.14911361]
  [ 0.47624192  0.16086176  0.07994915  0.15956287 -0.16377613
    0.02678044]
  [ 0.42288646  0.3494871   0.11617518  0.00827353 -0.36378828
    0.039612  ]
  [ 0.13780111  0.0341048   0.05465233 -0.16844952 -0.40405765
    0.04321381]]], shape=(1, 5, 6), dtype=float32)
forward_h :  (1, 3) tf.Tensor([[0.13780111 0.0341048  0.05465233]], shape=(1, 3), dtype=float32)
forward_c :  (1, 3) tf.Tensor([[0.5668829  0.07714961 0.12409285]], shape=(1, 3), dtype=float32)
backward_h :  (1, 3) tf.Tensor([[ 0.23417853 -0.5115745   0.16535993]], shape=(1, 3), dtype=float32)
backward_c :  (1, 3) tf.Tensor([[ 0.359542  -1.0086633  0.413715 ]], shape=(1, 3), dtype=float32)


# Many-to-one

In [12]:
# Many-to-one: the whole sequence is reduced to a single prediction.
# The tensors are named model_input / model_output (not `input`) so the
# Python built-in input() is not shadowed for the rest of the kernel session.
model_input = layers.Input(shape=(5, 8))  # (seq len, feature dim)
x = layers.LSTM(3)(model_input)  # only the last time step's hidden state is kept
model_output = layers.Dense(10, activation='softmax')(x)  # 10-way softmax
model = models.Model(model_input, model_output)
model.summary()

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 5, 8)]            0         
                                                                 
 lstm_9 (LSTM)               (None, 3)                 144       
                                                                 
 dense (Dense)               (None, 10)                40        
                                                                 
Total params: 184
Trainable params: 184
Non-trainable params: 0
_________________________________________________________________


In [13]:
# multi-layer (stacked) LSTM, still many-to-one
# The tensors are named model_input / model_output (not `input`) so the
# Python built-in input() is not shadowed for the rest of the kernel session.
model_input = layers.Input(shape=(5, 8))  # (seq len, feature dim)

# The first two LSTM layers must set return_sequences=True; otherwise the
# next LSTM would not receive a sequence and information could not flow down.
x1 = layers.LSTM(128, return_sequences=True)(model_input)
x2 = layers.LSTM(256, return_sequences=True)(x1)

# The last LSTM may return only the output of the final time step.
x3 = layers.LSTM(512)(x2)

model_output = layers.Dense(10, activation='softmax')(x3)
model = models.Model(model_input, model_output)
model.summary()

Model: "model_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_2 (InputLayer)        [(None, 5, 8)]            0         
                                                                 
 lstm_10 (LSTM)              (None, 5, 128)            70144     
                                                                 
 lstm_11 (LSTM)              (None, 5, 256)            394240    
                                                                 
 lstm_12 (LSTM)              (None, 512)               1574912   
                                                                 
 dense_1 (Dense)             (None, 10)                5130      
                                                                 
Total params: 2,044,426
Trainable params: 2,044,426
Non-trainable params: 0
_________________________________________________________________


# Many-to-many (same length)

In [14]:
# Example of the many-to-many type in which every time step produces an output.
# The input tensor is named model_input (not `input`) so the Python built-in
# input() is not shadowed for the rest of the kernel session.
model_input = layers.Input(shape=(500, 8))  # (seq len, feature dim)

# return_sequences=True keeps one hidden state per time step, and the Dense
# layer is then applied at every step: (None, 500, 10) -> (None, 500, 20).
x = layers.LSTM(10, return_sequences=True)(model_input)
output = layers.Dense(20, activation='softmax')(x)
model = models.Model(model_input, output)
model.summary()

Model: "model_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_3 (InputLayer)        [(None, 500, 8)]          0         
                                                                 
 lstm_13 (LSTM)              (None, 500, 10)           760       
                                                                 
 dense_2 (Dense)             (None, 500, 20)           220       
                                                                 
Total params: 980
Trainable params: 980
Non-trainable params: 0
_________________________________________________________________


# Many-to-many (different length) : Seq2seq