# Evn

In [3]:
import os
import random
import shutil
import json
import zipfile
import math
import copy
import collections
import re

import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import tensorflow as tf
import tensorflow.keras.backend as K

from tqdm.notebook import tqdm

In [4]:
# random seed initialize
random_seed = 1234
random.seed(random_seed)
np.random.seed(random_seed)
tf.random.set_seed(random_seed)

In [5]:
!nvidia-smi

'nvidia-smi'��(��) ���� �Ǵ� �ܺ� ����, ������ �� �ִ� ���α׷�, �Ǵ�
��ġ ������ �ƴմϴ�.


# 입력 및 Vocab

In [6]:
# 입력 문장
sentences = [
    '나는 오늘 기분이 좋아 나는 오늘 우울해'
]

In [7]:
# 각 문장을 띄어쓰기 단위로 분할
words = []
for sentence in sentences:
    words.extend(sentence.split())

# 중복 단어 제거
words = list(dict.fromkeys(words))

# 각 단어별 고유한 번호 부여
word_to_id = {'[PAD]': 0, '[UNK]': 1}
for word in words:
    word_to_id[word] = len(word_to_id)

# 각 숫자별 단어 부여
id_to_word = {_id:word for word, _id in word_to_id.items()}

word_to_id, id_to_word

({'[PAD]': 0, '[UNK]': 1, '나는': 2, '오늘': 3, '기분이': 4, '좋아': 5, '우울해': 6},
 {0: '[PAD]', 1: '[UNK]', 2: '나는', 3: '오늘', 4: '기분이', 5: '좋아', 6: '우울해'})

In [8]:
# 학습용 입력 데이터 생성
train_inputs = []
for sentence in sentences:
    train_inputs.append([word_to_id[word] for word in sentence.split()])

# train inputs을 numpy array로 변환
train_inputs = np.array(train_inputs)

train_inputs

array([[2, 3, 4, 5, 2, 3, 6]])

In [9]:
# embedding
embedding = tf.keras.layers.Embedding(len(word_to_id), 4)

In [10]:
# 단어벡터
hidden = embedding(train_inputs)
hidden

<tf.Tensor: shape=(1, 7, 4), dtype=float32, numpy=
array([[[ 0.02788493,  0.00853021,  0.02688133, -0.00167429],
        [ 0.00873896, -0.04772998, -0.00643448,  0.04782395],
        [ 0.04810704, -0.03535189,  0.04945729,  0.02813626],
        [-0.02798697, -0.00110071,  0.0133124 ,  0.00637371],
        [ 0.02788493,  0.00853021,  0.02688133, -0.00167429],
        [ 0.00873896, -0.04772998, -0.00643448,  0.04782395],
        [-0.00963336, -0.01013004,  0.01826939,  0.02741292]]],
      dtype=float32)>

# RNN

## return sequence=False, return_state=False

In [11]:
# rnn
rnn_11 = tf.keras.layers.SimpleRNN(units=5)
output_11 = rnn_11(hidden)  # (bs, units)
print(output_11)

tf.Tensor([[ 0.0133051  -0.07108936  0.01356495  0.19825459 -0.03756261]], shape=(1, 5), dtype=float32)


In [12]:
# rnn weights
weights = rnn_11.get_weights()
Wx = weights[0]
Wh = weights[1]
b = weights[2]
print(Wx.shape)  # (d_model, unit)
print(Wh.shape)  # (unit, unit)
print(b.shape)  # (unit,)

(4, 5)
(5, 5)
(5,)


## return sequence=True, return_state=False

In [13]:
# (return_sequences=True)
rnn_12 = tf.keras.layers.SimpleRNN(units=5, return_sequences=True)
output_12 = rnn_12(hidden)  # (bs, seq, units)
print(output_12)

tf.Tensor(
[[[ 2.9929111e-02  4.1185715e-03 -7.3335776e-03 -2.7490623e-02
   -4.2592518e-02]
  [ 2.1723522e-02  3.4244298e-03 -8.8698789e-03  1.3401904e-02
   -2.1286123e-02]
  [ 4.8892796e-02 -1.5492374e-02  5.2009160e-03 -5.0389312e-02
   -6.7127936e-02]
  [ 5.3107727e-02  3.3684168e-02  4.7106568e-02  2.6926834e-02
   -1.6819073e-02]
  [ 6.6921934e-02 -6.2622130e-06  3.1169422e-02 -2.7292369e-02
    2.3001323e-02]
  [ 5.5012077e-02 -4.9766436e-02 -4.8280206e-02  2.3197087e-02
    1.8371815e-02]
  [ 1.5149727e-02 -9.3269743e-02  1.8400174e-02  7.5475569e-04
   -6.5119117e-02]]], shape=(1, 7, 5), dtype=float32)


## return sequence=False, return_state=True

In [14]:
# (return_state=True)
rnn_13 = tf.keras.layers.SimpleRNN(units=5, return_state=True)
output_13, fw_h_13 = rnn_13(hidden)  # (bs, units), (bs, units)
print(output_13)
print(fw_h_13)

tf.Tensor([[-0.22489886  0.08027562  0.02204562 -0.07525434 -0.12735644]], shape=(1, 5), dtype=float32)
tf.Tensor([[-0.22489886  0.08027562  0.02204562 -0.07525434 -0.12735644]], shape=(1, 5), dtype=float32)


## return sequence=True, return_state=True

In [15]:
# (return_sequences=True, return_state=True)
rnn_14 = tf.keras.layers.SimpleRNN(units=5, return_sequences=True, return_state=True)
output_14, fw_h_14 = rnn_14(hidden)  # (bs, seq, units), (bs, units)
print(output_14)
print(fw_h_14)

tf.Tensor(
[[[-0.00567131  0.00702238  0.0166551  -0.02649674 -0.00639815]
  [ 0.00745121  0.00397363 -0.03161482  0.00067732  0.03525218]
  [ 0.02503373  0.04381723 -0.01435504 -0.07607194 -0.03647323]
  [ 0.03158387 -0.03594298 -0.03295875  0.09170677  0.00911083]
  [-0.06396969  0.06692141  0.02803504 -0.0789187  -0.05069152]
  [ 0.03279418 -0.04417153 -0.09341428  0.03336658  0.09773418]
  [ 0.01325149  0.01543665  0.01574939 -0.08289386 -0.10723302]]], shape=(1, 7, 5), dtype=float32)
tf.Tensor([[ 0.01325149  0.01543665  0.01574939 -0.08289386 -0.10723302]], shape=(1, 5), dtype=float32)


## init hidden state

In [16]:
# rnn_11 with fw_h_13
rnn_11(hidden, initial_state=[fw_h_13])  # (bs, units)

<tf.Tensor: shape=(1, 5), dtype=float32, numpy=
array([[-0.06465919, -0.2559439 , -0.12518845,  0.13231523, -0.02561211]],
      dtype=float32)>

In [17]:
# rnn_12 with fw_h_14
rnn_12(hidden, initial_state=[fw_h_14])  # (bs, n_seq, units)

<tf.Tensor: shape=(1, 7, 5), dtype=float32, numpy=
array([[[ 0.0897052 ,  0.12074084,  0.02214285, -0.00741253,
         -0.06301703],
        [ 0.06890635,  0.02803105,  0.03387965, -0.06119581,
          0.07141123],
        [ 0.13040169, -0.04140754, -0.09347052, -0.06496175,
         -0.03006374],
        [ 0.11627129, -0.03900621,  0.06014572, -0.02257578,
         -0.09820992],
        [ 0.14631154,  0.03326439,  0.08039216,  0.05096843,
         -0.02580848],
        [ 0.08789139, -0.06898137,  0.05173641,  0.02846741,
          0.1006607 ],
        [ 0.04806688, -0.15309522, -0.04242582,  0.05481387,
          0.01750816]]], dtype=float32)>

In [18]:
# rnn_13 with fw_h_13
rnn_13(hidden, initial_state=[fw_h_13])  # (bs, units)

[<tf.Tensor: shape=(1, 5), dtype=float32, numpy=
 array([[-0.16090795, -0.01084652,  0.00649461,  0.03989045,  0.08458716]],
       dtype=float32)>,
 <tf.Tensor: shape=(1, 5), dtype=float32, numpy=
 array([[-0.16090795, -0.01084652,  0.00649461,  0.03989045,  0.08458716]],
       dtype=float32)>]

In [19]:
# rnn_14 with fw_h_14
rnn_14(hidden, initial_state=[fw_h_14])  # (bs, n_seq, units)

[<tf.Tensor: shape=(1, 7, 5), dtype=float32, numpy=
 array([[[-0.04023583, -0.04856524, -0.00745722,  0.08569601,
           0.03263333],
         [-0.03993216,  0.04022603, -0.01334171, -0.12017839,
           0.0170453 ],
         [ 0.08773065, -0.04925722, -0.03939975, -0.0144046 ,
           0.00531508],
         [-0.01835137,  0.00441734,  0.0444012 ,  0.04724797,
          -0.07235703],
         [-0.06754467,  0.03039356,  0.00770028, -0.02238411,
           0.06637931],
         [ 0.07404424, -0.00928036, -0.0494388 , -0.06849192,
           0.03991262],
         [ 0.03545615, -0.01660543,  0.01871964,  0.04337613,
          -0.07521324]]], dtype=float32)>,
 <tf.Tensor: shape=(1, 5), dtype=float32, numpy=
 array([[ 0.03545615, -0.01660543,  0.01871964,  0.04337613, -0.07521324]],
       dtype=float32)>]

# LSTM

## return sequence=False, return_state=False

In [20]:
# lstm
lstm_11 = tf.keras.layers.LSTM(units=5)
output_11 = lstm_11(hidden)  # (bs, units)
print(output_11)

tf.Tensor([[ 0.01252828 -0.01186483 -0.01100006  0.00128962  0.00689079]], shape=(1, 5), dtype=float32)


In [21]:
# lstm weights
weights = lstm_11.get_weights()
Wx = weights[0]
Wh = weights[1]
b = weights[2]
print(Wx.shape)  # (d_model, unit * 4) (Wxf, Wxi, Wxc, Wxo)
print(Wh.shape)  # (unit, unit * 4) (Whf, Whi, Whc, Who)
print(b.shape)  # (unit * 4) (bf, bi, bc, bo)

(4, 20)
(5, 20)
(20,)


## return sequence=True, return_state=False

In [22]:
# (return_sequences=True)
lstm_12 = tf.keras.layers.LSTM(units=5, return_sequences=True)
output_12 = lstm_12(hidden)  # (bs, seq, units)
print(output_12)

tf.Tensor(
[[[-0.00544438 -0.00505095  0.00146407 -0.00019775 -0.00408297]
  [ 0.00092337  0.00106309 -0.00256752  0.00214575 -0.00409309]
  [-0.00466804 -0.00234009 -0.00467811  0.00283395 -0.01177095]
  [-0.00136526  0.00112071 -0.00880491  0.00172207 -0.00966275]
  [-0.00722189 -0.00401253 -0.00514568  0.00093245 -0.01217541]
  [-0.00101653  0.00202121 -0.00762216  0.00274471 -0.01070324]
  [ 0.00060078  0.00327611 -0.0104871   0.00319098 -0.00986872]]], shape=(1, 7, 5), dtype=float32)


## return sequence=False, return_state=True

In [23]:
# (return_state=True)
lstm_13 = tf.keras.layers.LSTM(units=5, return_state=True)
output_13, fw_h_13, fw_c_13 = lstm_13(hidden)  # (bs, units * 2), (bs, units), (bs, units)
print(output_13)
print(fw_h_13)
print(fw_c_13)

tf.Tensor(
[[ 7.6129837e-03 -1.5287815e-02  7.3035848e-03  4.7374528e-05
   1.1121367e-02]], shape=(1, 5), dtype=float32)
tf.Tensor(
[[ 7.6129837e-03 -1.5287815e-02  7.3035848e-03  4.7374528e-05
   1.1121367e-02]], shape=(1, 5), dtype=float32)
tf.Tensor(
[[ 1.5119700e-02 -3.0797761e-02  1.4539105e-02  9.5068710e-05
   2.2492610e-02]], shape=(1, 5), dtype=float32)


## return sequence=True, return_state=True

In [24]:
# (return_sequences=True, return_state=True)
lstm_14 = tf.keras.layers.LSTM(units=5, return_sequences=True, return_state=True)
output_14, fw_h_14, fw_c_14 = lstm_14(hidden)  # (bs, seq, units), (bs, units), (bs, units)
print(output_14)
print(fw_h_14)
print(fw_c_14)

tf.Tensor(
[[[ 0.00040525 -0.00275002  0.00038114 -0.00120663  0.00231446]
  [-0.00520107 -0.00223845 -0.00043894 -0.0053069  -0.00388033]
  [-0.00688379 -0.00716671 -0.00024295 -0.00974881 -0.00305707]
  [-0.00176606 -0.0059406   0.00495924 -0.00458891 -0.00075247]
  [-0.00092784 -0.00677031  0.00413757 -0.00491618  0.00158901]
  [-0.00619131 -0.00487417  0.00241962 -0.00824052 -0.00451456]
  [-0.00361855 -0.00470538  0.00690782 -0.00543056 -0.0025354 ]]], shape=(1, 7, 5), dtype=float32)
tf.Tensor([[-0.00361855 -0.00470538  0.00690782 -0.00543056 -0.0025354 ]], shape=(1, 5), dtype=float32)
tf.Tensor([[-0.00723412 -0.0093214   0.01380899 -0.01084146 -0.00508808]], shape=(1, 5), dtype=float32)


## init hidden state

In [25]:
# lstm_11 with fw_h_13, fw_c_13
lstm_11(hidden, initial_state=[fw_h_13, fw_c_13])  # (bs, units)

<tf.Tensor: shape=(1, 5), dtype=float32, numpy=
array([[ 0.01215496, -0.01394066, -0.01243898,  0.00035355,  0.00925506]],
      dtype=float32)>

In [26]:
# lstm_12 with fw_h_14, fw_c_14
lstm_12(hidden, initial_state=[fw_h_14, fw_c_14])  # (bs, n_seq, units)

<tf.Tensor: shape=(1, 7, 5), dtype=float32, numpy=
array([[[-7.3097218e-03, -8.6579267e-03,  6.5108282e-03, -4.6995529e-03,
         -6.1655953e-03],
        [ 2.5162593e-04, -1.8132351e-03,  1.2117121e-03, -1.3395884e-03,
         -5.7245716e-03],
        [-4.5920075e-03, -4.5240764e-03, -1.8206030e-03,  4.4292374e-05,
         -1.3040293e-02],
        [-8.1744191e-04, -5.4931070e-04, -6.6617588e-03, -3.8644689e-04,
         -1.0613153e-02],
        [-6.4202966e-03, -5.2729584e-03, -3.5087364e-03, -6.8675220e-04,
         -1.2861537e-02],
        [-8.9271700e-05,  1.0454296e-03, -6.3446402e-03,  1.5852140e-03,
         -1.1164520e-02],
        [ 1.5599118e-03,  2.5538316e-03, -9.4951764e-03,  2.3441277e-03,
         -1.0163450e-02]]], dtype=float32)>

In [27]:
# lstm_13 with fw_h_13, fw_c_13
lstm_13(hidden, initial_state=[fw_h_13, fw_c_14])  # (bs, units), (bs, units), (bs, units)

[<tf.Tensor: shape=(1, 5), dtype=float32, numpy=
 array([[ 0.00796632, -0.01567245,  0.00890056, -0.00168277,  0.01155532]],
       dtype=float32)>,
 <tf.Tensor: shape=(1, 5), dtype=float32, numpy=
 array([[ 0.00796632, -0.01567245,  0.00890056, -0.00168277,  0.01155532]],
       dtype=float32)>,
 <tf.Tensor: shape=(1, 5), dtype=float32, numpy=
 array([[ 0.01581565, -0.03159543,  0.01772808, -0.00337746,  0.02337499]],
       dtype=float32)>]

In [28]:
# lstm_14 with fw_h_14, fw_c_14
lstm_14(hidden, initial_state=[fw_h_14, fw_c_14])  # (bs, n_seq, units), (bs, units), (bs, units)

[<tf.Tensor: shape=(1, 7, 5), dtype=float32, numpy=
 array([[[-0.00241018, -0.00558355,  0.00569973, -0.0054552 ,
           0.00055526],
         [-0.00738834, -0.00379501,  0.00365711, -0.00856859,
          -0.00504782],
         [-0.0085752 , -0.00787436,  0.00294753, -0.01219721,
          -0.00383559],
         [-0.00311176, -0.00611526,  0.00749481, -0.00639922,
          -0.00124679],
         [-0.00200747, -0.00661883,  0.00610426, -0.00623128,
           0.00128764],
         [-0.00705873, -0.00454286,  0.00394774, -0.00918079,
          -0.00467455],
         [-0.00432382, -0.00428938,  0.00812357, -0.00609291,
          -0.00260176]]], dtype=float32)>,
 <tf.Tensor: shape=(1, 5), dtype=float32, numpy=
 array([[-0.00432382, -0.00428938,  0.00812357, -0.00609291, -0.00260176]],
       dtype=float32)>,
 <tf.Tensor: shape=(1, 5), dtype=float32, numpy=
 array([[-0.00864326, -0.00849324,  0.016246  , -0.01216442, -0.00522   ]],
       dtype=float32)>]

# GRU

## return sequence=False, return_state=False

In [29]:
# bi GRU
gru_11 = tf.keras.layers.GRU(units=5)
output_11 = gru_11(hidden)  # (bs, units)
print(output_11)

tf.Tensor([[-0.00128841  0.01591058 -0.01686392 -0.00689237 -0.01078903]], shape=(1, 5), dtype=float32)


In [30]:
# bi GRU weights
weights = gru_11.get_weights()
Wx = weights[0]
Wh = weights[1]
b = weights[2]
print(Wx.shape)  # (d_model, unit * 3) (Wxr, Wxz, Wxg)
print(Wh.shape)  # (unit, unit * 3) (Whr, Whz, Whg)
print(b.shape)  # (2, unit * 3) (bxr, bxz, bxg),(bhr, bhz, bhg)

(4, 15)
(5, 15)
(2, 15)


## return sequence=True, return_state=False

In [31]:
# (return_sequences=True)
gru_12 = tf.keras.layers.GRU(units=5, return_sequences=True)
output_12 = gru_12(hidden)  # (bs, seq, units)
print(output_12)

tf.Tensor(
[[[-0.00724988 -0.01204025 -0.00660731 -0.00241936  0.01091108]
  [-0.00690028 -0.00760665  0.00473171  0.02366268  0.02218487]
  [-0.01982778 -0.02418694 -0.000265    0.03126299  0.04256843]
  [-0.00805993 -0.00707422  0.00594501  0.02301816  0.02065854]
  [-0.01467839 -0.0149042  -0.00365479  0.01337637  0.02291579]
  [-0.01298529 -0.00852203  0.00638297  0.03420675  0.02937598]
  [-0.01063064 -0.00421955  0.01115373  0.0308162   0.02547237]]], shape=(1, 7, 5), dtype=float32)


## return sequence=False, return_state=True

In [34]:
# (return_state=True)
gru_13 = tf.keras.layers.GRU(units=5, return_sequences=False, return_state=True)
output_13 = gru_13(hidden)  
print(output_13)

[<tf.Tensor: shape=(1, 5), dtype=float32, numpy=
array([[-0.0019341 ,  0.01071739,  0.00788937,  0.01066312, -0.00221146]],
      dtype=float32)>, <tf.Tensor: shape=(1, 5), dtype=float32, numpy=
array([[-0.0019341 ,  0.01071739,  0.00788937,  0.01066312, -0.00221146]],
      dtype=float32)>]


## return sequence=True, return_state=True

In [36]:
# (return_sequences=True, return_state=True)
gru_14 = tf.keras.layers.GRU(units=5, return_sequences=True, return_state=True)
output_14 = gru_14(hidden)
print(output_14)

[<tf.Tensor: shape=(1, 7, 5), dtype=float32, numpy=
array([[[-0.00381347,  0.00607597,  0.00153289,  0.00026855,
          0.01145666],
        [-0.00331714,  0.01010486, -0.01224199, -0.01133284,
          0.00853806],
        [-0.00093711,  0.01803945, -0.0145158 , -0.01409486,
          0.02851611],
        [ 0.00880802,  0.01154291, -0.0096231 , -0.00740737,
          0.012196  ],
        [ 0.00363141,  0.01305108, -0.00426213, -0.00214698,
          0.01840561],
        [ 0.00235771,  0.01434101, -0.01598028, -0.01207952,
          0.01253426],
        [ 0.00402145,  0.01470503, -0.0129416 , -0.01024149,
          0.00994625]]], dtype=float32)>, <tf.Tensor: shape=(1, 5), dtype=float32, numpy=
array([[ 0.00402145,  0.01470503, -0.0129416 , -0.01024149,  0.00994625]],
      dtype=float32)>]


## init hidden state

In [32]:
gru_11(hidden, initial_state=[fw_h_13])  # (bs, units)

<tf.Tensor: shape=(1, 5), dtype=float32, numpy=
array([[-0.00107348,  0.0158253 , -0.01736606, -0.00721362, -0.01050446]],
      dtype=float32)>

In [None]:
gru_12(hidden, initial_state=[fw_h_14])  # (bs, n_seq, units)

<tf.Tensor: shape=(1, 7, 5), dtype=float32, numpy=
array([[[-0.00300019, -0.00323922, -0.01454308, -0.00958822,
          0.01849951],
        [-0.00319965, -0.00230684, -0.00040348,  0.0188137 ,
          0.02772161],
        [-0.01684529, -0.02090497, -0.00366238,  0.02805248,
          0.04653085],
        [-0.00582952, -0.00502904,  0.00368652,  0.0209058 ,
          0.02341765],
        [-0.01303946, -0.01359165, -0.00516605,  0.01199155,
          0.02483771],
        [-0.01180617, -0.00767625,  0.00535123,  0.03328608,
          0.03071455],
        [-0.00979845, -0.00366684,  0.01045386,  0.03020769,
          0.02639962]]], dtype=float32)>

In [35]:
gru_13(hidden, initial_state=[fw_h_13])  # (bs, units), (bs, units)

[<tf.Tensor: shape=(1, 5), dtype=float32, numpy=
 array([[-0.00189696,  0.01057259,  0.00823002,  0.01054995, -0.00209581]],
       dtype=float32)>,
 <tf.Tensor: shape=(1, 5), dtype=float32, numpy=
 array([[-0.00189696,  0.01057259,  0.00823002,  0.01054995, -0.00209581]],
       dtype=float32)>]

In [37]:
gru_14(hidden, initial_state=[fw_h_14])  # (bs, n_seq, units), (bs, units)

[<tf.Tensor: shape=(1, 7, 5), dtype=float32, numpy=
 array([[[-0.00717009,  0.00367933,  0.00580888, -0.00234792,
           0.00994279],
         [-0.00600007,  0.00882519, -0.00961242, -0.01255127,
           0.00756697],
         [-0.00295713,  0.01734281, -0.01287273, -0.01462314,
           0.02789093],
         [ 0.00736622,  0.01113355, -0.00857693, -0.00759361,
           0.01176882],
         [ 0.00261882,  0.01280464, -0.00359433, -0.0021952 ,
           0.01811581],
         [ 0.00166098,  0.014185  , -0.01555701, -0.01207631,
           0.01233568],
         [ 0.00354787,  0.01460313, -0.0126679 , -0.01022038,
           0.00981097]]], dtype=float32)>,
 <tf.Tensor: shape=(1, 5), dtype=float32, numpy=
 array([[ 0.00354787,  0.01460313, -0.0126679 , -0.01022038,  0.00981097]],
       dtype=float32)>]

# Bidirectional RNN

## return sequence=False, return_state=False

In [38]:
# bi rnn
# rnn_11 = tf.keras.layers.SimpleRNN(units=5)
bi_rnn_11 = tf.keras.layers.Bidirectional(tf.keras.layers.SimpleRNN(units=5))
output_11 = bi_rnn_11(hidden)  # (bs, units * 2)
print(output_11)

tf.Tensor(
[[ 0.00189243 -0.08352485 -0.07952982 -0.02134597  0.01254618 -0.10349479
  -0.02856481 -0.15433244  0.04896942  0.0649122 ]], shape=(1, 10), dtype=float32)


In [39]:
# bi rnn weights
weights = bi_rnn_11.get_weights()
Wx = weights[0]
Wh = weights[1]
b = weights[2]
print(Wx.shape)  # (d_model, unit)
print(Wh.shape)  # (unit, unit)
print(b.shape)  # (unit,)

(4, 5)
(5, 5)
(5,)


## return sequence=True, return_state=False

In [40]:
# (return_sequences=True)
bi_rnn_12 = tf.keras.layers.Bidirectional(tf.keras.layers.SimpleRNN(units=5, return_sequences=True))
output_12 = bi_rnn_12(hidden)  # (bs, seq, units* 2)
print(output_12)

tf.Tensor(
[[[ 0.02063904  0.03965816  0.03919788  0.02908319 -0.02135444
    0.03373568  0.1435729   0.0226585  -0.01834988  0.01760598]
  [-0.06732998  0.01192621  0.01037599  0.03202324 -0.04788924
   -0.02202281 -0.12320484  0.00310692  0.03954257  0.02772268]
  [-0.00692481  0.09928045  0.00049276  0.0842956  -0.02588287
    0.02357068  0.0346979   0.06235293 -0.08903261  0.01622929]
  [-0.10489257  0.04832264  0.01487477  0.05294417  0.0026204
   -0.02221387 -0.06631253 -0.00934883  0.01024336  0.03339537]
  [-0.02521384  0.11432076  0.01652069  0.04469955  0.06719343
    0.04058784  0.02231554 -0.00393318 -0.01607523  0.03056315]
  [-0.08703329  0.01557361  0.07161147  0.08565229  0.04799198
    0.02592861 -0.04054319 -0.03698469 -0.03118086  0.01623694]
  [-0.00699221  0.11693998  0.02447138  0.02125133  0.06086859
   -0.0062323  -0.04051992 -0.00674628 -0.02538864 -0.00846069]]], shape=(1, 7, 10), dtype=float32)


## return sequence=False, return_state=True

In [41]:
# (return_state=True)
bi_rnn_13 = tf.keras.layers.Bidirectional(tf.keras.layers.SimpleRNN(units=5, return_state=True))
output_13, fw_h_13, bw_h_13 = bi_rnn_13(hidden)  # (bs, units * 2), (bs, units), (bs, units)
print(output_13)
print(fw_h_13)
print(bw_h_13)

tf.Tensor(
[[ 0.10142462  0.04384787  0.03959856  0.09941955  0.05877532  0.1923726
  -0.0812621   0.04585573  0.21233372 -0.06199192]], shape=(1, 10), dtype=float32)
tf.Tensor([[0.10142462 0.04384787 0.03959856 0.09941955 0.05877532]], shape=(1, 5), dtype=float32)
tf.Tensor([[ 0.1923726  -0.0812621   0.04585573  0.21233372 -0.06199192]], shape=(1, 5), dtype=float32)


## return sequence=True, return_state=True

In [42]:
# (return_sequences=True, return_state=True)
bi_rnn_14 = tf.keras.layers.Bidirectional(tf.keras.layers.SimpleRNN(units=5, return_sequences=True, return_state=True))
output_14, fw_h_14, bw_h_14 = bi_rnn_14(hidden)  # (bs, seq, units * 2), (bs, units), (bs, units)
print(output_14)
print(fw_h_14)
print(bw_h_14)

tf.Tensor(
[[[-0.01223004 -0.03604717 -0.01213866  0.00832459 -0.00985193
   -0.06842471  0.03804661 -0.21594398 -0.07498597  0.12322075]
  [-0.01207433 -0.02585809  0.01065563  0.0117806  -0.05265585
    0.07812024 -0.1537342  -0.00190179 -0.13922735  0.13310386]
  [-0.03699969 -0.07803842  0.04209686  0.04328768 -0.0377161
    0.07696801  0.08312837 -0.06378457 -0.02303883  0.17275529]
  [-0.0314906  -0.00641297  0.09045608  0.04872285 -0.02032935
   -0.06239618 -0.07913798  0.00644678 -0.02948126  0.07311695]
  [-0.00251456  0.00482456  0.03361512  0.09806921 -0.01537914
    0.00374243  0.0306529  -0.0893319  -0.05894313  0.04060608]
  [ 0.06832524  0.01860224  0.02926935  0.06120522 -0.07875358
    0.03820577 -0.03235299  0.02452954 -0.05579672  0.06007536]
  [ 0.07677119  0.03446358  0.09452502 -0.0302586   0.00149062
    0.0218113   0.02294274 -0.00379264 -0.01796763  0.03280125]]], shape=(1, 7, 10), dtype=float32)
tf.Tensor([[ 0.07677119  0.03446358  0.09452502 -0.0302586   0.00

## init hidden state

In [43]:
# bi_rnn_11 with fw_h_13, bw_h_13
bi_rnn_11(hidden, initial_state=[fw_h_13, bw_h_13])  # (bs, units * 2)

<tf.Tensor: shape=(1, 10), dtype=float32, numpy=
array([[ 0.03366553,  0.02708558, -0.088336  ,  0.02204747,  0.11622971,
        -0.0972553 , -0.23567973, -0.14445545,  0.18366556, -0.04445885]],
      dtype=float32)>

In [44]:
# bi_rnn_12 with fw_h_14, bw_h_14
bi_rnn_12(hidden, initial_state=[fw_h_14, bw_h_14])  # (bs, n_seq, units * 2)

<tf.Tensor: shape=(1, 7, 10), dtype=float32, numpy=
array([[[-0.00958688,  0.03306839,  0.121411  , -0.05310348,
         -0.06980097, -0.00270118,  0.34216237,  0.05943167,
          0.04920276, -0.08195006],
        [-0.08903914, -0.00823845, -0.01903536, -0.08953562,
         -0.03271431, -0.14414802, -0.26751807,  0.14107464,
          0.11605872, -0.01104428],
        [ 0.02232577, -0.00883713, -0.02639924,  0.04660336,
          0.01856825,  0.02391093,  0.23287676,  0.21018222,
         -0.1373031 ,  0.04852056],
        [ 0.01353178,  0.00450392,  0.00479943,  0.05030998,
         -0.02010378, -0.12037753, -0.16119109,  0.04842454,
          0.18212938,  0.15873083],
        [ 0.00532184,  0.08084926,  0.03913521,  0.04759574,
         -0.05087122,  0.1654538 ,  0.07834639,  0.14952205,
         -0.1347119 ,  0.14144836],
        [-0.11827578,  0.02918994,  0.00035903,  0.04068655,
         -0.04247618, -0.04291089, -0.04649406, -0.14875302,
          0.07161842,  0.22109677],


In [47]:
# bi_rnn_13 with fw_h_13, bw_h_13
bi_rnn_13(hidden, initial_state=[fw_h_13, bw_h_13])

[<tf.Tensor: shape=(1, 10), dtype=float32, numpy=
 array([[ 0.21820243,  0.12051316,  0.03422751,  0.10649259,  0.11619177,
          0.31769815, -0.1452783 ,  0.04461541,  0.30978468,  0.12155467]],
       dtype=float32)>,
 <tf.Tensor: shape=(1, 5), dtype=float32, numpy=
 array([[0.21820243, 0.12051316, 0.03422751, 0.10649259, 0.11619177]],
       dtype=float32)>,
 <tf.Tensor: shape=(1, 5), dtype=float32, numpy=
 array([[ 0.31769815, -0.1452783 ,  0.04461541,  0.30978468,  0.12155467]],
       dtype=float32)>]

In [48]:
# bi_rnn_14 with fw_h_14, bw_h_14
bi_rnn_14(hidden, initial_state=[fw_h_14, bw_h_14])

[<tf.Tensor: shape=(1, 7, 10), dtype=float32, numpy=
 array([[[-0.01605416,  0.08521213,  0.00787716, -0.0052132 ,
           0.02986126, -0.08073828, -0.18156148, -0.25961453,
          -0.11645738,  0.19696285],
         [ 0.04976504,  0.01947387, -0.06751589,  0.02501584,
           0.01579398,  0.17407632, -0.08744291, -0.17113174,
          -0.17401822,  0.25746417],
         [ 0.01264177, -0.07194784, -0.03912844, -0.03732142,
          -0.07277697,  0.09552296, -0.07634453, -0.01732098,
           0.03210618,  0.3469225 ],
         [-0.05013782, -0.04536487,  0.09190008, -0.06441382,
           0.02577553, -0.14590366, -0.03704854, -0.19435053,
          -0.0259119 ,  0.20409103],
         [-0.11520636,  0.00189489, -0.01346462,  0.07705233,
           0.02070218,  0.04140914, -0.18265292, -0.1194224 ,
          -0.15420522,  0.1466956 ],
         [ 0.03836298, -0.07230781, -0.03827994,  0.11328375,
          -0.06635913,  0.1209978 ,  0.06455722, -0.10884307,
          -0.07171

# Bidirectional LSTM

## return sequence=False, return_state=False

In [49]:
# lstm
bi_lstm_11 = tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(units=5))
output_11 = bi_lstm_11(hidden)  # (bs, units * 2)
print(output_11)

tf.Tensor(
[[-0.01047844 -0.00272136  0.01447616 -0.015981   -0.00944974 -0.01022396
   0.00184913  0.00054924  0.01690395  0.00960285]], shape=(1, 10), dtype=float32)


In [50]:
# lstm weights
weights = bi_lstm_11.get_weights()
Wx = weights[0]
Wh = weights[1]
b = weights[2]
print(Wx.shape)  # (d_model, unit * 4) (Wxf, Wxi, Wxc, Wxo)
print(Wh.shape)  # (unit, unit * 4) (Whf, Whi, Whc, Who)
print(b.shape)  # (unit * 4) (bf, bi, bc, bo)

(4, 20)
(5, 20)
(20,)


## return sequence=True, return_state=False

In [51]:
# (return_sequences=True)
bi_lstm_12 = tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(units=5, return_sequences=True))
output_12 = bi_lstm_12(hidden)  # (bs, seq, units * 2)
print(output_12)

tf.Tensor(
[[[ 0.00572704  0.00143487 -0.00331461 -0.00193452 -0.00626948
    0.00095727 -0.00672461  0.00755316 -0.00082623 -0.00279788]
  [-0.00464334 -0.00477877 -0.00704962 -0.00027559  0.00117185
   -0.00306182 -0.0122618   0.01030242 -0.00170412 -0.00148065]
  [-0.00122711 -0.00535861 -0.01451639 -0.00197635 -0.00332223
    0.00109893 -0.00459027  0.00640099  0.00262677 -0.00232618]
  [-0.00385974 -0.00179318 -0.00893673  0.0011897  -0.00094004
   -0.00352854  0.00051255  0.00387651  0.00455648 -0.00036182]
  [ 0.00240173  0.00063563 -0.01005731 -0.00161065 -0.00760287
   -0.0011374  -0.00527796  0.00479362  0.00015036 -0.00207249]
  [-0.00728943 -0.00494087 -0.01211869 -0.00037022 -0.000363
   -0.00581124 -0.00962973  0.00659043 -0.00069188 -0.00031079]
  [-0.0091411  -0.00195885 -0.01065393  0.00111449  0.00126692
   -0.00256846 -0.00012026  0.00160678  0.00367497 -0.00049144]]], shape=(1, 7, 10), dtype=float32)


## return sequence=False, return_state=True

In [52]:
# (return_state=True)
bi_lstm_13 = tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(units=5, return_state=True))
output_13, fw_h_13, fw_c_13, bw_h_13, bw_c_13 = bi_lstm_13(hidden)  # (bs, units * 2), (bs, units), (bs, units), (bs, units), (bs, units)
print(output_13)
print(fw_h_13)
print(fw_c_13)
print(bw_h_13)
print(bw_c_13)

tf.Tensor(
[[-0.00669851 -0.01033889 -0.00779064  0.01347377 -0.00082833  0.01547839
   0.00376636 -0.00398766 -0.00370576 -0.00819959]], shape=(1, 10), dtype=float32)
tf.Tensor([[-0.00669851 -0.01033889 -0.00779064  0.01347377 -0.00082833]], shape=(1, 5), dtype=float32)
tf.Tensor([[-0.01344749 -0.02074135 -0.01553759  0.02708873 -0.0016644 ]], shape=(1, 5), dtype=float32)
tf.Tensor([[ 0.01547839  0.00376636 -0.00398766 -0.00370576 -0.00819959]], shape=(1, 5), dtype=float32)
tf.Tensor([[ 0.03100743  0.00752903 -0.00798306 -0.00741232 -0.0162381 ]], shape=(1, 5), dtype=float32)


## return sequence=True, return_state=True

In [53]:
# (return_sequences=True, return_state=True)
# (return_sequences=True, return_state=True)
bi_lstm_14 = tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(units=5, return_sequences=True, return_state=True))
output_14, fw_h_14, fw_c_14, bw_h_14, bw_c_14 = bi_lstm_14(hidden)  # (bs, seq, units), (bs, units), (bs, units), (bs, units), (bs, units)
print(output_14)
print(fw_h_14)
print(fw_c_14)
print(bw_h_14)
print(bw_c_14)

tf.Tensor(
[[[-1.73568190e-03  9.66656429e-04 -4.43200534e-03  9.81729609e-05
    5.99584868e-03  3.58588505e-03  6.28273608e-03 -9.33102565e-04
    1.30135426e-03 -1.71137229e-02]
  [-7.28761358e-03 -3.23330332e-03 -9.18741710e-03 -7.00382655e-03
   -2.57872953e-03  4.31145914e-03  1.27696879e-02  2.18096888e-03
   -2.25784932e-03 -2.00917311e-02]
  [-1.40764881e-02 -5.22490079e-03 -2.07485575e-02 -9.74385813e-03
    3.33819329e-03 -7.73747306e-05  9.84441210e-03  1.33407535e-03
    2.33387039e-03 -1.52610773e-02]
  [-1.04366727e-02 -7.36432569e-03 -1.33569920e-02 -7.55398208e-03
    6.90097004e-05 -1.78182859e-03  4.63476637e-03  1.80065818e-03
   -4.06197691e-03 -6.97691925e-03]
  [-9.36021190e-03 -4.31238394e-03 -1.47491610e-02 -6.92136539e-03
    6.73899380e-03  2.75624776e-03  5.51511627e-03  2.85831600e-04
   -1.84834193e-04 -9.86234285e-03]
  [-1.27383284e-02 -6.90077711e-03 -1.69051476e-02 -1.33840935e-02
   -1.56315311e-03  3.62244830e-03  1.00047905e-02  3.20798019e-03
   -5

## init hidden state

In [54]:
# bi_lstm_11 with fw_h_13, fw_c_13, bw_h_13, bw_c_13
bi_lstm_11(hidden, initial_state=[fw_h_13, fw_c_13, bw_h_13, bw_c_13])  # (bs, units * 2)

<tf.Tensor: shape=(1, 10), dtype=float32, numpy=
array([[-0.01044984, -0.00225714,  0.01167913, -0.01586506, -0.00975995,
        -0.0087426 ,  0.00403183,  0.00167814,  0.01551284,  0.00888616]],
      dtype=float32)>

In [None]:
# bi_lstm_12 with fw_h_14, fw_c_14, bw_h_14, bw_c_14
bi_lstm_12(hidden, initial_state=[fw_h_14, fw_c_14, bw_h_14, bw_c_14])  # (bs, n_seq, units * 2)

<tf.Tensor: shape=(1, 7, 10), dtype=float32, numpy=
array([[[-0.00878573, -0.00138058, -0.00968741,  0.00640582,
          0.00185933, -0.0087426 ,  0.00403183,  0.00167814,
          0.01551284,  0.00888616],
        [-0.0080578 , -0.00069413, -0.00048683, -0.00101975,
         -0.00723064, -0.01281356,  0.00418963,  0.00720075,
          0.01938561,  0.00453016],
        [-0.01302137,  0.0030489 ,  0.00382255, -0.00920187,
         -0.00642651, -0.00775529,  0.00828512,  0.00324923,
          0.01202187,  0.00806246],
        [-0.0085477 , -0.00070268,  0.00368323, -0.01126228,
         -0.00488006,  0.00182763,  0.00492204,  0.00440744,
          0.00702173, -0.00042675],
        [-0.0108273 ,  0.00209046,  0.002517  , -0.0111129 ,
         -0.00116849, -0.00197708,  0.0065663 ,  0.00252955,
          0.00687623,  0.00017145],
        [-0.01069329, -0.00088743,  0.01118687, -0.01366006,
         -0.00971235, -0.00227178,  0.00494221,  0.00695366,
          0.00949318, -0.00672844],


In [55]:
# bi_lstm_13 with fw_h_13, fw_c_13, bw_h_13, bw_c_13
bi_lstm_13(hidden, initial_state=[fw_h_13, fw_c_13, bw_h_13, bw_c_13])

[<tf.Tensor: shape=(1, 10), dtype=float32, numpy=
 array([[-0.00782868, -0.01307623, -0.00731933,  0.01435827, -0.00225139,
          0.01593334,  0.0055482 , -0.0060663 , -0.00531265, -0.01116456]],
       dtype=float32)>,
 <tf.Tensor: shape=(1, 5), dtype=float32, numpy=
 array([[-0.00782868, -0.01307623, -0.00731933,  0.01435827, -0.00225139]],
       dtype=float32)>,
 <tf.Tensor: shape=(1, 5), dtype=float32, numpy=
 array([[-0.01571304, -0.02624407, -0.01459286,  0.02888137, -0.00452371]],
       dtype=float32)>,
 <tf.Tensor: shape=(1, 5), dtype=float32, numpy=
 array([[ 0.01593334,  0.0055482 , -0.0060663 , -0.00531265, -0.01116456]],
       dtype=float32)>,
 <tf.Tensor: shape=(1, 5), dtype=float32, numpy=
 array([[ 0.03193098,  0.01108811, -0.01215264, -0.01063845, -0.02210738]],
       dtype=float32)>]

In [56]:
# bi_lstm_14 with fw_h_14, fw_c_14, bw_h_14, bw_c_14
bi_lstm_14(hidden, initial_state=[fw_h_14, fw_c_14, bw_h_14, bw_c_14])

[<tf.Tensor: shape=(1, 7, 10), dtype=float32, numpy=
 array([[[-1.04460837e-02, -5.75428503e-03, -1.51174720e-02,
          -1.06630018e-02,  4.14686324e-03,  4.68043843e-03,
           4.43026749e-03, -2.26083025e-03, -1.06409297e-03,
          -1.94848534e-02],
         [-1.34683149e-02, -7.94618204e-03, -1.70643963e-02,
          -1.61122922e-02, -3.28263291e-03,  5.61679387e-03,
           1.07295765e-02,  5.99681865e-04, -4.72701155e-03,
          -2.32779253e-02],
         [-1.85028408e-02, -8.59788433e-03, -2.64112856e-02,
          -1.75623894e-02,  3.32411123e-03,  1.51165563e-03,
           7.80272018e-03, -4.54908470e-04, -1.74975939e-04,
          -1.95330251e-02],
         [-1.35340365e-02, -9.73690581e-03, -1.73044503e-02,
          -1.38761597e-02,  4.28098312e-04,  8.07055985e-05,
           2.85251974e-03, -1.78919363e-04, -6.44189958e-03,
          -1.27323698e-02],
         [-1.15437629e-02, -5.96557325e-03, -1.73409060e-02,
          -1.22294575e-02,  7.28086615e-03

# Bidirectional GRU

## return sequence=False, return_state=False

In [None]:
# bi GRU

In [None]:
# bi GRU weights

## return sequence=True, return_state=False

In [None]:
# (return_sequences=True)

## return sequence=False, return_state=True

In [None]:
# (return_state=True)

## return sequence=True, return_state=True

In [None]:
# (return_sequences=True, return_state=True)

## init hidden state

In [None]:
# bi_gru_11 with fw_h_13, bw_h_13

In [None]:
# bi_gru_12 with fw_h_14, bw_h_14

In [None]:
# bi_gru_13 with fw_h_13, bw_h_13

In [None]:
# bi_gru_14 with fw_h_14, bw_h_14

# RNN 모델

In [57]:
def build_model(n_vocab, d_model, n_seq, n_out):
    """
    RNN 모델
    :param n_vocab: vocabulary 단어 수
    :param d_model: 단어를 의미하는 벡터의 차원 수
    :param n_seq: 문장길이 (단어 수)
    :param n_out: 예측할 class 개수
    """
    input = tf.keras.layers.Input(shape=(n_seq,))

    embedding = tf.keras.layers.Embedding(n_vocab, d_model)  # (n_vocab x d_model)
    hidden = embedding(input)  # (bs, 1, d_model)

    ########################################
    rnn_1 = tf.keras.layers.Bidirectional(tf.keras.layers.SimpleRNN(units=128, return_sequences=True))
    hidden = rnn_1(hidden)  # (bs, 128)
    rnn_2 = tf.keras.layers.SimpleRNN(units=128, return_sequences=True)
    hidden = rnn_2(hidden)
    ########################################

    output = tf.keras.layers.Dense(n_out, activation=tf.nn.softmax)(hidden)  # (bs, 1, n_vocab)

    model = tf.keras.Model(inputs=input, outputs=output)
    return model

In [58]:
# 모델 생성
model = build_model(len(word_to_id), 8, 7, 2)
# 모델 내용 그래프 출력
tf.keras.utils.plot_model(model, 'model.png', show_shapes=True)

('Failed to import pydot. You must `pip install pydot` and install graphviz (https://graphviz.gitlab.io/download/), ', 'for `pydotprint` to work.')


In [59]:
model.predict(train_inputs)

array([[[0.5035379 , 0.4964621 ],
        [0.5245969 , 0.47540313],
        [0.47764194, 0.52235806],
        [0.50906247, 0.49093753],
        [0.4950456 , 0.50495446],
        [0.50214225, 0.49785778],
        [0.444639  , 0.55536103]]], dtype=float32)