In [1]:
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

In [2]:
# Toy input for the demos below: 4 rows of 5 features each, kept as a plain
# nested Python list at this stage.
train_X = [
    [1, 2, 3, 4, 5],
    [2, 4, 6, 8, 10],
    [1, 3, 5, 7, 9],
    [0, 2, 4, 6, 8],
]
# Report the (rows, columns) layout of the nested list.
print(np.asarray(train_X).shape)

(4, 5)


In [3]:
# Turn the input into a float32 ndarray; the (4, 5) layout is unchanged.
train_X = np.asarray(train_X, dtype=np.float32)
print(np.shape(train_X))

(4, 5)


In [4]:
# Display the converted array to confirm its values and float32 dtype.
train_X

array([[ 1.,  2.,  3.,  4.,  5.],
       [ 2.,  4.,  6.,  8., 10.],
       [ 1.,  3.,  5.,  7.,  9.],
       [ 0.,  2.,  4.,  6.,  8.]], dtype=float32)

### RNN은 2차원이 아닌 3차원 tensor (batch_size, timesteps, input_dim)를 입력으로 받기 때문에 3차원으로 변환해준다.

In [5]:
# Add a leading batch axis so the data has the 3-D layout the RNN layers below
# are called with: (1, 4, 5).
# The reshape keeps only the last two axes and prepends a batch axis of 1, so
# re-running this cell is a no-op. Wrapping the array in another list (as a
# plain np.array([train_X]) would) adds one more axis on every re-run.
samples = np.asarray(train_X, dtype=np.float32)
train_X = samples.reshape((1,) + samples.shape[-2:])
print(train_X.shape)

(1, 4, 5)


### RNN에서 중요한 파라미터인 return_sequences와 return_state에 대해 알아보자
- 두 파라미터의 default값은 False이다

- return_sequences = False 일 때는 마지막 시점의 hidden state만 출력됨

1) 그렇다면 return_sequences가 True라면?

In [6]:
# Hidden-state width; 3 is an arbitrary choice for this demo.
hidden_size = 3
# Wrap a single-step SimpleRNNCell in the generic RNN layer.
# return_sequences=True -> the layer emits the hidden state of every time step.
cell = layers.SimpleRNNCell(units=hidden_size)
rnn = layers.RNN(cell=cell, return_state=False, return_sequences=True)
hidden_state = rnn(train_X)

# Print the input and the result, each followed by its shape.
for template, tensor in (
    ('train_X : {} \t shape : {}', train_X),
    ('hidden_state : {} \t shape : {}', hidden_state),
):
    print(template.format(tensor, tensor.shape))

# The hidden states of all time steps are returned.

train_X : [[[ 1.  2.  3.  4.  5.]
  [ 2.  4.  6.  8. 10.]
  [ 1.  3.  5.  7.  9.]
  [ 0.  2.  4.  6.  8.]]] 	 shape : (1, 4, 5)
hidden_state : [[[-0.9009483  -0.94809747 -0.999983  ]
  [-0.95548874 -0.9880447  -1.        ]
  [-0.87326026 -0.9812782  -1.        ]
  [-0.7010123  -0.9768617  -1.        ]]] 	 shape : (1, 4, 3)


return_sequences = False?
- 마지막 시점의 hidden state만 출력됨

In [7]:
# return_sequences=False -> only the hidden state of the final time step is returned.
# A brand-new cell is built here, so the printed values are not meant to match
# the output of any other cell.
hidden_size = 3  # hidden-state dimensionality
cell = layers.SimpleRNNCell(units=hidden_size)
rnn = layers.RNN(cell=cell, return_state=False, return_sequences=False)
hidden_state = rnn(train_X)

summary_line = 'hidden_state : {} \t shape : {}'.format(hidden_state, hidden_state.shape)
print(summary_line)

hidden_state : [[0.97920805 0.12901808 0.9742424 ]] 	 shape : (1, 3)


return_state = True라면?
- return_sequences의 값이 True/False인지 관계없이 마지막 시점의 은닉 상태(last state)를 추가로 출력

In [8]:
# return_state=True makes the layer return the final state in addition to its
# regular output; with return_sequences=True that output is every step's hidden state.
hidden_size = 3  # hidden-state dimensionality
cell = layers.SimpleRNNCell(units=hidden_size)
rnn = layers.RNN(cell=cell, return_state=True, return_sequences=True)
# First element: per-step hidden states; second: the state after the last step.
hidden_state, last_state = rnn(train_X)

for template, tensor in (
    ('hidden_state : {} \t shape : {}', hidden_state),
    ('last_state: {} \t shape: {}', last_state),
):
    print(template.format(tensor, tensor.shape))

hidden_state : [[[ 0.93805945 -0.92109364  0.8985214 ]
  [ 0.9714131  -0.9981801   0.998904  ]
  [ 0.9170356  -0.997459    0.9970659 ]
  [ 0.81769097 -0.9960911   0.9908694 ]]] 	 shape : (1, 4, 3)
last_state: [[ 0.81769097 -0.9960911   0.9908694 ]] 	 shape: (1, 3)


return_sequences = False인데 return_state = True인 경우는?
- 마지막 시점의 hidden state가 두 번 출력됨 (출력값과 last state가 동일)

In [9]:
# return_sequences=False together with return_state=True: the layer output and
# the returned state are both taken from the last time step.
hidden_size = 3  # hidden-state dimensionality
cell = layers.SimpleRNNCell(units=hidden_size)
rnn = layers.RNN(cell=cell, return_state=True, return_sequences=False)
hidden_state, last_state = rnn(train_X)

for template, tensor in (
    ('hidden_state : {} \t shape : {}', hidden_state),
    ('last_state: {} \t shape: {}', last_state),
):
    print(template.format(tensor, tensor.shape))

hidden_state : [[ 0.9130733 -0.9999983  0.7967856]] 	 shape : (1, 3)
last_state: [[ 0.9130733 -0.9999983  0.7967856]] 	 shape: (1, 3)


# 실습 1 - RNN

"토마토"를 학습해봅시다!

input='토토마를자먹'

output='토마토를먹자'

In [10]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import SimpleRNNCell, Dense, TimeDistributed, RNN

# Vocabulary: a character's position in this list is its integer class id.
idx2char = ['토', '마', '를', '먹', '자']

# A single training pair, stored with a leading batch axis (1 sequence of 6 ids).
x_data = [[0, 0, 1, 2, 4, 3]]  # 토 토 마 를 자 먹  (scrambled input)
y_data = [[0, 1, 0, 2, 3, 4]]  # 토 마 토 를 먹 자  (target sentence)

# Sizes are derived from the data so they cannot drift out of sync with it.
num_classes = len(idx2char)    # number of distinct tokens (5)
input_dim = num_classes        # width of a one-hot encoded input step (5)
sequence_len = len(x_data[0])  # time steps per sequence (6); len(x_data) is the batch size, 1
learning_rate = 0.1            # optimizer learning rate

### 데이터 변환 - 원핫인코딩

In [11]:
# One-hot encode the integer class ids of the input and target sequences,
# using the same number of classes for both.
x_one_hot, y_one_hot = (
    tf.keras.utils.to_categorical(class_ids, num_classes=num_classes)
    for class_ids in (x_data, y_data)
)

In [12]:
# Display the one-hot encoded input sequence.
x_one_hot

array([[[1., 0., 0., 0., 0.],
        [1., 0., 0., 0., 0.],
        [0., 1., 0., 0., 0.],
        [0., 0., 1., 0., 0.],
        [0., 0., 0., 0., 1.],
        [0., 0., 0., 1., 0.]]], dtype=float32)

In [13]:
# Display the one-hot encoded target sequence.
y_one_hot

array([[[1., 0., 0., 0., 0.],
        [0., 1., 0., 0., 0.],
        [1., 0., 0., 0., 0.],
        [0., 0., 1., 0., 0.],
        [0., 0., 0., 1., 0.],
        [0., 0., 0., 0., 1.]]], dtype=float32)

In [14]:
x_one_hot.shape
# (number of sequences, sequence length, one-hot dimension)

(1, 6, 5)

In [15]:
# Per-time-step character classifier:
#   RNN(SimpleRNNCell)             -> one hidden vector per time step (return_sequences=True)
#   TimeDistributed(Dense+softmax) -> a distribution over the num_classes tokens at each step
model = Sequential()

# The cell processes one time step at a time, so it takes no `input_shape`.
# The full (sequence_len, input_dim) shape belongs to the wrapping RNN layer only.
cell = SimpleRNNCell(units=num_classes)

model.add(
    RNN(
        cell=cell,
        return_sequences=True,   # keep every step: the target has one label per step
        return_state=False,
        input_shape=(sequence_len, input_dim),
    )
)
model.add(TimeDistributed(Dense(units=num_classes, activation='softmax')))
model.compile(
    loss='categorical_crossentropy',  # targets are one-hot encoded
    optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate),
    metrics=['accuracy'],
)
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 rnn_4 (RNN)                 (None, 6, 5)              55        
                                                                 
 time_distributed (TimeDist  (None, 6, 5)              30        
 ributed)                                                        
                                                                 
Total params: 85 (340.00 Byte)
Trainable params: 85 (340.00 Byte)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [16]:
# Train on the single (input, target) sequence pair for 10 epochs.
model.fit(x=x_one_hot, y=y_one_hot, epochs=10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.src.callbacks.History at 0x780d2936a320>

In [17]:
# Per-step class probabilities predicted for the training input.
pred = model.predict(x=x_one_hot)
# Show the raw probabilities as the cell output.
pred



array([[[8.3342808e-01, 7.8783728e-02, 1.4268801e-02, 5.3815316e-02,
         1.9704048e-02],
        [1.0453114e-01, 8.6869282e-01, 4.0427700e-04, 7.8256866e-03,
         1.8546091e-02],
        [9.4835246e-01, 9.3038799e-03, 1.3425623e-03, 3.7927125e-02,
         3.0739498e-03],
        [8.2280841e-03, 2.1934726e-04, 9.2170614e-01, 6.5925820e-03,
         6.3253775e-02],
        [5.3834710e-02, 2.0104172e-03, 9.1417907e-03, 8.8789123e-01,
         4.7121830e-02],
        [1.8604156e-02, 1.2751722e-02, 2.6045358e-02, 5.4000948e-02,
         8.8859785e-01]]], dtype=float32)

In [18]:
# Decode every predicted sequence: pick the most probable class at each time
# step and map the class ids back to characters via idx2char.
for step_probs in pred:
    char_ids = np.argmax(step_probs, axis=1)
    print(" ".join(idx2char[c] for c in char_ids))

토 마 토 를 먹 자


# LSTM

In [19]:
# Import LSTM from tensorflow.keras, the same namespace the rest of this
# notebook uses, instead of mixing in the standalone `keras` package.
from tensorflow.keras.layers import LSTM

# Hidden-state width; 3 is an arbitrary choice for this demo.
hidden_size = 3
lstm = LSTM(units=hidden_size, return_sequences=False, return_state=True)
# With return_state=True the LSTM returns three tensors: its output, the last
# hidden state and the last cell state.
hidden_state, last_state, last_cell_state = lstm(train_X)

print('hidden_state : {} \t shape : {}'.format(hidden_state, hidden_state.shape))
print('last_state: {} \t shape: {}'.format(last_state, last_state.shape))
print('last_cell_sate : {} \t shape: {}'.format(last_cell_state, last_cell_state.shape))
# Difference from the simple RNN: with return_state=True an LSTM also returns
# the last cell state.

hidden_state : [[ 5.9907019e-01 -1.2911958e-04 -2.2646226e-03]] 	 shape : (1, 3)
last_state: [[ 5.9907019e-01 -1.2911958e-04 -2.2646226e-03]] 	 shape: (1, 3)
last_cell_sate : [[ 3.0370393 -1.2554034 -2.8988013]] 	 shape: (1, 3)


In [20]:
# Hidden-state width; 3 is an arbitrary choice for this demo.
hidden_size = 3
lstm = LSTM(units=hidden_size, return_state=True, return_sequences=True)
# Output for every time step, plus the last hidden state and last cell state.
hidden_state, last_state, last_cell_state = lstm(train_X)

for template, tensor in (
    ('hidden_state : {} \t shape : {}', hidden_state),
    ('last_state: {} \t shape: {}', last_state),
    ('last_cell_sate : {} \t shape: {}', last_cell_state),
):
    print(template.format(tensor, tensor.shape))
# With return_sequences=True the hidden state of all 4 time steps is returned.

hidden_state : [[[ 5.7978645e-02 -2.0750590e-02 -7.3899436e-01]
  [ 6.6231310e-02 -2.3676634e-03 -7.6560372e-01]
  [ 8.4123462e-02 -4.3837088e-03 -7.6853728e-01]
  [ 1.4861754e-01  1.2025949e-04 -7.7017426e-01]]] 	 shape : (1, 4, 3)
last_state: [[ 1.4861754e-01  1.2025949e-04 -7.7017426e-01]] 	 shape: (1, 3)
last_cell_sate : [[ 1.62711799e-01  1.20383615e-04 -1.02384937e+00]] 	 shape: (1, 3)
