In [1]:
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

In [2]:
# Toy input as a plain nested list: 4 rows of 5 values each.
train_X = [
    [1, 2, 3, 4, 5],
    [2, 4, 6, 8, 10],
    [1, 3, 5, 7, 9],
    [0, 2, 4, 6, 8],
]
# Report (rows, columns) of the nested list.
print((len(train_X), len(train_X[0])))

(4, 5)


In [3]:
# Convert the nested list into a float32 NumPy array.
train_X = np.asarray(train_X, dtype=np.float32)
print(np.shape(train_X))

(4, 5)


In [4]:
# Display the float32 array produced by the conversion above.
train_X

array([[ 1.,  2.,  3.,  4.,  5.],
       [ 2.,  4.,  6.,  8., 10.],
       [ 1.,  3.,  5.,  7.,  9.],
       [ 0.,  2.,  4.,  6.,  8.]], dtype=float32)

### RNN은 2차원이 아닌 3차원 tensor (batch_size, timesteps, input_dim)를 입력으로 받기 때문에, 배치 축을 추가하여 3차원으로 변환해준다.

In [5]:
# The RNN layers below take a 3-D input, so prepend a batch axis of size 1
# to the 2-D array.
# The ndim guard keeps this cell idempotent: re-running it does not wrap the
# array a second time (which would yield a 4-D (1, 1, 4, 5) array).
train_X = np.asarray(train_X, dtype=np.float32)
if train_X.ndim == 2:
    train_X = train_X[np.newaxis, ...]
print(train_X.shape)

(1, 4, 5)


### RNN에서 중요한 파라미터인 return_sequences와 return_state에 대해 알아보자
- 두 파라미터의 default값은 False이다

- return_sequences = False일 때는 마지막 시점의 hidden state만 출력됨

1) 그렇다면 return_sequences가 True라면?

In [6]:
# The hidden size is chosen arbitrarily as 3 for this demonstration.
hidden_size = 3  # dimensionality of the hidden state
cell = layers.SimpleRNNCell(hidden_size)
# return_sequences=True: the hidden state of every time step is returned.
rnn = layers.RNN(cell, return_state=False, return_sequences=True)
hidden_state = rnn(train_X)

print(f'train_X : {train_X} \t shape : {train_X.shape}')
print(f'hidden_state : {hidden_state} \t shape : {hidden_state.shape}')

# The hidden states of all time steps are printed.

train_X : [[[ 1.  2.  3.  4.  5.]
  [ 2.  4.  6.  8. 10.]
  [ 1.  3.  5.  7.  9.]
  [ 0.  2.  4.  6.  8.]]] 	 shape : (1, 4, 5)
hidden_state : [[[-0.99834466 -0.9995572  -0.7017604 ]
  [-1.         -1.         -0.98164487]
  [-1.         -1.         -0.9774209 ]
  [-0.99999917 -0.9999988  -0.96423507]]] 	 shape : (1, 4, 3)


return_sequences = False라면?
- 마지막 시점의 hidden state만 출력됨

In [7]:
hidden_size = 3  # dimensionality of the hidden state
cell = layers.SimpleRNNCell(hidden_size)
# return_sequences=False: only the hidden state of the last time step is returned.
rnn = layers.RNN(cell, return_state=False, return_sequences=False)
hidden_state = rnn(train_X)

print(f'hidden_state : {hidden_state} \t shape : {hidden_state.shape}')

hidden_state : [[-0.9997105 -0.7454802 -1.       ]] 	 shape : (1, 3)


return_state = True라면?
- return_sequences의 값이 True/False인지와 관계없이 마지막 시점의 은닉 상태(hidden state)를 추가로 반환한다

In [8]:
hidden_size = 3  # dimensionality of the hidden state
cell = layers.SimpleRNNCell(hidden_size)
# With return_state=True the layer returns two values: the per-step hidden
# states (because return_sequences=True) and the final hidden state.
rnn = layers.RNN(cell, return_state=True, return_sequences=True)
hidden_state, last_state = rnn(train_X)

print(f'hidden_state : {hidden_state} \t shape : {hidden_state.shape}')
print(f'last_state: {last_state} \t shape: {last_state.shape}')

hidden_state : [[[-0.9999975   0.9998937  -0.9985452 ]
  [-1.          1.         -0.9999995 ]
  [-1.          0.99999267 -0.9999933 ]
  [-1.          0.9993174  -0.999917  ]]] 	 shape : (1, 4, 3)
last_state: [[-1.         0.9993174 -0.999917 ]] 	 shape: (1, 3)


return_sequences = False인데 return_state = True인 경우는?
- 두 출력 모두 마지막 시점의 hidden state이다 (같은 값이 두 번 반환됨)

In [9]:
hidden_size = 3  # dimensionality of the hidden state
cell = layers.SimpleRNNCell(hidden_size)
# return_sequences=False with return_state=True: the layer still returns two
# values, and both are the hidden state of the last time step.
rnn = layers.RNN(cell, return_state=True, return_sequences=False)
hidden_state, last_state = rnn(train_X)

print(f'hidden_state : {hidden_state} \t shape : {hidden_state.shape}')
print(f'last_state: {last_state} \t shape: {last_state.shape}')

hidden_state : [[ 0.99999326 -0.2476268  -0.9999801 ]] 	 shape : (1, 3)
last_state: [[ 0.99999326 -0.2476268  -0.9999801 ]] 	 shape: (1, 3)


# 실습 1 - RNN

"토마토"를 학습해봅시다!

input='토토마를자먹'

output='토마토를먹자'

In [10]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import SimpleRNNCell, Dense, TimeDistributed, RNN

# Vocabulary: position in the list is the integer id of the character.
idx2char = ['토', '마', '를', '먹', '자']

# One input sequence and its target sequence, written as character ids.
x_data = [[0, 0, 1, 2, 4, 3]]  # 토 토 마 를 자 먹
y_data = [[0, 1, 0, 2, 3, 4]]  # 토 마 토 를 먹 자

# Sizes are derived from the data so they cannot drift out of sync with it.
num_classes = len(idx2char)    # number of distinct tokens (5)
input_dim = num_classes        # one-hot input width equals the vocabulary size
# Time steps per sequence (6). Note: len(x_data) is the number of sequences
# (1), so the length must be taken from the inner list.
sequence_len = len(x_data[0])
learning_rate = 0.1            # optimizer learning rate

### 데이터 변환 - 원핫인코딩

In [11]:
# One-hot encode the input ids and the target ids with the same class count.
x_one_hot, y_one_hot = (
    tf.keras.utils.to_categorical(ids, num_classes=num_classes)
    for ids in (x_data, y_data)
)

In [12]:
# Inspect the one-hot encoded input sequence.
x_one_hot

array([[[1., 0., 0., 0., 0.],
        [1., 0., 0., 0., 0.],
        [0., 1., 0., 0., 0.],
        [0., 0., 1., 0., 0.],
        [0., 0., 0., 0., 1.],
        [0., 0., 0., 1., 0.]]], dtype=float32)

In [13]:
# Inspect the one-hot encoded target sequence.
y_one_hot

array([[[1., 0., 0., 0., 0.],
        [0., 1., 0., 0., 0.],
        [1., 0., 0., 0., 0.],
        [0., 0., 1., 0., 0.],
        [0., 0., 0., 1., 0.],
        [0., 0., 0., 0., 1.]]], dtype=float32)

In [14]:
x_one_hot.shape
# (number of sequences, sequence length, feature dimension)

(1, 6, 5)

In [15]:
# Character-level sequence model: a SimpleRNN that emits a hidden state at
# every time step, followed by a per-step softmax over the classes.
model = Sequential()

# The cell processes one time step at a time, so it takes no input_shape;
# the sequence shape is declared once, on the wrapping RNN layer below.
cell = SimpleRNNCell(units=num_classes)

# return_sequences=True keeps the output for every time step so that the
# TimeDistributed Dense layer can classify each step independently.
model.add(RNN(cell=cell, return_sequences=True, return_state=False, input_shape=(sequence_len, input_dim)))
model.add(TimeDistributed(Dense(units=num_classes, activation='softmax')))
model.compile(loss='categorical_crossentropy', optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate), metrics=['accuracy'])
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 rnn_4 (RNN)                 (None, 6, 5)              55        
                                                                 
 time_distributed (TimeDist  (None, 6, 5)              30        
 ributed)                                                        
                                                                 
Total params: 85 (340.00 Byte)
Trainable params: 85 (340.00 Byte)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [16]:
# Train for 10 epochs on the one-hot input/target sequence pair.
model.fit(x=x_one_hot, y=y_one_hot, epochs=10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.src.callbacks.History at 0x7f8a1838edd0>

In [17]:
# Run the trained model on the training input. The final layer is a softmax,
# so each time step yields one probability per class.
pred = model.predict(x_one_hot)
pred



array([[[8.4141904e-01, 9.0626746e-02, 1.1299952e-02, 4.1563574e-02,
         1.5090697e-02],
        [7.2904974e-02, 9.0772998e-01, 6.4400083e-04, 4.0364084e-03,
         1.4684646e-02],
        [7.0864296e-01, 5.4933704e-02, 1.5539350e-01, 9.7023733e-03,
         7.1327537e-02],
        [1.2347493e-01, 8.3768653e-04, 7.0038038e-01, 1.6289704e-01,
         1.2409945e-02],
        [8.0955669e-02, 1.2966301e-02, 6.1087046e-02, 8.1975847e-01,
         2.5232477e-02],
        [7.8549117e-02, 9.3525320e-02, 2.4942605e-02, 7.4436464e-03,
         7.9553932e-01]]], dtype=float32)

In [18]:
# Decode the predictions: take the most probable class id at every time step
# (argmax over the last axis), then map each id back to its character and
# print one space-separated line per sequence.
for char_ids in np.argmax(pred, axis=-1):
    print(" ".join(idx2char[c] for c in char_ids))

토 마 토 를 먹 자
