In [1]:
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

In [2]:
# Toy data set: four rows with five values each, kept as a plain nested list.
train_X = [
    [1, 2, 3, 4, 5],
    [2, 4, 6, 8, 10],
    [1, 3, 5, 7, 9],
    [0, 2, 4, 6, 8],
]
# Report the (rows, columns) layout of the nested list.
print(np.asarray(train_X).shape)

(4, 5)


In [3]:
# Turn the data into a float32 ndarray; the shape is unchanged by the cast.
train_X = np.array(train_X).astype(np.float32)
print(np.shape(train_X))

(4, 5)


In [4]:
# Show the current contents of train_X as the cell output.
train_X

array([[ 1.,  2.,  3.,  4.,  5.],
       [ 2.,  4.,  6.,  8., 10.],
       [ 1.,  3.,  5.,  7.,  9.],
       [ 0.,  2.,  4.,  6.,  8.]], dtype=float32)

### RNN은 2차원이 아닌 3차원 tensor (batch_size, timesteps, input_dim)를 입력으로 받기 때문에 3차원으로 변환해준다.

In [5]:
# Keras recurrent layers take 3-D input, so the 2-D matrix is wrapped as a
# batch holding a single sequence: (4, 5) -> (1, 4, 5).
train_X = np.asarray(train_X, dtype=np.float32)
if train_X.ndim == 2:
    # Only add the leading batch axis when it is missing. Re-running this cell
    # would otherwise keep stacking axes ((1, 1, 4, 5), ...) and break the
    # RNN cells further down.
    train_X = train_X[np.newaxis, ...]
print(train_X.shape)

(1, 4, 5)


### RNN에서 중요한 파라미터인 return_sequences와 return_state에 대해 알아보자
- 두 파라미터의 default값은 False이다

- return_sequences = False일 때는 마지막 시점의 hidden state만 출력됨

1) 그렇다면 return_sequences가 True라면?

In [6]:
# The hidden-state width is an arbitrary choice for this demo.
hidden_size = 3
cell = layers.SimpleRNNCell(hidden_size)  # single-step recurrent cell
# return_sequences=True: the layer returns the hidden state of every time step.
rnn = layers.RNN(cell=cell, return_sequences=True, return_state=False)
hidden_state = rnn(train_X)

# Print each tensor together with its shape.
report = (
    ('train_X : {} \t shape : {}', train_X),
    ('hidden_state : {} \t shape : {}', hidden_state),
)
for template, tensor in report:
    print(template.format(tensor, tensor.shape))

# The hidden states of all time steps are printed.

train_X : [[[ 1.  2.  3.  4.  5.]
  [ 2.  4.  6.  8. 10.]
  [ 1.  3.  5.  7.  9.]
  [ 0.  2.  4.  6.  8.]]] 	 shape : (1, 4, 5)
hidden_state : [[[ 0.98892605 -0.97905314 -0.9714717 ]
  [ 0.9999769  -0.99998623 -0.9977943 ]
  [ 0.9999788  -0.99983656 -0.99803555]
  [ 0.99998057 -0.99792165 -0.99829084]]] 	 shape : (1, 4, 3)


return_sequences = False라면?
- 마지막 시점의 hidden state만 출력됨

In [7]:
hidden_size = 3  # dimensionality of the hidden state
cell = layers.SimpleRNNCell(hidden_size)  # single-step recurrent cell
# Both flags off: only the hidden state of the final time step is returned.
rnn = layers.RNN(cell=cell, return_sequences=False, return_state=False)
hidden_state = rnn(train_X)

template = 'hidden_state : {} \t shape : {}'
print(template.format(hidden_state, hidden_state.shape))

hidden_state : [[-0.9960491  0.9690958  0.9822106]] 	 shape : (1, 3)


return_state = True라면?
- return_sequences의 값이 True/False인지 관계없이 마지막 시점의 은닉 상태를 추가로 반환

In [8]:
hidden_size = 3  # dimensionality of the hidden state
cell = layers.SimpleRNNCell(hidden_size)  # single-step recurrent cell
# return_state=True: the layer returns the final state next to its regular
# output, which here is the full sequence of hidden states.
rnn = layers.RNN(cell=cell, return_sequences=True, return_state=True)
outputs = rnn(train_X)
hidden_state, last_state = outputs

report = (
    ('hidden_state : {} \t shape : {}', hidden_state),
    ('last_state: {} \t shape: {}', last_state),
)
for template, tensor in report:
    print(template.format(tensor, tensor.shape))

hidden_state : [[[-0.9988192  -0.21659556  0.13683993]
  [-0.99999744  0.24029484 -0.12828583]
  [-0.9999333  -0.36182424  0.76637256]
  [-0.99915475 -0.76366997  0.55207014]]] 	 shape : (1, 4, 3)
last_state: [[-0.99915475 -0.76366997  0.55207014]] 	 shape: (1, 3)


return_sequences = False인데 return_state = True인 경우는?
- 출력과 state 모두 마지막 시점의 hidden state이므로 두 값이 동일함

In [9]:
hidden_size = 3  # dimensionality of the hidden state
cell = layers.SimpleRNNCell(hidden_size)  # single-step recurrent cell
# return_sequences=False with return_state=True: both returned tensors are the
# hidden state of the final time step.
rnn = layers.RNN(cell=cell, return_sequences=False, return_state=True)
outputs = rnn(train_X)
hidden_state, last_state = outputs

report = (
    ('hidden_state : {} \t shape : {}', hidden_state),
    ('last_state: {} \t shape: {}', last_state),
)
for template, tensor in report:
    print(template.format(tensor, tensor.shape))

hidden_state : [[-1. -1.  1.]] 	 shape : (1, 3)
last_state: [[-1. -1.  1.]] 	 shape: (1, 3)


# 실습 1 - RNN

"토마토"를 학습해봅시다!

input='토토마를자먹'

output='토마토를먹자'

In [10]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import SimpleRNNCell, Dense, TimeDistributed, RNN

# Vocabulary: a character's position in this list is its integer token id.
idx2char = ['토', '마', '를', '먹', '자']

# One training pair stored as token ids, shaped (batch of 1, sequence).
x_data = [[0, 0, 1, 2, 4, 3]]  # 토 토 마 를 자 먹  (scrambled input)
y_data = [[0, 1, 0, 2, 3, 4]]  # 토 마 토 를 먹 자  (target order)

# Sizes are derived from the data above so they cannot drift out of sync.
num_classes = len(idx2char)    # number of distinct tokens (5)
input_dim = num_classes        # one-hot input width equals the vocabulary size
# Time steps per sequence (6). This is len(x_data[0]); len(x_data) would be the
# number of sequences in the batch, which is 1.
sequence_len = len(x_data[0])
learning_rate = 0.1            # optimizer step size

### 데이터 변환 - 원핫인코딩

In [11]:
# One-hot encode the input and target token ids with the same class count.
x_one_hot, y_one_hot = (
    tf.keras.utils.to_categorical(token_ids, num_classes=num_classes)
    for token_ids in (x_data, y_data)
)

In [12]:
# Show the one-hot encoded input sequence as the cell output.
x_one_hot

array([[[1., 0., 0., 0., 0.],
        [1., 0., 0., 0., 0.],
        [0., 1., 0., 0., 0.],
        [0., 0., 1., 0., 0.],
        [0., 0., 0., 0., 1.],
        [0., 0., 0., 1., 0.]]], dtype=float32)

In [13]:
# Show the one-hot encoded target sequence as the cell output.
y_one_hot

array([[[1., 0., 0., 0., 0.],
        [0., 1., 0., 0., 0.],
        [1., 0., 0., 0., 0.],
        [0., 0., 1., 0., 0.],
        [0., 0., 0., 1., 0.],
        [0., 0., 0., 0., 1.]]], dtype=float32)

In [14]:
x_one_hot.shape
# (number of sequences, sequence length, feature dimension)

(1, 6, 5)

In [15]:
# Per-time-step classifier: the RNN emits a hidden state at every time step
# and a Dense+softmax layer, applied to each step, turns it into token
# probabilities.
model = Sequential()

# The cell handles a single time step, so it takes no input_shape of its own.
# The sequence shape is declared once, on the wrapping RNN layer below.
cell = SimpleRNNCell(units=num_classes)

model.add(RNN(cell=cell, return_sequences=True, return_state=False, input_shape=(sequence_len, input_dim)))
model.add(TimeDistributed(Dense(units=num_classes, activation='softmax')))
model.compile(loss='categorical_crossentropy', optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate), metrics=['accuracy'])
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 rnn_4 (RNN)                 (None, 6, 5)              55        
                                                                 
 time_distributed (TimeDist  (None, 6, 5)              30        
 ributed)                                                        
                                                                 
Total params: 85 (340.00 Byte)
Trainable params: 85 (340.00 Byte)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [16]:
# Train on the single one-hot encoded (input, target) pair for 10 epochs.
model.fit(x_one_hot, y_one_hot, epochs=10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.src.callbacks.History at 0x7ec2a4451570>

In [17]:
# Run the trained model on the training input and display the raw output
# (one softmax vector per time step).
pred = model.predict(x_one_hot)
pred



array([[[0.76311815, 0.15428029, 0.00470235, 0.04048477, 0.03741442],
        [0.3135152 , 0.67806923, 0.00114732, 0.00485737, 0.00241091],
        [0.76251227, 0.10409733, 0.09154917, 0.03147138, 0.0103698 ],
        [0.04310947, 0.0025311 , 0.6027566 , 0.30742213, 0.04418066],
        [0.09239992, 0.01797698, 0.09610111, 0.7016543 , 0.09186763],
        [0.02613313, 0.02018855, 0.04598192, 0.09342413, 0.8142723 ]]],
      dtype=float32)

In [18]:
# Decode the predictions: take the most probable token id at every time step
# and map it back to its character, printing one line per sequence.
best_ids = np.argmax(pred, axis=-1)
for sequence_ids in best_ids:
    decoded_chars = [idx2char[token_id] for token_id in sequence_ids]
    print(" ".join(decoded_chars))

토 마 토 를 먹 자


# LSTM

In [19]:
# Import from tensorflow.keras, matching the rest of this notebook, instead of
# the standalone keras package.
from tensorflow.keras.layers import LSTM

# The hidden-state width is an arbitrary choice for this demo.
hidden_size = 3  # dimensionality of the hidden state
lstm = LSTM(units=hidden_size, return_sequences=False, return_state=True)
hidden_state, last_state, last_cell_state = lstm(train_X)

print('hidden_state : {} \t shape : {}'.format(hidden_state, hidden_state.shape))
print('last_state: {} \t shape: {}'.format(last_state, last_state.shape))
print('last_cell_sate : {} \t shape: {}'.format(last_cell_state, last_cell_state.shape))
# Difference from the plain RNN: with return_state=True an LSTM additionally
# returns the final cell state (last_cell_state).

hidden_state : [[ 0.03022338  0.66187847 -0.06405859]] 	 shape : (1, 3)
last_state: [[ 0.03022338  0.66187847 -0.06405859]] 	 shape: (1, 3)
last_cell_sate : [[ 0.05069714  0.81916946 -0.0641546 ]] 	 shape: (1, 3)


In [20]:
# The hidden-state width is an arbitrary choice for this demo.
hidden_size = 3
lstm = LSTM(hidden_size, return_sequences=True, return_state=True)
outputs = lstm(train_X)
hidden_state, last_state, last_cell_state = outputs

report = (
    ('hidden_state : {} \t shape : {}', hidden_state),
    ('last_state: {} \t shape: {}', last_state),
    ('last_cell_sate : {} \t shape: {}', last_cell_state),
)
for template, tensor in report:
    print(template.format(tensor, tensor.shape))
# With return_sequences=True every hidden state is returned, so hidden_state
# holds one entry for each of the 4 time steps.

hidden_state : [[[-2.3451231e-02  9.1874921e-05  1.1196640e-01]
  [-1.1326149e-03  1.1429830e-05  9.9176429e-02]
  [-3.1324616e-03  1.7060569e-05  1.0648207e-01]
  [-8.2548279e-03  2.8237309e-05  1.0195712e-01]]] 	 shape : (1, 4, 3)
last_state: [[-8.2548279e-03  2.8237309e-05  1.0195712e-01]] 	 shape: (1, 3)
last_cell_sate : [[-3.8344331e+00  1.6974495e-04  2.0273790e+00]] 	 shape: (1, 3)


# GRU

In [21]:
# Import from tensorflow.keras, matching the rest of this notebook, instead of
# the standalone keras package.
from tensorflow.keras.layers import GRU

# The hidden-state width is an arbitrary choice for this demo.
hidden_size = 3  # dimensionality of the hidden state
# return_state=True on a GRU yields (output, final state); with
# return_sequences=False both are the hidden state of the final time step.
gru = GRU(units=hidden_size, return_sequences=False, return_state=True)
hidden_state, last_state = gru(train_X)

print('hidden_state : {} \t shape : {}'.format(hidden_state, hidden_state.shape))
print('last_state: {} \t shape: {}'.format(last_state, last_state.shape))

hidden_state : [[ 0.00539069 -0.98952353 -0.99991465]] 	 shape : (1, 3)
last_state: [[ 0.00539069 -0.98952353 -0.99991465]] 	 shape: (1, 3)
