In [1]:
from keras.layers import LSTM, GRU, Input
from keras.models import Sequential, Model
from keras.layers import Embedding

Using TensorFlow backend.


In [2]:
import numpy as np

# 0. 샘플 데이터 생성
## 참조: http://121.140.2.142:8888/notebooks/test_notes/keras/Embedding%20Layer%20%26%20Padding.ipynb 의 샘플 데이터 생성

## 설명: Sequence length 10짜리 데이터

In [3]:
# One random "sentence" of 10 word ids, each drawn from [1, max_count_word).
num_sentence, sequence_length, max_count_word = 1, 10, 100
sentences = np.random.randint(
    low=1,
    high=max_count_word,
    size=(num_sentence, sequence_length),
    dtype='int32',
)
print(sentences)
print('Shape:', sentences.shape)

[[ 4 21 64 13 77 66 54 30 81 87]]
Shape: (1, 10)


## 설명: Sequence length 7짜리 데이터

In [4]:
# A second random "sentence", this time only 7 word ids long.
num_sentence, sequence_length2, max_count_word = 1, 7, 100
sentences2 = np.random.randint(
    low=1,
    high=max_count_word,
    size=(num_sentence, sequence_length2),
    dtype='int32',
)
print(sentences2)
print('Shape:', sentences2.shape)

[[24 52 11 40 46 59 85]]
Shape: (1, 7)


# 1. GRU / LSTM
## 참조: https://keras.io/layers/recurrent/

In [5]:
def get_gru_model(hidden_dim, return_sequences, return_state, embedding,
                  vocab_size=100, sequence_length=10, embedding_dim=1):
    """Build and compile a one-layer GRU ``Sequential`` model.

    Args:
        hidden_dim: Number of GRU units.
        return_sequences: Forwarded to ``GRU(return_sequences=...)``.
        return_state: Forwarded to ``GRU(return_state=...)``.
            NOTE(review): a ``Sequential`` model presumably cannot expose the
            extra state tensors; this notebook uses ``get_gru_model2``
            (functional API) whenever ``return_state=True`` -- confirm.
        embedding: If truthy, an ``Embedding`` layer is placed before the GRU
            so the model consumes 2-D integer ids. Otherwise the GRU is the
            only layer and no input shape is declared here.
        vocab_size: ``input_dim`` of the Embedding layer (default 100, the
            value that used to be hard-coded).
        sequence_length: ``input_length`` of the Embedding layer (default 10,
            the value that used to be hard-coded).
        embedding_dim: ``output_dim`` of the Embedding layer (default 1).

    Returns:
        The model, compiled with ``loss='mse'`` and ``optimizer='rmsprop'``.
    """
    model = Sequential()
    if embedding:
        model.add(Embedding(input_dim=vocab_size, output_dim=embedding_dim,
                            input_length=sequence_length))
    model.add(GRU(units=hidden_dim, return_sequences=return_sequences, return_state=return_state))
    model.compile(loss='mse', optimizer='rmsprop')
    return model

def get_lstm_model(hidden_dim, return_sequences, return_state, embedding,
                   vocab_size=100, sequence_length=10, embedding_dim=1):
    """Build and compile a one-layer LSTM ``Sequential`` model.

    Args:
        hidden_dim: Number of LSTM units.
        return_sequences: Forwarded to ``LSTM(return_sequences=...)``.
        return_state: Forwarded to ``LSTM(return_state=...)``.
            NOTE(review): a ``Sequential`` model presumably cannot expose the
            extra state tensors; this notebook uses ``get_lstm_model2``
            (functional API) whenever ``return_state=True`` -- confirm.
        embedding: If truthy, an ``Embedding`` layer is placed before the LSTM
            so the model consumes 2-D integer ids. Otherwise the LSTM is the
            only layer and no input shape is declared here.
        vocab_size: ``input_dim`` of the Embedding layer (default 100, the
            value that used to be hard-coded).
        sequence_length: ``input_length`` of the Embedding layer (default 10,
            the value that used to be hard-coded).
        embedding_dim: ``output_dim`` of the Embedding layer (default 1).

    Returns:
        The model, compiled with ``loss='mse'`` and ``optimizer='rmsprop'``.
    """
    model = Sequential()
    if embedding:
        model.add(Embedding(input_dim=vocab_size, output_dim=embedding_dim,
                            input_length=sequence_length))
    model.add(LSTM(units=hidden_dim, return_sequences=return_sequences, return_state=return_state))
    model.compile(loss='mse', optimizer='rmsprop')
    return model

In [6]:
def test_model(name, model, sentences, embedding):
    print('#### %s Model Test with embedding=%s ####' %(name, str(embedding)))
    if not embedding: # if embedding not used, 2d -> 3d
        _sentences = list()
        for sentence in sentences:
            _sentence = list()
            for s in sentence:
                _sentence.append([s])
            _sentences.append(_sentence)
    else:
        _sentences = sentences

    print('Change to correct format for RNN')
    print('sentences -> _sentences')
    print(sentences, '->', _sentences)
    _sentences = np.array(_sentences) # need to be numpy array type ...
    predicted = model.predict(_sentences)
    return predicted

## 1-1. 마지막 시퀀스 output만 가져옴 (default 파라미터 사용시)

### GRU와 LSTM 셀을 활용하며, embedding을 활용한 버전과 활용하지 않은 버전 두 가지를 적용하여 총 4개의 테스트 케이스가 존재한다.

In [7]:
# Section 1-1 settings: one hidden unit; both return flags are set to False
# (marked "Default" in the original cell).
hidden_dim, return_sequences, return_state = 1, False, False

### Test Case: GRU / Embedding = False

In [8]:
"""
Test Case: GRU / Embedding = False
"""
embedding = False
predicted = test_model(name='GRU', 
                       model=get_gru_model(hidden_dim=hidden_dim, return_sequences=return_sequences, 
                                           return_state=return_state, embedding=embedding),
                       sentences=sentences,
                       embedding=embedding
                      )

print('')
print('GRU Output:')
print(predicted)
print('Shape:', predicted.shape)

#### GRU Model Test with embedding=False ####
Change to correct format for RNN
sentences -> _sentences
[[ 4 21 64 13 77 66 54 30 81 87]] -> [[[4], [21], [64], [13], [77], [66], [54], [30], [81], [87]]]

GRU Output:
[[-1.]]
Shape: (1, 1)


### Test Case: GRU / Embedding = True

In [9]:
"""
Test Case: GRU / Embedding = True
"""
embedding = True
predicted = test_model(name='GRU', 
                       model=get_gru_model(hidden_dim=hidden_dim, return_sequences=return_sequences, 
                                           return_state=return_state, embedding=embedding),
                       sentences=sentences,
                       embedding=embedding
                      )

print('')
print('GRU Output:')
print(predicted)
print('Shape:', predicted.shape)

#### GRU Model Test with embedding=True ####
Change to correct format for RNN
sentences -> _sentences
[[ 4 21 64 13 77 66 54 30 81 87]] -> [[ 4 21 64 13 77 66 54 30 81 87]]

GRU Output:
[[-0.00632277]]
Shape: (1, 1)


### Test Case: LSTM / Embedding = False

In [10]:
"""
Test Case: LSTM / Embedding = False
"""
embedding = False
predicted = test_model(name='LSTM', 
                       model=get_lstm_model(hidden_dim=hidden_dim, return_sequences=return_sequences, 
                                           return_state=return_state, embedding=embedding),
                       sentences=sentences,
                       embedding=embedding
                      )

print('')
print('LSTM Output:')
print(predicted)
print('Shape:', predicted.shape)

#### LSTM Model Test with embedding=False ####
Change to correct format for RNN
sentences -> _sentences
[[ 4 21 64 13 77 66 54 30 81 87]] -> [[[4], [21], [64], [13], [77], [66], [54], [30], [81], [87]]]

LSTM Output:
[[1.]]
Shape: (1, 1)


### Test Case: LSTM / Embedding = True

In [11]:
"""
Test Case: GRU / Embedding = True
"""
embedding = True
predicted = test_model(name='LSTM', 
                       model=get_lstm_model(hidden_dim=hidden_dim, return_sequences=return_sequences, 
                                           return_state=return_state, embedding=embedding),
                       sentences=sentences,
                       embedding=embedding
                      )

print('')
print('LSTM Output:')
print(predicted)
print('Shape:', predicted.shape)

#### LSTM Model Test with embedding=True ####
Change to correct format for RNN
sentences -> _sentences
[[ 4 21 64 13 77 66 54 30 81 87]] -> [[ 4 21 64 13 77 66 54 30 81 87]]

LSTM Output:
[[0.02648621]]
Shape: (1, 1)


### 코멘트: GRU와 LSTM 모두 last output만을 사용할 때에는 똑같은 형태의 input과 output 데이터를 가진다. 각각의 메카니즘 또는 embedding의 여부에 따라 output 값은 달라진다. embedding이 있고 없고에 따라 input data의 shape은 변경되어야 한다.

## 1-2. 모든 시퀀스 output을 가져옴

In [12]:
# Section 1-2 settings: return_sequences is switched on; return_state stays False.
hidden_dim, return_sequences, return_state = 1, True, False


### Test Case: GRU / Embedding = False

In [13]:
"""
Test Case: GRU / Embedding = False
"""
embedding = False
predicted = test_model(name='GRU', 
                       model=get_gru_model(hidden_dim=hidden_dim, return_sequences=return_sequences, 
                                           return_state=return_state, embedding=embedding),
                       sentences=sentences,
                       embedding=embedding
                      )

print('')
print('GRU Output:')
print(predicted)
print('Shape:', predicted.shape)

#### GRU Model Test with embedding=False ####
Change to correct format for RNN
sentences -> _sentences
[[ 4 21 64 13 77 66 54 30 81 87]] -> [[[4], [21], [64], [13], [77], [66], [54], [30], [81], [87]]]

GRU Output:
[[[0.9978595]
  [1.       ]
  [1.       ]
  [1.       ]
  [1.       ]
  [1.       ]
  [1.       ]
  [1.       ]
  [1.       ]
  [1.       ]]]
Shape: (1, 10, 1)


### Test Case: GRU / Embedding = True

In [14]:
"""
Test Case: GRU / Embedding = True
"""
embedding = True
predicted = test_model(name='GRU', 
                       model=get_gru_model(hidden_dim=hidden_dim, return_sequences=return_sequences, 
                                           return_state=return_state, embedding=embedding),
                       sentences=sentences,
                       embedding=embedding
                      )

print('')
print('GRU Output:')
print(predicted)
print('Shape:', predicted.shape)

#### GRU Model Test with embedding=True ####
Change to correct format for RNN
sentences -> _sentences
[[ 4 21 64 13 77 66 54 30 81 87]] -> [[ 4 21 64 13 77 66 54 30 81 87]]

GRU Output:
[[[ 0.02192867]
  [ 0.02025646]
  [-0.00353545]
  [-0.01354325]
  [-0.03200561]
  [-0.02812015]
  [ 0.00748733]
  [-0.00540804]
  [-0.0123355 ]
  [ 0.00106698]]]
Shape: (1, 10, 1)


### Test Case: LSTM / Embedding = False

In [15]:
"""
Test Case: LSTM / Embedding = False
"""
embedding = False
predicted = test_model(name='LSTM', 
                       model=get_lstm_model(hidden_dim=hidden_dim, return_sequences=return_sequences, 
                                           return_state=return_state, embedding=embedding),
                       sentences=sentences,
                       embedding=embedding
                      )

print('')
print('LSTM Output:')
print(predicted)
print('Shape:', predicted.shape)

#### LSTM Model Test with embedding=False ####
Change to correct format for RNN
sentences -> _sentences
[[ 4 21 64 13 77 66 54 30 81 87]] -> [[[4], [21], [64], [13], [77], [66], [54], [30], [81], [87]]]

LSTM Output:
[[[-0.6443548 ]
  [-0.91914594]
  [-0.984043  ]
  [-0.9923044 ]
  [-0.9984626 ]
  [-0.9995651 ]
  [-0.99981004]
  [-0.9998179 ]
  [-0.9999579 ]
  [-0.9999913 ]]]
Shape: (1, 10, 1)


### Test Case: LSTM / Embedding = True

In [16]:
"""
Test Case: LSTM / Embedding = True
"""
embedding = True
predicted = test_model(name='LSTM', 
                       model=get_lstm_model(hidden_dim=hidden_dim, return_sequences=return_sequences, 
                                           return_state=return_state, embedding=embedding),
                       sentences=sentences,
                       embedding=embedding
                      )

print('')
print('LSTM Output:')
print(predicted)
print('Shape:', predicted.shape)

#### LSTM Model Test with embedding=True ####
Change to correct format for RNN
sentences -> _sentences
[[ 4 21 64 13 77 66 54 30 81 87]] -> [[ 4 21 64 13 77 66 54 30 81 87]]

LSTM Output:
[[[ 0.00607819]
  [-0.00538075]
  [ 0.00434323]
  [-0.00093756]
  [ 0.00690822]
  [ 0.00408977]
  [-0.00678301]
  [-0.00384558]
  [-0.00766471]
  [-0.00183106]]]
Shape: (1, 10, 1)


### 코멘트: return_sequences 또한 GRU<->LSTM비교시 input과 output의 형태는 차이가 없다. 단지 embedding을 할 경우와 하지 않을 경우 input data의 shape은 달라진다. 

## 1-3. 모든 시퀀스 output과 last output(LSTM의 경우 + last cell state)을 함께 가지고 옴

In [37]:
# Section 1-3 settings: both return_sequences and return_state are switched on.
hidden_dim, return_sequences, return_state = 1, True, True

def get_gru_model2(hidden_dim, return_sequences, return_state, embedding,
                   vocab_size=100, sequence_length=10, embedding_dim=1):
    """Build and compile a one-layer GRU model with the functional API.

    Unlike the previous version, this no longer assumes the GRU layer returns
    exactly two tensors: with ``return_state=False`` the layer call yields a
    single tensor, which used to make the tuple unpacking fail.

    Args:
        hidden_dim: Number of GRU units.
        return_sequences: Forwarded to ``GRU(return_sequences=...)``.
        return_state: Forwarded to ``GRU(return_state=...)``. When true the
            model has two outputs (GRU output, last state).
        embedding: If truthy the model takes ``(sequence_length,)`` inputs and
            embeds them; otherwise it takes ``(sequence_length, 1)`` inputs
            that go straight into the GRU.
        vocab_size: ``input_dim`` of the Embedding layer (default 100).
        sequence_length: Number of time steps in the input (default 10).
        embedding_dim: ``output_dim`` of the Embedding layer (default 1).

    Returns:
        The model, compiled with ``loss='mse'`` and ``optimizer='rmsprop'``.
    """
    if embedding:
        inputs = Input(shape=(sequence_length, ))
        _inputs = Embedding(input_dim=vocab_size, output_dim=embedding_dim,
                            input_length=sequence_length)(inputs)
    else:
        inputs = Input(shape=(sequence_length, 1))
        _inputs = inputs

    outputs = GRU(units=hidden_dim, return_sequences=return_sequences, return_state=return_state)(_inputs)
    # With return_state=True the layer returns a sequence [output, state];
    # keep it as a list. Otherwise it is a single tensor and is passed on as is.
    if isinstance(outputs, (list, tuple)):
        outputs = list(outputs)

    model = Model(inputs=inputs, outputs=outputs)
    model.compile(loss='mse', optimizer='rmsprop')
    return model

def get_lstm_model2(hidden_dim, return_sequences, return_state, embedding,
                    vocab_size=100, sequence_length=10, embedding_dim=1):
    """Build and compile a one-layer LSTM model with the functional API.

    Unlike the previous version, this no longer assumes the LSTM layer returns
    exactly three tensors: with ``return_state=False`` the layer call yields a
    single tensor, which used to make the tuple unpacking fail.

    Args:
        hidden_dim: Number of LSTM units.
        return_sequences: Forwarded to ``LSTM(return_sequences=...)``.
        return_state: Forwarded to ``LSTM(return_state=...)``. When true the
            model has three outputs, in order: LSTM output, ``state_h``,
            ``state_c``.
        embedding: If truthy the model takes ``(sequence_length,)`` inputs and
            embeds them; otherwise it takes ``(sequence_length, 1)`` inputs
            that go straight into the LSTM.
        vocab_size: ``input_dim`` of the Embedding layer (default 100).
        sequence_length: Number of time steps in the input (default 10).
        embedding_dim: ``output_dim`` of the Embedding layer (default 1).

    Returns:
        The model, compiled with ``loss='mse'`` and ``optimizer='rmsprop'``.
    """
    if embedding:
        inputs = Input(shape=(sequence_length, ))
        _inputs = Embedding(input_dim=vocab_size, output_dim=embedding_dim,
                            input_length=sequence_length)(inputs)
    else:
        inputs = Input(shape=(sequence_length, 1))
        _inputs = inputs

    outputs = LSTM(units=hidden_dim, return_sequences=return_sequences, return_state=return_state)(_inputs)
    # With return_state=True the layer returns [output, state_h, state_c];
    # keep it as a list. Otherwise it is a single tensor and is passed on as is.
    if isinstance(outputs, (list, tuple)):
        outputs = list(outputs)

    model = Model(inputs=inputs, outputs=outputs)
    model.compile(loss='mse', optimizer='rmsprop')
    return model

### Test Case: GRU / Embedding = False

In [20]:
"""
Test Case: GRU / Embedding = False
"""
embedding = False
predicted = test_model(name='GRU', 
                       model=get_gru_model2(hidden_dim=hidden_dim, return_sequences=return_sequences, 
                                           return_state=return_state, embedding=embedding),
                       sentences=sentences,
                       embedding=embedding
                      )

print('')
print('GRU Output:')
print(predicted[0])
print('')
print('GRU Last Output by return_state:')
print(predicted[1])

print('')
print('Sequence Output Shape:', predicted[0].shape)
print('Last Output Shape:', predicted[1].shape)

#### GRU Model Test with embedding=False ####
Change to correct format for RNN
sentences -> _sentences
[[ 4 21 64 13 77 66 54 30 81 87]] -> [[[4], [21], [64], [13], [77], [66], [54], [30], [81], [87]]]

GRU Output:
[[[0.]
  [0.]
  [0.]
  [0.]
  [0.]
  [0.]
  [0.]
  [0.]
  [0.]
  [0.]]]

GRU Last Output by return_state:
[[0.]]

Sequence Output Shape: (1, 10, 1)
Last Output Shape: (1, 1)


### Test Case: GRU / Embedding = True

In [21]:
"""
Test Case: GRU / Embedding = True
"""
embedding = True
predicted = test_model(name='GRU', 
                       model=get_gru_model2(hidden_dim=hidden_dim, return_sequences=return_sequences, 
                                           return_state=return_state, embedding=embedding),
                       sentences=sentences,
                       embedding=embedding
                      )

print('')
print('GRU Output:')
print(predicted[0])
print('')
print('GRU Last Output by return_state:')
print(predicted[1])

print('')
print('Sequence Output Shape:', predicted[0].shape)
print('Last Output Shape:', predicted[1].shape)

#### GRU Model Test with embedding=True ####
Change to correct format for RNN
sentences -> _sentences
[[ 4 21 64 13 77 66 54 30 81 87]] -> [[ 4 21 64 13 77 66 54 30 81 87]]

GRU Output:
[[[ 3.6939475e-04]
  [-3.5797853e-03]
  [ 3.2458536e-03]
  [-2.9120129e-06]
  [ 5.0567961e-03]
  [ 4.5182616e-03]
  [-6.6371402e-03]
  [ 6.4807045e-03]
  [ 9.0500861e-03]
  [ 1.2209806e-02]]]

GRU Last Output by return_state:
[[0.01220981]]

Sequence Output Shape: (1, 10, 1)
Last Output Shape: (1, 1)


### Test Case: LSTM / Embedding = False

In [22]:
"""
Test Case: LSTM / Embedding = False
"""
embedding = False
predicted = test_model(name='LSTM', 
                       model=get_lstm_model2(hidden_dim=hidden_dim, return_sequences=return_sequences, 
                                           return_state=return_state, embedding=embedding),
                       sentences=sentences,
                       embedding=embedding
                      )

print('')
print('LSTM Output:')
print(predicted[0])
print('')
print('LSTM Last Output by return_state:')
print(predicted[1])
print('')
print('LSTM Last hidden_state by return_state:')
print(predicted[2])

print('')
print('Sequence Output Shape:', predicted[0].shape)
print('Last Output Shape:', predicted[1].shape)
print('Last Hidden State Shape:', predicted[2].shape)

#### LSTM Model Test with embedding=False ####
Change to correct format for RNN
sentences -> _sentences
[[ 4 21 64 13 77 66 54 30 81 87]] -> [[[4], [21], [64], [13], [77], [66], [54], [30], [81], [87]]]

LSTM Output:
[[[0.]
  [0.]
  [0.]
  [0.]
  [0.]
  [0.]
  [0.]
  [0.]
  [0.]
  [0.]]]

LSTM Last Output by return_state:
[[0.]]

LSTM Last hidden_state by return_state:
[[0.]]

Sequence Output Shape: (1, 10, 1)
Last Output Shape: (1, 1)
Last Hidden State Shape: (1, 1)


### Test Case: LSTM / Embedding = True

In [23]:
"""
Test Case: LSTM / Embedding = False
"""
embedding = True
predicted = test_model(name='LSTM', 
                       model=get_lstm_model2(hidden_dim=hidden_dim, return_sequences=return_sequences, 
                                           return_state=return_state, embedding=embedding),
                       sentences=sentences,
                       embedding=embedding
                      )

print('')
print('LSTM Output:')
print(predicted[0])
print('')
print('LSTM Last Output by return_state:')
print(predicted[1])
print('')
print('LSTM Last hidden_state by return_state:')
print(predicted[2])

print('')
print('Sequence Output Shape:', predicted[0].shape)
print('Last Output Shape:', predicted[1].shape)
print('Last Hidden State Shape:', predicted[2].shape)

#### LSTM Model Test with embedding=True ####
Change to correct format for RNN
sentences -> _sentences
[[ 4 21 64 13 77 66 54 30 81 87]] -> [[ 4 21 64 13 77 66 54 30 81 87]]

LSTM Output:
[[[-0.00191574]
  [-0.01311182]
  [-0.0142023 ]
  [-0.01597268]
  [-0.01081148]
  [-0.00324047]
  [-0.0098202 ]
  [-0.00839193]
  [ 0.00266636]
  [-0.00498142]]]

LSTM Last Output by return_state:
[[-0.00498142]]

LSTM Last hidden_state by return_state:
[[-0.00986878]]

Sequence Output Shape: (1, 10, 1)
Last Output Shape: (1, 1)
Last Hidden State Shape: (1, 1)


### 코멘트: Embedding의 유무는 output의 형태와 아무런 관련이 없다. return_sequences와 return_state를 모두 True로 설정했을 때 GRU와 LSTM은 반환하는 결과의 개수가 다르다. GRU는 전체 시퀀스 output과 last output(= last hidden state)만 반환하지만, LSTM은 여기에 last cell state까지 추가로 반환하기 때문이다. 두 셀의 구조를 들여다보면서 이해하기를 권장한다.

## 1-4. Return sequences=False, return state=True일 때

### Embedding 레이어의 존재 유무는 지금부터는 default로 True로 놓고 볼 예정이다. 위에서 output shape은 동일함을 몇 번이나 확인했기 때문이다. 또한 Embedding 값이 True일 때 더 풍부한 값의 변화를 볼 수 있다. 이번에는 GRU와 LSTM의 return_sequences=False, return_state=True 상태일 때 나오는 값을 확인한다.

In [39]:
# Section 1-4 settings: only the last step is returned, together with the state.
hidden_dim, return_sequences, return_state = 1, False, True


### Test Case GRU

In [41]:
"""
Test Case: GRU / Embedding = False
"""
embedding = True
predicted = test_model(name='GRU', 
                       model=get_gru_model2(hidden_dim=hidden_dim, return_sequences=return_sequences, 
                                           return_state=return_state, embedding=embedding),
                       sentences=sentences,
                       embedding=embedding
                      )

print('')
print('GRU Output:')
print(predicted[0])
print('')
print('GRU Last Output by return_state:')
print(predicted[1])

print('')
print('Sequence Output Shape:', predicted[0].shape)
print('Last Output Shape:', predicted[1].shape)

#### GRU Model Test with embedding=True ####
Change to correct format for RNN
sentences -> _sentences
[[ 4 21 64 13 77 66 54 30 81 87]] -> [[ 4 21 64 13 77 66 54 30 81 87]]

GRU Output:
[[0.00016529]]

GRU Last Output by return_state:
[[0.00016529]]

Sequence Output Shape: (1, 1)
Last Output Shape: (1, 1)


### Test Case LSTM

In [42]:
"""
Test Case: LSTM / Embedding = False
"""
embedding = True
predicted = test_model(name='LSTM', 
                       model=get_lstm_model3(hidden_dim=hidden_dim, return_sequences=return_sequences, 
                                           return_state=return_state, embedding=embedding),
                       sentences=sentences,
                       embedding=embedding
                      )

print('')
print('LSTM Output:')
print(predicted[0])
print('')
print('LSTM Last Output by return_state:')
print(predicted[1])
print('')
print('LSTM Last hidden_state by return_state:')
print(predicted[2])

print('')
print('Sequence Output Shape:', predicted[0].shape)
print('Last Output Shape:', predicted[1].shape)
print('Last Hidden State Shape:', predicted[2].shape)

#### LSTM Model Test with embedding=True ####
Change to correct format for RNN
sentences -> _sentences
[[ 4 21 64 13 77 66 54 30 81 87]] -> [[ 4 21 64 13 77 66 54 30 81 87]]

LSTM Output:
[[0.00025799]]

LSTM Last Output by return_state:
[[0.00025799]]

LSTM Last hidden_state by return_state:
[[0.00051868]]

Sequence Output Shape: (1, 1)
Last Output Shape: (1, 1)
Last Hidden State Shape: (1, 1)
