# 이 쥬피터 노트북은 Keras의 기능을 정확히 알고 이해하기 위해 테스트하기 위해 생성되었습니다.

# Embedding test

In [1]:
import keras.backend.tensorflow_backend as K
from keras.layers import Dense, Input, Lambda, GRU, Concatenate, LSTM
from keras.models import Model

import numpy as np

Using TensorFlow backend.


# Simple test for GRU(or LSTM) outputs, hidden states...

In [2]:
latent_dim = 2
enc_seq_length = 3
dec_seq_length = 5

enc_input = np.random.randint(10, size=(1, enc_seq_length, latent_dim))
dec_input = np.random.randint(10, size=(1, dec_seq_length, latent_dim))
dec_output = np.random.randint(10, size=(1, 1))

print(enc_input.shape)
print(enc_input)

print('')

print(dec_input.shape)
print(dec_input)

(1, 3, 2)
[[[0 2]
  [2 6]
  [5 3]]]

(1, 5, 2)
[[[3 3]
  [6 7]
  [7 4]
  [6 3]
  [0 2]]]


In [3]:
enc_inputs = Input(shape=(enc_seq_length, latent_dim))

encoder = GRU(latent_dim, return_state=True, return_sequences=True)
encoder_outputs, encoder_states = encoder(enc_inputs)

outputs = [encoder_outputs, encoder_states]

enc_model = Model(inputs=enc_inputs, outputs=outputs)
enc_model.compile(optimizer='rmsprop', loss='mse')
print(enc_model.summary())

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 3, 2)              0         
_________________________________________________________________
gru_1 (GRU)                  [(None, 3, 2), (None, 2)] 30        
Total params: 30
Trainable params: 30
Non-trainable params: 0
_________________________________________________________________
None


In [4]:
enc_model.predict(enc_input)

[array([[[-0.46849972, -0.16795884],
         [-0.964507  , -0.16795884],
         [-0.9670544 , -0.52285415]]], dtype=float32),
 array([[-0.9670544 , -0.52285415]], dtype=float32)]

In [8]:
enc_inputs = Input(shape=(enc_seq_length, latent_dim))

encoder = LSTM(latent_dim, return_state=True, return_sequences=True)
encoder_outputs, encoder_state_h, encoder_state_c = encoder(enc_inputs)
# encoder_states = [encoder_state_h, encoder_state_c]

outputs = [encoder_outputs, encoder_state_h, encoder_state_c]

enc_model = Model(inputs=enc_inputs, outputs=outputs)
enc_model.compile(optimizer='rmsprop', loss='mse')
print(enc_model.summary())

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_4 (InputLayer)         (None, 3, 2)              0         
_________________________________________________________________
lstm_3 (LSTM)                [(None, 3, 2), (None, 2), 40        
Total params: 40
Trainable params: 40
Non-trainable params: 0
_________________________________________________________________
None


In [9]:
enc_model.predict(enc_input)

[array([[[ 0.0854265 ,  0.05885421],
         [ 0.17130549,  0.        ],
         [-0.21622829,  0.23644088]]], dtype=float32),
 array([[-0.21622829,  0.23644088]], dtype=float32),
 array([[-0.219696  ,  0.54099584]], dtype=float32)]

In [13]:
enc_inputs = Input(shape=(enc_seq_length, latent_dim))
dec_inputs = Input(shape=(dec_seq_length, latent_dim))

encoder = LSTM(latent_dim, return_state=True, return_sequences=True)
encoder_outputs, encoder_state_h, encoder_state_c = encoder(enc_inputs)
# encoder_states = [encoder_state_h, encoder_state_c]

decoder1 = LSTM(latent_dim, return_state=True, return_sequences=True)
decoder_outputs, decoder_state_h, decoder_state_c = decoder1(dec_inputs, initial_state=[encoder_state_h, encoder_state_c])

decoder2 = LSTM(latent_dim, return_state=True, return_sequences=True)
decoder_outputs2, decoder_state_h2, decoder_state_c2 = decoder2(dec_inputs, initial_state=[encoder_state_h, encoder_state_c])

outputs = [encoder_outputs, encoder_state_h, encoder_state_c, decoder_outputs, decoder_state_h, decoder_state_c,
          decoder_outputs2, decoder_state_h2, decoder_state_c2]

enc_model = Model(inputs=[enc_inputs, dec_inputs], outputs=outputs)
enc_model.compile(optimizer='rmsprop', loss='mse')
print(enc_model.summary())

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_10 (InputLayer)           (None, 3, 2)         0                                            
__________________________________________________________________________________________________
lstm_10 (LSTM)                  [(None, 3, 2), (None 40          input_10[0][0]                   
__________________________________________________________________________________________________
input_11 (InputLayer)           (None, 5, 2)         0                                            
__________________________________________________________________________________________________
lstm_11 (LSTM)                  [(None, 5, 2), (None 40          input_11[0][0]                   
                                                                 lstm_10[0][1]                    
          

In [14]:
enc_model.predict([enc_input, dec_input])

[array([[[ 0.12602474, -0.11073221],
         [ 0.        , -0.02346477],
         [-0.04158545,  0.08337524]]], dtype=float32),
 array([[-0.04158545,  0.08337524]], dtype=float32),
 array([[-0.10542868,  0.33409706]], dtype=float32),
 array([[[-0.23210803,  0.17166544],
         [-0.2602268 ,  0.16050757],
         [-0.11689018,  0.05366934],
         [-0.11317842,  0.01821852],
         [-0.16554847,  0.03445369]]], dtype=float32),
 array([[-0.16554847,  0.03445369]], dtype=float32),
 array([[-0.5616043 ,  0.05377926]], dtype=float32),
 array([[[-0.        ,  0.2971657 ],
         [ 0.        ,  0.29141217],
         [ 0.        ,  0.4596558 ],
         [ 0.        ,  0.4899067 ],
         [ 0.03371994,  0.32732704]]], dtype=float32),
 array([[0.03371994, 0.32732704]], dtype=float32),
 array([[0.19804108, 1.0452218 ]], dtype=float32)]

### 아하, return_state는 encoder_outputs의 마지막 시퀀스 데이터를 가지고 오는구나? 그런데 만약 패딩한 데이터라면, 저걸 그대로 써도 되는 걸까?

In [28]:
new_enc_input = np.copy(enc_input)
new_enc_input[:, enc_seq_length - 2] = [0, 0]
new_enc_input[:, enc_seq_length - 1] = [0, 0]
print(new_enc_input)

[[[1 4]
  [0 0]
  [0 0]]]


In [29]:
enc_model.predict(new_enc_input)

[array([[[ 0.992243  , -0.6890673 ],
         [ 0.64822954, -0.49443895],
         [ 0.42478132, -0.3401747 ]]], dtype=float32),
 array([[ 0.42478132, -0.3401747 ]], dtype=float32)]

### 위의 케이스에서 알 수 있듯이, return_state를 그대로 활용하면, 원래 예상하던 encoder의 output과는 다른 값을 활용하게 된다. 두 번째 케이스에서는 output의 1번째 데이터를 활용해야 한다. 따라서 아래와 같은 과정이 필요하다.

# Sample: 진짜 sequence 길이에 따라 마지막 output index를 찾는 로직 테스트

In [219]:
# Example from: http://lancerous.com/detail/46526869/46527020

enc_inputs = Input(shape=(enc_seq_length, latent_dim))

enc_pad_index = get_pad_index()(enc_inputs)
seq_index = Lambda(lambda x: K.sum(x, axis=-2) - 1)(enc_pad_index)


encoder = GRU(latent_dim, return_state=True, return_sequences=True)
encoder_outputs, encoder_states = encoder(enc_inputs)

# encoder_outputs = Multiply()([enc_pad_index, encoder_outputs])
# encoder_outputs2 = Multiply()([encoder_outputs, seq_index])
# encoder_outputs = encoder_outputs[seq_index]
# seq_index2 = Lambda(lambda x: K.cast(x, 'int32'))(seq_index)
# values = Lambda(lambda x: K.tf.gather_nd(x, seq_index2 - 1))(encoder_outputs)
# values = Lambda(lambda x: x[seq_index2 - 1, :])(encoder_outputs)

print(seq_index2.shape)
# LAST RELEVANT OUTPUT
# create the row index with tf.range
row_idx = Lambda(lambda x: tf.reshape(tf.range(tf.shape(x)[0]), (-1,1)))(seq_index)

# stack with column index
idx = Lambda(lambda x: tf.stack([row_idx, K.cast(x, 'int32')], axis=-1))(seq_index)
# extract the elements with gather_nd
values = Lambda(lambda x: tf.gather_nd(x, idx))(encoder_outputs)

outputs = [encoder_outputs, encoder_states, values]
# outputs = [encoder_outputs, encoder_states, row_idx]

enc_model = Model(inputs=enc_inputs, outputs=outputs)
enc_model.compile(optimizer='rmsprop', loss='mse')
print(enc_model.summary())

(?, 1)
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_119 (InputLayer)       (None, 3, 2)              0         
_________________________________________________________________
gru_111 (GRU)                [(None, 3, 2), (None, 2)] 30        
_________________________________________________________________
lambda_389 (Lambda)          (None, 1, 2)              0         
Total params: 30
Trainable params: 30
Non-trainable params: 0
_________________________________________________________________
None


In [220]:
print(new_enc_input)
enc_model.predict(new_enc_input)

[[[1 4]
  [0 0]
  [0 0]]]


[array([[[-0.15122743, -0.57640064],
         [-0.0673607 , -0.26010168],
         [-0.03089126, -0.10822966]]], dtype=float32),
 array([[-0.03089126, -0.10822966]], dtype=float32),
 array([[[-0.15122743, -0.57640064]]], dtype=float32)]

In [221]:
enc_input = np.random.randint(10, size=(2, enc_seq_length, latent_dim))
enc_input[:, 2] = [0, 0]
print(enc_input)

enc_model.predict(enc_input)

[[[5 2]
  [5 9]
  [0 0]]

 [[3 1]
  [5 1]
  [0 0]]]


[array([[[-0.08232233,  0.        ],
         [-0.08232233, -0.2970863 ],
         [-0.03749942, -0.1245542 ]],
 
        [[-0.05926357,  0.12103889],
         [-0.03514289,  0.12103889],
         [-0.01499547,  0.05193049]]], dtype=float32),
 array([[-0.03749942, -0.1245542 ],
        [-0.01499547,  0.05193049]], dtype=float32),
 array([[[-0.08232233, -0.2970863 ]],
 
        [[-0.03514289,  0.12103889]]], dtype=float32)]

# Attention layer에 적용하여 테스트한다.

In [74]:
from keras.layers import Multiply, Reshape, Flatten, Embedding
import tensorflow as tf

In [4]:
latent_dim = 2
enc_seq_length = 3
dec_seq_length = 5

enc_input = np.random.randint(10, size=(1, enc_seq_length, latent_dim))
dec_input = np.random.randint(10, size=(1, dec_seq_length, latent_dim))
dec_output = np.random.randint(10, size=(1, 1))

print(enc_input.shape)
print(enc_input)

print('')

print(dec_input.shape)
print(dec_input)

(1, 3, 2)
[[[2 4]
  [3 6]
  [0 8]]]

(1, 5, 2)
[[[4 4]
  [7 7]
  [9 5]
  [4 7]
  [2 2]]]


In [137]:
def repeat_vector(seq_length, axis):
    return Lambda(lambda x: K.repeat_elements(K.expand_dims(x, axis), seq_length, axis))

# Sum of last dimension is 0, then that means it is padded !
def get_pad_index():
    return Lambda(lambda x: K.cast(K.not_equal(K.sum(x, axis=-1, keepdims=True), 0), 'float32'))

def get_last_outputs(inputs, outputs, dimension, seq_length):
    if dimension == 2:
        new_inputs = Reshape((seq_length, 1))(inputs)
    else:
        new_inputs = inputs
    pad_index = get_pad_index()(new_inputs)
    last_index = Lambda(lambda x: K.sum(x, axis=-2) - 1)(pad_index)

    # LAST RELEVANT OUTPUT
    # create the row index with tf.range
    row_idx = Lambda(lambda x: tf.reshape(tf.range(tf.shape(x)[0]), (-1,1)))(last_index)

    # stack with column index
    idx = Lambda(lambda x: tf.stack([row_idx, K.cast(x, 'int32')], axis=-1))(last_index)
    # extract the elements with gather_nd
    last_outputs = Lambda(lambda x: tf.gather_nd(x, idx))(outputs)
    
    last_outputs = Reshape((latent_dim, ))(last_outputs)
    return pad_index, last_outputs

# Test 패딩된 위치의 output 찾기 with 3d input

In [140]:

enc_inputs = Input(shape=(enc_seq_length, latent_dim))
dec_inputs = Input(shape=(dec_seq_length, latent_dim))

encoder = GRU(latent_dim, return_state=True, return_sequences=True)
encoder_outputs, encoder_states = encoder(enc_inputs)

pad_index, last_outputs = get_last_outputs(enc_inputs, encoder_outputs, dimension=3, seq_length=enc_seq_length)

# decoder = GRU(latent_dim, return_state=True, return_sequences=True)
# decoder_outputs, decoder_states = decoder(dec_inputs, initial_state=last_outputs)


# Attention Layer
# repeat_d = repeat_vector(enc_seq_length, 2)(decoder_outputs)
# repeat_e = repeat_vector(dec_seq_length, 1)(encoder_outputs)


# concat_v = Concatenate()([repeat_d, repeat_e])
# dense_score_layer = Dense(latent_dim, activation='tanh')
# dense_score = dense_score_layer(concat_v)
# dense2_score = Dense(1)(dense_score)
# dense2_score = Reshape((dec_seq_length, enc_seq_length))(dense2_score)

# softmax_score = Activation('softmax')(dense2_score)

# Make Context Vector

# outputs = [repeat_d, repeat_e]
# outputs = [repeat_e]
outputs = [pad_index, last_outputs, encoder_outputs]

model = Model(inputs=[enc_inputs, dec_inputs], outputs=outputs)
model.compile(optimizer='rmsprop', loss='categorical_crossentropy')
print(model.summary())
# output_array = model.predict(input_array)

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_74 (InputLayer)           (None, 3, 2)         0                                            
__________________________________________________________________________________________________
gru_37 (GRU)                    [(None, 3, 2), (None 30          input_74[0][0]                   
__________________________________________________________________________________________________
lambda_79 (Lambda)              (None, 1, 2)         0           gru_37[0][0]                     
__________________________________________________________________________________________________
lambda_75 (Lambda)              (None, 3, 1)         0           input_74[0][0]                   
__________________________________________________________________________________________________
reshape_13

In [141]:
new_enc_input = np.copy(enc_input)
new_enc_input[:, 2] = [0, 0]
model.predict([new_enc_input, dec_input])

[array([[[1.],
         [1.],
         [0.]]], dtype=float32),
 array([[ 0.9745162, -0.7309223]], dtype=float32),
 array([[[ 0.85658723, -0.4007408 ],
         [ 0.9745162 , -0.7309223 ],
         [ 0.45146894, -0.35265613]]], dtype=float32)]

# Test 패딩된 위치의 output 찾기 with 2d input

In [142]:

enc_inputs = Input(shape=(enc_seq_length, ))
dec_inputs = Input(shape=(dec_seq_length, latent_dim))

enc_inputs_embed = Embedding(10, latent_dim)(enc_inputs)

encoder = GRU(latent_dim, return_state=True, return_sequences=True)
encoder_outputs, encoder_states = encoder(enc_inputs_embed)

pad_index, last_outputs = get_last_outputs(enc_inputs, encoder_outputs, dimension=2, seq_length=enc_seq_length)

# decoder = GRU(latent_dim, return_state=True, return_sequences=True)
# decoder_outputs, decoder_states = decoder(dec_inputs, initial_state=last_outputs)


# Attention Layer
# repeat_d = repeat_vector(enc_seq_length, 2)(decoder_outputs)
# repeat_e = repeat_vector(dec_seq_length, 1)(encoder_outputs)


# concat_v = Concatenate()([repeat_d, repeat_e])
# dense_score_layer = Dense(latent_dim, activation='tanh')
# dense_score = dense_score_layer(concat_v)
# dense2_score = Dense(1)(dense_score)
# dense2_score = Reshape((dec_seq_length, enc_seq_length))(dense2_score)

# softmax_score = Activation('softmax')(dense2_score)

# Make Context Vector

# outputs = [repeat_d, repeat_e]
# outputs = [repeat_e]
outputs = [pad_index, last_outputs, encoder_outputs]
# outputs = encoder_outputs

model = Model(inputs=[enc_inputs, dec_inputs], outputs=outputs)
# model = Model(inputs=enc_inputs, outputs=outputs)
model.compile(optimizer='rmsprop', loss='categorical_crossentropy')
print(model.summary())
# output_array = model.predict(input_array)

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_76 (InputLayer)           (None, 3)            0                                            
__________________________________________________________________________________________________
embedding_13 (Embedding)        (None, 3, 2)         20          input_76[0][0]                   
__________________________________________________________________________________________________
gru_38 (GRU)                    [(None, 3, 2), (None 30          embedding_13[0][0]               
__________________________________________________________________________________________________
reshape_14 (Reshape)            (None, 3, 1)         0           input_76[0][0]                   
__________________________________________________________________________________________________
lambda_84 

In [143]:
new_enc_input = np.random.randint(10, size=(1, enc_seq_length))
new_enc_input[:, 1:] = 0
print(new_enc_input)
print('')
model.predict([new_enc_input, dec_input])

[[9 0 0]]



[array([[[1.],
         [0.],
         [0.]]], dtype=float32),
 array([[ 0.00588792, -0.00853912]], dtype=float32),
 array([[[ 0.00588792, -0.00853912],
         [ 0.00971485, -0.01169102],
         [ 0.01161896, -0.0128266 ]]], dtype=float32)]

In [27]:

enc_inputs = Input(shape=(enc_seq_length, latent_dim))
dec_inputs = Input(shape=(dec_seq_length, latent_dim))

encoder = GRU(latent_dim, return_state=True, return_sequences=True)
encoder_outputs, encoder_states = encoder(enc_inputs)

pad_index, last_outputs = get_last_outputs(enc_inputs, encoder_outputs)

# decoder = GRU(latent_dim, return_state=True, return_sequences=True)
# decoder_outputs, decoder_states = decoder(dec_inputs, initial_state=last_outputs)


# Attention Layer
# repeat_d = repeat_vector(enc_seq_length, 2)(decoder_outputs)
repeat_e = repeat_vector(dec_seq_length, 1)(encoder_outputs)


# concat_v = Concatenate()([repeat_d, repeat_e])
# dense_score_layer = Dense(latent_dim, activation='tanh')
# dense_score = dense_score_layer(concat_v)
# dense2_score = Dense(1)(dense_score)
# dense2_score = Reshape((dec_seq_length, enc_seq_length))(dense2_score)

# softmax_score = Activation('softmax')(dense2_score)

# Make Context Vector

# outputs = [repeat_d, repeat_e]
outputs = [repeat_e]

model = Model(inputs=[enc_inputs, dec_inputs], outputs=outputs)
model.compile(optimizer='rmsprop', loss='categorical_crossentropy')
print(model.summary())
# output_array = model.predict(input_array)

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_5 (InputLayer)         (None, 3, 2)              0         
_________________________________________________________________
gru_4 (GRU)                  [(None, 3, 2), (None, 2)] 30        
_________________________________________________________________
lambda_16 (Lambda)           (None, 5, 3, 2)           0         
Total params: 30
Trainable params: 30
Non-trainable params: 0
_________________________________________________________________
None


In [15]:
model.predict([enc_input, dec_input])

[array([[[[ 0.33879137, -0.42596382],
          [ 0.33879137, -0.42596382],
          [ 0.33879137, -0.42596382]],
 
         [[ 0.1054918 , -0.8267227 ],
          [ 0.1054918 , -0.8267227 ],
          [ 0.1054918 , -0.8267227 ]],
 
         [[-0.6399848 , -0.9956191 ],
          [-0.6399848 , -0.9956191 ],
          [-0.6399848 , -0.9956191 ]],
 
         [[-0.6399848 , -0.13997185],
          [-0.6399848 , -0.13997185],
          [-0.6399848 , -0.13997185]],
 
         [[-0.54603815, -0.24638326],
          [-0.54603815, -0.24638326],
          [-0.54603815, -0.24638326]]]], dtype=float32),
 array([[[[ 0.1296094 , -0.27320027],
          [ 0.32684386, -0.4980604 ],
          [ 0.7306807 ,  0.39620203]],
 
         [[ 0.1296094 , -0.27320027],
          [ 0.32684386, -0.4980604 ],
          [ 0.7306807 ,  0.39620203]],
 
         [[ 0.1296094 , -0.27320027],
          [ 0.32684386, -0.4980604 ],
          [ 0.7306807 ,  0.39620203]],
 
         [[ 0.1296094 , -0.27320027],
          

In [60]:
seq_length = 3
inputs = Input(shape=(seq_length, latent_dim))
outputs, last_output = GRU(latent_dim, return_state=True, return_sequences=True)(inputs)

model = Model(inputs=inputs, outputs=outputs)
model.compile(optimizer='rmsprop', loss='mse')
print(model.summary())

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_21 (InputLayer)        (None, 3, 2)              0         
_________________________________________________________________
gru_11 (GRU)                 [(None, 3, 2), (None, 2)] 30        
Total params: 30
Trainable params: 30
Non-trainable params: 0
_________________________________________________________________
None


In [61]:
layer_outputs = list()
for idx, l in enumerate(model.layers):
    if idx == 0:
        continue
    layer_outputs.append(l.output)
get_3rd_layer_output = K.function([model.layers[0].input],
                                  layer_outputs)
layer_output = get_3rd_layer_output([enc_input])
print('')
for l_output in layer_output:
    print(l_output[0][0])
    print('')

TypeError: Can not convert a list into a Tensor or Operation.

In [55]:
latent_dim

2

In [233]:
print()
for l_output in layer_output:
    print(l_output[0][0])
    print('')




NameError: name 'layer_output' is not defined

In [None]:
layer_output[0][0][0]