<br></br>
# **자연어와 Deep Learning**
## **Seq2Seq 번역모델**

<br></br>
## **1 데이터의 정의**

In [None]:
# Vocabulary of every symbol the model can read or emit, one character each.
# NOTE(review): the leading 'S'/'P' and the trailing 'E' look like special
# start / padding / end markers rather than ordinary letters - confirm
# against make_batch and translate.
char_arr = list('SPabcdefghijklmnopqrstuvwxyz나놀녀단랑무사소스어이키E')
# Symbol -> position in char_arr; the position doubles as the one-hot index.
num_dic = dict(zip(char_arr, range(len(char_arr))))
# Vocabulary size (number of distinct symbols).
dic_len = len(num_dic)

In [None]:
# Training corpus: each entry is [English source word, Korean target word].
seq_data = [
    ['word', '단어'],
    ['wood', '나무'],
    ['game', '놀이'],
    ['girl', '소녀'],
    ['kiss', '키스'],
    ['love', '사랑'],
]

def make_batch(seq_data):
    """Turn [source, target] word pairs into encoder/decoder/label batches.

    Args:
        seq_data: iterable of sequences whose item 0 is the source word and
            item 1 is the target word. Every character must be a key of
            ``num_dic``.

    Returns:
        A tuple ``(input_batch, output_batch, target_batch)`` of lists with
        one entry per pair:

        * ``input_batch``  - one-hot rows for the source characters.
        * ``output_batch`` - one-hot rows for ``'S' + target`` (decoder input,
          i.e. the target shifted right behind the start symbol).
        * ``target_batch`` - plain index lists for ``target + 'E'`` (the
          labels, i.e. the target followed by the end symbol).

    Raises:
        KeyError: if a word contains a character that is not in ``num_dic``.
    """
    # Row i of the identity matrix is the one-hot vector for symbol index i;
    # build it once instead of once per word.
    one_hot = np.eye(dic_len)

    input_batch, output_batch, target_batch = [], [], []
    for seq in seq_data:
        source, target_word = seq[0], seq[1]
        enc_ids = [num_dic[ch] for ch in source]
        # Decoder input starts with 'S'; labels end with 'E'. Both therefore
        # have len(target_word) + 1 steps and line up position by position.
        dec_ids = [num_dic[ch] for ch in ('S' + target_word)]
        label_ids = [num_dic[ch] for ch in (target_word + 'E')]

        input_batch.append(one_hot[enc_ids])
        output_batch.append(one_hot[dec_ids])
        # Labels stay as integer ids because the loss is the sparse variant.
        target_batch.append(label_ids)
    return input_batch, output_batch, target_batch

<br></br>
## **2 모델의 정의**

In [None]:
import tensorflow as tf
import numpy as np

In [None]:
# Clear the default graph before (re)building the model in the cells below.
tf.reset_default_graph()

# Optimisation settings.
learning_rate = 0.01
n_hidden = 128
total_epoch = 100

# The network reads and predicts symbols from the same vocabulary, so the
# input width and the number of output classes both equal dic_len.
n_input = dic_len
n_class = dic_len

# One-hot encoder / decoder inputs; the first two dimensions are left open.
enc_input = tf.placeholder(dtype=tf.float32, shape=[None, None, n_input])
dec_input = tf.placeholder(dtype=tf.float32, shape=[None, None, n_input])
# Integer class ids of the expected decoder outputs.
targets = tf.placeholder(dtype=tf.int64, shape=[None, None])  # [batch size, time steps]

In [None]:
with tf.variable_scope('encode'):
    # Basic RNN cell with dropout on its outputs (keep probability 0.5).
    # NOTE(review): the keep probability is a constant, so dropout is also
    # active whenever this graph is run for prediction - confirm intended.
    enc_cell = tf.nn.rnn_cell.DropoutWrapper(
        tf.nn.rnn_cell.BasicRNNCell(n_hidden),
        output_keep_prob=0.5,
    )
    # Unroll the cell over enc_input; enc_states is the state after the last step.
    outputs, enc_states = tf.nn.dynamic_rnn(cell=enc_cell,
                                            inputs=enc_input,
                                            dtype=tf.float32)

In [None]:
with tf.variable_scope('decode'):
    # Basic RNN cell with dropout on its outputs (keep probability 0.5),
    # the same construction as the encoder cell.
    dec_cell = tf.nn.rnn_cell.BasicRNNCell(n_hidden)
    dec_cell = tf.nn.rnn_cell.DropoutWrapper(dec_cell, output_keep_prob=0.5)
    # The decoder starts from the encoder's final state: that state is the
    # only channel through which the source word reaches the decoder.
    # Both cells use n_hidden units, so the state shapes match.
    outputs, dec_states = tf.nn.dynamic_rnn(dec_cell, dec_input,
                                            initial_state=enc_states,
                                            dtype=tf.float32)

In [None]:
# Project each time step of `outputs` onto one logit per vocabulary symbol.
# No activation here: the softmax is applied inside the loss below.
model = tf.layers.dense(inputs=outputs, units=n_class, activation=None)

# Mean cross-entropy over every position; `targets` carries integer class
# ids, hence the sparse variant of the loss.
cost = tf.reduce_mean(
    tf.nn.sparse_softmax_cross_entropy_with_logits(labels=targets, logits=model)
)

# Training op: one Adam update of all trainable variables per run.
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)

<br></br>
## **3 모델의 학습**

In [None]:
%%time
# Open a session and initialise every variable of the graph.
sess = tf.Session()
sess.run(tf.global_variables_initializer())

# The whole corpus is fed as a single batch on every epoch, so the feed
# dictionary is assembled once, outside the loop.
input_batch, output_batch, target_batch = make_batch(seq_data)
train_feed = {
    enc_input: input_batch,
    dec_input: output_batch,
    targets: target_batch,
}

for epoch in range(total_epoch):
    _, loss = sess.run([optimizer, cost], feed_dict=train_feed)
    # Report only on every 9th epoch (0-based), shown with a 1-based number.
    if epoch % 9 != 0:
        continue
    print('Epoch: {:4d}  cost = {:.6f}'.format((epoch + 1), loss))
print('최적화 완료!')

<br></br>
## **4 모델의 검증**

In [None]:
%%time
def translate(word):
    """Translate ``word`` with the trained model and return the result string.

    The real target is unknown at prediction time, so a placeholder target
    made of the padding symbol ``'P'`` (one per input character) is passed
    to ``make_batch`` to build the decoder input and the labels.

    Args:
        word: source string; every character must be a key of ``num_dic``.

    Returns:
        The decoded characters up to, but not including, the first end
        symbol ``'E'``. If the model never emits ``'E'``, the whole decoded
        sequence is returned.

    Raises:
        KeyError: propagated from ``make_batch`` when ``word`` contains a
            character outside the vocabulary.
    """
    # Placeholder target: 'P' is in the vocabulary, so make_batch can encode it.
    seq_data = [word, 'P' * len(word)]
    input_batch, output_batch, target_batch = make_batch([seq_data])

    # Index of the highest logit along axis 2 (the class axis) for each step.
    # NOTE: this adds a new argmax op to the graph on every call.
    prediction = tf.argmax(model, 2)   # [None, None, n_input]
    result = sess.run(prediction,
                      feed_dict={enc_input: input_batch,
                                 dec_input: output_batch,
                                 targets: target_batch})

    # Only one word was fed, so the first row holds its predicted indices.
    decoded = [char_arr[i] for i in result[0]]
    # Cut at the first end symbol; keep everything when none was produced
    # instead of failing with ValueError.
    end = decoded.index('E') if 'E' in decoded else len(decoded)
    translated = ''.join(decoded[:end])
    return translated

In [None]:
print('\n=== 번역 테스트 ===')
# Two training words, a misspelling of each, and a string that is not in
# the corpus at all.
for word in ('word', 'wodr', 'love', 'loev', 'abcd'):
    print(word + ' ->', translate(word))
# Inference is finished: release the session.
sess.close()