<a href="https://colab.research.google.com/github/JeonJeongMin/tensorflow-examples/blob/master/Supervised%20Learning/RNN03_seq2seq.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Import modules and define the vocabulary

In [0]:
import tensorflow as tf
import numpy as np

# Vocabulary: the three control symbols (S, E, P), the lowercase Latin
# alphabet, and every Hangul syllable that appears in the translations below.
char_arr = list('SEPabcdefghijklmnopqrstuvwxyz단어나무놀이소녀키스사랑싫해세계지속하다가엇너의')
# Symbol -> integer id (its position in char_arr).
num_dic = dict(zip(char_arr, range(len(char_arr))))
dic_len = len(num_dic)

# Training pairs: [English word, Korean translation].
seq_data = [
    ['word', '단어'],
    ['wood', '나무'],
    ['game', '놀이'],
    ['girl', '소녀'],
    ['kiss', '키스'],
    ['love', '사랑'],
    ['hate', '싫어'],
    ['world', '세계'],
    ['keep', '지속하다'],
    ['going', '가다'],
    ['what', '무엇'],
    ['your', '너의'],
]

# Helper functions

In [0]:
# Pad a word with a filler symbol (func is one of 'S', 'E', 'P') so that words
# of different lengths end up with the same length.
def putSEP(word, func, max_len=5):
  """Right-pad ``word`` with ``func`` until it is ``max_len`` symbols long.

  Args:
    word: The string to pad.
    func: The filler string appended once per missing position.
    max_len: Target length; defaults to 5, the value that used to be
      hard-coded here.

  Returns:
    The padded string. A word that already has ``max_len`` or more symbols is
    returned unchanged (it is never truncated).
  """
  n_missing = max_len - len(word)
  # A non-positive repeat count yields '', so long words pass through as-is.
  return word + func * n_missing

# Build the encoder input, decoder input and target data (one-hot encoding
# where the network expects it) from [source, translation] pairs.
def make_batch(seq_data):
  """Convert [source, translation] pairs into the three model batches.

  For every pair:
    * encoder input  = source padded with 'P'
    * decoder input  = 'S' + translation padded with 'E'
    * target         = translation padded with 'E', followed by one more 'E'

  Args:
    seq_data: Iterable of sequences whose first item is the source word and
      whose second item is its translation. Every symbol must be a key of
      ``num_dic``; an unknown symbol raises ``KeyError``.

  Returns:
    A tuple ``(input_batch, output_batch, target_batch)`` of lists with one
    entry per pair. The first two hold one-hot arrays with one row per symbol
    and ``dic_len`` columns; the third holds lists of integer symbol ids.
  """
  # Row i of the identity matrix is the one-hot vector of symbol id i. It is
  # built once here instead of once per append.
  one_hot = np.eye(dic_len)

  input_batch = []
  output_batch = []
  target_batch = []

  for seq in seq_data:
    padded_target = putSEP(seq[1], 'E')

    enc_ids = [num_dic[n] for n in putSEP(seq[0], 'P')]
    dec_ids = [num_dic[n] for n in ('S' + padded_target)]
    target_ids = [num_dic[n] for n in (padded_target + 'E')]

    # Indexing with a list of ids copies the selected rows, so every batch
    # entry owns its own array.
    input_batch.append(one_hot[enc_ids])
    output_batch.append(one_hot[dec_ids])
    target_batch.append(target_ids)

  return input_batch, output_batch, target_batch

# Set the hyperparameters

In [0]:
# Step size handed to the Adam optimizer.
learning_rate = 0.01
# Number of units in each RNN cell.
n_hidden = 128
# Number of training iterations; every iteration feeds the whole data set.
total_epoch = 100
# Inputs are one-hot vectors over the vocabulary, and the output layer scores
# the same vocabulary, so both sizes equal the vocabulary size.
n_input = dic_len
n_class = dic_len

# Construct the seq2seq network

In [0]:
tf.reset_default_graph()

# One-hot encoded sequences: [batch size, time steps, vocabulary size].
enc_input = tf.placeholder(tf.float32, [None, None, n_input])
dec_input = tf.placeholder(tf.float32, [None, None, n_input])
# Integer symbol ids: [batch size, time steps].
targets = tf.placeholder(tf.int64, [None, None])

# Dropout keep probability shared by both cells. Left unfed it is 0.5, i.e. the
# value that used to be hard-coded, so existing sess.run calls behave as
# before. Feed `keep_prob: 1.0` to switch dropout off when predicting.
keep_prob = tf.placeholder_with_default(0.5, shape=[], name='keep_prob')

with tf.variable_scope('encode'):
    enc_cell = tf.nn.rnn_cell.BasicRNNCell(n_hidden)
    enc_cell = tf.nn.rnn_cell.DropoutWrapper(enc_cell,
                                             output_keep_prob=keep_prob)

    # Only the final encoder state is used downstream; the per-step outputs
    # get their own name so they are not confused with the decoder outputs.
    enc_outputs, enc_states = tf.nn.dynamic_rnn(enc_cell, enc_input,
                                                dtype=tf.float32)

with tf.variable_scope('decode'):
    dec_cell = tf.nn.rnn_cell.BasicRNNCell(n_hidden)
    dec_cell = tf.nn.rnn_cell.DropoutWrapper(dec_cell,
                                             output_keep_prob=keep_prob)

    # The decoder starts from the encoder's final state.
    outputs, dec_states = tf.nn.dynamic_rnn(dec_cell, dec_input,
                                            initial_state=enc_states,
                                            dtype=tf.float32)

# Unnormalized scores (logits) over the vocabulary for every decoder step.
model = tf.layers.dense(outputs, n_class, activation=None)

# Mean cross-entropy over all examples and steps; the labels are integer ids,
# hence the sparse variant of the loss.
cost = tf.reduce_mean(
            tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits=model, labels=targets))

optimizer = tf.train.AdamOptimizer(learning_rate).minimize(cost)

# Training

In [11]:
# The session is kept open on purpose: the prediction cell below reuses it.
sess = tf.Session()
sess.run(tf.global_variables_initializer())

input_batch, output_batch, target_batch = make_batch(seq_data)

# The whole data set is fed as one batch on every epoch, so the feed dict is
# assembled a single time up front.
train_feed = {
    enc_input: input_batch,
    dec_input: output_batch,
    targets: target_batch,
}

print('Learning Start!')
for epoch in range(total_epoch):
  # One optimizer step; cost_val holds the loss value fetched in the same run.
  _, cost_val = sess.run([optimizer, cost], feed_dict=train_feed)

print('Learning Finish!')


Learning Start!
Learning Finish!


# Test and results

In [12]:
# Most likely symbol id for every [example, step]. Built once at cell level:
# creating the op inside translate() would add a new node to the graph on
# every call.
prediction = tf.argmax(model, 2)


def translate(word):
  """Translate one English word with the trained model.

  Args:
    word: Source word made of symbols from the vocabulary. It is padded with
      'P' the same way the training inputs are.

  Returns:
    The decoded symbols up to (not including) the first 'E' end marker, joined
    into a string. If the model emits no 'E' at all, every decoded symbol is
    returned instead of raising ``ValueError``.
  """
  word = putSEP(word, 'P')
  # The real translation is unknown at prediction time, so the decoder is fed
  # a placeholder made only of 'P' symbols.
  query = [word, 'P' * len(word)]
  input_batch, output_batch, _ = make_batch([query])

  # The logits do not depend on `targets`, so only the two inputs are fed.
  result = sess.run(prediction, feed_dict={enc_input: input_batch,
                                           dec_input: output_batch})
  decoded = [char_arr[i] for i in result[0]]
  print(decoded)

  # Cut at the first end marker; fall back to the full sequence if none exists.
  end = decoded.index('E') if 'E' in decoded else len(decoded)
  return ''.join(decoded[:end])


print(translate('your'))

['너', '의', 'E', 'E', 'E', 'E']
너의
