In [16]:
'''
  code by Minho Ryu @bzantium
  reference : https://github.com/golbin/TensorFlow-Tutorials/blob/master/10%20-%20RNN/03%20-%20Seq2Seq.py
              https://github.com/graykode/nlp-tutorial/blob/master/4-1.Seq2Seq/Seq2Seq_Tensor.ipynb
              
'''
import tensorflow as tf
from tensorflow.keras.layers import Embedding, Dense
import numpy as np

# Clear the default graph before any model ops are created.
tf.reset_default_graph()

# Special symbols that share the vocabulary with the lowercase alphabet:
#   S: marks the start of the decoder input
#   E: marks the end of the decoder output (target)
#   P: padding used when a word is shorter than n_step
char_arr = list('SEPabcdefghijklmnopqrstuvwxyz')
# Character -> integer id, in the order the characters appear in char_arr.
num_dic = dict(zip(char_arr, range(len(char_arr))))

# [source word, target word] training pairs.
seq_data = [
    ['man', 'woman'],
    ['men', 'women'],
    ['black', 'white'],
    ['king', 'queen'],
    ['girl', 'boy'],
    ['up', 'down'],
    ['high', 'low'],
]

# Seq2Seq Parameter
n_step = 5      # every word is padded to this many characters
n_embed = 5     # embedding size per character
n_hidden = 128  # LSTM units
vocab_size = len(num_dic)
n_class = vocab_size  # number of class(=number of vocab)

def make_batch(seq_data):
    """Convert [source, target] word pairs into id sequences for the model.

    Each word is right-padded with 'P' to ``n_step`` characters and mapped
    to ids through ``num_dic``. The caller's lists are left untouched.

    Args:
        seq_data: iterable of two-item sequences ``[source, target]``.

    Returns:
        A tuple ``(input_batch, output_batch, target_batch)`` of lists:
        ``input_batch`` holds the padded source ids (length ``n_step``),
        ``output_batch`` the decoder input ``'S' + target`` and
        ``target_batch`` the expected output ``target + 'E'`` (both of
        length ``n_step + 1``).

    Raises:
        ValueError: if a word is longer than ``n_step``.
        KeyError: if a word contains a character missing from ``num_dic``.
    """
    input_batch, output_batch, target_batch = [], [], []

    for seq in seq_data:
        padded = []
        for word in (seq[0], seq[1]):
            # A longer word would yield a row of the wrong length and only
            # fail later, when the batch is fed to the fixed-size placeholders.
            if len(word) > n_step:
                raise ValueError(
                    'word %r is longer than n_step=%d' % (word, n_step))
            # Pad a local copy rather than writing back into `seq`.
            padded.append(word.ljust(n_step, 'P'))
        source, target = padded

        input_batch.append([num_dic[ch] for ch in source])
        output_batch.append([num_dic[ch] for ch in 'S' + target])
        target_batch.append([num_dic[ch] for ch in target + 'E'])

    return input_batch, output_batch, target_batch

  
# Model
class seq2seq(object):
    """Character-level LSTM encoder-decoder built with the TF1 graph API.

    The encoder reads the embedded source ids; its final LSTM state
    initialises the decoder, whose outputs are projected to ``n_class``
    logits per time step.

    Args:
        sess: ``tf.Session`` used to initialise variables and run the graph.
        vocab_size: number of distinct input ids (embedding rows).
        n_embed: embedding dimension.
        n_hidden: number of LSTM units in encoder and decoder.
        n_class: number of output classes.

    The sequence length is taken from the module-level ``n_step``.
    """

    # Keep probability applied to the LSTM outputs during training.
    TRAIN_KEEP_PROB = 0.5

    def __init__(self, sess, vocab_size, n_embed, n_hidden, n_class):
        self.sess = sess
        self.vocab_size = vocab_size
        self.n_embed = n_embed
        self.n_hidden = n_hidden
        self.n_class = n_class
        self._build_model()

    def _build_model(self):
        """Create placeholders, the network, the loss and the train op."""
        # All inputs are integer ids (not one-hot); Embedding does the lookup.
        self.enc_input = tf.placeholder(tf.int32, [None, n_step])  # [batch_size, n_step]
        self.dec_input = tf.placeholder(tf.int32, [None, n_step + 1])  # [batch_size, n_step + 1]
        self.targets = tf.placeholder(tf.int32, [None, None])  # [batch_size, n_step + 1], class ids
        # Defaults to 1.0 (no dropout) so predict() is deterministic;
        # train() feeds TRAIN_KEEP_PROB.
        self.keep_prob = tf.placeholder_with_default(1.0, shape=[])

        # One embedding table shared by encoder and decoder inputs.
        embedding = Embedding(self.vocab_size, self.n_embed)
        enc_input = embedding(self.enc_input)
        dec_input = embedding(self.dec_input)

        fc = Dense(self.n_class, input_shape=(self.n_hidden,))

        with tf.variable_scope('encode'):
            enc_cell = tf.nn.rnn_cell.BasicLSTMCell(self.n_hidden)
            enc_cell = tf.nn.rnn_cell.DropoutWrapper(enc_cell, output_keep_prob=self.keep_prob)
            # Only the final state is used; it seeds the decoder below.
            _, enc_states = tf.nn.dynamic_rnn(enc_cell, enc_input, dtype=tf.float32)

        with tf.variable_scope('decode'):
            dec_cell = tf.nn.rnn_cell.BasicLSTMCell(self.n_hidden)
            dec_cell = tf.nn.rnn_cell.DropoutWrapper(dec_cell, output_keep_prob=self.keep_prob)
            outputs, _ = tf.nn.dynamic_rnn(dec_cell, dec_input, initial_state=enc_states, dtype=tf.float32)

        logits = fc(outputs)  # [batch_size, n_step + 1, n_class]

        # Mean cross-entropy over every position, padding included.
        self.cost = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=self.targets))
        self.optimizer = tf.train.AdamOptimizer(0.001).minimize(self.cost)

        self.prediction = tf.argmax(logits, 2)

        # Use the session handed to the constructor, not a module global.
        self.sess.run(tf.global_variables_initializer())

    def train(self, enc_input, dec_input, targets):
        """Run one optimisation step with dropout on; return ``[cost, op result]``."""
        feed = {
            self.enc_input: enc_input,
            self.dec_input: dec_input,
            self.targets: targets,
            self.keep_prob: self.TRAIN_KEEP_PROB,
        }
        return self.sess.run([self.cost, self.optimizer], feed_dict=feed)

    def predict(self, enc_input, dec_input):
        """Return the arg-max class id per decoder step, with dropout off."""
        feed = {self.enc_input: enc_input, self.dec_input: dec_input}
        return self.sess.run(self.prediction, feed_dict=feed)
                

# Training
# allow_growth makes TensorFlow allocate GPU memory as needed.
run_config = tf.ConfigProto()
run_config.gpu_options.allow_growth = True
sess = tf.Session(config=run_config)

model = seq2seq(sess, vocab_size, n_embed, n_hidden, n_class)
input_batch, output_batch, target_batch = make_batch(seq_data)

# Full-batch training: every epoch feeds all pairs at once.
for epoch in range(1000):
    loss, _ = model.train(input_batch, output_batch, target_batch)
    if (epoch + 1) % 100 != 0:
        continue
    # Report the cost once every 100 epochs.
    print('Epoch:', '%03d' % (epoch + 1), 'cost =', '{:.6f}'.format(loss))


# Test
def translate(word):
    """Run the trained model on ``word`` and return the decoded string.

    The decoder input carries no real target: it is 'S' followed by padding
    only. The prediction is cut at the first 'E' (when one is produced) and
    any 'P' padding symbols are dropped.
    """
    # Stand-in target of the same length as the word; make_batch pads both.
    pair = [word, 'P' * len(word)]
    enc_ids, dec_ids, _ = make_batch([pair])

    predicted_ids = model.predict(enc_ids, dec_ids)[0]
    text = ''.join(char_arr[i] for i in predicted_ids)

    # Everything before the first end symbol, or the whole text if none.
    body = text.partition('E')[0]
    return body.replace('P', '')

print('test')
# Sample queries; 'upp' is not one of the training source words.
for word in ('man', 'men', 'king', 'black', 'upp'):
    print('%s ->' % word, translate(word))

Epoch: 100 cost = 1.567791
Epoch: 200 cost = 0.482214
Epoch: 300 cost = 0.179591
Epoch: 400 cost = 0.057933
Epoch: 500 cost = 0.041828
Epoch: 600 cost = 0.014552
Epoch: 700 cost = 0.013977
Epoch: 800 cost = 0.007216
Epoch: 900 cost = 0.016227
Epoch: 1000 cost = 0.015089
test
man -> woman
men -> women
king -> queen
black -> white
upp -> down
