# Seq2Seq
實現基礎版的Seq2Seq，輸入一個英文單字，模型將輸出一個對字母排序後的單字
<br>輸入 : hello
<br>輸出 : ehllo

本程式試著不使用Tensorflow所提供的Helper指令，完成Scheduled Sampling<br>
Scheduled Sampling是一種解決訓練和生成時輸入數據分布不一致的方法<br>

在一般的Seq2Seq模型的inference階段中，如果Sequence中在t時刻中產生錯誤的值，在t時刻之後的輸入狀態將會受到影響，而該誤差會隨著生成過程不斷向後累積；而Scheduled Sampling以一定概率將Decoder自己產生的值作為Decoder端的輸入，這樣即使前面產生錯誤的值，其目標仍然是最大化真實目標序列的概率，模型會朝著正確的方向前進<br>

在訓練早期Scheduled Sampling主要使用target中的真實值作為Decoder端的輸入，可以將模型從隨機初始化的狀態快速引導至一個合理的狀態；隨著訓練的進行，該方法會逐漸更多地使用Decoder自己產生的值作為Decoder端的輸入，以解決數據分布不一致的問題<br>

<img src="3_scheduled_sampling_不使用helper指令_version_1.jpg" style="width:1140px;height:600px;float:middle">

以上為Decoder端的計算流程<br>
當前時刻decoder_cell會直接吃mlstm_cell_output(上一個時刻decoder_cell的輸出)<br>

In [1]:
import warnings
warnings.filterwarnings('ignore')
import numpy as np
import time
import copy
import tensorflow as tf
from tensorflow.python.layers.core import Dense

# 讀取數據

In [2]:
# Load the raw source/target corpora (one word per line) into memory.
# Context managers guarantee both file handles are closed once read.
with open('data/letters_source.txt', 'r', encoding = 'utf-8') as f:
    source_data = f.read()

with open('data/letters_target.txt', 'r', encoding = 'utf-8') as f:
    target_data = f.read()

# 數據預處理

In [3]:
def extract_character_vocab(data):
    """Build the two lookup tables for a character-level vocabulary.

    `data` is a newline-separated string. Every distinct character found on
    its lines (newlines themselves excluded) is given an integer id in order
    of first appearance, after the four special tokens which always take
    ids 0-3.

    Returns a tuple `(int_to_vocab, vocab_to_int)` of dicts mapping
    id -> token and token -> id respectively.
    """
    special_words = ['<PAD>' , '<UNK>' , '<GO>' , '<EOS>']

    # `already_seen` gives constant-time membership tests while `characters`
    # records the first-appearance order that determines the ids.
    already_seen = set()
    characters = []
    for line in data.split('\n'):
        for symbol in line:
            if symbol in already_seen:
                continue
            already_seen.add(symbol)
            characters.append(symbol)

    # Special tokens come first so that they occupy ids 0..3.
    int_to_vocab = dict(enumerate(special_words + characters))
    vocab_to_int = {token: idx for idx , token in int_to_vocab.items()}

    return int_to_vocab, vocab_to_int

In [4]:
source_int_to_letter , source_letter_to_int = extract_character_vocab(source_data)
target_int_to_letter , target_letter_to_int = extract_character_vocab(target_data)

# Encode every line of both corpora as a list of vocabulary ids.
source_int = [[source_letter_to_int[letter] for letter in line]
              for line in source_data.split('\n')]

# Each target sequence additionally ends with the <EOS> id.
target_eos_id = target_letter_to_int['<EOS>']
target_int = [[target_letter_to_int[letter] for letter in line] + [target_eos_id]
              for line in target_data.split('\n')]

In [5]:
# Fix the sequence lengths up front: the decoder below is unrolled by hand
# rather than built with tf.nn.dynamic_rnn, so every batch must share one
# static length.
paired_rows = list(zip(source_int , target_int))
source_max_length = max((len(src_row) for src_row , _ in paired_rows) , default = 0)
target_max_length = max((len(tgt_row) for _ , tgt_row in paired_rows) , default = 0)

# Right-pad every row with the corresponding <PAD> id up to the maximum length.
source_pad_id = source_letter_to_int['<PAD>']
target_pad_id = target_letter_to_int['<PAD>']

source_int_pad = np.array([src_row + [source_pad_id] * (source_max_length - len(src_row))
                           for src_row , _ in paired_rows])
target_int_pad = np.array([tgt_row + [target_pad_id] * (target_max_length - len(tgt_row))
                           for _ , tgt_row in paired_rows])

In [6]:
# ---- Hyperparameters ----
# Upper bound on the number of passes over the training set
epochs = 200
# Number of sequences per batch
batch_size = 128
# Number of units in each LSTM cell
rnn_hidden_unit = 50
# Number of stacked LSTM layers in the encoder and in the decoder
num_layers = 1
# Width of the source-side embedding vectors
encoding_embedding_size = 15
# Width of the target-side embedding vectors; tied to the LSTM size
decoding_embedding_size = rnn_hidden_unit
# Step size fed to the optimizer
learning_rate = 0.001
# Vocabulary sizes (special tokens included)
source_vocab_size = len(source_int_to_letter)
target_vocab_size = len(target_int_to_letter)

# Build Model

## 輸入層

In [7]:
# Padded source ids, one row per example.
input_data = tf.placeholder(dtype = tf.int32 , shape = [None , source_max_length] , name = 'inputs')
# Padded target ids, one row per example.
targets = tf.placeholder(dtype = tf.int32 , shape = [None , target_max_length] , name = 'targets')
# One-hot labels for the flattened targets, one row per (example, time step).
targets_onehot = tf.one_hot(tf.reshape(targets , [-1]) , depth = target_vocab_size)
lr = tf.placeholder(dtype = tf.float32 , name = 'learning_rate')

# Per-time-step switch that selects the decoder input at step t: either the
# ground-truth target token or the decoder's own output from step t-1.
from_model_or_target = tf.placeholder(dtype = tf.bool , shape = [target_max_length] , name = 'on_train')

## Encoder

需要對source數據進行embedding，再傳入Encoder中的RNN

In [8]:
# Names used by the encoder cells below:
#   input_data              - input tensor of source ids
#   rnn_hidden_unit         - number of hidden units per RNN cell
#   num_layers              - number of stacked RNN cells
#   source_vocab_size       - size of the source vocabulary
#   encoding_embedding_size - width of the embedding vectors
#
# Encoder embedding.
# The original author's note describes the two statements below as the
# spelled-out form of the single call
#   encoder_embed_input = tf.contrib.layers.embed_sequence(input_data , source_vocab_size , encoding_embedding_size)
# which can be used instead to save a line.
# Reference: https://www.tensorflow.org/api_docs/python/tf/contrib/layers/embed_sequence
encoder_embeddings = tf.Variable(tf.random_uniform(shape = [source_vocab_size , encoding_embedding_size]))
encoder_embed_input = tf.nn.embedding_lookup(params = encoder_embeddings , ids = input_data)

def get_lstm_cell(rnn_hidden_unit):
    """Return a new LSTM cell with `rnn_hidden_unit` units.

    The cell is created with a uniform initializer over [-0.1, 0.1].
    """
    uniform_init = tf.random_uniform_initializer(-0.1 , 0.1)
    return tf.contrib.rnn.LSTMCell(rnn_hidden_unit , initializer = uniform_init)

with tf.variable_scope('encoder'):
    # Stack `num_layers` LSTM cells and run them over the embedded source.
    encoder_layers = [get_lstm_cell(rnn_hidden_unit) for _ in range(num_layers)]
    encoder_cell = tf.contrib.rnn.MultiRNNCell(encoder_layers)

    # encoder_state is later used as the decoder's initial state.
    encoder_output, encoder_state = tf.nn.dynamic_rnn(cell = encoder_cell ,
                                                      inputs = encoder_embed_input ,
                                                      dtype = tf.float32)

## Decoder

In [9]:
# Build the decoder input from the targets: drop the last token of every row
# and prepend <GO>, so each row keeps its original length.
# Equivalent slicing with tf.strided_slice:
#   tf.strided_slice(targets , [0, 0] , [batch_size, -1] , [1, 1])
ending = tf.identity(targets[: , 0:-1])

# Take the number of rows from the tensor actually fed at run time instead of
# baking the Python constant `batch_size` into the graph; the <GO> column then
# matches whatever batch is supplied.
dynamic_batch_size = tf.shape(targets)[0]
go_column = tf.fill([dynamic_batch_size , 1] , target_letter_to_int['<GO>'])
decoder_input = tf.concat([go_column , ending] , axis = 1)

In [10]:
# Names used by the decoder:
#   decoding_embedding_size - width of the target embedding vectors
#   num_layers              - number of stacked RNN cells
#   rnn_hidden_unit         - number of hidden units per RNN cell
#   encoder_state           - final encoder state, used as the initial decoder state
#   decoder_input           - target ids shifted right with <GO> prepended

# 1. Embedding
decoder_embeddings = tf.Variable(tf.random_uniform([target_vocab_size, decoding_embedding_size]))
decoder_embed_input = tf.nn.embedding_lookup(decoder_embeddings , decoder_input)

with tf.variable_scope('decoder'):
    # 2. Build the decoder RNN and unroll it by hand, one time step at a time.
    decoder_cell = tf.contrib.rnn.MultiRNNCell([get_lstm_cell(rnn_hidden_unit) for _ in range(num_layers)])
    state = encoder_state
    outputs  = []
    for time_step in range(0 , target_max_length):
        if time_step == 0:
            # The first step always consumes the embedded first input token.
            input_to_decoder = decoder_embed_input[: , 0 , :]
        else:
            # Every later step shares the weights created at step 0.
            tf.get_variable_scope().reuse_variables()

            # Scheduled sampling: from_model_or_target[time_step] is the coin
            # toss for this step.
            #   True  -> feed the embedded ground-truth token for this step
            #   False -> feed mlstm_cell_output, the cell output of the previous step
            # The caller feeds mostly True early in training and shifts towards
            # all False as training progresses; the probabilities are adjusted
            # in the training loop.
            input_to_decoder = tf.cond(from_model_or_target[time_step] ,
                                       lambda: decoder_embed_input[: , time_step , :] ,
                                       lambda: mlstm_cell_output)

        mlstm_cell_output , state = decoder_cell(input_to_decoder , state)
        outputs.append(mlstm_cell_output)

In [11]:
# `outputs` is a Python list with one tensor per time step. Stack it, move the
# batch axis to the front and flatten to one row per (example, time step).
outputs_ = tf.transpose(tf.convert_to_tensor(outputs) , [1 , 0 , 2])
outputs_ = tf.reshape(outputs_ , [-1 , rnn_hidden_unit])

# Linear projection from the LSTM output to vocabulary logits.
weights = tf.Variable(tf.truncated_normal([rnn_hidden_unit , target_vocab_size] , mean = 0.01 , stddev = 0.1))
biases = tf.Variable(tf.zeros([1 , target_vocab_size]) + 0.0001)
logits = tf.matmul(outputs_ , weights) + biases

# predicting_logits is not part of training; it only exposes the predicted ids
# for inspection.
predicting_logits = tf.nn.softmax(logits)
predicting_logits = tf.argmax(predicting_logits , axis = 1)
# Restore the per-example layout using the static sequence length instead of
# the Python constant `batch_size`, so the op does not pin the batch size.
predicting_logits = tf.reshape(predicting_logits , [-1 , target_max_length] , name = 'predictions')

In [12]:
# Loss function: softmax cross-entropy per (example, time step) row, averaged
# over all rows.
# The *_v2 op replaces the deprecated softmax_cross_entropy_with_logits; the
# labels are wrapped in tf.stop_gradient so that, as before, no gradient flows
# into them.
loss = tf.nn.softmax_cross_entropy_with_logits_v2(labels = tf.stop_gradient(targets_onehot) ,
                                                  logits = logits)
total_loss = tf.reduce_mean(loss)

# Optimizer
optimizer = tf.train.AdamOptimizer(lr)

# Gradient clipping: clamp every gradient element to [-5, 5] and skip
# variables that received no gradient.
gradients = optimizer.compute_gradients(total_loss)
capped_gradients = [(tf.clip_by_value(grad, -5. , 5.), var) for grad, var in gradients if grad is not None]
train_op = optimizer.apply_gradients(capped_gradients)

Instructions for updating:

Future major versions of TensorFlow will allow gradients to flow
into the labels input on backprop by default.

See @{tf.nn.softmax_cross_entropy_with_logits_v2}.



In [13]:
# Split the padded data: the first `batch_size` rows are held out as a single
# validation batch and everything after them is used for training.
valid_source , train_source = source_int_pad[:batch_size] , source_int_pad[batch_size:]
valid_target , train_target = target_int_pad[:batch_size] , target_int_pad[batch_size:]

## Training

In [14]:
sess = tf.Session()
sess.run(tf.global_variables_initializer())

# Scheduled-sampling schedule.
# `prob` is the per-step probability of drawing False, i.e. of feeding the
# decoder its own previous output. It starts near zero and is multiplied by
# `alpha` after every epoch until it saturates at 1.
prob = 1e-3
alpha = 1.075
# Number of epochs trained with an all-False mask so far.
stop_early = 0

# Number of full batches in the training set, derived from the data rather
# than hard-coded. A trailing partial batch is dropped; if the training set
# holds fewer than `batch_size` rows this is 0 and no training step runs.
num_batches = len(train_source) // batch_size

# Row indices of every batch. Batches are consecutive, non-shuffled slices and
# identical for every epoch, so they are computed once.
batch_index = [list(range(first , first + batch_size))
               for first in range(0 , num_batches * batch_size , batch_size)]

for epoch_i in range(0 , epochs):

    # Draw one mask per epoch. True selects the ground-truth token (teacher
    # forcing) and False the decoder's previous output (sampling), so masks go
    # from mostly True in early epochs to all False once prob reaches 1.
    coin_tossing = np.random.choice(a = 2 ,
                                    size = target_max_length ,
                                    replace = True ,
                                    p = [prob , 1 - prob])
    coin_tossing = coin_tossing.astype(bool)
    # Grow the sampling probability for the next epoch, capped at 1.
    prob = min(alpha * prob , 1.)

    for batch_i in range(0 , num_batches):
        train_source_batch = train_source[batch_index[batch_i] , :]
        train_target_batch = train_target[batch_index[batch_i] , :]

        _ , training_loss , predicting_logits_result =\
        sess.run([train_op, total_loss , predicting_logits] ,
                 feed_dict = {input_data : train_source_batch ,
                              targets : train_target_batch ,
                              from_model_or_target : coin_tossing ,
                              lr: learning_rate})

        if batch_i % 30 == 0: # report progress every 30 batches
            # Validation uses the same mask as the current training epoch.
            validation_loss = sess.run(total_loss,
                                       feed_dict = {input_data : valid_source ,
                                                    targets : valid_target ,
                                                    from_model_or_target : coin_tossing})

            print('Epoch : {}/{} \nBatch : {}/{} \nTraining Loss : {:.3f} \nValidation loss: {:.3f} \nstop_early : {} \ncoin_tossing : {}'
                  .format(epoch_i , epochs ,
                          batch_i , num_batches ,
                          training_loss , validation_loss ,
                          stop_early , coin_tossing))

            # Show one random example from the current training batch.
            index = np.random.randint(batch_size)
            print('Source : {}'.format([source_int_to_letter[i] for i in train_source_batch[index]] ))
            print('Target : {}'.format([target_int_to_letter[i] for i in train_target_batch[index]] ))
            print('Predict : {}\n'.format([target_int_to_letter[i] for i in predicting_logits_result[index]] ))

    # Count epochs trained purely on the model's own outputs.
    if coin_tossing.sum() == 0:
        stop_early += 1

    if stop_early == 20: # stop after 20 epochs with an all-False mask
        break


# Save the trained model
saver = tf.train.Saver()
saver.save(sess , 'trained_model/save_net')
print('Model Trained and Saved')

Epoch : 0/200 
Batch : 0/77 
Training Loss : 3.353 
Validation loss: 3.302 
stop_early : 0 
coin_tossing : [ True  True  True  True  True  True  True  True]
Source : ['q', 'v', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>']
Target : ['q', 'v', '<EOS>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>']
Predict : ['s', 'p', 'p', 'p', 'p', 'p', 'p', 'p']

Epoch : 0/200 
Batch : 30/77 
Training Loss : 2.367 
Validation loss: 2.331 
stop_early : 0 
coin_tossing : [ True  True  True  True  True  True  True  True]
Source : ['m', 'q', 's', '<PAD>', '<PAD>', '<PAD>', '<PAD>']
Target : ['m', 'q', 's', '<EOS>', '<PAD>', '<PAD>', '<PAD>', '<PAD>']
Predict : ['<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>']

Epoch : 0/200 
Batch : 60/77 
Training Loss : 1.939 
Validation loss: 1.966 
stop_early : 0 
coin_tossing : [ True  True  True  True  True  True  True  True]
Source : ['o', 'x', 'g', '<PAD>', '<PAD>', '<PAD>', '<PAD>']
Target : ['g', 'o', 'x', '<EOS>', '<PAD>', '<PAD>', '<PAD>',

Epoch : 8/200 
Batch : 0/77 
Training Loss : 0.806 
Validation loss: 0.735 
stop_early : 0 
coin_tossing : [ True  True  True  True  True  True  True  True]
Source : ['s', 'd', 'v', 'm', 'f', '<PAD>', '<PAD>']
Target : ['d', 'f', 'm', 's', 'v', '<EOS>', '<PAD>', '<PAD>']
Predict : ['c', 'f', 'p', 's', 'v', '<EOS>', '<PAD>', '<PAD>']

Epoch : 8/200 
Batch : 30/77 
Training Loss : 0.705 
Validation loss: 0.699 
stop_early : 0 
coin_tossing : [ True  True  True  True  True  True  True  True]
Source : ['p', 'p', 'k', 'v', '<PAD>', '<PAD>', '<PAD>']
Target : ['k', 'p', 'p', 'v', '<EOS>', '<PAD>', '<PAD>', '<PAD>']
Predict : ['g', 'p', 'p', 'v', '<EOS>', '<PAD>', '<PAD>', '<PAD>']

Epoch : 8/200 
Batch : 60/77 
Training Loss : 0.658 
Validation loss: 0.668 
stop_early : 0 
coin_tossing : [ True  True  True  True  True  True  True  True]
Source : ['o', 't', 'n', 'y', '<PAD>', '<PAD>', '<PAD>']
Target : ['n', 'o', 't', 'y', '<EOS>', '<PAD>', '<PAD>', '<PAD>']
Predict : ['l', 'o', 't', 'x', '<E

Epoch : 16/200 
Batch : 0/77 
Training Loss : 0.302 
Validation loss: 0.278 
stop_early : 0 
coin_tossing : [ True  True  True  True  True  True  True  True]
Source : ['z', 's', 'u', '<PAD>', '<PAD>', '<PAD>', '<PAD>']
Target : ['s', 'u', 'z', '<EOS>', '<PAD>', '<PAD>', '<PAD>', '<PAD>']
Predict : ['s', 'u', 'z', '<EOS>', '<PAD>', '<PAD>', '<PAD>', '<PAD>']

Epoch : 16/200 
Batch : 30/77 
Training Loss : 0.251 
Validation loss: 0.264 
stop_early : 0 
coin_tossing : [ True  True  True  True  True  True  True  True]
Source : ['m', 'm', 'e', 't', 'm', 'z', '<PAD>']
Target : ['e', 'm', 'm', 'm', 't', 'z', '<EOS>', '<PAD>']
Predict : ['e', 'm', 'm', 'm', 'u', 'z', '<EOS>', '<PAD>']

Epoch : 16/200 
Batch : 60/77 
Training Loss : 0.242 
Validation loss: 0.276 
stop_early : 0 
coin_tossing : [ True  True  True  True  True  True  True  True]
Source : ['k', 'r', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>']
Target : ['k', 'r', '<EOS>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>']
Predict : ['k',

Epoch : 24/200 
Batch : 0/77 
Training Loss : 0.142 
Validation loss: 0.147 
stop_early : 0 
coin_tossing : [ True  True  True  True  True  True  True  True]
Source : ['q', 'h', 'u', 'y', 'r', 'u', '<PAD>']
Target : ['h', 'q', 'r', 'u', 'u', 'y', '<EOS>', '<PAD>']
Predict : ['h', 'r', 'r', 'u', 'u', 'y', '<EOS>', '<PAD>']

Epoch : 24/200 
Batch : 30/77 
Training Loss : 0.128 
Validation loss: 0.141 
stop_early : 0 
coin_tossing : [ True  True  True  True  True  True  True  True]
Source : ['a', 'b', 'q', 'k', 'q', 'l', 'p']
Target : ['a', 'b', 'k', 'l', 'p', 'q', 'q', '<EOS>']
Predict : ['a', 'b', 'i', 'l', 'p', 'q', 'q', '<EOS>']

Epoch : 24/200 
Batch : 60/77 
Training Loss : 0.115 
Validation loss: 0.131 
stop_early : 0 
coin_tossing : [ True  True  True  True  True  True  True  True]
Source : ['a', 'y', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>']
Target : ['a', 'y', '<EOS>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>']
Predict : ['a', 'y', '<EOS>', '<PAD>', '<PAD>', '<PAD>', '<PAD>

Epoch : 32/200 
Batch : 0/77 
Training Loss : 0.080 
Validation loss: 0.088 
stop_early : 0 
coin_tossing : [ True  True  True  True  True  True  True  True]
Source : ['s', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>']
Target : ['s', '<EOS>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>']
Predict : ['s', '<EOS>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>']

Epoch : 32/200 
Batch : 30/77 
Training Loss : 0.075 
Validation loss: 0.093 
stop_early : 0 
coin_tossing : [ True  True  True  True  True  True  True  True]
Source : ['m', 'p', 'i', 'o', 'w', 'd', '<PAD>']
Target : ['d', 'i', 'm', 'o', 'p', 'w', '<EOS>', '<PAD>']
Predict : ['d', 'i', 'm', 'o', 'p', 'w', '<EOS>', '<PAD>']

Epoch : 32/200 
Batch : 60/77 
Training Loss : 0.069 
Validation loss: 0.082 
stop_early : 0 
coin_tossing : [ True  True  True  True  True  True  True  True]
Source : ['d', 'g', 'g', 'w', 'o', 'm', 'e']
Target : ['d', 'e', 'g', 'g', 'm', 'o', 'w', '<EOS>']
Predict : ['d', 'e', 'g', 'g', 

Epoch : 40/200 
Batch : 0/77 
Training Loss : 0.052 
Validation loss: 0.057 
stop_early : 0 
coin_tossing : [ True  True  True  True  True  True  True  True]
Source : ['l', 'a', 'a', 'f', 'f', 'u', 'g']
Target : ['a', 'a', 'f', 'f', 'g', 'l', 'u', '<EOS>']
Predict : ['a', 'a', 'f', 'f', 'g', 'l', 'u', '<EOS>']

Epoch : 40/200 
Batch : 30/77 
Training Loss : 0.050 
Validation loss: 0.051 
stop_early : 0 
coin_tossing : [ True  True  True  True  True  True  True  True]
Source : ['m', 'q', 'm', 'w', 'u', 'l', '<PAD>']
Target : ['l', 'm', 'm', 'q', 'u', 'w', '<EOS>', '<PAD>']
Predict : ['k', 'm', 'm', 'q', 'u', 'w', '<EOS>', '<PAD>']

Epoch : 40/200 
Batch : 60/77 
Training Loss : 0.042 
Validation loss: 0.049 
stop_early : 0 
coin_tossing : [ True  True  True  True  True  True  True  True]
Source : ['t', 'z', 'i', '<PAD>', '<PAD>', '<PAD>', '<PAD>']
Target : ['i', 't', 'z', '<EOS>', '<PAD>', '<PAD>', '<PAD>', '<PAD>']
Predict : ['i', 't', 'z', '<EOS>', '<PAD>', '<PAD>', '<PAD>', '<PAD>']


Epoch : 48/200 
Batch : 0/77 
Training Loss : 0.335 
Validation loss: 0.421 
stop_early : 0 
coin_tossing : [ True  True  True  True  True False  True  True]
Source : ['x', 'v', 'p', 'd', 'l', 'c', 'f']
Target : ['c', 'd', 'f', 'l', 'p', 'v', 'x', '<EOS>']
Predict : ['c', 'd', 'f', 'l', 'p', 'v', 'x', 'y']

Epoch : 48/200 
Batch : 30/77 
Training Loss : 0.137 
Validation loss: 0.130 
stop_early : 0 
coin_tossing : [ True  True  True  True  True False  True  True]
Source : ['z', 'a', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>']
Target : ['a', 'z', '<EOS>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>']
Predict : ['a', 'z', '<EOS>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>']

Epoch : 48/200 
Batch : 60/77 
Training Loss : 0.070 
Validation loss: 0.065 
stop_early : 0 
coin_tossing : [ True  True  True  True  True False  True  True]
Source : ['w', 'v', 's', 'm', 'd', '<PAD>', '<PAD>']
Target : ['d', 'm', 's', 'v', 'w', '<EOS>', '<PAD>', '<PAD>']
Predict : ['d', 'm', 's', 'v', 'w', '<EOS>'

Epoch : 56/200 
Batch : 0/77 
Training Loss : 0.060 
Validation loss: 0.058 
stop_early : 0 
coin_tossing : [ True  True  True  True  True False  True  True]
Source : ['v', 'q', 'i', 's', 't', 'a', 'i']
Target : ['a', 'i', 'i', 'q', 's', 't', 'v', '<EOS>']
Predict : ['a', 'i', 'i', 'q', 's', 't', 'v', '<EOS>']

Epoch : 56/200 
Batch : 30/77 
Training Loss : 0.056 
Validation loss: 0.053 
stop_early : 0 
coin_tossing : [ True  True  True  True  True False  True  True]
Source : ['o', 'j', 'u', 's', 'z', '<PAD>', '<PAD>']
Target : ['j', 'o', 's', 'u', 'z', '<EOS>', '<PAD>', '<PAD>']
Predict : ['j', 'o', 's', 'u', 'z', '<EOS>', '<PAD>', '<PAD>']

Epoch : 56/200 
Batch : 60/77 
Training Loss : 0.045 
Validation loss: 0.052 
stop_early : 0 
coin_tossing : [ True  True  True  True  True False  True  True]
Source : ['z', 'b', 'q', 'n', 'o', 'z', '<PAD>']
Target : ['b', 'n', 'o', 'q', 'z', 'z', '<EOS>', '<PAD>']
Predict : ['b', 'n', 'o', 'q', 'z', 'z', '<EOS>', '<PAD>']

Epoch : 57/200 
Batch :

Epoch : 64/200 
Batch : 0/77 
Training Loss : 0.027 
Validation loss: 0.034 
stop_early : 0 
coin_tossing : [ True  True  True  True  True  True  True  True]
Source : ['y', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>']
Target : ['y', '<EOS>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>']
Predict : ['y', '<EOS>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>']

Epoch : 64/200 
Batch : 30/77 
Training Loss : 0.025 
Validation loss: 0.033 
stop_early : 0 
coin_tossing : [ True  True  True  True  True  True  True  True]
Source : ['f', 'k', 'q', '<PAD>', '<PAD>', '<PAD>', '<PAD>']
Target : ['f', 'k', 'q', '<EOS>', '<PAD>', '<PAD>', '<PAD>', '<PAD>']
Predict : ['f', 'k', 'q', '<EOS>', '<PAD>', '<PAD>', '<PAD>', '<PAD>']

Epoch : 64/200 
Batch : 60/77 
Training Loss : 0.022 
Validation loss: 0.029 
stop_early : 0 
coin_tossing : [ True  True  True  True  True  True  True  True]
Source : ['o', 't', 'n', 'y', '<PAD>', '<PAD>', '<PAD>']
Target : ['n', 'o', 't', 'y', '<EOS>'

Epoch : 72/200 
Batch : 0/77 
Training Loss : 0.077 
Validation loss: 0.061 
stop_early : 0 
coin_tossing : [ True  True  True  True  True  True  True  True]
Source : ['s', 'i', 's', 'b', 'm', 'z', 'f']
Target : ['b', 'f', 'i', 'm', 's', 's', 'z', '<EOS>']
Predict : ['b', 'f', 'i', 'm', 's', 'v', 'z', '<EOS>']

Epoch : 72/200 
Batch : 30/77 
Training Loss : 0.027 
Validation loss: 0.035 
stop_early : 0 
coin_tossing : [ True  True  True  True  True  True  True  True]
Source : ['d', 'u', 'z', 'f', '<PAD>', '<PAD>', '<PAD>']
Target : ['d', 'f', 'u', 'z', '<EOS>', '<PAD>', '<PAD>', '<PAD>']
Predict : ['d', 'f', 'u', 'z', '<EOS>', '<PAD>', '<PAD>', '<PAD>']

Epoch : 72/200 
Batch : 60/77 
Training Loss : 0.021 
Validation loss: 0.028 
stop_early : 0 
coin_tossing : [ True  True  True  True  True  True  True  True]
Source : ['p', 'y', 't', '<PAD>', '<PAD>', '<PAD>', '<PAD>']
Target : ['p', 't', 'y', '<EOS>', '<PAD>', '<PAD>', '<PAD>', '<PAD>']
Predict : ['p', 't', 'y', '<EOS>', '<PAD>', '<P

Epoch : 80/200 
Batch : 0/77 
Training Loss : 0.018 
Validation loss: 0.029 
stop_early : 0 
coin_tossing : [ True  True  True  True  True False  True False]
Source : ['i', 'f', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>']
Target : ['f', 'i', '<EOS>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>']
Predict : ['f', 'i', '<EOS>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>']

Epoch : 80/200 
Batch : 30/77 
Training Loss : 0.024 
Validation loss: 0.026 
stop_early : 0 
coin_tossing : [ True  True  True  True  True False  True False]
Source : ['m', 'p', 'i', 'o', 'w', 'd', '<PAD>']
Target : ['d', 'i', 'm', 'o', 'p', 'w', '<EOS>', '<PAD>']
Predict : ['d', 'i', 'm', 'o', 'p', 'w', '<EOS>', '<PAD>']

Epoch : 80/200 
Batch : 60/77 
Training Loss : 0.019 
Validation loss: 0.031 
stop_early : 0 
coin_tossing : [ True  True  True  True  True False  True False]
Source : ['j', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>']
Target : ['j', '<EOS>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PA

Epoch : 88/200 
Batch : 0/77 
Training Loss : 0.069 
Validation loss: 0.083 
stop_early : 0 
coin_tossing : [False  True False False  True False  True  True]
Source : ['k', 'a', 'l', 'e', 'y', 'p', '<PAD>']
Target : ['a', 'e', 'k', 'l', 'p', 'y', '<EOS>', '<PAD>']
Predict : ['a', 'e', 'k', 'l', 'p', 'y', '<EOS>', '<PAD>']

Epoch : 88/200 
Batch : 30/77 
Training Loss : 0.053 
Validation loss: 0.067 
stop_early : 0 
coin_tossing : [False  True False False  True False  True  True]
Source : ['r', 'q', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>']
Target : ['q', 'r', '<EOS>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>']
Predict : ['q', 'r', '<EOS>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>']

Epoch : 88/200 
Batch : 60/77 
Training Loss : 0.037 
Validation loss: 0.060 
stop_early : 0 
coin_tossing : [False  True False False  True False  True  True]
Source : ['l', 'k', 's', '<PAD>', '<PAD>', '<PAD>', '<PAD>']
Target : ['k', 'l', 's', '<EOS>', '<PAD>', '<PAD>', '<PAD>', '<PAD>']
Predict : [

Epoch : 96/200 
Batch : 30/77 
Training Loss : 0.036 
Validation loss: 0.054 
stop_early : 1 
coin_tossing : [False False False False False False False False]
Source : ['t', 'm', 't', 'l', 'v', 'e', 'j']
Target : ['e', 'j', 'l', 'm', 't', 't', 'v', '<EOS>']
Predict : ['e', 'j', 'l', 'm', 't', 't', 'v', '<EOS>']

Epoch : 96/200 
Batch : 60/77 
Training Loss : 0.032 
Validation loss: 0.054 
stop_early : 1 
coin_tossing : [False False False False False False False False]
Source : ['x', 'z', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>']
Target : ['x', 'z', '<EOS>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>']
Predict : ['x', 'z', '<EOS>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>']

Epoch : 97/200 
Batch : 0/77 
Training Loss : 0.030 
Validation loss: 0.053 
stop_early : 2 
coin_tossing : [False False False False False False False False]
Source : ['t', 'j', 'q', 'r', 'z', '<PAD>', '<PAD>']
Target : ['j', 'q', 'r', 't', 'z', '<EOS>', '<PAD>', '<PAD>']
Predict : ['j', 'q', 'r', 't', 'z', '<E

Epoch : 104/200 
Batch : 30/77 
Training Loss : 0.017 
Validation loss: 0.030 
stop_early : 9 
coin_tossing : [False False False False False False False False]
Source : ['r', 'y', 'j', 'w', '<PAD>', '<PAD>', '<PAD>']
Target : ['j', 'r', 'w', 'y', '<EOS>', '<PAD>', '<PAD>', '<PAD>']
Predict : ['j', 'r', 'w', 'y', '<EOS>', '<PAD>', '<PAD>', '<PAD>']

Epoch : 104/200 
Batch : 60/77 
Training Loss : 0.017 
Validation loss: 0.028 
stop_early : 9 
coin_tossing : [False False False False False False False False]
Source : ['l', 'k', 's', '<PAD>', '<PAD>', '<PAD>', '<PAD>']
Target : ['k', 'l', 's', '<EOS>', '<PAD>', '<PAD>', '<PAD>', '<PAD>']
Predict : ['k', 'l', 's', '<EOS>', '<PAD>', '<PAD>', '<PAD>', '<PAD>']

Epoch : 105/200 
Batch : 0/77 
Training Loss : 0.014 
Validation loss: 0.029 
stop_early : 10 
coin_tossing : [False False False False False False False False]
Source : ['r', 'r', 'i', 't', 'u', 'j', '<PAD>']
Target : ['i', 'j', 'r', 'r', 't', 'u', '<EOS>', '<PAD>']
Predict : ['i', 'j'

Epoch : 112/200 
Batch : 30/77 
Training Loss : 0.010 
Validation loss: 0.020 
stop_early : 17 
coin_tossing : [False False False False False False False False]
Source : ['p', 'x', 'i', 'f', 'k', 'i', '<PAD>']
Target : ['f', 'i', 'i', 'k', 'p', 'x', '<EOS>', '<PAD>']
Predict : ['f', 'i', 'i', 'k', 'p', 'x', '<EOS>', '<PAD>']

Epoch : 112/200 
Batch : 60/77 
Training Loss : 0.010 
Validation loss: 0.018 
stop_early : 17 
coin_tossing : [False False False False False False False False]
Source : ['b', 'i', 'g', 'n', 'd', 'n', 'c']
Target : ['b', 'c', 'd', 'g', 'i', 'n', 'n', '<EOS>']
Predict : ['b', 'c', 'd', 'g', 'i', 'n', 'n', '<EOS>']

Epoch : 113/200 
Batch : 0/77 
Training Loss : 0.010 
Validation loss: 0.020 
stop_early : 18 
coin_tossing : [False False False False False False False False]
Source : ['p', 'r', 'v', 's', 'y', '<PAD>', '<PAD>']
Target : ['p', 'r', 's', 'v', 'y', '<EOS>', '<PAD>', '<PAD>']
Predict : ['p', 'r', 's', 'v', 'y', '<EOS>', '<PAD>', '<PAD>']

Epoch : 113/200 


## Testing

In [15]:
import os
sess = tf.Session()
new_saver = tf.train.import_meta_graph(os.path.join('trained_model' , 'save_net.meta'))
new_saver.restore(sess, tf.train.latest_checkpoint('trained_model'))

# Look the required tensors up by the names given when the graph was built.
graph = tf.get_default_graph()
input_data = graph.get_tensor_by_name('inputs:0')
targets = graph.get_tensor_by_name('targets:0')
predicting_logits = graph.get_tensor_by_name('predictions:0')
from_model_or_target = graph.get_tensor_by_name('on_train:0')

input_word = 'common'

# The input placeholder has a fixed width, so longer words cannot be fed.
if len(input_word) > source_max_length:
    raise ValueError('input_word has {} characters but the model accepts at most {}'
                     .format(len(input_word) , source_max_length))

# Encode the word, mapping characters missing from the vocabulary to <UNK>.
unk_id = source_letter_to_int['<UNK>']
test_source = [source_letter_to_int.get(letter , unk_id) for letter in input_word]

# Right-pad with <PAD> up to the fixed input length source_max_length.
test_source += [source_letter_to_int['<PAD>']] * (source_max_length - len(test_source))
# Repeat the same row batch_size times; only the first row's prediction is read below.
test_source = [test_source] * batch_size

# The target values are placeholders only: with the all-False mask below they
# are never selected as decoder input after the first step, which always
# receives <GO>. Only the length target_max_length matters.
test_target = [0 for _ in range(0 , target_max_length)]
test_target = [test_target] * batch_size

# At inference time every step must consume the decoder's own previous
# output, so feed an explicit all-False mask rather than whatever mask the
# last training epoch happened to leave behind.
inference_mask = np.zeros(target_max_length , dtype = bool)

test_source = np.array(test_source)
test_target = np.array(test_target)
answer = sess.run(predicting_logits , feed_dict = {input_data : test_source ,
                                                   targets : test_target ,
                                                   from_model_or_target : inference_mask})

# All rows are identical; decode the first one back to characters.
answer = answer[0 , :]
answer_to_letter = [target_int_to_letter[num] for num in answer]
print(answer_to_letter)

INFO:tensorflow:Restoring parameters from trained_model\save_net
['c', 'm', 'm', 'n', 'o', 'o', '<EOS>', '<PAD>']
