In [1]:
"""
The helper file
"""

import os
import pickle
import copy
import numpy as np

CODES = {'<unk>': 0, '<s>': 1, '</s>': 2}

def load_data(path):
    """
    Load Dataset from File
    """
    input_file = os.path.join(path)
    with open(input_file, 'r', encoding='utf-8') as f:
        data = f.read()

    return data

def preprocess_and_save_data(source_path, target_path):
    """
    Preprocess Text Data.  Save to to file.
    """
    
    # Preprocess
    source_text = load_data(source_path)
    target_text = load_data(target_path)

    source_text = source_text.lower()
    target_text = target_text.lower()

    source_vocab_to_int, source_int_to_vocab = create_lookup_tables(source_text)
    
    target_vocab_to_int, target_int_to_vocab = create_lookup_tables(target_text)
    
    source_text, target_text = text_to_ids(source_text, target_text, source_vocab_to_int, target_vocab_to_int)

    # Save Data
    pickle.dump((
        (source_text, target_text),
        (source_vocab_to_int, target_vocab_to_int),
        (source_int_to_vocab, target_int_to_vocab)), open('preprocess.p', 'wb'))

def load_preprocess():
    """
    Load the Preprocessed Training data and return them in batches of <batch_size> or less
    """
    return pickle.load(open('preprocess.p', mode='rb'))

def create_lookup_tables(text):
    """
    Create lookup tables for vocabulary
    """
    vocab = set(text.split())
    vocab_to_int = copy.copy(CODES)
    
    for v_i, v in enumerate(vocab, len(CODES)):
        vocab_to_int[v] = v_i

    int_to_vocab = {v_i: v for v, v_i in vocab_to_int.items()}

    return vocab_to_int, int_to_vocab

def save_params(params):
    """
    Save parameters to file
    """
    pickle.dump(params, open('params.p', 'wb'))

def load_params():
    """
    Load parameters from file
    """
    return pickle.load(open('params.p', mode='rb'))

def batch_data(source, target, batch_size):
    """
    Batch source and target together
    """
    for batch_i in range(0, len(source)//batch_size):
        start_i = batch_i * batch_size
        source_batch = source[start_i:start_i + batch_size]
        target_batch = target[start_i:start_i + batch_size]
        yield np.array(pad_sentence_batch(source_batch)), np.array(pad_sentence_batch(target_batch))

def pad_sentence_batch(sentence_batch):
    """
    Pad sentence with </s> id
    """
    max_sentence = max([len(sentence) for sentence in sentence_batch])
    return [sentence + [CODES['</s>']] * (max_sentence - len(sentence))
            for sentence in sentence_batch]

def text_to_ids(source_text, target_text, source_vocab_to_int, target_vocab_to_int):
    """
    Convert source and target text to proper word ids
    :param source_text: String that contains all the source text.
    :param target_text: String that contains all the target text.
    :param source_vocab_to_int: Dictionary to go from the source words to an id
    :param target_vocab_to_int: Dictionary to go from the target words to an id
    :return: A tuple of lists (source_id_text, target_id_text)
    """
    source_text_to_id = [[source_vocab_to_int[word] for word in line.split()] for line in source_text.split('\n')]
    target_text_to_id = [[target_vocab_to_int[word] for word in line.split()] for line in target_text.split('\n')]
    
    return (source_text_to_id, target_text_to_id)

In [2]:
source_path = 'data/small_vocab_fr'
target_path = 'data/small_vocab_en'
source_text = load_data(source_path)
target_text = load_data(target_path)

In [3]:
preprocess_and_save_data(source_path, target_path)

In [4]:
from distutils.version import LooseVersion
import warnings
import tensorflow as tf

# Check TensorFlow Version
print('TensorFlow Version: {}'.format(tf.__version__))
assert LooseVersion(tf.__version__) in [LooseVersion('1.4.0')], 'This project requires TensorFlow version 1.5  You are using {}'.format(tf.__version__)

# Check for a GPU
if not tf.test.gpu_device_name():
    warnings.warn('No GPU found. Please use a GPU to train your neural network.')
else:
    print('Default GPU Device: {}'.format(tf.test.gpu_device_name()))

  from ._conv import register_converters as _register_converters


TensorFlow Version: 1.4.0
Default GPU Device: /device:GPU:0


In [5]:
import numpy as np

(source_int_text, target_int_text), (source_vocab_to_int, target_vocab_to_int), _ = load_preprocess()
# pad_sentence_batch(source_int_text)
source_vocab = len(source_vocab_to_int)
target_vocab = len(target_vocab_to_int)

In [6]:
class Seq2seqHyperparams(object):
    def __init__(self, hidden_units=256, n_layers_enconder=2,
                 n_layers_decoder=2, num_encoder_symbols=source_vocab, 
                 num_decoder_symbols=target_vocab, learning_rate=0.01,
                 embedding_size=15, max_gradient_norm=5.0, dtype=tf.float32,
                 epochs=1, dropout=0.2, forget_bias=1.0,
                 use_beam_search=True, beam_width=10, length_penalty_weight=0.0,
                 use_attention=True, learning_rate_decay=False, 
                 use_bidirectional_enconder=False):
    
        self.hidden_units = hidden_units
        self.n_layers_enconder = n_layers_enconder
        self.n_layers_decoder = n_layers_decoder
        self.num_encoder_symbols = num_encoder_symbols
        self.num_decoder_symbols = num_decoder_symbols
        self.learning_rate = learning_rate
        self.embedding_size = embedding_size
        self.max_gradient_norm = max_gradient_norm
        self.dtype = dtype
        self.dropout = dropout
        self.forget_bias = forget_bias
        self.use_beam_search = use_beam_search
        self.beam_width = beam_width
        self.length_penalty_weight = length_penalty_weight
        self.use_attention = use_attention
        self.learning_rate_decay = learning_rate_decay
        self.use_bidirectional_enconder = use_bidirectional_enconder


        # Extra vocabulary symbols
        unk = '<unk>'
        sos = '<s>'
        eos = '</s>' # also function as PAD
        self.extra_tokens = [unk, sos, eos]
        self.unk_token = self.extra_tokens.index(unk) #unk_token = 0
        self.start_token = self.extra_tokens.index(sos) # start_token = 1
        self.end_token = self.extra_tokens.index(eos)   # end_token = 2

hparams = Seq2seqHyperparams()

In [7]:
import tensorflow.contrib.seq2seq as seq2seq
from tensorflow.contrib.rnn import MultiRNNCell
from tensorflow import layers

tf.reset_default_graph()

train_graph = tf.Graph()
with train_graph.as_default():
    
    ### DEFINING PLACEHOLDERS ###

    # encoder_inputs: [batch_size, max_time_steps]
    encoder_inputs = tf.placeholder(dtype=tf.int32,
                shape=(None, None), name='encoder_inputs')

    # encoder_inputs_length: [batch_size]
    encoder_inputs_length = tf.placeholder(
                dtype=tf.int32, shape=(None,), name='encoder_inputs_length')

    # get dynamic batch_size
    batch_size = tf.shape(encoder_inputs)[0]

    ### TRAIN MODE PLACEHOLDERS ###

    # decoder_inputs: [batch_size, max_time_steps]
    decoder_inputs = tf.placeholder(
                    dtype=tf.int32, shape=(None, None), name='decoder_inputs')

    # decoder_inputs_length: [batch_size]
    decoder_inputs_length = tf.placeholder(
                    dtype=tf.int32, shape=(None,), name='decoder_inputs_length')

    decoder_start_token = tf.ones(
                    shape=[batch_size, 1], dtype=tf.int32) * hparams.start_token
    decoder_end_token = tf.ones(
                    shape=[batch_size, 1], dtype=tf.int32) * hparams.end_token  


    # decoder_inputs_train: [batch_size , max_time_steps + 1]
    # insert sos symbol in front of each decoder input
    decoder_inputs_train = tf.concat([decoder_start_token,
                                          decoder_inputs], axis=1)

    # decoder_inputs_length_train: [batch_size]
    decoder_inputs_length_train = decoder_inputs_length + 1

    # decoder_targets_train: [batch_size, max_time_steps + 1]
    # insert eos symbol at the end of each decoder input
    decoder_targets_train = tf.concat([decoder_inputs,
                                           decoder_end_token], axis=1)

In [8]:
with train_graph.as_default():
    ## DEFINING ENCODER ##

    encoder_embeddings = tf.Variable(tf.random_uniform([hparams.num_encoder_symbols, hparams.embedding_size], -1.0, 1.0),
                                     dtype=hparams.dtype)

    # Embedded_inputs: [batch_size, time_step, embedding_size]
    encoder_inputs_embedded = tf.nn.embedding_lookup(
        params=encoder_embeddings, ids=encoder_inputs)

    if hparams.use_bidirectional_enconder:  #bidirectional encoder is not working!
        
        num_bi_layers = int(hparams.n_layers_enconder / 2)
        num_residual_layers = hparams.n_layers_enconder - 1
        num_bi_residual_layers = int(num_residual_layers / 2)
        
        print(num_bi_layers, num_residual_layers, num_bi_residual_layers)
        
        cell_list = []
        for i in range(hparams.n_layers_enconder):
            cell = tf.contrib.rnn.BasicLSTMCell(hparams.hidden_units, forget_bias=hparams.forget_bias)

            if (i >= hparams.n_layers_enconder - num_residual_layers):
                cell = tf.contrib.rnn.ResidualWrapper(cell, residual_fn=None)
                if hparams.dropout > 0.0:
                    cell = tf.contrib.rnn.DropoutWrapper(
                        cell=cell, input_keep_prob=(1.0 - hparams.dropout))
            
            cell_list.append(cell)
            
        if len(cell_list) == 1:  # Single layer.
            fw_cell = cell_list[0]
            bw_cell = cell_list[0]
        else:  # Multi layers
            fw_cell = tf.contrib.rnn.MultiRNNCell(cell_list)
            bw_cell = tf.contrib.rnn.MultiRNNCell(cell_list)

        fw_cell = tf.contrib.rnn.BasicLSTMCell(hparams.n_layers_enconder)
        bw_cell = tf.contrib.rnn.BasicLSTMCell(hparams.n_layers_enconder)

        bi_outputs, bi_state = tf.nn.bidirectional_dynamic_rnn(
                                                        fw_cell,
                                                        bw_cell,
                                                        encoder_inputs_embedded,
                                                        dtype=dtype,
                                                        sequence_length=encoder_inputs_length,
                                                        time_major=False,
                                                        swap_memory=True)
        print(bi_outputs, "\n\n", bi_state)

        encoder_outputs, bi_encoder_state = tf.concat(bi_outputs, -1), bi_state
        
        if num_bi_layers == 1:
            encoder_last_state = bi_encoder_state
        else:
            # alternatively concat forward and backward states
            encoder_state = []
            for layer_id in range(num_bi_layers):
                encoder_state.append(bi_encoder_state[0][layer_id])  # forward
                encoder_state.append(bi_encoder_state[1][layer_id])  # backward
            encoder_last_state = tuple(encoder_state)

        encoder_state = bi_encoder_state
        
    else:
        # Build RNN cell
        cells = []
        for _ in range(hparams.n_layers_enconder):
            cell = tf.contrib.rnn.BasicLSTMCell(hparams.hidden_units, forget_bias=hparams.forget_bias)
            if hparams.dropout > 0.0:
                cell = tf.contrib.rnn.DropoutWrapper(
                    cell=cell, input_keep_prob=(1.0 - hparams.dropout))
            cells.append(cell)
        if hparams.n_layers_enconder == 1:
            encoder_cells = cells[0]
        else:
            encoder_cells = tf.contrib.rnn.MultiRNNCell(cells)

        encoder_outputs, encoder_last_state = tf.nn.dynamic_rnn(
            cell=encoder_cells, inputs=encoder_inputs_embedded,
            sequence_length=encoder_inputs_length, dtype=hparams.dtype,
            time_major=False)

In [9]:
with train_graph.as_default():
    ### DEFINING DECODER ###

    # Building decoder_cell
    cells = []
    # Build RNN cell
    for _ in range(hparams.n_layers_decoder):
        cell = tf.contrib.rnn.BasicLSTMCell(hparams.hidden_units, forget_bias=hparams.forget_bias)
        if hparams.dropout > 0.0:
            cell = tf.contrib.rnn.DropoutWrapper(
                cell=cell, input_keep_prob=(1.0 - hparams.dropout))
        cells.append(cell)
    if hparams.n_layers_decoder == 1:
        decoder_cells = cells[0]
    else:
        decoder_cells = tf.contrib.rnn.MultiRNNCell(cells)

    if hparams.use_attention:
        memory = encoder_outputs
        
        attention_mechanism = tf.contrib.seq2seq.BahdanauAttention(
            hparams.hidden_units,
            memory,
            memory_sequence_length=encoder_inputs_length,
            normalize=True)
        
        decoder_cells_train = tf.contrib.seq2seq.AttentionWrapper(
            decoder_cells,
            attention_mechanism,
            attention_layer_size=hparams.hidden_units,
            alignment_history=False,
            output_attention=True,
            name="attention")
        
        decoder_initial_state = decoder_cells_train.zero_state(batch_size, hparams.dtype).clone(
          cell_state=encoder_last_state)
        
    else:
        decoder_cells_train = decoder_cells
        decoder_initial_state = encoder_last_state

    decoder_embeddings = tf.Variable(tf.random_uniform([hparams.num_decoder_symbols, hparams.embedding_size], -1.0, 1.0), dtype=hparams.dtype)
    
    # decoder_inputs_embedded: [batch_size, max_time_step + 1, embedding_size]
    decoder_inputs_embedded = tf.nn.embedding_lookup(
        params=decoder_embeddings, ids=decoder_inputs_train)

In [10]:
with train_graph.as_default():
    ### TRAIN MODE ###
    
    # Helper to feed inputs for training: read inputs from dense ground truth vectors
    training_helper = seq2seq.TrainingHelper(inputs=decoder_inputs_embedded,
                                       sequence_length=decoder_inputs_length_train,
                                       time_major=False,
                                        name='training_helper')

    training_decoder = seq2seq.BasicDecoder(cell=decoder_cells_train,
                                       helper=training_helper,
                                       initial_state=decoder_initial_state)

    # decoder_outputs_train: BasicDecoderOutput
    #                        namedtuple(rnn_outputs, sample_id)
    # decoder_outputs_train.rnn_output: [batch_size, max_time_step + 1, num_decoder_symbols] if output_time_major=False
    #                                   [max_time_step + 1, batch_size, num_decoder_symbols] if output_time_major=True
    # decoder_outputs_train.sample_id: [batch_size], tf.int32
    (decoder_outputs_train, decoder_last_state_train, 
         decoder_outputs_length_decode)  = seq2seq.dynamic_decode(decoder=training_decoder,
                                                        output_time_major=False,
                                                        swap_memory=True,
                                                        impute_finished=True)

    # More efficient to do the projection on the batch-time-concatenated tensor
    # logits_train: [batch_size, max_time_step + 1, num_decoder_symbols]
    
    sample_id = decoder_outputs_train.sample_id
    
    output_layer = layers.Dense(hparams.num_decoder_symbols, name='output_projection')
    logits_train = output_layer(decoder_outputs_train.rnn_output)

In [11]:
with train_graph.as_default():
    
    ### LOSS, GRADIEND AND OPTIMIZATION ###
    
    if hparams.learning_rate_decay:
        global_step = tf.Variable(0, trainable=False)

        learning_rate = tf.constant(hparams.learning_rate)

        #using luong10 decay scheme
        decay_factor = 0.5
        start_decay_step = int(hparams.epochs / 2)
        decay_times = 10

        remain_steps = hparams.epochs - start_decay_step
        decay_steps = int(remain_steps / decay_times)

        learning_rate = tf.cond(global_step < start_decay_step,
                                lambda: hparams.learning_rate,
                                lambda: tf.train.exponential_decay(
                                    hparams.learning_rate,
                                    (global_step - start_decay_step),
                                    decay_steps, decay_factor, staircase=True),
                                name="learning_rate_decay_cond")
    
    # Maximum decoder time_steps in current batch
    max_decoder_length = tf.reduce_max(decoder_inputs_length_train)
    
    # masks: masking for valid and padded time steps, [batch_size, max_time_step + 1]
    target_weights = tf.sequence_mask(lengths=decoder_inputs_length_train, 
                             maxlen=max_decoder_length, dtype=hparams.dtype, name='masks')
    
    crossent = tf.nn.sparse_softmax_cross_entropy_with_logits(
        labels=decoder_targets_train, logits=logits_train)
    
    loss = (tf.reduce_sum(crossent * target_weights) /
        tf.cast(batch_size, dtype=hparams.dtype))

    trainable_params = tf.trainable_variables()
    
    opt = tf.train.AdamOptimizer(learning_rate=hparams.learning_rate)
    
    gradients = tf.gradients(loss, 
                             trainable_params)
    
    clip_gradients, gradient_norm = tf.clip_by_global_norm(gradients, hparams.max_gradient_norm)
    
    updates = opt.apply_gradients(
            zip(clip_gradients, trainable_params))

In [12]:
with train_graph.as_default():

    ### INFERENCE MODE ###
    start_tokens = tf.fill([batch_size], hparams.start_token)
    
    decoder_initial_state_infer = tf.contrib.seq2seq.tile_batch(
                  encoder_last_state, multiplier=hparams.beam_width)
    
    if hparams.use_attention:
        memory = tf.contrib.seq2seq.tile_batch(
          memory, multiplier=hparams.beam_width)
        
        source_sequence_length = tf.contrib.seq2seq.tile_batch(
          encoder_inputs_length, multiplier=hparams.beam_width)
        
        encoder_last_state = tf.contrib.seq2seq.tile_batch(
          encoder_last_state, multiplier=hparams.beam_width)
        
        batch_size = batch_size * hparams.beam_width
        
        attention_mechanism_infer = tf.contrib.seq2seq.BahdanauAttention(
            hparams.hidden_units,
            memory,
            memory_sequence_length=source_sequence_length,
            normalize=True)
        
        decoder_cells_infer = tf.contrib.seq2seq.AttentionWrapper(
            decoder_cells,
            attention_mechanism_infer,
            attention_layer_size=hparams.hidden_units,
            alignment_history=False,
            output_attention=True,
            name="attention_infer")
        
        decoder_initial_state_infer = decoder_cells_infer.zero_state(batch_size, hparams.dtype).clone(
          cell_state=encoder_last_state)
    
    if hparams.use_beam_search:

        inference_decoder = tf.contrib.seq2seq.BeamSearchDecoder(
              cell=decoder_cells_infer,
              embedding=decoder_embeddings,
              start_tokens=start_tokens,
              end_token=hparams.end_token,
              initial_state=decoder_initial_state_infer,
              beam_width=hparams.beam_width,
              output_layer=output_layer,
              length_penalty_weight=hparams.length_penalty_weight)
        
    else:
        inference_helper = seq2seq.GreedyEmbeddingHelper(decoder_embeddings,
                                                        start_tokens=start_tokens,
                                                        end_token=hparams.end_token)

        inference_decoder = seq2seq.BasicDecoder(cell=decoder_cells_infer,
                                                 helper=inference_helper,
                                                 initial_state=decoder_initial_state,
                                                 output_layer=output_layer)
    
    maximum_iterations = tf.round(tf.reduce_max(encoder_inputs_length) * 2)
    
    (decoder_infer_outputs, decoder_infer_last_state,
                 decoder_infer_outputs_length) = (seq2seq.dynamic_decode(
                    decoder=inference_decoder,
                    output_time_major=False,
                    maximum_iterations=maximum_iterations))
    
    if hparams.use_beam_search:
        decoder_pred_decode = decoder_infer_outputs.predicted_ids
        tf.identity(decoder_pred_decode, 'decoder_pred_decode')
    
    else:
        logits_infer = decoder_infer_outputs.rnn_output
        sample_id_infer = decoder_infer_outputs.sample_id                                                                       

In [30]:
#training parameters
class TrainingHyperparams(object):
    def __init__(self, epochs=3, batch_size=512):
        self.epochs = epochs
        self.batch_size = batch_size

train_hparams = TrainingHyperparams()

In [31]:
def sentence_to_seq(sentence, vocab_to_int):
    """
    Convert a sentence to a sequence of ids
    :param sentence: String
    :param vocab_to_int: Dictionary to go from the words to an id
    :return: List of word ids
    """
    lower_case_words = [word.lower() for word in sentence.split()]
    
    word_id = [vocab_to_int.get(word, vocab_to_int['<unk>']) for word in lower_case_words]
    
    return word_id

import time

### TRAINING ###
save_path = 'checkpoints/dev'

train_source = source_int_text[train_hparams.batch_size:]
train_target = target_int_text[train_hparams.batch_size:]

valid_source = source_int_text[:train_hparams.batch_size]
valid_target = target_int_text[:train_hparams.batch_size]

get_accuracy_every = 30

with tf.Session(graph=train_graph) as sess:
    sess.run(tf.global_variables_initializer())
    steps = 0
    
    for epoch_i in range(train_hparams.epochs):
        
        step = 0
        for batch_i, (source_batch, target_batch) in enumerate(
                batch_data(train_source, train_target, train_hparams.batch_size)):
            start_time = time.time()
            
            source_batch_seq_lenght = []
            for item in source_batch:
                source_batch_seq_lenght.append(np.shape(item)[0])
            
            target_batch_seq_lenght = []
            for item in target_batch:
                target_batch_seq_lenght.append(np.shape(item)[0])
                
#             if (source_batch_seq_lenght[0] > 300): #OOM problems with datasets containing very large sentences
#                 continue
                
            _, loss_val = sess.run(
                [updates, loss],
                {encoder_inputs: source_batch,
                 decoder_inputs: target_batch,
                encoder_inputs_length: source_batch_seq_lenght,
                decoder_inputs_length: target_batch_seq_lenght})

            print('Epoch {:>3} Batch {:>4}/{}, Loss: {:>6.3f}'
                  .format(epoch_i, batch_i, len(source_int_text) // train_hparams.batch_size, loss_val))
                
            end_time = time.time()
            
    print("Training time: ", end_time - start_time)
    # Save Model
    saver = tf.train.Saver()
    saver.save(sess, save_path)
    print('Model Trained and Saved')

Epoch   0 Batch    0/269, Loss: 97.822
Epoch   0 Batch    1/269, Loss: 77.312
Epoch   0 Batch    2/269, Loss: 435.758
Epoch   0 Batch    3/269, Loss: 95.297
Epoch   0 Batch    4/269, Loss: 143.852
Epoch   0 Batch    5/269, Loss: 92.283
Epoch   0 Batch    6/269, Loss: 78.013
Epoch   0 Batch    7/269, Loss: 79.135
Epoch   0 Batch    8/269, Loss: 67.675
Epoch   0 Batch    9/269, Loss: 67.247
Epoch   0 Batch   10/269, Loss: 61.134
Epoch   0 Batch   11/269, Loss: 59.242
Epoch   0 Batch   12/269, Loss: 56.646
Epoch   0 Batch   13/269, Loss: 54.664
Epoch   0 Batch   14/269, Loss: 55.011
Epoch   0 Batch   15/269, Loss: 54.845
Epoch   0 Batch   16/269, Loss: 51.762
Epoch   0 Batch   17/269, Loss: 51.130
Epoch   0 Batch   18/269, Loss: 49.760
Epoch   0 Batch   19/269, Loss: 53.007
Epoch   0 Batch   20/269, Loss: 48.209
Epoch   0 Batch   21/269, Loss: 50.687
Epoch   0 Batch   22/269, Loss: 48.596
Epoch   0 Batch   23/269, Loss: 46.859
Epoch   0 Batch   24/269, Loss: 50.131
Epoch   0 Batch   25/26

Epoch   0 Batch  211/269, Loss:  8.951
Epoch   0 Batch  212/269, Loss:  8.942
Epoch   0 Batch  213/269, Loss:  8.937
Epoch   0 Batch  214/269, Loss:  8.878
Epoch   0 Batch  215/269, Loss:  8.445
Epoch   0 Batch  216/269, Loss:  8.572
Epoch   0 Batch  217/269, Loss:  8.461
Epoch   0 Batch  218/269, Loss:  8.319
Epoch   0 Batch  219/269, Loss:  8.438
Epoch   0 Batch  220/269, Loss:  8.688
Epoch   0 Batch  221/269, Loss:  7.845
Epoch   0 Batch  222/269, Loss:  7.825
Epoch   0 Batch  223/269, Loss:  7.873
Epoch   0 Batch  224/269, Loss:  7.695
Epoch   0 Batch  225/269, Loss:  7.322
Epoch   0 Batch  226/269, Loss:  7.108
Epoch   0 Batch  227/269, Loss:  7.495
Epoch   0 Batch  228/269, Loss:  6.896
Epoch   0 Batch  229/269, Loss:  6.858
Epoch   0 Batch  230/269, Loss:  6.703
Epoch   0 Batch  231/269, Loss:  6.551
Epoch   0 Batch  232/269, Loss:  5.928
Epoch   0 Batch  233/269, Loss:  5.809
Epoch   0 Batch  234/269, Loss:  5.749
Epoch   0 Batch  235/269, Loss:  5.785
Epoch   0 Batch  236/269,

Epoch   1 Batch  154/269, Loss:  0.687
Epoch   1 Batch  155/269, Loss:  0.744
Epoch   1 Batch  156/269, Loss:  0.706
Epoch   1 Batch  157/269, Loss:  0.759
Epoch   1 Batch  158/269, Loss:  0.638
Epoch   1 Batch  159/269, Loss:  0.690
Epoch   1 Batch  160/269, Loss:  0.683
Epoch   1 Batch  161/269, Loss:  0.654
Epoch   1 Batch  162/269, Loss:  0.682
Epoch   1 Batch  163/269, Loss:  0.681
Epoch   1 Batch  164/269, Loss:  0.703
Epoch   1 Batch  165/269, Loss:  0.769
Epoch   1 Batch  166/269, Loss:  0.658
Epoch   1 Batch  167/269, Loss:  0.715
Epoch   1 Batch  168/269, Loss:  0.708
Epoch   1 Batch  169/269, Loss:  0.697
Epoch   1 Batch  170/269, Loss:  0.643
Epoch   1 Batch  171/269, Loss:  0.657
Epoch   1 Batch  172/269, Loss:  0.740
Epoch   1 Batch  173/269, Loss:  0.713
Epoch   1 Batch  174/269, Loss:  0.634
Epoch   1 Batch  175/269, Loss:  0.954
Epoch   1 Batch  176/269, Loss:  0.669
Epoch   1 Batch  177/269, Loss:  0.662
Epoch   1 Batch  178/269, Loss:  0.650
Epoch   1 Batch  179/269,

Epoch   2 Batch   97/269, Loss:  0.605
Epoch   2 Batch   98/269, Loss:  0.541
Epoch   2 Batch   99/269, Loss:  0.610
Epoch   2 Batch  100/269, Loss:  0.553
Epoch   2 Batch  101/269, Loss:  0.582
Epoch   2 Batch  102/269, Loss:  0.605
Epoch   2 Batch  103/269, Loss:  0.597
Epoch   2 Batch  104/269, Loss:  0.590
Epoch   2 Batch  105/269, Loss:  0.575
Epoch   2 Batch  106/269, Loss:  0.515
Epoch   2 Batch  107/269, Loss:  0.591
Epoch   2 Batch  108/269, Loss:  0.501
Epoch   2 Batch  109/269, Loss:  0.633
Epoch   2 Batch  110/269, Loss:  0.532
Epoch   2 Batch  111/269, Loss:  0.603
Epoch   2 Batch  112/269, Loss:  0.628
Epoch   2 Batch  113/269, Loss:  0.578
Epoch   2 Batch  114/269, Loss:  0.566
Epoch   2 Batch  115/269, Loss:  0.589
Epoch   2 Batch  116/269, Loss:  0.602
Epoch   2 Batch  117/269, Loss:  0.585
Epoch   2 Batch  118/269, Loss:  0.520
Epoch   2 Batch  119/269, Loss:  0.558
Epoch   2 Batch  120/269, Loss:  0.588
Epoch   2 Batch  121/269, Loss:  0.555
Epoch   2 Batch  122/269,

In [32]:
# Save parameters for checkpoint
save_params(save_path)

In [33]:
import tensorflow as tf
import numpy as np

_, (source_vocab_to_int, target_vocab_to_int), (source_int_to_vocab, target_int_to_vocab) = load_preprocess()
load_path = load_params()

In [34]:
translate_sentence = "new jersey est parfois calme pendant l' automne , et il est neigeux en avril ."
#fr to en
#input: "new jersey est parfois calme pendant l' automne , et il est neigeux en avril ."
#target:"new jersey is sometimes quiet during autumn , and it is snowy in april ."

#en to vi
#input:  "Họ viết gần 1000 trang về chủ đề này ."
#target: "They wrote almost a thousand pages on the topic ."

print(translate_sentence)

"""
DON'T MODIFY ANYTHING IN THIS CELL
"""
translate_sentence = sentence_to_seq(translate_sentence, source_vocab_to_int)
print(np.shape(translate_sentence))

loaded_graph = tf.Graph()
with tf.Session(graph=loaded_graph) as sess:
    # Load saved model
    loader = tf.train.import_meta_graph(load_path + '.meta')
    loader.restore(sess, load_path)

    encoder_inputs = loaded_graph.get_tensor_by_name('encoder_inputs:0')
    encoder_inputs_length = loaded_graph.get_tensor_by_name('encoder_inputs_length:0')
    decoder_pred_decode = loaded_graph.get_tensor_by_name('decoder_pred_decode:0')
    
    predicted_ids = sess.run(decoder_pred_decode, {encoder_inputs: [translate_sentence],
                                                       encoder_inputs_length: [np.shape(translate_sentence)[0]]})[0]

print('Input')
print('  Word Ids:      {}'.format([i for i in translate_sentence]))
print('  Source Words: {}'.format([source_int_to_vocab[i] for i in translate_sentence]))

print('\nPrediction')
print('  Word Ids:      {}'.format([i[0] for i in predicted_ids]))
print('  Predicted Words: {}'.format([target_int_to_vocab[i[0]] for i in predicted_ids]))

new jersey est parfois calme pendant l' automne , et il est neigeux en avril .
(16,)
INFO:tensorflow:Restoring parameters from checkpoints/dev
Input
  Word Ids:      [273, 88, 69, 78, 37, 151, 6, 124, 49, 239, 39, 69, 140, 31, 313, 215]
  Source Words: ['new', 'jersey', 'est', 'parfois', 'calme', 'pendant', "l'", 'automne', ',', 'et', 'il', 'est', 'neigeux', 'en', 'avril', '.']

Prediction
  Word Ids:      [135, 135, 19, 110, 110, 98, 130, 178, 31, 31, 193, 31, 115, 160, 225, 225, 8, 225, 121, 225, 8, 160, 225, 8, 160, 225, 8, 160, 225, 8, 225, 29]
  Predicted Words: ['been', 'been', 'little', 'fruit', 'fruit', 'my', 'lemons', 'cats', 'rabbit', 'rabbit', 'snakes', 'rabbit', '.', 'never', 'summer', 'summer', 'last', 'summer', ',', 'summer', 'last', 'never', 'summer', 'last', 'never', 'summer', 'last', 'never', 'summer', 'last', 'summer', 'tower']


In [35]:
print('\nTranslation:\n')
translation = ''
for word_i in predicted_ids:
    translation += target_int_to_vocab[word_i[0]] + ' '
    
print(translation)


Translation:

been been little fruit fruit my lemons cats rabbit rabbit snakes rabbit . never summer summer last summer , summer last never summer last never summer last never summer last summer tower 
