In [59]:
#imports 

from os.path import expanduser
import numpy as np
import tensorflow as tf
import sys

GLOBAL_VOCAB_SIZE = 257


In [99]:
def extract_data(filename):
    relative_path = "../../data/"
    data = open(expanduser(relative_path+filename),"rb")
    data_list = []
    for line in data:
        data_list.append(np.asarray(list(line)))
    return np.asarray(data_list)

def preprocess_data(data):
    newdata = []
    for row in data:
        newrow = np.zeros(row.shape)
        for i in range(len(row)):
            newrow[i]=ord(row[i])
        newdata.append(newrow)
    return np.array(newdata)

def encoder(hidden_size, input_data):
    encoder_cell = tf.nn.rnn_cell.BasicLSTMCell(hidden_size)
    initial_state = rnn_cell.zero_state(batch_size, dtype=tf.float32)
    outputs, state = tf.nn.dynamic_rnn(encoder_cell, input_data, initial_state = initial_state, dtype=tf.float32)
    return outputs, state

def decoder(hidden_size, input_data):
    decoder_cell = tf.nn.rnn_cell.BasicLSTMCell(hidden_size)
    helper = tf.contrib.seq2seq.TrainingHelper(input_data, decoder_lengths)
    initial_state = rnn_cell.zero_state(batch_size, dtype=tf.float32)
    outputs, state = tf.nn.dynamic_rnn(decoder_cell, input_data, initial_state = initial_state, dtype=tf.float32)
    return outputs

def sentence_to_one_hot(sentence_numpy_array):
    one_hot_data = np.zeros((len(sentence_numpy_array),GLOBAL_VOCAB_SIZE))
    for i in range(len(sentence_numpy_array)):
        one_hot_data[i][int(sentence_numpy_array[i])]=1
    return one_hot_data

def data_to_one_hot(data):
    data_list = []
    for sentence in data:
        data_list.append(sentence_to_one_hot(sentence))
    return data_list
    # return one hot vectors for each input character

def generate_decoder_target(numpy_sequence):
    #EOS at the end
    seq_reshaped=numpy_sequence.reshape((1,numpy_sequence.shape[0],numpy_sequence.shape[1]))
    EOS = np.zeros((1,1,257))
    EOS[0,0,256] = 1
    return np.append(x,EOS,axis=1)

def generate_decoder_input(numpy_sequence):
    #EOS at the beginning
    seq_reshaped=numpy_sequence.reshape((1,numpy_sequence.shape[0],numpy_sequence.shape[1]))
    EOS = np.zeros((1,1,257))
    EOS[0,0,256] = 1
    return np.append(EOS,x,axis=1)

def generate_encoder_input(numpy_sequence):
    return numpy_sequence.reshape((1,numpy_sequence.shape[0],numpy_sequence.shape[1]))

In [61]:
"""
Prepare Data
"""

#extract data
train_english = extract_data("train.10k.en")
train_german = extract_data("train.10k.de")
valid_english = extract_data("valid.100.en")
valid_german = extract_data("valid.100.de")

#preprocess data
train_english_processed = preprocess_data(train_english)
train_german_processed = preprocess_data(train_german)
valid_english_processed = preprocess_data(valid_english)
valid_german_processed = preprocess_data(valid_german)

#data to one hot
one_hot_train_english = data_to_one_hot(train_english_processed)
one_hot_train_german = data_to_one_hot(train_german_processed)
one_hot_valid_english = data_to_one_hot(valid_english_processed)
one_hot_valid_german = data_to_one_hot(valid_german_processed)


In [104]:
#inputs and outputs

tf.reset_default_graph() 
sess = tf.InteractiveSession() 

batch_size = 1
max_sequence_length = 10
encoder_inputs = tf.placeholder(shape=(batch_size, None, GLOBAL_VOCAB_SIZE), dtype=tf.float32, name='encoder_inputs')
decoder_inputs = tf.placeholder(shape=(batch_size, None, GLOBAL_VOCAB_SIZE), dtype=tf.float32, name='decoder_inputs')
decoder_targets = tf.placeholder(shape=(batch_size, None, GLOBAL_VOCAB_SIZE), dtype=tf.float32, name='decoder_targets')

In [105]:
#encoder network
hidden_size = 100
batch_size = 100
with tf.variable_scope('encoder'):
    encoder_cell = tf.nn.rnn_cell.BasicLSTMCell(hidden_size)
    (encoder_output, encoder_final_state) = tf.nn.dynamic_rnn(encoder_cell, encoder_inputs, dtype=tf.float32)


In [106]:
#decoder network
with tf.variable_scope('decoder'):
    decoder_cell = tf.nn.rnn_cell.BasicLSTMCell(hidden_size)
    (decoder_output, decoder_final_state) = tf.nn.dynamic_rnn(decoder_cell, decoder_inputs, initial_state = encoder_final_state, dtype=tf.float32)

decoder_logits = tf.contrib.layers.linear(decoder_output, GLOBAL_VOCAB_SIZE)
decoder_prediction = tf.argmax(decoder_logits)

In [107]:
#optimizer
stepwise_cross_entropy = tf.nn.softmax_cross_entropy_with_logits(
    labels=decoder_targets,
    logits=decoder_logits,
)

loss = tf.reduce_mean(stepwise_cross_entropy)
train_op = tf.train.AdamOptimizer().minimize(loss)

In [108]:
# start session
sess.run(tf.global_variables_initializer())

data_train = one_hot_train_english
data_labels = one_hot_train_german

for i in range(len(data_train)):
    input_sequence = data_train[i]
    output_sequence = data_labels[i]

    loss_ = sess.run([loss, train_op],
        feed_dict={
            encoder_inputs: generate_encoder_input(input_sequence),
            decoder_inputs: generate_decoder_input(output_sequence),
            decoder_targets: generate_decoder_target(output_sequence),
        })
    
    print loss_

[5.5460315, None]
[5.5352378, None]
[5.5243373, None]
[5.5131769, None]
[5.4999194, None]
[5.485013, None]
[5.4650874, None]
[5.4386644, None]
[5.3996711, None]
[5.3394704, None]
[5.2291522, None]
[5.0429311, None]
[4.7993741, None]
[4.5448742, None]
[4.3222103, None]
[4.1335917, None]
[3.9647334, None]
[3.8142729, None]
[3.677259, None]
[3.5515034, None]
[3.4368556, None]
[3.332624, None]
[3.2397745, None]
[3.1576931, None]
[3.0869825, None]
[3.0265203, None]
[2.9758241, None]
[2.9331725, None]
[2.8978794, None]
[2.868588, None]
[2.8440888, None]
[2.8231077, None]
[2.8048377, None]
[2.7888064, None]
[2.7743282, None]
[2.7614083, None]
[2.7498317, None]
[2.7396758, None]
[2.7306972, None]
[2.7228506, None]
[2.7159379, None]
[2.7098794, None]
[2.7048867, None]
[2.7008262, None]
[2.6976159, None]
[2.6948125, None]
[2.6923368, None]
[2.6898978, None]
[2.6873362, None]
[2.6849968, None]
[2.6816788, None]
[2.6788681, None]
[2.6758132, None]
[2.6733165, None]
[2.6698627, None]
[2.6668596, No

KeyboardInterrupt: 