In [2]:
import tensorflow as tf
import numpy as np
from tensorflow.contrib import rnn

In [3]:
tf.set_random_seed(777)  # reproducibility

In [4]:
sentence = ("if you want to build a ship, don't drum up people together to "
            "collect wood and don't assign them tasks and work, but rather "
            "teach them to long for the endless immensity of the sea.")

char_set = list(set(sentence))
char_dic = {w: i for i, w in enumerate(char_set)}

print("dic: ", char_dic)

dic:  {',': 0, "'": 1, 'r': 2, 'g': 3, 'b': 4, 'w': 5, 'i': 6, 'k': 7, 'p': 8, 'e': 9, 'l': 10, 's': 11, 'c': 12, '.': 13, 'h': 14, 'f': 15, 'd': 16, 'y': 17, 'm': 18, ' ': 19, 't': 20, 'a': 21, 'o': 22, 'u': 23, 'n': 24}


In [5]:
data_dim = len(char_set)
hidden_size = len(char_set)
num_classes = len(char_set)
sequence_length = 10  # Any arbitrary number
learning_rate = 0.1

In [7]:
dataX = []
dataY = []
for i in range(0, len(sentence) - sequence_length):
    x_str = sentence[i:i + sequence_length]
    y_str = sentence[i + 1: i + sequence_length + 1]
    
    if i < 10:
        print(i, x_str, '->', y_str)

    x = [char_dic[c] for c in x_str]  # x str to index
    y = [char_dic[c] for c in y_str]  # y str to index

    dataX.append(x)
    dataY.append(y)
    
batch_size = len(dataX)

print("batch_size: ", batch_size)
print("X_shape: ", np.shape(dataX))
print("Y_shape: ", np.shape(dataY))

0 if you wan -> f you want
1 f you want ->  you want 
2  you want  -> you want t
3 you want t -> ou want to
4 ou want to -> u want to 
5 u want to  ->  want to b
6  want to b -> want to bu
7 want to bu -> ant to bui
8 ant to bui -> nt to buil
9 nt to buil -> t to build
batch_size:  170
X_shape:  (170, 10)
Y_shape:  (170, 10)


In [8]:
X = tf.placeholder(tf.int32, [None, sequence_length])
Y = tf.placeholder(tf.int32, [None, sequence_length])

# One-hot encoding
X_one_hot = tf.one_hot(X, num_classes)
print(X_one_hot)  # check out the shape

Tensor("one_hot:0", shape=(?, 10, 25), dtype=float32)


In [9]:
# Make a lstm cell with hidden_size (each unit output vector size)
def lstm_cell():
    cell = rnn.BasicLSTMCell(hidden_size, state_is_tuple=True)
    return cell

multi_cells = rnn.MultiRNNCell([lstm_cell() for _ in range(2)], state_is_tuple=True)
print("m_cells: ", multi_cells)

# outputs: unfolding size x hidden size, state = hidden size
outputs, _states = tf.nn.dynamic_rnn(multi_cells, X_one_hot, dtype=tf.float32)


m_cells:  <tensorflow.python.ops.rnn_cell_impl.MultiRNNCell object at 0x000001D1E43B5198>


In [10]:
# FC layer
X_for_fc = tf.reshape(outputs, [-1, hidden_size])
outputs = tf.contrib.layers.fully_connected(X_for_fc, num_classes, activation_fn=None)

# reshape out for sequence_loss
outputs = tf.reshape(outputs, [batch_size, sequence_length, num_classes])


In [11]:
# All weights are 1 (equal weights)
weights = tf.ones([batch_size, sequence_length])

sequence_loss = tf.contrib.seq2seq.sequence_loss(
    logits=outputs, targets=Y, weights=weights)
mean_loss = tf.reduce_mean(sequence_loss)
train_op = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(mean_loss)

sess = tf.Session()
sess.run(tf.global_variables_initializer())

In [13]:
for i in range(500):
    _, l, results = sess.run(
        [train_op, mean_loss, outputs], feed_dict={X: dataX, Y: dataY})
    for j, result in enumerate(results):
        index = np.argmax(result, axis=1)
        
        if(i % 100 == 0 and j % 50 == 0):
            print(i, j, ''.join([char_set[t] for t in index]), l)

0 0 l you want 0.22898279
0 50   ether to 0.22898279
0 100 s and work 0.22898279
0 150 tndless im 0.22898279
100 0 t you want 0.22874066
100 50   ether to 0.22874066
100 100 s and work 0.22874066
100 150 tndless im 0.22874066
200 0 g you want 0.22870076
200 50   ether to 0.22870076
200 100 , and work 0.22870076
200 150 tndless im 0.22870076
300 0 t you want 0.2290435
300 50 h ether to 0.2290435
300 100 s and work 0.2290435
300 150 tndless im 0.2290435
400 0 g you want 0.22863056
400 50 h ether to 0.22863056
400 100 , and work 0.22863056
400 150 tndless im 0.22863056


In [14]:
# Let's print the last char of each result to check it works
results = sess.run(outputs, feed_dict={X: dataX})
for j, result in enumerate(results):
    index = np.argmax(result, axis=1)
    if j is 0:  # print all for the first result to make a sentence
        print(''.join([char_set[t] for t in index]), end='')
    else:
        print(char_set[index[-1]], end='')

p you want to build a ship, don't drum up people together to collect wood and don't assign them tasks and work, but rather teach them to long for the endless immensity of the sea.