In [1]:
import tensorflow as tf
import numpy as np
tf.set_random_seed(777)  # reproducibility


In [2]:
sample = " if you want you"
idx2char = list(set(sample))  # index -> char
char2idx = {c: i for i, c in enumerate(idx2char)}  # char -> idex

print("idxToChar: ", idx2char)
print("charToIdx: ", char2idx)

idxToChar:  ['i', 'o', ' ', 'a', 'f', 'w', 'y', 't', 'u', 'n']
charToIdx:  {'i': 0, 'o': 1, ' ': 2, 'a': 3, 'f': 4, 'w': 5, 'y': 6, 't': 7, 'u': 8, 'n': 9}


In [3]:
# hyper parameters
dic_size = len(char2idx)  # RNN input size (one hot size)
rnn_hidden_size = len(char2idx)  # RNN output size
num_classes = len(char2idx)  # final output size (RNN or softmax, etc.)
batch_size = 1  # one sample data, one batch
sequence_length = len(sample) - 1  # number of lstm rollings (unit #)
learning_rate = 0.1

In [4]:
sample_idx = [char2idx[c] for c in sample]  # char to index
x_data = [sample_idx[:-1]]  # X data sample (0 ~ n-1) hello: hell
y_data = [sample_idx[1:]]   # Y label sample (1 ~ n) hello: ello
print("X_data: ", x_data)
print("Y_data: ", y_data)
print("X_shape: ", np.shape(x_data))
print("Y_shape: ", np.shape(y_data))

X_data:  [[2, 0, 4, 2, 6, 1, 8, 2, 5, 3, 9, 7, 2, 6, 1]]
Y_data:  [[0, 4, 2, 6, 1, 8, 2, 5, 3, 9, 7, 2, 6, 1, 8]]
X_shape:  (1, 15)
Y_shape:  (1, 15)


In [5]:
X = tf.placeholder(tf.int32, [None, sequence_length])  # X data
Y = tf.placeholder(tf.int32, [None, sequence_length])  # Y label

print("X: ", X)
print("Y: ", Y)

X:  Tensor("Placeholder:0", shape=(?, 15), dtype=int32)
Y:  Tensor("Placeholder_1:0", shape=(?, 15), dtype=int32)


In [6]:
# flatten the data (ignore batches for now). No effect if the batch size is 1
X_one_hot = tf.one_hot(X, num_classes)  # one hot: 1 -> 0 1 0 0 0 0 0 0 0 0
X_for_softmax = tf.reshape(X_one_hot, [-1, rnn_hidden_size])

# softmax layer (rnn_hidden_size -> num_classes)
softmax_w = tf.get_variable("softmax_w", [rnn_hidden_size, num_classes])
softmax_b = tf.get_variable("softmax_b", [num_classes])
outputs = tf.matmul(X_for_softmax, softmax_w) + softmax_b

In [7]:
# expend the data (revive the batches)
outputs = tf.reshape(outputs, [batch_size, sequence_length, num_classes])
weights = tf.ones([batch_size, sequence_length])

# Compute sequence cost/loss
sequence_loss = tf.contrib.seq2seq.sequence_loss(
    logits=outputs, targets=Y, weights=weights)
loss = tf.reduce_mean(sequence_loss)  # mean all sequence loss
train = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(loss)

prediction = tf.argmax(outputs, axis=2)

In [8]:
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for i in range(3000):
        l, _ = sess.run([loss, train], feed_dict={X: x_data, Y: y_data})
        result = sess.run(prediction, feed_dict={X: x_data})

        # print char using dic
        result_str = [idx2char[c] for c in np.squeeze(result)]
        if(i < 20 or i > 2980):
            print(i, "loss:", l, "Prediction:", ''.join(result_str))

0 loss: 2.3208687 Prediction: aayaaytaontnaay
1 loss: 2.10981 Prediction: yoyyoytyont yoy
2 loss: 1.9163897 Prediction: yo yoy yont yoy
3 loss: 1.738809 Prediction: yo you yant you
4 loss: 1.5747924 Prediction: yo you yant you
5 loss: 1.4242975 Prediction: yf you yant you
6 loss: 1.2881373 Prediction: yf you yant you
7 loss: 1.1666163 Prediction: yf you yant you
8 loss: 1.0591308 Prediction: yf you yant you
9 loss: 0.9643479 Prediction: yf you yant you
10 loss: 0.880652 Prediction: yf you yant you
11 loss: 0.80653286 Prediction: yf you yant you
12 loss: 0.7407698 Prediction: yf you yant you
13 loss: 0.68245125 Prediction: yf you yant you
14 loss: 0.6308998 Prediction: yf you yant you
15 loss: 0.58557206 Prediction: yf you yant you
16 loss: 0.5459771 Prediction: yf you yant you
17 loss: 0.511629 Prediction: yf you yant you
18 loss: 0.48202607 Prediction: yf you yant you
19 loss: 0.45665312 Prediction: yf you yant you
2981 loss: 0.27732173 Prediction: yf you yant you
2982 loss: 0.2773222