In [1]:
%matplotlib inline

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
plt.rcParams['figure.figsize'] = (7,7) # Make the figures a bit bigger

import tensorflow as tf

from tensorflow.contrib.rnn.python.ops import rnn_cell

In [3]:
!mkdir -p ../data
!curl -L -o ../data/will_play_text.csv https://commondatastorage.googleapis.com/ckannet-storage/2012-04-24T183403/will_play_text.csv

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100  9.8M  100  9.8M    0     0  36.7M      0 --:--:-- --:--:-- --:--:-- 36.6M


In [4]:
df = pd.read_csv('../data/will_play_text.csv', sep=';', header=None, names=['row', 'piece', 'chapter', 'section', 'character', 'line'])

In [99]:
sentences = df[~df.character.isnull()].line

# mapping
letters = ['>', '<'] + list(set(''.join(list(sentences))))
nletters = len(letters)

bw_mapping = dict(enumerate(letters, 1))
fw_mapping = dict([(c, ix) for ix, c in enumerate(letters, 1)])

rsen_sz = 64
sen_sz = 1 + rsen_sz + 1

def pad_sentence(s):
    l = len(s)
    coded = [fw_mapping[c] for c in ['>'] + list(s) + ['<']]
    return coded + [0] * (rsen_sz - l)

# dataset
sentences = [pad_sentence(s) for s in sentences if len(s)<=rsen_sz]
sentences = np.array(sentences)
sentences.shape

(111005, 66)

In [100]:
sentences[2]

array([ 1, 29, 14, 21, 58, 41, 70, 59, 53, 54, 22, 59, 58, 11, 22, 10, 70,
       54, 71,  8, 36, 14, 21, 59, 21, 58, 53, 67, 67, 59, 14, 54, 11, 58,
       10, 13, 58, 14, 59,  8, 58, 41, 70, 10, 36, 64, 11,  2,  0,  0,  0,
        0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0])

In [107]:
hidden_state_size = 256

def rnn_text(sen_sz=sen_sz):
    cell_state = tf.placeholder(tf.float32, [batch_size, hidden_state_size])
    hidden_state = tf.placeholder(tf.float32, [batch_size, hidden_state_size])
    init_state = tf.nn.rnn_cell.LSTMStateTuple(cell_state, hidden_state)
    
    seq_input = tf.placeholder(tf.int64, [None, sen_sz], name='sentence_input')
    
    xs = seq_input[:, :-1]
    ys = seq_input[:, 1:]
    
    xs_valid_mask = tf.logical_and(tf.not_equal(xs, 0), tf.not_equal(xs, 2)) # exclude last character
    xs_seq_len = tf.reduce_sum(tf.to_int32(xs_valid_mask), axis=1)
    
    # Turn to zero-based
    #xs = tf.Print(xs, [xs], message='xs before', summarize=100)
    #xs = xs - 1
    #xs = tf.Print(xs, [xs], message='xs after', summarize=100)
    #ys = ys - 1
    #xs = tf.Print(xs, [ys], message='ys after', summarize=100)
    #xs = tf.Print(xs, [xs_seq_len], message='xs length', summarize=100)
    #xs = tf.Print(xs, [xs_valid_mask], message='xs mask', summarize=100)
    #xs = tf.Print(xs, [xs[:, xs_seq_len-1]], message='last xs', summarize=100)
    #xs = tf.Print(xs, [xs_seq_len, xs[:, xs_seq_len-1]], message='xs length', summarize=100)
    
    xs_one_hot = tf.one_hot(xs, depth=nletters+1)
    ys_one_hot = tf.one_hot(ys, depth=nletters+1)
    
    rnn_cell = tf.nn.rnn_cell.LSTMCell(hidden_state_size, state_is_tuple=True)
    outputs, state = tf.nn.dynamic_rnn(rnn_cell, xs_one_hot, sequence_length=xs_seq_len, initial_state=init_state)

    with tf.variable_scope("output"):
        W_hy = tf.get_variable('W_hy', [hidden_state_size, nletters+1])
        B_hy = tf.get_variable('B_hy', [nletters+1])
    
    logits = tf.tensordot(outputs, W_hy, axes=1) + B_hy
    cross_entropy = tf.nn.softmax_cross_entropy_with_logits(labels=ys_one_hot, logits=logits)
    masked_cross_entropy = tf.multiply(cross_entropy, tf.to_float(xs_valid_mask))
    summed_entropy = tf.reduce_sum(masked_cross_entropy, 1)
    sequence_entropy = tf.divide(summed_entropy, tf.to_float(xs_seq_len))
    total_entropy = tf.reduce_mean(sequence_entropy)
    
    return seq_input, (cell_state, hidden_state), total_entropy


def rnn_synth_text(sen_sz=sen_sz):
    cell_state = tf.placeholder(tf.float32, [batch_size, hidden_state_size])
    hidden_state = tf.placeholder(tf.float32, [batch_size, hidden_state_size])
    init_state = tf.nn.rnn_cell.LSTMStateTuple(cell_state, hidden_state)
    
    char_input = tf.placeholder(tf.int64, [None])
    x = tf.one_hot(char_input, depth=nletters)
    
    rnn_cell = tf.nn.rnn_cell.LSTMCell(hidden_state_size, state_is_tuple=True)
    outputs, state = tf.nn.dynamic_rnn(rnn_cell, x, sequence_length=[1], initial_state=init_state)

    with tf.variable_scope("output"):
        W_hy = tf.get_variable('W_hy', [hidden_state_size, nletters])
        B_hy = tf.get_variable('B_hy', [nletters])

    logits = tf.tensordot(outputs, W_hy, axes=1) + B_hy
    probs = tf.nn.softmax(logits=logits)
    
    return char_input, init_state, probs, state

In [108]:
def my_optimize(sess, train, Xs, loss,
                name='train', steps=1000,
                optimizer=tf.train.GradientDescentOptimizer(0.5),
                dict={}, batch_size=100, report_steps=500):
    train_step = optimizer.minimize(loss)
    
    sess.run(tf.global_variables_initializer())

    for i in range(steps):
        
        # Such a hack
        start = (i * batch_size) % train.shape[0]
        end = ((i+1) * batch_size) % train.shape[0]
        
        if start < end:
            batch_xs = train[start:end]
            dict = dict.copy()# Avoid getting the keys from a different graph
            dict[Xs] = batch_xs
            _, xloss = sess.run([train_step, loss], feed_dict=dict)
            
            if i % report_steps == 0:
                print('Step {}\tLoss: {}'.format(i, xloss))

In [109]:
batch_size = 256

g = tf.Graph()
with g.as_default():
    with tf.Session() as sess:
        
        Xs, (cell_state, hidden_state), loss = rnn_text()
        
        optimizer = tf.train.AdamOptimizer()
        my_optimize(sess,
                    sentences,
                    Xs,
                    loss,
                    steps=1001,
                    report_steps=100,
                    name='train-1',
                    batch_size=batch_size,
                    dict={cell_state: np.zeros((batch_size, hidden_state_size)),
                          hidden_state: np.zeros((batch_size, hidden_state_size)),
                         },
                    optimizer=optimizer)
        tf.train.Saver().save(sess, 'artifacts/model-rnn-2-v1.ckpt')

Step 0	Loss: 4.405694484710693
Step 100	Loss: 3.079648971557617
Step 200	Loss: 2.7579002380371094
Step 300	Loss: 2.5285120010375977
Step 400	Loss: 2.381396532058716
Step 500	Loss: 2.2919301986694336
Step 600	Loss: 2.2863636016845703
Step 700	Loss: 2.2690131664276123
Step 800	Loss: 2.2186548709869385
Step 900	Loss: 2.224323272705078
Step 1000	Loss: 2.140319585800171


In [113]:
def rnn_synth_text(sen_sz=sen_sz):
    cell_state = tf.placeholder(tf.float32, [batch_size, hidden_state_size])
    hidden_state = tf.placeholder(tf.float32, [batch_size, hidden_state_size])
    init_state = tf.nn.rnn_cell.LSTMStateTuple(cell_state, hidden_state)
    
    char_input = tf.placeholder(tf.int64, [None, None], name='input_char')
    # turn to zero-based
    #char_input = char_input - 1
    x = tf.one_hot(char_input, depth=nletters+1)
    
    rnn_cell = tf.nn.rnn_cell.LSTMCell(hidden_state_size, state_is_tuple=True)
    outputs, state = tf.nn.dynamic_rnn(rnn_cell, x, sequence_length=[1], initial_state=init_state)

    with tf.variable_scope("output"):
        W_hy = tf.get_variable('W_hy', [hidden_state_size, nletters+1])
        B_hy = tf.get_variable('B_hy', [nletters+1])

    logits = tf.tensordot(outputs, W_hy, axes=1) + B_hy
    probs = tf.nn.softmax(logits=logits)
    
    return char_input, (cell_state, hidden_state), probs, state

batch_size = 1

g = tf.Graph()
with g.as_default():
    with tf.Session() as sess:
        char_input, init_state, p_output, out_state = rnn_synth_text()
        
        tf.train.Saver().restore(sess, 'artifacts/model-rnn-2-v1.ckpt')
        #sess.run(tf.global_variables_initializer())
        
        res = ['>']
        cur_char = fw_mapping['>']
        
        cur_cell_state = np.zeros((batch_size, hidden_state_size))
        cur_hidden_state = np.zeros((batch_size, hidden_state_size))

        for _ in range(100):#sen_sz - 1):        
            ps, state = sess.run([p_output, out_state],
                                 feed_dict={char_input: np.array([cur_char]).reshape((1, 1)),
                                            init_state: (cur_cell_state, cur_hidden_state)})
            #cur_char = np.argmax(ps)
            cur_char = np.random.choice(nletters+1, p=ps.reshape(nletters+1))
            # Turn to one-based
            #cur_char = cur_char + 1
            cur_state = state
            res.append(bw_mapping[cur_char])
            
            cur_cell_state, cur_hidden_state = state
            
            if bw_mapping[cur_char] is '<':
                break
        
        print (''.join(res))

INFO:tensorflow:Restoring parameters from artifacts/model-rnn-2-v1.ckpt
>the gotone aly epritut opourel grons awf the aysure.<
