In [1]:
import numpy as np
import tensorflow as tf

%load_ext autoreload
%autoreload 2

In [2]:
# Read the corpus file and split it into lines (line terminators removed).
# The file is opened with the platform's default text encoding.
with open('data/profile_intro.csv') as f:
    data = f.read().splitlines()

# Glue the lines back together with no separator, so line breaks never
# appear in the training text.
text = ''.join(data)

# Vocabulary: every distinct character in sorted order, with lookup tables
# in both directions (character -> index and index -> character).
chars = sorted(set(text))
char_to_ind = dict(zip(chars, range(len(chars))))
ind_to_char = dict(enumerate(chars))

# The whole corpus encoded as a 1-D array of vocabulary indices.
text_as_int = np.array([char_to_ind[ch] for ch in text])

In [3]:
def generate_batches(array, num_samples, num_chars):
    """Yield an endless stream of (x, y) batches for next-character training.

    The 1-D sequence is truncated to a whole number of batches, laid out as
    ``num_samples`` contiguous rows, and then swept left to right in windows
    of ``num_chars`` columns. After the last window the sweep restarts at the
    first column, so the generator never terminates.

    Args:
        array: 1-D sequence of encoded characters (anything ``np.asarray``
            accepts).
        num_samples: Number of rows (parallel sequences) in every batch.
        num_chars: Number of columns (time steps) in every batch.

    Yields:
        Tuple ``(x, y)`` of arrays with shape ``(num_samples, num_chars)``.
        ``y[r, t]`` is the element that follows ``x[r, t]`` in the original
        sequence. The only exception is the very last kept element when
        nothing was truncated: its target wraps around to the first element.

    Raises:
        ValueError: If ``num_samples`` or ``num_chars`` is not positive, or
            if ``array`` holds fewer than ``num_samples * num_chars``
            elements (raised on the first ``next()`` call).
    """
    if num_samples <= 0 or num_chars <= 0:
        raise ValueError("num_samples and num_chars must be positive")

    array = np.asarray(array)
    batch_size = num_samples * num_chars
    num_batches = len(array) // batch_size
    if num_batches == 0:
        raise ValueError(
            "array has %d elements; at least %d are needed for one batch"
            % (len(array), batch_size))

    # Keep only the full batches and drop the remainder.
    usable = batch_size * num_batches
    inputs = array[:usable]

    # Build the targets on the flat sequence *before* reshaping, so that the
    # last column of every window (and of every row) still points at the true
    # next element rather than wrapping inside the window.
    targets = np.roll(inputs, -1)
    if len(array) > usable:
        # The element right after the kept region is the real successor of
        # the last kept element; prefer it over the wrap-around value.
        targets[-1] = array[usable]

    # One contiguous slice of the sequence per row.
    inputs = inputs.reshape((num_samples, -1))
    targets = targets.reshape((num_samples, -1))

    row_len = inputs.shape[1]
    i = 0
    while True:
        x = inputs[:, i:i+num_chars]
        y = targets[:, i:i+num_chars]

        # Advance to the next window, restarting once the rows are exhausted.
        i += num_chars
        if i >= row_len:
            i = 0

        yield x, y

In [5]:
# Hyperparameters shared by the training and sampling cells below.
batch_size = 100         # Sequences per batch (rows of each x/y array from generate_batches)
num_steps = 100          # Characters per sequence (columns of each x/y array)
rnn_size = 256           # Size of hidden layers in rnn_cell
num_layers = 3           # Number of hidden layers
learning_rate = 0.005    # Learning rate passed to CharRNN

In [14]:
from char_rnn import CharRNN

# Endless (inputs, targets) generator: batch_size rows of num_steps
# characters per batch.
batches = generate_batches(text_as_int, batch_size, num_steps)

# Build the network in training mode (vocabulary size first, 'LSTM' cells)
# and run 2000 training iterations over the batch stream.
model = CharRNN(
    len(chars), batch_size, num_steps, 'LSTM',
    rnn_size, num_layers, learning_rate,
)
model.train(batches, iters=2000)

step: 200  loss: 3.0507  0.2654 sec/batch
step: 400  loss: 2.2844  0.2707 sec/batch
step: 600  loss: 1.9872  0.2803 sec/batch
step: 800  loss: 1.8188  0.2661 sec/batch
step: 1000  loss: 1.7130  0.2656 sec/batch
step: 1200  loss: 1.6291  0.2726 sec/batch
step: 1400  loss: 1.6035  0.2752 sec/batch
step: 1600  loss: 1.5598  0.2719 sec/batch
step: 1800  loss: 1.5149  0.2702 sec/batch
step: 2000  loss: 1.4538  0.2744 sec/batch


In [15]:
from char_rnn import CharRNN

# Rebuild the network with the same hyperparameters as training, this time
# with sampling=True.
model = CharRNN(len(chars), batch_size, num_steps, 'LSTM', rnn_size,
                num_layers, learning_rate, sampling=True)

# choose the last checkpoint and generate new text
checkpoint = tf.train.latest_checkpoint('checkpoints')
if checkpoint is None:
    # latest_checkpoint returns None when the directory holds no checkpoint;
    # fail here with a clear message instead of passing None to sample().
    raise FileNotFoundError(
        "No checkpoint found in 'checkpoints'; run the training cell first.")

# Generate 1000 characters, seeding the model with the prime string.
samp = model.sample(checkpoint, n_samples=1000, vocab_size=len(chars),
                    vocab_to_ind=char_to_ind, ind_to_vocab=ind_to_char, prime="Hi, I'm ")
print(samp)

INFO:tensorflow:Restoring parameters from checkpoints/i2000_l256_LSTM_ckpt
Hi, I'm to me sanerion andey and atharing of mate than a day at a ban sorerond somest atemand and and the a diche. I'm saces I'mes sime soust and somatally a sime time, a lone, who ten the want mayist.. I math myseles. I'es semering my sectalall tantor and meally sountes than bean things mothiel sicite te meedingly of tite that I hund simestion myee seal serane oulestarases and to bet and a time aroris my and tat at and tous I ande the a seameder,s. "I dore sere and meast and teer to mand tratel andine seriouse ont me mereations, tord stire semally silina that time on a thinge of the andersant to santior, tand a don ontat amporse salation to seatar and the sica as sonoother tattie to are munes or a dand the wime the sillle outhool it to the beete on astime a stele as them atal the tist saca saris at at the sean to the time in atariate of out and and sores mest tho and attertses. I love. I am the tha deast a tell