In [1]:
import numpy as np
import tensorflow as tf

%load_ext autoreload
%autoreload 2

In [2]:
# Load the corpus. splitlines() discards the line terminators, and the join
# below glues the lines back together with no separator between them.
with open('data/profile_intro.csv') as f:
    data = f.read().splitlines()
text = ''.join(data)

# Vocabulary: every distinct character gets an integer id equal to its
# position in sorted order; ind_to_char is the inverse lookup.
chars = sorted(set(text))
char_to_ind = dict(zip(chars, range(len(chars))))
ind_to_char = dict(enumerate(chars))

# Integer-encode the whole corpus, one id per character.
text_as_int = np.array(list(map(char_to_ind.__getitem__, text)))

In [3]:
def generate_batches(array, batch_shape):
    """Yield an endless stream of ``(x, y)`` next-element training batches.

    The 1-D input is trimmed to a whole number of batches, laid out as
    ``num_samples`` parallel rows, and walked left to right in windows of
    ``num_chars`` columns. After the last window the generator starts over
    from the first one, so it never raises ``StopIteration``.

    Args:
        array: 1-D sequence of integer-encoded items (anything accepted by
            ``np.asarray``).
        batch_shape: ``(num_samples, num_chars)`` -- rows per batch and
            items per row.

    Yields:
        Tuple ``(x, y)`` of arrays, each of shape ``(num_samples, num_chars)``.
        ``y[r, t]`` is the item that follows ``x[r, t]`` in row ``r``. For the
        last window of a row, the final target wraps around to the row's
        first item, which is the next input that row is fed once the
        generator restarts. ``x`` is a slice of the internal buffer; ``y`` is
        a freshly allocated array.

    Raises:
        ValueError: If ``array`` holds fewer than ``num_samples * num_chars``
            items. Because this is a generator, the error surfaces on the
            first ``next()`` call.
    """
    num_samples, num_chars = batch_shape
    total_items = num_samples * num_chars
    num_batches = len(array) // total_items
    if num_batches == 0:
        # Without this guard the loop below would yield empty batches forever.
        raise ValueError(
            "array has %d items; at least %d are needed for one batch of shape %r"
            % (len(array), total_items, (num_samples, num_chars))
        )

    # Keep only the full batches and drop the leftover tail.
    array = np.asarray(array)[:total_items * num_batches]

    # One row per sample; each row is num_batches * num_chars items long.
    array = array.reshape((num_samples, -1))
    row_len = array.shape[1]

    i = 0
    while True:
        x = array[:, i:i + num_chars]

        # Start column of the window that will be yielded next (wraps to 0).
        next_start = i + num_chars
        if next_start >= row_len:
            next_start = 0

        # Targets are the inputs shifted left by one. The last target must be
        # the first item of the *next* window, not of the current one (which
        # is what np.roll(x, -1, axis=1) would put there).
        y = np.empty_like(x)
        y[:, :-1] = x[:, 1:]
        y[:, -1] = array[:, next_start]

        i = next_start
        yield x, y

In [5]:
from char_rnn import CharRNN

# Shape handed to both the batch generator and the model:
# (rows per batch, characters per row).
batch_shape = (100, 100)
batches = generate_batches(text_as_int, batch_shape)

# Build the network and train it on the endless batch stream.
model = CharRNN(
    char_to_ind,
    batch_shape,
    rnn_size=256,
    num_layers=3,
    learning_rate=0.005,
    grad_clip=5,
)
model.train(batches, iters=30000, keep_prob=0.5)

step: 0 loss: 4.5552
step: 200 loss: 2.3369
step: 400 loss: 1.8307
step: 600 loss: 1.6458
step: 800 loss: 1.5674
step: 1000 loss: 1.5247
step: 1200 loss: 1.4381
step: 1400 loss: 1.4593
step: 1600 loss: 1.4226
step: 1800 loss: 1.4086
step: 2000 loss: 1.3644
step: 2200 loss: 1.3432
step: 2400 loss: 1.3583
step: 2600 loss: 1.3224
step: 2800 loss: 1.3432
step: 3000 loss: 1.3144
step: 3200 loss: 1.2720
step: 3400 loss: 1.2880
step: 3600 loss: 1.2895
step: 3800 loss: 1.2880
step: 4000 loss: 1.2728
step: 4200 loss: 1.2646
step: 4400 loss: 1.3011
step: 4600 loss: 1.2914
step: 4800 loss: 1.2448
step: 5000 loss: 1.2878
step: 5200 loss: 1.2996
step: 5400 loss: 1.2626
step: 5600 loss: 1.2874
step: 5800 loss: 1.2749
step: 6000 loss: 1.2752
step: 6200 loss: 1.2982
step: 6400 loss: 1.2054
step: 6600 loss: 1.2333
step: 6800 loss: 1.2188
step: 7000 loss: 1.2184
step: 7200 loss: 1.2410
step: 7400 loss: 1.2374
step: 7600 loss: 1.2321
step: 7800 loss: 1.1862
step: 8000 loss: 1.2723
step: 8200 loss: 1.2486

In [10]:
from char_rnn import CharRNN

# Rebuild the model with the same hyper-parameters used in the training cell,
# this time passing predict=True, then sample 1000 characters seeded with the
# prime text. The recorded output shows a checkpoint being restored when this
# cell runs.
# NOTE(review): presumably the architecture arguments must match the saved
# checkpoint for the restore to succeed -- confirm against char_rnn.
model = CharRNN(
    char_to_ind,
    batch_shape,
    rnn_size=256,
    num_layers=3,
    learning_rate=0.005,
    grad_clip=5,
    predict=True,
)
samp = model.predict(prime="I'm", num_char=1000)
print(samp)

INFO:tensorflow:Restoring parameters from checkpoints/i29800_l256_ckpt
I'm truthful than I do was an enversation with amount of conversation. The communication is can be silly in a cal though, and I love to go out and get old and love to travel and eat going.""I've spent them almost 2 years ago, I'm a bit of an outgoing, and I talk. I like to laugh and try new things, with a book, travel, and enjoy something new about a good feaduring thing to do with. I am a consultant woman with a great lover of a good started and a great side of their food, with an adventure of all time when I'm staying in. I like to go to a show and walk or spend time one with friends.""Hm there happens, there is, that takes the taste of some past few text and I like a good chean trait, tennis with friends all my friends in my face. I'm a pretty social activist and adore thinkers of the mind, and to see my backgrounds and I am a posible one, because I like to live it to the best thing. I like to think of myself as 