In [166]:
from keras.callbacks import LambdaCallback, TensorBoard
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras.optimizers import RMSprop
from keras.utils.data_utils import get_file
import numpy as np
import random, sys, io, re, string

In [167]:
shakes_lines = []
poem_starts = []
next_ln = False
min_ = 100

with open("data/shakespeare.txt") as f:
    
    # Read in all lines
    lines = f.readlines()
    for line in lines[1:]:
        
        # replace poem breaks with ~
        if re.match('\s+\d+', line):
            shakes_lines.append('~')
            next_ln = True
            continue
            
        # get rid of blank lines
        seq = line.strip()
        if len(seq) < 3:
            continue
        else:
            min_ = len(seq)
        # remove punctuation
        seq = seq.translate(str.maketrans('', '', string.punctuation))
        # make lowercase
        seq = seq.lower()
        #print(seq)
        shakes_lines.append(seq)
        
        if next_ln:
            poem_starts.append(seq)
    
processed_text = '\n'.join(shakes_lines)
# print(processed_text[:60*20])
# print(poem_starts[:5])

maxlen = max([len(ln) for ln in processed_text.split('\n')]) + 1
print('max length: ', maxlen)
print('min length: ', min_)

window_size = 40

max length:  58
min length:  46


In [168]:
chars = sorted(list(set(processed_text)))
print('total chars:', len(chars))
print(chars)
char_index = dict((c, i) for i, c in enumerate(chars))
index_char = dict((i, c) for i, c in enumerate(chars))

total chars: 29
['\n', ' ', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', '~']


In [169]:
# cut the text in semi-redundant sequences of window_size characters
step = 3
sentences = []
next_chars = []
for i in range(0, len(processed_text) - window_size, step):
    sentences.append(processed_text[i: i + window_size])
    next_chars.append(processed_text[i + window_size])
print('nb sequences:', len(sentences))

print('Vectorization...')
x = np.zeros((len(sentences), window_size, len(chars)), dtype=np.bool)
y = np.zeros((len(sentences), len(chars)), dtype=np.bool)

print(x.shape, y.shape)

for i, sentence in enumerate(sentences):
    for t, char in enumerate(sentence):
        try:
            x[i, t, char_index[char]] = 1
        except:
            print(i, t, char_index[char])
    y[i, char_index[next_chars[i]]] = 1

nb sequences: 30268
Vectorization...
(30268, 40, 29) (30268, 29)


In [170]:
model = Sequential()
model.add(LSTM(128, input_shape=(window_size, len(chars))))
model.add(Dense(len(chars), activation='softmax'))

optimizer = keras.optimizers.RMSprop(clipnorm=1)
model.compile(optimizer, 'categorical_crossentropy', 
              metrics=['categorical_accuracy', 'categorical_crossentropy'])

In [171]:
def sample(preds, temperature=1.0):
    # helper function to sample an index from a probability array
    preds = np.asarray(preds).astype('float64')
    preds = np.log(preds) / temperature
    exp_preds = np.exp(preds)
    preds = exp_preds / np.sum(exp_preds)
    probas = np.random.multinomial(1, preds, 1)
    return np.argmax(probas)

In [175]:
def generate_poem(from_seed, temp=0.5, fast=True):
    print(f'-- Generating Poem With Temperature: {temp}')

    generated = ''
    sentence = from_seed
    generated += sentence
    
    real_start = from_seed[len(from_seed) - len(from_seed.lstrip()):]
    print(f'-- From seed: \n\"{real_start}\"\n')
    
    if not fast:
        sys.stdout.write(real_start)

    lines = 1
    if not fast:
        while lines < 14:
            x_pred = np.zeros((1, window_size, len(chars)))
            for t, char in enumerate(sentence):
                x_pred[0, t, char_index[char]] = 1.

            preds = model.predict(x_pred, verbose=0)[0]
            next_index = sample(preds, temp)
            next_char = index_char[next_index]

            sentence = sentence[1:] + next_char

            if not fast:
                sys.stdout.write(next_char)
                sys.stdout.flush()

            generated += next_char
            lines = len(generated.split('\n'))
    else:
        for _ in range(300):
            x_pred = np.zeros((1, window_size, len(chars)))
            for t, char in enumerate(sentence):
                x_pred[0, t, char_index[char]] = 1.

            preds = model.predict(x_pred, verbose=0)[0]
            next_index = sample(preds, temp)
            next_char = index_char[next_index]

            sentence = sentence[1:] + next_char

            if not fast:
                sys.stdout.write(next_char)
                sys.stdout.flush()

            generated += next_char
            lines = len(generated.split('\n'))
        
    if fast:
        print(generated)
    else:
        print()


In [176]:
def on_epoch_end(epoch, _, epochs_split=5):
    if epoch % epochs_split == 0:
        
        start_index = random.randint(0, len(processed_text) - window_size - 1)
        for temp in [0.2, 0.5, 1.0]:

            sentence = processed_text[start_index: start_index + window_size]
            generate_poem(sentence, temp=temp)

In [177]:
#tensorboard = TensorBoard(log_dir='./Graph', histogram_freq=0, write_graph=True)


In [178]:
print_callback = LambdaCallback(on_epoch_end=on_epoch_end)

model.fit(x, y,
          batch_size=128,
          epochs=60,
          callbacks=[print_callback])#, tensorboard])

Epoch 1/60
-- Generating Poem With Temperature: 0.2
-- From seed: 
"my passion
a womans gentle heart but no"



KeyboardInterrupt: 

In [143]:
prompt = poem_starts[3]
prompt = (window_size - len(prompt)) * ' ' + prompt

#print(prompt)

for t in [0.25, 0.5, 0.75]:
    generate_poem(prompt, temp=t)

-- Generating Poem With Temperature: 0.25
-- From seed: 
"will be a tattered weed of small worth held"

will be a tattered weed of small worth held conting make
whore in the elower and give anceet freek
thou art beauty see noo the cally thes from there is thee with seef in my boonts cantrit
by all awrell from the eer lave your seef your deeds the eputter paides sind lie disprite
to tenf stiove gover sweet the looke grien
that hes precesure thought is shall and your seed
~
what deam the beauty though is my govent
and to be i mave that for my same coont toull
wooll be a cartrration shore part
the of thy seve that wild do hiverres preek
brione seenk love from thy thy sime of thy prigg
and will no sweet striegh that love that from thee sweet

-- Generating Poem With Temperature: 0.5
-- From seed: 
"will be a tattered weed of small worth held"

will be a tattered weed of small worth held is and prace
perp thou and they fair a pover yel
in the gouds sumel sunge my dessace cance
and thou broc