In [1]:
from keras.callbacks import LambdaCallback, TensorBoard
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras.optimizers import RMSprop
from keras.utils.data_utils import get_file
import numpy as np
import random, sys, io, re, string

Using TensorFlow backend.


In [2]:
shakes_lines = []
poem_starts = []
next_ln = False
min_ = 100

with open("data/shakespeare.txt") as f:
    
    # Read in all lines
    lines = f.readlines()
    for line in lines[1:]:
        
        # replace poem breaks with ~
        if re.match('\s+\d+', line):
            shakes_lines.append('~')
            next_ln = True
            continue
            
        # get rid of blank lines
        seq = line.strip()
        if len(seq) < 3:
            continue
        else:
            min_ = len(seq)
        # remove punctuation
        seq = seq.translate(str.maketrans('', '', string.punctuation))
        # make lowercase
        seq = seq.lower()
        #print(seq)
        shakes_lines.append(seq)
        
        if next_ln:
            poem_starts.append(seq)
    
processed_text = '\n'.join(shakes_lines)
# print(processed_text[:60*20])
# print(poem_starts[:5])

maxlen = max([len(ln) for ln in processed_text.split('\n')]) + 1
print('max length: ', maxlen)
print('min length: ', min_)

window_size = 40

max length:  58
min length:  46


In [3]:
chars = sorted(list(set(processed_text)))
print('total chars:', len(chars))
print(chars)
char_index = dict((c, i) for i, c in enumerate(chars))
index_char = dict((i, c) for i, c in enumerate(chars))

total chars: 29
['\n', ' ', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', '~']


In [4]:
# cut the text in semi-redundant sequences of window_size characters
step = 3
sentences = []
next_chars = []
for i in range(0, len(processed_text) - window_size, step):
    sentences.append(processed_text[i: i + window_size])
    next_chars.append(processed_text[i + window_size])

X = np.zeros((len(sentences), window_size, len(chars)), dtype=np.bool)
y = np.zeros((len(sentences), len(chars)), dtype=np.bool)

print(X.shape, y.shape)

for i, sentence in enumerate(sentences):
    for t, char in enumerate(sentence):
        try:
            X[i, t, char_index[char]] = 1
        except:
            print(i, t, char_index[char])
    y[i, char_index[next_chars[i]]] = 1

nb sequences: 30268
Vectorization...
(30268, 40, 29) (30268, 29)


In [6]:
model = Sequential()
model.add(LSTM(128, input_shape=(window_size, len(chars))))
model.add(Dense(len(chars), activation='softmax'))

optimizer = keras.optimizers.RMSprop(clipnorm=1)
model.compile(optimizer, 'categorical_crossentropy', 
              metrics=['categorical_accuracy', 'categorical_crossentropy'])

In [7]:
def sample(preds, temperature=1.0):
    # helper function to sample an index from a probability array
    preds = np.asarray(preds).astype('float64')
    preds = np.log(preds) / temperature
    exp_preds = np.exp(preds)
    preds = exp_preds / np.sum(exp_preds)
    probas = np.random.multinomial(1, preds, 1)
    return np.argmax(probas)

In [8]:
def generate_poem(from_seed, temp=0.5, fast=True):
    print(f'-- Generating Poem With Temperature: {temp}')

    generated = ''
    sentence = from_seed
    generated += sentence
    
    real_start = from_seed[len(from_seed) - len(from_seed.lstrip()):]
    print(f'-- From seed: \n\"{real_start}\"\n')
    
    if not fast:
        sys.stdout.write(real_start)

    lines = 1
    if not fast:
        while lines < 14:
            x_pred = np.zeros((1, window_size, len(chars)))
            for t, char in enumerate(sentence):
                x_pred[0, t, char_index[char]] = 1.

            preds = model.predict(x_pred, verbose=0)[0]
            next_index = sample(preds, temp)
            next_char = index_char[next_index]

            sentence = sentence[1:] + next_char

            if not fast:
                sys.stdout.write(next_char)
                sys.stdout.flush()

            generated += next_char
            lines = len(generated.split('\n'))
    else:
        for _ in range(300):
            x_pred = np.zeros((1, window_size, len(chars)))
            for t, char in enumerate(sentence):
                x_pred[0, t, char_index[char]] = 1.

            preds = model.predict(x_pred, verbose=0)[0]
            next_index = sample(preds, temp)
            next_char = index_char[next_index]

            sentence = sentence[1:] + next_char

            if not fast:
                sys.stdout.write(next_char)
                sys.stdout.flush()

            generated += next_char
            lines = len(generated.split('\n'))
        
    if fast:
        print(generated)
    else:
        print()


In [9]:
def on_epoch_end(epoch, _, epochs_split=5):
    if epoch % epochs_split == 0:
        
        start_index = random.randint(0, len(processed_text) - window_size - 1)
        for temp in [0.2, 0.5, 1.0]:

            sentence = processed_text[start_index: start_index + window_size]
            generate_poem(sentence, temp=temp)

In [10]:
#tensorboard = TensorBoard(log_dir='./Graph', histogram_freq=0, write_graph=True)


In [11]:
print_callback = LambdaCallback(on_epoch_end=on_epoch_end)

model.fit(X, y,
          batch_size=128,
          epochs=60,
          callbacks=[print_callback])#, tensorboard])

Epoch 1/60
-- Generating Poem With Temperature: 0.2
-- From seed: 
"e are not so stout
nor gates of steel so"

e are not so stout
nor gates of steel so the thes yo the the the the the the the the the the the the the the the the the the the the sthe th the the the the the s th th in the the the the the the the the that the the the the the the the the the the th th the the the the the the the the the the the the the the the the the the the the the t
-- Generating Poem With Temperature: 0.5
-- From seed: 
"e are not so stout
nor gates of steel so"

e are not so stout
nor gates of steel sovelh thelgthd  ove the d o ss the se mos thd th a the e hi st to s me se in mhe sod tno shes tto 
ohii ig th tht an that no ywht se nt st we aal  oo d deat he swg th to n t e my this th t tha s dtd los
the 
ae the whe tho e aid thge we thon tet t ne thiths whe goe tbov shte tee she aty the thy the b
-- Generating Poem With Temperature: 1.0
-- From seed: 
"e are not so stout
nor gates of steel so"

e are n

<keras.callbacks.callbacks.History at 0x7ff6603b4bd0>

In [12]:
model.fit(x, y,
          batch_size=128,
          epochs=100,
          callbacks=[print_callback])

Epoch 1/100
-- Generating Poem With Temperature: 0.2
-- From seed: 
"ef
to bear greater wrong than hates know"

ef
to bear greater wrong than hates know
and ther for mert and to amporth heart
whiss brice and that win his ruds do glens
~
whose fass whem on will is graning doon
then for my self thee thee a diend hell will kingh swent
for thy will wrow dost some for me will it made
the world arrceat thou soots and sweetss and rease
misere to wat no mi
-- Generating Poem With Temperature: 0.5
-- From seed: 
"ef
to bear greater wrong than hates know"

ef
to bear greater wrong than hates know
and mine eses beavere plans not ment
and i fright foom that my self dost to ee
~
as i a wheplenting nime youth sum on thee
~
tils my showhs art for wert a ofow lowed
so wett thou mistr bestes to have this merd
and loves eve thy blade douch core in thee
~
thous most dost bo het bot wise or your fout
-- Generating Poem With Temperature: 1.0
-- From seed: 
"ef
to bear greater wrong than hates know"

ef
to 

<keras.callbacks.callbacks.History at 0x7ff66018cc90>

In [13]:

    prompt = "  shall i compare thee to a summers day\n"

    for t in [0.25, 0.5, 0.75]:
        generate_poem(prompt, temp=t)

-- Generating Poem With Temperature: 0.25
-- From seed: 
"shall i compare thee to a summers day
"

  shall i compare thee to a summers day
thou asting hearts all sorn works youre grow
than nod with for thou my swee pond reap
o reart i way be ithing not of clill
and beatien peicuulss in me prichliged
the oir more triffe inge of all thy 
urbalsed figheded dot deensme
by orn mear hear heautherss me whin thingst
th sourdsuch far thou worlo
-- Generating Poem With Temperature: 0.5
-- From seed: 
"shall i compare thee to a summers day
"

  shall i compare thee to a summers day
thou astique will hath you detand howile
whom canntt thus what i wrow sore to wee
so but frinile i spounts dith love how love
~
who loved what eedon pabseand muthow that blought
thos grees thee and lids not mear hild of youl
tass thy hust wask o heers and air me way
on tiens heich can ouc tome a mee
-- Generating Poem With Temperature: 0.75
-- From seed: 
"shall i compare thee to a summers day
"

  shall i compare thee