In [159]:
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
import os
import numpy as np
from keras.callbacks import LambdaCallback
import sys
import random
import string
import re

In [162]:
# read in text, lower case
text_full = open(os.path.join(os.getcwd(), 'data/shakespeare.txt')).read().lower()

In [163]:
# remove numbers
text_no_num = ''.join([i for i in text_full if not i.isdigit()])
# remove new lines
text_no_nline = text_no_num.replace("\n", '')

In [165]:
# remove punctuation and tabs

punctuations = '''!()-[]{};:'"\,<>./?@#$%^&*_~'''
no_punct = ""
for char in text_no_nline:
    if char not in punctuations:
        no_punct = no_punct + char

text = re.sub(' +', ' ', no_punct)

In [166]:
# from https://keras.io/examples/lstm_text_generation/

chars = sorted(list(set(text)))
print('total chars:', len(chars))
char_indices = dict((c, i) for i, c in enumerate(chars))
indices_char = dict((i, c) for i, c in enumerate(chars))

total chars: 27


In [167]:
chars

[' ',
 'a',
 'b',
 'c',
 'd',
 'e',
 'f',
 'g',
 'h',
 'i',
 'j',
 'k',
 'l',
 'm',
 'n',
 'o',
 'p',
 'q',
 'r',
 's',
 't',
 'u',
 'v',
 'w',
 'x',
 'y',
 'z']

In [168]:
# from https://keras.io/examples/lstm_text_generation/
# cut the text in semi-redundant sequences of maxlen characters

maxlen = 40
step = 3
sentences = []
next_chars = []
for i in range(0, len(text) - maxlen, step):
    sentences.append(text[i: i + maxlen])
    next_chars.append(text[i + maxlen])
print('nb sequences:', len(sentences))

nb sequences: 29602


In [169]:
# from https://keras.io/examples/lstm_text_generation/

x = np.zeros((len(sentences), maxlen, len(chars)), dtype=np.bool)
y = np.zeros((len(sentences), len(chars)), dtype=np.bool)
for i, sentence in enumerate(sentences):
    for t, char in enumerate(sentence):
        x[i, t, char_indices[char]] = 1
    y[i, char_indices[next_chars[i]]] = 1

In [84]:
np.shape(x)

(32545, 40, 61)

In [85]:
np.shape(y)

(32545, 61)

In [86]:
y

array([[False, False, False, ..., False, False, False],
       [False, False, False, ..., False, False, False],
       [False, False, False, ..., False, False, False],
       ...,
       [False, False, False, ..., False, False, False],
       [False,  True, False, ..., False, False, False],
       [False, False, False, ..., False, False, False]])

In [170]:
# define model
model = Sequential()
model.add(LSTM(125, input_shape=(x.shape[1], x.shape[2])))
model.add(Dense(len(chars), activation='softmax'))
print(model.summary())

Model: "sequential_6"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_4 (LSTM)                (None, 125)               76500     
_________________________________________________________________
dense_4 (Dense)              (None, 27)                3402      
Total params: 79,902
Trainable params: 79,902
Non-trainable params: 0
_________________________________________________________________
None


In [171]:
# compile model
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

In [172]:
# temperature = 1.0

In [173]:
# from https://keras.io/examples/lstm_text_generation/

def sample(preds, temperature=1.0):
    # helper function to sample an index from a probability array
    preds = np.asarray(preds).astype('float64')
    preds = np.log(preds) / temperature
    exp_preds = np.exp(preds)
    preds = exp_preds / np.sum(exp_preds)
    probas = np.random.multinomial(1, preds, 1)
    return np.argmax(probas)

In [298]:
# from https://keras.io/examples/lstm_text_generation/

def on_epoch_end(epoch, _):
    # Function invoked at end of each epoch. Prints generated text.
    
    # print only 10th epoch
    if epoch%100==0:
        print()
        print('----- Generating text after Epoch: %d' % epoch)

        start_index = random.randint(0, len(text) - maxlen - 1)
        for temperature in [0.25, 0.75, 1.5]:
            print('----- temperature:', temperature)

            generated = ''
            sentence = text[start_index: start_index + maxlen]
            generated += sentence
            print('----- Generating with seed: "' + sentence + '"')
            sys.stdout.write(generated)

            for i in range(400):
                x_pred = np.zeros((1, maxlen, len(chars)))
                for t, char in enumerate(sentence):
                    x_pred[0, t, char_indices[char]] = 1.

                preds = model.predict(x_pred, verbose=0)[0]
                next_index = sample(preds, temperature)
                next_char = indices_char[next_index]

                sentence = sentence[1:] + next_char

                sys.stdout.write(next_char)
                sys.stdout.flush()
            print()
    else: 
        pass

print_callback = LambdaCallback(on_epoch_end=on_epoch_end)

In [194]:
# fit model
model.fit(x, y, batch_size=32, epochs=100, verbose=2, callbacks=[print_callback])

Epoch 1/100
 - 27s - loss: 0.8847 - accuracy: 0.7301

----- Generating text after Epoch: 0
----- temperature: 0.25
----- Generating with seed: "son music to hear why hearst thou music "
son music to hear why hearst thou music and thee the world mouth re painted how a fomen forthing and so so thou the world i to keed for thy sunfeathert me dis caired thee beauty one womblong of my sweet were it not than my self a long have preasor of the fairt a a send the beauty on your self and thee thee belies summers with lost and contould me doth long hast where it not thou art i am not love and this paress by yet are gondless by t
----- temperature: 0.75
----- Generating with seed: "son music to hear why hearst thou music "
son music to hear why hearst thou music comed be endand the forghts on the ramethou akn that mull for all meame doth londs crouch pottricet it st oll whor be endle butl you tremes wore thoughtshenger muli shall the self for this pleing math not to be st leave whose be night dil

<keras.callbacks.callbacks.History at 0x641058f50>

In [293]:
def gen_sonnet(model, seed):
    "generate sonnet from model given seed"

    for temperature in [0.25, 0.75, 1.5]:
        print('\n ==temperature:', temperature)

        generated = ''
        sentence = seed
        generated += sentence
        print('==Generating with seed: "' + sentence + '"')
        
        print('\n', seed)
        for line in range(13):
            
            sentence = sentence
            
            for i in range(400): # why 400?
                x_pred = np.zeros((1, maxlen, len(chars)))
                for t, char in enumerate(sentence):
                    x_pred[0, t, char_indices[char]] = 1.

                preds = model.predict(x_pred, verbose=0)[0]
                next_index = sample(preds, temperature)
                next_char = indices_char[next_index]

                sentence = sentence[1:] + next_char

            sonnet = sentence
            print('\n', sonnet)
            
    


In [None]:
char_seed = "shall i compare thee to a summers day "

In [294]:
gen_sonnet(model, seed=char_seed)


 ==temperature: 0.25
==Generating with seed: "shall i compare thee to a summers day "

 shall i compare thee to a summers day 

 a e a ree eo beeo aiefaaafhrtta meo oe

 aaio o wth etitnoner ao esesooayt etre

 unyr earsuo a eay to jmosreeruooooooo 

 oeooedi ealhipa eyhtnoia e eye elo reo

 aoo o ooooo o oenieyieeayhtne elesomes

   eifoio uooeayhrg gteoo einesati wtst

 ysoo yeoo esy eaysabesaehomaehmti eres

  o ooeo ealllayeteelsyia iiaaa e eyhte

  a o eeyenhrle reay temi tnoitn eli o 

 eahsaeeoiayu oruoen vwnoh ui ety ineae

 esvsto oa o ooeaneneio  aaayriaye e oe

 piy cpiaaooooo o oefeey teteito eti tc

 eteeoooooooneaeaneehisoeaehiaehraid re

 ==temperature: 0.75
==Generating with seed: "shall i compare thee to a summers day "

 shall i compare thee to a summers day 

 ooo o oeoo o rrmgngse reo mua o oeoooo

 tia me reo ioo nellaiesrslbya eu rei a

  sltttlgvttuifooooo o eo enhme aaoeooa

 o oeeuftilayaaeanhreyhi ifelaaayhcia m

 sdo ouoooooo n npa meo ey elaefiae ino

 lrit

In [None]:
# get stuck on vowel strings

In [None]:
# run again more epochs 

In [None]:
# fit model
model_1000 = Sequential()
model_1000.add(LSTM(125, input_shape=(x.shape[1], x.shape[2])))
model_1000.add(Dense(len(chars), activation='softmax'))
model_1000.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

model_1000.fit(x, y, batch_size=32, epochs=100, verbose=2, callbacks=[print_callback])

Epoch 1/100


In [None]:
gen_sonnet(model_1000, seed=char_seed)

In [208]:
# # choose random index 
# start_index = random.randint(0, len(text) - maxlen - 1)
# # 40 character snippet to seed new sequence
# sentence = text[start_index: start_index + maxlen]

# # or 
# # sentence = char_seed

# # run through 400 characters
# for i in range(400):
#     # predictions 40 characters to choose by 61 possible characters
#     x_pred = np.zeros((1, maxlen, len(chars)))
#     for t, char in enumerate(sentence):
#         # fill in 1 at that character (row /40) in index of character (column)
#         x_pred[0, t, char_indices[char]] = 1.

#     # generate prob of next character (array of probabilites for each character)
#     preds = model.predict(x_pred, verbose=0)[0]
#     # pick character from distribution of pred (output=index)
#     next_index = sample(preds, temperature)
#     # use index to find character
#     next_char = indices_char[next_index]

#     # add character to sentence seeded, replace 1st character with last new character 
#     sentence = sentence[1:] + next_char

# #     sys.stdout.write(next_char)
# # sys.stdout.write(sentence)
# # sys.stdout.flush()
# print(sentence)

t they pay the vess chould not book ho l
