In [46]:
from keras.models import Sequential
from keras.layers import LSTM, Dense, Activation
from keras.optimizers import RMSprop

import numpy as np
import random
import sys

In [72]:
class Sonnet:
    """Ordered collection of the text lines that make up one sonnet.

    Lines are stored exactly as they are added. All tokenizers split on a
    single space character, so other whitespace (such as a trailing newline)
    stays attached to its token and consecutive spaces yield empty tokens.
    """

    def __init__(self):
        # Lines of the sonnet, in insertion order
        self.lines = []

    def add_line(self, line):
        """Append one line of text to the sonnet."""
        self.lines.append(line)

    def print_lines(self):
        """Print every stored line, one print call per line."""
        for text in self.lines:
            print(text)

    def get_lines(self):
        """Return the internal list of lines (the list itself, not a copy)."""
        return self.lines

    def get_poem_tokenization(self):
        """Return every word of the sonnet as one flat list."""
        return [word for text in self.lines for word in text.split(' ')]

    def get_stanza_tokenization(self):
        """Return four word lists, one per stanza.

        The stanzas are lines 1-4, 5-8, 9-12 and 13-14. The result always has
        four entries; a stanza with no lines yet is an empty list.
        """
        stanza_bounds = ((0, 4), (4, 8), (8, 12), (12, 14))
        return [
            [word for text in self.lines[first:last] for word in text.split(' ')]
            for first, last in stanza_bounds
        ]

    def get_line_tokenization(self):
        """Return one word list per line, in line order."""
        return [text.split(' ') for text in self.lines]

# Takes in a string and makes it lowercase. Leading/trailing whitespace
# and punctuation are kept (the punctuation-removal step is disabled).
def prettify(line):
    """Return `line` converted to lowercase.

    Nothing else is altered: whitespace (including any trailing newline) and
    punctuation are passed through unchanged.
    """
    return line.lower()

# List of all sonnets contained in data/shakespeare.txt
sonnets = []

# The context manager closes the file handle even if parsing raises.
with open("data/shakespeare.txt", "r") as f:
    # The first line is read and discarded; it only primes the loop
    # (presumably it is the first sonnet's header -- TODO confirm).
    line = f.readline()
    while line:
        # Create the new sonnet object and add it to the list
        sonnet = Sonnet()
        for _ in range(14):
            line = prettify(f.readline())
            sonnet.add_line(line)

        sonnets.append(sonnet)

        # Skip over the space in between sonnets. The last of these reads
        # returns '' at end of file, which terminates the while loop.
        for _ in range(3):
            line = f.readline()

In [77]:
def get_all_lines():
    """Return a new flat list holding the lines of every sonnet in `sonnets`.

    Lines appear in sonnet order, then in line order within each sonnet.
    """
    return [text for poem in sonnets for text in poem.get_lines()]

def create_model():
    """Build and compile the character-level LSTM network.

    The vocabulary is the sorted set of characters found in the lines
    returned by get_all_lines(). The network takes one-hot windows of
    shape (40, vocabulary size) and outputs a softmax distribution over
    the vocabulary.

    Returns:
        The compiled Sequential model (categorical cross-entropy loss,
        RMSprop optimizer with learning rate 0.01).
    """
    vocabulary = sorted({ch for text in get_all_lines() for ch in text})
    vocab_size = len(vocabulary)

    network = Sequential()
    # 128 LSTM units reading windows of 40 one-hot encoded characters
    network.add(LSTM(128, input_shape=(40, vocab_size)))
    # One output probability per vocabulary character
    network.add(Dense(vocab_size, activation='softmax'))

    optimizer = RMSprop(learning_rate=0.01)
    network.compile(loss='categorical_crossentropy', optimizer=optimizer)

    return network

def train_model(model, epochs=100):
    """Fit `model` to predict the next character from 40-character windows.

    All lines returned by get_all_lines() are joined with a single space
    into one training text. Every 40-character window of that text becomes
    a one-hot input of shape (40, vocabulary size); its target is the
    one-hot encoding of the character that immediately follows the window.

    Args:
        model: object exposing fit(x, y, batch_size=..., epochs=...);
            in this notebook, the network built by create_model().
        epochs: number of training epochs passed to model.fit.
    """
    # Window length; must agree with the input_shape used in create_model.
    window = 40

    lines = get_all_lines()
    chars = sorted(set(c for line in lines for c in line))
    char_indices = dict((c, i) for i, c in enumerate(chars))
    full_text = ' '.join(lines)

    # One (window, following character) pair per starting offset
    starts = range(len(full_text) - window)
    sequences = [full_text[i: i + window] for i in starts]
    next_chars = [full_text[i + window] for i in starts]

    # Use the builtin `bool`: the `np.bool` alias was deprecated in
    # NumPy 1.20 and removed in 1.24, where it raises AttributeError.
    x = np.zeros((len(sequences), window, len(chars)), dtype=bool)
    y = np.zeros((len(sequences), len(chars)), dtype=bool)

    for i, sequence in enumerate(sequences):
        for j, char in enumerate(sequence):
            x[i, j, char_indices[char]] = 1
        y[i, char_indices[next_chars[i]]] = 1

    model.fit(x, y, batch_size=128, epochs=epochs)

# Build the network and train it for 60 epochs. `model` stays a
# module-level global because generate_sonnet() reads it directly.
model = create_model()
train_model(model, epochs=60)
    

Epoch 1/60
Epoch 2/60
Epoch 3/60
Epoch 4/60
Epoch 5/60
Epoch 6/60
Epoch 7/60
Epoch 8/60
Epoch 9/60
Epoch 10/60
Epoch 11/60
Epoch 12/60
Epoch 13/60
Epoch 14/60
Epoch 15/60
Epoch 16/60
Epoch 17/60
Epoch 18/60
Epoch 19/60
Epoch 20/60
Epoch 21/60
Epoch 22/60
Epoch 23/60
Epoch 24/60
Epoch 25/60
Epoch 26/60
Epoch 27/60
Epoch 28/60
Epoch 29/60
Epoch 30/60
Epoch 31/60
Epoch 32/60
Epoch 33/60
Epoch 34/60
Epoch 35/60
Epoch 36/60
Epoch 37/60
Epoch 38/60
Epoch 39/60
Epoch 40/60
Epoch 41/60
Epoch 42/60
Epoch 43/60
Epoch 44/60
Epoch 45/60
Epoch 46/60
Epoch 47/60
Epoch 48/60
Epoch 49/60
Epoch 50/60
Epoch 51/60
Epoch 52/60
Epoch 53/60
Epoch 54/60
Epoch 55/60
Epoch 56/60
Epoch 57/60
Epoch 58/60
Epoch 59/60
Epoch 60/60


In [79]:
def sample(preds, temperature=1.0):
    """Draw one index at random from a temperature-scaled distribution.

    Args:
        preds: array-like of probabilities (converted to float64).
        temperature: divisor applied to the log-probabilities before they
            are re-normalized; values below 1 sharpen the distribution,
            values above 1 flatten it.

    Returns:
        The index selected by a single multinomial draw.
    """
    probabilities = np.asarray(preds).astype('float64')
    scaled_logs = np.log(probabilities) / temperature
    weights = np.exp(scaled_logs)
    # Re-normalize so the weights form a probability vector again
    distribution = weights / np.sum(weights)
    # A single trial yields a one-hot row; argmax recovers the drawn index
    draw = np.random.multinomial(1, distribution, 1)
    return np.argmax(draw)
    

def generate_sonnet():
    """Print 400 generated characters for each sampling temperature.

    For every temperature, generation starts from a fixed 40-character seed.
    The global `model` predicts a distribution over the next character from
    the current 40-character window, sample() draws one character from it,
    and the window slides forward by one. Text is written to stdout as it
    is produced; nothing is returned.
    """
    lines = get_all_lines()
    chars = sorted(set(c for line in lines for c in line))
    char_indices = dict((c, i) for i, c in enumerate(chars))
    indices_char = dict((i, c) for i, c in enumerate(chars))

    # The last entry used to be written `1,2`, which silently produced the
    # two temperatures 1 and 2 instead of the intended 1.2.
    for diversity in [0.2, 0.5, 1.0, 1.2]:
        print('------ temperature: ', diversity)

        # The seed is exactly 40 characters long (trailing newline included),
        # which matches the window length the model was built with.
        sentence = prettify("shall I compare thee to a summer's day?\n")

        print('----- Generating with seed: "' + sentence + '"')
        sys.stdout.write(sentence)

        for i in range(400):
            # One-hot encode the current window as a batch of one
            x_pred = np.zeros((1, 40, len(chars)))
            for j, char in enumerate(sentence):
                x_pred[0, j, char_indices[char]] = 1.0

            preds = model.predict(x_pred, verbose=0)[0]
            next_index = sample(preds, diversity)
            next_char = indices_char[next_index]

            # Slide the window: drop the oldest character, append the new one
            sentence = sentence[1:] + next_char

            sys.stdout.write(next_char)
            sys.stdout.flush()
        # Finish the block with a newline. Written directly to stdout because
        # a bare print() emits "()" when run under Python 2.
        sys.stdout.write('\n')

# Print generated text for every temperature, using the trained global `model`.
generate_sonnet()

('------ temperature: ', 0.2)
----- Generating with seed: "shall i compare thee to a summer's day?
"
shall i compare thee to a summer's day?
 when thou art the world of the truth, thee she dost brow,
 and then the world should more than thy self bright,
 and there is not thee i cannot the truth.
 
 which she lov's which thou with

  after removing the cwd from sys.path.


 thee thy love,
 which thou art with thee thee i am thee,
   then whose brief farses and there thou art,
 and they had thee more breast with thee despited,
 and thence thee she thought it is not face,
 and ther()
('------ temperature: ', 0.5)
----- Generating with seed: "shall i compare thee to a summer's day?
"
shall i compare thee to a summer's day?
 when thou art hath thou beauty seemed thee,
   then then were it when i am not and me kind,
 steeatchoun in hie advantage is so surve.
 but the glory with the wired, which live erred
 my say thy self all my heart doth love's sweet self death,
 and made to thee i am new fiels come,
 but the heart and my will corrupion thee shines more,
 and with thee the world with golden tears,
 and look it is be()
('------ temperature: ', 1.0)
----- Generating with seed: "shall i compare thee to a summer's day?
"
shall i compare thee to a summer's day?
 in cheeks feed, but sweetes of thee may,
 crown and pity now bobbly things might,
 that in these offe