In [67]:
### https://github.com/keras-team/keras/blob/master/examples/lstm_text_generation.py

''' Example script to generate text from Nietzsche's writings.

At least 20 epochs are required before the generated text
starts sounding coherent.

It is recommended to run this script on GPU, as recurrent
networks are quite computationally intensive.

If you try this script on new data, make sure your corpus
has at least ~100k characters. ~1M is better.
'''

from __future__ import print_function
from keras.callbacks import LambdaCallback
from keras.models import Sequential
from keras.layers import Dense, Activation
from keras.layers import LSTM
from keras.optimizers import RMSprop
from keras.utils.data_utils import get_file
import numpy as np
import random
import sys
import io

from Sonnet_Set import Sonnet_Set
from Sonnet_Set import Sequence_Type
from Sonnet_Set import Element_Type

# Parse the sonnet file and request one sequence per sonnet, tokenised by word.
sonnet_set = Sonnet_Set("data/shakespeare.txt")
# NOTE(review): this handle is never read or closed in the cells shown here.
sonnets = open("data/shakespeare.txt")
sonnet_sequences = sonnet_set.get_sequences(
    sequence_type=Sequence_Type.SONNET,
    element_type=Element_Type.WORD,
)

# Concatenate every sonnet into one flat token stream (the training corpus).
text = []
for sonnet in sonnet_sequences:
    text.extend(sonnet)
print('corpus length:', len(text))

# Vocabulary: the distinct tokens in sorted order, plus lookups in both directions.
words = sorted(set(text))
print('total words:', len(words))
word_indices = {w: i for i, w in enumerate(words)}
indices_word = {i: w for i, w in enumerate(words)}

# Cut the corpus into overlapping windows of maxlen words, advancing `step`
# words each time; every window is paired with the word that follows it.
maxlen = 8
step = 1
window_starts = range(0, len(text) - maxlen, step)
sentences = [text[start: start + maxlen] for start in window_starts]
next_words = [text[start + maxlen] for start in window_starts]
print('nb sequences:', len(sentences))

print('Vectorization...')
# One-hot encode the training data:
#   x[i, t, k] is set when the t-th word of window i is vocabulary entry k
#   y[i, k]    is set when the word following window i is vocabulary entry k
# The builtin `bool` is used as dtype: the `np.bool` alias was deprecated in
# NumPy 1.20 and removed in 1.24, where it raises AttributeError.
x = np.zeros((len(sentences), maxlen, len(words)), dtype=bool)
y = np.zeros((len(sentences), len(words)), dtype=bool)
for i, sentence in enumerate(sentences):
    for t, word in enumerate(sentence):
        x[i, t, word_indices[word]] = 1
    y[i, word_indices[next_words[i]]] = 1


# build the model: a single LSTM over one-hot word windows, followed by a
# softmax over the whole vocabulary (next-word prediction)
print('Build model...')
model = Sequential()
model.add(LSTM(128, input_shape=(maxlen, len(words))))
model.add(Dense(len(words)))
model.add(Activation('softmax'))

# The optimizer must be defined in this cell: with the assignment commented
# out, compile() only worked when `optimizer` was left over in the kernel from
# an earlier run and raised NameError after Restart & Run All.
optimizer = RMSprop(lr=0.01)
model.compile(loss='categorical_crossentropy', optimizer=optimizer)


def sample(preds, temperature=1.0):
    """Draw one index from a probability array after temperature rescaling.

    Each weight p is turned into p ** (1 / temperature) and the result is
    renormalised, so temperatures below 1 sharpen the distribution and
    temperatures above 1 flatten it. Entries equal to 0 (e.g. masked-out
    candidates) keep probability 0 and are never drawn.

    Args:
        preds: array-like of non-negative weights; need not sum to 1.
        temperature: positive scaling factor applied to the log-weights.

    Returns:
        The sampled index (a NumPy integer).

    Raises:
        ValueError: if no entry has a positive, finite weight, since there is
            nothing to sample from.
    """
    preds = np.asarray(preds).astype('float64')
    # log(0) is -inf by design for masked entries; silence the warning only.
    with np.errstate(divide='ignore'):
        logits = np.log(preds) / temperature
    peak = np.max(logits)
    if not np.isfinite(peak):
        raise ValueError('sample() needs at least one positive, finite probability')
    # Shift by the largest logit before exponentiating: the normalised result
    # is mathematically unchanged, but the best candidate maps to exp(0) = 1,
    # so a low temperature can no longer underflow every entry to 0 (0/0 = NaN).
    exp_preds = np.exp(logits - peak)
    preds = exp_preds / np.sum(exp_preds)
    probas = np.random.multinomial(1, preds, 1)
    return np.argmax(probas)

Sonnet 99 is not 14 lines, skipping
Sonnet 126 is not 14 lines, skipping
[[1], [2], [2], [1], [2], [2], [1], [2], [2], [1], [1], [1, 2], [1], [1], [1], [1], [2], [1], [1], [1], [2], [1], [2], [1], [1], [3], [1], [3], [1], [1], [1], [1], [1], [1], [1], [1], [1], [1], [4], [1], [1, 2], [1], [2], [1], [3], [1], [1], [1], [1], [1], [1, 2], [1], [1], [1], [1], [3], [1], [2], [2], [2], [1], [2], [1], [2], [2], [1], [1], [1], [1], [3], [2], [1], [1], [1], [1], [2], [1], [1], [1], [1], [1], [1], [2], [2], [1], [2], [1], [1], [1], [2], [1], [1], [1], [3], [1], [1], [1], [1], [2], [1], [1], [1], [1], [1], [1], [1, 2], [1], [1], [2], [1, 2], [2], [1], [1], [2], [1], [1], [3], [1], [2], [1], [1], [1], [1], [2], [1], [1], [1], [2], [1], [1], [1], [1], [1], [1], [1], [1], [2], [2], [3], [1], [1], [1], [1], [1], [1], [1], [1], [1], [1], [1], [1], [1, 2], [1], [1], [2, 3], [1], [2], [1], [2, 3], [1], [2], [2], [1], [1, 2], [1], [1], [2], [1], [2], [2], [3], [1], [1], [1], [1], [2], [1], [4], [2], [1],

Build model...


In [80]:
# Train the next-word model on the one-hot windows (60 passes over the data).
model.fit(x, y, batch_size=128, epochs=60)

Epoch 1/60
Epoch 2/60
Epoch 3/60
Epoch 4/60
Epoch 5/60
Epoch 6/60
Epoch 7/60
Epoch 8/60
Epoch 9/60
Epoch 10/60
Epoch 11/60
Epoch 12/60
Epoch 13/60
Epoch 14/60
Epoch 15/60
Epoch 16/60
Epoch 17/60
Epoch 18/60
Epoch 19/60
Epoch 20/60
Epoch 21/60
Epoch 22/60
Epoch 23/60
Epoch 24/60
Epoch 25/60
Epoch 26/60
Epoch 27/60
Epoch 28/60
Epoch 29/60
Epoch 30/60
Epoch 31/60
Epoch 32/60
Epoch 33/60
Epoch 34/60
Epoch 35/60
Epoch 36/60
Epoch 37/60
Epoch 38/60
Epoch 39/60
Epoch 40/60
Epoch 41/60
Epoch 42/60
Epoch 43/60
Epoch 44/60
Epoch 45/60
Epoch 46/60
Epoch 47/60
Epoch 48/60
Epoch 49/60
Epoch 50/60
Epoch 51/60
Epoch 52/60
Epoch 53/60
Epoch 54/60
Epoch 55/60
Epoch 56/60
Epoch 57/60
Epoch 58/60
Epoch 59/60
Epoch 60/60


<keras.callbacks.History at 0x7f5ec465ec18>

In [81]:
num_sonnets = len(sonnet_sequences)
num_words = len(words)


# Calculate sonnet context vectors: one row per sonnet, one column per word.
# assumes each element of a sonnet sequence is an integer id in
# [0, num_words) that can be used directly as a column index - TODO confirm

sonnet_contexts = np.zeros((num_sonnets, num_words))

# Raw counts of every word in every sonnet.
for sonnet_index, sonnet in enumerate(sonnet_sequences):
    for word in sonnet:
        sonnet_contexts[sonnet_index, word] += 1

# Counts -> relative frequencies within each sonnet.
row_sums = sonnet_contexts.sum(axis=1)
sonnet_contexts = sonnet_contexts / row_sums[:, np.newaxis]

# Per-word mean and standard deviation across sonnets, used for z-scoring
# here and again in calculate_word_sequence_context.
column_means = sonnet_contexts.mean(axis=0)
column_sds = sonnet_contexts.std(axis=0)
# A word whose frequency is identical in every sonnet has zero deviation and
# would turn its column into 0/0 = NaN, which then spreads to every
# similarity computed from these vectors. Dividing by 1 instead leaves such a
# column at exactly 0 after centring.
column_sds[column_sds == 0] = 1.0

sonnet_contexts = sonnet_contexts - column_means[np.newaxis, :]
sonnet_contexts = sonnet_contexts / column_sds[np.newaxis, :]

def calculate_word_sequence_context(word_sequence):
    """Return the standardised word-frequency vector of a word sequence.

    The sequence's words are counted into a vector of length `num_words`,
    the counts are divided by their total, and the result is centred and
    scaled with the module-level `column_means` and `column_sds`.

    Args:
        word_sequence: iterable of values usable as indices into a
            `num_words`-long array.

    Returns:
        A 1-D NumPy array of length `num_words`.
    """
    counts = np.zeros((num_words,))
    for word_id in word_sequence:
        counts[word_id] += 1

    frequencies = counts / counts.sum()
    return (frequencies - column_means) / column_sds


In [97]:
import numpy as np
# Generating with some sensicalness
#
# For each sampling temperature ("diversity") a 14-line sonnet is generated
# word by word. The LSTM's next-word distribution is optionally blended with
# a "context" distribution derived from the training sonnets most and least
# similar to what has been generated so far, and is masked so that each line
# ends on a word that fits the rhyme bookkeeping below.

random_sonnet_index = np.random.choice(range(len(sonnet_sequences)))
random_sonnet = sonnet_sequences[random_sonnet_index]

num_sonnets_to_compare_to = 20
# NOTE(review): at 1.0 the blend below multiplies the LSTM prediction by 0, so
# from the third line onwards words are drawn from the context distribution
# alone. Lower this value to actually mix the two.
context_weight = 1.0

# Pick the end of a line - this should serve as a decent seed for starting a new poem
sentence = random_sonnet[-maxlen - 1:-1]

# Not used by the generation loop below; kept so the cell still defines the name.
start_index = random.randint(0, len(text) - maxlen - 1)

# Both masks are independent of the temperature, so they are built once.
new_line_word = sonnet_set._word_dictionary[Sonnet_Set.NEW_LINE_CHARACTER]
# 1 for ids that have an entry in the rhyme dictionary, 0 otherwise.
rhyming_words_vector = [1 if i in sonnet_set._rhyme_dictionary.keys() else 0 for i in range(len(words))]
# 1 for ids below the new-line token's id, 0 otherwise.
# assumes every id at or above it is a line break / punctuation token - TODO confirm
not_new_line_vector = [1 if i < new_line_word else 0 for i in range(len(words))]

for diversity in [0.25, 0.75, 1, 1.5]:
    print('----- diversity:', diversity)

    print("----- Generating with seed -----")
    sonnet_set.print_sonnet(sentence, sequence_type=Sequence_Type.SONNET, element_type=Element_Type.WORD)
    print('----- End seed -----')

    current_phrase_window = sentence[:]
    generated_sonnet = []
    current_sonnet_line = 0
    num_syllables_this_line = 0
    # Line-ending words of the two open rhymes; each is set by one of the
    # lines 0, 1, 4, 5, 8, 9, 12 and consumed by a later line.
    previous_rhymable_words = [None, None]

    while current_sonnet_line < 14:

        # One-hot encode the current window and ask the model for the next word.
        x_pred = np.zeros((1, maxlen, len(words)))

        for t, word in enumerate(current_phrase_window):
            x_pred[0, t, word_indices[word]] = 1.

        preds = model.predict(x_pred, verbose=0)[0]

        if current_sonnet_line > 1:
            # Similarity of the text generated so far to every training sonnet
            # (dot product of standardised frequency vectors), in one product.
            word_sequence_context = calculate_word_sequence_context(generated_sonnet)
            sonnet_similarities = np.matmul(sonnet_contexts, word_sequence_context)
            similarity_order = sonnet_similarities.argsort()
            most_similar_sonnets = similarity_order[-num_sonnets_to_compare_to:][::-1]
            least_similar_sonnets = similarity_order[0:num_sonnets_to_compare_to]

            # Start from a zero vector. The previous np.array((num_words,))
            # built the one-element array [num_words], which broadcast a
            # constant offset into every entry.
            context_weights = np.zeros((num_words,))

            for sonnet_index in most_similar_sonnets:
                context_weights = context_weights + sonnet_contexts[sonnet_index]

            for sonnet_index in least_similar_sonnets:
                context_weights = context_weights - sonnet_contexts[sonnet_index]

            # Shift to non-negative values and normalise to a distribution.
            # (An extra division by the maximum would cancel in this step.)
            context_weights = context_weights - context_weights.min()
            context_total = context_weights.sum()

            # If the weights are degenerate (all equal, or NaN) there is no
            # context signal; keep the model's prediction unchanged.
            if context_total > 0:
                context_weights = context_weights / context_total
                preds = (1 - context_weight) * preds + (context_weight * context_weights)

        # If we're on the last syllable, this must be a rhymable word
        if num_syllables_this_line >= 9:
            if current_sonnet_line in [0, 1, 4, 5, 8, 9, 12]:
                # This line opens a rhyme: any word with a rhyme entry will do.
                preds = np.multiply(preds, rhyming_words_vector)
                next_word = sample(preds, diversity)
                if current_sonnet_line in [0, 4, 8, 12]:
                    previous_rhymable_words[0] = next_word
                else:
                    previous_rhymable_words[1] = next_word

                #print("Next rhymable word is '%s'" % sonnet_set._word_list[next_word])
            elif current_sonnet_line in [2, 3, 6, 7, 10, 11, 13]:
                # This line closes a rhyme opened earlier.
                if current_sonnet_line in [2, 6, 10, 13]:
                    previous_rhymable_word = previous_rhymable_words[0]
                else:
                    previous_rhymable_word = previous_rhymable_words[1]

                rhyme_partners = sonnet_set._rhyme_pairs[sonnet_set._rhyme_dictionary[previous_rhymable_word]]
                rhyme_partner_vector = [1 if i in rhyme_partners else 0 for i in range(len(preds))]
                # Never rhyme a word with itself.
                rhyme_partner_vector[previous_rhymable_word] = 0
                preds = np.multiply(preds, rhyme_partner_vector)
                next_word = sample(preds, diversity)

                #print("Next rhyming word is '%s'" % sonnet_set._word_list[next_word])

            # Emit the line-ending word, then let the shared code below emit
            # the new-line token and start the next line from zero syllables.
            current_phrase_window = current_phrase_window[1:]+[next_word]
            generated_sonnet.append(next_word)
            next_word = new_line_word
            current_sonnet_line += 1
            num_syllables_this_line = 0
        else:
            # Mid-line: only ids below the new-line token are allowed.
            preds = np.multiply(preds, not_new_line_vector)
            next_word = sample(preds, diversity)
            num_syllables_this_line += sonnet_set._syllable_list_num[next_word][0]

            #print("Next word is '%s'" % sonnet_set._word_list[next_word])

        current_phrase_window = current_phrase_window[1:]+[next_word]
        generated_sonnet.append(next_word)

    #print(generated_sonnet)

    sonnet_set.print_sonnet(generated_sonnet, sequence_type=Sequence_Type.SONNET, element_type=Element_Type.WORD)

----- diversity: 0.25
----- Generating with seed -----
Thy sweet virtue answer not thy show:
:
:
.
----- End seed -----




And they that have fair i at set more all,
And make my love before my verse in sweet,
Crooked join reeks flourish attainted thought fall,
Situation return iniquity meet:
Heaven distance spring mend plight jacks nimble purge,
Foison heavy revenues fled actor heaven,
Cheeks bar foe scarlet thinking gazed adonis urge,
Red moods onset fester we if sometimes even:
Impanelled sin hear stops quill t' scanted wrought,
Cheater every petty came paws onward lover,
Pry overthrow gravity who's rearward sought,
Curious lest plods fuel shade manners light cover:
  Off breasts besiege contracted wise then o'er,
  Love's proving snow lover's shines delves pass shore.
----- diversity: 0.75
----- Generating with seed -----
Thy sweet virtue answer not thy show:
:
:
.
----- End seed -----
And they that have a i here find thy growing,
Quite if it it my with part but be removed,
Life sober teachest admire furrows knowing,
Decree tattered asked justify special beloved:
Forget theirs use glance alas stormy own