In [3]:
### https://github.com/keras-team/keras/blob/master/examples/lstm_text_generation.py

''' Example script to generate text from Shakespeare's sonnets, word by word (adapted from the Keras Nietzsche text-generation example linked above).

At least 20 epochs are required before the generated text
starts sounding coherent.

It is recommended to run this script on GPU, as recurrent
networks are quite computationally intensive.

If you try this script on new data, make sure your corpus
has at least ~100k characters. ~1M is better.
'''

from __future__ import print_function
from keras.callbacks import LambdaCallback
from keras.models import Sequential
from keras.layers import Dense, Activation
from keras.layers import LSTM
from keras.optimizers import RMSprop
from keras.utils.data_utils import get_file
import numpy as np
import random
import sys
import io

from Sonnet_Set import Sonnet_Set
from Sonnet_Set import Sequence_Type
from Sonnet_Set import Element_Type

# Parse the sonnet file with the project-local Sonnet_Set helper and ask for
# one sequence of word tokens per sonnet.
sonnet_set = Sonnet_Set("data/shakespeare.txt")
# NOTE(review): this handle is neither read nor closed in the visible cells.
sonnets = open("data/shakespeare.txt")
sonnet_sequences = sonnet_set.get_sequences(
    sequence_type=Sequence_Type.SONNET,
    element_type=Element_Type.WORD,
)

# Flatten every sonnet into one long token stream.
text = [
    word
    for sonnet in sonnet_sequences
    for word in sonnet
]
print('corpus length:', len(text))

# Sorted vocabulary plus lookup tables in both directions.
words = sorted(set(text))
print('total words:', len(words))
word_indices = {w: i for i, w in enumerate(words)}
indices_word = dict(enumerate(words))

# Cut the corpus into overlapping windows of `maxlen` words. Each window is
# paired with the word that immediately follows it (the prediction target).
maxlen = 8
step = 1
sentences = []
next_words = []
for i in range(0, len(text) - maxlen, step):
    sentences.append(text[i: i + maxlen])
    next_words.append(text[i + maxlen])
print('nb sequences:', len(sentences))

print('Vectorization...')
# One-hot encode the inputs as (window, position, vocabulary) and the targets
# as (window, vocabulary). The builtin `bool` is used because the `np.bool`
# alias was deprecated in NumPy 1.20 and removed in NumPy 1.24.
x = np.zeros((len(sentences), maxlen, len(words)), dtype=bool)
y = np.zeros((len(sentences), len(words)), dtype=bool)
for i, sentence in enumerate(sentences):
    for t, word in enumerate(sentence):
        x[i, t, word_indices[word]] = 1
    y[i, word_indices[next_words[i]]] = 1


# Assemble the network in one go: a single LSTM layer feeding a softmax over
# the whole vocabulary.
print('Build model...')
vocabulary_size = len(words)
model = Sequential([
    LSTM(10, input_shape=(maxlen, vocabulary_size)),
    Dense(vocabulary_size),
    Activation('softmax'),
])

optimizer = RMSprop(lr=0.01)
model.compile(optimizer=optimizer, loss='categorical_crossentropy')


def sample(preds, temperature=1.0):
    """Sample an index from a (possibly unnormalized) probability array.

    The probabilities are re-weighted as ``p ** (1 / temperature)`` and
    renormalized before a single multinomial draw, so a low temperature
    sharpens the distribution and a high temperature flattens it.

    Parameters
    ----------
    preds : array-like of float
        Non-negative weights. Entries equal to zero (for example masked-out
        words) are never drawn. The weights do not need to sum to one.
    temperature : float
        Positive re-weighting temperature.

    Returns
    -------
    int
        Index of the drawn entry.

    Raises
    ------
    ValueError
        If ``temperature`` is not positive, or if ``preds`` contains a
        negative or non-finite entry, or has no positive entry at all.
    """
    if temperature <= 0:
        raise ValueError('temperature must be positive, got %r' % (temperature,))

    preds = np.asarray(preds).astype('float64')

    # log(0) = -inf is intended here (it becomes probability 0 after exp), so
    # silence the divide-by-zero warning instead of spamming the notebook.
    with np.errstate(divide='ignore'):
        log_preds = np.log(preds) / temperature

    # NaN (negative input), +inf, or an all -inf vector leave nothing sensible
    # to normalize; fail loudly instead of handing NaNs to the sampler.
    peak = log_preds.max()
    if not (-np.inf < peak < np.inf):
        raise ValueError('preds must be non-negative and finite, with at least '
                         'one positive entry')

    # Subtracting the maximum keeps exp() from underflowing to zero everywhere
    # at low temperatures; the normalized distribution is unchanged.
    exp_preds = np.exp(log_preds - peak)
    preds = exp_preds / np.sum(exp_preds)
    probas = np.random.multinomial(1, preds, 1)
    return np.argmax(probas)

Sonnet 99 is not 14 lines, skipping
Sonnet 126 is not 14 lines, skipping
corpus length: 19972
total words: 3160
nb sequences: 19964
Vectorization...
Build model...


In [4]:
# Train the next-word model on the one-hot windows.
# `epochs` is the Keras 2 name of the former `nb_epoch` argument.
model.fit(x, y,
          batch_size=128,
          epochs=15)



Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


<keras.callbacks.History at 0x7ff4d356e198>

In [5]:
# Dimensions of the context matrix: one row per sonnet, one column per
# vocabulary entry.
num_sonnets = len(sonnet_sequences)
num_words = len(words)

# Sonnet context vectors start out as raw per-sonnet counts; each sequence
# element is used directly as a column index.
sonnet_contexts = np.zeros((num_sonnets, num_words))
for sonnet_index, sonnet in enumerate(sonnet_sequences):
    for word in sonnet:
        sonnet_contexts[sonnet_index, word] += 1

# Counts -> relative frequencies within each sonnet.
row_sums = sonnet_contexts.sum(axis=1)
sonnet_contexts = sonnet_contexts / row_sums.reshape(-1, 1)

# Standardize every column across sonnets (zero mean, unit standard
# deviation). The statistics are kept so new sequences can be scaled alike.
column_means = sonnet_contexts.mean(axis=0)
column_sds = sonnet_contexts.std(axis=0)
sonnet_contexts = (sonnet_contexts - column_means) / column_sds

def calculate_word_sequence_context(word_sequence):
    """Return the standardized word-frequency vector of ``word_sequence``.

    Each element of ``word_sequence`` is used directly as a column index.
    The counts are turned into relative frequencies and then standardized
    with the notebook-level ``column_means`` and ``column_sds``, i.e. with the
    same statistics that were applied to the rows of ``sonnet_contexts``.

    Parameters
    ----------
    word_sequence : iterable of int
        Word indices, each smaller than ``num_words``.

    Returns
    -------
    numpy.ndarray
        Vector of length ``num_words``. An empty ``word_sequence`` yields
        NaNs because the frequency normalization divides by zero.
    """
    word_sequence_context = np.zeros((num_words,))

    for word in word_sequence:
        word_sequence_context[word] += 1

    # ndarray.sum() runs in C; the builtin sum() would iterate over every
    # vocabulary entry in Python on each call.
    word_sequence_context = word_sequence_context / word_sequence_context.sum()

    return (word_sequence_context - column_means) / column_sds


In [17]:
import numpy as np
# Generating with some sensicalness

random_sonnet_index = np.random.choice(range(len(sonnet_sequences)))
random_sonnet = sonnet_sequences[random_sonnet_index]

num_sonnets_to_compare_to = 20
context_weight = 0.4

# Pick the end of a line - this should serve as a decent seed for starting a new poem
sentence = random_sonnet[-maxlen - 1:-1]

# Index of the line-break token in the Sonnet_Set word dictionary.
new_line_word = sonnet_set._word_dictionary[Sonnet_Set.NEW_LINE_CHARACTER]

# Loop-invariant 0/1 masks over the vocabulary:
#  - words that have an entry in the rhyme dictionary (allowed at a line end),
#  - words whose index lies below the line-break token (allowed mid-line).
rhyming_words_vector = np.array(
    [1 if i in sonnet_set._rhyme_dictionary else 0 for i in range(len(words))])
not_new_line_vector = np.array(
    [1 if i < new_line_word else 0 for i in range(len(words))])

for diversity in [0.25, 0.75, 1.5]:
    print('----- diversity:', diversity)

    print("----- Generating with seed -----")
    sonnet_set.print_sonnet(sentence, sequence_type=Sequence_Type.SONNET, element_type=Element_Type.WORD)
    print('----- End seed -----')

    current_phrase_window = sentence[:]
    generated_sonnet = []
    current_sonnet_line = 0
    num_syllables_this_line = 0
    # Line-ending words of the two open rhymes: slot 0 is set on lines
    # 0/4/8/12 and matched on lines 2/6/10/13, slot 1 is set on lines 1/5/9
    # and matched on lines 3/7/11.
    previous_rhymable_words = [None, None]

    while current_sonnet_line < 14:

        # One-hot encode the current window of the last `maxlen` words.
        x_pred = np.zeros((1, maxlen, len(words)))
        for t, word in enumerate(current_phrase_window):
            x_pred[0, t, word_indices[word]] = 1.

        preds = model.predict(x_pred, verbose=0)[0]

        if current_sonnet_line > 1:
            # Compare what has been generated so far with every training
            # sonnet (dot product of standardized word-frequency vectors).
            word_sequence_context = calculate_word_sequence_context(generated_sonnet)
            sonnet_similarities = np.matmul(sonnet_contexts, word_sequence_context)
            similarity_order = sonnet_similarities.argsort()
            most_similar_sonnets = similarity_order[-num_sonnets_to_compare_to:][::-1]
            least_similar_sonnets = similarity_order[0:num_sonnets_to_compare_to]

            # Favor words typical of the most similar sonnets and penalize
            # words typical of the least similar ones. This must start from a
            # zero vector; np.array((num_words,)) would be the one-element
            # array [num_words].
            context_weights = np.zeros((num_words,))

            for sonnet_index in most_similar_sonnets:
                context_weights = context_weights + sonnet_contexts[sonnet_index]

            for sonnet_index in least_similar_sonnets:
                context_weights = context_weights - sonnet_contexts[sonnet_index]

            # Shift to non-negative values and normalize to a distribution.
            # If the weights carry no information (all equal, or NaN), the
            # model prediction is left untouched instead of becoming NaN.
            context_weights = context_weights - context_weights.min()
            total_context_weight = context_weights.sum()
            if total_context_weight > 0:
                context_weights = context_weights / total_context_weight
                preds = (1 - context_weight) * preds + (context_weight * context_weights)

        # If we're on the last syllable, this must be a rhymable word
        if num_syllables_this_line >= 9:
            if current_sonnet_line in [0, 1, 4, 5, 8, 9, 12]:
                # This line opens a rhyme: any word with a rhyme entry will
                # do; remember it for the line that has to match it later.
                preds = np.multiply(preds, rhyming_words_vector)
                next_word = sample(preds, diversity)
                if current_sonnet_line in [0, 4, 8, 12]:
                    previous_rhymable_words[0] = next_word
                else:
                    previous_rhymable_words[1] = next_word
            else:
                # Lines 2, 3, 6, 7, 10, 11 and 13 close a rhyme: only the
                # recorded partners of the matching earlier line ending
                # are allowed.
                if current_sonnet_line in [2, 6, 10, 13]:
                    previous_rhymable_word = previous_rhymable_words[0]
                else:
                    previous_rhymable_word = previous_rhymable_words[1]

                rhyme_partners = sonnet_set._rhyme_pairs[sonnet_set._rhyme_dictionary[previous_rhymable_word]]
                rhyme_partner_vector = [1 if i in rhyme_partners else 0 for i in range(len(preds))]
                preds = np.multiply(preds, rhyme_partner_vector)
                next_word = sample(preds, diversity)

            # Emit the rhyme word now; the line-break token that follows it is
            # emitted by the shared code below.
            current_phrase_window = current_phrase_window[1:]+[next_word]
            generated_sonnet.append(next_word)
            next_word = new_line_word
            current_sonnet_line += 1
            num_syllables_this_line = 0
        else:
            # Mid-line: exclude the line-break token and everything indexed
            # after it, then track how many syllables the line has so far.
            preds = np.multiply(preds, not_new_line_vector)
            next_word = sample(preds, diversity)
            num_syllables_this_line += sonnet_set._syllable_list_num[next_word][0]

        current_phrase_window = current_phrase_window[1:]+[next_word]
        generated_sonnet.append(next_word)

    sonnet_set.print_sonnet(generated_sonnet, sequence_type=Sequence_Type.SONNET, element_type=Element_Type.WORD)

----- diversity: 0.25
----- Generating with seed -----
I may not remove nor be removed:
:
:
.
----- End seed -----




And i i if i not to me be be,
Or like by love's and and a time days place,
To not to time with a his a strong see,
So beauty's world shall i you to thee grace:
To you to love with your and doth me me,
O so all that i to your self love's thee,
So in the love to my love to time be,
When i which thou which in the love him be:
To sad in and of my self but time time,
In the love with with doth cold eyes to be,
And in love to i love to be be time,
But and a and a his youth to day be:
  So say that with doth your love to be not,
  So not i when with thy doth our doth not.
----- diversity: 0.75
----- Generating with seed -----
I may not remove nor be removed:
:
:
.
----- End seed -----
And in me looks to do i better dost hate,
Time thou me then ill then not a so thing,
That centre i a that eyed jewel love hate,
Than fault night unworthiness fair best thing:
Base not him to i nor your fair part denied,
Interest dispraise in more such a hand might,
So then jealous love chance saucy no beside,
Ne

In [72]:
# Show how many entries the Sonnet_Set's character list holds
# (presumably its character-level vocabulary - TODO confirm against Sonnet_Set).
print(len(sonnet_set._character_list))

31
