In [2]:
import tensorflow as tf

from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.layers import Embedding, LSTM, Dense, Bidirectional
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
import numpy as np 
import pickle

In [6]:
# Generation / architecture settings shared by the cells below.
# total_words: passed to both the Embedding layer and the softmax Dense layer.
total_words = 7228
# max_sequence_len: the model input is padded to max_sequence_len - 1 tokens.
max_sequence_len = 15
# next_words: how many words genSonnet appends to the seed (13 lines x 7 words).
next_words = 13 * 7

In [3]:
# Restore the pickled tokenizer; genSonnet relies on its texts_to_sequences()
# method and its word_index mapping.
# NOTE(security): pickle.load can execute arbitrary code while unpickling, so
# only point token_path at a file that comes from a trusted source.
token_path = "./sonnet_model/data/tokenizer.pkl"
with open(token_path, "rb") as fp:
    tokenizer = pickle.load(fp)

In [14]:
# Build the network whose weights are loaded in the next cell:
# embedding -> bidirectional LSTM -> softmax over the whole vocabulary.
model = Sequential([
    # Inputs are max_sequence_len - 1 token ids, each embedded in 100 dimensions.
    Embedding(total_words, 100, input_length=max_sequence_len - 1),
    # 150 units per direction; the summary below reports 300 output features.
    Bidirectional(LSTM(150)),
    # One probability per vocabulary index.
    Dense(total_words, activation='softmax'),
])
# `lr` is a deprecated alias that newer Keras releases reject; `learning_rate`
# is the supported keyword. No compile() call is visible in this notebook, so
# the optimizer is not attached to the model in the cells shown here.
adam = Adam(learning_rate=0.01)

In [15]:
# Load previously saved weights into the freshly built model. The layer
# structure defined above must match the one the weights were saved from.
model_path = "./sonnet_model/sonnet-model.hdf5"
model.load_weights(model_path)

In [16]:
# Print each layer's output shape and parameter count as a sanity check.
model.summary()

Model: "sequential_4"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_1 (Embedding)      (None, 14, 100)           722800    
_________________________________________________________________
bidirectional_1 (Bidirection (None, 300)               301200    
_________________________________________________________________
dense_1 (Dense)              (None, 7228)              2175628   
Total params: 3,199,628
Trainable params: 3,199,628
Non-trainable params: 0
_________________________________________________________________


In [17]:
def genSonnet(seed_text, words_per_line=7, lines_per_stanza=4):
    """Extend ``seed_text`` with model-predicted words and print it as verse.

    Starting from ``seed_text``, the function repeatedly tokenizes the text
    generated so far, pads it to ``max_sequence_len - 1`` tokens, asks
    ``model`` for the most probable next vocabulary index and appends the
    matching word. This is done ``next_words`` times. The result is printed
    with a line break every ``words_per_line`` words and an extra blank line
    every ``words_per_line * lines_per_stanza`` words.

    Relies on the notebook-level names ``tokenizer``, ``model``,
    ``next_words`` and ``max_sequence_len``.

    Args:
        seed_text: Opening words of the poem.
        words_per_line: Words printed before each line break (default 7).
        lines_per_stanza: Lines printed before each blank separator line
            (default 4, i.e. every 28 words with the default line width).

    Returns:
        None. The poem is written to standard output.

    Raises:
        ValueError: If ``words_per_line`` or ``lines_per_stanza`` is < 1.
    """
    if words_per_line < 1 or lines_per_stanza < 1:
        raise ValueError("words_per_line and lines_per_stanza must be >= 1")

    # Invert the vocabulary once instead of scanning word_index for every
    # generated word.
    index_to_word = {index: word for word, index in tokenizer.word_index.items()}

    for _ in range(next_words):
        token_list = tokenizer.texts_to_sequences([seed_text])[0]
        token_list = pad_sequences([token_list], maxlen=max_sequence_len - 1, padding='pre')
        # Sequential.predict_classes() was removed from Keras; taking the
        # argmax of the predicted distribution is the documented replacement.
        probabilities = model.predict(token_list, verbose=0)
        predicted = int(np.argmax(probabilities, axis=-1)[0])
        # An index with no vocabulary entry yields an empty word, matching the
        # previous fallback behaviour.
        seed_text += " " + index_to_word.get(predicted, "")

    words_per_stanza = words_per_line * lines_per_stanza
    for i, word in enumerate(seed_text.split(" ")):
        if i % words_per_line == 0:
            print()
        if i % words_per_stanza == 0:
            print()
        print(word, end=" ")

In [18]:
# next_words is 91 because we estimate 7 words per printed line and generate
# 13 lines' worth of new words after the seed text (7 * 13 = 91).
seed_text = "The sky is bright and blue"

genSonnet(seed_text)



The sky is bright and blue a 
pleasing pain the father of the elm 
tree bough drew he fear of deadly 
other friends displayed fear thy continual haste 

of those in thy mind thy matchless 
away on cupid of love do bless 
è d cheek and thy love in 
you 's ruthless gale of whose unappeased 

conduct the lays more fear spear 'd 
e'en around tranquillity watch she made you 
woods waste of wings of man of 
wealth believed bosom and let in vain 

from me thy flame though others to 
judge whilst many from my passion 