In [1]:
import numpy as np
from pickle import dump, load
from random import randint
from keras.models import model_from_json
from keras.preprocessing.sequence import pad_sequences
from keras.layers import Dense, LSTM, Embedding

from txt2sequence import convert_to_sequences

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


# Convert Text to Sequences
N.B. If 'texts/hp_sequences.txt' has already been generated, you don't need to run the cell below.

In [None]:
# convert_to_sequences()

# Import Saved Model

In [2]:
# Rebuild the network architecture from its saved JSON description.
with open('models/251019/model.json', 'r') as json_file:
    json_model = json_file.read()

model = model_from_json(json_model)
# Restore the trained weights into the rebuilt architecture.
model.load_weights('models/251019/model.h5')

# NOTE: unpickling can execute arbitrary code; only load a tokenizer file
# that comes from a trusted source.
# The context manager closes the file handle once the tokenizer is loaded.
with open('models/251019/tokenizer.pkl', 'rb') as tokenizer_file:
    tokenizer = load(tokenizer_file)

# Inverse of tokenizer.word_index: maps an integer index back to its word.
reverse_word_map = {index: word for word, index in tokenizer.word_index.items()}
# Index of the 'unknownword' placeholder token; read by generate_seq.
unk_ind = tokenizer.word_index['unknownword']

# Load Data

In [3]:
def load_doc(filename):
    """Return the whole contents of a text file as one string.

    Args:
        filename: Path of the file to read.

    Returns:
        The file's text, read in text mode with the default encoding.
    """
    # The context manager closes the file even if read() raises.
    with open(filename, 'r') as file:
        return file.read()
 
# load cleaned text sequences
in_filename = 'texts/hp_sequences.txt'
doc = load_doc(in_filename)
# Drop blank lines (such as the empty string left by a trailing newline) so
# that a randomly chosen seed line is never empty.
lines = [line for line in doc.split('\n') if line.strip()]
# Word count of the first line minus one.
# NOTE(review): presumably each line is seq_length input words followed by
# one target word - confirm against txt2sequence.
seq_length = len(lines[0].split()) - 1

# Text Generation

In [4]:
# utility functions
def capitalise(sentence):
    """Return `sentence` with its first character upper-cased.

    The remaining characters are left untouched (unlike `str.capitalize`,
    which lower-cases them). An empty string is returned unchanged.
    """
    # Slicing instead of indexing avoids an IndexError on an empty string.
    return sentence[:1].upper() + sentence[1:]

def sentence_case(text, punct):
    """Capitalise the start of every sentence that follows `punct`.

    `text` is split on `punct` followed by a single space, every piece is
    passed through `capitalise`, and the pieces are joined back together
    with that same separator.
    """
    separator = punct + ' '
    pieces = text.split(separator)
    return separator.join(capitalise(piece) for piece in pieces)

def display_txt(text):
    """Print model-format text as readable, sentence-cased prose.

    Every ' endofsentence ' marker becomes '. ', any remaining
    'endofsentence' is removed, and the text is sentence-cased after '.',
    '?' and '!'. If the text began with the marker, the output is prefixed
    with '. ' to show that it starts at a sentence boundary.

    Args:
        text: Space-separated words that use 'endofsentence' as the
            sentence-break token.
    """
    marker = 'endofsentence'
    first_stop = text.startswith(marker)
    if first_stop:
        # Drop the marker and the whitespace after it; otherwise the first
        # sentence starts with a blank, is never capitalised, and the
        # output gets a double space after the leading '. '.
        text = text[len(marker):].lstrip()
    text = text.replace(' ' + marker + ' ', '. ')
    text = text.replace(marker, '')
    for punct in '.?!':
        text = sentence_case(text, punct)
    if first_stop:
        text = '. ' + text
    print(text)
    
def prepare_txt(text):
    """Convert readable text into the token format used for the model input.

    '. ' is replaced by ' endofsentence ', the text is lower-cased, and any
    space-separated word missing from the module-level `tokenizer`
    vocabulary is replaced by 'unknownword'.
    """
    vocabulary = tokenizer.word_index
    lowered = text.replace('. ', ' endofsentence ').lower()
    tokens = []
    for word in lowered.split(' '):
        tokens.append(word if word in vocabulary else 'unknownword')
    return ' '.join(tokens)

def generate_seq(model, tokenizer, seq_length, seed_text, n_words, top_n=5):
    """Generate `n_words` words that continue `seed_text`.

    On every step the running text is encoded with `tokenizer`, truncated
    or padded to `seq_length`, and passed to `model`. The next word is
    sampled from the `top_n` highest-probability indices (excluding the
    module-level `unk_ind`), weighted by their renormalised probabilities.
    The chosen word is appended to the running text before the next step.

    Args:
        model: Model exposing `predict_proba(encoded, verbose=0)`.
        tokenizer: Object exposing `texts_to_sequences`.
        seq_length: Number of tokens fed to the model on each step.
        seed_text: Starting text, already in the model's token format.
        n_words: Number of words to generate.
        top_n: How many of the most probable words to sample from.
            Defaults to 5, the value that was previously hard-coded.

    Returns:
        The generated words joined by single spaces (seed text excluded).
    """
    result = list()
    in_text = seed_text
    # generate a fixed number of words
    for _ in range(n_words):
        # encode the text as integer
        encoded = tokenizer.texts_to_sequences([in_text])[0]
        # truncate sequences to a fixed length
        encoded = pad_sequences([encoded], maxlen=seq_length, truncating='pre')
        # predict probabilities for each word
        probs = model.predict_proba(encoded, verbose=0)[0]

        # only consider the top_n best words, never the unknown-word token
        inds = [x for x in np.argpartition(probs, -top_n)[-top_n:] if x != unk_ind]

        # renormalise so the candidate probabilities sum to one
        top_probs = probs[inds]
        top_probs = top_probs / top_probs.sum()

        word_ind = np.random.choice(inds, p=top_probs)
        out_word = reverse_word_map[word_ind]

        in_text += ' ' + out_word
        result.append(out_word)
    return ' '.join(result)

In [21]:
# get random line from the books to use as the seed
# randint includes both endpoints, so the upper bound must be the last valid
# index; len(lines) itself would raise an IndexError.
seed_text = lines[randint(0, len(lines) - 1)]
display_txt(seed_text)

Said harry his hand shaking. Its all right im here make it stop make it stop moaned dumbledore. Yes. Yes thisll make it stop lied harry. He tipped the contents of the goblet into dumbledores open mouth. Dumbledore screamed the noise echoed all around the vast


In [22]:
# Continue the seed with 100 generated words, then pretty-print them.
generated = generate_seq(
    model=model,
    tokenizer=tokenizer,
    seq_length=seq_length,
    seed_text=prepare_txt(seed_text),
    n_words=100,
)
display_txt(generated)

Landing in a large sky. Harry ignored him. They ought to have been expelled. And the dursleys had received a few hours of the summer he was sure that his eyes were now asleep as they walked in and out of sight. Ron was still dressed. The next morning harry had to explain how to teach the first week he had had to discuss the summer. They had not seen what was happening. Harry recognized the dursleys. He was not sure it was the first years to destroy it had been the
