In [1]:
import numpy as np
from pickle import dump, load
from random import randint
from keras.models import model_from_json
from keras.preprocessing.sequence import pad_sequences
from keras.layers import Dense, LSTM, Embedding

from txt2sequence import convert_to_sequences

Using TensorFlow backend.
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


# Convert Text to Sequences
N.B. If 'texts/hp_sequences.txt' has already been generated you don't need to run the cell below

In [2]:
# Run the project's txt2sequence pipeline (see the progress messages it prints below).
# NOTE(review): per the note above, this is presumably what writes
# 'texts/hp_sequences.txt' -- skip this cell when that file already exists.
convert_to_sequences()

importing books
cleaning text
removing infrequent words
saving sequences to file


# Import Saved Model

In [2]:
# Rebuild the network architecture from its JSON description ...
with open('models/070520/model.json', 'r') as json_file:
    json_model = json_file.read()

model = model_from_json(json_model)
# ... then restore the trained weights into it.
model.load_weights('models/070520/model.h5')

# NOTE: unpickling can execute arbitrary code -- only load tokenizer files you trust.
# The context manager guarantees the file handle is closed after loading.
with open('models/070520/tokenizer.pkl', 'rb') as tokenizer_file:
    tokenizer = load(tokenizer_file)

# Inverse of tokenizer.word_index: integer index -> word.
reverse_word_map = {index: word for word, index in tokenizer.word_index.items()}
# Index of the placeholder token used for out-of-vocabulary words.
unk_ind = tokenizer.word_index['unknownword']

Instructions for updating:
Colocations handled automatically by placer.


# Load Data

In [3]:
def load_doc(filename):
    """Return the full contents of a text file as one string.

    Args:
        filename: path of the file to read (opened in text mode).

    Returns:
        The file's text, unmodified.

    Raises:
        OSError: if the file cannot be opened or read.
    """
    # The context manager closes the file even when read() raises.
    with open(filename, 'r') as file:
        return file.read()
 
# load cleaned text sequences
in_filename = 'texts/hp_sequences.txt'
doc = load_doc(in_filename)
# One sequence per line. Blank lines (e.g. produced by a trailing newline) are
# dropped so that a randomly chosen seed line can never be empty.
lines = [line for line in doc.split('\n') if line.strip()]
if not lines:
    raise ValueError('no sequences found in ' + in_filename)
# Word count of the first line minus one.
# NOTE(review): presumably the last word of each line is the training target -- confirm.
seq_length = len(lines[0].split()) - 1

# Text Generation

In [4]:
# utility functions
def capitalise(sentence):
    """Upper-case the first character of `sentence`, leaving the rest untouched.

    Returns '' for an empty input.
    """
    if len(sentence) == 0:
        return ''
    first, rest = sentence[0], sentence[1:]
    return first.upper() + rest

def sentence_case(text, punct):
    """Capitalise the first character of every piece of `text` that follows `punct` + space.

    The text is split on `punct` followed by a single space, each piece is
    passed through `capitalise`, and the pieces are joined back with the same
    separator.
    """
    separator = punct + ' '
    pieces = [capitalise(piece) for piece in text.split(separator)]
    return separator.join(pieces)

def display_txt(text):
    """Print model-format text as readable sentences.

    'endofsentence' markers are turned back into full stops (a marker at the
    very start becomes a leading '. '), and the text after each '.', '?' or
    '!' is capitalised via `sentence_case`. Nothing is returned.
    """
    # Remember a marker at the very start so it can be re-emitted as '. '.
    leading_stop = text.startswith('endofsentence')
    if leading_stop:
        text = text[len('endofsentence'):]

    # Markers between words become '. '; any remaining marker is dropped.
    text = text.replace(' endofsentence ', '. ').replace('endofsentence', '')

    for mark in ('.', '?', '!'):
        text = sentence_case(text, mark)

    output = '. ' + text if leading_stop else text
    print(output)
    
def prepare_txt(text, word_index=None):
    """Convert free-form seed text into the token format used by the model.

    Sentence-ending full stops are replaced by the 'endofsentence' token, the
    text is lower-cased, and every word missing from the vocabulary is
    replaced by 'unknownword'.

    Args:
        text: raw seed text; may be empty.
        word_index: mapping (or any container) of known words. Defaults to
            the notebook-level `tokenizer.word_index`.

    Returns:
        The prepared text, with tokens separated by single spaces.
    """
    if word_index is None:
        word_index = tokenizer.word_index

    # endswith() is safe on an empty string, unlike text[-1].
    if text.endswith('.'):
        text = text[:-1] + ' endofsentence'
    text = text.replace('. ', ' endofsentence ').lower()

    # split() without an argument ignores runs of whitespace, so repeated
    # spaces do not produce empty tokens that would be mapped to 'unknownword'.
    return ' '.join(
        word if word in word_index else 'unknownword'
        for word in text.split()
    )

def generate_seq(model, tokenizer, seq_length, seed_text, n_words, top_n=5):
    """Generate `n_words` words that continue `seed_text`.

    At every step the text so far is encoded with `tokenizer`, padded or
    truncated (from the front) to `seq_length` tokens and passed to `model`.
    The next word is sampled from the `top_n` highest-scoring indices,
    weighted by their renormalised probabilities, and appended to the text.

    Args:
        model: model whose `predict` output for one row is indexed by word
            index. NOTE(review): assumed to be a probability vector -- confirm.
        tokenizer: fitted tokenizer providing `texts_to_sequences` and
            `word_index`.
        seq_length: number of tokens fed to the model at each step.
        seed_text: already-prepared text to continue (see `prepare_txt`).
        n_words: number of words to generate.
        top_n: number of highest-probability indices to sample from.

    Returns:
        The generated words joined by single spaces (the seed is not included).

    Raises:
        ValueError: if `top_n` is smaller than 1.
    """
    if top_n < 1:
        raise ValueError('top_n must be at least 1')

    # Build the lookups from the tokenizer that was passed in, so the function
    # does not silently depend on notebook-level globals.
    index_to_word = {index: word for word, index in tokenizer.word_index.items()}
    unk_index = tokenizer.word_index.get('unknownword')

    def is_allowed(index):
        # Reject indices without a word (e.g. padding) and the unknown-word token.
        return index in index_to_word and index != unk_index

    result = []
    in_text = seed_text
    # generate a fixed number of words
    for _ in range(n_words):
        # encode the text as integers and keep only the last seq_length tokens
        encoded = tokenizer.texts_to_sequences([in_text])[0]
        encoded = pad_sequences([encoded], maxlen=seq_length, truncating='pre')

        # predict() returns the same values as the removed predict_proba();
        # float64 keeps the renormalised weights within np.random.choice's
        # sum-to-one tolerance.
        probs = np.asarray(model.predict(encoded, verbose=0)[0], dtype=np.float64)

        # only consider the top_n best words
        k = min(top_n, len(probs))
        top = np.argpartition(probs, -k)[-k:]
        candidates = [int(i) for i in top if is_allowed(int(i))]
        if not candidates:
            # Every top-k index was rejected: fall back to the best allowed index.
            allowed = [i for i in range(len(probs)) if is_allowed(i)]
            candidates = [max(allowed, key=lambda i: probs[i])]

        weights = probs[candidates]
        weights = weights / weights.sum()

        word_ind = int(np.random.choice(candidates, p=weights))
        out_word = index_to_word[word_ind]

        in_text += ' ' + out_word
        result.append(out_word)
    return ' '.join(result)

To generate new text, you must provide some seed text as input. This can be done either by taking a random line from the original Harry Potter books or by writing your own seed text from scratch. Both techniques are shown below.

In [59]:
# get random line from the books to use as the seed
# randint() includes BOTH endpoints, so the upper bound must be the last valid
# index; randint(0, len(lines)) could return len(lines) and raise IndexError.
seed_text = lines[randint(0, len(lines) - 1)]
display_txt(seed_text)

Harry ran to the compartment door and ron threw it open and stood back to let him on. They leaned out of the window and waved at mr and mrs weasley until the train turned a corner and blocked them from view. I need to talk to you in


In [65]:
# Continue the current seed with 100 sampled words, then print them as sentences.
generated = generate_seq(model, tokenizer, seq_length, prepare_txt(seed_text), 100)
display_txt(generated)

The department of mysteries said harry truthfully. I was going to tell us that he was a good idea of doing it said ron indicating the large bound jug and stalked with her long handkerchief. I dunno said harry. He keeps thinking. Yeah said ron. And i know. I dunno said harry crossly. I think he teaches meself. And thats what happened to you. Said harry hotly. Harry said nothing but he was a great prickling that he was not a growing dark mark in azkaban. He was thinner


In [85]:
# use custom input as seed text
# (prepare_txt lower-cases it and replaces words missing from the tokenizer
# vocabulary with 'unknownword' before it reaches the model)
seed_text = "Sirius Black and Harry were both riding the Hogwarts Express in disguise. There were a number of aurors on board who were none the wiser."

In [86]:
# Continue the custom seed with 100 sampled words, then print them as sentences.
generated = generate_seq(model, tokenizer, seq_length, prepare_txt(seed_text), 100)
display_txt(generated)

The dursleys had only flared in the midst of the corner beside him the moment of witchcraft and faded he had sprung with silver smoke blazing with flaming dark hair and silver glittered pots bottles in the grass. Harry felt his face now strongly that the only sound in his hand swam from nowhere previously. Harry recognized the pain of his hood he felt himself so violently to the ground and his knees that was in azkaban. . .  it belonged. . . . The eyes boy was still glistening


In [5]:
# use custom input as seed text
# ("smal" corrected to "small": a misspelt word is out of vocabulary and would
# be fed to the model as 'unknownword')
seed_text = "Heart hammering, harry pushed his cart after them. They stopped and so did he, just near enough to hear what they were saying. Now what's the platform number? said the boy's mother. Nine and three-quarters! piped a small girl, also red-headed, who was holding her hand, Mom, can't I go... You're not old enough Ginny now be quiet. All right Percy you go"


In [22]:
# Continue the seed with 100 sampled words, then print them as sentences.
# Words are drawn with np.random.choice and no seed is set, so each run differs.
generated = generate_seq(model, tokenizer, seq_length, prepare_txt(seed_text), 100)
display_txt(generated)

To the hospital wing. Harry and ron looked at the wall. Howre they doing. Asked umbridge reddening at harry with vampires cast with interest. Oh yeah you havent noticed you know who said ron clapping a hand to look at him and beckoned harry roughly. Harry looked around. The door opened. Harry yelled. It was like fangs harrys hair and bloodshot face echoed loudly with a rush of fury. Harry and ron had a very good teacher who had been a relief for being a second later he said i dunno


In [28]:
# Same seed again: sampling is random, so this produces a different continuation.
generated = generate_seq(model, tokenizer, seq_length, prepare_txt(seed_text), 100)
display_txt(generated)

Back. Harry asked harry grabbing note to hermione. You dont worry about me said hagrid earnestly. I suppose so said a witch and the minister of magic information yaxley is not insulting to students. I shall admit you are the caretaker. Oh yeah said mr weasley turning to the door with glee. Yep said stan indicating a slytherin quidditch squad and madame maxime and seamus finnigan fleur thomas eagerly cheered in surprise and tugged mustache whether it was a bit dangerous. The class was now shining with apprehension that floated to a large
