In [1]:
import string
import time
import numpy as np
from numpy import array
from pickle import dump, load
from collections import Counter
from tqdm import tqdm_notebook
from random import randint
from keras.models import load_model, model_from_json, Sequential
from keras.preprocessing.sequence import pad_sequences
from keras.preprocessing.text import Tokenizer
from keras.utils import to_categorical
from keras.layers import Dense, LSTM, Embedding
from keras.optimizers import Adadelta

Using TensorFlow backend.


In [2]:
# Rebuild the trained network: architecture from the JSON description,
# weights from the companion HDF5 file.
with open('models/251019/model.json', 'r') as json_file:
    json_model = json_file.read()

model = model_from_json(json_model)
model.load_weights('models/251019/model.h5')

# NOTE(security): unpickling executes arbitrary code embedded in the file, so
# only load tokenizer pickles that come from a trusted source.
# The context manager guarantees the file handle is closed after loading.
with open('models/251019/tokenizer.pkl', 'rb') as tokenizer_file:
    tokenizer = load(tokenizer_file)

# integer index -> word, i.e. the inverse of tokenizer.word_index
reverse_word_map = {index: word for word, index in tokenizer.word_index.items()}
# index of the placeholder token that stands in for out-of-vocabulary words
unk_ind = tokenizer.word_index['unknownword']

Instructions for updating:
Colocations handled automatically by placer.


In [3]:
def load_doc(filename):
    """Return the entire contents of a text file as one string.

    Args:
        filename: path of the file to read (opened in text mode, read only).

    Returns:
        The file's text.

    Raises:
        OSError: if the file cannot be opened or read.
    """
    # the context manager closes the handle even if read() raises
    with open(filename, 'r') as file:
        return file.read()
 
# load cleaned text sequences (presumably one sequence per line — TODO confirm)
in_filename = 'texts/hp_sequences.txt'
doc = load_doc(in_filename)
lines = doc.split('\n')
# seq_length is the token count of the first line minus one; this assumes
# every line has the same number of tokens as the first. Presumably the extra
# token is the word to be predicted — TODO confirm against the training code.
seq_length = len(lines[0].split()) - 1

In [110]:
# utility functions
def capitalise(sentence):
    """Return `sentence` with its first character upper-cased.

    The rest of the string is left untouched (unlike `str.capitalize`, which
    also lower-cases the remainder). An empty string is returned unchanged.
    """
    # slicing instead of indexing so that '' does not raise IndexError
    return sentence[:1].upper() + sentence[1:]

def sentence_case(text, punct):
    """Capitalise the first letter of every sentence in `text`.

    Sentences are delimited by `punct` followed by a single space; the
    delimiter itself is preserved in the returned string.
    """
    separator = punct + ' '
    sentences = []
    for sentence in text.split(separator):
        sentences.append(capitalise(sentence))
    return separator.join(sentences)

def display_txt(text):
    """Print tokenised text as readable prose.

    'endofsentence' markers are turned back into full stops (or removed when
    they are not surrounded by spaces), and the first letter after every
    '. ', '? ' and '! ' is capitalised. If the text began with a marker, the
    output is prefixed with '. '.
    """
    marker = 'endofsentence'
    first_stop = text.startswith(marker)
    if first_stop:
        # drop the leading marker; the full stop is re-added just before printing
        text = text[len(marker):]

    text = text.replace(' endofsentence ', '. ').replace('endofsentence', '')

    for punct in ('.', '?', '!'):
        text = sentence_case(text, punct)

    print('. ' + text if first_stop else text)
    
def prepare_txt(text):
    """Convert readable text into the token form used by `tokenizer`.

    Every '. ' becomes ' endofsentence ', the text is lower-cased, and each
    space-separated word that is missing from `tokenizer.word_index` is
    replaced with 'unknownword'.
    """
    normalised = text.replace('. ', ' endofsentence ').lower()
    vocabulary = tokenizer.word_index.keys()
    words = []
    for word in normalised.split(' '):
        words.append(word if word in vocabulary else 'unknownword')
    return ' '.join(words)

def generate_seq(model, tokenizer, seq_length, seed_text, n_words, top_n=5):
    """Generate `n_words` words that continue `seed_text`.

    At every step the running text is encoded with `tokenizer`, cut or padded
    to `seq_length` tokens, and fed to `model`. The next word is sampled from
    the `top_n` most probable predictions, weighted by their renormalised
    probabilities. The unknown-word index (`unk_ind`) and any index without an
    entry in `reverse_word_map` are never sampled.

    Args:
        model: network exposing `predict_proba(encoded, verbose=0)`.
        tokenizer: tokenizer exposing `texts_to_sequences`.
        seq_length: number of tokens the model expects as input.
        seed_text: text, already in token form, that generation starts from.
        n_words: number of words to generate.
        top_n: how many of the most probable words to sample from (default 5,
            the value that used to be hard-coded).

    Returns:
        The generated words joined by single spaces (the seed is not included).

    Raises:
        ValueError: if `top_n` is smaller than 1, or if none of the top
            `top_n` predictions is a usable word.
    """
    if top_n < 1:
        raise ValueError('top_n must be at least 1')

    result = []
    in_text = seed_text
    # generate a fixed number of words
    for _ in range(n_words):
        # encode the text as integers
        encoded = tokenizer.texts_to_sequences([in_text])[0]
        # keep only the last seq_length tokens (shorter input is padded)
        encoded = pad_sequences([encoded], maxlen=seq_length, truncating='pre')
        # predict probabilities for each word
        probs = model.predict_proba(encoded, verbose=0)[0]

        # only consider the top_n best words; clamp so argpartition cannot be
        # asked for more entries than the output has
        k = min(top_n, len(probs))
        shortlist = np.argpartition(probs, -k)[-k:]
        # drop the unknown-word token and indices that map to no word at all
        # (e.g. a reserved padding index), which would otherwise raise
        # KeyError in the reverse_word_map lookup below
        inds = [x for x in shortlist if x != unk_ind and x in reverse_word_map]
        if not inds:
            raise ValueError(
                'none of the top %d predictions is a usable word; '
                'increase top_n' % k
            )

        # renormalise so the remaining probabilities sum to one
        weights = probs[inds] / probs[inds].sum()

        word_ind = np.random.choice(inds, p=weights)
        out_word = reverse_word_map[word_ind]

        in_text += ' ' + out_word
        result.append(out_word)
    return ' '.join(result)

In [94]:
# get random line from the books to use as the seed
# randint includes BOTH endpoints, so the upper bound must be len(lines) - 1;
# using len(lines) occasionally indexed one past the end (IndexError)
seed_text = lines[randint(0, len(lines) - 1)]
display_txt(seed_text)

Mind then. I didnt mean it to happen. Harry said. It was a dream. Can you control what you dream about hermione. If you just learned to apply occlumency but harry was not interested in being told off he wanted to discuss what he had just


In [117]:
# continue the seed with 100 generated words and show them as readable prose
prepared_seed = prepare_txt(seed_text)
generated = generate_seq(model, tokenizer, seq_length, prepared_seed, 100)
display_txt(generated)

Seen. But why would he know anything about his first morning he was surprised to see what he was doing. He did not know what he had to do. But there was as he was to discuss the news that they had been expelled and it was a bit more. But if he had to explain he was not surprised to see what he was saying. The first week of gryffindor was saying the whole class was now looking around the end of the school and the students in the castle. They knew what
