In [70]:
from __future__ import print_function
from keras.callbacks import LambdaCallback
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras.optimizers import RMSprop
from keras.callbacks import ModelCheckpoint
from keras.layers import Dense, Dropout, LSTM
from keras.utils.data_utils import get_file
import numpy as np
import random
import sys
import io, os
import pickle
import string

In [4]:
dialogues_dict = pickle.load(open('dialogues.pkl', 'rb'))

In [46]:
harry_dialogue = dialogues_dict['HARRY'].copy()

In [47]:
harry_dialogue

['Yes, Aunt Petunia.',
 'Yes, Uncle Vernon.',
 "He's asleep!",
 "Sorry about him. He doesn't understand what it's like, lying there day after day, having people press their ugly faces in on you.",
 "Can you...hear me?  It's just...I've never talked to a snake before. Do you...I mean...do you talk to people often?  You're from Burma, aren't you? Was it nice there, do you miss your family?  I see. That's me as well. I never knew my parents, either.",
 'Anytime.',
 'Ow!',
 "I swear I don't know! One minute the glass was there and then it was gone! It was like magic!",
 "Hey, give it back! It's mine!",
 "Because there's no post on Sunday? Ah, right you are, Harry. No post on Sunday. Hah! No blasted letters today. No, sir.  No sir, not one blasted, miserable---",
 'Get off! Ahh!',
 "They're my letters! Let go of me!",
 'Make a wish, Harry.',
 'Thank you! {Opens cake, which reads',
 'Excuse me, who are you?',
 'Sorry, no.',
 'Learnt what?',
 "I-I'm a what?",
 "A wizard. And a thumping good o

In [53]:
harry_corpus = ' '.join(harry_dialogue)

In [54]:
harry_corpus



In [55]:
print('corpus length:', len(harry_corpus))

corpus length: 58699


In [56]:
chars = sorted(list(set(harry_corpus)))
print('total chars:', len(chars))
char_indices = dict((c, i) for i, c in enumerate(chars))
indices_char = dict((i, c) for i, c in enumerate(chars))

total chars: 75


In [57]:
seq_len = 50
step = 3
sentences = []
next_chars = []

for i in range(0, len(harry_corpus) - seq_len, step):
    sentences.append(harry_corpus[i: i + seq_len])
    next_chars.append(harry_corpus[i + seq_len])
print('Number of sequences:', len(sentences))


chars = sorted(list(set(harry_corpus)))
print('Unique characters:', len(chars))

char_indices = dict((char, chars.index(char)) for char in chars)

Number of sequences: 19550
Unique characters: 75


In [59]:
n_chars = len(harry_corpus)
n_vocab = len(chars)
n_sentences = len(sentences)

In [60]:
##Vectorizing

In [61]:
x = np.zeros((n_sentences, seq_len, n_vocab), dtype=np.bool)
y = np.zeros((n_sentences, n_vocab), dtype=np.bool)
for i, sentence in enumerate(sentences):
    for t, char in enumerate(sentence):
        x[i, t, char_indices[char]] = 1
    y[i, char_indices[next_chars[i]]] = 1

In [62]:
##creating checkpoints

In [67]:
filepath="lstm4_weights-improvement-{epoch:02d}-{loss:.4f}.hdf5"
filepath_dir = os.path.dirname(filepath)
checkpoint = ModelCheckpoint(filepath, monitor='loss', verbose=1, 
                             save_best_only=True, mode='min')
callbacks_list2 = [checkpoint]

In [83]:
callbacks_list2

[<keras.callbacks.ModelCheckpoint at 0xb30d2ac50>]

In [68]:
##Model

In [71]:
model = Sequential()
model.add(LSTM(256, input_shape=(seq_len, n_vocab)))
model.add(Dropout(0.2))
model.add(Dense(n_vocab, activation='softmax'))

Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.


In [80]:
def sample(preds, temperature=1.0):
    preds = np.asarray(preds).astype('float64')
    preds = np.log(preds) / temperature
    exp_preds = np.exp(preds)
    preds = exp_preds / np.sum(exp_preds)
    probas = np.random.multinomial(1, preds, 1)
    np.seterr(divide = 'ignore') 
    return np.argmax(probas)

In [None]:
##Fit model

In [86]:
model.fit(x, y, epochs=20, batch_size=128, callbacks=callbacks_list2)

Instructions for updating:
Use tf.cast instead.
Epoch 1/20

Epoch 00001: loss improved from inf to 3.25785, saving model to lstm4_weights-improvement-01-3.2578.hdf5
Epoch 2/20

Epoch 00002: loss improved from 3.25785 to 2.97333, saving model to lstm4_weights-improvement-02-2.9733.hdf5
Epoch 3/20

Epoch 00003: loss improved from 2.97333 to 2.59556, saving model to lstm4_weights-improvement-03-2.5956.hdf5
Epoch 4/20

Epoch 00004: loss improved from 2.59556 to 2.41336, saving model to lstm4_weights-improvement-04-2.4134.hdf5
Epoch 5/20

Epoch 00005: loss improved from 2.41336 to 2.30896, saving model to lstm4_weights-improvement-05-2.3090.hdf5
Epoch 6/20

Epoch 00006: loss improved from 2.30896 to 2.23038, saving model to lstm4_weights-improvement-06-2.2304.hdf5
Epoch 7/20

Epoch 00007: loss improved from 2.23038 to 2.16756, saving model to lstm4_weights-improvement-07-2.1676.hdf5
Epoch 8/20

Epoch 00008: loss improved from 2.16756 to 2.10513, saving model to lstm4_weights-improvement-08-

<keras.callbacks.History at 0xb354fd898>

In [87]:
filepath_current = "lstm4_weights-improvement-20-1.4900.hdf5"
model.load_weights(filepath_current)
model.compile(loss='categorical_crossentropy', optimizer='adam')

In [None]:
##Generate Text

In [98]:
start_index = random.randint(0, n_chars - seq_len - 1)
generated_text = harry_corpus[start_index: start_index + seq_len]

for temperature in [0.4]:
    quote = []
    for i in range(100):
        sampled = np.zeros((1, seq_len, n_vocab))
        for t, char in enumerate(generated_text):
            sampled[0, t, char_indices[char]] = 1.
    
    preds = model.predict(sampled, verbose=0)[0]
    next_index = sample(preds, temperature)
    next_char = chars[next_index]

    generated_text += next_char
    generated_text = generated_text[1:]

    quote.append(next_char)

quote_gen = "".join(quote)
print(quote_gen)

n
