In [4]:
from __future__ import print_function
from keras.callbacks import LambdaCallback
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras.optimizers import RMSprop
from keras.callbacks import ModelCheckpoint
from keras.layers import Dense, Dropout, LSTM
from keras.utils.data_utils import get_file
import numpy as np
import random
import sys
import io, os
import pickle
import string
from keras.utils import np_utils

Using TensorFlow backend.


In [5]:
# Load the pickled dialogue data from disk. A later cell indexes the result by
# the key 'HARRY' (presumably speaker name -> that speaker's lines; verify
# against whatever produced dialogues.pkl).
# NOTE: pickle.load can execute arbitrary code embedded in the file, so only
# load pickles that come from a trusted source.
# The context manager closes the file handle as soon as loading finishes,
# instead of leaving it open until garbage collection.
with open('dialogues.pkl', 'rb') as dialogues_file:
    dialogues_dict = pickle.load(dialogues_file)

In [6]:
# Take a (shallow) copy of the 'HARRY' entry rather than aliasing the object
# stored inside dialogues_dict.
harry_dialogue = dialogues_dict['HARRY'].copy()

In [1]:
# harry_dialogue

In [7]:
# Concatenate every entry of harry_dialogue into one string, separated by single spaces.
harry_corpus = ' '.join(harry_dialogue)

In [2]:
# harry_corpus

In [8]:
# Report the size of the training corpus in characters.
print('corpus length:', len(harry_corpus))

corpus length: 58699


In [9]:
# Build the vocabulary: every distinct character of the corpus in sorted order,
# and a lookup from each character to its position in that ordering.
chars = sorted(set(harry_corpus))
char_to_int = {symbol: position for position, symbol in enumerate(chars)}

In [10]:
# Corpus size in characters and number of distinct symbols in the vocabulary.
n_chars, n_vocab = len(harry_corpus), len(chars)
print("Total Characters: ", n_chars)
print("Total Vocab: ", n_vocab)

Total Characters:  58699
Total Vocab:  75


In [11]:
# prepare the dataset of input to output pairs encoded as integers
#
# Each training pattern is a window of seq_length consecutive characters
# (dataX) paired with the single character that follows it (dataY); windows
# advance one character at a time.
seq_length = 100

# Encode the whole corpus once. Every window is then a plain list slice,
# instead of looking each character up in char_to_int again for every window
# it belongs to (roughly seq_length times per character).
encoded_corpus = [char_to_int[char] for char in harry_corpus]

# Each slice is a fresh list, so the patterns in dataX do not share storage.
dataX = [encoded_corpus[i:i + seq_length] for i in range(0, n_chars - seq_length)]
dataY = [encoded_corpus[i + seq_length] for i in range(0, n_chars - seq_length)]

n_patterns = len(dataX)
print("Total Patterns: ", n_patterns)

Total Patterns:  58599


In [12]:
# reshape X to be [samples, time steps, features]
X = np.reshape(dataX, (n_patterns, seq_length, 1))
# normalize: scale the integer codes by the vocabulary size so inputs lie in [0, 1)
X = X / float(n_vocab)
# one hot encode the output variable
# Fix the width to n_vocab explicitly. Left to infer it, to_categorical uses
# max(dataY) + 1, which is narrower than the vocabulary whenever the
# highest-indexed character never occurs as a prediction target.
y = np_utils.to_categorical(dataY, num_classes=n_vocab)

In [13]:
# Single-layer LSTM character model: a 256-unit LSTM over the
# (time steps, features) input, 20% dropout, then a softmax layer with one
# output per column of y.
model = Sequential([
    LSTM(256, input_shape=(X.shape[1], X.shape[2])),
    Dropout(0.2),
    Dense(y.shape[1], activation='softmax'),
])
model.compile(loss='categorical_crossentropy', optimizer='adam')

Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.


In [14]:
# Checkpointing: write the weights to disk each time the monitored training
# loss reaches a new minimum. The epoch number and loss value are substituted
# into the file name.
# NOTE(review): the saved training output in this notebook shows files named
# "lstm-weights-improvement-...", not "lstm1-..." - it appears to come from a
# run made before this prefix was changed; confirm.
filepath = "lstm1-weights-improvement-{epoch:02d}-{loss:.4f}.hdf5"
checkpoint = ModelCheckpoint(
    filepath,
    monitor='loss',
    verbose=1,
    save_best_only=True,
    mode='min'
)
callbacks_list = [checkpoint]

In [15]:
# Train the network on (X, y) in mini-batches of 128, passing the checkpoint
# callback defined in callbacks_list.
# NOTE(review): the saved output beneath this cell reports "Epoch N/100",
# whereas this call requests epochs=40 - the output appears to come from an
# earlier 100-epoch run; confirm which setting is intended.
model.fit(X, y, epochs=40, batch_size=128, callbacks=callbacks_list)

Instructions for updating:
Use tf.cast instead.
Epoch 1/100

Epoch 00001: loss improved from inf to 3.22513, saving model to lstm-weights-improvement-01-3.2251.hdf5
Epoch 2/100

Epoch 00002: loss improved from 3.22513 to 3.04403, saving model to lstm-weights-improvement-02-3.0440.hdf5
Epoch 3/100

Epoch 00003: loss improved from 3.04403 to 2.94472, saving model to lstm-weights-improvement-03-2.9447.hdf5
Epoch 4/100

Epoch 00004: loss improved from 2.94472 to 2.89161, saving model to lstm-weights-improvement-04-2.8916.hdf5
Epoch 5/100

Epoch 00005: loss improved from 2.89161 to 2.85025, saving model to lstm-weights-improvement-05-2.8502.hdf5
Epoch 6/100

Epoch 00006: loss improved from 2.85025 to 2.81909, saving model to lstm-weights-improvement-06-2.8191.hdf5
Epoch 7/100

Epoch 00007: loss improved from 2.81909 to 2.79621, saving model to lstm-weights-improvement-07-2.7962.hdf5
Epoch 8/100

Epoch 00008: loss improved from 2.79621 to 2.77303, saving model to lstm-weights-improvement-08-


Epoch 00041: loss improved from 1.89087 to 1.87233, saving model to lstm-weights-improvement-41-1.8723.hdf5
Epoch 42/100

Epoch 00042: loss improved from 1.87233 to 1.85147, saving model to lstm-weights-improvement-42-1.8515.hdf5
Epoch 43/100

Epoch 00043: loss improved from 1.85147 to 1.83420, saving model to lstm-weights-improvement-43-1.8342.hdf5
Epoch 44/100

Epoch 00044: loss improved from 1.83420 to 1.81137, saving model to lstm-weights-improvement-44-1.8114.hdf5
Epoch 45/100

Epoch 00045: loss improved from 1.81137 to 1.78121, saving model to lstm-weights-improvement-45-1.7812.hdf5
Epoch 46/100

Epoch 00046: loss improved from 1.78121 to 1.77051, saving model to lstm-weights-improvement-46-1.7705.hdf5
Epoch 47/100

Epoch 00047: loss improved from 1.77051 to 1.74662, saving model to lstm-weights-improvement-47-1.7466.hdf5
Epoch 48/100

Epoch 00048: loss improved from 1.74662 to 1.73236, saving model to lstm-weights-improvement-48-1.7324.hdf5
Epoch 49/100

Epoch 00049: loss impro


Epoch 00086: loss improved from 1.37814 to 1.37475, saving model to lstm-weights-improvement-86-1.3748.hdf5
Epoch 87/100

Epoch 00087: loss did not improve from 1.37475
Epoch 88/100

Epoch 00088: loss did not improve from 1.37475
Epoch 89/100

Epoch 00089: loss improved from 1.37475 to 1.37235, saving model to lstm-weights-improvement-89-1.3724.hdf5
Epoch 90/100

Epoch 00090: loss did not improve from 1.37235
Epoch 91/100

Epoch 00091: loss improved from 1.37235 to 1.36779, saving model to lstm-weights-improvement-91-1.3678.hdf5
Epoch 92/100

Epoch 00092: loss improved from 1.36779 to 1.33907, saving model to lstm-weights-improvement-92-1.3391.hdf5
Epoch 93/100

Epoch 00093: loss did not improve from 1.33907
Epoch 94/100

Epoch 00094: loss did not improve from 1.33907
Epoch 95/100

Epoch 00095: loss did not improve from 1.33907
Epoch 96/100

Epoch 00096: loss did not improve from 1.33907
Epoch 97/100

Epoch 00097: loss improved from 1.33907 to 1.32043, saving model to lstm-weights-imp

<keras.callbacks.History at 0xb27e491d0>

In [37]:
# load the network weights
# NOTE(review): the file name below uses the "lstm-" prefix and epoch 97, while
# the checkpoint cell's current pattern starts with "lstm1-" and fit() requests
# 40 epochs, so a fresh Restart & Run All would not create this file - confirm
# which training run it belongs to.

# filename = "lstm5-weights-improvement-100-0.9618.hdf5"
filename = "lstm-weights-improvement-97-1.3204.hdf5"
model.load_weights(filename)
# Compile again with the same loss and optimizer used when the model was built.
model.compile(loss='categorical_crossentropy', optimizer='adam')

In [38]:
# Inverse lookup: integer index -> character, used to decode model predictions.
int_to_char = dict(enumerate(chars))

In [39]:
# Greedy generation: starting from a randomly chosen training window, emit the
# most probable next character 100 times, sliding the window forward by one
# character after each prediction.

# pick a random seed
# np.random.randint excludes its upper bound, so pass len(dataX) itself to make
# every pattern (including the last one) selectable.
start = np.random.randint(0, len(dataX))
# Copy the pattern: the append below would otherwise add an element to the
# list stored inside dataX and leave the training data ragged.
pattern = list(dataX[start])
print("Seed:")
# NOTE: the seed text itself is not printed (the line below is disabled), so
# what follows "Seed:" in the output is the generated text.
# print("\"", ''.join([int_to_char[value] for value in pattern]), "\"")
# generate characters
for i in range(100):
    # Shape and scale the window exactly like the training inputs.
    x = np.reshape(pattern, (1, len(pattern), 1))
    x = x / float(n_vocab)
    prediction = model.predict(x, verbose=0)
    # Most probable character; int() keeps the window a list of plain ints.
    index = int(np.argmax(prediction))
    result = int_to_char[index]
    sys.stdout.write(result)
    # Slide the window: add the new character, drop the oldest one.
    pattern.append(index)
    pattern = pattern[1:]
print("\nDone.")

Seed:
 the mone me. Aruule me?  Mow. Wou ie wou teve g momstlde. What sas the blondr oet of to poen it? Wh
Done.


In [41]:
# Sampled generation: starting from a randomly chosen training window, draw
# each next character at random according to the model's predicted
# distribution (rather than always taking the most probable one), 500 times,
# sliding the window forward by one character after each draw.

# pick a random seed
# np.random.randint excludes its upper bound, so pass len(dataX) itself to make
# every pattern (including the last one) selectable.
start = np.random.randint(0, len(dataX))
# Copy the pattern: the append below would otherwise add an element to the
# list stored inside dataX and leave the training data ragged.
pattern = list(dataX[start])
print("Seed:")
# NOTE: the seed text itself is not printed (the line below is disabled), so
# what follows "Seed:" in the output is the generated text.
# print("\"", ''.join([int_to_char[value] for value in pattern]), "\"")
# generate characters
for i in range(500):
    # Shape and scale the window exactly like the training inputs.
    x = np.reshape(pattern, (1, len(pattern), 1))
    x = x / float(n_vocab)
    prediction = model.predict(x, verbose=0)
    # Sample an index with probability given by the softmax output;
    # int() keeps the window a list of plain ints.
    index = int(np.random.choice(len(prediction[0]), p=prediction[0]))
    result = int_to_char[index]
    sys.stdout.write(result)
    # Slide the window: add the new character, drop the oldest one.
    pattern.append(index)
    pattern = pattern[1:]
print("\nDone.")

Seed:
eng hn the gnhwt dfd io h mere of fy. Iemei aetees. Hhmk.  I’le oe.  Tpa iir ii atis. B'll gor oh io  - dr toe sisce ie tio erenr, Si Saareod wharl aon mnr-  Sle ttel iy whi coa’e carel bei?  Yoeyre hoc gblo! oom? Sho ann tha semeo Demmid thoti fr I dos Bro, H doswe.. W aldles har.  Bkb yhe hldd o. Bed I gonit temen bhr on. Ohef im hnd. .Ol. Mharey!they'd whst yiu eew i wortire,  Ms mo meoe. Teathm ihn?  Det oome se hid tosesls? Tolhksir hnint'  Tnlker’e tsy’rel,s cr toe pafe afyuyed tork soueeg
Done.
