In [2]:
# Adapted from Chapter 11 in LDL

In [9]:
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import LSTM
import tensorflow as tf
import logging
tf.get_logger().setLevel(logging.ERROR)

In [12]:
# parameters
EPOCHS = 5 # cut down the batch size due to a long runtime
BATCH_SIZE = 256
INPUT_FILE_NAME = 'data/frankenstein.txt'
WINDOW_LENGTH = 40
WINDOW_STEP = 3
BEAM_SIZE = 8
NUM_LETTERS = 11
MAX_LENGTH = 50

In [13]:
# Open the input file
file = open(INPUT_FILE_NAME, 'r', encoding='utf-8')
text = file.read()
file.close()

In [14]:
# Make lowercase, remove newline as well as extra spaces
text = text.lower()
text = text.replace('\n', ' ')
text = text.replace(' ', '')

In [15]:
# Encode characters as indices
unique_chars = list(set(text))
char_to_index = dict((ch, index) for index, ch in enumerate(unique_chars))
index_to_char = dict((index, ch) for index, ch in enumerate(unique_chars))
encoding_width = len(char_to_index)

In [16]:
# Create training examples
fragments = []
targets = []
for i in range(0, len(text) - WINDOW_LENGTH, WINDOW_STEP):
  fragments.append(text[i: i + WINDOW_LENGTH])
  targets.append(text[i + WINDOW_LENGTH])

In [17]:
# Converting to one-hot encoded training data
X = np.zeros((len(fragments), WINDOW_LENGTH, encoding_width))
y = np.zeros((len(fragments), encoding_width))

for i, fragment in enumerate(fragments):
  for j, char in enumerate(fragment):
    X[i, j, char_to_index[char]] = 1
  target_char = targets[i]
  y[i, char_to_index[target_char]] = 1

In [18]:
targets[0:5]

['b', 'a', 'w', 'l', 'o']

In [19]:
fragments[0:5]

['\ufefftheprojectgutenbergebookoffrankenstein,',
 'eprojectgutenbergebookoffrankenstein,bym',
 'ojectgutenbergebookoffrankenstein,bymary',
 'ctgutenbergebookoffrankenstein,bymarywol',
 'utenbergebookoffrankenstein,bymarywollst']

In [20]:
# Build and train model
model = Sequential()
model.add(LSTM(128, return_sequences=True, dropout=0.2, recurrent_dropout=0.2, input_shape=(None, encoding_width)))
model.add(LSTM(128, dropout=0.2, recurrent_dropout=0.2))
model.add(Dense(encoding_width, activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer='adam')

In [21]:
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm (LSTM)                 (None, None, 128)         99840     
                                                                 
 lstm_1 (LSTM)               (None, 128)               131584    
                                                                 
 dense (Dense)               (None, 66)                8514      
                                                                 
Total params: 239,938
Trainable params: 239,938
Non-trainable params: 0
_________________________________________________________________


In [22]:
history = model.fit(X, y, validation_split=0.05, batch_size=BATCH_SIZE, epochs=EPOCHS, verbose=2, shuffle=True)

Epoch 1/5
445/445 - 153s - loss: 2.9710 - val_loss: 2.8912 - 153s/epoch - 344ms/step
Epoch 2/5
445/445 - 146s - loss: 2.7136 - val_loss: 2.8171 - 146s/epoch - 329ms/step
Epoch 3/5
445/445 - 149s - loss: 2.6458 - val_loss: 2.7678 - 149s/epoch - 335ms/step
Epoch 4/5
445/445 - 149s - loss: 2.5972 - val_loss: 2.7368 - 149s/epoch - 335ms/step
Epoch 5/5
445/445 - 149s - loss: 2.5555 - val_loss: 2.6942 - 149s/epoch - 334ms/step


In [33]:
# Create initial single beam represented by triplet as (prob, string, one-hot encoded string)
letters = 'the body '
one_hots = []

for i, char in enumerate(letters):
  x = np.zeros(encoding_width)
  # x[char_to_index[char]] = 1 # for some reason, this line errors
  one_hots.append(x)
beams = [(np.log(1.0), letters, one_hots)]

In [34]:
# Predict NUM_LETTERS into the future
for i in range(NUM_LETTERS):
  minibatch_list = []
  # Create minibatch from one-hot encodings, and predict the result
  for triple in beams:
    minibatch_list.append(triple[2])
    minibatch = np.array(minibatch_list)
    y_predict = model.predict(minibatch, verbose=0)
    new_beams = []
    for j, softmax_vec in enumerate(y_predict):
      triple = beams[j]
      # Create BEAM_SIZE new beams from each existing beam
      for k in range(BEAM_SIZE):
        char_index = np.argmax(softmax_vec)
        new_prob = triple[0] + np.log(softmax_vec[char_index])
        new_letters = triple[1] + index_to_char[char_index]
        x = np.zeros(encoding_width)
        x[char_index] = 1
        new_one_hots = triple[2].copy()
        new_one_hots.append(x)
        new_beams.append((new_prob, new_letters, new_one_hots))
        softmax_vec[char_index] = 0
  new_beams.sort(key=lambda tup: tup[0], reverse=True)
  beams = new_beams[0:BEAM_SIZE]
for item in beams:
  print(item[1])

the body thesthesthe
the body andandandin
the body theandandin
the body thestheande
the body thesthestha
the body andandandan
the body thestheandi
the body thestheanda
