In [1]:
import tensorflow as tf
import os
import glob
import numpy as np
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM, Embedding, Dropout
from tensorflow.keras.preprocessing.sequence import pad_sequences
import matplotlib.pyplot as plt
from tensorflow import keras
import pandas as pd

In [2]:
def generate_text_seq(model, tokenizer, text_seq_length, seed_text, n_words):
    """Greedily generate ``n_words`` words that continue ``seed_text``.

    At every step the running text is encoded with ``tokenizer``, padded or
    truncated (from the front) to ``text_seq_length`` tokens, and passed to
    ``model``; the class with the highest predicted score is taken as the
    next word and appended to the running text.

    Args:
        model: Object exposing ``predict(batch)`` that returns one row of
            per-class scores for each input row.
        tokenizer: Object exposing ``texts_to_sequences`` and ``word_index``
            (a word -> integer index mapping).
        text_seq_length: Number of tokens fed to the model per step.
        seed_text: Text the generation starts from.
        n_words: Number of words to generate.

    Returns:
        The generated words joined by single spaces (the seed text is not
        included). A predicted index with no entry in ``word_index``
        contributes an empty string.
    """
    # Build the reverse lookup once rather than scanning word_index for
    # every generated word.
    index_to_word = {index: word for word, index in tokenizer.word_index.items()}

    predictions = []
    for _ in range(n_words):
        encoded = tokenizer.texts_to_sequences([seed_text])[0]
        encoded = pad_sequences([encoded], maxlen=text_seq_length, truncating='pre')
        # `Sequential.predict_classes` is gone from recent tf.keras releases;
        # argmax over the per-class scores selects the same class.
        class_scores = model.predict(encoded, verbose=0)
        predicted_index = int(np.argmax(class_scores, axis=-1)[0])
        predicted_word = index_to_word.get(predicted_index, '')
        seed_text = seed_text + ' ' + predicted_word
        predictions.append(predicted_word)
    return ' '.join(predictions)

In [3]:
# Read the raw book text; the context manager closes the file handle.
with open("/home/aho/git/malazan/txt/malazan_08_book_four_toll_the_hounds.txt") as book_file:
    text = book_file.read()
text_split = text.split()

## Generate training windows of 25 input words plus 1 target word
length = 25 + 1

# One space-joined line per sliding window of `length` whitespace-separated words.
# NOTE(review): range() stops before len(text_split), so the window ending on the
# very last word is never emitted. Left unchanged so the tokenizer stays identical
# to the one the saved checkpoints were trained with -- confirm before changing.
lines = [' '.join(text_split[i - length:i]) for i in range(length, len(text_split))]

print(len(lines))

tokenizer = Tokenizer()
tokenizer.fit_on_texts(lines)
sequences = tokenizer.texts_to_sequences(lines)

# Slice the per-line token lists directly instead of first wrapping them in
# np.array(sequences): if the tokenizer yields a different token count for some
# lines (presumably possible when it splits or drops words -- TODO confirm),
# the nested lists are ragged and recent NumPy refuses to build an array from them.
X = np.array([tokens[0:length - 1] for tokens in sequences])
y = np.array([tokens[-1] for tokens in sequences])
vocab_size = len(tokenizer.word_index) + 1  # +1: index 0 is not assigned to any word
y = to_categorical(y, num_classes=vocab_size)
seq_length = X.shape[1]

print(X.shape)
print(seq_length)

vocab_size

95708
(95708, 25)
25


8150

In [37]:
# Units per LSTM layer. Defined here so this cell runs on a fresh kernel; it
# previously relied on `lstm_size` leaking out of the checkpoint-loading loop
# further down. 300 matches the layer widths in the recorded summary.
lstm_size = 300

# Embedding -> two stacked LSTM layers with dropout -> softmax over the vocabulary.
model = Sequential()
model.add(Embedding(vocab_size, 100, input_length=seq_length))
model.add(LSTM(lstm_size, return_sequences=True))  # emit every timestep for the next LSTM
model.add(Dropout(0.2))
model.add(LSTM(lstm_size))
model.add(Dropout(0.2))
model.add(Dense(vocab_size, activation='softmax'))

# summary() prints the table itself and returns None, so wrapping it in
# print() only appends a stray "None".
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding (Embedding)        (None, 25, 100)           815000    
_________________________________________________________________
lstm (LSTM)                  (None, 25, 300)           481200    
_________________________________________________________________
dropout (Dropout)            (None, 25, 300)           0         
_________________________________________________________________
lstm_1 (LSTM)                (None, 300)               721200    
_________________________________________________________________
dropout_1 (Dropout)          (None, 300)               0         
_________________________________________________________________
dense (Dense)                (None, 8150)              2453150   
Total params: 4,470,550
Trainable params: 4,470,550
Non-trainable params: 0
______________________________________________

In [None]:
## Read in models and generate text
## Final output:  A table with rows as number of iters, columns are predictions

In [35]:
# Generated text per LSTM-size run: {run name: {zero-padded checkpoint number: text}}.
lstm_preds = {}

for lstm_size in [100, 200, 300]:
    run_name = f'malazan-tth-lstm_size_{lstm_size}'
    lstm_preds[run_name] = {}
    # Named `epoch` rather than `iter` so the builtin iter() is not shadowed
    # for the rest of the kernel session.
    for epoch in [10, 20, 30, 50, 150, 200, 300]:
        epoch_tag = str(epoch).rjust(3, '0')
        model_tmp = keras.models.load_model(f"/projects/dapg/nlp/{run_name}/cp-0{epoch_tag}.ckpt")
        tmp_pred = generate_text_seq(model_tmp, tokenizer, seq_length, "the power of that sword was breathtaking even for a god", 50)
        lstm_preds[run_name][epoch_tag] = tmp_pred

In [16]:
# Generated text per batch-size run: {run name: {zero-padded checkpoint number: text}}.
batch_preds = {}

for batch_size in [64, 128, 256]:
    run_name = f'malazan-tth-batch_size_{batch_size}'
    batch_preds[run_name] = {}
    # Named `epoch` rather than `iter` so the builtin iter() is not shadowed
    # for the rest of the kernel session.
    for epoch in [10, 20, 30, 50, 150, 200, 300]:
        epoch_tag = str(epoch).rjust(3, '0')
        model_tmp = keras.models.load_model(f"/projects/dapg/nlp/{run_name}/cp-0{epoch_tag}.ckpt")
        tmp_pred = generate_text_seq(model_tmp, tokenizer, seq_length, "the power of that sword was breathtaking even for a god", 50)
        batch_preds[run_name][epoch_tag] = tmp_pred

In [8]:
# Show full cell contents in the prediction tables; None is the documented
# "no limit" value for this option.
pd.set_option('display.max_colwidth', None)

# Interpreting Results

In [36]:
# One column per LSTM-size run, one row per checkpoint number.
pd.DataFrame.from_dict(lstm_preds, orient='columns')

Unnamed: 0,malazan-tth-lstm_size_100,malazan-tth-lstm_size_200,malazan-tth-lstm_size_300
10,s face and the man s face and the man s face and the man s face and the man s face and the man s face and the man s face and the man s face and the man s face and the man s face and the man,s eyes and the man was not the man s eyes was the man s eyes and the man was a man s eyes and the man was a man s eyes and the man was a man s eyes was the man s eyes and the man was a,s face and the man s head was a single thing of the city of the city of the city of the city of the city of the city of the city of the city of the city of the city of the city of the city of the city
20,s head and the hounds of shadow and the world s head and the hounds of shadow and the world s head and the hounds of shadow and the world s head and the hounds of shadow and the world s head and the hounds of shadow and the world,s charms the other tool the god had been spoken and the last thing of the fated a woman who had been cooing of weather of reeds the battlefield was threading into the midst of the cart and the one of the hoses was unclaimed and the one who had,breathtaking terrifying the world was gone the dragon was gone the beast was gone destroying the way of the city and the hound s head was the hounds of shadow and the hound s head was a endless machinery of skins and bone as if the beast was gone the
30,s charms that the world of shadow and the world of shadow deliciously feminine deadly empress conjured and the hounds of shadow was a few dog a few dog chasing mistaken and faceted in the first man he had been a coward of the world of the city of the,s consideration and needed to be stationed in the grasses the entire way of the world the explosions the suave lie and the whipping snow sprayed winds and then the one of the chained was the first robust among the nobles s a dead and the high king halts it,breathtaking terrifying he could feel the principle of convergence and cunning to the south of the gadrobi hills and the hounds of shadow had begun to shape the pickings of the house s head and acrobatically avoiding the impact boomed in the midst of the city exploded and the hounds
50,s careering arrival and now hinted in the air exploded from the impact s snout inward into the midst of a tapestry and the hound s head struck the lance cart and the phoenix inn was proving elusive a single refrain of the phoenix inn he had been a single,breathtaking terrifying the five plain of the gedderone f te was sinking away in the sky the world was rank with the dream who had once been itkovian down in the path of the enemy he was not endest silann could feel the same when they were truly nimander did,breathtaking terrifying they toppled on the sisters s careering eyes swung down from the saddle the beast s head pitched back on the mirror waters of the street and then a jungle nestled impossibly as the hounds of shadow had begun to shape the pickings on the cobbles and then
150,breathtaking terrifying they were chained in its wake each other frozen eyes than the soul of death and it was snatched to pass the hounds of shadow was a few thing a few paces ahead and it was a few paces away from the garden the foreman said was a,breathtaking terrifying they were but ghosts as they fought with a fancy aghast but now he would feel the ones more of anomander rake had ever been the cloud to spill the blind of oblivion the city it had been a time but the one that bore the lord of,breathtaking terrifying they looked gouged used their soft gifts before one side to the night a god s brain this lost his companion did the most chaos had now killed free of the tumbled wrinkled canvas of bodies that scurried in the centre of the city if he was worthy
200,breathtaking creatures on the wagon closing over her hands into the midst of a dozen nearby structures figures now clambered on the cobbles and then drawn in a chunk of stone that swirled past a screaming street and whirled through the estates district and eastward to the estate compound that,s dark judgement those few and the grip and all the lord of the dead f te and two great ravens were playing and proof of dread dread forces of shadow and two others and now the moon arrived in its skull had draining long and so much a single,breathtaking terrifying they might have been sufficient cause for some time could prove some trouble are indeed as they were in a group in an instant dragons would not be so centuries to heal wait to do and to see the damage but all this one and anomander rake himself
300,breathtaking creatures on the hounds tattoo like black roots a thundering of soldiering but the one who sought to arrive out of this dark was bleeding telling us on to me no could be so much he drew out that and that he could gaze forward enough and he had,s consideration a pattern a sword no little woman had been watching and none of this will such take this by the two god had things he would have to send him home the man had never been that it would be but he had no other to fall into,breathtaking terrifying they drew them into the air the gloom was locked in convulsions a convergence of inimical elements a narrow door to stare into the base of his brain surna and a few deep over the doorway s head were crossed his arms and i need to find a


We begin by looking at how LSTM layer size affects the ability of our model to learn.  After 10 and 20 iterations, there's no noticeable difference between the models, all of which produce repetitive and nonsensical text.  What's interesting is that by iteration 30, the model with LSTM size 300 begins to predict the word sequence "breathtaking terrifying" following our input sentence, while the models with LSTM sizes 200 and 100 reach this point at 50 and 150 iterations, respectively.  

As we reach 300 iterations, the following is noticeable:

- LSTM size 100 diverges from the phrase `breathtaking terrifying` to `breathtaking creatures` at iteration 200  
- LSTM size 200 goes on its own path at iteration 200 and begins predicting new word sequences  
- LSTM size 300 remains adamant that `breathtaking terrifying` is the most likely sequence following our input sentence  

Even with 300 epochs, the generated text prediction is not coherent, but is able to capture some of the key elements in the book following the specific scene.  For example, the LSTM size 300 is able to identify `a convergence` which is indeed a key theme surrounding this passage.

In [17]:
# One column per batch-size run, one row per checkpoint number.
pd.DataFrame.from_dict(batch_preds, orient='columns')

Unnamed: 0,malazan-tth-batch_size_64,malazan-tth-batch_size_128,malazan-tth-batch_size_256
10,he had been a single thing of the city the god was the god was the god was the god was the god was the god was the god of the city the god was the god of the city the god was the god s face the beast was,and the man s head and a single thing and the man s head and a single thing of the city and the man s head and the man s head and a single thing and then he had been a single thing and then he had been a single,s face and the beast s face and the beast s face he was a god s one s one s face he was a man s one s one s face he was a man s one s one s one s one s one of the city that
20,to behold the man s hoofs echoed in the midst of the track sembling almost transparent in the air the ancient man s head were prodding the air and a moment later she was thrown from the saddle and then he could not be necessary and worse else and then,that begins the notion of shadow had been singular indulgence she had been a single refrain of the city of the world the redeemer were a few of the world the redeemer were a few of the city that was the harvester of shadow had been chosen by the city,the guard had been a single ravens gambit the world the two man had been a few priestess the man s eye the man s head was a few of the world the two man had been a few of the world the two man had been a few priestess
30,breathtaking terrifying and the nearest things were doomed to the petty squabbles of the city the explosions the gloom the ancient moon chapter a bow wave of awareness and then the beast was finished the entire hounds of shadow unleashed the endless machinery of bhokarala tumbled loose from the air,he had been arguing and in shaping the man s eye clutching the city and the conveyance tear down in the air and the hound s momentum slammed him from the air and then the hounds of shadow was fading dulling his own stock of the city and the man,and then he could not be a newly vacated seat to the city of the gadrobi district and the beleaguered gossipy barrier between the city the caged plain and then the hounds of shadow had been tacked to the phoenix inn the foreman was not even so clever he had
50,breathtaking terrifying and a few pulped bodies in the mix clouds of dust spurts of hissing flame from ruptured gas pipes the ominous subterranean roar of deadlier eruptions such a thing can stick a thumping cart to the south of the house harsh and bone the undead god slowly said,s own law no one would match the gratitude of the past and the frailty of draconus unleashed the ancient family held the edge of the man s eye and the victor s instructions and the victor s instructions was a prick under his neck the beast of the track,the night of the world the hounds of shadow was no time and he had been so much to resist the riot of senseless celebration shorn had been chained for that he had no idea he was a survivor he d been a few day clip s not his own
150,breathtaking terrifying they were about the hounds of shadow could be viewed as inevitable given the scant training these assassins possessed since this group was the spit leading to close the beast s head struck the hound s head pitched down on his knees doubling up the edge of the,to witness to stand in the ash swarmed sky above their stumpy howling into their smoke samar dev saw the river of darkness that passes up in some jutting rush through the detritus while a handful of terrified pilgrims fled back the other side of the base of the wagon,was suddenly hunger and all that were the taste of patience but what was he doing nothing he could make use the very one who had been so much of them he was not speaking about it he was seeing as you have people brushed his cheek and then the
200,breathtaking terrifying they were of the same no one else to bear witness to a home of power until the very goddess was like a single refrain of some blessed sister a gods standing there was a fresh mound of raw earth steaming vines were visibly snaking down to the,breathtaking terrifying they continued sighing no doubt as if the soul had never lost for such a manifestation of chaos above its very indifference as the unexpected soul of the great ravens wheeled adding their faded strokes of millions were air it stood on the animal s brain that staggered,breathtaking terrifying they had gone before the woman who had been so much as it was possible to go in a reconciliatory face of a spreading glittering face that was the only thing of such things could a clever gesture the one god had been an eye of the man
300,breathtaking terrifying their warrens left all is no lost to be an inspiring standard to stand in front of hardly a thing not even lost but it was some time before answering it meant to him of course they would all die but he had just been so much of,breathtaking terrifying the beast had gone to resist an act of courage to wipe their ancestors their heads draconus crawled in the path to the scene to fight everywhere to the high king s swift spite the god remains out in behind her a city are arrayed into a crushing,breathtaking terrifying they had gone now oh here now in the meantime how bout that and nimander was the only chance that yes he would make the knot of which made this matter the grim confines of gradithan and then she s and heard her mind and it was as


Next, we move on to understanding how changing the batch size affects the training process of our model.  Let's pause first and consider the literature on batch size, which makes the following points:

- Larger batch size yields faster computations and training of the data 
- At the same time, a batch size that's too large will result in a model that is less generalizable (this can be empirically tested with a test set and measuring accuracy)  
- A smaller batch size yields faster convergence to solutions

Do we observe this in the data?

From our previous review of how changing LSTM size affects model performance, we observed that predicting "breathtaking terrifying" as the most likely sequence following our input sequence seemed to be the "convergent" solution.  We see that with a small batch size of 64, the model converges to this prediction after only 30 epochs.  This is quite an improvement over larger batch sizes!

It's also noticeable that with a smaller batch size, we obtain more readable sentences that follow generally correct grammatical structure with fewer epochs than that of larger batch sizes. Certain sequences of predicted text are almost passable for being in the book, and some are comical (e.g., "draconus crawled in the path to the scene to fight everywhere").  

Sources:  [Source 1](https://machinelearningmastery.com/difference-between-a-batch-and-an-epoch/), [Source 2](https://medium.com/mini-distill/effect-of-batch-size-on-training-dynamics-21c14f7a716e), [Source 3](https://stats.stackexchange.com/questions/164876/what-is-the-trade-off-between-batch-size-and-number-of-iterations-to-train-a-neu)  



# Concluding Remarks

This was a good initial journey toward understanding how the hyperparameters of batch size, number of epochs, and LSTM layer size affect the training of an NLP text-generating model.  Training time was quite long, averaging about 20 minutes per epoch, even with only the last section of a single book.  Attempting to train on the full book, which would have yielded more interesting results, took over 2 hours per epoch.  Looking forward, the following could be done:

- Include a test set to measure model accuracy and empirically measure observational interpretation of effects of batch size and LSTM size
- Train on a larger sample of the book 
- Compare different models beyond tweaking of hyperparameters
- Try different Dropout rates