

# RNN for Text Generation

## Generating Text (encoded variables)

We saw how to generate continuous values; now let's see how to generalize this to generating categorical sequences (such as words or letters).

## Imports

In [1]:
import torch
from torch import nn
import torch.nn.functional as F

import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

## Get Text Data

In [2]:
with open('TomSawyer.txt','r',encoding='utf8') as f:
    text = f.read()

In [3]:
text[:1000]

'THE ADVENTURES OF TOM SAWYER\n\nBy Mark Twain\n(Samuel Langhorne Clemens)\n\nPREFACE\n\nMost of the adventures recorded in this book really occurred; one or two\nwere experiences of my own, the rest those of boys who were schoolmates\nof mine. Huck Finn is drawn from life; Tom Sawyer also, but not from an\nindividual--he is a combination of the characteristics of three boys whom\nI knew, and therefore belongs to the composite order of architecture.\n\nThe odd superstitions touched upon were all prevalent among children and\nslaves in the West at the period of this story--that is to say, thirty or\nforty years ago.\n\nAlthough my book is intended mainly for the entertainment of boys and\ngirls, I hope it will not be shunned by men and women on that account,\nfor part of my plan has been to try to pleasantly remind adults of what\nthey once were themselves, and of how they felt and thought and talked,\nand what queer enterprises they sometimes engaged in.\n\nTHE AUTHOR.\n\nHARTFORD, 187

In [4]:
print(text[:1000])

THE ADVENTURES OF TOM SAWYER

By Mark Twain
(Samuel Langhorne Clemens)

PREFACE

Most of the adventures recorded in this book really occurred; one or two
were experiences of my own, the rest those of boys who were schoolmates
of mine. Huck Finn is drawn from life; Tom Sawyer also, but not from an
individual--he is a combination of the characteristics of three boys whom
I knew, and therefore belongs to the composite order of architecture.

The odd superstitions touched upon were all prevalent among children and
slaves in the West at the period of this story--that is to say, thirty or
forty years ago.

Although my book is intended mainly for the entertainment of boys and
girls, I hope it will not be shunned by men and women on that account,
for part of my plan has been to try to pleasantly remind adults of what
they once were themselves, and of how they felt and thought and talked,
and what queer enterprises they sometimes engaged in.

THE AUTHOR.

HARTFORD, 1876.


CHAPTER I

“TOM!”

No

In [5]:
len(text)

406270

## Encode Entire Text

In [6]:
a = [1,1,1,2,2,2,3,3]

In [7]:
b = set(a)

In [8]:
b

{1, 2, 3}

In [9]:
# Unique characters appearing in the text (the model's vocabulary).
# NOTE: the index <-> character mappings built below follow this set's iteration
# order. For a set of strings that order can differ between Python sessions
# (string hash randomization), so a model saved in one kernel session may see a
# different mapping after a restart. Sort the characters if the mapping must be
# reproducible across sessions.
all_characters = set(text)

In [10]:
all_characters

{'\n',
 ' ',
 '!',
 '$',
 '%',
 '&',
 "'",
 '(',
 ')',
 '*',
 ',',
 '-',
 '.',
 '/',
 '0',
 '1',
 '2',
 '3',
 '4',
 '5',
 '6',
 '7',
 '8',
 '9',
 ':',
 ';',
 '?',
 '@',
 'A',
 'B',
 'C',
 'D',
 'E',
 'F',
 'G',
 'H',
 'I',
 'J',
 'K',
 'L',
 'M',
 'N',
 'O',
 'P',
 'Q',
 'R',
 'S',
 'T',
 'U',
 'V',
 'W',
 'X',
 'Y',
 '[',
 ']',
 '_',
 'a',
 'b',
 'c',
 'd',
 'e',
 'f',
 'g',
 'h',
 'i',
 'j',
 'k',
 'l',
 'm',
 'n',
 'o',
 'p',
 'q',
 'r',
 's',
 't',
 'u',
 'v',
 'w',
 'x',
 'y',
 'z',
 '“',
 '”'}

In [11]:
decoder = dict(enumerate(all_characters))

In [12]:
decoder


{0: 'S',
 1: '.',
 2: '*',
 3: 'z',
 4: 'R',
 5: 'I',
 6: 'a',
 7: '6',
 8: 'l',
 9: '?',
 10: 'r',
 11: '(',
 12: ')',
 13: 'Y',
 14: 'O',
 15: 'i',
 16: 'k',
 17: '”',
 18: 'J',
 19: 'y',
 20: 'P',
 21: 'u',
 22: '5',
 23: ':',
 24: 'V',
 25: 'T',
 26: '4',
 27: 'H',
 28: 'F',
 29: '8',
 30: 'G',
 31: 'X',
 32: '\n',
 33: '%',
 34: '[',
 35: 'U',
 36: 'w',
 37: 'o',
 38: 'A',
 39: 'f',
 40: '@',
 41: '9',
 42: 'x',
 43: '/',
 44: 'W',
 45: 'g',
 46: 'e',
 47: 'c',
 48: 'n',
 49: 'm',
 50: 'q',
 51: 't',
 52: ']',
 53: 'L',
 54: 'N',
 55: '“',
 56: ';',
 57: 'j',
 58: '3',
 59: 'v',
 60: 'h',
 61: '!',
 62: '7',
 63: '&',
 64: 'E',
 65: '_',
 66: 'D',
 67: 'b',
 68: 'Q',
 69: '1',
 70: '$',
 71: '0',
 72: 'C',
 73: "'",
 74: '-',
 75: 'p',
 76: 'd',
 77: 'B',
 78: 's',
 79: ' ',
 80: 'M',
 81: 'K',
 82: ',',
 83: '2'}

In [13]:
decoder.items()

dict_items([(0, 'S'), (1, '.'), (2, '*'), (3, 'z'), (4, 'R'), (5, 'I'), (6, 'a'), (7, '6'), (8, 'l'), (9, '?'), (10, 'r'), (11, '('), (12, ')'), (13, 'Y'), (14, 'O'), (15, 'i'), (16, 'k'), (17, '”'), (18, 'J'), (19, 'y'), (20, 'P'), (21, 'u'), (22, '5'), (23, ':'), (24, 'V'), (25, 'T'), (26, '4'), (27, 'H'), (28, 'F'), (29, '8'), (30, 'G'), (31, 'X'), (32, '\n'), (33, '%'), (34, '['), (35, 'U'), (36, 'w'), (37, 'o'), (38, 'A'), (39, 'f'), (40, '@'), (41, '9'), (42, 'x'), (43, '/'), (44, 'W'), (45, 'g'), (46, 'e'), (47, 'c'), (48, 'n'), (49, 'm'), (50, 'q'), (51, 't'), (52, ']'), (53, 'L'), (54, 'N'), (55, '“'), (56, ';'), (57, 'j'), (58, '3'), (59, 'v'), (60, 'h'), (61, '!'), (62, '7'), (63, '&'), (64, 'E'), (65, '_'), (66, 'D'), (67, 'b'), (68, 'Q'), (69, '1'), (70, '$'), (71, '0'), (72, 'C'), (73, "'"), (74, '-'), (75, 'p'), (76, 'd'), (77, 'B'), (78, 's'), (79, ' '), (80, 'M'), (81, 'K'), (82, ','), (83, '2')])

In [14]:
encoder = {char: ind for ind,char in decoder.items()}

In [15]:
encoder

{'S': 0,
 '.': 1,
 '*': 2,
 'z': 3,
 'R': 4,
 'I': 5,
 'a': 6,
 '6': 7,
 'l': 8,
 '?': 9,
 'r': 10,
 '(': 11,
 ')': 12,
 'Y': 13,
 'O': 14,
 'i': 15,
 'k': 16,
 '”': 17,
 'J': 18,
 'y': 19,
 'P': 20,
 'u': 21,
 '5': 22,
 ':': 23,
 'V': 24,
 'T': 25,
 '4': 26,
 'H': 27,
 'F': 28,
 '8': 29,
 'G': 30,
 'X': 31,
 '\n': 32,
 '%': 33,
 '[': 34,
 'U': 35,
 'w': 36,
 'o': 37,
 'A': 38,
 'f': 39,
 '@': 40,
 '9': 41,
 'x': 42,
 '/': 43,
 'W': 44,
 'g': 45,
 'e': 46,
 'c': 47,
 'n': 48,
 'm': 49,
 'q': 50,
 't': 51,
 ']': 52,
 'L': 53,
 'N': 54,
 '“': 55,
 ';': 56,
 'j': 57,
 '3': 58,
 'v': 59,
 'h': 60,
 '!': 61,
 '7': 62,
 '&': 63,
 'E': 64,
 '_': 65,
 'D': 66,
 'b': 67,
 'Q': 68,
 '1': 69,
 '$': 70,
 '0': 71,
 'C': 72,
 "'": 73,
 '-': 74,
 'p': 75,
 'd': 76,
 'B': 77,
 's': 78,
 ' ': 79,
 'M': 80,
 'K': 81,
 ',': 82,
 '2': 83}

In [16]:
encoder['?']

9

In [17]:
encoder['T']

25

In [18]:
encoder['Q']

68

In [23]:
encoder[' ']

79

In [20]:
encoded_text = np.array([encoder[char] for char in text])

In [21]:
len(encoded_text)

406270

In [22]:
encoded_text[:500]

array([25, 27, 64, 79, 38, 66, 24, 64, 54, 25, 35,  4, 64,  0, 79, 14, 28,
       79, 25, 14, 80, 79,  0, 38, 44, 13, 64,  4, 32, 32, 77, 19, 79, 80,
        6, 10, 16, 79, 25, 36,  6, 15, 48, 32, 11,  0,  6, 49, 21, 46,  8,
       79, 53,  6, 48, 45, 60, 37, 10, 48, 46, 79, 72,  8, 46, 49, 46, 48,
       78, 12, 32, 32, 20,  4, 64, 28, 38, 72, 64, 32, 32, 80, 37, 78, 51,
       79, 37, 39, 79, 51, 60, 46, 79,  6, 76, 59, 46, 48, 51, 21, 10, 46,
       78, 79, 10, 46, 47, 37, 10, 76, 46, 76, 79, 15, 48, 79, 51, 60, 15,
       78, 79, 67, 37, 37, 16, 79, 10, 46,  6,  8,  8, 19, 79, 37, 47, 47,
       21, 10, 10, 46, 76, 56, 79, 37, 48, 46, 79, 37, 10, 79, 51, 36, 37,
       32, 36, 46, 10, 46, 79, 46, 42, 75, 46, 10, 15, 46, 48, 47, 46, 78,
       79, 37, 39, 79, 49, 19, 79, 37, 36, 48, 82, 79, 51, 60, 46, 79, 10,
       46, 78, 51, 79, 51, 60, 37, 78, 46, 79, 37, 39, 79, 67, 37, 19, 78,
       79, 36, 60, 37, 79, 36, 46, 10, 46, 79, 78, 47, 60, 37, 37,  8, 49,
        6, 51, 46, 78, 32

In [24]:
print(text[:500])

THE ADVENTURES OF TOM SAWYER

By Mark Twain
(Samuel Langhorne Clemens)

PREFACE

Most of the adventures recorded in this book really occurred; one or two
were experiences of my own, the rest those of boys who were schoolmates
of mine. Huck Finn is drawn from life; Tom Sawyer also, but not from an
individual--he is a combination of the characteristics of three boys whom
I knew, and therefore belongs to the composite order of architecture.

The odd superstitions touched upon were all prevalent amo


## One Hot Encoding

As previously discussed, we need to one-hot encode our data in order for it to work with the network structure. Make sure to review NumPy if any of these operations confuse you!

In [25]:
a = np.array([0, 1, 1,2,7,3,2])

In [26]:
b = np.zeros((a.size, a.max()+1))
b

array([[0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0.]])

In [27]:
b[np.arange(a.size),a] =1

In [28]:
b

array([[1., 0., 0., 0., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0., 0., 0.],
       [0., 0., 1., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 1.],
       [0., 0., 0., 1., 0., 0., 0., 0.],
       [0., 0., 1., 0., 0., 0., 0., 0.]])

In [31]:
def one_hot_encoder(encoded_text, num_uni_chars):
    '''
    One-hot encode an integer array of character codes.

    encoded_text : numpy integer array (any shape) of character indices
    num_uni_chars : number of unique characters (len(set(text))); this is
                    the width of each one-hot vector

    Returns a float32 array of shape (*encoded_text.shape, num_uni_chars)
    containing a single 1.0 per vector at the position of the character code.
    '''

    # METHOD FROM:
    # https://stackoverflow.com/questions/29831489/convert-array-of-indices-to-1-hot-encoded-numpy-array

    # Work on a flat list of codes: one row of the table per character
    flat_codes = encoded_text.reshape(-1)
    row_ids = np.arange(flat_codes.size)

    # Allocate directly as float32 so the result can be handed to PyTorch
    # without a dtype mismatch against the model's float32 weights
    table = np.zeros((flat_codes.size, num_uni_chars), dtype=np.float32)

    # Fancy indexing: for row r, switch on the column given by flat_codes[r]
    table[row_ids, flat_codes] = 1.0

    # Restore the original batch shape, with the one-hot axis appended last
    return table.reshape(encoded_text.shape + (num_uni_chars,))

In [32]:
one_hot_encoder(np.array([0,1,2]),3)

array([[1., 0., 0.],
       [0., 1., 0.],
       [0., 0., 1.]], dtype=float32)

In [118]:
# np.arange(encoded_text.shape[0])

In [119]:
# encoded_text

In [120]:
# encoded_text.flatten()

In [121]:
# int(406269/500)

In [122]:
# 500*812

--------------
---------------
# Creating Training Batches

We need to create a function that will generate batches of characters along with the next character in the sequence as a label.

-----------------
------------

In [33]:
example_text = np.arange(10)

In [34]:
example_text

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [35]:
# If we wanted 5 batches
example_text.reshape((5,-1))

array([[0, 1],
       [2, 3],
       [4, 5],
       [6, 7],
       [8, 9]])

In [36]:
# samp_per_batch=10 

In [37]:
# seq_len=50

In [38]:
# char_per_batch = samp_per_batch * seq_len
# char_per_batch

In [39]:
# Number of batches available to make
# Use int() to round to nearest integer
# num_batches_avail = int(len(encoded_text)/char_per_batch)
# num_batches_avail

In [40]:
    # Cut off end of encoded_text that
#     won't fit evenly into a batch
#     encoded_text = encoded_text[:num_batches_avail * char_per_batch]
#     len(encoded_text)

In [41]:
    # Reshape text into rows the size of a batch
#     encoded_text = encoded_text.reshape((samp_per_batch, -1))
#     encoded_text.shape

In [42]:
# b = range(0, encoded_text.shape[1], seq_len)
# b

In [46]:
def generate_batches(encoded_text, samp_per_batch=10, seq_len=50):

    '''
    Generate (using yield) batches for training.

    X: Encoded Text of length seq_len
    Y: Encoded Text shifted by one

    Example:

    X:

    [[1 2 3]]

    Y:

    [[ 2 3 4]]

    encoded_text : Complete Encoded Text (1-D numpy array) to make batches from
    samp_per_batch : Number of samples (rows) per batch
    seq_len : Length of character sequence

    Yields (x, y) tuples of arrays with shape (samp_per_batch, seq_len).
    Characters at the end of encoded_text that do not fill a whole batch are
    dropped. For the final batch there is no "next" character in the row, so
    the last target of each row wraps around to that row's first character.
    Nothing is yielded when encoded_text is shorter than one full batch.
    '''

    # Total number of characters per batch
    # Example: If samp_per_batch is 2 and seq_len is 50, then 100
    # characters come out per batch.
    char_per_batch = samp_per_batch * seq_len

    # Number of complete batches available (floor division drops the remainder)
    num_batches_avail = len(encoded_text) // char_per_batch

    # Cut off end of encoded_text that
    # won't fit evenly into a batch
    encoded_text = encoded_text[:num_batches_avail * char_per_batch]

    # One row per sample; each row is a contiguous stretch of the text, so
    # consecutive batches continue where the previous one stopped in every row
    encoded_text = encoded_text.reshape((samp_per_batch, -1))
    row_len = encoded_text.shape[1]

    # Walk along the rows in windows of seq_len columns
    for n in range(0, row_len, seq_len):

        # Grab feature characters
        x = encoded_text[:, n:n+seq_len]

        # y is the target shifted over by 1
        y = np.zeros_like(x)
        y[:, :-1] = x[:, 1:]

        # The last target is the character right after the window. At the end
        # of the row there is none, so wrap around to column 0 (explicit bounds
        # check instead of catching an IndexError with a bare except).
        next_col = n + seq_len
        if next_col >= row_len:
            next_col = 0
        y[:, -1] = encoded_text[:, next_col]

        yield x, y

### Example of generating a batch

In [47]:
sample_text = encoded_text[:20]

In [48]:
sample_text

array([25, 27, 64, 79, 38, 66, 24, 64, 54, 25, 35,  4, 64,  0, 79, 14, 28,
       79, 25, 14])

In [57]:
batch_generator = generate_batches(encoded_text,samp_per_batch=10,seq_len=50)

In [58]:
# Grab first batch
x, y = next(batch_generator)

In [59]:
x

array([[25, 27, 64, 79, 38, 66, 24, 64, 54, 25, 35,  4, 64,  0, 79, 14,
        28, 79, 25, 14, 80, 79,  0, 38, 44, 13, 64,  4, 32, 32, 77, 19,
        79, 80,  6, 10, 16, 79, 25, 36,  6, 15, 48, 32, 11,  0,  6, 49,
        21, 46],
       [48, 76, 79,  6, 79, 67, 10, 37, 51, 60, 46, 10, 82, 79, 36, 15,
        51, 60, 37, 21, 51, 79, 76, 15, 78, 51, 15, 48, 47, 51, 15, 37,
        48, 79, 37, 39, 32, 47, 37,  8, 37, 10, 82, 79,  6, 48, 76, 79,
        60, 15],
       [76, 46, 10, 79, 39, 37,  8,  8, 37, 36, 46, 76, 23, 32, 32, 55,
        54, 37, 36, 82, 79, 78, 15, 10, 82, 79, 45, 37, 79,  6, 48, 76,
        79, 78, 15, 51, 79, 36, 15, 51, 60, 79, 51, 60, 46, 79, 45, 15,
        10,  8],
       [15, 78, 79, 16, 46, 46, 75, 79, 21, 78, 79, 39, 10, 37, 49, 79,
        65, 46, 59, 46, 10, 65, 32, 51, 46,  8,  8, 15, 48, 45, 74, 74,
        65,  6,  8, 36,  6, 19, 78, 65,  9, 17, 32, 32, 55, 14, 39, 79,
        47, 37],
       [79,  6, 48, 76, 79, 46, 19, 46, 76, 79, 51, 60, 46, 32, 78, 

In [60]:
y

array([[27, 64, 79, 38, 66, 24, 64, 54, 25, 35,  4, 64,  0, 79, 14, 28,
        79, 25, 14, 80, 79,  0, 38, 44, 13, 64,  4, 32, 32, 77, 19, 79,
        80,  6, 10, 16, 79, 25, 36,  6, 15, 48, 32, 11,  0,  6, 49, 21,
        46,  8],
       [76, 79,  6, 79, 67, 10, 37, 51, 60, 46, 10, 82, 79, 36, 15, 51,
        60, 37, 21, 51, 79, 76, 15, 78, 51, 15, 48, 47, 51, 15, 37, 48,
        79, 37, 39, 32, 47, 37,  8, 37, 10, 82, 79,  6, 48, 76, 79, 60,
        15, 78],
       [46, 10, 79, 39, 37,  8,  8, 37, 36, 46, 76, 23, 32, 32, 55, 54,
        37, 36, 82, 79, 78, 15, 10, 82, 79, 45, 37, 79,  6, 48, 76, 79,
        78, 15, 51, 79, 36, 15, 51, 60, 79, 51, 60, 46, 79, 45, 15, 10,
         8, 78],
       [78, 79, 16, 46, 46, 75, 79, 21, 78, 79, 39, 10, 37, 49, 79, 65,
        46, 59, 46, 10, 65, 32, 51, 46,  8,  8, 15, 48, 45, 74, 74, 65,
         6,  8, 36,  6, 19, 78, 65,  9, 17, 32, 32, 55, 14, 39, 79, 47,
        37, 21],
       [ 6, 48, 76, 79, 46, 19, 46, 76, 79, 51, 60, 46, 32, 78, 51, 

--------

## GPU Check

Remember this will take a lot longer on CPU!

In [61]:
torch.cuda.is_available()

False

# Creating the LSTM Model

**Note! We will have options for GPU users and CPU users. CPU will take MUCH LONGER to train and you may encounter RAM issues depending on your hardware. If that is the case, consider using cloud services like AWS, GCP, or Azure. Note, these may cost you money to use!**

In [64]:
class CharModel(nn.Module):
    '''
    Character-level LSTM model: one-hot characters in, per-character scores
    (logits) over the vocabulary out.

    all_chars : collection of the unique characters (the vocabulary)
    num_hidden : size of the LSTM hidden state
    num_layers : number of stacked LSTM layers
    drop_prob : dropout probability (between LSTM layers and before the
                final linear layer)
    use_gpu : when True, hidden states are created on the GPU

    NOTE: decoder/encoder indices follow the iteration order of all_chars.
    If all_chars is a set of strings, that order can differ between Python
    sessions, so the mapping used at prediction time must be the same one the
    training data was encoded with.
    '''

    def __init__(self, all_chars, num_hidden=256, num_layers=3,drop_prob=0.5,use_gpu=False):

        # SET UP ATTRIBUTES
        super().__init__()
        self.drop_prob = drop_prob
        self.num_layers = num_layers
        self.num_hidden = num_hidden
        self.use_gpu = use_gpu

        #CHARACTER SET, ENCODER, and DECODER
        self.all_chars = all_chars
        # index -> character
        self.decoder = dict(enumerate(all_chars))
        # character -> index; built from this instance's own decoder (not a
        # notebook-level global) so the two mappings are always inverses
        self.encoder = {char: ind for ind, char in self.decoder.items()}

        # Input is one one-hot vector per character; batch_first=True means
        # inputs are laid out as (batch, sequence, features)
        self.lstm = nn.LSTM(len(self.all_chars), num_hidden, num_layers, dropout=drop_prob, batch_first=True)

        self.dropout = nn.Dropout(drop_prob)

        # Maps each hidden vector to one score per character
        self.fc_linear = nn.Linear(num_hidden, len(self.all_chars))

    def forward(self, x, hidden):
        '''
        x : one-hot input of shape (batch, seq_len, len(all_chars))
        hidden : tuple (h, c) of LSTM states, e.g. from hidden_state()

        Returns (final_out, hidden) where final_out has shape
        (batch * seq_len, len(all_chars)) and hidden is the updated state.
        '''

        lstm_output, hidden = self.lstm(x, hidden)

        drop_output = self.dropout(lstm_output)

        # Stack every time step of every sequence into rows so the linear
        # layer scores each character position independently
        drop_output = drop_output.contiguous().view(-1, self.num_hidden)

        final_out = self.fc_linear(drop_output)

        return final_out, hidden

    def hidden_state(self, batch_size):
        '''
        Used as separate method to account for both GPU and CPU users.

        Returns a tuple of two zero tensors, each of shape
        (num_layers, batch_size, num_hidden), moved to the GPU when
        use_gpu is True.
        '''

        state_shape = (self.num_layers, batch_size, self.num_hidden)
        hidden = (torch.zeros(state_shape), torch.zeros(state_shape))

        if self.use_gpu:
            hidden = tuple(state.cuda() for state in hidden)

        return hidden
        

## Instance of the Model

In [73]:
model = CharModel(
    all_chars=all_characters,
    num_hidden=128,
    num_layers=3,
    drop_prob=0.5,
    use_gpu=False,
)

In [74]:
# Element count of every parameter tensor in the model (weights and biases)
total_param = [int(param.numel()) for param in model.parameters()]

Try to make the total_parameters be roughly the same magnitude as the number of characters in the text.

In [75]:
sum(total_param)

384596

In [76]:
len(encoded_text)

406270

### Optimizer and Loss

In [77]:
optimizer = torch.optim.Adam(model.parameters(),lr=0.01)
criterion = nn.CrossEntropyLoss()

## Training Data and Validation Data

In [78]:
# Fraction (0-1) of the encoded text used for training;
# the remainder is held out as validation data
train_percent = 0.1

In [79]:
len(encoded_text)

406270

In [80]:
int(len(encoded_text) * (train_percent))

40627

In [81]:
train_ind = int(len(encoded_text) * (train_percent))

In [82]:
train_data = encoded_text[:train_ind]
val_data = encoded_text[train_ind:]

# Training the Network

## Variables

Feel free to play around with these values!

In [83]:
## VARIABLES

# Epochs to train for
epochs = 5
# Number of sequences (rows) per batch
batch_size = 128

# Length of each character sequence
seq_len = 100

# Global step counter, for printing report purposes
# always start at 0
tracker = 0

# Number of distinct character codes (width of the one-hot vectors).
# Codes start at 0, so the largest code + 1 is the vocabulary size.
# Use the vectorised array max rather than Python's built-in max(), which
# would iterate over every element of the array one by one.
num_char = int(encoded_text.max()) + 1

------

In [84]:
# Put the model in training mode (enables dropout)
model.train()


# Move the model to the GPU if requested
if model.use_gpu:
    model.cuda()

for i in range(epochs):

    # Start every epoch from a zeroed LSTM state
    hidden = model.hidden_state(batch_size)


    for x,y in generate_batches(train_data,batch_size,seq_len):

        tracker += 1

        # One Hot Encode incoming data (targets stay as integer class indices)
        x = one_hot_encoder(x,num_char)

        # Convert Numpy Arrays to Tensor

        inputs = torch.from_numpy(x)
        targets = torch.from_numpy(y)

        # Adjust for GPU if necessary

        if model.use_gpu:

            inputs = inputs.cuda()
            targets = targets.cuda()

        # Detach the hidden state from the previous batch's graph.
        # The state VALUES are carried over between batches; detaching only
        # stops backpropagation from running through all training history.
        hidden = tuple(state.detach() for state in hidden)

        model.zero_grad()

        lstm_output, hidden = model(inputs,hidden)
        loss = criterion(lstm_output,targets.view(batch_size*seq_len).long())

        loss.backward()

        # POSSIBLE EXPLODING GRADIENT PROBLEM!
        # LET'S CLIP JUST IN CASE
        nn.utils.clip_grad_norm_(model.parameters(),max_norm=5)

        optimizer.step()



        ###################################
        ### CHECK ON VALIDATION SET ######
        #################################

        # NOTE(review): with the notebook's current settings (40,627 training
        # characters, batch_size=128, seq_len=100 -> 3 batches per epoch, and
        # 5 epochs) tracker only reaches 15, so this report never runs.
        # Lower the interval or train for longer to see validation output.
        if tracker % 25 == 0:

            val_hidden = model.hidden_state(batch_size)
            val_losses = []

            # Evaluation mode disables dropout
            model.eval()

            # No gradients are needed for validation; skipping graph
            # construction saves memory and time
            with torch.no_grad():

                for val_x,val_y in generate_batches(val_data,batch_size,seq_len):

                    # One Hot Encode incoming data
                    val_x = one_hot_encoder(val_x,num_char)

                    # Convert Numpy Arrays to Tensor

                    inputs = torch.from_numpy(val_x)
                    targets = torch.from_numpy(val_y)

                    # Adjust for GPU if necessary

                    if model.use_gpu:

                        inputs = inputs.cuda()
                        targets = targets.cuda()

                    lstm_output, val_hidden = model(inputs,val_hidden)
                    val_loss = criterion(lstm_output,targets.view(batch_size*seq_len).long())

                    val_losses.append(val_loss.item())

            # Reset to training mode after val for loop
            model.train()

            # Report the average over ALL validation batches (not just the
            # last one); nan if the validation data produced no full batch
            mean_val_loss = np.mean(val_losses) if val_losses else float('nan')

            print(f"Epoch: {i} Step: {tracker} Val Loss: {mean_val_loss}")

-------
------

## Saving the Model

https://pytorch.org/tutorials/beginner/saving_loading_models.html

In [85]:
# Be careful NOT to overwrite an existing saved model that uses the same file name!
model_name = 'example.net'

In [86]:
torch.save(model.state_dict(),model_name)

## Load Model

In [87]:
# MUST MATCH THE EXACT SAME SETTINGS AS MODEL USED DURING TRAINING!

model = CharModel(
    all_chars=all_characters,
    num_hidden=128,
    num_layers=3,
    drop_prob=0.5,
    use_gpu=False,
)

In [88]:
model.load_state_dict(torch.load(model_name))
model.eval()

CharModel(
  (lstm): LSTM(84, 128, num_layers=3, batch_first=True, dropout=0.5)
  (dropout): Dropout(p=0.5)
  (fc_linear): Linear(in_features=128, out_features=84, bias=True)
)

# Generating Predictions

--------

In [89]:
def predict_next_char(model, char, hidden=None, k=1):
    '''
    Feed a single character to the model and sample the next character.

    model : trained model exposing encoder, decoder, all_chars, use_gpu
            and hidden_state()
    char : the input character (must be a key of model.encoder)
    hidden : LSTM state tuple from the previous call; when None a fresh
             zero state for a single sequence is created
    k : sample among the k most probable next characters (k=1 always
        picks the most probable one)

    Returns (next_char, hidden): the sampled character and the updated state.
    '''

    # Start from a zeroed state when the caller does not supply one
    if hidden is None:
        hidden = model.hidden_state(1)

    # Encode the raw character and wrap it as a (1, 1) array:
    # a batch of one sequence containing one character.
    # NOTE THE [[ ]] dimensions!!
    encoded_text = np.array([[model.encoder[char]]])

    # One hot encoding -> shape (1, 1, number of characters)
    encoded_text = one_hot_encoder(encoded_text, len(model.all_chars))

    # Convert to Tensor
    inputs = torch.from_numpy(encoded_text)

    # Check for GPU
    if model.use_gpu:
        inputs = inputs.cuda()

    # Detach the incoming state from any earlier computation graph
    hidden = tuple(state.detach() for state in hidden)

    # Inference only: no gradients needed
    with torch.no_grad():
        # Run model and get predicted output
        lstm_out, hidden = model(inputs, hidden)

        # Convert lstm_out to probabilities
        probs = F.softmax(lstm_out, dim=1)

    if model.use_gpu:
        # move back to CPU to use with numpy
        probs = probs.cpu()

    # k determines how many characters to consider
    # for our probability choice.
    # https://pytorch.org/docs/stable/torch.html#torch.topk

    # Return k largest probabilities in tensor
    probs, index_positions = probs.topk(k)

    # Flatten (rather than squeeze) so both stay 1-D even when k == 1.
    # A 0-d array would be read by np.random.choice as a population SIZE
    # instead of a list of candidate indices.
    index_positions = index_positions.numpy().flatten()
    probs = probs.numpy().flatten()

    # Renormalise the k kept probabilities so they sum to 1
    probs = probs/probs.sum()

    # randomly choose a character index based on probabilities
    char = np.random.choice(index_positions, p=probs)

    # return the decoded predicted character and the hidden state
    return model.decoder[int(char)], hidden

In [90]:
def generate_text(model, size, seed='The', k=1):
    '''
    Generate text by repeatedly sampling the next character from the model.

    model : trained model (see predict_next_char for what it must expose)
    size : number of sampling steps to run after the seed has been consumed
    seed : non-empty starting text used to prime the hidden state
    k : sample among the k most probable characters at every step

    Returns the seed followed by the generated characters. One character is
    produced while consuming the seed and one per sampling step, so the
    result has len(seed) + size + 1 characters.

    Raises ValueError if seed is empty (there is nothing to prime the model with).
    '''

    if not seed:
        raise ValueError("seed must contain at least one character")

    # CHECK FOR GPU
    if model.use_gpu:
        model.cuda()
    else:
        model.cpu()

    # Evaluation mode
    model.eval()

    # begin output from initial seed
    output_chars = list(seed)

    # initiate hidden state for a single sequence
    hidden = model.hidden_state(1)

    # Feed the seed one character at a time to build up the hidden state.
    # Only the prediction made after the LAST seed character is kept.
    for seed_char in seed:
        next_char, hidden = predict_next_char(model, seed_char, hidden, k=k)

    # first generated character (the prediction following the seed)
    output_chars.append(next_char)

    # Now generate for size requested
    for _ in range(size):

        # predict based off very last letter in output_chars
        next_char, hidden = predict_next_char(model, output_chars[-1], hidden, k=k)

        # add predicted character
        output_chars.append(next_char)

    # return string of predicted text
    return ''.join(output_chars)

In [92]:
print(generate_text(model, 1000, seed='West ', k=3))

West ee  e t    tt  t     e t   e   e tt  eett ettt       e et  t  t ttt e  ette te    tee  te   ee    e t t t  te t  e    e t  et eeee   t et ttee    e  te   ee e ee t  ee t      te ettee e ee eeee  tet e  te   t eee t   eee  te  t tt t  eet  e  tt eetee etet  t  e e    t   t e  e   tte et t t  t  ee  te   t t  eeee  e   eett  t t   te ett te e tt t   te eett     eeee  eee  t    tte e teee ttte eeet t    te e    t eete e e e      te e   e       e ee ee  ettt          te      et t t  ttte et te  tte  tt teet t etet ete     ee  e    tt   ee e  et  e  t    eettt  eete   tt ee e  et  et    t t e ett tt   t etet   te te  te eeeee t tee ee tt t   e   tt    e  eettt e tt et   tetee e e e ettet ttee   e e tee e  t t t  e e ee  t  eeeteeeet  e   e  e   e te    te eeee  e   tee tettteeeet ee   teet e  ee    ee teet t t e  te  t     t ee t  e    t e te     tee  e tet tt    eete e   e     ee etete  eee  t  t        t   tt  t    t  e t  t  ettt t tt   e te e t ee ttet  eeteee tte  tee e ee ete  t 