# character-wise RNN

![Overview](https://github.com/udacity/deep-learning/raw/78c91a5607ecfdc29b762e45c082d7ca5047c8a1/intro-to-rnns/assets/charseq.jpeg)

In [19]:

import time
from collections import namedtuple
import torch
from torch import nn
import torch.nn.functional as F
import numpy as np
torch.manual_seed(0)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False
np.random.seed(0)

## Loading data

In [2]:
!curl http://www.gutenberg.org/files/2591/2591-0.txt --create-dirs -o .pytorch/trialReport/trial.txt
with open('.pytorch/trialReport/trial.txt') as r:
    reports = r.read()
reports[:100]

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100  547k  100  547k    0     0   568k      0 --:--:-- --:--:-- --:--:--  568k


'\ufeffThe Project Gutenberg EBook of Grimms’ Fairy Tales, by The Brothers Grimm\n\nThis eBook is for the us'

In [3]:
import re
words = re.split(r'\W+', reports)
words = [word.lower() for word in words]
print(words[:100])

['', 'the', 'project', 'gutenberg', 'ebook', 'of', 'grimms', 'fairy', 'tales', 'by', 'the', 'brothers', 'grimm', 'this', 'ebook', 'is', 'for', 'the', 'use', 'of', 'anyone', 'anywhere', 'at', 'no', 'cost', 'and', 'with', 'almost', 'no', 'restrictions', 'whatsoever', 'you', 'may', 'copy', 'it', 'give', 'it', 'away', 'or', 're', 'use', 'it', 'under', 'the', 'terms', 'of', 'the', 'project', 'gutenberg', 'license', 'included', 'with', 'this', 'ebook', 'or', 'online', 'at', 'www', 'gutenberg', 'org', 'title', 'grimms', 'fairy', 'tales', 'author', 'the', 'brothers', 'grimm', 'translator', 'edgar', 'taylor', 'and', 'marian', 'edwardes', 'posting', 'date', 'december', '14', '2008', 'ebook', '2591', 'release', 'date', 'april', '2001', 'last', 'updated', 'november', '7', '2016', 'language', 'english', 'character', 'set', 'encoding', 'utf', '8', 'start', 'of', 'this']


In [4]:
reports = ' '.join(words)

## Tokenization

In [5]:
vocab = tuple(set(reports))
print('vocabs:\n', vocab)
int_to_vocab = dict(enumerate(vocab))
print('int to vocab:\n', int_to_vocab)
vocab_to_int = {v: i for i,v in int_to_vocab.items()}
encoded = np.array([vocab_to_int[v] for v in reports], dtype=np.int32)

vocabs:
 ('0', '6', 'h', '1', '5', 'v', 's', '4', '8', '_', '7', 'c', 'l', 'r', 'd', 'x', 'i', 'm', '9', 'z', '3', 'n', 'f', 'b', 'e', 'p', 'u', ' ', 'g', 'w', 'o', 'y', '2', 'q', 'k', 'j', 't', 'a')
int to vocab:
 {0: '0', 1: '6', 2: 'h', 3: '1', 4: '5', 5: 'v', 6: 's', 7: '4', 8: '8', 9: '_', 10: '7', 11: 'c', 12: 'l', 13: 'r', 14: 'd', 15: 'x', 16: 'i', 17: 'm', 18: '9', 19: 'z', 20: '3', 21: 'n', 22: 'f', 23: 'b', 24: 'e', 25: 'p', 26: 'u', 27: ' ', 28: 'g', 29: 'w', 30: 'o', 31: 'y', 32: '2', 33: 'q', 34: 'k', 35: 'j', 36: 't', 37: 'a'}


In [6]:
encoded[:100]

array([27, 36,  2, 24, 27, 25, 13, 30, 35, 24, 11, 36, 27, 28, 26, 36, 24,
       21, 23, 24, 13, 28, 27, 24, 23, 30, 30, 34, 27, 30, 22, 27, 28, 13,
       16, 17, 17,  6, 27, 22, 37, 16, 13, 31, 27, 36, 37, 12, 24,  6, 27,
       23, 31, 27, 36,  2, 24, 27, 23, 13, 30, 36,  2, 24, 13,  6, 27, 28,
       13, 16, 17, 17, 27, 36,  2, 16,  6, 27, 24, 23, 30, 30, 34, 27, 16,
        6, 27, 22, 30, 13, 27, 36,  2, 24, 27, 26,  6, 24, 27, 30],
      dtype=int32)

## One-hot Encoding

In [7]:
def one_hot_encode(arr, n_labels):

    return np.eye(n_labels,n_labels,  dtype=np.float32)[arr]
# check that the function works as expected
test_seq = np.array([[3, 5, 1]])
one_hot = one_hot_encode(test_seq, 8)

print(one_hot)

[[[0. 0. 0. 1. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 1. 0. 0.]
  [0. 1. 0. 0. 0. 0. 0. 0.]]]


## Batching

```
                M steps ( seq length )
               xxx                 xxx
               x                     x
               x                     x                                Starting sequence:
               x                     x                                [1 2 3 4 5 6 7 8 9 10 11 12]
               x                     x
N batch size   x                     x                                Batch size = 2
(No. of steps) x                     x                                [1 2 3 4 5 6]
               x                     x                                [7 8 9 10 11 12]
               x                     x
               x                     x                                Seq length = 3
               x                     x
               x                     x                                  ┌─────┐
               x                     x                                [ │1 2 3│ 4 5 6]
               x                     x                                [ │7 8 9│ 10 11 12]
               x                     x                                  └─────┘
               xxx                 xxx

            xxxxxxxxxxxxxx   xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
                         xxxxx
                          xx

                          k= No. of batches = total chars/ N.M
```

In [8]:
def create_batches(arr, batch_size, seq_length):
    batch_size_total = batch_size * seq_length
    n_batches = len(arr) // batch_size_total
    arr = arr[:n_batches*batch_size_total]
    arr = arr.reshape((batch_size,-1))

    for n in range(0, arr.shape[1], seq_length):
        x = arr[:, n:n+seq_length]
        y = np.zeros_like(x)
        try:
            y[:, :-1], y[:, -1] = x[:, 1:], arr[:, n+seq_length]
        except IndexError:
            y[:, :-1], y[:, -1] = x[:, 1:], arr[:, 0]
        yield x, y

In [9]:
batches = create_batches(encoded, 8, 50)
x, y= next(batches)
print('x:\n', x[:10, :10])
print('\ny:\n', y[:10, :10])

x:
 [[27 36  2 24 27 25 13 30 35 24]
 [ 6 27 37 21 14 27 36  2 24 27]
 [23 26 13 14 24 21 27 25 37 13]
 [ 6 34 27 36  2 24 27 13 16 21]
 [ 2 27 31 30 26 27 29 30 26 12]
 [ 6 36 13 37 21 28 24 27 36 30]
 [30 21 27 36  2 24 16 13 27 22]
 [13 14 27 37 27 28 13 24 37 36]]

y:
 [[36  2 24 27 25 13 30 35 24 11]
 [27 37 21 14 27 36  2 24 27  6]
 [26 13 14 24 21 27 25 37 13 36]
 [34 27 36  2 24 27 13 16 21 28]
 [27 31 30 26 27 29 30 26 12 14]
 [36 13 37 21 28 24 27 36 30 29]
 [21 27 36  2 24 16 13 27 22 37]
 [14 27 37 27 28 13 24 37 36 27]]


## Defining model

In [10]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

cuda


In [11]:
class textgenRNN(nn.Module):
    def __init__(self,n_output, n_hidden=256, n_layers=2, drop_prob=0.5, lr=0.001):
        super().__init__()
        self.drop_prob = drop_prob
        self.n_layers = n_layers
        self.n_hidden = n_hidden
        self.lr = lr
        self.n_output = n_output

        self.lstm = nn.LSTM(n_output, n_hidden, n_layers, dropout=drop_prob, batch_first=True)
        self.dropout1 = nn.Dropout(drop_prob)
        self.fc1 = nn.Linear(n_hidden, n_output)

    def forward(self, x, hidden):
        out, hid = self.lstm(x, hidden)
        out = self.dropout1(out)

        out = out.contiguous().view(-1, self.n_hidden)

        out = self.fc1(out)
        return out, hid

    def init_hidden(self, batch_size):
        ''' Initializes hidden state '''
        # Create two new tensors with sizes n_layers x batch_size x n_hidden,
        # initialized to zero, for hidden state and cell state of LSTM
        weight = next(self.parameters()).data

        hidden = (torch.zeros(self.n_layers, batch_size, self.n_hidden).to(device),
                  torch.zeros(self.n_layers, batch_size, self.n_hidden).to(device))

        return hidden

In [12]:
from torchsummary import summary
n_hidden=512
n_layers=2
model = textgenRNN(len(vocab), n_hidden=n_hidden, n_layers=n_layers)
model.to(device)
print(model)

textgenRNN(
  (lstm): LSTM(38, 512, num_layers=2, batch_first=True, dropout=0.5)
  (dropout1): Dropout(p=0.5, inplace=False)
  (fc1): Linear(in_features=512, out_features=38, bias=True)
)


In [13]:
import os
if os.path.isdir('checkpoint') and os.path.isfile('./checkpoint/trialckpt.t7'):
    model.load_state_dict(torch.load('./checkpoint/trialckpt.t7', map_location=torch.device(device)))

## Training

In [14]:
from sklearn.model_selection import train_test_split
epochs=100
batch_size=32
seq_length=64
lr=0.01
clip=5
test_portion=0.1

# change model mode to train
model.train()

optim = torch.optim.Adam(model.parameters(), lr=lr)
criterion = nn.CrossEntropyLoss()

# create training and validation data
data, test_data = train_test_split(encoded, test_size=test_portion, shuffle=False, random_state=0)

In [15]:
counter = 0
n_vocab = len(vocab)
best_val_loss = 1000
for e in range(epochs):
    # initialize hidden state
    h = model.init_hidden(batch_size)

    for x, y in create_batches(data, batch_size, seq_length):
        counter += 1

        # One-hot encode our data and make them Torch tensors
        x = one_hot_encode(x, n_vocab)
        inputs, targets = torch.from_numpy(x), torch.from_numpy(y)

        inputs, targets = inputs.to(device), targets.to(device)

        # Creating new variables for the hidden state, otherwise
        # we'd backprop through the entire training history
        h = tuple([each.data for each in h])

        # zero accumulated gradients
        model.zero_grad()

        # get the output from the model
        output, h = model(inputs, h)

        # calculate the loss and perform backprop
        loss = criterion(output, targets.view(batch_size*seq_length).long())
        loss.backward()
        # `clip_grad_norm` helps prevent the exploding gradient problem in RNNs / LSTMs.
        nn.utils.clip_grad_norm_(model.parameters(), clip)
        optim.step()

        # loss stats
        if counter % 100 == 0:
            # Get validation loss
            val_h = model.init_hidden(batch_size)
            val_losses = []
            model.eval()
            for x, y in create_batches(test_data, batch_size, seq_length):
                # One-hot encode our data and make them Torch tensors
                x = one_hot_encode(x, n_vocab)
                x, y = torch.from_numpy(x), torch.from_numpy(y)

                # Creating new variables for the hidden state, otherwise
                # we'd backprop through the entire training history
                val_h = tuple([each.data for each in val_h])

                inputs, targets = x, y
                inputs, targets = inputs.to(device), targets.to(device)

                output, val_h = model(inputs, val_h)
                val_loss = criterion(output, targets.view(batch_size*seq_length).long())

                val_losses.append(val_loss.item())

            print('==> Saving model ...')
            best_val_loss = min(np.mean(val_losses), best_val_loss)
            if not os.path.isdir('checkpoint'):
                os.mkdir('checkpoint')
            if best_val_loss == np.mean(val_losses):
              torch.save(model.state_dict(), './checkpoint/trialckpt.t7')

            model.train() # reset to train mode after iterationg through validation data

            print("Epoch: {}/{}...".format(e+1, epochs),
                  "Step: {}...".format(counter),
                  "Loss: {:.4f}...".format(loss.item()),
                  "Val Loss: {:.4f}".format(np.mean(val_losses)))

==> Saving model ...
Epoch: 1/100... Step: 100... Loss: 2.6166... Val Loss: 2.6582
==> Saving model ...
Epoch: 1/100... Step: 200... Loss: 2.2398... Val Loss: 2.3429
==> Saving model ...
Epoch: 2/100... Step: 300... Loss: 2.0099... Val Loss: 2.1482
==> Saving model ...
Epoch: 2/100... Step: 400... Loss: 1.8993... Val Loss: 2.0162
==> Saving model ...
Epoch: 3/100... Step: 500... Loss: 1.7479... Val Loss: 1.9247
==> Saving model ...
Epoch: 3/100... Step: 600... Loss: 1.5934... Val Loss: 1.8656
==> Saving model ...
Epoch: 4/100... Step: 700... Loss: 1.6504... Val Loss: 1.8033
==> Saving model ...
Epoch: 4/100... Step: 800... Loss: 1.5091... Val Loss: 1.7713
==> Saving model ...
Epoch: 4/100... Step: 900... Loss: 1.5150... Val Loss: 1.7537
==> Saving model ...
Epoch: 5/100... Step: 1000... Loss: 1.4988... Val Loss: 1.7170
==> Saving model ...
Epoch: 5/100... Step: 1100... Loss: 1.4187... Val Loss: 1.7102
==> Saving model ...
Epoch: 6/100... Step: 1200... Loss: 1.3841... Val Loss: 1.6771
=

# Predicting next character

In [16]:
def predict(model, inputs, h=None, top_k= None):
    x = np.array([[vocab_to_int[inputs]]])
    x = one_hot_encode(x, len(vocab))
    inp = torch.from_numpy(x).to(device)

    # get access to hidden state
    h = tuple([i.data for i in h])
    # pass inputs and hidden state to model
    out, h = model(inp, h)

    # get prob of the char
    p = F.softmax(out, dim=1).data
    p = p.to('cpu')

    if top_k is None:
        top_ch = np.arange(len(vocab))
    else:
        p, top_ch = p.topk(top_k)
        top_ch = top_ch.numpy().squeeze()

    p = p.numpy().squeeze()
    v = np.random.choice(top_ch, p=p/p.sum())

    return int_to_vocab[v], h


# Sample text

In [17]:
def sample(model, size, prime='the ', top_k=None):

    model.to(device)

    model.eval() # eval mode

    # First off, run through the prime characters
    chars = [ch for ch in prime]
    h = model.init_hidden(1)
    for ch in prime:
        char, h = predict(model, ch, h, top_k=top_k)

    chars.append(char)

    # Now pass in the previous character and get a new one
    for ii in range(size):
        char, h = predict(model, chars[-1], h, top_k=top_k)
        chars.append(char)

    return ''.join(chars)
print(sample(model, 2000, prime='the ', top_k=26))

the monster threw off she contented he ought to be fools on the window lid down the door oh not so good robbery dreading out of the house it came into the way and curling a fiddle and screamed to him the fish corn the forest all but it struck him into what s prince for and gone but when the sausages rose away in the meantime i am marleen and the bough straight him some wheel make carried and travelling his horse and amy to your golden he liked cut pearls it have he were rowend and asked him that they burnt as if the wide way of the died why did the horses about moying looking in the old cog why all his sister he ventures to her pain i never once more would save you along and looked down the poor world and the branches now she fell till there in the sausage en show carried back and she got on the ant take them before it shook so much by a cunning so batter at first as she dressed them up the sun who when he had eaten the servant who had way and how it was telling him crept down then he 

In [18]:
!ls -la .

total 24
drwxr-xr-x 1 root root 4096 Apr  2 12:58 .
drwxr-xr-x 1 root root 4096 Apr  2 12:30 ..
drwxr-xr-x 2 root root 4096 Apr  2 12:58 checkpoint
drwxr-xr-x 4 root root 4096 Mar 25 13:38 .config
drwxr-x--- 3 root root 4096 Apr  2 12:58 .pytorch
drwxr-xr-x 1 root root 4096 Mar 25 13:38 sample_data
