<a href="https://colab.research.google.com/github/devWithDeepak/dl_notebooks/blob/master/char_rnn_book_writer.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Mount Google Drive inside the Colab VM so the training corpus stored on
# Drive can be opened like a local file. Running this cell asks for an
# authorization code.
from google.colab import drive

GDRIVE_MOUNT_POINT = '/content/gdrive'
drive.mount(GDRIVE_MOUNT_POINT)

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3Aietf%3Awg%3Aoauth%3A2.0%3Aoob&scope=email%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdocs.test%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive.photos.readonly%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fpeopleapi.readonly&response_type=code

Enter your authorization code:
··········
Mounted at /content/gdrive


In [0]:
import torch
import numpy as np
from torch import nn
import torch.nn.functional as F

In [0]:
def one_hot_encode(arr, n_labels):
    """One-hot encode integer labels.

    Args:
        arr: Integer index, or (nested) sequence / array of integer indices,
            each selecting a row of an ``n_labels`` x ``n_labels`` identity
            matrix.
        n_labels: Number of distinct labels, i.e. the length of every
            one-hot vector.

    Returns:
        A float NumPy array shaped like ``arr`` with one extra trailing axis
        of length ``n_labels``; each vector is all zeros except for a 1.0 at
        the position given by the corresponding label.
    """
    # Row i of the identity matrix is exactly the one-hot vector for label i,
    # so indexing the matrix with the labels performs the whole encoding.
    identity = np.eye(n_labels)
    one_hot = identity[arr]
    return one_hot.astype(float)

In [4]:
# Quick sanity check: labels 3, 4 and 5 out of 8 classes should each light up
# exactly one column of their row.
demo_labels = [3, 4, 5]
demo_one_hot = one_hot_encode(demo_labels, 8)
print(demo_one_hot)

[[0. 0. 0. 1. 0. 0. 0. 0.]
 [0. 0. 0. 0. 1. 0. 0. 0.]
 [0. 0. 0. 0. 0. 1. 0. 0.]]


In [0]:
def get_batches(arr, batch_size, seq_length):
    """Yield ``(x, y)`` mini-batches for next-character prediction.

    The encoded text is trimmed to a whole number of batches, laid out as
    ``batch_size`` parallel rows, and then cut into consecutive windows of
    ``seq_length`` columns.

    Args:
        arr: 1-D array-like of integer-encoded characters.
        batch_size: Number of sequences (rows) per batch.
        seq_length: Number of time steps (columns) per batch.

    Yields:
        Tuples ``(x, y)`` of arrays with shape ``(batch_size, seq_length)``.
        ``y`` is ``x`` shifted one step to the left, so ``y[i, t]`` is the
        character that follows ``x[i, t]``. For the final window there is no
        following column, so the last target of each row wraps around to the
        first character of that same row.

    Raises:
        ValueError: If ``arr`` holds fewer than ``batch_size * seq_length``
            elements, i.e. not even one full batch can be formed.
    """
    arr = np.asarray(arr)
    chars_per_batch = batch_size * seq_length

    # get total number of full batches
    n_batches = len(arr) // chars_per_batch
    if n_batches == 0:
        raise ValueError(
            "data has {} elements but one batch needs batch_size * seq_length "
            "= {}".format(len(arr), chars_per_batch)
        )

    # keep only the characters that fill complete batches, one row per sequence
    arr = arr[:n_batches * chars_per_batch].reshape((batch_size, -1))
    n_columns = arr.shape[1]

    for n in range(0, n_columns, seq_length):
        # features
        x = arr[:, n:n + seq_length]

        # targets: the features shifted left by one step
        y = np.zeros_like(x)
        y[:, :-1] = x[:, 1:]

        # The LAST target column is the character right after this window
        # (or the row's first character when the window is the final one).
        next_column = n + seq_length
        if next_column < n_columns:
            y[:, -1] = arr[:, next_column]
        else:
            y[:, -1] = arr[:, 0]

        yield x, y

In [6]:
# Detect whether a CUDA device is usable; the flag is read by the training
# and sampling code to decide where tensors and the model should live.
train_on_gpu = torch.cuda.is_available()
print("Training on GPU" if train_on_gpu else "Training on CPU")

Training on GPU


In [0]:
class CharRNN(nn.Module):
    """Character-level LSTM language model.

    One-hot encoded characters are fed through a stacked LSTM, then dropout,
    then a linear layer that produces one score per vocabulary character for
    every time step.

    Args:
        tokens: Sequence of the distinct characters of the vocabulary; a
            character's position in the sequence is its integer code.
        n_hidden: Number of hidden units in each LSTM layer.
        n_layers: Number of stacked LSTM layers.
        drop_prob: Dropout probability, used both between LSTM layers and on
            the LSTM output.
        lr: Stored as ``self.lr``; the class itself does not use it.
    """

    def __init__(self, tokens, n_hidden=256, n_layers=2, drop_prob=0.2, lr=0.001):
        super().__init__()

        # vocabulary and the two lookup tables between characters and codes
        self.chars = tokens
        self.int2char = dict(enumerate(self.chars))
        self.char2int = {ch: ii for ii, ch in self.int2char.items()}

        # hyper-parameters kept on the instance (saved with checkpoints)
        self.n_hidden = n_hidden
        self.n_layers = n_layers
        self.drop_prob = drop_prob
        self.lr = lr

        # stacked LSTM over one-hot inputs; batch is the first dimension
        self.lstm = nn.LSTM(len(self.chars), n_hidden, n_layers,
                            dropout=drop_prob, batch_first=True)

        # dropout applied to the LSTM output
        self.dropout = nn.Dropout(drop_prob)

        # maps every hidden vector to one score per character
        self.fc = nn.Linear(n_hidden, len(self.chars))

    def forward(self, x, hidden):
        """Run one forward pass.

        Args:
            x: One-hot input tensor; it is cast to float and passed to a
                ``batch_first`` LSTM whose input size is ``len(self.chars)``.
            hidden: ``(h, c)`` tuple of LSTM states, e.g. from
                :meth:`init_hidden` or from a previous call.

        Returns:
            ``(output, hidden)`` where ``output`` holds unnormalised scores
            with all time steps of all sequences stacked along the first axis
            (second axis has length ``len(self.chars)``), and ``hidden`` is
            the updated ``(h, c)`` tuple.
        """
        x = x.float()
        r_out, hidden = self.lstm(x, hidden)

        r_out = self.dropout(r_out)

        # flatten batch and time so the linear layer sees one row per step
        r_out = r_out.contiguous().view(-1, self.n_hidden)

        output = self.fc(r_out)

        return output, hidden

    def init_hidden(self, batch_size):
        """Return zeroed ``(hidden state, cell state)`` tensors for the LSTM.

        Both tensors have shape ``(n_layers, batch_size, n_hidden)`` and are
        created with the dtype and on the device of the model's own
        parameters, so the state always matches wherever the model currently
        lives (CPU or GPU).
        """
        weight = next(self.parameters())
        shape = (self.n_layers, batch_size, self.n_hidden)
        hidden = (torch.zeros(shape, dtype=weight.dtype, device=weight.device),
                  torch.zeros(shape, dtype=weight.dtype, device=weight.device))
        return hidden

In [0]:
# train function
def train(net, data, n_epochs = 10, batch_size=10, seq_length=50,
          lr=0.01, val_split = 0.1, clip=5, print_every=10):
    """Train ``net`` on integer-encoded text and print the loss once per epoch.

    Relies on the notebook-level ``train_on_gpu`` flag and on the
    ``get_batches`` / ``one_hot_encode`` helpers.

    Args:
        net: Network to train. It must expose ``chars``, ``init_hidden`` and
            be callable as ``net(inputs, hidden) -> (output, hidden)``.
        data: Sequence of integer-encoded characters.
        n_epochs: Number of passes over the training portion of ``data``.
        batch_size: Number of sequences per mini-batch.
        seq_length: Number of time steps per mini-batch.
        lr: Learning rate for the Adam optimizer.
        val_split: Fraction at the end of ``data`` that is held out of
            training. The held-out part is currently not evaluated.
        clip: Maximum gradient norm used for gradient clipping.
        print_every: Accepted for interface compatibility; currently unused
            (the loss of the last batch is printed once per epoch).

    Side effects:
        Puts ``net`` in training mode, moves it to the GPU when
        ``train_on_gpu`` is true, and updates its parameters in place.
    """
    # enable training mode
    net.train()

    # define loss and optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(net.parameters(), lr=lr)

    # keep the last `val_split` fraction of the data out of training
    val_idx = int(len(data) * (1 - val_split))
    data = data[:val_idx]

    if train_on_gpu:
        net.cuda()

    n_labels = len(net.chars)
    for epoch in range(n_epochs):
        # start every epoch from a zeroed hidden state
        h = net.init_hidden(batch_size)
        for x, y in get_batches(data, batch_size, seq_length):
            # one hot encoding
            x = one_hot_encode(x, n_labels)
            # convert into tensors
            inputs, targets = torch.from_numpy(x), torch.from_numpy(y)
            # switch to GPU
            if train_on_gpu:
                inputs, targets = inputs.cuda(), targets.cuda()

            # Detach the carried-over state so backpropagation stops at the
            # batch boundary instead of reaching back through the whole epoch.
            h = tuple(each.detach() for each in h)

            # reset accumulated gradients
            optimizer.zero_grad()
            # forward pass
            output, h = net(inputs, h)
            # calculate loss against the flattened targets
            loss = criterion(output, targets.view(batch_size * seq_length).long())
            # backpropagation
            loss.backward()
            # clip gradients in place to guard against exploding gradients
            nn.utils.clip_grad_norm_(net.parameters(), clip)
            optimizer.step()
        print(f"Epoch: {epoch + 1}", f"loss: {loss.item()}")

In [19]:
# Read the training corpus from the mounted Google Drive.
with open("gdrive/My Drive/dataset/anna.txt", encoding="utf-8") as fr:
    text = fr.read()

# Build the vocabulary. Sorting makes the character -> integer mapping the
# same on every run; iterating a bare set of strings can yield a different
# order after each kernel restart.
chars = tuple(sorted(set(text)))
int2char = dict(enumerate(chars))
char2int = {ch: ii for ii, ch in int2char.items()}

# Encode the whole text as an array of integer character codes.
encoded = np.array([char2int[ch] for ch in text])

n_hidden = 512
n_layers = 2

net = CharRNN(chars, n_hidden, n_layers)
print(net)

CharRNN(
  (lstm): LSTM(83, 512, num_layers=2, batch_first=True, dropout=0.2)
  (dropout): Dropout(p=0.2)
  (fc): Linear(in_features=512, out_features=83, bias=True)
)


In [20]:
# Training hyper-parameters for this run.
n_epochs, seq_length, batch_size = 20, 100, 128

# Fit the network on the encoded corpus; one loss line is printed per epoch.
train(
    net,
    encoded,
    n_epochs=n_epochs,
    batch_size=batch_size,
    seq_length=seq_length,
    lr=0.001,
    print_every=10,
)



Epoch: 1 loss: 2.6174428462982178
Epoch: 2 loss: 2.156881809234619
Epoch: 3 loss: 1.885026216506958
Epoch: 4 loss: 1.7152546644210815
Epoch: 5 loss: 1.6163634061813354
Epoch: 6 loss: 1.5335824489593506
Epoch: 7 loss: 1.479609727859497
Epoch: 8 loss: 1.4306859970092773
Epoch: 9 loss: 1.404299259185791
Epoch: 10 loss: 1.363358497619629
Epoch: 11 loss: 1.3479340076446533
Epoch: 12 loss: 1.3192253112792969
Epoch: 13 loss: 1.2994418144226074
Epoch: 14 loss: 1.2886830568313599
Epoch: 15 loss: 1.2643039226531982
Epoch: 16 loss: 1.235701560974121
Epoch: 17 loss: 1.2195324897766113
Epoch: 18 loss: 1.2094656229019165
Epoch: 19 loss: 1.192408561706543
Epoch: 20 loss: 1.1863821744918823


In [0]:
# Persist the trained model: the two architecture hyper-parameters, the
# learned weights and the vocabulary are stored together in one checkpoint.
checkpoint = {}
checkpoint["n_layers"] = net.n_layers
checkpoint["n_hidden"] = net.n_hidden
checkpoint["state_dict"] = net.state_dict()
checkpoint["tokens"] = net.chars

# torch.save accepts a path and writes the file in binary mode itself.
torch.save(checkpoint, "lstm_book_writer.pth")

In [0]:
def predict(net, token, h=None, top_k=None):
    """Sample the character that follows ``token``.

    Args:
        net: Trained network exposing ``chars``, ``char2int``, ``int2char``,
            ``init_hidden`` and callable as ``net(inputs, hidden)``.
        token: Non-empty string. Its characters are fed to the network as one
            sequence; the prediction is taken from the last time step.
        h: ``(hidden, cell)`` state tuple to continue from. ``None`` starts
            from a fresh zero state for a batch of one.
        top_k: If given, sample only among the ``top_k`` most probable
            characters; otherwise sample from the full distribution.

    Returns:
        ``(char, h)``: the sampled next character and the updated state.

    Raises:
        KeyError: If ``token`` contains a character outside the vocabulary.
    """
    if h is None:
        h = net.init_hidden(1)

    # Run on whatever device the model currently lives on.
    device = next(net.parameters()).device

    x = np.array([[net.char2int[ch] for ch in token]])
    x = one_hot_encode(x, len(net.chars))
    inputs = torch.from_numpy(x).to(device)

    # Cut the state loose from any earlier graph and keep it with the model.
    h = tuple(each.detach().to(device) for each in h)

    # Pure inference: no gradients are needed.
    with torch.no_grad():
        out, h = net(inputs, h)
        # Probabilities for the character after the last input step, kept
        # 1-D so that sampling also works when top_k == 1.
        p = F.softmax(out[-1], dim=0).cpu()

    if top_k is None:
        top_ch = np.arange(len(net.chars))
    else:
        p, top_ch = p.topk(top_k)
        top_ch = top_ch.numpy()

    # select the likely next character with some element of randomness
    p = p.numpy()
    char = np.random.choice(top_ch, p=p / p.sum())

    # return the predicted character and the hidden state
    return net.int2char[int(char)], h

In [0]:
def sample(net, size, prime='The', top_k=None):
    """Generate text from ``net``, starting from the seed string ``prime``.

    The seed is fed through the network one character at a time to warm up
    the hidden state; the character predicted after the last seed character
    is appended, and then ``size`` further characters are generated, each
    from the previously generated one.

    Args:
        net: Trained network, used through ``predict`` and ``init_hidden``.
        size: Number of generation steps after the seed has been consumed.
        prime: Non-empty seed text that starts the output.
        top_k: Passed to ``predict``; restricts sampling to the ``top_k``
            most probable characters when given.

    Returns:
        ``prime`` followed by ``size + 1`` generated characters.

    Raises:
        ValueError: If ``prime`` is empty, because there is then no character
            to predict from.

    Side effects:
        Moves ``net`` to the GPU or CPU according to the notebook-level
        ``train_on_gpu`` flag and leaves it in evaluation mode.
    """
    if not prime:
        raise ValueError("prime must contain at least one character")

    if train_on_gpu:
        net.cuda()
    else:
        net.cpu()

    net.eval()  # eval mode

    # First off, run through the prime characters to build up the state;
    # only the prediction made after the last one is kept.
    chars = list(prime)
    h = net.init_hidden(1)
    for ch in prime:
        char, h = predict(net, ch, h, top_k=top_k)
    chars.append(char)

    # Now pass in the previous character and get a new one
    for _ in range(size):
        char, h = predict(net, chars[-1], h, top_k=top_k)
        chars.append(char)

    return ''.join(chars)

In [39]:
# Generate text seeded with "Anna", sampling each character from the five
# most probable candidates, and show the result.
generated_text = sample(net, 1000, prime="Anna", top_k=5)
print(generated_text)

Anna, still more
shared.'

Anna heard the conviction to her.



Chapter 6


Alexay Alexandrovitch always had been satisfactin's to an attitude
in the pavaling from their conversation with him. But at the time
then to his wife he had an exceedingly connected that had a bare and
three honses, and they were straight a little.

"I always have some one in her, a drear.'s and telliment for my strung
force of his face. Then he was self-mere five on the thoughts
of the sacre of the ways that sure he marvelous, in the mushroom and
called up, so as to be altertald to a conversation with yathing on
his face, and well, and that to me that I shall be in letter
with him in them."

"And I'm glad you the solition of all some tried, in an
instant at the pavilion of her own interest."

"Well, weat, you must go and go away, and see it all, and how, and
is that he did not care to see me for anything but shame, because it
seemed to me, and how do you know, there are so and settle the principal
sincance," s