In [0]:
import numpy as np

def load_doc(filename, encoding='utf-8'):
  """Read an entire text file and return its contents as a single string.

  Arguments
    filename : path of the text file to read
    encoding : text encoding used to decode the file; defaults to 'utf-8'
               so the result does not depend on the platform's locale

  Returns
    The complete file contents as a str.
  """
  # The context manager closes the file on exit, so no explicit close() is needed.
  with open(filename, 'r', encoding=encoding) as f:
    return f.read()

In [2]:
# Download the text file with an IPython shell escape (needs network access and wget).
!wget https://raw.githubusercontent.com/udacity/deep-learning-v2-pytorch/master/recurrent-neural-networks/char-rnn/data/anna.txt
filename = './anna.txt'
# Read the whole downloaded file into memory as one string.
doc = load_doc(filename)

--2020-04-09 05:49:22--  https://raw.githubusercontent.com/udacity/deep-learning-v2-pytorch/master/recurrent-neural-networks/char-rnn/data/anna.txt
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 151.101.0.133, 151.101.64.133, 151.101.128.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|151.101.0.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 2025486 (1.9M) [text/plain]
Saving to: ‘anna.txt’


2020-04-09 05:49:22 (21.9 MB/s) - ‘anna.txt’ saved [2025486/2025486]



In [3]:
# Vocabulary: one entry per distinct character of the text. Set iteration order
# is arbitrary, so the integer ids may differ from one kernel session to the next.
chars = tuple(set(doc))
int2char = dict(enumerate(chars))
char2int = {symbol: idx for idx, symbol in int2char.items()}

# Replace every character of the text by its integer id.
encoded = np.array([char2int[symbol] for symbol in doc])
encoded[:100]

array([32, 16, 73, 68, 75, 54, 35, 81, 17, 36, 36, 36, 29, 73, 68, 68, 41,
       81, 82, 73, 64, 70, 46, 70, 54, 55, 81, 73, 35, 54, 81, 73, 46, 46,
       81, 73, 46, 70, 59, 54, 11, 81, 54, 30, 54, 35, 41, 81,  4, 60, 16,
       73, 68, 68, 41, 81, 82, 73, 64, 70, 46, 41, 81, 70, 55, 81,  4, 60,
       16, 73, 68, 68, 41, 81, 70, 60, 81, 70, 75, 55, 81, 33, 76, 60, 36,
       76, 73, 41,  0, 36, 36, 23, 30, 54, 35, 41, 75, 16, 70, 60])

In [4]:
def one_hot_encode(arr, n_labels):
  """One-hot encode an integer array along a new trailing axis.

  Arguments
    arr : numpy array of integer labels, any shape
    n_labels : length of each one-hot vector

  Returns
    float32 array of shape (*arr.shape, n_labels) holding a 1 at each
    label's index and 0 everywhere else.
  """
  flat_labels = arr.reshape(-1)
  n_items = flat_labels.shape[0]

  # Work on a 2-D (item, label) table, then restore the input's shape.
  encoded = np.zeros((n_items, n_labels), dtype='float32')
  encoded[np.arange(n_items), flat_labels] = 1
  return encoded.reshape(arr.shape + (n_labels,))

# Sanity check: a 2x3 array of labels becomes a 2x3x6 stack of one-hot rows.
test_seq = np.array([[4, 5, 2],
                     [1, 3, 0]])
one_hot = one_hot_encode(test_seq, n_labels=6)
print(one_hot)

[[[0. 0. 0. 0. 1. 0.]
  [0. 0. 0. 0. 0. 1.]
  [0. 0. 1. 0. 0. 0.]]

 [[0. 1. 0. 0. 0. 0.]
  [0. 0. 0. 1. 0. 0.]
  [1. 0. 0. 0. 0. 0.]]]


In [0]:
def get_batches(arr, batch_size, seq_length):
  """Yield (x, y) mini-batches of shape (batch_size, seq_length) from arr.

  Arguments
    arr : 1-D numpy array to split into batches
    batch_size : number of rows (parallel sequences) in each batch
    seq_length : number of steps in each row of a batch

  Trailing elements that do not fill a complete batch are dropped. y is x
  shifted one step to the left; its last column is the element that follows
  the window in the same row, or, for the final window of a row, the first
  element of that window.
  """
  chars_per_batch = batch_size*seq_length
  usable = (len(arr)//chars_per_batch)*chars_per_batch
  rows = arr[:usable].reshape((batch_size, -1))
  row_len = rows.shape[1]

  for start in range(0, row_len, seq_length):
    stop = start + seq_length
    x = rows[:, start:stop]
    y = np.zeros_like(x)
    y[:, :-1] = x[:, 1:]
    if stop < row_len:
      # The next element of each row is the target for the last step.
      y[:, -1] = rows[:, stop]
    else:
      # Final window: nothing follows it, so wrap to the window's first column.
      y[:, -1] = x[:, 0]
    yield x, y
  


In [6]:
# Pull the first mini-batch and preview the top-left 10x10 corner of inputs and targets.
batches = get_batches(encoded, batch_size=8, seq_length=50)
dataiter = iter(batches)
x, y = next(dataiter)
for preview in (x, y):
  print(preview[:10, :10])

[[32 16 73 68 75 54 35 81 17 36]
 [55 33 60 81 75 16 73 75 81 73]
 [54 60 77 81 33 35 81 73 81 82]
 [55 81 75 16 54 81 13 16 70 54]
 [81 55 73 76 81 16 54 35 81 75]
 [13  4 55 55 70 33 60 81 73 60]
 [81 65 60 60 73 81 16 73 77 81]
 [ 8 50 46 33 60 55 59 41  0 81]]
[[16 73 68 75 54 35 81 17 36 36]
 [33 60 81 75 16 73 75 81 73 75]
 [60 77 81 33 35 81 73 81 82 33]
 [81 75 16 54 81 13 16 70 54 82]
 [55 73 76 81 16 54 35 81 75 54]
 [ 4 55 55 70 33 60 81 73 60 77]
 [65 60 60 73 81 16 73 77 81 55]
 [50 46 33 60 55 59 41  0 81 61]]


In [0]:
import torch.nn as nn 
import torch.nn.functional as F
import torch 

class CharRNN(nn.Module):
  """Character-level language model: stacked LSTM -> dropout -> linear layer.

  Arguments
    tokens : sequence of the distinct characters the model works with; its
             length fixes both the input width and the number of output scores
    n_hidden : number of hidden units in each LSTM layer
    n_layers : number of stacked LSTM layers
    drop_prob : dropout probability, used between LSTM layers and on the
                LSTM output
    lr : stored as self.lr; not otherwise used inside this class
  """

  def __init__(self, tokens, n_hidden, n_layers, drop_prob=0.5, lr=0.001):
    super().__init__()
    self.drop_prob = drop_prob
    self.n_layers = n_layers
    self.n_hidden = n_hidden
    self.lr = lr

    # Character <-> integer id lookup tables built from the token order.
    self.chars = tokens
    self.int2char = dict(enumerate(self.chars))
    self.char2int = {ch : i for i, ch in enumerate(self.chars)}

    # batch_first=True: inputs and outputs are laid out (batch, seq, feature).
    self.lstm = nn.LSTM(len(self.chars), n_hidden, n_layers, dropout=drop_prob, batch_first=True)

    self.dropout = nn.Dropout(drop_prob)

    self.fc = nn.Linear(n_hidden, len(self.chars))

  def forward(self, x, hidden):
    """Run one forward pass.

    Arguments
      x : input tensor of shape (batch, seq, len(self.chars))
      hidden : (h, c) tuple of LSTM state tensors, e.g. from init_hidden

    Returns
      out : scores of shape (batch*seq, len(self.chars))
      hidden : the updated (h, c) state tuple
    """
    # Get the outputs and the new hidden state from the lstm
    r_output, hidden = self.lstm(x, hidden)
    # pass through a dropout layer
    out = self.dropout(r_output)

    # Stack all time steps of all sequences so the linear layer sees 2-D input.
    out = out.contiguous().view(-1, self.n_hidden)

    out = self.fc(out)

    return out, hidden

  def init_hidden(self, batch_size):
    """Return a zeroed (h, c) LSTM state for the given batch size.

    Each tensor has shape (n_layers, batch_size, n_hidden) and is created
    with the dtype and device of the model's own parameters, so it always
    matches wherever the model currently lives and needs no external
    `device` variable.
    """
    weight = next(self.parameters())
    shape = (self.n_layers, batch_size, self.n_hidden)

    # new_zeros inherits dtype/device from `weight`; the result does not require grad.
    hidden = (weight.new_zeros(shape), weight.new_zeros(shape))

    return hidden

# Use the first CUDA GPU when one is available; otherwise fall back to the CPU.
device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")

In [22]:
# Model size: hidden units per LSTM layer and number of stacked layers.
n_hidden, n_layers = 512, 2

net = CharRNN(chars, n_hidden, n_layers)
net = net.to(device)
print(net)

CharRNN(
  (lstm): LSTM(83, 512, num_layers=2, batch_first=True, dropout=0.5)
  (dropout): Dropout(p=0.5, inplace=False)
  (fc): Linear(in_features=512, out_features=83, bias=True)
)


In [0]:
import torch.optim as optim 

def train(net, data, epochs=10, batch_size=10, seq_length=50, lr=0.001, clip=5, val_frac=0.1, print_every=10):
  """
    Train the network with Adam and cross-entropy loss, printing the training
    and validation loss every `print_every` steps.

    Argument
    net : CharRNN network
    data : text data to train the network
    epochs : number of epochs to train
    batch_size : number of mini-sequences per mini-batch, aka batch size
    seq_length : number of character steps per mini-batch
    lr: learning rate
    clip: gradient clipping
    val_frac: Fraction of data to hold out for validation
    print_every: Number of steps for printing training and validation loss
  """
  net.train()
  criterion = nn.CrossEntropyLoss()
  optimizer = optim.Adam(net.parameters(), lr=lr)

  # Hold out the last val_frac of the data for validation. Splitting at an
  # explicit index keeps val_frac=0 meaningful: data[:-0] would be empty and
  # data[-0:] would be the whole array.
  val_idx = int(val_frac*len(data))
  split_idx = len(data) - val_idx
  train_data, val_data = data[:split_idx], data[split_idx:]

  n_chars = len(net.chars)

  for e in range(epochs):

    # The step counter and the hidden state both restart every epoch.
    count = 0
    h = net.init_hidden(batch_size)

    for x, y in get_batches(train_data, batch_size, seq_length):

      count += 1
      x = one_hot_encode(x, n_chars)
      inputs, targets = torch.from_numpy(x), torch.from_numpy(y)
      inputs, targets = inputs.to(device), targets.to(device)

      net.zero_grad()
      # Detach the carried-over state so backprop stops at the batch boundary
      # instead of reaching back through every earlier batch.
      h = tuple(each.detach() for each in h)
      out, h = net(inputs, h)
      loss = criterion(out, targets.view(batch_size*seq_length).long())
      loss.backward()
      # Rescale gradients whose overall norm exceeds `clip`.
      nn.utils.clip_grad_norm_(net.parameters(), clip)
      optimizer.step()

      if count % print_every == 0:
        net.eval()
        val_losses = []
        val_h = net.init_hidden(batch_size)

        # No gradients are needed for evaluation; skipping autograd saves
        # memory and time.
        with torch.no_grad():
          for x, y in get_batches(val_data, batch_size, seq_length):

            x = one_hot_encode(x, n_chars)
            inputs, targets = torch.from_numpy(x), torch.from_numpy(y)
            inputs, targets = inputs.to(device), targets.to(device)
            out, val_h = net(inputs, val_h)
            val_loss = criterion(out, targets.view(batch_size*seq_length).long())
            val_losses.append(val_loss.item())

        net.train()
        # With too little validation data for one full batch there is nothing
        # to average; report nan explicitly instead of triggering a numpy warning.
        mean_val_loss = np.mean(val_losses) if val_losses else float('nan')
        print("Epoch: {}/{}...".format(e+1, epochs),
              "Step: {}...".format(count),
              "Loss: {:.4f}...".format(loss.item()),
              "Val Loss: {:.4f}".format(mean_val_loss))


In [24]:
# Training configuration for this run.
batch_size, seq_length, n_epochs = 128, 100, 20

train(net, encoded, epochs=n_epochs, batch_size=batch_size, seq_length=seq_length)

Epoch: 1/20... Step: 10... Loss: 3.2673... Val Loss: 3.2135
Epoch: 1/20... Step: 20... Loss: 3.1596... Val Loss: 3.1416
Epoch: 1/20... Step: 30... Loss: 3.1459... Val Loss: 3.1263
Epoch: 1/20... Step: 40... Loss: 3.1179... Val Loss: 3.1199
Epoch: 1/20... Step: 50... Loss: 3.1406... Val Loss: 3.1176
Epoch: 1/20... Step: 60... Loss: 3.1177... Val Loss: 3.1152
Epoch: 1/20... Step: 70... Loss: 3.1049... Val Loss: 3.1124
Epoch: 1/20... Step: 80... Loss: 3.1161... Val Loss: 3.1041
Epoch: 1/20... Step: 90... Loss: 3.1058... Val Loss: 3.0839
Epoch: 1/20... Step: 100... Loss: 3.0498... Val Loss: 3.0330
Epoch: 1/20... Step: 110... Loss: 3.0094... Val Loss: 2.9809
Epoch: 1/20... Step: 120... Loss: 2.8584... Val Loss: 2.8388
Epoch: 1/20... Step: 130... Loss: 2.7776... Val Loss: 2.7417
Epoch: 2/20... Step: 10... Loss: 2.5869... Val Loss: 2.5514
Epoch: 2/20... Step: 20... Loss: 2.5119... Val Loss: 2.4936
Epoch: 2/20... Step: 30... Loss: 2.5046... Val Loss: 2.4494
Epoch: 2/20... Step: 40... Loss: 2.4

In [0]:
model_name = 'rnn_x_epoch.net'

# Store the weights together with the constructor arguments needed to rebuild
# the network before the weights are loaded back.
checkpoint = dict(
    n_hidden=net.n_hidden,
    n_layers=net.n_layers,
    state_dict=net.state_dict(),
    tokens=net.chars,
)
with open(model_name, mode='wb') as f:
  torch.save(checkpoint, f)


In [0]:
def predict(net, char, h=None, top_k=None):
  """Feed one character to the network and sample the next character.

  Arguments
    net : CharRNN network
    char : the input character; must be a key of net.char2int
    h : (h, c) hidden state tuple; a zeroed state for batch size 1 is
        created when None
    top_k : when given, sample only among the k most probable characters;
            otherwise sample over all of net.chars

  Returns
    (next_char, h) : the sampled character and the updated hidden state
  """
  x = np.array([[net.char2int[char]]])
  x = one_hot_encode(x, len(net.chars))
  inputs = torch.from_numpy(x).to(device)

  # The signature advertises h=None, so supply a fresh state in that case.
  if h is None:
    h = net.init_hidden(1)
  h = tuple(each.detach() for each in h)

  # Inference only: skip autograd bookkeeping.
  with torch.no_grad():
    out, h = net(inputs, h)
    p = F.softmax(out, dim=1)
  p = p.to("cpu")

  if top_k is None:
    top_ch = np.arange(len(net.chars))
  else:
    p, top_ch = p.topk(top_k)
    # reshape(-1) keeps a 1-D array even for top_k=1, where squeeze() would
    # collapse to a 0-d array that np.random.choice cannot sample from.
    top_ch = top_ch.numpy().reshape(-1)

  p = p.numpy().reshape(-1)
  # Renormalise so the (possibly truncated) probabilities sum to one.
  char = np.random.choice(top_ch, p=p/p.sum())

  return net.int2char[int(char)], h

In [0]:
def sample(net, size, prime='The', top_k=None):
  """Generate text from the network, starting from a priming string.

  Arguments
    net : CharRNN network; it is moved to `device` and put in eval mode
    size : number of predict steps run after the priming string has been fed
    prime : non-empty string used to warm up the hidden state; it also forms
            the start of the returned text
    top_k : passed through to predict to restrict sampling to the k most
            probable characters

  Returns
    prime followed by size + 1 generated characters, as one string.

  Raises
    ValueError : if prime is empty (there is nothing to feed the network)
  """
  if not prime:
    raise ValueError("prime must contain at least one character")

  net.to(device)
  net.eval()
  chars = list(prime)
  h = net.init_hidden(1)

  # Generation never backpropagates, so run everything without autograd.
  with torch.no_grad():
    # Feed the priming text; only the prediction after its last character is kept.
    for ch in prime:
      char, h = predict(net, ch, h, top_k=top_k)

    chars.append(char)

    # Each newly generated character becomes the next input.
    for _ in range(size):
      char, h = predict(net, chars[-1], h, top_k=top_k)
      chars.append(char)

  return ''.join(chars)

In [26]:
# Generate text from the trained network, sampling among the 5 most likely characters.
generated = sample(net, 1000, prime='Anana', top_k=5)
print(generated)

Anana had so
likely that he was the sound of the sort of angure that she
had not been so much too was the setten of that stucider, his fear
that hed head. A divorce who had been doubt, but he had tears at once
that they done they were to look it. The peasants carreed a such
sure with the son in the doctor's frequent on three commots of the
same.

"I've not sat the sort in the matter," she thought, and shiling the
princess as though it were as all the part of these particulation of
her hotes, and startly faller up. The sama shalles of this weeled shade of her hat
to hird her. That said the face was happiness, which had been to dine
in the choldes that she would say that he did not think of them, and so
to
show me at him. She could stand him, and, said to the princess for his
his side, stopping to his belatoo with a cheerful party to show the
tempheration.

"You don't know."

The princess went to the same significance, when Alexey
Alexandrovitch had talked of the children to the country,

In [27]:
# Rebuild the network from the saved checkpoint file.
with open('rnn_x_epoch.net', 'rb') as f:
  # map_location='cpu' lets a checkpoint written during a GPU session be
  # opened on a machine without CUDA; sample() moves the model to `device`.
  checkpoint = torch.load(f, map_location='cpu')
loaded = CharRNN(tokens=checkpoint['tokens'], n_hidden=checkpoint['n_hidden'], n_layers=checkpoint['n_layers'])

loaded.load_state_dict(checkpoint['state_dict'])

<All keys matched successfully>

In [28]:
# Sample from the model restored from the checkpoint (`loaded`) rather than the
# in-memory `net`, so this cell actually exercises the save/load round trip.
print(sample(loaded, 1000, top_k=5, prime="And Levin said"))

And Levin said,
and his fingers the sides were trying somewhere and his hands of
the sample of anything that seemed to her, that the old man, he
went on, she would have to speak to the story and at the second
consideration when this to call her somewish, her house that the
more conversation in her son she saw that in the class she went on
tabinitly on the country. "Ah! have you think it all," he added, smalighing,
but he would have letter alone in all that touching the
chicken and at the morning, and was standing and who took him her
life as those houses as he saw the stands. He was answered, steppand and
looking to a little. His brother was to be had to say.

"What have you seen my sight of her sun those well, I would be
decised. There's no terricill moment. Wanter to go and see them. Whon the
mother in the such such answer is the pretty satisfressor of the
country, but the sound of steps, and seen him, and we see him, and
thinking of the politics if it's a man of what we can grass hi