In [41]:
!wget https://raw.githubusercontent.com/udacity/deep-learning-v2-pytorch/master/recurrent-neural-networks/char-rnn/data/anna.txt

--2020-01-19 17:03:27--  https://raw.githubusercontent.com/udacity/deep-learning-v2-pytorch/master/recurrent-neural-networks/char-rnn/data/anna.txt
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 151.101.0.133, 151.101.64.133, 151.101.128.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|151.101.0.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 2025486 (1.9M) [text/plain]
Saving to: ‘anna.txt.1’


2020-01-19 17:03:27 (122 MB/s) - ‘anna.txt.1’ saved [2025486/2025486]



In [0]:
import numpy as np
import torch
from torch import nn
import torch.nn.functional as F

In [0]:
with open('anna.txt.1','r') as f:
    text = f.read()

In [46]:
text[:100]

'Chapter 1\n\n\nHappy families are all alike; every unhappy family is unhappy in its own\nway.\n\nEverythin'

In [0]:
chars = tuple(set(text))
int2char= dict(enumerate(chars))
char2int={ch: ii for ii, ch in int2char.items()}
encoded=np.array([char2int[ch] for ch in text])

In [48]:
encoded[:100]

array([34, 82, 73, 14, 26, 63, 81,  4, 47,  5,  5,  5,  6, 73, 14, 14, 79,
        4, 32, 73, 62, 56,  1, 56, 63,  7,  4, 73, 81, 63,  4, 73,  1,  1,
        4, 73,  1, 56, 40, 63, 39,  4, 63, 65, 63, 81, 79,  4, 12, 13, 82,
       73, 14, 14, 79,  4, 32, 73, 62, 56,  1, 79,  4, 56,  7,  4, 12, 13,
       82, 73, 14, 14, 79,  4, 56, 13,  4, 56, 26,  7,  4, 75, 22, 13,  5,
       22, 73, 79, 76,  5,  5, 42, 65, 63, 81, 79, 26, 82, 56, 13])

In [0]:
#Turn into one hot encode
def one_hot_encode(arr, n_labels):
    one_hot = np.zeros((arr.size, n_labels), dtype=np.float32)
    one_hot[np.arange(one_hot.shape[0]), arr.flatten()] = 1.
    one_hot = one_hot.reshape((*arr.shape, n_labels))
    return one_hot

In [50]:
test_seq = np.array([[3, 5, 1]])
one_hot = one_hot_encode(test_seq, 8)

print(one_hot)

[[[0. 0. 0. 1. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 1. 0. 0.]
  [0. 1. 0. 0. 0. 0. 0. 0.]]]


In [0]:
def get_batches(arr, batch_size, seq_length):
    batch_size_total = batch_size * seq_length
    n_batches = len(arr)//batch_size_total
    arr = arr[:n_batches * batch_size_total]
    arr = arr.reshape((batch_size, -1))
    
   
    for n in range(0, arr.shape[1], seq_length):
        # The features
        x = arr[:, n:n+seq_length]
        # The targets, shifted by one
        y =  np.zeros_like(x)
        try:
            y[:, :-1], y[:, -1] = x[:, 1:], arr[:, n+seq_length]
        except IndexError:
            y[:, :-1], y[:, -1] = x[:, 1:], arr[:, 0]
        yield x, y

In [0]:
batches = get_batches(encoded, 8, 50)
x, y = next(batches)

In [53]:
print('x\n', x[:10, :10])
print('\ny\n', y[:10, :10])

x
 [[34 82 73 14 26 63 81  4 47  5]
 [ 7 75 13  4 26 82 73 26  4 73]
 [63 13 31  4 75 81  4 73  4 32]
 [ 7  4 26 82 63  4 16 82 56 63]
 [ 4  7 73 22  4 82 63 81  4 26]
 [16 12  7  7 56 75 13  4 73 13]
 [ 4 44 13 13 73  4 82 73 31  4]
 [71 23  1 75 13  7 40 79 76  4]]

y
 [[82 73 14 26 63 81  4 47  5  5]
 [75 13  4 26 82 73 26  4 73 26]
 [13 31  4 75 81  4 73  4 32 75]
 [ 4 26 82 63  4 16 82 56 63 32]
 [ 7 73 22  4 82 63 81  4 26 63]
 [12  7  7 56 75 13  4 73 13 31]
 [44 13 13 73  4 82 73 31  4  7]
 [23  1 75 13  7 40 79 76  4 28]]


In [54]:
train_on_gpu = torch.cuda.is_available()
if(train_on_gpu):
    print('Training on GPU!')
else: 
    print('No GPU available, training on CPU; consider making n_epochs very small.')

Training on GPU!


In [0]:
class CharRNN(nn.Module):
    
    def __init__(self, tokens, n_hidden=256, n_layers=2,
                               drop_prob=0.5, lr=0.001):
        super().__init__()
        self.drop_prob = drop_prob
        self.n_layers = n_layers
        self.n_hidden = n_hidden
        self.lr = lr
        
        self.chars = tokens
        self.int2char = dict(enumerate(self.chars))
        self.char2int = {ch: ii for ii, ch in self.int2char.items()}
        
        self.lstm = nn.LSTM(len(self.chars), n_hidden, n_layers, 
                            dropout=drop_prob, batch_first=True)
              
        self.dropout = nn.Dropout(drop_prob)
        
        self.fc = nn.Linear(n_hidden, len(self.chars))
      
    
    def forward(self, x, hidden):
   
        r_output, hidden = self.lstm(x, hidden)
       
        out = self.dropout(r_output)
        
        out = out.contiguous().view(-1, self.n_hidden)
        
        out = self.fc(out)
        
        return out, hidden
    
    
    def init_hidden(self, batch_size):
        
        weight = next(self.parameters()).data
        
        if (train_on_gpu):
            hidden = (weight.new(self.n_layers, batch_size, self.n_hidden).zero_().cuda(),
                  weight.new(self.n_layers, batch_size, self.n_hidden).zero_().cuda())
        else:
            hidden = (weight.new(self.n_layers, batch_size, self.n_hidden).zero_(),
                      weight.new(self.n_layers, batch_size, self.n_hidden).zero_())
        
        return hidden

In [0]:
def train(net, data, epochs=10, batch_size=10, seq_length=50, lr=0.001, clip=5, val_frac=0.1, print_every=10):
   
    net.train()
    
    opt = torch.optim.Adam(net.parameters(), lr=lr)
    criterion = nn.CrossEntropyLoss()
        
    val_idx = int(len(data)*(1-val_frac))
    data, val_data = data[:val_idx], data[val_idx:]
    
    if(train_on_gpu):
        net.cuda()
    
    counter = 0
    n_chars = len(net.chars)
    for e in range(epochs):
       
        h = net.init_hidden(batch_size)
        
        for x, y in get_batches(data, batch_size, seq_length):
            counter += 1
                        
            x = one_hot_encode(x, n_chars)
            inputs, targets = torch.from_numpy(x), torch.from_numpy(y)
            
            if(train_on_gpu):
                inputs, targets = inputs.cuda(), targets.cuda()

            h = tuple([each.data for each in h])

            net.zero_grad()
           
            output, h = net(inputs, h)
            
            loss = criterion(output, targets.view(batch_size*seq_length).long())
            loss.backward()
            
            nn.utils.clip_grad_norm_(net.parameters(), clip)
            opt.step()
                        
            if counter % print_every == 0:
                
                val_h = net.init_hidden(batch_size)
                val_losses = []
                net.eval()
                for x, y in get_batches(val_data, batch_size, seq_length):
                    x = one_hot_encode(x, n_chars)
                    x, y = torch.from_numpy(x), torch.from_numpy(y)
                    val_h = tuple([each.data for each in val_h])
                    
                    inputs, targets = x, y
                    if(train_on_gpu):
                        inputs, targets = inputs.cuda(), targets.cuda()

                    output, val_h = net(inputs, val_h)
                    val_loss = criterion(output, targets.view(batch_size*seq_length).long())
                
                    val_losses.append(val_loss.item())
                
                net.train() 
                print("Epoch: {}/{}...".format(e+1, epochs),
                      "Step: {}...".format(counter),
                      "Loss: {:.4f}...".format(loss.item()),
                      "Val Loss: {:.4f}".format(np.mean(val_losses)))

In [57]:
n_hidden=512
n_layers=2

net = CharRNN(chars, n_hidden, n_layers)
print(net)

CharRNN(
  (lstm): LSTM(83, 512, num_layers=2, batch_first=True, dropout=0.5)
  (dropout): Dropout(p=0.5, inplace=False)
  (fc): Linear(in_features=512, out_features=83, bias=True)
)


In [58]:
batch_size = 128
seq_length = 100
n_epochs = 20 
train(net, encoded, epochs=n_epochs, batch_size=batch_size, seq_length=seq_length, lr=0.001, print_every=10)

Epoch: 1/20... Step: 10... Loss: 3.2602... Val Loss: 3.2115
Epoch: 1/20... Step: 20... Loss: 3.1509... Val Loss: 3.1409
Epoch: 1/20... Step: 30... Loss: 3.1417... Val Loss: 3.1242
Epoch: 1/20... Step: 40... Loss: 3.1152... Val Loss: 3.1193
Epoch: 1/20... Step: 50... Loss: 3.1411... Val Loss: 3.1166
Epoch: 1/20... Step: 60... Loss: 3.1194... Val Loss: 3.1143
Epoch: 1/20... Step: 70... Loss: 3.1034... Val Loss: 3.1106
Epoch: 1/20... Step: 80... Loss: 3.1127... Val Loss: 3.1011
Epoch: 1/20... Step: 90... Loss: 3.0968... Val Loss: 3.0775
Epoch: 1/20... Step: 100... Loss: 3.0338... Val Loss: 3.0239
Epoch: 1/20... Step: 110... Loss: 2.9553... Val Loss: 2.9243
Epoch: 1/20... Step: 120... Loss: 2.8784... Val Loss: 2.8071
Epoch: 1/20... Step: 130... Loss: 2.7245... Val Loss: 2.6869
Epoch: 2/20... Step: 140... Loss: 2.6369... Val Loss: 2.5737
Epoch: 2/20... Step: 150... Loss: 2.5615... Val Loss: 2.5128
Epoch: 2/20... Step: 160... Loss: 2.5022... Val Loss: 2.4606
Epoch: 2/20... Step: 170... Loss:

In [0]:
# change the name, for saving multiple files
model_name = 'rnn_20_epoch.net'

checkpoint = {'n_hidden': net.n_hidden,
              'n_layers': net.n_layers,
              'state_dict': net.state_dict(),
              'tokens': net.chars}

with open(model_name, 'wb') as f:
    torch.save(checkpoint, f)

In [0]:
def predict(net, char, h=None, top_k=None):
        
        
        x = np.array([[net.char2int[char]]])
        x = one_hot_encode(x, len(net.chars))
        inputs = torch.from_numpy(x)
        
        if(train_on_gpu):
            inputs = inputs.cuda()
      
        h = tuple([each.data for each in h])
        
        out, h = net(inputs, h)
        p = F.softmax(out, dim=1).data
        if(train_on_gpu):
            p = p.cpu() 
        
        if top_k is None:
            top_ch = np.arange(len(net.chars))
        else:
            p, top_ch = p.topk(top_k)
            top_ch = top_ch.numpy().squeeze()
        
        p = p.numpy().squeeze()
        char = np.random.choice(top_ch, p=p/p.sum())
        
        return net.int2char[char], h

In [0]:
def sample(net, size, prime='The', top_k=None):
        
    if(train_on_gpu):
        net.cuda()
    else:
        net.cpu()
    
    net.eval() 
    chars = [ch for ch in prime]
    h = net.init_hidden(1)
    for ch in prime:
        char, h = predict(net, ch, h, top_k=top_k)

    chars.append(char)
    for ii in range(size):
        char, h = predict(net, chars[-1], h, top_k=top_k)
        chars.append(char)

    return ''.join(chars)

In [62]:
print(sample(net, 1000, prime='Anna', top_k=5))

Anna. But that it was
a chair in the counting-horses threeting his heart. Before he had not
broken out to him with a miles, had been completely calling, and her sincere
and all the comnified homest and talking to him, he had not been in
a special face of his brother with the sole face to her farely.

The same words was a sill and strange that iss way for the men with her
the hundred and taken a small stind souls, her bout was standing to see,
and so it was to stand any match overwains.

Sergey Ivanovitch was said.

"Yes, tell me, I didn't come to the clearness of you. I'm not to speak to
him?" said Vronsky, as a left and talk off, and he sat to her surerisen,
still at acquaintance in which the doctor smiled at her.

"Why an a men, and I've been bright, it was all the same the sister,
we have been told you so left. And the same there is not, as you see that
I do not think in that might be, and she's any of the marsh?" said Stepan
Arkadyevitch, and at the commind of the princess answered