<a href="https://colab.research.google.com/github/abialbon/pytorch-udacity-scholarship/blob/master/RNN/RNN_practice.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# RNN Practice

Things to do

1. Install PyTorch
2. Get the dataset
3. Create model wrapper
4. Create train function
5. Write the function that creates batches
6. Write the function that one-hot encodes the input
7. Write a function that can encode/decode characters to and from integers

8. Train the model
9. Save the model to google drive
10. Write a function that can predict given a char
11. Write a function that will create text from a starting prime

In [0]:
# http://pytorch.org/
# Installs the PyTorch 0.4.1 wheel that matches this interpreter and, when a GPU
# device node is present, the CUDA runtime found on the machine.
from os.path import exists
from wheel.pep425tags import get_abbr_impl, get_impl_ver, get_abi_tag
# Wheel platform tag assembled from the interpreter implementation, version and ABI.
platform = '{}{}-{}'.format(get_abbr_impl(), get_impl_ver(), get_abi_tag())
# List the cudart shared libraries known to the linker and rewrite the trailing
# ".<major>.<minor>" of each entry into "cu<major><minor>".
cuda_output = !ldconfig -p|grep cudart.so|sed -e 's/.*\.\([0-9]*\)\.\([0-9]*\)$/cu\1\2/'
# Use the first detected CUDA tag only when /dev/nvidia0 exists; otherwise the CPU build.
accelerator = cuda_output[0] if exists('/dev/nvidia0') else 'cpu'

# {accelerator} and {platform} are interpolated by IPython into the wheel URL.
!pip install -q http://download.pytorch.org/whl/{accelerator}/torch-0.4.1-{platform}-linux_x86_64.whl torchvision

In [0]:
#All imports here:
import torch
import numpy as np
from torch import nn
import torch.nn.functional as F
import torch.optim as optim

# Global configuration: compute on the GPU when PyTorch reports one, otherwise on the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'


In [63]:
!wget https://raw.githubusercontent.com/abialbon/deep-learning-v2-pytorch/master/recurrent-neural-networks/char-rnn/data/anna.txt

--2018-11-28 23:58:06--  https://raw.githubusercontent.com/abialbon/deep-learning-v2-pytorch/master/recurrent-neural-networks/char-rnn/data/anna.txt
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 151.101.0.133, 151.101.64.133, 151.101.128.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|151.101.0.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 2025486 (1.9M) [text/plain]
Saving to: ‘anna.txt.1’


2018-11-28 23:58:06 (10.6 MB/s) - ‘anna.txt.1’ saved [2025486/2025486]



In [0]:
# Read the whole corpus into memory. The encoding is explicit so the result does not
# depend on the locale (assumes the corpus is UTF-8/ASCII text).
with open('anna.txt', 'r', encoding='utf-8') as f:
    data = f.read()

# Sort the vocabulary so its order, and therefore any char <-> int mapping built by
# enumerating it, is identical in every session; a bare set gives no such guarantee.
tokens = tuple(sorted(set(data)))
n_tokens = len(tokens)

In [0]:
# Model class
class CharRNN(nn.Module):
    """
    Character-level model: stacked LSTM -> dropout -> linear layer over the vocabulary

    Arguments:
    ----------
    tokens: iterable of the distinct characters of the vocabulary
    n_hidden_layers: number of hidden units in each LSTM layer
    n_layers: number of stacked LSTM layers
    drop_prob: dropout probability applied to the LSTM output
    """

    def __init__(self, tokens, n_hidden_layers=512, n_layers=2, drop_prob=0.5):
        super().__init__()
        # Set iteration order is not stable between interpreter sessions, which would
        # silently change the char <-> int mapping. Sort sets; keep the order of
        # already-ordered inputs (lists, tuples, strings).
        if isinstance(tokens, (set, frozenset)):
            tokens = sorted(tokens)
        self.tokens = tuple(tokens)
        self.n_tokens = len(self.tokens)
        self.char2int = {ch: i for i, ch in enumerate(self.tokens)}
        self.int2char = {i: ch for ch, i in self.char2int.items()}
        self.hidden_layers = n_hidden_layers
        self.n_layers = n_layers
        # Layers
        self.lstm = nn.LSTM(self.n_tokens, n_hidden_layers, n_layers, batch_first=True)
        self.fc = nn.Linear(n_hidden_layers, self.n_tokens)
        self.dropout = nn.Dropout(drop_prob)

    def forward(self, x, h):
        """
        Runs one forward pass

        Arguments:
        ----------
        x: one-hot input of shape (batch, sequence, n_tokens)
        h: tuple (hidden state, cell state) as returned by init_hidden_state

        Output:
        -------
        tuple (scores of shape (batch * sequence, n_tokens), new hidden state)
        """
        x, h = self.lstm(x, h)
        x = self.dropout(x)
        # Stack all time steps of all sequences so the linear layer scores each one.
        x = x.contiguous().view(-1, self.hidden_layers)
        x = self.fc(x)
        return x, h

    def encode(self, item, dict):
        """Returns dict[item]; used with char2int to encode and with int2char to decode."""
        # The parameter name shadows the builtin `dict`; it is kept because it is
        # part of the method signature.
        return dict[item]

    def init_hidden_state(self, batch_size):
        """
        Creates a zeroed (hidden state, cell state) tuple for the LSTM

        Each tensor has shape (n_layers, batch_size, hidden units) and is allocated
        with the dtype and device of the model's own parameters, so it always matches
        wherever the model currently lives.
        """
        weights = next(self.parameters())
        shape = (self.n_layers, batch_size, self.hidden_layers)
        return (weights.new_zeros(shape), weights.new_zeros(shape))
        

In [0]:
def one_hot_encode(arr, n_labels):
    """
    Generates one hot encoded array with n_labels number of labels

    Arguments:
    ----------
    arr: an integer array (or nested list) of any shape, with values in [0, n_labels)
    n_labels: one hot labels to encode

    Output:
    -------
    One hot encoded float array of size (*arr.shape, n_labels)
    """
    arr = np.asarray(arr)
    # arr.size works for any number of dimensions, not only for 2-D input.
    one_hot = np.zeros((arr.size, n_labels))
    # Row r of the flat matrix gets a 1 in the column named by the r-th label.
    one_hot[np.arange(arr.size), arr.ravel()] = 1
    return one_hot.reshape(*arr.shape, n_labels)

In [0]:
def generate_batches(arr, b, s):
    """
    Generates batches of x, y with shapes b x s, where y is x shifted one step ahead

    Arguments:
    ----------
    arr: a flat (1-D) array
    b: batch size
    s: sequence length

    Outputs:
    --------
    generator of tuples (x, y), each with a dimension of (b x s). y[i, j] holds the
    element that follows x[i, j] in its row; for the last column of the final batch
    there is no next element, so the target wraps around to the first column.
    Trailing elements that do not fill a whole batch are dropped, and nothing is
    yielded when arr holds fewer than b * s elements.

    Raises:
    -------
    ValueError: if b or s is not positive
    """
    if b <= 0 or s <= 0:
        raise ValueError('batch size and sequence length must be positive')

    arr = np.asarray(arr)
    n = len(arr) // (b * s)
    arr = arr[:n*b*s].reshape(b, -1)
    row_length = arr.shape[1]

    for step in range(0, row_length, s):
        x = arr[:, step:step+s]
        y = np.zeros_like(x)
        y[:, :-1] = x[:, 1:]
        # Explicit boundary check instead of catching the IndexError of the last batch.
        following = step + s
        y[:, -1] = arr[:, following] if following < row_length else arr[:, 0]
        yield x, y
    

In [0]:
def _batch_to_tensors(inputs, target, n_tokens):
    """One-hot encodes an integer input batch and moves inputs and targets to `device`."""
    inputs = one_hot_encode(inputs, n_tokens)
    inputs = torch.from_numpy(inputs).type(torch.FloatTensor).to(device)
    # CrossEntropyLoss expects int64 class indices; .long() makes that explicit
    # whatever integer dtype NumPy produced.
    target = torch.from_numpy(target).long().to(device)
    return inputs, target


def train(model, data, epochs=5, batch_size=128, seq_length=100, val_fraction=0.1, lr=0.001):
    """
    Trains the model on data and prints the mean train/validation loss after every epoch

    Arguments:
    ----------
    model: network exposing char2int, n_tokens, encode and init_hidden_state (e.g. CharRNN)
    data: sequence of characters; every character must be a key of model.char2int
    epochs: number of passes over the training split
    batch_size: number of sequences per batch
    seq_length: number of characters per sequence
    val_fraction: fraction of data, taken from the end, held out for validation
    lr: learning rate of the Adam optimizer

    Output:
    -------
    None; the model is updated in place and left in training mode
    """
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=lr)

    model.to(device)

    encoded_data = np.array([model.encode(i, model.char2int) for i in data])
    idx = int(len(data) * (1-val_fraction))
    trainset, valset = encoded_data[:idx], encoded_data[idx:]

    for e in range(1, epochs+1):
        # ---- training pass ----
        model.train()
        train_loss = 0
        t_counter = 0
        h = model.init_hidden_state(batch_size)

        for inputs, target in generate_batches(trainset, batch_size, seq_length):
            inputs, target = _batch_to_tensors(inputs, target, model.n_tokens)

            # Cut the graph so backpropagation stops at the batch boundary.
            h = tuple([each.detach() for each in h])

            optimizer.zero_grad()
            output, h = model(inputs, h)
            loss = criterion(output, target.view(batch_size*seq_length))
            train_loss += loss.item()
            loss.backward()
            nn.utils.clip_grad_norm_(model.parameters(), 5)
            optimizer.step()
            t_counter += 1

        # ---- validation pass ----
        model.eval()
        v_h = model.init_hidden_state(batch_size)
        v_loss = 0
        v_counter = 0
        with torch.no_grad():
            for v_inputs, v_target in generate_batches(valset, batch_size, seq_length):
                v_inputs, v_target = _batch_to_tensors(v_inputs, v_target, model.n_tokens)

                # The validation state v_h is carried from batch to batch on its own;
                # it must not be re-seeded from the training state h.
                output, v_h = model(v_inputs, v_h)
                loss = criterion(output, v_target.view(batch_size * seq_length))
                v_loss += loss.item()
                v_counter += 1
        model.train()

        # A split shorter than one batch yields no batches; report nan instead of
        # dividing by zero.
        mean_train = train_loss/t_counter if t_counter else float('nan')
        mean_val = v_loss/v_counter if v_counter else float('nan')
        print('Epoch: {} ---- Train loss: {:.3f} ---- Val loss: {:.3f}'.format(e, mean_train, mean_val))


In [69]:
# Build the network and move it to the selected device in one step; the bare name
# on the last line makes the notebook display the module summary.
net = CharRNN(tokens).to(device)
net

CharRNN(
  (lstm): LSTM(83, 512, num_layers=2, batch_first=True)
  (fc): Linear(in_features=512, out_features=83, bias=True)
  (dropout): Dropout(p=0.5)
)

In [77]:
# Fit the network on the full corpus for five epochs.
train(model=net, data=data, epochs=5)

1786700
Epoch: 1 ---- Train loss: 1.055 ---- Val loss: 1.470
Epoch: 2 ---- Train loss: 1.039 ---- Val loss: 1.482
Epoch: 3 ---- Train loss: 1.026 ---- Val loss: 1.484


KeyboardInterrupt: ignored

In [0]:
def predict_next_char(model, char, h, top_k):
    """
    Predicts the character that follows char, given the hidden state h

    Arguments:
    ----------
    model: network exposing char2int, int2char, n_tokens and encode (e.g. CharRNN)
    char: input character; if a longer string is given, all of it is fed in and the
          prediction made after its last character is used
    h: tuple of hidden-state tensors for a batch of size 1
    top_k: number of most likely characters to sample from
           (1 picks the most likely one, None samples from the whole vocabulary)

    Output:
    -------
    tuple (predicted character, new hidden state)
    """
    h = tuple([each.data for each in h])
    encoded = np.array([[model.encode(i, model.char2int) for i in char]])
    one_hot_encoded = one_hot_encode(encoded, model.n_tokens)

    x = torch.from_numpy(one_hot_encoded).type(torch.FloatTensor).to(device)

    # Dropout must be off while predicting; restore whatever mode the caller had.
    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            output, h = model(x, h)
    finally:
        model.train(was_training)

    # The output has one row per input character; only the last row predicts the next one.
    probs = F.softmax(output, dim=1)[-1]
    if top_k is None:
        top_k = probs.numel()
    p, top_c = probs.topk(top_k)

    p = p.cpu().numpy()
    top_c = top_c.cpu().numpy()
    if top_k != 1:
        # Renormalise because the k kept probabilities no longer sum to one.
        index = np.random.choice(top_c, p=p/p.sum())
    else:
        index = top_c[0]
    char = model.encode(int(index), model.int2char)

    return char, h

In [0]:
def sample(model, size=200, prime='The', top_k=5):
    """
    Generates text by feeding prime through the model and then sampling characters

    Arguments:
    ----------
    model: trained network (e.g. CharRNN)
    size: number of characters generated after the first predicted one
    prime: non-empty starting text
    top_k: forwarded to predict_next_char

    Output:
    -------
    string of len(prime) + 1 + size characters that starts with prime

    Raises:
    -------
    ValueError: if prime is empty
    """
    if not prime:
        raise ValueError('prime must contain at least one character')

    chars = list(prime)

    # Generate with dropout disabled and without building autograd graphs; the
    # model's previous mode is restored afterwards.
    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            h = model.init_hidden_state(1)
            # Warm up the hidden state on the prime; only the prediction made after
            # its last character is kept.
            for ch in prime:
                char, h = predict_next_char(model, ch, h, top_k)
            chars.append(char)

            for _ in range(size):
                char, h = predict_next_char(model, chars[-1], h, top_k)
                chars.append(char)
    finally:
        model.train(was_training)

    return ''.join(chars)

In [87]:
# Generate 1000 characters that continue the prime 'Anna', sampling from the 5 most likely characters.
print(sample(net, size=1000, prime='Anna', top_k=5))

Anna Arkadyevna, who
was seemed an artly with his heart to her, and she would have tried
improve of her and he could not have business with a slyes is a small
point into the separate attractive. She was not that as the more did
not range her a long wife. She stopped and took up a concealive also
young, but as shuglish acquaintances.

"I wonder as you're a belichtic," said he. "Is, so that there's a measureful the
staying from himself. And she's a light, yes, she does not let,"
she said, "I haven't forgiven, and I've bold the stillness of sufferings
and the souncts of all their position, but if I'rriel and saving
to me, though a bang haddblows."

"Oh, we all say now singing as with a geal of destroum of dead."

He gazed at Varenka.

"With yous went on, I am doing as though I shall come to the lows that's near
him.
... and I don't know! Beed her too," she said,
walking into telling her. Anna was standing before, but therefore,
and a screng about the walozed waiter, which he saw nothing, 