# A simple RNN from scratch

**Outline**
- Creating your customized dataset
- Implementing a simple RNN module from scratch
- NLLLoss vs CrossEntropyLoss
- Learning rate scheduler

In [1]:
import time
import torch
from torch import nn
from torch import optim

# Seed PyTorch's random number generator.
torch.manual_seed(13)

# Select the first CUDA device when CUDA is available, otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
print('Device: {}'.format(device))

Device: cuda:0


In [2]:
from utils.data_helpers import EvenOddDataset

# Arguments forwarded to EvenOddDataset. Their meaning is defined by that class
# (presumably how many numbers to generate and the first number — TODO confirm).
size, start = 200, 0
trainset = EvenOddDataset(size, start)

In [3]:
class RNN(nn.Module):
    """Minimal recurrent cell built from two linear layers.

    At every step the input is concatenated with the previous hidden state.
    One linear layer maps that concatenation to the next hidden state and a
    second one maps it to class scores, which are converted to
    log-probabilities. No non-linearity is applied to the hidden state.
    """

    def __init__(self, input_size, hidden_size, output_size):
        """Create the layers.

        Args:
            input_size: Number of features in the input of a single step.
            hidden_size: Number of features in the hidden state.
            output_size: Number of output classes.
        """
        super(RNN, self).__init__()

        self.hidden_size = hidden_size

        # Both layers read the concatenation [input, hidden].
        self.i2h = nn.Linear(input_size + hidden_size, hidden_size)
        self.i2o = nn.Linear(input_size + hidden_size, output_size)
        self.logsoftmax = nn.LogSoftmax(dim=1)

    def forward(self, x, hidden):
        """Run a single time step.

        Args:
            x: Input of this step; concatenated with ``hidden`` along dim 1,
                so its size along dim 1 must be ``input_size``.
            hidden: Previous hidden state with ``hidden_size`` features along
                dim 1.

        Returns:
            Tuple ``(output, hidden)``: log-probabilities over the classes
            (log-softmax along dim 1) and the new hidden state.
        """
        combined = torch.cat((x, hidden), 1)
        hidden = self.i2h(combined)
        output = self.i2o(combined)
        output = self.logsoftmax(output)
        return output, hidden

    def init_hidden_state(self):
        """Return an all-zero initial hidden state of shape (1, hidden_size).

        The tensor is created with the device and dtype of the layer weights,
        so it stays compatible with the module after ``.to(device)`` or a
        dtype cast such as ``.double()``.
        """
        weight = self.i2h.weight
        return torch.zeros(
            1, self.hidden_size, device=weight.device, dtype=weight.dtype
        )

    def predict(self, tensor):
        """Classify one sequence and return the predicted class index.

        Args:
            tensor: Sequence whose first dimension is time; ``tensor[i]`` is
                fed to the module as the input of step ``i``.

        Returns:
            int: Index of the highest-scoring class after the last step.
            ``.item()`` is used, so the output must hold a single row.

        Raises:
            ValueError: If the sequence contains no steps.
        """
        n_steps = tensor.size()[0]
        if n_steps == 0:
            raise ValueError("predict() needs a sequence with at least one step")

        with torch.no_grad():
            hidden = self.init_hidden_state()

            for i in range(n_steps):
                # Call the module rather than ``forward`` so registered hooks run.
                output, hidden = self(tensor[i], hidden)
            pred = output.argmax(dim=1).item()
        return pred


In [4]:
epochs = 10000
# Report progress roughly 20 times per run. Clamped to 1 so that runs shorter
# than 20 epochs do not yield a zero period (the training loop evaluates
# ``e % print_period``).
print_period = max(1, epochs // 20)

input_size_for_single_step = 1
n_hidden = 200
fc_size = 20  # NOTE(review): not referenced anywhere in the visible code.
n_classes = 2
learning_rate = 0.001
momentum = 0.9

rnn = RNN(input_size_for_single_step, n_hidden, n_classes)
# NLLLoss takes log-probabilities, which RNN.forward produces via LogSoftmax.
criterion = nn.NLLLoss()
optimizer = optim.SGD(rnn.parameters(), lr=learning_rate, momentum=momentum)
# Multiply the learning rate by 0.7 every ``epochs // 5`` scheduler steps.
# The step size is clamped to 1 because StepLR cannot work with a period of 0.
lr_scheduler = optim.lr_scheduler.StepLR(
    optimizer, step_size=max(1, epochs // 5), gamma=0.7
)

print("Learning rate: {}".format(optimizer.param_groups[0]['lr']))

Learning rate: 0.001


In [5]:
from utils.data_helpers import num_from_tensor
from utils.misc import time_since


def train_step(tens, target, rnn, criterion, optimizer):
    """Perform one optimisation step on a single sequence.

    The sequence is fed to ``rnn`` one step at a time, starting from a fresh
    hidden state. Only the output of the final step is scored.

    Args:
        tens: Input sequence; ``tens[i]`` is the input of step ``i``.
        target: Target passed to ``criterion`` together with the final output.
        rnn: Model exposing ``init_hidden_state()`` and callable as
            ``rnn(step_input, hidden)``, returning ``(output, hidden)``.
        criterion: Loss callable invoked as ``criterion(output, target)``.
        optimizer: Optimizer providing ``zero_grad()`` and ``step()``.

    Returns:
        Tuple ``(output, loss)``: the model output of the last step and the
        loss value converted with ``.item()``.
    """
    # Clear gradients left over from the previous step before accumulating.
    optimizer.zero_grad()
    state = rnn.init_hidden_state()

    for t in range(tens.shape[0]):
        output, state = rnn(tens[t], state)

    step_loss = criterion(output, target)
    step_loss.backward()
    optimizer.step()

    return output, step_loss.item()


# Wall-clock reference handed to time_since() in the progress report.
start = time.time()
for e in range(1, epochs + 1):
    # One optimisation step on a newly sampled (input, target) pair.
    tens, target = trainset.sample()
    output, loss = train_step(tens, target, rnn, criterion, optimizer)

    if e % print_period == 0:
        # Predicted class = index of the largest log-probability.
        guess = output.argmax(dim=1).item()
        if guess == target:
            correct = '✓'
        else:
            correct = '✗ True class: %s' % target.item()
        report = (e, e / epochs * 100, time_since(start), loss, num_from_tensor(tens), guess, correct)
        print('%d %d%% (%s) %.4f Guess: %s / %s %s' % report)

        print("Learning rate: {}\n".format(optimizer.param_groups[0]['lr']))
    # Advance the learning-rate schedule once per epoch, after the optimizer update.
    lr_scheduler.step()


500 5% (0m 1s) 0.4508 Guess: 64 / 1 ✓
Learning rate: 0.001

1000 10% (0m 1s) 0.2229 Guess: 196 / 1 ✓
Learning rate: 0.001

1500 15% (0m 2s) 0.1458 Guess: 187 / 0 ✓
Learning rate: 0.001

2000 20% (0m 3s) 0.1324 Guess: 199 / 0 ✓
Learning rate: 0.001

2500 25% (0m 4s) 0.1269 Guess: 79 / 0 ✓
Learning rate: 0.0007

3000 30% (0m 5s) 0.0841 Guess: 68 / 1 ✓
Learning rate: 0.0007

3500 35% (0m 5s) 0.0853 Guess: 164 / 1 ✓
Learning rate: 0.0007

4000 40% (0m 6s) 0.0666 Guess: 75 / 0 ✓
Learning rate: 0.0007

4500 45% (0m 6s) 0.0577 Guess: 82 / 1 ✓
Learning rate: 0.00049

5000 50% (0m 7s) 0.0499 Guess: 193 / 0 ✓
Learning rate: 0.00049

5500 55% (0m 8s) 0.0463 Guess: 102 / 1 ✓
Learning rate: 0.00049

6000 60% (0m 8s) 0.0456 Guess: 105 / 0 ✓
Learning rate: 0.00049

6500 65% (0m 9s) 0.0472 Guess: 161 / 0 ✓
Learning rate: 0.000343

7000 70% (0m 10s) 0.0536 Guess: 176 / 1 ✓
Learning rate: 0.000343

7500 75% (0m 10s) 0.0455 Guess: 180 / 1 ✓
Learning rate: 0.000343

8000 80% (0m 11s) 0.0487 Guess: 130 / 1

### Resources
[Adjusting the learning rate](https://pytorch.org/docs/master/optim.html#how-to-adjust-learning-rate)

[Writing custom datasets](https://pytorch.org/tutorials/beginner/data_loading_tutorial.html)

[Nice animated visualizations](https://towardsdatascience.com/animated-rnn-lstm-and-gru-ef124d06cf45)