# Recurrent NN

So far, a network takes input, the generate outputs, only the error is propagated back.
RNN network takes inputs, at the same time, the output from previous state are fed back as another input. It enabled the RNN to retain certain type of **memory**, which make it useful for:
* time series prediction
* language modeling, sentimental analysis
* translation, speech recognition ...


## pytorch API

The interface is friendly:

The hidden_size is the output size

```
cell = nn.RNN(input_size = 4, hidden_size = 2, batch_first = True)
cell = nn.GRU(input_size = 4, hidden_size = 2, batch_first = True)
cell = nn.LSTM(input_size = 4, hidden_size = 2, batch_first = True)
```

Once a cell is defined:

```
out, hidden = cell(inputs, hidden)
```

So the cell output has two parts: the output itself, and hidden output.

## Example: feed letters

In [1]:
import torch
import torch.nn as nn
from torch.autograd import Variable

# One hot encoding for each char in 'hello'
h = [1, 0, 0, 0]
e = [0, 1, 0, 0]
l = [0, 0, 1, 0]
o = [0, 0, 0, 1]

In [21]:
# One cell RNN input_dim (4) -> output_dim (2). sequence: 5
cell = nn.RNN(input_size=4, hidden_size=2, batch_first=True)

# (num_layers * num_directions, batch, hidden_size) whether batch_first=True or False
# tensor([[[-1.4432,  1.7785]]])
hidden = Variable(torch.randn(1, 1, 2))


### Option 1: feed one letter at a time, input shape (1, 1, 4)

In [23]:
# Propagate input through RNN
# Input: (batch, seq_len, input_size) when batch_first=True
inputs = Variable(torch.Tensor([h, e, l, l, o]))
for one in inputs:
    one = one.view(1, 1, -1)
    print("input=", one)
    # Input: (batch, seq_len, input_size) when batch_first=True
    out, hidden = cell(one, hidden)
    print("input_size=", one.size(), "outsize=", out.size(), "out_value=", out.data)

input= tensor([[[1., 0., 0., 0.]]])
input_size= torch.Size([1, 1, 4]) outsize= torch.Size([1, 1, 2]) out_value= tensor([[[-0.6660,  0.6075]]])
input= tensor([[[0., 1., 0., 0.]]])
input_size= torch.Size([1, 1, 4]) outsize= torch.Size([1, 1, 2]) out_value= tensor([[[-0.4775,  0.7268]]])
input= tensor([[[0., 0., 1., 0.]]])
input_size= torch.Size([1, 1, 4]) outsize= torch.Size([1, 1, 2]) out_value= tensor([[[-0.4449,  0.1278]]])
input= tensor([[[0., 0., 1., 0.]]])
input_size= torch.Size([1, 1, 4]) outsize= torch.Size([1, 1, 2]) out_value= tensor([[[-0.2878, -0.1194]]])
input= tensor([[[0., 0., 0., 1.]]])
input_size= torch.Size([1, 1, 4]) outsize= torch.Size([1, 1, 2]) out_value= tensor([[[0.0518, 0.4729]]])


### Option 2: feed one "sequence" at a time, input shape (1, 5, 4)

The second number 5 is the length of the letter, "hello".


In [24]:
inputs = Variable(torch.Tensor([h, e, l, l, o]))
# We can do the whole at once
# Propagate input through RNN
# Input: (batch, seq_len, input_size) when batch_first=True
inputs = inputs.view(1, 5, -1)
out, hidden = cell(inputs, hidden)
print("input_size=", one.size(), "outsize=", out.size(), "out_value=\n", out.data)

input_size= torch.Size([1, 1, 4]) outsize= torch.Size([1, 5, 2]) out_value=
 tensor([[[-0.6630,  0.6087],
         [-0.4768,  0.7275],
         [-0.4449,  0.1284],
         [-0.2879, -0.1192],
         [ 0.0517,  0.4729]]])


### Option 3: feed multiple words or "batch" at a time, input shape (3, 5, 4)

If we have three words to feed, the first number is for that.
Of course, we are limited to encode just a few letters here.


In [15]:
# hidden : (num_layers * num_directions, batch, hidden_size) whether batch_first=True or False
hidden = Variable(torch.randn(1, 3, 2))

# One cell RNN input_dim (4) -> output_dim (2). sequence: 5, batch 3
# 3 batches 'hello', 'eolll', 'lleel'
# rank = (3, 5, 4)
inputs = Variable(torch.Tensor([[h, e, l, l, o],
                                [e, o, l, l, l],
                                [l, l, e, e, l]]))

# Propagate input through RNN
# Input: (batch, seq_len, input_size) when batch_first=True
# B x S x I
out, hidden = cell(inputs, hidden)

print("input_size=", one.size(), "outsize=", out.size(), "out_value=\n", out.data)


input_size= torch.Size([1, 1, 4]) outsize= torch.Size([3, 5, 2]) out_value=
 tensor([[[ 0.2906,  0.8383],
         [-0.0809,  0.4115],
         [-0.3715,  0.3934],
         [-0.2620,  0.2893],
         [ 0.5988,  0.2444]],

        [[ 0.4730,  0.1992],
         [ 0.3435,  0.4745],
         [-0.5093,  0.5346],
         [-0.1811,  0.2702],
         [-0.3563,  0.3304]],

        [[-0.5692,  0.5406],
         [-0.1550,  0.2497],
         [ 0.0040,  0.1241],
         [-0.0872,  0.1561],
         [-0.4086,  0.3393]]])


## Train RNN to predict next letter


In [28]:
import torch
import torch.nn as nn
from torch.autograd import Variable

torch.manual_seed(777)  # reproducibility


idx2char = ['h', 'i', 'e', 'l', 'o']

# Teach hihell -> ihello
x_data = [[0, 1, 0, 2, 3, 3]]   # hihell
x_one_hot = [[[1, 0, 0, 0, 0],   # h 0
              [0, 1, 0, 0, 0],   # i 1
              [1, 0, 0, 0, 0],   # h 0
              [0, 0, 1, 0, 0],   # e 2
              [0, 0, 0, 1, 0],   # l 3
              [0, 0, 0, 1, 0]]]  # l 3

y_data = [1, 0, 2, 3, 3, 4]    # ihello

# As we have one batch of samples, we will change them to variables only once
inputs = Variable(torch.Tensor(x_one_hot))
labels = Variable(torch.LongTensor(y_data))

num_classes = 5
input_size = 5  # one-hot size
hidden_size = 5  # output from the LSTM. 5 to directly predict one-hot
batch_size = 1   # one sentence
sequence_length = 6  # |ihello| == 6
num_layers = 1  # one-layer rnn


class RNN(nn.Module):

    def __init__(self, num_classes, input_size, hidden_size, num_layers):
        super(RNN, self).__init__()

        self.num_classes = num_classes
        self.num_layers = num_layers
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.sequence_length = sequence_length

        self.rnn = nn.RNN(input_size=5, hidden_size=5, batch_first=True)

    def forward(self, x):
        # Initialize hidden and cell states
        # (num_layers * num_directions, batch, hidden_size) for batch_first=True
        h_0 = Variable(torch.zeros(
            self.num_layers, x.size(0), self.hidden_size))

        # Reshape input
        x.view(x.size(0), self.sequence_length, self.input_size)

        # Propagate input through RNN
        # Input: (batch, seq_len, input_size)
        # h_0: (num_layers * num_directions, batch, hidden_size)

        out, _ = self.rnn(x, h_0)
        return out.view(-1, num_classes)


# Instantiate RNN model
rnn = RNN(num_classes, input_size, hidden_size, num_layers)
print(rnn)

# Set loss and optimizer function
# CrossEntropyLoss = LogSoftmax + NLLLoss
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(rnn.parameters(), lr=0.1)

# Train the model
for epoch in range(100):
    outputs = rnn(inputs)
    optimizer.zero_grad()
    loss = criterion(outputs, labels)
    loss.backward()
    optimizer.step()
    _, idx = outputs.max(1)
    idx = idx.data.numpy()
    result_str = [idx2char[c] for c in idx.squeeze()]
    print("epoch: %d, loss: %1.3f" % (epoch + 1, loss.item()))
    print("Predicted string: ", ''.join(result_str))

print("Learning finished!")

RNN(
  (rnn): RNN(5, 5, batch_first=True)
)
epoch: 1, loss: 1.693
Predicted string:  llllll
epoch: 2, loss: 1.523
Predicted string:  llllll
epoch: 3, loss: 1.393
Predicted string:  llllll
epoch: 4, loss: 1.263
Predicted string:  llllll
epoch: 5, loss: 1.146
Predicted string:  llllll
epoch: 6, loss: 1.055
Predicted string:  lhelll
epoch: 7, loss: 1.002
Predicted string:  ihelll
epoch: 8, loss: 0.965
Predicted string:  ihelll
epoch: 9, loss: 0.913
Predicted string:  ihelll
epoch: 10, loss: 0.879
Predicted string:  ihelll
epoch: 11, loss: 0.840
Predicted string:  ihelll
epoch: 12, loss: 0.805
Predicted string:  ihello
epoch: 13, loss: 0.779
Predicted string:  ihello
epoch: 14, loss: 0.758
Predicted string:  ihello
epoch: 15, loss: 0.738
Predicted string:  ihello
epoch: 16, loss: 0.717
Predicted string:  ihello
epoch: 17, loss: 0.694
Predicted string:  ihello
epoch: 18, loss: 0.667
Predicted string:  ihelll
epoch: 19, loss: 0.643
Predicted string:  ihelll
epoch: 20, loss: 0.647
Predicted s