In [7]:
import torch
import torch.nn as nn
import torch.optim as optim

In [87]:
# One-hot codes for the four-letter vocabulary (each vector has length 4)
h, e, l, o = (
    [1, 0, 0, 0],
    [0, 1, 0, 0],
    [0, 0, 1, 0],
    [0, 0, 0, 1],
)

Single RNN

In [88]:
# Single-layer RNN: 4-dim one-hot input, 2-dim hidden/output, batch-first layout
cell = nn.RNN(
    input_size=4,
    hidden_size=2,
    batch_first=True,
)

In [95]:
# One sample, one time step, the one-hot vector of 'h' -> shape (1, 1, 4)
inputs = torch.tensor([[h]], dtype=torch.float32)

In [96]:
# Random initial hidden state, shape (num_layers, batch, hidden_size) = (1, 1, 2)
hidden = torch.randn((1, 1, 2))

In [97]:
# Run the RNN on the one-step input; `hidden` is rebound to the final hidden state
out, hidden = cell(inputs, hidden)

In [98]:
# Display the RNN output for the single time step
out

tensor([[[0.0886, 0.5660]]], grad_fn=<TransposeBackward0>)

# Unfolding over a sequence of `n` steps

In [103]:
# "hello" as one batch holding five one-hot time steps
inputs = torch.tensor([[h, e, l, l, o]], dtype=torch.float)

In [104]:
inputs.shape  # (batch, seq, one_hot_size)

torch.Size([1, 5, 4])

In [105]:
# Random initial hidden state, shape (num_layers, batch, hidden_size) = (1, 1, 2)
hidden = torch.randn((1, 1, 2))

In [102]:
# Fresh single-layer RNN: 4-dim one-hot input, 2-dim hidden/output, batch-first layout
cell = nn.RNN(
    input_size=4,
    hidden_size=2,
    batch_first=True,
)

In [106]:
# Feed the whole 5-step sequence in one call; `out` holds one hidden vector per
# time step and `hidden` is rebound to the final hidden state
out, hidden = cell(inputs, hidden)
out

tensor([[[ 0.8379,  0.1169],
         [ 0.5100,  0.3034],
         [ 0.8857,  0.0452],
         [ 0.8154, -0.0911],
         [ 0.8023,  0.1019]]], grad_fn=<TransposeBackward0>)

In [107]:
out.shape  # (batch, seq, hidden_size)

torch.Size([1, 5, 2])

# Batching input

<img src="RNN.png" />

In [112]:
# Dimensions used by the batched example
batch_size = 3       # number of sequences fed together
sequence_length = 5  # time steps in each sequence
hidden_size = 2      # width of the RNN hidden state

In [110]:
# Three sequences of five one-hot steps each -> tensor of shape (3, 5, 4)
inputs = torch.tensor(
    [
        [h, e, l, l, o],
        [e, o, l, l, l],
        [l, l, e, e, l],
    ],
    dtype=torch.float,
)

In [111]:
# Inspect the batched one-hot tensor
inputs

tensor([[[1., 0., 0., 0.],
         [0., 1., 0., 0.],
         [0., 0., 1., 0.],
         [0., 0., 1., 0.],
         [0., 0., 0., 1.]],

        [[0., 1., 0., 0.],
         [0., 0., 0., 1.],
         [0., 0., 1., 0.],
         [0., 0., 1., 0.],
         [0., 0., 1., 0.]],

        [[0., 0., 1., 0.],
         [0., 0., 1., 0.],
         [0., 1., 0., 0.],
         [0., 1., 0., 0.],
         [0., 0., 1., 0.]]])

In [115]:
inputs.shape  # (batch, seq, one_hot_size)

torch.Size([3, 5, 4])

The batch dimension of the initial hidden state changes to `3` (`batch_size`); `hidden_size` itself stays `2`

In [116]:
# Random initial hidden state, shape (num_layers, batch, hidden_size) = (1, 3, 2)
hidden = torch.randn((1, 3, 2))

In [117]:
# Fresh single-layer RNN: 4-dim one-hot input, 2-dim hidden/output, batch-first layout
cell = nn.RNN(
    input_size=4,
    hidden_size=2,
    batch_first=True,
)

In [118]:
# Run the RNN over the whole batch; `hidden` is rebound to the final hidden state
out, hidden = cell(inputs, hidden)

In [119]:
# Display the per-step outputs for every sequence in the batch
out

tensor([[[-0.2943, -0.4227],
         [ 0.6367, -0.3800],
         [-0.7054,  0.5231],
         [-0.8284,  0.5637],
         [ 0.0511,  0.5185]],

        [[ 0.8957, -0.3290],
         [ 0.2561,  0.4048],
         [-0.8674,  0.4274],
         [-0.7936,  0.6040],
         [-0.8390,  0.5623]],

        [[-0.8232,  0.5418],
         [-0.8239,  0.5779],
         [ 0.1936, -0.4992],
         [ 0.5999, -0.4525],
         [-0.6753,  0.5427]]], grad_fn=<TransposeBackward0>)

# Teach the RNN to map `hihell` to `ihello`
input_dim = 5<br>
output_dim = 5

<img src='./55.png'>

The loss function should be `cross entropy`<br>
because this is a multi-class classification problem

<img src='./rnn_loss.png'>

In [122]:
# Class index -> character (h=0, i=1, e=2, l=3, o=4)
idx2char = list("hielo")

In [123]:
# Index-encoded training pair: each target is the character that follows its input
x_data = [0, 1, 0, 2, 3, 3]  # hihell
y_data = [1, 0, 2, 3, 3, 4]  # ihello

# Row k is the one-hot vector for class index k (0 h, 1 i, 2 e, 3 l, 4 o)
one_hot_lookup = [[int(col == row) for col in range(5)] for row in range(5)]

# The same encoding keyed by character; rows are copied so both tables stay independent
one_hot_dict = {ch: list(vec) for ch, vec in zip("hielo", one_hot_lookup)}

# One-hot version of x_data (entries are the lookup rows themselves)
x_one_hot = [one_hot_lookup[idx] for idx in x_data]

# (2) Parameters

In [155]:
# Hyper-parameters for the character-level model
num_layers = 1       # one-layer RNN
sequence_length = 1  # characters are fed one at a time
batch_size = 1       # one sentence per batch
hidden_size = 5      # RNN output width; 5 so the output can be read directly as class scores
input_size = 5       # length of a one-hot input vector
num_classes = 5      # number of distinct characters to predict

In [156]:
# Character-level training pair: 'hihell' is the input text, 'ihello' the target text
inputs, labels = 'hihell', 'ihello'
ans = [one_hot_dict[ch] for ch in inputs]         # one one-hot row per input character
inputs = torch.tensor(ans, dtype=torch.float32)   # float tensor, one row per character
labels = torch.tensor(y_data, dtype=torch.int64)  # class indices are taken from y_data

# 1. Model

In [165]:
class Model(nn.Module):
    """Minimal character-level RNN whose hidden output is used directly as class scores.

    All dimensions are taken from the constructor arguments and stored on the
    instance, so the model does not depend on notebook-level variables.

    Args:
        input_size: length of each one-hot input vector.
        hidden_size: width of the RNN hidden state (and of its output).
        num_layers: number of stacked RNN layers.
        batch_size: number of sequences per forward call.
        sequence_length: number of time steps per forward call.
        num_classes: number of columns the output is reshaped to. There is no
            projection layer, so this is only meaningful when it equals
            ``hidden_size``.
    """

    def __init__(self,
                 input_size=5,
                 hidden_size=5,
                 num_layers=1,
                 batch_size=1,
                 sequence_length=1,
                 num_classes=5):
        super().__init__()
        # Keep the configuration on the instance; forward() and init_hidden()
        # read these instead of same-named globals.
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.batch_size = batch_size
        self.sequence_length = sequence_length
        self.num_classes = num_classes
        self.rnn = nn.RNN(input_size=input_size,
                          hidden_size=hidden_size,
                          num_layers=num_layers,
                          batch_first=True)

    def forward(self, x, hidden):
        """Run the RNN on ``x`` starting from ``hidden``.

        Args:
            x: tensor with ``batch_size * sequence_length * input_size`` elements.
            hidden: tensor of shape ``(num_layers, batch_size, hidden_size)``.

        Returns:
            ``(hidden, out)``: the final hidden state and the outputs reshaped
            to ``(-1, num_classes)``.
        """
        # Reshape input to (batch_size, sequence_length, input_size)
        x = x.reshape(self.batch_size, self.sequence_length, self.input_size)

        out, hidden = self.rnn(x, hidden)
        # reshape (not view): the batch_first output can be non-contiguous
        out = out.reshape(-1, self.num_classes)
        return hidden, out

    def init_hidden(self):
        """Return an all-zero initial hidden state of shape (num_layers, batch_size, hidden_size)."""
        return torch.zeros(self.num_layers, self.batch_size, self.hidden_size)

# 2. Criterion & Loss

<img src='loss.png'>

In [206]:
# Seed the RNG so the weight initialisation, and therefore the training log,
# is the same on every fresh run
torch.manual_seed(0)
model = Model()
criterion = nn.CrossEntropyLoss()  # multi-class loss over the character classes
optimizer = optim.Adam(model.parameters(), lr=0.1)

# 3. Training (feed one by one)

In [207]:
# Start from an empty loss accumulator and a fresh hidden state
loss = 0
hidden = model.init_hidden()

In [197]:
import sys
def single_traing(inputs, labels, hidden, loss, criterion, optimizer,
                  epoch=None, net=None):
    """Run one training pass, feeding the sequence to the network one step at a time.

    Each input is passed through the network together with the running hidden
    state, the predicted character is echoed, and the per-step losses are
    summed. One backward pass and one optimizer step follow.

    Args:
        inputs: iterable of per-step input tensors (e.g. the rows of a 2-D tensor).
        labels: iterable of class-index tensors aligned with ``inputs``.
        hidden: initial hidden state handed to the network.
        loss: starting value of the loss accumulator (normally ``0``).
        criterion: callable ``criterion(output, target)`` returning a loss tensor.
        optimizer: optimizer whose ``step()`` applies the update. Gradients are
            not zeroed here; that is left to the caller.
        epoch: value shown in the progress line. When omitted, a notebook-level
            ``epoch`` is used if one exists, otherwise ``"n/a"`` is printed.
        net: network called as ``net(step_input, hidden) -> (hidden, output)``.
            Defaults to the notebook-level ``model``.

    Returns:
        None.
    """
    if net is None:
        net = model  # backward-compatible default: the notebook's model
    if epoch is None:
        # Earlier callers relied on a global loop variable; tolerate its absence
        # so the function also works before any training loop has run.
        epoch = globals().get("epoch", "n/a")

    print("predicted string: ", end="")
    for step_input, step_label in zip(inputs, labels):
        hidden, output = net(step_input, hidden)
        _, idx = output.max(1)
        print(idx2char[idx.item()], end="")
        # Out-of-place add: never mutates a loss tensor owned by the caller
        loss = loss + criterion(output, step_label.unsqueeze(0))

    if not torch.is_tensor(loss):
        # Empty sequence: nothing was accumulated, so there is nothing to backpropagate
        print(f"\t epoch: {epoch}, loss: {loss}")
        return

    print(f"\t epoch: {epoch}, loss: {loss.item()}")
    loss.backward()
    optimizer.step()

In [198]:
# One training pass over the whole sequence, ending in a single optimizer step
single_traing(inputs, labels, hidden, loss, criterion, optimizer)

predicted string: ihello	 epoch: 4, loss: 2.681422472000122


# 3.1 Training with epoch

In [208]:
# Repeat the single-pass training for 15 epochs, resetting per-epoch state each time
for epoch in range(1, 16):
    hidden = model.init_hidden()  # fresh hidden state for this epoch
    loss = 0                      # restart the loss accumulator
    optimizer.zero_grad()         # discard gradients from the previous step
    single_traing(inputs, labels, hidden, loss, criterion, optimizer)

predicted string: hhihhh	 epoch: 1, loss: 9.099803924560547
predicted string: lhelhl	 epoch: 2, loss: 7.887807846069336
predicted string: ihilll	 epoch: 3, loss: 6.988587379455566
predicted string: ihillo	 epoch: 4, loss: 6.244536399841309
predicted string: ihello	 epoch: 5, loss: 5.607056140899658
predicted string: ihello	 epoch: 6, loss: 5.096649169921875
predicted string: ihello	 epoch: 7, loss: 4.712426662445068
predicted string: ihelho	 epoch: 8, loss: 4.428897857666016
predicted string: ihelho	 epoch: 9, loss: 4.216862201690674
predicted string: ihello	 epoch: 10, loss: 4.049167633056641
predicted string: ihello	 epoch: 11, loss: 3.9077630043029785
predicted string: ihello	 epoch: 12, loss: 3.784339189529419
predicted string: ihello	 epoch: 13, loss: 3.6682534217834473
predicted string: ihello	 epoch: 14, loss: 3.54925274848938
predicted string: ihello	 epoch: 15, loss: 3.4397172927856445
