# Improve with embedding
<img src='ex12-3.png'>

In [128]:
import torch
import torch.nn as nn
import torch.optim as optim

torch.manual_seed(99999999)  # seed PyTorch's global RNG so the randomly initialised weights are repeatable between runs

<torch._C.Generator at 0x7f1d98e47f90>

In [129]:
# Vocabulary: the distinct characters that occur in the string 'hihello'.
full_data = set('hihello')
full_data  # a set has no defined order, so the displayed arrangement may vary

{'e', 'h', 'i', 'l', 'o'}

In [130]:
# Character -> integer id table; ids follow the alphabetical order of the vocabulary.
word_to_ix = dict(e=0, h=1, i=2, l=3, o=4)

# One learnable 5-dimensional vector for each of the 5 characters.
embeds = nn.Embedding(num_embeddings=5, embedding_dim=5)

# Model input: every character of 'hihello' except the last one.
x_data = 'hihell'

# Encode the input string as ids, then fetch the embedding row of each id.
lookup_tensor = torch.tensor([word_to_ix[ch] for ch in x_data], dtype=torch.long)
inputs = embeds(lookup_tensor)

In [131]:
lookup_tensor  # integer ids of the characters of x_data ('hihell') under word_to_ix

tensor([1, 2, 1, 0, 3, 3])

In [132]:
# Cut the looked-up vectors loose from the Embedding's autograd graph and keep
# an independent copy of their values. The lookup above is executed only once,
# outside the training loop, so from here on `inputs` is a constant tensor.
# Consequence: the embedding weights receive no gradient during training (the
# optimizer is only given model.parameters()).
inputs = inputs.detach().clone()
inputs

tensor([[-1.0195,  0.2725,  0.2672, -2.0831, -0.9972],
        [ 0.9105, -0.7068,  1.2200, -0.0353,  0.5045],
        [-1.0195,  0.2725,  0.2672, -2.0831, -0.9972],
        [ 0.2432, -0.0211,  0.7164, -0.5276,  0.0104],
        [-0.9600, -0.7599,  0.5004, -0.4419,  0.2254],
        [-0.9600, -0.7599,  0.5004, -0.4419,  0.2254]])

In [133]:
# Targets: 'ihello' is the input 'hihell' shifted left by one character, so the
# label at each position is the character that follows the input character.
tmp = [word_to_ix[ch] for ch in 'ihello']
labels = torch.tensor(tmp, dtype=torch.int64)

In [134]:
labels  # integer ids of the target characters 'ihello'

tensor([2, 1, 0, 3, 3, 4])

# (2) Parameters

In [135]:
# Size hyper-parameters for the character-level RNN.
num_classes = 5  # number of distinct characters, i.e. output classes
input_size = 5  # width of each input vector: the embedding dimension of nn.Embedding(5, 5), not a one-hot size
hidden_size = 5 # size of the recurrent hidden state (the model wraps nn.RNN, not an LSTM)
batch_size = 1  # one sentence
sequence_length = 1 # one character per forward pass; NOTE(review): the model trained below is built for the full 6-character sequence instead
num_layers = 1  # one-layer rnn

# 1. Model

In [136]:
class Model(nn.Module):
    """Character-level RNN followed by a linear read-out layer.

    Args:
        input_size: width of each input vector.
        hidden_size: width of the recurrent hidden state.
        num_layers: number of stacked recurrent layers.
        batch_size: number of sequences per forward pass.
        sequence_length: number of time steps per forward pass.
        num_classes: number of output classes (logits per time step).
    """

    def __init__(self,
                 input_size=5,
                 hidden_size=5,
                 num_layers=1,
                 batch_size=1,
                 sequence_length=1,
                 num_classes=5):
        super().__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.batch_size = batch_size
        self.sequence_length = sequence_length
        self.num_classes = num_classes

        # num_layers must be forwarded to the RNN: init_hidden() builds a
        # hidden state with `num_layers` as its first dimension, and the two
        # have to agree.
        self.rnn = nn.RNN(input_size=input_size,
                          hidden_size=hidden_size,
                          num_layers=num_layers,
                          batch_first=True)

        # Fully-connected layer: maps each hidden state (hidden_size wide,
        # not num_classes wide) to one logit per class.
        self.fc = nn.Linear(hidden_size, num_classes)

    def forward(self, x, hidden):
        """Run one forward pass.

        Args:
            x: tensor holding batch_size * sequence_length * input_size values.
            hidden: initial hidden state, as produced by init_hidden().

        Returns:
            A tuple ``(hidden, out)``: the final hidden state returned by the
            RNN, and the logits reshaped to ``(-1, num_classes)`` (one row per
            time step).
        """
        # Reshape input in (batch_size, sequence_length, input_size)
        x = x.view(self.batch_size, self.sequence_length, self.input_size)

        out, hidden = self.rnn(x, hidden)
        out = self.fc(out)
        # Flatten batch and time so the rows line up with a 1-D label tensor.
        out = out.view(-1, self.num_classes)
        return hidden, out

    def init_hidden(self):
        """Return an all-zero hidden state of shape (num_layers, batch_size, hidden_size)."""
        return torch.zeros(self.num_layers, self.batch_size, self.hidden_size)


# 2. Criterion & Optimizer

In [137]:
# NOTE(review): this default-argument model (sequence_length=1), its criterion
# and its optimizer are all rebound in the "3. Training" cell before any
# training step runs. Constructing the model presumably still draws from the
# seeded RNG for weight initialisation, so removing this cell would likely
# change the losses printed at the end of the notebook — confirm before deleting.
model = Model()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.1)

# 3. Training

In [138]:
# Build the model that is actually trained. Every size is derived from the
# objects defined in earlier cells instead of being repeated as a literal, so
# the model cannot drift out of sync with the data:
#   - input_size follows the embedding dimension,
#   - sequence_length is the whole input string (processed in one forward pass),
#   - num_classes is the vocabulary size.
model = Model(input_size=embeds.embedding_dim,
              hidden_size=hidden_size,
              num_layers=num_layers,
              batch_size=batch_size,
              sequence_length=len(x_data),
              num_classes=len(word_to_ix))
criterion = nn.CrossEntropyLoss()
# Only the RNN and linear-layer weights are optimised; `embeds` is not part of the model.
optimizer = optim.Adam(model.parameters(), lr=0.1)

In [139]:
hidden = model.init_hidden()  # all-zero initial hidden state of shape (num_layers, batch_size, hidden_size)
loss = 0  # placeholder only; rebound to the criterion's output inside the training loop

In [140]:
NUM_EPOCHS = 11  # epochs are numbered 0..10 inclusive

# Inverse vocabulary (id -> character), built once instead of searching the
# keys/values lists for every predicted character in every epoch.
ix_to_word = {ix: ch for ch, ix in word_to_ix.items()}

for epoch in range(NUM_EPOCHS):
    # Keep the hidden-state values from the previous epoch but drop their
    # autograd history, so backward() never reaches into an earlier epoch's
    # graph. A single detach() is sufficient for that.
    hidden = hidden.detach()

    optimizer.zero_grad()
    hidden, outputs = model(inputs, hidden)
    # `outputs` has one row of logits per time step, so a single call scores
    # the whole sequence; no explicit loop over characters is needed.
    loss = criterion(outputs, labels)
    loss.backward()
    optimizer.step()

    # Index of the largest logit in each row = predicted character id.
    idx = outputs.max(1)[1].numpy()
    result_str = [ix_to_word[i] for i in idx.tolist()]

    print(f"epoch: {epoch}, loss: {loss.item()}")
    print(f"Predicted string: {''.join(result_str)}")

epoch: 0, loss: 1.5181742906570435
Predicted string: lhllll
epoch: 1, loss: 1.472831130027771
Predicted string: llllll
epoch: 2, loss: 1.3394774198532104
Predicted string: llllll
epoch: 3, loss: 1.096235752105713
Predicted string: lhelll
epoch: 4, loss: 0.8429825901985168
Predicted string: ihehlo
epoch: 5, loss: 0.6725465655326843
Predicted string: ihehlo
epoch: 6, loss: 0.5581628084182739
Predicted string: ihello
epoch: 7, loss: 0.4689774215221405
Predicted string: ihello
epoch: 8, loss: 0.39387795329093933
Predicted string: ihello
epoch: 9, loss: 0.32651111483573914
Predicted string: ihello
epoch: 10, loss: 0.2684680223464966
Predicted string: ihello
