#### imports

In [1]:
import torch 
import torch.nn as nn
import torch.optim as optim
import numpy as np

#### data

In [2]:
# Text the network is trained to reproduce one character at a time.
string = (
    "hello pytorch. how long can a rnn cell remember? "
    "show me your limit!"
)
# Vocabulary: lowercase letters, then space/punctuation, then "0" and "1".
# string_to_onehot uses the last two slots for its start and end tokens.
chars = "abcdefghijklmnopqrstuvwxyz" + " ?!.,:;" + "01"

In [3]:
# A character's position in this list is its one-hot index.
char_list = list(chars)
# Vocabulary size, i.e. the width of every one-hot row.
n_letters = len(char_list)

#### hyper-parameters

In [4]:
# Width of the LSTM state. The raw LSTM output is compared element-wise with
# the one-hot label (MSE, no projection layer), so it has to equal the
# vocabulary size. Deriving it from n_letters keeps the two in sync if the
# vocabulary changes (the value is still 35 for the current one).
hidden_size = n_letters
lr = 0.01               # learning rate passed to the optimizer
epochs = 1000           # number of passes over the training text
batch_size = 1          # batch dimension used when reshaping inputs/state
seq_len = 1             # one-hot rows fed to the LSTM per step
num_layers = 3          # stacked LSTM layers
input_size = n_letters  # one input feature per vocabulary character

#### helpers and model

In [5]:
def string_to_onehot(string, alphabet=None):
    """Encode ``string`` as one-hot rows framed by a start and an end row.

    Args:
        string: Text to encode. Every character must occur in ``alphabet``.
        alphabet: Sequence of characters defining the one-hot positions.
            Defaults to the notebook-level ``char_list``.

    Returns:
        Integer ``numpy`` array of shape ``(len(string) + 2, len(alphabet))``.
        Row 0 is the start token (second-to-last slot set), the last row is
        the end token (last slot set), and rows in between are the one-hot
        encodings of the characters in order.

    Raises:
        ValueError: If ``string`` contains a character not in ``alphabet``.
    """
    if alphabet is None:
        alphabet = char_list

    # Character -> index table; the first occurrence wins, like list.index().
    index_of = {}
    for position, symbol in enumerate(alphabet):
        index_of.setdefault(symbol, position)

    # Allocate the whole matrix once instead of re-stacking on every character.
    encoded = np.zeros(shape=(len(string) + 2, len(alphabet)), dtype=int)
    encoded[0, -2] = 1    # start token
    encoded[-1, -1] = 1   # end token

    for row, symbol in enumerate(string, start=1):
        try:
            encoded[row, index_of[symbol]] = 1
        except KeyError:
            raise ValueError(
                "character %r is not in the alphabet" % (symbol,)
            ) from None
    return encoded

In [6]:
def onehot_to_word(onehot, alphabet=None):
    """Return the character whose slot holds the largest value in ``onehot``.

    Args:
        onehot: ``torch.Tensor`` of scores. It is flattened before the
            maximum is located, so extra singleton dimensions are fine.
        alphabet: Sequence of characters indexed by one-hot position.
            Defaults to the notebook-level ``char_list``.

    Returns:
        The single character at the arg-max position.
    """
    if alphabet is None:
        alphabet = char_list
    # detach()/cpu() make the conversion work for tensors that still require
    # grad or live on an accelerator; a bare .numpy() call rejects both.
    scores = onehot.detach().cpu().numpy()
    return alphabet[int(scores.argmax())]

In [7]:
class RNN(nn.Module):
    """Thin wrapper around a multi-layer ``nn.LSTM``.

    Args:
        input_size: Number of features in each input vector.
        hidden_size: Number of features in the hidden and cell state (and
            therefore in each output vector).
        num_layers: Number of stacked LSTM layers.
    """

    def __init__(self, input_size, hidden_size, num_layers):
        super().__init__()

        self.input_size = input_size
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers)

    def forward(self, input, hidden, cell):
        """Run the LSTM over ``input`` starting from ``(hidden, cell)``.

        Returns:
            Tuple ``(output, hidden, cell)``: the LSTM output followed by the
            updated hidden and cell state.
        """
        output, (hidden, cell) = self.lstm(input, (hidden, cell))
        return output, hidden, cell

    def init_hidden_cell(self, batch_size=1):
        """Create zero-filled initial hidden and cell states.

        The shapes come from this instance's own configuration rather than
        from notebook-level globals, so a model built with different sizes
        gets matching states.

        Args:
            batch_size: Size of the batch dimension. Defaults to 1, the value
                of the notebook's ``batch_size`` setting; pass it explicitly
                if that setting is changed.

        Returns:
            Tuple ``(hidden, cell)``, each of shape
            ``(num_layers, batch_size, hidden_size)``.
        """
        # new_zeros copies dtype and device from the model's parameters.
        reference = next(self.parameters())
        shape = (self.num_layers, batch_size, self.hidden_size)
        hidden = reference.new_zeros(shape)
        cell = reference.new_zeros(shape)
        return hidden, cell
    
# Build the network from the notebook's hyper-parameters.
rnn = RNN(input_size=input_size, hidden_size=hidden_size, num_layers=num_layers)

#### loss

In [8]:
# Mean squared error, averaged over all elements of its two arguments.
loss_func = nn.MSELoss(reduction="mean")

#### optimizer

In [9]:
# Adam over every parameter of the model, with the configured learning rate.
optimizer = optim.Adam(params=rnn.parameters(), lr=lr)

#### train

In [10]:
# Encode the training text (with its start and end rows) and convert the
# integer array to a float32 tensor.
one_hot = torch.from_numpy(string_to_onehot(string)).float()

In [11]:
# Number of (input, target) steps per epoch. With seq_len = 1 this is every
# row of one_hot except the last, which has no successor to predict.
unroll_len = one_hot.size(0) // seq_len - 1
for i in range(epochs):
    # Start each pass over the text from a zero state.
    hidden, cell = rnn.init_hidden_cell()
    # Clear gradients once per epoch: backward() runs only once per epoch,
    # so clearing them on every inner step was redundant work.
    optimizer.zero_grad()

    # Accumulate the per-step losses and backpropagate through the whole
    # unrolled sequence at once.
    loss = 0
    for j in range(unroll_len):
        input_data = one_hot[j:j+seq_len].view(seq_len, batch_size, input_size)
        # Target is the same window shifted one row ahead.
        label = one_hot[j+1:j+seq_len+1].view(seq_len, batch_size, input_size)
        output, hidden, cell = rnn(input_data, hidden, cell)
        loss += loss_func(output.view(1, -1), label.view(1, -1))

    loss.backward()
    optimizer.step()

    if i%10 == 0:
        print(loss)

tensor(2.3862, grad_fn=<AddBackward0>)
tensor(1.8175, grad_fn=<AddBackward0>)
tensor(1.7532, grad_fn=<AddBackward0>)
tensor(1.6269, grad_fn=<AddBackward0>)
tensor(1.4021, grad_fn=<AddBackward0>)
tensor(1.0654, grad_fn=<AddBackward0>)
tensor(0.6803, grad_fn=<AddBackward0>)
tensor(0.3602, grad_fn=<AddBackward0>)
tensor(0.1961, grad_fn=<AddBackward0>)
tensor(0.1030, grad_fn=<AddBackward0>)
tensor(0.0642, grad_fn=<AddBackward0>)
tensor(0.0429, grad_fn=<AddBackward0>)
tensor(0.0317, grad_fn=<AddBackward0>)
tensor(0.0262, grad_fn=<AddBackward0>)
tensor(0.0197, grad_fn=<AddBackward0>)
tensor(0.0158, grad_fn=<AddBackward0>)
tensor(0.0131, grad_fn=<AddBackward0>)
tensor(0.0114, grad_fn=<AddBackward0>)
tensor(0.0103, grad_fn=<AddBackward0>)
tensor(0.0092, grad_fn=<AddBackward0>)
tensor(0.0077, grad_fn=<AddBackward0>)
tensor(0.0064, grad_fn=<AddBackward0>)
tensor(0.0057, grad_fn=<AddBackward0>)
tensor(0.0054, grad_fn=<AddBackward0>)
tensor(0.0049, grad_fn=<AddBackward0>)
tensor(0.0047, grad_fn=<A

#### test

In [12]:
# Reset the LSTM state to zeros so generation does not start from the state
# left over from the last training epoch.
hidden, cell = rnn.init_hidden_cell()

In [13]:
# Feed the true row at each position and print the model's guess for the
# next one. With seq_len = 1, stopping at unroll_len-1 leaves out the final
# step, whose target is the end token rather than a character of the text.
with torch.no_grad():  # inference only: no autograd graph is needed
    for i in range(unroll_len-1):
        # Each one-hot row has input_size entries. Reshaping with hidden_size
        # only worked because the two values happen to be equal.
        input_data = one_hot[i:i+1].view(1, batch_size, input_size)
        output, hidden, cell = rnn(input_data, hidden, cell)
        print(onehot_to_word(output), end="")

hello pytorch. how long can a rnn cell remember? show me your limit!