#### module

In [1]:
import torch 
import torch.nn as nn
import torch.optim as optim
import numpy as np

#### hyper-parameters

In [2]:
n_hidden = 35 
lr = 0.01
epochs = 1000

#### data

In [3]:
string = "hello pytorch. how long can a rnn cell remember? show me your limit!"
chars =  "abcdefghijklmnopqrstuvwxyz ?!.,:;01"

In [4]:
char_list = [char for char in chars]
n_letters = len(char_list)

#### model

In [5]:
def string_to_onehot(string):
    start = np.zeros(shape=n_letters, dtype=int)
    end = np.zeros(shape=n_letters, dtype=int)
    start[-2] = 0
    end[-1] = 0
    
    for i in string:
        idx = char_list.index(i)
        zero = np.zeros(shape=n_letters, dtype=int)
        zero[idx] = 1
        start = np.vstack([start, zero])
    output = np.vstack([start, end])
    return output

In [6]:
def onehot_to_word(onehot):
    onehot = torch.Tensor.numpy(onehot)
    return char_list[onehot.argmax()]

In [7]:
class RNN(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(RNN, self).__init__()
        
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        
        self.i2h = nn.Linear(input_size+hidden_size, hidden_size)
        self.i2o = nn.Linear(input_size+hidden_size, output_size)
        self.act_fn = nn.Tanh()
        
    def forward(self, input, hidden):
        combined = torch.cat((input, hidden), 1)
        hidden = self.act_fn(self.i2h(combined))
        output = self.i2o(combined)
        return output, hidden
    
    def init_hidden(self):
        return torch.zeros(1, self.hidden_size)
    
rnn = RNN(n_letters, n_hidden, n_letters)

#### loss

In [8]:
loss_func = nn.MSELoss()

#### optimizer

In [9]:
optimizer = torch.optim.Adam(rnn.parameters(), lr=lr)

#### train

In [10]:
one_hot = torch.from_numpy(string_to_onehot(string)).type_as(torch.FloatTensor())

In [11]:
for i in range(epochs):
    optimizer.zero_grad()
    hidden = rnn.init_hidden()
    
    total_loss = 0
    for j in range(one_hot.size()[0]-1):
        input_ = one_hot[j:j+1, :]
        target = one_hot[j+1]
        output, hidden = rnn.forward(input_, hidden)
        
        loss = loss_func(output.view(-1), target.view(-1))
        total_loss += loss
    
    total_loss.backward()
    optimizer.step()
    
    if i%10 == 0:
        print(total_loss)

tensor(2.6185, grad_fn=<AddBackward0>)
tensor(1.1347, grad_fn=<AddBackward0>)
tensor(0.6845, grad_fn=<AddBackward0>)
tensor(0.4325, grad_fn=<AddBackward0>)
tensor(0.2789, grad_fn=<AddBackward0>)
tensor(0.2002, grad_fn=<AddBackward0>)
tensor(0.1465, grad_fn=<AddBackward0>)
tensor(0.1122, grad_fn=<AddBackward0>)
tensor(0.0934, grad_fn=<AddBackward0>)
tensor(0.0801, grad_fn=<AddBackward0>)
tensor(0.0786, grad_fn=<AddBackward0>)
tensor(0.0620, grad_fn=<AddBackward0>)
tensor(0.0531, grad_fn=<AddBackward0>)
tensor(0.0597, grad_fn=<AddBackward0>)
tensor(0.0454, grad_fn=<AddBackward0>)
tensor(0.0398, grad_fn=<AddBackward0>)
tensor(0.0354, grad_fn=<AddBackward0>)
tensor(0.0551, grad_fn=<AddBackward0>)
tensor(0.0320, grad_fn=<AddBackward0>)
tensor(0.0291, grad_fn=<AddBackward0>)
tensor(0.0276, grad_fn=<AddBackward0>)
tensor(0.0268, grad_fn=<AddBackward0>)
tensor(0.0238, grad_fn=<AddBackward0>)
tensor(0.0218, grad_fn=<AddBackward0>)
tensor(0.0209, grad_fn=<AddBackward0>)
tensor(0.0212, grad_fn=<A

#### test

In [12]:
start = torch.zeros(1, n_letters)
start[:, -2] = 1

In [13]:
with torch.no_grad():
    hidden = rnn.init_hidden()
    input_ = start
    output_string = ""
    
    for i in range(len(string)):
        output, hidden = rnn.forward(input_, hidden)
        output_string += onehot_to_word(output.data)
        input_ = output

output_string

'helloe y  r ho l t hr hm  wrmelllw? erlnog ilnno  yono ono.o olo.  o'