## 1. Settings

## 1) Import Required Libraries

In [52]:
import torch 
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable
import numpy as np

## 2) Data & Preprocessing

In [53]:
# NOTE(review): `string` is first assigned in the following cell, so on a fresh
# kernel (Restart & Run All) this line raises NameError; it only produced an
# output here because of leftover kernel state.
len(string)

68

In [54]:
# Training sentence the network learns to reproduce one character at a time.
string = "hello pytorch. how long can a rnn cell remember? show me your limit!"

# Vocabulary: lowercase letters, space and punctuation, followed by "0" and "1",
# whose columns string_to_onehot uses for the start and end markers.
chars = "abcdefghijklmnopqrstuvwxyz ?!.,:;01"
char_list = list(chars)
char_len = len(char_list)

char_len

35

## 3) Hyperparameters

In [55]:
batch_size = 1          # sequences processed per step
seq_len = 1             # characters fed to the LSTM per step
num_layers = 1          # stacked LSTM layers
input_size = char_len   # width of a one-hot input vector (vocabulary size)
# The LSTM output is compared directly with the one-hot label (there is no
# projection layer), so the hidden width has to equal the vocabulary size.
# Deriving it from char_len keeps the two in sync if `chars` is edited.
hidden_size = char_len
lr = 0.01               # learning rate passed to the Adam optimizer
num_epochs = 1000       # number of full passes over the sentence

## 4) String to One-hot

In [56]:
# String to onehot vector
# a -> [1 0 0 ... 0 0]

def string_to_onehot(string, alphabet=None):
    """Encode `string` as one-hot rows framed by a start and an end marker.

    Args:
        string: Text to encode; every character must occur in the alphabet.
        alphabet: Optional sequence of characters defining the column order.
            Defaults to the notebook-level `char_list`.

    Returns:
        Integer ndarray of shape (len(string) + 2, len(alphabet)). Row 0 is the
        start marker (second-to-last column set), the last row is the end
        marker (last column set), and row k + 1 is the one-hot of string[k].

    Raises:
        ValueError: If a character of `string` is not in the alphabet.
    """
    if alphabet is None:
        alphabet = char_list
    width = len(alphabet)

    # Allocate the whole matrix once instead of growing it with np.vstack for
    # every character, which re-copies all previously written rows each time.
    output = np.zeros(shape=(len(string) + 2, width), dtype=int)
    output[0, -2] = 1    # start marker
    output[-1, -1] = 1   # end marker
    for row, ch in enumerate(string, start=1):
        output[row, alphabet.index(ch)] = 1
    return output

## 5) Onehot to String

In [57]:
# Onehot vector to word
# [1 0 0 ... 0 0] -> a

def onehot_to_word(onehot_1):
    """Return the `char_list` entry at the position of the largest value in `onehot_1`."""
    scores = torch.Tensor(onehot_1)
    position = scores.argmax()
    return char_list[position]

In [58]:
# Row 0 of the encoding is the start marker; its set column maps to "0".
a = string_to_onehot(string)[0]
onehot_to_word(a)


'0'

In [59]:
# Row 1 is the first character of the sentence ("h").
a = string_to_onehot(string)[1]
onehot_to_word(a)

'h'

In [60]:
# Row 2 is the second character of the sentence ("e").
a = string_to_onehot(string)[2]
onehot_to_word(a)

'e'

In [61]:
## RNN with 1 hidden layer
class RNN(nn.Module):
    """Thin wrapper around `nn.LSTM` that passes hidden and cell state explicitly.

    Inputs are batch-first, i.e. shaped (batch, seq_len, input_size), which is
    the layout the notebook builds with `.view(batch_size, seq_len, -1)`.
    """

    def __init__(self, input_size, hidden_size, num_layers):
        """Create the LSTM.

        Args:
            input_size: Number of features per time step.
            hidden_size: Width of the hidden and cell state (and of the output).
            num_layers: Number of stacked LSTM layers.
        """
        super(RNN, self).__init__()

        self.input_size = input_size
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        # nn.LSTM defaults to time-major input (seq_len, batch, features);
        # batch_first=True matches the (batch, seq_len, features) tensors that
        # the notebook actually feeds in.
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)

    def forward(self, input, hidden, cell):  # an LSTM carries both a hidden and a cell state
        """Run the LSTM over `input` starting from the given state.

        Args:
            input: Tensor of shape (batch, seq_len, input_size).
            hidden: Tensor of shape (num_layers, batch, hidden_size).
            cell: Tensor of shape (num_layers, batch, hidden_size).

        Returns:
            Tuple (output, hidden, cell): `output` has shape
            (batch, seq_len, hidden_size); `hidden` and `cell` are the updated
            states with the same shapes as the inputs.
        """
        output, (hidden, cell) = self.lstm(input, (hidden, cell))

        return output, hidden, cell

    def init_hidden_cell(self, batch_size=1):
        """Return zero-filled (hidden, cell) states for a new sequence.

        Args:
            batch_size: Number of sequences in the batch. The state shape does
                not depend on the sequence length.

        Returns:
            Two tensors of shape (num_layers, batch_size, hidden_size), created
            with the same dtype and device as the model parameters.
        """
        # Use the model's own configuration rather than notebook globals so the
        # state always matches this instance.
        reference = next(self.parameters())
        shape = (self.num_layers, batch_size, self.hidden_size)
        hidden = reference.new_zeros(shape)
        cell = reference.new_zeros(shape)

        return hidden, cell
    
# Build the model from the hyperparameters defined earlier in the notebook.
rnn = RNN(input_size, hidden_size, num_layers)

## 3. Loss Function & Optimizer

In [62]:
# Encode the sentence and convert it to a float32 tensor, one row per step.
one_hot = torch.from_numpy(string_to_onehot(string)).float()
print(one_hot.size())

# Adam over all model parameters; mean squared error against the one-hot labels.
optimizer = optim.Adam(rnn.parameters(), lr=lr)
criterion = nn.MSELoss()

torch.Size([70, 35])


## 4. Test code (single forward pass shape check)

In [63]:
# Take the first encoded row and shape it as (batch_size, seq_len, features).
# torch.autograd.Variable is a deprecated no-op wrapper, so the tensor is used
# directly.
j = 0
input_data = one_hot[j:j+seq_len].view(batch_size, seq_len, -1)
print(input_data.size())

torch.Size([1, 1, 35])


In [64]:
# Fresh zero-filled hidden and cell states, printed to confirm their shapes.
hidden,  cell = rnn.init_hidden_cell()
print(hidden.size(), cell.size())

torch.Size([1, 1, 35]) torch.Size([1, 1, 35])


In [65]:
# One forward step through the LSTM; print the shapes of the output and of the
# updated hidden and cell states.
output, hidden, cell = rnn(input_data, hidden, cell)
print(output.size(), hidden.size(), cell.size())

torch.Size([1, 1, 35]) torch.Size([1, 1, 35]) torch.Size([1, 1, 35])


## 5. Train

In [66]:
len(string) + 2 # plus the start and end markers

70

In [67]:
# Number of (input, next-row) pairs per pass: one fewer than the number of
# seq_len-sized steps in the encoded sentence.
unroll_len = one_hot.size(0) // seq_len - 1

In [68]:
# Display the number of training steps per epoch.
unroll_len

69

In [69]:
# Display the number of training epochs configured above.
num_epochs

1000

In [72]:
for epoch in range(num_epochs):
    # Start every pass over the sentence from zeroed LSTM state.
    hidden, cell = rnn.init_hidden_cell()
    # Clear gradients once per epoch: backward() runs only once, after the
    # whole sentence has been unrolled, so clearing them on every inner step
    # was redundant work.
    optimizer.zero_grad()
    loss = 0

    for i in range(unroll_len):
        # Input is the encoded row at step i; the label is the row that follows.
        # (Plain tensors are used; torch.autograd.Variable is a deprecated no-op.)
        input_data = one_hot[i:i+seq_len].view(batch_size, seq_len, -1)
        label = one_hot[i+1:i+seq_len+1].view(batch_size, seq_len, -1)

        # State is carried from step to step so the loss depends on the
        # whole history of the sentence.
        output, hidden, cell = rnn(input_data, hidden, cell)
        loss += criterion(output.view(1, -1), label.view(1, -1))

    # Backpropagate through the full unrolled sequence, then update once.
    loss.backward()
    optimizer.step()

    if epoch % 10 == 0:
        print(loss)
        

tensor(3.6163, grad_fn=<AddBackward0>)
tensor(1.7552, grad_fn=<AddBackward0>)
tensor(1.3937, grad_fn=<AddBackward0>)
tensor(1.1782, grad_fn=<AddBackward0>)
tensor(1.0341, grad_fn=<AddBackward0>)
tensor(0.9346, grad_fn=<AddBackward0>)
tensor(0.8486, grad_fn=<AddBackward0>)
tensor(0.7642, grad_fn=<AddBackward0>)
tensor(0.6746, grad_fn=<AddBackward0>)
tensor(0.6018, grad_fn=<AddBackward0>)
tensor(0.5405, grad_fn=<AddBackward0>)
tensor(0.5023, grad_fn=<AddBackward0>)
tensor(0.4521, grad_fn=<AddBackward0>)
tensor(0.4192, grad_fn=<AddBackward0>)
tensor(0.3906, grad_fn=<AddBackward0>)
tensor(0.3669, grad_fn=<AddBackward0>)
tensor(0.3455, grad_fn=<AddBackward0>)
tensor(0.3237, grad_fn=<AddBackward0>)
tensor(0.3046, grad_fn=<AddBackward0>)
tensor(0.2856, grad_fn=<AddBackward0>)
tensor(0.2707, grad_fn=<AddBackward0>)
tensor(0.2555, grad_fn=<AddBackward0>)
tensor(0.2434, grad_fn=<AddBackward0>)
tensor(0.2363, grad_fn=<AddBackward0>)
tensor(0.2252, grad_fn=<AddBackward0>)
tensor(0.2164, grad_fn=<A

## 6. Test

In [73]:
hidden, cell = rnn.init_hidden_cell()

# Inference only: no_grad avoids building an autograd graph across the sequence.
with torch.no_grad():
    # Stop one step early so the step whose target is the end marker is not
    # decoded and printed.
    for j in range(unroll_len-1):
        input_data = one_hot[j:j+seq_len].view(batch_size, seq_len, -1)

        # Feed the true character at step j and print the predicted next one.
        output, hidden, cell = rnn(input_data, hidden, cell)
        print(onehot_to_word(output), end="")

hello pytorch. how long can a rnn cell remember? show me your limit!