## Reference
[최근호님 pytorch-Fastcampus](https://github.com/GunhoChoi/PyTorch-FastCampus/blob/9028266c7297a13365fac2b7f117397e292b2f5c/05_RNN/0_Basic/Simple_Char_RNNcell.ipynb)

## 1. Settings
### 1) Import Required Libraries

In [5]:
import torch 
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable
import numpy as np

### 2) Hyperparameters & Data

In [3]:
# Preprocessing setup for the string data.
# Vocabulary layout: letters a-z (0-25), space and punctuation (26-32),
# start marker '0' (33), end marker '1' (34) -> 35 symbols in total.

n_hidden = 50   # width of the recurrent hidden state
lr = 0.01       # learning rate passed to the optimizer
epochs = 2000   # number of passes over the training string

string = "hello pytorch, how long can a rnn cell remember?"  # show us your limit!
chars = "abcdefghijklmnopqrstuvwxyz ?!.,:;01"
char_list = list(chars)
char_len = n_letters = len(char_list)  # 35

In [4]:
char_list

['a',
 'b',
 'c',
 'd',
 'e',
 'f',
 'g',
 'h',
 'i',
 'j',
 'k',
 'l',
 'm',
 'n',
 'o',
 'p',
 'q',
 'r',
 's',
 't',
 'u',
 'v',
 'w',
 'x',
 'y',
 'z',
 ' ',
 '?',
 '!',
 '.',
 ',',
 ':',
 ';',
 '0',
 '1']

In [6]:
char_list.index('b')

1

In [7]:
n_letters

35

### 3) String to One-hot

In [8]:
# string to onehot vector
# a -> [1,0,0,0 ... 0 0]

def string_to_onehot(string, alphabet=None):
    """Encode ``string`` as one-hot rows framed by a start row and an end row.

    Args:
        string: Text to encode; every character must occur in the alphabet.
        alphabet: Optional sequence of symbols to encode against. Defaults to
            the module-level ``char_list``. Its second-to-last entry acts as
            the start marker and its last entry as the end marker.

    Returns:
        Integer ``numpy`` array of shape ``(len(string) + 2, len(alphabet))``.
        Row 0 is the start marker, rows ``1..len(string)`` are the characters
        in order, and the final row is the end marker.

    Raises:
        ValueError: If ``string`` contains a character that is not in the
            alphabet.
    """
    if alphabet is None:
        alphabet = char_list

    # Map every symbol to its first position (same result as ``list.index``),
    # so each lookup is O(1) instead of a linear scan.
    position = {}
    for idx, symbol in enumerate(alphabet):
        position.setdefault(symbol, idx)

    # Allocate the full matrix once rather than re-copying it with one
    # ``np.vstack`` per character.
    onehot = np.zeros(shape=(len(string) + 2, len(alphabet)), dtype=int)
    onehot[0, -2] = 1    # start marker row
    onehot[-1, -1] = 1   # end marker row
    for row, ch in enumerate(string, start=1):
        col = position.get(ch)
        if col is None:
            raise ValueError(
                "character {!r} is not in the alphabet".format(ch)
            )
        onehot[row, col] = 1
    return onehot

In [9]:
string_to_onehot('a')

array([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0],
       [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1]])

### 4) One-hot to Character

In [46]:
# Onehot vector to word
# [1 0 0 0 0 ... 0] -> a

def onehot_to_word(onehot_1, alphabet=None):
    """Return the symbol at the position of the largest value in ``onehot_1``.

    Args:
        onehot_1: ``torch.Tensor`` holding a one-hot vector or raw scores. The
            argmax is taken over the flattened tensor.
        alphabet: Optional sequence of symbols to decode against. Defaults to
            the module-level ``char_list``.

    Returns:
        The element of the alphabet at the argmax index.
    """
    if alphabet is None:
        alphabet = char_list
    # detach()/cpu() let the NumPy conversion succeed for tensors that track
    # gradients or live on another device; for a plain CPU tensor they are
    # no-ops, so the result is unchanged.
    scores = onehot_1.detach().cpu().numpy()
    return alphabet[scores.argmax()]

## 2. RNN class

In [10]:
# RNN with one hidden layer
class RNN(nn.Module):
    """Single recurrent cell followed by a linear read-out.

    Each step computes ``hidden = tanh(i2h(input) + h2h(hidden))`` and then
    ``output = i2o(hidden)``.

    Args:
        input_size: Number of features in each input row.
        hidden_size: Width of the hidden state.
        output_size: Number of features in each output row.
    """

    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()

        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size

        self.i2h = nn.Linear(input_size, hidden_size)
        self.h2h = nn.Linear(hidden_size, hidden_size)
        # NOTE: despite its name, i2o maps the hidden state to the output.
        # The attribute name is kept so existing state_dict keys stay valid.
        self.i2o = nn.Linear(hidden_size, output_size)
        self.act_fn = nn.Tanh()

    def forward(self, input, hidden):
        """Advance the cell by one step.

        Args:
            input: Tensor whose last dimension is ``input_size``.
            hidden: Previous hidden state, last dimension ``hidden_size``.

        Returns:
            Tuple ``(output, hidden)``: the read-out for this step and the
            updated hidden state.
        """
        hidden = self.act_fn(self.i2h(input) + self.h2h(hidden))
        output = self.i2o(hidden)
        return output, hidden

    def init_hidden(self):
        """Return an all-zero initial hidden state of shape ``(1, hidden_size)``.

        The tensor takes the dtype and device of the layer weights, so it
        stays usable after the module is moved or cast (``.to(...)``,
        ``.double()``, ``.cuda()``).
        """
        reference = self.h2h.weight
        return torch.zeros(
            1, self.hidden_size, dtype=reference.dtype, device=reference.device
        )
    
# The network predicts the next character, so input and output share the vocabulary size.
rnn = RNN(input_size=n_letters, hidden_size=n_hidden, output_size=n_letters)
    

## 3. Loss function & Optimizer

In [11]:
# Mean-squared error between the predicted vector and the one-hot target.
criterion = nn.MSELoss()
# Adam over every parameter of the RNN, using the learning rate `lr`.
optimizer = optim.Adam(rnn.parameters(), lr=lr)

## 4. Train

In [12]:
string

'hello pytorch, how long can a rnn cell remember?'

In [13]:
string_to_onehot(string)

array([[0, 0, 0, ..., 0, 1, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       ...,
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 1]])

In [14]:
# Encode the training string and cast the integer matrix to float32 for the network.
one_hot = torch.from_numpy(string_to_onehot(string)).float()

In [15]:
one_hot

tensor([[0., 0., 0.,  ..., 0., 1., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        ...,
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 1.]])

In [20]:
# Expected number of one-hot rows: one per character plus the start and end marker rows.
len(string) +2

50

In [17]:
one_hot.size() ## string length plus the start and end rows gives a sequence length of 50

torch.Size([50, 35])

In [32]:
one_hot[0]

tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0.])

In [36]:
# One-step probe: feed the first row (the start marker) through the network
# and display the row it is expected to predict next.
hidden = rnn.init_hidden()
for j in range(1):
    input = one_hot[j:j + 1]      # slice keeps a leading axis of size 1
    output, hidden = rnn.forward(input, hidden)
    labels = one_hot[j + 1]       # the following row is the target
    print(labels.view(-1))
    

tensor([0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])


In [38]:
# One-step probe: feed the first row (the start marker) through the network
# and display the raw prediction it produces.
hidden = rnn.init_hidden()
for j in range(1):
    input = one_hot[j:j + 1]      # slice keeps a leading axis of size 1
    output, hidden = rnn.forward(input, hidden)
    labels = one_hot[j + 1]       # target row, kept for comparison
    print(output.view(-1))

tensor([ 0.0328, -0.2014,  0.0032, -0.0185,  0.0866,  0.0301, -0.0227, -0.0729,
        -0.1680, -0.1159,  0.0769,  0.1172,  0.1044,  0.1052,  0.0147,  0.0057,
        -0.0349, -0.1002,  0.0653,  0.0959, -0.0064, -0.1935, -0.0439,  0.0275,
         0.1473, -0.0518, -0.0269,  0.1545,  0.0190, -0.0853, -0.0068,  0.1318,
         0.0402,  0.1295,  0.0308], grad_fn=<ViewBackward>)


In [42]:
for epoch in range(epochs):
    rnn.zero_grad()  # reset gradients accumulated during the previous epoch
    total_loss = 0
    hidden = rnn.init_hidden()  # every pass starts from a zero hidden state
    # The loss is summed over the whole sequence and back-propagated once per
    # epoch (a single optimizer step per full pass), which is why training
    # takes considerably longer.

    for j in range(one_hot.size(0) - 1):
        input = one_hot[j:j + 1]  # current row, with a leading axis of size 1
        # Call the module itself rather than .forward() so that any registered
        # hooks are honoured.
        output, hidden = rnn(input, hidden)
        target = one_hot[j + 1]   # the next row is the prediction target
        loss = criterion(output.view(-1), target.view(-1))
        total_loss += loss
        # NOTE: feeding the prediction back in (input = output) is done only in
        # the test cell; during training the true row is always the input.

    total_loss.backward()
    optimizer.step()

    if epoch % 10 == 0:
        print(total_loss)

tensor(9.5325e-05, grad_fn=<AddBackward0>)
tensor(0.0006, grad_fn=<AddBackward0>)
tensor(0.0002, grad_fn=<AddBackward0>)
tensor(0.0001, grad_fn=<AddBackward0>)
tensor(0.0001, grad_fn=<AddBackward0>)
tensor(0.0003, grad_fn=<AddBackward0>)
tensor(0.0002, grad_fn=<AddBackward0>)
tensor(0.0001, grad_fn=<AddBackward0>)
tensor(9.6161e-05, grad_fn=<AddBackward0>)
tensor(5.7402e-05, grad_fn=<AddBackward0>)
tensor(1.1110e-05, grad_fn=<AddBackward0>)
tensor(9.4862e-06, grad_fn=<AddBackward0>)
tensor(2.1074e-05, grad_fn=<AddBackward0>)
tensor(0.0002, grad_fn=<AddBackward0>)
tensor(0.0002, grad_fn=<AddBackward0>)
tensor(0.0004, grad_fn=<AddBackward0>)
tensor(0.0003, grad_fn=<AddBackward0>)
tensor(0.0003, grad_fn=<AddBackward0>)
tensor(0.0002, grad_fn=<AddBackward0>)
tensor(0.0005, grad_fn=<AddBackward0>)
tensor(0.0002, grad_fn=<AddBackward0>)
tensor(7.6741e-05, grad_fn=<AddBackward0>)
tensor(2.0166e-05, grad_fn=<AddBackward0>)
tensor(6.8083e-06, grad_fn=<AddBackward0>)
tensor(4.5172e-06, grad_fn=<AddBackward0>)

## 5. Test

In [44]:
# Seed generation with the start-marker row only: how far can the network
# reproduce the string from that alone?
input = one_hot[0:1]
hidden = rnn.init_hidden()

In [47]:
# Generate len(string) characters, feeding each raw output vector back in as
# the next input.
with torch.no_grad():  # inference only: do not build an autograd graph across steps
    for i in range(len(string)):
        # Call the module itself rather than .forward() so that any registered
        # hooks are honoured.
        output, hidden = rnn(input, hidden)
        print(onehot_to_word(output), end="")
        input = output

hllo pytorch,,hhw long  an aarrn ccell emmember?