<a href="https://colab.research.google.com/github/chaiminwoo0223/Deep-Learning/blob/main/13%20-%20LSTM.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np

In [2]:
string = "hello pytorch. how long can a rnn cell remember? show me your limit!"
chars = "abcdefghijklmnopqrstuvwxyz ?!.,:;01"
char_list = [i for i in chars]
char_len = len(char_list)

In [3]:
print(char_list)
print(char_len)

['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', ' ', '?', '!', '.', ',', ':', ';', '0', '1']
35


In [4]:
# string "abc"
# start = [0 0 0 ... 1 0]
# a =     [1 0 0 ... 0 0]
# b =     [0 1 0 ... 0 0]
# c =     [0 0 1 ... 0 0]
# end =   [0 0 0 ... 0 1]

In [5]:
def string_to_onehot(string):
    start = np.zeros(shape=char_len, dtype=int)
    end = np.zeros(shape=char_len, dtype=int)
    start[-2] = 1
    end[-1] = 1
    for i in string:
        idx = char_list.index(i)
        zero = np.zeros(shape=char_len, dtype=int)
        zero[idx] = 1
        start = np.vstack([start,zero])
    output = np.vstack([start,end])
    return output

def onehot_to_word(onehot_1):
    onehot = torch.Tensor.numpy(onehot_1)
    return char_list[onehot.argmax()]

In [6]:
# 문자열을 단어 하나씩 잘라서 사용(batch_size = 1)
batch_size = 1
seq_len = 1
num_layers = 3
input_size = char_len
hidden_size = 35
lr = 0.01
num_epochs = 1000

one_hot = torch.from_numpy(string_to_onehot(string)).type_as(torch.FloatTensor())
print(one_hot.size())

torch.Size([70, 35])


In [7]:
# RNN
class RNN(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers):
        super(RNN, self).__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers) # LSTM(Long Short-Term Memory)

    def forward(self, input_, hidden, cell):
        output, (hidden, cell) = self.lstm(input_, (hidden, cell))
        return output, hidden, cell
    
    def init_hidden_cell(self):
        hidden = torch.zeros(num_layers, batch_size, hidden_size)
        cell = torch.zeros(num_layers, batch_size, hidden_size)
        return hidden, cell

rnn = RNN(input_size, hidden_size, num_layers)

In [8]:
# 손실함수와 최적화
loss_func = nn.MSELoss()
optimizer = optim.Adam(rnn.parameters(), lr=lr)

In [9]:
print(one_hot.size())
print(one_hot[0].size())
print(one_hot[0:1].size())

torch.Size([70, 35])
torch.Size([35])
torch.Size([1, 35])


In [10]:
j = 0
input_data = one_hot[j:j+seq_len].view(seq_len, batch_size, input_size)
print(input_data.size())
hidden, cell = rnn.init_hidden_cell()
print(hidden.size(), cell.size())
output, hidden, cell = rnn(input_data, hidden, cell)
print(output.size(), hidden.size(), cell.size())

torch.Size([1, 1, 35])
torch.Size([3, 1, 35]) torch.Size([3, 1, 35])
torch.Size([1, 1, 35]) torch.Size([3, 1, 35]) torch.Size([3, 1, 35])


In [11]:
# Train
unroll_len = one_hot.size()[0]//seq_len - 1

for i in range(num_epochs):
    hidden, cell = rnn.init_hidden_cell()
    loss = 0
    for j in range(unroll_len):
        input_data = one_hot[j:j+seq_len].view(seq_len, batch_size, input_size)
        label = one_hot[j+1:j+seq_len+1].view(seq_len, batch_size, input_size)
        optimizer.zero_grad()
        output, hidden, cell = rnn(input_data, hidden, cell)
        loss += loss_func(output.view(1,-1), label.view(1,-1))
    loss.backward()
    optimizer.step()

    if i%10 == 0:
        print(loss)

tensor(2.5905, grad_fn=<AddBackward0>)
tensor(1.8229, grad_fn=<AddBackward0>)
tensor(1.7662, grad_fn=<AddBackward0>)
tensor(1.6201, grad_fn=<AddBackward0>)
tensor(1.4666, grad_fn=<AddBackward0>)
tensor(1.1939, grad_fn=<AddBackward0>)
tensor(0.7996, grad_fn=<AddBackward0>)
tensor(0.5321, grad_fn=<AddBackward0>)
tensor(0.3031, grad_fn=<AddBackward0>)
tensor(0.1972, grad_fn=<AddBackward0>)
tensor(0.1460, grad_fn=<AddBackward0>)
tensor(0.1219, grad_fn=<AddBackward0>)
tensor(0.1076, grad_fn=<AddBackward0>)
tensor(0.1008, grad_fn=<AddBackward0>)
tensor(0.0908, grad_fn=<AddBackward0>)
tensor(0.0820, grad_fn=<AddBackward0>)
tensor(0.0772, grad_fn=<AddBackward0>)
tensor(0.0741, grad_fn=<AddBackward0>)
tensor(0.0717, grad_fn=<AddBackward0>)
tensor(0.0697, grad_fn=<AddBackward0>)
tensor(0.0680, grad_fn=<AddBackward0>)
tensor(0.0665, grad_fn=<AddBackward0>)
tensor(0.0655, grad_fn=<AddBackward0>)
tensor(0.0648, grad_fn=<AddBackward0>)
tensor(0.0641, grad_fn=<AddBackward0>)
tensor(0.0634, grad_fn=<A

In [12]:
# Test
hidden, cell = rnn.init_hidden_cell()

for j in range(unroll_len-1):
    input_data = one_hot[j:j+1].view(1, batch_size, hidden_size)
    output, hidden, cell = rnn(input_data, hidden, cell)
    print(onehot_to_word(output.data), end="")

hello pytorch. how gong can a rnn cell remember? show me your iimit!