In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np

## RNN with a simple example

In [3]:
# 관련 hyper parameter를 정의 
# hidden state의 dimension, size -> HIDDEN_DIM
HIDDEN_DIM = 35
LEARNING_RATE = 0.01
EPOCHS = 100

In [23]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [6]:
# input으로 활용할 string 
string = "hello pytorch and data analytics."

In [7]:
# input에 대해서 one-hot encoding을 진행하기 위해 관련된 alphabet, 공백, .을 정의할 필요가 있음
# char_list -> string에 필요한 character를 넣어주면 됨 
# 01-> character 나 input에 대해서 start / end 를 구분해주기 위해서 넣어줌  
chars = "abcdefghijklmnopqrstuvwxyz .01"
char_list = [i for i in chars]
n_letters = len(char_list)
n_letters

30

In [9]:
# string -> one-hot encodding 
def string_to_onehot(string):
    start = np.zeros(shape = n_letters, dtype = int)
    end = np.zeros(shape = n_letters, dtype = int)

    start[-2] = 1 # 0에 해당하는 위치에 1 -> 첫번째 vector는 start를 알려줌 
    end[-1] = 1   # 1에 해당하는 위치에 1 -> 마지막 vector는 end를 알려줌  

    # 각각의 character가 위치하는 부분에 0대신에 1을 채워주는 one-hot encodding  
    for i in string:
        idx = char_list.index(i)
        zero = np.zeros(shape = n_letters, dtype = int)
        zero[idx] = 1
        start = np.vstack([start, zero]) # vstack -> 두 개의 배열을 위 아래로 합쳐줌  
    output = np.vstack([start, end])
    return output

In [10]:
string_to_onehot("data")

array([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 1, 0],
       [0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0],
       [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0],
       [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 1]])

In [12]:
# onehot -> string 하는 deccoding 
def onehot_to_string(onehot):
    onehot_value = torch.Tensor.numpy(onehot)
    return char_list[onehot_value.argmax()]

In [15]:
# RNN class 정의 
# torch에 있는 neural network module을 상속받아서 사용 
class RNN(nn.Module):
    # 속성 정의, 생성자 
    def __init__(self, input_size, hidden_size, output_size):
        super(RNN, self).__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.input2hidden = nn.Linear(input_size, hidden_size)   # input에 해당하는 것을 받아서 hidden을 output 
        self.hidden2hidden = nn.Linear(hidden_size, hidden_size) # 이전의 hidden을 받아서 현재 hidden을 output 
        self.hidden2output = nn.Linear(hidden_size, output_size) # 최종적으로 완성된 hideen을 받아서 output
        self.act_fn = nn.Tanh() # actication layer, hyperbolic tan

    def forward(self, input, hidden):
        hidden = self.act_fn(self.input2hidden(input) + self.hidden2hidden(hidden)) # 현재 input + 지금까지의 hidden = 현재 hidden
        output = self.hidden2output(hidden)
        return output, hidden

    # hidden vector에 대해서 초기화 function 
    def init_hidden(self):
        return torch.zeros(1, self.hidden_size)

In [31]:
# input -> embedding 된 size 30, output 동일
rnn = RNN(n_letters, HIDDEN_DIM, n_letters)#.to(device)

In [32]:
# loss_fun, optimizer 정의 
loss_func = nn.MSELoss().to(device)
optimizer_rnn = torch.optim.Adam(rnn.parameters(), lr = LEARNING_RATE)

In [33]:
rnn.parameters

<bound method Module.parameters of RNN(
  (input2hidden): Linear(in_features=30, out_features=35, bias=True)
  (hidden2hidden): Linear(in_features=35, out_features=35, bias=True)
  (hidden2output): Linear(in_features=35, out_features=30, bias=True)
  (act_fn): Tanh()
)>

In [36]:
one_hot = torch.from_numpy(string_to_onehot(string)).type_as(torch.FloatTensor())
# one-hot 형태의 input을 가지고 training 시작 

for i in range(EPOCHS):
    # optimizer, hidden 초기화 
    optimizer_rnn.zero_grad()
    hidden = rnn.init_hidden()
    total_loss = 0

    # input은 특정 character -> 그 다음 target character  
    # input, target에 해당하는 one-hot vector 모두 포함 
    # forward를 통해서 나오는 hidden과 output
    # forward를 통해 나온 output과 실제 값 target과의 차이를 통해 loss 계산 
    for j in range(one_hot.size()[0]-1):
        input_ = one_hot[j:j+1, :]#.to(device)
        target = one_hot[j+1]#.to(device)
        output, hidden = rnn.forward(input_, hidden)
        loss = loss_func(output.view(-1), target.view(-1))
        total_loss += loss

    total_loss.backward() 
    optimizer_rnn.step()

    if i % 50 == 0:
        print(total_loss)

tensor(0.0123, grad_fn=<AddBackward0>)
tensor(0.0035, grad_fn=<AddBackward0>)


In [38]:
# 학습된 모델이 text를 잘 generation 하는가? 
# 단순히 start token을 넣어주면 그 자리에 있는 있는 string이 나와야 함 
start_tkn = torch.zeros(1, n_letters)
start_tkn[:, -2] = 1

with torch.no_grad():
    hidden = rnn.init_hidden()
    input_ = start_tkn#.to(device)
    output_string = ""

    for i in range(len(string)):
        output, hidden = rnn.forward(input_, hidden)
        output_string += onehot_to_string(output.data)
        input_ = output

print(output_string)

hello pytorch and data an tn  n t


## RNN and LSTM

In [57]:

!wget https://raw.githubusercontent.com/dmlc/web-data/master/mxnet/tinyshakespeare/input.txt -P ./ --no-check-certificate

--2021-11-29 08:26:50--  https://raw.githubusercontent.com/dmlc/web-data/master/mxnet/tinyshakespeare/input.txt
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.110.133, 185.199.111.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.
  Unable to locally verify the issuer's authority.
HTTP request sent, awaiting response... 200 OK
Length: 1115394 (1.1M) [text/plain]
Saving to: './input.txt'

     0K .......... .......... .......... .......... ..........  4%  822K 1s
    50K .......... .......... .......... .......... ..........  9% 2.91M 1s
   100K .......... .......... .......... .......... .......... 13% 6.95M 1s
   150K .......... .......... .......... .......... .......... 18% 9.66M 0s
   200K .......... .......... .......... .......... .......... 22% 8.18M 0s
   250K .......... .......... .......... .......... .......... 27% 7.81M 0s
   300K .......... .......... .......... ......

In [42]:
!pip install unidecode



In [43]:
import re
import unidecode
import random
import string
import time, math

import torch
import torch.nn as nn
import torch.nn.functional as F

In [46]:
# hyper parameter 
EPOCHS = 1000
HIDDEN_DIM = 100
BATCH_SIZE = 1
CHUNK_LEN = 250 # 전체 data가 100만 character 정도여서 전체를 쓰지 않고 일부를 random으로 추출하여 학습하기 위함 
NUM_LAYERS = 1  # 강의자료에서는 1개의 layer만을 고려, 2개의 layer라면 rnn 위에 rnn 쌓을 수 있음 
EMBEDDING = 70  # word2vec 예제를 보면 one-hot encoding된 input vector를 4차원으로 바꿈, embedding 될 vector를 몇 차원으로 할 것인가 
LEARNING_RATE = 0.004

In [47]:
# character를 직접 정의하지 않고 string module의 printable을 활용하여 들어갈 수 있는 character를 미리 정의 
characters = string.printable
n_characters = len(characters)
characters

'0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ!"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~ \t\n\r\x0b\x0c'

In [58]:
text_file = unidecode.unidecode(open('./input.txt').read())
len_text_file = len(text_file)
len_text_file

1115394

In [60]:
# 100만개의 data를 전부 사용할 수 없기 때문에 chunk를 활용한다 
def random_chunk():
    start_index = random.randint(0, len_text_file - CHUNK_LEN)
    end_index = start_index + CHUNK_LEN + 1
    return text_file[start_index : end_index]

print(random_chunk())

ever king so grieved for subjects' woe?
Much is your sorrow; mine ten times so much.

Son:
I'll bear thee hence, where I may weep my fill.

Father:
These arms of mine shall be thy winding-sheet;
My heart, sweet boy, shall be thy sepulchre,
For from my


In [61]:
# character를 tensor로 바꿔주는 def  
# 원래는 one-hot encodding 하였으나 
# tensor로 바꾼 후 embedding이 되는 형태로 진행 
def character_to_tensor(string):
    tensor = torch.zeros(len(string)).long()
    for char in range(len(string)):
        tensor[char] = characters.index(string[char])
    return tensor

print(character_to_tensor('ABCde'))

tensor([36, 37, 38, 13, 14])


In [62]:
def random_training_set():
    chunk = random_chunk()
    input = character_to_tensor(chunk[:-1])
    target = character_to_tensor(chunk[1:])
    return input, target

In [63]:
# input - target이 이런 형태로 정의됨 
# input은 chuck에서 어떤 지점이 있을텐데 이 지점의 -1까지 해당하는 부분이 input이 되고 
# target은 1보다 큰 부분이 target이 된다
# 23에서 96까지 input, 23 다음인 16부터 96 다음인 42까지가 target 
random_training_set()

(tensor([23, 16, 94, 28, 21, 14, 14, 25, 73, 94, 29, 17, 10, 29, 94, 18, 28, 94,
         23, 24, 29, 94, 32, 17, 10, 29, 94, 18, 29, 94, 18, 28, 62, 96, 55, 17,
         18, 28, 94, 21, 24, 31, 14, 94, 15, 14, 14, 21, 94, 44, 73, 94, 29, 17,
         10, 29, 94, 15, 14, 14, 21, 94, 23, 24, 94, 21, 24, 31, 14, 94, 18, 23,
         94, 29, 17, 18, 28, 75, 96, 39, 24, 28, 29, 94, 29, 17, 24, 30, 94, 23,
         24, 29, 94, 21, 10, 30, 16, 17, 82, 96, 96, 37, 40, 49, 57, 50, 47, 44,
         50, 77, 96, 49, 24, 73, 94, 12, 24, 35, 73, 94, 44, 94, 27, 10, 29, 17,
         14, 27, 94, 32, 14, 14, 25, 75, 96, 96, 53, 50, 48, 40, 50, 77, 96, 42,
         24, 24, 13, 94, 17, 14, 10, 27, 29, 73, 94, 10, 29, 94, 32, 17, 10, 29,
         82, 96, 96, 37, 40, 49, 57, 50, 47, 44, 50, 77, 96, 36, 29, 94, 29, 17,
         34, 94, 16, 24, 24, 13, 94, 17, 14, 10, 27, 29, 68, 28, 94, 24, 25, 25,
         27, 14, 28, 28, 18, 24, 23, 75, 96, 96, 53, 50, 48, 40, 50, 77, 96, 58,
         17, 34, 73, 94, 28,

### Make RNN model

In [65]:
# encoding과 decoding이 포함된 RNN 모델 
class EN_RNN_DE(nn.Module):
    def __init__(self, input_size, embedding_size, hidden_size, output_size, num_layers):
        super(EN_RNN_DE, self).__init__()

        # embedding size와 later 수 추가 
        self.input_size = input_size
        self.embedding_size = embedding_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.num_layers = num_layers

        # embedding 하는 부분 
        # encoder layer -> Embedding 함수를 그대로 활용 
        # RNN layer -> input, hidden, layer수를 parameter로 받음 
        # decoder layer -> linear로 묶어서 hidden을 넣고 마지막으로 output을 얻어내는 형태 
        self.encoder = nn.Embedding(self.input_size, self.embedding_size)
        self.rnn = nn.RNN(self.embedding_size, self.hidden_size, self.num_layers)
        self.decoder = nn.Linear(self.hidden_size, self.output_size)

        # encdoer layer에 먼저 input 넣어줌 
        # rnn layer을 통해서 hidden state, output 얻게 됨 
        # decoder lyaer에서 최종저긴 결과물을 얻게 됨, decoder가 된 output, 해당 layer에는 output vector가 들어감 
        # 최종적으로 decoding 된 output 과 hidden return 
    def forward(self, input, hidden):
        en_output = self.encoder(input.view(1, -1))
        output, hidden = self.rnn(en_output, hidden)
        de_output = self.decoder(output.view(1, -1))
        return de_output, hidden

    def init_hidden(self):
        hidden = torch.zeros(self.num_layers, BATCH_SIZE, self.hidden_size)
        return hidden

In [68]:
model = EN_RNN_DE(n_characters, EMBEDDING, HIDDEN_DIM, n_characters, NUM_LAYERS)#.to(device)

In [70]:
# A라는 character를 넣었을 때 어떻게 흘러가는가 
# input size 
# hidden size ~ 마지막은 output size 
inp = character_to_tensor("A")
print(inp.size())
hidden = model.init_hidden()
print(hidden.size())
out,hidden = model(inp,hidden)
print(hidden.size())
print(out.size())

torch.Size([1])
torch.Size([1, 1, 100])
torch.Size([1, 1, 100])
torch.Size([1, 100])


In [71]:
model.parameters

<bound method Module.parameters of EN_RNN_DE(
  (encoder): Embedding(100, 70)
  (rnn): RNN(70, 100)
  (decoder): Linear(in_features=100, out_features=100, bias=True)
)>

In [72]:
optimizer_model = torch.optim.Adam(model.parameters(), lr = LEARNING_RATE)
loss_func = nn.CrossEntropyLoss()

In [74]:
# training도 앞선 예제와 비슷하게 흘러감 
# chuck를 통해서 구성한 input과 target vector를 정의하고 
# hidden에 대한 것을 초기화, loss에 대해 정의 
for i in range(EPOCHS):
    input, target = random_training_set()
    input = input#.to(device)
    target = target#.to(device)
    hidden = model.init_hidden()

    loss = torch.tensor([0]).type(torch.FloatTensor)
    optimizer_model.zero_grad()

    for j in range(CHUNK_LEN-1):
        x = input[j] # input에 해당하는 tensor(vector)를 가져오고 
        y_ = target[j].unsqueeze(0).type(torch.LongTensor) # target에 해당하는 tensor를 가져오고 
        y, hidden = model(x, hidden) # model을 돌려서 예측값을 가져온 후 
        loss += loss_func(y, y_)     # 실제갑소가 예측값 사이의 loss를 구한 후 

    loss.backward() # backward process를 진행 
    optimizer_model.step()

    if i % 100 == 0:
        print(loss/CHUNK_LEN)

tensor([4.6142], grad_fn=<DivBackward0>)
tensor([2.2460], grad_fn=<DivBackward0>)
tensor([2.1213], grad_fn=<DivBackward0>)
tensor([2.1741], grad_fn=<DivBackward0>)
tensor([2.1136], grad_fn=<DivBackward0>)
tensor([2.1410], grad_fn=<DivBackward0>)
tensor([1.9719], grad_fn=<DivBackward0>)
tensor([1.9750], grad_fn=<DivBackward0>)
tensor([2.0312], grad_fn=<DivBackward0>)
tensor([2.1560], grad_fn=<DivBackward0>)


In [75]:
# b라는 character를 넣었을 때 어떤 text가 generation되는가? 
start_string = "b"

input = character_to_tensor(start_string)
hidden = model.init_hidden()

print(start_string, end="")

for i in range(300):
    output, hidden = model(input, hidden)

    output_dist = output.data.view(-1).div(0.8).exp()
    top_i = torch.multinomial(output_dist, 1)[0]
    predicted_char = characters[top_i]

    print(predicted_char, end="")

    input = character_to_tensor(predicted_char)

but ther thour same a pure,--
How lame unfiar to nibrent kour dead jome of the liers:
Wild for frien the lie.

LAUTIV:
Hould of morit growle my sweety
Betsint:
Will--

AUdIERY:
As the vile he kive sir sto my his chandie, felver my brome me hond.

LADY URgh:
And gistant as it shich wlich sere we massu

### Make LSTM model

In [87]:
# 중간 layer에서 RNN을 쓸지 LSTM을 쓸지에 대한 차이만 존재함 
# forward def 에서 차이
class EN_LSTM_DE(nn.Module):
    def __init__(self, input_size, embedding_size, hidden_size, output_size, num_layers):
        super(EN_LSTM_DE, self).__init__()

        self.input_size = input_size
        self.embedding_size = embedding_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.num_layers = num_layers

        self.encoder = nn.Embedding(self.input_size, self.embedding_size)
        self.lstm = nn.LSTM(self.embedding_size, self.hidden_size, self.num_layers) # lstm function 활용, 들어가는 변수는 차이 없음 
        self.decoder = nn.Linear(self.hidden_size, self.output_size)

    def forward(self, input, hidden, cell):
        en_output = self.encoder(input.view(1, -1))                   # input이 encoder를 통과한 결과 
        output, (hidden, cell) = self.lstm(en_output, (hidden, cell)) # embedding이 된 input결과와 hidden과 cell이 동시에 () 들어감 
        de_output = self.decoder(output.view(1, -1))                  # decoder된 output 
        return de_output, hidden, cell                                # 최종적으로 decoder된 output과 hidden, cell을 return 

    def init_hidden(self):
        hidden = torch.zeros(self.num_layers, BATCH_SIZE, self.hidden_size)
        cell = torch.zeros(self.num_layers, BATCH_SIZE, self.hidden_size)
        return hidden, cell

In [88]:
model_LSTM = EN_LSTM_DE(n_characters, EMBEDDING, HIDDEN_DIM, n_characters, NUM_LAYERS)#.to(device)

In [89]:
model_LSTM.parameters

<bound method Module.parameters of EN_LSTM_DE(
  (encoder): Embedding(100, 70)
  (lstm): LSTM(70, 100)
  (decoder): Linear(in_features=100, out_features=100, bias=True)
)>

In [None]:
input = character_to_tensor("A")
print(input)

hidden, cell = model_LSTM.init_hidden()
print(hidden.size())

output, hidden, cell = model_LSTM(input, hidden, cell)
print(output.size())

In [90]:
optimizer_lstm = torch.optim.Adam(model_LSTM.parameters(), lr = LEARNING_RATE)
loss_func = nn.CrossEntropyLoss()

In [92]:
for i in range(EPOCHS):
    input, target = random_training_set()
    input = input#.to(device)
    target = target#.to(device)
    hidden, cell = model_LSTM.init_hidden()

    loss = torch.tensor([0]).type(torch.FloatTensor)
    optimizer_lstm.zero_grad()

    for j in range(CHUNK_LEN-1):
        x = input[j]
        y_ = target[j].unsqueeze(0).type(torch.LongTensor)
        y, hidden, cell = model_LSTM(x, hidden, cell) # lstm이기 때문에 hidden과 cell이 들어감 
        loss += loss_func(y, y_)

    loss.backward()
    optimizer_lstm.step()

    if i % 100 == 0:
        print(loss/CHUNK_LEN)

tensor([4.5891], grad_fn=<DivBackward0>)
tensor([2.3515], grad_fn=<DivBackward0>)
tensor([2.1397], grad_fn=<DivBackward0>)
tensor([2.1236], grad_fn=<DivBackward0>)
tensor([2.0520], grad_fn=<DivBackward0>)
tensor([2.0361], grad_fn=<DivBackward0>)
tensor([1.8929], grad_fn=<DivBackward0>)
tensor([2.0001], grad_fn=<DivBackward0>)
tensor([1.9445], grad_fn=<DivBackward0>)
tensor([2.0089], grad_fn=<DivBackward0>)


In [93]:
# learning rate, epoch을 조절하고 중간 중간 drop out을 함으로써 좀 더 좋은 결과를 얻을 수 있음 
start_string = "b"

input = character_to_tensor(start_string)
hidden, cell = model_LSTM.init_hidden()

print(start_string, end="")

for i in range(300):
    output, hidden, cell = model_LSTM(input, hidden, cell)

    output_dist = output.data.view(-1).div(0.8).exp()
    top_i = torch.multinomial(output_dist, 1)[0]
    predicted_char = characters[top_i]

    print(predicted_char, end="")

    input = character_to_tensor(predicted_char)

bit entor for ladies.

KING RDWIUS:
I seast word and grothers;
What shall mosst to; rath to be eart of come, my foursies, Conould are learn to ble wome sirsfed feer of Pepon
And for se, a been at learing to his rone the praist to to levaied land fors too to to souston tous of as thou seak have to un 