## 1. Import Packages

In [None]:
import torch
import torch.nn as nn
import torch.utils.data as data
import unidecode
import string
import random
import re
import time, math

## 2. 하이퍼파라미터 세팅

In [None]:
# --- Training schedule ---
num_step = 10000       # number of training iterations
print_every = 100      # logging interval in iterations (presumably; confirm against the training loops)
plot_every = 10        # plotting interval (not referenced by the code visible here)
lr = 0.002             # learning rate handed to the optimizer

# --- Data ---
chunk_len = 200        # span of text sampled for each training chunk
batch_size = 1         # batch dimension used for the recurrent state

# --- Model ---
embedding_size = 150   # width of each character embedding
hidden_size = 100      # width of the recurrent hidden state
num_layers = 1         # number of stacked recurrent layers

## 3. 데이터 전처리

In [None]:
# Vocabulary: every character in string.printable. A character's id is its
# position in this string.
all_characters = string.printable
n_characters = len(all_characters)

# Show the vocabulary and its size.
print(all_characters)
print('num_chars = ', n_characters)

In [None]:
# Load the corpus and pass it through unidecode so it only contains ASCII
# characters. The context manager closes the file handle as soon as the text
# has been read (the original left it open).
with open('../data/shakespeare.txt') as corpus:
    file = unidecode.unidecode(corpus.read())
file_len = len(file)

print('file_len =', file_len)

In [None]:
def random_chunk():
    """Return ``chunk_len + 1`` consecutive characters from a random position in ``file``.

    The extra character lets callers derive an input sequence and a target
    sequence shifted by one position from the same chunk.

    Raises:
        ValueError: if the corpus holds fewer than ``chunk_len + 1`` characters.
    """
    # random.randint includes both endpoints, so the largest valid start is the
    # one whose chunk ends exactly at the end of the corpus. Without the "- 1"
    # the last start position yields a chunk that is one character short.
    start_index = random.randint(0, file_len - chunk_len - 1)
    end_index = start_index + chunk_len + 1
    return file[start_index:end_index]

# Display one sampled chunk, a blank separator line, and then the length of a
# second, independently sampled chunk.
sample = random_chunk()
print(sample)
print('')
print(len(random_chunk()))

In [None]:
def char_tensor(string):
    """Encode text as a 1-D ``LongTensor`` of indices into ``all_characters``.

    Args:
        string: text to encode; an empty string yields an empty tensor.

    Returns:
        A ``torch.long`` tensor of shape ``(len(string),)``.

    Raises:
        ValueError: if a character does not occur in ``all_characters``.
    """
    # Collect the indices first and build the tensor in a single call rather
    # than writing into a pre-allocated tensor one element at a time.
    indices = [all_characters.index(ch) for ch in string]
    return torch.tensor(indices, dtype=torch.long)

# Sanity check: encode a fixed word, then a randomly sampled chunk.
hello_ids = char_tensor('hello')
print(hello_ids)
chunk_ids = char_tensor(random_chunk())
print(chunk_ids)

## 4. 데이터 로더

In [None]:
def random_training_set():
    """Sample a chunk and return it as an ``(input, target)`` pair of index tensors.

    The input is the chunk without its last character and the target is the
    chunk without its first character, i.e. the input shifted one step ahead.
    """
    text = random_chunk()
    return char_tensor(text[:-1]), char_tensor(text[1:])

# Draw one training pair and show the input tensor followed by the target tensor.
inp, target = random_training_set()
print(inp, target, sep='\n')

## 5. 네트워크 설계

In [None]:
class SimpleRNN(nn.Module):
    """Character-level model: embedding -> ``nn.RNN`` -> linear decoder.

    Args:
        input_size: number of distinct input symbols (rows of the embedding).
        embedding_size: width of each embedding vector.
        hidden_size: width of the RNN hidden state.
        output_size: number of scores produced by the decoder per step.
        num_layers: number of stacked RNN layers.
    """

    def __init__(self, input_size, embedding_size, hidden_size, output_size, num_layers=1):
        super().__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.num_layers = num_layers
        self.embedding_size = embedding_size

        self.encoder = nn.Embedding(input_size, embedding_size)
        self.rnn = nn.RNN(embedding_size, hidden_size, num_layers)
        self.decoder = nn.Linear(hidden_size, output_size)

    def forward(self, input, hidden):
        """Advance the network by one time step.

        Args:
            input: LongTensor of character indices. It is flattened so that
                every element becomes one batch entry of a length-1 sequence.
            hidden: recurrent state of shape ``(num_layers, batch, hidden_size)``.

        Returns:
            ``(logits, hidden)`` where ``logits`` has shape
            ``(batch, output_size)`` and ``hidden`` is the updated state.
        """
        # Index tensors are usually built on the CPU; move them to wherever
        # the weights live so the model also works after ``.to(device)``.
        device = self.encoder.weight.device
        out = self.encoder(input.to(device).view(1, -1))
        out, hidden = self.rnn(out, hidden)
        # Collapse the length-1 sequence axis using the instance's own hidden
        # size instead of relying on a module-level ``batch_size``.
        out = self.decoder(out.view(-1, self.hidden_size))

        return out, hidden

    def init_hidden(self):
        """Return an all-zero hidden state on the same device as the weights.

        The batch dimension is taken from the module-level ``batch_size``.
        """
        return torch.zeros(
            self.num_layers,
            batch_size,
            self.hidden_size,  # instance attribute, not the global of the same name
            device=self.encoder.weight.device,
        )
    

In [None]:
# Prefer the first CUDA device when one is available; otherwise use the CPU.
use_cuda = torch.cuda.is_available()
device = torch.device('cuda:0' if use_cuda else 'cpu')

# Two-layer RNN whose input and output vocabularies both have n_characters
# entries, placed on the selected device.
model = SimpleRNN(n_characters, embedding_size, hidden_size, n_characters, num_layers=2)
model = model.to(device)

In [None]:
# Push a single character through the model and print the encoded input, the
# size of the initial hidden state, and the resulting output scores.
print('RNN 데이터 feeding 예시')
inp, hidden = char_tensor("A"), model.init_hidden()
print(inp)
print(hidden.size())

out, hidden = model(inp, hidden)
print(out)

## 6. loss function, optimizer 정의

In [None]:
# Cross-entropy loss on the decoder's scores, optimised with Adam at the
# configured learning rate.
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(params=model.parameters(), lr=lr)

## 7. test 함수 정의

In [None]:
def test(start_str="b", predict_len=200, temperature=0.8):
    """Print ``start_str`` followed by characters sampled from ``model``.

    Args:
        start_str: non-empty seed text. All but its last character are fed
            through the model to warm up the hidden state; the last character
            is the first real input.
        predict_len: number of characters to generate.
        temperature: divisor applied to the scores before sampling; smaller
            values make the sampling more conservative.
    """
    # Keep every tensor on the model's device so sampling also works on CUDA.
    model_device = next(model.parameters()).device
    hidden = model.init_hidden().to(model_device)

    print(start_str, end="")
    # Sampling needs no gradients; disabling autograd avoids building a graph
    # that spans the whole generated sequence.
    with torch.no_grad():
        for ch in start_str[:-1]:
            _, hidden = model(char_tensor(ch).to(model_device), hidden)
        x = char_tensor(start_str[-1]).to(model_device)

        for _ in range(predict_len):
            output, hidden = model(x, hidden)
            # softmax(scores / T) is the normalised form of exp(scores / T);
            # it describes the same distribution but cannot overflow.
            output_dist = torch.softmax(output.view(-1) / temperature, dim=0)
            top_i = torch.multinomial(output_dist, 1)[0].item()
            predicted_char = all_characters[top_i]

            print(predicted_char, end="")

            x = char_tensor(predicted_char).to(model_device)

## 8. Training

In [None]:
# Train the RNN one character at a time on randomly sampled chunks.
# Inputs, targets and the hidden state are placed on the model's own device so
# the loop runs on both CPU and CUDA.
model_device = next(model.parameters()).device

for i in range(num_step):
    total = char_tensor(random_chunk()).to(model_device)
    inp = total[:-1]
    label = total[1:]
    hidden = model.init_hidden().to(model_device)
    loss = 0
    optimizer.zero_grad()

    # Walk over every (input, target) pair of the chunk. Pairing the two
    # tensors directly avoids dropping the last pair and stays in range
    # whatever the chunk length is.
    for x, y_ in zip(inp, label):
        y, hidden = model(x, hidden)
        # CrossEntropyLoss expects a 1-D target of class indices.
        loss += criterion(y, y_.view(1))

    loss.backward()
    optimizer.step()

    if i % print_every == 0:
        # Report the mean loss per prediction actually made.
        print("loss:", loss.item() / len(inp), "\n")
        test()
        print("\n")

## 9. LSTM

In [None]:
class SimpleLSTM(nn.Module):
    """Character-level model: embedding -> ``nn.LSTM`` -> linear decoder.

    Args:
        input_size: number of distinct input symbols (rows of the embedding).
        embedding_size: width of each embedding vector.
        hidden_size: width of the LSTM hidden and cell states.
        output_size: number of scores produced by the decoder per step.
        num_layers: number of stacked LSTM layers.
    """

    def __init__(self, input_size, embedding_size, hidden_size, output_size, num_layers=1):
        super().__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.num_layers = num_layers
        self.embedding_size = embedding_size

        self.encoder = nn.Embedding(input_size, embedding_size)
        self.lstm = nn.LSTM(embedding_size, hidden_size, num_layers)
        self.decoder = nn.Linear(hidden_size, output_size)

    def forward(self, input, hidden, cell):
        """Advance the network by one time step.

        Args:
            input: LongTensor of character indices. It is flattened so that
                every element becomes one batch entry of a length-1 sequence.
            hidden: hidden state of shape ``(num_layers, batch, hidden_size)``.
            cell: cell state with the same shape as ``hidden``.

        Returns:
            ``(logits, hidden, cell)`` where ``logits`` has shape
            ``(batch, output_size)``.
        """
        # Index tensors are usually built on the CPU; move them to wherever
        # the weights live so the model also works after ``.to(device)``.
        device = self.encoder.weight.device
        # (1, batch) layout: sequence length 1, matching the state's batch axis.
        out = self.encoder(input.to(device).view(1, -1))
        out, (hidden, cell) = self.lstm(out, (hidden, cell))
        out = self.decoder(out.view(-1, self.hidden_size))

        return out, hidden, cell

    def init_hidden(self):
        """Return all-zero ``(hidden, cell)`` states on the weights' device.

        Layer count and state width come from the instance, so they always
        match the constructed network; the batch dimension is taken from the
        module-level ``batch_size``.
        """
        device = self.encoder.weight.device
        shape = (self.num_layers, batch_size, self.hidden_size)
        hidden = torch.zeros(shape, device=device)
        cell = torch.zeros(shape, device=device)

        return hidden, cell
    
# Build the LSTM model on the selected device, as is done for the RNN model
# earlier in the file; the training targets are moved to that same device.
model = SimpleLSTM(n_characters, embedding_size, hidden_size, n_characters, num_layers).to(device)

In [None]:
# Fresh loss and Adam optimizer bound to the parameters of the current model.
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(params=model.parameters(), lr=lr)

In [None]:
def test(start_str="b", predict_len=200, temperature=0.8):
    """Print ``start_str`` followed by characters sampled from the LSTM ``model``.

    Args:
        start_str: non-empty seed text. All but its last character are fed
            through the model to warm up the recurrent state; the last
            character is the first real input.
        predict_len: number of characters to generate.
        temperature: divisor applied to the scores before sampling; smaller
            values make the sampling more conservative.
    """
    # Keep every tensor on the model's device so sampling also works on CUDA.
    model_device = next(model.parameters()).device
    hidden, cell = model.init_hidden()
    hidden, cell = hidden.to(model_device), cell.to(model_device)

    print(start_str, end="")
    # Sampling needs no gradients; disabling autograd avoids building a graph
    # that spans the whole generated sequence.
    with torch.no_grad():
        for ch in start_str[:-1]:
            _, hidden, cell = model(char_tensor(ch).to(model_device), hidden, cell)
        x = char_tensor(start_str[-1]).to(model_device)

        for _ in range(predict_len):
            output, hidden, cell = model(x, hidden, cell)

            # softmax(scores / T) is the normalised form of exp(scores / T);
            # it describes the same distribution but cannot overflow.
            output_dist = torch.softmax(output.view(-1) / temperature, dim=0)
            top_i = torch.multinomial(output_dist, 1)[0].item()
            predicted_char = all_characters[top_i]

            print(predicted_char, end="")

            x = char_tensor(predicted_char).to(model_device)

In [None]:
# Train the LSTM one character at a time on randomly sampled chunks.
# Inputs, targets and both recurrent states are placed on the model's own
# device, so predictions and targets can never end up on different devices.
model_device = next(model.parameters()).device

for i in range(num_step):
    total = char_tensor(random_chunk()).to(model_device)
    inp = total[:-1]
    label = total[1:]
    hidden, cell = model.init_hidden()
    hidden, cell = hidden.to(model_device), cell.to(model_device)
    loss = 0
    optimizer.zero_grad()

    # Walk over every (input, target) pair of the chunk. Pairing the two
    # tensors directly avoids dropping the last pair and stays in range
    # whatever the chunk length is.
    for x, y_ in zip(inp, label):
        y, hidden, cell = model(x, hidden, cell)
        # CrossEntropyLoss expects a 1-D target of class indices.
        loss += criterion(y, y_.view(1))

    loss.backward()
    optimizer.step()

    if i % print_every == 0:
        # Report the mean loss per prediction actually made.
        print("loss:", loss.item() / len(inp), "\n")
        test()
        print("\n")