## LSTM과 GRU로 셰익스피어의 글을 학습시켜보자
- Batch_size를 넣어줌

## 1. Settings

### 1) Import required libraries

In [1]:
import torch
import torch.nn as nn
from torch.autograd import Variable

In [2]:
import unidecode
import string
import random
import re
import time
import math

### 2) Hyperparameter

In [3]:
# --- Schedule / reporting knobs ---
num_epochs = 10_000
point_every = 100
plot_every = 10

# --- Batch geometry ---
chunk_len = 200       # characters per training sequence
batch_size = 64       # sequences per batch

# --- Model size ---
embedding_size = 150  # width of the character embedding
hidden_size = 100     # width of the recurrent state
num_layers = 1        # stacked recurrent layers

# --- Optimizer ---
lr = 2e-3

## 2. Data

### 1) Prepare characters

In [4]:
# Vocabulary: every character of string.printable, plus its size.
all_characters = string.printable
n_characters = len(string.printable)

# Show the vocabulary and its size.
print(all_characters)
print('num_chars = ', n_characters)

0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~ 	

num_chars =  100


### 2) Get text data

In [5]:
# Read the whole corpus and transliterate it to ASCII with unidecode.
# The `with` block closes the file handle as soon as the text is read
# (the previous bare open() left it open).
with open('../data/shakespeare.txt') as corpus_file:
    file = unidecode.unidecode(corpus_file.read())
file_len = len(file)
print('file_len =', file_len)

file_len = 1115394


## 3. Functions for text preprocessing

### 1) Random Chunk

In [6]:
def random_chunk():
    """Return a random slice of the corpus that is ``chunk_len + 1`` characters long.

    The extra character lets the caller build an input (all but the last
    character) and a target (all but the first) of ``chunk_len`` each.

    Reads the module-level ``file``, ``file_len`` and ``chunk_len``.

    Raises:
        ValueError: if the corpus is shorter than ``chunk_len + 1`` characters.
    """
    # random.randint is inclusive on both ends, so the last valid start must
    # leave room for chunk_len + 1 characters; otherwise the slice is
    # silently truncated and the chunk comes back one character short.
    last_start = file_len - chunk_len - 1
    if last_start < 0:
        raise ValueError(
            "corpus has %d characters, need at least %d" % (file_len, chunk_len + 1)
        )
    start_index = random.randint(0, last_start)
    end_index = start_index + chunk_len + 1
    return file[start_index:end_index]

print(random_chunk())

hand
Did will what he did spend and spent not that
Which his triumphant father's hand had won;
His hands were guilty of no kindred blood,
But bloody with the enemies of his kin.
O Richard! York is too 


### 2) Character to tensor

In [7]:
def char_tensor(string):
    """Encode a string as a 1-D LongTensor of character ids.

    Each character is mapped to its position in the module-level
    ``all_characters`` string.

    Args:
        string: text to encode; may be empty.

    Returns:
        torch.LongTensor of shape ``[len(string)]``.

    Raises:
        ValueError: if a character is not present in ``all_characters``.
    """
    # Build the ids in a Python list and convert once, instead of writing
    # into the tensor element by element. The deprecated Variable wrapper
    # is dropped: plain tensors already carry autograd support.
    indices = [all_characters.index(ch) for ch in string]
    return torch.tensor(indices, dtype=torch.long)

print(char_tensor('ABCdef')) ## string을 index로 바꿔준다. 

tensor([36, 37, 38, 13, 14, 15])


### 3) Chunk into input & label


In [8]:
def random_training_set(chunk_len, batch_size):
    """Sample a random batch of (input, target) character-id sequences.

    For each row a window of ``chunk_len + 1`` characters is cut from the
    module-level corpus ``file``; the input is the window without its last
    character and the target is the window without its first, so
    ``target[b, t]`` is the character that follows ``inp[b, t]``.

    The window is sampled here using the ``chunk_len`` argument, so the
    function works for any chunk length rather than only for the value of
    the global ``chunk_len``.

    Args:
        chunk_len: number of characters per sequence.
        batch_size: number of sequences in the batch.

    Returns:
        Tuple ``(inp, target)`` of LongTensors, each ``[batch_size, chunk_len]``.

    Raises:
        ValueError: if the corpus is shorter than ``chunk_len + 1`` characters.
    """
    # randint is inclusive, so stop one short to always get chunk_len + 1 chars.
    last_start = file_len - chunk_len - 1
    if last_start < 0:
        raise ValueError(
            "corpus has %d characters, need at least %d" % (file_len, chunk_len + 1)
        )

    # Every row is overwritten below, so uninitialised storage is fine.
    inp = torch.empty(batch_size, chunk_len, dtype=torch.long)
    target = torch.empty(batch_size, chunk_len, dtype=torch.long)
    for bi in range(batch_size):
        start = random.randint(0, last_start)
        chunk = file[start:start + chunk_len + 1]
        inp[bi] = char_tensor(chunk[:-1])
        target[bi] = char_tensor(chunk[1:])

    return inp, target

In [9]:
inp,target = random_training_set(200,64)

In [10]:
inp.type()

'torch.LongTensor'

In [11]:
inp.size()

torch.Size([64, 200])

In [12]:
target.size()

torch.Size([64, 200])

In [13]:
inp[:,0].size()[0]

64

## 4. Model & Optimizer

### 1) Model

In [14]:
class RNN(nn.Module):
    """Character-level LSTM model that processes one time step per call.

    Pipeline: embedding lookup -> LSTM -> linear projection to per-character
    logits.

    Args:
        input_size: number of distinct input ids (embedding rows).
        embedding_size: width of the embedding vectors.
        hidden_size: width of the LSTM hidden and cell state.
        output_size: number of output classes (logits per step).
        num_layers: number of stacked LSTM layers.
    """

    def __init__(self, input_size, embedding_size, hidden_size, output_size, num_layers=1):
        super(RNN, self).__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.num_layers = num_layers
        self.embedding_size = embedding_size

        self.encoder = nn.Embedding(input_size, embedding_size)
        self.rnn = nn.LSTM(embedding_size, hidden_size, num_layers)
        self.decoder = nn.Linear(hidden_size, output_size)

    def forward(self, input, hidden, cell):
        """Run a single time step for a batch of characters.

        Args:
            input: LongTensor of character ids whose first dimension is the
                batch, e.g. ``[batch]`` or ``[batch, 1]``. Only one time step
                per call is supported, because the LSTM output is flattened
                to ``[batch, hidden_size]`` before the decoder.
            hidden: LSTM hidden state, ``[num_layers, batch, hidden_size]``.
            cell: LSTM cell state, ``[num_layers, batch, hidden_size]``.

        Returns:
            Tuple ``(out, hidden, cell)``: ``out`` holds the logits,
            ``[batch, output_size]``; ``hidden`` and ``cell`` are the updated
            states with the same shapes as the inputs.
        """
        batch_size = input.size()[0]
        # [batch] -> [batch, 1] -> embedded [batch, 1, embedding_size]
        out = self.encoder(input.view(batch_size, -1))
        # nn.LSTM defaults to sequence-first input: [1, batch, embedding_size]
        out = out.transpose(1, 0)
        # out: [1, batch, hidden_size]
        out, (hidden, cell) = self.rnn(out, (hidden, cell))
        # Flatten the length-1 sequence axis away, then project to logits.
        out = self.decoder(out.view(batch_size, -1))

        return out, hidden, cell

    def init_hidden(self, batch_size):
        """Return zero-filled ``(hidden, cell)`` states for a new sequence.

        The shape comes from this instance's own ``num_layers`` and
        ``hidden_size``, not from module-level globals, so the states always
        match the model that was actually built. They are created on the same
        device and with the same dtype as the model weights.

        Args:
            batch_size: number of sequences processed in parallel.

        Returns:
            Tuple ``(hidden, cell)``, each ``[num_layers, batch_size, hidden_size]``.
        """
        reference = self.decoder.weight
        shape = (self.num_layers, batch_size, self.hidden_size)
        hidden = torch.zeros(shape, dtype=reference.dtype, device=reference.device)
        cell = torch.zeros(shape, dtype=reference.dtype, device=reference.device)

        return hidden, cell
    
model = RNN(n_characters, embedding_size, hidden_size, n_characters, num_layers)

### 2) Loss & Optimizer

In [15]:
# Loss: cross-entropy between the model's logits and the target character ids.
criterion = nn.CrossEntropyLoss()

# Optimizer: Adam over every model parameter, using the `lr` hyperparameter.
optimizer = torch.optim.Adam(params=model.parameters(), lr=lr)

### 3) Test Function

In [16]:
def generate(model, prime_str='A', predict_len=200, temperature=0.8, cuda=False):
    """Sample text from the model one character at a time.

    The whole ``prime_str`` is fed through the model first so the recurrent
    state reflects the full prompt (previously only its last character was
    used). Sampling then continues for ``predict_len`` characters.

    Args:
        model: object exposing ``init_hidden(batch_size)`` and a call
            signature ``model(inp, hidden, cell) -> (logits, hidden, cell)``.
        prime_str: non-empty prompt; it is included at the start of the result.
        predict_len: number of characters to sample after the prompt.
        temperature: divisor applied to the logits before sampling; lower
            values make the sampling more conservative.
        cuda: accepted for backward compatibility; currently unused.

    Returns:
        ``prime_str`` followed by ``predict_len`` sampled characters.

    Raises:
        ValueError: if ``prime_str`` is empty.
    """
    if not prime_str:
        raise ValueError("prime_str must contain at least one character")

    prime_input = char_tensor(prime_str).unsqueeze(0)  # [1, len(prime_str)]
    pieces = [prime_str]

    # Sampling needs no gradients; skip building the autograd graph.
    with torch.no_grad():
        hidden, cell = model.init_hidden(1)

        # Warm up the state on every prompt character except the last one.
        for step in range(len(prime_str) - 1):
            _, hidden, cell = model(prime_input[:, step], hidden, cell)

        # The last prompt character is the first real input.
        inp = prime_input[:, -1]
        for _ in range(predict_len):
            output, hidden, cell = model(inp, hidden, cell)
            # softmax gives the same sampling distribution as exp() followed
            # by multinomial's normalisation, without overflowing on large logits.
            probs = torch.softmax(output.view(-1) / temperature, dim=0)
            inp = torch.multinomial(probs, 1)  # shape [1]; also the next input
            pieces.append(all_characters[inp.item()])

    return ''.join(pieces)

In [17]:
# Training loop: each iteration draws a fresh random batch, resets the LSTM
# state, sums the per-step loss over the chunk and takes one optimizer step.
for i in range(1000):
    hidden, cell = model.init_hidden(batch_size)
    optimizer.zero_grad()
    inp, target = random_training_set(chunk_len, batch_size)

    loss = 0
    # Visit every one of the chunk_len positions. The loop used to stop at
    # chunk_len - 1, so the last position was never trained on and the
    # division by chunk_len below did not match the number of summed terms.
    for j in range(chunk_len):
        x = inp[:, j]        # [batch] current characters
        y_ = target[:, j]    # [batch] next characters
        y, hidden, cell = model(x, hidden, cell)
        loss += criterion(y, y_.view(-1))

    loss.backward()
    optimizer.step()

    # Report at the interval set by the `point_every` hyperparameter.
    if i % point_every == 0:
        print("\n",loss/chunk_len,"\n")  # mean loss per character
        text = generate(model,prime_str='b')
        print(text)
        print("\n\n")

    


 tensor(4.5642, grad_fn=<DivBackward0>) 

bCPQtMJN9P)3uPq!czLFn*l>hqr{mG)i+'XSwy
_I{-$c
h	CNDAoA^#J$W *;ZuCZQ`20~?|+Bn$.B#:{>%Z_;$phkQ\iUYFUye1RA6dgMtL*. Cmwkh_y,~4$Jv?5WhY|3xw%n0#)0prgM|*<a*hRC6;ckzr;{(F<oA#nr0qo_#	>63`6sut.@(jV~2l 





KeyboardInterrupt: 

In [37]:
# Debug version of the training loop: one iteration, one time step.
# It leaves x, y, y_, loss and j in the global namespace for inspection.
for i in range(1):
    hidden,cell = model.init_hidden(batch_size)
    loss = 0
    optimizer.zero_grad()
    inp,target = random_training_set(chunk_len,batch_size)
    for j in range(1):
        x = inp[:,j]
        y_ = target[:,j]
        y,hidden,cell = model(x,hidden,cell)
        loss += criterion(y,y_.view(-1)) # y: [batch, n_class] logits, y_: [batch] target ids
        
    loss.backward()
    optimizer.step()
    
    if i % 100 == 0:
        # NOTE(review): only one step is summed here, yet the value is divided
        # by chunk_len, so the printed number is loss / chunk_len rather than
        # a per-character mean.
        print("\n",loss/chunk_len,"\n")
        text = generate(model,prime_str='b')
        print(text)
        print("\n\n")

    


 tensor(0.0225, grad_fn=<DivBackward0>) 

bo9,!E[@FZMK;
{T.SGt#%993f!	m&$e]E<xd>g1K>~21q//vNN_5$GV;Tz {a=8,3@PA4c	5(L-W6y#,0Ms
=tms4j@+D4 a9G(:t'Tx:/`Y)i0C6aKw;mD;a"$FVZ^w#B`%0/tokQFtS;%AhTmIx$4$QVIjo/0N$y*z
>3SS{ zh=\qA+31YJ0+4I	xcU?8=





In [51]:
y.size()

torch.Size([64, 100])

In [42]:
for j in range(1):
    print(j)

0


In [43]:
loss

tensor(4.5086, grad_fn=<AddBackward0>)

In [44]:
y_.size()

torch.Size([64])

In [49]:
inp[:,198]

tensor([94, 24, 14, 48, 28, 24, 28, 10, 21, 94, 10, 74, 29, 73, 94, 94, 94, 27,
        10, 14, 21, 21, 36, 44, 94, 18, 28, 49, 10, 21, 73, 28, 10, 15, 28, 25,
        23, 28, 24, 34, 82, 55, 29, 51, 50, 73, 31, 13, 17, 13, 24, 50, 32, 24,
        43, 30, 96, 41, 21, 21, 48, 23, 94, 23])

In [47]:
x

tensor([94, 39, 14, 29, 94, 22, 18, 24, 27, 53, 14, 10, 36, 24, 18, 34, 13, 13,
        77, 94, 17, 30, 23, 21, 23, 16, 21, 20, 44, 16, 10, 50, 94, 29, 94, 94,
        18, 27, 29, 94, 94, 32, 25, 29, 32, 94, 27, 94, 25, 21, 27, 58, 96, 21,
        96, 25, 94, 77, 30, 17, 73, 94, 21, 55])