이번 코드를 통해 우리가 얻고자 하는 학습 목표

* RNN 의 구조를 다시 한번 이해한다
* input, output data 의 shape 을 이해한다
* RNN 안에 어떤 parameter 들이 존재하는지 이해한다
* self-supervised learning 에 대한 intuition 을 얻는다



In [1]:
import torch
from torch import nn

import numpy as np

In [2]:
# Toy corpus: four short sentences used as the whole training set.
text = [
    'hey how are you',
    'good i am fine',
    'have a nice day',
    'cailab prevails',
]

# Build the vocabulary: every distinct character that occurs in the corpus.
# NOTE: iteration order of a set of strings is not stable across interpreter
# sessions, so the concrete index assigned to each character may differ
# between runs.
chars = set(''.join(text))
# Index -> character and character -> index lookup tables.
int2char = {idx: ch for idx, ch in enumerate(chars)}
char2int = {ch: idx for idx, ch in enumerate(chars)}

In [3]:
# Show the character -> index lookup table built above.
print(char2int)

{'i': 0, ' ': 1, 'g': 2, 'w': 3, 'l': 4, 'y': 5, 'u': 6, 'p': 7, 'c': 8, 'v': 9, 'b': 10, 's': 11, 'o': 12, 'h': 13, 'r': 14, 'f': 15, 'e': 16, 'm': 17, 'd': 18, 'a': 19, 'n': 20}


In [4]:
# Print the length of every sentence to see which ones need padding.
for sentence in text:
    print(len(sentence))

15
14
15
15


In [5]:
# Length of the longest sentence. `max(text)` alone would compare the strings
# lexicographically and return the alphabetically last sentence, which is not
# necessarily the longest one, so compare the lengths explicitly.
maxlen = max(len(sentence) for sentence in text)

In [6]:
# Right-pad every sentence with spaces (' ') until it is maxlen characters
# long; sentences that are already long enough are left unchanged.
for idx, sentence in enumerate(text):
    text[idx] = sentence.ljust(maxlen)


In [7]:
# Inspect the sentences after padding.
text

['hey how are you', 'good i am fine ', 'have a nice day', 'cailab prevails']

In [8]:
# Generate the input / target sequences.
# The input drops the last character and the target drops the first one, so
# the target at each position is the character that follows the input.
input_seq = [sentence[:-1] for sentence in text]
target_seq = [sentence[1:] for sentence in text]

for source, target in zip(input_seq, target_seq):
    print("Input Sequence: {}\nTarget Sequence: {}".format(source, target))

    

Input Sequence: hey how are yo
Target Sequence: ey how are you
Input Sequence: good i am fine
Target Sequence: ood i am fine 
Input Sequence: have a nice da
Target Sequence: ave a nice day
Input Sequence: cailab prevail
Target Sequence: ailab prevails


In [9]:
# Replace every character by its vocabulary index (in place), turning each
# sequence from a string into a list of ints.
input_seq[:] = [[char2int[ch] for ch in seq] for seq in input_seq]
target_seq[:] = [[char2int[ch] for ch in seq] for seq in target_seq]

In [10]:
# Convert the data to a one-hot representation.

# Number of distinct characters, i.e. the length of each one-hot vector.
dict_size = len(char2int)
# Inputs/targets are one character shorter than the padded sentences.
seq_len = maxlen - 1
# All sentences are processed together as a single batch.
batch_size = len(text)

def one_hot_encode(sequence, dict_size, seq_len, batch_size):
    """One-hot encode a batch of integer sequences.

    Args:
        sequence: Indexable batch where ``sequence[i][u]`` is the integer
            index of the symbol at step ``u`` of sequence ``i``.
        dict_size: Length of each one-hot vector.
        seq_len: Number of steps encoded per sequence.
        batch_size: Number of sequences encoded.

    Returns:
        A float32 NumPy array of shape ``(batch_size, seq_len, dict_size)``
        with a single 1 per (sequence, step) pair and 0 everywhere else.
    """
    encoded = np.zeros((batch_size, seq_len, dict_size), dtype=np.float32)
    # Visit every (sequence, step) position and switch on the matching slot.
    positions = (
        (row, step) for row in range(batch_size) for step in range(seq_len)
    )
    for row, step in positions:
        encoded[row, step, sequence[row][step]] = 1
    return encoded

In [11]:
# One-hot encode the integer input sequences; the printed shape is
# (batch_size, seq_len, dict_size).
input_seq = one_hot_encode(
    sequence=input_seq,
    dict_size=dict_size,
    seq_len=seq_len,
    batch_size=batch_size,
)
print(input_seq.shape)


(4, 14, 21)


In [12]:
# The one-hot inputs are already a float32 NumPy array; wrap it as a tensor.
input_seq = torch.from_numpy(input_seq)
# Targets are class indices, so build them directly as an integer (long)
# tensor instead of going through the legacy float `torch.Tensor(...)`
# constructor and casting later.
target_seq = torch.tensor(target_seq, dtype=torch.long)

In [13]:
# Shape of the one-hot input tensor.
input_seq.shape

torch.Size([4, 14, 21])

In [14]:
# Shape of the target tensor (one class index per sequence position).
target_seq.shape

torch.Size([4, 14])

In [15]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

In [16]:
# Show which device was selected.
device

'cuda'

In [17]:
class Model(nn.Module):
    """Recurrent network: an ``nn.RNN`` followed by a linear read-out layer.

    Args:
        input_size: Size of each input feature vector (last dimension of the
            input passed to ``forward``).
        output_size: Number of scores produced for every time step.
        hidden_dim: Size of the RNN hidden state.
        n_layers: Number of stacked RNN layers.
    """

    def __init__(self, input_size, output_size, hidden_dim, n_layers):
        super().__init__()

        self.hidden_dim = hidden_dim
        self.n_layers = n_layers

        # batch_first=True: inputs and outputs are (batch, seq, feature).
        self.rnn = nn.RNN(input_size, hidden_dim, n_layers, batch_first=True)
        self.fc = nn.Linear(hidden_dim, output_size)

    def forward(self, x):
        """Run the RNN over ``x`` starting from a zero hidden state.

        Args:
            x: Input tensor whose first dimension is the batch size.

        Returns:
            Tuple ``(out, hidden)``: ``out`` holds the linear layer's scores
            with the batch and time dimensions flattened into one
            (``output_size`` columns); ``hidden`` is the final hidden state
            returned by the RNN.
        """
        batch_size = x.size(0)
        # A fresh zero state is created on every call, so no state is carried
        # over between calls.
        hidden = self.init_hidden(batch_size)
        out, hidden = self.rnn(x, hidden)

        # Flatten (batch, seq, hidden_dim) -> (batch * seq, hidden_dim) so the
        # linear layer scores every time step independently.
        out = out.contiguous().view(-1, self.hidden_dim)
        out = self.fc(out)

        return out, hidden

    def init_hidden(self, batch_size):
        """Return a zero hidden state of shape (n_layers, batch_size, hidden_dim).

        The tensor is allocated with the same device and dtype as the model's
        parameters instead of relying on a module-level ``device`` global, so
        it always matches wherever the model currently lives.
        """
        reference = next(self.parameters())
        return reference.new_zeros(self.n_layers, batch_size, self.hidden_dim)
        

In [18]:
# One-hot vectors go in and one score per vocabulary entry comes out, so the
# input and output sizes are both dict_size.
model = Model(
    input_size=dict_size,
    output_size=dict_size,
    hidden_dim=12,
    n_layers=1,
).to(device)


In [19]:
# Training hyper-parameters: number of passes over the batch and the
# optimizer's learning rate.
n_epochs = 1000
lr = 0.01

# Cross-entropy loss over the per-step scores, optimized with Adam over all
# model parameters.
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=lr)

In [20]:

# Training

# Move the data to the training device once, outside the loop. The targets
# are flattened to one class index per (sequence, step) so they line up with
# the model's flattened output rows.
input_seq = input_seq.to(device)
target_seq = target_seq.to(device).view(-1).long()

# Make sure the model is in training mode (sampling switches it to eval mode).
model.train()
for epoch in range(1, n_epochs + 1):
    # Clear gradients left over from the previous step.
    optimizer.zero_grad()
    output, hidden = model(input_seq)
    loss = criterion(output, target_seq)
    loss.backward()
    optimizer.step()

    # Report progress every 10 epochs.
    if epoch % 10 == 0:
        print(epoch, "/", n_epochs)
        print(loss.item())
    

10 / 1000
2.6153151988983154
20 / 1000
2.3262863159179688
30 / 1000
1.9209972620010376
40 / 1000
1.5043874979019165
50 / 1000
1.126924753189087
60 / 1000
0.8069230914115906
70 / 1000
0.5786339044570923
80 / 1000
0.4218820035457611
90 / 1000
0.31221243739128113
100 / 1000
0.23614656925201416
110 / 1000
0.18467701971530914
120 / 1000
0.14975956082344055
130 / 1000
0.12546086311340332
140 / 1000
0.10801015049219131
150 / 1000
0.0950629711151123
160 / 1000
0.08515185117721558
170 / 1000
0.07735756784677505
180 / 1000
0.0766797661781311
190 / 1000
0.07191787660121918
200 / 1000
0.06582465022802353
210 / 1000
0.06024587154388428
220 / 1000
0.05674416199326515
230 / 1000
0.053714819252491
240 / 1000
0.051319483667612076
250 / 1000
0.04923576861619949
260 / 1000
0.04744499921798706
270 / 1000
0.04586787149310112
280 / 1000
0.04446592181921005
290 / 1000
0.043212298303842545
300 / 1000
0.042084310203790665
310 / 1000
0.04106471315026283
320 / 1000
0.040139347314834595
330 / 1000
0.0392963998019

In [65]:
def predict(model, character):
    """Predict the character that follows a given character sequence.

    Args:
        model: Network called as ``model(x)`` that returns ``(out, hidden)``,
            where ``out`` has one row of scores per time step.
        character: Iterable of characters (string or list) used as context.
            Every character must be a key of ``char2int``.

    Returns:
        Tuple ``(next_char, hidden)``: the most probable next character
        according to the scores of the last time step, and the hidden state
        returned by the model.

    Raises:
        KeyError: If ``character`` contains a symbol missing from ``char2int``.
    """
    # Shape (1, seq_len): a batch holding the single context sequence.
    indices = np.array([[char2int[c] for c in character]])
    one_hot = one_hot_encode(indices, dict_size, indices.shape[1], 1)
    # Put the input wherever the model's parameters live rather than relying
    # on a module-level device setting.
    inputs = torch.from_numpy(one_hot).to(next(model.parameters()).device)

    # Pure inference: skip autograd bookkeeping.
    with torch.no_grad():
        out, hidden = model(inputs)
        # Only the last row (last time step) predicts the next character.
        prob = nn.functional.softmax(out[-1], dim=0)
    char_ind = torch.max(prob, dim=0)[1].item()
    return int2char[char_ind], hidden

In [68]:
def sample(model, out_len, start='hey'):
    """Generate text by repeatedly predicting the next character.

    Args:
        model: Network handed to ``predict``; it is switched to eval mode.
        out_len: Desired total length of the returned string, including the
            starting characters.
        start: Seed text; it is lower-cased before use.

    Returns:
        The lower-cased seed followed by the predicted characters. When the
        seed is already at least ``out_len`` long it is returned unchanged
        (lower-cased) and no prediction is made.
    """
    model.eval()
    generated = list(start.lower())
    remaining = out_len - len(generated)
    for _ in range(remaining):
        # The whole sequence so far is fed back in as context on every step.
        next_char, _hidden = predict(model, generated)
        generated.append(next_char)

    return ''.join(generated)

In [69]:

# Generate 15 characters in total, starting from the seed 'hey'.
sample(model, 15, 'hey')

'hey how are you'

In [70]:
# Print the module structure (the RNN layer and the linear layer).
print(model)

Model(
  (rnn): RNN(21, 12, batch_first=True)
  (fc): Linear(in_features=12, out_features=21, bias=True)
)


In [71]:

# Print the shape of every learnable parameter tensor in the model
# (the RNN's weights and biases first, then the linear layer's).
for param in model.parameters():
    print(param.shape)

torch.Size([12, 21])
torch.Size([12, 12])
torch.Size([12])
torch.Size([12])
torch.Size([21, 12])
torch.Size([21])
