이번 코드를 이용해서 우리가 얻어야 하는 것들의 목표

* RNN 의 구조에 대한 다시 한번의 이해
* input, output data 들의 shape 에 대한 이해
* RNN 안에 어떠한 parameter 들이 존재하는지에 대한 이해
* self-supervised learning 에 대한 intuition 에 대해서 이해


In [None]:
import torch
from torch import nn

import numpy as np

In [None]:
text = ['hey how are you','good i am fine','have a nice day', 'cailab prevails', ]

# dictionary 형성을 위해서 
chars = set(''.join(text))
int2char = dict(enumerate(chars))
char2int = { char: ind for ind,char in int2char.items()}

In [3]:
print(char2int)

{'u': 0, 'b': 1, 'y': 2, 'm': 3, ' ': 4, 'l': 5, 'd': 6, 'o': 7, 'g': 8, 'n': 9, 'i': 10, 'h': 11, 'p': 12, 'r': 13, 'e': 14, 'v': 15, 's': 16, 'a': 17, 'f': 18, 'c': 19, 'w': 20}


In [4]:
for i in text:
    print(len(i))

15
14
15
15


In [5]:
maxlen = len(max(text))

In [6]:
# padding 을 maxlen 으로 만들어 넣는다... padding 은 ' '

for i in range(len(text)):
    while len(text[i]) < maxlen:
        text[i] += ' '


In [7]:
text

['hey how are you', 'good i am fine ', 'have a nice day', 'cailab prevails']

In [8]:
# input / target 을 sequence 를 generate

input_seq = []
target_seq = []

for i in range(len(text)):
    input_seq.append(text[i][:-1])
    target_seq.append(text[i][1:])
    print("Input Sequence: {}\nTarget Sequence: {}".format(input_seq[i], target_seq[i]))

    

Input Sequence: hey how are yo
Target Sequence: ey how are you
Input Sequence: good i am fine
Target Sequence: ood i am fine 
Input Sequence: have a nice da
Target Sequence: ave a nice day
Input Sequence: cailab prevail
Target Sequence: ailab prevails


In [9]:
for i in range(len(text)):
    input_seq[i] = [ char2int[c] for c in input_seq[i]]
    target_seq[i] = [ char2int[c] for c in target_seq[i]]

In [10]:
# data 들을 one-hot representation 으로 바꿈 

dict_size = len(char2int)
seq_len = maxlen - 1
batch_size = len(text)

def one_hot_encode( sequence, dict_size, seq_len, batch_size):
    features = np.zeros((batch_size, seq_len, dict_size), dtype= np.float32)
    for i in range(batch_size):
        for u in range(seq_len):
            features[i,u, sequence[i][u] ] = 1
    return features

In [11]:
input_seq = one_hot_encode( input_seq, dict_size, seq_len, batch_size)
print(input_seq.shape)


(4, 14, 21)


In [12]:
input_seq = torch.from_numpy(input_seq)
target_seq = torch.Tensor(target_seq)

In [13]:
input_seq.shape

torch.Size([4, 14, 21])

In [14]:
target_seq.shape

torch.Size([4, 14])

In [15]:
device = "cuda" if torch.cuda.is_available() else "cpu"

  return torch._C._cuda_getDeviceCount() > 0


In [16]:
device

'cpu'

In [17]:
class Model(nn.Module):
    def __init__(self, input_size, output_size, hidden_dim, n_layers):
        super(Model, self).__init__()
        
        self.hidden_dim = hidden_dim
        self.n_layers = n_layers
        
        self.rnn = nn.RNN(input_size, hidden_dim, n_layers, batch_first=True)
        self.fc = nn.Linear(hidden_dim, output_size)

        
    def forward(self, x):
        batch_size = x.size(0)
        hidden = self.init_hidden(batch_size)
        out, hidden = self.rnn(x, hidden)
        print("inputshape", x.shape)
        print("outshape",out.shape)
        print("hiddenshape",hidden.shape)
        
        out = out.contiguous().view(-1, self.hidden_dim)
        print("outshape2",out.shape)
        out = self.fc(out)
        print("OUTSHAPE",out.shape)
        
        return out, hidden
    
    def init_hidden(self, batch_size):
        hidden = torch.zeros(self.n_layers, batch_size, self.hidden_dim).to(device)
        return hidden
        

In [18]:
model = Model( input_size=dict_size, output_size=dict_size, hidden_dim = 12, n_layers = 1).to(device)


In [19]:
n_epochs = 100
lr = 0.01

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=lr)

In [24]:

# Training 

input_seq = input_seq.to(device)
for epoch in range(1, n_epochs+1):
    optimizer.zero_grad()
    output, hidden = model(input_seq)
    output = output.to(device)
    target_seq = target_seq.to(device)
    target_seq = target_seq.view(-1).long()
    print("training:  ",output.shape, target_seq.shape)
    loss = criterion(output, target_seq)
    loss.backward()
    optimizer.step()
    
    if epoch % 10 == 0:
        print(epoch,"/",n_epochs)
        print(loss.item())
    

inputshape torch.Size([4, 14, 21])
outshape torch.Size([4, 14, 12])
hiddenshape torch.Size([1, 4, 12])
outshape2 torch.Size([56, 12])
OUTSHAPE torch.Size([56, 21])
training:   torch.Size([56, 21]) torch.Size([56])
inputshape torch.Size([4, 14, 21])
outshape torch.Size([4, 14, 12])
hiddenshape torch.Size([1, 4, 12])
outshape2 torch.Size([56, 12])
OUTSHAPE torch.Size([56, 21])
training:   torch.Size([56, 21]) torch.Size([56])
inputshape torch.Size([4, 14, 21])
outshape torch.Size([4, 14, 12])
hiddenshape torch.Size([1, 4, 12])
outshape2 torch.Size([56, 12])
OUTSHAPE torch.Size([56, 21])
training:   torch.Size([56, 21]) torch.Size([56])
inputshape torch.Size([4, 14, 21])
outshape torch.Size([4, 14, 12])
hiddenshape torch.Size([1, 4, 12])
outshape2 torch.Size([56, 12])
OUTSHAPE torch.Size([56, 21])
training:   torch.Size([56, 21]) torch.Size([56])
inputshape torch.Size([4, 14, 21])
outshape torch.Size([4, 14, 12])
hiddenshape torch.Size([1, 4, 12])
outshape2 torch.Size([56, 12])
OUTSHAPE t

outshape2 torch.Size([56, 12])
OUTSHAPE torch.Size([56, 21])
training:   torch.Size([56, 21]) torch.Size([56])
inputshape torch.Size([4, 14, 21])
outshape torch.Size([4, 14, 12])
hiddenshape torch.Size([1, 4, 12])
outshape2 torch.Size([56, 12])
OUTSHAPE torch.Size([56, 21])
training:   torch.Size([56, 21]) torch.Size([56])
inputshape torch.Size([4, 14, 21])
outshape torch.Size([4, 14, 12])
hiddenshape torch.Size([1, 4, 12])
outshape2 torch.Size([56, 12])
OUTSHAPE torch.Size([56, 21])
training:   torch.Size([56, 21]) torch.Size([56])
inputshape torch.Size([4, 14, 21])
outshape torch.Size([4, 14, 12])
hiddenshape torch.Size([1, 4, 12])
outshape2 torch.Size([56, 12])
OUTSHAPE torch.Size([56, 21])
training:   torch.Size([56, 21]) torch.Size([56])
inputshape torch.Size([4, 14, 21])
outshape torch.Size([4, 14, 12])
hiddenshape torch.Size([1, 4, 12])
outshape2 torch.Size([56, 12])
OUTSHAPE torch.Size([56, 21])
training:   torch.Size([56, 21]) torch.Size([56])
inputshape torch.Size([4, 14, 21]

In [21]:
def predict(model, character):
    character = np.array([ [char2int[c] for c in character]])
    character = one_hot_encode(character, dict_size, character.shape[1], 1)
    character = torch.from_numpy(character)
    character = character.to(device)
    
    out, hidden = model(character)
    prob = nn.functional.softmax(out[-1], dim=0).data
    char_ind = torch.max(prob, dim=0)[1].item()
    return int2char[char_ind], hidden

In [22]:
def sample(model, out_len, start='hey'):
    model.eval()
    start = start.lower()
    chars = [ch for ch in start]
    size = out_len - len(chars)
    for ii in range(size):
        char, h = predict(model, chars)
        chars.append(char)
        
    return ''.join(chars)

In [23]:

sample(model, 15, 'hey')

inputshape torch.Size([1, 3, 21])
outshape torch.Size([1, 3, 12])
hiddenshape torch.Size([1, 1, 12])
outshape2 torch.Size([3, 12])
OUTSHAPE torch.Size([3, 21])
inputshape torch.Size([1, 4, 21])
outshape torch.Size([1, 4, 12])
hiddenshape torch.Size([1, 1, 12])
outshape2 torch.Size([4, 12])
OUTSHAPE torch.Size([4, 21])
inputshape torch.Size([1, 5, 21])
outshape torch.Size([1, 5, 12])
hiddenshape torch.Size([1, 1, 12])
outshape2 torch.Size([5, 12])
OUTSHAPE torch.Size([5, 21])
inputshape torch.Size([1, 6, 21])
outshape torch.Size([1, 6, 12])
hiddenshape torch.Size([1, 1, 12])
outshape2 torch.Size([6, 12])
OUTSHAPE torch.Size([6, 21])
inputshape torch.Size([1, 7, 21])
outshape torch.Size([1, 7, 12])
hiddenshape torch.Size([1, 1, 12])
outshape2 torch.Size([7, 12])
OUTSHAPE torch.Size([7, 21])
inputshape torch.Size([1, 8, 21])
outshape torch.Size([1, 8, 12])
hiddenshape torch.Size([1, 1, 12])
outshape2 torch.Size([8, 12])
OUTSHAPE torch.Size([8, 21])
inputshape torch.Size([1, 9, 21])
outsha

'hey how are you'