In [1]:
import sys
import numpy as np
import torch

# Report the interpreter and library versions this notebook was executed with.
print("Python version:", sys.version)
for lib_name, lib in (('torch', torch), ('numpy', np)):
    print(lib_name, lib.__version__)

# `from torch.autograd import Variable` is not needed: Variable has been merged into torch.Tensor.

Python version: 3.9.7 (default, Sep 16 2021, 16:59:28) [MSC v.1916 64 bit (AMD64)]
torch 2.2.0+cpu
numpy 1.19.5


# RNN Basic

In [32]:
# One-hot vectors for the four characters used in the shape demos:
# the i-th name receives a length-4 list with a single 1 at position i.
h, e, l, o = ([int(col == row) for col in range(4)] for row in range(4))

In [33]:
# RNN layer with 4 input features and 2 hidden units; batch_first=True means
# inputs are laid out as (batch, seq, feature).
cell = torch.nn.RNN(input_size=4, hidden_size=2, batch_first=True)

# Random starting hidden state of shape (1, 1, 2).
hidden = torch.randn(1, 1, 2)

# Variant 1: feed the five characters one time step at a time.
inputs = torch.Tensor([h, e, l, l, o])

for step in range(inputs.size(0)):
    one = inputs[step].view(1, 1, -1)  # single step reshaped to (1, 1, 4)
    out, hidden = cell(one, hidden)
    print('one input size', one.size(), 'out size', out.size())

# Variant 2: feed all five steps in a single call as one (1, 5, 4) batch.
# The hidden state left over from the step-by-step loop is reused as the start state.
inputs = inputs.view(1, 5, -1)
out, hidden = cell(inputs, hidden)
print('sequence input size', inputs.size(), 'out size', out.size())

one input size torch.Size([1, 1, 4]) out size torch.Size([1, 1, 2])
one input size torch.Size([1, 1, 4]) out size torch.Size([1, 1, 2])
one input size torch.Size([1, 1, 4]) out size torch.Size([1, 1, 2])
one input size torch.Size([1, 1, 4]) out size torch.Size([1, 1, 2])
one input size torch.Size([1, 1, 4]) out size torch.Size([1, 1, 2])
sequence input size torch.Size([1, 5, 4]) out size torch.Size([1, 5, 2])


In [34]:
# Three sequences of five one-hot characters each.
batch_rows = [
    [h, e, l, l, o],
    [e, o, l, l, l],
    [l, l, e, e, l],
]
inputs = torch.Tensor(batch_rows)

# One random starting hidden vector per sequence in the batch: shape (1, 3, 2).
hidden = torch.randn(1, 3, 2)

out, hidden = cell(inputs, hidden)
print('batch input size', inputs.size(), 'out size', out.size())

batch input size torch.Size([3, 5, 4]) out size torch.Size([3, 5, 2])


In [35]:
# Same layer sizes as the earlier demo, but with the default batch_first=False,
# so the layer expects input laid out as (seq_len, batch, input_size).
cell = torch.nn.RNN(input_size=4, hidden_size=2)

# Swap the first two axes of the batch built in the previous cell.
# The result is bound to a new name instead of overwriting `inputs`: with
# `inputs = inputs.transpose(...)` a second run of this cell would transpose
# back again and the batch axis would no longer match the hidden state.
inputs_seq_first = inputs.transpose(0, 1)

out, hidden = cell(inputs_seq_first, hidden)
print('batch input size', inputs_seq_first.size(), 'out size', out.size())

batch input size torch.Size([5, 3, 4]) out size torch.Size([5, 3, 2])


# RNN next step

In [36]:
torch.manual_seed(777)  # fixed seed for reproducibility

# Vocabulary: the list position is the class id of each character.
#            0    1    2    3    4
idx2char = ['h', 'i', 'e', 'l', 'o']

# Task: given "hihell", predict the same text shifted by one character, "ihello".
x_data = [0, 1, 0, 2, 3, 3]   # hihell
y_data = [1, 0, 2, 3, 3, 4]   # ihello

# Row k is the one-hot vector for class id k (5 x 5 identity pattern).
one_hot_lookup = [[int(col == row) for col in range(5)] for row in range(5)]

# Input sequence as one-hot rows, looked up by class id.
x_one_hot = list(map(one_hot_lookup.__getitem__, x_data))

In [37]:
# torch.Tensor builds a float32 tensor; torch.LongTensor builds an int64 tensor
# (the type used for integer class labels and for indexing).
labels = torch.LongTensor(y_data)
inputs = torch.Tensor(x_one_hot)

# Hyperparameters for the character-by-character model.
num_classes, input_size, hidden_size = 5, 5, 5
batch_size = 1
sequence_length = 1  # the sequence is fed one step at a time
num_layers = 1       # single-layer RNN

In [38]:
class Model(torch.nn.Module):
    """Single RNN layer that is driven one chunk of input at a time.

    The layer is built from the notebook-level ``input_size`` and
    ``hidden_size`` settings. There is no output projection: the RNN
    activations are returned directly as per-class scores, so the hidden
    width has to equal the number of classes for them to be usable as logits.
    """

    def __init__(self):
        super(Model, self).__init__()
        self.rnn = torch.nn.RNN(input_size=input_size,
                                hidden_size=hidden_size,
                                batch_first=True)

    def forward(self, hidden, x):
        """Advance the RNN over ``x`` starting from ``hidden``.

        Args:
            hidden: previous hidden state, shape (num_layers, batch, hidden_size).
            x: input features; reshaped here to (batch, steps, input_size).

        Returns:
            Tuple ``(hidden, scores)``: the updated hidden state and the RNN
            outputs flattened to (batch * steps, hidden_size).
        """
        # All sizes come from the arguments and the layer itself rather than
        # from notebook globals, so rebinding e.g. `sequence_length` in a
        # later cell cannot break this method. The batch size is taken from
        # `hidden`, which must already agree with the input batch.
        x = x.reshape(hidden.size(1), -1, self.rnn.input_size)

        out, hidden = self.rnn(x, hidden)
        # The last dimension of the RNN output is hidden_size.
        return hidden, out.reshape(-1, self.rnn.hidden_size)

    def init_hidden(self):
        """Return an all-zero initial hidden state.

        The shape is (num_layers, batch_size, hidden_size), with the batch
        size read from the notebook-level ``batch_size``. Starting from zeros
        is the usual default. The tensor is created fresh on every call and is
        not a model parameter, so training does not update it.
        """
        return torch.zeros(self.rnn.num_layers, batch_size, self.rnn.hidden_size)
    

In [39]:
# Build the step-wise model and show its layer structure.
model = Model()
print(model)

# Adam over all model parameters, with cross-entropy as the training loss.
optimizer = torch.optim.Adam(model.parameters(), lr=0.1)
criterion = torch.nn.CrossEntropyLoss()

Model(
  (rnn): RNN(5, 5, batch_first=True)
)


In [40]:
# Train for 20 epochs. Each epoch feeds the sequence one character at a time,
# sums the per-character losses, then takes a single optimizer step.
for epoch in range(20):
    optimizer.zero_grad()
    loss = 0
    hidden = model.init_hidden()

    sys.stdout.write("predicted string: ")
    # The loop variable is deliberately not called `input`: at notebook top
    # level that name would shadow the builtin for every later cell.
    for char_vec, label in zip(inputs, labels):
        # Calling a torch.nn.Module instance invokes its forward() method.
        hidden, output = model(hidden, char_vec)
        _, idx = output.max(1)
        # `output` holds a single row here, so `idx` has exactly one element.
        sys.stdout.write(idx2char[idx.item()])
        # CrossEntropyLoss expects a 1-D target; reshape the 0-d label
        # instead of rebuilding a tensor from a tensor.
        loss += criterion(output, label.view(1))

    print(", epoch: %d, loss: %1.3f" % (epoch+1, loss.item()))

    loss.backward()
    optimizer.step()

print('learning finished')

predicted string: llllll, epoch: 1, loss: 10.155
predicted string: llllll, epoch: 2, loss: 9.137
predicted string: llllll, epoch: 3, loss: 8.355
predicted string: llllll, epoch: 4, loss: 7.577
predicted string: llllll, epoch: 5, loss: 6.876
predicted string: lhelll, epoch: 6, loss: 6.327
predicted string: ihelll, epoch: 7, loss: 6.014
predicted string: ihelll, epoch: 8, loss: 5.787
predicted string: ihelll, epoch: 9, loss: 5.477
predicted string: ihelll, epoch: 10, loss: 5.274
predicted string: ihelll, epoch: 11, loss: 5.041
predicted string: ihello, epoch: 12, loss: 4.827
predicted string: ihello, epoch: 13, loss: 4.676
predicted string: ihello, epoch: 14, loss: 4.550
predicted string: ihello, epoch: 15, loss: 4.430
predicted string: ihello, epoch: 16, loss: 4.305
predicted string: ihello, epoch: 17, loss: 4.164
predicted string: ihelll, epoch: 18, loss: 4.003
predicted string: ihelll, epoch: 19, loss: 3.860
predicted string: ihelll, epoch: 20, loss: 3.879
learning finished


# sequential

In [61]:
# Same data as before, but the x data is wrapped in one extra pair of
# brackets so the whole sequence forms a single batch entry.
x_data = [[0, 1, 0, 2, 3, 3]]   # hihell
y_data = [1, 0, 2, 3, 3, 4]     # ihello

# One-hot encode every class id (h=0, i=1, e=2, l=3, o=4) into a length-5 row.
x_one_hot = [[[int(col == char_id) for col in range(5)]
              for char_id in seq]
             for seq in x_data]

inputs = torch.Tensor(x_one_hot)
labels = torch.LongTensor(y_data)

sequence_length = 6  # |ihello| == 6

In [62]:
class RNN(torch.nn.Module):
    """RNN that consumes a whole batch of sequences in one forward call.

    There is no output projection: the RNN activations are returned directly
    as per-class scores, so ``hidden_size`` should equal ``num_classes`` when
    the output is fed to a classification loss.

    Args:
        num_classes: number of target classes (stored for reference).
        input_size: number of features per time step.
        hidden_size: width of the RNN hidden state.
        num_layers: number of stacked RNN layers.
        sequence_length: optional fixed number of time steps. When ``None``
            (the default) the length is inferred from the input.
    """

    def __init__(self, num_classes, input_size, hidden_size, num_layers,
                 sequence_length=None):
        super(RNN, self).__init__()

        self.num_classes = num_classes
        self.num_layers = num_layers
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.sequence_length = sequence_length

        # Build the layer from the constructor arguments so that it always
        # agrees with the h_0 created in forward().
        self.rnn = torch.nn.RNN(input_size=input_size,
                                hidden_size=hidden_size,
                                num_layers=num_layers,
                                batch_first=True)

    def forward(self, x):
        """Run the RNN over ``x`` from an all-zero hidden state.

        Args:
            x: tensor whose first dimension is the batch; the remaining
                elements are reshaped to (steps, input_size).

        Returns:
            Tensor of shape (batch * steps, hidden_size).
        """
        h_0 = torch.zeros(
            self.num_layers, x.size(0), self.hidden_size)

        # Reshape input to (batch, sequence, input). The result must be
        # assigned: reshape/view return a new tensor and do not modify x.
        steps = -1 if self.sequence_length is None else self.sequence_length
        x = x.reshape(x.size(0), steps, self.input_size)

        out, _ = self.rnn(x, h_0)
        # The last dimension of the RNN output is hidden_size.
        return out.reshape(-1, self.hidden_size)
    
# Instantiate the whole-sequence model from the notebook-level
# hyperparameters and show its layer structure.
rnn = RNN(num_classes=num_classes,
          input_size=input_size,
          hidden_size=hidden_size,
          num_layers=num_layers)
print(rnn)

RNN(
  (rnn): RNN(5, 5, batch_first=True)
)


In [71]:
# Loss and optimizer for the whole-sequence model.
optimizer = torch.optim.Adam(rnn.parameters(), lr=0.1)
criterion = torch.nn.CrossEntropyLoss()

for epoch in range(20):
    # Forward pass over the full sequence and loss against the target ids.
    outputs = rnn(inputs)
    loss = criterion(outputs, labels)

    # Backward pass and parameter update.
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    # Pick the highest-scoring class for every time step.
    _, idx = outputs.max(1)
    idx = idx.data.numpy()
    # .squeeze() removes dimensions of size 1,
    # e.g. an array of shape (1, 2, 3) becomes (2, 3).
    result_str = list(map(idx2char.__getitem__, idx.squeeze()))

    print("epoch: %d, loss: %1.3f" % (epoch + 1, loss.data))
    print("Predicted string: ", ''.join(result_str))

print("Learning finished!")

epoch: 1, loss: 0.565
Predicted string:  ihello
epoch: 2, loss: 0.535
Predicted string:  ihello
epoch: 3, loss: 0.530
Predicted string:  ihello
epoch: 4, loss: 0.525
Predicted string:  ihello
epoch: 5, loss: 0.526
Predicted string:  ihello
epoch: 6, loss: 0.524
Predicted string:  ihello
epoch: 7, loss: 0.518
Predicted string:  ihello
epoch: 8, loss: 0.515
Predicted string:  ihello
epoch: 9, loss: 0.512
Predicted string:  ihello
epoch: 10, loss: 0.510
Predicted string:  ihello
epoch: 11, loss: 0.507
Predicted string:  ihello
epoch: 12, loss: 0.504
Predicted string:  ihello
epoch: 13, loss: 0.501
Predicted string:  ihello
epoch: 14, loss: 0.497
Predicted string:  ihello
epoch: 15, loss: 0.497
Predicted string:  ihello
epoch: 16, loss: 0.494
Predicted string:  ihello
epoch: 17, loss: 0.493
Predicted string:  ihello
epoch: 18, loss: 0.494
Predicted string:  ihello
epoch: 19, loss: 0.492
Predicted string:  ihello
epoch: 20, loss: 0.489
Predicted string:  ihello
Learning finished!


# Embedding

In [4]:
# Vocabulary: the list position is the class id of each character.
idx2char = ['h', 'i', 'e', 'l', 'o']

# Teach hihell -> ihello
x_data = [[0, 1, 0, 2, 3, 3]]   # hihell
y_data = [1, 0, 2, 3, 3, 4]     # ihello

# There is only one batch of samples, so the tensors are built once up front.
# Inputs are integer ids here (not one-hot rows) because they index an embedding table.
inputs, labels = (torch.LongTensor(seq) for seq in (x_data, y_data))

num_classes, input_size = 5, 5
embedding_size = 10   # width of each embedding vector
hidden_size = 5       # RNN output width
batch_size = 1        # one sentence
sequence_length = 6   # |ihello| == 6
num_layers = 1        # one-layer rnn

In [5]:
class em_Model(torch.nn.Module):
    """Character classifier: Embedding -> RNN -> Linear.

    Args:
        num_layers: number of stacked RNN layers.
        hidden_size: width of the RNN hidden state (input width of ``fc``).
        vocab_size: number of distinct input ids. Defaults to the
            notebook-level ``input_size``.
        embedding_dim: width of each embedding vector. Defaults to the
            notebook-level ``embedding_size``.
        n_classes: number of output classes. Defaults to the notebook-level
            ``num_classes``.
    """

    def __init__(self, num_layers, hidden_size,
                 vocab_size=None, embedding_dim=None, n_classes=None):
        super(em_Model, self).__init__()
        # Fall back to the notebook-level settings only when the caller did
        # not pass explicit sizes.
        if vocab_size is None:
            vocab_size = input_size
        if embedding_dim is None:
            embedding_dim = embedding_size
        if n_classes is None:
            n_classes = num_classes

        self.num_layers = num_layers
        self.hidden_size = hidden_size
        self.embedding = torch.nn.Embedding(vocab_size, embedding_dim)
        # hidden_size and num_layers come from the arguments so the layer
        # always agrees with the h_0 created in forward() and with fc.
        self.rnn = torch.nn.RNN(input_size=embedding_dim,
                                hidden_size=hidden_size,
                                num_layers=num_layers,
                                batch_first=True)
        self.fc = torch.nn.Linear(hidden_size, n_classes)

    def forward(self, x):
        """Score every time step of a batch of id sequences.

        Args:
            x: integer tensor of ids whose first dimension is the batch.

        Returns:
            Tensor of shape (batch * steps, n_classes) with unnormalised scores.
        """
        h_0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size)

        emb = self.embedding(x)
        # Lay out as (batch, steps, embedding_dim) using the input's own
        # batch size rather than notebook globals.
        emb = emb.reshape(x.size(0), -1, emb.size(-1))

        out, _ = self.rnn(emb, h_0)
        # fc consumes rows of width hidden_size.
        return self.fc(out.reshape(-1, self.hidden_size))

# Instantiate the embedding model from the notebook-level hyperparameters
# and show its layer structure.
model = em_Model(num_layers=num_layers, hidden_size=hidden_size)
print(model)

em_Model(
  (embedding): Embedding(5, 10)
  (rnn): RNN(10, 5, batch_first=True)
  (fc): Linear(in_features=5, out_features=5, bias=True)
)


In [6]:
# Loss and optimizer for the embedding model.
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.1)

for epoch in range(20):
    # Forward pass on the id sequence, then loss against the target ids.
    outputs = model(inputs)
    loss = criterion(outputs, labels)

    # Clear old gradients, backpropagate, update the parameters.
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    # Decode the highest-scoring class of each time step back to characters.
    idx = outputs.max(1)[1]
    idx = idx.data.numpy()
    result_str = []
    for c in idx.squeeze():
        result_str.append(idx2char[c])

    print("epoch: %d, loss: %1.3f" % (epoch + 1, loss.item()))
    print("Predicted string: ", ''.join(result_str))

print("Learning finished!")

epoch: 1, loss: 1.793
Predicted string:  eieeei
epoch: 2, loss: 1.410
Predicted string:  eielll
epoch: 3, loss: 1.237
Predicted string:  lhelll
epoch: 4, loss: 1.072
Predicted string:  hhhlll
epoch: 5, loss: 0.885
Predicted string:  hhhlll
epoch: 6, loss: 0.755
Predicted string:  hhhllo
epoch: 7, loss: 0.651
Predicted string:  ihello
epoch: 8, loss: 0.549
Predicted string:  ihello
epoch: 9, loss: 0.460
Predicted string:  ihello
epoch: 10, loss: 0.393
Predicted string:  ihello
epoch: 11, loss: 0.341
Predicted string:  ihello
epoch: 12, loss: 0.289
Predicted string:  ihello
epoch: 13, loss: 0.239
Predicted string:  ihello
epoch: 14, loss: 0.197
Predicted string:  ihello
epoch: 15, loss: 0.165
Predicted string:  ihello
epoch: 16, loss: 0.138
Predicted string:  ihello
epoch: 17, loss: 0.116
Predicted string:  ihello
epoch: 18, loss: 0.097
Predicted string:  ihello
epoch: 19, loss: 0.081
Predicted string:  ihello
epoch: 20, loss: 0.069
Predicted string:  ihello
Learning finished!
