# RNN, LSTM, GRU 실습

In [11]:
import torch
import numpy as np
import gc

# Pick the compute device used by every `.to(device)` call in this notebook.
# Apple's MPS backend is preferred (the original setting); fall back to CUDA
# and finally to the CPU so the notebook also runs on machines without MPS.
if torch.backends.mps.is_available():
    device = 'mps'
elif torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

# RNN 구현

In [100]:
class SimpleRNN(torch.nn.Module):
    """Single-layer tanh RNN followed by a linear layer on the last time step.

    Args:
        n_inputs: Feature size of each time step (``input_size`` of the RNN).
        n_hidden: Size of the hidden state vector.
        n_outputs: Size of the final output (``out_features`` of the FC layer).
    """

    def __init__(self, n_inputs, n_hidden, n_outputs):
        super().__init__()
        self.D = n_inputs  # input feature size
        self.M = n_hidden  # hidden vector size
        self.K = n_outputs  # output size

        self.rnn = torch.nn.RNN(input_size=self.D,
                                hidden_size=self.M,
                                nonlinearity='tanh',  # activation function
                                batch_first=True)  # batch dimension comes first
        self.fc = torch.nn.Linear(self.M, self.K)  # maps hidden vector -> output

    def forward(self, x):
        """Run the RNN over ``x`` and project the last time step.

        Args:
            x: Batch-first input; ``x.size(0)`` is used as the batch size.

        Returns:
            The FC layer applied to the RNN output at the last time step.
        """
        # Zero initial hidden state. It is allocated from `x` so it always
        # shares x's device and dtype instead of relying on a global `device`.
        h0 = x.new_zeros(1, x.size(0), self.M)

        # `out` holds the hidden state at every time step; the second return
        # value (the final hidden state only) is not needed here.
        out, _ = self.rnn(x, h0)

        # Use only the last time step. For this single-layer RNN the same
        # result is available as self.fc(h_n.squeeze(0)).
        return self.fc(out[:, -1, :])

In [101]:
# Model, loss and optimizer for a toy 2-class sequence classification task.
model = SimpleRNN(n_inputs=2, n_hidden=10, n_outputs=2)
model = model.to(device)
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters())

# One training sample: a batch of 1 sequence with 3 time steps of 2 features,
# labelled with class index 1.
train_x = torch.tensor([[[1, 2], [3, 4], [5, 6]]], dtype=torch.float32).to(device)
train_y = torch.tensor([1]).to(device)

for epoch in range(300):
    # Standard step: reset gradients, forward, loss, backward, update.
    optimizer.zero_grad()
    outputs = model(train_x)
    loss = criterion(outputs, train_y)
    loss.backward()
    optimizer.step()

    # Report the loss on every 30th epoch (1-based).
    if epoch % 30 == 29:
        print('Epoch [{}/{}], Loss: {:.4f}'.format(epoch + 1, 300, loss.item()))

Epoch [30/300], Loss: 0.1511
Epoch [60/300], Loss: 0.0781
Epoch [90/300], Loss: 0.0490
Epoch [120/300], Loss: 0.0335
Epoch [150/300], Loss: 0.0251
Epoch [180/300], Loss: 0.0201
Epoch [210/300], Loss: 0.0167
Epoch [240/300], Loss: 0.0142
Epoch [270/300], Loss: 0.0123
Epoch [300/300], Loss: 0.0107


In [42]:
# Same experiment with a larger n_hidden (10 -> 20); in the recorded run the
# final loss came out lower than with n_hidden=10.

model = SimpleRNN(n_inputs=2, n_hidden=20, n_outputs=2)
model = model.to(device)
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters())

# One training sample: a batch of 1 sequence with 3 time steps of 2 features,
# labelled with class index 1.
train_x = torch.tensor([[[1, 2], [3, 4], [5, 6]]], dtype=torch.float32).to(device)
train_y = torch.tensor([1]).to(device)

for epoch in range(300):
    # Standard step: reset gradients, forward, loss, backward, update.
    optimizer.zero_grad()
    outputs = model(train_x)
    loss = criterion(outputs, train_y)
    loss.backward()
    optimizer.step()

    # Report the loss on every 30th epoch (1-based).
    if epoch % 30 == 29:
        print('Epoch [{}/{}], Loss: {:.4f}'.format(epoch + 1, 300, loss.item()))

Epoch [30/300], Loss: 0.3873
Epoch [60/300], Loss: 0.1163
Epoch [90/300], Loss: 0.0513
Epoch [120/300], Loss: 0.0298
Epoch [150/300], Loss: 0.0215
Epoch [180/300], Loss: 0.0169
Epoch [210/300], Loss: 0.0139
Epoch [240/300], Loss: 0.0116
Epoch [270/300], Loss: 0.0099
Epoch [300/300], Loss: 0.0086


# LSTM 실습

In [102]:
class LSTM(torch.nn.Module):
    """Single-layer LSTM followed by a linear layer on the last time step.

    Args:
        n_inputs: Feature size of each time step (``input_size`` of the LSTM).
        n_hidden: Size of the hidden and cell state vectors.
        n_outputs: Size of the final output (``out_features`` of the FC layer).
    """

    def __init__(self, n_inputs, n_hidden, n_outputs):
        super().__init__()
        self.D = n_inputs  # input feature size
        self.M = n_hidden  # hidden vector size
        self.K = n_outputs  # output size

        self.lstm = torch.nn.LSTM(input_size=self.D,
                                  hidden_size=self.M,
                                  batch_first=True)  # batch dimension comes first
        self.fc = torch.nn.Linear(self.M, self.K)

    def forward(self, x):
        """Run the LSTM over ``x`` and project the last time step.

        Args:
            x: Batch-first input; ``x.size(0)`` is used as the batch size.

        Returns:
            The FC layer applied to the LSTM output at the last time step.
        """
        # Zero initial states, allocated from `x` so they always share x's
        # device and dtype instead of relying on a global `device`.
        h0 = x.new_zeros(1, x.size(0), self.M)  # initial hidden state
        c0 = x.new_zeros(1, x.size(0), self.M)  # initial cell state

        # `out` holds the hidden state at every time step; the final
        # (hidden, cell) pair is not needed here.
        out, _ = self.lstm(x, (h0, c0))

        return self.fc(out[:, -1, :])

In [104]:
# Release the previous model before building the next one.
del model
gc.collect()

# LSTM on the same toy sample (train_x / train_y from the earlier cell).
model = LSTM(n_inputs=2, n_hidden=10, n_outputs=2).to(device)
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters())

for epoch in range(300):
    # Reset gradients through the optimizer, i.e. for exactly the parameters
    # it is about to update (same convention as the SimpleRNN cells).
    optimizer.zero_grad()
    outputs = model(train_x)
    loss = criterion(outputs, train_y)
    loss.backward()
    optimizer.step()

    if (epoch+1) % 30 == 0:  # print the loss every 30 epochs
        print ('Epoch [{}/{}], Loss: {:.4f}'.format(epoch+1, 300, loss.item()))

Epoch [30/300], Loss: 0.3284
Epoch [60/300], Loss: 0.1779
Epoch [90/300], Loss: 0.0895
Epoch [120/300], Loss: 0.0499
Epoch [150/300], Loss: 0.0316
Epoch [180/300], Loss: 0.0219
Epoch [210/300], Loss: 0.0162
Epoch [240/300], Loss: 0.0126
Epoch [270/300], Loss: 0.0102
Epoch [300/300], Loss: 0.0085


In [91]:
class LSTM(torch.nn.Module):
    """Single-layer LSTM whose FC head reads the final hidden state ``h_n``.

    Variant of the LSTM model that feeds ``h_n.squeeze(0)`` to the linear
    layer instead of ``out[:, -1, :]``, and prints both shapes on every
    forward pass to show that they match.

    NOTE(review): the recorded run of this variant on 'mps' ended in a
    RuntimeError during training, while the ``out[:, -1, :]`` variant trained
    fine. Presumably a backend limitation when only ``h_n`` feeds the loss;
    confirm on your PyTorch version.

    Args:
        n_inputs: Feature size of each time step (``input_size`` of the LSTM).
        n_hidden: Size of the hidden and cell state vectors.
        n_outputs: Size of the final output (``out_features`` of the FC layer).
    """

    def __init__(self, n_inputs, n_hidden, n_outputs):
        super().__init__()
        self.D = n_inputs  # input feature size
        self.M = n_hidden  # hidden vector size
        self.K = n_outputs  # output size

        self.lstm = torch.nn.LSTM(input_size=self.D,
                                  hidden_size=self.M,
                                  batch_first=True)  # batch dimension comes first
        self.fc = torch.nn.Linear(self.M, self.K)

    def forward(self, x):
        """Run the LSTM over ``x`` and project the final hidden state.

        Args:
            x: Batch-first input; ``x.size(0)`` is used as the batch size.

        Returns:
            The FC layer applied to the final hidden state.
        """
        # Zero initial states, allocated from `x` so they always share x's
        # device and dtype instead of relying on a global `device`.
        h0 = x.new_zeros(1, x.size(0), self.M)  # initial hidden state
        c0 = x.new_zeros(1, x.size(0), self.M)  # initial cell state

        out, (h_n, _) = self.lstm(x, (h0, c0))

        # Demonstration output: both ways of taking the last step have the
        # same shape.
        print(out[:, -1, :].shape)
        print(h_n.squeeze(0).shape)

        # squeeze(0) drops the leading layer dimension (size 1 here).
        return self.fc(h_n.squeeze(0))

In [92]:
# Release the previous model before building the next one.
del model
gc.collect()

# Train the h_n-based LSTM variant on the same toy sample.
# NOTE(review): the recorded output of this cell stops after the first
# forward pass with a RuntimeError - confirm whether it still reproduces.
model = LSTM(n_inputs=2, n_hidden=10, n_outputs=2)
model = model.to(device)
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters())

for epoch in range(300):
    optimizer.zero_grad()
    outputs = model(train_x)
    print(outputs)  # show the raw model output on every step
    loss = criterion(outputs, train_y)
    loss.backward()
    optimizer.step()

    # Report the loss on every 30th epoch (1-based).
    if epoch % 30 == 29:
        print ('Epoch [{}/{}], Loss: {:.4f}'.format(epoch+1, 300, loss.item()))

torch.Size([1, 10])
torch.Size([1, 10])
tensor([[-0.0360,  0.2652]], device='mps:0', grad_fn=<LinearBackward0>)


RuntimeError: Expected a proper Tensor but got None (or an undefined Tensor in C++) for argument #0 'grad_y'

# GRU 구현

In [95]:
class GRU(torch.nn.Module):
    """Single-layer GRU followed by a linear layer on the last time step.

    Args:
        n_inputs: Feature size of each time step (``input_size`` of the GRU).
        n_hidden: Size of the hidden state vector.
        n_outputs: Size of the final output (``out_features`` of the FC layer).
    """

    def __init__(self, n_inputs, n_hidden, n_outputs):
        super().__init__()
        self.D = n_inputs  # input feature size
        self.M = n_hidden  # hidden vector size
        self.K = n_outputs  # output size

        self.gru = torch.nn.GRU(input_size=self.D,
                                hidden_size=self.M,
                                batch_first=True)  # batch dimension comes first
        self.fc = torch.nn.Linear(self.M, self.K)

    def forward(self, x):
        """Run the GRU over ``x`` and project the last time step.

        Args:
            x: Batch-first input; ``x.size(0)`` is used as the batch size.

        Returns:
            The FC layer applied to the GRU output at the last time step.
        """
        # Zero initial hidden state, allocated from `x` so it always shares
        # x's device and dtype instead of relying on a global `device`.
        h0 = x.new_zeros(1, x.size(0), self.M)

        # `out` holds the hidden state at every time step; the final hidden
        # state returned alongside it is not needed here.
        out, _ = self.gru(x, h0)

        return self.fc(out[:, -1, :])

In [96]:
# Release the previous model before building the next one.
del model
gc.collect()

# Train the GRU
model = GRU(n_inputs=2, n_hidden=20, n_outputs=2).to(device)  # create the GRU model instance
criterion = torch.nn.CrossEntropyLoss()  # cross-entropy as the loss function
optimizer = torch.optim.Adam(model.parameters())  # Adam as the optimizer

for epoch in range(300):
    # Reset gradients through the optimizer, i.e. for exactly the parameters
    # it is about to update (same convention as the SimpleRNN cells).
    optimizer.zero_grad()
    outputs = model(train_x)
    loss = criterion(outputs, train_y)  # loss against the single toy label
    loss.backward()
    optimizer.step()

    if (epoch+1) % 30 == 0:  # print the loss every 30 epochs
        print ('Epoch [{}/{}], Loss: {:.4f}'.format(epoch+1, 300, loss.item()))

Epoch [30/300], Loss: 0.2048
Epoch [60/300], Loss: 0.0773
Epoch [90/300], Loss: 0.0344
Epoch [120/300], Loss: 0.0195
Epoch [150/300], Loss: 0.0130
Epoch [180/300], Loss: 0.0096
Epoch [210/300], Loss: 0.0075
Epoch [240/300], Loss: 0.0061
Epoch [270/300], Loss: 0.0051
Epoch [300/300], Loss: 0.0043


In [97]:
class GRU(torch.nn.Module):
    """Single-layer GRU whose FC head reads the final hidden state ``h_n``.

    Variant of the GRU model that feeds ``h_n.squeeze(0)`` to the linear
    layer instead of ``out[:, -1, :]``.

    Args:
        n_inputs: Feature size of each time step (``input_size`` of the GRU).
        n_hidden: Size of the hidden state vector.
        n_outputs: Size of the final output (``out_features`` of the FC layer).
    """

    def __init__(self, n_inputs, n_hidden, n_outputs):
        super().__init__()
        self.D = n_inputs  # input feature size
        self.M = n_hidden  # hidden vector size
        self.K = n_outputs  # output size

        self.gru = torch.nn.GRU(input_size=self.D,
                                hidden_size=self.M,
                                batch_first=True)  # batch dimension comes first
        self.fc = torch.nn.Linear(self.M, self.K)

    def forward(self, x):
        """Run the GRU over ``x`` and project the final hidden state.

        Args:
            x: Batch-first input; ``x.size(0)`` is used as the batch size.

        Returns:
            The FC layer applied to the final hidden state.
        """
        # Zero initial hidden state, allocated from `x` so it always shares
        # x's device and dtype instead of relying on a global `device`.
        h0 = x.new_zeros(1, x.size(0), self.M)

        # Only the final hidden state is used; the per-step outputs are
        # discarded.
        _, h_n = self.gru(x, h0)

        # squeeze(0) drops the leading layer dimension (size 1 here).
        return self.fc(h_n.squeeze(0))

In [98]:
# Release the previous model before building the next one.
del model
gc.collect()

# Train the GRU
model = GRU(n_inputs=2, n_hidden=20, n_outputs=2).to(device)  # create the GRU model instance
criterion = torch.nn.CrossEntropyLoss()  # cross-entropy as the loss function
optimizer = torch.optim.Adam(model.parameters())  # Adam as the optimizer

for epoch in range(300):
    # Reset gradients through the optimizer, i.e. for exactly the parameters
    # it is about to update (same convention as the SimpleRNN cells).
    optimizer.zero_grad()
    outputs = model(train_x)
    loss = criterion(outputs, train_y)  # loss against the single toy label
    loss.backward()
    optimizer.step()

    if (epoch+1) % 30 == 0:  # print the loss every 30 epochs
        print ('Epoch [{}/{}], Loss: {:.4f}'.format(epoch+1, 300, loss.item()))

Epoch [30/300], Loss: 0.1881
Epoch [60/300], Loss: 0.0653
Epoch [90/300], Loss: 0.0274
Epoch [120/300], Loss: 0.0150
Epoch [150/300], Loss: 0.0099
Epoch [180/300], Loss: 0.0072
Epoch [210/300], Loss: 0.0056
Epoch [240/300], Loss: 0.0045
Epoch [270/300], Loss: 0.0037
Epoch [300/300], Loss: 0.0032
