---
# Import
---



In [1]:
import torch # 전체 라이브러리 
import torch.nn as nn #각 레이어 / 손실함수 등
import torch.optim as optim # 옵티마이저
import torch.nn.functional as F # 활성화 함수 / nn과 비슷
from torch.utils.data import DataLoader # 데이터세트 관리용 
import torchvision.datasets as datasets # Mnist, cifar10과 같은 데이터 불러오기
import torchvision.transforms as transforms # 비전 변환
from tqdm import tqdm

---
# set device
---

In [2]:
# Prefer the GPU when CUDA is available; otherwise fall back to the CPU.
if torch.cuda.is_available():
  device = torch.device('cuda')
else:
  device = torch.device('cpu')

---
# Hyperparameters
----

In [3]:
# Input geometry: every step of a sequence carries `input_size` features and a
# sample is `seqence_length` steps long.
# (The spelling `seqence_length` is kept because the model cells read this name.)
input_size = 28
seqence_length = 28

# Network size
hidden_size = 256  # width of the hidden state
num_layers = 2     # number of stacked recurrent layers
num_classes = 10

# Optimisation settings
batch_size = 64
learning_rate = 0.001
num_epochs = 2

---
# Create RNN
---

In [12]:
class RNN(nn.Module):
  """Vanilla RNN classifier that scores a sequence from ALL of its time steps.

  The hidden states of the last recurrent layer at every time step are
  flattened into one vector and passed to a single linear layer, so the input
  must always contain exactly ``sequence_length`` steps.

  Args:
    input_size: number of features per time step.
    hidden_size: width of the hidden state.
    num_layers: number of stacked recurrent layers.
    num_classes: number of output scores.
    sequence_length: number of time steps per sample. When omitted, the
      notebook-level hyperparameter ``seqence_length`` is used, which keeps
      the original four-argument call working.
  """

  def __init__(self, input_size, hidden_size, num_layers, num_classes, sequence_length=None):
    # Zero-argument super() keeps working even if the name `RNN` is rebound
    # by another cell after this class has been defined.
    super().__init__()
    if sequence_length is None:
      # Resolved lazily so the class can also be built without the global
      # when the length is passed explicitly.
      sequence_length = seqence_length
    self.hidden_size = hidden_size
    self.num_layers = num_layers
    self.sequence_length = sequence_length
    # To try a GRU instead, nn.GRU accepts the same arguments as nn.RNN here.
    self.rnn = nn.RNN(input_size, hidden_size, num_layers, batch_first = True)
    # The classifier consumes the concatenation of every step's hidden state.
    self.fc = nn.Linear(hidden_size * sequence_length, num_classes)

  def forward(self, x):
    """Return class scores of shape (batch, num_classes) for a batch-first input ``x``."""
    # Zero initial hidden state, created on the same device / dtype as the
    # input so the module does not depend on a notebook-global `device`.
    h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size,
                     device = x.device, dtype = x.dtype)

    # `out` holds the last layer's hidden state for every time step; the
    # final hidden state returned alongside it is not needed.
    out, _ = self.rnn(x, h0)
    # Flatten (batch, steps, hidden) -> (batch, steps * hidden) so that all
    # time steps contribute to the prediction.
    out = out.reshape(out.shape[0], -1)
    return self.fc(out)

- LSTM

In [22]:
class RNN(nn.Module):
  """LSTM classifier that scores a sequence from its LAST time step only.

  NOTE: this cell reuses the class name ``RNN``; running it replaces the
  vanilla-RNN class defined earlier in the notebook under the same name.

  Args:
    input_size: number of features per time step.
    hidden_size: width of the hidden and cell states.
    num_layers: number of stacked LSTM layers.
    num_classes: number of output scores.
  """

  def __init__(self, input_size, hidden_size, num_layers, num_classes):
    # Zero-argument super() does not depend on which class the global name
    # `RNN` currently points to.
    super().__init__()
    self.hidden_size = hidden_size
    self.num_layers = num_layers
    self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first = True)
    # forward() feeds only the last step's hidden state (hidden_size features)
    # to the classifier, so the layer must be sized accordingly. Sizing it as
    # hidden_size * sequence length makes the matrix multiply fail.
    self.fc = nn.Linear(hidden_size, num_classes)

  def forward(self, x):
    """Return class scores of shape (batch, num_classes) for a batch-first input ``x``."""
    # Zero initial hidden and cell states (an LSTM needs both), created on
    # the same device / dtype as the input.
    state_shape = (self.num_layers, x.size(0), self.hidden_size)
    h0 = torch.zeros(state_shape, device = x.device, dtype = x.dtype)
    c0 = torch.zeros(state_shape, device = x.device, dtype = x.dtype)

    # `out` holds the last layer's hidden state for every time step; the
    # final (h, c) pair returned alongside it is not needed.
    out, _ = self.lstm(x, (h0, c0))
    # Classify from the hidden state at the final time step.
    return self.fc(out[:, -1, :])

---
# Load Data
---

In [6]:
# Training split of MNIST, stored under `root` (download requested if needed).
train_dataset = datasets.MNIST(
    root='/content/drive/MyDrive/Pytorch/data',
    train=True,
    download=True,
    transform=transforms.ToTensor(),  # convert every sample to a tensor
)

# The DataLoader decides how samples are served: here in shuffled
# mini-batches of `batch_size` samples.
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

In [7]:
# Held-out (test) split of MNIST, stored under the same `root` as the training data.
test_dataset = datasets.MNIST(root = '/content/drive/MyDrive/Pytorch/data', train=False,
                               transform = transforms.ToTensor(), # convert every sample to a tensor
                               download = True)

# Evaluation only counts correct predictions, so shuffling adds nothing;
# a fixed order keeps evaluation runs deterministic.
test_loader = DataLoader(dataset = test_dataset, batch_size = batch_size, shuffle = False)

---
# Initialize network
---

In [13]:
# Build the network from the hyperparameters and move it to the selected device.
model = RNN(
    input_size=input_size,
    hidden_size=hidden_size,
    num_layers=num_layers,
    num_classes=num_classes,
)
model = model.to(device)

---
# Loss and optimizer
----

In [14]:
# Loss function: cross-entropy between the class scores and the target labels.
criterion = nn.CrossEntropyLoss()

# Optimizer: Adam over every model parameter, using the configured learning rate.
optimizer = optim.Adam(params=model.parameters(), lr=learning_rate)

---
# Train Network
----

In [15]:
for epoch in tqdm(range(num_epochs)):
  for batch_idx, (data, targets) in enumerate(train_loader):
    # Drop the size-1 dimension at index 1 (needed for the MNIST batches used
    # here) and place inputs and labels on the selected device.
    data = data.squeeze(1).to(device)
    targets = targets.to(device)

    # Clear the gradients left over from the previous step.
    optimizer.zero_grad()

    # Forward pass
    scores = model(data)
    loss = criterion(scores, targets)

    # Backward pass, followed by the optimizer's weight update.
    loss.backward()
    optimizer.step()

100%|██████████| 2/2 [01:50<00:00, 55.39s/it]


---
# Check accuracy on training & test data to see how good our model is
---

In [20]:
def check_accuracy(loader, model):
  """Print how many samples yielded by ``loader`` the model classifies correctly.

  Args:
    loader: iterable of ``(inputs, labels)`` batches whose ``dataset`` exposes
      a boolean ``train`` attribute (used only to choose the header line).
    model: ``nn.Module`` mapping a batch of inputs to per-class scores.

  Returns:
    None. The header, the counts and the accuracy percentage are printed.
  """
  if loader.dataset.train:
    print('Checking accuracy on training data')
  else:
    print('Checking accracy on test data')

  # Evaluate on the device that holds the model's weights instead of relying
  # on a notebook-global `device`; a parameterless model leaves the batches
  # where they already are.
  first_param = next(model.parameters(), None)
  target_device = first_param.device if first_param is not None else None

  num_correct = 0
  num_samples = 0

  # Remember the current mode so a model that was already in eval mode is not
  # switched back to training once the check is done.
  was_training = model.training
  model.eval()
  try:
    with torch.no_grad():
      for x , y in loader:
        # Drop the size-1 dimension at index 1, as the training loop does.
        x = x.squeeze(1)
        if target_device is not None:
          x = x.to(device = target_device)
          y = y.to(device = target_device)

        score = model(x)
        # Index of the highest score along dim 1 is the predicted class.
        _, predictions = score.max(1)
        # .item() keeps the running total a plain Python int.
        num_correct += (predictions ==  y).sum().item()
        num_samples += predictions.size(0)
  finally:
    # Restore the original mode even if evaluation raised.
    model.train(was_training)

  # An empty loader would otherwise divide by zero.
  accuracy = float(num_correct)/float(num_samples)*100 if num_samples else 0.0
  print(f'got {num_correct} / {num_samples} with accuracy {accuracy:.2f}')

- RNN

In [21]:
check_accuracy(train_loader ,model)
check_accuracy(test_loader ,model)

Checking accuracy on training data
got 57977 / 60000 with accuracy 96.63
Checking accracy on test data
got 9644 / 10000 with accuracy 96.44


-  LSTM

In [24]:
check_accuracy(train_loader ,model)
check_accuracy(test_loader ,model)

Checking accuracy on training data
got 58587 / 60000 with accuracy 97.65
Checking accracy on test data
got 9730 / 10000 with accuracy 97.30
