https://blog.naver.com/dbwjd516/222917450093 참조


RNN과 LSTM은 원래 문장과 같은 순차(시퀀스) 데이터를 처리하는 데 사용하는 모델이다.

이미지에 적용하기 위해 이미지의 픽셀값을 가로 방향으로 한 줄(행)씩 나누어, 위에서부터 순차적으로 RNN/LSTM에 입력한다. 따라서 시퀀스 길이(seq_length)는 행의 개수, 즉 이미지의 세로 길이인 28이다.

흑백 사진이므로 RGB 채널이 없고 채널 수가 1이다. 한 행의 픽셀 28개가 한 시점(time step)의 입력이 되므로 input_dim = 28*1 = 28

#import module

In [1]:
import torch
import torch.nn as nn
import numpy as np
import pandas as pd
import tensorflow as tf
import torchvision
import torchvision.transforms as transforms

# Select the compute device: CUDA when PyTorch reports an available GPU,
# otherwise the CPU. The models and batches below are moved to this device.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

#define model -> RNN

In [3]:
class custom_rnn(nn.Module):
  """Sequence classifier: a vanilla RNN followed by a linear output layer.

  Each sample is a sequence of feature vectors (in this file, the 28 pixel
  rows of an MNIST image with 28 values per row). The RNN output at the
  last time step is projected to one logit per class.

  The module does not choose a device itself; move it with ``.to(...)``
  after construction, as the call sites in this file already do.

  Args:
    input_dim: Number of features per time step.
    hidden_dim: Size of the RNN hidden state.
    num_layers: Number of stacked RNN layers.
    num_classes: Number of output logits.
  """

  def __init__(self,input_dim=28*1,hidden_dim=72,num_layers=1,num_classes=10):
    super().__init__()
    self.input_dim = input_dim
    self.hidden_dim = hidden_dim
    self.num_layers = num_layers
    self.num_classes = num_classes

    # batch_first=True: inputs/outputs are laid out as (batch, seq_len, features).
    self.rnn = nn.RNN(self.input_dim, self.hidden_dim, self.num_layers, batch_first=True)
    self.output_layer = nn.Linear(self.hidden_dim, self.num_classes)

  def forward(self,x):
    """Return class logits of shape (batch, num_classes).

    Args:
      x: Tensor of shape (batch, seq_len, input_dim).
    """
    # No explicit initial hidden state: nn.RNN starts from zeros when none is
    # given, created to match the input. This avoids the device/dtype mismatch
    # that a hand-built zero tensor on a fixed device causes.
    rnn_out, _ = self.rnn(x)

    # Keep only the last time step: (batch, seq_len, hidden) -> (batch, hidden).
    last_step = rnn_out[:, -1]

    return self.output_layer(last_step)

#define model -> LSTM

In [2]:
class custom_lstm(nn.Module):
  """Sequence classifier: an LSTM followed by a linear output layer.

  Each sample is a sequence of feature vectors (in this file, the 28 pixel
  rows of an MNIST image with 28 values per row). The LSTM output at the
  last time step is projected to one logit per class.

  The module does not choose a device itself; move it with ``.to(...)``
  after construction, as the call sites in this file already do.

  Args:
    input_dim: Number of features per time step.
    hidden_dim: Size of the LSTM hidden state.
    num_layers: Number of stacked LSTM layers.
    num_classes: Number of output logits.
  """

  def __init__(self,input_dim=28,hidden_dim=72,num_layers=1,num_classes=10):
    super().__init__()
    self.input_dim = input_dim
    self.hidden_dim = hidden_dim
    self.num_layers = num_layers
    self.num_classes = num_classes

    # batch_first=True: inputs/outputs are laid out as (batch, seq_len, features).
    self.lstm = nn.LSTM(self.input_dim, self.hidden_dim, self.num_layers, batch_first=True)
    self.output_layer = nn.Linear(self.hidden_dim, self.num_classes)

  def forward(self,x):
    """Return class logits of shape (batch, num_classes).

    Args:
      x: Tensor of shape (batch, seq_len, input_dim).
    """
    # The final (hidden, cell) state pair is not needed for classification.
    lstm_out, _ = self.lstm(x)

    # Keep only the last time step: (batch, seq_len, hidden) -> (batch, hidden).
    last_step = lstm_out[:, -1]

    return self.output_layer(last_step)

#load data

In [3]:
# Preprocessing applied to every MNIST image: convert to a tensor, then
# normalize the single grayscale channel with mean 0.5 and std 0.5.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5,), std=(0.5,)),
])

In [4]:
# Training split: stored under ./data (downloaded if missing), served in
# shuffled mini-batches of 16 by two worker processes.
trainset = torchvision.datasets.MNIST(
    root='./data', train=True, download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(
    trainset, batch_size=16, shuffle=True, num_workers=2)

# Test split: same preprocessing and batch size, but served in fixed order.
testset = torchvision.datasets.MNIST(
    root='./data', train=False, download=True, transform=transform)
testloader = torch.utils.data.DataLoader(
    testset, batch_size=16, shuffle=False, num_workers=2)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ./data/MNIST/raw/train-images-idx3-ubyte.gz


  0%|          | 0/9912422 [00:00<?, ?it/s]

Extracting ./data/MNIST/raw/train-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ./data/MNIST/raw/train-labels-idx1-ubyte.gz


  0%|          | 0/28881 [00:00<?, ?it/s]

Extracting ./data/MNIST/raw/train-labels-idx1-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw/t10k-images-idx3-ubyte.gz


  0%|          | 0/1648877 [00:00<?, ?it/s]

Extracting ./data/MNIST/raw/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz


  0%|          | 0/4542 [00:00<?, ?it/s]

Extracting ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw



#define eval_model

In [5]:
def eval_model(model,criterion,optimizer,num_epochs=5,train_loader=None,test_loader=None):
  """Train ``model`` and print train/validation metrics after every epoch.

  Every batch is reshaped to (batch, 28, 28) before it is fed to the model,
  i.e. each 28x28 grayscale image becomes a sequence of 28 rows with 28
  features per row.

  Args:
    model: Network mapping a (batch, 28, 28) tensor to per-class scores of
      shape (batch, num_classes).
    criterion: Loss callable invoked as ``criterion(outputs, labels)``.
    optimizer: Optimizer that updates ``model``'s parameters.
    num_epochs: Number of passes over the training data.
    train_loader: Iterable of ``(inputs, labels)`` training batches. Falls
      back to the module-level ``trainloader`` when omitted.
    test_loader: Iterable of ``(inputs, labels)`` validation batches. Falls
      back to the module-level ``testloader`` when omitted.

  Returns:
    None. One summary line per epoch is printed.
  """
  if train_loader is None:
    train_loader = trainloader
  if test_loader is None:
    test_loader = testloader

  # Send batches to wherever the model's weights live, so the function keeps
  # working if the model is moved. A parameter-free model falls back to the
  # module-level ``device``.
  first_param = next(model.parameters(), None)
  run_device = first_param.device if first_param is not None else device

  for epoch in range(num_epochs):
    # ---- Train for one epoch ----
    model.train()
    train_loss = 0.0
    train_correct = 0
    train_total = 0
    for inputs, labels in train_loader:
      # (batch, 1, 28, 28) -> (batch, seq_len=28, input_dim=28)
      inputs = inputs.view(-1, 28, 28)
      inputs, labels = inputs.to(run_device), labels.to(run_device)

      optimizer.zero_grad()
      outputs = model(inputs)
      loss = criterion(outputs, labels)
      loss.backward()
      optimizer.step()

      # Weight the batch loss by the batch size so the epoch figure is a
      # per-sample mean even when the last batch is smaller.
      train_loss += loss.item() * inputs.size(0)
      predicted = outputs.detach().argmax(dim=1)
      train_correct += (predicted == labels).sum().item()
      train_total += labels.size(0)

    # ---- Validate ----
    model.eval()
    val_correct = 0
    val_total = 0
    with torch.no_grad():
      for inputs, labels in test_loader:
        inputs = inputs.view(-1, 28, 28)
        inputs, labels = inputs.to(run_device), labels.to(run_device)
        predicted = model(inputs).argmax(dim=1)
        val_correct += (predicted == labels).sum().item()
        val_total += labels.size(0)

    # Divide by the number of samples actually seen (not the dataset length),
    # and report 0.0 rather than crash when a loader yields nothing.
    train_loss = train_loss / train_total if train_total else 0.0
    train_acc = train_correct / train_total if train_total else 0.0
    val_acc = val_correct / val_total if val_total else 0.0

    print(f'Epoch {epoch+1}/{num_epochs}: Train Loss: {train_loss:.4f} Train Acc: {train_acc:.4f} Val Acc: {val_acc:.4f}')

#train and see result -> RNN

In [9]:
import time

# Train a freshly initialised RNN classifier with SGD + momentum.
model = custom_rnn().to(device)
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.001, momentum=0.9)

# perf_counter() is a monotonic high-resolution clock, so the elapsed time
# cannot be distorted by system clock adjustments the way time.time() can.
start = time.perf_counter()
eval_model(model,criterion,optimizer)
fin = time.perf_counter()
print(f"total time: {fin-start:.2f} second")

Epoch 1/5: Train Loss: 1.4179 Train Acc: 0.4970 Val Acc: 0.7661
Epoch 2/5: Train Loss: 0.5673 Train Acc: 0.8226 Val Acc: 0.8835
Epoch 3/5: Train Loss: 0.3712 Train Acc: 0.8938 Val Acc: 0.9130
Epoch 4/5: Train Loss: 0.2957 Train Acc: 0.9166 Val Acc: 0.9271
Epoch 5/5: Train Loss: 0.2508 Train Acc: 0.9305 Val Acc: 0.9328
total time: 146.91 second


In [6]:
# Build a fresh RNN model only to report how many parameters it has.
model = custom_rnn().to(device)
num_params = sum(map(torch.numel, model.parameters()))
print(f"Number of parameters: {num_params}")

Number of parameters: 8074


#train and see result -> LSTM

In [6]:
import time

# Train a freshly initialised LSTM classifier with the same optimizer
# settings as the RNN run, so the two results are comparable.
model = custom_lstm().to(device)
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.001, momentum=0.9)

# perf_counter() is a monotonic high-resolution clock, so the elapsed time
# cannot be distorted by system clock adjustments the way time.time() can.
start = time.perf_counter()
eval_model(model,criterion,optimizer)
fin = time.perf_counter()
print(f"total time: {fin-start:.2f} second")

Epoch 1/5: Train Loss: 1.7391 Train Acc: 0.4046 Val Acc: 0.7298
Epoch 2/5: Train Loss: 0.4450 Train Acc: 0.8650 Val Acc: 0.9125
Epoch 3/5: Train Loss: 0.2207 Train Acc: 0.9361 Val Acc: 0.9501
Epoch 4/5: Train Loss: 0.1626 Train Acc: 0.9521 Val Acc: 0.9673
Epoch 5/5: Train Loss: 0.1290 Train Acc: 0.9617 Val Acc: 0.9716
total time: 152.20 second


In [28]:
# Build a fresh LSTM model only to report how many parameters it has.
model = custom_lstm().to(device)
num_params = sum(map(torch.numel, model.parameters()))
print(f"Number of parameters: {num_params}")

Number of parameters: 30106
