In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F

import torchvision.transforms as transforms
from torchvision.datasets import MNIST, CIFAR10, CIFAR100
from torch.utils.data import DataLoader

import numpy as np
import matplotlib.pyplot as plt

In [2]:
path = './datasets/'

transform = transforms.Compose([transforms.ToTensor()])

train_data = MNIST(root=path, train=True, transform=transform, download=True)
test_data = MNIST(root=path, train=False, transform=transform, download=True)

batch_size = 100

train_loader = DataLoader(dataset=train_data, batch_size=batch_size, shuffle=True, num_workers=4)
test_loader = DataLoader(dataset=test_data, batch_size=batch_size, shuffle=False, num_workers=4)

print(train_data)
print(test_data)


Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ./datasets/MNIST/raw/train-images-idx3-ubyte.gz


100%|██████████| 9912422/9912422 [00:00<00:00, 79557644.57it/s]


Extracting ./datasets/MNIST/raw/train-images-idx3-ubyte.gz to ./datasets/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ./datasets/MNIST/raw/train-labels-idx1-ubyte.gz


100%|██████████| 28881/28881 [00:00<00:00, 87273554.63it/s]


Extracting ./datasets/MNIST/raw/train-labels-idx1-ubyte.gz to ./datasets/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ./datasets/MNIST/raw/t10k-images-idx3-ubyte.gz


100%|██████████| 1648877/1648877 [00:00<00:00, 27526483.70it/s]


Extracting ./datasets/MNIST/raw/t10k-images-idx3-ubyte.gz to ./datasets/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ./datasets/MNIST/raw/t10k-labels-idx1-ubyte.gz


100%|██████████| 4542/4542 [00:00<00:00, 14710833.03it/s]

Extracting ./datasets/MNIST/raw/t10k-labels-idx1-ubyte.gz to ./datasets/MNIST/raw






Dataset MNIST
    Number of datapoints: 60000
    Root location: ./datasets/
    Split: Train
    StandardTransform
Transform: Compose(
               ToTensor()
           )
Dataset MNIST
    Number of datapoints: 10000
    Root location: ./datasets/
    Split: Test
    StandardTransform
Transform: Compose(
               ToTensor()
           )




In [3]:
_, seq_len, input_size = train_data[0][0].shape # (1,28,28)
output_size = len(train_data.classes)

만약 데이터의 길이가 다른 경우 (MNIST는 데이터의 길이가 정해져 있음)
-> rnn_padded_sepuence : 길이를 자동으로 변환해주는 함수

In [4]:
hidden_size = input_size*2
num_layers = 4
batch_first = True
bidirectional = True

model_name = 'RNN'

In [18]:
class RNNClassifier(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers=1, batch_first=True, bidirectional=False):
        super().__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.batch_first = batch_first
        self.bidirectional = bidirectional

        if self.bidirectional:
            self.direction = 2
        else:
            self.direction = 1

        self.seq = nn.RNN(input_size=self.input_size,
                          hidden_size=self.hidden_size,
                          num_layers=self.num_layers,
                          batch_first=self.batch_first,
                          bidirectional=self.bidirectional)
        self.fc = nn.Linear(self.hidden_size*self.direction, output_size)

    def forward(self, x):
        x = x.reshape(-1, seq_len, self.input_size) # cell에서 있는 변환을 사용하지 않아도 된다.
        hidden_state = torch.zeros(self.direction*self.num_layers,batch_size, self.hidden_size).to(device) # inital hidden을 세팅해준다. h_0
        out, hidden = self.seq(x, hidden_state.detach().to(device)) # detach : nn는 그래프 구조로 이뤄져 있다. 이는 방향이 정해져 있음을 의미한다. detach는 역전파할 때 오류를 막는다.
        # out, hidden : 최상단 층, 각각의 층에서의 last hidden
        out = out[:,-1,:].squeeze() # 가장 우측 상단에 있는 hidden 데이터를 뽑아온다.
        # squeeze는 필요없는 1 차원을 지운다.
        out = self.fc(out)
        return out

In [19]:
if model_name == 'RNN':
    classifier = RNNClassifier
elif model_name == 'LSTM':
    classifier = LSTMClassifier
else:
    classifier = GRUClassifier

In [9]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [20]:
model = classifier(input_size, hidden_size, num_layers, batch_first, bidirectional).to(device)
loss = nn.CrossEntropyLoss(reduction='sum')
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3, weight_decay=1e-3)

In [21]:
num_epoch = 10
train_loss_lst, test_loss_lst = list(), list()

for i in range(num_epoch):
    # training
    model.train()

    total_loss = 0
    cnt = 0

    for batch_idx, (x,y) in enumerate(train_loader):

        x,y = x.to(device), y.to(device)
        y_est = model.forward(x)
        cost = loss(y_est, y)

        total_loss += cost.item()

        optimizer.zero_grad()
        cost.backward()
        optimizer.step()

        pred = torch.argmax(y_est, dim=1)
        cnt += (pred == y).sum().item()

    acc = cnt / len(train_data)
    ave_loss = total_loss / len(train_data)

    train_loss_lst.append(ave_loss)

    if i % 1 == 0:
        print(f"\nEpoch {i} Train : {ave_loss:.3f} / {acc:.3f}")

    #testing
    model.eval()

    total_loss = 0
    cnt = 0

    with torch.no_grad():
        for batch, (x,y) in enumerate(test_loader):

            x, y = x.to(device), y.to(device)

            y_est = model.forward(x)
            pred = torch.argmax(y_est, dim=1)

            total_loss += cost.item()

        acc = cnt / len(test_data)
        ave_loss = total_loss / len(test_data)

        test_loss_lst.append(ave_loss)

        if i % 1 == 0:
            print(f"Epoch {i} Test : {ave_loss:.3f} / {acc:.3f}")

print()
num_parameter = 0
for parameter in model.parameters():
    print(parameter.shape)
    num_parameter += np.prod(parameter.size())
print(num_parameter)




Epoch 0 Train : 0.761 / 0.741
Epoch 0 Test : 0.426 / 0.000

Epoch 1 Train : 0.276 / 0.914
Epoch 1 Test : 0.096 / 0.000

Epoch 2 Train : 0.202 / 0.938
Epoch 2 Test : 0.246 / 0.000

Epoch 3 Train : 0.164 / 0.951
Epoch 3 Test : 0.166 / 0.000

Epoch 4 Train : 0.145 / 0.957
Epoch 4 Test : 0.066 / 0.000

Epoch 5 Train : 0.128 / 0.961
Epoch 5 Test : 0.104 / 0.000

Epoch 6 Train : 0.109 / 0.968
Epoch 6 Test : 0.088 / 0.000

Epoch 7 Train : 0.109 / 0.967
Epoch 7 Test : 0.076 / 0.000

Epoch 8 Train : 0.101 / 0.970
Epoch 8 Test : 0.059 / 0.000

Epoch 9 Train : 0.095 / 0.972
Epoch 9 Test : 0.139 / 0.000

torch.Size([56, 28])
torch.Size([56, 56])
torch.Size([56])
torch.Size([56])
torch.Size([56, 28])
torch.Size([56, 56])
torch.Size([56])
torch.Size([56])
torch.Size([56, 112])
torch.Size([56, 56])
torch.Size([56])
torch.Size([56])
torch.Size([56, 112])
torch.Size([56, 56])
torch.Size([56])
torch.Size([56])
torch.Size([56, 112])
torch.Size([56, 56])
torch.Size([56])
torch.Size([56])
torch.Size([56, 