### ✨Pytorch 와 RNN, LSTM
> torch.nn.RNN<br>
torch.nn.LSTM

- input, sequence, 하이퍼 파라미터 설정

In [1]:
sequence_length = 28    # MNIST row 를 일종의 순서로 다룸
feature_size = 28       # 입력 차원
hidden_size = 128
num_layers = 4
dropout_p = 0.2
output_size = 10        # 0 ~ 9 숫자 분류
minibatch_size = 128

### ✨RNN / LSTM 모델 구현

In [3]:
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, Subset
from torchvision import datasets, transforms
from sklearn.model_selection import train_test_split
import numpy as np
from copy import deepcopy

In [4]:
sequence_length = 28    # MNIST row 를 일종의 순서로 다룸
feature_size = 28       # 입력 차원
hidden_size = 128
num_layers = 4
dropout_p = 0.2
output_size = 10        # 0 ~ 9 숫자 분류
minibatch_size = 128

In [5]:
class Net(nn.Module):
    def __init__(self, feature_size, hidden_size, num_layers, dropout_p, output_size, model_type):
        super().__init__()
        if model_type == 'rnn':
            self.sequenceclassifier=nn.RNN(
                input_size=feature_size,
                hidden_size=hidden_size,
                num_layers=num_layers,
                batch_first=True,
                dropout = dropout_p,
                bidirectional=True
            )
        elif model_type == 'lstm':
            self.sequenceclassifier=nn.LSTM(
                input_size=feature_size,
                hidden_size=hidden_size,
                num_layers=num_layers,
                batch_first=True,
                dropout=dropout_p,
                bidirectional=True
            )
        self.layers=nn.Sequential(
            nn.LeakyReLU(0.1),
            nn.BatchNorm1d(hidden_size*2),
            # self.rnn()의 출력값은 (batch_size, sequence_length, bidirections*hidden_size)
            # bidirectional이 True 이므로, bidirections는 2 => 2*hidden_size
            nn.Linear(hidden_size*2, output_size),
            nn.LogSoftmax(dim=-1)
        )
    
    def forward(self, x):
        out, _ = self.sequenceclassifier(x)
        out = out[:, -1]    # (128, 28, 28) -> (128, 28, 2*128) -> (128, 2*128)
        y= self.layers(out)
        return y

### ✨참고 코드 : shape과 slicing 이해

In [6]:
import torch
import torch.nn as nn

data1 = torch.full((minibatch_size, sequence_length, 2*hidden_size), 1)
data2 = data1[:, -1]
print(data1.shape, data2.shape)
data3 = torch.full((minibatch_size, 1, sequence_length, feature_size), 1)
data4 = data3.reshape(-1, sequence_length, feature_size)
print(data3.shape, data4.shape)

torch.Size([128, 28, 256]) torch.Size([128, 256])
torch.Size([128, 1, 28, 28]) torch.Size([128, 28, 28])


In [7]:
model = Net(feature_size, hidden_size, num_layers, dropout_p, output_size, 'rnn')
model

Net(
  (sequenceclassifier): RNN(28, 128, num_layers=4, batch_first=True, dropout=0.2, bidirectional=True)
  (layers): Sequential(
    (0): LeakyReLU(negative_slope=0.1)
    (1): BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): Linear(in_features=256, out_features=10, bias=True)
    (3): LogSoftmax(dim=-1)
  )
)

### ✨MNIST with LSTM

In [9]:
train_rawdata = datasets.MNIST(root='dataset',
                               train=True,
                               download=True,
                               transform=transforms.ToTensor())
test_dataset = datasets.MNIST(root='dataset',
                              train=False,
                              download=True,
                              transform=transforms.ToTensor())
print('number of training data : ', len(train_rawdata))
print('number of test data : ', len(test_dataset))

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to dataset\MNIST\raw\train-images-idx3-ubyte.gz


100%|██████████| 9912422/9912422 [00:01<00:00, 7976450.97it/s] 


Extracting dataset\MNIST\raw\train-images-idx3-ubyte.gz to dataset\MNIST\raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to dataset\MNIST\raw\train-labels-idx1-ubyte.gz


100%|██████████| 28881/28881 [00:00<00:00, 1681389.32it/s]


Extracting dataset\MNIST\raw\train-labels-idx1-ubyte.gz to dataset\MNIST\raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to dataset\MNIST\raw\t10k-images-idx3-ubyte.gz


100%|██████████| 1648877/1648877 [00:01<00:00, 1596681.41it/s]


Extracting dataset\MNIST\raw\t10k-images-idx3-ubyte.gz to dataset\MNIST\raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to dataset\MNIST\raw\t10k-labels-idx1-ubyte.gz


100%|██████████| 4542/4542 [00:00<00:00, 7453258.52it/s]

Extracting dataset\MNIST\raw\t10k-labels-idx1-ubyte.gz to dataset\MNIST\raw

number of training data :  60000
number of test data :  10000





In [10]:
VALIDATION_RATE = 0.2
train_indices, val_indices, _, _ = train_test_split(
    range(len(train_rawdata)),      # X index 번호
    train_rawdata.targets,          # y
    stratify=train_rawdata.targets, # 균등분포
    test_size=VALIDATION_RATE       # test dataset 비율
)

In [11]:
train_dataset = Subset(train_rawdata, train_indices)
validation_dataset = Subset(train_rawdata, val_indices)

In [12]:
print(len(train_dataset), len(validation_dataset), len(test_dataset))

48000 12000 10000


In [13]:
minibatch_size = 128

train_batches = DataLoader(train_dataset, batch_size=minibatch_size, shuffle=True)
val_batches = DataLoader(validation_dataset, batch_size=minibatch_size, shuffle=True)
test_batches = DataLoader(test_dataset, batch_size=minibatch_size, shuffle=True)

- input, output, loss, optimizer 설정

In [14]:
loss_func = nn.NLLLoss()    # log softmax
optimizer = torch.optim.Adam(model.parameters())

- Training & Validation

In [15]:
def train_model(model, early_stop, n_epochs, progress_interval):

    train_losses, valid_losses, lowest_lost = list(), list(), np.inf

    for epoch in range(n_epochs):
        model.train()
        for x_minibatch, y_minibatch in train_batches:
            x_minibatch = x_minibatch.reshape(-1, sequence_length, feature_size)
            y_minibatch_pred = model(x_minibatch)
            loss = loss_func(y_minibatch_pred, y_minibatch)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            train_losses.append(loss.item())

        # validate the model
        model.eval()
        with torch.no_grad():
            for x_minibatch, y_minibatch in val_batches:
                x_minibatch = x_minibatch.reshape(-1, sequence_length, feature_size)
                y_minibatch_pred = model(x_minibatch)
                loss = loss_func(y_minibatch_pred, y_minibatch)
                valid_losses.append(loss.item())

            if valid_losses[-1] < lowest_lost:
                lowest_loss = valid_losses[-1]
                lowest_epoch = epoch
                best_model = deepcopy(model.state_dict())
            else:
                if (early_stop > 0) and (lowest_epoch+early_stop < epoch):
                    print("Early Stopped", epoch, "epochs")
                    model.load_state_dict(best_model)
                    break
            
            if (epoch % progress_interval) == 0:
                print(train_losses[-1], valid_losses[-1], lowest_loss, lowest_epoch, epoch)
        
        model.load_state_dict(best_model)
        return model, lowest_loss, train_losses, valid_losses

In [16]:
nb_epochs = 100
progress_interval = 3
early_stop = 30

model, lowest_loss, train_losses, valid_losses = train_model(model, early_stop, nb_epochs, progress_interval)

0.26211389899253845 0.2798013985157013 0.2798013985157013 0 0
