<a href="https://colab.research.google.com/github/cswcjt/Deep_Learning/blob/main/RNN_Practice.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import torch
import torch.nn as nn
import torchvision
from torchvision import datasets, transforms

In [2]:
inputs = torch.Tensor([1,2,3,4,5,6,7,8,9,10,11,12])

In [3]:
input_size = 1
seq_length = 3
hidden_size = 2
num_layers = 2
batch_size = 4

In [4]:
rnn = nn.RNN(input_size=input_size, hidden_size=hidden_size, num_layers=num_layers, batch_first=True)
# batch_first: [seq, batch, feature] -> [batch, seq, feature]
# input: input, hidden_0 -> 2개의 input을 받음
# output: out, hidden -> 2개의 출력

In [5]:
# input 상세 설명
# input -> shape = [seq_length, batch_size, input_size]
# hidden_0: network 초기의 hidden state -> shape = [num layers*num direction, batch_size, input_size]
# num direction -> forward prob만 있으면  num direction = 1, backward prob도 있으면 num direction = 2

In [6]:
# output 상세 설명
# out: 마지막 RNN layer로부터 "매" time step 마다의 output(ex. hello -> e, 1, 1, o)를 리턴
# h_n: 모든 RNN layer로부터 "마지막" time step의 hidden value(state) -> shape = [num layers*num directions, batch size, hidden size]

In [7]:
# batch_first 사용한 input의 결과 예시
print(inputs.shape)
inputs = inputs.view(batch_size, seq_length, input_size)
print(inputs.shape)

torch.Size([12])
torch.Size([4, 3, 1])


In [8]:
# output
out, hidden = rnn(inputs)
print('out:', out.shape) # [batch size, seq length, num directions*hidden size]
print('hidden:', hidden.shape) # [num layers*num directions, batch size, hidden size]

out: torch.Size([4, 3, 2])
hidden: torch.Size([2, 4, 2])


In [9]:
### bidirectinal
bi_rnn = nn.RNN(input_size=input_size, hidden_size=hidden_size, num_layers=num_layers, batch_first=True, bidirectional=True)

In [10]:
inputs.shape

torch.Size([4, 3, 1])

In [11]:
out, hidden = bi_rnn(inputs)

In [12]:
print(out.shape)
print(hidden.shape)

torch.Size([4, 3, 4])
torch.Size([4, 4, 2])


In [13]:
out.shape

torch.Size([4, 3, 4])

In [14]:
out = out.view(batch_size, seq_length, 2, hidden_size) # 2: 방향이 앞, 뒤로 나눠짐
out.shape

torch.Size([4, 3, 2, 2])

In [15]:
out_direc1 = out[:,:,0,:]
out_direc2 = out[:,:,1,:]

print("out_direc1:", out_direc1.shape)
print("out_direc2:", out_direc2.shape)

out_direc1: torch.Size([4, 3, 2])
out_direc2: torch.Size([4, 3, 2])


In [16]:
hidden = hidden.view(num_layers, 2, batch_size, hidden_size)
hidden.shape

torch.Size([2, 2, 4, 2])

In [17]:
hidden_direc1 = hidden[:,0,:,:]
hidden_direc2 = hidden[:,1,:,:]

print("hidden_direc1:", hidden_direc1.shape)
print("hidden_direc2:", hidden_direc2.shape)

hidden_direc1: torch.Size([2, 4, 2])
hidden_direc2: torch.Size([2, 4, 2])


### RNN application

In [18]:
from torch.utils.data import DataLoader

#### data load

In [19]:
train_data = datasets.MNIST(
    root = './data',
    train = True,
    download = True, 
    transform = transforms.ToTensor()
)

test_data = datasets.MNIST(
    root = './data',
    train = False,
    download = True, 
    transform = transforms.ToTensor()
)

In [36]:
test_data.shape

AttributeError: ignored

In [20]:
train_loader = DataLoader(train_data, batch_size=128, shuffle=True)
test_loader = DataLoader(test_data, batch_size=128, shuffle=False)

#### define model

In [21]:
class RNN(nn.Module):
    def __init__(self, num_classes):
        super(RNN, self).__init__()
        self.input_size = 28
        self.hidden_size = 128
        self.num_layers = 2
        self.RNN = nn.RNN(input_size=self.input_size, hidden_size=self.hidden_size, num_layers=self.num_layers, batch_first=True)
        self.fc = nn.Linear(self.hidden_size, num_classes)

    def forward(self, x):
        out, hidden = self.RNN(x) # [out: mini-batch, seq_length, hidden_size]
        out = self.fc(out[:, -1, :]) # 모든 mini_batch에 대해서, 마지막 seq_length 사용, 모든 hidden_size를 사용
        return out


In [22]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = RNN(num_classes=10).to(device)

#### loss function

In [23]:
CEloss = nn.CrossEntropyLoss()
adam_optimizer = torch.optim.Adam(model.parameters(), lr=0.0001)

### run the model

In [24]:
total_epochs=3
sequence_length=28
input_size=28

In [35]:
for epoch in range(total_epochs):
    for i, (images, labels) in enumerate(train_loader):
        # images를 input으로 넣기 위해 reshape 진행 --> 기준:batch_size, sequence_length, input_size
        images = images.reshape(images.shape[0], sequence_length, input_size).to(device)
        #print(images.shape)
        labels = labels.to(device)
        print(labels.shape)

        outputs = model(images)
        loss = CEloss(outputs, labels)
        print(outputs.shape)
        
        # 최적화
        adam_optimizer.zero_grad()
        loss.backward()
        adam_optimizer.step()
    print("Epoch [{}/{}], Loss {}".format(epoch+1, total_epochs, loss.item()))

torch.Size([128])
torch.Size([128, 10])
torch.Size([128])
torch.Size([128, 10])
torch.Size([128])
torch.Size([128, 10])
torch.Size([128])
torch.Size([128, 10])
torch.Size([128])
torch.Size([128, 10])
torch.Size([128])
torch.Size([128, 10])
torch.Size([128])
torch.Size([128, 10])
torch.Size([128])
torch.Size([128, 10])
torch.Size([128])
torch.Size([128, 10])
torch.Size([128])
torch.Size([128, 10])
torch.Size([128])
torch.Size([128, 10])
torch.Size([128])
torch.Size([128, 10])
torch.Size([128])
torch.Size([128, 10])
torch.Size([128])
torch.Size([128, 10])
torch.Size([128])
torch.Size([128, 10])
torch.Size([128])
torch.Size([128, 10])
torch.Size([128])
torch.Size([128, 10])
torch.Size([128])
torch.Size([128, 10])
torch.Size([128])
torch.Size([128, 10])
torch.Size([128])
torch.Size([128, 10])
torch.Size([128])
torch.Size([128, 10])
torch.Size([128])
torch.Size([128, 10])
torch.Size([128])
torch.Size([128, 10])
torch.Size([128])
torch.Size([128, 10])
torch.Size([128])
torch.Size([128, 10])


In [34]:
with torch.no_grad():
    correct = 0
    total = 0

    for images, labels in test_loader:
        images = images.reshape(images.shape[0], sequence_length, input_size).to(device)
        labels = labels.to(device)
        print(labels.shape)

        output = model(images)

        _, predicted = torch.max(outputs.data, 1)
        print(outputs.data.shape)
        print(predicted.shape)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print("test accuracy: {}%".format(100*correct/total))

torch.Size([128])
torch.Size([96, 10])
torch.Size([96])


RuntimeError: ignored

In [27]:
class Bi_RNN(nn.Module):
    def __init__(self, num_classes):
        super(Bi_RNN, self).__init__()
        self.input_size = 28
        self.hidden_size = 128
        self.num_layers = 2
        self.RNN = nn.RNN(input_size=self.input_size, hidden_size=self.hidden_size, num_layers=self.num_layers, batch_first=True, bidirectional=True)
        
        # bidirectional인 경우 -> hidden_size 2배 해준다.
        self.fc = nn.Linear(self.hidden_size*2, num_classes)

    def forward(self, x):
        out, _ = self.RNN(x) # [out: mini-batch, seq_length, hidden_size]
        out = self.fc(out[:, -1, :]) # 모든 mini_batch에 대해서, 마지막 seq_length 사용, 모든 hidden_size를 사용
        return out


In [28]:
model = Bi_RNN(num_classes=10).to(device)

In [29]:
CEloss = nn.CrossEntropyLoss()
adam_optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

In [30]:
total_epochs = 3
sequence_length = 28
input_size = 28