In [None]:
import torch
import torch.nn as nn
import torchvision.transforms as transforms
import torchvision.datasets as dataset
from torch.autograd import Variable
from torch.nn import Parameter
from torch import Tensor
import torch.nn.functional as F
from torch.utils.data import DataLoader
import math

In [None]:
# Pick the first CUDA device when CUDA is usable; otherwise stay on the CPU.
if torch.cuda.is_available():
  device = torch.device('cuda:0')
else:
  device = torch.device('cpu')
print(device)

cuda:0


### 그냥 실행해 주세요

In [None]:
# Flag for CUDA availability and the matching float tensor type.
cuda = torch.cuda.is_available()
if cuda:
  Tensor = torch.cuda.FloatTensor
else:
  Tensor = torch.FloatTensor

# Seed the CPU generator, and every CUDA generator when CUDA is present,
# so that weight initialization and shuffling are repeatable.
torch.manual_seed(125)
if cuda:
  torch.cuda.manual_seed_all(125)

### 데이터셋을 평균 0.5, 표준편차 1.0을 기준으로 정규화하고 텐서로 변경해 주세요!(transforms 이용 )

In [None]:
# Preprocessing pipeline: convert each image to a tensor, then normalize the
# single channel with mean 0.5 and standard deviation 1.0.
mnist_transform = transforms.Compose(
  [
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5,), std=(1.0,)),
  ]
)

### MNIST 데이터셋 다운로드 해주세요!

In [None]:
from torchvision.datasets import MNIST

# Directory the MNIST files are downloaded to / read from (placeholder value).
dataload_root = 'your_path'

# NOTE(review): valid_dataset and test_dataset are both built with train=False,
# i.e. they are the same split, so the validation accuracy printed during
# training is measured on the test data. Confirm whether a separate hold-out
# from the training split is wanted.
train_dataset = MNIST(root=dataload_root, train=True, transform=mnist_transform, download=True)
valid_dataset = MNIST(root=dataload_root, train=False, transform=mnist_transform, download=True)
test_dataset = MNIST(root=dataload_root, train=False, transform=mnist_transform, download=True)

### batch_size=64로 하고 train_loader, valid_loader, test_loader를 만들어 주세요!

In [None]:
batch_size = 64

# Only the training loader is shuffled; validation and test keep dataset order.
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
valid_loader = DataLoader(valid_dataset, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

###  GRU 셀 네트워크를 만들어 주세요!

이때 GRU에서는 게이트 연산이 3개(리셋 게이트, 업데이트 게이트, 후보 은닉 상태)로 나뉜다는 점을 기억해 주세요!


In [None]:
class GRUCell(nn.Module):
  """A single GRU step implemented with two linear projections.

  The input and the previous hidden state are each projected to
  ``3 * hidden_size`` features, which are split into the reset gate, the
  update gate and the candidate ("new") state.

  Args:
    input_size: number of features in each input row.
    hidden_size: number of features in the hidden state.
    bias: whether both linear projections carry a bias term.
  """

  def __init__(self, input_size , hidden_size, bias =True):
    super().__init__()
    self.input_size = input_size
    self.hidden_size = hidden_size
    self.bias = bias
    # One projection per source; each yields the three gate pre-activations
    # concatenated along the feature axis.
    self.x2h = nn.Linear(input_size, 3 * hidden_size, bias=bias)
    self.h2h = nn.Linear(hidden_size, 3 * hidden_size, bias=bias)
    self.reset_parameters()

  def reset_parameters(self):
    """Re-initialize every parameter uniformly in ±1/sqrt(hidden_size)."""
    bound = 1.0 / math.sqrt(self.hidden_size)
    for param in self.parameters():
      nn.init.uniform_(param, -bound, bound)

  def forward(self, x, hidden):
    """Advance the hidden state by one step.

    Args:
      x: input tensor; it is flattened to ``(-1, x.size(1))`` before use.
      hidden: previous hidden state whose last dimension is ``hidden_size``.

    Returns:
      The next hidden state, with the same shape as ``hidden``.
    """
    x = x.reshape(-1, x.size(1))
    # Work on an explicit 2-D (batch, hidden) view. The previous blanket
    # ``squeeze()`` also removed the batch axis when the batch size was 1,
    # which made the ``chunk`` along dim 1 fail.
    hidden_2d = hidden.reshape(-1, self.hidden_size)

    gate_x = self.x2h(x)
    gate_h = self.h2h(hidden_2d)

    # Split each projection into its reset / update / candidate parts.
    i_r, i_i, i_n = gate_x.chunk(3, dim=1)
    h_r, h_i, h_n = gate_h.chunk(3, dim=1)

    resetgate = torch.sigmoid(i_r + h_r)
    inputgate = torch.sigmoid(i_i + h_i)
    newgate = torch.tanh(i_n + (resetgate * h_n))
    # Interpolate between the candidate state and the previous hidden state.
    hy = newgate + inputgate * (hidden_2d - newgate)
    return hy.reshape(hidden.shape)


### 전반적인 네트워크 구성하기

In [None]:
class GRUModel(nn.Module):
  """Sequence classifier: one GRU cell unrolled over time, then a linear head.

  Args:
    input_dim: number of features per time step.
    hidden_dim: size of the GRU hidden state.
    layer_dim: stored on the instance; the forward pass runs a single
      recurrent layer regardless of this value.
    output_dim: number of output features (class logits).
    bias: whether the GRU cell's linear projections use a bias term.
  """

  def __init__(self,input_dim,hidden_dim,layer_dim,output_dim,bias=True):
    super().__init__()
    self.hidden_dim = hidden_dim
    self.layer_dim = layer_dim
    # Pass `bias` by keyword: `layer_dim` was previously handed to the cell
    # positionally and ended up in its `bias` slot.
    self.gru_cell = GRUCell(input_dim, hidden_dim, bias=bias)
    self.fc = nn.Linear(hidden_dim, output_dim)

  def forward(self,x):
    """Run the cell over every step of ``x`` and classify the last state.

    Args:
      x: tensor indexed as ``x[:, step, :]``, i.e. (batch, seq, feature).

    Returns:
      Tensor of shape (batch, output_dim).
    """
    # Allocate the initial state on the input's own device and dtype, so the
    # model also works on CPU when CUDA happens to be available.
    hn = x.new_zeros(x.size(0), self.hidden_dim)

    for step in range(x.size(1)):
      hn = self.gru_cell(x[:, step, :], hn)

    # Only the final hidden state feeds the classifier. The batch axis is
    # kept even for a single sample so the output is always 2-D.
    return self.fc(hn)


### 옵티마이저와 손실 함수 설정(자유롭게 설정!)

In [None]:
# Network sizes passed to GRUModel.
input_dim, hidden_dim, layer_dim, output_dim = 28, 128, 1, 10

model = GRUModel(input_dim, hidden_dim, layer_dim, output_dim)
# Move the parameters to the GPU when one is available.
if torch.cuda.is_available():
  model = model.cuda()

# Cross-entropy loss optimized with AdamW.
criterion = nn.CrossEntropyLoss()
learning_rate = 5e-5
optimizer = torch.optim.AdamW(params=model.parameters(), lr=learning_rate)

### 5 에포크로 모델 학습과 성능을 검증해 주세요 ( # 부분에 코드 입력)

In [None]:
num_epochs = 5
seq_dim = 28
loss_list = []   # training loss of every optimization step
iteration = 0    # global step counter (named so it does not shadow builtin iter)

model.train()
for epoch in range(num_epochs):
  for images, labels in train_loader:
    # Reshape each batch to (batch, seq_dim, input_dim) and move it to the
    # device the model lives on.
    images = images.view(-1, seq_dim, input_dim).to(device)
    labels = labels.to(device)

    optimizer.zero_grad()
    outputs = model(images)
    loss = criterion(outputs, labels)

    # Back-propagate and update the weights on every device. These two calls
    # used to sit inside a CUDA-only branch, so nothing was trained on CPU.
    loss.backward()
    optimizer.step()

    loss_list.append(loss.item())
    iteration += 1

    if iteration % 500 == 0:
      # Periodic validation pass: no gradients needed, and separate variable
      # names so the training batch is not overwritten.
      model.eval()
      correct = 0
      total = 0
      with torch.no_grad():
        for val_images, val_labels in valid_loader:
          val_images = val_images.view(-1, seq_dim, input_dim).to(device)
          val_outputs = model(val_images)
          predicted = val_outputs.argmax(dim=1).cpu()
          total += val_labels.size(0)
          correct += (predicted == val_labels).sum().item()
      accuracy = 100.0 * correct / total
      print('Iteration : {}. Loss: {}. Accuracy:{}'.format(iteration,loss.item(),accuracy))
      model.train()

Iteration : 500. Loss: 1.060360074043274. Accuracy:72.93000030517578
Iteration : 1000. Loss: 0.6739810705184937. Accuracy:76.08000183105469
Iteration : 1500. Loss: 0.6330926418304443. Accuracy:78.9699935913086
Iteration : 2000. Loss: 0.6121330857276917. Accuracy:80.65999603271484
Iteration : 2500. Loss: 0.5029085874557495. Accuracy:82.56999969482422
Iteration : 3000. Loss: 0.49117323756217957. Accuracy:83.81000518798828
Iteration : 3500. Loss: 0.5620765089988708. Accuracy:84.73999786376953
Iteration : 4000. Loss: 0.4079395830631256. Accuracy:85.56999969482422
Iteration : 4500. Loss: 0.4665379524230957. Accuracy:85.43000030517578


### 테스트셋을 이용해 모델을 예측해 주세요 ( # 부분에 코드입력)

In [None]:
def evaluate(model, val_iter, seq_len=None, feat_dim=None):
  """Compute the mean cross-entropy loss and accuracy of `model` on `val_iter`.

  Args:
    model: module mapping a (batch, seq_len, feat_dim) tensor to class logits.
    val_iter: iterable yielding ``(images, labels)`` batches.
    seq_len: sequence length used to reshape each batch; defaults to the
      notebook-level ``seq_dim``.
    feat_dim: per-step feature size used to reshape each batch; defaults to
      the notebook-level ``input_dim``.

  Returns:
    ``(avg_loss, avg_accuracy)`` as Python floats; accuracy is a percentage.

  Raises:
    ZeroDivisionError: if `val_iter` yields no samples.
  """
  seq_len = seq_dim if seq_len is None else seq_len
  feat_dim = input_dim if feat_dim is None else feat_dim

  # Follow the model's own device instead of probing CUDA availability, so
  # inputs and weights always agree.
  first_param = next(model.parameters(), None)

  corrects, total, total_loss = 0, 0, 0.0
  # Switch to evaluation mode.
  model.eval()
  with torch.no_grad():
    for images, labels in val_iter:
      # Reshape on every device; this used to happen only in the CUDA branch.
      images = images.view(-1, seq_len, feat_dim)
      if first_param is not None:
        images = images.to(first_param.device)
        labels = labels.to(first_param.device)

      logit = model(images)
      total_loss += F.cross_entropy(logit, labels, reduction='sum').item()
      predicted = logit.argmax(dim=1)
      total += labels.size(0)
      corrects += (predicted == labels).sum().item()

  avg_loss = total_loss / total
  avg_accuracy = 100.0 * corrects / total
  return avg_loss, avg_accuracy

### Loss 값과 Accuracy 값을 출력해 주세요!

In [None]:
# Score the trained model on the test loader and report both metrics.
test_loss, test_acc = evaluate(model, test_loader)
print("Test Loss: %5.2f | Test Accuracy: %5.2f" % (test_loss, test_acc))

Test Loss:  0.44 | Test Accuracy: 86.04
