![nn](img/pytorch_02.png)

## 실습 목표

- Neural network 모델을 만들고 학습시킬 수 있다.
- 모델을 튜닝하여 원하는 성능을 얻을 수 있다.

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

from torch.utils.data import DataLoader

import torchvision
import torchvision.transforms as transforms

In [2]:
# Select the GPU number that CUDA is allowed to see
# NOTE(review): this runs after `import torch`; presumably it still takes
# effect because CUDA has not been initialized yet -- confirm.
import os
os.environ['CUDA_VISIBLE_DEVICES'] = '0'  # GPU number for the first course session

![nn](img/pytorch_08.png)

In [3]:
# MNIST dataset (stored under datasets/); both splits use the same ToTensor transform
to_tensor = transforms.ToTensor()
train_dataset = torchvision.datasets.MNIST(
    root='datasets/',
    train=True,
    transform=to_tensor,
    download=True,
)
test_dataset = torchvision.datasets.MNIST(
    root='datasets/',
    train=False,
    transform=to_tensor,
)

# Data loaders
# Mini-batch size is 128; only the training split is shuffled
train_loader = DataLoader(
    dataset=train_dataset,
    batch_size=128,
    shuffle=True,
)
test_loader = DataLoader(
    dataset=test_dataset,
    batch_size=128,
    shuffle=False,
)

![nn](img/pytorch_07.png)

In [4]:
# Model class definition
class NeuralNet(nn.Module):
    """Fully connected network: three linear layers with a sigmoid after the first two.

    Args:
        input_size: number of input features of the first linear layer.
        hidden_size: number of units in both hidden layers.
        output_size: number of output features of the last linear layer.
    """

    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.fc2 = nn.Linear(hidden_size, hidden_size)
        self.fc3 = nn.Linear(hidden_size, output_size)
        self.sigmoid = nn.Sigmoid()  # sigmoid activation, reused after fc1 and fc2

    def forward(self, x):
        """Return the raw fc3 outputs for ``x`` (no activation on the last layer)."""
        hidden = self.sigmoid(self.fc1(x))
        hidden = self.sigmoid(self.fc2(hidden))
        return self.fc3(hidden)

In [5]:
# Create a model instance
model = NeuralNet(784, 20, 10)  # calls __init__(784, 20, 10)
# input dim: 784  / hidden dim: 20  / output dim: 10

In [6]:
# Echo the model so the notebook prints its layer structure
model

NeuralNet(
  (fc1): Linear(in_features=784, out_features=20, bias=True)
  (fc2): Linear(in_features=20, out_features=20, bias=True)
  (fc3): Linear(in_features=20, out_features=10, bias=True)
  (sigmoid): Sigmoid()
)

In [7]:
# Use the GPU: move the model's parameters to the CUDA device
model = model.to('cuda')

In [8]:
# Criterion used to judge how well the model has been trained
loss_fn = nn.CrossEntropyLoss()

In [9]:
# Plain SGD over all model parameters with learning rate 0.05
optimizer = torch.optim.SGD(model.parameters(), lr=0.05) 
# Alternative optimizers to experiment with:
# torch.optim.SGD(model.parameters(), lr=0.05, momentum=0.9)
# torch.optim.Adam(model.parameters(), lr=0.05)

![nn](img/pytorch_09.gif)

In [10]:
# Train the model
num_epochs = 10  # single source of truth for the loop bound and the progress message
total_step = len(train_loader)  # number of mini-batches per epoch
# Follow the device the model already lives on instead of hard-coding 'cuda',
# so the inputs and the parameters can never end up on different devices.
device = next(model.parameters()).device

model.train()  # make sure train-mode behaviour is active, whatever ran before this cell

for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):  # mini-batch loop
        # Flatten each 28x28 image into a 784-element row and move the batch to the device
        images = images.reshape(-1, 28*28).to(device)
        labels = labels.to(device)

        # Forward
        outputs = model(images)  # runs model.forward(images)
        loss = loss_fn(outputs, labels)  # (predicted values, ground-truth labels)

        # Backward and optimize
        optimizer.zero_grad()  # clear gradients accumulated by the previous step
        loss.backward()  # compute gradients with automatic differentiation
        optimizer.step()  # update the parameters that have requires_grad=True

        # Report progress every 100 mini-batches
        if (i+1) % 100 == 0:
            print('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'
                  .format(epoch+1, num_epochs, i+1, total_step, loss.item()))

Epoch [1/10], Step [100/469], Loss: 2.3050
Epoch [1/10], Step [200/469], Loss: 2.2790
Epoch [1/10], Step [300/469], Loss: 2.2890
Epoch [1/10], Step [400/469], Loss: 2.2899
Epoch [2/10], Step [100/469], Loss: 2.2896
Epoch [2/10], Step [200/469], Loss: 2.2720
Epoch [2/10], Step [300/469], Loss: 2.2720
Epoch [2/10], Step [400/469], Loss: 2.2481
Epoch [3/10], Step [100/469], Loss: 2.2224
Epoch [3/10], Step [200/469], Loss: 2.2200
Epoch [3/10], Step [300/469], Loss: 2.1417
Epoch [3/10], Step [400/469], Loss: 2.0921
Epoch [4/10], Step [100/469], Loss: 1.9951
Epoch [4/10], Step [200/469], Loss: 1.9040
Epoch [4/10], Step [300/469], Loss: 1.9307
Epoch [4/10], Step [400/469], Loss: 1.7986
Epoch [5/10], Step [100/469], Loss: 1.6014
Epoch [5/10], Step [200/469], Loss: 1.5180
Epoch [5/10], Step [300/469], Loss: 1.4620
Epoch [5/10], Step [400/469], Loss: 1.3454
Epoch [6/10], Step [100/469], Loss: 1.2676
Epoch [6/10], Step [200/469], Loss: 1.3053
Epoch [6/10], Step [300/469], Loss: 1.0883
Epoch [6/10

In [11]:
# Test the model
# In test phase, we don't need to compute gradients (for memory efficiency)
device = next(model.parameters()).device  # evaluate on whatever device the model lives on
was_training = model.training
model.eval()  # inference behaviour for layers such as dropout / batch norm, if the model has any
with torch.no_grad():
    correct = 0
    total = 0
    for images, labels in test_loader:
        images = images.reshape(-1, 28*28).to(device)
        labels = labels.to(device)
        outputs = model(images)
        # Classification model -> the top-1 label is the prediction.
        # argmax replaces torch.max(outputs.data, 1); `.data` is unnecessary under no_grad.
        predicted = outputs.argmax(dim=1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    print('Accuracy of the network on the 10000 test images: {} %'.format(100 * correct / total))
model.train(was_training)  # restore the previous mode so later training cells are unaffected

Accuracy of the network on the 10000 test images: 84.7 %


### 퀴즈

#### 아래 코드를 변형하여 Fully connected neural network의 MNIST classification test 성능을 95% 이상으로 올려보세요. 정답은 물론 하나가 아니며, 코드의 변형이 많을 수도 있고 적을 수도 있습니다.

In [12]:
# MNIST dataset (stored under datasets/); both splits use the same ToTensor transform
to_tensor = transforms.ToTensor()
train_dataset = torchvision.datasets.MNIST(
    root='datasets/',
    train=True,
    transform=to_tensor,
    download=True,
)
test_dataset = torchvision.datasets.MNIST(
    root='datasets/',
    train=False,
    transform=to_tensor,
)

# Data loaders: mini-batches of 128, shuffling only the training split
train_loader = DataLoader(
    dataset=train_dataset,
    batch_size=128,
    shuffle=True,
)
test_loader = DataLoader(
    dataset=test_dataset,
    batch_size=128,
    shuffle=False,
)

In [13]:
# Model class definition
class NeuralNet(nn.Module):
    """Baseline fully connected classifier used as the starting point of the quiz.

    Three linear layers; a sigmoid follows the first two, and the last layer's
    output is returned as-is.

    Args:
        input_size: number of input features of the first linear layer.
        hidden_size: number of units in both hidden layers.
        output_size: number of output features of the last linear layer.
    """

    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.fc2 = nn.Linear(hidden_size, hidden_size)
        self.fc3 = nn.Linear(hidden_size, output_size)
        self.sigmoid = nn.Sigmoid()  # one activation module shared by both hidden layers

    def forward(self, x):
        """Apply fc1 -> sigmoid -> fc2 -> sigmoid -> fc3 to ``x`` and return the result."""
        first_hidden = self.sigmoid(self.fc1(x))
        second_hidden = self.sigmoid(self.fc2(first_hidden))
        return self.fc3(second_hidden)

In [14]:
# Create a model instance (input dim 784, hidden dim 20, output dim 10)
model = NeuralNet(784, 20, 10)
# Use the GPU: move the model's parameters to the CUDA device
model = model.to('cuda')

In [15]:
# Cross-entropy loss between the model outputs and the integer class labels
loss_fn = nn.CrossEntropyLoss()

In [16]:
# Plain SGD over all model parameters with learning rate 0.05
optimizer = torch.optim.SGD(model.parameters(), lr=0.05) 
# Alternative optimizers to experiment with:
# torch.optim.SGD(model.parameters(), lr=0.05, momentum=0.9)
# torch.optim.Adam(model.parameters(), lr=0.05)

In [18]:
# Train the model
num_epochs = 10  # named so the training length can be tuned in one place
total_step = len(train_loader)  # number of mini-batches per epoch
# Use the device the model already lives on rather than a hard-coded 'cuda',
# so the inputs always match the parameters' device.
device = next(model.parameters()).device

model.train()  # make sure train-mode behaviour is active, whatever ran before this cell

for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        # Flatten each 28x28 image into a 784-element row and move the batch to the device
        images = images.reshape(-1, 28*28).to(device)
        labels = labels.to(device)

        # Forward pass
        outputs = model(images)
        loss = loss_fn(outputs, labels)

        # Backward and optimize
        optimizer.zero_grad()  # clear gradients accumulated by the previous step
        loss.backward()
        optimizer.step()

        # Report progress every 100 mini-batches
        if (i+1) % 100 == 0:
            print('Epoch [{}], Step [{}/{}], Loss: {:.4f}'
                  .format(epoch+1, i+1, total_step, loss.item()))

RuntimeError: Given groups=1, weight of size [20, 784, 3, 3], expected input[128, 1, 28, 28] to have 784 channels, but got 1 channels instead

In [None]:
# Test the model
# In test phase, we don't need to compute gradients (for memory efficiency)
device = next(model.parameters()).device  # evaluate on whatever device the model lives on
was_training = model.training
# Switch to inference behaviour: matters as soon as the quiz solution adds
# layers such as dropout or batch norm to the model.
model.eval()
with torch.no_grad():
    correct = 0
    total = 0
    for images, labels in test_loader:
        images = images.reshape(-1, 28*28).to(device)
        labels = labels.to(device)
        outputs = model(images)
        # Top-1 class index per sample; replaces torch.max(outputs.data, 1),
        # whose `.data` access is unnecessary under no_grad.
        predicted = outputs.argmax(dim=1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    print('Accuracy of the network on the 10000 test images: {} %'.format(100 * correct / total))
model.train(was_training)  # restore the previous mode so later training cells are unaffected

### 생각해보기

#### 이전 퀴즈에서 사용한 방법 외에 어떤 시도를 더 해볼 수 있을까요?