## Classification


In [1]:
pip install torchvision

Note: you may need to restart the kernel to use updated packages.


### Define Dataset

In [2]:
from torchvision.datasets import MNIST
import torchvision.transforms as transforms

# MNIST train/test splits, downloaded on first use; every image is converted to a tensor.
train_data = MNIST(root='./data/train', train=True, transform=transforms.ToTensor(), download=True)
test_data = MNIST(root='./data/test', train=False, transform=transforms.ToTensor(), download=True)

### Define DataLoader

In [3]:
from torch.utils.data import DataLoader

# Mini-batches of 16 samples; only the training split is shuffled.
train_dataloader = DataLoader(dataset=train_data, batch_size=16, shuffle=True)
test_dataloader = DataLoader(dataset=test_data, batch_size=16, shuffle=False)

### Define model

In [4]:
from torch import nn

class Model(nn.Module):
    """Fully connected classifier: flatten -> Linear(784, 512) -> ReLU -> Linear(512, 10)."""

    def __init__(self):
        super().__init__()
        self.flatten = nn.Flatten()
        # Layers are created in the same order as they are applied.
        hidden = nn.Linear(28 * 28, 512)
        head = nn.Linear(512, 10)
        self.linear_layer = nn.Sequential(hidden, nn.ReLU(), head)

    def forward(self, x):
        """Flatten each sample and return the 10 unnormalized class scores (logits)."""
        return self.linear_layer(self.flatten(x))

model = Model()

### Define loss(criterion) & optimizer

In [5]:
import torch.optim as optim

# Adam over all parameters of `model`, with weight decay as regularization.
optimizer = optim.Adam(model.parameters(), lr=0.01, weight_decay=0.001)
# Cross-entropy loss; it is fed the raw logits returned by Model.forward.
loss_function = nn.CrossEntropyLoss()

### Training & Testing

In [6]:
# One pass over the training set: forward, loss, backward, parameter update.
model.train()
for batch_inputs, batch_targets in train_dataloader:
    # Clear gradients left over from the previous step before accumulating new ones.
    optimizer.zero_grad()

    batch_loss = loss_function(model(batch_inputs), batch_targets)
    batch_loss.backward()
    optimizer.step()

In [7]:
import torch  # `torch` itself has not been imported by the earlier cells (only `nn` / `optim`)

# Evaluate on the test set and print predicted vs. true labels for every batch.
model.eval()
to_probabilities = nn.Softmax(dim=1)  # built once instead of once per batch
# Inference only: disable autograd so no computation graph is built for the forward pass.
with torch.no_grad():
    for x, y in test_dataloader:
        prediction = to_probabilities(model(x))
        y_pred = prediction.argmax(1)  # index of the most probable class per sample
        print(f"Predicted class: {y_pred}, True label: {y}")

Predicted class: tensor([7, 2, 1, 0, 4, 1, 4, 9, 6, 9, 0, 6, 9, 0, 1, 5]), True label: tensor([7, 2, 1, 0, 4, 1, 4, 9, 5, 9, 0, 6, 9, 0, 1, 5])
Predicted class: tensor([9, 7, 3, 4, 9, 6, 7, 5, 4, 0, 7, 4, 0, 1, 3, 1]), True label: tensor([9, 7, 3, 4, 9, 6, 6, 5, 4, 0, 7, 4, 0, 1, 3, 1])
Predicted class: tensor([3, 6, 7, 2, 7, 1, 3, 1, 1, 7, 4, 2, 3, 5, 1, 2]), True label: tensor([3, 4, 7, 2, 7, 1, 2, 1, 1, 7, 4, 2, 3, 5, 1, 2])
Predicted class: tensor([4, 4, 6, 3, 5, 5, 6, 0, 4, 1, 9, 5, 7, 8, 9, 3]), True label: tensor([4, 4, 6, 3, 5, 5, 6, 0, 4, 1, 9, 5, 7, 8, 9, 3])
Predicted class: tensor([7, 4, 6, 4, 3, 0, 7, 0, 2, 9, 1, 7, 3, 2, 9, 7]), True label: tensor([7, 4, 6, 4, 3, 0, 7, 0, 2, 9, 1, 7, 3, 2, 9, 7])
Predicted class: tensor([7, 6, 2, 7, 8, 4, 7, 3, 6, 1, 3, 6, 9, 3, 1, 4]), True label: tensor([7, 6, 2, 7, 8, 4, 7, 3, 6, 1, 3, 6, 9, 3, 1, 4])
Predicted class: tensor([1, 7, 6, 9, 6, 0, 5, 4, 9, 9, 2, 1, 9, 4, 8, 7]), True label: tensor([1, 7, 6, 9, 6, 0, 5, 4, 9, 9, 2, 1, 9, 4,

Predicted class: tensor([1, 5, 9, 2, 3, 5, 8, 8, 0, 4, 3, 3, 6, 6, 0, 1]), True label: tensor([1, 5, 9, 7, 3, 5, 8, 8, 0, 5, 3, 3, 6, 6, 0, 1])
Predicted class: tensor([6, 0, 3, 5, 4, 4, 1, 2, 9, 1, 4, 6, 9, 9, 3, 9]), True label: tensor([6, 0, 3, 5, 4, 4, 1, 2, 9, 1, 4, 6, 9, 9, 3, 9])
Predicted class: tensor([8, 4, 4, 3, 1, 3, 1, 3, 8, 7, 9, 4, 8, 8, 2, 9]), True label: tensor([8, 4, 4, 3, 1, 3, 1, 8, 8, 7, 9, 4, 8, 8, 7, 9])
Predicted class: tensor([9, 1, 4, 5, 6, 0, 5, 2, 2, 6, 1, 5, 5, 2, 4, 9]), True label: tensor([7, 1, 4, 5, 6, 0, 5, 2, 2, 2, 1, 5, 5, 2, 4, 9])
Predicted class: tensor([6, 2, 7, 7, 2, 2, 1, 1, 2, 8, 3, 7, 2, 4, 1, 7]), True label: tensor([6, 2, 7, 7, 2, 2, 1, 1, 2, 8, 3, 7, 2, 4, 1, 7])
Predicted class: tensor([1, 7, 6, 7, 8, 2, 7, 3, 1, 7, 5, 8, 2, 6, 2, 2]), True label: tensor([1, 7, 6, 7, 8, 2, 7, 3, 1, 7, 5, 8, 2, 6, 2, 2])
Predicted class: tensor([5, 6, 5, 0, 9, 2, 4, 3, 7, 9, 7, 6, 6, 8, 0, 4]), True label: tensor([5, 6, 5, 0, 9, 2, 4, 3, 3, 9, 7, 6, 6, 8,

Predicted class: tensor([1, 7, 7, 8, 6, 5, 7, 3, 8, 9, 5, 3, 7, 9, 1, 7]), True label: tensor([1, 7, 7, 8, 6, 5, 9, 3, 8, 9, 5, 3, 7, 9, 1, 7])
Predicted class: tensor([0, 0, 3, 7, 2, 5, 2, 1, 8, 6, 2, 9, 3, 7, 5, 2]), True label: tensor([0, 0, 3, 7, 2, 5, 8, 1, 8, 6, 2, 9, 5, 7, 5, 7])
Predicted class: tensor([8, 6, 8, 5, 1, 4, 8, 4, 5, 8, 3, 0, 6, 2, 7, 3]), True label: tensor([8, 6, 2, 5, 1, 4, 8, 4, 5, 8, 3, 0, 6, 2, 7, 3])
Predicted class: tensor([3, 2, 1, 0, 7, 3, 4, 6, 3, 9, 3, 2, 8, 9, 0, 3]), True label: tensor([3, 2, 1, 0, 7, 3, 4, 0, 3, 9, 3, 2, 8, 9, 0, 3])
Predicted class: tensor([8, 0, 7, 6, 5, 4, 7, 3, 9, 0, 8, 6, 2, 5, 1, 1]), True label: tensor([8, 0, 7, 6, 5, 4, 7, 3, 9, 0, 8, 6, 2, 5, 6, 1])
Predicted class: tensor([0, 0, 4, 4, 0, 1, 2, 3, 2, 7, 7, 8, 5, 2, 5, 7]), True label: tensor([0, 0, 4, 4, 0, 1, 2, 3, 2, 7, 7, 8, 5, 2, 5, 7])
Predicted class: tensor([6, 9, 1, 4, 1, 6, 4, 2, 4, 3, 5, 4, 3, 9, 5, 0]), True label: tensor([6, 9, 1, 4, 1, 6, 4, 2, 4, 3, 5, 4, 3, 9,

Predicted class: tensor([9, 9, 5, 9, 8, 6, 1, 0, 3, 3, 5, 2, 1, 6, 3, 0]), True label: tensor([9, 9, 5, 9, 8, 5, 1, 0, 3, 3, 5, 2, 1, 6, 5, 0])
Predicted class: tensor([2, 8, 2, 5, 6, 2, 3, 0, 2, 2, 6, 4, 3, 5, 5, 1]), True label: tensor([2, 8, 1, 5, 6, 2, 3, 0, 2, 2, 6, 4, 3, 5, 5, 1])
Predicted class: tensor([7, 2, 1, 6, 9, 1, 3, 9, 5, 5, 1, 6, 2, 2, 8, 6]), True label: tensor([7, 2, 1, 6, 9, 1, 9, 9, 5, 5, 1, 6, 2, 2, 8, 6])
Predicted class: tensor([7, 1, 4, 6, 0, 6, 0, 3, 3, 2, 8, 3, 6, 8, 9, 8]), True label: tensor([7, 1, 4, 6, 0, 4, 0, 3, 3, 2, 2, 3, 6, 8, 9, 8])
Predicted class: tensor([5, 3, 8, 5, 4, 5, 2, 0, 5, 6, 3, 2, 8, 3, 9, 9]), True label: tensor([5, 3, 8, 5, 4, 5, 2, 0, 5, 6, 3, 2, 8, 3, 9, 9])
Predicted class: tensor([3, 7, 9, 4, 6, 7, 1, 3, 7, 3, 6, 6, 0, 9, 0, 1]), True label: tensor([5, 7, 9, 4, 6, 7, 1, 3, 7, 3, 6, 6, 0, 9, 0, 1])
Predicted class: tensor([9, 9, 2, 8, 8, 0, 1, 6, 9, 7, 5, 3, 4, 7, 6, 9]), True label: tensor([9, 9, 2, 8, 8, 0, 1, 6, 9, 7, 5, 3, 4, 7,

Predicted class: tensor([2, 2, 6, 9, 9, 6, 5, 5, 3, 3, 8, 1, 6, 5, 6, 8]), True label: tensor([2, 2, 6, 9, 9, 6, 5, 5, 3, 3, 8, 1, 6, 5, 6, 8])
Predicted class: tensor([1, 9, 7, 6, 8, 3, 7, 4, 7, 0, 9, 0, 0, 3, 7, 9]), True label: tensor([1, 9, 7, 6, 8, 3, 7, 4, 7, 0, 9, 0, 0, 3, 7, 9])
Predicted class: tensor([3, 0, 2, 0, 1, 0, 1, 0, 4, 0, 1, 0, 4, 7, 9, 6]), True label: tensor([3, 0, 2, 0, 1, 0, 1, 0, 4, 0, 1, 0, 4, 7, 9, 6])
Predicted class: tensor([2, 6, 2, 2, 9, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9]), True label: tensor([2, 6, 2, 2, 9, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
Predicted class: tensor([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5]), True label: tensor([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5])
Predicted class: tensor([6, 7, 8, 9, 8, 0, 5, 6, 6, 0, 8, 0, 2, 3, 7, 9]), True label: tensor([6, 7, 8, 9, 8, 0, 5, 6, 6, 0, 8, 0, 2, 3, 7, 9])
Predicted class: tensor([4, 7, 1, 9, 1, 7, 1, 4, 0, 0, 4, 1, 7, 5, 7, 1]), True label: tensor([4, 7, 1, 9, 1, 7, 1, 4, 0, 0, 4, 1, 7, 5,

Predicted class: tensor([2, 4, 5, 6, 7, 1, 2, 3, 4, 5, 6, 7, 1, 2, 4, 4]), True label: tensor([2, 4, 5, 6, 0, 1, 2, 3, 4, 5, 6, 7, 1, 2, 3, 4])
Predicted class: tensor([5, 1, 0, 4, 5, 6, 6, 3, 4, 4, 2, 9, 1, 0, 6, 4]), True label: tensor([5, 1, 0, 4, 5, 6, 6, 3, 4, 4, 2, 8, 1, 0, 6, 4])
Predicted class: tensor([8, 7, 2, 3, 3, 9, 2, 0, 9, 3, 3, 7, 1, 5, 6, 3]), True label: tensor([9, 7, 2, 3, 3, 9, 2, 0, 9, 3, 3, 9, 1, 5, 2, 3])
Predicted class: tensor([4, 1, 8, 4, 0, 2, 4, 0, 2, 4, 7, 8, 0, 7, 5, 6]), True label: tensor([7, 7, 8, 4, 0, 2, 4, 0, 2, 4, 7, 8, 0, 7, 0, 6])
Predicted class: tensor([9, 3, 2, 8, 6, 7, 5, 7, 5, 1, 0, 7, 1, 6, 7, 2]), True label: tensor([9, 3, 2, 8, 6, 0, 5, 7, 5, 1, 0, 8, 1, 6, 7, 2])
Predicted class: tensor([9, 7, 9, 5, 8, 6, 2, 6, 2, 8, 1, 7, 5, 7, 1, 1]), True label: tensor([9, 7, 9, 5, 8, 6, 2, 6, 2, 8, 1, 7, 5, 0, 1, 1])
Predicted class: tensor([3, 4, 4, 4, 1, 8, 6, 8, 9, 0, 1, 2, 3, 4, 5, 6]), True label: tensor([3, 8, 4, 9, 1, 8, 6, 8, 9, 0, 1, 2, 3, 4,

Predicted class: tensor([5, 5, 0, 1, 8, 9, 8, 2, 6, 0, 4, 5, 0, 2, 1, 8]), True label: tensor([5, 5, 0, 1, 1, 9, 8, 2, 6, 0, 4, 5, 0, 3, 1, 8])
Predicted class: tensor([6, 7, 5, 9, 9, 3, 0, 3, 1, 4, 4, 0, 4, 9, 0, 1]), True label: tensor([6, 7, 5, 9, 9, 3, 0, 3, 1, 4, 4, 0, 4, 9, 0, 1])
Predicted class: tensor([2, 3, 5, 6, 7, 8, 0, 2, 2, 3, 5, 6, 7, 8, 9, 0]), True label: tensor([2, 3, 5, 6, 7, 8, 0, 1, 2, 3, 5, 6, 7, 8, 9, 0])
Predicted class: tensor([2, 2, 3, 6, 6, 7, 8, 9, 9, 7, 0, 9, 0, 1, 5, 8]), True label: tensor([1, 2, 3, 5, 6, 7, 8, 9, 9, 7, 0, 9, 0, 1, 5, 8])
Predicted class: tensor([8, 0, 9, 3, 2, 7, 8, 4, 6, 1, 0, 4, 9, 4, 4, 0]), True label: tensor([8, 0, 9, 3, 2, 7, 8, 4, 6, 1, 0, 4, 9, 4, 2, 0])
Predicted class: tensor([4, 0, 1, 6, 9, 3, 2, 9, 1, 6, 0, 8, 1, 8, 7, 7]), True label: tensor([5, 0, 1, 6, 9, 3, 2, 9, 1, 6, 0, 1, 1, 8, 7, 7])
Predicted class: tensor([6, 3, 6, 0, 7, 2, 4, 1, 7, 0, 6, 7, 2, 2, 5, 8]), True label: tensor([6, 3, 6, 0, 7, 2, 4, 1, 7, 0, 6, 7, 1, 2,

Predicted class: tensor([8, 9, 1, 6, 8, 9, 9, 0, 1, 2, 4, 4, 3, 7, 4, 4]), True label: tensor([8, 9, 1, 6, 8, 9, 9, 0, 1, 2, 4, 4, 3, 7, 4, 4])
Predicted class: tensor([4, 0, 3, 8, 7, 5, 8, 2, 1, 7, 5, 3, 8, 5, 2, 5]), True label: tensor([4, 0, 3, 8, 7, 5, 8, 2, 1, 7, 5, 3, 8, 5, 2, 5])
Predicted class: tensor([1, 1, 6, 2, 1, 3, 8, 6, 4, 2, 6, 2, 5, 5, 0, 2]), True label: tensor([1, 1, 6, 2, 1, 3, 8, 6, 4, 2, 6, 2, 5, 5, 0, 2])
Predicted class: tensor([8, 0, 6, 4, 1, 7, 9, 1, 9, 2, 6, 7, 6, 6, 8, 7]), True label: tensor([8, 0, 6, 8, 1, 7, 9, 1, 9, 2, 6, 7, 6, 6, 8, 7])
Predicted class: tensor([4, 9, 2, 1, 3, 3, 0, 5, 5, 8, 0, 3, 7, 9, 7, 0]), True label: tensor([4, 9, 2, 1, 3, 3, 0, 5, 5, 8, 0, 3, 7, 9, 7, 0])
Predicted class: tensor([2, 7, 9, 1, 7, 8, 0, 3, 5, 3, 6, 0, 1, 2, 3, 4]), True label: tensor([2, 7, 9, 1, 7, 8, 0, 3, 5, 3, 6, 0, 1, 2, 3, 4])
Predicted class: tensor([5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0]), True label: tensor([5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8,

# 실습

## Exercise 1

In [8]:
from torchvision.datasets import CIFAR10
import torchvision.transforms as transforms

# Convert images to tensors, then normalize each of the three channels with mean 0.5 and std 0.5.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
])

# Both splits share the same root directory and preprocessing.
train_data = CIFAR10(root="./data", train=True, transform=transform, download=True)
test_data = CIFAR10(root="./data", train=False, transform=transform, download=True)

# Report the number of samples in each split, one per line.
print(len(train_data), len(test_data), sep="\n")

Files already downloaded and verified
Files already downloaded and verified
50000
10000


In [9]:
from torch.utils.data import DataLoader

# Exercise template: replace each placeholder with a DataLoader over the CIFAR10 split
# created in the previous cell, following the requirement stated in the hint.
train_dataloader = ### implement here (requirement: use batch size 8 and shuffle the data.)
test_dataloader = ### implement here (requirement: use batch size 8 and shuffle the data.)

SyntaxError: invalid syntax (1484516528.py, line 3)

In [None]:
import matplotlib.pyplot as plt # library for plotting
from torchvision.transforms.functional import to_pil_image # function for changing tensor to pil image

# Visualize first 8 images
plt.figure(figsize=(16,4))

# Exercise template: the placeholder below must become the loop header that defines `i`.
### implement here (hint: for loop)
    # x/2 + 0.5 inverts the Normalize(mean=0.5, std=0.5) applied when the dataset was built.
    img = to_pil_image(train_data[i][0]/2 + 0.5)
    plt.subplot(1,8,i+1)
    plt.imshow(img)
plt.show()

# Print labels for each image
# Class names indexed by the integer label stored in train_data[i][1].
classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')
print("Labels: ", end='')
for i in range(8): 
    print(classes[train_data[i][1]], end=' | ')

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class CNN(nn.Module):
    """Small convolutional classifier that outputs 10 raw class scores.

    Two conv -> ReLU -> 2x2 max-pool stages (3 -> 8 -> 16 channels) are
    followed by one linear layer. The linear layer expects 16 * 8 * 8
    features, i.e. an 8x8 feature map after two halvings (32x32 inputs).
    """

    def __init__(self):
        super().__init__()
        self.layer1 = nn.Sequential(
            nn.Conv2d(3, 8, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),
        )
        self.layer2 = nn.Sequential(
            nn.Conv2d(8, 16, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),
        )

        self.fc_layer = nn.Linear(16 * 8 * 8, 10)

    def forward(self, x):
        """Return unnormalized class logits of shape (batch, 10).

        No softmax is applied here: nn.CrossEntropyLoss, which this network
        is trained with, applies log-softmax internally and expects raw
        logits. Applying softmax first would normalize the scores twice and
        flatten the gradients. The arg-max class is the same either way;
        use F.softmax(logits, dim=1) on the result if probabilities are
        needed.
        """
        x1 = self.layer1(x)
        x2 = self.layer2(x1)

        # Keep the batch dimension, merge channels and spatial dims.
        x2 = torch.flatten(x2, 1)
        return self.fc_layer(x2)


net = CNN()

In [None]:
import torch.optim as optim

# Cross-entropy loss; it applies log-softmax internally and expects unnormalized class scores.
criterion = nn.CrossEntropyLoss()
# SGD with momentum over all parameters of `net`.
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)

다음은 생성한 모델을 CPU로 학습할지 GPU로 학습할지 결정하는 코드입니다.  
torch.cuda.is_available() 함수는 현재 환경에서 프로그램이 GPU를 사용할 수 있는지를 True/False로 반환합니다.
기본적으로 모델과 데이터는 CPU에서 연산됩니다. 따라서 GPU를 사용할 수 있는 환경이라면 torch.device('cuda')를 꼭 사용하는 것이 좋습니다.  
이때 모델을 GPU에 올리는 것과 함께, train/test 시 각 배치 단위의 데이터 역시 GPU에 올려야 연산이 정상적으로 진행됩니다.

In [None]:
# Use the GPU when CUDA is available; otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

net = net.to(device) # network to device (cpu or gpu)

In [None]:
# Exercise template: train for 4 epochs and report test accuracy after each epoch.
# Every `### implement here` placeholder must be completed before this cell can run.
for epoch in range(4):   # Loop over the dataset multiple times.

    running_loss = 0.0
    net ### implement here (hint: training is started from now on.)
    for i, data in enumerate(train_dataloader, 0):
        # `data` is a list of [inputs, labels]; unpack it.
        inputs, labels = data
        inputs, labels = inputs.to(device), labels.to(device) # data to device (cpu or gpu)

        # Zero the parameter gradients.
        optimizer ### implement here (hint: remove previous gradients from the last iteration)

        # Forward pass + backward pass + optimization step.
        outputs = net() ### implement here (hint: put inputs in the network)
        loss = criterion() ### implement here 
        loss ### implement here (hint: backpropagation)
        optimizer ### implement here (hint: parameter update)

        # Print statistics.
        running_loss += ### implement here
        if i % 2000 == 1999:    # print every 2000 mini-batches
            print(f'[{epoch + 1}, {i + 1:5d}] loss: {running_loss / 2000:.3f}',end=' | ')
            running_loss = 0.0
    
    correct = 0
    total = 0
    net ### implement here (hint: test is started from now on.)
    # We are not training here, so gradients do not need to be computed for the outputs.
    with ### implement here
        for data in test_dataloader:
            images, labels = data
            images, labels =  ### implement here (hint: data to device (cpu or gpu))
            
            # Compute the outputs by passing the images through the network.
            outputs = net(images)
            # The class with the highest value (energy) is chosen as the prediction.
            _, predicted = ### implement here (hint: use torch.max)
            total += labels.size(0)
            correct += ### implement here (hint: count the number of correct predictions.)

    # Integer percentage (floor division) of correctly classified test samples.
    print(f'Accuracy on the 10000 test images: {100 * correct // total} %')
    
print('Finished Training')

## Exercise 2

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import math

class VGG(nn.Module):
    """Exercise template for a VGG-style convolutional classifier.

    Five convolutional stages, each ending in a 2x2 max-pool, are followed by
    a single linear layer. The `### implement here` placeholders (layer
    arguments, activations, the linear layer and the softmax dimension) must
    be filled in according to their hints before the class can be defined.
    """

    def __init__(self):
        super().__init__()
        self.layer1 = nn.Sequential(
            nn.Conv2d(, , kernel_size=, padding=), ### implement here (hint: input channels: 3, output channels: 64, kernel size: 3, padding size: 1)
            , ### implement here (use ReLU activation function)
            nn.MaxPool2d(2,2)
        )
        self.layer2 = nn.Sequential(
            nn.Conv2d(, , kernel_size=, padding=), ### implement here (hint: input channels: 64, output channels: 128, kernel size: 3, padding size: 1)
            , ### implement here (use ReLU activation function)
            nn.MaxPool2d(2,2)
        )
        self.layer3 = nn.Sequential(
            nn.Conv2d(, , kernel_size=, padding=), ### implement here (hint: input channels: 128, output channels: 256, kernel size: 3, padding size: 1)
            , ### implement here (use ReLU activation function)
            nn.Conv2d(, , kernel_size=, padding=), ### implement here (hint: input channels: 256, output channels: 256, kernel size: 3, padding size: 1)
            , ### implement here (use ReLU activation function)
            nn.MaxPool2d(2,2)
        )
        self.layer4 = nn.Sequential(
            nn.Conv2d(, , kernel_size=, padding=), ### implement here (hint: input channels: 256, output channels: 512, kernel size: 3, padding size: 1)
            , ### implement here (use ReLU activation function)
            nn.Conv2d(, , kernel_size=, padding=), ### implement here (hint: input channels: 512, output channels: 512, kernel size: 3, padding size: 1)
            , ### implement here (use ReLU activation function)
            nn.MaxPool2d(2,2)
        )
        self.layer5 = nn.Sequential(
            nn.Conv2d(, , kernel_size=, padding=), ### implement here (hint: input channels: 512, output channels: 512, kernel size: 3, padding size: 1)
            , ### implement here (use ReLU activation function)
            nn.Conv2d(, , kernel_size=, padding=), ### implement here (hint: input channels: 512, output channels: 512, kernel size: 3, padding size: 1)
            , ### implement here (use ReLU activation function)
            nn.MaxPool2d(2,2)
        )
        # Five 2x2 poolings halve the spatial size five times; the hinted 512 input
        # features assume a 1x1 map remains (32x32 inputs) -- TODO confirm input size.
        self.fc_layer = ### implement here (hint: use linear layer with input channels: 512, output channels: 10)

        # Initialize weights
        # Each conv weight is drawn from N(0, 2/n) with n = kernel area * out_channels;
        # conv biases are set to zero. Other module types keep their default init.
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
                m.bias.data.zero_()


    def forward(self, x):
        """Run the five conv stages, flatten, and apply the final linear layer and softmax."""
        x1 = self.layer1(x)
        x2 = self.layer2(x1)
        x3 = self.layer3(x2)
        x4 = self.layer4(x3)
        x5 = self.layer5(x4)

        # Keep the batch dimension and merge all remaining dimensions.
        x5 = torch.flatten(x5, 1)
        x6 = self.fc_layer(x5)
        out = F.softmax(x6, dim=) ### implement here (hint: Softmax is applied to the class dimension.)
        return out


net = VGG()

In [None]:
import torch.optim as optim

# Cross-entropy loss; it applies log-softmax internally and expects unnormalized class scores.
criterion = nn.CrossEntropyLoss()
# SGD with momentum over all parameters of `net`.
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)

In [None]:
# Use the GPU when CUDA is available; otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

net = net.to(device)

In [None]:
# Exercise template: train for 4 epochs and report test accuracy after each epoch.
# Every `### implement here` placeholder must be completed before this cell can run.
for epoch in range(4):   # Loop over the dataset multiple times.

    running_loss = 0.0
    net.train()
    for i, data in enumerate(train_dataloader, 0):
        # `data` is a list of [inputs, labels]; unpack it.
        inputs, labels = data
        inputs, labels = inputs.to(device), labels.to(device)

        # Zero the parameter gradients.
        optimizer ### implement here (hint: remove previous gradients from the last iteration)

        # Forward pass + backward pass + optimization step.
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss ### implement here (hint: backpropagation)
        optimizer ### implement here (hint: parameter update)

        # Print statistics.
        running_loss += loss.item()
        if i % 2000 == 1999:    # print every 2000 mini-batches
            print(f'[{epoch + 1}, {i + 1:5d}] loss: {running_loss / 2000:.3f}',end=' | ')
            running_loss = 0.0
    
    correct = 0
    total = 0
    net.eval()
    # We are not training here, so gradients do not need to be computed for the outputs.
    with torch.no_grad():
        for data in test_dataloader:
            images, labels = data
            images, labels = images.to(device), labels.to(device)
            
            # Compute the outputs by passing the images through the network.
            outputs = net(images)
            # The class with the highest value (energy) is chosen as the prediction.
            _, predicted = ### implement here (hint: use torch.max)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    # Integer percentage (floor division) of correctly classified test samples.
    print(f'Accuracy on the 10000 test images: {100 * correct // total} %')
    
print('Finished Training')

## Weight Initialization

Weight Initialization은 모델에 사용되는 여러 learnable parameter를 특정 방식으로 초기화하는 것을 의미한다. layer가 처음 선언되면 parameter는 랜덤한 값으로 초기화되는데, 이를 보다 근거 있는 값으로 초기화하기 위하여 Xavier initialization, He initialization 등의 다양한 방법론이 존재한다. 이러한 초기화를 적용하면 네트워크가 보다 안정적이고 빠르게 학습될 수 있다.  

Pytorch torch.nn.init  
https://pytorch.org/docs/stable/nn.init.html

In [None]:
import torch.nn as nn

import torch  # this cell only imports `torch.nn`; `torch.empty` needs the top-level package

# Every initializer in torch.nn.init fills the tensor it is given in place
# (hence the trailing underscore), so each call needs a tensor argument;
# calling them with no arguments raises TypeError.
example_weight = torch.empty(3, 5)

nn.init.uniform_(example_weight) # uniform distribution
nn.init.normal_(example_weight) # normal distribution

nn.init.constant_(example_weight, 0.5) # constant value

nn.init.xavier_uniform_(example_weight) # Understanding the difficulty of training deep feedforward neural networks - Glorot, X. & Bengio, Y. (2010), using a uniform distribution
nn.init.kaiming_uniform_(example_weight) # Delving deep into rectifiers: Surpassing human-level performance on ImageNet classification - He, K. et al. (2015), using a uniform distribution.

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class CNN(nn.Module):
    """Small convolutional classifier with an explicit weight-initialization example.

    Two conv -> ReLU -> 2x2 max-pool stages (3 -> 8 -> 16 channels) are
    followed by one linear layer whose weight is re-initialized with
    Kaiming (He) uniform initialization. The linear layer expects
    16 * 8 * 8 features, i.e. an 8x8 feature map after two halvings
    (32x32 inputs).
    """

    def __init__(self):
        super().__init__()
        self.layer1 = nn.Sequential(
            nn.Conv2d(3, 8, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),
        )
        self.layer2 = nn.Sequential(
            nn.Conv2d(8, 16, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),
        )

        self.fc_layer = nn.Linear(16 * 8 * 8, 10)

        # Weight Initialization example
        # nn.init functions modify the parameter in place without tracking
        # gradients, so the parameter is passed directly instead of `.data`.
        nn.init.kaiming_uniform_(self.fc_layer.weight)

    def forward(self, x):
        """Return unnormalized class logits of shape (batch, 10).

        No softmax is applied here: nn.CrossEntropyLoss applies log-softmax
        internally and expects raw logits, so a softmax in the model would
        normalize the scores twice. The arg-max class is the same either
        way; use F.softmax(logits, dim=1) on the result if probabilities
        are needed.
        """
        x1 = self.layer1(x)
        x2 = self.layer2(x1)

        # Keep the batch dimension, merge channels and spatial dims.
        x2 = torch.flatten(x2, 1)
        return self.fc_layer(x2)


net = CNN()