In [None]:
import os
from glob import glob

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms

import numpy as np

In [None]:
# Reproducibility and batching knobs used throughout the notebook.
seed = 1
batch_size = test_batch_size = 64  # same mini-batch size for training and evaluation

# Set to True to stay on the CPU even when CUDA is available.
no_cuda = False

In [None]:
# Pick the compute device: the GPU only when it has not been disabled via
# `no_cuda` and CUDA is actually available; otherwise fall back to the CPU.
if no_cuda:
  use_cuda = False
else:
  use_cuda = torch.cuda.is_available()

device_name = "cuda" if use_cuda else "cpu"
device = torch.device(device_name)

In [None]:
# Seed torch's global RNG so the loaders' shuffling order is repeatable across runs.
torch.manual_seed(seed)

# One preprocessing pipeline shared by both splits: convert the image to a tensor,
# then standardise it with a single-channel mean/std (presumably the MNIST
# training-set statistics - verify if the dataset changes). Both mean and std are
# passed as 1-tuples, i.e. one value per channel.
mnist_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,)),
])

train_loader = torch.utils.data.DataLoader(
    datasets.MNIST('dataset', train=True, download=True, transform=mnist_transform),
    batch_size=batch_size,
    shuffle=True,
)

# download=True here as well, so this loader does not depend on the training
# split having fetched the files first.
test_loader = torch.utils.data.DataLoader(
    datasets.MNIST('dataset', train=False, download=True, transform=mnist_transform),
    batch_size=test_batch_size,
    shuffle=True,
)

In [None]:
class Net(nn.Module):
  """Small two-convolution CNN that classifies 1-channel 28x28 images into 10 classes.

  Spatial size for a 28x28 input: conv1 (5x5, stride 1) -> 24x24, 2x2 max-pool -> 12x12,
  conv2 (5x5, stride 1) -> 8x8, 2x2 max-pool -> 4x4. With 50 output channels this gives
  the 4*4*50 = 800 features expected by ``fc1``.
  """

  def __init__(self):
    super().__init__()
    # nn.Conv2d(in_channels, out_channels, kernel_size, stride)
    self.conv1 = nn.Conv2d(1, 20, 5, 1)
    self.conv2 = nn.Conv2d(20, 50, 5, 1)
    # 4*4*50: the flattened size of the feature map after the second pooling step.
    self.fc1 = nn.Linear(4*4*50, 500)
    self.fc2 = nn.Linear(500, 10)

  def forward(self, x):
    """Return per-class log-probabilities of shape (batch, 10).

    Raises:
      RuntimeError: if the input's spatial size does not yield 800 features per
        sample (i.e. it is not 28x28), instead of silently reshaping the batch.
    """
    # Feature extraction
    x = F.relu(self.conv1(x))
    x = F.max_pool2d(x, 2, 2)
    x = F.relu(self.conv2(x))
    x = F.max_pool2d(x, 2, 2)

    # Fully connected head. Flatten everything except the batch dimension: unlike
    # view(-1, 800), this never folds a wrong-sized feature map into extra batch
    # rows, so a shape mismatch surfaces as an error in fc1.
    x = torch.flatten(x, 1)
    x = F.relu(self.fc1(x))
    x = self.fc2(x)
    return F.log_softmax(x, dim=1)

In [None]:
# Optimisation setup: build the network, place it on the selected device, and
# drive its parameters with SGD plus momentum.
model = Net()
model = model.to(device)
optimizer = optim.SGD(params=model.parameters(), momentum=0.5, lr=0.001)

In [None]:
# List the shape of every learnable tensor (weights and biases, in registration order).
# Iterating over the list itself, rather than a fixed count, keeps this cell valid
# if layers are added to or removed from the model.
params = list(model.parameters())
for param in params:
  print(param.size())

torch.Size([20, 1, 5, 5])
torch.Size([20])
torch.Size([50, 20, 5, 5])
torch.Size([50])
torch.Size([500, 800])
torch.Size([500])
torch.Size([10, 500])
torch.Size([10])


In [None]:
# Switch the model to training mode before stepping through one update by hand.
model.train()

Net(
  (conv1): Conv2d(1, 20, kernel_size=(5, 5), stride=(1, 1))
  (conv2): Conv2d(20, 50, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=800, out_features=500, bias=True)
  (fc2): Linear(in_features=500, out_features=10, bias=True)
)

In [None]:
# Pull a single batch from the training loader to walk through one optimisation step manually.
data, target = next(iter(train_loader))

In [None]:
# Inspect the shapes of the input batch and of its labels.
data.shape, target.shape

(torch.Size([64, 1, 28, 28]), torch.Size([64]))

In [None]:
# Move the batch to the same device as the model (CPU or GPU).
data, target = data.to(device), target.to(device)

In [None]:
# Clear any previously accumulated gradients before the new backward pass.
optimizer.zero_grad()

In [None]:
# Forward pass: the model returns log-probabilities (see Net.forward's log_softmax).
output = model(data)

In [None]:
# Compare the model output with the targets to compute the loss
# (negative log-likelihood on the log-probabilities).
loss = F.nll_loss(output, target)

In [None]:
# Back-propagate to compute the gradients of the loss w.r.t. the parameters.
loss.backward()

In [None]:
# Apply the parameter update using the gradients just computed.
optimizer.step()

In [None]:
# Training-loop settings: number of passes over the training set, and how many
# batches to wait between progress prints.
epochs, log_interval = 1, 100


In [None]:
# Full training loop: for every epoch, run one SGD update per mini-batch and
# print the progress every `log_interval` batches.
for epoch in range(1, epochs + 1):
  model.train()
  for batch_idx, (data, target) in enumerate(train_loader):
    data, target = data.to(device), target.to(device)  # move the batch to the model's device
    optimizer.zero_grad()
    output = model(data)
    loss = F.nll_loss(output, target)
    loss.backward()
    optimizer.step()

    if batch_idx % log_interval == 0:
      # '\t' (tab) separates the progress counter from the loss; the previous
      # literal '|t' was a typo for this escape sequence.
      print('train epoch: {} [{}/{}({:.0f}%)]\tLoss: {:.6f}'.format(
          epoch, batch_idx*len(data), len(train_loader.dataset),
          100*batch_idx/len(train_loader), loss.item()
      ))



In [None]:
# Switch the model to evaluation mode.
model.eval()

Net(
  (conv1): Conv2d(1, 20, kernel_size=(5, 5), stride=(1, 1))
  (conv2): Conv2d(20, 50, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=800, out_features=500, bias=True)
  (fc2): Linear(in_features=500, out_features=10, bias=True)
)

In [None]:
# Dry run of the evaluation step on a single test batch.
test_loss, correct = 0, 0

with torch.no_grad():  # inference only: skip gradient tracking
  # Fetch one batch and place both tensors on the model's device.
  data, target = (tensor.to(device) for tensor in next(iter(test_loader)))
  output = model(data)

  # reduction='sum' collapses the per-sample losses into one scalar for the batch.
  summed_nll = F.nll_loss(output, target, reduction='sum')
  test_loss = summed_nll.item()

  # Index of the highest log-probability per sample; keepdim keeps the class dimension.
  pred = output.argmax(dim=1, keepdim=True)
  # view_as reshapes target to pred's shape so eq() compares element-wise;
  # summing the matches counts the correct predictions.
  matches = pred.eq(target.view_as(pred))
  correct = matches.sum()


In [None]:
# Summed loss over the single batch evaluated above.
test_loss

30.495494842529297

In [None]:
# Number of correct predictions in that batch (still a tensor, since .item() was not called).
correct

tensor(56)

In [None]:
# Predicted class index for every sample in the batch.
pred

tensor([[9],
        [8],
        [2],
        [8],
        [1],
        [3],
        [1],
        [2],
        [4],
        [9],
        [7],
        [0],
        [6],
        [6],
        [3],
        [7],
        [6],
        [0],
        [5],
        [1],
        [2],
        [8],
        [0],
        [4],
        [1],
        [6],
        [9],
        [2],
        [9],
        [9],
        [0],
        [4],
        [6],
        [1],
        [6],
        [9],
        [5],
        [7],
        [2],
        [2],
        [8],
        [6],
        [7],
        [4],
        [9],
        [6],
        [1],
        [2],
        [0],
        [1],
        [8],
        [8],
        [1],
        [7],
        [9],
        [2],
        [5],
        [3],
        [8],
        [2],
        [7],
        [8],
        [0],
        [7]])

In [None]:
# keepdim=True in argmax leaves a trailing dimension of size 1.
pred.shape

torch.Size([64, 1])

In [None]:
# Inspect the label tensor's dtype and shape, for comparison with pred.
target.dtype, target.shape

(torch.int64, torch.Size([64]))

In [None]:
# view_as gives target the same shape as pred so the two can be compared element-wise.
target.view_as(pred).shape

torch.Size([64, 1])

In [None]:
# test_loss holds the loss summed over the ONE batch evaluated above, so the
# per-sample average divides by that batch's size. Dividing by the full test-set
# size (len(test_loader.dataset)) is only correct after summing over every batch.
test_loss /= len(target)

In [None]:
# Loss value after the normalisation step in the previous cell.
test_loss

0.0030495494842529298

In [None]:
# Evaluate on the whole test set: accumulate the summed loss and the number of
# correct predictions over every batch, then report the per-sample averages.
model.eval()

test_loss, correct = 0, 0
num_test_samples = len(test_loader.dataset)

with torch.no_grad():  # inference only: skip gradient tracking
  for data, target in test_loader:
    data = data.to(device)
    target = target.to(device)
    output = model(data)
    # Sum (not mean) per batch so the final division yields a true per-sample average.
    test_loss += F.nll_loss(output, target, reduction='sum').item()
    pred = output.argmax(dim=1, keepdim=True)
    correct += pred.eq(target.view_as(pred)).sum().item()

test_loss /= num_test_samples

print('\nTest set: Average Loss : {:.4f}, Accuracy : {}/{} ({:.0f}%)\n'.format(
    test_loss, correct, num_test_samples, 100*correct/num_test_samples
))


Test set: Average Loss : 0.4849, Accuracy : 8650/10000 (86%)

