### 일련의 과정을 따라서 만든다.
1. 딥러닝 생성
    - nn.Module을 상속했을 때 forward를 정의하면 backward는 autograd가 자동으로 계산해준다.
2. 데이터 전처리
3. DataLoader에 전처리한 데이터를 넣어준다.
    - 데이터를 접근할 수 있는 권한을 가지면서 sampling한 minibatch를 제공해준다.
    - iter() 형태로 만들어준다.
4. for문 <- training(epoch 수만큼)
    1. forwarding
    2. loss: cost function, objective function
    3. backpropagation(backward)
    4. update
5. evaluation


In [0]:
import torch
import torch.nn as nn # neural network (nn)
import torch.nn.functional as F # activation convolution function 뉴럴 네트워크를 위한 함수들이 정의 되어있는 모듈이다.
import torchvision # 이미지 관련 처리, Pretrained Model 관련된 모듈이다.
import torchvision.datasets as vision_dsets # 이미 주어진 데이터셋 사용
import torchvision.transforms as T # 이미지 처리(Vision) 관련된 transformation이 정의 되어있는 모듈(normalization)
import torch.optim as optim # optimizer들이 정의되어있는 모듈
from torch.utils import data

## MNIST Feed-forward Neural Network

### Data Loader 불러오기

In [0]:
def MNIST_DATA(root='./', train=True, transfomrs=None, download=True, batch_size=32, num_worker=1):
    """Build the MNIST train/test datasets and a DataLoader for each.

    Args:
        root: Directory where the MNIST files are stored (and downloaded to).
        train: Unused; kept only for backward compatibility. Both the train
            and the test split are always returned.
        transfomrs: Optional transform applied to every image (the parameter
            name is kept as-is so existing keyword calls keep working).
            ``None`` selects ``T.ToTensor()``, the previous hard-coded choice.
        download: Whether to download the data when it is missing.
        batch_size: Mini-batch size used by both loaders.
        num_worker: Number of loader worker processes for both loaders.

    Returns:
        Tuple ``(mnist_train, mnist_test, trainLoader, testLoader)``.
    """
    print('[+] Get the MNIST DATA')

    # Default preprocessing: convert each image to a tensor PyTorch can use.
    transform = T.ToTensor() if transfomrs is None else transfomrs

    mnist_train = vision_dsets.MNIST(root=root,            # where the data is stored
                                     train=True,           # training split
                                     transform=transform,  # per-image preprocessing
                                     download=download)    # fetch the files if missing
    mnist_test = vision_dsets.MNIST(root=root,
                                    train=False,           # test split
                                    transform=transform,
                                    download=download)

    # A DataLoader is an iterator that yields `batch_size` samples per
    # iteration. Powers of two are the conventional batch sizes; bigger is
    # not automatically better because GPU memory limits the usable size.
    trainLoader = data.DataLoader(dataset=mnist_train,
                                  batch_size=batch_size,
                                  shuffle=True,  # reshuffle every epoch so the order is not learned
                                  num_workers=num_worker)
    testLoader = data.DataLoader(dataset=mnist_test,
                                 batch_size=batch_size,
                                 shuffle=False,  # evaluation does not need shuffling
                                 num_workers=num_worker)

    print('[+] Finished loading data & Preprocessing')
    return mnist_train, mnist_test, trainLoader, testLoader

In [3]:
# Build the MNIST datasets and their loaders with mini-batches of 32 samples.
trainDataset, testDataset, trainLoader, testLoader = MNIST_DATA(batch_size=32)

  0%|          | 0/9912422 [00:00<?, ?it/s]

[+] Get the MNIST DATA
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ./MNIST/raw/train-images-idx3-ubyte.gz


9920512it [00:00, 21422860.39it/s]                            


Extracting ./MNIST/raw/train-images-idx3-ubyte.gz to ./MNIST/raw


32768it [00:00, 315144.56it/s]                           
0it [00:00, ?it/s]

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ./MNIST/raw/train-labels-idx1-ubyte.gz
Extracting ./MNIST/raw/train-labels-idx1-ubyte.gz to ./MNIST/raw
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ./MNIST/raw/t10k-images-idx3-ubyte.gz


1654784it [00:00, 5298643.90it/s]                           
8192it [00:00, 130966.09it/s]


Extracting ./MNIST/raw/t10k-images-idx3-ubyte.gz to ./MNIST/raw
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ./MNIST/raw/t10k-labels-idx1-ubyte.gz
Extracting ./MNIST/raw/t10k-labels-idx1-ubyte.gz to ./MNIST/raw
Processing...
Done!
[+] Finished loading data & Preprocessing


### Train Function

In [0]:
def train_network(net, optimizer, trainloader, epochs=5, criterion=None):
    """Train ``net`` with mini-batches from ``trainloader``.

    Args:
        net: Model to train; it is switched to training mode.
        optimizer: Optimizer already bound to ``net``'s parameters.
        trainloader: Iterable yielding ``(inputs, labels)`` batches.
        epochs: Number of full passes over ``trainloader``.
        criterion: Loss callable ``criterion(outputs, labels)``. When ``None``
            the module-level ``criterion`` is used (the previous behaviour);
            if none is defined, ``nn.CrossEntropyLoss()`` is used.

    Returns:
        None. The mean loss of every 500 mini-batches is printed.
    """
    if criterion is None:
        # Backward compatibility: the loss used to be read from a global.
        criterion = globals().get('criterion')
        if criterion is None:
            criterion = nn.CrossEntropyLoss()

    # Put every batch on the device the model lives on instead of assuming
    # CUDA, so the same loop works on CPU-only machines.
    first_param = next(net.parameters(), None)
    device = first_param.device if first_param is not None else None

    # test() leaves the model in eval mode; make sure layers such as
    # BatchNorm/Dropout behave as training layers again.
    net.train()

    for epoch in range(epochs):
        running_loss = 0.0  # loss accumulated since the last report
        for i, (inputs, labels) in enumerate(trainloader):
            if device is not None:
                inputs, labels = inputs.to(device), labels.to(device)

            # Gradients accumulate across backward() calls, so reset them
            # before computing the gradients of this mini-batch.
            optimizer.zero_grad()

            outputs = net(inputs)              # forward pass
            loss = criterion(outputs, labels)  # loss between outputs and targets
            loss.backward()                    # backpropagation
            optimizer.step()                   # weight update from the gradients

            running_loss += loss.item()
            if i % 500 == 499:  # report every 500 mini-batches
                print('[%d, %5d] loss: %.3f' %
                      (epoch+1, i+1, running_loss/500))
                running_loss = 0.0

    print('Finished Trainig')



In [0]:
def test(model, testloader):
    model.eval() # batchnorm, dropout을 사용하는데 test이므로 eval 모드로 사용하겠다고 선언
    test_loss = 0
    correct = 0
    for data, target in testloader:
        # gpu로 올려준다.
        data, target = data.cuda(), target.cuda()
        outputs = model(data)
        pred = outputs.max(1, keepdim=True)[1] # get the index of max
        # max인 이유 -> 가장 높은 score 가진 것으로 예측한것임
        correct += pred.eq(target.view_as(pred)).sum().item() # 정답 데이터의 갯수를 반환

    test_loss /= len(testloader.dataset)
    print('\n Test set: Accuracy: {}/{} ({:.0f}%)\n'.format(
        correct, len(testloader.dataset), 
        100. * correct / len(testloader.dataset)
    ))

## Neural Network + Activation Function 

### 간단한 Neural Network를 만들어보자.(1)
특징: 2개의 Layer를 가지는 Neural Network  
Layer1 = input: 28*28, output: 30 + Activation Function = Sigmoid  
Layer2 = input: 30, output:10 -> 모델의 클래스 개수(0~9)가 output 수  
Cross Entropy Loss, SGD optimizer

In [0]:
class MNIST_Net(nn.Module):
    """Two-layer fully connected classifier: 784 -> 30 (sigmoid) -> 10 logits."""

    def __init__(self):
        super(MNIST_Net, self).__init__()

        self.fc0 = nn.Linear(28*28, 30)
        self.fc1 = nn.Linear(30,10)

    def forward(self, x):
        """Return 10 raw class scores (logits) for every input row.

        Args:
            x: Tensor that can be viewed as rows of 28*28 values.
        """
        x = x.view(-1, 28*28)  # reshape to (batch_size, input_size)
        # torch.sigmoid computes the same values as F.sigmoid but does not
        # trigger the deprecation warning F.sigmoid emits on PyTorch 1.x.
        x = torch.sigmoid(self.fc0(x))
        x = self.fc1(x)  # no activation here: the output stays as logits
        return x


In [0]:
# Cross-entropy loss; it applies softmax internally, so the model must output raw logits.
criterion = nn.CrossEntropyLoss()
# Instantiate the model defined above and move it to the GPU (Module.cuda() works in place).
mnist_net = MNIST_Net()
mnist_net.cuda()
# Plain SGD over all model parameters.
optimizer = optim.SGD(params=mnist_net.parameters(), lr=0.001)

In [8]:
# Train the sigmoid model for train_network's default of 5 epochs.
train_network(mnist_net, optimizer, trainLoader)



[1,   500] loss: 2.353
[1,  1000] loss: 2.310
[1,  1500] loss: 2.286
[2,   500] loss: 2.259
[2,  1000] loss: 2.247
[2,  1500] loss: 2.234
[3,   500] loss: 2.212
[3,  1000] loss: 2.198
[3,  1500] loss: 2.185
[4,   500] loss: 2.158
[4,  1000] loss: 2.143
[4,  1500] loss: 2.125
[5,   500] loss: 2.093
[5,  1000] loss: 2.073
[5,  1500] loss: 2.053
Finished Trainig


In [9]:
# Print the trained model's accuracy on the test loader.
test(mnist_net, testLoader)




 Test set: Accuracy: 6394/10000 (64%)



### 간단한 Neural Network를 만들어보자.(2)
특징: 2개의 Layer를 가지는 Neural Network  
Layer1 = input: 28*28, output: 30 + Activation Function = tanh   
Layer2 = input: 30, output:10 -> 모델의 클래스 개수(0~9)가 output 수  
Cross Entropy Loss, SGD optimizer

In [0]:

class MNIST_Net(nn.Module):
    """Two-layer fully connected classifier: 784 -> 30 (tanh) -> 10 logits."""

    def __init__(self):
        # nn.Module.__init__ must run first: it creates the internal
        # registries that assigning layers as attributes relies on.
        super(MNIST_Net, self).__init__()
        # an affine operation: y = Wx + b
        self.fc0 = nn.Linear(28*28,30)
        self.fc1 = nn.Linear(30, 10)

    def forward(self, x):
        """Return 10 raw class scores (logits) for every input row.

        Args:
            x: Tensor that can be viewed as rows of 28*28 values.
        """
        # view() reinterprets the data with the given shape, e.g.
        # (batch_size, 28, 28) -> (batch_size, 28*28).
        x = x.view(-1,28*28)
        # 28*28 -> 30 followed by the activation. torch.tanh computes the
        # same values as F.tanh but does not trigger the deprecation warning
        # F.tanh emits on PyTorch 1.x.
        x = torch.tanh(self.fc0(x))
        x = self.fc1(x)  # 30 -> 10: one logit per class
        return x

In [0]:
# Loss on raw logits.
criterion = nn.CrossEntropyLoss()
# Fresh tanh model, moved to the GPU in place.
mnist_net = MNIST_Net()
mnist_net.cuda()
# Plain SGD over all model parameters.
optimizer = optim.SGD(params=mnist_net.parameters(), lr=0.001)

In [12]:
# Train the tanh model for train_network's default of 5 epochs.
train_network(mnist_net, optimizer, trainLoader)



[1,   500] loss: 2.221
[1,  1000] loss: 2.059
[1,  1500] loss: 1.907
[2,   500] loss: 1.652
[2,  1000] loss: 1.529
[2,  1500] loss: 1.407
[3,   500] loss: 1.241
[3,  1000] loss: 1.152
[3,  1500] loss: 1.078
[4,   500] loss: 0.979
[4,  1000] loss: 0.936
[4,  1500] loss: 0.887
[5,   500] loss: 0.827
[5,  1000] loss: 0.790
[5,  1500] loss: 0.755
Finished Trainig


In [13]:
# Print the trained model's accuracy on the test loader.
test(mnist_net, testLoader)




 Test set: Accuracy: 8485/10000 (85%)



### 간단한 Neural Network 를 만들어 봅시다. (3)
특징 : 2개의 Layer를 가지는 Neural Network 
<구성>  
Layer 1 - input:28*28 , output : 30 + Activation Function - Relu

Layer 2 - input: 30 output:10

Cross Entropy Loss  + SGD optimizer 

In [0]:
class MNIST_Net(nn.Module):
    """Two-layer fully connected classifier: 784 -> 30 (ReLU) -> 10 logits."""

    def __init__(self):
        super().__init__()
        # Layers are created in the order fc0, fc1.
        self.fc0 = nn.Linear(in_features=28 * 28, out_features=30)
        self.fc1 = nn.Linear(in_features=30, out_features=10)

    def forward(self, x):
        """Flatten the input to rows of 784 values and return 10 logits per row."""
        flat = x.view(-1, 28 * 28)
        hidden = F.relu(self.fc0(flat))
        return self.fc1(hidden)

In [0]:
# Loss on raw logits.
criterion = nn.CrossEntropyLoss()
# Fresh ReLU model, moved to the GPU in place.
mnist_net = MNIST_Net()
mnist_net.cuda()
# Plain SGD over all model parameters.
optimizer = optim.SGD(params=mnist_net.parameters(), lr=0.001)

In [16]:
# Train the ReLU model for train_network's default of 5 epochs.
train_network(mnist_net, optimizer, trainLoader)

[1,   500] loss: 2.282
[1,  1000] loss: 2.209
[1,  1500] loss: 2.118
[2,   500] loss: 1.913
[2,  1000] loss: 1.768
[2,  1500] loss: 1.612
[3,   500] loss: 1.347
[3,  1000] loss: 1.209
[3,  1500] loss: 1.096
[4,   500] loss: 0.939
[4,  1000] loss: 0.889
[4,  1500] loss: 0.825
[5,   500] loss: 0.746
[5,  1000] loss: 0.712
[5,  1500] loss: 0.680
Finished Trainig


In [17]:
# Print the trained model's accuracy on the test loader.
test(mnist_net, testLoader)


 Test set: Accuracy: 8573/10000 (86%)



성능 차이가 나는 이유
1. sigmoid(64%)
    - not zero centered -> zigzag(느림)
    - saturated -> 제대로 학습이 안됨
2. tanh(85%)
    - zero centered
    - saturated -> 제대로 학습이 안됨
3. relu(86%)
    - 계산 비용이 작다
    - 양수 부분에서 not saturated -> good

### 간단한 Neural Network 를 만들어 봅시다. (4) 
특징 : 3개의 Layer를 가지는 Neural Network 
<구성>  
Layer 1 - input:28*28 , output : 40 + Activation Function - sigmoid 

Layer 2 - input: 40 output: 30 + Activation Function - sigmoid

Layer 3 - input: 30 output : 10

Cross Entropy Loss  + SGD optimizer 

In [0]:
class MNIST_Net(nn.Module):
    """Three-layer classifier: 784 -> 40 (sigmoid) -> 30 (sigmoid) -> 10 logits."""

    def __init__(self):
        super(MNIST_Net, self).__init__()

        self.fc0 = nn.Linear(28*28, 40)
        self.fc1 = nn.Linear(40, 30)
        self.fc2 = nn.Linear(30,10)

    def forward(self, x):
        """Return 10 raw class scores (logits) for every input row.

        Args:
            x: Tensor that can be viewed as rows of 28*28 values.
        """
        x = x.view(-1, 28*28)  # reshape to (batch_size, 784)
        # torch.sigmoid computes the same values as F.sigmoid but does not
        # trigger the deprecation warning F.sigmoid emits on PyTorch 1.x.
        x = torch.sigmoid(self.fc0(x))
        x = torch.sigmoid(self.fc1(x))
        x = self.fc2(x)  # no activation here: the output stays as logits
        return x

In [0]:
# Loss on raw logits.
criterion = nn.CrossEntropyLoss()
# Fresh three-layer sigmoid model, moved to the GPU in place.
mnist_net = MNIST_Net()
mnist_net.cuda()
# Plain SGD over all model parameters.
optimizer = optim.SGD(params=mnist_net.parameters(), lr=0.001)

In [20]:
# Train the three-layer sigmoid model for train_network's default of 5 epochs.
train_network(mnist_net, optimizer, trainLoader)



[1,   500] loss: 2.346
[1,  1000] loss: 2.322
[1,  1500] loss: 2.310
[2,   500] loss: 2.301
[2,  1000] loss: 2.300
[2,  1500] loss: 2.299
[3,   500] loss: 2.299
[3,  1000] loss: 2.299
[3,  1500] loss: 2.298
[4,   500] loss: 2.298
[4,  1000] loss: 2.297
[4,  1500] loss: 2.297
[5,   500] loss: 2.296
[5,  1000] loss: 2.296
[5,  1500] loss: 2.297
Finished Trainig


In [21]:
# Print the trained model's accuracy on the test loader.
test(mnist_net, testLoader)




 Test set: Accuracy: 1135/10000 (11%)



### Q) 왜 학습이 잘 안될까?
 - 시그모이드는 레이어가 쌓이면 학습이 잘 안된다.
 - sigmoid가 saturated 되면 gradient가 0에 가까워지고, 레이어를 거칠수록 이 작은 값이 계속 곱해져 gradient가 사라진다(vanishing gradient).

### 간단한 Neural Network 를 만들어 봅시다. (5) 
특징 : 3개의 Layer를 가지는 Neural Network 
<구성>  
Layer 1 - input:28*28 , output : 40 + Activation Function - Relu 

Layer 2 - input: 40 output: 30 + Activation Function - Relu

Layer 3 - input: 30 output : 10

Cross Entropy Loss  + SGD optimizer 

In [0]:
class MNIST_Net(nn.Module):
    """Three-layer classifier: 784 -> 40 (ReLU) -> 30 (ReLU) -> 10 logits."""

    def __init__(self):
        super().__init__()
        # Layers are created in the order fc0, fc1, fc2.
        self.fc0 = nn.Linear(in_features=28 * 28, out_features=40)
        self.fc1 = nn.Linear(in_features=40, out_features=30)
        self.fc2 = nn.Linear(in_features=30, out_features=10)

    def forward(self, x):
        """Flatten the input to rows of 784 values and return 10 logits per row."""
        out = x.view(-1, 28 * 28)
        # Both hidden layers are followed by ReLU; the output layer is not.
        for hidden_layer in (self.fc0, self.fc1):
            out = F.relu(hidden_layer(out))
        return self.fc2(out)

In [0]:
# Loss on raw logits.
criterion = nn.CrossEntropyLoss()
# Fresh three-layer ReLU model, moved to the GPU in place.
mnist_net = MNIST_Net()
mnist_net.cuda()
# Plain SGD over all model parameters.
optimizer = optim.SGD(params=mnist_net.parameters(), lr=0.001)

In [24]:
# Train the three-layer ReLU model for train_network's default of 5 epochs.
train_network(mnist_net, optimizer, trainLoader)

[1,   500] loss: 2.298
[1,  1000] loss: 2.272
[1,  1500] loss: 2.245
[2,   500] loss: 2.184
[2,  1000] loss: 2.138
[2,  1500] loss: 2.086
[3,   500] loss: 1.956
[3,  1000] loss: 1.868
[3,  1500] loss: 1.757
[4,   500] loss: 1.546
[4,  1000] loss: 1.409
[4,  1500] loss: 1.300
[5,   500] loss: 1.118
[5,  1000] loss: 1.042
[5,  1500] loss: 0.978
Finished Trainig


In [25]:
# Print the trained model's accuracy on the test loader.
test(mnist_net, testLoader)


 Test set: Accuracy: 7498/10000 (75%)



(4) 와의 차이: relu는 음수이면 0 양수이면 x인 함수 -> gradient saturate가 없다.  
(3) 과의 차이: SGD가 local minima & saddle point에서 문제가 발생한다. -> optimizer를 바꿔준다. 

### 간단한 Neural Network 를 만들어 봅시다. (6) 
특징 : 3개의 Layer를 가지는 Neural Network 
<구성>  
Layer 1 - input:28*28 , output : 40 + Activation Function - Relu 

Layer 2 - input: 40 output: 30 + Activation Function - Relu

Layer 3 - input: 30 output : 10

Cross Entropy Loss  + **Adam** optimizer 

In [0]:
 class MNIST_Net(nn.Module):
    def __init__(self):
        super(MNIST_Net, self).__init__()
        # an affine operation: y = Wx + b
        self.fc0 = nn.Linear(28*28,40) #Layer 1 
        self.fc1 = nn.Linear(40, 30) # Layer 2
        self.fc2 = nn.Linear(30, 10) # Layer 3

    def forward(self, x):
       
        x = x.view(-1,28*28)
        x = F.relu(self.fc0(x)) # Layer 1
        x = F.relu(self.fc1(x)) # Layer 2
        x = self.fc2(x) # Layer 3 
        return x

In [0]:
# Loss on raw logits.
criterion = nn.CrossEntropyLoss()
# Fresh three-layer ReLU model, moved to the GPU in place.
mnist_net = MNIST_Net()
mnist_net.cuda()
# Adam combines momentum with per-parameter adaptive learning rates.
# NOTE: the variable name `optimzier` (sic) is kept because the training cell below passes it in.
optimzier = optim.Adam(params=mnist_net.parameters(), lr=0.001)

In [28]:
# Train with the Adam optimizer created above (variable spelled `optimzier`) for the default 5 epochs.
train_network(mnist_net, optimzier, trainLoader)

[1,   500] loss: 0.665
[1,  1000] loss: 0.316
[1,  1500] loss: 0.276
[2,   500] loss: 0.213
[2,  1000] loss: 0.195
[2,  1500] loss: 0.178
[3,   500] loss: 0.150
[3,  1000] loss: 0.148
[3,  1500] loss: 0.137
[4,   500] loss: 0.116
[4,  1000] loss: 0.126
[4,  1500] loss: 0.121
[5,   500] loss: 0.106
[5,  1000] loss: 0.098
[5,  1500] loss: 0.106
Finished Trainig


In [29]:
# Print the trained model's accuracy on the test loader.
test(mnist_net, testLoader)


 Test set: Accuracy: 9640/10000 (96%)



### 간단한 Neural Network 를 만들어 봅시다. (7) Layer 를 줄여볼까요? 
특징 : 2개의 Layer를 가지는 Neural Network 
<구성>  
Layer 1 - input:28*28 , output : 30 + Activation Function - Relu 

Layer 2 - input: 30 output : 10

Cross Entropy Loss  + **Adam** optimizer 

In [0]:
 class MNIST_Net(nn.Module):
    def __init__(self):
        super(MNIST_Net, self).__init__()
        # an affine operation: y = Wx + b
        self.fc0 = nn.Linear(28*28,30) #Layer 1 
        self.fc1 = nn.Linear(30, 10) # Layer 3

    def forward(self, x):
        x = x.view(-1,28*28)
        x = F.relu(self.fc0(x)) # Layer 1
        x = self.fc1(x) # Layer 3 
        return x

In [0]:
# Loss on raw logits.
criterion = nn.CrossEntropyLoss()
# Fresh two-layer ReLU model, moved to the GPU in place.
mnist_net = MNIST_Net()
mnist_net.cuda()
# NOTE: the variable name `optimzier` (sic) is kept because the training cell below passes it in.
optimzier = optim.Adam(params=mnist_net.parameters(), lr=0.001)

In [44]:
# Train with the Adam optimizer created above (variable spelled `optimzier`) for the default 5 epochs.
train_network(mnist_net, optimzier, trainLoader)

[1,   500] loss: 0.680
[1,  1000] loss: 0.332
[1,  1500] loss: 0.298
[2,   500] loss: 0.246
[2,  1000] loss: 0.224
[2,  1500] loss: 0.207
[3,   500] loss: 0.180
[3,  1000] loss: 0.169
[3,  1500] loss: 0.175
[4,   500] loss: 0.148
[4,  1000] loss: 0.153
[4,  1500] loss: 0.150
[5,   500] loss: 0.136
[5,  1000] loss: 0.127
[5,  1500] loss: 0.125
Finished Trainig


In [45]:
# Print the trained model's accuracy on the test loader.
test(mnist_net, testLoader)


 Test set: Accuracy: 9582/10000 (96%)



### 간단한 Neural Network 를 만들어 봅시다. (7) Batch Norm 을 줘 볼까요?
특징 : 2개의 Layer를 가지는 Neural Network 
<구성>  
Layer 1 - input:28*28 , output : 30 + Activation Function - Relu  + Batch Norm

Layer 2 - input: 30 output : 10

Cross Entropy Loss  + **Adam** optimizer 

In [0]:
class MNIST_Net(nn.Module):
    """Two-layer classifier with batch norm: 784 -> 30 (BatchNorm, ReLU) -> 10 logits."""

    def __init__(self):
        super().__init__()
        # Layers are created in the order fc0, bn0, fc1.
        self.fc0 = nn.Linear(in_features=28 * 28, out_features=30)
        self.bn0 = nn.BatchNorm1d(num_features=30)
        self.fc1 = nn.Linear(in_features=30, out_features=10)

    def forward(self, x):
        """Flatten the input to rows of 784 values and return 10 logits per row."""
        flat = x.view(-1, 28 * 28)
        # Batch norm sits between the linear layer and the non-linearity.
        normalized = self.bn0(self.fc0(flat))
        hidden = F.relu(normalized)
        return self.fc1(hidden)

In [0]:
# Loss on raw logits.
criterion = nn.CrossEntropyLoss()
# Fresh batch-norm model, moved to the GPU in place.
mnist_net = MNIST_Net()
mnist_net.cuda()
# Adam over all model parameters.
optimizer = optim.Adam(params=mnist_net.parameters(), lr=0.001)

In [54]:
# Train the batch-norm model for train_network's default of 5 epochs.
train_network(mnist_net, optimizer, trainLoader)

[1,   500] loss: 0.720
[1,  1000] loss: 0.334
[1,  1500] loss: 0.285
[2,   500] loss: 0.225
[2,  1000] loss: 0.224
[2,  1500] loss: 0.208
[3,   500] loss: 0.181
[3,  1000] loss: 0.182
[3,  1500] loss: 0.182
[4,   500] loss: 0.156
[4,  1000] loss: 0.156
[4,  1500] loss: 0.164
[5,   500] loss: 0.139
[5,  1000] loss: 0.138
[5,  1500] loss: 0.145
Finished Trainig


In [55]:
# Print the trained model's accuracy on the test loader.
test(mnist_net, testLoader)


 Test set: Accuracy: 9641/10000 (96%)



### 간단한 Neural Network 를 만들어 봅시다. (8) 더 깊은 레이어에 Batch Norm 을 줘 볼까요?
특징 : 3개의 Layer를 가지는 Neural Network

<구성>  

Layer 1 - input:28*28 , output : 40 + Activation Function - Relu + BatchNorm

Layer 2 - input: 40 output: 30 + Activation Function - Relu  + BatchNorm

Layer 3 - input: 30 output : 10

Cross Entropy Loss  + **Adam** optimizer 

In [0]:
class MNIST_Net(nn.Module):
    """Three-layer classifier with batch norm after both hidden linear layers.

    Architecture: 784 -> 40 (BatchNorm, ReLU) -> 30 (BatchNorm, ReLU) -> 10 logits.
    """

    def __init__(self):
        super().__init__()
        # Layers are created in the order fc0, bn0, fc1, bn1, fc2.
        self.fc0 = nn.Linear(in_features=28 * 28, out_features=40)
        self.bn0 = nn.BatchNorm1d(num_features=40)
        self.fc1 = nn.Linear(in_features=40, out_features=30)
        self.bn1 = nn.BatchNorm1d(num_features=30)
        self.fc2 = nn.Linear(in_features=30, out_features=10)

    def forward(self, x):
        """Flatten the input to rows of 784 values and return 10 logits per row."""
        out = x.view(-1, 28 * 28)
        # Each hidden stage is linear -> batch norm -> ReLU.
        for linear, norm in ((self.fc0, self.bn0), (self.fc1, self.bn1)):
            out = F.relu(norm(linear(out)))
        return self.fc2(out)

In [0]:
# Loss on raw logits.
criterion = nn.CrossEntropyLoss()
# Fresh three-layer batch-norm model, moved to the GPU in place.
mnist_net = MNIST_Net()
mnist_net.cuda()
# Adam over all model parameters.
optimizer = optim.Adam(params=mnist_net.parameters(), lr=0.001)

In [58]:
# Train the three-layer batch-norm model for train_network's default of 5 epochs.
train_network(mnist_net, optimizer, trainLoader)

[1,   500] loss: 0.676
[1,  1000] loss: 0.269
[1,  1500] loss: 0.227
[2,   500] loss: 0.173
[2,  1000] loss: 0.162
[2,  1500] loss: 0.140
[3,   500] loss: 0.121
[3,  1000] loss: 0.122
[3,  1500] loss: 0.126
[4,   500] loss: 0.109
[4,  1000] loss: 0.107
[4,  1500] loss: 0.112
[5,   500] loss: 0.093
[5,  1000] loss: 0.093
[5,  1500] loss: 0.104
Finished Trainig


In [59]:
# Print the trained model's accuracy on the test loader.
test(mnist_net, testLoader)


 Test set: Accuracy: 9744/10000 (97%)



깊은 Neural Network에서 BatchNorm을 사용하면 정확도가 증가한다.

## Practical Guide Pytorch nn.Sequential 



```
x = F.relu(self.bn0(self.fc0(x)))
x = F.relu(self.bn1(self.fc1(x)))
```
너무 복잡하지 않나요?  그냥 x = self.fc(x) 쉽게 해버리면 안 될까요?

Solution : nn.Sequential + 자매품 nn.ModuleList


In [0]:
class MNIST_Net(nn.Module):
    """Three-layer batch-norm classifier expressed as a single nn.Sequential.

    Architecture: 784 -> 40 (BatchNorm, ReLU) -> 30 (BatchNorm, ReLU) -> 10 logits.
    """

    def __init__(self):
        super().__init__()
        # Batch norm goes after the linear layer and before the non-linearity.
        self.net = nn.Sequential(
            nn.Linear(28 * 28, 40),  # Layer 1
            nn.BatchNorm1d(40),
            nn.ReLU(),
            nn.Linear(40, 30),       # Layer 2
            nn.BatchNorm1d(30),
            nn.ReLU(),
            nn.Linear(30, 10),       # Layer 3
        )

    def forward(self, x):
        """Flatten the input to rows of 784 values and run it through the stack."""
        flat = x.view(-1, 28 * 28)
        return self.net(flat)

In [0]:
# Loss on raw logits.
criterion = nn.CrossEntropyLoss()
# Fresh nn.Sequential-based model, moved to the GPU in place.
mnist_net = MNIST_Net()
mnist_net.cuda()
# Adam over all model parameters.
optimizer = optim.Adam(params=mnist_net.parameters(), lr=0.001)

In [67]:
# Train the nn.Sequential-based model for train_network's default of 5 epochs.
train_network(mnist_net, optimizer, trainLoader)

[1,   500] loss: 0.670
[1,  1000] loss: 0.274
[1,  1500] loss: 0.211
[2,   500] loss: 0.157
[2,  1000] loss: 0.159
[2,  1500] loss: 0.161
[3,   500] loss: 0.134
[3,  1000] loss: 0.131
[3,  1500] loss: 0.129
[4,   500] loss: 0.108
[4,  1000] loss: 0.115
[4,  1500] loss: 0.113
[5,   500] loss: 0.095
[5,  1000] loss: 0.103
[5,  1500] loss: 0.105
Finished Trainig


In [68]:
# Print the trained model's accuracy on the test loader.
test(mnist_net, testLoader)


 Test set: Accuracy: 9709/10000 (97%)



#### 연습해 봅시다 ! 

특징 : 2개의 Layer를 가지는 Neural Network <구성>

Layer 1 - input:28*28 , output : 30 + Activation Function - Relu + Batch Norm

Layer 2 - input: 30 output : 10

Cross Entropy Loss + Adam optimizer

In [0]:
class MNIST_Net(nn.Module):
    """Two-layer batch-norm classifier expressed as a single nn.Sequential.

    Architecture: 784 -> 30 (BatchNorm, ReLU) -> 10 logits.
    """

    def __init__(self):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(28 * 28, 30),  # Layer 1
            nn.BatchNorm1d(30),
            nn.ReLU(),
            nn.Linear(30, 10),       # Layer 2
        )

    def forward(self, x):
        """Flatten the input to rows of 784 values and run it through the stack."""
        flat = x.view(-1, 28 * 28)
        logits = self.net(flat)
        return logits

In [0]:
# Loss on raw logits.
criterion = nn.CrossEntropyLoss()
# Fresh two-layer nn.Sequential-based model, moved to the GPU in place.
mnist_net = MNIST_Net()
mnist_net.cuda()
# Adam over all model parameters.
optimizer = optim.Adam(params=mnist_net.parameters(), lr=0.001)

In [72]:
# Train the two-layer nn.Sequential-based model for train_network's default of 5 epochs.
train_network(mnist_net, optimizer, trainLoader)

[1,   500] loss: 0.708
[1,  1000] loss: 0.347
[1,  1500] loss: 0.290
[2,   500] loss: 0.224
[2,  1000] loss: 0.221
[2,  1500] loss: 0.205
[3,   500] loss: 0.176
[3,  1000] loss: 0.168
[3,  1500] loss: 0.167
[4,   500] loss: 0.143
[4,  1000] loss: 0.147
[4,  1500] loss: 0.161
[5,   500] loss: 0.131
[5,  1000] loss: 0.133
[5,  1500] loss: 0.136
Finished Trainig


In [73]:
# Print the trained model's accuracy on the test loader.
test(mnist_net, testLoader)


 Test set: Accuracy: 9645/10000 (96%)

