# NN 심화 (cifar10)

## Reading Dataset

In [1]:
import torchvision
import torchvision.transforms as transforms

transform = transforms.Compose([transforms.ToTensor(),
                                transforms.Normalize((0.5,0.5,0.5), (0.5,0.5,0.5))])
'''
Torchvision : Python imaging format image를 불러옴
transforms.Compose : 전처리 pipeline
transforms.ToTensor : 이미지(range [0 , 255])를 float텐서(shape(CxHxW) with range[0.0, 1.0]) 형태로 뱐환
PIL image (H x W x C) -> Tensor (C x H x W)
transforms.Normalize : input = (input - 0.5 ) / 0.5
'''

# CIFAR10 data를 transform 시켜서 불러옴
trainset = torchvision.datasets.CIFAR10(root='./data',
                                        train=True,
                                        download=True,
                                        transform=transform)


testset = torchvision.datasets.CIFAR10(root='./data',
                                       train=False,
                                       download=True,
                                       transform=transform)



Files already downloaded and verified
Files already downloaded and verified


### 설정

batch_size : 32

epoch : 10

In [2]:
#config
src = {'input_size':3*32*32,
       'hidden_size1':50,
       'hidden_size2':25,
       'output_size':10,
       'init_weight_range':0.5,
       'num_epochs':10,
       'batch_size':32,
       'learning_rate':1e-3}

In [3]:
from torch.utils.data import DataLoader

'''
torch.utils.data.DataLoader : data를 batch size로 묶음
'''

trainloader = DataLoader(trainset, 
                         batch_size=src['batch_size'],
                         shuffle=True, 
                         drop_last=False)

testloader = DataLoader(testset, 
                        batch_size=src['batch_size'],
                        shuffle=False,
                        drop_last=False)
classes = ('plane', 'car', 'bird', 'cat', 'deer',
           'dog', 'frog', 'horse', 'ship', 'truck')

In [4]:
trainiter = iter(trainloader)
images, labels = trainiter.next()

print(len(trainloader))
print(images.shape)
print(labels)

1563
torch.Size([32, 3, 32, 32])
tensor([8, 8, 1, 6, 8, 0, 6, 3, 6, 1, 1, 3, 5, 7, 4, 8, 4, 2, 3, 1, 2, 1, 8, 7,
        9, 8, 3, 9, 6, 7, 1, 7])


## Modeling

### 기본 모델 

linear - relu - linear - relu - linear - softmax

In [5]:
import torch.nn as nn
import torch.nn.functional as F

class Net(nn.Module):
    def __init__(self, src):
        super(Net, self).__init__()
        self.fc1 = nn.Linear(src['input_size'], src['hidden_size1'])
        self.fc2 = nn.Linear(src['hidden_size1'], src['hidden_size2'])
        self.fc3 = nn.Linear(src['hidden_size2'], src['output_size'])
                             
        
        self.init_range = src['init_weight_range']
    
    #가중치 초기화
    def init_weight(self): 
        self.fc1.weight.data.uniform_(-self.init_range, self.init_range)
        self.fc2.weight.data.uniform_(-self.init_range, self.init_range)
        self.fc3.weight.data.uniform_(-self.init_range, self.init_range)
        
    def forward(self, img):
        x = img.view(img.shape[0], -1)
        #--------------------
        x = self.fc1(x)
        x = F.relu(x) #relu
        
        x = self.fc2(x)
        x = F.relu(x) #relu
        
        y = self.fc3(x)
        #y = F.softmax(x, dim=0) #softmax
        #--------------------
        
        
        return y

### dropout 모델 

linear - relu - dropout - linear - relu - dropout - linear - softmax

In [15]:
import torch.nn as nn
import torch.nn.functional as F

class Net_dropout(nn.Module):
    def __init__(self, src):
        super(Net_dropout, self).__init__()
        self.fc1 = nn.Linear(src['input_size'], src['hidden_size1'])
        self.fc2 = nn.Linear(src['hidden_size1'], src['hidden_size2'])
        self.fc3 = nn.Linear(src['hidden_size2'], src['output_size'])
        
        self.init_range = src['init_weight_range']
    
    #가중치 초기화
    def init_weight(self): 
        self.fc1.weight.data.uniform_(-self.init_range, self.init_range)
        self.fc2.weight.data.uniform_(-self.init_range, self.init_range)
        self.fc3.weight.data.uniform_(-self.init_range, self.init_range)
        
    def forward(self, img):
        x = img.view(img.shape[0], -1)
        #--------------------
        x = self.fc1(x)
        x = F.relu(x) #relu
        x = F.dropout(x)
        
        x = self.fc2(x)
        x = F.relu(x) #relu
        x = F.dropout(x)
        
        y = self.fc3(x)
        #y = F.softmax(x, dim=0) #softmax
        #--------------------
        
        
        return y

### batch normalization 모델 

linear - relu - batchNorm - linear - relu - batchNorm - linear - softmax

In [16]:
import torch.nn as nn
import torch.nn.functional as F

class Net_batchnorm(nn.Module):
    def __init__(self, src):
        super(Net_batchnorm, self).__init__()
        self.fc1 = nn.Linear(src['input_size'], src['hidden_size1'])
        self.bn1 = nn.BatchNorm1d(src['hidden_size1'])
        self.fc2 = nn.Linear(src['hidden_size1'], src['hidden_size2'])
        self.bn2 = nn.BatchNorm1d(src['hidden_size2'])
        self.fc3 = nn.Linear(src['hidden_size2'], src['output_size'])
        
        self.init_range = src['init_weight_range']
    
    #가중치 초기화
    def init_weight(self): 
        self.fc1.weight.data.uniform_(-self.init_range, self.init_range)
        self.fc2.weight.data.uniform_(-self.init_range, self.init_range)
        self.fc3.weight.data.uniform_(-self.init_range, self.init_range)
        
    def forward(self, img):
        x = img.view(img.shape[0], -1)
        #--------------------
        x = self.fc1(x)
        x = F.relu(x) #relu
        x = self.bn1(x)
        
        x = self.fc2(x)
        x = F.relu(x) #relu
        x = self.bn2(x)
        
        y = self.fc3(x)
        #y = F.softmax(x, dim=0) #softmax
        #--------------------
        
        
        return y

## Training and Evaluating

#### 기본 모델 monentum 적용

In [25]:
model = Net(src)
y = model(images)

In [26]:
import torch.optim as optim

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), 
                      src['learning_rate'])


In [27]:
from tqdm import tqdm
import time
time_start = time.time()

model.init_weight()

for epoch in tqdm(range(src['num_epochs'])):
    current_loss = 0.0
#     model.train(True)
    
    for i, data in enumerate(trainloader):
        # get the inputs
        inputs, labels = data
        
        # zero the parameter gradients
        optimizer.zero_grad()
        
        # forward + backward + optimize
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        
        optimizer.step()
        
        # print statistics
        step = i + 1
        current_loss += loss.item()
        
        if step % 100 == 0 and step != 0:     # print every 100 mini-batches
            print('Epoch [%d/%d], Step [%d/%d], Loss: %.4f' %
                  (epoch + 1, src['num_epochs'], step, len(trainloader)//100 * 100, current_loss / 100))
            current_loss = 0.0 #100번동안 있었던 loss의 누적값 평균, 그 후 reset

traintime = time.time() - time_start

  0%|                                                                                           | 0/10 [00:00<?, ?it/s]

Epoch [1/10], Step [100/1500], Loss: 10.4984
Epoch [1/10], Step [200/1500], Loss: 7.5930
Epoch [1/10], Step [300/1500], Loss: 6.2303
Epoch [1/10], Step [400/1500], Loss: 5.2828
Epoch [1/10], Step [500/1500], Loss: 4.7198
Epoch [1/10], Step [600/1500], Loss: 4.2201
Epoch [1/10], Step [700/1500], Loss: 3.7854
Epoch [1/10], Step [800/1500], Loss: 3.5847
Epoch [1/10], Step [900/1500], Loss: 3.2589
Epoch [1/10], Step [1000/1500], Loss: 3.1466
Epoch [1/10], Step [1100/1500], Loss: 3.0973
Epoch [1/10], Step [1200/1500], Loss: 2.8837
Epoch [1/10], Step [1300/1500], Loss: 2.8317
Epoch [1/10], Step [1400/1500], Loss: 2.7001
Epoch [1/10], Step [1500/1500], Loss: 2.6719


 10%|████████▎                                                                          | 1/10 [00:20<03:03, 20.35s/it]

Epoch [2/10], Step [100/1500], Loss: 2.5885
Epoch [2/10], Step [200/1500], Loss: 2.5065
Epoch [2/10], Step [300/1500], Loss: 2.4446
Epoch [2/10], Step [400/1500], Loss: 2.4405
Epoch [2/10], Step [500/1500], Loss: 2.4457
Epoch [2/10], Step [600/1500], Loss: 2.4105
Epoch [2/10], Step [700/1500], Loss: 2.4144
Epoch [2/10], Step [800/1500], Loss: 2.3657
Epoch [2/10], Step [900/1500], Loss: 2.3259
Epoch [2/10], Step [1000/1500], Loss: 2.3381
Epoch [2/10], Step [1100/1500], Loss: 2.3307
Epoch [2/10], Step [1200/1500], Loss: 2.3232
Epoch [2/10], Step [1300/1500], Loss: 2.2708
Epoch [2/10], Step [1400/1500], Loss: 2.2654
Epoch [2/10], Step [1500/1500], Loss: 2.2798


 20%|████████████████▌                                                                  | 2/10 [00:40<02:42, 20.29s/it]

Epoch [3/10], Step [100/1500], Loss: 2.2583
Epoch [3/10], Step [200/1500], Loss: 2.2142
Epoch [3/10], Step [300/1500], Loss: 2.2349
Epoch [3/10], Step [400/1500], Loss: 2.2443
Epoch [3/10], Step [500/1500], Loss: 2.2221
Epoch [3/10], Step [600/1500], Loss: 2.2217
Epoch [3/10], Step [700/1500], Loss: 2.2181
Epoch [3/10], Step [800/1500], Loss: 2.2134
Epoch [3/10], Step [900/1500], Loss: 2.1979
Epoch [3/10], Step [1000/1500], Loss: 2.2102
Epoch [3/10], Step [1100/1500], Loss: 2.1920
Epoch [3/10], Step [1200/1500], Loss: 2.1698
Epoch [3/10], Step [1300/1500], Loss: 2.1948
Epoch [3/10], Step [1400/1500], Loss: 2.1708
Epoch [3/10], Step [1500/1500], Loss: 2.1801


 30%|████████████████████████▉                                                          | 3/10 [01:00<02:21, 20.26s/it]

Epoch [4/10], Step [100/1500], Loss: 2.1613
Epoch [4/10], Step [200/1500], Loss: 2.1613
Epoch [4/10], Step [300/1500], Loss: 2.1609
Epoch [4/10], Step [400/1500], Loss: 2.1468
Epoch [4/10], Step [500/1500], Loss: 2.1671
Epoch [4/10], Step [600/1500], Loss: 2.1372
Epoch [4/10], Step [700/1500], Loss: 2.1490
Epoch [4/10], Step [800/1500], Loss: 2.1586
Epoch [4/10], Step [900/1500], Loss: 2.1377
Epoch [4/10], Step [1000/1500], Loss: 2.1426
Epoch [4/10], Step [1100/1500], Loss: 2.1163
Epoch [4/10], Step [1200/1500], Loss: 2.1302
Epoch [4/10], Step [1300/1500], Loss: 2.1537
Epoch [4/10], Step [1400/1500], Loss: 2.1541
Epoch [4/10], Step [1500/1500], Loss: 2.1409


 40%|█████████████████████████████████▏                                                 | 4/10 [01:23<02:05, 20.90s/it]

Epoch [5/10], Step [100/1500], Loss: 2.1335
Epoch [5/10], Step [200/1500], Loss: 2.1053
Epoch [5/10], Step [300/1500], Loss: 2.1032
Epoch [5/10], Step [400/1500], Loss: 2.1120
Epoch [5/10], Step [500/1500], Loss: 2.1209
Epoch [5/10], Step [600/1500], Loss: 2.0970
Epoch [5/10], Step [700/1500], Loss: 2.1124
Epoch [5/10], Step [800/1500], Loss: 2.0773
Epoch [5/10], Step [900/1500], Loss: 2.1039
Epoch [5/10], Step [1000/1500], Loss: 2.1002
Epoch [5/10], Step [1100/1500], Loss: 2.0895
Epoch [5/10], Step [1200/1500], Loss: 2.1190
Epoch [5/10], Step [1300/1500], Loss: 2.1041
Epoch [5/10], Step [1400/1500], Loss: 2.1080
Epoch [5/10], Step [1500/1500], Loss: 2.0918


 50%|█████████████████████████████████████████▌                                         | 5/10 [01:43<01:44, 20.81s/it]

Epoch [6/10], Step [100/1500], Loss: 2.0760
Epoch [6/10], Step [200/1500], Loss: 2.1019
Epoch [6/10], Step [300/1500], Loss: 2.0922
Epoch [6/10], Step [400/1500], Loss: 2.0624
Epoch [6/10], Step [500/1500], Loss: 2.0678
Epoch [6/10], Step [600/1500], Loss: 2.0700
Epoch [6/10], Step [700/1500], Loss: 2.0763
Epoch [6/10], Step [800/1500], Loss: 2.0947
Epoch [6/10], Step [900/1500], Loss: 2.0758
Epoch [6/10], Step [1000/1500], Loss: 2.0749
Epoch [6/10], Step [1100/1500], Loss: 2.0815
Epoch [6/10], Step [1200/1500], Loss: 2.0707
Epoch [6/10], Step [1300/1500], Loss: 2.0523
Epoch [6/10], Step [1400/1500], Loss: 2.0505
Epoch [6/10], Step [1500/1500], Loss: 2.0727


 60%|█████████████████████████████████████████████████▊                                 | 6/10 [02:03<01:22, 20.59s/it]

Epoch [7/10], Step [100/1500], Loss: 2.0692
Epoch [7/10], Step [200/1500], Loss: 2.0805
Epoch [7/10], Step [300/1500], Loss: 2.0632
Epoch [7/10], Step [400/1500], Loss: 2.0412
Epoch [7/10], Step [500/1500], Loss: 2.0413
Epoch [7/10], Step [600/1500], Loss: 2.0686
Epoch [7/10], Step [700/1500], Loss: 2.0411
Epoch [7/10], Step [800/1500], Loss: 2.0417
Epoch [7/10], Step [900/1500], Loss: 2.0626
Epoch [7/10], Step [1000/1500], Loss: 2.0924
Epoch [7/10], Step [1100/1500], Loss: 2.0605
Epoch [7/10], Step [1200/1500], Loss: 2.0224
Epoch [7/10], Step [1300/1500], Loss: 2.0545
Epoch [7/10], Step [1400/1500], Loss: 2.0144
Epoch [7/10], Step [1500/1500], Loss: 2.0391


 70%|██████████████████████████████████████████████████████████                         | 7/10 [02:23<01:01, 20.47s/it]

Epoch [8/10], Step [100/1500], Loss: 2.0324
Epoch [8/10], Step [200/1500], Loss: 2.0147
Epoch [8/10], Step [300/1500], Loss: 2.0545
Epoch [8/10], Step [400/1500], Loss: 2.0374
Epoch [8/10], Step [500/1500], Loss: 2.0430
Epoch [8/10], Step [600/1500], Loss: 2.0323
Epoch [8/10], Step [700/1500], Loss: 2.0467
Epoch [8/10], Step [800/1500], Loss: 2.0456
Epoch [8/10], Step [900/1500], Loss: 2.0303
Epoch [8/10], Step [1000/1500], Loss: 2.0302
Epoch [8/10], Step [1100/1500], Loss: 2.0252
Epoch [8/10], Step [1200/1500], Loss: 2.0463
Epoch [8/10], Step [1300/1500], Loss: 2.0209
Epoch [8/10], Step [1400/1500], Loss: 2.0352
Epoch [8/10], Step [1500/1500], Loss: 2.0311


 80%|██████████████████████████████████████████████████████████████████▍                | 8/10 [02:44<00:41, 20.52s/it]

Epoch [9/10], Step [100/1500], Loss: 2.0762
Epoch [9/10], Step [200/1500], Loss: 2.0236
Epoch [9/10], Step [300/1500], Loss: 2.0169
Epoch [9/10], Step [400/1500], Loss: 2.0154
Epoch [9/10], Step [500/1500], Loss: 2.0327
Epoch [9/10], Step [600/1500], Loss: 2.0326
Epoch [9/10], Step [700/1500], Loss: 2.0247
Epoch [9/10], Step [800/1500], Loss: 2.0138
Epoch [9/10], Step [900/1500], Loss: 1.9843
Epoch [9/10], Step [1000/1500], Loss: 2.0436
Epoch [9/10], Step [1100/1500], Loss: 2.0105
Epoch [9/10], Step [1200/1500], Loss: 1.9946
Epoch [9/10], Step [1300/1500], Loss: 2.0269
Epoch [9/10], Step [1400/1500], Loss: 2.0091
Epoch [9/10], Step [1500/1500], Loss: 1.9892


 90%|██████████████████████████████████████████████████████████████████████████▋        | 9/10 [03:04<00:20, 20.45s/it]

Epoch [10/10], Step [100/1500], Loss: 1.9908
Epoch [10/10], Step [200/1500], Loss: 2.0072
Epoch [10/10], Step [300/1500], Loss: 2.0216
Epoch [10/10], Step [400/1500], Loss: 2.0254
Epoch [10/10], Step [500/1500], Loss: 1.9955
Epoch [10/10], Step [600/1500], Loss: 2.0185
Epoch [10/10], Step [700/1500], Loss: 2.0126
Epoch [10/10], Step [800/1500], Loss: 1.9880
Epoch [10/10], Step [900/1500], Loss: 2.0088
Epoch [10/10], Step [1000/1500], Loss: 2.0130
Epoch [10/10], Step [1100/1500], Loss: 2.0232
Epoch [10/10], Step [1200/1500], Loss: 1.9869
Epoch [10/10], Step [1300/1500], Loss: 1.9879
Epoch [10/10], Step [1400/1500], Loss: 2.0088
Epoch [10/10], Step [1500/1500], Loss: 1.9926


100%|██████████████████████████████████████████████████████████████████████████████████| 10/10 [03:25<00:00, 20.52s/it]


In [28]:
import torch
# Test the Model
correct = 0
total = 0
for i, data in enumerate(testloader):
    inputs, labels = data
#     images = images.view(-1, 28*28)
    outputs = model(inputs)
    _, predicted = torch.max(outputs.data, 1)
    total += labels.shape[0]
    correct += (predicted == labels).sum()

print('Accuracy of the network on the 2500 test images: %d %%' % (100 * correct / total))
print( 'Time elapsed: {} seconds'.format(traintime))

Accuracy of the network on the 2500 test images: 26 %
Time elapsed: 205.56801676750183 seconds


#### 기본 모델 Adam 적용

In [10]:
model = Net(src)
y = model(images)

In [11]:
import torch.optim as optim

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), 
                      src['learning_rate'])

In [12]:
from tqdm import tqdm
import time
time_start = time.time()

model.init_weight()

for epoch in tqdm(range(src['num_epochs'])):
    current_loss = 0.0
#     model.train(True)
    
    for i, data in enumerate(trainloader):
        # get the inputs
        inputs, labels = data
        
        # zero the parameter gradients
        optimizer.zero_grad()
        
        # forward + backward + optimize
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        
        # print statistics
        step = i + 1
        current_loss += loss.item()
        
        if step % 100 == 0 and step != 0:     # print every 100 mini-batches
            print('Epoch [%d/%d], Step [%d/%d], Loss: %.4f' %
                  (epoch + 1, src['num_epochs'], step, len(trainloader)//100 * 100, current_loss / 100))
            current_loss = 0.0 #100번동안 있었던 loss의 누적값 평균, 그 후 reset
            
traintime = time.time() - time_start

  0%|                                                                                           | 0/10 [00:00<?, ?it/s]

Epoch [1/10], Step [100/1500], Loss: 6.4845
Epoch [1/10], Step [200/1500], Loss: 3.0964
Epoch [1/10], Step [300/1500], Loss: 2.5269
Epoch [1/10], Step [400/1500], Loss: 2.2942
Epoch [1/10], Step [500/1500], Loss: 2.2676
Epoch [1/10], Step [600/1500], Loss: 2.1942
Epoch [1/10], Step [700/1500], Loss: 2.1171
Epoch [1/10], Step [800/1500], Loss: 2.1078
Epoch [1/10], Step [900/1500], Loss: 2.0683
Epoch [1/10], Step [1000/1500], Loss: 2.0579
Epoch [1/10], Step [1100/1500], Loss: 2.0098
Epoch [1/10], Step [1200/1500], Loss: 2.0435
Epoch [1/10], Step [1300/1500], Loss: 2.0180
Epoch [1/10], Step [1400/1500], Loss: 1.9875
Epoch [1/10], Step [1500/1500], Loss: 1.9414


 10%|████████▎                                                                          | 1/10 [00:22<03:23, 22.63s/it]

Epoch [2/10], Step [100/1500], Loss: 1.9272
Epoch [2/10], Step [200/1500], Loss: 1.8924
Epoch [2/10], Step [300/1500], Loss: 1.9015
Epoch [2/10], Step [400/1500], Loss: 1.9004
Epoch [2/10], Step [500/1500], Loss: 1.8835
Epoch [2/10], Step [600/1500], Loss: 1.9017
Epoch [2/10], Step [700/1500], Loss: 1.8668
Epoch [2/10], Step [800/1500], Loss: 1.8895
Epoch [2/10], Step [900/1500], Loss: 1.8461
Epoch [2/10], Step [1000/1500], Loss: 1.8490
Epoch [2/10], Step [1100/1500], Loss: 1.8294
Epoch [2/10], Step [1200/1500], Loss: 1.8723
Epoch [2/10], Step [1300/1500], Loss: 1.8527
Epoch [2/10], Step [1400/1500], Loss: 1.8275
Epoch [2/10], Step [1500/1500], Loss: 1.8148


 20%|████████████████▌                                                                  | 2/10 [00:45<03:01, 22.68s/it]

Epoch [3/10], Step [100/1500], Loss: 1.7913
Epoch [3/10], Step [200/1500], Loss: 1.7540
Epoch [3/10], Step [300/1500], Loss: 1.7611
Epoch [3/10], Step [400/1500], Loss: 1.7656
Epoch [3/10], Step [500/1500], Loss: 1.7528
Epoch [3/10], Step [600/1500], Loss: 1.7723
Epoch [3/10], Step [700/1500], Loss: 1.7685
Epoch [3/10], Step [800/1500], Loss: 1.7331
Epoch [3/10], Step [900/1500], Loss: 1.7412
Epoch [3/10], Step [1000/1500], Loss: 1.7134
Epoch [3/10], Step [1100/1500], Loss: 1.7339
Epoch [3/10], Step [1200/1500], Loss: 1.7265
Epoch [3/10], Step [1300/1500], Loss: 1.7056
Epoch [3/10], Step [1400/1500], Loss: 1.7406
Epoch [3/10], Step [1500/1500], Loss: 1.7231


 30%|████████████████████████▉                                                          | 3/10 [01:07<02:37, 22.52s/it]

Epoch [4/10], Step [100/1500], Loss: 1.6781
Epoch [4/10], Step [200/1500], Loss: 1.6618
Epoch [4/10], Step [300/1500], Loss: 1.6460
Epoch [4/10], Step [400/1500], Loss: 1.6734
Epoch [4/10], Step [500/1500], Loss: 1.6615
Epoch [4/10], Step [600/1500], Loss: 1.6784
Epoch [4/10], Step [700/1500], Loss: 1.6782
Epoch [4/10], Step [800/1500], Loss: 1.6474
Epoch [4/10], Step [900/1500], Loss: 1.6291
Epoch [4/10], Step [1000/1500], Loss: 1.6618
Epoch [4/10], Step [1100/1500], Loss: 1.6522
Epoch [4/10], Step [1200/1500], Loss: 1.6191
Epoch [4/10], Step [1300/1500], Loss: 1.6438
Epoch [4/10], Step [1400/1500], Loss: 1.6516
Epoch [4/10], Step [1500/1500], Loss: 1.6493


 40%|█████████████████████████████████▏                                                 | 4/10 [01:29<02:14, 22.37s/it]

Epoch [5/10], Step [100/1500], Loss: 1.5883
Epoch [5/10], Step [200/1500], Loss: 1.6017
Epoch [5/10], Step [300/1500], Loss: 1.5577
Epoch [5/10], Step [400/1500], Loss: 1.6246
Epoch [5/10], Step [500/1500], Loss: 1.5870
Epoch [5/10], Step [600/1500], Loss: 1.6131
Epoch [5/10], Step [700/1500], Loss: 1.5786
Epoch [5/10], Step [800/1500], Loss: 1.6048
Epoch [5/10], Step [900/1500], Loss: 1.5764
Epoch [5/10], Step [1000/1500], Loss: 1.5740
Epoch [5/10], Step [1100/1500], Loss: 1.5523
Epoch [5/10], Step [1200/1500], Loss: 1.5535
Epoch [5/10], Step [1300/1500], Loss: 1.5917
Epoch [5/10], Step [1400/1500], Loss: 1.5738
Epoch [5/10], Step [1500/1500], Loss: 1.5551


 50%|█████████████████████████████████████████▌                                         | 5/10 [01:56<01:58, 23.72s/it]

Epoch [6/10], Step [100/1500], Loss: 1.5376
Epoch [6/10], Step [200/1500], Loss: 1.5235
Epoch [6/10], Step [300/1500], Loss: 1.5116
Epoch [6/10], Step [400/1500], Loss: 1.5178
Epoch [6/10], Step [500/1500], Loss: 1.5130
Epoch [6/10], Step [600/1500], Loss: 1.5272
Epoch [6/10], Step [700/1500], Loss: 1.5397
Epoch [6/10], Step [800/1500], Loss: 1.4981
Epoch [6/10], Step [900/1500], Loss: 1.5456
Epoch [6/10], Step [1000/1500], Loss: 1.5543
Epoch [6/10], Step [1100/1500], Loss: 1.4888
Epoch [6/10], Step [1200/1500], Loss: 1.5101
Epoch [6/10], Step [1300/1500], Loss: 1.5380
Epoch [6/10], Step [1400/1500], Loss: 1.5382
Epoch [6/10], Step [1500/1500], Loss: 1.5207


 60%|█████████████████████████████████████████████████▊                                 | 6/10 [02:18<01:33, 23.28s/it]

Epoch [7/10], Step [100/1500], Loss: 1.4790
Epoch [7/10], Step [200/1500], Loss: 1.4658
Epoch [7/10], Step [300/1500], Loss: 1.4874
Epoch [7/10], Step [400/1500], Loss: 1.4741
Epoch [7/10], Step [500/1500], Loss: 1.4870
Epoch [7/10], Step [600/1500], Loss: 1.4619
Epoch [7/10], Step [700/1500], Loss: 1.4845
Epoch [7/10], Step [800/1500], Loss: 1.4770
Epoch [7/10], Step [900/1500], Loss: 1.4625
Epoch [7/10], Step [1000/1500], Loss: 1.4653
Epoch [7/10], Step [1100/1500], Loss: 1.4766
Epoch [7/10], Step [1200/1500], Loss: 1.4512
Epoch [7/10], Step [1300/1500], Loss: 1.4752
Epoch [7/10], Step [1400/1500], Loss: 1.4698
Epoch [7/10], Step [1500/1500], Loss: 1.5441


 70%|██████████████████████████████████████████████████████████                         | 7/10 [02:43<01:11, 23.67s/it]

Epoch [8/10], Step [100/1500], Loss: 1.4324
Epoch [8/10], Step [200/1500], Loss: 1.4313
Epoch [8/10], Step [300/1500], Loss: 1.4515
Epoch [8/10], Step [400/1500], Loss: 1.3988
Epoch [8/10], Step [500/1500], Loss: 1.4546
Epoch [8/10], Step [600/1500], Loss: 1.4252
Epoch [8/10], Step [700/1500], Loss: 1.4516
Epoch [8/10], Step [800/1500], Loss: 1.4475
Epoch [8/10], Step [900/1500], Loss: 1.4416
Epoch [8/10], Step [1000/1500], Loss: 1.4219
Epoch [8/10], Step [1100/1500], Loss: 1.4559
Epoch [8/10], Step [1200/1500], Loss: 1.4339
Epoch [8/10], Step [1300/1500], Loss: 1.4536
Epoch [8/10], Step [1400/1500], Loss: 1.4495
Epoch [8/10], Step [1500/1500], Loss: 1.4645


 80%|██████████████████████████████████████████████████████████████████▍                | 8/10 [03:07<00:47, 23.81s/it]

Epoch [9/10], Step [100/1500], Loss: 1.4124
Epoch [9/10], Step [200/1500], Loss: 1.4135
Epoch [9/10], Step [300/1500], Loss: 1.3940
Epoch [9/10], Step [400/1500], Loss: 1.3841
Epoch [9/10], Step [500/1500], Loss: 1.4199
Epoch [9/10], Step [600/1500], Loss: 1.4076
Epoch [9/10], Step [700/1500], Loss: 1.4140
Epoch [9/10], Step [800/1500], Loss: 1.4185
Epoch [9/10], Step [900/1500], Loss: 1.3975
Epoch [9/10], Step [1000/1500], Loss: 1.4518
Epoch [9/10], Step [1100/1500], Loss: 1.4107
Epoch [9/10], Step [1200/1500], Loss: 1.4086
Epoch [9/10], Step [1300/1500], Loss: 1.4156
Epoch [9/10], Step [1400/1500], Loss: 1.4246
Epoch [9/10], Step [1500/1500], Loss: 1.4181


 90%|██████████████████████████████████████████████████████████████████████████▋        | 9/10 [03:33<00:24, 24.38s/it]

Epoch [10/10], Step [100/1500], Loss: 1.3611
Epoch [10/10], Step [200/1500], Loss: 1.3670
Epoch [10/10], Step [300/1500], Loss: 1.3867
Epoch [10/10], Step [400/1500], Loss: 1.3961
Epoch [10/10], Step [500/1500], Loss: 1.3838
Epoch [10/10], Step [600/1500], Loss: 1.4258
Epoch [10/10], Step [700/1500], Loss: 1.3830
Epoch [10/10], Step [800/1500], Loss: 1.3726
Epoch [10/10], Step [900/1500], Loss: 1.3862
Epoch [10/10], Step [1000/1500], Loss: 1.3470
Epoch [10/10], Step [1100/1500], Loss: 1.3578
Epoch [10/10], Step [1200/1500], Loss: 1.3670
Epoch [10/10], Step [1300/1500], Loss: 1.3627
Epoch [10/10], Step [1400/1500], Loss: 1.4179
Epoch [10/10], Step [1500/1500], Loss: 1.3857


100%|██████████████████████████████████████████████████████████████████████████████████| 10/10 [03:55<00:00, 23.81s/it]


In [13]:
import torch
# Test the Model
correct = 0
total = 0
for i, data in enumerate(testloader):
    inputs, labels = data
#     images = images.view(-1, 28*28)
    outputs = model(inputs)
    _, predicted = torch.max(outputs.data, 1)
    total += labels.shape[0]
    correct += (predicted == labels).sum()

print('Accuracy of the network on the 2500 test images: %d %%' % (100 * correct / total))
print( 'Time elapsed: {} seconds'.format(traintime))

Accuracy of the network on the 2500 test images: 46 %
Time elapsed: 235.60971903800964 seconds


#### dropout 모델  Adam 적용

In [17]:
model = Net_dropout(src)
y = model(images)

In [18]:
import torch.optim as optim

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), 
                      src['learning_rate'])

In [19]:
from tqdm import tqdm
import time
time_start = time.time()

model.init_weight()

for epoch in tqdm(range(src['num_epochs'])):
    current_loss = 0.0
#     model.train(True)
    
    for i, data in enumerate(trainloader):
        # get the inputs
        inputs, labels = data
        
        # zero the parameter gradients
        optimizer.zero_grad()
        
        # forward + backward + optimize
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        
        # print statistics
        step = i + 1
        current_loss += loss.item()
        
        if step % 100 == 0 and step != 0:     # print every 100 mini-batches
            print('Epoch [%d/%d], Step [%d/%d], Loss: %.4f' %
                  (epoch + 1, src['num_epochs'], step, len(trainloader)//100 * 100, current_loss / 100))
            current_loss = 0.0 #100번동안 있었던 loss의 누적값 평균, 그 후 reset
            
traintime = time.time() - time_start

  0%|                                                                                           | 0/10 [00:00<?, ?it/s]

Epoch [1/10], Step [100/1500], Loss: 18.4021
Epoch [1/10], Step [200/1500], Loss: 8.9140
Epoch [1/10], Step [300/1500], Loss: 5.9350
Epoch [1/10], Step [400/1500], Loss: 4.4418
Epoch [1/10], Step [500/1500], Loss: 3.5873
Epoch [1/10], Step [600/1500], Loss: 3.2491
Epoch [1/10], Step [700/1500], Loss: 2.8618
Epoch [1/10], Step [800/1500], Loss: 2.7565
Epoch [1/10], Step [900/1500], Loss: 2.7134
Epoch [1/10], Step [1000/1500], Loss: 2.5732
Epoch [1/10], Step [1100/1500], Loss: 2.5002
Epoch [1/10], Step [1200/1500], Loss: 2.4614
Epoch [1/10], Step [1300/1500], Loss: 2.4137
Epoch [1/10], Step [1400/1500], Loss: 2.4171
Epoch [1/10], Step [1500/1500], Loss: 2.3814


 10%|████████▎                                                                          | 1/10 [00:25<03:46, 25.13s/it]

Epoch [2/10], Step [100/1500], Loss: 2.3742
Epoch [2/10], Step [200/1500], Loss: 2.3670
Epoch [2/10], Step [300/1500], Loss: 2.3759
Epoch [2/10], Step [400/1500], Loss: 2.3521
Epoch [2/10], Step [500/1500], Loss: 2.3420
Epoch [2/10], Step [600/1500], Loss: 2.3255
Epoch [2/10], Step [700/1500], Loss: 2.3302
Epoch [2/10], Step [800/1500], Loss: 2.2994
Epoch [2/10], Step [900/1500], Loss: 2.3168
Epoch [2/10], Step [1000/1500], Loss: 2.3150
Epoch [2/10], Step [1100/1500], Loss: 2.3000
Epoch [2/10], Step [1200/1500], Loss: 2.2994
Epoch [2/10], Step [1300/1500], Loss: 2.2994
Epoch [2/10], Step [1400/1500], Loss: 2.2878
Epoch [2/10], Step [1500/1500], Loss: 2.2622


 20%|████████████████▌                                                                  | 2/10 [00:49<03:19, 24.90s/it]

Epoch [3/10], Step [100/1500], Loss: 2.2570
Epoch [3/10], Step [200/1500], Loss: 2.2597
Epoch [3/10], Step [300/1500], Loss: 2.2621
Epoch [3/10], Step [400/1500], Loss: 2.2363
Epoch [3/10], Step [500/1500], Loss: 2.2472
Epoch [3/10], Step [600/1500], Loss: 2.2378
Epoch [3/10], Step [700/1500], Loss: 2.2268
Epoch [3/10], Step [800/1500], Loss: 2.2201
Epoch [3/10], Step [900/1500], Loss: 2.2059
Epoch [3/10], Step [1000/1500], Loss: 2.1968
Epoch [3/10], Step [1100/1500], Loss: 2.1925
Epoch [3/10], Step [1200/1500], Loss: 2.2036
Epoch [3/10], Step [1300/1500], Loss: 2.1701
Epoch [3/10], Step [1400/1500], Loss: 2.1945
Epoch [3/10], Step [1500/1500], Loss: 2.1673


 30%|████████████████████████▉                                                          | 3/10 [01:12<02:50, 24.40s/it]

Epoch [4/10], Step [100/1500], Loss: 2.1691
Epoch [4/10], Step [200/1500], Loss: 2.1948
Epoch [4/10], Step [300/1500], Loss: 2.1712
Epoch [4/10], Step [400/1500], Loss: 2.1738
Epoch [4/10], Step [500/1500], Loss: 2.1628
Epoch [4/10], Step [600/1500], Loss: 2.1611
Epoch [4/10], Step [700/1500], Loss: 2.1618
Epoch [4/10], Step [800/1500], Loss: 2.1462
Epoch [4/10], Step [900/1500], Loss: 2.1658
Epoch [4/10], Step [1000/1500], Loss: 2.1379
Epoch [4/10], Step [1100/1500], Loss: 2.1487
Epoch [4/10], Step [1200/1500], Loss: 2.1342
Epoch [4/10], Step [1300/1500], Loss: 2.1236
Epoch [4/10], Step [1400/1500], Loss: 2.1211
Epoch [4/10], Step [1500/1500], Loss: 2.1176


 40%|█████████████████████████████████▏                                                 | 4/10 [01:35<02:22, 23.79s/it]

Epoch [5/10], Step [100/1500], Loss: 2.1224
Epoch [5/10], Step [200/1500], Loss: 2.1016
Epoch [5/10], Step [300/1500], Loss: 2.1143
Epoch [5/10], Step [400/1500], Loss: 2.1018
Epoch [5/10], Step [500/1500], Loss: 2.0946
Epoch [5/10], Step [600/1500], Loss: 2.0914
Epoch [5/10], Step [700/1500], Loss: 2.1043
Epoch [5/10], Step [800/1500], Loss: 2.1198
Epoch [5/10], Step [900/1500], Loss: 2.0870
Epoch [5/10], Step [1000/1500], Loss: 2.0636
Epoch [5/10], Step [1100/1500], Loss: 2.0940
Epoch [5/10], Step [1200/1500], Loss: 2.0910
Epoch [5/10], Step [1300/1500], Loss: 2.0757
Epoch [5/10], Step [1400/1500], Loss: 2.0804
Epoch [5/10], Step [1500/1500], Loss: 2.0646


 50%|█████████████████████████████████████████▌                                         | 5/10 [01:57<01:56, 23.32s/it]

Epoch [6/10], Step [100/1500], Loss: 2.0555
Epoch [6/10], Step [200/1500], Loss: 2.0705
Epoch [6/10], Step [300/1500], Loss: 2.0478
Epoch [6/10], Step [400/1500], Loss: 2.0426
Epoch [6/10], Step [500/1500], Loss: 2.0636
Epoch [6/10], Step [600/1500], Loss: 2.0723
Epoch [6/10], Step [700/1500], Loss: 2.0728
Epoch [6/10], Step [800/1500], Loss: 2.0519
Epoch [6/10], Step [900/1500], Loss: 2.0607
Epoch [6/10], Step [1000/1500], Loss: 2.0180
Epoch [6/10], Step [1100/1500], Loss: 2.0298
Epoch [6/10], Step [1200/1500], Loss: 2.0183
Epoch [6/10], Step [1300/1500], Loss: 2.0201
Epoch [6/10], Step [1400/1500], Loss: 2.0321
Epoch [6/10], Step [1500/1500], Loss: 2.0528


 60%|█████████████████████████████████████████████████▊                                 | 6/10 [02:19<01:32, 23.04s/it]

Epoch [7/10], Step [100/1500], Loss: 2.0333
Epoch [7/10], Step [200/1500], Loss: 2.0353
Epoch [7/10], Step [300/1500], Loss: 2.0172
Epoch [7/10], Step [400/1500], Loss: 2.0389
Epoch [7/10], Step [500/1500], Loss: 2.0030
Epoch [7/10], Step [600/1500], Loss: 2.0299
Epoch [7/10], Step [700/1500], Loss: 2.0150
Epoch [7/10], Step [800/1500], Loss: 2.0082
Epoch [7/10], Step [900/1500], Loss: 1.9918
Epoch [7/10], Step [1000/1500], Loss: 1.9973
Epoch [7/10], Step [1100/1500], Loss: 2.0083
Epoch [7/10], Step [1200/1500], Loss: 2.0090
Epoch [7/10], Step [1300/1500], Loss: 2.0230
Epoch [7/10], Step [1400/1500], Loss: 2.0132
Epoch [7/10], Step [1500/1500], Loss: 2.0003


 70%|██████████████████████████████████████████████████████████                         | 7/10 [02:41<01:08, 22.74s/it]

Epoch [8/10], Step [100/1500], Loss: 2.0061
Epoch [8/10], Step [200/1500], Loss: 2.0323
Epoch [8/10], Step [300/1500], Loss: 1.9615
Epoch [8/10], Step [400/1500], Loss: 2.0161
Epoch [8/10], Step [500/1500], Loss: 1.9754
Epoch [8/10], Step [600/1500], Loss: 2.0110
Epoch [8/10], Step [700/1500], Loss: 2.0056
Epoch [8/10], Step [800/1500], Loss: 2.0076
Epoch [8/10], Step [900/1500], Loss: 2.0079
Epoch [8/10], Step [1000/1500], Loss: 2.0086
Epoch [8/10], Step [1100/1500], Loss: 2.0087
Epoch [8/10], Step [1200/1500], Loss: 1.9950
Epoch [8/10], Step [1300/1500], Loss: 1.9931
Epoch [8/10], Step [1400/1500], Loss: 1.9994
Epoch [8/10], Step [1500/1500], Loss: 1.9634


 80%|██████████████████████████████████████████████████████████████████▍                | 8/10 [03:03<00:45, 22.59s/it]

Epoch [9/10], Step [100/1500], Loss: 1.9840
Epoch [9/10], Step [200/1500], Loss: 1.9539
Epoch [9/10], Step [300/1500], Loss: 1.9688
Epoch [9/10], Step [400/1500], Loss: 1.9931
Epoch [9/10], Step [500/1500], Loss: 1.9726
Epoch [9/10], Step [600/1500], Loss: 1.9753
Epoch [9/10], Step [700/1500], Loss: 1.9728
Epoch [9/10], Step [800/1500], Loss: 1.9620
Epoch [9/10], Step [900/1500], Loss: 1.9503
Epoch [9/10], Step [1000/1500], Loss: 1.9596
Epoch [9/10], Step [1100/1500], Loss: 1.9780
Epoch [9/10], Step [1200/1500], Loss: 1.9609
Epoch [9/10], Step [1300/1500], Loss: 1.9328
Epoch [9/10], Step [1400/1500], Loss: 1.9497
Epoch [9/10], Step [1500/1500], Loss: 1.9676


 90%|██████████████████████████████████████████████████████████████████████████▋        | 9/10 [03:27<00:22, 22.87s/it]

Epoch [10/10], Step [100/1500], Loss: 1.9665
Epoch [10/10], Step [200/1500], Loss: 1.9539
Epoch [10/10], Step [300/1500], Loss: 1.9521
Epoch [10/10], Step [400/1500], Loss: 1.9550
Epoch [10/10], Step [500/1500], Loss: 1.9622
Epoch [10/10], Step [600/1500], Loss: 1.9531
Epoch [10/10], Step [700/1500], Loss: 1.9271
Epoch [10/10], Step [800/1500], Loss: 1.9623
Epoch [10/10], Step [900/1500], Loss: 1.9316
Epoch [10/10], Step [1000/1500], Loss: 1.9486
Epoch [10/10], Step [1100/1500], Loss: 1.9598
Epoch [10/10], Step [1200/1500], Loss: 1.9351
Epoch [10/10], Step [1300/1500], Loss: 1.9409
Epoch [10/10], Step [1400/1500], Loss: 1.9382
Epoch [10/10], Step [1500/1500], Loss: 1.9687


100%|██████████████████████████████████████████████████████████████████████████████████| 10/10 [03:50<00:00, 22.95s/it]


In [20]:
import torch
# Test the Model
correct = 0
total = 0
for i, data in enumerate(testloader):
    inputs, labels = data
#     images = images.view(-1, 28*28)
    outputs = model(inputs)
    _, predicted = torch.max(outputs.data, 1)
    total += labels.shape[0]
    correct += (predicted == labels).sum()

print('Accuracy of the network on the 2500 test images: %d %%' % (100 * correct / total))
print( 'Time elapsed: {} seconds'.format(traintime))

Accuracy of the network on the 2500 test images: 28 %
Time elapsed: 230.63831567764282 seconds


#### batchNormalization 모델  Adam 적용

In [21]:
model = Net_batchnorm(src)
y = model(images)

In [22]:
import torch.optim as optim

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), 
                      src['learning_rate'])

In [23]:
from tqdm import tqdm
import time
time_start = time.time()

model.init_weight()

for epoch in tqdm(range(src['num_epochs'])):
    current_loss = 0.0
#     model.train(True)
    
    for i, data in enumerate(trainloader):
        # get the inputs
        inputs, labels = data
        
        # zero the parameter gradients
        optimizer.zero_grad()
        
        # forward + backward + optimize
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        
        # print statistics
        step = i + 1
        current_loss += loss.item()
        
        if step % 100 == 0 and step != 0:     # print every 100 mini-batches
            print('Epoch [%d/%d], Step [%d/%d], Loss: %.4f' %
                  (epoch + 1, src['num_epochs'], step, len(trainloader)//100 * 100, current_loss / 100))
            current_loss = 0.0 #100동안 있었던 loss의 누적값 평균, 그 후 reset
            
traintime = time.time() - time_start

  0%|                                                                                           | 0/10 [00:00<?, ?it/s]

Epoch [1/10], Step [100/1500], Loss: 2.2496
Epoch [1/10], Step [200/1500], Loss: 2.0503
Epoch [1/10], Step [300/1500], Loss: 1.9570
Epoch [1/10], Step [400/1500], Loss: 1.9105
Epoch [1/10], Step [500/1500], Loss: 1.8835
Epoch [1/10], Step [600/1500], Loss: 1.7882
Epoch [1/10], Step [700/1500], Loss: 1.7968
Epoch [1/10], Step [800/1500], Loss: 1.7536
Epoch [1/10], Step [900/1500], Loss: 1.7658
Epoch [1/10], Step [1000/1500], Loss: 1.7425
Epoch [1/10], Step [1100/1500], Loss: 1.7118
Epoch [1/10], Step [1200/1500], Loss: 1.7244
Epoch [1/10], Step [1300/1500], Loss: 1.6889
Epoch [1/10], Step [1400/1500], Loss: 1.6791
Epoch [1/10], Step [1500/1500], Loss: 1.7029


 10%|████████▎                                                                          | 1/10 [00:31<04:46, 31.88s/it]

Epoch [2/10], Step [100/1500], Loss: 1.6239
Epoch [2/10], Step [200/1500], Loss: 1.6515
Epoch [2/10], Step [300/1500], Loss: 1.6475
Epoch [2/10], Step [400/1500], Loss: 1.6082
Epoch [2/10], Step [500/1500], Loss: 1.6377
Epoch [2/10], Step [600/1500], Loss: 1.6131
Epoch [2/10], Step [700/1500], Loss: 1.6003
Epoch [2/10], Step [800/1500], Loss: 1.6164
Epoch [2/10], Step [900/1500], Loss: 1.6134
Epoch [2/10], Step [1000/1500], Loss: 1.5918
Epoch [2/10], Step [1100/1500], Loss: 1.5721
Epoch [2/10], Step [1200/1500], Loss: 1.5935
Epoch [2/10], Step [1300/1500], Loss: 1.5993
Epoch [2/10], Step [1400/1500], Loss: 1.5747
Epoch [2/10], Step [1500/1500], Loss: 1.5354


 20%|████████████████▌                                                                  | 2/10 [00:59<04:05, 30.69s/it]

Epoch [3/10], Step [100/1500], Loss: 1.5488
Epoch [3/10], Step [200/1500], Loss: 1.5214
Epoch [3/10], Step [300/1500], Loss: 1.5384
Epoch [3/10], Step [400/1500], Loss: 1.5159
Epoch [3/10], Step [500/1500], Loss: 1.5528
Epoch [3/10], Step [600/1500], Loss: 1.5481
Epoch [3/10], Step [700/1500], Loss: 1.5597
Epoch [3/10], Step [800/1500], Loss: 1.5139
Epoch [3/10], Step [900/1500], Loss: 1.5637
Epoch [3/10], Step [1000/1500], Loss: 1.5458
Epoch [3/10], Step [1100/1500], Loss: 1.5326
Epoch [3/10], Step [1200/1500], Loss: 1.5427
Epoch [3/10], Step [1300/1500], Loss: 1.5113
Epoch [3/10], Step [1400/1500], Loss: 1.5328
Epoch [3/10], Step [1500/1500], Loss: 1.4963


 30%|████████████████████████▉                                                          | 3/10 [01:23<03:20, 28.64s/it]

Epoch [4/10], Step [100/1500], Loss: 1.4834
Epoch [4/10], Step [200/1500], Loss: 1.4825
Epoch [4/10], Step [300/1500], Loss: 1.4775
Epoch [4/10], Step [400/1500], Loss: 1.4980
Epoch [4/10], Step [500/1500], Loss: 1.4763
Epoch [4/10], Step [600/1500], Loss: 1.5077
Epoch [4/10], Step [700/1500], Loss: 1.5075
Epoch [4/10], Step [800/1500], Loss: 1.5036
Epoch [4/10], Step [900/1500], Loss: 1.4988
Epoch [4/10], Step [1000/1500], Loss: 1.4839
Epoch [4/10], Step [1100/1500], Loss: 1.4658
Epoch [4/10], Step [1200/1500], Loss: 1.4943
Epoch [4/10], Step [1300/1500], Loss: 1.4768
Epoch [4/10], Step [1400/1500], Loss: 1.4789
Epoch [4/10], Step [1500/1500], Loss: 1.4618


 40%|█████████████████████████████████▏                                                 | 4/10 [01:49<02:46, 27.79s/it]

Epoch [5/10], Step [100/1500], Loss: 1.4365
Epoch [5/10], Step [200/1500], Loss: 1.4280
Epoch [5/10], Step [300/1500], Loss: 1.4459
Epoch [5/10], Step [400/1500], Loss: 1.4538
Epoch [5/10], Step [500/1500], Loss: 1.4284
Epoch [5/10], Step [600/1500], Loss: 1.4252
Epoch [5/10], Step [700/1500], Loss: 1.4324
Epoch [5/10], Step [800/1500], Loss: 1.4346
Epoch [5/10], Step [900/1500], Loss: 1.4595
Epoch [5/10], Step [1000/1500], Loss: 1.4569
Epoch [5/10], Step [1100/1500], Loss: 1.4525
Epoch [5/10], Step [1200/1500], Loss: 1.4394
Epoch [5/10], Step [1300/1500], Loss: 1.4180
Epoch [5/10], Step [1400/1500], Loss: 1.4459
Epoch [5/10], Step [1500/1500], Loss: 1.4533


 50%|█████████████████████████████████████████▌                                         | 5/10 [02:14<02:15, 27.10s/it]

Epoch [6/10], Step [100/1500], Loss: 1.4339
Epoch [6/10], Step [200/1500], Loss: 1.4237
Epoch [6/10], Step [300/1500], Loss: 1.4331
Epoch [6/10], Step [400/1500], Loss: 1.3928
Epoch [6/10], Step [500/1500], Loss: 1.3936
Epoch [6/10], Step [600/1500], Loss: 1.4018
Epoch [6/10], Step [700/1500], Loss: 1.3809
Epoch [6/10], Step [800/1500], Loss: 1.3692
Epoch [6/10], Step [900/1500], Loss: 1.4430
Epoch [6/10], Step [1000/1500], Loss: 1.4558
Epoch [6/10], Step [1100/1500], Loss: 1.4233
Epoch [6/10], Step [1200/1500], Loss: 1.4311
Epoch [6/10], Step [1300/1500], Loss: 1.3925
Epoch [6/10], Step [1400/1500], Loss: 1.4472
Epoch [6/10], Step [1500/1500], Loss: 1.4331


 60%|█████████████████████████████████████████████████▊                                 | 6/10 [02:39<01:44, 26.19s/it]

Epoch [7/10], Step [100/1500], Loss: 1.3769
Epoch [7/10], Step [200/1500], Loss: 1.3911
Epoch [7/10], Step [300/1500], Loss: 1.3981
Epoch [7/10], Step [400/1500], Loss: 1.3930
Epoch [7/10], Step [500/1500], Loss: 1.4157
Epoch [7/10], Step [600/1500], Loss: 1.3996
Epoch [7/10], Step [700/1500], Loss: 1.3669
Epoch [7/10], Step [800/1500], Loss: 1.4014
Epoch [7/10], Step [900/1500], Loss: 1.3958
Epoch [7/10], Step [1000/1500], Loss: 1.3640
Epoch [7/10], Step [1100/1500], Loss: 1.4074
Epoch [7/10], Step [1200/1500], Loss: 1.4242
Epoch [7/10], Step [1300/1500], Loss: 1.3776
Epoch [7/10], Step [1400/1500], Loss: 1.3872
Epoch [7/10], Step [1500/1500], Loss: 1.3849


 70%|██████████████████████████████████████████████████████████                         | 7/10 [03:03<01:17, 25.70s/it]

Epoch [8/10], Step [100/1500], Loss: 1.3767
Epoch [8/10], Step [200/1500], Loss: 1.3727
Epoch [8/10], Step [300/1500], Loss: 1.3681
Epoch [8/10], Step [400/1500], Loss: 1.3230
Epoch [8/10], Step [500/1500], Loss: 1.3726
Epoch [8/10], Step [600/1500], Loss: 1.3734
Epoch [8/10], Step [700/1500], Loss: 1.3568
Epoch [8/10], Step [800/1500], Loss: 1.3669
Epoch [8/10], Step [900/1500], Loss: 1.3966
Epoch [8/10], Step [1000/1500], Loss: 1.3712
Epoch [8/10], Step [1100/1500], Loss: 1.3594
Epoch [8/10], Step [1200/1500], Loss: 1.3890
Epoch [8/10], Step [1300/1500], Loss: 1.3904
Epoch [8/10], Step [1400/1500], Loss: 1.3368
Epoch [8/10], Step [1500/1500], Loss: 1.3659


 80%|██████████████████████████████████████████████████████████████████▍                | 8/10 [03:30<00:51, 25.96s/it]

Epoch [9/10], Step [100/1500], Loss: 1.3468
Epoch [9/10], Step [200/1500], Loss: 1.3310
Epoch [9/10], Step [300/1500], Loss: 1.3175
Epoch [9/10], Step [400/1500], Loss: 1.3459
Epoch [9/10], Step [500/1500], Loss: 1.3525
Epoch [9/10], Step [600/1500], Loss: 1.3491
Epoch [9/10], Step [700/1500], Loss: 1.3470
Epoch [9/10], Step [800/1500], Loss: 1.3364
Epoch [9/10], Step [900/1500], Loss: 1.3448
Epoch [9/10], Step [1000/1500], Loss: 1.3625
Epoch [9/10], Step [1100/1500], Loss: 1.3670
Epoch [9/10], Step [1200/1500], Loss: 1.3594
Epoch [9/10], Step [1300/1500], Loss: 1.3682
Epoch [9/10], Step [1400/1500], Loss: 1.3815
Epoch [9/10], Step [1500/1500], Loss: 1.3461


 90%|██████████████████████████████████████████████████████████████████████████▋        | 9/10 [03:54<00:25, 25.49s/it]

Epoch [10/10], Step [100/1500], Loss: 1.3381
Epoch [10/10], Step [200/1500], Loss: 1.3037
Epoch [10/10], Step [300/1500], Loss: 1.3564
Epoch [10/10], Step [400/1500], Loss: 1.3557
Epoch [10/10], Step [500/1500], Loss: 1.3224
Epoch [10/10], Step [600/1500], Loss: 1.3286
Epoch [10/10], Step [700/1500], Loss: 1.3540
Epoch [10/10], Step [800/1500], Loss: 1.3116
Epoch [10/10], Step [900/1500], Loss: 1.3565
Epoch [10/10], Step [1000/1500], Loss: 1.3257
Epoch [10/10], Step [1100/1500], Loss: 1.3482
Epoch [10/10], Step [1200/1500], Loss: 1.3456
Epoch [10/10], Step [1300/1500], Loss: 1.3313
Epoch [10/10], Step [1400/1500], Loss: 1.3229
Epoch [10/10], Step [1500/1500], Loss: 1.3354


100%|██████████████████████████████████████████████████████████████████████████████████| 10/10 [04:18<00:00, 24.94s/it]


In [24]:
import torch
# Test the Model
correct = 0
total = 0
for i, data in enumerate(testloader):
    inputs, labels = data
#     images = images.view(-1, 28*28)
    outputs = model(inputs)
    _, predicted = torch.max(outputs.data, 1)
    total += labels.shape[0]
    correct += (predicted == labels).sum()

print('Accuracy of the network on the 2500 test images: %d %%' % (100 * correct / total))
print( 'Time elapsed: {} seconds'.format(traintime))

Accuracy of the network on the 2500 test images: 48 %
Time elapsed: 258.19803500175476 seconds


## 정리

##### batch size : 32 , epoch size : 10 

#### 기본 모델
Epoch [10/10], Step [1500/1500], Loss: 1.9926

Accuracy of the network on the 2500 test images: 26 %

Time elapsed: 205.56801676750183 seconds

#### 기본 모델 + adam
Epoch [10/10], Step [1500/1500], Loss: 1.3857

Accuracy of the network on the 2500 test images: 46 %

Time elapsed: 235.60971903800964 seconds

#### dropout 모델 + adam
Epoch [10/10], Step [1500/1500], Loss: 1.9687

Accuracy of the network on the 2500 test images: 28 %

Time elapsed: 230.63831567764282 seconds

#### batch normalization 모델 + adam
Epoch [10/10], Step [1500/1500], Loss: 1.3354

Accuracy of the network on the 2500 test images: 48 %

Time elapsed: 258.19803500175476 seconds

basic + adam 모델의 경우  최적화를 하지 않은 basic 모델에 비해 accuracy가 약 20% 올랐고 loss도 많이 줄었다.

학습에 걸린 시간은 15%정도 증가하였다.  최적화 과정에서 시간이 소요된 것으로 보인다.

-

dropout + adam 모델의 경우 basic + adam 모델과 비교해 보았을 때 시간은 거의 차이가 나지 않은 반면 loss 과 accuracy가 매우 나빠졌다.

이것은 노드를 확률적으로 끄는 dropout의 특성상 특정 w에 의존하지 않아 overfitting을 낮추는 장점이 있지만 상대적으로 w의 학습이 느려져 더 많은 epoch가 필요한 것으로 보인다.

-

batch normalization + adam 모델의 경우 basic + adam 모델과 비교해 보았을 때 loss가 좀더 줄었고 accuracy가 2% 더 높아져 가장 높은 accuracy를 보였다. 그에 비해 학습시간은 20초 가량 더 걸렸다. batchNorm을 통해 epoch 대비 학습속도가 상승한 것으로 보인다.