## 과제 요약
- Case 9의 마지막 에포크에서 val accuracy가 0.6018을 기록하여, 과제 수행에 성공하였다. Val Accuracy를 높였던 결정적인 요소로는 
> - 4 Layer로 이루어진 Convolutional Block을 4개까지 직렬로 쌓는 것(그 이상의 Block은 유의미한 영향이 없었다)
> - Adam Optimizer가 아닌 Momentum 계열 Optimizer를 사용하는 것(Adaptive Gradient 계열을 사용하면 학습이 이루어지지 않는 버그가 있었다)
> - Convolution Filter의 수가 64 -> 128 -> 256 -> 512 순으로 늘어나 풍부한 수의 feature를 확보하는 것(filter 수가 6 -> 16 -> 32 -> 64 에 불과하면 정확도 43%를 벗어나지 못했다)<br><br>
> 를 들 수 있다.

In [4]:
import pandas as pd
from tqdm import tqdm

import torch
import torchvision
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision.transforms as transforms


# 코드 다시 돌리기 위한 seed 고정
import random
import numpy as np
random.seed(0)
np.random.seed(0)
torch.manual_seed(0)
torch.cuda.manual_seed(0)
torch.cuda.manual_seed_all(0)

device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

In [5]:
device

device(type='cuda')

In [6]:
train_transform = transforms.Compose([    
    transforms.AutoAugment(transforms.autoaugment.AutoAugmentPolicy.CIFAR10),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5),(0.5, 0.5, 0.5)),
])        

test_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5),(0.5, 0.5, 0.5))
])    

train = torchvision.datasets.CIFAR100(root="./", train=True, download=True, transform=train_transform)
test = torchvision.datasets.CIFAR100(root="./", train=False, download=True, transform=test_transform)

train_loader = torch.utils.data.DataLoader(train, batch_size=256,
                                           shuffle=True, num_workers=2)
test_loader = torch.utils.data.DataLoader(test, batch_size=256,
                                          shuffle=False, num_workers=2)
criterion = nn.CrossEntropyLoss()

Files already downloaded and verified
Files already downloaded and verified


In [None]:
class MyModel(nn.Module):
    #Cifar100 Shape: [Batch, Channel(3), 32, 32]
    def __init__(self, in_channels, num_classes):
        super(MyModel, self).__init__()
        """Base Model"""
        self.conv1 = nn.Conv2d(3, 6, 5)
        self.pool = nn.MaxPool2d(2, 2)
        
        self.conv2 = nn.Conv2d(6, 16, 5)
        
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 100)
        
        

    def forward(self, x):
        """Base Model"""
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))

        x = torch.flatten(x, 1) # flatten all dimensions except batch
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x

model = MyModel(3, 100).to(device)
optimizer = optim.SGD(model.parameters(), lr=0.1, momentum=0.9)

model


MyModel(
  (conv1): Conv2d(3, 6, kernel_size=(5, 5), stride=(1, 1))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=400, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=100, bias=True)
)

In [None]:
for epoch in range(10):
    model.train()
    running_loss = 0.0
    best_acc = 0.0
    best_model_wts = model.state_dict()
    print(f"train epoch: {epoch+1}----------------")
    for img, label in tqdm(train_loader):
        img = img.to(device)
        label = label.to(device)
        optimizer.zero_grad()
        output = model(img)
        loss = criterion(output, label)
        running_loss += loss.item()
        loss.backward()
        optimizer.step()
    correct, all_data = 0,0
    print("\ntrain_loss : ", running_loss / len(train_loader))
    model.eval()
    for img, label in test_loader:
        with torch.no_grad():
            img = img.to(device)
            label = label.to(device)
            output = model(img)

            correct += torch.sum(torch.argmax(output, dim=1) == label).item()
            all_data += len(label)
    print("val_acc : ", correct / all_data)
    if correct / all_data > best_acc:
      best_acc = correct / all_data
      best_model_wts = model.state_dict()

train epoch: 1----------------


100%|██████████| 196/196 [00:14<00:00, 13.52it/s]



train_loss :  4.527656900639436
val_acc :  0.0707
train epoch: 2----------------


100%|██████████| 196/196 [00:14<00:00, 13.65it/s]



train_loss :  4.233913866841063
val_acc :  0.1173
train epoch: 3----------------


100%|██████████| 196/196 [00:14<00:00, 13.48it/s]



train_loss :  4.021952615708721
val_acc :  0.1501
train epoch: 4----------------


100%|██████████| 196/196 [00:14<00:00, 13.71it/s]



train_loss :  3.9045823398901494
val_acc :  0.1499
train epoch: 5----------------


100%|██████████| 196/196 [00:14<00:00, 13.42it/s]



train_loss :  3.83749518954024
val_acc :  0.1402
train epoch: 6----------------


100%|██████████| 196/196 [00:14<00:00, 13.21it/s]



train_loss :  3.7911516355008494
val_acc :  0.1651
train epoch: 7----------------


100%|██████████| 196/196 [00:14<00:00, 13.16it/s]



train_loss :  3.76611359387028
val_acc :  0.1851
train epoch: 8----------------


100%|██████████| 196/196 [00:14<00:00, 13.26it/s]



train_loss :  3.747718228369343
val_acc :  0.181
train epoch: 9----------------


100%|██████████| 196/196 [00:14<00:00, 13.50it/s]



train_loss :  3.714530322016502
val_acc :  0.1924
train epoch: 10----------------


 25%|██▌       | 49/196 [00:05<00:17,  8.21it/s]


KeyboardInterrupt: 

# Case1. 
- Conv 계층을 5계층으로 하여 깊이를 더한다.
- Channel 수는 3 -> 64 -> 64 -> 32 -> 16 -> 16 로 하여 더 많은 feature map을 뽑아내겠다.
- Kernel의 크기는 처음 3계층 3 x 3, 마지막 2계층 2 x 2로 하여 국소적 맥락에만 집중하도록 하겠다.
- image size는 32 -> 16 -> 8 -> 4 -> 2
- FC Node는 36 -> 180 -> 140 -> 100
- Optimizer Adam, learning rate 0.01

In [None]:
class Case1Model(nn.Module):
    #Cifar100 Shape: [Batch, Channel(3), 32, 32]
    def __init__(self, in_channels, num_classes):
        super(Case1Model, self).__init__()
        """Base Model"""
        self.conv1 = nn.Conv2d(3, 64, 3)
        self.pool1 = nn.MaxPool2d(2, 2, 1)
        
        self.conv2 = nn.Conv2d(64, 64, 3)
        self.pool2 = nn.MaxPool2d(2, 2, 1)

        self.conv3 = nn.Conv2d(64, 32, 3)
        self.pool3 = nn.MaxPool2d(2, 2, 1)

        self.conv4 = nn.Conv2d(32, 16, 2)
        self.pool4 = nn.MaxPool2d(2, 2, 1)

        self.conv5 = nn.Conv2d(16, 16, 2)
        self.pool5 = nn.MaxPool2d(2, 1, 1)
        
        self.fc1 = nn.Linear(16 * 2 * 2, 180)
        self.fc2 = nn.Linear(180, 140)
        self.fc3 = nn.Linear(140, 100)
        
        

    def forward(self, x):
        """Base Model"""
        x = self.pool1(F.relu(self.conv1(x)))
        x = self.pool2(F.relu(self.conv2(x)))
        x = self.pool3(F.relu(self.conv3(x)))
        x = self.pool4(F.relu(self.conv4(x)))
        x = self.pool5(F.relu(self.conv5(x)))

        x = torch.flatten(x, 1) # flatten all dimensions except batch
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x

model1 = Case1Model(3, 100).to(device)
optimizer = optim.Adam(model.parameters(), lr=0.01)

model1


In [None]:
for epoch in range(10):
    model1.train()
    running_loss = 0.0
    best_acc = 0.0
    best_model_wts = model1.state_dict()
    print(f"train epoch: {epoch+1}----------------")
    for img, label in tqdm(train_loader):
        img = img.to(device)
        label = label.to(device)
        optimizer.zero_grad()
        output = model1(img)
        loss = criterion(output, label)
        running_loss += loss.item()
        loss.backward()
        optimizer.step()
    correct, all_data = 0,0
    print("\ntrain_loss : ", running_loss / len(train_loader))
    model1.eval()
    for img, label in test_loader:
        with torch.no_grad():
            img = img.to(device)
            label = label.to(device)
            output = model1(img)

            correct += torch.sum(torch.argmax(output, dim=1) == label).item()
            all_data += len(label)
    print("val_acc : ", correct / all_data)
    if correct / all_data > best_acc:
      best_acc = correct / all_data
      best_model_wts = model1.state_dict()

# Case 2
- 앞선 Case 1에선 Gradient Vanishing이 일어난 것으로 보인다.
- 이에 BatchNormalization으로 이를 해소하고, Learning Rate도 0.1로 다시 높이겠다.

In [None]:
class Case2Model(nn.Module):
    #Cifar100 Shape: [Batch, Channel(3), 32, 32]
    def __init__(self, in_channels, num_classes):
        super(Case2Model, self).__init__()
        """Base Model"""
        self.conv1 = nn.Conv2d(3, 64, 3)
        self.pool1 = nn.MaxPool2d(2, 2, 1)
        self.batchNorm1 = nn.BatchNorm2d(64)
        
        self.conv2 = nn.Conv2d(64, 64, 3)
        self.pool2 = nn.MaxPool2d(2, 2, 1)
        self.batchNorm2 = nn.BatchNorm2d(64)


        self.conv3 = nn.Conv2d(64, 32, 3)
        self.pool3 = nn.MaxPool2d(2, 2, 1)
        self.batchNorm3 = nn.BatchNorm2d(32)

        self.conv4 = nn.Conv2d(32, 16, 2)
        self.pool4 = nn.MaxPool2d(2, 2, 1)
        self.batchNorm4 = nn.BatchNorm2d(16)


        self.conv5 = nn.Conv2d(16, 16, 2)
        self.pool5 = nn.MaxPool2d(2, 1, 1)

        
        self.fc1 = nn.Linear(16 * 2 * 2, 180)
        self.fc2 = nn.Linear(180, 140)
        self.fc3 = nn.Linear(140, 100)
        
        

    def forward(self, x):
        """Base Model"""
        x1 = self.batchNorm1(self.pool1(F.relu(self.conv1(x))))
        x2 = self.batchNorm2(self.pool2(F.relu(self.conv2(x1))))
        x3 = self.batchNorm3(self.pool3(F.relu(self.conv3(x2))))
        x4 = self.batchNorm4(self.pool4(F.relu(self.conv4(x3))))
        x5 = self.pool5(F.relu(self.conv5(x4)))

        x = torch.flatten(x5, 1) # flatten all dimensions except batch
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x

model2 = Case2Model(3, 100).to(device)
optimizer = optim.Adam(model2.parameters(), lr=0.1)

model2


Case2Model(
  (conv1): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1))
  (pool1): MaxPool2d(kernel_size=2, stride=2, padding=1, dilation=1, ceil_mode=False)
  (batchNorm1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1))
  (pool2): MaxPool2d(kernel_size=2, stride=2, padding=1, dilation=1, ceil_mode=False)
  (batchNorm2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv3): Conv2d(64, 32, kernel_size=(3, 3), stride=(1, 1))
  (pool3): MaxPool2d(kernel_size=2, stride=2, padding=1, dilation=1, ceil_mode=False)
  (batchNorm3): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv4): Conv2d(32, 16, kernel_size=(2, 2), stride=(1, 1))
  (pool4): MaxPool2d(kernel_size=2, stride=2, padding=1, dilation=1, ceil_mode=False)
  (batchNorm4): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv5): Conv2d

In [None]:
for epoch in range(10):
    model2.train()
    running_loss = 0.0
    best_acc = 0.0
    best_model_wts = model2.state_dict()
    print(f"train epoch: {epoch+1}----------------")
    for img, label in tqdm(train_loader):
        img = img.to(device)
        label = label.to(device)
        optimizer.zero_grad()
        output = model2(img)
        loss = criterion(output, label)
        running_loss += loss.item()
        loss.backward()
        optimizer.step()
    correct, all_data = 0,0
    print("\ntrain_loss : ", running_loss / len(train_loader))
    model2.eval()
    for img, label in test_loader:
        with torch.no_grad():
            img = img.to(device)
            label = label.to(device)
            output = model2(img)

            correct += torch.sum(torch.argmax(output, dim=1) == label).item()
            all_data += len(label)
    print("val_acc : ", correct / all_data)
    if correct / all_data > best_acc:
      best_acc = correct / all_data
      best_model_wts = model2.state_dict()

train epoch: 1----------------


100%|██████████| 196/196 [00:36<00:00,  5.37it/s]



train_loss :  4.499614194947846
val_acc :  0.0273
train epoch: 2----------------


100%|██████████| 196/196 [00:38<00:00,  5.15it/s]



train_loss :  4.066124045118993
val_acc :  0.0573
train epoch: 3----------------


 37%|███▋      | 73/196 [00:16<00:27,  4.50it/s]


KeyboardInterrupt: 

# Case 3
- Optimizer를 원래의 SGD로 바꾸어보겠다.

In [None]:
class Case3Model(nn.Module):
    #Cifar100 Shape: [Batch, Channel(3), 32, 32]
    def __init__(self, in_channels, num_classes):
        super(Case2Model, self).__init__()
        """Base Model"""
        self.conv1 = nn.Conv2d(3, 64, 3)
        self.pool1 = nn.MaxPool2d(2, 2, 1)
        self.batchNorm1 = nn.BatchNorm2d(64)
        
        self.conv2 = nn.Conv2d(64, 64, 3)
        self.pool2 = nn.MaxPool2d(2, 2, 1)
        self.batchNorm2 = nn.BatchNorm2d(64)


        self.conv3 = nn.Conv2d(64, 32, 3)
        self.pool3 = nn.MaxPool2d(2, 2, 1)
        self.batchNorm3 = nn.BatchNorm2d(32)

        self.conv4 = nn.Conv2d(32, 16, 2)
        self.pool4 = nn.MaxPool2d(2, 2, 1)
        self.batchNorm4 = nn.BatchNorm2d(16)


        self.conv5 = nn.Conv2d(16, 16, 2)
        self.pool5 = nn.MaxPool2d(2, 1, 1)

        
        self.fc1 = nn.Linear(16 * 2 * 2, 180)
        self.fc2 = nn.Linear(180, 140)
        self.fc3 = nn.Linear(140, 100)
        
        

    def forward(self, x):
        """Base Model"""
        x1 = self.batchNorm1(self.pool1(F.relu(self.conv1(x))))
        x2 = self.batchNorm2(self.pool2(F.relu(self.conv2(x1))))
        x3 = self.batchNorm3(self.pool3(F.relu(self.conv3(x2))))
        x4 = self.batchNorm4(self.pool4(F.relu(self.conv4(x3))))
        x5 = self.pool5(F.relu(self.conv5(x4)))

        x = torch.flatten(x5, 1) # flatten all dimensions except batch
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x

model3 = Case3Model(3, 100).to(device)
optimizer = optim.SGD(model3.parameters(), lr=0.1)

model3


MyModel(
  (conv1): Conv2d(3, 6, kernel_size=(5, 5), stride=(1, 1))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=400, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=100, bias=True)
)

In [None]:
for epoch in range(10):
    model3.train()
    running_loss = 0.0
    best_acc = 0.0
    best_model_wts = model3.state_dict()
    print(f"train epoch: {epoch+1}----------------")
    for img, label in tqdm(train_loader):
        img = img.to(device)
        label = label.to(device)
        optimizer.zero_grad()
        output = model3(img)
        loss = criterion(output, label)
        running_loss += loss.item()
        loss.backward()
        optimizer.step()
    correct, all_data = 0,0
    print("\ntrain_loss : ", running_loss / len(train_loader))
    model3.eval()
    for img, label in test_loader:
        with torch.no_grad():
            img = img.to(device)
            label = label.to(device)
            output = model3(img)

            correct += torch.sum(torch.argmax(output, dim=1) == label).item()
            all_data += len(label)
    print("val_acc : ", correct / all_data)
    if correct / all_data > best_acc:
      best_acc = correct / all_data
      best_model_wts = model3.state_dict()

train epoch: 1----------------


  0%|          | 0/196 [00:03<?, ?it/s]


RuntimeError: running_mean should contain 256 elements not 64

## Case 4
- Residual Connection을 추가한다.
- Optimizer를 Nesterov Momentum으로 변경한다.

In [None]:
class Case4Model(nn.Module):
    #Cifar100 Shape: [Batch, Channel(3), 32, 32]
    def __init__(self, in_channels, num_classes):
        super(Case4Model, self).__init__()
        """Base Model"""
        self.conv1 = nn.Conv2d(3, 6, 3, 1, 1)
        self.pool1 = nn.MaxPool2d(2, 2)
        self.batchNorm1 = nn.BatchNorm2d(6)
        #6 * (16 * 16)

        self.conv2 = nn.Conv2d(6, 6, 3, 1, 1)
        self.batchNorm2 = nn.BatchNorm2d(6)
        self.conv3 = nn.Conv2d(6, 6, 3, 1, 1)
        self.batchNorm3 = nn.BatchNorm2d(6)
        #6 * (16 * 16)
        
        self.conv4 = nn.Conv2d(6, 16, 3, 1, 1)
        self.pool2 = nn.MaxPool2d(2, 2)
        self.batchNorm4 = nn.BatchNorm2d(16)
        #16 * (8 * 8)

        self.conv5 = nn.Conv2d(16, 16, 3, 1, 1)
        self.batchNorm5 = nn.BatchNorm2d(16)
        self.conv6 = nn.Conv2d(16, 16, 3, 1, 1)
        self.batchNorm6 = nn.BatchNorm2d(16)
        #16 * (8 * 8)
        
        self.fc1 = nn.Linear(1024, 256)
        self.fc2 = nn.Linear(256, 128)
        self.fc3 = nn.Linear(128, 100)
        
        

    def forward(self, x):
        """Base Model"""
        x1 = self.batchNorm1(self.pool1(F.relu(self.conv1(x))))
        
        x2 = self.batchNorm2(F.relu(self.conv2(x1)))
        x3 = self.batchNorm3(F.relu(F.relu(self.conv3(x2)) + x1))

        x4 = self.batchNorm4(self.pool2(F.relu(self.conv4(x3))))

        x5 = self.batchNorm5(F.relu(self.conv5(x4)))
        x6 = self.batchNorm6(F.relu(F.relu(self.conv6(x5)) + x4))

        x = torch.flatten(x6, 1) # flatten all dimensions except batch
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x

model4 = Case4Model(3, 100).to(device)
optimizer = optim.SGD(model4.parameters(), momentum=0.9, nesterov=True, lr=0.1)

model4


Case4Model(
  (conv1): Conv2d(3, 6, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (pool1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (batchNorm1): BatchNorm2d(6, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv2): Conv2d(6, 6, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (batchNorm2): BatchNorm2d(6, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv3): Conv2d(6, 6, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (batchNorm3): BatchNorm2d(6, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv4): Conv2d(6, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (pool2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (batchNorm4): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv5): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (batchNorm5): BatchNorm2d(16, eps=1e-05, momentum=0.1, 

In [None]:
for epoch in range(10):
    model4.train()
    running_loss = 0.0
    best_acc = 0.0
    best_model_wts = model4.state_dict()
    print(f"train epoch: {epoch+1}----------------")
    for img, label in tqdm(train_loader):
        img = img.to(device)
        label = label.to(device)
        optimizer.zero_grad()
        output = model4(img)
        loss = criterion(output, label)
        running_loss += loss.item()
        loss.backward()
        optimizer.step()
    correct, all_data = 0,0
    print("\ntrain_loss : ", running_loss / len(train_loader))
    model4.eval()
    for img, label in test_loader:
        with torch.no_grad():
            img = img.to(device)
            label = label.to(device)
            output = model4(img)

            correct += torch.sum(torch.argmax(output, dim=1) == label).item()
            all_data += len(label)
    print("val_acc : ", correct / all_data)
    if correct / all_data > best_acc:
      best_acc = correct / all_data
      best_model_wts = model4.state_dict()

train epoch: 1----------------


100%|██████████| 196/196 [00:19<00:00,  9.81it/s]



train_loss :  3.5816236235657515
val_acc :  0.2176
train epoch: 2----------------


100%|██████████| 196/196 [00:20<00:00,  9.78it/s]



train_loss :  2.7541073645864214
val_acc :  0.2833
train epoch: 3----------------


100%|██████████| 196/196 [00:19<00:00,  9.81it/s]



train_loss :  2.3736668691343192
val_acc :  0.3376
train epoch: 4----------------


100%|██████████| 196/196 [00:19<00:00,  9.97it/s]



train_loss :  2.116758614170308
val_acc :  0.3468
train epoch: 5----------------


100%|██████████| 196/196 [00:20<00:00,  9.80it/s]



train_loss :  1.9030459693499975
val_acc :  0.3412
train epoch: 6----------------


100%|██████████| 196/196 [00:19<00:00,  9.88it/s]



train_loss :  1.7153010654206178
val_acc :  0.3498
train epoch: 7----------------


100%|██████████| 196/196 [00:19<00:00,  9.95it/s]



train_loss :  1.5490733731766135
val_acc :  0.3458
train epoch: 8----------------


100%|██████████| 196/196 [00:19<00:00,  9.92it/s]



train_loss :  1.393361126281777
val_acc :  0.3473
train epoch: 9----------------


100%|██████████| 196/196 [00:19<00:00,  9.83it/s]



train_loss :  1.2683219334908895
val_acc :  0.3244
train epoch: 10----------------


  0%|          | 0/196 [00:01<?, ?it/s]


KeyboardInterrupt: 

## Case 6
- Layer를 더 deep하게 쌓겠다.

In [None]:
class Case5Model(nn.Module):
    #Cifar100 Shape: [Batch, Channel(3), 32, 32]
    def __init__(self, in_channels, num_classes):
        super(Case5Model, self).__init__()
        """Base Model"""
        self.conv1_0 = nn.Conv2d(3, 6, 3, 1, 1)
        self.pool1 = nn.MaxPool2d(2, 2)
        self.batchNorm1_0 = nn.BatchNorm2d(6)
        #6 * (16 * 16)

        self.conv1_1 = nn.Conv2d(6, 6, 3, 1, 1)
        self.batchNorm1_1 = nn.BatchNorm2d(6)
        self.conv1_2 = nn.Conv2d(6, 6, 3, 1, 1)
        self.batchNorm1_2 = nn.BatchNorm2d(6)
        self.conv1_3 = nn.Conv2d(6, 6, 3, 1, 1)
        self.batchNorm1_3 = nn.BatchNorm2d(6)
        self.conv1_4 = nn.Conv2d(6, 6, 3, 1, 1)
        self.batchNorm1_4 = nn.BatchNorm2d(6)
        #6 * (16 * 16)
        
        self.conv2_0 = nn.Conv2d(6, 16, 3, 1, 1)
        self.pool2 = nn.MaxPool2d(2, 2)
        self.batchNorm2_0 = nn.BatchNorm2d(16)
        #16 * (8 * 8)

        self.conv2_1 = nn.Conv2d(16, 16, 3, 1, 1)
        self.batchNorm2_1 = nn.BatchNorm2d(16)
        self.conv2_2 = nn.Conv2d(16, 16, 3, 1, 1)
        self.batchNorm2_2 = nn.BatchNorm2d(16)
        self.conv2_3 = nn.Conv2d(16, 16, 3, 1, 1)
        self.batchNorm2_3 = nn.BatchNorm2d(16)
        self.conv2_4 = nn.Conv2d(16, 16, 3, 1, 1)
        self.batchNorm2_4 = nn.BatchNorm2d(16)
        #16 * (8 * 8)
        
        self.conv3_0 = nn.Conv2d(16, 32, 3, 1, 1)
        self.pool3 = nn.MaxPool2d(2, 2)
        self.batchNorm3_0 = nn.BatchNorm2d(32)
        #32 * (4 * 4)

        self.conv3_1 = nn.Conv2d(32, 32, 3, 1, 1)
        self.batchNorm3_1 = nn.BatchNorm2d(32)
        self.conv3_2 = nn.Conv2d(32, 32, 3, 1, 1)
        self.batchNorm3_2 = nn.BatchNorm2d(32)
        self.conv3_3 = nn.Conv2d(32, 32, 3, 1, 1)
        self.batchNorm3_3 = nn.BatchNorm2d(32)
        self.conv3_4 = nn.Conv2d(32, 32, 3, 1, 1)
        self.batchNorm3_4 = nn.BatchNorm2d(32)
        #32 * (4 * 4)

        # self.conv4_0 = nn.Conv2d(32, 64, 3, 1, 1)
        # self.pool4 = nn.MaxPool2d(2, 2)
        # self.batchNorm4_0 = nn.BatchNorm2d(64)
        # #64 * (2 * 2)

        # self.conv4_1 = nn.Conv2d(64, 64, 3, 1, 1)
        # self.batchNorm4_1 = nn.BatchNorm2d(64)
        # self.conv4_2 = nn.Conv2d(64, 64, 3, 1, 1)
        # self.batchNorm4_2 = nn.BatchNorm2d(64)
        # self.conv4_3 = nn.Conv2d(64, 64, 3, 1, 1)
        # self.batchNorm4_3 = nn.BatchNorm2d(64)
        # self.conv4_4 = nn.Conv2d(64, 64, 3, 1, 1)
        # self.batchNorm4_4 = nn.BatchNorm2d(64)

        self.fc1 = nn.Linear(512, 256)
        self.fc2 = nn.Linear(256, 128)
        self.fc3 = nn.Linear(128, 100)
        
        

    def forward(self, x):
        """Base Model"""
        x1_0 = self.batchNorm1_0(self.pool1(F.relu(self.conv1_0(x))))
        x1_1 = self.batchNorm1_1(F.relu(self.conv1_1(x1_0)))
        x1_2 = self.batchNorm1_2(F.relu(self.conv1_2(x1_1)) + x1_0)
        x1_3 = self.batchNorm1_3(F.relu(self.conv1_3(x1_2)) + x1_0 + x1_1)
        x1_4 = self.batchNorm1_4(F.relu(self.conv1_4(x1_3)) + x1_0 + x1_1 + x1_2)

        x2_0 = self.batchNorm2_0(self.pool2(F.relu(self.conv2_0(x1_4))))
        x2_1 = self.batchNorm2_1(F.relu(self.conv2_1(x2_0)))
        x2_2 = self.batchNorm2_2(F.relu(self.conv2_2(x2_1)) + x2_0)
        x2_3 = self.batchNorm2_3(F.relu(self.conv2_3(x2_2)) + x2_0 + x2_1)
        x2_4 = self.batchNorm2_4(F.relu(self.conv2_4(x2_3)) + x2_0 + x2_1 + x2_2)


        x3_0 = self.batchNorm3_0(self.pool3(F.relu(self.conv3_0(x2_4))))
        x3_1 = self.batchNorm3_1(F.relu(self.conv3_1(x3_0)))
        x3_2 = self.batchNorm3_2(F.relu(self.conv3_2(x3_1)) + x3_0)
        x3_3 = self.batchNorm3_3(F.relu(self.conv3_3(x3_2)) + x3_0 + x3_1)
        x3_4 = self.batchNorm3_4(F.relu(self.conv3_4(x3_3)) + x3_0 + x3_1 + x3_2)

        x = torch.flatten(x3_4, 1) # flatten all dimensions except batch
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x

model5 = Case5Model(3, 100).to(device)
optimizer = optim.Adam(model5.parameters(), momentum=0.9, nesterov=True, lr=0.1)

model5


Case5Model(
  (conv1_0): Conv2d(3, 6, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (pool1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (batchNorm1_0): BatchNorm2d(6, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv1_1): Conv2d(6, 6, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (batchNorm1_1): BatchNorm2d(6, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv1_2): Conv2d(6, 6, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (batchNorm1_2): BatchNorm2d(6, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv1_3): Conv2d(6, 6, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (batchNorm1_3): BatchNorm2d(6, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv1_4): Conv2d(6, 6, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (batchNorm1_4): BatchNorm2d(6, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv2_0): Conv2d(6, 16, kerne

In [None]:
for epoch in range(50):
    model5.train()
    running_loss = 0.0
    best_acc = 0.0
    best_model_wts = model5.state_dict()
    print(f"train epoch: {epoch+1}----------------")
    for img, label in tqdm(train_loader):
        img = img.to(device)
        label = label.to(device)
        optimizer.zero_grad()
        output = model5(img)
        loss = criterion(output, label)
        running_loss += loss.item()
        loss.backward()
        optimizer.step()
    correct, all_data = 0,0
    print("\ntrain_loss : ", running_loss / len(train_loader))
    model5.eval()
    for img, label in test_loader:
        with torch.no_grad():
            img = img.to(device)
            label = label.to(device)
            output = model5(img)

            correct += torch.sum(torch.argmax(output, dim=1) == label).item()
            all_data += len(label)
    print("val_acc : ", correct / all_data)
    if correct / all_data > best_acc:
      best_acc = correct / all_data
      best_model_wts = model5.state_dict()

train epoch: 1----------------


100%|██████████| 196/196 [00:34<00:00,  5.74it/s]



train_loss :  3.592560725552695
val_acc :  0.2312
train epoch: 2----------------


100%|██████████| 196/196 [00:42<00:00,  4.66it/s]



train_loss :  2.759140508515494
val_acc :  0.3129
train epoch: 3----------------


100%|██████████| 196/196 [00:42<00:00,  4.62it/s]



train_loss :  2.383287867721246
val_acc :  0.3253
train epoch: 4----------------


100%|██████████| 196/196 [00:43<00:00,  4.52it/s]



train_loss :  2.136520119954129
val_acc :  0.3699
train epoch: 5----------------


100%|██████████| 196/196 [00:43<00:00,  4.47it/s]



train_loss :  1.9362358377904307
val_acc :  0.384
train epoch: 6----------------


100%|██████████| 196/196 [00:42<00:00,  4.65it/s]



train_loss :  1.765207861151014
val_acc :  0.3785
train epoch: 7----------------


100%|██████████| 196/196 [00:42<00:00,  4.62it/s]



train_loss :  1.6179643115218805
val_acc :  0.3889
train epoch: 8----------------


100%|██████████| 196/196 [00:42<00:00,  4.57it/s]



train_loss :  1.491591241894936
val_acc :  0.3942
train epoch: 9----------------


100%|██████████| 196/196 [00:42<00:00,  4.66it/s]



train_loss :  1.370022299946571
val_acc :  0.3848
train epoch: 10----------------


100%|██████████| 196/196 [00:43<00:00,  4.55it/s]



train_loss :  1.259012725584361
val_acc :  0.3879
train epoch: 11----------------


100%|██████████| 196/196 [00:42<00:00,  4.57it/s]



train_loss :  1.1516789432082857
val_acc :  0.3819
train epoch: 12----------------


100%|██████████| 196/196 [00:43<00:00,  4.50it/s]



train_loss :  1.0802965167225624
val_acc :  0.3754
train epoch: 13----------------


100%|██████████| 196/196 [00:42<00:00,  4.58it/s]



train_loss :  1.0095387980037807
val_acc :  0.3756
train epoch: 14----------------


100%|██████████| 196/196 [00:42<00:00,  4.57it/s]



train_loss :  0.9324508902369714
val_acc :  0.3811
train epoch: 15----------------


100%|██████████| 196/196 [00:43<00:00,  4.54it/s]



train_loss :  0.8691854738459295
val_acc :  0.3735
train epoch: 16----------------


100%|██████████| 196/196 [00:43<00:00,  4.54it/s]



train_loss :  0.8357868602081221
val_acc :  0.358
train epoch: 17----------------


100%|██████████| 196/196 [00:42<00:00,  4.63it/s]



train_loss :  0.7819894166017065
val_acc :  0.367
train epoch: 18----------------


100%|██████████| 196/196 [00:43<00:00,  4.55it/s]



train_loss :  0.7257013089802801
val_acc :  0.3705
train epoch: 19----------------


100%|██████████| 196/196 [00:42<00:00,  4.62it/s]



train_loss :  0.7070928875889096
val_acc :  0.3654
train epoch: 20----------------


100%|██████████| 196/196 [00:42<00:00,  4.56it/s]



train_loss :  0.6760953521850158
val_acc :  0.3589
train epoch: 21----------------


100%|██████████| 196/196 [00:42<00:00,  4.62it/s]



train_loss :  0.6544220050682827
val_acc :  0.3566
train epoch: 22----------------


100%|██████████| 196/196 [00:42<00:00,  4.62it/s]



train_loss :  0.6276933642066255
val_acc :  0.3631
train epoch: 23----------------


100%|██████████| 196/196 [00:43<00:00,  4.55it/s]



train_loss :  0.6179578876008793
val_acc :  0.3501
train epoch: 24----------------


100%|██████████| 196/196 [00:43<00:00,  4.54it/s]



train_loss :  0.5943349186255007
val_acc :  0.3556
train epoch: 25----------------


100%|██████████| 196/196 [00:42<00:00,  4.59it/s]



train_loss :  0.5715004283250594
val_acc :  0.3567
train epoch: 26----------------


100%|██████████| 196/196 [00:46<00:00,  4.22it/s]



train_loss :  0.5711106054332792
val_acc :  0.3479
train epoch: 27----------------


100%|██████████| 196/196 [00:47<00:00,  4.16it/s]



train_loss :  0.5499459740762808
val_acc :  0.3458
train epoch: 28----------------


100%|██████████| 196/196 [01:29<00:00,  2.19it/s]



train_loss :  0.5449669713876686
val_acc :  0.3566
train epoch: 29----------------


100%|██████████| 196/196 [01:30<00:00,  2.18it/s]



train_loss :  0.5189416296019846
val_acc :  0.3454
train epoch: 30----------------


  0%|          | 0/196 [00:00<?, ?it/s]

## Case 7
- Linear Layer를 더 Deep하게 쌓아보겠다.
- DropOut을 적용한다.

In [None]:
class Case7Model(nn.Module):
    #Cifar100 Shape: [Batch, Channel(3), 32, 32]
    def __init__(self, in_channels, num_classes):
        super(Case7Model, self).__init__()
        """Base Model"""
        self.conv1_0 = nn.Conv2d(3, 6, 3, 1, 1)
        self.pool1 = nn.MaxPool2d(2, 2)
        self.batchNorm1_0 = nn.BatchNorm2d(6)
        #6 * (16 * 16)

        self.conv1_1 = nn.Conv2d(6, 6, 3, 1, 1)
        self.batchNorm1_1 = nn.BatchNorm2d(6)
        self.conv1_2 = nn.Conv2d(6, 6, 3, 1, 1)
        self.batchNorm1_2 = nn.BatchNorm2d(6)
        self.conv1_3 = nn.Conv2d(6, 6, 3, 1, 1)
        self.batchNorm1_3 = nn.BatchNorm2d(6)
        self.conv1_4 = nn.Conv2d(6, 6, 3, 1, 1)
        self.batchNorm1_4 = nn.BatchNorm2d(6)
        #6 * (16 * 16)
        
        self.conv2_0 = nn.Conv2d(6, 16, 3, 1, 1)
        self.pool2 = nn.MaxPool2d(2, 2)
        self.batchNorm2_0 = nn.BatchNorm2d(16)
        #16 * (8 * 8)

        self.conv2_1 = nn.Conv2d(16, 16, 3, 1, 1)
        self.batchNorm2_1 = nn.BatchNorm2d(16)
        self.conv2_2 = nn.Conv2d(16, 16, 3, 1, 1)
        self.batchNorm2_2 = nn.BatchNorm2d(16)
        self.conv2_3 = nn.Conv2d(16, 16, 3, 1, 1)
        self.batchNorm2_3 = nn.BatchNorm2d(16)
        self.conv2_4 = nn.Conv2d(16, 16, 3, 1, 1)
        self.batchNorm2_4 = nn.BatchNorm2d(16)
        #16 * (8 * 8)
        
        self.conv3_0 = nn.Conv2d(16, 32, 3, 1, 1)
        self.pool3 = nn.MaxPool2d(2, 2)
        self.batchNorm3_0 = nn.BatchNorm2d(32)
        #32 * (4 * 4)

        self.conv3_1 = nn.Conv2d(32, 32, 3, 1, 1)
        self.batchNorm3_1 = nn.BatchNorm2d(32)
        self.conv3_2 = nn.Conv2d(32, 32, 3, 1, 1)
        self.batchNorm3_2 = nn.BatchNorm2d(32)
        self.conv3_3 = nn.Conv2d(32, 32, 3, 1, 1)
        self.batchNorm3_3 = nn.BatchNorm2d(32)
        self.conv3_4 = nn.Conv2d(32, 32, 3, 1, 1)
        self.batchNorm3_4 = nn.BatchNorm2d(32)
        #32 * (4 * 4)

        self.conv4_0 = nn.Conv2d(32, 128, 3, 1, 1)
        self.pool4 = nn.MaxPool2d(2, 2)
        self.batchNorm4_0 = nn.BatchNorm2d(128)
        #128 * (2 * 2)

        self.conv4_1 = nn.Conv2d(128, 128, 3, 1, 1)
        self.batchNorm4_1 = nn.BatchNorm2d(128)
        self.conv4_2 = nn.Conv2d(128, 128, 3, 1, 1)
        self.batchNorm4_2 = nn.BatchNorm2d(128)
        self.conv4_3 = nn.Conv2d(128, 128, 3, 1, 1)
        self.batchNorm4_3 = nn.BatchNorm2d(128)
        self.conv4_4 = nn.Conv2d(128, 128, 3, 1, 1)
        self.batchNorm4_4 = nn.BatchNorm2d(128)
        #128 * (2 * 2)

        self.fc1 = nn.Linear(512, 256)
        self.batchNormL_1 = nn.BatchNorm1d(256)
        self.fc2 = nn.Linear(256, 256)
        self.batchNormL_2 = nn.BatchNorm1d(256)
        self.fc3 = nn.Linear(256, 128)
        self.batchNormL_3 = nn.BatchNorm1d(128)
        self.fc4 = nn.Linear(128, 128)
        self.batchNormL_4 = nn.BatchNorm1d(128)
        self.fc5 = nn.Linear(128, 100)
        
        self.dropout = nn.Dropout(0.2)        

    def forward(self, x):
        """Base Model"""
        x1_0 = self.batchNorm1_0(self.pool1(F.relu(self.conv1_0(x))))
        x1_1 = self.batchNorm1_1(F.relu(self.conv1_1(x1_0)))
        x1_2 = self.batchNorm1_2(F.relu(self.conv1_2(x1_1)) + x1_0)
        x1_3 = self.batchNorm1_3(F.relu(self.conv1_3(x1_2)) + x1_0 + x1_1)
        x1_4 = self.batchNorm1_4(F.relu(self.conv1_4(x1_3)) + x1_0 + x1_1 + x1_2)

        x2_0 = self.batchNorm2_0(self.pool2(F.relu(self.conv2_0(x1_4))))
        x2_1 = self.batchNorm2_1(F.relu(self.conv2_1(x2_0)))
        x2_2 = self.batchNorm2_2(F.relu(self.conv2_2(x2_1)) + x2_0)
        x2_3 = self.batchNorm2_3(F.relu(self.conv2_3(x2_2)) + x2_0 + x2_1)
        x2_4 = self.batchNorm2_4(F.relu(self.conv2_4(x2_3)) + x2_0 + x2_1 + x2_2)

        x3_0 = self.batchNorm3_0(self.pool3(F.relu(self.conv3_0(x2_4))))
        x3_1 = self.batchNorm3_1(F.relu(self.conv3_1(x3_0)))
        x3_2 = self.batchNorm3_2(F.relu(self.conv3_2(x3_1)) + x3_0)
        x3_3 = self.batchNorm3_3(F.relu(self.conv3_3(x3_2)) + x3_0 + x3_1)
        x3_4 = self.batchNorm3_4(F.relu(self.conv3_4(x3_3)) + x3_0 + x3_1 + x3_2)

        x4_0 = self.batchNorm4_0(self.pool4(F.relu(self.conv4_0(x3_4))))
        x4_1 = self.batchNorm4_1(F.relu(self.conv4_1(x4_0)))
        x4_2 = self.batchNorm4_2(F.relu(self.conv4_2(x4_1)) + x4_0)
        x4_3 = self.batchNorm4_3(F.relu(self.conv4_3(x4_2)) + x4_0 + x4_1)
        x4_4 = self.batchNorm4_4(F.relu(self.conv4_4(x4_3)) + x4_0 + x4_1 + x4_2)

        x = torch.flatten(x4_4, 1) # flatten all dimensions except batch
        x = self.dropout(x)
        x = self.batchNormL_1(F.relu(self.fc1(x)))
        x = self.dropout(x)
        x = self.batchNormL_2(F.relu(self.fc2(x)))
        x = self.dropout(x)
        x = self.batchNormL_3(F.relu(self.fc3(x)))
        x = self.dropout(x)
        x = self.batchNormL_4(F.relu(self.fc4(x)))
        x = self.dropout(x)
        x = self.fc5(x)
        return x

model7 = Case7Model(3, 100).to(device)
optimizer = optim.SGD(model7.parameters(), momentum=0.9, nesterov=True, lr=0.05)


In [None]:
for epoch in range(50):
    model7.train()
    running_loss = 0.0
    best_acc = 0.0
    best_model_wts = model7.state_dict()
    print(f"train epoch: {epoch+1}----------------")
    for img, label in tqdm(train_loader):
        img = img.to(device)
        label = label.to(device)
        optimizer.zero_grad()
        output = model7(img)
        loss = criterion(output, label)
        running_loss += loss.item()
        loss.backward()
        optimizer.step()
    correct, all_data = 0,0
    print("\ntrain_loss : ", running_loss / len(train_loader))
    model7.eval()
    for img, label in test_loader:
        with torch.no_grad():
            img = img.to(device)
            label = label.to(device)
            output = model7(img)

            correct += torch.sum(torch.argmax(output, dim=1) == label).item()
            all_data += len(label)
    print("val_acc : ", correct / all_data)
    if correct / all_data > best_acc:
      best_acc = correct / all_data
      best_model_wts = model7.state_dict()

train epoch: 1----------------


100%|██████████| 196/196 [00:37<00:00,  5.21it/s]


train_loss :  4.369052942918271





val_acc :  0.0863
train epoch: 2----------------


100%|██████████| 196/196 [00:33<00:00,  5.82it/s]


train_loss :  4.034760343785188





val_acc :  0.1474
train epoch: 3----------------


100%|██████████| 196/196 [00:36<00:00,  5.43it/s]


train_loss :  3.812810238526792





val_acc :  0.1927
train epoch: 4----------------


100%|██████████| 196/196 [00:32<00:00,  6.10it/s]


train_loss :  3.641325555285629





val_acc :  0.2124
train epoch: 5----------------


100%|██████████| 196/196 [00:37<00:00,  5.23it/s]


train_loss :  3.504164600858883





val_acc :  0.2543
train epoch: 6----------------


100%|██████████| 196/196 [00:33<00:00,  5.84it/s]


train_loss :  3.38158426965986





val_acc :  0.2616
train epoch: 7----------------


100%|██████████| 196/196 [00:31<00:00,  6.16it/s]


train_loss :  3.2728307660745113





val_acc :  0.2873
train epoch: 8----------------


100%|██████████| 196/196 [00:33<00:00,  5.89it/s]


train_loss :  3.1866535325439607





val_acc :  0.3075
train epoch: 9----------------


100%|██████████| 196/196 [00:32<00:00,  5.97it/s]


train_loss :  3.1088798167754192





val_acc :  0.3202
train epoch: 10----------------


100%|██████████| 196/196 [00:33<00:00,  5.90it/s]


train_loss :  3.0445684668969135





val_acc :  0.3398
train epoch: 11----------------


100%|██████████| 196/196 [00:34<00:00,  5.76it/s]


train_loss :  2.9840864016085256





val_acc :  0.3554
train epoch: 12----------------


100%|██████████| 196/196 [00:32<00:00,  6.12it/s]


train_loss :  2.9337891753838985





val_acc :  0.3486
train epoch: 13----------------


100%|██████████| 196/196 [00:33<00:00,  5.90it/s]


train_loss :  2.8874661010138842





val_acc :  0.3497
train epoch: 14----------------


100%|██████████| 196/196 [00:32<00:00,  5.97it/s]


train_loss :  2.8363275807731005





val_acc :  0.3545
train epoch: 15----------------


100%|██████████| 196/196 [00:33<00:00,  5.90it/s]


train_loss :  2.7977877818808263





val_acc :  0.3595
train epoch: 16----------------


100%|██████████| 196/196 [00:33<00:00,  5.83it/s]


train_loss :  2.7617962068440964





val_acc :  0.3799
train epoch: 17----------------


100%|██████████| 196/196 [00:31<00:00,  6.24it/s]


train_loss :  2.717761229495613





val_acc :  0.3709
train epoch: 18----------------


100%|██████████| 196/196 [00:33<00:00,  5.89it/s]


train_loss :  2.6854435083817463





val_acc :  0.3925
train epoch: 19----------------


100%|██████████| 196/196 [00:32<00:00,  6.11it/s]


train_loss :  2.664245616416542





val_acc :  0.3962
train epoch: 20----------------


100%|██████████| 196/196 [00:32<00:00,  5.96it/s]


train_loss :  2.6256759604629205





val_acc :  0.4009
train epoch: 21----------------


100%|██████████| 196/196 [00:34<00:00,  5.75it/s]


train_loss :  2.5973594565780793





val_acc :  0.394
train epoch: 22----------------


100%|██████████| 196/196 [00:31<00:00,  6.27it/s]


train_loss :  2.5681124670164928





val_acc :  0.3935
train epoch: 23----------------


100%|██████████| 196/196 [00:32<00:00,  5.99it/s]


train_loss :  2.5451538939865266





val_acc :  0.4027
train epoch: 24----------------


100%|██████████| 196/196 [00:32<00:00,  6.12it/s]


train_loss :  2.518684503983478





val_acc :  0.4045
train epoch: 25----------------


100%|██████████| 196/196 [00:32<00:00,  6.04it/s]


train_loss :  2.491119079443873





val_acc :  0.4109
train epoch: 26----------------


100%|██████████| 196/196 [00:34<00:00,  5.70it/s]


train_loss :  2.4623010036896686





val_acc :  0.4181
train epoch: 27----------------


100%|██████████| 196/196 [00:30<00:00,  6.34it/s]


train_loss :  2.4436387626492246





val_acc :  0.4158
train epoch: 28----------------


100%|██████████| 196/196 [00:32<00:00,  6.04it/s]


train_loss :  2.4127305429808947





val_acc :  0.4223
train epoch: 29----------------


100%|██████████| 196/196 [00:30<00:00,  6.37it/s]


train_loss :  2.404890990987116





val_acc :  0.4248
train epoch: 30----------------


100%|██████████| 196/196 [00:32<00:00,  6.05it/s]


train_loss :  2.360597147017109





val_acc :  0.4242
train epoch: 31----------------


100%|██████████| 196/196 [00:33<00:00,  5.84it/s]


train_loss :  2.3292069155342725





val_acc :  0.4229
train epoch: 32----------------


100%|██████████| 196/196 [00:30<00:00,  6.36it/s]


train_loss :  2.313345493710771





val_acc :  0.4237
train epoch: 33----------------


100%|██████████| 196/196 [00:32<00:00,  5.95it/s]


train_loss :  2.295609267390504





val_acc :  0.4258
train epoch: 34----------------


100%|██████████| 196/196 [00:30<00:00,  6.34it/s]


train_loss :  2.2735633497335472





val_acc :  0.429
train epoch: 35----------------


100%|██████████| 196/196 [00:32<00:00,  6.04it/s]


train_loss :  2.252590447664261





val_acc :  0.4318
train epoch: 36----------------


100%|██████████| 196/196 [00:33<00:00,  5.85it/s]


train_loss :  2.245071849652699





val_acc :  0.4246
train epoch: 37----------------


100%|██████████| 196/196 [00:30<00:00,  6.44it/s]


train_loss :  2.212303559390866





val_acc :  0.4283
train epoch: 38----------------


100%|██████████| 196/196 [00:33<00:00,  5.81it/s]


train_loss :  2.1978605541647696





val_acc :  0.4308
train epoch: 39----------------


100%|██████████| 196/196 [00:30<00:00,  6.41it/s]


train_loss :  2.1870881975913536





val_acc :  0.4294
train epoch: 40----------------


100%|██████████| 196/196 [00:32<00:00,  6.00it/s]


train_loss :  2.1592212136910884





val_acc :  0.4306
train epoch: 41----------------


100%|██████████| 196/196 [00:32<00:00,  6.10it/s]


train_loss :  2.150691258055823





val_acc :  0.4321
train epoch: 42----------------


100%|██████████| 196/196 [00:31<00:00,  6.21it/s]


train_loss :  2.1219109302880814





val_acc :  0.4341
train epoch: 43----------------


100%|██████████| 196/196 [00:32<00:00,  6.11it/s]


train_loss :  2.1072700206114323





val_acc :  0.4397
train epoch: 44----------------


100%|██████████| 196/196 [00:32<00:00,  6.11it/s]


train_loss :  2.102964518021564





val_acc :  0.4318
train epoch: 45----------------


100%|██████████| 196/196 [00:32<00:00,  6.09it/s]


train_loss :  2.071079203060695





val_acc :  0.4365
train epoch: 46----------------


100%|██████████| 196/196 [00:32<00:00,  6.01it/s]


train_loss :  2.052137487396902





val_acc :  0.4369
train epoch: 47----------------


100%|██████████| 196/196 [00:30<00:00,  6.44it/s]


train_loss :  2.0417079408558045





val_acc :  0.4391
train epoch: 48----------------


100%|██████████| 196/196 [00:33<00:00,  5.87it/s]


train_loss :  2.023634799280945





val_acc :  0.4403
train epoch: 49----------------


100%|██████████| 196/196 [00:30<00:00,  6.38it/s]


train_loss :  2.013939844710486





val_acc :  0.4358
train epoch: 50----------------


100%|██████████| 196/196 [00:33<00:00,  5.91it/s]


train_loss :  1.9825612093721117





val_acc :  0.442


## Case 8
- BottleNeck 구조를 만들겠다.

In [None]:
class Case8Model(nn.Module):
    #Cifar100 Shape: [Batch, Channel(3), 32, 32]
    def __init__(self, in_channels, num_classes):
        super(Case8Model, self).__init__()
        """Base Model"""
        self.conv1_0 = nn.Conv2d(3, 6, 3, 1, 1)
        self.pool1 = nn.MaxPool2d(2, 2)
        self.batchNorm1_0 = nn.BatchNorm2d(6)
        #6 * (16 * 16)

        self.conv1_1 = nn.Conv2d(6, 6, 3, 1, 1)
        self.batchNorm1_1 = nn.BatchNorm2d(6)
        self.conv1_2 = nn.Conv2d(6, 6, 3, 1, 1)
        self.batchNorm1_2 = nn.BatchNorm2d(6)
        self.conv1_3 = nn.Conv2d(6, 6, 3, 1, 1)
        self.batchNorm1_3 = nn.BatchNorm2d(6)
        self.conv1_4 = nn.Conv2d(6, 6, 3, 1, 1)
        self.batchNorm1_4 = nn.BatchNorm2d(6)
        #6 * (16 * 16)
        
        self.conv1_5 = nn.Conv2d(6, 6, 3, 1, 1)
        self.batchNorm1_5 = nn.BatchNorm2d(6)
        self.conv1_6 = nn.Conv2d(6, 6, 3, 1, 1)
        self.batchNorm1_6 = nn.BatchNorm2d(6)
        self.conv1_7 = nn.Conv2d(6, 6, 3, 1, 1)
        self.batchNorm1_7 = nn.BatchNorm2d(6)
        self.conv1_8 = nn.Conv2d(6, 6, 3, 1, 1)
        self.batchNorm1_8 = nn.BatchNorm2d(6)
        #6 * (16 * 16)

        self.conv1_9 = nn.Conv2d(6, 6, 3, 1, 1)
        self.batchNorm1_9 = nn.BatchNorm2d(6)
        self.conv1_10 = nn.Conv2d(6, 6, 3, 1, 1)
        self.batchNorm1_10 = nn.BatchNorm2d(6)
        self.conv1_11 = nn.Conv2d(6, 6, 3, 1, 1)
        self.batchNorm1_11 = nn.BatchNorm2d(6)
        self.conv1_12 = nn.Conv2d(6, 6, 3, 1, 1)
        self.batchNorm1_12 = nn.BatchNorm2d(6)
        #6 * (16 * 16)

        self.conv2_0 = nn.Conv2d(6, 16, 3, 1, 1)
        self.pool2 = nn.MaxPool2d(2, 2)
        self.batchNorm2_0 = nn.BatchNorm2d(16)
        #16 * (8 * 8)

        self.conv2_1 = nn.Conv2d(16, 16, 3, 1, 1)
        self.batchNorm2_1 = nn.BatchNorm2d(16)
        self.conv2_2 = nn.Conv2d(16, 16, 3, 1, 1)
        self.batchNorm2_2 = nn.BatchNorm2d(16)
        self.conv2_3 = nn.Conv2d(16, 16, 3, 1, 1)
        self.batchNorm2_3 = nn.BatchNorm2d(16)
        self.conv2_4 = nn.Conv2d(16, 16, 3, 1, 1)
        self.batchNorm2_4 = nn.BatchNorm2d(16)
        #16 * (8 * 8)

        self.conv2_5 = nn.Conv2d(16, 16, 3, 1, 1)
        self.batchNorm2_5 = nn.BatchNorm2d(16)
        self.conv2_6 = nn.Conv2d(16, 16, 3, 1, 1)
        self.batchNorm2_6 = nn.BatchNorm2d(16)
        self.conv2_7 = nn.Conv2d(16, 16, 3, 1, 1)
        self.batchNorm2_7 = nn.BatchNorm2d(16)
        self.conv2_8 = nn.Conv2d(16, 16, 3, 1, 1)
        self.batchNorm2_8 = nn.BatchNorm2d(16)
        #16 * (8 * 8)

        self.conv2_9 = nn.Conv2d(16, 16, 3, 1, 1)
        self.batchNorm2_9 = nn.BatchNorm2d(16)
        self.conv2_10 = nn.Conv2d(16, 16, 3, 1, 1)
        self.batchNorm2_10 = nn.BatchNorm2d(16)
        self.conv2_11 = nn.Conv2d(16, 16, 3, 1, 1)
        self.batchNorm2_11 = nn.BatchNorm2d(16)
        self.conv2_12 = nn.Conv2d(16, 16, 3, 1, 1)
        self.batchNorm2_12 = nn.BatchNorm2d(16)
        #16 * (8 * 8)


        self.conv3_0 = nn.Conv2d(16, 32, 3, 1, 1)
        self.pool3 = nn.MaxPool2d(2, 2)
        self.batchNorm3_0 = nn.BatchNorm2d(32)
        #32 * (4 * 4)

        self.conv3_1 = nn.Conv2d(32, 32, 3, 1, 1)
        self.batchNorm3_1 = nn.BatchNorm2d(32)
        self.conv3_2 = nn.Conv2d(32, 32, 3, 1, 1)
        self.batchNorm3_2 = nn.BatchNorm2d(32)
        self.conv3_3 = nn.Conv2d(32, 32, 3, 1, 1)
        self.batchNorm3_3 = nn.BatchNorm2d(32)
        self.conv3_4 = nn.Conv2d(32, 32, 3, 1, 1)
        self.batchNorm3_4 = nn.BatchNorm2d(32)
        #32 * (4 * 4)

        self.conv3_5 = nn.Conv2d(32, 32, 3, 1, 1)
        self.batchNorm3_5 = nn.BatchNorm2d(32)
        self.conv3_6 = nn.Conv2d(32, 32, 3, 1, 1)
        self.batchNorm3_6 = nn.BatchNorm2d(32)
        self.conv3_7 = nn.Conv2d(32, 32, 3, 1, 1)
        self.batchNorm3_7 = nn.BatchNorm2d(32)
        self.conv3_8 = nn.Conv2d(32, 32, 3, 1, 1)
        self.batchNorm3_8 = nn.BatchNorm2d(32)
        #32 * (4 * 4)

        self.conv3_9 = nn.Conv2d(32, 32, 3, 1, 1)
        self.batchNorm3_9 = nn.BatchNorm2d(32)
        self.conv3_10 = nn.Conv2d(32, 32, 3, 1, 1)
        self.batchNorm3_10 = nn.BatchNorm2d(32)
        self.conv3_11 = nn.Conv2d(32, 32, 3, 1, 1)
        self.batchNorm3_11 = nn.BatchNorm2d(32)
        self.conv3_12 = nn.Conv2d(32, 32, 3, 1, 1)
        self.batchNorm3_12 = nn.BatchNorm2d(32)
        #32 * (4 * 4)

        self.fc1 = nn.Linear(512, 256)
        self.batchNormL_1 = nn.BatchNorm1d(256)
        self.fc2 = nn.Linear(256, 256)
        self.batchNormL_2 = nn.BatchNorm1d(256)
        self.fc3 = nn.Linear(256, 128)
        self.batchNormL_3 = nn.BatchNorm1d(128)
        self.fc4 = nn.Linear(128, 128)
        self.batchNormL_4 = nn.BatchNorm1d(128)
        self.fc5 = nn.Linear(128, 100)
        
        self.dropout = nn.Dropout(0.2)        

    def forward(self, x):
        """Base Model"""
        x1_0 = self.batchNorm1_0(self.pool1(F.relu(self.conv1_0(x))))
        
        x1_1 = self.batchNorm1_1(F.relu(self.conv1_1(x1_0)))
        x1_2 = self.batchNorm1_2(F.relu(self.conv1_2(x1_1)) + x1_0)
        x1_3 = self.batchNorm1_3(F.relu(self.conv1_3(x1_2)) + x1_0 + x1_1)
        x1_4 = self.batchNorm1_4(F.relu(self.conv1_4(x1_3)) + x1_0 + x1_1 + x1_2)

        x1_5 = self.batchNorm1_1(F.relu(self.conv1_5(x1_0)))
        x1_6 = self.batchNorm1_2(F.relu(self.conv1_6(x1_5)) + x1_0)
        x1_7 = self.batchNorm1_3(F.relu(self.conv1_7(x1_6)) + x1_0 + x1_5)
        x1_8 = self.batchNorm1_4(F.relu(self.conv1_8(x1_7)) + x1_0 + x1_5 + x1_6)

        x1_9 = self.batchNorm1_1(F.relu(self.conv1_9(x1_0)))
        x1_10 = self.batchNorm1_2(F.relu(self.conv1_10(x1_9)) + x1_0)
        x1_11 = self.batchNorm1_3(F.relu(self.conv1_11(x1_10)) + x1_0 + x1_9)
        x1_12 = self.batchNorm1_4(F.relu(self.conv1_12(x1_11)) + x1_0 + x1_9 + x1_10)

        x1 = x1_4 + x1_8 + x1_12
        x2_0 = self.batchNorm2_0(self.pool2(F.relu(self.conv2_0(x1))))

        x2_1 = self.batchNorm2_1(F.relu(self.conv2_1(x2_0)))
        x2_2 = self.batchNorm2_2(F.relu(self.conv2_2(x2_1)) + x2_0)
        x2_3 = self.batchNorm2_3(F.relu(self.conv2_3(x2_2)) + x2_0 + x2_1)
        x2_4 = self.batchNorm2_4(F.relu(self.conv2_4(x2_3)) + x2_0 + x2_1 + x2_2)

        x2_5 = self.batchNorm2_5(F.relu(self.conv2_1(x2_0)))
        x2_6 = self.batchNorm2_6(F.relu(self.conv2_2(x2_5)) + x2_0)
        x2_7 = self.batchNorm2_7(F.relu(self.conv2_3(x2_6)) + x2_0 + x2_5)
        x2_8 = self.batchNorm2_8(F.relu(self.conv2_4(x2_7)) + x2_0 + x2_5 + x2_6)

        x2_9 = self.batchNorm2_9(F.relu(self.conv2_1(x2_0)))
        x2_10 = self.batchNorm2_10(F.relu(self.conv2_2(x2_9)) + x2_0)
        x2_11 = self.batchNorm2_11(F.relu(self.conv2_3(x2_10)) + x2_0 + x2_9)
        x2_12 = self.batchNorm2_12(F.relu(self.conv2_4(x2_11)) + x2_0 + x2_9 + x2_10)

        x2 = x2_4 + x2_8 + x2_12
        x3_0 = self.batchNorm3_0(self.pool3(F.relu(self.conv3_0(x2))))

        x3_1 = self.batchNorm3_1(F.relu(self.conv3_1(x3_0)))
        x3_2 = self.batchNorm3_2(F.relu(self.conv3_2(x3_1)) + x3_0)
        x3_3 = self.batchNorm3_3(F.relu(self.conv3_3(x3_2)) + x3_0 + x3_1)
        x3_4 = self.batchNorm3_4(F.relu(self.conv3_4(x3_3)) + x3_0 + x3_1 + x3_2)

        x3_5 = self.batchNorm3_5(F.relu(self.conv3_1(x3_0)))
        x3_6 = self.batchNorm3_6(F.relu(self.conv3_2(x3_5)) + x3_0)
        x3_7 = self.batchNorm3_7(F.relu(self.conv3_3(x3_6)) + x3_0 + x3_5)
        x3_8 = self.batchNorm3_8(F.relu(self.conv3_4(x3_7)) + x3_0 + x3_5 + x3_6)

        x3_9 = self.batchNorm3_9(F.relu(self.conv3_1(x3_0)))
        x3_10 = self.batchNorm3_10(F.relu(self.conv3_2(x3_9)) + x3_0)
        x3_11 = self.batchNorm3_11(F.relu(self.conv3_3(x3_10)) + x3_0 + x3_9)
        x3_12 = self.batchNorm3_12(F.relu(self.conv3_4(x3_11)) + x3_0 + x3_9 + x3_10)
        
        x4 = x3_4 + x3_8 + x3_12

        x = torch.flatten(x4, 1) # flatten all dimensions except batch
        x = self.dropout(x)
        x = self.batchNormL_1(F.relu(self.fc1(x)))
        x = self.dropout(x)
        x = self.batchNormL_2(F.relu(self.fc2(x)))
        x = self.dropout(x)
        x = self.batchNormL_3(F.relu(self.fc3(x)))
        x = self.dropout(x)
        x = self.batchNormL_4(F.relu(self.fc4(x)))
        x = self.dropout(x)
        x = self.fc5(x)
        return x

model8 = Case8Model(3, 100).to(device)
optimizer = optim.SGD(model8.parameters(), momentum=0.9, nesterov=True, lr=0.1)


In [None]:
for epoch in range(100):
    model8.train()
    running_loss = 0.0
    best_acc = 0.0
    best_model_wts = model8.state_dict()
    print(f"train epoch: {epoch+1}----------------")
    for img, label in tqdm(train_loader):
        img = img.to(device)
        label = label.to(device)
        optimizer.zero_grad()
        output = model8(img)
        loss = criterion(output, label)
        running_loss += loss.item()
        loss.backward()
        optimizer.step()
    correct, all_data = 0,0
    print("\ntrain_loss : ", running_loss / len(train_loader))
    model8.eval()
    for img, label in test_loader:
        with torch.no_grad():
            img = img.to(device)
            label = label.to(device)
            output = model8(img)

            correct += torch.sum(torch.argmax(output, dim=1) == label).item()
            all_data += len(label)
    print("val_acc : ", correct / all_data)
    if correct / all_data > best_acc:
      best_acc = correct / all_data
      best_model_wts = model8.state_dict()

train epoch: 1----------------


100%|██████████| 196/196 [01:48<00:00,  1.81it/s]



train_loss :  3.940081620702938
val_acc :  0.1493
train epoch: 2----------------


100%|██████████| 196/196 [01:36<00:00,  2.02it/s]



train_loss :  3.4054188253928204
val_acc :  0.2157
train epoch: 3----------------


100%|██████████| 196/196 [01:37<00:00,  2.02it/s]



train_loss :  3.132105849227127
val_acc :  0.2603
train epoch: 4----------------


100%|██████████| 196/196 [01:35<00:00,  2.05it/s]



train_loss :  2.932823749221101
val_acc :  0.3011
train epoch: 5----------------


100%|██████████| 196/196 [01:37<00:00,  2.00it/s]



train_loss :  2.7736418222894472
val_acc :  0.3208
train epoch: 6----------------


100%|██████████| 196/196 [01:37<00:00,  2.02it/s]



train_loss :  2.644396791652757
val_acc :  0.3406
train epoch: 7----------------


100%|██████████| 196/196 [01:35<00:00,  2.06it/s]



train_loss :  2.5455608039486166
val_acc :  0.351
train epoch: 8----------------


100%|██████████| 196/196 [01:35<00:00,  2.06it/s]



train_loss :  2.456267210902
val_acc :  0.3544
train epoch: 9----------------


100%|██████████| 196/196 [01:35<00:00,  2.05it/s]



train_loss :  2.384217971441697
val_acc :  0.3709
train epoch: 10----------------


100%|██████████| 196/196 [01:36<00:00,  2.04it/s]



train_loss :  2.328522525271591
val_acc :  0.3834
train epoch: 11----------------


100%|██████████| 196/196 [01:48<00:00,  1.80it/s]



train_loss :  2.2693341076374054
val_acc :  0.3874
train epoch: 12----------------


100%|██████████| 196/196 [03:42<00:00,  1.13s/it]



train_loss :  2.22080831016813
val_acc :  0.3939
train epoch: 13----------------


100%|██████████| 196/196 [03:45<00:00,  1.15s/it]



train_loss :  2.177044711550888
val_acc :  0.3975
train epoch: 14----------------


100%|██████████| 196/196 [17:56<00:00,  5.49s/it]   



train_loss :  2.1406194038537083
val_acc :  0.4012
train epoch: 15----------------


100%|██████████| 196/196 [01:28<00:00,  2.22it/s]



train_loss :  2.1007614585818075
val_acc :  0.4145
train epoch: 16----------------


100%|██████████| 196/196 [01:22<00:00,  2.38it/s]



train_loss :  2.063572115435892
val_acc :  0.4074
train epoch: 17----------------


100%|██████████| 196/196 [01:28<00:00,  2.22it/s]



train_loss :  2.0305285405139535
val_acc :  0.4045
train epoch: 18----------------


100%|██████████| 196/196 [01:22<00:00,  2.37it/s]



train_loss :  1.999475038781458
val_acc :  0.4046
train epoch: 19----------------


100%|██████████| 196/196 [01:20<00:00,  2.45it/s]



train_loss :  1.9626066459684957
val_acc :  0.4176
train epoch: 20----------------


100%|██████████| 196/196 [01:21<00:00,  2.41it/s]



train_loss :  1.941057058621426
val_acc :  0.4161
train epoch: 21----------------


100%|██████████| 196/196 [01:19<00:00,  2.46it/s]



train_loss :  1.9152171994958604
val_acc :  0.4158
train epoch: 22----------------


100%|██████████| 196/196 [01:18<00:00,  2.48it/s]



train_loss :  1.889234109800689
val_acc :  0.4207
train epoch: 23----------------


100%|██████████| 196/196 [01:22<00:00,  2.36it/s]



train_loss :  1.873738379502783
val_acc :  0.413
train epoch: 24----------------


100%|██████████| 196/196 [01:23<00:00,  2.34it/s]



train_loss :  1.8458614264215742
val_acc :  0.4249
train epoch: 25----------------


100%|██████████| 196/196 [01:21<00:00,  2.40it/s]



train_loss :  1.8219819111483437
val_acc :  0.4211
train epoch: 26----------------


100%|██████████| 196/196 [01:21<00:00,  2.41it/s]



train_loss :  1.8016627449162153
val_acc :  0.4254
train epoch: 27----------------


100%|██████████| 196/196 [01:21<00:00,  2.40it/s]



train_loss :  1.7806600806664448
val_acc :  0.4321
train epoch: 28----------------


100%|██████████| 196/196 [01:22<00:00,  2.37it/s]



train_loss :  1.766119976432956
val_acc :  0.4251
train epoch: 29----------------


100%|██████████| 196/196 [01:22<00:00,  2.38it/s]



train_loss :  1.7461169276918684
val_acc :  0.4318
train epoch: 30----------------


100%|██████████| 196/196 [01:25<00:00,  2.30it/s]



train_loss :  1.7264555181775774
val_acc :  0.4243
train epoch: 31----------------


100%|██████████| 196/196 [01:22<00:00,  2.39it/s]



train_loss :  1.713503703170893
val_acc :  0.4261
train epoch: 32----------------


100%|██████████| 196/196 [01:22<00:00,  2.37it/s]



train_loss :  1.6842712966763242
val_acc :  0.4245
train epoch: 33----------------


100%|██████████| 196/196 [01:24<00:00,  2.31it/s]



train_loss :  1.6755872977023223
val_acc :  0.4279
train epoch: 34----------------


100%|██████████| 196/196 [01:55<00:00,  1.70it/s]



train_loss :  1.6642998894866632
val_acc :  0.4272
train epoch: 35----------------


100%|██████████| 196/196 [01:28<00:00,  2.22it/s]



train_loss :  1.6506329330862786
val_acc :  0.4329
train epoch: 36----------------


100%|██████████| 196/196 [01:28<00:00,  2.22it/s]



train_loss :  1.6324273834423142
val_acc :  0.4371
train epoch: 37----------------


100%|██████████| 196/196 [01:28<00:00,  2.21it/s]



train_loss :  1.6179396260757835
val_acc :  0.4352
train epoch: 38----------------


100%|██████████| 196/196 [01:26<00:00,  2.26it/s]



train_loss :  1.6113295001643044
val_acc :  0.44
train epoch: 39----------------


100%|██████████| 196/196 [01:24<00:00,  2.32it/s]



train_loss :  1.5909426765782493
val_acc :  0.4332
train epoch: 40----------------


100%|██████████| 196/196 [01:24<00:00,  2.31it/s]



train_loss :  1.5869165330517048
val_acc :  0.4309
train epoch: 41----------------


100%|██████████| 196/196 [01:24<00:00,  2.33it/s]



train_loss :  1.5825025202060232
val_acc :  0.4379
train epoch: 42----------------


100%|██████████| 196/196 [01:24<00:00,  2.31it/s]



train_loss :  1.562180974653789
val_acc :  0.4335
train epoch: 43----------------


100%|██████████| 196/196 [01:24<00:00,  2.32it/s]



train_loss :  1.5446532958624315
val_acc :  0.4389
train epoch: 44----------------


100%|██████████| 196/196 [01:25<00:00,  2.30it/s]



train_loss :  1.541101563949974
val_acc :  0.433
train epoch: 45----------------


100%|██████████| 196/196 [01:25<00:00,  2.29it/s]



train_loss :  1.533865446947059
val_acc :  0.4359
train epoch: 46----------------


100%|██████████| 196/196 [01:25<00:00,  2.29it/s]



train_loss :  1.5244523755141668
val_acc :  0.4314
train epoch: 47----------------


100%|██████████| 196/196 [01:25<00:00,  2.29it/s]



train_loss :  1.5140115880236333
val_acc :  0.4364
train epoch: 48----------------


100%|██████████| 196/196 [01:25<00:00,  2.29it/s]



train_loss :  1.4969278513168802
val_acc :  0.4344
train epoch: 49----------------


100%|██████████| 196/196 [01:25<00:00,  2.29it/s]



train_loss :  1.4873568829225035
val_acc :  0.4345
train epoch: 50----------------


100%|██████████| 196/196 [01:25<00:00,  2.28it/s]



train_loss :  1.4789557694172373
val_acc :  0.4276
train epoch: 51----------------


100%|██████████| 196/196 [01:25<00:00,  2.29it/s]



train_loss :  1.4691907465457916
val_acc :  0.4291
train epoch: 52----------------


100%|██████████| 196/196 [01:25<00:00,  2.28it/s]



train_loss :  1.4563981051347694
val_acc :  0.434
train epoch: 53----------------


100%|██████████| 196/196 [01:25<00:00,  2.28it/s]



train_loss :  1.4633850011290337
val_acc :  0.4322
train epoch: 54----------------


100%|██████████| 196/196 [01:26<00:00,  2.28it/s]



train_loss :  1.444185814687184
val_acc :  0.4289
train epoch: 55----------------


100%|██████████| 196/196 [01:26<00:00,  2.27it/s]



train_loss :  1.4416473732919108
val_acc :  0.4276
train epoch: 56----------------


100%|██████████| 196/196 [01:26<00:00,  2.26it/s]



train_loss :  1.4351510393376252
val_acc :  0.4335
train epoch: 57----------------


100%|██████████| 196/196 [01:27<00:00,  2.25it/s]



train_loss :  1.4198989174803909
val_acc :  0.4387
train epoch: 58----------------


100%|██████████| 196/196 [01:27<00:00,  2.25it/s]



train_loss :  1.4141125563456087
val_acc :  0.4342
train epoch: 59----------------


100%|██████████| 196/196 [01:27<00:00,  2.23it/s]



train_loss :  1.4019212187552939
val_acc :  0.4312
train epoch: 60----------------


100%|██████████| 196/196 [01:27<00:00,  2.24it/s]



train_loss :  1.4149662116352393
val_acc :  0.4347
train epoch: 61----------------


100%|██████████| 196/196 [01:28<00:00,  2.22it/s]



train_loss :  1.3933264096172489
val_acc :  0.4304
train epoch: 62----------------


100%|██████████| 196/196 [01:21<00:00,  2.40it/s]



train_loss :  1.3921307848424327
val_acc :  0.4353
train epoch: 63----------------


100%|██████████| 196/196 [01:19<00:00,  2.47it/s]



train_loss :  1.3793216226052265
val_acc :  0.4316
train epoch: 64----------------


100%|██████████| 196/196 [01:19<00:00,  2.48it/s]



train_loss :  1.38120009704512
val_acc :  0.4301
train epoch: 65----------------


100%|██████████| 196/196 [01:19<00:00,  2.46it/s]



train_loss :  1.3680388678093345
val_acc :  0.4349
train epoch: 66----------------


100%|██████████| 196/196 [01:19<00:00,  2.48it/s]



train_loss :  1.3597342420597465
val_acc :  0.4377
train epoch: 67----------------


100%|██████████| 196/196 [01:19<00:00,  2.48it/s]



train_loss :  1.3515896213297942
val_acc :  0.4292
train epoch: 68----------------


100%|██████████| 196/196 [01:18<00:00,  2.50it/s]



train_loss :  1.3456984843526567
val_acc :  0.4362
train epoch: 69----------------


  8%|▊         | 15/196 [00:09<01:55,  1.56it/s]


KeyboardInterrupt: 

In [7]:
class Case9Model(nn.Module):
    #Cifar100 Shape: [Batch, Channel(3), 32, 32]
    def __init__(self, in_channels, num_classes):
        super(Case9Model, self).__init__()
        """Base Model"""
        self.conv1_0 = nn.Conv2d(3, 64, 3, 1, 1)
        self.pool1 = nn.MaxPool2d(2, 2)
        self.batchNorm1_0 = nn.BatchNorm2d(64)
        #64 * (16 * 16)

        self.conv1_1 = nn.Conv2d(64, 64, 3, 1, 1)
        self.batchNorm1_1 = nn.BatchNorm2d(64)
        self.conv1_2 = nn.Conv2d(64, 64, 3, 1, 1)
        self.batchNorm1_2 = nn.BatchNorm2d(64)
        self.conv1_3 = nn.Conv2d(64, 64, 3, 1, 1)
        self.batchNorm1_3 = nn.BatchNorm2d(64)
        self.conv1_4 = nn.Conv2d(64, 64, 3, 1, 1)
        self.batchNorm1_4 = nn.BatchNorm2d(64)
        #64 * (16 * 16)
        
        self.conv2_0 = nn.Conv2d(64, 128, 3, 1, 1)
        self.pool2 = nn.MaxPool2d(2, 2)
        self.batchNorm2_0 = nn.BatchNorm2d(128)
        #128 * (8 * 8)

        self.conv2_1 = nn.Conv2d(128, 128, 3, 1, 1)
        self.batchNorm2_1 = nn.BatchNorm2d(128)
        self.conv2_2 = nn.Conv2d(128, 128, 3, 1, 1)
        self.batchNorm2_2 = nn.BatchNorm2d(128)
        self.conv2_3 = nn.Conv2d(128, 128, 3, 1, 1)
        self.batchNorm2_3 = nn.BatchNorm2d(128)
        self.conv2_4 = nn.Conv2d(128, 128, 3, 1, 1)
        self.batchNorm2_4 = nn.BatchNorm2d(128)
        #128 * (8 * 8)
        
        self.conv3_0 = nn.Conv2d(128, 256, 3, 1, 1)
        self.pool3 = nn.MaxPool2d(2, 2)
        self.batchNorm3_0 = nn.BatchNorm2d(256)
        #256 * (4 * 4)

        self.conv3_1 = nn.Conv2d(256, 256, 3, 1, 1)
        self.batchNorm3_1 = nn.BatchNorm2d(256)
        self.conv3_2 = nn.Conv2d(256, 256, 3, 1, 1)
        self.batchNorm3_2 = nn.BatchNorm2d(256)
        self.conv3_3 = nn.Conv2d(256, 256, 3, 1, 1)
        self.batchNorm3_3 = nn.BatchNorm2d(256)
        self.conv3_4 = nn.Conv2d(256, 256, 3, 1, 1)
        self.batchNorm3_4 = nn.BatchNorm2d(256)
        #256 * (4 * 4)

        self.conv4_0 = nn.Conv2d(256, 512, 3, 1, 1)
        self.pool4 = nn.MaxPool2d(2, 2)
        self.batchNorm4_0 = nn.BatchNorm2d(512)
        #512 * (2 * 2)

        self.conv4_1 = nn.Conv2d(512, 512, 3, 1, 1)
        self.batchNorm4_1 = nn.BatchNorm2d(512)
        self.conv4_2 = nn.Conv2d(512, 512, 3, 1, 1)
        self.batchNorm4_2 = nn.BatchNorm2d(512)
        self.conv4_3 = nn.Conv2d(512, 512, 3, 1, 1)
        self.batchNorm4_3 = nn.BatchNorm2d(512)
        self.conv4_4 = nn.Conv2d(512, 512, 3, 1, 1)
        self.batchNorm4_4 = nn.BatchNorm2d(512)
        #512 * (2 * 2)

        self.conv5_0 = nn.Conv2d(512, 512, 2)
        self.batchNorm5_0 = nn.BatchNorm2d(512)
        #512 * (1 * 1)

        
        self.fc1 = nn.Linear(512, 128)
        self.batchNormL_1 = nn.BatchNorm1d(128)
        self.fc2 = nn.Linear(128, 100)
        
        self.dropout = nn.Dropout(0.2)        

    def forward(self, x):
        """Base Model"""
        x1_0 = self.batchNorm1_0(self.pool1(F.relu(self.conv1_0(x))))
        x1_1 = self.batchNorm1_1(F.relu(self.conv1_1(x1_0)))
        x1_2 = self.batchNorm1_2(F.relu(self.conv1_2(x1_1)) + x1_0)
        x1_3 = self.batchNorm1_3(F.relu(self.conv1_3(x1_2)) + x1_0 + x1_1)
        x1_4 = self.batchNorm1_4(F.relu(self.conv1_4(x1_3)) + x1_0 + x1_1 + x1_2)

        x2_0 = self.batchNorm2_0(self.pool2(F.relu(self.conv2_0(x1_4))))
        x2_1 = self.batchNorm2_1(F.relu(self.conv2_1(x2_0)))
        x2_2 = self.batchNorm2_2(F.relu(self.conv2_2(x2_1)) + x2_0)
        x2_3 = self.batchNorm2_3(F.relu(self.conv2_3(x2_2)) + x2_0 + x2_1)
        x2_4 = self.batchNorm2_4(F.relu(self.conv2_4(x2_3)) + x2_0 + x2_1 + x2_2)

        x3_0 = self.batchNorm3_0(self.pool3(F.relu(self.conv3_0(x2_4))))
        x3_1 = self.batchNorm3_1(F.relu(self.conv3_1(x3_0)))
        x3_2 = self.batchNorm3_2(F.relu(self.conv3_2(x3_1)) + x3_0)
        x3_3 = self.batchNorm3_3(F.relu(self.conv3_3(x3_2)) + x3_0 + x3_1)
        x3_4 = self.batchNorm3_4(F.relu(self.conv3_4(x3_3)) + x3_0 + x3_1 + x3_2)

        x4_0 = self.batchNorm4_0(self.pool4(F.relu(self.conv4_0(x3_4))))

        x4_1 = self.batchNorm4_1(F.relu(self.conv4_1(x4_0)))
        x4_2 = self.batchNorm4_2(F.relu(self.conv4_2(x4_1)) + x4_0)
        x4_3 = self.batchNorm4_3(F.relu(self.conv4_3(x4_2)) + x4_0 + x4_1)
        x4_4 = self.batchNorm4_4(F.relu(self.conv4_4(x4_3)) + x4_0 + x4_1 + x4_2)

        x5 = self.batchNorm5_0(F.relu(self.conv5_0(x4_4)))

        x = torch.flatten(x5, 1) # flatten all dimensions except batch
        x = self.dropout(x)
        x = self.batchNormL_1(F.relu(self.fc1(x)))
        x = self.dropout(x)
        x = self.fc2(x)
        return x

model9 = Case9Model(3, 100).to(device)
optimizer = optim.SGD(model9.parameters(), momentum=0.9, nesterov=True, lr=0.05)


In [8]:
for epoch in range(100):
    model9.train()
    running_loss = 0.0
    best_acc = 0.0
    best_model_wts = model9.state_dict()
    print(f"train epoch: {epoch+1}----------------")
    for img, label in tqdm(train_loader):
        img = img.to(device)
        label = label.to(device)
        optimizer.zero_grad()
        output = model9(img)
        loss = criterion(output, label)
        running_loss += loss.item()
        loss.backward()
        optimizer.step()
    correct, all_data = 0,0
    print("\ntrain_loss : ", running_loss / len(train_loader))
    model9.eval()
    for img, label in test_loader:
        with torch.no_grad():
            img = img.to(device)
            label = label.to(device)
            output = model9(img)

            correct += torch.sum(torch.argmax(output, dim=1) == label).item()
            all_data += len(label)
    print("val_acc : ", correct / all_data)
    if correct / all_data > best_acc:
      best_acc = correct / all_data
      best_model_wts = model9.state_dict()

train epoch: 1----------------


100%|██████████| 196/196 [00:44<00:00,  4.39it/s]


train_loss :  3.987913574491228





val_acc :  0.1985
train epoch: 2----------------


100%|██████████| 196/196 [00:39<00:00,  4.93it/s]


train_loss :  3.230841867777766





val_acc :  0.3237
train epoch: 3----------------


100%|██████████| 196/196 [00:40<00:00,  4.86it/s]


train_loss :  2.8209230607869675





val_acc :  0.3926
train epoch: 4----------------


100%|██████████| 196/196 [00:38<00:00,  5.07it/s]


train_loss :  2.5141638797156665





val_acc :  0.4093
train epoch: 5----------------


100%|██████████| 196/196 [00:39<00:00,  4.93it/s]


train_loss :  2.297916546159861





val_acc :  0.4671
train epoch: 6----------------


100%|██████████| 196/196 [00:40<00:00,  4.82it/s]


train_loss :  2.099439494463862





val_acc :  0.5087
train epoch: 7----------------


100%|██████████| 196/196 [00:40<00:00,  4.80it/s]


train_loss :  1.9262938244002206





val_acc :  0.5126
train epoch: 8----------------


100%|██████████| 196/196 [00:39<00:00,  4.93it/s]


train_loss :  1.761614373752049





val_acc :  0.532
train epoch: 9----------------


100%|██████████| 196/196 [00:40<00:00,  4.81it/s]



train_loss :  1.625392028263637
val_acc :  0.5152
train epoch: 10----------------


100%|██████████| 196/196 [00:39<00:00,  5.01it/s]


train_loss :  1.4928053903336427





val_acc :  0.5138
train epoch: 11----------------


100%|██████████| 196/196 [00:40<00:00,  4.78it/s]


train_loss :  1.3701790522555917





val_acc :  0.5398
train epoch: 12----------------


100%|██████████| 196/196 [00:40<00:00,  4.83it/s]


train_loss :  1.2900031202909898





val_acc :  0.5469
train epoch: 13----------------


100%|██████████| 196/196 [00:41<00:00,  4.77it/s]


train_loss :  1.1870646723070923





val_acc :  0.5449
train epoch: 14----------------


100%|██████████| 196/196 [00:39<00:00,  5.00it/s]


train_loss :  1.1098429393403384





val_acc :  0.5574
train epoch: 15----------------


100%|██████████| 196/196 [00:40<00:00,  4.84it/s]


train_loss :  1.0277261989457267





val_acc :  0.5554
train epoch: 16----------------


100%|██████████| 196/196 [00:40<00:00,  4.83it/s]


train_loss :  0.9693375430545028





val_acc :  0.5589
train epoch: 17----------------


100%|██████████| 196/196 [00:41<00:00,  4.77it/s]


train_loss :  0.9074452650182101





val_acc :  0.5506
train epoch: 18----------------


100%|██████████| 196/196 [00:38<00:00,  5.09it/s]


train_loss :  0.8683177795337171





val_acc :  0.5563
train epoch: 19----------------


100%|██████████| 196/196 [00:41<00:00,  4.78it/s]


train_loss :  0.8221468146966429





val_acc :  0.5561
train epoch: 20----------------


100%|██████████| 196/196 [00:40<00:00,  4.83it/s]


train_loss :  0.7890330647327461





val_acc :  0.5676
train epoch: 21----------------


100%|██████████| 196/196 [00:41<00:00,  4.73it/s]


train_loss :  0.7464512586593628





val_acc :  0.5606
train epoch: 22----------------


100%|██████████| 196/196 [00:38<00:00,  5.05it/s]


train_loss :  0.740682060925328





val_acc :  0.573
train epoch: 23----------------


100%|██████████| 196/196 [00:40<00:00,  4.88it/s]


train_loss :  0.6962373049891725





val_acc :  0.5664
train epoch: 24----------------


100%|██████████| 196/196 [00:39<00:00,  4.90it/s]


train_loss :  0.6886387716750709





val_acc :  0.5711
train epoch: 25----------------


100%|██████████| 196/196 [00:41<00:00,  4.74it/s]


train_loss :  0.6767684338348252





val_acc :  0.5688
train epoch: 26----------------


100%|██████████| 196/196 [00:38<00:00,  5.11it/s]


train_loss :  0.6291222116168664





val_acc :  0.5653
train epoch: 27----------------


100%|██████████| 196/196 [00:40<00:00,  4.85it/s]


train_loss :  0.6322945284904266





val_acc :  0.5686
train epoch: 28----------------


100%|██████████| 196/196 [00:40<00:00,  4.86it/s]


train_loss :  0.6266168354421245





val_acc :  0.5714
train epoch: 29----------------


100%|██████████| 196/196 [00:40<00:00,  4.81it/s]


train_loss :  0.6038743009676739





val_acc :  0.5804
train epoch: 30----------------


100%|██████████| 196/196 [00:38<00:00,  5.09it/s]


train_loss :  0.5857509736503873





val_acc :  0.567
train epoch: 31----------------


100%|██████████| 196/196 [00:40<00:00,  4.85it/s]


train_loss :  0.5787528078166806





val_acc :  0.5713
train epoch: 32----------------


100%|██████████| 196/196 [00:39<00:00,  4.92it/s]


train_loss :  0.5745295014004318





val_acc :  0.5795
train epoch: 33----------------


100%|██████████| 196/196 [00:40<00:00,  4.83it/s]


train_loss :  0.5498557551478853





val_acc :  0.5776
train epoch: 34----------------


100%|██████████| 196/196 [00:38<00:00,  5.11it/s]


train_loss :  0.5424003278722569





val_acc :  0.5872
train epoch: 35----------------


100%|██████████| 196/196 [00:40<00:00,  4.85it/s]


train_loss :  0.534918520219472





val_acc :  0.5785
train epoch: 36----------------


100%|██████████| 196/196 [00:39<00:00,  4.96it/s]


train_loss :  0.5306706384432559





val_acc :  0.5801
train epoch: 37----------------


100%|██████████| 196/196 [00:40<00:00,  4.86it/s]


train_loss :  0.5114952508284121





val_acc :  0.5748
train epoch: 38----------------


100%|██████████| 196/196 [00:38<00:00,  5.08it/s]


train_loss :  0.5143398905591089





val_acc :  0.5786
train epoch: 39----------------


100%|██████████| 196/196 [00:40<00:00,  4.90it/s]


train_loss :  0.503893069010608





val_acc :  0.5782
train epoch: 40----------------


100%|██████████| 196/196 [00:38<00:00,  5.06it/s]


train_loss :  0.49453759603962605





val_acc :  0.579
train epoch: 41----------------


100%|██████████| 196/196 [00:40<00:00,  4.79it/s]


train_loss :  0.4898926308568643





val_acc :  0.5833
train epoch: 42----------------


100%|██████████| 196/196 [00:39<00:00,  4.99it/s]


train_loss :  0.49230872261889125





val_acc :  0.5874
train epoch: 43----------------


100%|██████████| 196/196 [00:40<00:00,  4.87it/s]


train_loss :  0.4826795326507821





val_acc :  0.5861
train epoch: 44----------------


100%|██████████| 196/196 [00:38<00:00,  5.10it/s]


train_loss :  0.46699108867620936





val_acc :  0.5887
train epoch: 45----------------


100%|██████████| 196/196 [00:41<00:00,  4.75it/s]


train_loss :  0.459192764318111





val_acc :  0.5829
train epoch: 46----------------


100%|██████████| 196/196 [00:39<00:00,  4.96it/s]


train_loss :  0.4640481952501803





val_acc :  0.5866
train epoch: 47----------------


100%|██████████| 196/196 [00:40<00:00,  4.90it/s]


train_loss :  0.4627661854028702





val_acc :  0.5918
train epoch: 48----------------


100%|██████████| 196/196 [00:38<00:00,  5.07it/s]


train_loss :  0.4489207527771288





val_acc :  0.5865
train epoch: 49----------------


100%|██████████| 196/196 [00:42<00:00,  4.63it/s]


train_loss :  0.4489096094455038





val_acc :  0.5872
train epoch: 50----------------


100%|██████████| 196/196 [00:38<00:00,  5.11it/s]


train_loss :  0.4384962619871509





val_acc :  0.589
train epoch: 51----------------


100%|██████████| 196/196 [00:39<00:00,  5.01it/s]


train_loss :  0.43853762821883574





val_acc :  0.5878
train epoch: 52----------------


100%|██████████| 196/196 [00:39<00:00,  4.97it/s]


train_loss :  0.4274101705879581





val_acc :  0.5857
train epoch: 53----------------


100%|██████████| 196/196 [00:38<00:00,  5.06it/s]


train_loss :  0.43296075475459195





val_acc :  0.5962
train epoch: 54----------------


100%|██████████| 196/196 [00:40<00:00,  4.83it/s]


train_loss :  0.42241636283543643





val_acc :  0.5909
train epoch: 55----------------


100%|██████████| 196/196 [00:39<00:00,  4.93it/s]


train_loss :  0.41498606317505543





val_acc :  0.5904
train epoch: 56----------------


100%|██████████| 196/196 [00:40<00:00,  4.84it/s]


train_loss :  0.4296679273247719





val_acc :  0.5893
train epoch: 57----------------


100%|██████████| 196/196 [00:38<00:00,  5.11it/s]


train_loss :  0.40910457150668517





val_acc :  0.595
train epoch: 58----------------


100%|██████████| 196/196 [00:40<00:00,  4.86it/s]


train_loss :  0.4131888768502644





val_acc :  0.5902
train epoch: 59----------------


100%|██████████| 196/196 [00:38<00:00,  5.10it/s]


train_loss :  0.4097923815098344





val_acc :  0.5948
train epoch: 60----------------


100%|██████████| 196/196 [00:40<00:00,  4.84it/s]


train_loss :  0.39631661955191166





val_acc :  0.5922
train epoch: 61----------------


100%|██████████| 196/196 [00:40<00:00,  4.86it/s]


train_loss :  0.40226587843225925





val_acc :  0.5933
train epoch: 62----------------


100%|██████████| 196/196 [00:40<00:00,  4.87it/s]


train_loss :  0.3969407530159366





val_acc :  0.5988
train epoch: 63----------------


100%|██████████| 196/196 [00:41<00:00,  4.74it/s]


train_loss :  0.38680984277506264





val_acc :  0.601
train epoch: 64----------------


100%|██████████| 196/196 [00:38<00:00,  5.09it/s]


train_loss :  0.3872103469104183





val_acc :  0.5995
train epoch: 65----------------


100%|██████████| 196/196 [00:40<00:00,  4.89it/s]


train_loss :  0.38889457376635805





val_acc :  0.5951
train epoch: 66----------------


100%|██████████| 196/196 [00:39<00:00,  5.01it/s]


train_loss :  0.3950372738193493





val_acc :  0.5974
train epoch: 67----------------


100%|██████████| 196/196 [00:39<00:00,  5.00it/s]


train_loss :  0.38241770893943555





val_acc :  0.5944
train epoch: 68----------------


100%|██████████| 196/196 [00:38<00:00,  5.15it/s]


train_loss :  0.38461366318622414





val_acc :  0.5946
train epoch: 69----------------


100%|██████████| 196/196 [00:38<00:00,  5.08it/s]


train_loss :  0.3811022437804816





val_acc :  0.5994
train epoch: 70----------------


100%|██████████| 196/196 [00:39<00:00,  4.94it/s]


train_loss :  0.3763592916027624





val_acc :  0.6018
train epoch: 71----------------


 38%|███▊      | 74/196 [00:14<00:23,  5.26it/s]


KeyboardInterrupt: ignored