In [4]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.nn.init as init
from torch.autograd import Variable

In [5]:
__all__ = ['ResNet', 'resnet20', 'resnet32', 'resnet44', 'resnet56', 'resnet110', 'resnet1202']

def _weights_init(m):
  classname = m.__class__.__name__
  #print(classname)
  if isinstance(m, nn.Linear) or isinstance(m, nn.Conv2d):
      init.kaiming_normal_(m.weight)

class LambdaLayer(nn.Module):
  def __init__(self, lambd):
    super(LambdaLayer, self).__init__()
    self.lambd = lambd

  def forward(self, x):
    return self.lambd(x)

In [6]:
class BasicBlock(nn.Module):
  # block 안에서 plane 수 증가 비율
  expansion = 1

  # in_planes : input, planes: output
  def __init__(self, in_planes, planes, stride=1, option='B'):
    super(BasicBlock, self).__init__()
    
    self.dropout = nn.Dropout(0.4)
    
    self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
    self.bn1 = nn.BatchNorm2d(planes, momentum=0.9)
    
    self.conv2 = nn.Conv2d(planes, planes, kernel_size=3,stride=1,padding=1,bias=False)
    self.bn2 = nn.BatchNorm2d(planes, momentum=0.9)
    
    self.shortcut = nn.Sequential()
    # shortcut connection 시 shape가 맞지 않을 때(conv_n 이 conv_n+1로 넘어갈 때)
    if stride != 1 or in_planes != planes:
        if option == 'A':
          # zero padding
          self.shortcut = LambdaLayer(lambda x:
                                      F.pad(x[:, :, ::2, ::2], (0, 0, 0, 0, planes//4, planes//4), "constant", 0))
        elif option == 'B':
          # projection
          self.shortcut = nn.Sequential(
              nn.Conv2d(in_planes, self.expansion * planes, kernel_size=1, stride=stride, bias=False),
              nn.BatchNorm2d(self.expansion * planes, momentum=0.9)
              )

  def forward(self, x):
    out = F.relu(self.bn1(self.conv1(x)))
    out = self.bn2(self.conv2(out))
    # shortcut connection
    out += self.shortcut(x)
    out = F.relu(out)
    return out


In [7]:
class ResNet(nn.Module):
  def __init__(self, block, num_blocks, num_classes=10):
    super(ResNet, self).__init__()
    
    self.in_planes = 16

    self.dropout = nn.Dropout(0.3)

    self.conv1 = nn.Conv2d(3, 16, kernel_size=3, stride=1, padding=1, bias=False)
    self.bn1 = nn.BatchNorm2d(16, momentum=0.9)


    # feature map : 32 x 32 x 16
    self.layer1 = self._make_layer(block, 16, num_blocks[0], stride=1)
    # feature map : 16 x 16 x 32
    self.layer2 = self._make_layer(block, 32, num_blocks[1], stride=2)
    # feature map : 8 x 8 x 64
    self.layer3 = self._make_layer(block, 64, num_blocks[2], stride=2)
    # 마지막 클래스 예측 (Linear)
    self.fc_out = nn.Linear(64, num_classes)

    self.apply(_weights_init)

  
  def _make_layer(self, block, planes, num_blocks, stride):
    # 처음 stride만 2, 이후 1
    strides = [stride] + [1]*(num_blocks-1)
    layers = []
    for stride in strides:
        layers.append(block(self.in_planes, planes, stride))
        self.in_planes = planes * block.expansion
    return nn.Sequential(*layers)

  def forward(self, x):
    out = F.relu(self.bn1(self.conv1(x)))
    out = self.layer1(out)
    out = self.dropout(out)
    out = self.layer2(out)
    out = self.dropout(out)
    out = self.layer3(out)
    out = F.avg_pool2d(out, out.size()[3])
    out = out.view(out.size(0),-1)
    out = self.fc_out(out)
    return out

In [8]:
class BasicBlock(nn.Module):
    # block 안에서 plane 수 증가 비율
    expansion = 1
    momen = 0.8
    # in_planes : input, planes: output
    def __init__(self, in_planes, planes, stride=1, option='B'):
        super(BasicBlock, self).__init__()

        self.dropout = nn.Dropout(0.4)

        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes, momentum=self.momen)

        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3,stride=1,padding=1,bias=False)
        self.bn2 = nn.BatchNorm2d(planes, momentum=self.momen)

        self.shortcut = nn.Sequential()
        # shortcut connection 시 shape가 맞지 않을 때(conv_n 이 conv_n+1로 넘어갈 때)
        if stride != 1 or in_planes != planes:
            if option == 'A':
              # zero padding
              self.shortcut = LambdaLayer(lambda x:
                                          F.pad(x[:, :, ::2, ::2], (0, 0, 0, 0, planes//4, planes//4), "constant", 0))
            elif option == 'B':
              # projection
              self.shortcut = nn.Sequential(
                  nn.Conv2d(in_planes, self.expansion * planes, kernel_size=1, stride=stride, bias=False),
                  nn.BatchNorm2d(self.expansion * planes, momentum=self.momen)
                  )

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        # shortcut connection
        out += self.shortcut(x)
        out = F.relu(out)
        return out


In [9]:
class ResNet(nn.Module):
    def __init__(self, block, num_blocks, num_classes=10):
        super(ResNet, self).__init__()

        self.in_planes = 16
        self.momen = 0.8

        self.dropout = nn.Dropout(0.3)

        self.conv1 = nn.Conv2d(3, 16, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(16, momentum = self.momen)


        # feature map : 32 x 32 x 16
        self.layer1 = self._make_layer(block, 16, num_blocks[0], stride=1)
        # feature map : 16 x 16 x 32
        self.layer2 = self._make_layer(block, 32, num_blocks[1], stride=2)
        # feature map : 8 x 8 x 64
        self.layer3 = self._make_layer(block, 64, num_blocks[2], stride=2)
        # 마지막 클래스 예측 (Linear)
        self.fc_out = nn.Linear(64, num_classes)

        self.apply(_weights_init)

  
    def _make_layer(self, block, planes, num_blocks, stride):
        # 처음 stride만 2, 이후 1
        strides = [stride] + [1]*(num_blocks-1)
        layers = []
        for stride in strides:
            layers.append(block(self.in_planes, planes, stride))
            self.in_planes = planes * block.expansion
        return nn.Sequential(*layers)

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.layer1(out)
        out = self.dropout(out)
        out = self.layer2(out)
        out = self.dropout(out)
        out = self.layer3(out)
        out = F.avg_pool2d(out, out.size()[3])
        out = out.view(out.size(0),-1)
        out = self.fc_out(out)
        return out

In [10]:
def resnet20():
    return ResNet(BasicBlock, [3, 3, 3])


def resnet32():
    return ResNet(BasicBlock, [5, 5, 5])


def resnet44():
    return ResNet(BasicBlock, [7, 7, 7])


def resnet56():
    return ResNet(BasicBlock, [9, 9, 9])
# def resnet56():
#     return ResNet(Bottleneck, [9, 9, 9])


def resnet110():
    return ResNet(BasicBlock, [18, 18, 18])


def resnet1202():
    return ResNet(BasicBlock, [200, 200, 200])

In [11]:
import torchvision
import torchvision.transforms as transforms
import torch.backends.cudnn as cudnn
import torch.optim as optim
import os
import argparse

In [12]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'

In [13]:
transform_train = transforms.Compose([
                                      transforms.RandomCrop(32, padding=4),
                                      transforms.RandomHorizontalFlip(),
                                      transforms.ToTensor(),
                                      transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))
])


transform_test = transforms.Compose([
                                     transforms.ToTensor(),
                                     transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))
])

trainset = torchvision.datasets.CIFAR10(
    root='./data',train=True,download=True,transform=transform_train)
trainloader = torch.utils.data.DataLoader(
    trainset, batch_size=128,shuffle=True,num_workers=2
)
testset = torchvision.datasets.CIFAR10(
    root='./data',train=False,download=True,transform=transform_test)
testloader = torch.utils.data.DataLoader(
    testset, batch_size=100,shuffle=False,num_workers=2
)
classes=('plane','car','bird','cat','deer','dog','frog','horse','ship','truck')

Files already downloaded and verified
Files already downloaded and verified


In [14]:
def train(epoch):
    net.train()
    train_loss = 0
    correct = 0
    total = 0
    
    for batch_idx, (inputs, targets) in enumerate(trainloader):
        inputs, targets = inputs.to(device), targets.to(device)

        optimizer.zero_grad()
        outputs = net(inputs)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()

        train_loss += loss.item()
        _, predicted = outputs.max(1)
        total += targets.size(0)
        correct += predicted.eq(targets).sum().item()
        if batch_idx % 10 == 0:
            print(batch_idx, len(trainloader), 'Loss: %.3f | Acc: %.3f%% (%d/%d)'%(train_loss/(batch_idx+1), 100.*correct/total, correct, total))


In [18]:
def test(epoch):
    global best_accuracy
    net.eval()
    test_loss = 0
    correct = 0
    total = 0

    global validation_running_loss_history
    global validation_running_correct_history
    with torch.no_grad():
        for batch_idx, (inputs, targets) in enumerate(testloader):
            inputs, targets = inputs.to(device), targets.to(device)
            ouputs = net(inputs)
            loss = criterion(ouputs, targets)

            test_loss += loss.item()
            _, predicted = ouputs.max(1)
            total += targets.size(0)
            correct += predicted.eq(targets).sum().item()
            if batch_idx % 50 == 0:
                print(batch_idx, len(testloader), 'Loss: %.3f | Acc: %.3f%% (%d/%d)'%(test_loss/(batch_idx+1), 100.*correct/total, correct, total))
        acc = 100.*correct/total
    if acc > best_accuracy:
        print('Saving..')
        best_accuracy = acc

In [19]:
from torchsummary import summary
summary(net, input_size=(3,224,224), device='cpu')

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 16, 224, 224]             432
       BatchNorm2d-2         [-1, 16, 224, 224]              32
            Conv2d-3         [-1, 16, 224, 224]             432
       BatchNorm2d-4         [-1, 16, 224, 224]              32
            Conv2d-5         [-1, 16, 224, 224]           2,304
       BatchNorm2d-6         [-1, 16, 224, 224]              32
            Conv2d-7         [-1, 16, 224, 224]           2,304
       BatchNorm2d-8         [-1, 16, 224, 224]              32
        BasicBlock-9         [-1, 16, 224, 224]               0
           Conv2d-10         [-1, 16, 224, 224]           2,304
      BatchNorm2d-11         [-1, 16, 224, 224]              32
           Conv2d-12         [-1, 16, 224, 224]           2,304
           Conv2d-13         [-1, 16, 224, 224]           2,304
      BatchNorm2d-14         [-1, 16, 2

## bn momentum 0.9

In [None]:
best_accuracy = 0
start_epoch = 0

net = resnet20()
net = net.to(device)
if device == 'cuda':
  net = torch.nn.DataParallel(net)
  cudnn.benchmark = True
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.01, momentum=0.9, weight_decay=5e-4)
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=200)

for epoch in range(start_epoch, start_epoch+200):
  train(epoch)
  test(epoch)
  scheduler.step()
  print('epoch : ',epoch, best_accuracy)

In [12]:
# ResNet20의 best_accuracy dropout 1개
best_accuracy

90.55

## lr = 0.001

In [None]:
best_accuracy = 0
start_epoch = 0

net = resnet20()
net = net.to(device)
if device == 'cuda':
  net = torch.nn.DataParallel(net)
  cudnn.benchmark = True
criterion = nn.CrossEntropyLoss().cuda()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9, weight_decay=5e-4)
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=200)

for epoch in range(start_epoch, start_epoch+200):
  train(epoch)
  test(epoch)
  scheduler.step()
  print('epoch : ',epoch, best_accuracy)

In [20]:
# ResNet20의 best_accuracy dropout 1개
best_accuracy

83.41

## lr->0.1, lr_scheduler = MultiStepLR

In [None]:
best_accuracy = 0
start_epoch = 0

net = resnet20()
net = net.to(device)
if device == 'cuda':
  net = torch.nn.DataParallel(net)
  cudnn.benchmark = True
criterion = nn.CrossEntropyLoss().cuda()
optimizer = optim.SGD(net.parameters(), lr=0.1, momentum=0.9, weight_decay=5e-4)
decay_epoch = [100,150]
scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=decay_epoch, gamma=0.1)

for epoch in range(start_epoch, start_epoch+200):
  train(epoch)
  test(epoch)
  scheduler.step()
  print('epoch : ',epoch, best_accuracy)

In [18]:
# ResNet20의 best_accuracy
best_accuracy

92.24

## lr 0.1, weight_decay=1e-4

In [None]:
best_accuracy = 0
start_epoch = 0

net = resnet20()
net = net.to(device)
if device == 'cuda':
  net = torch.nn.DataParallel(net)
  cudnn.benchmark = True
criterion = nn.CrossEntropyLoss().cuda()
optimizer = optim.SGD(net.parameters(), lr=0.1,momentum=0.9, weight_decay=1e-4)

decay_epoch = [100,150]
scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=decay_epoch, gamma=0.1)

for epoch in range(start_epoch, start_epoch+200):
  train(epoch)
  test(epoch)
  scheduler.step()
  print('epoch : ',epoch, best_accuracy)

In [22]:
# ResNet20의 best_accuracy
best_accuracy

91.94

## lr = 0.0001 and weight_decay=5e-4 no scheduler

In [None]:
best_accuracy = 0
start_epoch = 0

net = resnet20()
net = net.to(device)
if device == 'cuda':
  net = torch.nn.DataParallel(net)
  cudnn.benchmark = True
criterion = nn.CrossEntropyLoss().cuda()
optimizer = optim.SGD(net.parameters(), lr=0.0001,momentum=0.9, weight_decay=5e-4)
decay_epoch = [100,150]
scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=decay_epoch, gamma=0.1)

for epoch in range(start_epoch, start_epoch+200):
  train(epoch)
  test(epoch)
  #scheduler.step()
  print('epoch : ',epoch, best_accuracy)

In [24]:
# ResNet20의 best_accuracy
best_accuracy

65.42

## lr_scheduler decay epoch 추가

In [None]:
best_accuracy = 0
start_epoch = 0

net = resnet20()
net = net.to(device)
if device == 'cuda':
  net = torch.nn.DataParallel(net)
  cudnn.benchmark = True
criterion = nn.CrossEntropyLoss().cuda()
optimizer = optim.SGD(net.parameters(), lr=0.1, momentum=0.9, weight_decay=5e-4)
decay_epoch = [50,100,150,180]
scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=decay_epoch, gamma=0.1)

for epoch in range(start_epoch, start_epoch+200):
  train(epoch)
  test(epoch)
  scheduler.step()
  print('epoch : ',epoch, best_accuracy)

In [26]:
# ResNet20의 best_accuracy
best_accuracy

91.73

## decay epoch 3개 sgd weight deacy 1e-3

In [None]:
best_accuracy = 0
start_epoch = 0

net = resnet20()
net = net.to(device)
if device == 'cuda':
  net = torch.nn.DataParallel(net)
  cudnn.benchmark = True
criterion = nn.CrossEntropyLoss().cuda()
optimizer = optim.SGD(net.parameters(), lr=0.1, momentum=0.9, weight_decay=1e-3)
decay_epoch = [100,150,180]
scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=decay_epoch, gamma=0.1)

for epoch in range(start_epoch, start_epoch+200):
  train(epoch)
  test(epoch)
  scheduler.step()
  print('epoch : ',epoch, best_accuracy)

In [12]:
# ResNet20의 best_accuracy
best_accuracy

91.88

## sgd weight deacy 1e-3

In [None]:
best_accuracy = 0
start_epoch = 0

net = resnet20()
net = net.to(device)
if device == 'cuda':
  net = torch.nn.DataParallel(net)
  cudnn.benchmark = True
criterion = nn.CrossEntropyLoss().cuda()
optimizer = optim.SGD(net.parameters(), lr=0.1, momentum=0.9, weight_decay=1e-3)

decay_epoch = [100,150]
scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=decay_epoch, gamma=0.1)

for epoch in range(start_epoch, start_epoch+201):
  train(epoch)
  test(epoch)
  scheduler.step()
  print('epoch : ',epoch, best_accuracy)

In [14]:
# ResNet20의 best_accuracy
best_accuracy

92.0

## lr_scheduler gamm = 0.05

In [None]:
best_accuracy = 0
start_epoch = 0

net = resnet20()
net = net.to(device)
if device == 'cuda':
  net = torch.nn.DataParallel(net)
  cudnn.benchmark = True
criterion = nn.CrossEntropyLoss().cuda()
optimizer = optim.SGD(net.parameters(), lr=0.1, momentum=0.9, weight_decay=5e-4)

decay_epoch = [50,100,150,180]
scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=decay_epoch, gamma=0.05)

for epoch in range(start_epoch, start_epoch+201):
  train(epoch)
  test(epoch)
  scheduler.step()
  print('epoch : ',epoch, best_accuracy)

In [16]:
# ResNet20의 best_accuracy
best_accuracy

91.74

## optim Adam

In [None]:
best_accuracy = 0
start_epoch = 0

net = resnet20()
net = net.to(device)
if device == 'cuda':
  net = torch.nn.DataParallel(net)
  cudnn.benchmark = True
criterion = nn.CrossEntropyLoss().cuda()
optimizer = optim.Adam(net.parameters(), lr=0.1)

decay_epoch = [150]
scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=decay_epoch, gamma=0.1)

for epoch in range(start_epoch, start_epoch+201):
  train(epoch)
  test(epoch)
  scheduler.step()
  print('epoch : ',epoch, best_accuracy)

In [12]:
# ResNet20의 best_accuracy
best_accuracy

87.16

## momentum 0.8

In [None]:
best_accuracy = 0
start_epoch = 0

net = resnet20()
net = net.to(device)
if device == 'cuda':
  net = torch.nn.DataParallel(net)
  cudnn.benchmark = True
criterion = nn.CrossEntropyLoss().cuda()
optimizer = optim.SGD(net.parameters(), lr=0.1, momentum=0.8, weight_decay=5e-4)
decay_epoch = [100,150]
scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=decay_epoch, gamma=0.1)

for epoch in range(start_epoch, start_epoch+201):
  train(epoch)
  test(epoch)
  scheduler.step()
  print('epoch : ',epoch, best_accuracy)

In [14]:
# ResNet20의 best_accuracy dropout 2개, opt : sgd
best_accuracy

92.28

## model momentum = 0.8

In [19]:
class BasicBlock(nn.Module):
    # block 안에서 plane 수 증가 비율
    expansion = 1
    momen = 0.8
    # in_planes : input, planes: output
    def __init__(self, in_planes, planes, stride=1, option='B'):
        super(BasicBlock, self).__init__()

        self.dropout = nn.Dropout(0.4)

        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes, momentum=self.momen)

        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3,stride=1,padding=1,bias=False)
        self.bn2 = nn.BatchNorm2d(planes, momentum=self.momen)

        self.shortcut = nn.Sequential()
        # shortcut connection 시 shape가 맞지 않을 때(conv_n 이 conv_n+1로 넘어갈 때)
        if stride != 1 or in_planes != planes:
            if option == 'A':
              # zero padding
              self.shortcut = LambdaLayer(lambda x:
                                          F.pad(x[:, :, ::2, ::2], (0, 0, 0, 0, planes//4, planes//4), "constant", 0))
            elif option == 'B':
              # projection
              self.shortcut = nn.Sequential(
                  nn.Conv2d(in_planes, self.expansion * planes, kernel_size=1, stride=stride, bias=False),
                  nn.BatchNorm2d(self.expansion * planes, momentum=self.momen)
                  )

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        # shortcut connection
        out += self.shortcut(x)
        out = F.relu(out)
        return out


In [13]:
class ResNet(nn.Module):
    def __init__(self, block, num_blocks, num_classes=10):
        super(ResNet, self).__init__()

        self.in_planes = 16
        self.momen = 0.8

        self.dropout = nn.Dropout(0.3)

        self.conv1 = nn.Conv2d(3, 16, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(16, momentum = self.momen)


        # feature map : 32 x 32 x 16
        self.layer1 = self._make_layer(block, 16, num_blocks[0], stride=1)
        # feature map : 16 x 16 x 32
        self.layer2 = self._make_layer(block, 32, num_blocks[1], stride=2)
        # feature map : 8 x 8 x 64
        self.layer3 = self._make_layer(block, 64, num_blocks[2], stride=2)
        # 마지막 클래스 예측 (Linear)
        self.fc_out = nn.Linear(64, num_classes)

        self.apply(_weights_init)

  
    def _make_layer(self, block, planes, num_blocks, stride):
        # 처음 stride만 2, 이후 1
        strides = [stride] + [1]*(num_blocks-1)
        layers = []
        for stride in strides:
            layers.append(block(self.in_planes, planes, stride))
            self.in_planes = planes * block.expansion
        return nn.Sequential(*layers)

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = F.avg_pool2d(out, out.size()[3])
        out = out.view(out.size(0),-1)
        out = self.fc_out(out)
        return out

In [None]:
best_accuracy = 0
start_epoch = 0

net = resnet20()
net = net.to(device)
if device == 'cuda':
  net = torch.nn.DataParallel(net)
  cudnn.benchmark = True
criterion = nn.CrossEntropyLoss().cuda()
optimizer = optim.SGD(net.parameters(), lr=0.1, momentum=0.8, weight_decay=5e-4)
decay_epoch = [100,150,200]
scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=decay_epoch, gamma=0.1)

for epoch in range(start_epoch, start_epoch+251):
  train(epoch)
  test(epoch)
  scheduler.step()
  print('epoch : ',epoch, best_accuracy)

In [28]:
# ResNet20의 best_accuracy
best_accuracy

92.46

## net momentun 0.8, opt momentum 0.7, weight_decay=3e-4

In [None]:
best_accuracy = 0
start_epoch = 0

net = resnet20()
net = net.to(device)
if device == 'cuda':
  net = torch.nn.DataParallel(net)
  cudnn.benchmark = True
criterion = nn.CrossEntropyLoss().cuda()
optimizer = optim.SGD(net.parameters(), lr=0.1, momentum=0.7, weight_decay=3e-4)
decay_epoch = [100,150,200]
scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=decay_epoch, gamma=0.1)

for epoch in range(start_epoch, start_epoch+251):
  train(epoch)
  test(epoch)
  scheduler.step()
  print('epoch : ',epoch, best_accuracy)

In [31]:
# ResNet32의 best_accuracy
best_accuracy

91.81

## net momentun 0.8, opt momentum 0.7, weight_decay=5e-4

In [None]:
best_accuracy = 0
start_epoch = 0

net = resnet20()
net = net.to(device)
if device == 'cuda':
  net = torch.nn.DataParallel(net)
  cudnn.benchmark = True
criterion = nn.CrossEntropyLoss().cuda()
optimizer = optim.SGD(net.parameters(), lr=0.1, momentum=0.7, weight_decay=5e-4)
decay_epoch = [100,150,200]
scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=decay_epoch, gamma=0.1)

for epoch in range(start_epoch, start_epoch+251):
  train(epoch)
  test(epoch)
  scheduler.step()
  print('epoch : ',epoch, best_accuracy)

In [33]:
# ResNet44의 best_accuracy
best_accuracy

92.27

# net momentun 0.8, opt momentum 0.8, weight_decay=5e-4, lr=0.15

In [None]:
best_accuracy = 0
start_epoch = 0

net = resnet20()
net = net.to(device)
if device == 'cuda':
    net = torch.nn.DataParallel(net)
    cudnn.benchmark = True
criterion = nn.CrossEntropyLoss().cuda()
optimizer = optim.SGD(net.parameters(), lr=0.15, momentum=0.8, weight_decay=5e-4)
decay_epoch = [100,150,200]
scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=decay_epoch, gamma=0.1)

for epoch in range(start_epoch, start_epoch+251):
    train(epoch)
    test(epoch)
    scheduler.step()
    print('epoch : ',epoch, best_accuracy)

In [14]:
# ResNet20의 best_accuracy
best_accuracy

92.75

## net momentun 0.8, opt momentum 0.8, weight_decay=5e-4, lr=0.2

In [None]:
best_accuracy = 0
start_epoch = 0

net = resnet20()
net = net.to(device)
if device == 'cuda':
  net = torch.nn.DataParallel(net)
  cudnn.benchmark = True
criterion = nn.CrossEntropyLoss().cuda()
optimizer = optim.SGD(net.parameters(), lr=0.2, momentum=0.8, weight_decay=5e-4)
#optimizer = optim.Adam(net.parameters(), lr=0.1, weight_decay=1e-4)

#optimizer = optim.SGD(net.parameters(), lr=0.1,momentum=0.9, weight_decay=1e-4)
decay_epoch = [100,150,200]
scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=decay_epoch, gamma=0.1)
#scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=200)

for epoch in range(start_epoch, start_epoch+251):
  train(epoch)
  test(epoch)
  scheduler.step()
  print('epoch : ',epoch, best_accuracy)

In [16]:
# ResNet20의 best_accuracy
best_accuracy

92.42

## basic block momentum: 0.7

In [17]:
class BasicBlock(nn.Module):
    # block 안에서 plane 수 증가 비율
    expansion = 1
    momen = 0.7
    # in_planes : input, planes: output
    def __init__(self, in_planes, planes, stride=1, option='B'):
        super(BasicBlock, self).__init__()

        self.dropout = nn.Dropout(0.4)

        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes, momentum=self.momen)

        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3,stride=1,padding=1,bias=False)
        self.bn2 = nn.BatchNorm2d(planes, momentum=self.momen)

        self.shortcut = nn.Sequential()
        # shortcut connection 시 shape가 맞지 않을 때(conv_n 이 conv_n+1로 넘어갈 때)
        if stride != 1 or in_planes != planes:
            if option == 'A':
              # zero padding
              self.shortcut = LambdaLayer(lambda x:
                                          F.pad(x[:, :, ::2, ::2], (0, 0, 0, 0, planes//4, planes//4), "constant", 0))
            elif option == 'B':
              # projection
              self.shortcut = nn.Sequential(
                  nn.Conv2d(in_planes, self.expansion * planes, kernel_size=1, stride=stride, bias=False),
                  nn.BatchNorm2d(self.expansion * planes, momentum=self.momen)
                  )

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        # shortcut connection
        out += self.shortcut(x)
        out = F.relu(out)
        return out


In [None]:
best_accuracy = 0
start_epoch = 0

net = resnet20()
net = net.to(device)
if device == 'cuda':
  net = torch.nn.DataParallel(net)
  cudnn.benchmark = True
criterion = nn.CrossEntropyLoss().cuda()
optimizer = optim.SGD(net.parameters(), lr=0.15, momentum=0.8, weight_decay=5e-4)
#optimizer = optim.Adam(net.parameters(), lr=0.1, weight_decay=1e-4)

#optimizer = optim.SGD(net.parameters(), lr=0.1,momentum=0.9, weight_decay=1e-4)
decay_epoch = [100,150,200]
scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=decay_epoch, gamma=0.1)
#scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=200)

for epoch in range(start_epoch, start_epoch+251):
  train(epoch)
  test(epoch)
  scheduler.step()
  print('epoch : ',epoch, best_accuracy)

In [19]:
best_accuracy

92.48

## momentum :0.8 decay epoch 추가 lr:0.155

In [11]:
class BasicBlock(nn.Module):
    # block 안에서 plane 수 증가 비율
    expansion = 1
    momen = 0.8
    # in_planes : input, planes: output
    def __init__(self, in_planes, planes, stride=1, option='B'):
        super(BasicBlock, self).__init__()

        self.dropout = nn.Dropout(0.4)

        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes, momentum=self.momen)

        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3,stride=1,padding=1,bias=False)
        self.bn2 = nn.BatchNorm2d(planes, momentum=self.momen)

        self.shortcut = nn.Sequential()
        # shortcut connection 시 shape가 맞지 않을 때(conv_n 이 conv_n+1로 넘어갈 때)
        if stride != 1 or in_planes != planes:
            if option == 'A':
              # zero padding
              self.shortcut = LambdaLayer(lambda x:
                                          F.pad(x[:, :, ::2, ::2], (0, 0, 0, 0, planes//4, planes//4), "constant", 0))
            elif option == 'B':
              # projection
              self.shortcut = nn.Sequential(
                  nn.Conv2d(in_planes, self.expansion * planes, kernel_size=1, stride=stride, bias=False),
                  nn.BatchNorm2d(self.expansion * planes, momentum=self.momen)
                  )

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        # shortcut connection
        out += self.shortcut(x)
        out = F.relu(out)
        return out


In [None]:
best_accuracy = 0
start_epoch = 0

net = resnet20()
net = net.to(device)
if device == 'cuda':
  net = torch.nn.DataParallel(net)
  cudnn.benchmark = True
criterion = nn.CrossEntropyLoss().cuda()
optimizer = optim.SGD(net.parameters(), lr=0.155, momentum=0.8, weight_decay=5e-4)
#optimizer = optim.Adam(net.parameters(), lr=0.1, weight_decay=1e-4)

#optimizer = optim.SGD(net.parameters(), lr=0.1,momentum=0.9, weight_decay=1e-4)
decay_epoch = [100,150,200,230]
scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=decay_epoch, gamma=0.1)
#scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=200)

for epoch in range(start_epoch, start_epoch+251):
  train(epoch)
  test(epoch)
  scheduler.step()
  print('epoch : ',epoch, best_accuracy)

In [13]:
best_accuracy

92.55

## lr=0.15, decay epoch만 추가

In [None]:
best_accuracy = 0
start_epoch = 0

net = resnet20()
net = net.to(device)
if device == 'cuda':
  net = torch.nn.DataParallel(net)
  cudnn.benchmark = True
criterion = nn.CrossEntropyLoss().cuda()
optimizer = optim.SGD(net.parameters(), lr=0.15, momentum=0.8, weight_decay=5e-4)
#optimizer = optim.Adam(net.parameters(), lr=0.1, weight_decay=1e-4)

#optimizer = optim.SGD(net.parameters(), lr=0.1,momentum=0.9, weight_decay=1e-4)
decay_epoch = [100,150,200,230]
scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=decay_epoch, gamma=0.1)
#scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=200)

for epoch in range(start_epoch, start_epoch+251):
  train(epoch)
  test(epoch)
  scheduler.step()
  print('epoch : ',epoch, best_accuracy)

In [15]:
best_accuracy

92.2

## decay epoch gamma =0.05

In [None]:
best_accuracy = 0
start_epoch = 0

net = resnet20()
net = net.to(device)
if device == 'cuda':
  net = torch.nn.DataParallel(net)
  cudnn.benchmark = True
criterion = nn.CrossEntropyLoss().cuda()
optimizer = optim.SGD(net.parameters(), lr=0.15, momentum=0.8, weight_decay=5e-4)
#optimizer = optim.Adam(net.parameters(), lr=0.1, weight_decay=1e-4)

#optimizer = optim.SGD(net.parameters(), lr=0.1,momentum=0.9, weight_decay=1e-4)
decay_epoch = [100,150,200,230]
scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=decay_epoch, gamma=0.05)
#scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=200)

for epoch in range(start_epoch, start_epoch+251):
  train(epoch)
  test(epoch)
  scheduler.step()
  print('epoch : ',epoch, best_accuracy)

In [17]:
best_accuracy

92.23

## option A

In [11]:
class BasicBlock(nn.Module):
    # block 안에서 plane 수 증가 비율
    expansion = 1
    momen = 0.8
    # in_planes : input, planes: output
    def __init__(self, in_planes, planes, stride=1, option='B'):
        super(BasicBlock, self).__init__()

        self.dropout = nn.Dropout(0.4)

        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes, momentum=self.momen)

        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3,stride=1,padding=1,bias=False)
        self.bn2 = nn.BatchNorm2d(planes, momentum=self.momen)

        self.shortcut = nn.Sequential()
        # shortcut connection 시 shape가 맞지 않을 때(conv_n 이 conv_n+1로 넘어갈 때)
        if stride != 1 or in_planes != planes:
            if option == 'A':
              # zero padding
              self.shortcut = LambdaLayer(lambda x:
                                          F.pad(x[:, :, ::2, ::2], (0, 0, 0, 0, planes//4, planes//4), "constant", 0))
            elif option == 'B':
              # projection
              self.shortcut = nn.Sequential(
                  nn.Conv2d(in_planes, self.expansion * planes, kernel_size=1, stride=stride, bias=False),
                  nn.BatchNorm2d(self.expansion * planes, momentum=self.momen)
                  )

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        # shortcut connection
        out += self.shortcut(x)
        out = F.relu(out)
        return out


In [None]:
best_accuracy = 0
start_epoch = 0

net = resnet20()
net = net.to(device)
if device == 'cuda':
  net = torch.nn.DataParallel(net)
  cudnn.benchmark = True
criterion = nn.CrossEntropyLoss().cuda()
optimizer = optim.SGD(net.parameters(), lr=0.15, momentum=0.8, weight_decay=5e-4)
#optimizer = optim.Adam(net.parameters(), lr=0.1, weight_decay=1e-4)

#optimizer = optim.SGD(net.parameters(), lr=0.1,momentum=0.9, weight_decay=1e-4)
decay_epoch = [100,150,200]
scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=decay_epoch, gamma=0.1)
#scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=200)

for epoch in range(start_epoch, start_epoch+251):
  train(epoch)
  test(epoch)
  scheduler.step()
  print('epoch : ',epoch, best_accuracy)

In [20]:
best_accuracy

92.34

## option B, decay epoch 200 -> 220

In [12]:
class BasicBlock(nn.Module):
    # block 안에서 plane 수 증가 비율
    expansion = 1
    momen = 0.8
    # in_planes : input, planes: output
    def __init__(self, in_planes, planes, stride=1, option='B'):
        super(BasicBlock, self).__init__()

        self.dropout = nn.Dropout(0.4)

        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes, momentum=self.momen)

        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3,stride=1,padding=1,bias=False)
        self.bn2 = nn.BatchNorm2d(planes, momentum=self.momen)

        self.shortcut = nn.Sequential()
        # shortcut connection 시 shape가 맞지 않을 때(conv_n 이 conv_n+1로 넘어갈 때)
        if stride != 1 or in_planes != planes:
            if option == 'A':
              # zero padding
              self.shortcut = LambdaLayer(lambda x:
                                          F.pad(x[:, :, ::2, ::2], (0, 0, 0, 0, planes//4, planes//4), "constant", 0))
            elif option == 'B':
              # projection
              self.shortcut = nn.Sequential(
                  nn.Conv2d(in_planes, self.expansion * planes, kernel_size=1, stride=stride, bias=False),
                  nn.BatchNorm2d(self.expansion * planes, momentum=self.momen)
                  )

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        # shortcut connection
        out += self.shortcut(x)
        out = F.relu(out)
        return out


In [None]:
best_accuracy = 0
start_epoch = 0

net = resnet20()
net = net.to(device)
if device == 'cuda':
  net = torch.nn.DataParallel(net)
  cudnn.benchmark = True
criterion = nn.CrossEntropyLoss().cuda()
optimizer = optim.SGD(net.parameters(), lr=0.15, momentum=0.8, weight_decay=5e-4)
#optimizer = optim.Adam(net.parameters(), lr=0.1, weight_decay=1e-4)

#optimizer = optim.SGD(net.parameters(), lr=0.1,momentum=0.9, weight_decay=1e-4)
decay_epoch = [110,150,200]
scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=decay_epoch, gamma=0.1)
#scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=200)

for epoch in range(start_epoch, start_epoch+251):
  train(epoch)
  test(epoch)
  scheduler.step()
  print('epoch : ',epoch, best_accuracy)

In [24]:
# [100,150,220]
best_accuracy 

92.66

## lr 0.148

In [None]:
best_accuracy = 0
start_epoch = 0

net = resnet20()
net = net.to(device)
if device == 'cuda':
  net = torch.nn.DataParallel(net)
  cudnn.benchmark = True
criterion = nn.CrossEntropyLoss().cuda()
optimizer = optim.SGD(net.parameters(), lr=0.148, momentum=0.8, weight_decay=5e-4)
#optimizer = optim.Adam(net.parameters(), lr=0.1, weight_decay=1e-4)

#optimizer = optim.SGD(net.parameters(), lr=0.1,momentum=0.9, weight_decay=1e-4)
decay_epoch = [100,150,200]
scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=decay_epoch, gamma=0.1)
#scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=200)

for epoch in range(start_epoch, start_epoch+251):
  train(epoch)
  test(epoch)
  scheduler.step()
  print('epoch : ',epoch, best_accuracy)

In [23]:
best_accuracy 

92.61

## gamma 0.15 -> 별로

## adam lr 0.05 

In [None]:
best_accuracy = 0
start_epoch = 0

net = resnet20()
net = net.to(device)
if device == 'cuda':
  net = torch.nn.DataParallel(net)
  cudnn.benchmark = True
criterion = nn.CrossEntropyLoss().cuda()
#optimizer = optim.SGD(net.parameters(), lr=0.15, momentum=0.8, weight_decay=5e-4)
#optimizer = optim.Adam(net.parameters(), lr=0.05)
#optimizer = optim.SGD(net.parameters(), lr=0.1,momentum=0.9, weight_decay=1e-4)

decay_epoch = [100,150,200]
scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=decay_epoch, gamma=0.1)
#scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=200)

for epoch in range(start_epoch, start_epoch+251):
  train(epoch)
  test(epoch)
  scheduler.step()
  print('epoch : ',epoch, best_accuracy)

In [31]:
best_accuracy 

89.52

## sgd momentum

In [None]:
best_accuracy = 0
start_epoch = 0

net = resnet20()
net = net.to(device)
if device == 'cuda':
  net = torch.nn.DataParallel(net)
  cudnn.benchmark = True
criterion = nn.CrossEntropyLoss().cuda()
optimizer = optim.SGD(net.parameters(), lr=0.15, momentum=0.75, weight_decay=5e-4)
#optimizer = optim.Adam(net.parameters(), lr=0.05)
#optimizer = optim.SGD(net.parameters(), lr=0.1,momentum=0.9, weight_decay=1e-4)

decay_epoch = [100,150,200]
scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=decay_epoch, gamma=0.1)
#scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=200)

for epoch in range(start_epoch, start_epoch+251):
  train(epoch)
  test(epoch)
  scheduler.step()
  print('epoch : ',epoch, best_accuracy)

In [33]:
best_accuracy 

92.49

In [None]:
best_accuracy = 0
start_epoch = 0

net = resnet20()
net = net.to(device)
if device == 'cuda':
  net = torch.nn.DataParallel(net)
  cudnn.benchmark = True
criterion = nn.CrossEntropyLoss().cuda()
optimizer = optim.SGD(net.parameters(), lr=0.15, momentum=0.85, weight_decay=5e-4)
#optimizer = optim.Adam(net.parameters(), lr=0.05)
#optimizer = optim.SGD(net.parameters(), lr=0.1,momentum=0.9, weight_decay=1e-4)

decay_epoch = [100,150,200]
scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=decay_epoch, gamma=0.1)
#scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=200)

for epoch in range(start_epoch, start_epoch+251):
  train(epoch)
  test(epoch)
  scheduler.step()
  print('epoch : ',epoch, best_accuracy)

In [35]:
best_accuracy

92.0

## decay epoch

In [None]:
best_accuracy = 0
start_epoch = 0

net = resnet20()
net = net.to(device)
if device == 'cuda':
  net = torch.nn.DataParallel(net)
  cudnn.benchmark = True
criterion = nn.CrossEntropyLoss().cuda()
optimizer = optim.SGD(net.parameters(), lr=0.15, momentum=0.8, weight_decay=5e-4)
#optimizer = optim.Adam(net.parameters(), lr=0.05)
#optimizer = optim.SGD(net.parameters(), lr=0.1,momentum=0.9, weight_decay=1e-4)

decay_epoch = [90,150,200]
scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=decay_epoch, gamma=0.1)
#scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=200)

for epoch in range(start_epoch, start_epoch+251):
  train(epoch)
  test(epoch)
  scheduler.step()
  print('epoch : ',epoch, best_accuracy)

In [37]:
best_accuracy

92.26

## 90, 140, 190

In [None]:
best_accuracy = 0
start_epoch = 0

net = resnet20()
net = net.to(device)
if device == 'cuda':
  net = torch.nn.DataParallel(net)
  cudnn.benchmark = True
criterion = nn.CrossEntropyLoss().cuda()
optimizer = optim.SGD(net.parameters(), lr=0.15, momentum=0.8, weight_decay=5e-4)
#optimizer = optim.Adam(net.parameters(), lr=0.05)
#optimizer = optim.SGD(net.parameters(), lr=0.1,momentum=0.9, weight_decay=1e-4)

decay_epoch = [90,140,190]
scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=decay_epoch, gamma=0.1)
#scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=200)

for epoch in range(start_epoch, start_epoch+251):
  train(epoch)
  test(epoch)
  scheduler.step()
  print('epoch : ',epoch, best_accuracy)

In [41]:
best_accuracy

92.52

In [16]:
import matplotlib.pyplot as plt

In [None]:
best_accuracy = 0
start_epoch = 0
validation_running_correct_history = []
validation_running_loss_history = []

net = resnet20()
net = net.to(device)
if device == 'cuda':
  net = torch.nn.DataParallel(net)
  cudnn.benchmark = True
criterion = nn.CrossEntropyLoss().cuda()
#optimizer = optim.SGD(net.parameters(), lr=0.15, momentum=0.8, weight_decay=5e-4)
optimizer = optim.Adam(net.parameters(), lr=0.0005, weight_decay=1e-4)

#optimizer = optim.SGD(net.parameters(), lr=0.1,momentum=0.9, weight_decay=1e-4)
decay_epoch = [100,150,200]
scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=decay_epoch, gamma=0.1)
#scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=200)
#scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.99)
#scheduler = lr_scheduler.ReduceLROnPlateau(optimizer,threshold=1e-4,patience=5,factor=0.7,mode='min',verbose=1) 
for epoch in range(start_epoch, start_epoch+251):
    train(epoch)
    test(epoch)
    scheduler.step()
    print('epoch : ',epoch, best_accuracy)


In [22]:
best_accuracy

89.33

In [None]:
best_accuracy = 0
start_epoch = 0
validation_running_correct_history = []
validation_running_loss_history = []

net = resnet20()
net = net.to(device)
if device == 'cuda':
  net = torch.nn.DataParallel(net)
  cudnn.benchmark = True
criterion = nn.CrossEntropyLoss().cuda()
#optimizer = optim.SGD(net.parameters(), lr=0.15, momentum=0.8, weight_decay=5e-4)
optimizer = optim.Adam(net.parameters(), lr=0.0005, weight_decay=5e-4)

#optimizer = optim.SGD(net.parameters(), lr=0.1,momentum=0.9, weight_decay=1e-4)
decay_epoch = [100,150,200]
#scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=decay_epoch, gamma=0.1)
#scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=200)
scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.99)
#scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer,threshold=1e-4,patience=5,factor=0.8,mode='min',verbose=1) 
for epoch in range(start_epoch, start_epoch+251):
    train(epoch)
    test(epoch)
    scheduler.step()
    print('epoch : ',epoch, best_accuracy)


In [25]:
best_accuracy

89.1

In [None]:
best_accuracy = 0
start_epoch = 0
validation_running_correct_history = []
validation_running_loss_history = []

net = resnet20()
net = net.to(device)
if device == 'cuda':
  net = torch.nn.DataParallel(net)
  cudnn.benchmark = True
criterion = nn.CrossEntropyLoss().cuda()
#optimizer = optim.SGD(net.parameters(), lr=0.15, momentum=0.8, weight_decay=5e-4)
optimizer = optim.Adam(net.parameters(), lr=0.001)

#optimizer = optim.SGD(net.parameters(), lr=0.1,momentum=0.9, weight_decay=1e-4)
decay_epoch = [100,150,200]
#scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=decay_epoch, gamma=0.1)
#scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=200)
scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.99,verbose=1)
#scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer,threshold=1e-4,patience=5,factor=0.8,mode='min',verbose=1) 
for epoch in range(start_epoch, start_epoch+251):
    train(epoch)
    test(epoch)
    scheduler.step()
    print('epoch : ',epoch, best_accuracy)


In [27]:
best_accuracy

89.12

## momentum 0.95 
gamma = 0.99 -> 91.04

In [None]:
best_accuracy = 0
start_epoch = 0
validation_running_correct_history = []
validation_running_loss_history = []

net = resnet20()
net = net.to(device)
if device == 'cuda':
  net = torch.nn.DataParallel(net)
  cudnn.benchmark = True
criterion = nn.CrossEntropyLoss().cuda()
optimizer = optim.SGD(net.parameters(), lr=0.15, momentum=0.95, weight_decay=1e-4)
scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.97)
for epoch in range(start_epoch, start_epoch+251):
    train(epoch)
    test(epoch)
    scheduler.step()
    print('epoch : ',epoch, best_accuracy)


In [29]:
best_accuracy

91.35

In [None]:
best_accuracy = 0
start_epoch = 0
validation_running_correct_history = []
validation_running_loss_history = []

net = resnet20()
net = net.to(device)
if device == 'cuda':
  net = torch.nn.DataParallel(net)
  cudnn.benchmark = True
criterion = nn.CrossEntropyLoss().cuda()
optimizer = optim.SGD(net.parameters(), lr=0.15, momentum=0.95, weight_decay=1e-4)
scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.98)
for epoch in range(start_epoch, start_epoch+251):
    train(epoch)
    test(epoch)
    scheduler.step()
    print('epoch : ',epoch, best_accuracy)


In [31]:
best_accuracy

92.1

In [None]:
best_accuracy = 0
start_epoch = 0
validation_running_correct_history = []
validation_running_loss_history = []

net = resnet20()
net = net.to(device)
if device == 'cuda':
  net = torch.nn.DataParallel(net)
  cudnn.benchmark = True
criterion = nn.CrossEntropyLoss().cuda()
optimizer = optim.SGD(net.parameters(), lr=0.15, momentum=0.93, weight_decay=1e-4)
scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.98)
for epoch in range(start_epoch, start_epoch+251):
    train(epoch)
    test(epoch)
    scheduler.step()
    print('epoch : ',epoch, best_accuracy)


In [33]:
best_accuracy

91.53

In [None]:
best_accuracy = 0
start_epoch = 0
validation_running_correct_history = []
validation_running_loss_history = []

net = resnet20()
net = net.to(device)
if device == 'cuda':
  net = torch.nn.DataParallel(net)
  cudnn.benchmark = True
criterion = nn.CrossEntropyLoss().cuda()
optimizer = optim.SGD(net.parameters(), lr=0.15, momentum=0.97, weight_decay=3e-4)
scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.98)
for epoch in range(start_epoch, start_epoch+251):
    train(epoch)
    test(epoch)
    scheduler.step()
    print('epoch : ',epoch, best_accuracy)


In [35]:
best_accuracy

91.35

In [None]:
best_accuracy = 0
start_epoch = 0
validation_running_correct_history = []
validation_running_loss_history = []

net = resnet20()
net = net.to(device)
if device == 'cuda':
  net = torch.nn.DataParallel(net)
  cudnn.benchmark = True
criterion = nn.CrossEntropyLoss().cuda()
optimizer = optim.SGD(net.parameters(), lr=0.15, momentum=0.97, weight_decay=1e-4)
scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.98)
for epoch in range(start_epoch, start_epoch+251):
    train(epoch)
    test(epoch)
    scheduler.step()
    print('epoch : ',epoch, best_accuracy)


In [37]:
best_accuracy

92.17

In [None]:

best_accuracy = 0
start_epoch = 0
validation_running_correct_history = []
validation_running_loss_history = []

net = resnet20()
net = net.to(device)
if device == 'cuda':
  net = torch.nn.DataParallel(net)
  cudnn.benchmark = True
criterion = nn.CrossEntropyLoss().cuda()
optimizer = optim.SGD(net.parameters(), lr=0.15, momentum=0.98, weight_decay=1e-4)
scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.98)
for epoch in range(start_epoch, start_epoch+251):
    train(epoch)
    test(epoch)
    scheduler.step()
    print('epoch : ',epoch, best_accuracy)


In [47]:
best_accuracy

91.64

In [None]:

best_accuracy = 0
start_epoch = 0
validation_running_correct_history = []
validation_running_loss_history = []

net = resnet20()
net = net.to(device)
if device == 'cuda':
  net = torch.nn.DataParallel(net)
  cudnn.benchmark = True
criterion = nn.CrossEntropyLoss().cuda()
optimizer = optim.SGD(net.parameters(), lr=0.15, momentum=0.8, weight_decay=5e-4)
scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.98)
for epoch in range(start_epoch, start_epoch+251):
    train(epoch)
    test(epoch)
    scheduler.step()
    print('epoch : ',epoch, best_accuracy)


In [49]:
best_accuracy

92.01

In [None]:

best_accuracy = 0
start_epoch = 0
validation_running_correct_history = []
validation_running_loss_history = []

net = resnet20()
net = net.to(device)
if device == 'cuda':
  net = torch.nn.DataParallel(net)
  cudnn.benchmark = True
criterion = nn.CrossEntropyLoss().cuda()
optimizer = optim.SGD(net.parameters(), lr=0.15, momentum=0.85, weight_decay=5e-4)
scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.98)
for epoch in range(start_epoch, start_epoch+251):
    train(epoch)
    test(epoch)
    scheduler.step()
    print('epoch : ',epoch, best_accuracy)


In [51]:
best_accuracy

92.47

# Best Model
## 92.75 나왔던 조건으로 resnet32

In [None]:
best_accuracy = 0
start_epoch = 0

net = resnet32()
net = net.to(device)
if device == 'cuda':
    net = torch.nn.DataParallel(net)
    cudnn.benchmark = True
criterion = nn.CrossEntropyLoss().cuda()
optimizer = optim.SGD(net.parameters(), lr=0.15, momentum=0.8, weight_decay=5e-4)

decay_epoch = [100,150,200]
scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=decay_epoch, gamma=0.1)


for epoch in range(start_epoch, start_epoch+251):
    train(epoch)
    test(epoch)
    scheduler.step()
    print('epoch : ',epoch, best_accuracy)

In [19]:
best_accuracy

93.55