In [None]:
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms

# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
  device = torch.device('cuda')
else:
  device = torch.device('cpu')

# Hyper-parameters shared by every experiment cell further down.
num_epochs = 80
learning_rate = 0.001

# Training-time augmentation: pad by 4 pixels, random horizontal flip,
# random 32x32 crop, then conversion to a tensor.
transform = transforms.Compose([
    transforms.Pad(4),
    transforms.RandomHorizontalFlip(),
    transforms.RandomCrop(32),
    transforms.ToTensor(),
])

# CIFAR-10 training split (downloaded on first use) with augmentation.
train_dataset = torchvision.datasets.CIFAR10(
    root='../../data/',
    train=True,
    transform=transform,
    download=True,
)

# CIFAR-10 test split: no augmentation, only tensor conversion.
test_dataset = torchvision.datasets.CIFAR10(
    root='../../data/',
    train=False,
    transform=transforms.ToTensor(),
)

# Mini-batches of 100 images; only the training stream is shuffled.
train_loader = torch.utils.data.DataLoader(
    train_dataset,
    batch_size=100,
    shuffle=True,
)

test_loader = torch.utils.data.DataLoader(
    test_dataset,
    batch_size=100,
    shuffle=False,
)

class BasicBlock(nn.Module):
  """Residual block made of two 3x3 convolutions plus a shortcut branch.

  The main path is conv3x3 -> BatchNorm -> ReLU -> conv3x3 -> BatchNorm. The
  shortcut is an empty ``nn.Sequential`` (identity) unless the stride is not 1
  or the channel count changes, in which case it is a 1x1 convolution followed
  by BatchNorm. The two branches are summed and passed through a ReLU.

  Args:
    in_channels: Number of channels of the input tensor.
    out_channels: Number of channels produced by the block.
    stride: Stride of the first 3x3 convolution and of the projection shortcut.
  """
  expansion = 1

  def __init__(self, in_channels, out_channels, stride = 1):
    super().__init__()

    main_path = [
        nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=stride, padding=1, bias=False),
        nn.BatchNorm2d(out_channels),
        nn.ReLU(),
        nn.Conv2d(out_channels, out_channels, kernel_size=3, stride=1, padding=1, bias=False),
        nn.BatchNorm2d(out_channels),
    ]
    self.residual_function = nn.Sequential(*main_path)

    # A projection is only needed when the main path changes the tensor shape.
    projection = []
    if not (stride == 1 and in_channels == out_channels):
      projection = [
          nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=stride, bias=False),
          nn.BatchNorm2d(out_channels),
      ]
    self.shortcut = nn.Sequential(*projection)

    self.relu = nn.ReLU()

  def forward(self, x):
    """Return ``relu(residual_function(x) + shortcut(x))``."""
    residual = self.residual_function(x)
    skip = self.shortcut(x)
    return self.relu(residual + skip)

class ResNet(nn.Module):
  """ResNet-style classifier assembled from a configurable residual block.

  Layout: a stem (7x7 stride-2 convolution, BatchNorm, ReLU, 3x3 stride-2 max
  pool), four residual stages with 64/128/256/512 base channels, global
  average pooling and a linear classifier.

  Args:
    block: Residual block class. It is instantiated as
      ``block(in_channels, out_channels, stride)`` and must expose an integer
      ``expansion`` class attribute; the input width of the following block is
      taken to be ``out_channels * block.expansion``.
    num_block: Sequence of four ints giving the number of blocks per stage.
    num_classes: Number of output logits.
    init_weights: When True, re-initialise parameters via
      ``_initialize_weights``.
  """

  def __init__(self, block, num_block, num_classes = 10, init_weights = True):
    super().__init__()

    # Running channel count consumed (and updated) by _make_layer.
    self.in_channels = 64

    self.conv1 = nn.Sequential(
        nn.Conv2d(3, 64, kernel_size = 7, stride = 2, padding = 3, bias = False),
        nn.BatchNorm2d(64),
        nn.ReLU(),
        nn.MaxPool2d(kernel_size = 3, stride = 2, padding = 1)
    )

    self.conv2_x = self._make_layer(block, 64, num_block[0], 1)
    self.conv3_x = self._make_layer(block, 128, num_block[1], 2)
    self.conv4_x = self._make_layer(block, 256, num_block[2], 2)
    self.conv5_x = self._make_layer(block, 512, num_block[3], 2)

    self.avg_pool = nn.AdaptiveAvgPool2d((1, 1))
    self.fc = nn.Linear(512 * block.expansion, num_classes)

    if init_weights:
      self._initialize_weights()

  def _make_layer(self, block, out_channels, num_blocks, stride):
    """Build one stage: the first block uses ``stride``, the rest use stride 1."""
    strides = [stride] + [1] * (num_blocks - 1)
    layers = []
    for stride in strides:
      layers.append(block(self.in_channels, out_channels, stride))
      self.in_channels = out_channels * block.expansion

    return nn.Sequential(*layers)

  def forward(self, x):
    """Map an image batch to a ``(batch, num_classes)`` tensor of logits."""
    output = self.conv1(x)
    output = self.conv2_x(output)
    x = self.conv3_x(output)
    x = self.conv4_x(x)
    x = self.conv5_x(x)
    x = self.avg_pool(x)
    x = x.view(x.size(0), -1)
    x = self.fc(x)
    return x

  def _initialize_weights(self):
    """Kaiming-normal convolutions, unit BatchNorm, N(0, 0.01) linear layers.

    The three module kinds are tested at the same nesting level; previously
    the BatchNorm and Linear branches were attached to the inner
    ``if m.bias is not None`` and therefore never ran.
    """
    for m in self.modules():
      if isinstance(m, nn.Conv2d):
        nn.init.kaiming_normal_(m.weight, mode = 'fan_out', nonlinearity = 'relu')
        if m.bias is not None:
          nn.init.constant_(m.bias, 0)
      elif isinstance(m, nn.BatchNorm2d):
        nn.init.constant_(m.weight, 1)
        nn.init.constant_(m.bias, 0)
      elif isinstance(m, nn.Linear):
        nn.init.normal_(m.weight, 0, 0.01)
        nn.init.constant_(m.bias, 0)

def update_lr(optimizer, lr):
  """Overwrite the ``'lr'`` entry of every parameter group of ``optimizer``.

  Args:
    optimizer: Object exposing ``param_groups``, an iterable of dict-like
      groups.
    lr: Learning rate to store in each group.
  """
  for group in optimizer.param_groups:
    group['lr'] = lr

def train(epoch):
  """Train the global ``model`` for one epoch and apply the step LR schedule.

  Relies on the module-level ``model``, ``optimizer``, ``criterion``,
  ``train_loader``, ``device``, ``learning_rate``, ``num_epochs`` and
  ``update_lr``. The loss of every 100th mini-batch is printed.

  After each 20th epoch the learning rate is set to
  ``learning_rate / 3 ** k`` where ``k = (epoch + 1) // 20``.

  Args:
    epoch: Zero-based index of the epoch being run.
  """
  total_step = len(train_loader)
  model.train()
  for batch_idx, (data, target) in enumerate(train_loader):
    data, target = data.to(device), target.to(device)
    optimizer.zero_grad()
    output = model(data)
    loss = criterion(output, target)

    loss.backward()
    optimizer.step()

    if (batch_idx + 1) % 100 == 0:
      print("Epoch [{} / {}], Step [{} / {}], Loss : {:4f}".format(epoch + 1, num_epochs, batch_idx + 1, total_step, loss.item()))

  if (epoch + 1) % 20 == 0:
    # Derive the rate from the epoch index. A local running value would be
    # reset on every call, so the decay would never go past learning_rate / 3.
    decay_steps = (epoch + 1) // 20
    update_lr(optimizer, learning_rate / (3 ** decay_steps))

def test():
  model.eval()
  test_loss = 0
  correct = 0
  for data, target in test_loader:
    data, target = data.to(device), target.to(device)
    output = model(data)
    test_loss += criterion(output, target)
    pred = output.data.max(1, keepdim = True)[1]
    correct += pred.eq(target.data.view_as(pred)).cpu().sum()

  test_loss /= len(test_loader.dataset)
  print('\nTest Set : Average Loss : {:4f}, Accuracy : {} / {} ({:.0f}%)\n'.format(test_loss, correct, len(test_loader.dataset), 100. * correct / len(test_loader.dataset)))

Files already downloaded and verified


In [None]:
#BatchNorm (2)

#1 - 2 LayerNorm

class ResNet(nn.Module):
  """ResNet-style classifier whose stem uses LayerNorm instead of BatchNorm.

  Layout: a stem (7x7 stride-2 convolution, ``LayerNorm([64, 16, 16])``, ReLU,
  3x3 stride-2 max pool), four residual stages with 64/128/256/512 base
  channels, global average pooling and a linear classifier. The fixed
  LayerNorm shape matches the stem convolution output for 32x32 inputs.

  Args:
    block: Residual block class. It is instantiated as
      ``block(in_channels, out_channels, stride)`` and must expose an integer
      ``expansion`` class attribute; the input width of the following block is
      taken to be ``out_channels * block.expansion``.
    num_block: Sequence of four ints giving the number of blocks per stage.
    num_classes: Number of output logits.
    init_weights: When True, re-initialise parameters via
      ``_initialize_weights``.
  """

  def __init__(self, block, num_block, num_classes = 10, init_weights = True):
    super().__init__()

    # Running channel count consumed (and updated) by _make_layer.
    self.in_channels = 64

    self.conv1 = nn.Sequential(
        nn.Conv2d(3, 64, kernel_size = 7, stride = 2, padding = 3, bias = False),
        nn.LayerNorm([64, 16, 16]),
        nn.ReLU(),
        nn.MaxPool2d(kernel_size = 3, stride = 2, padding = 1)
    )

    self.conv2_x = self._make_layer(block, 64, num_block[0], 1)
    self.conv3_x = self._make_layer(block, 128, num_block[1], 2)
    self.conv4_x = self._make_layer(block, 256, num_block[2], 2)
    self.conv5_x = self._make_layer(block, 512, num_block[3], 2)

    self.avg_pool = nn.AdaptiveAvgPool2d((1, 1))
    self.fc = nn.Linear(512 * block.expansion, num_classes)

    if init_weights:
      self._initialize_weights()

  def _make_layer(self, block, out_channels, num_blocks, stride):
    """Build one stage: the first block uses ``stride``, the rest use stride 1."""
    strides = [stride] + [1] * (num_blocks - 1)
    layers = []
    for stride in strides:
      layers.append(block(self.in_channels, out_channels, stride))
      self.in_channels = out_channels * block.expansion

    return nn.Sequential(*layers)

  def forward(self, x):
    """Map an image batch to a ``(batch, num_classes)`` tensor of logits."""
    output = self.conv1(x)
    output = self.conv2_x(output)
    x = self.conv3_x(output)
    x = self.conv4_x(x)
    x = self.conv5_x(x)
    x = self.avg_pool(x)
    x = x.view(x.size(0), -1)
    x = self.fc(x)
    return x

  def _initialize_weights(self):
    """Kaiming-normal convolutions, unit BatchNorm, N(0, 0.01) linear layers.

    The three module kinds are tested at the same nesting level; previously
    the BatchNorm and Linear branches were attached to the inner
    ``if m.bias is not None`` and therefore never ran.
    """
    for m in self.modules():
      if isinstance(m, nn.Conv2d):
        nn.init.kaiming_normal_(m.weight, mode = 'fan_out', nonlinearity = 'relu')
        if m.bias is not None:
          nn.init.constant_(m.bias, 0)
      elif isinstance(m, nn.BatchNorm2d):
        nn.init.constant_(m.weight, 1)
        nn.init.constant_(m.bias, 0)
      elif isinstance(m, nn.Linear):
        nn.init.normal_(m.weight, 0, 0.01)
        nn.init.constant_(m.bias, 0)

# Experiment 1-2: four stages of two BasicBlocks each, LayerNorm stem.
# Place the model on the same `device` the batches are moved to, so the cell
# also runs on CPU-only hosts instead of failing in .cuda().
model = ResNet(BasicBlock, [2, 2, 2, 2]).to(device)

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr = learning_rate)

# One training pass followed by one evaluation pass per epoch.
for epoch in range(num_epochs):
  train(epoch)
  test()

Epoch [1 / 80], Step [100 / 500], Loss : 1.868051
Epoch [1 / 80], Step [200 / 500], Loss : 1.614345
Epoch [1 / 80], Step [300 / 500], Loss : 1.587963
Epoch [1 / 80], Step [400 / 500], Loss : 1.281564
Epoch [1 / 80], Step [500 / 500], Loss : 1.320653

Test Set : Average Loss : 0.026877, Accuracy : 2803 / 10000 (28%)

Epoch [2 / 80], Step [100 / 500], Loss : 1.396630
Epoch [2 / 80], Step [200 / 500], Loss : 1.278137
Epoch [2 / 80], Step [300 / 500], Loss : 1.228327
Epoch [2 / 80], Step [400 / 500], Loss : 1.206281
Epoch [2 / 80], Step [500 / 500], Loss : 1.110727

Test Set : Average Loss : 0.014288, Accuracy : 5294 / 10000 (53%)

Epoch [3 / 80], Step [100 / 500], Loss : 1.089255
Epoch [3 / 80], Step [200 / 500], Loss : 1.119950
Epoch [3 / 80], Step [300 / 500], Loss : 0.953267
Epoch [3 / 80], Step [400 / 500], Loss : 0.967906
Epoch [3 / 80], Step [500 / 500], Loss : 0.964563

Test Set : Average Loss : 0.012624, Accuracy : 5592 / 10000 (56%)

Epoch [4 / 80], Step [100 / 500], Loss : 0.990

In [None]:
#1 - 3

class ResNet(nn.Module):
  """ResNet-style classifier whose stem uses InstanceNorm instead of BatchNorm.

  Layout: a stem (7x7 stride-2 convolution, ``InstanceNorm2d(64)``, ReLU,
  3x3 stride-2 max pool), four residual stages with 64/128/256/512 base
  channels, global average pooling and a linear classifier.

  Args:
    block: Residual block class. It is instantiated as
      ``block(in_channels, out_channels, stride)`` and must expose an integer
      ``expansion`` class attribute; the input width of the following block is
      taken to be ``out_channels * block.expansion``.
    num_block: Sequence of four ints giving the number of blocks per stage.
    num_classes: Number of output logits.
    init_weights: When True, re-initialise parameters via
      ``_initialize_weights``.
  """

  def __init__(self, block, num_block, num_classes = 10, init_weights = True):
    super().__init__()

    # Running channel count consumed (and updated) by _make_layer.
    self.in_channels = 64

    self.conv1 = nn.Sequential(
        nn.Conv2d(3, 64, kernel_size = 7, stride = 2, padding = 3, bias = False),
        # InstanceNorm2d takes the channel count (an int), not a full
        # [C, H, W] shape as LayerNorm does.
        nn.InstanceNorm2d(64),
        nn.ReLU(),
        nn.MaxPool2d(kernel_size = 3, stride = 2, padding = 1)
    )

    self.conv2_x = self._make_layer(block, 64, num_block[0], 1)
    self.conv3_x = self._make_layer(block, 128, num_block[1], 2)
    self.conv4_x = self._make_layer(block, 256, num_block[2], 2)
    self.conv5_x = self._make_layer(block, 512, num_block[3], 2)

    self.avg_pool = nn.AdaptiveAvgPool2d((1, 1))
    self.fc = nn.Linear(512 * block.expansion, num_classes)

    if init_weights:
      self._initialize_weights()

  def _make_layer(self, block, out_channels, num_blocks, stride):
    """Build one stage: the first block uses ``stride``, the rest use stride 1."""
    strides = [stride] + [1] * (num_blocks - 1)
    layers = []
    for stride in strides:
      layers.append(block(self.in_channels, out_channels, stride))
      self.in_channels = out_channels * block.expansion

    return nn.Sequential(*layers)

  def forward(self, x):
    """Map an image batch to a ``(batch, num_classes)`` tensor of logits."""
    output = self.conv1(x)
    output = self.conv2_x(output)
    x = self.conv3_x(output)
    x = self.conv4_x(x)
    x = self.conv5_x(x)
    x = self.avg_pool(x)
    x = x.view(x.size(0), -1)
    x = self.fc(x)
    return x

  def _initialize_weights(self):
    """Kaiming-normal convolutions, unit BatchNorm, N(0, 0.01) linear layers.

    The three module kinds are tested at the same nesting level; previously
    the BatchNorm and Linear branches were attached to the inner
    ``if m.bias is not None`` and therefore never ran.
    """
    for m in self.modules():
      if isinstance(m, nn.Conv2d):
        nn.init.kaiming_normal_(m.weight, mode = 'fan_out', nonlinearity = 'relu')
        if m.bias is not None:
          nn.init.constant_(m.bias, 0)
      elif isinstance(m, nn.BatchNorm2d):
        nn.init.constant_(m.weight, 1)
        nn.init.constant_(m.bias, 0)
      elif isinstance(m, nn.Linear):
        nn.init.normal_(m.weight, 0, 0.01)
        nn.init.constant_(m.bias, 0)

# Experiment 1-3: four stages of two BasicBlocks each, InstanceNorm stem.
# Place the model on the same `device` the batches are moved to, so the cell
# also runs on CPU-only hosts instead of failing in .cuda().
model = ResNet(BasicBlock, [2, 2, 2, 2]).to(device)

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr = learning_rate)

# One training pass followed by one evaluation pass per epoch.
for epoch in range(num_epochs):
  train(epoch)
  test()

Epoch [1 / 80], Step [100 / 500], Loss : 1.756659
Epoch [1 / 80], Step [200 / 500], Loss : 1.470392
Epoch [1 / 80], Step [300 / 500], Loss : 1.553804
Epoch [1 / 80], Step [400 / 500], Loss : 1.317049
Epoch [1 / 80], Step [500 / 500], Loss : 1.507332

Test Set : Average Loss : 0.013745, Accuracy : 5081 / 10000 (51%)

Epoch [2 / 80], Step [100 / 500], Loss : 1.179566
Epoch [2 / 80], Step [200 / 500], Loss : 1.305919
Epoch [2 / 80], Step [300 / 500], Loss : 1.314830
Epoch [2 / 80], Step [400 / 500], Loss : 1.106212
Epoch [2 / 80], Step [500 / 500], Loss : 1.125040

Test Set : Average Loss : 0.011429, Accuracy : 5980 / 10000 (60%)

Epoch [3 / 80], Step [100 / 500], Loss : 1.118409
Epoch [3 / 80], Step [200 / 500], Loss : 0.900647
Epoch [3 / 80], Step [300 / 500], Loss : 0.954486
Epoch [3 / 80], Step [400 / 500], Loss : 0.876627
Epoch [3 / 80], Step [500 / 500], Loss : 1.148413

Test Set : Average Loss : 0.010831, Accuracy : 6248 / 10000 (62%)

Epoch [4 / 80], Step [100 / 500], Loss : 1.079

In [None]:
#1 - 4
class ResNet(nn.Module):
  """ResNet-style classifier whose stem uses GroupNorm instead of BatchNorm.

  Layout: a stem (7x7 stride-2 convolution, ``GroupNorm(32, 64)`` i.e. 32
  groups over 64 channels, ReLU, 3x3 stride-2 max pool), four residual stages
  with 64/128/256/512 base channels, global average pooling and a linear
  classifier.

  Args:
    block: Residual block class. It is instantiated as
      ``block(in_channels, out_channels, stride)`` and must expose an integer
      ``expansion`` class attribute; the input width of the following block is
      taken to be ``out_channels * block.expansion``.
    num_block: Sequence of four ints giving the number of blocks per stage.
    num_classes: Number of output logits.
    init_weights: When True, re-initialise parameters via
      ``_initialize_weights``.
  """

  def __init__(self, block, num_block, num_classes = 10, init_weights = True):
    super().__init__()

    # Running channel count consumed (and updated) by _make_layer.
    self.in_channels = 64

    self.conv1 = nn.Sequential(
        nn.Conv2d(3, 64, kernel_size = 7, stride = 2, padding = 3, bias = False),
        nn.GroupNorm(32, 64),
        nn.ReLU(),
        nn.MaxPool2d(kernel_size = 3, stride = 2, padding = 1)
    )

    self.conv2_x = self._make_layer(block, 64, num_block[0], 1)
    self.conv3_x = self._make_layer(block, 128, num_block[1], 2)
    self.conv4_x = self._make_layer(block, 256, num_block[2], 2)
    self.conv5_x = self._make_layer(block, 512, num_block[3], 2)

    self.avg_pool = nn.AdaptiveAvgPool2d((1, 1))
    self.fc = nn.Linear(512 * block.expansion, num_classes)

    if init_weights:
      self._initialize_weights()

  def _make_layer(self, block, out_channels, num_blocks, stride):
    """Build one stage: the first block uses ``stride``, the rest use stride 1."""
    strides = [stride] + [1] * (num_blocks - 1)
    layers = []
    for stride in strides:
      layers.append(block(self.in_channels, out_channels, stride))
      self.in_channels = out_channels * block.expansion

    return nn.Sequential(*layers)

  def forward(self, x):
    """Map an image batch to a ``(batch, num_classes)`` tensor of logits."""
    output = self.conv1(x)
    output = self.conv2_x(output)
    x = self.conv3_x(output)
    x = self.conv4_x(x)
    x = self.conv5_x(x)
    x = self.avg_pool(x)
    x = x.view(x.size(0), -1)
    x = self.fc(x)
    return x

  def _initialize_weights(self):
    """Kaiming-normal convolutions, unit BatchNorm, N(0, 0.01) linear layers.

    The three module kinds are tested at the same nesting level; previously
    the BatchNorm and Linear branches were attached to the inner
    ``if m.bias is not None`` and therefore never ran.
    """
    for m in self.modules():
      if isinstance(m, nn.Conv2d):
        nn.init.kaiming_normal_(m.weight, mode = 'fan_out', nonlinearity = 'relu')
        if m.bias is not None:
          nn.init.constant_(m.bias, 0)
      elif isinstance(m, nn.BatchNorm2d):
        nn.init.constant_(m.weight, 1)
        nn.init.constant_(m.bias, 0)
      elif isinstance(m, nn.Linear):
        nn.init.normal_(m.weight, 0, 0.01)
        nn.init.constant_(m.bias, 0)

# Experiment 1-4: four stages of two BasicBlocks each, GroupNorm stem.
# Place the model on the same `device` the batches are moved to, so the cell
# also runs on CPU-only hosts instead of failing in .cuda().
model = ResNet(BasicBlock, [2, 2, 2, 2]).to(device)

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr = learning_rate)

# One training pass followed by one evaluation pass per epoch.
for epoch in range(num_epochs):
  train(epoch)
  test()

Epoch [1 / 80], Step [100 / 500], Loss : 1.765405
Epoch [1 / 80], Step [200 / 500], Loss : 1.435027
Epoch [1 / 80], Step [300 / 500], Loss : 1.541980
Epoch [1 / 80], Step [400 / 500], Loss : 1.283082
Epoch [1 / 80], Step [500 / 500], Loss : 1.523031

Test Set : Average Loss : 0.016112, Accuracy : 4534 / 10000 (45%)

Epoch [2 / 80], Step [100 / 500], Loss : 1.319729
Epoch [2 / 80], Step [200 / 500], Loss : 1.140185
Epoch [2 / 80], Step [300 / 500], Loss : 1.347153
Epoch [2 / 80], Step [400 / 500], Loss : 1.125389
Epoch [2 / 80], Step [500 / 500], Loss : 1.439319

Test Set : Average Loss : 0.015314, Accuracy : 5087 / 10000 (51%)

Epoch [3 / 80], Step [100 / 500], Loss : 1.055007
Epoch [3 / 80], Step [200 / 500], Loss : 1.063038
Epoch [3 / 80], Step [300 / 500], Loss : 0.974091
Epoch [3 / 80], Step [400 / 500], Loss : 1.159192
Epoch [3 / 80], Step [500 / 500], Loss : 1.142780

Test Set : Average Loss : 0.013272, Accuracy : 5523 / 10000 (55%)

Epoch [4 / 80], Step [100 / 500], Loss : 0.916

In [None]:
#2
# Experiment 2: deeper network with 3/4/6/3 BasicBlocks per stage.
# Place the model on the same `device` the batches are moved to, so the cell
# also runs on CPU-only hosts instead of failing in .cuda().
model = ResNet(BasicBlock, [3, 4, 6, 3]).to(device)

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr = learning_rate)

# One training pass followed by one evaluation pass per epoch.
for epoch in range(num_epochs):
  train(epoch)
  test()

Epoch [1 / 80], Step [100 / 500], Loss : 1.887846
Epoch [1 / 80], Step [200 / 500], Loss : 1.486607
Epoch [1 / 80], Step [300 / 500], Loss : 1.495817
Epoch [1 / 80], Step [400 / 500], Loss : 1.324360
Epoch [1 / 80], Step [500 / 500], Loss : 1.239718

Test Set : Average Loss : 0.015136, Accuracy : 4451 / 10000 (45%)

Epoch [2 / 80], Step [100 / 500], Loss : 1.354493
Epoch [2 / 80], Step [200 / 500], Loss : 1.299830
Epoch [2 / 80], Step [300 / 500], Loss : 1.301095
Epoch [2 / 80], Step [400 / 500], Loss : 1.076735
Epoch [2 / 80], Step [500 / 500], Loss : 1.009524

Test Set : Average Loss : 0.012907, Accuracy : 5466 / 10000 (55%)

Epoch [3 / 80], Step [100 / 500], Loss : 1.367111
Epoch [3 / 80], Step [200 / 500], Loss : 0.974575
Epoch [3 / 80], Step [300 / 500], Loss : 1.070308
Epoch [3 / 80], Step [400 / 500], Loss : 1.012188
Epoch [3 / 80], Step [500 / 500], Loss : 1.086865

Test Set : Average Loss : 0.011967, Accuracy : 5861 / 10000 (59%)

Epoch [4 / 80], Step [100 / 500], Loss : 0.873

In [None]:
#3 (Tried batch size 1 as well, but it still does not work...)
class BottleNeck(nn.Module):
  """Bottleneck residual block: 1x1 reduce, 3x3, 1x1 expand, plus shortcut.

  The main path is conv1x1 -> BatchNorm -> ReLU -> conv3x3 (carrying the
  stride) -> BatchNorm -> ReLU -> conv1x1 -> BatchNorm, ending with
  ``out_channels * expansion`` channels. The shortcut is an empty
  ``nn.Sequential`` (identity) unless the stride is not 1 or the input width
  differs from the expanded width, in which case it is a 1x1 convolution
  followed by BatchNorm. The two branches are summed and passed through a ReLU.

  Args:
    in_channels: Number of channels of the input tensor.
    out_channels: Width of the inner 1x1/3x3 convolutions.
    stride: Stride of the 3x3 convolution and of the projection shortcut.
  """
  expansion = 4

  def __init__(self, in_channels, out_channels, stride = 1):
    super().__init__()

    # Single source of truth for the output width: ResNet sizes the next
    # stage and the classifier from block.expansion, so the block must too.
    expanded_channels = out_channels * self.expansion

    self.residual_function = nn.Sequential(
        nn.Conv2d(in_channels, out_channels, kernel_size = 1, stride = 1, bias = False),
        nn.BatchNorm2d(out_channels),
        nn.ReLU(),
        nn.Conv2d(out_channels, out_channels, kernel_size = 3, stride = stride, padding = 1, bias = False),
        nn.BatchNorm2d(out_channels),
        nn.ReLU(),
        nn.Conv2d(out_channels, expanded_channels, kernel_size = 1, stride = 1, bias = False),
        nn.BatchNorm2d(expanded_channels),
    )

    self.shortcut = nn.Sequential()

    self.relu = nn.ReLU()

    if stride != 1 or in_channels != expanded_channels:
      self.shortcut = nn.Sequential(
          nn.Conv2d(in_channels, expanded_channels, kernel_size = 1, stride = stride, bias = False),
          nn.BatchNorm2d(expanded_channels)
      )

  def forward(self, x):
    """Return ``relu(residual_function(x) + shortcut(x))``."""
    x = self.residual_function(x) + self.shortcut(x)
    x = self.relu(x)
    return x

# Experiment 3: 3/4/6/3 BottleNeck blocks per stage.
# Place the model on the same `device` the batches are moved to, so the cell
# also runs on CPU-only hosts instead of failing in .cuda().
model = ResNet(BottleNeck, [3, 4, 6, 3]).to(device)

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr = learning_rate)

# One training pass followed by one evaluation pass per epoch.
for epoch in range(num_epochs):
  train(epoch)
  test()

Epoch [1 / 80], Step [100 / 500], Loss : 2.132398
Epoch [1 / 80], Step [200 / 500], Loss : 1.680669
Epoch [1 / 80], Step [300 / 500], Loss : 2.022648
Epoch [1 / 80], Step [400 / 500], Loss : 1.665032
Epoch [1 / 80], Step [500 / 500], Loss : 1.806519


OutOfMemoryError: ignored

In [None]:
#4
class ResNet(nn.Module):
  """ResNet-style classifier with Xavier-normal convolution initialisation.

  Layout: a stem (7x7 stride-2 convolution, BatchNorm, ReLU, 3x3 stride-2 max
  pool), four residual stages with 64/128/256/512 base channels, global
  average pooling and a linear classifier.

  Args:
    block: Residual block class. It is instantiated as
      ``block(in_channels, out_channels, stride)`` and must expose an integer
      ``expansion`` class attribute; the input width of the following block is
      taken to be ``out_channels * block.expansion``.
    num_block: Sequence of four ints giving the number of blocks per stage.
    num_classes: Number of output logits.
    init_weights: When True, re-initialise parameters via
      ``_initialize_weights``; pass False to keep PyTorch's default
      initialisation.
  """

  def __init__(self, block, num_block, num_classes = 10, init_weights = True):
    super().__init__()

    # Running channel count consumed (and updated) by _make_layer.
    self.in_channels = 64

    self.conv1 = nn.Sequential(
        nn.Conv2d(3, 64, kernel_size = 7, stride = 2, padding = 3, bias = False),
        nn.BatchNorm2d(64),
        nn.ReLU(),
        nn.MaxPool2d(kernel_size = 3, stride = 2, padding = 1)
    )

    self.conv2_x = self._make_layer(block, 64, num_block[0], 1)
    self.conv3_x = self._make_layer(block, 128, num_block[1], 2)
    self.conv4_x = self._make_layer(block, 256, num_block[2], 2)
    self.conv5_x = self._make_layer(block, 512, num_block[3], 2)

    self.avg_pool = nn.AdaptiveAvgPool2d((1, 1))
    # Scale by block.expansion so expanding blocks (e.g. bottlenecks) fit too.
    self.fc = nn.Linear(512 * block.expansion, num_classes)

    # Honour the flag; it used to be accepted but silently ignored.
    if init_weights:
      self._initialize_weights()

  def _make_layer(self, block, out_channels, num_blocks, stride):
    """Build one stage: the first block uses ``stride``, the rest use stride 1."""
    strides = [stride] + [1] * (num_blocks - 1)
    layers = []
    for stride in strides:
      layers.append(block(self.in_channels, out_channels, stride))
      self.in_channels = out_channels * block.expansion

    return nn.Sequential(*layers)

  def forward(self, x):
    """Map an image batch to a ``(batch, num_classes)`` tensor of logits."""
    output = self.conv1(x)
    output = self.conv2_x(output)
    x = self.conv3_x(output)
    x = self.conv4_x(x)
    x = self.conv5_x(x)
    x = self.avg_pool(x)
    x = x.view(x.size(0), -1)
    x = self.fc(x)
    return x

  def _initialize_weights(self):
    """Xavier-normal convolutions, unit BatchNorm, N(0, 0.01) linear layers.

    ``xavier_normal_`` only accepts a ``gain`` argument (it has no ``mode`` or
    ``nonlinearity`` parameters), so the ReLU gain is passed explicitly. The
    three module kinds are tested at the same nesting level so that the
    BatchNorm and Linear branches are reachable.
    """
    relu_gain = nn.init.calculate_gain('relu')
    for m in self.modules():
      if isinstance(m, nn.Conv2d):
        nn.init.xavier_normal_(m.weight, gain = relu_gain)
        if m.bias is not None:
          nn.init.constant_(m.bias, 0)
      elif isinstance(m, nn.BatchNorm2d):
        nn.init.constant_(m.weight, 1)
        nn.init.constant_(m.bias, 0)
      elif isinstance(m, nn.Linear):
        nn.init.normal_(m.weight, 0, 0.01)
        nn.init.constant_(m.bias, 0)

# Experiment 4: 3/4/6/3 BasicBlocks per stage using the ResNet defined in this cell.
# Place the model on the same `device` the batches are moved to, so the cell
# also runs on CPU-only hosts instead of failing in .cuda().
model = ResNet(BasicBlock, [3, 4, 6, 3]).to(device)

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr = learning_rate)

# One training pass followed by one evaluation pass per epoch.
for epoch in range(num_epochs):
  train(epoch)
  test()

Epoch [1 / 80], Step [100 / 500], Loss : 1.782318
Epoch [1 / 80], Step [200 / 500], Loss : 1.513158
Epoch [1 / 80], Step [300 / 500], Loss : 1.599254
Epoch [1 / 80], Step [400 / 500], Loss : 1.297476
Epoch [1 / 80], Step [500 / 500], Loss : 1.344003

Test Set : Average Loss : 0.021745, Accuracy : 3189 / 10000 (32%)

Epoch [2 / 80], Step [100 / 500], Loss : 1.367675
Epoch [2 / 80], Step [200 / 500], Loss : 1.445504
Epoch [2 / 80], Step [300 / 500], Loss : 1.074915
Epoch [2 / 80], Step [400 / 500], Loss : 1.070232
Epoch [2 / 80], Step [500 / 500], Loss : 1.044274

Test Set : Average Loss : 0.019417, Accuracy : 4326 / 10000 (43%)

Epoch [3 / 80], Step [100 / 500], Loss : 1.038119
Epoch [3 / 80], Step [200 / 500], Loss : 1.056049
Epoch [3 / 80], Step [300 / 500], Loss : 1.355870
Epoch [3 / 80], Step [400 / 500], Loss : 1.101899
Epoch [3 / 80], Step [500 / 500], Loss : 1.188467

Test Set : Average Loss : 0.013972, Accuracy : 5164 / 10000 (52%)

Epoch [4 / 80], Step [100 / 500], Loss : 0.899