In [None]:
# Training resnet18 on CIFAR10 from scratch

import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
import torch.nn.functional as F

device = "cuda" if torch.cuda.is_available() else "cpu"
print(device)

# --- Configuration: everything a reader is likely to tune -------------------
SEED = 0
BATCH_SIZE = 100
DATA_ROOT = 'data'
# Per-channel mean / std handed to transforms.Normalize. The same pair is used
# for the train and test pipelines so both splits are scaled identically.
NORM_MEAN = (0.4914, 0.4822, 0.4465)
NORM_STD = (0.2023, 0.1994, 0.2010)

epochs = 100
learning_rate = 1e-2

# Seed torch's global RNG so torch-driven randomness (weight init, DataLoader
# shuffling) repeats across Restart & Run All.
# NOTE(review): GPU kernels may still introduce run-to-run noise.
torch.manual_seed(SEED)

# Training pipeline: random crop (after 4px padding) and horizontal flip as
# augmentation, then tensor conversion and normalisation.
transform_train = transforms.Compose([
  transforms.RandomCrop(32, padding = 4),
  transforms.RandomHorizontalFlip(),
  transforms.ToTensor(),
  transforms.Normalize(NORM_MEAN, NORM_STD),
])

# Test pipeline: no augmentation, only tensor conversion and normalisation.
transform_test = transforms.Compose([
  transforms.ToTensor(),
  transforms.Normalize(NORM_MEAN, NORM_STD),
])

train_dataset = torchvision.datasets.CIFAR10(root = DATA_ROOT, train = True, download = True, transform = transform_train)
test_dataset = torchvision.datasets.CIFAR10(root = DATA_ROOT, train = False, download = True, transform = transform_test)

# Only the training loader shuffles; the test loader keeps dataset order.
train_loader = torch.utils.data.DataLoader(dataset = train_dataset, batch_size = BATCH_SIZE, shuffle = True)
test_loader = torch.utils.data.DataLoader(dataset = test_dataset, batch_size = BATCH_SIZE)

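# Alternative (unused): torchvision's built-in resnet18 with its first conv replaced by a 3x3 one.
# Left commented out; the hand-written ResNet defined below is what this cell trains.
# model = torchvision.models.resnet18(num_classes = 10)
# model.conv1 = nn.Conv2d(3, 64, kernel_size = 3, padding = 1, bias = False)
# model = model.to(device)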

class BasicBlock(nn.Module):
  """Residual block built from two 3x3 convolutions.

  Main path: conv3x3 -> BN -> ReLU -> conv3x3 -> BN. The shortcut output is
  added to the main path and a final ReLU is applied to the sum.

  Args:
    in_planes: number of channels of the incoming feature map.
    planes: number of channels produced by both convolutions.
    stride: stride of the first convolution (and of the projection shortcut,
      when one is needed).
  """
  # Output channels of the block are `expansion * planes`.
  expansion = 1

  def __init__(self, in_planes, planes, stride=1):
    super().__init__()
    out_planes = self.expansion * planes

    self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3,
                           stride=stride, padding=1, bias=False)
    self.bn1 = nn.BatchNorm2d(planes)
    self.conv2 = nn.Conv2d(planes, planes, kernel_size=3,
                           stride=1, padding=1, bias=False)
    self.bn2 = nn.BatchNorm2d(planes)

    # An empty Sequential passes its input through unchanged; a 1x1 conv + BN
    # projection is used only when the spatial size or channel count changes.
    shape_changes = stride != 1 or in_planes != out_planes
    if shape_changes:
      self.shortcut = nn.Sequential(
          nn.Conv2d(in_planes, out_planes, kernel_size=1,
                    stride=stride, bias=False),
          nn.BatchNorm2d(out_planes),
      )
    else:
      self.shortcut = nn.Sequential()

  def forward(self, x):
    """Return ReLU(main_path(x) + shortcut(x))."""
    hidden = F.relu(self.bn1(self.conv1(x)))
    summed = self.bn2(self.conv2(hidden))
    summed += self.shortcut(x)
    return F.relu(summed)


class Bottleneck(nn.Module):
  """Residual block with a 1x1 -> 3x3 -> 1x1 convolution stack.

  Main path: conv1x1 -> BN -> ReLU -> conv3x3 -> BN -> ReLU -> conv1x1 -> BN.
  The shortcut output is added to the main path and a final ReLU is applied.

  Args:
    in_planes: number of channels of the incoming feature map.
    planes: width of the first two convolutions; the block outputs
      `expansion * planes` channels.
    stride: stride of the 3x3 convolution (and of the projection shortcut,
      when one is needed).
  """
  # Output channels of the block are `expansion * planes`.
  expansion = 4

  def __init__(self, in_planes, planes, stride=1):
    super().__init__()
    out_planes = self.expansion * planes

    self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, bias=False)
    self.bn1 = nn.BatchNorm2d(planes)
    self.conv2 = nn.Conv2d(planes, planes, kernel_size=3,
                           stride=stride, padding=1, bias=False)
    self.bn2 = nn.BatchNorm2d(planes)
    self.conv3 = nn.Conv2d(planes, out_planes, kernel_size=1, bias=False)
    self.bn3 = nn.BatchNorm2d(out_planes)

    # An empty Sequential passes its input through unchanged; a 1x1 conv + BN
    # projection is used only when the spatial size or channel count changes.
    shape_changes = stride != 1 or in_planes != out_planes
    if shape_changes:
      self.shortcut = nn.Sequential(
          nn.Conv2d(in_planes, out_planes, kernel_size=1,
                    stride=stride, bias=False),
          nn.BatchNorm2d(out_planes),
      )
    else:
      self.shortcut = nn.Sequential()

  def forward(self, x):
    """Return ReLU(main_path(x) + shortcut(x))."""
    hidden = F.relu(self.bn1(self.conv1(x)))
    hidden = F.relu(self.bn2(self.conv2(hidden)))
    summed = self.bn3(self.conv3(hidden))
    summed += self.shortcut(x)
    return F.relu(summed)


class ResNet(nn.Module):
  """ResNet with a stride-1 3x3 stem and four residual stages.

  The stem is conv3x3 -> BN -> ReLU with 64 channels. The four stages have
  base widths 64, 128, 256 and 512; stages 2-4 start with a stride-2 block.
  The final feature map is globally average-pooled and fed to one linear layer.

  Args:
    block: residual block class. It must expose an `expansion` class attribute
      and be constructible as `block(in_planes, planes, stride)`.
    num_blocks: sequence of four ints, the number of blocks in each stage.
    num_classes: size of the output logits vector.
  """

  def __init__(self, block, num_blocks, num_classes=10):
    super().__init__()
    # Channel count entering the next block; _make_layer reads and updates it.
    self.in_planes = 64

    self.conv1 = nn.Conv2d(3, 64, kernel_size=3,
                           stride=1, padding=1, bias=False)
    self.bn1 = nn.BatchNorm2d(64)
    self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1)
    self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2)
    self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2)
    self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2)
    self.linear = nn.Linear(512*block.expansion, num_classes)

  def _make_layer(self, block, planes, num_blocks, stride):
    """Build one stage of `num_blocks` blocks.

    Only the first block uses `stride`; the remaining blocks use stride 1.
    `self.in_planes` is advanced to `planes * block.expansion` after each block
    so the next block (and the next stage) receives the right input width.
    """
    layers = []
    # A separate loop name avoids shadowing the `stride` parameter.
    for block_stride in [stride] + [1]*(num_blocks-1):
      layers.append(block(self.in_planes, planes, block_stride))
      self.in_planes = planes * block.expansion
    return nn.Sequential(*layers)

  def forward(self, x):
    """Map an image batch (N, 3, H, W) to logits (N, num_classes)."""
    out = F.relu(self.bn1(self.conv1(x)))
    out = self.layer1(out)
    out = self.layer2(out)
    out = self.layer3(out)
    out = self.layer4(out)
    # Global average pool to 1x1. With the residual blocks defined in this file
    # a 32x32 input ends as a 4x4 map, so this matches the previous fixed
    # `avg_pool2d(out, 4)` up to float rounding, while larger inputs no longer
    # produce a feature vector too long for `self.linear`.
    out = F.adaptive_avg_pool2d(out, 1)
    out = out.flatten(1)
    out = self.linear(out)
    return out

# ResNet-18 layout: BasicBlock with two blocks in each of the four stages.
model = ResNet(BasicBlock, [2, 2, 2, 2]).to(device)

loss = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr = learning_rate, momentum = 0.9, weight_decay = 1e-4)
# The scheduler is stepped once per batch, so its horizon is
# epochs * batches-per-epoch. Deriving it from the loader (rather than a
# literal step count) keeps the schedule valid if `epochs` or the batch size
# changes. Note that OneCycleLR sets the optimizer's learning rate itself from
# `max_lr`, so the `lr` passed to SGD above is overwritten.
scheduler = torch.optim.lr_scheduler.OneCycleLR(
    optimizer, max_lr = 0.1, epochs = epochs, steps_per_epoch = len(train_loader))

for epoch in range(epochs):
  # The evaluation pass below switches the model to eval mode. Switch back at
  # the start of every epoch, otherwise epochs 2+ would train with BatchNorm
  # frozen in inference mode.
  model.train()
  for x, y in train_loader:
    x = x.to(device)
    y = y.to(device)
    pred = model(x)
    cost = loss(pred, y)
    optimizer.zero_grad()
    cost.backward()
    optimizer.step()
    scheduler.step()

  # Evaluate on the test split after each epoch.
  model.eval()
  correct = 0
  total = 0
  # No gradients are needed for evaluation; skipping autograd saves memory.
  with torch.no_grad():
    for x, y in test_loader:
      x = x.to(device)
      y = y.to(device)
      pred = model(x)
      total += y.size(0)
      # .item() keeps the running count a plain Python int.
      correct += (pred.argmax(1) == y).sum().item()
  print(f'Epoch: {epoch + 1}, Accuracy: {100 * float(correct) / total}%')

cuda
Files already downloaded and verified
Files already downloaded and verified
Epoch: 1, Accuracy: 54.03%


KeyboardInterrupt: ignored

In [None]:
# Fine-tuning pretrained resnet18 on CIFAR10

import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
import torch.nn.functional as F

device = "cuda" if torch.cuda.is_available() else "cpu"
print(device)

# --- Configuration: everything a reader is likely to tune -------------------
SEED = 0
BATCH_SIZE = 100
DATA_ROOT = 'data'
IMAGE_SIZE = 224   # side length of the crops fed to the network
RESIZE_SIZE = 256  # test images are resized to this before the centre crop
# Per-channel mean / std handed to transforms.Normalize, shared by train and test.
# NOTE(review): these look like the ImageNet statistics that torchvision's
# pretrained weights expect — confirm.
NORM_MEAN = [0.485, 0.456, 0.406]
NORM_STD = [0.229, 0.224, 0.225]

epochs = 30
learning_rate = 1e-2

# Seed torch's global RNG so torch-driven randomness (head init, DataLoader
# shuffling) repeats across Restart & Run All.
# NOTE(review): GPU kernels may still introduce run-to-run noise.
torch.manual_seed(SEED)

# Training pipeline: random resized crop and horizontal flip as augmentation,
# then tensor conversion and normalisation.
transform_train = transforms.Compose([
  transforms.RandomResizedCrop(IMAGE_SIZE),
  transforms.RandomHorizontalFlip(),
  transforms.ToTensor(),
  transforms.Normalize(NORM_MEAN, NORM_STD),
])

# Test pipeline: deterministic resize + centre crop to the same final size.
transform_test = transforms.Compose([
  transforms.Resize(RESIZE_SIZE),
  transforms.CenterCrop(IMAGE_SIZE),
  transforms.ToTensor(),
  transforms.Normalize(NORM_MEAN, NORM_STD),
])

train_dataset = torchvision.datasets.CIFAR10(root = DATA_ROOT, train = True, download = True, transform = transform_train)
test_dataset = torchvision.datasets.CIFAR10(root = DATA_ROOT, train = False, download = True, transform = transform_test)

# Only the training loader shuffles; the test loader keeps dataset order.
train_loader = torch.utils.data.DataLoader(dataset = train_dataset, batch_size = BATCH_SIZE, shuffle = True)
test_loader = torch.utils.data.DataLoader(dataset = test_dataset, batch_size = BATCH_SIZE)

# Pretrained torchvision ResNet-18 used as a frozen feature extractor: every
# pretrained parameter has gradients disabled, then the final fully connected
# layer is replaced by a new 10-way head (new layers require grad by default).
# NOTE(review): newer torchvision releases replace `pretrained=` with a
# `weights=` argument — confirm against the installed version before changing.
model = torchvision.models.resnet18(pretrained = True)
model.requires_grad_(False)
model.fc = nn.Linear(model.fc.in_features, 10)
model = model.to(device)

loss = nn.CrossEntropyLoss()
# Only the new head's parameters are handed to the optimizer.
optimizer = torch.optim.SGD(
    model.fc.parameters(),
    lr = learning_rate,
    momentum = 0.9,
    weight_decay = 1e-4,
)
# Multiply the learning rate by 0.2 every 10 scheduler steps (one step per epoch).
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size = 10, gamma = 0.2)

for epoch in range(epochs):
  # The evaluation pass below switches the model to eval mode. Switch back at
  # the start of every epoch so all epochs train in the same mode; previously
  # only epoch 1 ran in train mode and every later epoch ran in eval mode.
  # NOTE(review): in train mode BatchNorm layers keep updating their running
  # statistics even though their weights are frozen. If a strictly fixed
  # feature extractor is wanted, keep the backbone in eval mode instead.
  model.train()
  for x, y in train_loader:
    x = x.to(device)
    y = y.to(device)
    pred = model(x)
    cost = loss(pred, y)
    optimizer.zero_grad()
    cost.backward()
    optimizer.step()
  # StepLR is advanced once per epoch, after that epoch's optimizer steps.
  scheduler.step()

  # Evaluate on the test split after each epoch.
  model.eval()
  correct = 0
  total = 0
  # No gradients are needed for evaluation; skipping autograd saves memory.
  with torch.no_grad():
    for x, y in test_loader:
      x = x.to(device)
      y = y.to(device)
      pred = model(x)
      total += y.size(0)
      # .item() keeps the running count a plain Python int.
      correct += (pred.argmax(1) == y).sum().item()
  print(f'Epoch: {epoch + 1}, Accuracy: {100 * float(correct) / total}%')

cuda
Files already downloaded and verified
Files already downloaded and verified
Epoch: 1, Accuracy: 70.43%
Epoch: 2, Accuracy: 72.96%
Epoch: 3, Accuracy: 74.33%
Epoch: 4, Accuracy: 74.71%
Epoch: 5, Accuracy: 73.24%
Epoch: 6, Accuracy: 74.05%
Epoch: 7, Accuracy: 72.61%
Epoch: 8, Accuracy: 74.59%
Epoch: 9, Accuracy: 72.21%
Epoch: 10, Accuracy: 74.12%
Epoch: 11, Accuracy: 75.47%
Epoch: 12, Accuracy: 75.8%
Epoch: 13, Accuracy: 75.58%
Epoch: 14, Accuracy: 75.3%
Epoch: 15, Accuracy: 75.89%
Epoch: 16, Accuracy: 75.72%
Epoch: 17, Accuracy: 76.03%
Epoch: 18, Accuracy: 75.22%
Epoch: 19, Accuracy: 75.41%
Epoch: 20, Accuracy: 75.81%
Epoch: 21, Accuracy: 75.73%
Epoch: 22, Accuracy: 75.74%
Epoch: 23, Accuracy: 75.69%
Epoch: 24, Accuracy: 75.84%
Epoch: 25, Accuracy: 75.89%
Epoch: 26, Accuracy: 75.97%
Epoch: 27, Accuracy: 75.71%
Epoch: 28, Accuracy: 75.92%
Epoch: 29, Accuracy: 75.8%
Epoch: 30, Accuracy: 75.93%
