In [1]:
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
import torch.nn.functional as F

In [7]:
# Training resnet18 on CIFAR10 from scratch

device = "cuda" if torch.cuda.is_available() else "cpu"
print(device)

epochs = 40
learning_rate = 1e-2

transform_train = transforms.Compose([
  transforms.RandomCrop(32, padding = 4),
  transforms.RandomHorizontalFlip(),
  transforms.ToTensor(),
  transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
])

transform_test = transforms.Compose([
  transforms.ToTensor(),
  transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
])

train_dataset = torchvision.datasets.CIFAR10(root='data', train = True, download = True, transform = transform_train)
test_dataset = torchvision.datasets.CIFAR10(root='data', train = False, download = True, transform = transform_test)

train_loader = torch.utils.data.DataLoader(dataset = train_dataset, batch_size = 100, shuffle = True)
test_loader = torch.utils.data.DataLoader(dataset = test_dataset, batch_size = 100)

# model = torchvision.models.resnet18(num_classes = 10)
# model.conv1 = nn.Conv2d(3, 64, kernel_size = 3, padding = 1, bias = False)
# model = model.to(device)

class BasicBlock(nn.Module):
  expansion = 1

  def __init__(self, in_planes, planes, stride=1):
    super(BasicBlock, self).__init__()
    self.conv1 = nn.Conv2d(
        in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
    self.bn1 = nn.BatchNorm2d(planes)
    self.conv2 = nn.Conv2d(planes, planes, kernel_size=3,
                            stride=1, padding=1, bias=False)
    self.bn2 = nn.BatchNorm2d(planes)

    self.shortcut = nn.Sequential()
    if stride != 1 or in_planes != self.expansion*planes:
        self.shortcut = nn.Sequential(
            nn.Conv2d(in_planes, self.expansion*planes,
                      kernel_size=1, stride=stride, bias=False),
            nn.BatchNorm2d(self.expansion*planes)
        )

  def forward(self, x):
    out = F.relu(self.bn1(self.conv1(x)))
    out = self.bn2(self.conv2(out))
    out += self.shortcut(x)
    out = F.relu(out)
    return out


class Bottleneck(nn.Module):
  expansion = 4

  def __init__(self, in_planes, planes, stride=1):
    super(Bottleneck, self).__init__()
    self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, bias=False)
    self.bn1 = nn.BatchNorm2d(planes)
    self.conv2 = nn.Conv2d(planes, planes, kernel_size=3,
                            stride=stride, padding=1, bias=False)
    self.bn2 = nn.BatchNorm2d(planes)
    self.conv3 = nn.Conv2d(planes, self.expansion *
                            planes, kernel_size=1, bias=False)
    self.bn3 = nn.BatchNorm2d(self.expansion*planes)

    self.shortcut = nn.Sequential()
    if stride != 1 or in_planes != self.expansion*planes:
        self.shortcut = nn.Sequential(
            nn.Conv2d(in_planes, self.expansion*planes,
                      kernel_size=1, stride=stride, bias=False),
            nn.BatchNorm2d(self.expansion*planes)
        )

  def forward(self, x):
    out = F.relu(self.bn1(self.conv1(x)))
    out = F.relu(self.bn2(self.conv2(out)))
    out = self.bn3(self.conv3(out))
    out += self.shortcut(x)
    out = F.relu(out)
    return out


class ResNet(nn.Module):
  def __init__(self, block, num_blocks, num_classes=10):
    super(ResNet, self).__init__()
    self.in_planes = 64

    self.conv1 = nn.Conv2d(3, 64, kernel_size=3,
                            stride=1, padding=1, bias=False)
    self.bn1 = nn.BatchNorm2d(64)
    self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1)
    self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2)
    self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2)
    self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2)
    self.linear = nn.Linear(512*block.expansion, num_classes)

  def _make_layer(self, block, planes, num_blocks, stride):
    strides = [stride] + [1]*(num_blocks-1)
    layers = []
    for stride in strides:
      layers.append(block(self.in_planes, planes, stride))
      self.in_planes = planes * block.expansion
    return nn.Sequential(*layers)

  def forward(self, x):
    out = F.relu(self.bn1(self.conv1(x)))
    out = self.layer1(out)
    out = self.layer2(out)
    out = self.layer3(out)
    out = self.layer4(out)
    out = F.avg_pool2d(out, 4)
    out = out.view(out.size(0), -1)
    out = self.linear(out)
    return out

model = ResNet(BasicBlock, [2, 2, 2, 2]).to(device)

loss = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr = learning_rate, momentum = 0.9, weight_decay = 1e-4)
scheduler = torch.optim.lr_scheduler.OneCycleLR(optimizer, max_lr = 0.1, total_steps = 20000)

for epoch in range(epochs):
  for x, y in train_loader:
    x = x.to(device)
    y = y.to(device)
    pred = model(x)
    cost = loss(pred, y)
    optimizer.zero_grad()
    cost.backward()
    optimizer.step()
    scheduler.step()
  model.eval()
  correct = 0
  total = 0
  for x, y in test_loader:
    x = x.to(device)
    pred = model(x)
    total += y.size(0)
    correct += (pred.argmax(1) == y.to(device)).sum()
  print(f'Epoch: {epoch + 1}, Accuracy: {100 * float(correct) / total}%')

cuda
Files already downloaded and verified
Files already downloaded and verified
Epoch: 1, Accuracy: 52.8%
Epoch: 2, Accuracy: 61.9%
Epoch: 3, Accuracy: 64.86%
Epoch: 4, Accuracy: 68.83%
Epoch: 5, Accuracy: 76.94%
Epoch: 6, Accuracy: 77.12%
Epoch: 7, Accuracy: 80.86%
Epoch: 8, Accuracy: 82.54%
Epoch: 9, Accuracy: 83.27%
Epoch: 10, Accuracy: 82.34%
Epoch: 11, Accuracy: 84.15%
Epoch: 12, Accuracy: 84.33%
Epoch: 13, Accuracy: 86.33%
Epoch: 14, Accuracy: 86.03%
Epoch: 15, Accuracy: 87.06%
Epoch: 16, Accuracy: 87.41%
Epoch: 17, Accuracy: 87.91%
Epoch: 18, Accuracy: 88.93%
Epoch: 19, Accuracy: 88.53%
Epoch: 20, Accuracy: 89.7%
Epoch: 21, Accuracy: 87.77%
Epoch: 22, Accuracy: 88.56%
Epoch: 23, Accuracy: 88.49%
Epoch: 24, Accuracy: 89.03%
Epoch: 25, Accuracy: 90.25%
Epoch: 26, Accuracy: 90.05%
Epoch: 27, Accuracy: 89.54%
Epoch: 28, Accuracy: 90.02%
Epoch: 29, Accuracy: 90.71%
Epoch: 30, Accuracy: 91.35%
Epoch: 31, Accuracy: 90.31%
Epoch: 32, Accuracy: 91.7%
Epoch: 33, Accuracy: 91.55%
Epoch: 3

In [6]:
# Fine-tuning pretrained resnet18 on CIFAR10 (last layer / entire network)

device = "cuda" if torch.cuda.is_available() else "cpu"
print(device)

epochs = 40
learning_rate = 1e-3

transform_train = transforms.Compose([
  transforms.RandomResizedCrop(224),
  transforms.RandomHorizontalFlip(),
  transforms.ToTensor(),
  transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),                            
])

transform_test = transforms.Compose([
  transforms.Resize(256),                              
  transforms.CenterCrop(224),
  transforms.ToTensor(),
  transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),                            
])

train_dataset = torchvision.datasets.CIFAR10(root='data', train = True, download = True, transform = transform_train)
test_dataset = torchvision.datasets.CIFAR10(root='data', train = False, download = True, transform = transform_test)

train_loader = torch.utils.data.DataLoader(dataset = train_dataset, batch_size = 100, shuffle = True)
test_loader = torch.utils.data.DataLoader(dataset = test_dataset, batch_size = 100)

model = torchvision.models.resnet18(pretrained = True)
# for param in model.parameters():
#   param.requires_grad = False
model.fc = nn.Linear(512, 10)
model = model.to(device)

not_fc = [param for name, param in model.named_parameters() if name not in ['fc.weight', 'fc.bias']]

loss = nn.CrossEntropyLoss()
# optimizer = torch.optim.SGD(model.fc.parameters(), lr = learning_rate, momentum = 0.9, weight_decay = 1e-4)
optimizer = torch.optim.SGD([{'params': not_fc}, {'params': model.fc.parameters(), 'lr': learning_rate * 10}], lr = learning_rate, momentum = 0.9, weight_decay = 1e-4)
scheduler = torch.optim.lr_scheduler.OneCycleLR(optimizer, max_lr = 1e-2, total_steps = 20000)

for epoch in range(epochs):
  for x, y in train_loader:
    x = x.to(device)
    y = y.to(device)
    pred = model(x)
    cost = loss(pred, y)
    optimizer.zero_grad()
    cost.backward()
    optimizer.step()
  scheduler.step()
  model.eval()
  correct = 0
  total = 0
  for x, y in test_loader:
    x = x.to(device)
    pred = model(x)
    total += y.size(0)
    correct += (pred.argmax(1) == y.to(device)).sum()
  print(f'Epoch: {epoch + 1}, Accuracy: {100 * float(correct) / total}%')

cuda
Files already downloaded and verified
Files already downloaded and verified
Epoch: 1, Accuracy: 86.07%
Epoch: 2, Accuracy: 89.61%
Epoch: 3, Accuracy: 90.89%
Epoch: 4, Accuracy: 90.82%
Epoch: 5, Accuracy: 92.18%
Epoch: 6, Accuracy: 92.66%
Epoch: 7, Accuracy: 92.47%
Epoch: 8, Accuracy: 92.96%
Epoch: 9, Accuracy: 93.86%
Epoch: 10, Accuracy: 93.74%
Epoch: 11, Accuracy: 93.16%
Epoch: 12, Accuracy: 93.59%
Epoch: 13, Accuracy: 94.29%
Epoch: 14, Accuracy: 94.44%
Epoch: 15, Accuracy: 94.31%
Epoch: 16, Accuracy: 94.19%
Epoch: 17, Accuracy: 94.5%
Epoch: 18, Accuracy: 94.38%
Epoch: 19, Accuracy: 94.04%
Epoch: 20, Accuracy: 94.58%
Epoch: 21, Accuracy: 94.73%
Epoch: 22, Accuracy: 94.67%
Epoch: 23, Accuracy: 94.64%
Epoch: 24, Accuracy: 95.09%
Epoch: 25, Accuracy: 94.6%
Epoch: 26, Accuracy: 94.67%
Epoch: 27, Accuracy: 95.2%
Epoch: 28, Accuracy: 94.59%
Epoch: 29, Accuracy: 95.21%
Epoch: 30, Accuracy: 94.67%
Epoch: 31, Accuracy: 94.86%
Epoch: 32, Accuracy: 95.19%
Epoch: 33, Accuracy: 95.0%
Epoch: 3