In [0]:
import torch
import torch.nn as nn
import torchvision
import matplotlib.pyplot as plt
from torchvision import transforms
from torch.utils.data import dataset, dataloader
import torch.nn.functional as F
import torch.optim as optim
from torch.autograd import Variable

In [2]:
!pip show torch

Name: torch
Version: 1.5.0+cu101
Summary: Tensors and Dynamic neural networks in Python with strong GPU acceleration
Home-page: https://pytorch.org/
Author: PyTorch Team
Author-email: packages@pytorch.org
License: BSD-3
Location: /usr/local/lib/python3.6/dist-packages
Requires: numpy, future
Required-by: torchvision, torchtext, fastai


In [3]:
device = torch.device("cuda: 0" if torch.cuda.is_available() else "cpu")
print(device)

cuda:0


> *Having learned PyTorch for 4 days. Okay, let's implement ResNet baseline for each configuration. The implementation based on this diagram:*

 ![alt text](https://miro.medium.com/max/1400/0*pkrso8DZa0m6IAcJ.png)

In [0]:
class ResidualBlock(nn.Module):
  def __init__(self, in_channels, out_channels, expansion, stride, kind):
    super(ResidualBlock, self).__init__()
    self.in_channels = in_channels
    self.out_channels = out_channels
    self.expansion = expansion
    self.stride = stride
    if kind == "Advanced":
      self.residual_function = nn.Sequential(
        nn.Conv2d(self.in_channels, self.out_channels, kernel_size = 1, bias = False, stride = self.stride),
        nn.BatchNorm2d(self.out_channels),
        nn.ReLU(inplace = True),
        nn.Conv2d(self.out_channels, self.out_channels, kernel_size = 3, padding = 1, bias = False),
        nn.BatchNorm2d(self.out_channels),
        nn.ReLU(inplace = True),
        nn.Conv2d(self.out_channels, self.out_channels*self.expansion, kernel_size = 1, stride = 1, bias = False),
        nn.BatchNorm2d(self.out_channels*self.expansion))
    elif kind == "Basic":
      self.residual_function = nn.Sequential(
        nn.Conv2d(self.in_channels, self.out_channels, kernel_size = 1, bias = False, stride = self.stride),
        nn.BatchNorm2d(self.out_channels),
        nn.ReLU(inplace = True),
        nn.Conv2d(self.out_channels, self.out_channels*self.expansion, kernel_size = 3, padding = 1),
        nn.BatchNorm2d(self.out_channels*self.expansion),
        )
    self.short_cut = nn.Sequential()
    if stride != 1 or self.in_channels != self.out_channels*expansion:
      self.short_cut = nn.Sequential(
        nn.Conv2d(self.in_channels, self.out_channels*self.expansion, kernel_size = 1, stride = self.stride, bias = False),
        nn.BatchNorm2d(self.out_channels*expansion))
  def forward(self, x):
    return F.relu(self.residual_function(x) + self.short_cut(x))

In [0]:
class ResNetUniversial(nn.Module):
  def __init__(self, kind, cfg, classes, input_shape = (3,32,32)):
    super(ResNetUniversial, self).__init__()
    if kind == "deep":
      self.expansion = 4
      self.kind = "Advanced"
    elif kind == "shallow":
      self.expansion = 1
      self.kind = "Basic"
    self.conv1 = nn.Sequential(
      nn.Conv2d(3, 64, stride = 2, kernel_size = 7, padding = 3, bias = False),
      nn.BatchNorm2d(64),
      nn.ReLU(inplace = True))
    self.stage1 = self._make_blocks(64, 64, cfg[0], 1)
    self.stage2 = self._make_blocks(256, 128, cfg[1], 2)
    self.stage3 = self._make_blocks(512, 256, cfg[2], 2)
    self.stage4 = self._make_blocks(1024, 512, cfg[3], 2)
    self.avg = nn.AvgPool2d((2,2))
    self.dense = nn.Linear(2048, 10)
  def forward(self, input):
    x = self.conv1(input)
    x = self.stage1(x)
    x = self.stage2(x)
    x = self.stage3(x)
    x = self.stage4(x)
    x = self.avg(x)
    x = x.view(x.size()[0], -1)
    output = self.dense(x)
    return F.softmax(output)
  def _make_blocks(self, in_channels, out_channels, num_blocks, _stride):
    strides = [_stride] + [1]*(num_blocks - 1)
    layers = []
    current_inputs_shape = in_channels
    for (i,stride) in enumerate(strides):
      if i == 0:
        layers.append(ResidualBlock(in_channels, out_channels, self.expansion, stride, self.kind))
      else:
        layers.append(ResidualBlock(out_channels*self.expansion, out_channels, self.expansion, stride, self.kind))
    return nn.Sequential(*layers)

In [0]:
ResNet34 = ResNetUniversial("shallow", [3,4,6,3], 10)

In [0]:
ResNet50 = ResNetUniversial("deep", [3,4,6,3], 10)

In [0]:
ResNet101 = ResNetUniversial("deep", [3,4,23,8], 10)

In [0]:
ResNet152 = ResNetUniversial("deep", [3,8,36,3], 10)

In [0]:
loss = nn.CrossEntropyLoss()
optimizer = optim.Adam(ResNet50.parameters(), lr = 0.001)

In [53]:
transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

trainset = torchvision.datasets.CIFAR10(root='./data', train=True,
                                        download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=4,
                                          shuffle=True, num_workers=2)

testset = torchvision.datasets.CIFAR10(root='./data', train=False,
                                       download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=4,
                                         shuffle=False, num_workers=2)

classes = ('plane', 'car', 'bird', 'cat',
           'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

Files already downloaded and verified
Files already downloaded and verified


In [0]:
def fit(model, criterion, optimizer, n_epochs, train_dl, val_dl):
    for epoch in range(n_epochs):
        print(f"Epoch {epoch+1}/{n_epochs} ...")

        # Train
        model.train()  # IMPORTANT
        running_loss, correct = 0.0, 0
        for X, y in train_dl:
            X, y = X.to(device), y.to(device)

            optimizer.zero_grad()
            y_ = model(X)
            loss = criterion(y_, y)

            loss.backward()
            optimizer.step()

            # Statistics
            #print(f"    batch loss: {loss.item():0.3f}")
            _, y_label_ = torch.max(y_, 1)
            correct += (y_label_ == y).sum().item()
            running_loss += loss.item() * X.shape[0]
        print(
            f"  "
            f"loss: {running_loss / len(train_dl.dataset):0.3f} "
            f"acc:  {correct / len(train_dl.dataset):0.3f}"
        )

        # Eval
        model.eval()  # IMPORTANT
        running_loss, correct = 0.0, 0
        with torch.no_grad():  # IMPORTANT
            for X, y in val_dl:
                X, y = X.to(device), y.to(device)

                y_ = model(X)
                loss = criterion(y_, y)

                _, y_label_ = torch.max(y_, 1)
                correct += (y_label_ == y).sum().item()            
                running_loss += loss.item() * X.shape[0]
        print(
            f"  "
            f"val_loss: {running_loss / len(val_dl.dataset):0.3f} "
            f"val_acc:  {correct / len(val_dl.dataset):0.3f}"
        )

In [87]:
ResNet50.to(device)

ResNetUniversial(
  (conv1): Sequential(
    (0): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace=True)
  )
  (stage1): Sequential(
    (0): ResidualBlock(
      (residual_function): Sequential(
        (0): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (2): ReLU(inplace=True)
        (3): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (4): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (5): ReLU(inplace=True)
        (6): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (7): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
      (short_cut): Sequential(
        (0): Conv2d(64, 256, kernel_size

In [0]:
fit(ResNet50, loss, optimizer, 50, trainloader, testloader)

Epoch 1/50 ...




  loss: 2.360 acc:  0.101
  val_loss: 2.361 val_acc:  0.100
Epoch 2/50 ...
