In [22]:
import torch
import torch.nn as nn
import torchvision.datasets as dset
import torchvision.transforms as T
import torchvision.models as models
from torchsummary import summary

import sophius.templates as tmpl
import sophius.utils as utils
import sophius.dataload as dload
from sophius.train import train_express_gpu, validate_model

%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [2]:
# Fetch (or reuse an existing download of) the CIFAR-10 training split under
# data//CIFAR10; every image is passed through T.ToTensor().
cifar10 = dset.CIFAR10(
    root='data//CIFAR10',
    train=True,
    transform=T.ToTensor(),
    download=True,
)
# Hand the dataset to the project helper. Judging by its name it stages the
# data on the GPU -- NOTE(review): confirm against sophius.dataload.
cifar_gpu = dload.cifar_to_gpu(cifar10)

Files already downloaded and verified


In [3]:
# Presumably the number of samples held out for validation; it is passed as
# the second argument to get_loader_gpu below.
NUM_VAL = 1024
# NOTE(review): the third positional argument (256) is presumably the batch
# size -- confirm against sophius.dataload.get_loader_gpu.
loader_gpu = dload.get_loader_gpu(cifar_gpu, NUM_VAL, 256)

In [46]:
class _ResNetBlock(nn.Module):
    def __init__(self, in_channels, out_channels):
        super().__init__()
        self.relu = nn.ReLU(inplace=True)

    def forward(self, x):
        out = self._shortcut(x) + self._block(x)
        out = self.relu(out)
        return out

class ResNetBlock(_ResNetBlock):
    """Residual block built from two 3x3, stride-1 convolutions.

    Residual branch: conv3x3 -> BN -> ReLU -> conv3x3 -> BN. With kernel 3,
    stride 1 and padding 1 the spatial size of the input is preserved.

    Shortcut: identity when ``in_channels == out_channels``. When the channel
    counts differ, a 1x1 convolution followed by batch norm projects the
    input to ``out_channels`` so that the two branches can be summed in
    ``forward`` (a plain identity would fail there with a shape mismatch).

    Args:
        in_channels: number of channels of the input tensor.
        out_channels: number of channels produced by the block.
    """

    def __init__(self, in_channels, out_channels):
        super().__init__(in_channels, out_channels)
        conv1 = nn.Conv2d(in_channels=in_channels,
                          out_channels=out_channels,
                          kernel_size=3,
                          stride=1,
                          padding=1,
                          bias=False)
        conv2 = nn.Conv2d(in_channels=out_channels,
                          out_channels=out_channels,
                          kernel_size=3,
                          stride=1,
                          padding=1,
                          bias=False)
        bn1 = nn.BatchNorm2d(num_features=out_channels)
        bn2 = nn.BatchNorm2d(num_features=out_channels)
        # Separate activation instance for the inside of the residual branch;
        # the activation applied after the addition is self.relu.
        relu = nn.ReLU(inplace=True)
        self._block = nn.Sequential(conv1, bn1, relu,
                                    conv2, bn2)
        if in_channels == out_channels:
            # Shapes already agree, so the input can be added unchanged.
            self._shortcut = nn.Identity()
        else:
            # Channel counts differ: project the input with a 1x1 convolution
            # (stride 1, so the spatial size still matches the residual branch).
            conv_shortcut = nn.Conv2d(in_channels=in_channels,
                                      out_channels=out_channels,
                                      kernel_size=1,
                                      stride=1,
                                      padding=0,
                                      bias=False)
            bn3 = nn.BatchNorm2d(num_features=out_channels)
            self._shortcut = nn.Sequential(conv_shortcut, bn3)
        # Optional custom initialisation, disabled by default. Presumably this
        # is what the notebook's "custom weight init" run enabled --
        # NOTE(review): confirm before relying on that comparison.
        # for m in self.modules():
        #     if isinstance(m, nn.Conv2d):
        #         nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
        #     elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)):
        #         nn.init.constant_(m.weight, 1)
        #         nn.init.constant_(m.bias, 0)


class ResNetSkipBlock(_ResNetBlock):
    """Downsampling residual block with a projection shortcut.

    Residual branch: conv3x3 (stride 2) -> BN -> ReLU -> conv3x3 (stride 1)
    -> BN. Shortcut: conv1x1 (stride 2) -> BN. Both branches use stride 2
    once, so their outputs have the same shape and can be summed in
    ``forward`` (the notebook check maps a 32x32 input to 16x16).

    Args:
        in_channels: number of channels of the input tensor.
        out_channels: number of channels produced by both branches.
    """

    def __init__(self, in_channels, out_channels):
        super().__init__(in_channels, out_channels)

        # Residual branch. The ReLU here is its own instance; the activation
        # applied after the addition is self.relu.
        residual_layers = [
            nn.Conv2d(in_channels, out_channels,
                      kernel_size=3, stride=2, padding=1, bias=False),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True),
            nn.Conv2d(out_channels, out_channels,
                      kernel_size=3, stride=1, padding=1, bias=False),
            nn.BatchNorm2d(out_channels),
        ]
        self._block = nn.Sequential(*residual_layers)

        # Projection shortcut: matches the residual branch in both channel
        # count and (via stride 2) spatial size.
        projection_layers = [
            nn.Conv2d(in_channels, out_channels,
                      kernel_size=1, stride=2, padding=0, bias=False),
            nn.BatchNorm2d(out_channels),
        ]
        self._shortcut = nn.Sequential(*projection_layers)

        # Optional custom initialisation, disabled by default.
        # for m in self.modules():
        #     if isinstance(m, nn.Conv2d):
        #         nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
        #     elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)):
        #         nn.init.constant_(m.weight, 1)
        #         nn.init.constant_(m.bias, 0)

In [5]:
# Smoke test: a ResNetBlock with equal channel counts should return a tensor
# of the same shape as its input.
x = torch.randn(32, 3, 32, 32).cuda()
block = ResNetBlock(3, 3).cuda()
out = block(x)

print(out.shape)
# summary() prints the table itself and returns None, so it is called bare;
# wrapping it in print() appended a stray "None" line to the cell output.
summary(block, (3, 32, 32))

torch.Size([32, 3, 32, 32])
----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
          Identity-1            [-1, 3, 32, 32]               0
            Conv2d-2            [-1, 3, 32, 32]              81
       BatchNorm2d-3            [-1, 3, 32, 32]               6
              ReLU-4            [-1, 3, 32, 32]               0
            Conv2d-5            [-1, 3, 32, 32]              81
       BatchNorm2d-6            [-1, 3, 32, 32]               6
              ReLU-7            [-1, 3, 32, 32]               0
Total params: 174
Trainable params: 174
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.01
Forward/backward pass size (MB): 0.16
Params size (MB): 0.00
Estimated Total Size (MB): 0.18
----------------------------------------------------------------
None


In [6]:
# Smoke test: ResNetSkipBlock(3, 64) should turn a 3x32x32 input into 64x16x16.
x = torch.randn(32, 3, 32, 32).cuda()
block = ResNetSkipBlock(3, 64).cuda()
out = block(x)
print(out.shape)
# summary() prints the table itself and returns None, so it is called bare;
# wrapping it in print() appended a stray "None" line to the cell output.
summary(block, (3, 32, 32))

torch.Size([32, 64, 16, 16])
----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 64, 16, 16]             192
       BatchNorm2d-2           [-1, 64, 16, 16]             128
            Conv2d-3           [-1, 64, 16, 16]           1,728
       BatchNorm2d-4           [-1, 64, 16, 16]             128
              ReLU-5           [-1, 64, 16, 16]               0
            Conv2d-6           [-1, 64, 16, 16]          36,864
       BatchNorm2d-7           [-1, 64, 16, 16]             128
              ReLU-8           [-1, 64, 16, 16]               0
Total params: 39,168
Trainable params: 39,168
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.01
Forward/backward pass size (MB): 1.00
Params size (MB): 0.15
Estimated Total Size (MB): 1.16
----------------------------------------------------------------
None


In [7]:
# Show the module tree of whichever block was assigned to `block` last
# (the ResNetSkipBlock when the cells are run in order).
print(block)

ResNetSkipBlock(
  (relu): ReLU(inplace=True)
  (_block): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace=True)
    (3): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (4): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  )
  (_shortcut): Sequential(
    (0): Conv2d(3, 64, kernel_size=(1, 1), stride=(2, 2), bias=False)
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  )
)


In [47]:
# Layers kept as notebook-level names (some are reused by later cells).
# Stem convolution: 3 -> 64 channels, 3x3 kernel, stride 2.
conv = nn.Conv2d(3, 64, kernel_size=3, stride=2, padding=1, bias=False)
# NOTE(review): maxpool is created here but is not part of the model below.
maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
relu = nn.ReLU(inplace=False)
gap = nn.AdaptiveAvgPool2d((1, 1))
flat = tmpl.Flatten()
fc = nn.Linear(512, 10)

# Stem, then four stages of two residual blocks each (64 -> 128 -> 256 -> 512
# channels; every stage after the first opens with a stride-2 skip block),
# then global average pooling, flatten and a 10-way linear layer.
model = nn.Sequential(
    conv, nn.BatchNorm2d(64), relu,
    ResNetBlock(64, 64),
    ResNetBlock(64, 64),
    ResNetSkipBlock(64, 128),
    ResNetBlock(128, 128),
    ResNetSkipBlock(128, 256),
    ResNetBlock(256, 256),
    ResNetSkipBlock(256, 512),
    ResNetBlock(512, 512),
    gap, flat, fc,
)

# Shape check on the CPU with a random batch of 32 inputs of size 3x32x32.
x = torch.randn(32, 3, 32, 32)
out = model(x)
print(out.shape)

torch.Size([32, 10])


In [9]:
# Check the stem convolution on its own: with stride 2 a 32x32 input comes
# out as 16x16.
# NOTE(review): `conv` is the same module object that sits inside `model`,
# so this CPU input only works while the model has not been moved with
# model.cuda().
x = torch.randn(32, 3, 32, 32)
out = conv(x)
print(out.shape)

torch.Size([32, 64, 16, 16])


In [48]:
# default weight init
# model.cuda() moves the module in place and returns it, so model_gpu and
# model refer to the same object.
model_gpu = model.cuda()
validate_model(
    model=model_gpu,
    train=True,
    loader=loader_gpu,
    milestones=[],
    num_iter=10,
    num_epoch=1,
    verbose=True,
)

Finished in 23.7s 
val_acc: 0.609, train_acc: 0.641
Finished in 24.3s 
val_acc: 0.583, train_acc: 0.614
Finished in 24.2s 
val_acc: 0.602, train_acc: 0.646
Finished in 23.1s 
val_acc: 0.628, train_acc: 0.687
Finished in 24.1s 
val_acc: 0.601, train_acc: 0.632
Finished in 23.8s 
val_acc: 0.631, train_acc: 0.672
Finished in 25.0s 
val_acc: 0.600, train_acc: 0.621
Finished in 24.8s 
val_acc: 0.596, train_acc: 0.638
Finished in 24.8s 
val_acc: 0.609, train_acc: 0.656
Finished in 24.8s 
val_acc: 0.607, train_acc: 0.625
10 iters: 24.3s 
val: 0.607 +- 0.014 train: 0.643 +- 0.022


(24.250173377990723, 0.60654296875, 0.6431640625)

In [45]:
# custom weight init
# NOTE(review): this call is identical to the "default weight init" run. The
# custom initialisation is not applied here; it presumably came from running
# the block-definition cell with its commented-out init loop enabled and then
# rebuilding `model` (the execution counts suggest the cells were run out of
# order). Confirm before comparing the two results.
model_gpu = model.cuda()
validate_model(
    model=model_gpu,
    train=True,
    loader=loader_gpu,
    milestones=[],
    num_iter=10,
    num_epoch=1,
    verbose=True,
)

Finished in 22.7s 
val_acc: 0.604, train_acc: 0.647
Finished in 24.6s 
val_acc: 0.629, train_acc: 0.637
Finished in 23.6s 
val_acc: 0.607, train_acc: 0.654
Finished in 22.6s 
val_acc: 0.587, train_acc: 0.641
Finished in 22.5s 
val_acc: 0.603, train_acc: 0.625
Finished in 22.8s 
val_acc: 0.518, train_acc: 0.564
Finished in 22.8s 
val_acc: 0.600, train_acc: 0.606
Finished in 22.9s 
val_acc: 0.516, train_acc: 0.561
Finished in 23.1s 
val_acc: 0.637, train_acc: 0.675
Finished in 24.5s 
val_acc: 0.555, train_acc: 0.579
10 iters: 23.2s 
val: 0.585 +- 0.040 train: 0.619 +- 0.038


(23.197196984291075, 0.58544921875, 0.6189453125)

In [11]:
# Baseline for comparison: torchvision's ResNet-18 built with its default
# constructor arguments and moved to the GPU.
resnet18 = models.resnet18().cuda()

In [312]:
# Train the torchvision ResNet-18 baseline for one epoch on the same loader.
# The helper returns a 3-tuple; judging by the names and the printed log it
# is (elapsed time, validation accuracy, training accuracy) --
# NOTE(review): confirm against sophius.train.
t, val_acc, train_acc = train_express_gpu(
    model=resnet18,
    train=True,
    loader=loader_gpu,
    milestones=[],
    num_epoch=1,
    verbose=True,
)

Finished in 18.9s 
val_acc: 0.516, train_acc: 0.528


In [13]:
# Layer-by-layer summary of the custom model for a 3x32x32 input
# (model.cuda() moves it to the GPU first).
summary(model.cuda(), (3, 32, 32))
# Parameter-count notes kept from the author (the first figure is presumably
# this model's total -- NOTE(review): confirm):
# 11.181.888
# 11.176.512 resnet 18
# diff 5376

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 64, 16, 16]           1,728
       BatchNorm2d-2           [-1, 64, 16, 16]             128
              ReLU-3           [-1, 64, 16, 16]               0
          Identity-4           [-1, 64, 16, 16]               0
            Conv2d-5           [-1, 64, 16, 16]          36,864
       BatchNorm2d-6           [-1, 64, 16, 16]             128
              ReLU-7           [-1, 64, 16, 16]               0
            Conv2d-8           [-1, 64, 16, 16]          36,864
       BatchNorm2d-9           [-1, 64, 16, 16]             128
             ReLU-10           [-1, 64, 16, 16]               0
      ResNetBlock-11           [-1, 64, 16, 16]               0
         Identity-12           [-1, 64, 16, 16]               0
           Conv2d-13           [-1, 64, 16, 16]          36,864
      BatchNorm2d-14           [-1, 64,

In [14]:
# Same summary for the torchvision ResNet-18 baseline, for side-by-side
# comparison with the custom model.
summary(resnet18, (3, 32, 32))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 64, 16, 16]           9,408
       BatchNorm2d-2           [-1, 64, 16, 16]             128
              ReLU-3           [-1, 64, 16, 16]               0
         MaxPool2d-4             [-1, 64, 8, 8]               0
            Conv2d-5             [-1, 64, 8, 8]          36,864
       BatchNorm2d-6             [-1, 64, 8, 8]             128
              ReLU-7             [-1, 64, 8, 8]               0
            Conv2d-8             [-1, 64, 8, 8]          36,864
       BatchNorm2d-9             [-1, 64, 8, 8]             128
             ReLU-10             [-1, 64, 8, 8]               0
       BasicBlock-11             [-1, 64, 8, 8]               0
           Conv2d-12             [-1, 64, 8, 8]          36,864
      BatchNorm2d-13             [-1, 64, 8, 8]             128
             ReLU-14             [-1, 6

In [17]:
# Module tree of the torchvision ResNet-18 (7x7 stem convolution followed by
# a max-pool, as the printed output shows).
print(resnet18)

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [16]:
# Module tree of the custom model, to compare against the ResNet-18 printout.
print(model)

Sequential(
  (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
  (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (2): ReLU()
  (3): ResNetBlock(
    (relu): ReLU(inplace=True)
    (_block): Sequential(
      (0): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU(inplace=True)
      (3): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (4): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (_shortcut): Identity()
  )
  (4): ResNetBlock(
    (relu): ReLU(inplace=True)
    (_block): Sequential(
      (0): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU(inplace=True)
      (3): 