In [1]:
import torch
import torch.nn as nn
from torchsummary import summary

In [2]:
# resnet block, this block will be used repeatedly
class block(nn.Module):
  """Bottleneck residual block: 1x1 conv -> 3x3 conv -> 1x1 conv, plus a skip connection.

  The first 1x1 convolution maps ``in_channels`` to ``out_channels``, the 3x3
  convolution (which carries ``stride``) keeps ``out_channels``, and the last
  1x1 convolution widens to ``out_channels * expansion``. Every convolution is
  followed by batch normalisation; ReLU is applied after the first two and
  again after the skip connection has been added.

  Args:
    in_channels: number of channels of the input tensor.
    out_channels: channel width inside the block; the block outputs
      ``out_channels * expansion`` channels.
    identity_downsample: optional module applied to the block input before it
      is added to the convolution output. It is needed whenever the input and
      the output differ in channel count or spatial size.
    stride: stride of the 3x3 convolution.
  """

  # Ratio of the block's output channels to `out_channels`. Kept on the class
  # so that code building a network can read it without creating an instance.
  expansion = 4

  def __init__(self, in_channels, out_channels, identity_downsample=None, stride=1):
    super().__init__()
    widened_channels = out_channels * self.expansion

    # 1x1 convolution: in_channels -> out_channels
    self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=1, padding=0)
    self.bn1 = nn.BatchNorm2d(out_channels)
    # 3x3 convolution: the only layer of the main path that applies `stride`
    self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3, stride=stride, padding=1)
    self.bn2 = nn.BatchNorm2d(out_channels)
    # 1x1 convolution: out_channels -> out_channels * expansion
    self.conv3 = nn.Conv2d(out_channels, widened_channels, kernel_size=1, stride=1, padding=0)
    self.bn3 = nn.BatchNorm2d(widened_channels)
    self.relu = nn.ReLU()
    self.identity_downsample = identity_downsample

  def forward(self, x):
    """Run the three conv/bn stages, add the skip connection and apply the final ReLU."""
    identity = x  # kept for the skip connection

    out = self.relu(self.bn1(self.conv1(x)))
    out = self.relu(self.bn2(self.conv2(out)))
    out = self.bn3(self.conv3(out))

    # Reshape the skip path when it no longer matches the main path.
    if self.identity_downsample is not None:
      identity = self.identity_downsample(identity)

    out += identity
    return self.relu(out)

In [3]:
class resNet(nn.Module):
  """ResNet assembled from a residual block class and a list of per-stage block counts.

  The network is a 7x7 stride-2 convolution, batch norm, ReLU and a 3x3
  stride-2 max pool, followed by four stages of residual blocks with base
  widths 64, 128, 256 and 512, an adaptive average pool to 1x1 and a linear
  classifier.

  Args:
    block: residual block class (not an instance). It is called as
      ``block(in_channels, out_channels, identity_downsample, stride)`` and
      must output ``out_channels * expansion`` channels, where ``expansion``
      is read from ``block.expansion`` and defaults to 4 when the class does
      not define it.
    layers: sequence of four integers, the number of blocks in each stage.
    image_channels: number of channels of the input images.
    num_classes: size of the classifier output.
  """

  def __init__(self, block, layers, image_channels, num_classes):
    super().__init__()
    # Output/inner channel ratio of the block; 4 keeps the behaviour for block
    # classes that do not expose the value as a class attribute.
    expansion = getattr(block, 'expansion', 4)
    # Channel count entering the next stage; updated by make_layer.
    self.in_channels = 64

    # Stem applied before the residual stages.
    self.conv1 = nn.Conv2d(image_channels, 64, kernel_size=7, stride=2, padding=3)
    self.bn1 = nn.BatchNorm2d(64)
    self.relu = nn.ReLU()
    self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

    # Residual stages; every stage after the first halves the resolution.
    self.layer1 = self.make_layer(block, layers[0], out_channels=64, stride=1)
    self.layer2 = self.make_layer(block, layers[1], out_channels=128, stride=2)
    self.layer3 = self.make_layer(block, layers[2], out_channels=256, stride=2)
    self.layer4 = self.make_layer(block, layers[3], out_channels=512, stride=2)

    self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
    self.fc = nn.Linear(512 * expansion, num_classes)

  def forward(self, x):
    """Return the classifier output for a batch of images."""
    x = self.conv1(x)
    x = self.bn1(x)
    x = self.relu(x)
    x = self.maxpool(x)

    x = self.layer1(x)
    x = self.layer2(x)
    x = self.layer3(x)
    x = self.layer4(x)

    x = self.avgpool(x)
    x = x.flatten(1)  # (batch, channels, 1, 1) -> (batch, channels)
    return self.fc(x)

  def make_layer(self, block, num_residual_blocks, out_channels, stride):
    """Build one stage of ``num_residual_blocks`` blocks as an ``nn.Sequential``.

    Only the first block of the stage receives ``stride`` and, when needed, a
    1x1 convolution + batch norm shortcut; the remaining blocks use the block's
    default stride and no shortcut module. ``self.in_channels`` is updated to
    the stage's output width.
    """
    expansion = getattr(block, 'expansion', 4)
    stage_channels = out_channels * expansion

    # The shortcut has to be reshaped whenever the first block changes the
    # spatial size (stride != 1) or the channel count.
    identity_downsample = None
    if stride != 1 or self.in_channels != stage_channels:
      identity_downsample = nn.Sequential(
        nn.Conv2d(self.in_channels, stage_channels, kernel_size=1, stride=stride),
        nn.BatchNorm2d(stage_channels),
      )

    layers = [block(self.in_channels, out_channels, identity_downsample, stride)]
    self.in_channels = stage_channels

    # The remaining blocks map stage_channels -> stage_channels.
    for _ in range(num_residual_blocks - 1):
      layers.append(block(self.in_channels, out_channels))

    return nn.Sequential(*layers)

In [4]:
def resNet50(img_channels=3, num_classes=1000):
  """Build the ResNet-50 variant: `block` stacked 3, 4, 6 and 3 times over the four stages."""
  blocks_per_stage = [3, 4, 6, 3]
  return resNet(
    block=block,
    layers=blocks_per_stage,
    image_channels=img_channels,
    num_classes=num_classes,
  )

def resNet101(img_channels=3, num_classes=1000):
  """Build the ResNet-101 variant: `block` stacked 3, 4, 23 and 3 times over the four stages."""
  blocks_per_stage = [3, 4, 23, 3]
  return resNet(
    block=block,
    layers=blocks_per_stage,
    image_channels=img_channels,
    num_classes=num_classes,
  )

def resNet152(img_channels=3, num_classes=1000):
  """Build the ResNet-152 variant: `block` stacked 3, 8, 36 and 3 times over the four stages."""
  blocks_per_stage = [3, 8, 36, 3]
  return resNet(
    block=block,
    layers=blocks_per_stage,
    image_channels=img_channels,
    num_classes=num_classes,
  )

In [14]:
# CPU smoke test: a batch of 4 three-channel 224x224 inputs should produce a
# (4, 1000) output. Both the model and the input live on the CPU, so the result
# is left there as well; forcing it onto 'cuda' would raise on a machine
# without a GPU and would not speed up the forward pass that already ran.
x = torch.randn(4, 3, 224, 224)
model = resNet152()
output = model(x)
print(output.shape)

torch.Size([4, 1000])


In [16]:
# Use the GPU when one is available, otherwise stay on the CPU.
if torch.cuda.is_available():
  device = torch.device('cuda')
else:
  device = torch.device('cpu')

# Build ResNet-152 and a dummy batch of 4 images, both placed on `device`.
model = resNet152()
model = model.to(device)
x = torch.randn(4, 3, 224, 224)
x = x.to(device)

# One forward pass to confirm the output shape.
output = model(x)
print(output.shape)

# Per-layer output shapes and parameter counts for a single 3x224x224 input.
summary(model, (3, 224, 224))

torch.Size([4, 1000])
----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 64, 112, 112]           9,472
       BatchNorm2d-2         [-1, 64, 112, 112]             128
              ReLU-3         [-1, 64, 112, 112]               0
         MaxPool2d-4           [-1, 64, 56, 56]               0
            Conv2d-5           [-1, 64, 56, 56]           4,160
       BatchNorm2d-6           [-1, 64, 56, 56]             128
              ReLU-7           [-1, 64, 56, 56]               0
            Conv2d-8           [-1, 64, 56, 56]          36,928
       BatchNorm2d-9           [-1, 64, 56, 56]             128
             ReLU-10           [-1, 64, 56, 56]               0
           Conv2d-11          [-1, 256, 56, 56]          16,640
      BatchNorm2d-12          [-1, 256, 56, 56]             512
           Conv2d-13          [-1, 256, 56, 56]          16,640
      BatchNorm2d