<a href="https://colab.research.google.com/github/gurbaaz27/dl-models/blob/master/ResNet/model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**Importing Libraries**

In [0]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.transforms as transforms
import torchvision.datasets as ds
import torch.optim as optim

**Device Configuration**

In [0]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

**Hyperparameters**

In [0]:
batch_size = 128
num_workers = 2
num_epochs = 60
learning_rate = 0.1
weight_decay = 0.0001
momentum = 0.9

**Downloading CIFAR-10 Dataset**

In [0]:
transform_train = transforms.Compose([
                                      transforms.Pad(4),
                                      transforms.RandomCrop(32),
                                      transforms.RandomHorizontalFlip(),
                                      transforms.ToTensor(),
                                      transforms.Normalize((0.5,0.5,0.5),(0.5,0.5,0.5))
                              ])
transform_test = transforms.Compose([
                                      transforms.ToTensor(),
                                      transforms.Normalize((0.5,0.5,0.5),(0.5,0.5,0.5)) 
                              ]) 
train_dataset = ds.CIFAR10(root='./data',
                           train=True,
                           transform=transform_train,
                           download=True)
test_dataset = ds.CIFAR10(root='./data',
                          train=False,
                          transform=transform_test,
                          download=True)

Files already downloaded and verified
Files already downloaded and verified


**Loading CIFAR-10 Dataset**

In [0]:
train_loader = torch.utils.data.DataLoader(train_dataset,
                                           batch_size=batch_size,
                                           shuffle=True,
                                           num_workers=num_workers)
test_loader = torch.utils.data.DataLoader(test_dataset,
                                           batch_size=batch_size,
                                           shuffle=False,
                                           num_workers=num_workers)

**Building the Network Model**

In [0]:
## Basic Block : relatively shallower
class BasicBlock(nn.Module):

  expansion = 1

  def __init__(self, in_channels, out_channels, stride=1):
    super(BasicBlock, self).__init__()
    self.conv1 = nn.Conv2d(in_channels, out_channels, 3, stride=stride, padding=1, bias=False)
    self.bn1 = nn.BatchNorm2d(out_channels)
    self.relu = nn.ReLU(inplace=True)
    self.conv2 = nn.Conv2d(out_channels, out_channels, 3, stride=1, padding=1, bias=False)
    self.bn2 = nn.BatchNorm2d(out_channels)
    self.shortcut = nn.Sequential()
    if stride!=1 or in_channels!=self.expansion*out_channels: 
    #if stride!=1 or in_channels!=out_channels:         where self.expansion = 1 
      self.shortcut = nn.Sequential(
          nn.Conv2d(in_channels, out_channels*self.expansion, 1, stride=stride, bias=False),
          nn.BatchNorm2d(out_channels*self.expansion)
      )

  def forward(self, x):
    y = x
    x = self.conv1(x)
    x = self.bn1(x)
    x = self.relu(x)
    x = self.conv2(x)
    x = self.bn2(x)
    x += self.shortcut(y)
    x = self.relu(x)
    return x


## Bottleneck architecture : for deep layers network
class BottleNeck(nn.Module):
  expansion = 4

  def __init__(self, in_channels, out_channels, stride=1):
    super(BottleNeck, self).__init__()
    self.conv1 = nn.Conv2d(in_channels, out_channels, 1, bias=False)
    self.bn1 = nn.BatchNorm2d(out_channels)
    self.relu = nn.ReLU(inplace=True)
    self.conv2 = nn.Conv2d(out_channels, out_channels, 3, stride = stride, padding=1, bias=False)
    self.bn2 = nn.BatchNorm2d(out_channels)
    self.conv3 = nn.Conv2d(out_channels, self.expansion*out_channels, 1 , bias=False)
    self.bn3 = nn.BatchNorm2d(out_channels*self.expansion)
    self.shortcut = nn.Sequential()
    if stride!=1 or in_channels!=self.expansion*out_channels:
      self.shortcut = nn.Sequential(
          nn.Conv2d(in_channels, out_channels*self.expansion, 1, stride=stride, bias=False),
          nn.BatchNorm2d(out_channels*self.expansion)
      )

  def forward(self, x):
    y = x
    x = self.conv1(x)
    x = self.bn1(x)
    x = self.relu(x)
    x = self.conv2(x)
    x = self.bn2(x)
    x = self.relu(x)
    x = self.conv3(x)
    x = self.bn3(x)
    x += self.shortcut(y)
    x = self.relu(x)
    return x

## ResNet
class ResNet(nn.Module):

  def __init__(self, block, layers, num_classes=10):
    super(ResNet, self).__init__()
    self.in_channels = 64
    self.conv = nn.Conv2d(3, self.in_channels, 3, stride=1, padding=1, bias =False)
    self.bn = nn.BatchNorm2d(self.in_channels)
    self.relu = nn.ReLU(inplace=True)
    self.layer1 = self.make_layer(block, 64, layers[0], stride=1)
    self.layer2 = self.make_layer(block, 128, layers[1], stride=2)
    self.layer3 = self.make_layer(block, 256, layers[2], stride=2)
    self.layer4 = self.make_layer(block, 512, layers[3], stride=2)
    self.linear = nn.Linear(512*block.expansion , num_classes)

  def make_layer(self, block, channels, blocks, stride):
    strides = [stride] + [1]*(blocks-1)
    layers = []
    for stride in strides:
      layers.append(block(self.in_channels, channels, stride))
      self.in_channels = channels*block.expansion
    return nn.Sequential(*layers)
  
  def forward(self, x):
    x = self.conv(x)
    x = self.bn(x)
    x = self.relu(x)
    x = self.layer1(x)
    x = self.layer2(x)
    x = self.layer3(x)
    x = self.layer4(x)
    x = F.avg_pool2d(x, 4)
    x = x.view(x.size(0), -1)
    x = self.linear(x)
    return x

**Defining different Layered Models**

In [0]:
def ResNet18():
  return ResNet(BasicBlock, [2,2,2,2])

def ResNet34():
  return ResNet(BasicBlock, [3,4,6,3])

def ResNet50():
  return ResNet(BottleNeck, [3,4,6,3])

**Instantiating Models of each Architecture**


In [0]:
net = ResNet34().to(device)
net_bottleNeck = ResNet50().to(device)

**Loss and Optimizer**

In [0]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(),lr=learning_rate, momentum=momentum,weight_decay=weight_decay)

**Update Learning Rate**

In [0]:
def update_learning_rate(optimizer, learning_rate):
  for param_group in optimizer.param_groups:
    param_group['lr'] = learning_rate

**Models Summary**

In [0]:
#from torchsummary import summary

#summary(net.cuda(), (3,224,224))
print(net)
print('-------------------------------------------------------------')
print(net_bottleNeck)

ResNet(
  (conv): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
  (bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (shortcut): Sequential()
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 

**Training the Models**

In [0]:
curr_lr = learning_rate
for epoch in range(num_epochs):
  for i, (images,labels) in enumerate(train_loader):
    
    images = images.to(device)
    labels = labels.to(device)
    
    outputs = net(images)                       # F pass
    loss = criterion(outputs, labels)

    optimizer.zero_grad()                       # B pass
    loss.backward()
    optimizer.step()

    if (i+1) % 100 == 0:                              
            print('Epoch [%d/%d], Step [%d/%d], Loss: %.4f'
                 %(epoch+1, num_epochs, i+1, len(train_loader) , loss.item()))
            
  if epoch == 30:
    curr_lr *= 0.1
    update_learning_rate(optimizer, curr_lr)

  if epoch == 45:
    curr_lr *= 0.1
    update_learning_rate(optimizer, curr_lr)

Epoch [1/60], Step [100/391], Loss: 2.0951
Epoch [1/60], Step [200/391], Loss: 2.0771
Epoch [1/60], Step [300/391], Loss: 2.0440
Epoch [2/60], Step [100/391], Loss: 1.7858
Epoch [2/60], Step [200/391], Loss: 1.6498
Epoch [2/60], Step [300/391], Loss: 1.7473
Epoch [3/60], Step [100/391], Loss: 1.5086
Epoch [3/60], Step [200/391], Loss: 1.4558
Epoch [3/60], Step [300/391], Loss: 1.5228
Epoch [4/60], Step [100/391], Loss: 1.2583
Epoch [4/60], Step [200/391], Loss: 1.3912
Epoch [4/60], Step [300/391], Loss: 1.1428
Epoch [5/60], Step [100/391], Loss: 0.9972
Epoch [5/60], Step [200/391], Loss: 1.1295
Epoch [5/60], Step [300/391], Loss: 0.9939
Epoch [6/60], Step [100/391], Loss: 0.8048
Epoch [6/60], Step [200/391], Loss: 0.8433
Epoch [6/60], Step [300/391], Loss: 0.9932
Epoch [7/60], Step [100/391], Loss: 1.0147
Epoch [7/60], Step [200/391], Loss: 0.8446
Epoch [7/60], Step [300/391], Loss: 0.6808
Epoch [8/60], Step [100/391], Loss: 0.6639
Epoch [8/60], Step [200/391], Loss: 0.6654
Epoch [8/60

In [0]:
learning_rate = 0.1
optimizer = optim.SGD(net.parameters(),lr=learning_rate, momentum=momentum,weight_decay=weight_decay)
curr_lr = learning_rate
for epoch in range(num_epochs):
  for i, (images,labels) in enumerate(train_loader):
    
    images = images.to(device)
    labels = labels.to(device)
    
    outputs = net_bottleNeck(images)                       # F pass
    loss = criterion(outputs, labels)

    optimizer.zero_grad()                       # B pass
    loss.backward()
    optimizer.step()

    if (i+1) % 100 == 0 or i == 390:                              
            print('Epoch [%d/%d], Step [%d/%d], Loss: %.4f'
                 %(epoch+1, num_epochs, i+1, len(train_loader) , loss.item()))
            
  if epoch == 30:
    curr_lr *= 0.1
    update_learning_rate(optimizer, curr_lr)

  if epoch == 45:
    curr_lr *= 0.1
    update_learning_rate(optimizer, curr_lr)

Epoch [1/60], Step [100/391], Loss: 2.4819
Epoch [1/60], Step [200/391], Loss: 2.4656
Epoch [1/60], Step [300/391], Loss: 2.5336
Epoch [1/60], Step [391/391], Loss: 2.6374
Epoch [2/60], Step [100/391], Loss: 2.5175
Epoch [2/60], Step [200/391], Loss: 2.6400
Epoch [2/60], Step [300/391], Loss: 2.5399
Epoch [2/60], Step [391/391], Loss: 2.6114
Epoch [3/60], Step [100/391], Loss: 2.4925
Epoch [3/60], Step [200/391], Loss: 2.4863
Epoch [3/60], Step [300/391], Loss: 2.5173
Epoch [3/60], Step [391/391], Loss: 2.4644
Epoch [4/60], Step [100/391], Loss: 2.5017
Epoch [4/60], Step [200/391], Loss: 2.5572
Epoch [4/60], Step [300/391], Loss: 2.4453
Epoch [4/60], Step [391/391], Loss: 2.6072
Epoch [5/60], Step [100/391], Loss: 2.4757
Epoch [5/60], Step [200/391], Loss: 2.5818
Epoch [5/60], Step [300/391], Loss: 2.5523
Epoch [5/60], Step [391/391], Loss: 2.4805
Epoch [6/60], Step [100/391], Loss: 2.5624
Epoch [6/60], Step [200/391], Loss: 2.5193
Epoch [6/60], Step [300/391], Loss: 2.4714
Epoch [6/60

**Evaluating the Model**

In [0]:
net.eval()
with torch.no_grad():
  correct = 0
  total = 0
  for images,labels in test_loader:
    images = images.to(device)
    labels = labels.to(device)
    outputs = net(images)
    _,predicted = torch.max(outputs.data, 1)
    total += labels.size(0)
    correct += (labels==predicted).sum().item()
  print('Accuracy of the model on the test images: {} %'.format(100 * correct / total))

Accuracy of the model on the test images: 93.23 %


In [0]:
net_bottleNeck.eval()
with torch.no_grad():
  correct = 0
  total = 0
  for images,labels in test_loader:
    images = images.to(device)
    labels = labels.to(device)
    outputs = net_bottleNeck(images)
    _,predicted = torch.max(outputs.data, 1)
    total += labels.size(0)
    correct += (labels==predicted).sum().item()
  print('Accuracy of the model on the test images: {} %'.format(100 * correct / total))

Accuracy of the model on the test images: 10.0 %


In [0]:
print(torch.cuda.device_count())

1
