# **Convolutional ResNet and Residual Blocks**

## **Libraries**

In [1]:
from torch.utils.data import DataLoader
from torchvision import transforms
from torchvision import datasets
import torch.nn.functional as F
import numpy as np
import torch
import time

In [2]:
# When a CUDA device is present, ask cuDNN to use deterministic algorithms so
# that repeated runs with the same seed are more reproducible.
if torch.cuda.is_available():
    torch.backends.cudnn.deterministic = True

## **Settings**

In [3]:
# Pick the compute device. The notebook prefers the second GPU ("cuda:1");
# on single-GPU machines that ordinal does not exist, so fall back to "cuda:0",
# and to the CPU when CUDA is unavailable.
if torch.cuda.is_available():
    device = torch.device("cuda:1" if torch.cuda.device_count() > 1 else "cuda:0")
else:
    device = torch.device("cpu")

In [4]:
# Hyperparameters used by the experiments below.
random_seed = 123      # passed to torch.manual_seed before each model is built
learning_rate = 0.01   # learning rate handed to the Adam optimizer
num_epochs = 10        # number of passes over the training loader
batch_size = 128       # mini-batch size of the train and test loaders

In [5]:
# Size of the classifier's output layer (one logit per MNIST digit class).
num_classes = 10

## **MNIST dataset**

In [6]:
# MNIST train/test splits stored under ./data. Images are converted to tensors
# by ToTensor; only the training split triggers the download.
to_tensor = transforms.ToTensor()
train_dataset = datasets.MNIST(
    root='data',
    train=True,
    transform=to_tensor,
    download=True,
)
test_dataset = datasets.MNIST(
    root='data',
    train=False,
    transform=to_tensor,
)

In [7]:
# Mini-batch iterators: the training data is reshuffled every epoch, the test
# data is served in its stored order.
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

In [8]:
# Sanity check: pull a single mini-batch and show the shapes the loader yields.
first_batch = next(iter(train_loader), None)
if first_batch is not None:
    images, labels = first_batch
    print('Image batch dimensions:', images.shape)
    print('Image label dimensions:', labels.shape)

Image batch dimensions: torch.Size([128, 1, 28, 28])
Image label dimensions: torch.Size([128])


## **ResNet with identity blocks**

In [9]:
class ConvNet(torch.nn.Module):
    """Small classifier built from two identity residual blocks.

    Each block expands the single-channel feature map to four channels with a
    1x1 convolution, reduces it back to one channel with a padded 3x3
    convolution, adds the block's input to the result and applies a ReLU.
    Spatial size is never changed, so the flattened 28*28*1 features feed the
    final linear layer directly.

    Args:
        num_classes: Number of output units of the linear classifier.
    """

    def __init__(self, num_classes):
        super().__init__()

        # 1st residual block: 28x28x1 => 28x28x4 => 28x28x1
        self.conv_1 = torch.nn.Conv2d(1, 4, kernel_size=1, stride=1, padding=0)
        self.conv_1_bn = torch.nn.BatchNorm2d(4)
        self.conv_2 = torch.nn.Conv2d(4, 1, kernel_size=3, stride=1, padding=1)
        self.conv_2_bn = torch.nn.BatchNorm2d(1)

        # 2nd residual block: 28x28x1 => 28x28x4 => 28x28x1
        self.conv_3 = torch.nn.Conv2d(1, 4, kernel_size=1, stride=1, padding=0)
        self.conv_3_bn = torch.nn.BatchNorm2d(4)
        self.conv_4 = torch.nn.Conv2d(4, 1, kernel_size=3, stride=1, padding=1)
        self.conv_4_bn = torch.nn.BatchNorm2d(1)

        # Fully connected classifier on the flattened feature map
        self.linear_1 = torch.nn.Linear(28 * 28 * 1, num_classes)

    @staticmethod
    def _identity_block(x, expand, expand_bn, squeeze, squeeze_bn):
        """Run conv-BN-ReLU-conv-BN, add the unchanged input, then apply ReLU."""
        out = F.relu(expand_bn(expand(x)))
        out = squeeze_bn(squeeze(out))
        out += x
        return F.relu(out)

    def forward(self, x):
        """Return ``(logits, probas)`` for a batch of 1x28x28 inputs."""
        out = self._identity_block(
            x, self.conv_1, self.conv_1_bn, self.conv_2, self.conv_2_bn)
        out = self._identity_block(
            out, self.conv_3, self.conv_3_bn, self.conv_4, self.conv_4_bn)

        # Fully connected
        flat = out.view(-1, 28 * 28 * 1)
        logits = self.linear_1(flat)
        probas = F.softmax(logits, dim=1)
        return logits, probas

In [10]:
# Seed the RNG first so the weight initialisation is repeatable, then build the
# network and place it on the selected device.
torch.manual_seed(random_seed)
model = ConvNet(num_classes=num_classes).to(device)

In [11]:
# Adam optimizer over all parameters of the model built in the previous cell.
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)  

## **Training phase**

In [12]:
def compute_accuracy(model, data_loader, device=None):
    """Return the classification accuracy of ``model`` on ``data_loader`` in percent.

    Args:
        model: Module whose forward pass returns a ``(logits, probas)`` tuple.
        data_loader: Iterable yielding ``(features, targets)`` mini-batches.
        device: Device the batches are moved to before the forward pass.
            When omitted, the device of the model's first parameter is used
            (CPU if the model has no parameters), so the function no longer
            depends on a notebook-level ``device`` variable.

    Returns:
        A scalar float tensor with the percentage of correctly classified
        examples, or ``0.0`` when the loader yields no examples.
    """
    if device is None:
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else torch.device("cpu")

    # Start from a tensor so `.float()` below also works for an empty loader.
    correct_pred = torch.zeros((), dtype=torch.long, device=device)
    num_examples = 0

    # Accuracy needs no gradients; disabling them avoids building autograd graphs.
    with torch.no_grad():
        for features, targets in data_loader:
            features = features.to(device)
            targets = targets.to(device)
            logits, probas = model(features)
            _, predicted_labels = torch.max(probas, 1)
            num_examples += targets.size(0)
            correct_pred += (predicted_labels == targets).sum()

    if num_examples == 0:
        return correct_pred.float()
    return correct_pred.float() / num_examples * 100

In [13]:
# Train the identity-block network: one optimizer step per mini-batch, a cost
# log line every 50 batches, and the training-set accuracy plus the elapsed
# wall-clock time after every epoch.
start_time = time.time()
for epoch in range(num_epochs):
    model.train()
    for batch_idx, (features, targets) in enumerate(train_loader):
        features, targets = features.to(device), targets.to(device)

        # Forward pass and loss
        logits, probas = model(features)
        cost = F.cross_entropy(logits, targets)

        # Backward pass and parameter update
        optimizer.zero_grad()
        cost.backward()
        optimizer.step()

        if batch_idx % 50 == 0:
            print('Epoch: %03d/%03d | Batch %03d/%03d | Cost: %.4f'
                  % (epoch+1, num_epochs, batch_idx, len(train_loader), cost))

    # Evaluate in eval mode with gradient tracking switched off.
    model.eval()
    with torch.no_grad():
        train_acc = compute_accuracy(model, train_loader)
        print('Epoch: %03d/%03d training accuracy: %.2f%%' % (
              epoch+1, num_epochs, train_acc))

    elapsed_min = (time.time() - start_time) / 60
    print('Time elapsed: %.2f min' % elapsed_min)

Epoch: 001/010 | Batch 000/469 | Cost: 2.6800
Epoch: 001/010 | Batch 050/469 | Cost: 0.2571
Epoch: 001/010 | Batch 100/469 | Cost: 0.3645
Epoch: 001/010 | Batch 150/469 | Cost: 0.2635
Epoch: 001/010 | Batch 200/469 | Cost: 0.4046
Epoch: 001/010 | Batch 250/469 | Cost: 0.3150
Epoch: 001/010 | Batch 300/469 | Cost: 0.3945
Epoch: 001/010 | Batch 350/469 | Cost: 0.2483
Epoch: 001/010 | Batch 400/469 | Cost: 0.2535
Epoch: 001/010 | Batch 450/469 | Cost: 0.3532
Epoch: 001/010 training accuracy: 90.93%
Time elapsed: 2.06 min
Epoch: 002/010 | Batch 000/469 | Cost: 0.3232
Epoch: 002/010 | Batch 050/469 | Cost: 0.2191
Epoch: 002/010 | Batch 100/469 | Cost: 0.3152
Epoch: 002/010 | Batch 150/469 | Cost: 0.2102
Epoch: 002/010 | Batch 200/469 | Cost: 0.3202
Epoch: 002/010 | Batch 250/469 | Cost: 0.2112
Epoch: 002/010 | Batch 300/469 | Cost: 0.2701
Epoch: 002/010 | Batch 350/469 | Cost: 0.4034
Epoch: 002/010 | Batch 400/469 | Cost: 0.2849
Epoch: 002/010 | Batch 450/469 | Cost: 0.2930
Epoch: 002/010 t

In [14]:
# Wall-clock minutes since `start_time` was set at the top of the training cell.
total_minutes = (time.time() - start_time) / 60
print('Total Training Time: %.2f min' % total_minutes)

Total Training Time: 1454.57 min


## **Evaluation**

In [15]:
# Evaluate on the held-out test set. Eval mode is set explicitly so the
# batch-norm layers use their running statistics even if the training cell was
# skipped or ran zero epochs, and gradient tracking is disabled.
model.eval()
with torch.no_grad():
    print('Test accuracy: %.2f%%' % (compute_accuracy(model, test_loader)))

Test accuracy: 92.11%


## **ResNet with convolutional blocks for resizing**

In [16]:
class ConvNet(torch.nn.Module):
    """Classifier built from two down-sampling residual blocks.

    In each block the main path is a strided 3x3 convolution followed by a 1x1
    convolution (both batch-normalised). The shortcut is a strided 1x1
    convolution plus batch norm, so both branches have the same shape when
    they are added. The resulting 7x7x32 feature map is flattened into a
    linear classifier.

    Args:
        num_classes: Number of output units of the linear classifier.
    """

    def __init__(self, num_classes):
        super().__init__()

        # ---- 1st residual block ----
        # main path: 28x28x1 => 14x14x4 => 14x14x8
        self.conv_1 = torch.nn.Conv2d(1, 4, kernel_size=3, stride=2, padding=1)
        self.conv_1_bn = torch.nn.BatchNorm2d(4)
        self.conv_2 = torch.nn.Conv2d(4, 8, kernel_size=1, stride=1, padding=0)
        self.conv_2_bn = torch.nn.BatchNorm2d(8)
        # shortcut: 28x28x1 => 14x14x8
        self.conv_shortcut_1 = torch.nn.Conv2d(1, 8, kernel_size=1, stride=2, padding=0)
        self.conv_shortcut_1_bn = torch.nn.BatchNorm2d(8)

        # ---- 2nd residual block ----
        # main path: 14x14x8 => 7x7x16 => 7x7x32
        self.conv_3 = torch.nn.Conv2d(8, 16, kernel_size=3, stride=2, padding=1)
        self.conv_3_bn = torch.nn.BatchNorm2d(16)
        self.conv_4 = torch.nn.Conv2d(16, 32, kernel_size=1, stride=1, padding=0)
        self.conv_4_bn = torch.nn.BatchNorm2d(32)
        # shortcut: 14x14x8 => 7x7x32
        self.conv_shortcut_2 = torch.nn.Conv2d(8, 32, kernel_size=1, stride=2, padding=0)
        self.conv_shortcut_2_bn = torch.nn.BatchNorm2d(32)

        # Fully connected
        self.linear_1 = torch.nn.Linear(7 * 7 * 32, num_classes)

    @staticmethod
    def _projection_block(x, conv_a, bn_a, conv_b, bn_b, conv_skip, bn_skip):
        """Main path (conv-BN-ReLU-conv-BN) plus a linear conv-BN shortcut, then ReLU."""
        out = F.relu(bn_a(conv_a(x)))
        out = bn_b(conv_b(out))
        # match up dimensions using a linear function (no relu)
        skip = bn_skip(conv_skip(x))
        out += skip
        return F.relu(out)

    def forward(self, x):
        """Return ``(logits, probas)`` for a batch of 1x28x28 inputs."""
        # 1st residual block: 28x28x1 => 14x14x8
        out = self._projection_block(
            x,
            self.conv_1, self.conv_1_bn,
            self.conv_2, self.conv_2_bn,
            self.conv_shortcut_1, self.conv_shortcut_1_bn)

        # 2nd residual block: 14x14x8 => 7x7x32
        out = self._projection_block(
            out,
            self.conv_3, self.conv_3_bn,
            self.conv_4, self.conv_4_bn,
            self.conv_shortcut_2, self.conv_shortcut_2_bn)

        # Fully connected
        flat = out.view(-1, 7 * 7 * 32)
        logits = self.linear_1(flat)
        probas = F.softmax(logits, dim=1)
        return logits, probas

In [17]:
# Re-seed so this network starts from a repeatable initialisation, then build
# it and place it on the selected device.
torch.manual_seed(random_seed)
model = ConvNet(num_classes=num_classes).to(device)

In [18]:
# Fresh Adam optimizer bound to the parameters of the newly built model.
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)  

## **Training phase**

In [19]:
def compute_accuracy(model, data_loader, device=None):
    """Return the classification accuracy of ``model`` on ``data_loader`` in percent.

    Args:
        model: Module whose forward pass returns a ``(logits, probas)`` tuple.
        data_loader: Iterable yielding ``(features, targets)`` mini-batches.
        device: Device the batches are moved to before the forward pass.
            When omitted, the device of the model's first parameter is used
            (CPU if the model has no parameters), so the function no longer
            depends on a notebook-level ``device`` variable.

    Returns:
        A scalar float tensor with the percentage of correctly classified
        examples, or ``0.0`` when the loader yields no examples.
    """
    if device is None:
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else torch.device("cpu")

    # Start from a tensor so `.float()` below also works for an empty loader.
    correct_pred = torch.zeros((), dtype=torch.long, device=device)
    num_examples = 0

    # Accuracy needs no gradients; disabling them avoids building autograd graphs.
    with torch.no_grad():
        for features, targets in data_loader:
            features = features.to(device)
            targets = targets.to(device)
            logits, probas = model(features)
            _, predicted_labels = torch.max(probas, 1)
            num_examples += targets.size(0)
            correct_pred += (predicted_labels == targets).sum()

    if num_examples == 0:
        return correct_pred.float()
    return correct_pred.float() / num_examples * 100

In [20]:
# Train the projection-shortcut network: log the mini-batch cost every 50
# batches and report the training-set accuracy at the end of each epoch.
for epoch in range(num_epochs):
    model.train()
    for batch_idx, batch in enumerate(train_loader):
        features = batch[0].to(device)
        targets = batch[1].to(device)

        # Forward pass and loss
        logits, probas = model(features)
        cost = F.cross_entropy(logits, targets)

        # Clear stale gradients, backpropagate, update
        optimizer.zero_grad()
        cost.backward()
        optimizer.step()

        is_log_step = (batch_idx % 50 == 0)
        if is_log_step:
            print('Epoch: %03d/%03d | Batch %03d/%03d | Cost: %.4f'
                  % (epoch+1, num_epochs, batch_idx, len(train_loader), cost))

    # eval mode so batch norm uses its running statistics during inference
    model.eval()
    # no gradients needed during inference; saves memory
    with torch.no_grad():
        epoch_accuracy = compute_accuracy(model, train_loader)
        print('Epoch: %03d/%03d training accuracy: %.2f%%' % (
              epoch+1, num_epochs, epoch_accuracy))

Epoch: 001/010 | Batch 000/469 | Cost: 2.3534
Epoch: 001/010 | Batch 050/469 | Cost: 0.2685
Epoch: 001/010 | Batch 100/469 | Cost: 0.2464
Epoch: 001/010 | Batch 150/469 | Cost: 0.0995
Epoch: 001/010 | Batch 200/469 | Cost: 0.0619
Epoch: 001/010 | Batch 250/469 | Cost: 0.1123
Epoch: 001/010 | Batch 300/469 | Cost: 0.2530
Epoch: 001/010 | Batch 350/469 | Cost: 0.1477
Epoch: 001/010 | Batch 400/469 | Cost: 0.0631
Epoch: 001/010 | Batch 450/469 | Cost: 0.1083
Epoch: 001/010 training accuracy: 97.53%
Epoch: 002/010 | Batch 000/469 | Cost: 0.1181
Epoch: 002/010 | Batch 050/469 | Cost: 0.0374
Epoch: 002/010 | Batch 100/469 | Cost: 0.1096
Epoch: 002/010 | Batch 150/469 | Cost: 0.1729
Epoch: 002/010 | Batch 200/469 | Cost: 0.1072
Epoch: 002/010 | Batch 250/469 | Cost: 0.0343
Epoch: 002/010 | Batch 300/469 | Cost: 0.0258
Epoch: 002/010 | Batch 350/469 | Cost: 0.0444
Epoch: 002/010 | Batch 400/469 | Cost: 0.0288
Epoch: 002/010 | Batch 450/469 | Cost: 0.1070
Epoch: 002/010 training accuracy: 98.42

## **Evaluation**

In [21]:
# Evaluate on the held-out test set. Eval mode is set explicitly so the
# batch-norm layers use their running statistics even if the training cell was
# skipped or ran zero epochs, and gradient tracking is disabled.
model.eval()
with torch.no_grad():
    print('Test accuracy: %.2f%%' % (compute_accuracy(model, test_loader)))

Test accuracy: 98.33%


## **ResNet with convolutional blocks for resizing (using a helper class)**

In [22]:
class ResidualBlock(torch.nn.Module):
    """Down-sampling residual block with a convolutional shortcut.

    Main path: 3x3 convolution (stride 2, padding 1) -> batch norm -> ReLU ->
    1x1 convolution -> batch norm. Shortcut: 1x1 convolution (stride 2) ->
    batch norm, with no non-linearity. The two branches are summed and passed
    through a ReLU.

    Args:
        channels: Indexable of channel counts; ``channels[0]`` is the input
            width, ``channels[1]`` the width after the first convolution and
            ``channels[2]`` the output width (shared by main path and shortcut).
    """

    def __init__(self, channels):
        super().__init__()
        in_ch, mid_ch, out_ch = channels[0], channels[1], channels[2]

        # Main path
        self.conv_1 = torch.nn.Conv2d(in_ch, mid_ch, kernel_size=3, stride=2, padding=1)
        self.conv_1_bn = torch.nn.BatchNorm2d(mid_ch)
        self.conv_2 = torch.nn.Conv2d(mid_ch, out_ch, kernel_size=1, stride=1, padding=0)
        self.conv_2_bn = torch.nn.BatchNorm2d(out_ch)

        # Shortcut that brings the input to the main path's output shape
        self.conv_shortcut_1 = torch.nn.Conv2d(in_ch, out_ch, kernel_size=1, stride=2, padding=0)
        self.conv_shortcut_1_bn = torch.nn.BatchNorm2d(out_ch)

    def forward(self, x):
        """Return ``relu(main_path(x) + shortcut(x))``."""
        main = F.relu(self.conv_1_bn(self.conv_1(x)))
        main = self.conv_2_bn(self.conv_2(main))

        skip = self.conv_shortcut_1_bn(self.conv_shortcut_1(x))

        main += skip
        return F.relu(main)

In [23]:
class ConvNet(torch.nn.Module):
    """Classifier that stacks two ``ResidualBlock`` modules and a linear layer.

    The blocks are built with channel widths ``[1, 4, 8]`` and ``[8, 16, 32]``;
    the output of the second block is flattened to ``7*7*32`` features for the
    linear classifier.

    Args:
        num_classes: Number of output units of the linear classifier.
    """

    def __init__(self, num_classes):
        super().__init__()

        self.residual_block_1 = ResidualBlock(channels=[1, 4, 8])
        self.residual_block_2 = ResidualBlock(channels=[8, 16, 32])

        self.linear_1 = torch.nn.Linear(7*7*32, num_classes)

    def forward(self, x):
        """Return ``(logits, probas)`` for a batch of inputs."""
        # Call the sub-modules themselves rather than their .forward() methods:
        # Module.__call__ runs any registered forward hooks, .forward() skips them.
        out = self.residual_block_1(x)
        out = self.residual_block_2(out)

        logits = self.linear_1(out.view(-1, 7*7*32))
        probas = F.softmax(logits, dim=1)
        return logits, probas

In [24]:
# Re-seed before construction so the weight initialisation is repeatable.
torch.manual_seed(random_seed)
model = ConvNet(num_classes)

In [25]:
# Place the model on the selected device, then bind a fresh Adam optimizer to
# its parameters.
model = model.to(device)
optimizer = torch.optim.Adam(params=model.parameters(), lr=learning_rate)

## **Training phase**

In [26]:
def compute_accuracy(model, data_loader, device=None):
    """Return the classification accuracy of ``model`` on ``data_loader`` in percent.

    Args:
        model: Module whose forward pass returns a ``(logits, probas)`` tuple.
        data_loader: Iterable yielding ``(features, targets)`` mini-batches.
        device: Device the batches are moved to before the forward pass.
            When omitted, the device of the model's first parameter is used
            (CPU if the model has no parameters), so the function no longer
            depends on a notebook-level ``device`` variable.

    Returns:
        A scalar float tensor with the percentage of correctly classified
        examples, or ``0.0`` when the loader yields no examples.
    """
    if device is None:
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else torch.device("cpu")

    # Start from a tensor so `.float()` below also works for an empty loader.
    correct_pred = torch.zeros((), dtype=torch.long, device=device)
    num_examples = 0

    # Accuracy needs no gradients; disabling them avoids building autograd graphs.
    with torch.no_grad():
        for features, targets in data_loader:
            features = features.to(device)
            targets = targets.to(device)
            logits, probas = model(features)
            _, predicted_labels = torch.max(probas, 1)
            num_examples += targets.size(0)
            correct_pred += (predicted_labels == targets).sum()

    if num_examples == 0:
        return correct_pred.float()
    return correct_pred.float() / num_examples * 100

In [27]:
# Train the helper-class network: one optimizer step per mini-batch, a cost log
# line every 50 batches, and the training-set accuracy after every epoch.
for epoch in range(num_epochs):
    model = model.train()
    for batch_idx, (features, targets) in enumerate(train_loader):

        features = features.to(device)
        targets = targets.to(device)

        logits, probas = model(features)
        cost = F.cross_entropy(logits, targets)
        optimizer.zero_grad()

        cost.backward()
        optimizer.step()
        if not batch_idx % 50:
            # Report len(train_loader) as the batch total: it counts the final,
            # smaller batch, whereas len(train_dataset)//batch_size is one short
            # whenever the dataset size is not a multiple of batch_size.
            print ('Epoch: %03d/%03d | Batch %03d/%03d | Cost: %.4f'
                   %(epoch+1, num_epochs, batch_idx,
                     len(train_loader), cost))

    model = model.eval() # eval mode so batch norm uses its running statistics during inference
    with torch.set_grad_enabled(False): # save memory during inference
        print('Epoch: %03d/%03d training accuracy: %.2f%%' % (
              epoch+1, num_epochs,
              compute_accuracy(model, train_loader)))

Epoch: 001/010 | Batch 000/468 | Cost: 2.3534


Epoch: 001/010 | Batch 050/468 | Cost: 0.2685
Epoch: 001/010 | Batch 100/468 | Cost: 0.2464
Epoch: 001/010 | Batch 150/468 | Cost: 0.0995
Epoch: 001/010 | Batch 200/468 | Cost: 0.0619
Epoch: 001/010 | Batch 250/468 | Cost: 0.1123
Epoch: 001/010 | Batch 300/468 | Cost: 0.2530
Epoch: 001/010 | Batch 350/468 | Cost: 0.1477
Epoch: 001/010 | Batch 400/468 | Cost: 0.0631
Epoch: 001/010 | Batch 450/468 | Cost: 0.1083
Epoch: 001/010 training accuracy: 97.53%
Epoch: 002/010 | Batch 000/468 | Cost: 0.1181
Epoch: 002/010 | Batch 050/468 | Cost: 0.0374
Epoch: 002/010 | Batch 100/468 | Cost: 0.1096
Epoch: 002/010 | Batch 150/468 | Cost: 0.1729
Epoch: 002/010 | Batch 200/468 | Cost: 0.1072
Epoch: 002/010 | Batch 250/468 | Cost: 0.0343
Epoch: 002/010 | Batch 300/468 | Cost: 0.0258
Epoch: 002/010 | Batch 350/468 | Cost: 0.0444
Epoch: 002/010 | Batch 400/468 | Cost: 0.0288
Epoch: 002/010 | Batch 450/468 | Cost: 0.1070
Epoch: 002/010 training accuracy: 98.42%
Epoch: 003/010 | Batch 000/468 | Cost: 0.035

## **Evaluation**

In [28]:
# Evaluate on the held-out test set. Eval mode is set explicitly so the
# batch-norm layers use their running statistics even if the training cell was
# skipped or ran zero epochs, and gradient tracking is disabled.
model.eval()
with torch.no_grad():
    print('Test accuracy: %.2f%%' % (compute_accuracy(model, test_loader)))

Test accuracy: 98.33%
