<a href="https://colab.research.google.com/github/ishandahal/stats453-deep_learning_torch/blob/main/Conv/Convolutional_Resnet_Residual_block.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### **Convolutional ResNet and Residual Blocks**

This notebook does not implement the complete ResNet architecture; it only demonstrates the residual block described in [1].
- [1] He, Kaiming, et al. "Deep residual learning for image recognition." Proceedings of the IEEE conference on computer vision and pattern recognition. 2016.

### Imports

In [2]:
import time 
import numpy as np
import torch
import torch.nn.functional as F
from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision import transforms

# When a GPU is present, ask cuDNN to use deterministic algorithms so that
# repeated runs with the same seed are more reproducible.
if torch.cuda.is_available():
    torch.backends.cudnn.deterministic = True

## Settings and Dataset

In [3]:
## settings

## device
# Use the default CUDA device when a GPU is present. A hard-coded 'cuda:1'
# raises an invalid-device-ordinal error on single-GPU machines (e.g. Colab),
# where only index 0 exists.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

## Hyperparameters
random_seed = 123
learning_rate = 0.0001
# Misspelled alias kept on purpose: later cells still read `learnning_rate`.
learnning_rate = learning_rate
num_epochs = 10
batch_size = 128

## Architecture
num_classes = 10

# Note transforms.ToTensor() scales input images
# to 0-1 range
train_dataset = datasets.MNIST(root='data',
                               train=True,
                               transform=transforms.ToTensor(),
                               download=True)

# The test split reuses the files fetched by the training-set download above.
test_dataset = datasets.MNIST(root='data',
                              train=False,
                              transform=transforms.ToTensor())


# Shuffle only the training data; keep the test order fixed.
train_loader = DataLoader(dataset=train_dataset,
                          batch_size=batch_size,
                          shuffle=True)

test_loader = DataLoader(dataset=test_dataset,
                         batch_size=batch_size,
                         shuffle=False)

# Checking the dataset: pull a single batch and report its shapes
images, labels = next(iter(train_loader))
print('Image batch dimensions:', images.shape)
print('Image label dimensions:', labels.shape)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to data/MNIST/raw/train-images-idx3-ubyte.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting data/MNIST/raw/train-images-idx3-ubyte.gz to data/MNIST/raw
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to data/MNIST/raw/train-labels-idx1-ubyte.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting data/MNIST/raw/train-labels-idx1-ubyte.gz to data/MNIST/raw
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to data/MNIST/raw/t10k-images-idx3-ubyte.gz



HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting data/MNIST/raw/t10k-images-idx3-ubyte.gz to data/MNIST/raw
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to data/MNIST/raw/t10k-labels-idx1-ubyte.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting data/MNIST/raw/t10k-labels-idx1-ubyte.gz to data/MNIST/raw
Processing...
Done!


  return torch.from_numpy(parsed.astype(m[2], copy=False)).view(*s)



Image batch dimensions: torch.Size([128, 1, 28, 28])
Image label dimensions: torch.Size([128])


## ResNet with Identity blocks
The following code implements ResNet blocks with skip connections in which the input passed through the shortcut already has the same dimensions as the output of the main path, so it can be added unchanged. This allows each block to learn the identity function.

In [11]:
### Model 

class ConvNet(torch.nn.Module):
    """Two stacked identity residual blocks followed by a linear classifier.

    Each block widens its single input channel to 4 channels with a 1x1
    convolution, narrows back to 1 channel with a padded 3x3 convolution,
    and adds the untouched block input before the final ReLU. Neither the
    spatial size nor the channel count changes inside a block, so the skip
    connection needs no projection.

    Args:
        num_classes: number of logits produced by the final linear layer.
    """

    def __init__(self, num_classes):
        super().__init__()
        nn = torch.nn

        # ---- residual block 1 ----
        # 1 -> 4 channels; 1x1 kernel without padding keeps 28x28
        self.conv_1 = nn.Conv2d(1, 4, kernel_size=1, stride=1, padding=0)
        self.conv_1_bn = nn.BatchNorm2d(4)
        # 4 -> 1 channels; 3x3 kernel with padding 1 keeps 28x28
        self.conv_2 = nn.Conv2d(4, 1, kernel_size=3, stride=1, padding=1)
        self.conv_2_bn = nn.BatchNorm2d(1)

        # ---- residual block 2 (same layout as block 1) ----
        self.conv_3 = nn.Conv2d(1, 4, kernel_size=1, stride=1, padding=0)
        self.conv_3_bn = nn.BatchNorm2d(4)
        self.conv_4 = nn.Conv2d(4, 1, kernel_size=3, stride=1, padding=1)
        self.conv_4_bn = nn.BatchNorm2d(1)

        # ---- classifier over the flattened 28*28*1 feature map ----
        self.linear_1 = nn.Linear(28 * 28 * 1, num_classes)

    def forward(self, x):
        """Run both residual blocks and classify.

        Args:
            x: image batch; the layer sizes above require one input channel
                and 28*28 values per example after flattening.

        Returns:
            Tuple `(logits, probas)`, where `probas` is the softmax of
            `logits` over dimension 1.
        """
        batch = x.size(0)

        # residual block 1: main path, then add the block input and activate
        residual = x
        h = F.relu(self.conv_1_bn(self.conv_1(x)))
        h = self.conv_2_bn(self.conv_2(h))
        h = F.relu(h + residual)

        # residual block 2: same pattern, fed by the output of block 1
        residual = h
        h = F.relu(self.conv_3_bn(self.conv_3(h)))
        h = self.conv_4_bn(self.conv_4(h))
        h = F.relu(h + residual)

        # flatten each example and classify
        logits = self.linear_1(h.view(batch, -1))
        probas = F.softmax(logits, dim=1)
        return logits, probas

# Seed the global RNG first so the freshly built network starts from
# reproducible weights, then place the network on the selected device.
torch.manual_seed(random_seed)
model = ConvNet(num_classes).to(device)

# Adam over every model parameter, using the step size from the settings cell.
optimizer = torch.optim.Adam(params=model.parameters(), lr=learnning_rate)

## Training

In [12]:
def compute_accuracy(model, data_loader, device=None):
    """Return the classification accuracy of `model` over `data_loader`, in percent.

    Args:
        model: module whose forward pass returns a `(logits, probas)` pair.
        data_loader: iterable yielding `(features, targets)` batches.
        device: device the batches are moved to before the forward pass.
            When None (the default), the device of the model's first
            parameter is used; for a model without parameters the batches
            are left where they are.

    Returns:
        A scalar float tensor holding the percentage of correct predictions.
        If no examples were seen (e.g. an empty `data_loader`), the result
        is 0.0 instead of an error.
    """
    if device is None:
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else None

    correct_pred, num_examples = 0, 0
    # Scoring never needs gradients; disabling them here protects callers
    # that forget to wrap the call in their own no_grad block.
    with torch.no_grad():
        for features, targets in data_loader:
            if device is not None:
                features = features.to(device)
                targets = targets.to(device)
            logits, probas = model(features)
            _, predicted_labels = torch.max(probas, 1)
            num_examples += targets.size(0)
            correct_pred += (predicted_labels == targets).sum()

    # With nothing to score, `correct_pred` may still be the int 0 and the
    # division below would be by zero, so return a defined value instead.
    if num_examples == 0:
        return torch.tensor(0.0)
    return correct_pred.float() / num_examples * 100


# Train for `num_epochs` passes over `train_loader`, logging the mini-batch
# loss every 50 batches and the training-set accuracy after every epoch.
start_time = time.time()
for epoch in range(num_epochs):
    model.train()  # training mode for this epoch's parameter updates
    for batch_idx, (features, targets) in enumerate(train_loader):
        features, targets = features.to(device), targets.to(device)

        # forward pass and loss
        logits, probas = model(features)
        cost = F.cross_entropy(logits, targets)

        # clear stale gradients, backpropagate, then update the parameters
        optimizer.zero_grad()
        cost.backward()
        optimizer.step()

        # report every 50th mini-batch, starting with batch 0
        if batch_idx % 50 == 0:
            print('Epoch: %03d/%03d | Batch %03d/%03d | Cost: %.4f'
                  % (epoch + 1, num_epochs, batch_idx, len(train_loader), cost))

    # eval mode so batch norm is not updated while scoring; gradients are
    # switched off because scoring only needs forward passes
    model.eval()
    with torch.no_grad():
        train_acc = compute_accuracy(model, train_loader)
    print('Epoch: %03d/%03d training accuracy: %.2f%%'
          % (epoch + 1, num_epochs, train_acc))

    print('Time elapsed: %.2f min' % ((time.time() - start_time)/60))

print('Total Training Time: %.2f min' % ((time.time() - start_time)/60))

Epoch: 001/010 | Batch 000/469 | Cost: 2.6938
Epoch: 001/010 | Batch 050/469 | Cost: 2.0262
Epoch: 001/010 | Batch 100/469 | Cost: 1.3531
Epoch: 001/010 | Batch 150/469 | Cost: 0.9772
Epoch: 001/010 | Batch 200/469 | Cost: 0.8662
Epoch: 001/010 | Batch 250/469 | Cost: 0.6839
Epoch: 001/010 | Batch 300/469 | Cost: 0.6898
Epoch: 001/010 | Batch 350/469 | Cost: 0.4935
Epoch: 001/010 | Batch 400/469 | Cost: 0.5841
Epoch: 001/010 | Batch 450/469 | Cost: 0.4322
Epoch: 001/010 training accuracy: 86.96%
Time elapsed: 0.79 min
Epoch: 002/010 | Batch 000/469 | Cost: 0.6186
Epoch: 002/010 | Batch 050/469 | Cost: 0.5035
Epoch: 002/010 | Batch 100/469 | Cost: 0.4114
Epoch: 002/010 | Batch 150/469 | Cost: 0.5748
Epoch: 002/010 | Batch 200/469 | Cost: 0.4316
Epoch: 002/010 | Batch 250/469 | Cost: 0.5295
Epoch: 002/010 | Batch 300/469 | Cost: 0.3943
Epoch: 002/010 | Batch 350/469 | Cost: 0.4045
Epoch: 002/010 | Batch 400/469 | Cost: 0.3329
Epoch: 002/010 | Batch 450/469 | Cost: 0.3336
Epoch: 002/010 t

## Evaluation 

In [13]:
# Score the held-out set in inference mode. Setting eval() here means the cell
# no longer relies on the state left behind by the training cell, and
# no_grad() skips autograd bookkeeping that a pure forward pass never uses.
model.eval()
with torch.no_grad():
    print("Test accuracy: %.2f%% " % compute_accuracy(model, test_loader))

Test accuracy: 92.69% 


### ResNet with convolution blocks for resizing

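In this variant the main path changes both the spatial size and the number of channels, so the input cannot be added to it unchanged. The shortcut therefore passes the input through its own strided 1x1 convolution (followed by batch normalization), which resizes it to match the dimensions of the main path's output.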

In [14]:
class ResNetBlock(torch.nn.Module):
    """Residual block whose shortcut is resized by a convolution.

    Main path:     3x3 conv (stride 2, padding 1) -> BN -> ReLU -> 1x1 conv -> BN
    Shortcut path: 1x1 conv (stride 2) -> BN

    The two paths are summed and passed through a ReLU. Both stride-2
    convolutions shrink the spatial size identically, and both paths end
    with `channels[2]` channels, so the sum is well defined.

    Args:
        channels: indexable whose first three entries are the input,
            intermediate and output channel counts.
    """

    def __init__(self, channels):
        super().__init__()
        c_in, c_mid, c_out = channels[0], channels[1], channels[2]
        nn = torch.nn

        # main path, step 1: downsample and move c_in -> c_mid channels
        self.conv_1 = nn.Conv2d(c_in, c_mid, kernel_size=3, stride=2, padding=1)
        self.conv_1_bn = nn.BatchNorm2d(c_mid)

        # main path, step 2: pointwise conv, c_mid -> c_out channels
        self.conv_2 = nn.Conv2d(c_mid, c_out, kernel_size=1, stride=1, padding=0)
        self.conv_2_bn = nn.BatchNorm2d(c_out)

        # shortcut: strided pointwise conv straight from c_in -> c_out
        self.conv_shortcut_1 = nn.Conv2d(c_in, c_out, kernel_size=1, stride=2, padding=0)
        self.conv_shortcut_1_bn = nn.BatchNorm2d(c_out)

    def forward(self, x):
        """Return ReLU(main_path(x) + shortcut_path(x))."""
        # main path
        main = F.relu(self.conv_1_bn(self.conv_1(x)))
        main = self.conv_2_bn(self.conv_2(main))

        # shortcut projected so its shape matches the main path
        projected = self.conv_shortcut_1_bn(self.conv_shortcut_1(x))

        return F.relu(main + projected)

In [15]:
### Model 

class ConvNet(torch.nn.Module):
    """Classifier made of two `ResNetBlock`s and one linear layer.

    The blocks map 1 -> 8 and then 8 -> 32 channels. The linear layer expects
    32*7*7 flattened features per example.

    NOTE(review): this class reuses the name of the `ConvNet` defined earlier
    in the notebook and replaces it once this cell runs.

    Args:
        num_classes: number of logits produced by the final linear layer.
    """

    def __init__(self, num_classes):
        super().__init__()
        flat_features = 32 * 7 * 7

        self.residual_block_1 = ResNetBlock(channels=[1, 4, 8])
        self.residual_block_2 = ResNetBlock(channels=[8, 16, 32])

        self.linear_1 = torch.nn.Linear(flat_features, num_classes)

    def forward(self, x):
        """Return `(logits, probas)`; `probas` is the softmax of `logits` over dim 1."""
        features = self.residual_block_2(self.residual_block_1(x))

        # keep the batch dimension, flatten everything after it
        logits = self.linear_1(features.flatten(start_dim=1))
        probas = F.softmax(logits, dim=1)

        return logits, probas

# Re-seed so this second network also starts from reproducible weights,
# then build it and place it on the selected device.
torch.manual_seed(random_seed)
model = ConvNet(num_classes=num_classes).to(device)

# Fresh Adam optimizer bound to the new model's parameters.
optimizer = torch.optim.Adam(params=model.parameters(), lr=learnning_rate)

In [16]:
def compute_accuracy(model, data_loader, device=None):
    """Return the classification accuracy of `model` over `data_loader`, in percent.

    Args:
        model: module whose forward pass returns a `(logits, probas)` pair.
        data_loader: iterable yielding `(features, targets)` batches.
        device: device the batches are moved to before the forward pass.
            When None (the default), the device of the model's first
            parameter is used; for a model without parameters the batches
            are left where they are.

    Returns:
        A scalar float tensor holding the percentage of correct predictions.
        If no examples were seen (e.g. an empty `data_loader`), the result
        is 0.0 instead of an error.
    """
    if device is None:
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else None

    correct_pred, num_examples = 0, 0
    # Scoring never needs gradients; disabling them here protects callers
    # that forget to wrap the call in their own no_grad block.
    with torch.no_grad():
        for features, targets in data_loader:
            if device is not None:
                features = features.to(device)
                targets = targets.to(device)
            logits, probas = model(features)
            _, predicted_labels = torch.max(probas, 1)
            num_examples += targets.size(0)
            correct_pred += (predicted_labels == targets).sum()

    # With nothing to score, `correct_pred` may still be the int 0 and the
    # division below would be by zero, so return a defined value instead.
    if num_examples == 0:
        return torch.tensor(0.0)
    return correct_pred.float() / num_examples * 100


# Train for `num_epochs` passes over `train_loader`, logging the mini-batch
# loss every 50 batches and the training-set accuracy after every epoch.
start_time = time.time()
for epoch in range(num_epochs):
    model.train()  # training mode for this epoch's parameter updates
    for batch_idx, (features, targets) in enumerate(train_loader):
        features, targets = features.to(device), targets.to(device)

        # forward pass and loss
        logits, probas = model(features)
        cost = F.cross_entropy(logits, targets)

        # clear stale gradients, backpropagate, then update the parameters
        optimizer.zero_grad()
        cost.backward()
        optimizer.step()

        # report every 50th mini-batch, starting with batch 0
        if batch_idx % 50 == 0:
            print('Epoch: %03d/%03d | Batch %03d/%03d | Cost: %.4f'
                  % (epoch + 1, num_epochs, batch_idx, len(train_loader), cost))

    # eval mode so batch norm is not updated while scoring; gradients are
    # switched off because scoring only needs forward passes
    model.eval()
    with torch.no_grad():
        train_acc = compute_accuracy(model, train_loader)
    print('Epoch: %03d/%03d training accuracy: %.2f%%'
          % (epoch + 1, num_epochs, train_acc))

    print('Time elapsed: %.2f min' % ((time.time() - start_time)/60))

print('Total Training Time: %.2f min' % ((time.time() - start_time)/60))

Epoch: 001/010 | Batch 000/469 | Cost: 2.4548
Epoch: 001/010 | Batch 050/469 | Cost: 1.6318
Epoch: 001/010 | Batch 100/469 | Cost: 1.1419
Epoch: 001/010 | Batch 150/469 | Cost: 0.8564
Epoch: 001/010 | Batch 200/469 | Cost: 0.7523
Epoch: 001/010 | Batch 250/469 | Cost: 0.5760
Epoch: 001/010 | Batch 300/469 | Cost: 0.4550
Epoch: 001/010 | Batch 350/469 | Cost: 0.3929
Epoch: 001/010 | Batch 400/469 | Cost: 0.3879
Epoch: 001/010 | Batch 450/469 | Cost: 0.3932
Epoch: 001/010 training accuracy: 90.54%
Time elapsed: 0.50 min
Epoch: 002/010 | Batch 000/469 | Cost: 0.4230
Epoch: 002/010 | Batch 050/469 | Cost: 0.4022
Epoch: 002/010 | Batch 100/469 | Cost: 0.2546
Epoch: 002/010 | Batch 150/469 | Cost: 0.3138
Epoch: 002/010 | Batch 200/469 | Cost: 0.3389
Epoch: 002/010 | Batch 250/469 | Cost: 0.2874
Epoch: 002/010 | Batch 300/469 | Cost: 0.2350
Epoch: 002/010 | Batch 350/469 | Cost: 0.3111
Epoch: 002/010 | Batch 400/469 | Cost: 0.2538
Epoch: 002/010 | Batch 450/469 | Cost: 0.2186
Epoch: 002/010 t

In [18]:
# Score the held-out set in inference mode. Setting eval() here means the cell
# no longer relies on the state left behind by the training cell, and
# no_grad() skips autograd bookkeeping that a pure forward pass never uses.
model.eval()
with torch.no_grad():
    print(f"Test accuracy: {compute_accuracy(model, test_loader):.2f}%")

Test accuracy: 97.81%
