# Resnet34

## Import the required libraries

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from collections import OrderedDict
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, random_split
from tqdm import tqdm

## Set device to CUDA

If our system supports GPU, we can accelerate the training and testing greatly by utilizing graphics cards.

In [2]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

print(device)

cuda


## Let's define the building block of the model


### Why the downsample condition block?

In the code below, we defined the downsample block first as not layers have it.

The resnet starts with an initialization block, followed by 4 layers, each of which consisting of sub layers. Only each of the beginning layers consists of the downsample function.

### Common layers

Then we have defined the common layers in the model. The sequence is as follows:
1. Conv layer of kernel size 3
2. Batch Normalization
3. ReLU activation function
4. Conv layer of kernel size 3
5. Batch Normalization
6. (optional) Downsampling

### Hyperparameters

There parameters aren't modified during training. I have set them as per the implementation used in resnet34 version imported from torchvision.

In [3]:
class Block(nn.Module):
    def __init__(self, in_c, out_c, stride=1, downsample=False):
        super().__init__()
        
        self.conv1 = nn.Conv2d(in_c, out_c, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(out_c, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = nn.Conv2d(out_c, out_c, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(out_c, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)

        if downsample != False:
            self.downsample = nn.Sequential(
                nn.Conv2d(in_c, out_c, kernel_size=3, stride=stride, padding=1, bias=False),
                nn.BatchNorm2d(out_c, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
            )
        
        else:
            self.downsample = None

    def forward(self, x):
        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)

        if self.downsample != None:
            i = self.downsample(x)
            out += i

        return out
        

## Let's define the resnet34

### First we define the initial layers

The initial layers consist of:
1. Conv layer 1 of kernel size 7
2. Batch Normalization
3. ReLU Activation function
4. Max Pooling layer

### Define a function _make_layers

We define a dictionary where we keep adding layers by calling the block class. We then return them as a sequence of layers.

### Stack all the layers

We then stack all the layers. Now our model is ready.

In [7]:
class resnet34(nn.Module):
    def __init__(self, out_class):
        super().__init__()

        self.out_class = out_class
        
        self.initial = nn.Sequential(
            nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3, bias=False),
            nn.BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False),
        )

        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(in_features=512, out_features=self.out_class, bias=True)
        
        self.layer_count = [3, 4, 6, 3]
        self.channels = [64, 64, 128, 256, 512]

        self.layer_1 = self._make_layers(0)
        self.layer_2 = self._make_layers(1, True)
        self.layer_3 = self._make_layers(2, True)
        self.layer_4 = self._make_layers(3, True)
        
    def _make_layers(self, c, first=False):
        layers = OrderedDict()
        
        for i in range(self.layer_count[c]):
            if first==True:
                x = c
                downsample = True
                stride=2
            else:
                x = c+1
                downsample = False
                stride=1
            layers[f'sub_layer_{i}'] = (Block(self.channels[x], self.channels[c+1], stride=stride, downsample=downsample))
            first = False

        return nn.Sequential(layers)
        
    def forward(self, x):
        x = self.initial(x)
        x = self.layer_1(x)
        x = self.layer_2(x)
        x = self.layer_3(x)
        x = self.layer_4(x)
        x = self.avgpool(x)
        x = torch.flatten(x, 1)
        x = self.fc(x)       

        return x

## Let's test our code on a dataset

### Let's load our data
We will work on the MNIST dataset by importing it from the torchvision library. We will divide the training dataset for training and validations purposes in the 7:3 ratio.
We also apply transforms to our data, by resizing the data to size 224x224. Then the most important part, converting images to a tensor to make it usable for training and testing purposes.

In [5]:
transform = transforms.Compose([
    transforms.Resize(224),
    transforms.ToTensor(),
])

train_dataset = datasets.MNIST(
    root='./data',
    train=True,
    download=True,
    transform=transform,
)

test_dataset = datasets.MNIST(
    root='./data',
    train=False,
    download=True,
    transform=transform,
)

train_original_size = len(train_dataset)
train_size = int(0.7 * train_original_size)
val_size = int(0.3 * train_original_size)

train_dataset, val_dataset = random_split(train_dataset, [train_size, val_size])

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz to ./data/MNIST/raw/train-images-idx3-ubyte.gz


100%|██████████| 9912422/9912422 [00:00<00:00, 12722680.98it/s]


Extracting ./data/MNIST/raw/train-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-labels-idx1-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-labels-idx1-ubyte.gz to ./data/MNIST/raw/train-labels-idx1-ubyte.gz


100%|██████████| 28881/28881 [00:00<00:00, 337187.70it/s]


Extracting ./data/MNIST/raw/train-labels-idx1-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw/t10k-images-idx3-ubyte.gz


100%|██████████| 1648877/1648877 [00:00<00:00, 3189851.44it/s]


Extracting ./data/MNIST/raw/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-labels-idx1-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz


100%|██████████| 4542/4542 [00:00<00:00, 2332337.02it/s]

Extracting ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw






### Training and testing

Now we are ready to train and test our dataset

In [6]:
if __name__ == "__main__":
    model = resnet34(10).to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.00005, weight_decay=1e-5)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=10, threshold=0.0001, threshold_mode='abs')

    num_epochs = 20

    for epoch in range(num_epochs):
        model.train()
        running_loss = 0
        correct_predictions = 0
        total_samples = 0

        with tqdm(total=len(train_loader), desc=f"Epoch: {epoch}/{num_epochs}", unit="batch") as pbar:
            for images, labels in train_loader:
                images, labels = images.to(device), labels.to(device)
                optimizer.zero_grad()
                outputs = model(images)
                loss = criterion(outputs, labels)
                loss.backward()
                optimizer.step()
                running_loss += loss.item()

                _, predicted = torch.max(outputs, 1)
                correct_predictions += (predicted == labels).sum().item()
                total_samples += labels.size(0)

                pbar.set_postfix(loss=loss.item())
                pbar.update(1)

            avg_loss = running_loss / len(train_loader)
            train_accuracy = 100 * correct_predictions / total_samples

        model.eval()
        val_loss = 0.0
        val_correct = 0
        val_total = 0

        with torch.no_grad():
            for images, labels in val_loader:
                images, labels = images.to(device), labels.to(device)
                outputs = model(images)
                loss = criterion(outputs, labels)
                val_loss += loss.item()

                _, predicted = torch.max(outputs, 1)
                val_correct += (predicted == labels).sum().item()
                val_total += labels.size(0)

        avg_val_loss = val_loss / len(val_loader)
        val_accuracy = 100 * val_correct / val_total

        scheduler.step(avg_val_loss)  # Adjust learning rate based on validation loss

        print(f"Epoch [{epoch+1}/{num_epochs}], Train Loss: {avg_loss:.4f}, Train Acc: {train_accuracy:.2f}%, Val Loss: {avg_val_loss:.4f}, Val Acc: {val_accuracy:.2f}%")


Epoch: 0/20: 100%|██████████| 1313/1313 [02:16<00:00,  9.59batch/s, loss=0.0447] 


Epoch [1/20], Train Loss: 0.1455, Train Acc: 95.62%, Val Loss: 0.0750, Val Acc: 97.77%


Epoch: 1/20: 100%|██████████| 1313/1313 [02:15<00:00,  9.67batch/s, loss=0.0453] 


Epoch [2/20], Train Loss: 0.0580, Train Acc: 98.27%, Val Loss: 0.0575, Val Acc: 98.24%


Epoch: 2/20: 100%|██████████| 1313/1313 [02:15<00:00,  9.66batch/s, loss=0.0149] 


Epoch [3/20], Train Loss: 0.0524, Train Acc: 98.43%, Val Loss: 0.0459, Val Acc: 98.62%


Epoch: 3/20: 100%|██████████| 1313/1313 [02:15<00:00,  9.66batch/s, loss=0.0666] 


Epoch [4/20], Train Loss: 0.0470, Train Acc: 98.55%, Val Loss: 0.0775, Val Acc: 97.58%


Epoch: 4/20: 100%|██████████| 1313/1313 [02:15<00:00,  9.67batch/s, loss=0.0257]  


Epoch [5/20], Train Loss: 0.0421, Train Acc: 98.72%, Val Loss: 0.0409, Val Acc: 98.82%


Epoch: 5/20: 100%|██████████| 1313/1313 [02:15<00:00,  9.68batch/s, loss=0.00294] 


Epoch [6/20], Train Loss: 0.0365, Train Acc: 98.88%, Val Loss: 0.0315, Val Acc: 99.07%


Epoch: 6/20: 100%|██████████| 1313/1313 [02:15<00:00,  9.67batch/s, loss=0.0257]  


Epoch [7/20], Train Loss: 0.0343, Train Acc: 98.91%, Val Loss: 0.0529, Val Acc: 98.37%


Epoch: 7/20: 100%|██████████| 1313/1313 [02:15<00:00,  9.67batch/s, loss=0.0191]  


Epoch [8/20], Train Loss: 0.0327, Train Acc: 99.02%, Val Loss: 0.0322, Val Acc: 99.09%


Epoch: 8/20:  16%|█▌        | 207/1313 [00:21<01:55,  9.60batch/s, loss=0.00144] 


KeyboardInterrupt: 

In [11]:
from torchvision.models import resnet34

model = resnet34()

print(model)

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  