<a href="https://colab.research.google.com/github/WhatRaSudeep/SAiDL-Spring-Assignment-2024/blob/main/cifar_VGG16.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
from torchvision.transforms import ToTensor

In [3]:
import torch
from torch import nn
from torch.optim import Optimizer
from torchvision.datasets import CIFAR10
from torchvision.transforms import Compose, RandomCrop, RandomHorizontalFlip, ToTensor
from torch.optim.lr_scheduler import MultiStepLR
from torch.utils.data import DataLoader

from tqdm.auto import tqdm


def train(
  model: nn.Module,
  dataloader: DataLoader,
  criterion: nn.Module,
  optimizer: Optimizer,
) -> None:
  model.train()

  for inputs, targets in tqdm(dataloader, desc='train', leave=False):
    # Move the data from CPU to GPU
    inputs = inputs.cuda()
    targets = targets.cuda()

    # Reset the gradients (from the last iteration)
    optimizer.zero_grad()

    # Forward inference
    outputs = model(inputs)
    loss = criterion(outputs, targets)

    # Backward propagation
    loss.backward()

    # Update optimizer
    optimizer.step()

@torch.inference_mode()
def evaluate(
  model: nn.Module,
  dataloader: DataLoader,
  verbose=True,
) -> float:
  model.eval()

  num_samples = 0
  num_correct = 0

  for inputs, targets in tqdm(dataloader, desc="eval", leave=False,
                              disable=not verbose):
    # Move the data from CPU to GPU
    inputs = inputs.cuda()
    targets = targets.cuda()

    # Inference
    outputs = model(inputs)

    # Convert logits to class indices
    outputs = outputs.argmax(dim=1)

    # Update metrics
    num_samples += targets.size(0)
    num_correct += (outputs == targets).sum()

  return (num_correct / num_samples * 100).item()

def get_sparsity(tensor: torch.Tensor) -> float:
    """
    calculate the sparsity of the given tensor
        sparsity = #zeros / #elements = 1 - #nonzeros / #elements
    """
    return 1 - float(tensor.count_nonzero()) / tensor.numel()


def get_model_sparsity(model: nn.Module) -> float:
    """
    calculate the sparsity of the given model
        sparsity = #zeros / #elements = 1 - #nonzeros / #elements
    """
    num_nonzeros, num_elements = 0, 0
    for param in model.parameters():
        num_nonzeros += param.count_nonzero()
        num_elements += param.numel()
    return 1 - float(num_nonzeros) / num_elements

def get_num_parameters(model: nn.Module, count_nonzero_only=False) -> int:
    """
    calculate the total number of parameters of model
    :param count_nonzero_only: only count nonzero weights
    """
    num_counted_elements = 0
    for param in model.parameters():
        if count_nonzero_only:
            num_counted_elements += param.count_nonzero()
        else:
            num_counted_elements += param.numel()
    return num_counted_elements


def get_model_size(model: nn.Module, data_width=32, count_nonzero_only=False) -> int:
    """
    calculate the model size in bits
    :param data_width: #bits per element
    :param count_nonzero_only: only count nonzero weights
    """
    return get_num_parameters(model, count_nonzero_only) * data_width

Byte = 8
KiB = 1024 * Byte
MiB = 1024 * KiB
GiB = 1024 * MiB

#Ideal Dataset and Dataloader organization, don't hesitate to do something else
image_size = 32
transforms = {
    "train": Compose([
        RandomCrop(image_size, padding=4),
        RandomHorizontalFlip(),
        ToTensor(),
    ]),
    "test": ToTensor(),
}
dataset = {}
for split in ["train", "test"]:
  dataset[split] = CIFAR10(
    root="data/cifar10",
    train=(split == "train"),
    download=True,
    transform=transforms[split],
  )
# dataloader = {}
# for split in ['train', 'test']:
#   dataloader[split] = DataLoader(
#     dataset[split],
#     batch_size=512,
#     shuffle=(split == 'train'),
#     num_workers=0,
#     pin_memory=True,
#   )

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to data/cifar10/cifar-10-python.tar.gz


100%|██████████| 170498071/170498071 [00:10<00:00, 16470912.59it/s]


Extracting data/cifar10/cifar-10-python.tar.gz to data/cifar10
Files already downloaded and verified


In [None]:
# training_data = datasets.CIFAR10(
#     root="data",
#     train=True,
#     download=True,
#     transform=ToTensor()
# )
# testing_data = datasets.CIFAR10(
#     root="data",
#     train=False,
#     download=True,
#     transform=ToTensor()
# )
# validation_data, training_data = torch.utils.data.random_split(training_data, [5000, 45000])


Files already downloaded and verified
Files already downloaded and verified


In [4]:
dataset['validation'], dataset['train'] = torch.utils.data.random_split(dataset['train'], [5000, 45000])

In [5]:
dataloader = {}
for split in ['train', 'test', 'validation']:
  dataloader[split] = DataLoader(
    dataset[split],
    batch_size=512,
    shuffle=(split == 'train'),
    num_workers=0,
    pin_memory=True,
  )

In [6]:
# batch_size = 64
# train_dataloader = DataLoader(training_data, batch_size=batch_size)
# test_dataloader = DataLoader(testing_data, batch_size=batch_size)
# validation_dataloader = DataLoader(validation_data, batch_size=batch_size)
for x,y in dataloader['train']:
    print("Shape of X [N, C, H, W]: ", x.shape)
    print("Shape of Y: ", y.shape, y.dtype)
    break

Shape of X [N, C, H, W]:  torch.Size([512, 3, 32, 32])
Shape of Y:  torch.Size([512]) torch.int64


In [22]:
class VGG16(nn.Module):
    def __init__(self, num_classes=10):
        super(VGG16, self).__init__()
        self.layer1 = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU())
        self.layer2 = nn.Sequential(
            nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size = 2, stride = 2))
        self.layer3 = nn.Sequential(
            nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU())
        self.layer4 = nn.Sequential(
            nn.Conv2d(128, 128, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size = 2, stride = 2))
        self.layer5 = nn.Sequential(
            nn.Conv2d(128, 256, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(256),
            nn.ReLU())
        self.layer6 = nn.Sequential(
            nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(256),
            nn.ReLU())
        self.layer7 = nn.Sequential(
            nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(256),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size = 2, stride = 2))
        self.layer8 = nn.Sequential(
            nn.Conv2d(256, 512, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(512),
            nn.ReLU())
        self.layer9 = nn.Sequential(
            nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(512),
            nn.ReLU())
        self.layer10 = nn.Sequential(
            nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(512),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size = 2, stride = 2))
        self.layer11 = nn.Sequential(
            nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(512),
            nn.ReLU())
        self.layer12 = nn.Sequential(
            nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(512),
            nn.ReLU())
        self.layer13 = nn.Sequential(
            nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(512),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size = 2, stride = 2),
            nn.AvgPool2d(kernel_size=1, stride=1))
        self.fc14 = nn.Sequential(
            nn.Linear(512, 4096),
            nn.Dropout(0.5),
            nn.ReLU())
        self.fc15 = nn.Sequential(
            nn.Dropout(0.5),
            nn.Linear(4096, 4096),
            nn.ReLU())
        self.fc16= nn.Sequential(
            nn.Linear(4096, num_classes))

    def forward(self, x):
        out = self.layer1(x)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        out = self.layer5(out)
        out = self.layer6(out)
        out = self.layer7(out)
        out = self.layer8(out)
        out = self.layer9(out)
        out = self.layer10(out)
        out = self.layer11(out)
        out = self.layer12(out)
        out = self.layer13(out)
        out = out.reshape(out.size(0), -1)
        out = self.fc14(out)
        out = self.fc15(out)
        out = self.fc16(out)
        return out



In [8]:
dev = ('cuda' if torch.cuda.is_available() else 'cpu')
print(dev)

cuda


In [23]:
num_classes = 10
num_epochs= 20
learning_rate = 0.001
model = VGG16(num_classes).to(device=dev)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

total_step = len(dataloader['train'])
total_step

88

In [11]:
!pip install torchinfo
from torchinfo import summary




In [24]:
summary(model, input_size=(512, 3, 32, 32))

Layer (type:depth-idx)                   Output Shape              Param #
VGG16                                    [512, 10]                 --
├─Sequential: 1-1                        [512, 64, 32, 32]         --
│    └─Conv2d: 2-1                       [512, 64, 32, 32]         1,792
│    └─BatchNorm2d: 2-2                  [512, 64, 32, 32]         128
│    └─ReLU: 2-3                         [512, 64, 32, 32]         --
├─Sequential: 1-2                        [512, 64, 16, 16]         --
│    └─Conv2d: 2-4                       [512, 64, 32, 32]         36,928
│    └─BatchNorm2d: 2-5                  [512, 64, 32, 32]         128
│    └─ReLU: 2-6                         [512, 64, 32, 32]         --
│    └─MaxPool2d: 2-7                    [512, 64, 16, 16]         --
├─Sequential: 1-3                        [512, 128, 16, 16]        --
│    └─Conv2d: 2-8                       [512, 128, 16, 16]        73,856
│    └─BatchNorm2d: 2-9                  [512, 128, 16, 16]        256
│

In [25]:
for epoch in range(num_epochs):
    for i,(images, labels) in enumerate(dataloader['train']):
        images = images.to(device=dev)
        labels = labels.to(device=dev)
        outputs = model.forward(images)
        loss = criterion(outputs, labels) #this is the one that calculates the loss between your result and the testing data
        optimizer.zero_grad() #this is the one that zeros the gradient
        loss.backward() #this is the one that calculates the gradient
        optimizer.step() #this is the one that updates the weights

    print ('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'
                .format(epoch+1, num_epochs, i+1, total_step, loss.item()))
    with torch.no_grad():
        correct = 0
        total = 0
        for images, labels in dataloader['validation']:
            images = images.to(device = dev)
            labels = labels.to(device = dev)
            outputs = model(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
            del images, labels, outputs

        print('Accuracy of the network on the {} validation images: {} %'.format(5000, 100 * correct / total))



Epoch [1/20], Step [88/88], Loss: 1.9472
Accuracy of the network on the 5000 validation images: 17.96 %
Epoch [2/20], Step [88/88], Loss: 1.8275
Accuracy of the network on the 5000 validation images: 23.96 %
Epoch [3/20], Step [88/88], Loss: 1.6643
Accuracy of the network on the 5000 validation images: 31.68 %
Epoch [4/20], Step [88/88], Loss: 1.4799
Accuracy of the network on the 5000 validation images: 41.58 %
Epoch [5/20], Step [88/88], Loss: 1.3134
Accuracy of the network on the 5000 validation images: 47.02 %
Epoch [6/20], Step [88/88], Loss: 1.1293
Accuracy of the network on the 5000 validation images: 57.5 %
Epoch [7/20], Step [88/88], Loss: 1.0139
Accuracy of the network on the 5000 validation images: 61.6 %
Epoch [8/20], Step [88/88], Loss: 0.9582
Accuracy of the network on the 5000 validation images: 65.52 %
Epoch [9/20], Step [88/88], Loss: 0.8555
Accuracy of the network on the 5000 validation images: 70.28 %
Epoch [10/20], Step [88/88], Loss: 0.8479
Accuracy of the network 

In [26]:
with torch.no_grad():
    correct = 0
    total = 0
    for images, labels in dataloader['test']:
        images = images.to(device =dev)
        labels = labels.to(device = dev)
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
        del images, labels, outputs

    print('Accuracy of the network on the {} test images: {} %'.format(10000, 100 * correct / total))

Accuracy of the network on the 10000 test images: 82.9 %


In [27]:
torch.save(model, "modelFinal.pth")

In [29]:
torch.save(model.state_dict(), "modelsaidlFinal.pth")

In [37]:
def get_num_parameters(model: nn.Module, count_nonzero_only=False) -> int:
    """
    calculate the total number of parameters of model
    :param count_nonzero_only: only count nonzero weights
    """
    num_counted_elements = 0
    for param in model.parameters():
        if count_nonzero_only:
            num_counted_elements += param.count_nonzero()
        else:
            num_counted_elements += param.numel()
    return num_counted_elements


def get_model_size(model: nn.Module, data_width=32, count_nonzero_only=False) -> int:
    """
    calculate the model size in bits
    :param data_width: #bits per element
    :param count_nonzero_only: only count nonzero weights
    """
    return get_num_parameters(model, count_nonzero_only) * data_width

Byte = 8
KiB = 1024 * Byte
MiB = 1024 * KiB
GiB = 1024 * MiB
mb = get_model_size(model)/MiB
print(f"Model size: {mb}MB")

Model size: 128.35184478759766MB


In [38]:
from torchsummary import summary
summary(model, (3,32,32))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 64, 32, 32]           1,792
       BatchNorm2d-2           [-1, 64, 32, 32]             128
              ReLU-3           [-1, 64, 32, 32]               0
            Conv2d-4           [-1, 64, 32, 32]          36,928
       BatchNorm2d-5           [-1, 64, 32, 32]             128
              ReLU-6           [-1, 64, 32, 32]               0
         MaxPool2d-7           [-1, 64, 16, 16]               0
            Conv2d-8          [-1, 128, 16, 16]          73,856
       BatchNorm2d-9          [-1, 128, 16, 16]             256
             ReLU-10          [-1, 128, 16, 16]               0
           Conv2d-11          [-1, 128, 16, 16]         147,584
      BatchNorm2d-12          [-1, 128, 16, 16]             256
             ReLU-13          [-1, 128, 16, 16]               0
        MaxPool2d-14            [-1, 12