In [1]:
import os
from google.colab import drive
print(os.getcwd())
drive.mount('/content/drive')

/content
Mounted at /content/drive


In [2]:
# PyTorch Imports
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torch.nn.quantized as nnq
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
import torch.optim as optim
from torch.optim import lr_scheduler

In [3]:
# Data transformation and loading
transform = transforms.Compose(
    [transforms.Resize((224, 224)), transforms.ToTensor(),
     transforms.Normalize((0.4914, 0.4822, 0.4465), (0.247, 0.243, 0.261))])

# Download CIFAR-10 dataset
trainset = datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
trainloader = DataLoader(trainset, batch_size=32, shuffle=True, num_workers=2)

testset = datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)
testloader = DataLoader(testset, batch_size=32, shuffle=False, num_workers=2)

calibration_dataset = datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
calibration_subset = torch.utils.data.Subset(calibration_dataset, torch.randperm(len(calibration_dataset))[:1000])
calibration_loader = DataLoader(calibration_subset, batch_size=32, shuffle=False)

classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data/cifar-10-python.tar.gz


100%|██████████| 170M/170M [00:01<00:00, 106MB/s]


Extracting ./data/cifar-10-python.tar.gz to ./data
Files already downloaded and verified
Files already downloaded and verified


In [4]:
class LBBlock(nn.Module):
    def __init__(self, in_channels, out_channels, kernel_size=3, stride=1, padding=1):
        super(LBBlock, self).__init__()
        # First depthwise convolution (ϕ_d1) with batch normalization
        self.depthwise1 = nn.Conv2d(in_channels, in_channels, kernel_size, stride, padding, groups=in_channels)
        self.bn1 = nn.BatchNorm2d(in_channels)

        # Pointwise convolution (ϕ_p) with batch normalization
        self.pointwise = nn.Conv2d(in_channels, out_channels, kernel_size=1)
        self.bn2 = nn.BatchNorm2d(out_channels)

        # Second depthwise convolution (ϕ_d2) with batch normalization
        self.depthwise2 = nn.Conv2d(out_channels, out_channels, kernel_size, stride, padding, groups=out_channels)
        self.bn3 = nn.BatchNorm2d(out_channels)

        self.relu1 = nn.ReLU(inplace=True)
        self.relu2 = nn.ReLU(inplace=True)

    def forward(self, x):
        x = self.depthwise1(x)                 # First depthwise convolution (ϕ_d1)
        x = self.bn1(x)                        # Batch normalization after ϕ_d1
        x = self.pointwise(x)                  # Pointwise convolution (ϕ_p)
        x = self.bn2(x)                        # Batch normalization after ϕ_p
        x = self.relu1(x)                          # ReLU after pointwise
        x = self.depthwise2(x)                 # Second depthwise convolution (ϕ_d2)
        x = self.bn3(x)                        # Batch normalization after ϕ_d2
        x = self.relu2(x)                          # ReLU after second depthwise
        return x

class DLBBlock(nn.Module):
    def __init__(self, in_channels, out_channels, kernel_size=3, stride=1, padding=1):
        super(DLBBlock, self).__init__()
        if in_channels != out_channels:
            self.match_channels = nn.Conv2d(in_channels, out_channels, kernel_size=1)
        else:
            self.match_channels = None

        self.depthwise1 = nn.Conv2d(in_channels, in_channels, kernel_size, stride, padding, groups=in_channels)
        self.bn1 = nn.BatchNorm2d(in_channels)

        self.pointwise = nn.Conv2d(in_channels, out_channels, kernel_size=1)
        self.bn2 = nn.BatchNorm2d(out_channels)

        self.depthwise2 = nn.Conv2d(out_channels, out_channels, kernel_size, stride, padding, groups=out_channels)
        self.bn3 = nn.BatchNorm2d(out_channels)

        self.relu1 = nn.ReLU(inplace=True)
        self.relu2 = nn.ReLU(inplace=True)

        # Add the FloatFunctional module for quantized addition
        self.add = nn.quantized.FloatFunctional()

    def forward(self, x):
        residual = self.match_channels(x) if self.match_channels else x

        x = self.depthwise1(x)
        x = self.bn1(x)

        x = self.pointwise(x)
        x = self.bn2(x)
        x = self.relu1(x)

        # Use quantized addition
        x = self.add.add(x, residual)
        residual1 = x

        x = self.depthwise2(x)
        x = self.bn3(x)
        x = self.relu2(x)

        # Use quantized addition for final shortcut connections
        x = self.add.add(x, residual)
        x = self.add.add(x, residual1)

        return x

In [5]:
class EtinyNet(nn.Module):
    def __init__(self):
        super(EtinyNet, self).__init__()

        # Initial 3x3 convolution with stride 2 to downsample
        self.initial_conv = nn.Conv2d(3, 32, kernel_size=3, stride=2, padding=1)

        # First pooling layer
        self.pool = nn.MaxPool2d(2, 2)

        # First set of LBBlocks: [32, 32, 32, 32] -> 56x56 feature map
        self.lb1 = nn.Sequential(
            LBBlock(32, 32),
            LBBlock(32, 32),
            LBBlock(32, 32),
            LBBlock(32, 32)
        )

        # Second set of LBBlocks: [32, 128, 128, 128] -> 28x28 feature map
        self.lb2 = nn.Sequential(
            # First part: [32, 128, 128] x 1
            LBBlock(32, 128),  # Expands channels from 32 to 128

            # Second part: [128, 128, 128] x 3
            LBBlock(128, 128),
            LBBlock(128, 128),
            LBBlock(128, 128)
        )

        # First DLBBlock: [128, 192, 192] -> 14x14 feature map
        self.dlb1 = nn.Sequential(
            DLBBlock(128, 192),
            DLBBlock(192, 192),
            DLBBlock(192, 192)
        )

        # Second DLBBlock: [192, 256, 256] -> 7x7 feature map
        self.dlb2 = nn.Sequential(
            DLBBlock(192, 256),
            DLBBlock(256, 256),
            DLBBlock(256, 512)
        )

        # Global average pooling (7x7 feature map to 1x1)
        self.global_avg_pool = nn.AdaptiveAvgPool2d((1, 1))

        # Fully connected layer
        self.fc = nn.Linear(512, 10)  # For CIFAR-10, which has 10 classes

        self.quant = torch.quantization.QuantStub()
        self.dequant = torch.quantization.DeQuantStub()
    def forward(self, x):
        x = self.quant(x)
        #print(x.shape)
        x = self.initial_conv(x)                  # Initial 3x3 convolution with stride 2, 112^2
        #print(x.shape)
        x = self.pool(x)                           # First pooling layer, 56^2
        #print(x.shape)
        x = self.lb1(x)                            # First set of LB blocks, 56^2
        x = self.pool(x)                           # Pooling to reduce to 28x28
        #print(x.shape)
        x = self.lb2(x)                            # Second set of LB blocks
        x = self.pool(x)                           # Pooling to reduce to 14x14
        #print(x.shape)
        x = self.dlb1(x)                           # First set of DLB blocks
        x = self.pool(x)                           # Pooling to reduce to 7x7
        #print(x.shape)
        x = self.dlb2(x)                           # Second set of DLB blocks
        x = self.global_avg_pool(x)                # Global average pooling to get 1x1 feature map
        #print(x.shape)
        x = x.view(-1, 512)                        # Flatten for the fully connected layer
        x = self.fc(x)                             # Fully connected layer
        x = self.dequant(x)

        return x

In [6]:
fusion_list = [
    ['depthwise1', 'bn1'],  # Conv + BN
    ['pointwise', 'bn2', 'relu1'],  # Conv + BN + ReLU
    ['depthwise2', 'bn3', 'relu2']  # Conv + BN + ReLU
]

def fuse_model(model):
    for module_name, module in model.named_children():
        if isinstance(module, (LBBlock, DLBBlock)):
            fusion_list = [
                ['depthwise1', 'bn1'],
                ['pointwise', 'bn2', 'relu1'],
                ['depthwise2', 'bn3', 'relu2']
            ]
            torch.quantization.fuse_modules(module, fusion_list, inplace=True)
        else:
            # Recursively apply to child modules
            fuse_model(module)

In [7]:
model_fp32 = EtinyNet()
model_fp32.load_state_dict(torch.load('/content/drive/My Drive/ECE570/Project/EtinyNetDict.pth'))
model_fp32.eval()

fuse_model(model_fp32)

model_fp32.qconfig = torch.quantization.get_default_qconfig('fbgemm')
model_int8 = model_fp32
model_int8.qconfig = torch.quantization.get_default_qconfig('fbgemm')
model_int8_prepared = torch.quantization.prepare(model_int8)
print(model_int8)

  model_fp32.load_state_dict(torch.load('/content/drive/My Drive/ECE570/Project/EtinyNetDict.pth'))


EtinyNet(
  (initial_conv): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (lb1): Sequential(
    (0): LBBlock(
      (depthwise1): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32)
      (bn1): Identity()
      (pointwise): ConvReLU2d(
        (0): Conv2d(32, 32, kernel_size=(1, 1), stride=(1, 1))
        (1): ReLU(inplace=True)
      )
      (bn2): Identity()
      (depthwise2): ConvReLU2d(
        (0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32)
        (1): ReLU(inplace=True)
      )
      (bn3): Identity()
      (relu1): Identity()
      (relu2): Identity()
    )
    (1): LBBlock(
      (depthwise1): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32)
      (bn1): Identity()
      (pointwise): ConvReLU2d(
        (0): Conv2d(32, 32, kernel_size=(1, 1), stride=(1, 1))
        (1): ReLU(inplace=T



In [8]:
# Run Calibration Data through Model
# Use a no_grad context to avoid any training interference
with torch.no_grad():
    for images, _ in calibration_loader:
      model_int8_prepared(images)  # This step runs the calibration data through the model

In [9]:
model_int8_converted = torch.quantization.convert(model_int8_prepared)
print(model_int8_converted)


EtinyNet(
  (initial_conv): QuantizedConv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), scale=0.1268494427204132, zero_point=64, padding=(1, 1))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (lb1): Sequential(
    (0): LBBlock(
      (depthwise1): QuantizedConv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), scale=0.13847623765468597, zero_point=64, padding=(1, 1), groups=32)
      (bn1): Identity()
      (pointwise): QuantizedConvReLU2d(32, 32, kernel_size=(1, 1), stride=(1, 1), scale=0.057288192212581635, zero_point=0)
      (bn2): Identity()
      (depthwise2): QuantizedConvReLU2d(32, 32, kernel_size=(3, 3), stride=(1, 1), scale=0.09686513990163803, zero_point=0, padding=(1, 1), groups=32)
      (bn3): Identity()
      (relu1): Identity()
      (relu2): Identity()
    )
    (1): LBBlock(
      (depthwise1): QuantizedConv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), scale=0.2063780426979065, zero_point=69, padding=(1, 1), groups=32)
      (bn1): Id

In [10]:
correct = 0
total = 0
with torch.no_grad():
    for data in testloader:
        images, labels = data
        outputs = model_int8_converted(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
print(f'Accuracy of the network on the 10000 test images: {100 * correct / total:.2f}%')

Accuracy of the network on the 10000 test images: 86.22%


In [11]:
from torch.quantization.observer import HistogramObserver
from torch.quantization.observer import PerChannelMinMaxObserver
from torch.quantization import QConfig

In [12]:
class ActivationObserver4bit(HistogramObserver):
    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self.quant_min = 0
        self.quant_max = 2 ** 4 - 1  # 0 to 15 for 4 bits
        self.dtype = torch.quint8

class WeightObserver4bit(PerChannelMinMaxObserver):
    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self.quant_min = -2 ** (4 - 1)  # -8
        self.quant_max = 2 ** (4 - 1) - 1  # 7
        self.dtype = torch.qint8
        self.qscheme = torch.per_channel_symmetric

custom_qconfig_4bit = QConfig(
    activation=ActivationObserver4bit.with_args(reduce_range=False),
    weight=WeightObserver4bit.with_args(reduce_range=False)
)

In [13]:
model_fp32 = EtinyNet()
model_fp32.load_state_dict(torch.load('/content/drive/My Drive/ECE570/Project/EtinyNetDict.pth'))
model_fp32.eval()

fuse_model(model_fp32)

model_fp32.qconfig = custom_qconfig_4bit
model_int4 = model_fp32
model_int4.qconfig = model_int4.qconfig = custom_qconfig_4bit
model_int4_prepared = torch.quantization.prepare(model_int4)
print(model_int4)

EtinyNet(
  (initial_conv): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (lb1): Sequential(
    (0): LBBlock(
      (depthwise1): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32)
      (bn1): Identity()
      (pointwise): ConvReLU2d(
        (0): Conv2d(32, 32, kernel_size=(1, 1), stride=(1, 1))
        (1): ReLU(inplace=True)
      )
      (bn2): Identity()
      (depthwise2): ConvReLU2d(
        (0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32)
        (1): ReLU(inplace=True)
      )
      (bn3): Identity()
      (relu1): Identity()
      (relu2): Identity()
    )
    (1): LBBlock(
      (depthwise1): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32)
      (bn1): Identity()
      (pointwise): ConvReLU2d(
        (0): Conv2d(32, 32, kernel_size=(1, 1), stride=(1, 1))
        (1): ReLU(inplace=T

  model_fp32.load_state_dict(torch.load('/content/drive/My Drive/ECE570/Project/EtinyNetDict.pth'))


In [14]:
# Run Calibration Data through Model
# Use a no_grad context to avoid any training interference
with torch.no_grad():
    for images, _ in calibration_loader:
      model_int4_prepared(images)  # This step runs the calibration data through the model
model_int4_converted = torch.quantization.convert(model_int4_prepared)

In [15]:
correct = 0
total = 0
with torch.no_grad():
    for data in testloader:
        images, labels = data
        outputs = model_int4_converted(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
print(f'Accuracy of the network on the 10000 test images: {100 * correct / total:.2f}%')

Accuracy of the network on the 10000 test images: 22.00%


In [17]:
torch.save(model_int4_converted, '/content/drive/My Drive/ECE570/Project/modei_int4.pth')
torch.save(model_int4_converted.state_dict(), '/content/drive/My Drive/ECE570/Project/model_int4_Dict.pth')
torch.save(model_int8_converted, '/content/drive/My Drive/ECE570/Project/modei_int8.pth')
torch.save(model_int8_converted.state_dict(), '/content/drive/My Drive/ECE570/Project/model_int8_Dict.pth')

In [25]:
def get_quantized_model_size(model):
    """
    Calculate the size of a quantized model using its state_dict.
    """
    state_dict = model.state_dict()

    # Check if state_dict is empty
    if not state_dict:
        print("Warning: State dictionary is empty!")
        return

    # Calculate total size in bytes
    total_size = 0
    total_params = 0

    print("Model Parameters and Buffers:")
    for name, tensor in state_dict.items():
        # Check if the item is a tensor
        if isinstance(tensor, torch.Tensor):
            num_elements = tensor.numel()
            element_size = tensor.element_size()
            size_in_kb = (num_elements * element_size) / 1024
            total_size += num_elements * element_size
            total_params += num_elements


    # Convert total size to megabytes
    size_in_mb = total_size / (1024 ** 2)
    print(f"\nTotal Quantized Model size: {size_in_mb:.2f} MB")
    print(f"Total number of parameters: {total_params}")

# Example usage
get_quantized_model_size(model_int4_converted)
get_quantized_model_size(model_int8_converted)


Model Parameters and Buffers:

Total Quantized Model size: 0.64 MB
Total number of parameters: 650412
Model Parameters and Buffers:

Total Quantized Model size: 0.64 MB
Total number of parameters: 650412
