In [1]:
! pip install efficientnet_pytorch


Defaulting to user installation because normal site-packages is not writeable
Looking in indexes: https://pypi.org/simple, https://pypi.ngc.nvidia.com

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.0[0m[39;49m -> [0m[32;49m24.3.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpython -m pip install --upgrade pip[0m


In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
from efficientnet_pytorch import EfficientNet

# MNIST Dataset Preparation
transform = transforms.Compose([
    transforms.Resize((224, 224)),  # Resize for EfficientNet
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))  # Normalize for grayscale images
])

trainset = torchvision.datasets.MNIST(root='./data', train=True, download=True, transform=transform)
trainloader = DataLoader(trainset, batch_size=128, shuffle=True, num_workers=4)

testset = torchvision.datasets.MNIST(root='./data', train=False, download=True, transform=transform)
testloader = DataLoader(testset, batch_size=128, shuffle=False, num_workers=4)

# Define EfficientNet Model for MNIST
class EfficientNetMNIST(nn.Module):
    def __init__(self, version='efficientnet-b0', q=False):
        super(EfficientNetMNIST, self).__init__()
        self.base_model = EfficientNet.from_pretrained(version, in_channels=1, num_classes=10)
        self.q = q
        if q:
            self.quant = torch.quantization.QuantStub()
            self.dequant = torch.quantization.DeQuantStub()

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        if self.q:
            x = self.quant(x)
        x = self.base_model(x)
        if self.q:
            x = self.dequant(x)
        return x

# Training Function
def train(model, dataloader, epochs=10, cuda=False):
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001, weight_decay=1e-4)
    
    model.train()
    for epoch in range(epochs):
        running_loss = 0.0
        correct = 0
        total = 0
        for i, data in enumerate(dataloader):
            inputs, labels = data
            if cuda:
                inputs, labels = inputs.cuda(), labels.cuda()
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

        print(f'Epoch [{epoch+1}/{epochs}], Loss: {running_loss / len(dataloader):.4f}, Accuracy: {100 * correct / total:.2f}%')

# Testing Function
def test(model, dataloader, cuda=False):
    correct = 0
    total = 0
    model.eval()
    with torch.no_grad():
        for data in dataloader:
            inputs, labels = data
            if cuda:
                inputs, labels = inputs.cuda(), labels.cuda()
            outputs = model(inputs)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    accuracy = 100 * correct / total
    print(f'Accuracy: {accuracy}%')
    return accuracy

# Train the FP32 Model
device = 'cuda' if torch.cuda.is_available() else 'cpu'
efficientnet_fp32 = EfficientNetMNIST(q=False).to(device)
train(efficientnet_fp32, trainloader, epochs=10, cuda=(device == 'cuda'))
torch.save(efficientnet_fp32.state_dict(), "efficientnet_fp32_mnist.pth")
print("FP32 Model Trained and Saved.")

# Test FP32 Model
fp32_accuracy = test(efficientnet_fp32, testloader, cuda=(device == 'cuda'))
print(f"FP32 Model Accuracy on MNIST: {fp32_accuracy}%")


Loaded pretrained weights for efficientnet-b0
Epoch [1/10], Loss: 0.0758, Accuracy: 97.97%
Epoch [2/10], Loss: 0.0326, Accuracy: 98.97%
Epoch [3/10], Loss: 0.0291, Accuracy: 99.09%
Epoch [4/10], Loss: 0.0279, Accuracy: 99.19%
Epoch [5/10], Loss: 0.0253, Accuracy: 99.25%
Epoch [6/10], Loss: 0.0263, Accuracy: 99.19%
Epoch [7/10], Loss: 0.0225, Accuracy: 99.31%
Epoch [8/10], Loss: 0.0223, Accuracy: 99.31%
Epoch [9/10], Loss: 0.0200, Accuracy: 99.41%
Epoch [10/10], Loss: 0.0186, Accuracy: 99.47%
FP32 Model Trained and Saved.
Accuracy: 96.73%
FP32 Model Accuracy on MNIST: 96.73%


In [3]:
from torch.quantization import quantize_dynamic
def quantize_and_test(model, test_loader, quant_type):

    # Move the model to CPU for quantization
    model.cpu()

    if quant_type == 'int8':
        quantized_model = quantize_dynamic(model, {nn.Linear}, dtype=torch.qint8)  # INT8 quantization
    elif quant_type == 'int16':
        quantized_model = quantize_dynamic(model, {nn.Linear}, dtype=torch.float16)  # INT16 simulation
    else:
        raise ValueError(f"Unsupported quantization type: {quant_type}")

    # Test the quantized model
    quantized_model.eval()

    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in test_loader:
            # Move data to CPU for testing
            images, labels = images.cpu(), labels.cpu()
            outputs = quantized_model(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    accuracy = 100 * correct / total
    print(f"{quant_type.upper()} Quantized Model Accuracy: {accuracy:.2f}%")
    return accuracy

# Quantized model evaluations
quant_types = ['int8', 'int16']
quantized_accuracies = {}
for qt in quant_types:
    quantized_accuracies[qt] = quantize_and_test(efficientnet_fp32, testloader, qt)

# Display results
print("\nQuantization Results:")
for qt, acc in quantized_accuracies.items():
    print(f"{qt.upper()} Accuracy: {acc:.2f}%")
     

INT8 Quantized Model Accuracy: 96.78%
INT16 Quantized Model Accuracy: 96.73%

Quantization Results:
INT8 Accuracy: 96.78%
INT16 Accuracy: 96.73%


In [4]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.quantization import quantize_dynamic
from efficientnet_pytorch import EfficientNet

# Per-Layer Quantization
def per_layer_quantize(tensor):
    max_val = tensor.abs().amax()
    scale = 127 / max_val
    quantized_tensor = (tensor * scale).clamp(-127, 127).round().char()
    return quantized_tensor, scale

def per_layer_dequantize(quantized_tensor, scale):
    return quantized_tensor.float() / scale

# Quantized Forward Pass
def quantized_forward_per_layer_efficientnet(model, x, quantize_fn, dequantize_fn):
    with torch.no_grad():
        weights_q = {}
        scales = {}

        # Quantize weights
        for name, param in model.named_parameters():
            weights_q[name], scales[name] = quantize_fn(param.data)

        # Forward pass through EfficientNet
        for name, layer in model.base_model.named_children():
            if isinstance(layer, nn.Conv2d):
                key = f"base_model.{name}.weight"
                activation_scale = 127 / (x.abs().amax() + 1e-8)
                x = F.conv2d(
                    x / activation_scale,
                    dequantize_fn(weights_q[key], scales[key]),
                    bias=None if layer.bias is None else dequantize_fn(weights_q[f"base_model.{name}.bias"], scales[f"base_model.{name}.bias"]),
                    stride=layer.stride,
                    padding=layer.padding,
                    groups=layer.groups
                )
                x = (x * activation_scale).clamp(-127, 127).round().char()
            elif isinstance(layer, nn.BatchNorm2d):
                x = layer(x.float())  # BatchNorm operates on floating-point values
            elif isinstance(layer, nn.ReLU):
                x = x.float()  # ReLU operates on floating-point values
                x = layer(x)
                activation_scale = 127 / (x.abs().amax() + 1e-8)
                x = (x * activation_scale).clamp(-127, 127).round().char()
            elif isinstance(layer, nn.AdaptiveAvgPool2d):
                x = layer(x.float())  # Adaptive pooling
            elif isinstance(layer, nn.Linear):
                x = x.view(x.size(0), -1)  # Flatten the input
                key = f"base_model.{name}.weight"
                activation_scale = 127 / (x.abs().amax() + 1e-8)
                x = F.linear(
                    x / activation_scale,
                    dequantize_fn(weights_q[key], scales[key]),
                    bias=None if f"base_model.{name}.bias" not in weights_q else dequantize_fn(weights_q[f"base_model.{name}.bias"], scales[f"base_model.{name}.bias"])
                )
                x = (x * activation_scale).clamp(-127, 127).round().char()
            elif isinstance(layer, nn.Sequential) or isinstance(layer, nn.ModuleList):
                for sublayer in layer:
                    x = quantized_forward_per_layer_submodule(sublayer, x, weights_q, scales, quantize_fn, dequantize_fn)
            else:
                x = layer(x)

        return x

# Submodule Forward for Sequential and ModuleList
def quantized_forward_per_layer_submodule(layer, x, weights_q, scales, quantize_fn, dequantize_fn):
    if isinstance(layer, nn.Conv2d):
        key = f"{layer}.weight"
        activation_scale = 127 / (x.abs().amax() + 1e-8)
        x = F.conv2d(
            x / activation_scale,
            dequantize_fn(weights_q[key], scales[key]),
            bias=None if f"{layer}.bias" not in weights_q else dequantize_fn(weights_q[f"{layer}.bias"], scales[f"{layer}.bias"]),
            stride=layer.stride,
            padding=layer.padding,
            groups=layer.groups
        )
        x = (x * activation_scale).clamp(-127, 127).round().char()
    elif isinstance(layer, nn.BatchNorm2d):
        x = layer(x.float())
    elif isinstance(layer, nn.ReLU):
        x = x.float()
        x = layer(x)
        activation_scale = 127 / (x.abs().amax() + 1e-8)
        x = (x * activation_scale).clamp(-127, 127).round().char()
    elif isinstance(layer, nn.Linear):
        key = f"{layer}.weight"
        activation_scale = 127 / (x.abs().amax() + 1e-8)
        x = F.linear(
            x / activation_scale,
            dequantize_fn(weights_q[key], scales[key]),
            bias=None if f"{layer}.bias" not in weights_q else dequantize_fn(weights_q[f"{layer}.bias"], scales[f"{layer}.bias"])
        )
        x = (x * activation_scale).clamp(-127, 127).round().char()
    elif isinstance(layer, nn.Sequential) or isinstance(layer, nn.ModuleList):
        for sublayer in layer:
            x = quantized_forward_per_layer_submodule(sublayer, x, weights_q, scales, quantize_fn, dequantize_fn)
    else:
        x = layer(x)
    return x

# Quantized Model Testing
def test_quantized_per_layer_efficientnet(model, dataloader, device, quantize_fn, dequantize_fn):
    model.to(device)
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for inputs, labels in dataloader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = quantized_forward_per_layer_efficientnet(model, inputs, quantize_fn, dequantize_fn)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    accuracy = 100 * correct / total
    print(f"Quantized Model Accuracy: {accuracy}%")
    return accuracy

# Example Testing
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
efficientnet_fp32 = EfficientNetMNIST(q=False).to(device)
efficientnet_fp32.load_state_dict(torch.load("efficientnet_fp32_mnist.pth"))
print("Testing Per-Layer INT8 Quantized EfficientNet Model...")
int8_accuracy = test_quantized_per_layer_efficientnet(efficientnet_fp32, testloader, device, per_layer_quantize, per_layer_dequantize)
print(f"INT8 Quantized EfficientNet Model Accuracy: {int8_accuracy}%")


Loaded pretrained weights for efficientnet-b0
Testing Per-Layer INT8 Quantized EfficientNet Model...
Quantized Model Accuracy: 27.35%
INT8 Quantized EfficientNet Model Accuracy: 27.35%


In [5]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.quantization import quantize_dynamic
from efficientnet_pytorch import EfficientNet

# Per-Layer Quantization for INT16
def per_layer_quantize_int16(tensor):
    max_val = tensor.abs().amax()
    scale = 32767 / max_val
    quantized_tensor = (tensor * scale).clamp(-32767, 32767).round().short()
    return quantized_tensor, scale

def per_layer_dequantize_int16(quantized_tensor, scale):
    return quantized_tensor.float() / scale

# Quantized Forward Pass
def quantized_forward_per_layer_efficientnet_int16(model, x, quantize_fn, dequantize_fn):
    with torch.no_grad():
        weights_q = {}
        scales = {}

        # Quantize weights
        for name, param in model.named_parameters():
            weights_q[name], scales[name] = quantize_fn(param.data)

        # Forward pass through EfficientNet
        for name, layer in model.base_model.named_children():
            if isinstance(layer, nn.Conv2d):
                key = f"base_model.{name}.weight"
                activation_scale = 32767 / (x.abs().amax() + 1e-8)
                x = F.conv2d(
                    x / activation_scale,
                    dequantize_fn(weights_q[key], scales[key]),
                    bias=None if layer.bias is None else dequantize_fn(weights_q[f"base_model.{name}.bias"], scales[f"base_model.{name}.bias"]),
                    stride=layer.stride,
                    padding=layer.padding,
                    groups=layer.groups
                )
                x = (x * activation_scale).clamp(-32767, 32767).round().short()
            elif isinstance(layer, nn.BatchNorm2d):
                x = layer(x.float())  # BatchNorm operates on floating-point values
            elif isinstance(layer, nn.ReLU):
                x = x.float()  # ReLU operates on floating-point values
                x = layer(x)
                activation_scale = 32767 / (x.abs().amax() + 1e-8)
                x = (x * activation_scale).clamp(-32767, 32767).round().short()
            elif isinstance(layer, nn.AdaptiveAvgPool2d):
                x = layer(x.float())  # Adaptive pooling
            elif isinstance(layer, nn.Linear):
                x = x.view(x.size(0), -1)  # Flatten the input
                key = f"base_model.{name}.weight"
                activation_scale = 32767 / (x.abs().amax() + 1e-8)
                x = F.linear(
                    x / activation_scale,
                    dequantize_fn(weights_q[key], scales[key]),
                    bias=None if f"base_model.{name}.bias" not in weights_q else dequantize_fn(weights_q[f"base_model.{name}.bias"], scales[f"base_model.{name}.bias"])
                )
                x = (x * activation_scale).clamp(-32767, 32767).round().short()
            elif isinstance(layer, nn.Sequential) or isinstance(layer, nn.ModuleList):
                for sublayer in layer:
                    x = quantized_forward_per_layer_submodule(sublayer, x, weights_q, scales, quantize_fn, dequantize_fn)
            else:
                x = layer(x)

        return x

# Quantized Model Testing
def test_quantized_per_layer_efficientnet_int16(model, dataloader, device, quantize_fn, dequantize_fn):
    model.to(device)
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for inputs, labels in dataloader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = quantized_forward_per_layer_efficientnet_int16(model, inputs, quantize_fn, dequantize_fn)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    accuracy = 100 * correct / total
    print(f"Quantized Model Accuracy: {accuracy}%")
    return accuracy

# Example Testing
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
efficientnet_fp32 = EfficientNetMNIST(q=False).to(device)
efficientnet_fp32.load_state_dict(torch.load("efficientnet_fp32_mnist.pth"))
print("Testing Per-Layer INT16 Quantized EfficientNet Model...")
int16_accuracy = test_quantized_per_layer_efficientnet_int16(efficientnet_fp32, testloader, device, per_layer_quantize_int16, per_layer_dequantize_int16)
print(f"INT16 Quantized EfficientNet Model Accuracy: {int16_accuracy}%")


Loaded pretrained weights for efficientnet-b0
Testing Per-Layer INT16 Quantized EfficientNet Model...
Quantized Model Accuracy: 9.74%
INT16 Quantized EfficientNet Model Accuracy: 9.74%


In [10]:
import torch.onnx

# Convert FP32 Model to ONNX
def convert_fp32_to_onnx(model, onnx_filename, input_size=(1, 1, 224, 224)):
    model.eval()
    dummy_input = torch.randn(*input_size).to(next(model.parameters()).device)
    torch.onnx.export(
        model,
        dummy_input,
        onnx_filename,
        export_params=True,
        opset_version=11,
        input_names=['input'],
        output_names=['output']
    )
    print(f"FP32 Model exported to {onnx_filename}")

# Example Usage
convert_fp32_to_onnx(efficientnet_fp32, "efficientnet_fp32_mnist.onnx")

  _C._jit_pass_onnx_node_shape_type_inference(node, params_dict, opset_version)
  _C._jit_pass_onnx_graph_shape_type_inference(


FP32 Model exported to efficientnet_fp32_mnist.onnx


  _C._jit_pass_onnx_graph_shape_type_inference(
