# ⚙️ Project: Model Compression + Acceleration Toolkit
This notebook shows how to apply dynamic quantization and L1 unstructured pruning to the linear layers of a pretrained PyTorch image classification model (ResNet18), and then export the compressed model to ONNX format for efficient inference on edge devices.

In [None]:
!pip install torch torchvision onnx onnxruntime

In [None]:
import torch
import torch.nn as nn
import torch.quantization
from torchvision import models
import os
import copy

In [None]:
# Load an ImageNet-pretrained ResNet18 and switch it to inference mode.
# The boolean `pretrained=True` flag is deprecated in torchvision; the `weights`
# enum is its replacement, and IMAGENET1K_V1 is the checkpoint that
# `pretrained=True` used to select, so the loaded parameters are unchanged.
model = models.resnet18(weights=models.ResNet18_Weights.IMAGENET1K_V1)
model.eval()

In [None]:
# Dynamically quantize the model: only modules of the listed types (here
# nn.Linear) are converted, using 8-bit integer (qint8) weights.
layers_to_quantize = {nn.Linear}
quantized_model = torch.quantization.quantize_dynamic(
    model,
    qconfig_spec=layers_to_quantize,
    dtype=torch.qint8,
)
print('Model quantized!')

In [None]:
import torch.nn.utils.prune as prune

# Pruning must be applied to the float model. Dynamic quantization replaces every
# nn.Linear with a quantized module that is not an nn.Linear instance, so running
# the isinstance check against a copy of `quantized_model` matches no layer and
# silently prunes nothing.
PRUNE_AMOUNT = 0.4  # fraction of each Linear layer's weights to zero (lowest L1 magnitude first)

float_pruned = copy.deepcopy(model)  # work on a copy so the original `model` is left unmodified
pruned_layers = 0
for module in float_pruned.modules():
    if isinstance(module, nn.Linear):
        prune.l1_unstructured(module, name='weight', amount=PRUNE_AMOUNT)
        # Make the pruning permanent: fold the mask into `weight` and drop the
        # weight_orig / weight_mask reparametrization, so the module can be
        # deep-copied and converted by quantize_dynamic below.
        prune.remove(module, 'weight')
        pruned_layers += 1
if pruned_layers == 0:
    raise RuntimeError('No nn.Linear layers found to prune.')

# Quantize the pruned float model with the same settings as the quantization cell,
# so `pruned_model` is both pruned and dynamically quantized.
pruned_model = torch.quantization.quantize_dynamic(
    float_pruned, {nn.Linear}, dtype=torch.qint8
)
print('Model pruned!')

In [None]:
# Export the compressed model to an ONNX file, using a random tensor of shape
# (1, 3, 224, 224) as the example input.
onnx_path = 'compressed_resnet18.onnx'
dummy_input = torch.randn(1, 3, 224, 224)
export_options = dict(
    input_names=['input'],
    output_names=['output'],
    opset_version=11,
)
torch.onnx.export(pruned_model, dummy_input, onnx_path, **export_options)
print(f'Model exported to ONNX format at {onnx_path}')

In [None]:
import onnxruntime as ort

# Smoke test: load the exported file with ONNX Runtime and run it once on the
# same dummy input that was used for the export.
session = ort.InferenceSession(onnx_path)
input_name = session.get_inputs()[0].name  # name of the graph's first input
inputs = {input_name: dummy_input.numpy()}
outputs = session.run(None, inputs)  # None requests all graph outputs
print('ONNX inference successful!')

## ✅ Summary
We applied model compression techniques including quantization and pruning to ResNet18, and exported the model to ONNX for deployment. You can extend this by:
- Converting to TensorRT or OpenVINO for further acceleration.
- Profiling latency and memory usage.
- Packaging into a CLI or API.