In [None]:
import os

import torch
import torchvision.models as models
from torch.quantization import quantize_dynamic

In [None]:
# Load and prepare original model.
# `pretrained=True` is deprecated since torchvision 0.13 in favour of the
# `weights=` enum API; IMAGENET1K_V1 is the checkpoint the legacy flag selected.
# Older torchvision releases do not expose `ResNet18_Weights`, so fall back to
# the legacy flag there to keep the notebook runnable on both.
if hasattr(models, "ResNet18_Weights"):
    model_fp32 = models.resnet18(weights=models.ResNet18_Weights.IMAGENET1K_V1)
else:
    model_fp32 = models.resnet18(pretrained=True)
# Put the network in evaluation mode before it is quantized and saved.
model_fp32.eval()
print("Loaded FP32 ResNet18 model")

In [None]:
# Apply dynamic quantization to the FP32 network, targeting only
# torch.nn.Linear modules and requesting the torch.qint8 dtype.
# NOTE(review): any layer type not listed in the spec is left untouched, so the
# size reduction depends on how much of the model is Linear -- confirm this is
# the intended scope for ResNet18.
model_int8 = quantize_dynamic(model_fp32, qconfig_spec={torch.nn.Linear}, dtype=torch.qint8)
print("Applied INT8 quantization to linear layers")

In [None]:
# Save models: write the state_dict of each network (FP32 first, then the
# quantized one) to its own .pth file in the current working directory.
torch.save(obj=model_fp32.state_dict(), f="resnet18_fp32.pth")
torch.save(obj=model_int8.state_dict(), f="resnet18_quantized.pth")
print("Saved both model versions")

In [None]:
# Compare sizes: read the on-disk size of each saved checkpoint and convert
# bytes to MB (bytes / 1024 / 1024) before reporting them side by side.
# Both files must already exist in the current working directory; otherwise
# os.path.getsize raises an OSError.
size_fp32 = os.path.getsize("resnet18_fp32.pth") / 1024 / 1024
size_int8 = os.path.getsize("resnet18_quantized.pth") / 1024 / 1024

print(f"FP32 model size: {size_fp32:.2f} MB")
print(f"INT8 quantized model size: {size_int8:.2f} MB")