# Packaging for Inference

This notebook covers model packaging and optimization techniques for production inference.

## Topics Covered

1. **Model Export Formats**
   - TorchScript (JIT compilation)
   - ONNX (Open Neural Network Exchange)
   - TensorRT optimization

2. **Model Optimizations**
   - Mixed precision (FP16/BF16)
   - Quantization (Post-Training & Quantization-Aware Training)
   - Dynamic batching
   - Graph optimization

3. **Model Registry & Versioning**
   - MLflow Model Registry
   - Weights & Biases Artifacts
   - S3/Cloud storage patterns
   - Model versioning strategies

4. **Deployment Packaging**
   - Docker containerization
   - Dependency management
   - Environment reproducibility


In [None]:
import torch
import torch.nn as nn
import torch.quantization as quantization
import torchvision.models as models
import onnx
import onnxruntime as ort
import numpy as np
import time
from pathlib import Path
import mlflow
import mlflow.pytorch


## 1. TorchScript Export


In [None]:
def _prefixed_path(save_path, prefix):
    """Return save_path as a string with prefix added to its file name only."""
    target = Path(save_path)
    return str(target.with_name(f"{prefix}{target.name}"))


def export_to_torchscript(model, example_input, save_path="model_scripted.pt"):
    """
    Export a PyTorch model to TorchScript using both tracing and scripting.

    The model is switched to eval mode, then exported twice:

    * ``torch.jit.trace`` -> saved next to ``save_path`` as ``traced_<file name>``
    * ``torch.jit.script`` -> saved next to ``save_path`` as ``scripted_<file name>``

    The prefix is applied to the file name component only, so a ``save_path``
    that contains directories (e.g. ``"out/model.pt"``) yields
    ``"out/traced_model.pt"`` rather than a path inside a ``traced_out``
    directory.

    After each export the compiled module is run on ``example_input`` and its
    output is compared with the eager model's output using
    ``torch.allclose(..., rtol=1e-3)``; the result is reported via ``print``.
    The file is written before this check, so a mismatch warning does not
    remove the saved file.

    Args:
        model: Module to export; ``model.eval()`` is called on it.
        example_input: Input passed to ``torch.jit.trace`` and used for the
            output comparison of both exports.
        save_path: Base output path (string or path-like). Defaults to
            ``"model_scripted.pt"``.

    Returns:
        None. Each export is best-effort: any exception raised while
        exporting, saving or verifying one variant is caught and printed,
        and the other variant is still attempted.
    """
    model.eval()

    # Method 1: Tracing (for models without control flow)
    try:
        traced_path = _prefixed_path(save_path, "traced_")
        traced_model = torch.jit.trace(model, example_input)
        traced_model.save(traced_path)
        print(f"Model traced and saved to {traced_path}")

        # Verify the traced model
        with torch.no_grad():
            original_output = model(example_input)
            traced_output = traced_model(example_input)

        if torch.allclose(original_output, traced_output, rtol=1e-3):
            print("✓ Traced model outputs match original model")
        else:
            print("⚠ Warning: Traced model outputs differ from original")

    except Exception as e:
        # Deliberately broad: a failed trace must not prevent the scripting attempt.
        print(f"Tracing failed: {e}")

    # Method 2: Scripting (for models with control flow)
    try:
        scripted_path = _prefixed_path(save_path, "scripted_")
        scripted_model = torch.jit.script(model)
        scripted_model.save(scripted_path)
        print(f"Model scripted and saved to {scripted_path}")

        # Verify the scripted model
        with torch.no_grad():
            original_output = model(example_input)
            scripted_output = scripted_model(example_input)

        if torch.allclose(original_output, scripted_output, rtol=1e-3):
            print("✓ Scripted model outputs match original model")
        else:
            print("⚠ Warning: Scripted model outputs differ from original")

    except Exception as e:
        # Deliberately broad: report the failure instead of aborting the notebook cell.
        print(f"Scripting failed: {e}")

# Cell sanity message: printed once this cell has executed. This line only
# prints a readiness notice; it does not call export_to_torchscript.
print("TorchScript export function ready")
