# 12. Deployment

[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/gaurav-redhat/pytorch_tutorial/blob/main/12_deployment/demo.ipynb)

---

In [None]:
import torch
import torch.nn as nn

## TorchScript

In [None]:
# Small feed-forward network: 10 input features -> 32 hidden units -> 5 outputs.
# `.eval()` returns the module itself, so it is chained onto the constructor to
# put the model in eval mode before it is traced.
model = nn.Sequential(
    nn.Linear(in_features=10, out_features=32),
    nn.ReLU(),
    nn.Linear(in_features=32, out_features=5),
).eval()

# Trace the model with a single example input of shape (1, 10).
example = torch.randn(1, 10)
traced = torch.jit.trace(model, example_inputs=example)

# Write the traced module to disk so it can be reloaded with torch.jit.load.
traced.save('model_traced.pt')
print('Saved traced model')

In [None]:
# Load the traced module back from disk.
loaded = torch.jit.load('model_traced.pt')

# Inference only: run the forward pass with autograd disabled so no gradient
# graph is recorded for the output. The input here is a batch of 5 samples
# with 10 features each.
with torch.no_grad():
    output = loaded(torch.randn(5, 10))
print(f'Output shape: {output.shape}')

## Quantization

In [None]:
import os

# Dynamic quantization: request qint8 for the nn.Linear modules of `model`.
quantized = torch.quantization.quantize_dynamic(
    model,
    qconfig_spec={nn.Linear},
    dtype=torch.qint8,
)

# Compare on-disk sizes: save both state dicts first, then report each
# file's size in KB.
checkpoints = (
    ("FP32", model, "model_fp32.pt"),
    ("INT8", quantized, "model_int8.pt"),
)
for _, net, path in checkpoints:
    torch.save(net.state_dict(), path)
for tag, _, path in checkpoints:
    print(f'{tag} size: {os.path.getsize(path)/1024:.1f} KB')