In [1]:
import torch

##  Quantization schemes
<img src="./img/q_scheme.png" width="600" />

Two sets of schemes:
* Symmetric
* Affine

And

* Per-channel
* Per-Tensor

### Per-Channel and Per-Tensor

<img src="./img/per_t_c.png" width="600" />

In [40]:
x = torch.tensor([
    [0.5827, 0.8619], 
    [0.3827, -0.1982], 
    [-0.8213, 0.6351]])

print(x.size())

torch.Size([3, 2])


In [41]:
# per-tensor

scale = torch.tensor(1e-2)
zero_pt = torch.tensor(0)

xq = torch.quantize_per_tensor(x, scale, zero_pt, dtype=torch.qint8)
print(xq)

tensor([[ 0.5800,  0.8600],
        [ 0.3800, -0.2000],
        [-0.8200,  0.6400]], size=(3, 2), dtype=torch.qint8,
       quantization_scheme=torch.per_tensor_affine, scale=0.009999999776482582,
       zero_point=0)


In [32]:
# per-channel

channel_axis = 0
scale = torch.tensor([1e-2, 1e-3, 5e-2])
zero_pt = torch.zeros(3)

xq = torch.quantize_per_channel(x, scale, zero_pt, dtype=torch.qint8, axis=0)
print(xq)

### Symmetric and Affine

Symmetric
* Input range is calculated symmetrically around 0
* Good for quantizing weights
* Wasteful for quantizing activations - why?

Affine 
* Clips the input tightly 


<img src="./img/affine-symmetric.png" width="600" />

### Observers

<img src="./img/observer.png" width="600" />

In [52]:
from torch.ao.quantization.observer import MovingAverageMinMaxObserver, HistogramObserver, MovingAveragePerChannelMinMaxObserver

size = (3,4)
normal = torch.distributions.normal.Normal(0,1)
input = [normal.sample(size) for _ in range(3)]

observers = [
    MovingAverageMinMaxObserver(qscheme=torch.per_tensor_affine), 
    HistogramObserver(), 
    MovingAveragePerChannelMinMaxObserver(qscheme=torch.per_channel_symmetric)
    ]



In [53]:
for obs in observers:
  for x in input: 
      obs(x) 
  print(obs.__class__.__name__, obs.calculate_qparams())


MovingAverageMinMaxObserver (tensor([0.0128]), tensor([128], dtype=torch.int32))
HistogramObserver (tensor([0.0148]), tensor([146], dtype=torch.int32))
MovingAveragePerChannelMinMaxObserver (tensor([0.0127, 0.0126, 0.0116]), tensor([128, 128, 128]))


  src_bin_begin // dst_bin_width, 0, self.dst_nbins - 1
  src_bin_end // dst_bin_width, 0, self.dst_nbins - 1


### QConfig

* High-level abstraction wrapping these knobs in one object
* Allows separate configuration for activation and weights of a layer

In [54]:
from torch.ao.quantization.observer import MovingAverageMinMaxObserver, MovingAveragePerChannelMinMaxObserver
from torch.ao.quantization.qconfig import QConfig

my_qconfig = QConfig(
  activation=MovingAverageMinMaxObserver.with_args(
      qscheme=torch.per_tensor_affine,
      dtype=torch.quint8),
  weight=MovingAveragePerChannelMinMaxObserver.with_args(
      qscheme=torch.per_channel_symmetric)
)


#### Default QConfigs out of the box

In [55]:
torch.quantization.qconfig.default_per_channel_qconfig

QConfig(activation=functools.partial(<class 'torch.ao.quantization.observer.MinMaxObserver'>, reduce_range=True){}, weight=functools.partial(<class 'torch.ao.quantization.observer.PerChannelMinMaxObserver'>, dtype=torch.qint8, qscheme=torch.per_channel_symmetric){})

In [57]:
print(torch.quantization.qconfig.default_dynamic_qconfig)

QConfigDynamic(activation=functools.partial(<class 'torch.ao.quantization.observer.PlaceholderObserver'>, dtype=torch.float32, compute_dtype=torch.quint8){}, weight=functools.partial(<class 'torch.ao.quantization.observer.MinMaxObserver'>, dtype=torch.qint8, qscheme=torch.per_tensor_symmetric){})


In [60]:
torch.ao.quantization.default_per_channel_weight_observer

AttributeError: module 'torch.ao.quantization' has no attribute 'default_per_channel_weight_observer'

In [58]:
print(torch.quantization.qconfig.per_channel_dynamic_qconfig)

QConfigDynamic(activation=functools.partial(<class 'torch.ao.quantization.observer.PlaceholderObserver'>, dtype=torch.float32, compute_dtype=torch.quint8){}, weight=functools.partial(<class 'torch.ao.quantization.observer.PerChannelMinMaxObserver'>, dtype=torch.qint8, qscheme=torch.per_channel_symmetric){})
