In [2]:
import torch.quantization

In [3]:
# Use 'fbgemm' as the engine for quantized operators.
torch.backends.quantized.engine = 'fbgemm'

In [4]:
# Read the setting back to confirm which quantized engine is selected.
torch.backends.quantized.engine

'fbgemm'

## Simple Scaling

In [1]:
import torch

In [134]:
# Alternative input with a smaller range, kept for experimentation:
# t = torch.tensor([0.5,0.7,0,-0.7,-0.5])
# Sample values placed symmetrically around zero for the scaling demo.
t = torch.tensor([-10.3, -5.7, 0.0, 5.7, 10.3])
t

tensor([-10.3000,  -5.7000,   0.0000,   5.7000,  10.3000])

In [145]:
# Inspect the Python type of the sample tensor.
type(t)

torch.Tensor

In [135]:
# Smallest and largest element of t; the Python builtins iterate over the
# tensor and hand back 0-d tensors (see the output below).
tmin, tmax = min(t), max(t)
tmin, tmax

(tensor(-10.3000), tensor(10.3000))

In [136]:
# Target integer range: the limits of a signed 8-bit integer.
qmin, qmax = -128, 127

In [137]:
# Real-valued width of one integer step: float range divided by integer range.
scale = (tmax - tmin) / (qmax - qmin)
scale

tensor(0.0808)

In [140]:
# Project onto the integer grid, round, saturate to the signed 8-bit limits,
# and finally store the codes as int8.
q = (t / scale).round().clamp(-128, 127).to(torch.int8)
q

tensor([-128,  -71,    0,   71,  127], dtype=torch.int8)

In [141]:
# Dequantize: convert the int8 codes to float and multiply by the same scale.
dq = q.float() * scale
dq

tensor([-10.3404,  -5.7357,   0.0000,   5.7357,  10.2596])

In [144]:
# Element-wise absolute error of the quantize -> dequantize round trip.
torch.abs(dq - t)

tensor([0.0404, 0.0357, 0.0000, 0.0357, 0.0404])

In [93]:
# Query the representable integer range of int8.
dtype = torch.int8
torch.iinfo(dtype)

iinfo(min=-128, max=127, dtype=int8)

## Affine Quantization

In [24]:
# Ten random values for the affine experiment.
# NOTE(review): no seed is set in the visible cells, so a re-run will not
# reproduce the values recorded in the output below.
t = torch.rand(10)
t

tensor([0.4738, 0.0327, 0.2503, 0.5816, 0.3079, 0.0685, 0.1102, 0.7611, 0.4076,
        0.9026])

In [26]:
# Observed value range of the random sample.
tmin, tmax = min(t), max(t)
tmin, tmax

(tensor(0.0327), tensor(0.9026))

In [27]:
# Target integer range: 0..255, i.e. 256 levels (unsigned 8-bit).
qmin, qmax = 0, 255

In [28]:
# Real-valued width of one integer step for the 0..255 grid.
scale = (tmax - tmin) / (qmax - qmin)
scale

tensor(0.0034)

In [34]:
# Scale and round only.
# NOTE(review): no zero-point offset and no clamp are applied here, so the
# result is not confined to [qmin, qmax] -- the recorded output below reaches
# 265 while qmax is 255.
q = torch.round(t/scale)
q

tensor([139.,  10.,  73., 170.,  90.,  20.,  32., 223., 119., 265.])

## Affine Quantization (as reusable functions)

In [2]:
def quantize(
        x: torch.Tensor,
        scale: float,
        zero_point: int,
        dtype = torch.uint8
    ):
    """Affine-quantize ``x`` into an integer tensor of type ``dtype``.

    Computes ``round(x / scale + zero_point)`` element-wise, saturates the
    result to the representable range of ``dtype`` and casts it to ``dtype``.

    Args:
        x: Values to quantize (a tensor, or anything ``torch.as_tensor``
            accepts).
        scale: Real-valued width of one integer step; must be non-zero.
        zero_point: Offset added after scaling.
        dtype: Integer torch dtype of the result (default ``torch.uint8``).

    Returns:
        A tensor of type ``dtype`` with the same shape as ``x``.
    """
    limits = torch.iinfo(dtype)
    # torch.round is the element-wise tensor API; the builtin round() is not a
    # reliable substitute for tensors.
    levels = torch.round(torch.as_tensor(x) / scale + zero_point)
    # Saturate *before* casting so out-of-range values do not wrap around.
    return torch.clamp(levels, limits.min, limits.max).to(dtype)
    

In [3]:
def _calculate_scale_and_zeropoint(x,nbits=8):
    minx, maxx = min(x), max(x)

In [10]:
# Small all-positive example tensor for working through the affine parameters.
t = torch.tensor([1.345,2.087,3.453])
t

tensor([1.3450, 2.0870, 3.4530])

In [22]:
# Observed value range of the example tensor.
minx, maxx = min(t), max(t)
minx, maxx

(tensor(1.3450), tensor(3.4530))

In [20]:
# Step size when the observed range is divided into 255 integer steps.
scale = (maxx - minx)/255
# Offset chosen so that minx/scale + zero_point equals 0.
# NOTE(review): the value is not rounded, so it stays fractional (see the
# recorded output below).
zero_point = 0 - minx/scale

In [21]:
# Display the derived quantization parameters.
scale, zero_point

(tensor(0.0083), tensor(-162.7016))