In [2]:
import torch.quantization

In [3]:
torch.backends.quantized.engine = 'fbgemm'

In [4]:
torch.backends.quantized.engine

'fbgemm'

## Symmetric Quantization for Symmetric Data

In [2]:
import torch

In [133]:
# t = torch.tensor([0.5,0.7,0,-0.7,-0.5])
# t = torch.tensor([-10.3, -5.7, 0.0, 5.7, 10.3])
t = torch.tensor([-12., -5., 0., 5., 10.])
t

tensor([-12.,  -5.,   0.,   5.,  10.])

In [134]:
t.dtype

torch.float32

In [135]:
tmax = t.abs().max()
tmax

tensor(12.)

In [136]:
qmin, qmax = -128, 127

In [137]:
scale = tmax / qmax
scale

tensor(0.0945)

In [103]:
q = torch.round(t/scale)
# q = torch.clamp(q,-128,127)
# q = q.to(torch.int8)
q

tensor([-127.,  -53.,    0.,   53.,  106.])

In [30]:
dq = q.float() * scale
dq

tensor([-9.9608, -5.0196,  0.0000,  5.0196,  9.9608])

In [138]:
def calculate_scale(t, qmin=-127, qmax=127):
    tmax = t.abs().max()
    scale = tmax / qmax
    return scale

def quantize(t, scale):
    q = torch.round(t/scale)
    q = q.to(torch.int8)
    return q

def dequantize(t, scale):
    return t * scale

In [139]:
scale = calculate_scale(t, qmin=0, qmax=127)
scale

tensor(0.0945)

In [140]:
q = quantize(t,scale)
q

tensor([-127,  -53,    0,   53,  106], dtype=torch.int8)

In [142]:
dq = dequantize(q,scale)
dq

tensor([-12.0000,  -5.0079,   0.0000,   5.0079,  10.0157])

In [31]:
dtype = torch.int8
torch.iinfo(dtype)

iinfo(min=-128, max=127, dtype=int8)

In [119]:
# Absolute Error
torch.abs(dq - t)

tensor([0.0000, 0.0079, 0.0000, 0.0079, 0.0157])

In [120]:
# Percentage error
torch.where(t == 0, 0, 100 * torch.abs(dq - t)/t)

tensor([-0.0000, -0.1575,  0.0000,  0.1575,  0.1575])

## Symmetric Quantization for Asymmetric Data

In [143]:
# t = torch.rand(10)
t = torch.tensor([0.,12.,30.,70.,90.,100.])
t

tensor([  0.,  12.,  30.,  70.,  90., 100.])

In [125]:
scale = t.max() / 127
scale

tensor(0.7874)

In [144]:
scale = calculate_scale(t, qmin=0, qmax=127)
scale

tensor(0.7874)

In [145]:
q = quantize(t,scale)
q

tensor([  0,  15,  38,  89, 114, 127], dtype=torch.int8)

In [147]:
t_dq = dequantize(q,scale)
t_dq

tensor([  0.0000,  11.8110,  29.9213,  70.0787,  89.7638, 100.0000])

In [150]:
torch.abs(t_dq - t)

tensor([0.0000, 0.1890, 0.0787, 0.0787, 0.2362, 0.0000])

In [152]:
# Percentage error
torch.where(t == 0, 0, 100 * torch.abs((t_dq - t)/t))

tensor([0.0000, 1.5748, 0.2625, 0.1125, 0.2625, 0.0000])

## Symmetric Quantization for Skewed Data

In [156]:
t = torch.tensor([-2.,1.,2.,10.,50.,200.])
t

tensor([ -2.,   1.,   2.,  10.,  50., 200.])

In [157]:
scale = calculate_scale(t, qmin=-128, qmax=127)
scale

tensor(1.5748)

In [158]:
q = quantize(t,scale)
q

tensor([ -1,   1,   1,   6,  32, 127], dtype=torch.int8)

In [159]:
t_dq = dequantize(q,scale)
t_dq

tensor([ -1.5748,   1.5748,   1.5748,   9.4488,  50.3937, 200.0000])

In [160]:
ae = torch.abs(t_dq - t)
ae

tensor([0.4252, 0.5748, 0.4252, 0.5512, 0.3937, 0.0000])

In [161]:
ape = torch.where(t == 0, 0, 100 * torch.abs((t_dq - t)/t))
ape

tensor([21.2598, 57.4803, 21.2598,  5.5118,  0.7874,  0.0000])