In [1]:
import torch
import import_ipynb

In [2]:
import quantize_dequantize

importing Jupyter notebook from quantize_dequantize.ipynb


## Scale and zero point

$s = \dfrac{r_{max} - r_{min}}{q_{max} - q_{min}}$

$z = \mathrm{int}\left(\mathrm{round}\left(q_{min} - \dfrac{r_{min}}{s}\right)\right)$

In [3]:
def get_scale_and_zero_point(tensor, dtype):
    """Compute asymmetric linear-quantization parameters for ``tensor``.

    The real value range ``[r_min, r_max]`` of ``tensor`` is mapped onto the
    integer range ``[q_min, q_max]`` of ``dtype``:

        scale      = (r_max - r_min) / (q_max - q_min)
        zero_point = round(q_min - r_min / scale), clamped to [q_min, q_max]

    A constant tensor has an empty value range, which would give a scale of
    zero. In that case only, the range is widened to include 0 so the single
    value remains representable; an all-zero tensor uses a scale of 1.0.

    Args:
        tensor: Non-empty ``torch.Tensor`` holding the values to quantize.
        dtype: Integer torch dtype accepted by ``torch.iinfo``
            (e.g. ``torch.int8``).

    Returns:
        A ``(scale, zero_point)`` tuple where ``scale`` is a Python float and
        ``zero_point`` is a Python int within ``[q_min, q_max]``.
    """
    q_min = torch.iinfo(dtype).min
    q_max = torch.iinfo(dtype).max

    r_min = torch.min(tensor).item()
    r_max = torch.max(tensor).item()

    if r_max == r_min:
        # Degenerate range: without this, scale would be 0 and the division
        # below would raise ZeroDivisionError. Extend the range to include 0.
        r_min = min(r_min, 0.0)
        r_max = max(r_max, 0.0)

    if r_max == r_min:
        # Still degenerate, so every element is 0; any non-zero scale works.
        scale = 1.0
    else:
        scale = (r_max - r_min) / (q_max - q_min)

    zero_point = q_min - (r_min / scale)

    # Clamp out-of-range values to the dtype limits; otherwise round to the
    # nearest integer so the zero point is itself a valid quantized value.
    if zero_point < q_min:
        zero_point = q_min
    elif zero_point > q_max:
        zero_point = q_max
    else:
        zero_point = int(round(zero_point))

    return scale, zero_point

In [4]:
# Random 3x3 tensor of standard-normal samples used as the quantization input.
test_tensor = torch.randn(3, 3)

In [5]:
test_tensor

tensor([[ 0.1918, -0.6646,  0.7098],
        [ 0.7538, -0.1810,  0.6129],
        [-0.5967, -0.4364,  0.6953]])

In [6]:
# Derive the int8 quantization parameters from the test tensor's value range.
scale, zero_point = get_scale_and_zero_point(tensor=test_tensor, dtype=torch.int8)

In [7]:
scale, zero_point

(0.00556267920662375, -9)

In [8]:
# Quantize the float test tensor with the scale and zero point computed above.
# NOTE(review): linear_q_with_scale_zero_point is defined in quantize_dequantize.ipynb,
# which is not visible here; the displayed result is int8 — confirm the helper's default dtype.
quantized_tensor = quantize_dequantize.linear_q_with_scale_zero_point(test_tensor, scale, zero_point)

In [9]:
quantized_tensor

tensor([[  25, -128,  119],
        [ 127,  -42,  101],
        [-116,  -87,  116]], dtype=torch.int8)

In [10]:
# Map the quantized values back to floats using the same scale and zero point.
# NOTE(review): linear_dq is defined in quantize_dequantize.ipynb, which is not visible here.
dequantized_tensor = quantize_dequantize.linear_dq(quantized_tensor, scale, zero_point)

In [11]:
dequantized_tensor

tensor([[ 0.1891, -0.6620,  0.7120],
        [ 0.7565, -0.1836,  0.6119],
        [-0.5952, -0.4339,  0.6953]])

In [16]:
# Mean squared error between the original tensor and its quantize/dequantize round trip.
(test_tensor - dequantized_tensor).square().mean()

tensor(4.7723e-06)