## Create a simple tensor

In [1]:
import numpy as np

In [2]:
params = np.random.uniform(low=-50,high=150,size=20)
params

array([147.88164384,  16.53920486, -31.67630212, 149.2267514 ,
       -14.64601647,  10.08392699,  86.49534   , -32.1181407 ,
       -37.48593118,  23.49493515, 107.36634257, -46.98736717,
       -20.80065127,  76.85067652,  86.09366675, -46.79494473,
       -47.5148938 ,  73.62558311, -33.12192922, 121.17281763])

In [3]:
## maek sure important values are at the begining for better debugging
## For better understanding
params[0] = params.max() + 1 
params[1] = params.min() - 1
params[2] = 0

In [4]:
print(params)

[150.2267514  -48.5148938    0.         149.2267514  -14.64601647
  10.08392699  86.49534    -32.1181407  -37.48593118  23.49493515
 107.36634257 -46.98736717 -20.80065127  76.85067652  86.09366675
 -46.79494473 -47.5148938   73.62558311 -33.12192922 121.17281763]


In [5]:
params = np.round(params,decimals=2)
params

array([150.23, -48.51,   0.  , 149.23, -14.65,  10.08,  86.5 , -32.12,
       -37.49,  23.49, 107.37, -46.99, -20.8 ,  76.85,  86.09, -46.79,
       -47.51,  73.63, -33.12, 121.17])

## Define the Quantization method and Quantize

In [6]:
def clamp(params_q:np.array,lower_bound:int,upper_bound:int) -> np.array:
    
    params_q[params_q<lower_bound] = lower_bound
    params_q[params_q>upper_bound] = upper_bound
    return params_q

def asymmetric_quantization(params:np.array,bits:int) -> tuple[np.array,float,int]:
    alpha  = params.max()
    beta   = params.min()
    scale  = (alpha- beta) / (2**bits - 1)
    zero   = -1 * np.round(beta / scale)
    lower_bound,upper_bound = 0, 2**bits - 1
    
    # Quantize the parameters
    quantize = clamp(np.round(params / scale + zero),lower_bound,upper_bound).astype(np.int32)
    
    return quantize,scale,zero
 
def asymmetric_dequantize(params_q:np.array, scale:float, zero:int) -> np.array:
    return (params_q - zero) * scale

def symmetric_dequantize(params_q:np.array,scale:float) -> np.array:
    return params_q * scale

def symmetric_quantization(params: np.array, bits: int) -> tuple[np.array, float]:
    
    alpha = np.max(np.abs(params))
    scale = alpha / (2**(bits-1)-1)
    lower_bound = -2**(bits-1)
    upper_bound = 2**(bits-1)-1
    
    quantized = clamp(np.round(params / scale), lower_bound, upper_bound).astype(np.int32)
    return quantized, scale

def quantization_error(params: np.array, params_q: np.array):
    # calculate the MSE
    return np.mean((params - params_q)**2)

(asymmetric_q, asymmetric_scale, asymmetric_zero) = asymmetric_quantization(params, 8)
(symmetric_q, symmetric_scale)                    = symmetric_quantization(params, 8) 

In [7]:
print(f'Original:')
print(np.round(params, 2))
print('')
print(f'Asymmetric scale: {asymmetric_scale},\n zero: {asymmetric_zero}')
print(asymmetric_q)
print('')
print(f'Symmetric scale: {symmetric_scale}')
print(symmetric_q)

Original:
[150.23 -48.51   0.   149.23 -14.65  10.08  86.5  -32.12 -37.49  23.49
 107.37 -46.99 -20.8   76.85  86.09 -46.79 -47.51  73.63 -33.12 121.17]

Asymmetric scale: 0.7793725490196077,
 zero: 62.0
[255.  -0.  62. 253.  43.  75. 173.  21.  14.  92. 200.   2.  35. 161.
 172.   2.   1. 156.  20. 217.]

Symmetric scale: 1.1829133858267715
[127 -41   0 126 -12   9  73 -27 -32  20  91 -40 -18  65  73 -40 -40  62
 -28 102]


In [8]:
asymmetric_zero

62.0

In [9]:
# Dequantize the parameters back to 32 bits
params_deq_asymmetric = asymmetric_dequantize(asymmetric_q, asymmetric_scale, asymmetric_zero)
params_deq_symmetric = symmetric_dequantize(symmetric_q, symmetric_scale,)

print(f'Original:')
print(np.round(params, 2))
print('')
print(f'Dequantize Asymmetric:')
print(np.round(params_deq_asymmetric,2))
print('')
print(f'Dequantize Symmetric:')
print(np.round(params_deq_symmetric, 2))

Original:
[150.23 -48.51   0.   149.23 -14.65  10.08  86.5  -32.12 -37.49  23.49
 107.37 -46.99 -20.8   76.85  86.09 -46.79 -47.51  73.63 -33.12 121.17]

Dequantize Asymmetric:
[150.42 -48.32   0.   148.86 -14.81  10.13  86.51 -31.95 -37.41  23.38
 107.55 -46.76 -21.04  77.16  85.73 -46.76 -47.54  73.26 -32.73 120.8 ]

Dequantize Symmetric:
[150.23 -48.5    0.   149.05 -14.19  10.65  86.35 -31.94 -37.85  23.66
 107.65 -47.32 -21.29  76.89  86.35 -47.32 -47.32  73.34 -33.12 120.66]


In [10]:
# Calculate the quantization error
print(f'{"Asymmetric error: ":>20}{np.round(quantization_error(params, params_deq_asymmetric), 2)}')
print(f'{"Symmetric error: ":>20}{np.round(quantization_error(params, params_deq_symmetric), 2)}')

  Asymmetric error: 0.05
   Symmetric error: 0.1
