In [None]:
import numpy as np

# keep printed floats in plain decimal form (no scientific notation)
np.set_printoptions(suppress=True)

# draw 20 random parameters uniformly between -50 and 150
params = np.random.uniform(-50, 150, 20)

# plant the extremes and an exact zero in the first three slots so they are
# easy to spot while debugging; the order matters, because the minimum is
# read only after slot 0 has been overwritten
params[0] = 1 + params.max()
params[1] = params.min() - 1
params[2] = 0

# keep two decimal places
params = params.round(2)

print(params)

[147.69 -49.94   0.   119.37 -32.34  50.36  -8.18 144.12 140.26 130.15
  57.05 -27.46 126.14  18.3  -34.07 112.41 146.69  43.33  65.72 -48.94]


In [None]:
def clamp(params_q: np.ndarray, lower_bound: int, upper_bound: int) -> np.ndarray:
    """Limit every element of ``params_q`` to ``[lower_bound, upper_bound]``.

    Elements below ``lower_bound`` become ``lower_bound``, elements above
    ``upper_bound`` become ``upper_bound``, and everything else is kept.

    Args:
        params_q: Values to limit (array or array-like).
        lower_bound: Smallest value allowed in the result.
        upper_bound: Largest value allowed in the result.

    Returns:
        A new array holding the limited values; ``params_q`` itself is not
        modified.
    """
    # np.clip writes into a fresh output array, so the caller's data is not
    # overwritten as a side effect of clamping.
    return np.clip(params_q, lower_bound, upper_bound)

def asymmmetric_quantization(params: np.ndarray, bits: int) -> tuple[np.ndarray, float, float]:
    """Quantize ``params`` to unsigned ``bits``-bit integers with a zero point.

    The scale is ``(max - min) / (2**bits - 1)`` and the zero point is
    ``-round(min / scale)``. Each value is mapped to
    ``round(value / scale + zero)`` and limited to ``[0, 2**bits - 1]``.

    Args:
        params: Values to quantize (array or array-like, non-empty).
        bits: Number of bits of the target integer range.

    Returns:
        A tuple ``(quantized, scale, zero)``: the quantized values as an
        ``int32`` array, the scale, and the zero point (an integer-valued
        float).
    """
    params = np.asarray(params)

    # calculate the scale and zero point
    alpha = params.max()
    beta = params.min()
    lower_bound, upper_bound = 0, 2 ** bits - 1
    scale = (alpha - beta) / upper_bound
    if scale == 0:
        # All values are identical, so the range is empty and dividing by the
        # scale would produce NaN/inf. Fall back to the value's magnitude (or
        # 1.0 for an all-zero input) so the constant still round-trips.
        scale = float(abs(alpha)) if alpha != 0 else 1.0
    zero = -1 * np.round(beta / scale)

    # quantize parameters
    quantized = np.clip(np.round(params / scale + zero), lower_bound, upper_bound).astype(np.int32)
    return quantized, scale, zero

def asymmmetric_quantization_percentile(params: np.ndarray, bits: int, percentile: float = 99.99) -> tuple[np.ndarray, float, float]:
    """Quantize ``params`` to unsigned ``bits``-bit integers using percentiles.

    Works like min/max asymmetric quantization, except that the range is taken
    from the ``percentile`` and ``100 - percentile`` percentiles of ``params``
    instead of its maximum and minimum. Values that map outside
    ``[0, 2**bits - 1]`` are clipped to the nearest bound.

    Args:
        params: Values to quantize (array or array-like, non-empty).
        bits: Number of bits of the target integer range.
        percentile: Upper percentile (0-100) used as the top of the range; the
            bottom of the range is the ``100 - percentile`` percentile.

    Returns:
        A tuple ``(quantized, scale, zero)``: the quantized values as an
        ``int32`` array, the scale, and the zero point (an integer-valued
        float).
    """
    params = np.asarray(params)

    # calculate the scale and zero point
    alpha = np.percentile(params, percentile)
    beta = np.percentile(params, 100 - percentile)
    lower_bound, upper_bound = 0, 2 ** bits - 1
    scale = (alpha - beta) / upper_bound
    if scale == 0:
        # Both percentiles coincide, so the range is empty and dividing by the
        # scale would produce NaN/inf. Fall back to the value's magnitude (or
        # 1.0 when it is zero) so that value still round-trips.
        scale = float(abs(alpha)) if alpha != 0 else 1.0
    zero = -1 * np.round(beta / scale)

    # quantize parameters
    quantized = np.clip(np.round(params / scale + zero), lower_bound, upper_bound).astype(np.int32)
    return quantized, scale, zero


def symmmetric_quantization(params: np.ndarray, bits: int) -> tuple[np.ndarray, float]:
    """Quantize ``params`` to signed ``bits``-bit integers without a zero point.

    The scale is ``max(|params|) / (2**(bits - 1) - 1)``. Each value is mapped
    to ``round(value / scale)`` and limited to
    ``[-2**(bits - 1), 2**(bits - 1) - 1]``.

    Args:
        params: Values to quantize (array or array-like, non-empty).
        bits: Number of bits of the target integer range.

    Returns:
        A tuple ``(quantized, scale)``: the quantized values as an ``int32``
        array and the scale.
    """
    params = np.asarray(params)

    # calculate the scale (this scheme has no zero point)
    alpha = np.max(np.abs(params))
    scale = alpha / (2 ** (bits - 1) - 1)
    if scale == 0:
        # Every value is zero, so 0 / 0 would give NaN. Any positive scale
        # maps zeros to zeros; use 1.0.
        scale = 1.0
    lower_bound, upper_bound = -(2 ** (bits - 1)), 2 ** (bits - 1) - 1

    # quantize parameters
    quantized = np.clip(np.round(params / scale), lower_bound, upper_bound).astype(np.int32)
    return quantized, scale

def asymmetric_dequantize(params_q: np.array, scale: float, zero:int)-> np.array:
    """Undo asymmetric quantization: subtract the zero point, then rescale."""
    shifted = params_q - zero
    return shifted * scale

def symmetric_dequantize(params_q: np.array, scale: float)-> np.array:
    """Undo symmetric quantization by multiplying the values by ``scale``."""
    dequantized = params_q * scale
    return dequantized

def quantization_error(params: np.array, params_q: np.array):
    """Return the mean squared error between ``params`` and ``params_q``."""
    residual = params - params_q
    return np.mean(residual * residual)

In [21]:
# quantize the same parameters to 8 bits with both schemes
asymmetric_q, asymmetric_scale, asymmteric_zero = asymmmetric_quantization(params, 8)
symmetric_q, symmetric_scale = symmmetric_quantization(params, 8)

# one print call per section: header, values, then an empty separator line
print("Original: ", np.round(params, 2), '', sep="\n")
print(f"Asymmetric scale: {asymmetric_scale}, zero: {asymmteric_zero}", asymmetric_q, '', sep="\n")
print(f"Symmetric scale: {symmetric_scale}", symmetric_q, sep="\n")

Original: 
[147.69 -49.94   0.   119.37 -32.34  50.36  -8.18 144.12 140.26 130.15
  57.05 -27.46 126.14  18.3  -34.07 112.41 146.69  43.33  65.72 -48.94]

Asymmetric scale: 0.7750196078431373, zero: 64.0
[255   0  64 218  22 129  53 250 245 232 138  29 227  88  20 209 253 120
 149   1]

Symmetric scale: 1.1629133858267717
[127 -43   0 103 -28  43  -7 124 121 112  49 -24 108  16 -29  97 126  37
  57 -42]


In [22]:
# map the quantized integers back to floating-point values
params_deq_asymmteric = asymmetric_dequantize(asymmetric_q, asymmetric_scale, asymmteric_zero)
params_deq_symmteric = symmetric_dequantize(symmetric_q, symmetric_scale)

# one print call per section: header, values, then an empty separator line
print("Original: ", np.round(params, 2), '', sep="\n")
print("Deq Asymmetric: ", np.round(params_deq_asymmteric, 2), '', sep="\n")
print("Deq Symmetric: ", np.round(params_deq_symmteric, 2), sep="\n")

Original: 
[147.69 -49.94   0.   119.37 -32.34  50.36  -8.18 144.12 140.26 130.15
  57.05 -27.46 126.14  18.3  -34.07 112.41 146.69  43.33  65.72 -48.94]

Deq Asymmetric: 
[148.03 -49.6    0.   119.35 -32.55  50.38  -8.53 144.15 140.28 130.2
  57.35 -27.13 126.33  18.6  -34.1  112.38 146.48  43.4   65.88 -48.83]

Deq Symmetric: 
[147.69 -50.01   0.   119.78 -32.56  50.01  -8.14 144.2  140.71 130.25
  56.98 -27.91 125.59  18.61 -33.72 112.8  146.53  43.03  66.29 -48.84]


In [25]:
# report the MSE of each round trip, labels right-aligned to 20 characters
print("{:>20}{}".format("Asymmetric error: ", np.round(quantization_error(params, params_deq_asymmteric), 2)))
print("{:>20}{}".format("Symmetric error: ", np.round(quantization_error(params, params_deq_symmteric), 2)))

  Asymmetric error: 0.04
   Symmetric error: 0.09
