## Quantization

In [60]:
import numpy as np

params = np.random.uniform(low=-50, high=150, size=20)

params[0] = weights.max() + 1
params[1] = weights.min() - 1
params[2] = 0

params = np.round(params, 2)

print(params)

[152.98 -48.35   0.     7.34  -2.44  60.55  38.53 119.43 103.6   91.79
  60.63  93.17 140.03  59.53  90.75  44.96  48.01 121.78 -42.51 137.09]


### Quantization Methods

In [75]:
def clamp(params_q: np.array, lower_bound: int, upper_bound: int) -> np.array:
    params_q[params_q < lower_bound] = lower_bound
    params_q[params_q > upper_bound] = upper_bound
    return params_q

def asymmetric_quantization(params: np.array, bits: int) -> tuple[np.array, float, int]:
    # Calculate scale and zero point
    alpha = np.max(params)
    beta = np.min(params)
    scale = (alpha - beta) / (2**bits - 1)
    zero = -1 * np.round(beta / scale)
    lower_bound = 0
    upper_bound = 2**bits - 1
    # Quantize the params
    quantized = clamp(np.round(params / scale) + zero, lower_bound, upper_bound).astype(np.int32)
    return quantized, scale, zero

def symmetric_quantization(params: np.array, bits: int) -> tuple[np.array, float]:
    # Calculate scale
    alpha = np.max(params)
    scale = np.abs(alpha) / (2**(bits-1) - 1)
    lower_bound = -2**(bits - 1)
    upper_bound = 2**(bits - 1) - 1
    # Quantize the params
    quantized = clamp(np.round(params / scale), lower_bound, upper_bound).astype(np.int32)
    return quantized, scale

def asymmetric_dequantize(params_q: np.array, scale: float, zero: int) -> np.array:
    return (params_q - zero) * scale

def symmetric_dequantize(params_q: np.array, scale: float) -> np.array:
    return params_q * scale

def quantization_error(params: np.array, params_q: np.array):
    # MSE
    return np.mean((params - params_q) ** 2)

## Quantize

In [73]:
(asymmetric_q, asymmetric_scale, asymmetric_zero) = asymmetric_quantization(params, 8)
(symmetric_q, symmetric_scale) = symmetric_quantization(params, 8)

print("Orginal: ")
print(params)
print()
print(f"Asymmetric s: {asymmetric_scale}, z:{asymmetric_zero}")
print(asymmetric_q)
print()
print()
print(f"Symmetric s: {symmetric_scale}")
print(symmetric_q)

Orginal: 
[152.98 -48.35   0.     7.34  -2.44  60.55  38.53 119.43 103.6   91.79
  60.63  93.17 140.03  59.53  90.75  44.96  48.01 121.78 -42.51 137.09]

Asymmetric s: 0.7895294117647058, z:61.0
[255   0  61  70  58 138 110 212 192 177 138 179 238 136 176 118 122 215
   7 235]


Symmetric s: 1.204566929133858
[127 -40   0   6  -2  50  32  99  86  76  50  77 116  49  75  37  40 101
 -35 114]


## Dequantize

In [77]:
params_deq_asymmetric = asymmetric_dequantize(asymmetric_q, asymmetric_scale, asymmetric_zero)
params_deq_symmetric  = symmetric_dequantize(symmetric_q, symmetric_scale)

print("Orginal: ")
print(params)
print()
print(f"Dequantized Asymmetric:")
print(np.round(params_deq_asymmetric, 2))
print()
print()
print(f"Dequantized Symmetric:")
print(np.round(params_deq_symmetric, 2))

Orginal: 
[152.98 -48.35   0.     7.34  -2.44  60.55  38.53 119.43 103.6   91.79
  60.63  93.17 140.03  59.53  90.75  44.96  48.01 121.78 -42.51 137.09]

Dequantized Asymmetric:
[153.17 -48.16   0.     7.11  -2.37  60.79  38.69 119.22 103.43  91.59
  60.79  93.16 139.75  59.21  90.8   45.    48.16 121.59 -42.63 137.38]


Dequantized Symmetric:
[152.98 -48.18   0.     7.23  -2.41  60.23  38.55 119.25 103.59  91.55
  60.23  92.75 139.73  59.02  90.34  44.57  48.18 121.66 -42.16 137.32]


## Check Error

In [85]:
print(f'{"Asymmetric Error: ":>20}{np.round(quantization_error(params, params_deq_asymmetric), 2)}')
print(f'{"Symmetric Error: ":>20}{np.round(quantization_error(params, params_deq_symmetric), 2)}')

  Asymmetric Error: 0.04
   Symmetric Error: 0.07
