#### Creating a simple array with random items

In [27]:
import numpy as np 

# Print floats in plain decimal notation instead of scientific notation.
np.set_printoptions(suppress = True)

# Fixed seed so that "Restart Kernel -> Run All" regenerates the same tensor,
# and therefore the same quantization results, on every run.
SEED = 42
rng = np.random.default_rng(SEED)

# Generating a random "tensor": 1000 values drawn uniformly from [-80, 100)
params = rng.uniform(low = -80, high = 100, size = 1000)

In [28]:
# Modifying the tensor for easy debugging: the last element is overwritten
# with an outlier so its effect on quantization can be observed.
params[len(params) - 1] = 580

In [29]:
# Round every value to 4 decimal places, then display the tensor.
params = params.round(4)
print(params)

[-75.5607   2.4964 -51.6582 -34.6734  85.7485 -10.2264  51.8445  88.2061
  22.5767  38.7353  80.0579  91.5367  -7.5655   6.2311 -64.8435 -35.7573
  60.4309  -4.1842 -43.8909  73.8106  27.5571 -24.2997 -35.2939  75.8011
  89.6124  50.0374  56.5549  -7.7567 -46.078  -78.5109 -77.5596  18.5287
  27.6697 -42.6979  39.8209  97.4179  62.7609  94.7738  34.0871  77.4349
 -70.0131 -41.2389 -76.6397  -7.1115  52.5183  37.6431  82.0884 -76.8941
 -20.1937 -71.1431  15.1126 -68.6605  14.9519  19.5964  38.5801 -50.6124
  90.2982  85.689   77.2276   3.4888  53.7329  78.9521  47.8481 -46.9309
  -4.3695  -3.409  -72.9521   6.4319  77.8951  61.4459  63.4833  66.8903
 -52.2251 -26.0495  59.4212  41.9444  58.604  -30.0814 -77.0835 -23.8161
 -21.5756 -20.9183  86.1513 -21.6432 -28.6123  -0.9221  81.4447  59.5623
   9.8243  47.6858  61.8791  75.4339  37.9531 -12.4036  20.8678 -74.6465
 -35.9852  73.1533 -27.3382  85.2882 -73.1429  55.6437 -71.2614  56.755
  11.0195  33.7647 -16.9213 -27.3817 -40.6372 -49.44

### Function for clamping

In [30]:
def clamping(params_arr, lower_bound, upper_bound):
    """Limit every value of ``params_arr`` to ``[lower_bound, upper_bound]``.

    Values below ``lower_bound`` become ``lower_bound`` and values above
    ``upper_bound`` become ``upper_bound``; everything else is unchanged.

    Args:
        params_arr: Array-like of numbers to clamp. It is not modified.
        lower_bound: Smallest value allowed in the result.
        upper_bound: Largest value allowed in the result.

    Returns:
        A new ``numpy.ndarray`` holding the clamped values.
    """
    # np.clip returns a fresh array, so the caller's data is never overwritten
    # as a side effect, and plain sequences (lists, tuples) are accepted too.
    return np.clip(params_arr, lower_bound, upper_bound)

### Function for asymmetric quantization and dequantization

In [31]:
def asymmetric_quantization(params, bits):
    """Quantize ``params`` to ``bits``-bit codes using its full min/max range.

    The observed range ``[min(params), max(params)]`` is mapped linearly onto
    the integer codes ``[0, 2**bits - 1]``.

    Args:
        params: Array-like of real values to quantize.
        bits: Bit width of the codes. Must be between 1 and 31 so that the
            scale is well defined and every code fits in the ``int32`` result.

    Returns:
        A ``(quantized, scale, zero_point)`` tuple:
        ``quantized`` is an ``int32`` array of codes, ``scale`` is the real
        step between adjacent codes, and ``zero_point`` is the integer-valued
        offset (stored as a float) such that
        ``scale * (quantized - zero_point)`` approximates ``params``.

    Raises:
        ValueError: If ``bits`` is outside ``[1, 31]``, or (from ``np.max``)
            if ``params`` is empty.
    """
    if not 1 <= bits <= 31:
        raise ValueError(f"bits must be between 1 and 31, got {bits}")

    params = np.asarray(params)  # accept lists/tuples as well as arrays
    lower_bound, upper_bound = 0, (2**bits - 1)

    alpha = np.max(params) # the largest value in our "tensor"
    beta = np.min(params) # smallest value in our "tensor"
    scale = (alpha - beta) / upper_bound
    if scale == 0:
        # Constant tensor: the range is empty, so dividing by it would give
        # NaN/inf codes. Fall back to a scale under which the single value is
        # still reproduced by dequantization (any scale works for all zeros).
        scale = abs(alpha) / upper_bound if alpha != 0 else 1.0
    zero_point = -1*np.round(beta / scale)

    # Quantization: rescale, shift, round to the nearest code and saturate.
    quantized = clamping(np.round(params/scale + zero_point), lower_bound, upper_bound).astype(np.int32)
    return quantized, scale, zero_point

def asymmetric_quantization_percentile(params, bits, percentile= 99.99):
    """Quantize ``params`` to ``bits``-bit codes using a percentile range.

    Instead of the raw min/max, the range is taken between the
    ``100 - percentile`` and ``percentile`` percentiles of ``params`` and
    mapped linearly onto ``[0, 2**bits - 1]``. Values that fall outside that
    range saturate at the first or last code, so a few outliers do not
    stretch the scale for all other values.

    Args:
        params: Array-like of real values to quantize.
        bits: Bit width of the codes. Must be between 1 and 31 so that the
            scale is well defined and every code fits in the ``int32`` result.
        percentile: Upper percentile of the range; the lower one is
            ``100 - percentile``.

    Returns:
        A ``(quantized, scale, zero_point)`` tuple:
        ``quantized`` is an ``int32`` array of codes, ``scale`` is the real
        step between adjacent codes, and ``zero_point`` is the integer-valued
        offset (stored as a float) such that
        ``scale * (quantized - zero_point)`` approximates the in-range values
        of ``params``.

    Raises:
        ValueError: If ``bits`` is outside ``[1, 31]``.
    """
    if not 1 <= bits <= 31:
        raise ValueError(f"bits must be between 1 and 31, got {bits}")

    params = np.asarray(params)  # accept lists/tuples as well as arrays
    lower_bound, upper_bound = 0, (2**bits - 1)

    alpha = np.percentile(params, percentile)
    beta = np.percentile(params, 100 - percentile)
    scale = (alpha - beta) / upper_bound
    if scale == 0:
        # Both percentiles coincide (constant data or percentile == 50), so
        # dividing by the range would give NaN/inf codes. Fall back to a scale
        # under which that single value is still reproduced by dequantization.
        scale = abs(alpha) / upper_bound if alpha != 0 else 1.0
    zero_point = -1*np.round(beta / scale)

    # Quantization: rescale, shift, round to the nearest code and saturate.
    quantized = clamping(np.round(params/scale + zero_point), lower_bound, upper_bound).astype(np.int32)
    return quantized, scale, zero_point

def asymmetric_dequantization(params_q, scale, zero_point):
    """Map quantized codes back to real values.

    Computes ``scale * (params_q - zero_point)``.

    Args:
        params_q: Quantized codes.
        scale: Real step between adjacent codes.
        zero_point: Offset subtracted from the codes before rescaling.

    Returns:
        The dequantized values.
    """
    shifted = params_q - zero_point
    return scale * shifted

### Quantization error

In [32]:
def quantization_error(params, params_q):
    """Return the mean squared error (MSE) between ``params`` and ``params_q``.

    Any other form of loss could be computed here instead; MSE is the one used.
    """
    squared_diff = (params - params_q) ** 2
    return np.mean(squared_diff)

### Seeing how it plays out... 

In [33]:
# Quantize the same tensor to 8 bits with both range-estimation strategies.
asymmetric_q, asymmetric_scale, asymmetric_zero = asymmetric_quantization(params, bits=8)
(
    asymmetric_q_percentile,
    asymmetric_scale_percentile,
    asymmetric_zero_percentile,
) = asymmetric_quantization_percentile(params, bits=8)

# Show the original values next to the codes produced by each strategy.
print('Original:')
print(params.round(2))
print()
print(f'Asymmetric (min-max) scale: {asymmetric_scale}, zero: {asymmetric_zero}')
print(asymmetric_q)
print()
print(f'Asymmetric (percentile) scale: {asymmetric_scale_percentile}, zero: {asymmetric_zero_percentile}')
print(asymmetric_q_percentile)

Original:
[-75.56   2.5  -51.66 -34.67  85.75 -10.23  51.84  88.21  22.58  38.74
  80.06  91.54  -7.57   6.23 -64.84 -35.76  60.43  -4.18 -43.89  73.81
  27.56 -24.3  -35.29  75.8   89.61  50.04  56.55  -7.76 -46.08 -78.51
 -77.56  18.53  27.67 -42.7   39.82  97.42  62.76  94.77  34.09  77.43
 -70.01 -41.24 -76.64  -7.11  52.52  37.64  82.09 -76.89 -20.19 -71.14
  15.11 -68.66  14.95  19.6   38.58 -50.61  90.3   85.69  77.23   3.49
  53.73  78.95  47.85 -46.93  -4.37  -3.41 -72.95   6.43  77.9   61.45
  63.48  66.89 -52.23 -26.05  59.42  41.94  58.6  -30.08 -77.08 -23.82
 -21.58 -20.92  86.15 -21.64 -28.61  -0.92  81.44  59.56   9.82  47.69
  61.88  75.43  37.95 -12.4   20.87 -74.65 -35.99  73.15 -27.34  85.29
 -73.14  55.64 -71.26  56.76  11.02  33.76 -16.92 -27.38 -40.64 -49.44
 -61.    57.39  42.76  70.45 -51.54  64.59 -67.7  -63.63  45.48 -39.51
 -58.49  48.25  36.84 -26.95   3.01  25.16  30.57  51.36 -11.09  61.46
  -0.62 -30.34  52.73  13.9  -18.94   8.8  -20.39 -73.53  40.43 -56

In [34]:
# Map the quantized codes back to real values, using each scheme's own
# scale and zero point.
params_deq_asymmetric = asymmetric_dequantization(
    asymmetric_q, asymmetric_scale, asymmetric_zero
)
params_deq_asymmetric_percentile = asymmetric_dequantization(
    asymmetric_q_percentile, asymmetric_scale_percentile, asymmetric_zero_percentile
)

# Show the original values next to both reconstructions.
print('Original:')
print(params.round(2))
print()
print('Dequantized (min-max):')
print(params_deq_asymmetric.round(2))
print()
print('Dequantized (percentile):')
print(params_deq_asymmetric_percentile.round(2))

Original:
[-75.56   2.5  -51.66 -34.67  85.75 -10.23  51.84  88.21  22.58  38.74
  80.06  91.54  -7.57   6.23 -64.84 -35.76  60.43  -4.18 -43.89  73.81
  27.56 -24.3  -35.29  75.8   89.61  50.04  56.55  -7.76 -46.08 -78.51
 -77.56  18.53  27.67 -42.7   39.82  97.42  62.76  94.77  34.09  77.43
 -70.01 -41.24 -76.64  -7.11  52.52  37.64  82.09 -76.89 -20.19 -71.14
  15.11 -68.66  14.95  19.6   38.58 -50.61  90.3   85.69  77.23   3.49
  53.73  78.95  47.85 -46.93  -4.37  -3.41 -72.95   6.43  77.9   61.45
  63.48  66.89 -52.23 -26.05  59.42  41.94  58.6  -30.08 -77.08 -23.82
 -21.58 -20.92  86.15 -21.64 -28.61  -0.92  81.44  59.56   9.82  47.69
  61.88  75.43  37.95 -12.4   20.87 -74.65 -35.99  73.15 -27.34  85.29
 -73.14  55.64 -71.26  56.76  11.02  33.76 -16.92 -27.38 -40.64 -49.44
 -61.    57.39  42.76  70.45 -51.54  64.59 -67.7  -63.63  45.48 -39.51
 -58.49  48.25  36.84 -26.95   3.01  25.16  30.57  51.36 -11.09  61.46
  -0.62 -30.34  52.73  13.9  -18.94   8.8  -20.39 -73.53  40.43 -56

### Error calculation

In [35]:
# Report the quantization error of each scheme. The last element (the
# injected outlier) is left out of both comparisons.
for label, dequantized in (
    ("Error (min-max) excluding outlier: ", params_deq_asymmetric),
    ("Error (percentile) excluding outlier: ", params_deq_asymmetric_percentile),
):
    error = quantization_error(params[:-1], dequantized[:-1])
    print(f'{label:>40}{np.round(error, 2)}')

     Error (min-max) excluding outlier: 0.56
  Error (percentile) excluding outlier: 0.49
