#### Creating a simple array with random items

In [4]:
import numpy as np 

np.set_printoptions(suppress = True)

# Generatinf random distribution "tensors"
params = np.random.uniform(low = -80, high = 100, size = 1000)

In [5]:
# modifing for easy debugging and introducing an outlier
params[-1] = 580

In [6]:
params = np.round(params,4)
print(params)

[-62.0777  -2.5776 -11.7625  66.5823  76.3164  76.7275 -23.1859  11.1418
  41.2651 -62.6476  -5.4219  77.2777  28.4747  19.5193 -47.9054  36.8982
  57.8298  68.6156  43.7236 -43.1733  -2.895   23.5487  75.8315 -12.711
  78.4029 -14.1171 -46.0813  67.5995  73.0548  92.4781 -33.6416  62.173
  10.8874 -32.9668 -23.4267 -29.3006  -9.2433 -61.0039  45.7503  49.601
  41.8301  66.513  -42.9057  16.4512 -62.9521 -19.0508  48.0257  45.766
  77.1686  50.8335 -74.5279   3.6735  96.0366 -60.3811   8.551    1.8577
  55.9453  -9.4286 -10.7737 -22.0233  61.5618  71.9528  79.547  -63.5454
  27.883   81.2322 -30.8302  10.6363  48.9671  40.1385  88.755   90.2469
  18.2343 -33.9495 -64.3099  55.3388 -63.0548  78.4657  19.2303  86.3319
 -37.6092 -31.0181 -51.2445  39.5565 -75.8005  20.4185  41.2692  84.6781
 -68.2599  52.626   35.4548 -58.4813 -73.4016  46.6232   1.5962 -41.8864
 -17.9618  -4.2867  74.3584 -30.5588  28.1344  58.5367  15.4371  92.3833
 -62.3707  81.0786 -55.031   68.7271  21.2692  73.3893 

### Function for clamping

In [7]:
def clamping(params_arr, lower_bound, upper_bound):
    params_arr[params_arr < lower_bound] = lower_bound
    params_arr[params_arr > upper_bound] = upper_bound
    return params_arr

### Function for asymmetric quantization and dequantization

In [18]:
def asymmetric_quantization(params, bits):
    alpha = np.max(params) # the largest value in our "tensor"
    beta = np.min(params) # smallest value in our "tensor"
    scale = (alpha - beta) / (2**bits - 1) # here wa can also use min/max scaler
    zero_point = -1*np.round(beta / scale)
    lower_bound, upper_bound = 0, (2**bits - 1)
    # Quantization 
    quantized = clamping(np.round(params/scale + zero_point), lower_bound, upper_bound).astype(np.int32)
    return quantized, scale, zero_point

def asymmetric_quantization_percentile(params, bits, percentile= 99):
    alpha = np.percentile(params, percentile) 
    beta = np.percentile(params, 100 - percentile) 
    scale = (alpha - beta) / (2**bits - 1) # here wa can also use min/max scaler
    zero_point = -1*np.round(beta / scale)
    lower_bound, upper_bound = 0, (2**bits - 1)
    # Quantization 
    quantized = clamping(np.round(params/scale + zero_point), lower_bound, upper_bound).astype(np.int32)
    return quantized, scale, zero_point

def asymmetric_dequantization(params_q, scale, zero_point):
    return scale * (params_q - zero_point)

### Quantization error

In [19]:
def quantization_error(params, params_q):
    # we can calculate any for of loss here. ** MSE **
    return np.mean((params - params_q)**2)

### Seeing how it plays out... 

In [20]:
(asymmetric_q, asymmetric_scale, asymmetric_zero) = asymmetric_quantization(params, 8)
(asymmetric_q_percentile, asymmetric_scale_percentile, asymmetric_zero_percentile) = asymmetric_quantization_percentile(params, 8)

print(f'Original:')
print(np.round(params, 2))
print('')
print(f'Asymmetric (min-max) scale: {asymmetric_scale}, zero: {asymmetric_zero}')
print(asymmetric_q)
print(f'')
print(f'Asymmetric (percentile) scale: {asymmetric_scale_percentile}, zero: {asymmetric_zero_percentile}')
print(asymmetric_q_percentile)

Original:
[-62.08  -2.58 -11.76  66.58  76.32  76.73 -23.19  11.14  41.27 -62.65
  -5.42  77.28  28.47  19.52 -47.91  36.9   57.83  68.62  43.72 -43.17
  -2.9   23.55  75.83 -12.71  78.4  -14.12 -46.08  67.6   73.05  92.48
 -33.64  62.17  10.89 -32.97 -23.43 -29.3   -9.24 -61.    45.75  49.6
  41.83  66.51 -42.91  16.45 -62.95 -19.05  48.03  45.77  77.17  50.83
 -74.53   3.67  96.04 -60.38   8.55   1.86  55.95  -9.43 -10.77 -22.02
  61.56  71.95  79.55 -63.55  27.88  81.23 -30.83  10.64  48.97  40.14
  88.76  90.25  18.23 -33.95 -64.31  55.34 -63.05  78.47  19.23  86.33
 -37.61 -31.02 -51.24  39.56 -75.8   20.42  41.27  84.68 -68.26  52.63
  35.45 -58.48 -73.4   46.62   1.6  -41.89 -17.96  -4.29  74.36 -30.56
  28.13  58.54  15.44  92.38 -62.37  81.08 -55.03  68.73  21.27  73.39
   4.5   98.99 -52.4   51.16  83.35  -6.78 -46.76  74.5   18.73 -25.43
 -32.84  26.28  12.95  80.33  54.74 -77.7   90.06  29.36  10.55 -68.5
 -47.82  28.29  57.03 -66.46  69.39   6.33 -79.2  -54.27  -0.28 -33.5

In [21]:
# Dequantize the parameters back to 32 bits
params_deq_asymmetric = asymmetric_dequantization(asymmetric_q, asymmetric_scale, asymmetric_zero)
params_deq_asymmetric_percentile = asymmetric_dequantization(asymmetric_q_percentile, asymmetric_scale_percentile, asymmetric_zero_percentile)

print(f'Original:')
print(np.round(params, 2))
print('')
print(f'Dequantized (min-max):')
print(np.round(params_deq_asymmetric,2))
print('')
print(f'Dequantized (percentile):')
print(np.round(params_deq_asymmetric_percentile,2))

Original:
[-62.08  -2.58 -11.76  66.58  76.32  76.73 -23.19  11.14  41.27 -62.65
  -5.42  77.28  28.47  19.52 -47.91  36.9   57.83  68.62  43.72 -43.17
  -2.9   23.55  75.83 -12.71  78.4  -14.12 -46.08  67.6   73.05  92.48
 -33.64  62.17  10.89 -32.97 -23.43 -29.3   -9.24 -61.    45.75  49.6
  41.83  66.51 -42.91  16.45 -62.95 -19.05  48.03  45.77  77.17  50.83
 -74.53   3.67  96.04 -60.38   8.55   1.86  55.95  -9.43 -10.77 -22.02
  61.56  71.95  79.55 -63.55  27.88  81.23 -30.83  10.64  48.97  40.14
  88.76  90.25  18.23 -33.95 -64.31  55.34 -63.05  78.47  19.23  86.33
 -37.61 -31.02 -51.24  39.56 -75.8   20.42  41.27  84.68 -68.26  52.63
  35.45 -58.48 -73.4   46.62   1.6  -41.89 -17.96  -4.29  74.36 -30.56
  28.13  58.54  15.44  92.38 -62.37  81.08 -55.03  68.73  21.27  73.39
   4.5   98.99 -52.4   51.16  83.35  -6.78 -46.76  74.5   18.73 -25.43
 -32.84  26.28  12.95  80.33  54.74 -77.7   90.06  29.36  10.55 -68.5
 -47.82  28.29  57.03 -66.46  69.39   6.33 -79.2  -54.27  -0.28 -33.5

### Error calculation

In [22]:
# Calculate the quantization error
print(f'{"Error (min-max) excluding outlier: ":>40}{np.round(quantization_error(params[:-1], params_deq_asymmetric[:-1]), 2)}')
print(f'{"Error (percentile) excluding outlier: ":>40}{np.round(quantization_error(params[:-1], params_deq_asymmetric_percentile[:-1]), 2)}')

     Error (min-max) excluding outlier: 0.54
  Error (percentile) excluding outlier: 0.08


I noticed when I twick the percentile then error rate changes there for if there is a way of using gridsearch to give us the optimal percentile then we should do that 