# How to Quantize

In [None]:
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

In [None]:
# Fixed seed so that Restart & Run All reproduces the same sample (and therefore
# the same scale, zero point and quantized values) on every run.
SEED = 42
rng = np.random.default_rng(SEED)

# Ten sample values drawn uniformly from [0, 5), sorted in ascending order.
fp32_values = np.sort(rng.uniform(low=0.0, high=5.0, size=10))
print(fp32_values)

[0.03047877 0.24396521 0.34090262 1.47798738 1.50912517 3.4922371
 3.54148801 4.00498773 4.62802566 4.64059323]


In [None]:
# Real-valued range [alpha, beta] observed in the sample data.
alpha = fp32_values.min()
beta = fp32_values.max()

# Target integer range [alpha_q, beta_q] for unsigned 8-bit quantization.
alpha_q = 0
beta_q = 2 ** 8 - 1
print(beta, alpha, beta_q, alpha_q)

4.6405932325782535 0.030478771191066456 255 0


### Calculating the Scale

s = (beta - alpha) / (beta_q - alpha_q)

In [None]:
# Scale: width of the real-valued range divided by the width of the integer range.
real_span = beta - alpha
quant_span = beta_q - alpha_q
s = real_span / quant_span
print(s)

0.018078880240734067


### Calculating Z

`z = round((beta * alpha_q - alpha * beta_q) / (beta - alpha))`

In [None]:
# Zero point: the integer offset that is added to x / s when quantizing.
zero_point_numerator = beta * alpha_q - alpha * beta_q
z = round(zero_point_numerator / (beta - alpha))
print(z)

-2


### Clipping Values

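Clip values to the range from `lower_bound` to `upper_bound`: anything below the lower bound becomes the lower bound, and anything above the upper bound becomes the upper bound.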

In [None]:
# Clipping demo: force z into the quantized range [alpha_q, beta_q].
lower_bound, upper_bound = alpha_q, beta_q
q = np.clip(z, lower_bound, upper_bound)
print(q)

0


### Calculating the Q Values

In [None]:
# Affine quantization: q = clip(round(x / s + z), alpha_q, beta_q).
# Round to the nearest integer *before* casting: astype(np.uint8) on its own
# truncates toward zero, which biases every quantized value downward by up to
# one full quantization step instead of at most half a step.
q_values = np.clip(np.round(fp32_values / s + z), alpha_q, beta_q).astype(np.uint8)

In [None]:
# Show the original values on one line block and their quantized codes below.
print(fp32_values, q_values, sep="\n")

[0.03047877 0.24396521 0.34090262 1.47798738 1.50912517 3.4922371
 3.54148801 4.00498773 4.62802566 4.64059323]
[  0  11  16  79  81 191 193 219 253 254]


## Plot the Results

In [None]:
from plotly.subplots import make_subplots
import plotly.graph_objects as go

# Point labels: every value rendered as a string rounded to two decimals.
fp32_text = [str(round(value, 2)) for value in fp32_values.tolist()]
int8_text = [str(round(value, 2)) for value in q_values.tolist()]

# Left panel shows the original values, right panel their quantized codes.
fp32_trace = go.Scatter(
    x=np.arange(len(fp32_values)),
    y=fp32_values,
    text=fp32_text,
    mode='markers+text',
    textposition="top left",
    name='fp32_values',
)
quantized_trace = go.Scatter(
    x=np.arange(len(q_values)),
    y=q_values,
    text=int8_text,
    mode='markers+text',
    textposition="bottom right",
    name='Quantized values',
)

fig = make_subplots(rows=1, cols=2)
fig.add_trace(fp32_trace, row=1, col=1)
fig.add_trace(quantized_trace, row=1, col=2)
fig.update_layout(
    height=500,
    width=1000,
    title_text="Quantization - fp32 -> int8 mapping",
)
fig.show()

### Dequantization

In [None]:
# Dequantization: x ≈ s * (q - z).
# Widen the uint8 codes to float64 before subtracting the zero point. Doing the
# subtraction on the uint8 array directly raises OverflowError on NumPy 2 when
# z is negative (a Python int that does not fit in uint8), and a positive z
# could wrap around in unsigned arithmetic.
dequant_values = s * (q_values.astype(np.float64) - z)

In [None]:
# Point labels: every value rendered as a string rounded to two decimals.
fp32_text = [str(round(value, 2)) for value in fp32_values.tolist()]
dequant_text = [str(round(value, 2)) for value in dequant_values.tolist()]

# Left panel shows the original values, right panel the values recovered
# from the quantized codes.
original_trace = go.Scatter(
    x=np.arange(len(fp32_values)),
    y=fp32_values,
    text=fp32_text,
    mode='markers+text',
    textposition="top left",
    name='fp32_values',
)
dequantized_trace = go.Scatter(
    x=np.arange(len(q_values)),
    y=dequant_values,
    text=dequant_text,
    mode='markers+text',
    textposition="bottom right",
    name='Dequantized values',
)

fig = make_subplots(rows=1, cols=2)
fig.add_trace(original_trace, row=1, col=1)
fig.add_trace(dequantized_trace, row=1, col=2)
fig.update_layout(height=500, width=1000, title_text="fp32 and dequantized")
fig.show()