<a href="https://colab.research.google.com/github/hdjcool/AI-study-DL/blob/master/11_How_To_Quantize.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# How to Quantization

In [1]:
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

In [2]:
fp32_values = np.sort(np.random.uniform(low = 0.0, high=5.0, size= (10)))
print(fp32_values)

[0.28301402 0.77873235 1.12233709 1.39843439 1.6999251  1.74000648
 2.00392207 3.13781879 3.16970945 3.34183501]


In [3]:
beta = np.max(fp32_values)
alpha = np.min(fp32_values)
beta_q = pow(2, 8)-1
alpha_q = 0
print(beta, alpha, beta_q, alpha_q)

3.341835014788046 0.28301401533430914 255 0


### Calculating the Scale

s = (beta - alpha) / (beta_q - alpha_q)

In [4]:
s = (beta - alpha)/(beta_q - alpha_q)
print(s)

0.011995376468446027


### Calculating Z

z = round((beta X alpha_q - alpha X beta_q) / (beta - alpha))

In [5]:
z = round((beta*alpha_q - alpha*beta_q)/(beta-alpha))
print(z)

-24


### Clipping Values

clip values fromt lower_bound to upper bound

In [6]:
q = np.clip(z, alpha_q, beta_q)
print(q)

0


### Calculating the Q Values

In [7]:
q_values = np.clip((fp32_values / s) + z, alpha_q, beta_q).astype(np.uint8)

In [8]:
print(fp32_values)
print(q_values)

[0.28301402 0.77873235 1.12233709 1.39843439 1.6999251  1.74000648
 2.00392207 3.13781879 3.16970945 3.34183501]
[  0  40  69  92 117 121 143 237 240 254]


## Plot the Results

In [9]:
from plotly.subplots import make_subplots
import plotly.graph_objects as go
fp32_text = [str(round(x,2)) for x in fp32_values.tolist()]
int8_text = [str(round(x,2)) for x in q_values.tolist()]
fig = make_subplots(rows=1, cols=2)
fig.add_trace(go.Scatter(x=np.arange(len(fp32_values)), y=fp32_values, text = fp32_text,
                    mode='markers+text', textposition="top left",
                    name='fp32_values'), row=1, col=1)

fig.add_trace(go.Scatter(x=np.arange(len(q_values)), y=q_values, text = int8_text,
                    mode='markers+text', textposition="bottom right",
                    name='Quantized values'), row=1, col=2)

fig.update_layout(height=500, width=1000, title_text="Quantization - fp32 -> int8 mapping")
fig.show()

### Dequantization

In [10]:
dequant_values = s*(q_values - z)

In [11]:
fp32_text = [str(round(x,2)) for x in fp32_values.tolist()]
dequant_text = [str(round(x,2)) for x in dequant_values.tolist()]
fig = make_subplots(rows=1, cols=2)
fig.add_trace(go.Scatter(x=np.arange(len(fp32_values)), y=fp32_values, text = fp32_text,
                    mode='markers+text', textposition="top left",
                    name='fp32_values'), row=1, col=1)

fig.add_trace(go.Scatter(x=np.arange(len(q_values)), y=dequant_values, text = dequant_text,
                    mode='markers+text', textposition="bottom right",
                    name='Dequantized values'), row=1, col=2)

fig.update_layout(height=500, width=1000, title_text="fp32 and dequantized")
fig.show()