# Quantization 101
## Image compression


$
\begin{array}{lc}
\text{Quantization}
\\q(x;s,z)=clip\left(round\left(\frac{x}{s}+z\right)\right) \\
\\\text{Dequantization}\\
x\approx\hat{x}=s\cdot(q(x)-z)
\end{array}
$

$
\begin{array}{lc}
s = \frac{\beta-\alpha}{\beta_q-\alpha_q}\\ \\
z = \frac{\beta\alpha_q-\alpha\beta_q}{\beta-\alpha}
\end{array}
$

In [1]:
import tensorflow as tf

Initiate weight

In [2]:
w = tf.keras.initializers.VarianceScaling(scale=2, mode='fan_in')(shape=[10,])
dtype = w.dtype
alpha = tf.math.reduce_min(w).numpy()
beta = tf.math.reduce_max(w).numpy()
print(f'type: {dtype}\nalpha: {alpha}\nbeta: {beta}')

type: <dtype: 'float32'>
alpha: -0.5423163771629333
beta: 0.8558766841888428


In [5]:
def get_offsets(X, bit_width=8):
    alpha = tf.math.reduce_min(w).numpy()
    beta = tf.math.reduce_max(w).numpy()
    alpha_q = 0
    beta_q = 2 ** bit_width -1
    s = (beta - alpha) / (beta_q - alpha_q)
    z = (beta * alpha_q - alpha * beta_q) / (beta - alpha)
    return s, z

In [6]:
 get_offsets(w)

(0.005483110278260474, 98.90670616513373)

## Model compression

$
q(x;s,z,b)=clip\left(round\left(\frac{x}{s}+z\right);0,2^{b}-1\right)
$

$
clip=
\begin{cases}
q_{min},\quad x<q_{min}\\
x,\qquad \ q_{min}<x<q_{max}\\
q_{max},\quad q_{max}<x\\
\end{cases}
$

In [7]:
class Quantizer:
    def __init__(self, bit_width:int = 8):
        self.bit_width = bit_width
        self.step_size = None
        self.center = None

    def quantizer(self, X):
        X_min = tf.math.reduce_min(X)
        X_max = tf.math.reduce_max(X)
        Xq_min = 0
        Xq_max = tf.cast(tf.math.pow(2, self.bit_width) - 1, 'float32')
        self.step_size = (X_max - X_min) / (Xq_max - Xq_min)
        self.center = tf.math.round((X_max * Xq_min - X_min * Xq_max) / (X_max - X_min))
        print(self.step_size, self.center)
        Xq = tf.clip_by_value(tf.math.add(tf.math.divide(X, self.step_size),
                                          self.center), Xq_min, Xq_max)
        return tf.cast(Xq, 'uint8')

    def dequantizer(self, X):
        X = tf.cast(X, tf.float32)
        return self.step_size * (X - self.center)

In [8]:
quant = Quantizer()
q = quant.quantizer(w)
dq = quant.dequantizer(q)

tf.Tensor(0.0054831104, shape=(), dtype=float32) tf.Tensor(99.0, shape=(), dtype=float32)


In [9]:
print(q)

tf.Tensor([255  32  36 148  16  89  95 103 169   0], shape=(10,), dtype=uint8)


In [10]:
print(dq)

tf.Tensor(
[ 0.8553652  -0.3673684  -0.34543595  0.2686724  -0.45509815 -0.0548311
 -0.02193244  0.02193244  0.38381773 -0.5428279 ], shape=(10,), dtype=float32)


In [11]:
diff = w - dq
print(diff)

tf.Tensor(
[0.00051147 0.00152385 0.00542489 0.00491884 0.00147772 0.00544566
 0.00107936 0.00014294 0.00387135 0.00051153], shape=(10,), dtype=float32)
