<a href="https://colab.research.google.com/github/finardi/tutos/blob/master/Quantizer_example.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [7]:
import os
import torch

A = torch.tensor([[-0.18120981, -0.29043840], [0.49722983, 0.22141714]])
B = torch.tensor([0.77412377, 0.49299395])

def quantize(C, a=1):
    quantized = [torch.floor(128*elel/a) for 
                 elel in C.view(-1)]
    return torch.tensor(quantized).view(C.size())

def de_quantize(C, a=1):
    dequantized = [a*elel/(128*128) for elel in C.view(-1)]
    return torch.tensor(dequantized).view(C.size())

print(f'A:   [{A.flatten()[:2]}{A.flatten()[2:]}]')
print(f'B:   {B}')
print(f'A@B: {A@B}\n')


A_quant = quantize(A)
B_quant = quantize(B)
print(f'A quant:      [{A_quant.flatten()[:2]}{A_quant.flatten()[2:]}]')
print(f'B quant:      {B_quant}')
print(f'A@B quant:    {A_quant@B_quant}')
print(f'A@B de-quant: {de_quantize(A_quant@B_quant)}')

A:   [tensor([-0.1812, -0.2904])tensor([0.4972, 0.2214])]
B:   tensor([0.7741, 0.4930])
A@B: tensor([-0.2835,  0.4941])

A quant:      [tensor([-24., -38.])tensor([63., 28.])]
B quant:      tensor([99., 63.])
A@B quant:    tensor([-4770.,  8001.])
A@B de-quant: tensor([-0.2911,  0.4883])


# Dynamic Quant exemple

In [117]:
class Net(torch.nn.Module):
    def __init__(self, din=1024, dout=1024):
        super().__init__()
        
        self.dense = torch.nn.Linear(din, dout, bias=False)

    def forward(self, x):
        return self.dense(x) + x

model = Net()

dummy_inputs = torch.randn(1024, 1024)
out = model(dummy_inputs)
out.shape

torch.Size([1024, 1024])

In [118]:
quantized_model = torch.quantization.quantize_dynamic(
    model=model,
    qconfig_spec = {torch.nn.Linear : torch.quantization.default_dynamic_qconfig},
    dtype=torch.qint8,
    )

In [119]:
def print_size_of_model(model):
    torch.save(model.state_dict(), "temp.p")
    print('Size (MB):', os.path.getsize("temp.p")/1e6)
    os.remove('temp.p')

print_size_of_model(model)
print_size_of_model(quantized_model)

def count_parameters(model):
    return sum(p.numel() for p in model.parameters() if p.requires_grad)
 
_ = 56

print('\n','=' * _,f'\n # The Dense Layer has {count_parameters(model):,}' \
       ' trainable parameters #\n', '=' * _,'\n' )
print('\n','=' * _,f'\n # The Quantized Dense Layer has {count_parameters(quantized_model):,}' \
       ' trainable parameters #\n', '=' * _,'\n' )

Size (MB): 4.195179
Size (MB): 1.050147

 # The Dense Layer has 1,048,576 trainable parameters #


 # The Quantized Dense Layer has 0 trainable parameters #



In [120]:
qtz_st = quantized_model.state_dict()
for name, q_param in qtz_st.items():
    if isinstance(q_param, tuple) and "dtype" not in name and q_param[0].is_quantized:
        pass
q_param        

(tensor([[-0.0027,  0.0017,  0.0025,  ..., -0.0203,  0.0032,  0.0140],
         [-0.0100, -0.0088, -0.0015,  ..., -0.0098,  0.0167, -0.0029],
         [-0.0108,  0.0157,  0.0081,  ..., -0.0169,  0.0260, -0.0279],
         ...,
         [-0.0054, -0.0223,  0.0201,  ...,  0.0127, -0.0294, -0.0199],
         [ 0.0002,  0.0309,  0.0125,  ...,  0.0311, -0.0120, -0.0098],
         [ 0.0196, -0.0044,  0.0292,  ..., -0.0064,  0.0287, -0.0091]],
        size=(1024, 1024), dtype=torch.qint8,
        quantization_scheme=torch.per_tensor_affine, scale=0.0002450977626722306,
        zero_point=0), None)

In [121]:
qtz_st = model.state_dict()
for name, param in qtz_st.items():
    if isinstance(param, tuple) and "dtype" not in name and param[0].is_quantized:
        pass
name, param        

('dense.weight',
 tensor([[-0.0028,  0.0016,  0.0024,  ..., -0.0204,  0.0032,  0.0139],
         [-0.0100, -0.0088, -0.0014,  ..., -0.0098,  0.0167, -0.0030],
         [-0.0109,  0.0157,  0.0081,  ..., -0.0169,  0.0260, -0.0279],
         ...,
         [-0.0054, -0.0224,  0.0201,  ...,  0.0128, -0.0293, -0.0197],
         [ 0.0003,  0.0309,  0.0126,  ...,  0.0311, -0.0121, -0.0098],
         [ 0.0197, -0.0044,  0.0291,  ..., -0.0065,  0.0286, -0.0090]]))

In [122]:
q_param[0].q_scale(), q_param[0].int_repr()

(0.0002450977626722306, tensor([[ -11,    7,   10,  ...,  -83,   13,   57],
         [ -41,  -36,   -6,  ...,  -40,   68,  -12],
         [ -44,   64,   33,  ...,  -69,  106, -114],
         ...,
         [ -22,  -91,   82,  ...,   52, -120,  -81],
         [   1,  126,   51,  ...,  127,  -49,  -40],
         [  80,  -18,  119,  ...,  -26,  117,  -37]], dtype=torch.int8))

In [123]:
sample = q_param[0][0][:3]
sample

tensor([-0.0027,  0.0017,  0.0025], size=(3,), dtype=torch.qint8,
       quantization_scheme=torch.per_tensor_affine, scale=0.0002450977626722306,
       zero_point=0)

In [124]:
sample = torch.tensor([-0.0027,  0.0017,  0.0025])

In [126]:
def quantize(C, a=1):
    quantized = [torch.IntTensor([elel/a]) for elel in C.view(-1)]
    return torch.tensor(quantized).view(C.size())

quantize(
    C=sample,
    a=q_param[0].q_scale(),
    )

tensor([-11,   6,  10], dtype=torch.int32)

In [None]:
# mais detalhes em: https://github.com/pytorch/pytorch/wiki/Introducing-Quantized-Tensor