In [1]:
import Ipynb_importer
from a_basic_quant import *
from b_model import *
from c_train_and_test import *
from d_post_training_quantize import *

importing Jupyter notebook from a_basic_quant.ipynb
importing Jupyter notebook from b_model.ipynb
importing Jupyter notebook from c_train_and_test.ipynb
importing Jupyter notebook from d_post_training_quantize.ipynb


## 1、理解量化过程·教程
### 1.1 获得一个输入 torch.tensor

In [2]:
x = torch.randn(2,3) *100
x

tensor([[ -11.5217, -116.7138,   64.4203],
        [ 169.1704,  -38.0857, -214.3612]])

### 1.2 获得 x 的最大值和最小值，确定上下限

In [3]:
max_val = x.max()
min_val = x.min()
print(f"max_val: {max_val} \nmin_val:{min_val}")

max_val: 169.17042541503906 
min_val:-214.36123657226562


### 1.3 计算缩放因子 scare, 零点 zero_point

In [4]:
scale, zero_point = calcu_scale_and_zeropoint(min_val, max_val)
print(f"scale: {scale} \nzero_point: {zero_point}")

scale: 1.504045844078064 
zero_point: 142.0


### 1.4 进行量化

In [5]:
q_x = quantize_tensor(x, scale, zero_point)
q_x

tensor([[134.3395,  64.4001, 184.8314],
        [254.4769, 116.6778,   0.0000]])

### 1.5 反量化

In [6]:
x_q_x = dequantize_tensor(q_x, scale, zero_point)

print(f"原始值：\n{x} \n量化反量化：\n{x_q_x}")

原始值：
tensor([[ -11.5217, -116.7138,   64.4203],
        [ 169.1704,  -38.0857, -214.3612]]) 
量化反量化：
tensor([[ -11.5217, -116.7138,   64.4203],
        [ 169.1704,  -38.0857, -213.5745]])


## 2、理解卷积中量化参与的过程

### 2.1 定义一个卷积

In [7]:
conv = torch.nn.Conv2d(1, 40, 3, 1)

### 2.2 定义输入 a 并且输出指定范围数据

In [8]:
a = torch.randn((64, 1, 28, 28))
a[0][0][0][:5]

tensor([-0.9945,  1.6742,  0.7916,  0.5435,  0.5858])

### 2.3 卷积运算并且输出指定范围数据

In [9]:
a_out = conv(a)
a_out[0][0][0][:5]

tensor([ 0.6637,  0.8401,  0.0938,  0.4350, -0.5728], grad_fn=<SliceBackward>)

### 2.4 定义量化卷积层

In [10]:
qconv1 = QConv2d(conv, has_qin=True, has_qout=True, num_bits=8)

### 2.5 计算S 和 z，进行中间参数的量化，并保存

In [11]:
y = qconv1(a)
qconv1.freeze()

### 2.6 量化输入

In [12]:
# 量化输入
qa = qconv1.q_in.quantize_tensor(a).int().float()
print(f"量化输入，展示部分数据：{qa[0][0][0][:5]}")

# 反量化输入
# 这部分没必要，只是为了给你们展示看效果
qaa = qconv1.q_in.dequantize_tensor(qa)
print(f"\n反量化输入，展示部分数据：\n\n{qaa[0][0][0][:5]}\n")
print(f"原始数据对比：\n\n{a[0][0][0][:5]}")

量化输入，展示部分数据：tensor([ 91., 165., 140., 134., 135.])

反量化输入，展示部分数据：

tensor([-1.0093,  1.6582,  0.7570,  0.5407,  0.5768])

原始数据对比：

tensor([-0.9945,  1.6742,  0.7916,  0.5435,  0.5858])


### 2.7 使用量化输入进行推理

In [13]:
qa_out = qconv1.quantize_inference(qa)
print(f"推理输出，展示部分数据：{qa_out[0][0][0][:5]}")

qaa_out = qconv1.q_out.dequantize_tensor(qa_out)
print(f"\n反量化推理输出，展示部分数据：\n\n{qaa_out[0][0][0][:5]}\n")
print(f"原始推理结果对比：\n\n{a_out[0][0][0][:5]}")

推理输出，展示部分数据：tensor([155., 161., 135., 147., 113.], grad_fn=<SliceBackward>)

反量化推理输出，展示部分数据：

tensor([ 0.6667,  0.8407,  0.0870,  0.4348, -0.5508], grad_fn=<SliceBackward>)

原始推理结果对比：

tensor([ 0.6637,  0.8401,  0.0938,  0.4350, -0.5728], grad_fn=<SliceBackward>)


## 3、无法量化的缘故·教程

### 3.1 定义卷积

In [14]:
conv = torch.nn.Conv2d(1, 40, 3, 1)

### 3.2 定义输入并查看类型

In [15]:
a = torch.randn((64, 1, 28, 28))

print(f"shape: {a.shape} \ndtype: {a.dtype}")

shape: torch.Size([64, 1, 28, 28]) 
dtype: torch.float32


### 3.3 卷积运算（正常输出）

In [16]:
a_out = conv(a)

print(f"shape: {a_out.shape} \ndtype: {a_out.dtype}")

shape: torch.Size([64, 40, 26, 26]) 
dtype: torch.float32


### 3.4 改变输入类型

In [17]:
b = a.to(torch.uint8)

b.dtype

torch.uint8

### 3.5 卷积运算（错误输出）

In [18]:
b_out = conv(b)

RuntimeError: expected scalar type Byte but found Float

### 3.6 pytorch 目前无法量化的缘故（猜测）
- https://pytorch.org/docs/stable/generated/torch.nn.Conv2d.html
- https://www.tensorflow.org/api_docs/python/tf/nn/conv2d#args

for pytorch:
> This module supports TensorFloat32.

for tensorflow:
> A Tensor. Must be one of the following types: half, bfloat16, float32, float64.