In [2]:
from ppq import *                                       
from ppq.api import *
import os

from ppq.utils.TensorRTUtil import build_engine
# Post-training quantization of a VGG16 ONNX model with PPQ.
#
# Steps performed by this cell:
#   1. define the run configuration,
#   2. load the ONNX graph from WORKING_DIRECTORY,
#   3. build the quantization setting (optionally with LSQ finetuning),
#   4. load the calibration dataset,
#   5. quantize the graph for TARGET_PLATFORM.
#
# Modify the configuration below:
WORKING_DIRECTORY = 'working'                             # choose your working directory
TARGET_PLATFORM   = TargetPlatform.TRT_INT8               # choose your target platform
MODEL_TYPE        = NetworkFramework.ONNX                 # or NetworkFramework.CAFFE
INPUT_LAYOUT          = 'chw'                             # input data layout, chw or hwc
NETWORK_INPUTSHAPE    = [1, 3, 224, 224]                  # input shape of your network
CALIBRATION_BATCHSIZE = 64                                # batchsize of calibration dataset
CALIBRATION_STEPS     = 32                                # number of calibration steps passed to PPQ
EXECUTING_DEVICE      = 'cuda'                            # 'cuda' or 'cpu'.
REQUIRE_ANALYSE       = False                             # not read in this cell
TRAINING_YOUR_NETWORK = True                              # whether to finetune the network after quantization
need_accuracy = True                                      # not read in this cell
label_path = os.path.join(WORKING_DIRECTORY, 'val.txt')   # label file consumed by the evaluation cell

# Only the ONNX branch is implemented; any other MODEL_TYPE leaves `graph`
# as None and trips the assertion below.
graph = None
if MODEL_TYPE == NetworkFramework.ONNX:
    graph = load_onnx_graph(onnx_import_file=os.path.join(WORKING_DIRECTORY, 'model.onnx'))

assert graph is not None, 'Graph Loading Error, Check your input again.'

QS = QuantizationSettingFactory.default_setting()

if TRAINING_YOUR_NETWORK:
    QS.lsq_optimization = True                                      # enable retraining to reduce quantization error
    QS.lsq_optimization_setting.steps = 500                         # retraining steps; affects run time (500 steps takes a few minutes)
    # Where the cached tuning data lives. It follows EXECUTING_DEVICE so a
    # CPU-only configuration does not try to allocate on a GPU; set it to
    # 'cpu' explicitly if GPU memory runs out.
    QS.lsq_optimization_setting.collecting_device  = EXECUTING_DEVICE
    QS.lsq_optimization_setting.block_size         = 4
    QS.lsq_optimization_setting.lr                 = 1e-5
    QS.lsq_optimization_setting.gamma              = 0
    QS.lsq_optimization_setting.is_scale_trainable = True

# Template entry: 'OP NAME' is a placeholder, not an operation of this model.
# Replace it with real operation names to keep those ops in FP32.
# NOTE(review): presumably PPQ ignores names that are not in the graph - confirm.
QS.dispatching_table.append(operation='OP NAME', platform=TargetPlatform.FP32)

# Echo the effective configuration before the long-running steps start.
print('正准备量化你的网络，检查下列设置:')
print(f'WORKING DIRECTORY    : {WORKING_DIRECTORY}')
print(f'TARGET PLATFORM      : {TARGET_PLATFORM.name}')
print(f'NETWORK INPUTSHAPE   : {NETWORK_INPUTSHAPE}')
print(f'CALIBRATION BATCHSIZE: {CALIBRATION_BATCHSIZE}')

dataloader = load_calibration_dataset(
    directory    = WORKING_DIRECTORY,
    input_shape  = NETWORK_INPUTSHAPE,
    batchsize    = CALIBRATION_BATCHSIZE,
    input_format = INPUT_LAYOUT)

quantized = quantize_native_model(
    setting=QS,                     # the setting object controls the standard quantization logic
    model=graph,
    calib_dataloader=dataloader,
    calib_steps=CALIBRATION_STEPS,
    input_shape=NETWORK_INPUTSHAPE, # use this argument when the network has a single input
    inputs=None,                    # use this argument when the network has several inputs, i.e.
                                    # input_shape=None, inputs=[torch.zeros(1,3,224,224), torch.zeros(1,3,224,224)]
    collate_fn=lambda x: x.to(EXECUTING_DEVICE),  # same role as a torch DataLoader collate_fn (data preprocessing);
                                                  # you may use the torch DataLoader one instead and pass None here
    platform=TARGET_PLATFORM,
    device=EXECUTING_DEVICE,
    do_quantize=True)               # must be True for the quantization passes (calibration, LSQ, baking) to run

正准备量化你的网络，检查下列设置:
WORKING DIRECTORY    : working
TARGET PLATFORM      : TRT_INT8
NETWORK INPUTSHAPE   : [1, 3, 224, 224]
CALIBRATION BATCHSIZE: 64


Loading calibration files: 100%|██████████| 640/640 [00:09<00:00, 65.31it/s]


640 File(s) Loaded.
Loaded sample 0, shape: torch.Size([1, 3, 224, 224])
Loaded sample 1, shape: torch.Size([1, 3, 224, 224])
Loaded sample 2, shape: torch.Size([1, 3, 224, 224])
Loaded sample 3, shape: torch.Size([1, 3, 224, 224])
Loaded sample 4, shape: torch.Size([1, 3, 224, 224])
Batch Shape: torch.Size([64, 3, 224, 224])
[17:07:50] PPQ Quantization Fusion Pass Running ...       Finished.
[17:07:50] PPQ Quantize Simplify Pass Running ...         Finished.
[17:07:50] PPQ Parameter Quantization Pass Running ...    Finished.
[17:07:50] PPQ Runtime Calibration Pass Running ...       

Calibration Progress(Phase 1): 100%|██████████| 32/32 [05:42<00:00, 10.71s/it]


Finished.
[17:13:44] PPQ Quantization Alignment Pass Running ...    Finished.
[17:13:44] PPQ Passive Parameter Quantization Running ... Finished.
[17:13:44] PPQ LSQ Optimization Running ...               
Check following parameters:
Is Scale Trainable:        True
Interested Layers:         []
Collecting Device:         cuda
Num of blocks:             6
Learning Rate:             1e-05
Steps:                     500
Gamma:                     0

# Block [1 / 6]: [/features/features.0/Conv -> /features/features.4/MaxPool]


# Tuning Procedure : 100%|██████████| 500/500 [01:06<00:00,  7.47it/s]


# Tuning Finished  : (37.3235 -> 28.6100) [Block Loss]

# Block [2 / 6]: [/features/features.5/Conv -> /features/features.9/MaxPool]


# Tuning Procedure : 100%|██████████| 500/500 [00:40<00:00, 12.31it/s]


# Tuning Finished  : (127.9212 -> 110.3618) [Block Loss]

# Block [3 / 6]: [/features/features.10/Conv -> /features/features.14/Conv]


# Tuning Procedure : 100%|██████████| 500/500 [00:38<00:00, 12.88it/s]


# Tuning Finished  : (746.9162 -> 512.7488) [Block Loss]

# Block [4 / 6]: [/features/features.17/Conv -> /features/features.21/Conv]


# Tuning Procedure : 100%|██████████| 500/500 [00:28<00:00, 17.43it/s]


# Tuning Finished  : (689.6067 -> 268.4885) [Block Loss]

# Block [5 / 6]: [/features/features.24/Conv -> /features/features.28/Conv]


# Tuning Procedure : 100%|██████████| 500/500 [00:09<00:00, 50.72it/s]


# Tuning Finished  : (314.1484 -> 90.8628) [Block Loss]

# Block [6 / 6]: [/classifier/classifier.0/Gemm -> /classifier/classifier.6/Gemm]


# Tuning Procedure : 100%|██████████| 500/500 [00:16<00:00, 29.48it/s]


# Tuning Finished  : (8.2988 -> 0.8674) [Block Loss]

Finished.
[17:17:26] PPQ Passive Parameter Quantization Running ... Finished.
[17:17:26] PPQ Parameter Baking Pass Running ...          Finished.
--------- Network Snapshot ---------
Num of Op:                    [38]
Num of Quantized Op:          [38]
Num of Variable:              [71]
Num of Quantized Var:         [71]
------- Quantization Snapshot ------
Num of Quant Config:          [108]
ACTIVATED:                    [18]
BAKED:                        [16]
OVERLAPPED:                   [58]
FP32:                         [16]
Network Quantization Finished.


In [30]:
# Evaluate the graph produced by the quantization cell.
# vgg_task6 is presumably a project-local helper module; it is imported here
# rather than in the notebook's first cell.
from vgg_task6 import evaluate_quantized_model
# `dataloader` is the calibration loader built in the quantization cell, so the
# score is computed on the calibration batches; `label_path` points at
# WORKING_DIRECTORY/val.txt.
# NOTE(review): the recorded result is 0.15625 (printed by the helper as 0.16%),
# which looks like chance level - presumably the labels in val.txt are not
# aligned with the order of the calibration files; verify before trusting it.
result = evaluate_quantized_model(quantized, dataloader, label_path)
print(result)

Evaluating: 100%|██████████| 10/10 [00:01<00:00,  7.28batch/s]

Model Evaluation Accuracy: 0.16%
0.15625





In [31]:
# Re-display the value stored in `result` by the evaluation cell; this cell
# depends on that cell having been run first.
print(result)

0.15625
