-
Notifications
You must be signed in to change notification settings - Fork 17
/
configuration.py
141 lines (121 loc) · 5.21 KB
/
configuration.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
# Copyright 2023 The HuggingFace Team. All rights reserved.
# Licensed under the MIT License.
"""Configuration classes for quantization with RyzenAI."""
from dataclasses import asdict, dataclass
from enum import Enum
from typing import Optional
import vai_q_onnx
from onnxruntime.quantization import CalibrationMethod, QuantFormat, QuantType
from optimum.configuration_utils import BaseConfig
@dataclass
class QuantizationConfig:
    """
    QuantizationConfig is the configuration class handling all the RyzenAI quantization parameters.

    Args:
        format (`QuantFormat`, defaults to `QuantFormat.QDQ`):
            Targeted RyzenAI quantization representation format.
            For the Operator Oriented (QOperator) format, all the quantized operators have their own ONNX definitions.
            For the Tensor Oriented (QDQ) format, the model is quantized by inserting QuantizeLinear / DeQuantizeLinear
            operators.
        calibration_method (`CalibrationMethod`, defaults to `vai_q_onnx.PowerOfTwoMethod.MinMSE`):
            The method chosen to calculate the activations quantization parameters using the calibration dataset.
        activations_dtype (`QuantType`, defaults to `QuantType.QUInt8`):
            The quantization data types to use for the activations.
        activations_symmetric (`bool`, defaults to `True`):
            Whether to apply symmetric quantization on the activations.
        weights_dtype (`QuantType`, defaults to `QuantType.QInt8`):
            The quantization data types to use for the weights.
        weights_symmetric (`bool`, defaults to `True`):
            Whether to apply symmetric quantization on the weights.
        enable_dpu (`bool`, defaults to `True`):
            Determines whether to generate a quantized model that is suitable for the DPU. If set to True, the
            quantization process will create a model that is optimized for DPU computations.
    """

    format: QuantFormat = QuantFormat.QDQ
    calibration_method: CalibrationMethod = vai_q_onnx.PowerOfTwoMethod.MinMSE
    activations_dtype: QuantType = QuantType.QUInt8
    activations_symmetric: bool = True
    weights_dtype: QuantType = QuantType.QInt8
    weights_symmetric: bool = True
    enable_dpu: bool = True

    @staticmethod
    def quantization_type_str(activations_dtype: QuantType, weights_dtype: QuantType) -> str:
        """Return a compact 'activations/weights' signedness schema, e.g. ``u8/s8``."""
        return (
            f"{'s8' if activations_dtype == QuantType.QInt8 else 'u8'}"
            f"/"
            f"{'s8' if weights_dtype == QuantType.QInt8 else 'u8'}"
        )

    @property
    def use_symmetric_calibration(self) -> bool:
        """Whether both activations and weights are quantized symmetrically."""
        return self.activations_symmetric and self.weights_symmetric

    def __str__(self) -> str:
        return (
            f"{self.format} ("
            f"schema: {QuantizationConfig.quantization_type_str(self.activations_dtype, self.weights_dtype)}, "
            f"enable_dpu: {self.enable_dpu})"
        )
class AutoQuantizationConfig:
    """Factory of ready-made `QuantizationConfig` presets for common targets."""

    @staticmethod
    def ipu_cnn_config():
        """Preset for CNN models targeting the IPU: DPU-compatible QDQ quantization
        with power-of-two MinMSE calibration and fully symmetric u8/s8 scheme."""
        ipu_settings = dict(
            format=QuantFormat.QDQ,
            calibration_method=vai_q_onnx.PowerOfTwoMethod.MinMSE,
            activations_dtype=QuantType.QUInt8,
            activations_symmetric=True,
            weights_dtype=QuantType.QInt8,
            weights_symmetric=True,
            enable_dpu=True,
        )
        return QuantizationConfig(**ipu_settings)

    @staticmethod
    def cpu_cnn_config(
        use_symmetric_activations: bool = False,
        use_symmetric_weights: bool = True,
        enable_dpu: bool = False,
    ):
        """Preset for CNN models targeting the CPU: QDQ quantization with MinMax
        calibration; symmetry and DPU compatibility are caller-tunable."""
        cpu_settings = dict(
            format=QuantFormat.QDQ,
            calibration_method=vai_q_onnx.CalibrationMethod.MinMax,
            activations_dtype=QuantType.QUInt8,
            activations_symmetric=use_symmetric_activations,
            weights_dtype=QuantType.QInt8,
            weights_symmetric=use_symmetric_weights,
            enable_dpu=enable_dpu,
        )
        return QuantizationConfig(**cpu_settings)
class RyzenAIConfig(BaseConfig):
    """
    RyzenAIConfig is the configuration class handling all the VitisAI parameters related to the ONNX IR model export,
    and quantization parameters.

    Attributes:
        opset (`Optional[int]`, defaults to `None`):
            ONNX opset version to export the model with.
        quantization (`Optional[QuantizationConfig]`, defaults to `None`):
            Specify a configuration to quantize ONNX model
    """

    CONFIG_NAME = "ryzenai_config.json"
    FULL_CONFIGURATION_FILE = "ryzenai_config.json"

    def __init__(
        self,
        opset: Optional[int] = None,
        quantization: Optional[QuantizationConfig] = None,
        **kwargs,
    ):
        super().__init__()
        self.opset = opset
        # Store the quantization config in serialized (plain-dict) form.
        self.quantization = self.dataclass_to_dict(quantization)
        self.optimum_version = kwargs.pop("optimum_version", None)

    @staticmethod
    def dataclass_to_dict(config) -> dict:
        """Serialize a quantization dataclass into a JSON-friendly dict.

        `None` becomes an empty dict, an existing dict passes through unchanged,
        and Enum values (including those inside lists) are replaced by their names.
        """
        if config is None:
            return {}
        if isinstance(config, dict):
            return config

        def _plain(value):
            # Enums are not JSON-serializable; keep only their symbolic name.
            if isinstance(value, Enum):
                return value.name
            if isinstance(value, list):
                return [item.name if isinstance(item, Enum) else item for item in value]
            return value

        return {key: _plain(value) for key, value in asdict(config).items()}