Export Qlinear to QDQ (#224)
Signed-off-by: mengniwa <mengni.wang@intel.com>
mengniwang95 committed Dec 12, 2022
1 parent 40ab5a3 commit e996a93
Showing 24 changed files with 1,390 additions and 35 deletions.
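
Editor's note: every new QOperator class in this commit implements the same QLinear-to-QDQ rewrite — each fused QLinear contrib op is replaced by DequantizeLinear nodes on its quantized inputs, the plain float op, and a QuantizeLinear back to the original quantized output. As a reading aid, here is a minimal standalone sketch of that pattern (illustration only, not part of the diff; it mirrors the QActivationOperator.convert() added below):

import onnx

def qlinear_activation_to_qdq(node):
    # node: a QLinearLeakyRelu/QLinearSigmoid NodeProto whose inputs are
    # [X, X_scale, X_zero_point, Y_scale, Y_zero_point]
    in_dq = onnx.helper.make_node(
        'DequantizeLinear',
        node.input[:3],                     # X, X_scale, X_zero_point
        [node.name + '_in_dequant'],
        node.name + '_in_dequant')
    float_op = onnx.helper.make_node(
        node.op_type.split('QLinear')[-1],  # e.g. 'QLinearSigmoid' -> 'Sigmoid'
        [node.name + '_in_dequant'],
        [node.name + '_out'],
        node.name + '_convert')
    out_q = onnx.helper.make_node(
        'QuantizeLinear',
        [node.name + '_out', node.input[3], node.input[4]],
        list(node.output),
        node.name + '_out_quant')
    # splice these three nodes into the graph in place of node
    return [in_dq, float_op, out_q]
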
4 changes: 2 additions & 2 deletions neural_compressor/adaptor/ox_utils/operators/__init__.py
@@ -18,12 +18,12 @@

 from os.path import dirname, basename, isfile, join
 import glob
-from .ops import OPERATORS
+from .ops import OPERATORS, QOPERATORS

 modules = glob.glob(join(dirname(__file__), "*.py"))

 for f in modules:
     if isfile(f) and not f.startswith('__') and not f.endswith('__init__.py'):
         __import__(basename(f)[:-3], globals(), locals(), level=1)

-__all__ = ["OPERATORS"]
+__all__ = ["OPERATORS", "QOPERATORS"]
40 changes: 38 additions & 2 deletions neural_compressor/adaptor/ox_utils/operators/activation.py
@@ -17,7 +17,7 @@
 #

 import onnx
-from neural_compressor.adaptor.ox_utils.operators.ops import op_registry, Operator
+from neural_compressor.adaptor.ox_utils.operators.ops import op_registry, Operator, QOperator, qop_registry
 from neural_compressor.adaptor.ox_utils.util import attribute_to_kwarg, ms_domain

 @op_registry(op_types="LeakyRelu, Sigmoid")
@@ -87,4 +87,40 @@ def quantize(self):
             self.quantizer.dequantize_tensor(node, node.input[0])
         else:
             self.quantizer.model.replace_input_of_all_nodes(node.output[0], node.input[0])
-            self.quantizer.remove_nodes.append(node)
+            self.quantizer.remove_nodes.append(node)
+
+@qop_registry(op_types="QLinearLeakyRelu, QLinearSigmoid")
+class QActivationOperator(QOperator):
+    def __init__(self, onnx_node, children, initializers):
+        super().__init__(onnx_node, children, initializers)
+
+    def convert(self):
+        node = self.node
+        add_nodes = []
+        inits = []
+        # input dq
+        in_dq = onnx.helper.make_node(
+            'DequantizeLinear',
+            node.input[:3],
+            [node.name + '_in_dequant'],
+            node.name + '_in_dequant')
+        inputs = [node.name + '_in_dequant']
+        add_nodes.append(in_dq)
+        # output q
+        out_q = onnx.helper.make_node(
+            'QuantizeLinear',
+            [node.name + '_out', node.input[3], node.input[4]],
+            node.output,
+            node.name + '_out_quant')
+        outputs = [node.name + '_out']
+        add_nodes.append(out_q)
+
+        kwargs = {}
+        for attribute in node.attribute: # pragma: no cover
+            kwargs.update(attribute_to_kwarg(attribute))
+
+        activation_node = onnx.helper.make_node(
+            node.op_type.split('QLinear')[-1], inputs,
+            outputs, node.name + '_convert', **kwargs)
+        add_nodes.append(activation_node)
+        return True, add_nodes, inits
12 changes: 7 additions & 5 deletions neural_compressor/adaptor/ox_utils/operators/argmax.py
@@ -15,9 +15,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
-
-
-from neural_compressor.adaptor.ox_utils.operators.ops import op_registry, Operator
+from neural_compressor.adaptor.ox_utils.operators.ops import op_registry, Operator, QOperator, qop_registry

 @op_registry(op_types="ArgMax")
 class ArgMaxOperator(Operator):
@@ -35,5 +33,9 @@ def convert(self, convert_format):
         origin_name = node.input[0].split('_argmax_node')[0]

         if origin_name in self.quantizer.quantized_value_map:
-            node.input[0] = self.quantizer.quantized_value_map[origin_name].q_name
-            node.name = node.name + '_quant'
+            node.name = node.name + '_quant'
+
+@qop_registry(op_types="ArgMax")
+class QArgMaxOperator(QOperator):
+    def __init__(self, onnx_node, children, initializers):
+        super().__init__(onnx_node, children, initializers)
47 changes: 45 additions & 2 deletions neural_compressor/adaptor/ox_utils/operators/attention.py
@@ -17,8 +17,8 @@
 #

 import onnx
-from neural_compressor.adaptor.ox_utils.operators.ops import op_registry, Operator
-from neural_compressor.adaptor.ox_utils.util import attribute_to_kwarg, ms_domain
+from neural_compressor.adaptor.ox_utils.operators.ops import op_registry, Operator, qop_registry, QOperator
+from neural_compressor.adaptor.ox_utils.util import attribute_to_kwarg, ms_domain, find_by_name

 @op_registry(op_types="Attention")
 class AttentionOperator(Operator):
@@ -74,3 +74,46 @@ def convert(self, convert_format):
         self.quantizer.new_nodes.append(qattention_node)

         self.quantizer.remove_nodes.append(node)
+
+@qop_registry(op_types="QAttention")
+class QAttentionOperator(QOperator):
+    def __init__(self, onnx_node, children, initializers):
+        super().__init__(onnx_node, children, initializers)
+
+    def convert(self):
+        node = self.node
+        add_nodes = []
+        inputs = []
+        inits = []
+        if find_by_name(node.input[3], self.initializers) is None:
+            return False, add_nodes, inits
+        # input dq
+        in_dq1 = onnx.helper.make_node(
+            'DequantizeLinear',
+            [node.input[0], node.input[3], node.input[6]],
+            [node.name + '_in_dequant1'],
+            node.name + '_in_dequant1')
+
+        in_dq2 = onnx.helper.make_node(
+            'DequantizeLinear',
+            [node.input[1], node.input[4], node.input[7]],
+            [node.name + '_in_dequant2'],
+            node.name + '_in_dequant2')
+        inputs = [node.name + '_in_dequant1',
+                  node.name + '_in_dequant2',
+                  node.input[2],
+                  node.input[5]]
+
+        add_nodes.extend([in_dq1, in_dq2])
+
+        outputs = node.output
+        kwargs = {}
+        for attribute in node.attribute: # pragma: no cover
+            kwargs.update(attribute_to_kwarg(attribute))
+        kwargs["domain"] = ms_domain
+
+        binary_node = onnx.helper.make_node(
+            'Attention', inputs,
+            outputs, node.name + '_convert', **kwargs)
+        add_nodes.append(binary_node)
+        return True, add_nodes, inits
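
Editor's note: the hard-coded indices above follow the input layout of the com.microsoft QAttention contrib op. The mapping below is a sketch of that layout as consumed by the converter (not part of the diff): the two DequantizeLinear nodes recover float input and weight, bias and mask_index pass through unchanged, and the early return False gives up when input_scale is not a graph initializer.

# Indices into node.input for com.microsoft QAttention, as consumed above:
QATTENTION_INPUTS = {
    0: 'input',              # quantized; dequantized with inputs 3 and 6
    1: 'weight',             # quantized; dequantized with inputs 4 and 7
    2: 'bias',               # float; passed straight to Attention
    3: 'input_scale',        # must be an initializer or convert() bails out
    4: 'weight_scale',
    5: 'mask_index',         # passed straight to Attention
    6: 'input_zero_point',
    7: 'weight_zero_point',
}
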
47 changes: 45 additions & 2 deletions neural_compressor/adaptor/ox_utils/operators/binary_op.py
@@ -17,7 +17,7 @@
 #

 import onnx
-from neural_compressor.adaptor.ox_utils.operators.ops import op_registry, Operator
+from neural_compressor.adaptor.ox_utils.operators.ops import op_registry, Operator, QOperator, qop_registry
 from neural_compressor.adaptor.ox_utils.util import attribute_to_kwarg, ms_domain

 @op_registry(op_types="Add, Mul")
@@ -77,4 +77,47 @@ def convert(self, convert_format):
         self.quantizer.new_nodes += [qlinear_binary_math_node]
         self.quantizer.remove_nodes.extend(parents)
         self.quantizer.remove_nodes.append(child)
-        self.quantizer.remove_nodes.append(node)
+        self.quantizer.remove_nodes.append(node)
+
+@qop_registry(op_types="QLinearAdd, QLinearMul")
+class QBinaryOperator(QOperator):
+    def __init__(self, onnx_node, children, initializers):
+        super().__init__(onnx_node, children, initializers)
+
+    def convert(self):
+        node = self.node
+        add_nodes = []
+        inits = []
+        # input dq
+        in_dq1 = onnx.helper.make_node(
+            'DequantizeLinear',
+            node.input[:3],
+            [node.name + '_in_dequant1'],
+            node.name + '_in_dequant1')
+
+        in_dq2 = onnx.helper.make_node(
+            'DequantizeLinear',
+            node.input[3:6],
+            [node.name + '_in_dequant2'],
+            node.name + '_in_dequant2')
+        inputs = [node.name + '_in_dequant1', node.name + '_in_dequant2']
+
+        add_nodes.extend([in_dq1, in_dq2])
+        # output q
+        out_q = onnx.helper.make_node(
+            'QuantizeLinear',
+            [node.name + '_out', node.input[6], node.input[7]],
+            node.output,
+            node.name + '_out_quant')
+        outputs = [node.name + '_out']
+        add_nodes.append(out_q)
+
+        kwargs = {}
+        for attribute in node.attribute: # pragma: no cover
+            kwargs.update(attribute_to_kwarg(attribute))
+
+        binary_node = onnx.helper.make_node(
+            node.op_type.split('QLinear')[-1], inputs,
+            outputs, node.name + '_convert', **kwargs)
+        add_nodes.append(binary_node)
+        return True, add_nodes, inits
41 changes: 40 additions & 1 deletion neural_compressor/adaptor/ox_utils/operators/concat.py
@@ -17,7 +17,7 @@
 #

 import onnx
-from neural_compressor.adaptor.ox_utils.operators.ops import op_registry, Operator
+from neural_compressor.adaptor.ox_utils.operators.ops import op_registry, Operator, QOperator, qop_registry
 from neural_compressor.adaptor.ox_utils.util import attribute_to_kwarg, ms_domain

 @op_registry(op_types="Concat")
@@ -96,3 +96,42 @@ def cast(self): # pragma: no cover
         if node.input[0] not in [i.tensor_name for i in self.quantizer.new_value_info.values()]:
             return
         self.quantizer.dtype_cast(self.node, self.dtype)
+
+@qop_registry(op_types="QLinearConcat")
+class QConcatOperator(QOperator):
+    def __init__(self, onnx_node, children, initializers):
+        super().__init__(onnx_node, children, initializers)
+
+    def convert(self):
+        node = self.node
+        add_nodes = []
+        inputs = []
+        inits = []
+        # input dq: inputs are [Y_scale, Y_zero_point, X0, X0_scale,
+        # X0_zero_point, X1, ...], i.e. one triple per data tensor
+        for i in range(int((len(node.input) - 2) / 3)):
+            in_dq = onnx.helper.make_node(
+                'DequantizeLinear',
+                node.input[2 + i*3 : 2 + (i+1)*3],
+                [node.name + '_in_dequant_' + str(i)],
+                node.name + '_in_dequant_' + str(i))
+            inputs.append(node.name + '_in_dequant_' + str(i))
+            add_nodes.append(in_dq)
+
+        # output q
+        out_q = onnx.helper.make_node(
+            'QuantizeLinear',
+            [node.name + '_out', node.input[0], node.input[1]],
+            node.output,
+            node.name + '_out_quant')
+        outputs = [node.name + '_out']
+        add_nodes.append(out_q)
+
+        kwargs = {}
+        for attribute in node.attribute: # pragma: no cover
+            kwargs.update(attribute_to_kwarg(attribute))
+
+        concat_node = onnx.helper.make_node(
+            'Concat', inputs,
+            outputs, node.name + '_convert', **kwargs)
+        add_nodes.append(concat_node)
+        return True, add_nodes, inits
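
Editor's note: the slicing above relies on the QLinearConcat input layout from the onnxruntime contrib-op spec — Y_scale and Y_zero_point first, then one (X_i, X_i_scale, X_i_zero_point) triple per concatenated tensor — so the loop runs (len(node.input) - 2) / 3 times, once per data tensor. A quick sanity check of that arithmetic with hypothetical tensor names:

# Hypothetical QLinearConcat input list carrying two data tensors:
inputs = ['Y_scale', 'Y_zp',
          'X0', 'X0_scale', 'X0_zp',
          'X1', 'X1_scale', 'X1_zp']
n = (len(inputs) - 2) // 3                          # 2 data tensors
triples = [inputs[2 + 3*i : 2 + 3*(i+1)] for i in range(n)]
print(triples)  # [['X0', 'X0_scale', 'X0_zp'], ['X1', 'X1_scale', 'X1_zp']]
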
70 changes: 69 additions & 1 deletion neural_compressor/adaptor/ox_utils/operators/conv.py
@@ -19,7 +19,7 @@

 import onnx
 from onnx import onnx_pb as onnx_proto
-from neural_compressor.adaptor.ox_utils.operators.ops import op_registry, Operator
+from neural_compressor.adaptor.ox_utils.operators.ops import op_registry, Operator, QOperator, qop_registry
 from neural_compressor.adaptor.ox_utils.util import find_by_name, attribute_to_kwarg

 @op_registry(op_types="Conv, FusedConv")
@@ -156,6 +156,7 @@ def convert(self, convert_format):
             if attribute.name == 'activation_params': # pragma: no cover
                 continue
             kwargs.update(attribute_to_kwarg(attribute))
+
         qlinear_conv_node = onnx.helper.make_node("QLinearConv", qlinear_conv_inputs,
                                                   [qlinear_conv_output],
                                                   node.name, **kwargs)
@@ -164,4 +165,71 @@ def convert(self, convert_format):
         self.quantizer.remove_nodes.append(child)
         self.quantizer.remove_nodes.append(node)

+@qop_registry(op_types="QLinearConv")
+class QConvOperator(QOperator):
+    def __init__(self, onnx_node, children, initializers):
+        super().__init__(onnx_node, children, initializers)
+
+    def convert(self):
+        node = self.node
+        add_nodes = []
+        inits = []
+        # input dq
+        in_dq1 = onnx.helper.make_node(
+            'DequantizeLinear',
+            node.input[:3],
+            [node.name + '_in_dequant1'],
+            node.name + '_in_dequant1')
+
+        in_dq2 = onnx.helper.make_node(
+            'DequantizeLinear',
+            node.input[3:6],
+            [node.name + '_in_dequant2'],
+            node.name + '_in_dequant2')
+
+        add_nodes.extend([in_dq1, in_dq2])
+        inputs = [node.name + '_in_dequant1', node.name + '_in_dequant2']
+        if len(node.input) == 9:
+            import numpy as np
+            input_scale = onnx.numpy_helper.to_array(
+                find_by_name(node.input[1], self.initializers))
+            weight_scale = onnx.numpy_helper.to_array(
+                find_by_name(node.input[4], self.initializers))
+            bias_scale = input_scale * weight_scale
+
+            # update scale initializer
+            bias_scale_data = np.asarray(bias_scale, dtype=np.float32).reshape(-1)
+            bias_scale_initializer = onnx.numpy_helper.from_array(bias_scale_data,
+                                                                  node.input[8] + '_scale')
+            inits.extend([bias_scale_initializer])
+
+            # update zero initializer
+            bias_zp_data = np.zeros(bias_scale.shape, dtype=np.int32).reshape(-1)
+            bias_zp_initializer = onnx.numpy_helper.from_array(
+                bias_zp_data, node.input[8] + '_zero_point')
+            inits.extend([bias_zp_initializer])
+            in_dq3 = onnx.helper.make_node(
+                'DequantizeLinear',
+                [node.input[8], bias_scale_initializer.name, bias_zp_initializer.name],
+                [node.name + '_in_dequant3'],
+                node.name + '_in_dequant3')
+            inputs.append(in_dq3.output[0])
+            add_nodes.append(in_dq3)
+        # output q
+        out_q = onnx.helper.make_node(
+            'QuantizeLinear',
+            [node.name + '_out', node.input[6], node.input[7]],
+            node.output,
+            node.name + '_out_quant')
+        outputs = [node.name + '_out']
+        add_nodes.append(out_q)
+
+        kwargs = {}
+        for attribute in node.attribute: # pragma: no cover
+            kwargs.update(attribute_to_kwarg(attribute))
+
+        binary_node = onnx.helper.make_node(
+            node.op_type.split('QLinear')[-1], inputs,
+            outputs, node.name + '_convert', **kwargs)
+        add_nodes.append(binary_node)
+        return True, add_nodes, inits
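
Editor's note: the bias branch above matches the ONNX QLinearConv convention — the optional ninth input is an int32 bias quantized with scale x_scale * w_scale and zero point 0 — which is exactly the pair of initializers the converter synthesizes for the third DequantizeLinear. A small numeric illustration with made-up values:

import numpy as np

input_scale = np.float32(0.02)                           # hypothetical x_scale
weight_scale = np.array([0.5, 0.25], dtype=np.float32)   # hypothetical per-channel w_scale
bias_scale = input_scale * weight_scale                  # [0.01, 0.005]

bias_int32 = np.array([300, -1200], dtype=np.int32)
bias_float = bias_scale * bias_int32                     # [3.0, -6.0]
print(bias_float)  # what the extra DequantizeLinear recovers at runtime
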
7 changes: 6 additions & 1 deletion neural_compressor/adaptor/ox_utils/operators/direct_q8.py
@@ -16,7 +16,7 @@
 # limitations under the License.
 #

-from neural_compressor.adaptor.ox_utils.operators.ops import op_registry, Operator
+from neural_compressor.adaptor.ox_utils.operators.ops import op_registry, Operator, qop_registry, QOperator

 @op_registry(op_types="Reshape, Transpose, Squeeze, Unsqueeze")
 class Direct8BitOperator(Operator):
@@ -83,3 +83,8 @@ def cast(self):
         if node.input[0] not in [i.tensor_name for i in self.quantizer.new_value_info.values()]:
             return
         self.quantizer.dtype_cast(self.node, self.dtype)
+
+@qop_registry(op_types="Reshape, Transpose, Squeeze, Unsqueeze")
+class QDirectOperator(QOperator):
+    def __init__(self, onnx_node, children, initializers):
+        super().__init__(onnx_node, children, initializers)
