[ONNX] QLinearConv Support #8007

Merged · 4 commits · May 13, 2021
104 changes: 104 additions & 0 deletions python/tvm/relay/frontend/onnx.py
@@ -2827,6 +2827,109 @@ def _impl_v11(cls, inputs, attr, params):
)


class QLinearConv(OnnxOpConverter):
"""Operator converter for QLinearConv."""

@classmethod
def _impl_v10(cls, inputs, attr, params):
def get_scalar(x, dtype="float32"):
if isinstance(x, _expr.Var) and x.name_hint in params:
return _op.const(params[x.name_hint].asnumpy(), dtype)
rank = len(infer_shape(x))
assert rank <= 1, "QLinearConv scale and zero_point input must be scalars"
if rank == 1:
x = _op.squeeze(x, [0])
return _op.cast(x, dtype)

data = inputs[0]
x_scale = get_scalar(inputs[1])
x_zero_point = get_scalar(inputs[2], "int32")
weight = inputs[3]
w_scale = get_scalar(inputs[4])
w_zero_point = get_scalar(inputs[5], "int32")
y_scale = get_scalar(inputs[6])
y_zero_point = get_scalar(inputs[7], "int32")

input_shape = infer_shape(data)
ndim = len(input_shape)
kernel_type = infer_type(weight)
kernel_shapes = [get_const_tuple(kernel_type.checked_type.shape)]
if "kernel_shape" not in attr:
attr["kernel_shape"] = kernel_shapes[0][2:]

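        # Resolve ONNX auto_pad: SAME_UPPER / SAME_LOWER pad the input to
        # preserve the output spatial size, VALID means no padding, and
        # NOTSET defers to the explicit "pads" attribute.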
if "auto_pad" in attr:
attr["auto_pad"] = attr["auto_pad"].decode("utf-8")
if attr["auto_pad"] in ("SAME_UPPER", "SAME_LOWER"):
# Warning: Convolution does not yet support dynamic shapes,
# one will need to run dynamic_to_static on this model after import
data = autopad(
data,
attr.get("strides", [1] * (ndim - 2)),
attr["kernel_shape"],
attr.get("dilations", [1] * (ndim - 2)),
ndim,
pad_value=x_zero_point.data,
mode=attr["auto_pad"],
)
elif attr["auto_pad"] == "VALID":
attr["pads"] = tuple([0 for i in range(ndim - 2)])
elif attr["auto_pad"] == "NOTSET":
pass
else:
msg = 'Value {} in attribute "auto_pad" of operator QLinearConv is invalid.'
raise tvm.error.OpAttributeInvalid(msg.format(attr["auto_pad"]))
attr.pop("auto_pad")

out_channels = kernel_shapes[0][0]
dilation = attr.get("dilations", [1] * (ndim - 2))
strides = attr.get("strides", [1] * (ndim - 2))
padding = attr["pads"] if "pads" in attr else 0
groups = attr["group"] if "group" in attr else 1

if ndim != 4:
raise tvm.error.OpAttributeInvalid(
"Only 2D kernels are supported for operator QLinearConv."
)

out = _qnn.op.conv2d(
data,
weight,
x_zero_point,
w_zero_point,
x_scale,
w_scale,
kernel_size=attr["kernel_shape"],
channels=out_channels,
strides=strides,
padding=padding,
dilation=dilation,
groups=groups,
)
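        # qnn.conv2d accumulates in int32; the optional bias (9th input,
        # already int32 per the ONNX spec) is added to that accumulator
        # before requantization.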
use_bias = len(inputs) == 9
if use_bias:
out = _op.nn.bias_add(out, inputs[8])

out_dtype = infer_type(inputs[7]).checked_type.dtype
requantize_scale = _op.multiply(x_scale, w_scale)

        # requantize requires y_scale to be constant;
        # if it is not, fall back to dequantize -> quantize
if isinstance(y_scale, _expr.Constant):
out = _qnn.op.requantize(
out,
requantize_scale,
_op.const(0, dtype="int32"),
y_scale,
y_zero_point,
out_dtype=out_dtype,
axis=0,
)
else:
out = _qnn.op.dequantize(out, requantize_scale, _op.const(0, dtype="int32"), axis=0)
out = _qnn.op.quantize(out, y_scale, y_zero_point, axis=0, out_dtype=out_dtype)
return out


class BitShift(OnnxOpConverter):
"""Operator converter for NonZero"""

@@ -3018,6 +3121,7 @@ def _get_convert_map(opset):
"DequantizeLinear": DequantizeLinear.get_converter(opset),
"DynamicQuantizeLinear": DynamicQuantizeLinear.get_converter(opset),
"ReverseSequence": ReverseSequence.get_converter(opset),
"QLinearConv": QLinearConv.get_converter(opset),
}


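A note on the two output paths in the converter above: the int32 convolution accumulator carries the scale x_scale * w_scale, so requantizing it directly to (y_scale, y_zero_point) is numerically equivalent to dequantizing to float32 and quantizing again; the constant-y_scale branch simply does this in a single integer op. A minimal NumPy sketch of that equivalence (illustrative only, not part of the PR; it assumes plain round-to-nearest and uint8 saturation, whereas qnn's exact rounding can differ at tie values):

import numpy as np

acc = np.array([1234, -56, 789], dtype=np.int32)  # int32 conv accumulator
x_scale, w_scale = 0.02, 0.05
y_scale, y_zero_point = 0.1, 128

# Path 1: requantize the accumulator directly.
requant = np.clip(
    np.round(acc * (x_scale * w_scale) / y_scale) + y_zero_point, 0, 255
).astype("uint8")

# Path 2: dequantize to float32, then quantize with the output parameters.
dequant = acc.astype("float32") * (x_scale * w_scale)
quant = np.clip(np.round(dequant / y_scale) + y_zero_point, 0, 255).astype("uint8")

assert (requant == quant).all()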
202 changes: 198 additions & 4 deletions tests/python/frontend/onnx/test_forward.py
@@ -66,7 +66,14 @@ def get_tvm_output_with_vm(


def get_tvm_output(
-    graph_def, input_data, target, device, output_shape=None, output_dtype="float32", opset=None
+    graph_def,
+    input_data,
+    target,
+    device,
+    output_shape=None,
+    output_dtype="float32",
+    opset=None,
+    opt_level=1,
):
"""Generic function to execute and get tvm output"""
# TODO: Resolve the issues and remove the following lines
@@ -76,7 +83,8 @@ def get_tvm_output(
input_names, shape_dict = get_input_data_shape_dict(graph_def, input_data)

mod, params = relay.frontend.from_onnx(graph_def, shape_dict, opset=opset)
-    with tvm.transform.PassContext(opt_level=1):
+
+    with tvm.transform.PassContext(opt_level=opt_level):
graph, lib, params = relay.build(mod, target, params=params)

m = graph_executor.create(graph, lib, device)
@@ -135,6 +143,7 @@ def verify_with_ort_with_inputs(
rtol=1e-5,
atol=1e-5,
apply_softmax=False,
opt_level=1,
):
if opset is not None:
model.opset_import[0].version = opset
@@ -156,7 +165,9 @@ def verify_with_ort_with_inputs(
convert_to_static=convert_to_static,
)
else:
-        tvm_out = get_tvm_output(model, inputs, target, dev, out_shape, dtype, opset=opset)
+        tvm_out = get_tvm_output(
+            model, inputs, target, dev, out_shape, dtype, opset=opset, opt_level=opt_level
+        )
if not isinstance(tvm_out, list):
tvm_out = [tvm_out]
if not isinstance(ort_out, list):
@@ -4219,7 +4230,6 @@ def verify_cumsum(indata, axis, exclusive=0, reverse=0, type="float32"):
"test_maxpool_with_argmax_2d_precomputed_strides/",
"test_maxunpool_export_with_output_shape/",
"test_mvn/",
"test_qlinearconv/",
"test_qlinearmatmul_2D/",
"test_qlinearmatmul_3D/",
"test_resize_tf_crop_and_resize/",
@@ -4387,6 +4397,189 @@ def test_reverse_sequence():
verify_reverse_sequence(x, sequence_lens, 1, 0)


def verify_qlinearconv(
x_shape,
w_shape,
y_shape,
padding,
kernel_shape,
strides,
dilations,
auto_pad="NOTSET",
bias=False,
):

x_array = np.random.randint(low=0, high=255, size=x_shape).astype("uint8")
w_array = np.random.uniform(low=0, high=255, size=w_shape).astype("uint8")

initializer = [
helper.make_tensor("x_scale", TensorProto.FLOAT, (), [np.random.rand()]),
helper.make_tensor("x_zero_point", TensorProto.UINT8, (), [np.random.randint(0, 255)]),
helper.make_tensor("w_scale", TensorProto.FLOAT, (), [np.random.rand()]),
helper.make_tensor("w_zero_point", TensorProto.UINT8, (), [np.random.randint(0, 255)]),
helper.make_tensor("y_scale", TensorProto.FLOAT, (), [np.random.rand()]),
helper.make_tensor("y_zero_point", TensorProto.UINT8, (), [np.random.randint(0, 255)]),
]
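    # The scale and zero-point tensors above are graph initializers, so
    # from_onnx records them in params and get_scalar can fold them into
    # Relay constants.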

input_nodes = [
helper.make_tensor_value_info("x", TensorProto.UINT8, list(x_shape)),
helper.make_tensor_value_info("w", TensorProto.UINT8, list(w_shape)),
]
input_names = [
"x",
"x_scale",
"x_zero_point",
"w",
"w_scale",
"w_zero_point",
"y_scale",
"y_zero_point",
]
input_values = [x_array, w_array]

if bias is True:
b_shape = w_shape[0:1]
b_array = np.random.randint(low=0, high=65536, size=b_shape).astype("int32")
input_nodes.append(helper.make_tensor_value_info("B", TensorProto.INT32, list(b_shape)))
input_names.append("B")
input_values.append(b_array)

if padding is None:
        # autopadding with unset default attributes
kwargs = {}
if not all([s == 1 for s in strides]):
kwargs["strides"] = strides
if not all([d == 1 for d in dilations]):
kwargs["dilations"] = dilations

node = helper.make_node(
"QLinearConv",
inputs=input_names,
outputs=["y"],
# Default values for other attributes:
auto_pad=auto_pad,
**kwargs,
)
else:
node = helper.make_node(
"QLinearConv",
inputs=input_names,
outputs=["y"],
kernel_shape=kernel_shape,
# Default values for other attributes:
strides=strides,
dilations=dilations,
# groups=1
pads=padding,
)

graph = helper.make_graph(
[node],
"conv_test",
inputs=input_nodes,
outputs=[helper.make_tensor_value_info("y", TensorProto.UINT8, list(y_shape))],
initializer=initializer,
)
model = helper.make_model(graph, producer_name="qlinearconv_test")
    # opt_level=1 will cause an error, so build at opt_level=2
verify_with_ort_with_inputs(model, input_values, opt_level=2)


def test_qlinearconv():
def repeat(N, D):
return tuple([N for _ in range(D)])

    # Only QLinearConv2d is covered here, since Relay only provides qnn.conv2d
D = 2

# Convolution with padding
verify_qlinearconv(
(1, 1) + repeat(5, D),
(1, 1) + repeat(3, D),
(1, 1) + repeat(5, D),
2 * repeat(1, D),
repeat(3, D),
repeat(1, D),
repeat(1, D),
)

# Convolution with bias
verify_qlinearconv(
(1, 1) + repeat(5, D),
(1, 1) + repeat(3, D),
(1, 1) + repeat(5, D),
2 * repeat(1, D),
repeat(3, D),
repeat(1, D),
repeat(1, D),
bias=True,
)

    # Convolution with asymmetric padding
verify_qlinearconv(
(1, 1) + repeat(5, D),
(1, 1) + repeat(3, D),
(1, 1) + repeat(4, D),
repeat(0, D) + repeat(1, D),
repeat(3, D),
repeat(1, D),
repeat(1, D),
)
# Convolution without padding
verify_qlinearconv(
(1, 1) + repeat(5, D),
(1, 1) + repeat(3, D),
(1, 1) + repeat(3, D),
2 * repeat(0, D),
repeat(3, D),
repeat(1, D),
repeat(1, D),
)
# Convolution with autopadding
verify_qlinearconv(
(1, 1) + repeat(5, D),
(1, 1) + repeat(3, D),
(1, 1) + repeat(5, D),
None,
repeat(3, D),
repeat(1, D),
repeat(1, D),
auto_pad="SAME_UPPER",
)
# Convolution with valid autopadding
verify_qlinearconv(
(1, 1) + repeat(5, D),
(1, 1) + repeat(3, D),
(1, 1) + repeat(3, D),
None,
repeat(3, D),
repeat(1, D),
repeat(1, D),
auto_pad="VALID",
)
    # Convolution with non-uniform stride
verify_qlinearconv(
(1, 1) + repeat(5, D),
(1, 1) + repeat(3, D),
(1, 1) + repeat(3, D),
None,
repeat(3, D),
repeat(2, D),
repeat(1, D),
auto_pad="SAME_UPPER",
)
# Convolution with dilation
verify_qlinearconv(
(1, 1) + repeat(5, D),
(1, 1) + repeat(3, D),
(1, 1) + repeat(5, D),
2 * repeat(2, D),
repeat(3, D),
repeat(1, D),
repeat(2, D),
)


if __name__ == "__main__":
test_flatten()
test_reshape()
@@ -4468,3 +4661,4 @@ def test_reverse_sequence():
test_wrong_input()
test_aten()
test_reverse_sequence()
test_qlinearconv()
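
For reference, a hypothetical usage sketch (not part of the PR) of importing an ONNX model that contains a QLinearConv node once this converter is registered; the file name and input shape are illustrative:

import onnx
import tvm
from tvm import relay

model = onnx.load("quantized_model.onnx")  # illustrative path
mod, params = relay.frontend.from_onnx(model, shape={"x": (1, 1, 5, 5)})
# As noted in the test above, opt_level=1 can fail for this pattern.
with tvm.transform.PassContext(opt_level=2):
    graph, lib, params = relay.build(mod, target="llvm", params=params)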