[microNPU][ETHOSU] Add fixed point for matmul #16401

Merged · 5 commits · Feb 14, 2024

50 changes: 40 additions & 10 deletions python/tvm/relay/backend/contrib/ethosu/legalize.py
@@ -1402,20 +1402,23 @@ def callback(self, pre, post, node_map):
         return ethosu_fc


-class MatMulRewriter(DFPatternCallback):
-    """Legalize matrix multiplication to an NPU operator"""
+class MatrixMultiplicationRewriter(DFPatternCallback):
+    """Legalize matrix multiplication with two tensors into a sequence of NPU operators"""

-    def __init__(self):
+    def __init__(
+        self,
+        params_class: Type,
+        pattern: CallPattern,
+    ):
         super().__init__(require_type=True)
-        self.pattern = (
-            wildcard().has_attr({"Composite": ethosu_patterns.MatMulParams.composite_name})
-        )(wildcard(), wildcard())
+        self.pattern = pattern
+        self.params_class = params_class

     def callback(self, pre, post, node_map):
-        params = ethosu_patterns.MatMulParams(post.op.body)
+        params = self.params_class(post.op.body)
         ifm = post.args[0]
         ifm2 = post.args[1]
-        lut = relay.const([], dtype="int8")
+        lut = relay.const([], dtype=params.ifm.dtype)
         activation_map = {"clip": "CLIP"}
         if params.activation:
             activation = activation_map[params.activation.op.name]
@@ -1471,7 +1474,7 @@ def callback(self, pre, post, node_map):
                 rounding_mode="NATURAL",
             )

-            # Convert tensor dtype from int32 to int8
+            # Convert tensor dtype from int32 to output dtype
             scalar_tensor = relay.const(np.ones([1, 1, 1, 1], dtype="int32"), dtype="int32")
             reduce_sum = ethosu_ops.ethosu_binary_elementwise(
                 ifm=reduce_sum,
@@ -1487,7 +1490,7 @@ def callback(self, pre, post, node_map):
                 ifm_channels=1,
                 ifm2_channels=1,
                 reversed_operands=False,
-                ofm_dtype="int8",
+                ofm_dtype=params.ofm.dtype,
             )

             res_columns.append(reduce_sum)
@@ -1497,6 +1500,32 @@ def callback(self, pre, post, node_map):
         return relay.reshape(concat, params.ofm.shape)


+class MatMulRewriter(MatrixMultiplicationRewriter):
+    """Convert ethos-u.matmul composite function to a sequence of NPU operators"""
+
+    def __init__(self):
+        super().__init__(
+            params_class=ethosu_patterns.MatMulParams,
+            pattern=(
+                wildcard().has_attr({"Composite": ethosu_patterns.MatMulParams.composite_name})
+            )(wildcard(), wildcard()),
+        )
+
+
+class MatMulFixedPointRewriter(MatrixMultiplicationRewriter):
+    """Convert ethos-u.matmul_fixed_point composite function to a sequence of NPU operators"""
+
+    def __init__(self):
+        super().__init__(
+            params_class=ethosu_patterns.MatMulFixedPointParams,
+            pattern=(
+                wildcard().has_attr(
+                    {"Composite": ethosu_patterns.MatMulFixedPointParams.composite_name}
+                )
+            )(wildcard(), wildcard()),
+        )
+
+
 class PadRewriter(DFPatternCallback):
     """Convert ethos-u.pad2d composite function to ethosu_depthwise_conv2d
     operator"""
@@ -1644,6 +1673,7 @@ def transform_npu_function(self, _, func: relay.Function) -> relay.Function:
             PartitionedSplitRewriter(),
             FullyConnectedRewriter(),
             MatMulRewriter(),
+            MatMulFixedPointRewriter(),
             SplitRewriter(),
             ChannelPadRewriter(),
             Conv2DRewriter(),
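
For intuition, here is a rough NumPy sketch of the decomposition the rewriter above performs, inferred from the visible hunks (per output column: an elementwise multiply against one weight row, a reduce-sum, then concatenate and reshape). The helper name is illustrative only, not part of the PR:

import numpy as np

def matmul_as_columns(ifm, weights):
    # nn.dense weights arrive transposed, shaped (ofm_channels, ifm_depth)
    res_columns = [np.sum(ifm * w_row, axis=-1, keepdims=True) for w_row in weights]
    return np.concatenate(res_columns, axis=-1)

a = np.arange(8, dtype=np.int32).reshape(2, 4)
b = np.ones((3, 4), dtype=np.int32)
assert np.array_equal(matmul_as_columns(a, b), a @ b.T)
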
76 changes: 75 additions & 1 deletion python/tvm/relay/op/contrib/ethosu.py
@@ -1917,7 +1917,7 @@ def is_valid(self) -> bool:
         Checks whether matrix multiplication has compatible attributes with HW
         """

-        if not check_valid_dtypes([self.ifm, self.ofm], supported_dtypes=[np.int8]):
+        if not check_valid_dtypes([self.ifm, self.ofm], supported_dtypes=[np.int8, np.int16]):
             return False
         if not len(self.ifm.shape) == 2:
             return False
@@ -1938,6 +1938,75 @@ def matmul_pattern():
     return optional_clip


+class MatMulFixedPointParams:
+    """
+    This class will parse a call to an ethos-u.matmul_fixed_point composite
+    function and extract the parameter information.
+    """
+
+    composite_name = "ethos-u.matmul_fixed_point"
+
+    @requires_vela
+    def __init__(self, func_body):
+        from tvm.relay.backend.contrib.ethosu.util import QDenseArgs
+
+        dense_fixed_point = func_body.args[0]
+        dense = dense_fixed_point.args[0]
+        # The fixed_point_multiply relay operation uses a multiplier with 31 fractional
+        # bits, so the size of the fraction is given by the formula: 31 - shift
+        self.fraction_size = 31 - dense_fixed_point.attrs.shift
+        fract_scale = tvm.relay.Constant(tvm.nd.array(np.array(1 / 2**self.fraction_size)))
+        fract_zero_point = tvm.relay.Constant(tvm.nd.array(np.array(0, dtype="int32")))
+
+        self.activation = None
+        self.weights = TensorParams(
+            dense.args[QDenseArgs.WEIGHTS.value].args[0].args[0],
+            None,
+            fract_scale,
+            fract_zero_point,
+        )
+        self.ifm = TensorParams(
+            dense.args[QDenseArgs.IFM.value].args[0].args[0],
+            None,
+            fract_scale,
+            fract_zero_point,
+        )
+        self.ofm = TensorParams(
+            func_body,
+            None,
+            fract_scale,
+            fract_zero_point,
+        )
+
+    def is_valid(self) -> bool:
+        """
+        Checks whether matrix multiplication has compatible attributes with HW
+        """
+
+        if self.fraction_size < 0 or self.fraction_size > 16:
+            return False
+        if not check_valid_dtypes([self.ifm, self.ofm], supported_dtypes=[np.int16]):
+            return False
+        if not len(self.ifm.shape) == 2:
+            return False
+        if not len(self.ofm.shape) == 2:
+            return False
+        # The weights must be transposed
+        if self.ifm.shape[1] != self.weights.shape[1]:
+            return False
+        return True
+
+
+def matmul_fixed_point_pattern():
+    ifm = is_op("cast")(wildcard())
+    ifm2 = is_op("cast")(wildcard())
+    ifm = is_op("fixed_point_multiply")(ifm)
+    ifm2 = is_op("fixed_point_multiply")(ifm2)
+    dense = is_op("nn.dense")(ifm, ifm2)
+    dense = is_op("fixed_point_multiply")(dense)
+    return is_op("cast")(dense)
+
+
 class HardSwishParams:
     """
     This class will parse a call to an ethos-u.hard_swish composite function
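
A concrete reading of the fraction-size formula above, using the values from the test later in this PR (a worked example, not code from the patch):

shift = 16                           # attrs.shift of the final fixed_point_multiply
fraction_size = 31 - shift           # = 15, i.e. Q15 data
fract_scale = 1 / 2**fraction_size   # = 0.000030517578125
assert 0 <= fraction_size <= 16      # the range accepted by is_valid()
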
@@ -2228,6 +2297,11 @@ def pattern_table() -> List[Tuple[str, tvm.relay.dataflow_pattern.DFPattern, Cal
             matmul_pattern(),
             lambda pat: MatMulParams(pat).is_valid(),
         ),
+        (
+            MatMulFixedPointParams.composite_name,
+            matmul_fixed_point_pattern(),
+            lambda pat: MatMulFixedPointParams(pat).is_valid(),
+        ),
         (
             MaxPool2DParams.composite_name,
             qnn_maxpool2d_pattern(),
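
For reference, a minimal Relay expression that matmul_fixed_point_pattern() should match, sketched after create_model() in the test below (both inputs cast to int32 and rescaled with fixed_point_multiply, then nn.dense, a rescale, and a cast back to int16):

from tvm import relay

ifm = relay.var("ifm", shape=(1, 16), dtype="int16")
ifm2 = relay.var("ifm2", shape=(8, 16), dtype="int16")
lhs = relay.fixed_point_multiply(relay.cast(ifm, "int32"), 2**31 - 1, 0)
rhs = relay.fixed_point_multiply(relay.cast(ifm2, "int32"), 2**31 - 1, 0)
dense = relay.nn.dense(lhs, rhs)
out = relay.cast(relay.fixed_point_multiply(dense, 1, 16), "int16")
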
3 changes: 2 additions & 1 deletion src/relay/op/contrib/ethosu/identity.cc
@@ -46,7 +46,8 @@ bool EthosuIdentityRel(const Array<Type>& types, int num_inputs, const Attrs& at

   const String operator_name = "ethosu_identity";

-  CheckDataType(reporter, ifm->dtype, {DataType::UInt(8), DataType::Int(8)}, operator_name, "ifm");
+  CheckDataType(reporter, ifm->dtype, {DataType::UInt(8), DataType::Int(8), DataType::Int(16)},
+                operator_name, "ifm");

   if (ifm->shape.size() > 4) {
     reporter->GetDiagCtx().EmitFatal(
51 changes: 51 additions & 0 deletions tests/python/contrib/test_ethosu/test_codegen.py
@@ -1624,5 +1624,56 @@ def subtract_sigmoid_function(lhs, rhs):
     )

+
+@pytest.mark.parametrize("accel_type", ["ethos-u55-256", "ethos-u65-256"])
+@pytest.mark.parametrize(
+    "ifm_shape,ofm_channels,fract_size,tolerance",
+    [[(1, 16), 8, 15, 0.001], [(2, 8), 16, 14, 0.001], [(4, 8), 16, 12, 0.001]],
+)
+def test_ethosu_matmul_fixed_point(accel_type, ifm_shape, ofm_channels, fract_size, tolerance):
+    np.random.seed(0)
+    dtype = "int16"
+    weights_shape = (ofm_channels, ifm_shape[1])
+
+    def create_model():
+        ifm = relay.var("ifm", shape=ifm_shape, dtype=dtype)
+        ifm2 = relay.var("ifm2", shape=weights_shape, dtype=dtype)
+        ifm_fixed_point = relay.cast(ifm, "int32")
+        ifm2_fixed_point = relay.cast(ifm2, "int32")
+        ifm_fixed_point = relay.fixed_point_multiply(ifm_fixed_point, 2**31 - 1, 0)
+        ifm2_fixed_point = relay.fixed_point_multiply(ifm2_fixed_point, 2**31 - 1, 0)
+        dense = relay.nn.dense(ifm_fixed_point, ifm2_fixed_point)
+        dense = relay.fixed_point_multiply(dense, 1, 16)
+        dense = relay.cast(dense, dtype)
+        return tvm.IRModule.from_expr(relay.Function([ifm, ifm2], dense))
+
+    def convert_to_fixed_point(arr, fract_size):
+        fract_fact = 0b1 << fract_size
+        return np.array(arr * fract_fact, dtype=np.int16)
+
+    cpu_mod = create_model()
+    ethosu_mod = partition_for_ethosu(cpu_mod)
+
+    input_data = {
+        "ifm": np.random.uniform(-0.5, 0.5, size=ifm_shape),
+        "ifm2": np.random.uniform(-0.5, 0.5, size=weights_shape),
+    }
+    input_data = {
+        "ifm": convert_to_fixed_point(input_data["ifm"], fract_size),
+        "ifm2": convert_to_fixed_point(input_data["ifm2"], fract_size),
+    }
+    output_data = generate_ref_data(cpu_mod, input_data)
+    output_data = {"output": output_data["output"].astype("int16")}
+    tolerance = convert_to_fixed_point(tolerance, fract_size)
+
+    infra.compare_ethosu_with_reference(
+        ethosu_mod,
+        input_data,
+        output_data,
+        accel_type,
+        enable_cascader=False,
+        output_tolerance=tolerance,
+    )
+

 if __name__ == "__main__":
     tvm.testing.main()
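
A back-of-the-envelope sketch of the Q-format arithmetic this test exercises (an illustration under the 31-fractional-bit reading in the MatMulFixedPointParams comment, not part of the patch): Q15 inputs give a Q30 product under integer accumulation, and fixed_point_multiply(dense, 1, 16) rescales by 2**(16 - 31), that is 2**-15, bringing the result back to Q15.

import numpy as np

fract_size = 15
a = np.random.uniform(-0.5, 0.5, size=(1, 16))
b = np.random.uniform(-0.5, 0.5, size=(8, 16))
qa = np.array(a * (1 << fract_size), dtype=np.int16)  # Q15 inputs
qb = np.array(b * (1 << fract_size), dtype=np.int16)
acc = qa.astype(np.int64) @ qb.astype(np.int64).T     # Q30 accumulator; int64 avoids overflow
q_out = (acc >> fract_size).astype(np.int16)          # rescale back to Q15
assert np.allclose(q_out / (1 << fract_size), a @ b.T, atol=0.001)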