Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[frontend][tflite] float16 quant support #7736

Merged
merged 9 commits into from Apr 17, 2021
35 changes: 24 additions & 11 deletions python/tvm/relay/frontend/tflite.py
Expand Up @@ -1882,9 +1882,12 @@ def convert_fully_connected(self, op):
# bias tensor type should be INT32 (quantization) or FLOAT32
assert bias_tensor_type in (TensorType.INT32, TensorType.FLOAT32)
bias_tensor_type_str = self.get_tensor_type_str(bias_tensor_type)
bias_expr = self.exp_tab.new_const(
self.get_tensor_value(bias_tensor), dtype=bias_tensor_type_str
)
if self.has_expr(bias_tensor.tensor_idx):
bias_expr = self.get_expr(bias_tensor.tensor_idx)
else:
bias_expr = self.exp_tab.new_const(
self.get_tensor_value(bias_tensor), dtype=bias_tensor_type_str
)
out = _op.nn.bias_add(out, bias_expr)

# Finally if the dense is quantized. Add a requantize at the end.
Expand Down Expand Up @@ -2852,11 +2855,18 @@ def convert_transpose_conv(self, op):
# weights tensor type should be INT8 or UINT8 (quantization) or FLOAT32
assert weights_tensor_type in (TensorType.INT8, TensorType.UINT8, TensorType.FLOAT32)
anijain2305 marked this conversation as resolved.
Show resolved Hide resolved
weight_tensor_type_str = self.get_tensor_type_str(weights_tensor_type)
weight_value_ohwi = self.get_tensor_value(weights_tensor)
# Relay kernel_layout should be OIHW
# Relay weights layout should be different from kernel_layout - it should be IOHW
weight_value_iohw = np.transpose(weight_value_ohwi, (3, 0, 1, 2))
weight_expr_iohw = self.exp_tab.new_const(weight_value_iohw, dtype=weight_tensor_type_str)

if self.has_expr(weights_tensor.tensor_idx):
weight_expr_iohw = self.get_expr(weights_tensor.tensor_idx)
weight_expr_iohw = _op.transpose(weight_expr_iohw, axes=(3, 0, 1, 2))
else:
weight_value_ohwi = self.get_tensor_value(weights_tensor)
# Relay kernel_layout should be OIHW
# Relay weights layout should be different from kernel_layout - it should be IOHW
weight_value_iohw = np.transpose(weight_value_ohwi, (3, 0, 1, 2))
weight_expr_iohw = self.exp_tab.new_const(
weight_value_iohw, dtype=weight_tensor_type_str
)

# Output shape value
output_shape_value = self.get_tensor_value(output_shape_tensor)
Expand Down Expand Up @@ -2912,9 +2922,12 @@ def convert_transpose_conv(self, op):
# bias tensor type should be INT32 (quantization) or FLOAT32
assert bias_tensor_type in (TensorType.INT32, TensorType.FLOAT32)
bias_tensor_type_str = self.get_tensor_type_str(bias_tensor_type)
bias_expr = self.exp_tab.new_const(
self.get_tensor_value(bias_tensor), dtype=bias_tensor_type_str
)
if self.has_expr(bias_tensor.tensor_idx):
bias_expr = self.get_expr(bias_tensor.tensor_idx)
else:
bias_expr = self.exp_tab.new_const(
self.get_tensor_value(bias_tensor), dtype=bias_tensor_type_str
)
channel_axis = 3
out = _op.nn.bias_add(out, bias_expr, axis=channel_axis)

Expand Down