[Relay][TOPI] Add support for group_conv1d_transpose_ncw for generic #16248

Merged: 5 commits, Dec 18, 2023
26 changes: 18 additions & 8 deletions python/tvm/relay/op/strategy/generic.py
@@ -713,7 +713,7 @@ def group_conv1d_strategy(attrs, inputs, out_type, target):


# conv1d_transpose
def wrap_compute_conv1d_transpose(topi_compute):
def wrap_compute_conv1d_transpose(topi_compute, has_groups=False):
    """wrap conv1d_transpose topi compute"""

    def _compute_conv1d_tranpsoe(attrs, inputs, out_type):
@@ -722,7 +722,11 @@ def _compute_conv1d_tranpsoe(attrs, inputs, out_type):
        out_dtype = attrs.out_dtype
        out_dtype = inputs[0].dtype if out_dtype in ("same", "") else out_dtype
        output_padding = get_const_tuple(attrs.output_padding)
        out = topi_compute(inputs[0], inputs[1], strides, padding, out_dtype, output_padding)
        args = [inputs[0], inputs[1], strides, padding, out_dtype, output_padding]
        if has_groups:
            args.append(attrs.groups)

        out = topi_compute(*args)
        return [out]

    return _compute_conv1d_tranpsoe
@@ -738,12 +742,18 @@ def conv1d_transpose_strategy(attrs, inputs, out_type, target):
    groups = attrs.groups
    assert layout == "NCW", "conv1d_transpose ncw only supported"
    assert dilation == (1,), "conv1d_transpose dilation is not supported"
    assert groups == 1, "conv1d_transpose groups == 1 only supported"
    strategy.add_implementation(
        wrap_compute_conv1d_transpose(topi.nn.conv1d_transpose_ncw),
        wrap_topi_schedule(topi.generic.schedule_conv1d_transpose_ncw),
        name="conv1d_transpose_ncw.generic",
    )
    if groups == 1:
        strategy.add_implementation(
            wrap_compute_conv1d_transpose(topi.nn.conv1d_transpose_ncw),
            wrap_topi_schedule(topi.generic.schedule_conv1d_transpose_ncw),
            name="conv1d_transpose_ncw.generic",
        )
    else:  # group_conv1d_transpose
        strategy.add_implementation(
            wrap_compute_conv1d_transpose(topi.nn.group_conv1d_transpose_ncw, has_groups=True),
            wrap_topi_schedule(topi.generic.schedule_group_conv1d_transpose_ncw),
            name="group_conv1d_transpose_ncw.generic",
        )
    return strategy


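For context (not part of the diff): a minimal sketch of how the new strategy branch could be exercised from Relay on the generic `llvm` target, assuming `relay.nn.conv1d_transpose` forwards its `groups` attribute to this strategy. Shapes, channel counts, and `groups=4` are illustrative only.

```python
# Hypothetical usage sketch, not part of the PR. With groups != 1 the generic
# strategy now registers group_conv1d_transpose_ncw.generic instead of
# asserting groups == 1.
import tvm
from tvm import relay

data = relay.var("data", shape=(1, 8, 224), dtype="float32")    # NCW
weight = relay.var("weight", shape=(8, 2, 5), dtype="float32")  # IOW: [in_c, out_c // groups, k]
out = relay.nn.conv1d_transpose(
    data, weight, strides=(1,), padding=(0,), channels=8, kernel_size=(5,), groups=4
)
mod = tvm.IRModule.from_expr(relay.Function([data, weight], out))
lib = relay.build(mod, target="llvm")
```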
17 changes: 17 additions & 0 deletions python/tvm/topi/generic/nn.py
@@ -396,6 +396,23 @@ def schedule_conv1d_transpose_ncw(outs):
    return _default_schedule(outs, False)


def schedule_group_conv1d_transpose_ncw(outs):
    """Schedule for group_conv1d_transpose_ncw

    Parameters
    ----------
    outs: Array of Tensor
        The computation graph description of group conv1d_transpose_ncw
        in the format of an array of tensors.

    Returns
    -------
    s: Schedule
        The computation schedule for the op.
    """
    return _default_schedule(outs, False)


def schedule_depthwise_conv2d_nchw(outs):
    """Schedule for depthwise_conv2d_nchw

142 changes: 132 additions & 10 deletions python/tvm/topi/nn/conv1d_transpose.py
@@ -23,8 +23,9 @@
from .utils import get_pad_tuple1d


def conv1d_transpose_ncw(data, kernel, stride, padding, out_dtype, output_padding):
    """Transposed 1D convolution ncw forward operator.
def _conv1d_transpose_ncw_preprocess(data, kernel, stride, padding, out_dtype, output_padding):
    """Preprocess data and kernel to make the compute pattern
    of conv1d_transpose the same as conv1d.

    Parameters
    ----------
@@ -49,45 +50,166 @@ def conv1d_transpose_ncw(data, kernel, stride, padding, out_dtype, output_paddin

    Returns
    -------
    output : tvm.te.Tensor
        3-D with shape [batch, out_channel, out_width]
    data_pad : tvm.te.Tensor
        Padded input data. 3-D with shape [batch, in_channel, in_width]

    kernel: tvm.te.Tensor
        Transformed kernel. 3-D with shape [num_filter, in_channel, filter_width]
    """
    # some pre-processing and preliminary checks
    if out_dtype is None:
        out_dtype = data.dtype

    # dilate and pad
    if isinstance(stride, (tuple, list)):
        stride = stride[0]
    if isinstance(output_padding, (tuple, list)):
        output_padding = output_padding[0]
    batch, channels_in, data_width = data.shape

    _, channels_in, _ = data.shape
    _, channels_out, kernel_width = kernel.shape
    assert output_padding < stride
    channels_out = simplify(channels_out)
    data = dilate(data, [1, 1, stride], name="data_dilate")
    data_dilate = dilate(data, [1, 1, stride], name="data_dilate")
    pad_left, pad_right = get_pad_tuple1d(padding, (kernel_width,))
    pad_left = kernel_width - 1 - pad_left
    pad_right = kernel_width - 1 - pad_right + output_padding
    data = pad(data, [0, 0, pad_left], [0, 0, pad_right], name="data_pad")
    data_pad = pad(data_dilate, [0, 0, pad_left], [0, 0, pad_right], name="data_pad")

    # transpose kernel, switch kernel layout to IOW
    # transform kernel layout from IOW to OIW, and rotate kernel by 180 degrees
    kernel = te.compute(
        (channels_out, channels_in, kernel_width),
        lambda o, i, w: kernel[i][o][kernel_width - 1 - w],
        name="kernel",
    )
    return data_pad, kernel


def conv1d_transpose_ncw(data, kernel, stride, padding, out_dtype, output_padding):
    """Transposed 1D convolution ncw forward operator.

    Parameters
    ----------
    data : tvm.te.Tensor
        3-D with shape [batch, in_channel, in_width]

    kernel : tvm.te.Tensor
        3-D with shape [in_channel, num_filter, filter_width]

    stride : ints
        The spatial stride along width

    padding : int or str
        Padding size, or ['VALID', 'SAME']

    out_dtype : str
        The output data type. This is used for mixed precision.

    output_padding : ints
        Used to recover the actual output shape in case there are more
        than one possible shape. Must be smaller than stride.

    Returns
    -------
    output : tvm.te.Tensor
        3-D with shape [batch, out_channel, out_width]

    """

    batch, channels_in, _ = data.shape
    _, channels_out, kernel_width = kernel.shape

    data_pad, transformed_kernel = _conv1d_transpose_ncw_preprocess(
        data, kernel, stride, padding, out_dtype, output_padding
    )

    # convolution
    _, _, data_width = data.shape
    _, _, data_width = data_pad.shape
    out_w = simplify(data_width - kernel_width + 1)
    dc = te.reduce_axis((0, channels_in), name="dc")
    dw = te.reduce_axis((0, kernel_width), name="dw")
    output = te.compute(
        (batch, channels_out, out_w),
        lambda b, c, w: te.sum(
            data[b, dc, w + dw].astype(out_dtype) * kernel[c, dc, dw].astype(out_dtype),
            data_pad[b, dc, w + dw].astype(out_dtype)
            * transformed_kernel[c, dc, dw].astype(out_dtype),
            axis=[dc, dw],
        ),
        tag="conv1d_transpose_ncw",
    )

    return output


def group_conv1d_transpose_ncw(data, kernel, stride, padding, out_dtype, output_padding, groups):
    """Transposed 1D group convolution ncw forward operator.

    Parameters
    ----------
    data : tvm.te.Tensor
        3-D with shape [batch, in_channel, in_width]

    kernel : tvm.te.Tensor
        3-D with shape [in_channel, num_filter, filter_width]

    stride : ints
        The spatial stride along width

    padding : int or str
        Padding size, or ['VALID', 'SAME']

    out_dtype : str
        The output data type. This is used for mixed precision.

    output_padding : ints
        Used to recover the actual output shape in case there are more
        than one possible shape. Must be smaller than stride.

    groups : int
        number of groups

    Returns
    -------
    output : tvm.te.Tensor
        3-D with shape [batch, out_channel, out_width]

    """
    if groups == 1:
        return conv1d_transpose_ncw(data, kernel, stride, padding, out_dtype, output_padding)

    _, in_channels, _ = data.shape

    assert (
        in_channels % groups == 0
    ), f"input channels {in_channels} must be divisible by the number of groups {groups}"

    data_pad, transformed_kernel = _conv1d_transpose_ncw_preprocess(
        data, kernel, stride, padding, out_dtype, output_padding
    )

    batch, in_channels, in_w = data_pad.shape
    out_c, _, filter_w = transformed_kernel.shape

    # convolution stage
    out_channels = simplify(out_c * groups)
    out_w = simplify(in_w - filter_w + 1)
    dc = te.reduce_axis((0, in_channels // groups), name="dc")
    dw = te.reduce_axis((0, filter_w), name="dw")

    # data_pad: [batch, in_channels, in_w]
    # transformed_kernel: [out_channels // groups, in_channels, filter_w]
    return te.compute(
        (batch, out_channels, out_w),
        lambda b, c, w: te.sum(
            data_pad[
                b, c // (out_channels // groups) * (in_channels // groups) + dc, w + dw
            ].astype(out_dtype)
            * transformed_kernel[
                c % (out_channels // groups),
                c // (out_channels // groups) * (in_channels // groups) + dc,
                dw,
            ].astype(out_dtype),
            axis=[dc, dw],
        ),
        tag="group_conv1d_transpose_ncw",
    )
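For reference (not part of the diff), a minimal sketch of instantiating the new compute with the generic schedule at the TE level; all shapes and the group count below are illustrative.

```python
# Hypothetical usage sketch: an NCW input of shape (1, 8, 32) and an IOW kernel
# with 3 filters per group give 3 * groups = 12 output channels.
import tvm
from tvm import te, topi

A = te.placeholder((1, 8, 32), name="A", dtype="float32")  # [batch, in_c, in_w]
W = te.placeholder((8, 3, 5), name="W", dtype="float32")   # [in_c, out_c // groups, k]
B = topi.nn.group_conv1d_transpose_ncw(
    A, W, stride=2, padding=1, out_dtype="float32", output_padding=(0,), groups=4
)
with tvm.target.Target("llvm"):
    s = topi.generic.schedule_group_conv1d_transpose_ncw([B])
func = tvm.build(s, [A, W, B], target="llvm")
print(B.shape)  # [1, 12, 65]: out_w = (in_w - 1) * stride - pad_left - pad_right + k + output_padding
```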
5 changes: 4 additions & 1 deletion python/tvm/topi/testing/__init__.py
@@ -29,7 +29,10 @@
from .conv3d_ndhwc_python import conv3d_ndhwc_python
from .conv3d_transpose_ncdhw_python import conv3d_transpose_ncdhw_python
from .conv2d_transpose_python import conv2d_transpose_nchw_python, conv2d_transpose_nhwc_python
from .conv1d_transpose_ncw_python import conv1d_transpose_ncw_python
from .conv1d_transpose_ncw_python import (
    conv1d_transpose_ncw_python,
    group_conv1d_transpose_ncw_python,
)
from .correlation_nchw_python import correlation_nchw_python
from .deformable_conv2d_python import deformable_conv2d_nchw_python, deformable_conv2d_nhwc_python
from .depthwise_conv2d_python import (
12 changes: 12 additions & 0 deletions python/tvm/topi/testing/conv1d_transpose_ncw_python.py
@@ -22,6 +22,18 @@
from tvm.topi.nn.utils import get_pad_tuple1d


def group_conv1d_transpose_ncw_python(a_np, w_np, stride, padding, output_padding, groups=1):
"Grouped version of `conv1d_transpose_ncw_python`, see that for documentation"
    a_slices = np.array_split(a_np, groups, axis=1)
    w_slices = np.array_split(w_np, groups, axis=0)
    b_slices = [
        conv1d_transpose_ncw_python(a_slice, w_slice, stride, padding, output_padding)
        for a_slice, w_slice in zip(a_slices, w_slices)
    ]
    b_np = np.concatenate(b_slices, axis=1)
    return b_np


def conv1d_transpose_ncw_python(a_np, w_np, stride, padding, output_padding):
"""Transposed 1D convolution operator in NCW layout.

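For reference (not part of the diff), a small sketch of checking the output shape produced by the NumPy reference above; the width follows the usual transposed-convolution relation, and all sizes and the group count are illustrative.

```python
# Hypothetical shape sanity check against the NumPy reference.
import numpy as np
import tvm.topi.testing
from tvm.topi.nn.utils import get_pad_tuple1d

batch, in_c, in_w = 1, 8, 32
out_c, k, stride, padding, output_padding, groups = 12, 5, 2, 1, (0,), 4

a_np = np.random.uniform(size=(batch, in_c, in_w)).astype("float32")
w_np = np.random.uniform(size=(in_c, out_c // groups, k)).astype("float32")
b_np = tvm.topi.testing.group_conv1d_transpose_ncw_python(
    a_np, w_np, stride, padding, output_padding, groups
)

# out_w = (in_w - 1) * stride - pad_left - pad_right + k + output_padding
pad_left, pad_right = get_pad_tuple1d(padding, (k,))
out_w = (in_w - 1) * stride - pad_left - pad_right + k + output_padding[0]
assert b_np.shape == (batch, out_c, out_w)  # (1, 12, 65) with these sizes
```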
110 changes: 110 additions & 0 deletions tests/python/topi/test_topi_group_conv1d_transpose_ncw.py
@@ -0,0 +1,110 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
"""Test code for group transposed 1d convolution."""

import itertools
import os

import numpy as np

import tvm
import tvm.testing
import tvm.topi.testing

from tvm import te, topi
from tvm.topi.utils import get_const_tuple

_group_conv1d_transpose_ncw_implement = {
"generic": (
topi.nn.group_conv1d_transpose_ncw,
topi.generic.schedule_group_conv1d_transpose_ncw,
),
}


(
    batch,
    in_channel,
    in_size,
    num_filter,
    kernel,
    stride,
    padding,
    output_padding,
    groups,
) = tvm.testing.parameters(
    (1, 4, 224, 32, 5, 1, 0, (0,), 4),
    (1, 8, 224, 32, 7, 1, 2, (0,), 4),
    (1, 8, 224, 32, 5, 2, 1, (0,), 2),
    (1, 4, 224, 4, 5, 2, 1, (1,), 4),
    (1, 3, 224, 15, 5, 2, 0, (0,), 3),
    (1, 32, 32, 128, 5, 1, 0, (0,), 32),
    (1, 32, 32, 128, 5, 2, 1, (0,), 16),
)

dtype = tvm.testing.parameter("float32")


@tvm.testing.fixture(cache_return_value=True)
def ref_data(
    dtype, batch, in_channel, in_size, num_filter, kernel, stride, padding, output_padding, groups
):
dtype = "float32"
a_shape = (batch, in_channel, in_size)
w_shape = (in_channel, num_filter, kernel)

a_np = np.random.uniform(size=a_shape).astype(dtype)
w_np = np.random.uniform(size=w_shape).astype(dtype)
b_np = tvm.topi.testing.group_conv1d_transpose_ncw_python(
a_np, w_np, stride, padding, output_padding, groups
)
c_np = np.maximum(b_np, 0)
return a_np, w_np, b_np, c_np


@tvm.testing.known_failing_targets("cuda", "vulkan")
def test_group_conv1d_transpose_ncw(
    target, dev, ref_data, dtype, stride, padding, output_padding, groups
):
    a_np, w_np, b_np, c_np = ref_data

    A = te.placeholder(a_np.shape, name="A", dtype=dtype)
    W = te.placeholder(w_np.shape, name="W", dtype=dtype)

    with tvm.target.Target(target):
        fcompute, fschedule = tvm.topi.testing.dispatch(
            target, _group_conv1d_transpose_ncw_implement
        )
        B = fcompute(A, W, stride, padding, A.dtype, output_padding, groups)
        C = topi.nn.relu(B)
        s1 = fschedule([B])
        s2 = fschedule([C])
    a = tvm.nd.array(a_np, dev)
    w = tvm.nd.array(w_np, dev)
    b = tvm.nd.array(np.zeros(get_const_tuple(B.shape), dtype=B.dtype), dev)
    c = tvm.nd.array(np.zeros(get_const_tuple(C.shape), dtype=C.dtype), dev)

    func1 = tvm.build(s1, [A, W, B], target)
    func2 = tvm.build(s2, [A, W, C], target)
    func1(a, w, b)
    func2(a, w, c)
    tvm.testing.assert_allclose(b.numpy(), b_np, rtol=1e-5)
    tvm.testing.assert_allclose(c.numpy(), c_np, rtol=1e-5)


if __name__ == "__main__":
    tvm.testing.main()