Commit

Merge pull request #8149 from jemiryguo/main
add incontiguous support for cutensor functions
asi1024 committed Feb 7, 2024
2 parents 7a20c1a + 32bb3ad commit f643379
Showing 2 changed files with 133 additions and 19 deletions.
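
Before this commit, every routine in cupyx/cutensor.pyx rejected arrays that were not C-contiguous; the diff below removes those guards and relaxes the tensor-descriptor alignment requirement so that strided views work end to end. A minimal before/after sketch (illustrative shapes; not part of the diff, and it requires a CUDA device with cuTENSOR installed):

import cupy
from cupyx import cutensor

A = cupy.random.rand(30, 40, 30, dtype=cupy.float32)
C = cupy.zeros(40, dtype=cupy.float32)
mode_a = cutensor.create_mode('a', 'b', 'c')
mode_c = cutensor.create_mode('b')

view = A[:, 0:7, :]  # non-contiguous view, like self.a[:, b0:b0+delta, :] in the new tests

# Before: ValueError('The inputs should be contiguous arrays.')
# After:  the reduction runs directly on the view.
cutensor.reduction(1.0, view, mode_a, 0.0, C[0:7], mode_c)
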
23 changes: 4 additions & 19 deletions cupyx/cutensor.pyx
@@ -242,8 +242,9 @@ cpdef TensorDescriptor create_tensor_descriptor(_ndarray_base a):
         (TensorDescriptor): A instance of class TensorDescriptor.
     """
     handle = _get_handle()
-    key = (handle.ptr, a.dtype, tuple(a.shape), tuple(a.strides))
-    alignment_req = 256
+    alignment_req = a.itemsize
+    key = (handle.ptr, a.dtype, tuple(a.shape),
+           tuple(a.strides), alignment_req)
     if a.data.ptr & (alignment_req - 1) != 0:
         raise ValueError("Missaligned array")
     if key not in _tensor_descriptors:
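
The fixed 256-byte requirement is what made views unusable: a view starts at an arbitrary element offset inside its base allocation, so only alignment to the element size is guaranteed, and the cache key now carries alignment_req alongside shape and strides. A quick illustration of the difference (a sketch; the 192 below assumes a 256-byte-aligned base pointer, which CuPy's memory pool provides):

import cupy

x = cupy.zeros((30, 40, 30), dtype=cupy.float32)  # itemsize == 4
v = x[1:]                       # view starting 40 * 30 * 4 == 4800 bytes into x
print(v.data.ptr % 256)         # 192: fails the old fixed requirement
print(v.data.ptr % v.itemsize)  # 0: element-size alignment holds for any element offset
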
@@ -479,18 +480,13 @@ def elementwise_binary(
     Examples:
         See examples/cutensor/elementwise_binary.py
     """
-    if not (A._c_contiguous and C._c_contiguous):
-        raise ValueError('The inputs should be contiguous arrays.')
-
     if out is None:
         out = core._ndarray_init(
             _cupy.ndarray, C._shape, dtype=C.dtype, obj=None)
     elif C.dtype != out.dtype:
         raise ValueError('dtype mismatch: {} != {}'.format(C.dtype, out.dtype))
     elif not internal.vector_equal(C._shape, out._shape):
         raise ValueError('shape mismatch: {} != {}'.format(C.shape, out.shape))
-    elif not out._c_contiguous:
-        raise ValueError('`out` should be a contiguous array.')
 
     desc_A = create_tensor_descriptor(A)
     desc_C = create_tensor_descriptor(C)
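
With these guards gone, elementwise_binary accepts strided views for A, C, and out alike, and out may alias the C view, as the new test below does; elementwise_trinary in the next hunk is relaxed the same way. A minimal sketch mirroring the test's call pattern (illustrative shapes; not part of the diff):

import cupy
from cupyx import cutensor

mode_a = cutensor.create_mode('a', 'b', 'c')
mode_c = cutensor.create_mode('c', 'a', 'b')
A = cupy.random.rand(30, 40, 30, dtype=cupy.float32)
C = cupy.random.rand(30, 30, 40, dtype=cupy.float32)

# Slice mode 'b' on both operands; every operand is a non-contiguous view.
cutensor.elementwise_binary(1.0, A[:, 0:7], mode_a,
                            1.0, C[:, :, 0:7], mode_c,
                            out=C[:, :, 0:7])
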
@@ -625,18 +621,13 @@ def elementwise_trinary(
     Examples:
         See examples/cutensor/elementwise_trinary.py
     """
-    if not (A._c_contiguous and B._c_contiguous and C._c_contiguous):
-        raise ValueError('The inputs should be contiguous arrays.')
-
     if out is None:
         out = core._ndarray_init(
             _cupy.ndarray, C._shape, dtype=C.dtype, obj=None)
     elif C.dtype != out.dtype:
         raise ValueError('dtype mismatch: {} != {}'.format(C.dtype, out.dtype))
     elif not internal.vector_equal(C._shape, out._shape):
         raise ValueError('shape mismatch: {} != {}'.format(C.shape, out.shape))
-    elif not out._c_contiguous:
-        raise ValueError('`out` should be a contiguous array.')
 
     desc_A = create_tensor_descriptor(A)
     desc_B = create_tensor_descriptor(B)
@@ -783,9 +774,6 @@ def contraction(
     Examples:
         See examples/cutensor/contraction.py
     """
-    if not (A._c_contiguous and B._c_contiguous and C._c_contiguous):
-        raise ValueError('The inputs should be contiguous arrays.')
-
     desc_A = create_tensor_descriptor(A)
     desc_B = create_tensor_descriptor(B)
     desc_C = create_tensor_descriptor(C)
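
contraction likewise now runs directly on views, so a large contraction can be computed tile by tile into slices of C, which is exactly the pattern the new test_contraction below drives. A condensed sketch (shapes as in the test; not part of the diff):

import cupy
from cupyx import cutensor

mode_a = cutensor.create_mode('a', 'b', 'c')
mode_b = cutensor.create_mode('c', 'd', 'b')
mode_c = cutensor.create_mode('d', 'a')
A = cupy.random.rand(30, 40, 30, dtype=cupy.float32)
B = cupy.random.rand(30, 35, 40, dtype=cupy.float32)
C = cupy.zeros((35, 30), dtype=cupy.float32)

for a0 in range(0, 30, 7):  # tile over mode 'a'; C[:, a0:a0+7] is a strided view
    cutensor.contraction(1.0, A[a0:a0+7], mode_a, B, mode_b,
                         0.0, C[:, a0:a0+7], mode_c)
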
@@ -876,7 +864,7 @@ def reduction(
     This routine computes the tensor reduction:
 
-        C = alpha * reduce_op(op_A(A)) + beta * op_C(C))
+        C = alpha * reduce_op(op_A(A)) + beta * op_C(C)
 
     Args:
         alpha (scalar): Scaling factor for A.
@@ -897,9 +885,6 @@ def reduction(
     Examples:
         See examples/cutensor/reduction.py
     """
-    if not (A._c_contiguous and C._c_contiguous):
-        raise ValueError('The inputs should be contiguous arrays.')
-
     desc_A = create_tensor_descriptor(A)
     desc_C = create_tensor_descriptor(C)
     mode_A = _auto_create_mode(A, mode_A)
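
The docstring hunk above also drops a stray closing parenthesis from the formula, which reads C = alpha * reduce_op(op_A(A)) + beta * op_C(C): reduce_op collapses every mode of A that does not appear in mode_C. A small numeric check (a sketch assuming the default reduce_op is summation; not part of the diff):

import cupy
from cupyx import cutensor

A = cupy.arange(24, dtype=cupy.float32).reshape(2, 3, 4)
C = cupy.zeros(3, dtype=cupy.float32)
mode_a = cutensor.create_mode('a', 'b', 'c')
mode_c = cutensor.create_mode('b')

out = cutensor.reduction(1.0, A, mode_a, 0.0, C, mode_c)
print(cupy.allclose(out, A.sum(axis=(0, 2))))  # True: modes 'a' and 'c' are reduced
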
129 changes: 129 additions & 0 deletions tests/cupyx_tests/test_cutensor.py
@@ -337,3 +337,132 @@ def test_contraction(self):
                                      self.c, mode_c)
         cupy.testing.assert_allclose(self.c, self.c_ref,
                                      rtol=self.tol, atol=self.tol)
+
+
+@testing.parameterize(*testing.product({
+    'dtype_char': ['e', 'f', 'd', 'F', 'D'],
+    'shape': [(30, 40, 30, 35)],
+    'alpha': [0.5, 1.0],
+    'beta': [0.0, 1.0],
+    'order': ['C', 'F']
+}))
+@pytest.mark.skipif(not ct.available, reason='cuTensor is unavailable')
+class TestCuTensorIncontiguous:
+    _tol = {'e': 1e-3, 'f': 2e-6, 'd': 1e-12}
+
+    @pytest.fixture(autouse=True)
+    def setUp(self):
+        compute_capability = int(device.get_compute_capability())
+        if compute_capability < 70 and self.dtype_char == 'e':
+            pytest.skip("Not supported")
+        self.dtype = numpy.dtype(self.dtype_char)
+        self.tol = self._tol[self.dtype_char.lower()]
+
+    def test_contraction(self):
+        mode_a = cutensor.create_mode('a', 'b', 'c')
+        mode_b = cutensor.create_mode('c', 'd', 'b')
+        mode_c = cutensor.create_mode('d', 'a')
+        a, b, c, d = self.shape
+        self.a = testing.shaped_random(
+            (a, b, c), cupy, dtype=self.dtype, order=self.order)
+        self.b = testing.shaped_random(
+            (c, d, b), cupy, dtype=self.dtype, order=self.order)
+        self.c = testing.shaped_random(
+            (d, a), cupy, dtype=self.dtype, order=self.order)
+        delta = 7
+        c_ref = self.c.copy()
+        c_ref = cutensor.contraction(self.alpha,
+                                     self.a, mode_a,
+                                     self.b, mode_b,
+                                     self.beta,
+                                     c_ref, mode_c)
+        for a0 in range(0, a, delta):
+            for d0 in range(0, d, delta):
+                cutensor.contraction(self.alpha,
+                                     self.a[a0:a0+delta], mode_a,
+                                     self.b[:, d0:d0+delta], mode_b,
+                                     self.beta,
+                                     self.c[d0:d0+delta, a0:a0+delta], mode_c)
+                cupy.testing.assert_allclose(self.c[d0:d0+delta, a0:a0+delta],
+                                             c_ref[d0:d0+delta, a0:a0+delta],
+                                             rtol=self.tol, atol=self.tol)
+
+    def test_reduction(self):
+        mode_a = cutensor.create_mode('a', 'b', 'c')
+        mode_c = cutensor.create_mode('b')
+        a, b, c, _ = self.shape
+        self.a = testing.shaped_random(
+            (a, b, c), cupy, dtype=self.dtype, order=self.order)
+        self.c = testing.shaped_random(
+            (b,), cupy, dtype=self.dtype, order=self.order)
+
+        c_ref = self.c.copy()
+        c_ref = cutensor.reduction(self.alpha,
+                                   self.a, mode_a,
+                                   self.beta,
+                                   c_ref, mode_c)
+        delta = 7
+        for b0 in range(0, b, delta):
+            cutensor.reduction(self.alpha,
+                               self.a[:, b0:b0+delta, :], mode_a,
+                               self.beta,
+                               self.c[b0:b0+delta], mode_c)
+            cupy.testing.assert_allclose(self.c[b0:b0+delta],
+                                         c_ref[b0:b0+delta],
+                                         rtol=self.tol, atol=self.tol)
+
+    def test_elementwise_binary(self):
+        mode_a = cutensor.create_mode('a', 'b', 'c')
+        mode_c = cutensor.create_mode('c', 'a', 'b')
+        a, b, c, _ = self.shape
+        self.a = testing.shaped_random(
+            (a, b, c), cupy, dtype=self.dtype, order=self.order)
+        self.c = testing.shaped_random(
+            (c, a, b), cupy, dtype=self.dtype, order=self.order)
+
+        c_ref = self.c.copy()
+        c_ref = cutensor.elementwise_binary(self.alpha,
+                                            self.a, mode_a,
+                                            self.beta,
+                                            c_ref, mode_c)
+        delta = 7
+        for b0 in range(0, b, delta):
+            cutensor.elementwise_binary(self.alpha,
+                                        self.a[:, b0:b0+delta], mode_a,
+                                        self.beta,
+                                        self.c[:, :, b0:b0+delta], mode_c,
+                                        out=self.c[:, :, b0:b0+delta])
+            cupy.testing.assert_allclose(self.c[:, :, b0:b0+delta],
+                                         c_ref[:, :, b0:b0+delta],
+                                         rtol=self.tol, atol=self.tol)
+
+    def test_elementwise_trinary(self):
+        mode_a = cutensor.create_mode('a', 'b', 'c')
+        mode_b = cutensor.create_mode('b', 'c', 'a')
+        mode_c = cutensor.create_mode('c', 'a', 'b')
+        a, b, c, _ = self.shape
+        self.a = testing.shaped_random(
+            (a, b, c), cupy, dtype=self.dtype, order=self.order)
+        self.b = testing.shaped_random(
+            (b, c, a), cupy, dtype=self.dtype, order=self.order)
+        self.c = testing.shaped_random(
+            (c, a, b), cupy, dtype=self.dtype, order=self.order)
+
+        for gamma in [0.0, 1.0]:
+            c_ref = self.c.copy()
+            c_ref = cutensor.elementwise_trinary(self.alpha, self.a, mode_a,
+                                                 self.beta, self.b, mode_b,
+                                                 gamma, c_ref, mode_c,
+                                                 out=c_ref)
+            delta = 7
+            for a0 in range(0, a, delta):
+                cutensor.elementwise_trinary(self.alpha,
+                                             self.a[a0:a0+delta],
+                                             mode_a, self.beta,
+                                             self.b[:, :, a0:a0+delta],
+                                             mode_b, gamma,
+                                             self.c[:, a0:a0+delta], mode_c,
+                                             out=self.c[:, a0:a0+delta])
+                cupy.testing.assert_allclose(self.c[:, a0:a0+delta],
+                                             c_ref[:, a0:a0+delta],
+                                             rtol=self.tol, atol=self.tol)
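
For reference, the inner-axis slices these tests take (for example x[:, 0:7] on a C-order array) are genuinely non-contiguous views, so the loops above exercise the relaxed descriptors rather than falling back to contiguous inputs. A quick check with the parameterized shape (a sketch, not part of the diff):

import cupy

x = cupy.zeros((30, 40, 30), dtype=cupy.float32)
print(x[:, 0:7].flags.c_contiguous)    # False: slicing an inner axis breaks contiguity
print(x[:, 0:7].strides == x.strides)  # True: the view inherits the base array's strides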
