Skip to content

Commit

Permalink
Merge 941d5e5 into 68223ec
Browse files Browse the repository at this point in the history
  • Loading branch information
emcastillo committed Dec 11, 2019
2 parents 68223ec + 941d5e5 commit 0b6b004
Show file tree
Hide file tree
Showing 2 changed files with 66 additions and 2 deletions.
41 changes: 41 additions & 0 deletions chainerx_cc/chainerx/index_iterator.h
Expand Up @@ -64,10 +64,28 @@ class IndexIterator {
// Records `i` as the raw (linear) index and decomposes it into one index per
// dimension. The last dimension is peeled off first, i.e. it varies fastest.
CHAINERX_HOST_DEVICE void Set(int64_t i) {
    CHAINERX_ASSERT(total_size_ > 0);
    raw_index_ = i;
#ifdef __CUDA_ARCH__
    // TODO(ecastill) add 32-bit case
    // Device path: 64-bit division is very slow on GPU, so extents that are a
    // power of two are handled with a mask and a shift instead of a division.
    uint64_t remaining = static_cast<uint64_t>(i);
    int8_t axis = kNdim;
    while (--axis > 0) {
        const uint64_t extent = static_cast<uint64_t>(shape_[axis]);
        const uint64_t mask = extent - 1;
        if ((extent & mask) == 0) {
            // Power of two: the low bits are the index, and the number of set
            // bits in the mask is the shift amount (log2 of the extent).
            // NOTE(review): an extent of 0 would also land here; presumably
            // the total_size_ assertion above rules that out -- confirm.
            index_[axis] = static_cast<int64_t>(remaining & mask);
            remaining >>= __popcll(mask);
        } else {
            // General extent: one division, remainder derived by multiply-subtract.
            const uint64_t quotient = remaining / extent;
            index_[axis] = static_cast<int64_t>(remaining - quotient * extent);
            remaining = quotient;
        }
    }
    // Dimension 0 receives what is left without a modulo by shape_[0].
    index_[0] = remaining;
#else
    // Host path: plain modulo/division per dimension, including dimension 0.
    int8_t axis = kNdim;
    while (--axis >= 0) {
        index_[axis] = i % shape_[axis];  // NOLINT(cppcoreguidelines-pro-bounds-constant-array-index)
        i /= shape_[axis];
    }
#endif
}

const int64_t* shape_;
Expand Down Expand Up @@ -128,6 +146,8 @@ class IndexIterator<0> {

// Returns the address of the stored raw index.
CHAINERX_HOST_DEVICE const int64_t* index() const {
    const int64_t* raw = &raw_index_;
    return raw;
}

// Stores `i` as the raw index; nothing else is updated.
CHAINERX_HOST_DEVICE void Set(int64_t i) {
    raw_index_ = i;
}

private:
int64_t raw_index_{0};
};
Expand Down Expand Up @@ -247,10 +267,31 @@ class IndexIterator<kDynamicNdim> {
// Records `i` as the raw (linear) index and decomposes it into one index per
// dimension for the first `ndim_` dimensions. The last dimension is peeled off
// first, i.e. it varies fastest. With `ndim_ == 0` only the raw index is set.
CHAINERX_HOST_DEVICE void Set(int64_t i) {
    CHAINERX_ASSERT(total_size_ > 0);
    raw_index_ = i;
    if (ndim_ == 0) {
        // No per-dimension indices to fill in.
        return;
    }
#ifdef __CUDA_ARCH__
    // TODO(ecastill) add 32-bit case
    // Device path: 64-bit division is very slow on GPU, so extents that are a
    // power of two are handled with a mask and a shift instead of a division.
    uint64_t remaining = static_cast<uint64_t>(i);
    int8_t axis = ndim_;
    while (--axis > 0) {
        const uint64_t extent = static_cast<uint64_t>(shape_[axis]);
        const uint64_t mask = extent - 1;
        if ((extent & mask) == 0) {
            // Power of two: the low bits are the index, and the number of set
            // bits in the mask is the shift amount (log2 of the extent).
            // NOTE(review): an extent of 0 would also land here; presumably
            // the total_size_ assertion above rules that out -- confirm.
            index_[axis] = static_cast<int64_t>(remaining & mask);
            remaining >>= __popcll(mask);
        } else {
            // General extent: one division, remainder derived by multiply-subtract.
            const uint64_t quotient = remaining / extent;
            index_[axis] = static_cast<int64_t>(remaining - quotient * extent);
            remaining = quotient;
        }
    }
    // Dimension 0 receives what is left without a modulo by shape_[0].
    index_[0] = remaining;
#else
    // Host path: plain modulo/division per dimension, including dimension 0.
    int8_t axis = ndim_;
    while (--axis >= 0) {
        index_[axis] = i % shape_[axis];  // NOLINT(cppcoreguidelines-pro-bounds-constant-array-index)
        i /= shape_[axis];
    }
#endif
}

const int64_t* shape_;
Expand Down
27 changes: 25 additions & 2 deletions tests/chainerx_tests/unit_tests/test_array_index.py
Expand Up @@ -6,14 +6,37 @@ def test_newaxis():
assert chainerx.newaxis is None


@pytest.mark.parametrize('xp', [chainerx])
@pytest.mark.parametrize_device(['native:0', 'cuda:0'])
@pytest.mark.parametrize('shape, transpose', [
    ((1,), None),
    ((2,), None),
    ((2, 3), None),
    ((2, 3, 4), None),
    ((2, 3, 4, 5), None),
    ((2, 3, 4, 5, 6), None),
    ((2, 3), (0, 1)),
    ((2, 3, 4), (0, 2)),
    ((2, 3, 4, 5), (0, 2)),
    ((2, 3, 4, 5, 6), (1, 3)),
])
def test_array_indexing(xp, device, shape, transpose):
    """Check that an in-place increment reaches every element exactly once.

    A zero-filled int8 array of the given shape is created on ``device``.
    When ``transpose`` holds a pair of axes, those axes are swapped first and
    the result is asserted to be non-contiguous. After adding 1 in place, the
    sum of the array must equal its number of elements.
    """
    arr = xp.zeros(shape=shape, dtype=chainerx.int8, device=device)
    if transpose:
        first_axis, second_axis = transpose
        arr = arr.swapaxes(first_axis, second_axis)
        assert not arr.is_contiguous
    # In-place add, so the (possibly swapped-axes) array itself is updated.
    arr += 1
    assert arr.sum() == arr.size


@pytest.mark.slow
@pytest.mark.parametrize('xp', [chainerx])
@pytest.mark.parametrize_device(['cuda:0'])
@pytest.mark.parametrize('shape', [
(64, 32, 6*1024*4), # Less than 2^32 elems
(64, 32, 6*1024*512), # More than 2^32 elems
])
def test_array_contiguous_indexing(xp, device, shape):
def test_large_array_contiguous_indexing(xp, device, shape):
try:
a = xp.zeros(shape=shape, dtype=chainerx.int8, device=device)
except chainerx.ChainerxError as ex:
Expand All @@ -31,7 +54,7 @@ def test_array_contiguous_indexing(xp, device, shape):
(64, 32, 6*1024*4), # Less than 2^32 elems
(64, 32, 6*1024*512) # More than 2^32 elems
])
def test_array_noncontiguous_indexing(xp, device, shape):
def test_large_array_noncontiguous_indexing(xp, device, shape):
try:
a = xp.zeros(shape=shape, dtype=chainerx.int8, device=device)
except chainerx.ChainerxError as ex:
Expand Down

0 comments on commit 0b6b004

Please sign in to comment.