Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 29 additions & 6 deletions src/TiledArray/tensor/arena_kernels.h
Original file line number Diff line number Diff line change
Expand Up @@ -316,21 +316,44 @@ OuterTensor arena_trivial_binary(const LeftTensor& left,
using inner_range_t = typename OuterTensor::value_type::range_type;
TA_ASSERT(left.range().volume() == right.range().volume());
TA_ASSERT(left.nbatch() == right.nbatch());
auto range_fn = [&left](std::size_t ord) -> inner_range_t {
// Union sparsity: a result cell is present if *either* operand cell is.
// ToT arrays with the same outer shape can still differ in which inner cells
// are populated within an outer tile (e.g. occ_tile_size>1 aggregates several
// pairs, some screened to null). A cell present in only one operand is
// combined against an implicit zero slab below -- correct for the linear ops
// (add: l+0 / 0+r; subt: l-0 / 0-r) and numerically correct for mult (l*0=0,
// emitted as an explicit zero tile). Without this, a lone-left cell would
// read a null right slab (segfault) and a lone-right cell would be silently
// dropped, losing that addend.
auto range_fn = [&left, &right](std::size_t ord) -> inner_range_t {
const auto& l = left.data()[ord];
return l.empty() ? inner_range_t{} : l.range();
if (!l.empty()) return l.range();
const auto& r = right.data()[ord];
return r.empty() ? inner_range_t{} : r.range();
};
OuterTensor result = arena_outer_init<OuterTensor>(
left.range(), left.nbatch(), range_fn, alignof(elem_t),
/*zero_init=*/false);
const std::size_t N_cells = left.range().volume() * left.nbatch();
std::vector<elem_t> zeros; // grown lazily; implicit-zero slab for lone cells
for (std::size_t ord = 0; ord < N_cells; ++ord) {
auto& dst = result.data()[ord];
if (dst.empty()) continue;
TA_ASSERT(left.data()[ord].size() == right.data()[ord].size());
TA_ASSERT(left.data()[ord].size() == dst.size());
fill_op(dst.data(), left.data()[ord].data(), right.data()[ord].data(),
dst.size());
const auto& l = left.data()[ord];
const auto& r = right.data()[ord];
const std::size_t n = dst.size();
const bool have_l = !l.empty();
const bool have_r = !r.empty();
TA_ASSERT(!have_l || l.size() == n);
TA_ASSERT(!have_r || r.size() == n);
if (have_l && have_r) {
fill_op(dst.data(), l.data(), r.data(), n);
} else {
if (zeros.size() < n) zeros.assign(n, elem_t{});
const elem_t* l_ptr = have_l ? l.data() : zeros.data();
const elem_t* r_ptr = have_r ? r.data() : zeros.data();
fill_op(dst.data(), l_ptr, r_ptr, n);
}
}
return result;
}
Expand Down
96 changes: 96 additions & 0 deletions tests/arena_tensor_kernels.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -725,4 +725,100 @@ BOOST_AUTO_TEST_CASE(tot_axpy_to_accumulates_scaled_operand) {
}
}

// --- mismatched null-inner-cell coverage --------------------------------
// occ_tile_size>1 (and any block-sparse ToT) can produce two operands with
// the same outer shape but different *inner*-cell sparsity within an outer
// tile. Regression coverage for the bug where arena_trivial_binary sized the
// result by the left operand only and read the right unconditionally: a cell
// present in left but null in right read a null slab (segfault), and a cell
// present in right but null in left was silently dropped.

namespace {

/// Build an Outer of `n_outer` cells with a caller-chosen null pattern.
///
/// Cell `ord` is left null when `present[ord]` is false; otherwise it is a
/// length-`n_inner` inner tensor whose element `i` holds
/// `base + ord * 100 + i`, so every populated element is distinguishable.
///
/// @param n_outer number of outer cells to create
/// @param n_inner extent of every populated inner cell
/// @param base    offset added to every generated element
/// @param present per-cell population mask; must cover all `n_outer` cells
/// @return the populated Outer
Outer make_outer_sparse(std::size_t n_outer, std::size_t n_inner, double base,
                        const std::vector<bool>& present) {
  // The mask is indexed by every ordinal below; a short mask would be an
  // out-of-bounds read, so abort the calling test instead.
  BOOST_REQUIRE(present.size() >= n_outer);

  TA::Range outer_r{static_cast<long>(n_outer)};
  // Null cells are requested by handing back a default-constructed range.
  auto shape_fn = [n_inner, &present](std::size_t ord) {
    return present[ord] ? TA::Range{static_cast<long>(n_inner)} : TA::Range();
  };
  Outer outer = TA::detail::arena_outer_init<Outer>(outer_r, 1, shape_fn);

  for (std::size_t ord = 0; ord < n_outer; ++ord) {
    Inner& inner = outer.data()[ord];
    if (!inner) continue;  // null cell: nothing to fill
    for (std::size_t i = 0; i < inner.size(); ++i)
      inner.data()[i] = base + ord * 100.0 + i;
  }
  return outer;
}

// L present on {0,1,2}, R present on {1,2,4}, over 5 outer cells:
// 0 = lone-left, 1&2 = both, 3 = both-null, 4 = lone-right.
constexpr std::size_t kNo = 5;  // outer cell count shared by the tests below
constexpr std::size_t kNi = 4;  // extent of every populated inner cell
const std::vector<bool> kLpresent{true, true, true, false, false};
const std::vector<bool> kRpresent{false, true, true, false, true};

}  // namespace

// add() over operands whose inner-cell null patterns differ: the result must
// be present wherever either operand is, with a missing operand cell acting
// as zero, and null only where both operands are null.
BOOST_AUTO_TEST_CASE(trivial_add_mismatched_null_inners) {
  Outer L = make_outer_sparse(kNo, kNi, 1.0, kLpresent);
  Outer R = make_outer_sparse(kNo, kNi, 0.5, kRpresent);
  Outer sum = L.add(R);  // must not segfault on lone-left cell 0
  for (std::size_t ord = 0; ord < kNo; ++ord) {
    const Inner& l = L.data()[ord];
    const Inner& r = R.data()[ord];
    const Inner& d = sum.data()[ord];
    const bool hl = bool(l), hr = bool(r);
    if (!hl && !hr) {
      BOOST_CHECK(!d);  // both null -> null result
      continue;
    }
    BOOST_REQUIRE(bool(d));
    // Pin the result extent before indexing the operands with it: a wrongly
    // sized result would otherwise read past an operand, or pass vacuously.
    BOOST_REQUIRE(d.size() == kNi);
    for (std::size_t i = 0; i < d.size(); ++i) {
      const double lv = hl ? l.data()[i] : 0.0;
      const double rv = hr ? r.data()[i] : 0.0;
      BOOST_CHECK_EQUAL(d.data()[i], lv + rv);  // union: lone-right kept too
    }
  }
}

// subt() over operands whose inner-cell null patterns differ: a lone-left
// cell must come back unchanged (l - 0), a lone-right cell negated (0 - r),
// and a cell null in both operands must stay null.
BOOST_AUTO_TEST_CASE(trivial_subt_mismatched_null_inners) {
  Outer L = make_outer_sparse(kNo, kNi, 5.0, kLpresent);
  Outer R = make_outer_sparse(kNo, kNi, 1.0, kRpresent);
  Outer diff = L.subt(R);
  for (std::size_t ord = 0; ord < kNo; ++ord) {
    const Inner& l = L.data()[ord];
    const Inner& r = R.data()[ord];
    const Inner& d = diff.data()[ord];
    const bool hl = bool(l), hr = bool(r);
    if (!hl && !hr) {
      BOOST_CHECK(!d);  // both null -> null result
      continue;
    }
    BOOST_REQUIRE(bool(d));
    // Pin the result extent before indexing the operands with it: a wrongly
    // sized result would otherwise read past an operand, or pass vacuously.
    BOOST_REQUIRE(d.size() == kNi);
    for (std::size_t i = 0; i < d.size(); ++i) {
      const double lv = hl ? l.data()[i] : 0.0;
      const double rv = hr ? r.data()[i] : 0.0;
      BOOST_CHECK_EQUAL(d.data()[i], lv - rv);  // lone-right -> -r
    }
  }
}

// mult() over operands whose inner-cell null patterns differ: where both
// operands are present the result is the elementwise product; where at most
// one is present the result may be either null or an all-zero cell.
BOOST_AUTO_TEST_CASE(trivial_mult_mismatched_null_inners) {
  Outer L = make_outer_sparse(kNo, kNi, 2.0, kLpresent);
  Outer R = make_outer_sparse(kNo, kNi, 0.5, kRpresent);
  Outer prod = L.mult(R);
  for (std::size_t ord = 0; ord < kNo; ++ord) {
    const Inner& l = L.data()[ord];
    const Inner& r = R.data()[ord];
    const Inner& d = prod.data()[ord];
    const bool hl = bool(l), hr = bool(r);
    if (hl && hr) {
      BOOST_REQUIRE(bool(d));
      // Pin the result extent before indexing the operands with it: a
      // wrongly sized result would otherwise read past an operand.
      BOOST_REQUIRE(d.size() == kNi);
      for (std::size_t i = 0; i < d.size(); ++i)
        BOOST_CHECK_EQUAL(d.data()[i], l.data()[i] * r.data()[i]);
    } else if (bool(d)) {
      // a lone cell multiplies against an implicit zero -> a zero tile
      // (numerically equivalent to absent); tolerate either policy.
      for (std::size_t i = 0; i < d.size(); ++i)
        BOOST_CHECK_EQUAL(d.data()[i], 0.0);
    }
  }
}

BOOST_AUTO_TEST_SUITE_END()
104 changes: 97 additions & 7 deletions tests/arena_tot_trivial.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ bool inners_share_one_slab(const outer_t& tot) {
return true;
}

}
} // namespace

BOOST_AUTO_TEST_SUITE(arena_tot_trivial_suite, TA_UT_LABEL_SERIAL)

Expand Down Expand Up @@ -88,8 +88,8 @@ BOOST_AUTO_TEST_CASE(add_bit_equal_and_one_slab) {
for (std::size_t ord = 0; ord < L.range().volume(); ++ord) {
inner_t inner((L.data() + ord)->range());
for (std::size_t i = 0; i < inner.range().volume(); ++i)
inner.at_ordinal(i) = (L.data() + ord)->at_ordinal(i) +
(R.data() + ord)->at_ordinal(i);
inner.at_ordinal(i) =
(L.data() + ord)->at_ordinal(i) + (R.data() + ord)->at_ordinal(i);
*(baseline.data() + ord) = std::move(inner);
}
BOOST_CHECK(tot_equal(arena_result, baseline));
Expand All @@ -104,8 +104,8 @@ BOOST_AUTO_TEST_CASE(subt_bit_equal_and_one_slab) {
for (std::size_t ord = 0; ord < L.range().volume(); ++ord) {
inner_t inner((L.data() + ord)->range());
for (std::size_t i = 0; i < inner.range().volume(); ++i)
inner.at_ordinal(i) = (L.data() + ord)->at_ordinal(i) -
(R.data() + ord)->at_ordinal(i);
inner.at_ordinal(i) =
(L.data() + ord)->at_ordinal(i) - (R.data() + ord)->at_ordinal(i);
*(baseline.data() + ord) = std::move(inner);
}
BOOST_CHECK(tot_equal(arena_result, baseline));
Expand All @@ -120,8 +120,8 @@ BOOST_AUTO_TEST_CASE(mult_elementwise_bit_equal_and_one_slab) {
for (std::size_t ord = 0; ord < L.range().volume(); ++ord) {
inner_t inner((L.data() + ord)->range());
for (std::size_t i = 0; i < inner.range().volume(); ++i)
inner.at_ordinal(i) = (L.data() + ord)->at_ordinal(i) *
(R.data() + ord)->at_ordinal(i);
inner.at_ordinal(i) =
(L.data() + ord)->at_ordinal(i) * (R.data() + ord)->at_ordinal(i);
*(baseline.data() + ord) = std::move(inner);
}
BOOST_CHECK(tot_equal(arena_result, baseline));
Expand All @@ -141,4 +141,94 @@ BOOST_AUTO_TEST_CASE(arena_outlives_source) {
(9.0 + ord * 100.0 + i) * 2.0);
}

// --- mismatched null-inner-cell coverage (non-arena inner) ---------------
// Same kernel (arena_trivial_binary) backs Tensor<Tensor<double>>; exercise
// the union-sparsity / implicit-zero path with mismatched per-cell nulls.
// An unassigned outer cell is a default (empty) inner Tensor.

namespace {

/// Build an outer tensor of `N_outer` cells with a caller-chosen null pattern.
///
/// `present[ord]==false` leaves cell `ord` a null (empty) inner tensor;
/// otherwise the cell is a length-`n_inner` inner tensor whose element `i`
/// holds `base + ord * 100 + i`.
///
/// @param N_outer number of outer cells to create
/// @param n_inner extent of every populated inner cell
/// @param base    offset added to every generated element
/// @param present per-cell population mask; must cover all `N_outer` cells
/// @return the populated outer tensor
outer_t make_tot_sparse(std::size_t N_outer, std::size_t n_inner, double base,
                        const std::vector<bool>& present) {
  // The mask is indexed by every ordinal below; a short mask would be an
  // out-of-bounds read, so abort the calling test instead.
  BOOST_REQUIRE(present.size() >= N_outer);

  outer_t outer(TA::Range{static_cast<long>(N_outer)}, 1);
  for (std::size_t ord = 0; ord < N_outer; ++ord) {
    if (!present[ord]) continue;  // leave default-constructed -> empty
    inner_t inner(TA::Range{static_cast<long>(n_inner)});
    for (std::size_t i = 0; i < n_inner; ++i)
      inner.at_ordinal(i) = base + ord * 100.0 + i;
    *(outer.data() + ord) = std::move(inner);
  }
  return outer;
}

// 0 = lone-left, 1&2 = both, 3 = both-null, 4 = lone-right.
const std::vector<bool> nz_L{true, true, true, false, false};
const std::vector<bool> nz_R{false, true, true, false, true};

}  // namespace

// add() over operands whose inner-cell null patterns differ: the result must
// be present wherever either operand is, with a missing operand cell acting
// as zero, and empty only where both operands are empty.
BOOST_AUTO_TEST_CASE(add_mismatched_null_inners) {
  constexpr std::size_t n_outer = 5;  // matches the length of nz_L / nz_R
  constexpr std::size_t n_inner = 4;
  outer_t L = make_tot_sparse(n_outer, n_inner, 1.0, nz_L);
  outer_t R = make_tot_sparse(n_outer, n_inner, 0.5, nz_R);
  outer_t sum = L.add(R);  // must not segfault on lone-left cell 0
  for (std::size_t ord = 0; ord < n_outer; ++ord) {
    const inner_t& l = *(L.data() + ord);
    const inner_t& r = *(R.data() + ord);
    const inner_t& d = *(sum.data() + ord);
    const bool hl = !l.empty(), hr = !r.empty();
    if (!hl && !hr) {
      BOOST_CHECK(d.empty());  // both null -> null result
      continue;
    }
    BOOST_REQUIRE(!d.empty());
    // Pin the result extent before indexing the operands with it: a wrongly
    // sized result would otherwise index past an operand's extent.
    BOOST_REQUIRE(d.range().volume() == n_inner);
    for (std::size_t i = 0; i < d.range().volume(); ++i) {
      const double lv = hl ? l.at_ordinal(i) : 0.0;
      const double rv = hr ? r.at_ordinal(i) : 0.0;
      BOOST_CHECK_EQUAL(d.at_ordinal(i), lv + rv);
    }
  }
}

// subt() over operands whose inner-cell null patterns differ: a lone-left
// cell must come back unchanged (l - 0), a lone-right cell negated (0 - r),
// and a cell empty in both operands must stay empty.
BOOST_AUTO_TEST_CASE(subt_mismatched_null_inners) {
  constexpr std::size_t n_outer = 5;  // matches the length of nz_L / nz_R
  constexpr std::size_t n_inner = 4;
  outer_t L = make_tot_sparse(n_outer, n_inner, 5.0, nz_L);
  outer_t R = make_tot_sparse(n_outer, n_inner, 1.0, nz_R);
  outer_t diff = L.subt(R);
  for (std::size_t ord = 0; ord < n_outer; ++ord) {
    const inner_t& l = *(L.data() + ord);
    const inner_t& r = *(R.data() + ord);
    const inner_t& d = *(diff.data() + ord);
    const bool hl = !l.empty(), hr = !r.empty();
    if (!hl && !hr) {
      BOOST_CHECK(d.empty());  // both null -> null result
      continue;
    }
    BOOST_REQUIRE(!d.empty());
    // Pin the result extent before indexing the operands with it: a wrongly
    // sized result would otherwise index past an operand's extent.
    BOOST_REQUIRE(d.range().volume() == n_inner);
    for (std::size_t i = 0; i < d.range().volume(); ++i) {
      const double lv = hl ? l.at_ordinal(i) : 0.0;
      const double rv = hr ? r.at_ordinal(i) : 0.0;
      BOOST_CHECK_EQUAL(d.at_ordinal(i), lv - rv);
    }
  }
}

// mult() over operands whose inner-cell null patterns differ: where both
// operands are present the result is the elementwise product; where at most
// one is present the result may be either empty or an all-zero cell.
BOOST_AUTO_TEST_CASE(mult_mismatched_null_inners) {
  constexpr std::size_t n_outer = 5;  // matches the length of nz_L / nz_R
  constexpr std::size_t n_inner = 4;
  outer_t L = make_tot_sparse(n_outer, n_inner, 2.0, nz_L);
  outer_t R = make_tot_sparse(n_outer, n_inner, 0.5, nz_R);
  outer_t prod = L.mult(R);
  for (std::size_t ord = 0; ord < n_outer; ++ord) {
    const inner_t& l = *(L.data() + ord);
    const inner_t& r = *(R.data() + ord);
    const inner_t& d = *(prod.data() + ord);
    if (!l.empty() && !r.empty()) {
      BOOST_REQUIRE(!d.empty());
      // Pin the result extent before indexing the operands with it: a
      // wrongly sized result would otherwise index past an operand's extent.
      BOOST_REQUIRE(d.range().volume() == n_inner);
      for (std::size_t i = 0; i < d.range().volume(); ++i)
        BOOST_CHECK_EQUAL(d.at_ordinal(i), l.at_ordinal(i) * r.at_ordinal(i));
    } else if (!d.empty()) {
      // A lone cell multiplied against an implicit zero may surface as an
      // explicit zero cell; accept that as well as an empty result.
      for (std::size_t i = 0; i < d.range().volume(); ++i)
        BOOST_CHECK_EQUAL(d.at_ordinal(i), 0.0);
    }
  }
}

BOOST_AUTO_TEST_SUITE_END()
Loading