Skip to content

Commit

Permalink
add make_temporary_output_clone
Browse files Browse the repository at this point in the history
This provides an alternative to make_temporary_clone
that doesn't initialize the content for output-parameters
  • Loading branch information
upsj committed Jun 1, 2021
1 parent cc30d8f commit ef44f36
Show file tree
Hide file tree
Showing 7 changed files with 188 additions and 54 deletions.
79 changes: 40 additions & 39 deletions core/matrix/dense.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -331,16 +331,19 @@ void Dense<ValueType>::compute_norm2_impl(LinOp *result) const
template <typename ValueType>
void Dense<ValueType>::convert_to(Dense<ValueType> *result) const
{
if (result->get_size() && result->get_size() == this->get_size()) {
// it's important we clone the const *this instead of result, since
// otherwise the copy_back_deleter of the temporary_clone calls
// convert_to again with the same dimensions, causing an infinite
// recursion. For the same reason we need to check that the result size
// is not zero, since otherwise it could come from the initial
// convert_to call in make_temporary_clone.
auto exec = result->get_executor();
exec->run(
dense::make_copy(make_temporary_clone(exec, this).get(), result));
if (result->get_size() == this->get_size()) {
// we need to create a executor-local clone of the target data, that
// will be copied back later.
auto exec = this->get_executor();
auto result_array = make_temporary_output_clone(exec, &result->values_);
// create a (value, not pointer to avoid allocation overhead) view
// matrix on the array to avoid special-casing cross-executor copies
auto tmp_result =
Dense{exec, result->get_size(),
Array<ValueType>::view(exec, result_array->get_num_elems(),
result_array->get_data()),
result->get_stride()};
exec->run(dense::make_copy(this, &tmp_result));
} else {
result->values_ = this->values_;
result->stride_ = this->stride_;
Expand All @@ -360,12 +363,10 @@ template <typename ValueType>
void Dense<ValueType>::convert_to(
Dense<next_precision<ValueType>> *result) const
{
if (result->get_size() && result->get_size() == this->get_size()) {
// here we don't need to use the target executor, since the copy back
// invokes the non-converting convert_to(Dense<ValueType>*).
if (result->get_size() == this->get_size()) {
auto exec = this->get_executor();
exec->run(
dense::make_copy(this, make_temporary_clone(exec, result).get()));
exec->run(dense::make_copy(
this, make_temporary_output_clone(exec, result).get()));
} else {
result->values_ = this->values_;
result->stride_ = this->stride_;
Expand Down Expand Up @@ -696,8 +697,8 @@ void Dense<ValueType>::transpose(Dense<ValueType> *output) const
{
GKO_ASSERT_EQUAL_DIMENSIONS(output, gko::transpose(this->get_size()));
auto exec = this->get_executor();
exec->run(
dense::make_transpose(this, make_temporary_clone(exec, output).get()));
exec->run(dense::make_transpose(
this, make_temporary_output_clone(exec, output).get()));
}


Expand All @@ -707,7 +708,7 @@ void Dense<ValueType>::conj_transpose(Dense<ValueType> *output) const
GKO_ASSERT_EQUAL_DIMENSIONS(output, gko::transpose(this->get_size()));
auto exec = this->get_executor();
exec->run(dense::make_conj_transpose(
this, make_temporary_clone(exec, output).get()));
this, make_temporary_output_clone(exec, output).get()));
}


Expand Down Expand Up @@ -742,7 +743,7 @@ void Dense<ValueType>::permute(const Array<int32> *permutation_indices,

exec->run(dense::make_symm_permute(
make_temporary_clone(exec, permutation_indices).get(), this,
make_temporary_clone(exec, output).get()));
make_temporary_output_clone(exec, output).get()));
}


Expand All @@ -757,7 +758,7 @@ void Dense<ValueType>::permute(const Array<int64> *permutation_indices,

exec->run(dense::make_symm_permute(
make_temporary_clone(exec, permutation_indices).get(), this,
make_temporary_clone(exec, output).get()));
make_temporary_output_clone(exec, output).get()));
}


Expand Down Expand Up @@ -792,7 +793,7 @@ void Dense<ValueType>::inverse_permute(const Array<int32> *permutation_indices,

exec->run(dense::make_inv_symm_permute(
make_temporary_clone(exec, permutation_indices).get(), this,
make_temporary_clone(exec, output).get()));
make_temporary_output_clone(exec, output).get()));
}


Expand All @@ -807,7 +808,7 @@ void Dense<ValueType>::inverse_permute(const Array<int64> *permutation_indices,

exec->run(dense::make_inv_symm_permute(
make_temporary_clone(exec, permutation_indices).get(), this,
make_temporary_clone(exec, output).get()));
make_temporary_output_clone(exec, output).get()));
}


Expand Down Expand Up @@ -841,7 +842,7 @@ void Dense<ValueType>::row_permute(const Array<int32> *permutation_indices,

exec->run(dense::make_row_gather(
make_temporary_clone(exec, permutation_indices).get(), this,
make_temporary_clone(exec, output).get()));
make_temporary_output_clone(exec, output).get()));
}


Expand All @@ -855,7 +856,7 @@ void Dense<ValueType>::row_permute(const Array<int64> *permutation_indices,

exec->run(dense::make_row_gather(
make_temporary_clone(exec, permutation_indices).get(), this,
make_temporary_clone(exec, output).get()));
make_temporary_output_clone(exec, output).get()));
}


Expand Down Expand Up @@ -893,7 +894,7 @@ void Dense<ValueType>::row_gather(const Array<int32> *row_indices,

exec->run(dense::make_row_gather(
make_temporary_clone(exec, row_indices).get(), this,
make_temporary_clone(exec, row_gathered).get()));
make_temporary_output_clone(exec, row_gathered).get()));
}


Expand All @@ -908,7 +909,7 @@ void Dense<ValueType>::row_gather(const Array<int64> *row_indices,

this->get_executor()->run(dense::make_row_gather(
make_temporary_clone(exec, row_indices).get(), this,
make_temporary_clone(exec, row_gathered).get()));
make_temporary_output_clone(exec, row_gathered).get()));
}


Expand All @@ -922,7 +923,7 @@ void Dense<ValueType>::column_permute(const Array<int32> *permutation_indices,

exec->run(dense::make_column_permute(
make_temporary_clone(exec, permutation_indices).get(), this,
make_temporary_clone(exec, output).get()));
make_temporary_output_clone(exec, output).get()));
}


Expand All @@ -936,7 +937,7 @@ void Dense<ValueType>::column_permute(const Array<int64> *permutation_indices,

exec->run(dense::make_column_permute(
make_temporary_clone(exec, permutation_indices).get(), this,
make_temporary_clone(exec, output).get()));
make_temporary_output_clone(exec, output).get()));
}


Expand Down Expand Up @@ -970,7 +971,7 @@ void Dense<ValueType>::inverse_row_permute(

exec->run(dense::make_inverse_row_permute(
make_temporary_clone(exec, permutation_indices).get(), this,
make_temporary_clone(exec, output).get()));
make_temporary_output_clone(exec, output).get()));
}


Expand All @@ -984,7 +985,7 @@ void Dense<ValueType>::inverse_row_permute(

exec->run(dense::make_inverse_row_permute(
make_temporary_clone(exec, permutation_indices).get(), this,
make_temporary_clone(exec, output).get()));
make_temporary_output_clone(exec, output).get()));
}


Expand Down Expand Up @@ -1018,7 +1019,7 @@ void Dense<ValueType>::inverse_column_permute(

exec->run(dense::make_inverse_column_permute(
make_temporary_clone(exec, permutation_indices).get(), this,
make_temporary_clone(exec, output).get()));
make_temporary_output_clone(exec, output).get()));
}


Expand All @@ -1032,7 +1033,7 @@ void Dense<ValueType>::inverse_column_permute(

exec->run(dense::make_inverse_column_permute(
make_temporary_clone(exec, permutation_indices).get(), this,
make_temporary_clone(exec, output).get()));
make_temporary_output_clone(exec, output).get()));
}


Expand Down Expand Up @@ -1064,7 +1065,7 @@ void Dense<ValueType>::extract_diagonal(Diagonal<ValueType> *output) const
GKO_ASSERT_EQ(output->get_size()[0], diag_size);

exec->run(dense::make_extract_diagonal(
this, make_temporary_clone(exec, output).get()));
this, make_temporary_output_clone(exec, output).get()));
}


Expand Down Expand Up @@ -1104,7 +1105,7 @@ void Dense<ValueType>::compute_absolute(
auto exec = this->get_executor();

exec->run(dense::make_outplace_absolute_dense(
this, make_temporary_clone(exec, output).get()));
this, make_temporary_output_clone(exec, output).get()));
}


Expand All @@ -1126,7 +1127,7 @@ void Dense<ValueType>::make_complex(
auto exec = this->get_executor();

exec->run(dense::make_make_complex(
this, make_temporary_clone(exec, result).get()));
this, make_temporary_output_clone(exec, result).get()));
}


Expand All @@ -1147,8 +1148,8 @@ void Dense<ValueType>::get_real(
GKO_ASSERT_EQUAL_DIMENSIONS(this, result);
auto exec = this->get_executor();

exec->run(
dense::make_get_real(this, make_temporary_clone(exec, result).get()));
exec->run(dense::make_get_real(
this, make_temporary_output_clone(exec, result).get()));
}


Expand All @@ -1169,8 +1170,8 @@ void Dense<ValueType>::get_imag(
GKO_ASSERT_EQUAL_DIMENSIONS(this, result);
auto exec = this->get_executor();

exec->run(
dense::make_get_imag(this, make_temporary_clone(exec, result).get()));
exec->run(dense::make_get_imag(
this, make_temporary_output_clone(exec, result).get()));
}


Expand Down
29 changes: 29 additions & 0 deletions core/test/base/array.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -272,6 +272,14 @@ TYPED_TEST(Array, CanCreateTemporaryCloneOnSameExecutor)
}


TYPED_TEST(Array, CanCreateTemporaryOutputCloneOnSameExecutor)
{
auto tmp_clone = make_temporary_output_clone(this->exec, &this->x);

ASSERT_EQ(tmp_clone.get(), &this->x);
}


// For tests between different memory, check cuda/test/base/array.cu
TYPED_TEST(Array, DoesNotCreateATemporaryCloneBetweenSameMemory)
{
Expand Down Expand Up @@ -299,6 +307,27 @@ TYPED_TEST(Array, DoesNotCopyBackTemporaryCloneBetweenSameMemory)
}


TYPED_TEST(Array, CanCreateTemporaryOutputCloneOnDifferentExecutors)
{
auto other = gko::OmpExecutor::create();

{
auto tmp_clone = make_temporary_output_clone(other, &this->x);
tmp_clone->get_data()[0] = 4;
tmp_clone->get_data()[1] = 5;

// there is no reliable way to check the memory is uninitialized
ASSERT_EQ(tmp_clone->get_num_elems(), this->x.get_num_elems());
ASSERT_EQ(tmp_clone->get_executor(), other);
ASSERT_EQ(this->x.get_executor(), this->exec);
ASSERT_EQ(this->x.get_data()[0], TypeParam{5});
ASSERT_EQ(this->x.get_data()[1], TypeParam{2});
}
ASSERT_EQ(this->x.get_data()[0], TypeParam{4});
ASSERT_EQ(this->x.get_data()[1], TypeParam{5});
}


TYPED_TEST(Array, CanBeCleared)
{
this->x.clear();
Expand Down
26 changes: 26 additions & 0 deletions core/test/base/utils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -410,13 +410,39 @@ TEST_F(TemporaryClone, DoesNotCopyToSameMemory)
}


TEST_F(TemporaryClone, OutputDoesNotCopyToSameMemory)
{
auto other = gko::ReferenceExecutor::create();
auto clone = make_temporary_output_clone(other, gko::lend(obj));

ASSERT_NE(clone.get()->get_executor(), other);
ASSERT_EQ(obj->get_executor(), ref);
}


TEST_F(TemporaryClone, CopiesBackAfterLeavingScope)
{
obj->data = 4;
{
auto clone = make_temporary_clone(omp, gko::lend(obj));
clone.get()->data = 7;

ASSERT_EQ(obj->data, 4);
}
ASSERT_EQ(obj->get_executor(), ref);
ASSERT_EQ(obj->data, 7);
}


TEST_F(TemporaryClone, OutputCopiesBackAfterLeavingScope)
{
obj->data = 4;
{
auto clone = make_temporary_output_clone(omp, gko::lend(obj));
clone.get()->data = 7;

ASSERT_EQ(obj->data, 4);
}
ASSERT_EQ(obj->get_executor(), ref);
ASSERT_EQ(obj->data, 7);
}
Expand Down
11 changes: 8 additions & 3 deletions include/ginkgo/core/base/array.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -576,16 +576,21 @@ namespace detail {
template <typename T>
struct temporary_clone_helper<Array<T>> {
static std::unique_ptr<Array<T>> create(
std::shared_ptr<const Executor> exec, Array<T> *ptr)
std::shared_ptr<const Executor> exec, Array<T> *ptr, bool copy_data)
{
return std::make_unique<Array<T>>(std::move(exec), *ptr);
if (copy_data) {
return std::make_unique<Array<T>>(std::move(exec), *ptr);
} else {
return std::make_unique<Array<T>>(std::move(exec),
ptr->get_num_elems());
}
}
};

template <typename T>
struct temporary_clone_helper<const Array<T>> {
static std::unique_ptr<const Array<T>> create(
std::shared_ptr<const Executor> exec, const Array<T> *ptr)
std::shared_ptr<const Executor> exec, const Array<T> *ptr, bool)
{
return std::make_unique<const Array<T>>(std::move(exec), *ptr);
}
Expand Down
Loading

0 comments on commit ef44f36

Please sign in to comment.