Skip to content

Commit

Permalink
add make_temporary_output_clone
Browse files Browse the repository at this point in the history
This provides an alternative to make_temporary_clone
that doesn't initialize the content for output-parameters
  • Loading branch information
upsj committed Jun 1, 2021
1 parent 65228ef commit f455156
Show file tree
Hide file tree
Showing 7 changed files with 175 additions and 42 deletions.
54 changes: 27 additions & 27 deletions core/matrix/dense.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -335,7 +335,7 @@ void Dense<ValueType>::convert_to(Dense<ValueType> *result) const
// we need to create a executor-local clone of the target data, that
// will be copied back later.
auto exec = this->get_executor();
auto result_array = make_temporary_clone(exec, &result->values_);
auto result_array = make_temporary_output_clone(exec, &result->values_);
// create a (value, not pointer to avoid allocation overhead) view
// matrix on the array to avoid special-casing cross-executor copies
auto tmp_result =
Expand Down Expand Up @@ -365,8 +365,8 @@ void Dense<ValueType>::convert_to(
{
if (result->get_size() == this->get_size()) {
auto exec = this->get_executor();
exec->run(
dense::make_copy(this, make_temporary_clone(exec, result).get()));
exec->run(dense::make_copy(
this, make_temporary_output_clone(exec, result).get()));
} else {
result->values_ = this->values_;
result->stride_ = this->stride_;
Expand Down Expand Up @@ -697,8 +697,8 @@ void Dense<ValueType>::transpose(Dense<ValueType> *output) const
{
GKO_ASSERT_EQUAL_DIMENSIONS(output, gko::transpose(this->get_size()));
auto exec = this->get_executor();
exec->run(
dense::make_transpose(this, make_temporary_clone(exec, output).get()));
exec->run(dense::make_transpose(
this, make_temporary_output_clone(exec, output).get()));
}


Expand All @@ -708,7 +708,7 @@ void Dense<ValueType>::conj_transpose(Dense<ValueType> *output) const
GKO_ASSERT_EQUAL_DIMENSIONS(output, gko::transpose(this->get_size()));
auto exec = this->get_executor();
exec->run(dense::make_conj_transpose(
this, make_temporary_clone(exec, output).get()));
this, make_temporary_output_clone(exec, output).get()));
}


Expand Down Expand Up @@ -743,7 +743,7 @@ void Dense<ValueType>::permute(const Array<int32> *permutation_indices,

exec->run(dense::make_symm_permute(
make_temporary_clone(exec, permutation_indices).get(), this,
make_temporary_clone(exec, output).get()));
make_temporary_output_clone(exec, output).get()));
}


Expand All @@ -758,7 +758,7 @@ void Dense<ValueType>::permute(const Array<int64> *permutation_indices,

exec->run(dense::make_symm_permute(
make_temporary_clone(exec, permutation_indices).get(), this,
make_temporary_clone(exec, output).get()));
make_temporary_output_clone(exec, output).get()));
}


Expand Down Expand Up @@ -793,7 +793,7 @@ void Dense<ValueType>::inverse_permute(const Array<int32> *permutation_indices,

exec->run(dense::make_inv_symm_permute(
make_temporary_clone(exec, permutation_indices).get(), this,
make_temporary_clone(exec, output).get()));
make_temporary_output_clone(exec, output).get()));
}


Expand All @@ -808,7 +808,7 @@ void Dense<ValueType>::inverse_permute(const Array<int64> *permutation_indices,

exec->run(dense::make_inv_symm_permute(
make_temporary_clone(exec, permutation_indices).get(), this,
make_temporary_clone(exec, output).get()));
make_temporary_output_clone(exec, output).get()));
}


Expand Down Expand Up @@ -842,7 +842,7 @@ void Dense<ValueType>::row_permute(const Array<int32> *permutation_indices,

exec->run(dense::make_row_gather(
make_temporary_clone(exec, permutation_indices).get(), this,
make_temporary_clone(exec, output).get()));
make_temporary_output_clone(exec, output).get()));
}


Expand All @@ -856,7 +856,7 @@ void Dense<ValueType>::row_permute(const Array<int64> *permutation_indices,

exec->run(dense::make_row_gather(
make_temporary_clone(exec, permutation_indices).get(), this,
make_temporary_clone(exec, output).get()));
make_temporary_output_clone(exec, output).get()));
}


Expand Down Expand Up @@ -894,7 +894,7 @@ void Dense<ValueType>::row_gather(const Array<int32> *row_indices,

exec->run(dense::make_row_gather(
make_temporary_clone(exec, row_indices).get(), this,
make_temporary_clone(exec, row_gathered).get()));
make_temporary_output_clone(exec, row_gathered).get()));
}


Expand All @@ -909,7 +909,7 @@ void Dense<ValueType>::row_gather(const Array<int64> *row_indices,

this->get_executor()->run(dense::make_row_gather(
make_temporary_clone(exec, row_indices).get(), this,
make_temporary_clone(exec, row_gathered).get()));
make_temporary_output_clone(exec, row_gathered).get()));
}


Expand All @@ -923,7 +923,7 @@ void Dense<ValueType>::column_permute(const Array<int32> *permutation_indices,

exec->run(dense::make_column_permute(
make_temporary_clone(exec, permutation_indices).get(), this,
make_temporary_clone(exec, output).get()));
make_temporary_output_clone(exec, output).get()));
}


Expand All @@ -937,7 +937,7 @@ void Dense<ValueType>::column_permute(const Array<int64> *permutation_indices,

exec->run(dense::make_column_permute(
make_temporary_clone(exec, permutation_indices).get(), this,
make_temporary_clone(exec, output).get()));
make_temporary_output_clone(exec, output).get()));
}


Expand Down Expand Up @@ -971,7 +971,7 @@ void Dense<ValueType>::inverse_row_permute(

exec->run(dense::make_inverse_row_permute(
make_temporary_clone(exec, permutation_indices).get(), this,
make_temporary_clone(exec, output).get()));
make_temporary_output_clone(exec, output).get()));
}


Expand All @@ -985,7 +985,7 @@ void Dense<ValueType>::inverse_row_permute(

exec->run(dense::make_inverse_row_permute(
make_temporary_clone(exec, permutation_indices).get(), this,
make_temporary_clone(exec, output).get()));
make_temporary_output_clone(exec, output).get()));
}


Expand Down Expand Up @@ -1019,7 +1019,7 @@ void Dense<ValueType>::inverse_column_permute(

exec->run(dense::make_inverse_column_permute(
make_temporary_clone(exec, permutation_indices).get(), this,
make_temporary_clone(exec, output).get()));
make_temporary_output_clone(exec, output).get()));
}


Expand All @@ -1033,7 +1033,7 @@ void Dense<ValueType>::inverse_column_permute(

exec->run(dense::make_inverse_column_permute(
make_temporary_clone(exec, permutation_indices).get(), this,
make_temporary_clone(exec, output).get()));
make_temporary_output_clone(exec, output).get()));
}


Expand Down Expand Up @@ -1065,7 +1065,7 @@ void Dense<ValueType>::extract_diagonal(Diagonal<ValueType> *output) const
GKO_ASSERT_EQ(output->get_size()[0], diag_size);

exec->run(dense::make_extract_diagonal(
this, make_temporary_clone(exec, output).get()));
this, make_temporary_output_clone(exec, output).get()));
}


Expand Down Expand Up @@ -1105,7 +1105,7 @@ void Dense<ValueType>::compute_absolute(
auto exec = this->get_executor();

exec->run(dense::make_outplace_absolute_dense(
this, make_temporary_clone(exec, output).get()));
this, make_temporary_output_clone(exec, output).get()));
}


Expand All @@ -1127,7 +1127,7 @@ void Dense<ValueType>::make_complex(
auto exec = this->get_executor();

exec->run(dense::make_make_complex(
this, make_temporary_clone(exec, result).get()));
this, make_temporary_output_clone(exec, result).get()));
}


Expand All @@ -1148,8 +1148,8 @@ void Dense<ValueType>::get_real(
GKO_ASSERT_EQUAL_DIMENSIONS(this, result);
auto exec = this->get_executor();

exec->run(
dense::make_get_real(this, make_temporary_clone(exec, result).get()));
exec->run(dense::make_get_real(
this, make_temporary_output_clone(exec, result).get()));
}


Expand All @@ -1170,8 +1170,8 @@ void Dense<ValueType>::get_imag(
GKO_ASSERT_EQUAL_DIMENSIONS(this, result);
auto exec = this->get_executor();

exec->run(
dense::make_get_imag(this, make_temporary_clone(exec, result).get()));
exec->run(dense::make_get_imag(
this, make_temporary_output_clone(exec, result).get()));
}


Expand Down
29 changes: 29 additions & 0 deletions core/test/base/array.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -272,6 +272,14 @@ TYPED_TEST(Array, CanCreateTemporaryCloneOnSameExecutor)
}


TYPED_TEST(Array, CanCreateTemporaryOutputCloneOnSameExecutor)
{
auto tmp_clone = make_temporary_output_clone(this->exec, &this->x);

ASSERT_EQ(tmp_clone.get(), &this->x);
}


// For tests between different memory, check cuda/test/base/array.cu
TYPED_TEST(Array, DoesNotCreateATemporaryCloneBetweenSameMemory)
{
Expand Down Expand Up @@ -299,6 +307,27 @@ TYPED_TEST(Array, DoesNotCopyBackTemporaryCloneBetweenSameMemory)
}


TYPED_TEST(Array, CanCreateTemporaryOutputCloneOnDifferentExecutors)
{
auto other = gko::OmpExecutor::create();

{
auto tmp_clone = make_temporary_output_clone(other, &this->x);
tmp_clone->get_data()[0] = 4;
tmp_clone->get_data()[1] = 5;

// there is no reliable way to check the memory is uninitialized
ASSERT_EQ(tmp_clone->get_num_elems(), this->x.get_num_elems());
ASSERT_EQ(tmp_clone->get_executor(), other);
ASSERT_EQ(this->x.get_executor(), this->exec);
ASSERT_EQ(this->x.get_data()[0], TypeParam{5});
ASSERT_EQ(this->x.get_data()[1], TypeParam{2});
}
ASSERT_EQ(this->x.get_data()[0], TypeParam{4});
ASSERT_EQ(this->x.get_data()[1], TypeParam{5});
}


TYPED_TEST(Array, CanBeCleared)
{
this->x.clear();
Expand Down
26 changes: 26 additions & 0 deletions core/test/base/utils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -410,13 +410,39 @@ TEST_F(TemporaryClone, DoesNotCopyToSameMemory)
}


TEST_F(TemporaryClone, OutputDoesNotCopyToSameMemory)
{
auto other = gko::ReferenceExecutor::create();
auto clone = make_temporary_output_clone(other, gko::lend(obj));

ASSERT_NE(clone.get()->get_executor(), other);
ASSERT_EQ(obj->get_executor(), ref);
}


TEST_F(TemporaryClone, CopiesBackAfterLeavingScope)
{
obj->data = 4;
{
auto clone = make_temporary_clone(omp, gko::lend(obj));
clone.get()->data = 7;

ASSERT_EQ(obj->data, 4);
}
ASSERT_EQ(obj->get_executor(), ref);
ASSERT_EQ(obj->data, 7);
}


TEST_F(TemporaryClone, OutputCopiesBackAfterLeavingScope)
{
obj->data = 4;
{
auto clone = make_temporary_output_clone(omp, gko::lend(obj));
clone.get()->data = 7;

ASSERT_EQ(obj->data, 4);
}
ASSERT_EQ(obj->get_executor(), ref);
ASSERT_EQ(obj->data, 7);
}
Expand Down
11 changes: 8 additions & 3 deletions include/ginkgo/core/base/array.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -576,16 +576,21 @@ namespace detail {
template <typename T>
struct temporary_clone_helper<Array<T>> {
static std::unique_ptr<Array<T>> create(
std::shared_ptr<const Executor> exec, Array<T> *ptr)
std::shared_ptr<const Executor> exec, Array<T> *ptr, bool copy_data)
{
return std::make_unique<Array<T>>(std::move(exec), *ptr);
if (copy_data) {
return std::make_unique<Array<T>>(std::move(exec), *ptr);
} else {
return std::make_unique<Array<T>>(std::move(exec),
ptr->get_num_elems());
}
}
};

template <typename T>
struct temporary_clone_helper<const Array<T>> {
static std::unique_ptr<const Array<T>> create(
std::shared_ptr<const Executor> exec, const Array<T> *ptr)
std::shared_ptr<const Executor> exec, const Array<T> *ptr, bool)
{
return std::make_unique<const Array<T>>(std::move(exec), *ptr);
}
Expand Down
38 changes: 32 additions & 6 deletions include/ginkgo/core/base/temporary_clone.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,7 @@ class copy_back_deleter<const T> {
template <typename T>
struct temporary_clone_helper {
static std::unique_ptr<T> create(std::shared_ptr<const Executor> exec,
T *ptr)
T *ptr, bool)
{
return gko::clone(std::move(exec), ptr);
}
Expand Down Expand Up @@ -137,19 +137,22 @@ class temporary_clone {
*
* @param exec the executor where the clone will be created
* @param ptr a pointer to the object of which the clone will be created
* @param copy_data should the data be copied to the executor, or should
* only the result be copied back afterwards?
*/
explicit temporary_clone(std::shared_ptr<const Executor> exec, pointer ptr)
explicit temporary_clone(std::shared_ptr<const Executor> exec, pointer ptr,
bool copy_data = true)
{
if (ptr->get_executor()->memory_accessible(exec)) {
// just use the object we already have
handle_ = handle_type(ptr, null_deleter<T>());
} else {
// clone the object to the new executor and make sure it's copied
// back before we delete it
handle_ = handle_type(
temporary_clone_helper<T>::create(std::move(exec), ptr)
.release(),
copy_back_deleter<T>(ptr));
handle_ = handle_type(temporary_clone_helper<T>::create(
std::move(exec), ptr, copy_data)
.release(),
copy_back_deleter<T>(ptr));
}
}

Expand Down Expand Up @@ -196,6 +199,29 @@ detail::temporary_clone<T> make_temporary_clone(
}


/**
* Creates a uninitialized temporary_clone that will be copied back to the input
* afterwards. It can be used for output parameters to avoid an unnecessary copy
* in make_temporary_clone.
*
* This is a helper function which avoids the need to explicitly specify the
* type of the object, as would be the case if using the constructor of
* temporary_clone.
*
* @param exec the executor where the uninitialized clone will be created
* @param ptr a pointer to the object of which the clone will be created
*/
template <typename T>
detail::temporary_clone<T> make_temporary_output_clone(
std::shared_ptr<const Executor> exec, T *ptr)
{
static_assert(
!std::is_const<T>::value,
"make_temporary_output_clone should only be used on non-const objects");
return detail::temporary_clone<T>(std::move(exec), ptr);
}


} // namespace gko


Expand Down
Loading

0 comments on commit f455156

Please sign in to comment.