Skip to content

Commit

Permalink
update the usage and doc
Browse files Browse the repository at this point in the history
- inline binding
- use queue, dynamic_shared_memory consistently
- delete unused library

Co-authored-by: Thomas Grützmacher <thomas.gruetzmacher@kit.edu>
Co-authored-by: Tobias Ribizel <ribizel@kit.edu>
  • Loading branch information
3 people committed Jul 27, 2021
1 parent 5951d47 commit 03bd66f
Show file tree
Hide file tree
Showing 20 changed files with 246 additions and 351 deletions.
4 changes: 2 additions & 2 deletions dev_tools/oneapi/add_host_function.sh
Original file line number Diff line number Diff line change
Expand Up @@ -158,8 +158,8 @@ while IFS='' read -r line || [ -n "$line" ]; do
if [ -n "${TEMPLATE_INPUT}" ]; then
TEMPLATE_INPUT="<${TEMPLATE_INPUT}>"
fi
echo "${TEMPLATE} void ${NAME}${HOST_SUFFIX} (dim3 grid, dim3 block, gko::size_type dynamic_shared_memory, cudaStream_t stream, ${VARIABLE}) {
/*KEEP*/${NAME}${TEMPLATE_INPUT}<<<grid, block, dynamic_shared_memory, stream>>>(${VAR_INPUT});
echo "${TEMPLATE} void ${NAME}${HOST_SUFFIX} (dim3 grid, dim3 block, size_type dynamic_shared_memory, cudaStream_t queue, ${VARIABLE}) {
/*KEEP*/${NAME}${TEMPLATE_INPUT}<<<grid, block, dynamic_shared_memory, queue>>>(${VAR_INPUT});
}"
echo "${NAME} -> ${NAME}${HOST_SUFFIX}" >> ${MAP_FILE}
fi
Expand Down
3 changes: 1 addition & 2 deletions dpcpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@ target_sources(ginkgo_dpcpp
base/version.dp.cpp
base/executor.dp.cpp
base/helper.dp.cpp
base/onemkl_bindings.dp.cpp
components/absolute_array.dp.cpp
components/fill_array.dp.cpp
components/prefix_sum.dp.cpp
Expand Down Expand Up @@ -78,7 +77,7 @@ else ()
target_link_options(ginkgo_dpcpp PUBLIC -fsycl-device-code-split=per_kernel)
endif()
target_link_libraries(ginkgo_dpcpp PUBLIC ginkgo_device)
target_link_libraries(ginkgo_dpcpp PRIVATE MKL::MKL_DPCPP oneDPL)
target_link_libraries(ginkgo_dpcpp PRIVATE MKL::MKL_DPCPP)
if (GINKGO_DPCPP_SINGLE_MODE)
target_compile_definitions(ginkgo_dpcpp PRIVATE GINKGO_DPCPP_SINGLE_MODE=1)
endif()
Expand Down
94 changes: 0 additions & 94 deletions dpcpp/base/onemkl_bindings.dp.cpp

This file was deleted.

51 changes: 26 additions & 25 deletions dpcpp/base/onemkl_bindings.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -94,34 +94,35 @@ template <>
struct is_supported<std::complex<double>> : std::true_type {};


#define GKO_DECLARE_DOT(ValueType, Name) \
void Name(::cl::sycl::queue &exec_queue, std::int64_t n, \
const ValueType *x, std::int64_t incx, const ValueType *y, \
std::int64_t incy, ValueType *result)

// Declare the dot for x^T * y
GKO_DECLARE_DOT(float, dot);
GKO_DECLARE_DOT(double, dot);
GKO_DECLARE_DOT(std::complex<float>, dotu);
GKO_DECLARE_DOT(std::complex<double>, dotu);
#define GKO_BIND_DOT(ValueType, Name, Func) \
inline void Name(::cl::sycl::queue &exec_queue, std::int64_t n, \
const ValueType *x, std::int64_t incx, \
const ValueType *y, std::int64_t incy, ValueType *result) \
{ \
Func(exec_queue, n, x, incx, y, incy, result); \
} \
static_assert(true, \
"This assert is used to counter the false positive extra " \
"semi-colon warnings")

// Bind the dot for x^T * y
GKO_BIND_DOT(float, dot, oneapi::mkl::blas::row_major::dot);
GKO_BIND_DOT(double, dot, oneapi::mkl::blas::row_major::dot);
GKO_BIND_DOT(std::complex<float>, dot, oneapi::mkl::blas::row_major::dotu);
GKO_BIND_DOT(std::complex<double>, dot, oneapi::mkl::blas::row_major::dotu);
template <typename ValueType>
GKO_DECLARE_DOT(ValueType, dot)
{
detail::not_implemented(exec_queue, n, x, incx, y, incy, result);
}

// Declare the conj_dot for x' * y
GKO_DECLARE_DOT(float, conj_dot);
GKO_DECLARE_DOT(double, conj_dot);
GKO_DECLARE_DOT(std::complex<float>, conj_dot);
GKO_DECLARE_DOT(std::complex<double>, conj_dot);
GKO_BIND_DOT(ValueType, dot, detail::not_implemented);

// Bind the conj_dot for x' * y
GKO_BIND_DOT(float, conj_dot, oneapi::mkl::blas::row_major::dot);
GKO_BIND_DOT(double, conj_dot, oneapi::mkl::blas::row_major::dot);
GKO_BIND_DOT(std::complex<float>, conj_dot, oneapi::mkl::blas::row_major::dotc);
GKO_BIND_DOT(std::complex<double>, conj_dot,
oneapi::mkl::blas::row_major::dotc);
template <typename ValueType>
GKO_DECLARE_DOT(ValueType, conj_dot)
{
detail::not_implemented(exec_queue, n, x, incx, y, incy, result);
}
GKO_BIND_DOT(ValueType, conj_dot, detail::not_implemented);

#undef GKO_DECLARE_DOT
#undef GKO_BIND_DOT

} // namespace onemkl
} // namespace dpcpp
Expand Down
11 changes: 6 additions & 5 deletions dpcpp/components/format_conversion.dp.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -59,8 +59,8 @@ namespace kernel {
* It counts the number of explicit nonzeros per row of Ell.
*/
template <typename ValueType, typename IndexType>
void count_nnz_per_row(dim3 grid, dim3 block, size_t dynamic_shared_memory,
sycl::queue *stream, size_type num_rows,
void count_nnz_per_row(dim3 grid, dim3 block, size_type dynamic_shared_memory,
sycl::queue *queue, size_type num_rows,
size_type max_nnz_per_row, size_type stride,
const ValueType *values, IndexType *result);

Expand All @@ -80,9 +80,10 @@ namespace kernel {
*/
template <typename IndexType>
void convert_row_idxs_to_ptrs(dim3 grid, dim3 block,
size_t dynamic_shared_memory, sycl::queue *stream,
const IndexType *idxs, size_type num_nonzeros,
IndexType *ptrs, size_type length);
size_type dynamic_shared_memory,
sycl::queue *queue, const IndexType *idxs,
size_type num_nonzeros, IndexType *ptrs,
size_type length);


} // namespace kernel
Expand Down
12 changes: 6 additions & 6 deletions dpcpp/components/prefix_sum.dp.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -179,11 +179,11 @@ void start_prefix_sum(size_type num_elements, ValueType *__restrict__ elements,
}

template <std::uint32_t block_size, typename ValueType>
void start_prefix_sum(dim3 grid, dim3 block, size_t dynamic_shared_memory,
sycl::queue *stream, size_type num_elements,
void start_prefix_sum(dim3 grid, dim3 block, size_type dynamic_shared_memory,
sycl::queue *queue, size_type num_elements,
ValueType *elements, ValueType *block_sum)
{
stream->submit([&](sycl::handler &cgh) {
queue->submit([&](sycl::handler &cgh) {
sycl::accessor<UninitializedArray<ValueType, block_size>, 0,
sycl::access::mode::read_write,
sycl::access::target::local>
Expand Down Expand Up @@ -231,11 +231,11 @@ void finalize_prefix_sum(size_type num_elements,
}

template <std::uint32_t block_size, typename ValueType>
void finalize_prefix_sum(dim3 grid, dim3 block, size_t dynamic_shared_memory,
sycl::queue *stream, size_type num_elements,
void finalize_prefix_sum(dim3 grid, dim3 block, size_type dynamic_shared_memory,
sycl::queue *queue, size_type num_elements,
ValueType *elements, const ValueType *block_sum)
{
stream->submit([&](sycl::handler &cgh) {
queue->submit([&](sycl::handler &cgh) {
cgh.parallel_for(sycl_nd_range(grid, block),
[=](sycl::nd_item<3> item_ct1) {
finalize_prefix_sum<block_size>(
Expand Down
6 changes: 3 additions & 3 deletions dpcpp/components/reduction.dp.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -217,11 +217,11 @@ void reduce_add_array(
}

template <std::uint32_t cfg = KCFG_1D::encode(256, 16), typename ValueType>
void reduce_add_array(dim3 grid, dim3 block, size_t dynamic_shared_memory,
sycl::queue *stream, size_type size,
void reduce_add_array(dim3 grid, dim3 block, size_type dynamic_shared_memory,
sycl::queue *queue, size_type size,
const ValueType *source, ValueType *result)
{
stream->submit([&](sycl::handler &cgh) {
queue->submit([&](sycl::handler &cgh) {
sycl::accessor<UninitializedArray<ValueType, KCFG_1D::decode<0>(cfg)>,
0, sycl::access::mode::read_write,
sycl::access::target::local>
Expand Down
Loading

0 comments on commit 03bd66f

Please sign in to comment.