Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Dpcpp port dense #710

Merged
merged 22 commits into from
Jul 21, 2021
Merged
Show file tree
Hide file tree
Changes from 18 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions cmake/GinkgoConfig.cmake.in
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@ set(GINKGO_AMD_ARCH_FLAGS @GINKGO_AMD_ARCH_FLAGS@)

set(GINKGO_DPCPP_VERSION @GINKGO_DPCPP_VERSION@)
set(GINKGO_DPCPP_FLAGS @GINKGO_DPCPP_FLAGS@)
set(GINKGO_MKL_ROOT @GINKGO_MKL_ROOT@)

set(GINKGO_HAVE_PAPI_SDE @GINKGO_HAVE_PAPI_SDE@)

Expand Down Expand Up @@ -168,4 +169,8 @@ if((NOT GINKGO_BUILD_SHARED_LIBS) AND GINKGO_BUILD_HIP)
find_package(rocrand REQUIRED)
endif()

if((NOT GINKGO_BUILD_SHARED_LIBS) AND GINKGO_BUILD_DPCPP)
find_package(MKL CONFIG REQUIRED HINTS "${GINKGO_MKL_ROOT}")
endif()

include(${CMAKE_CURRENT_LIST_DIR}/GinkgoTargets.cmake)
8 changes: 6 additions & 2 deletions cmake/create_test.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,10 @@ function(ginkgo_create_dpcpp_test test_name)
target_compile_options(${test_target_name} PRIVATE "${GINKGO_DPCPP_FLAGS}")
target_link_options(${test_target_name} PRIVATE -fsycl-device-code-split=per_kernel)
ginkgo_set_test_target_properties(${test_name} ${test_target_name})
# Note: MKL_ENV is empty on linux. Maybe need to apply MKL_ENV to all test.
if (MKL_ENV)
set_tests_properties(${test_target_name} PROPERTIES ENVIRONMENT "${MKL_ENV}")
endif()
Comment on lines +45 to +48
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why does running tests need MKL environment variables? If this is really necessary, we should be fixing it in the environment, not CMake.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Setting environment variables is not the responsibility of the build system. I would prefer if we remove this altogether, as we can't guarantee users set it the same way for their application.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

the input env is defined by MKL cmake and not all parameters from environment, which means no difference even if user gives same env with different input.
I am not sure whether it is only for test or for all application.
I agree with you if it only contains LD_LIBRARY_PATH
but it also contains MKL_INTERFACE_LAYER, MKL_THREADING_LAYER, MKL_BLACS_MPI depends MKL input or platform

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am not sure whether it is only for test or for all application.

It is not possible to set the environment variables for an application that is built for CMake. Thus, introducing this change would mean that the tests potentially behave differently than application codes.
This is a really bad CMake setup, since it mixes runtime behavior with compile-time options. I should probably bring this up in the next Intel meeting.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I agree with you.

endfunction(ginkgo_create_dpcpp_test)

function(ginkgo_create_thread_test test_name)
Expand Down Expand Up @@ -165,7 +169,7 @@ function(ginkgo_create_common_test test_name)
# use float for DPC++ if necessary
if((exec STREQUAL "dpcpp") AND GINKGO_DPCPP_SINGLE_MODE)
target_compile_definitions(${test_target_name} PRIVATE GINKGO_COMMON_SINGLE_MODE=1)
endif()
endif()
ginkgo_set_test_target_properties(${test_name}_${exec} ${test_target_name})
endforeach()
endfunction(ginkgo_create_common_test)
endfunction(ginkgo_create_common_test)
1 change: 0 additions & 1 deletion dev_tools/scripts/format_header.sh
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,6 @@ GINKGO_LICENSE_BEACON="******************************<GINKGO LICENSE>***********

CONTENT="content.cpp" # Store the residual part (start from namespace)
BEFORE="before.cpp" # Store the main header and the #ifdef/#define of header file
BEGIN="begin.cpp" # Store the header before license
HAS_HIP_RUNTIME="false"
DURING_LICENSE="false"
INCLUDE_REGEX="^#include.*"
Expand Down
2 changes: 1 addition & 1 deletion dev_tools/scripts/regroup
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ IncludeBlocks: Regroup
IncludeCategories:
- Regex: '^<(rapidjson|gflags|gtest|papi).*'
Priority: 3
- Regex: '^<(omp|cu|hip|thrust|CL/|cooperative).*'
- Regex: '^<(omp|cu|hip|thrust|CL/|cooperative|oneapi).*'
Priority: 2
- Regex: '^<ginkgo.*'
Priority: 5
Expand Down
17 changes: 15 additions & 2 deletions dpcpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,15 @@ endif()
ginkgo_extract_dpcpp_version(${CMAKE_CXX_COMPILER} GINKGO_DPCPP_VERSION)
set(GINKGO_DPCPP_VERSION ${GINKGO_DPCPP_VERSION} PARENT_SCOPE)

find_package(MKL CONFIG REQUIRED HINTS "$ENV{MKLROOT}")
set(GINKGO_MKL_ROOT "${MKL_ROOT}" PARENT_SCOPE)

add_library(ginkgo_dpcpp $<TARGET_OBJECTS:ginkgo_dpcpp_device> "")
target_sources(ginkgo_dpcpp
PRIVATE
base/version.dp.cpp
base/executor.dp.cpp
base/helper.dp.cpp
components/absolute_array.dp.cpp
components/fill_array.dp.cpp
components/prefix_sum.dp.cpp
Expand Down Expand Up @@ -54,13 +58,22 @@ target_sources(ginkgo_dpcpp
ginkgo_compile_features(ginkgo_dpcpp)
target_compile_definitions(ginkgo_dpcpp PRIVATE GKO_COMPILING_DPCPP)

set(GINKGO_DPCPP_FLAGS ${GINKGO_COMPILER_FLAGS} -fsycl)
set(GINKGO_DPCPP_FLAGS ${GINKGO_DPCPP_FLAGS} PARENT_SCOPE)
target_compile_options(ginkgo_dpcpp PRIVATE "${GINKGO_DPCPP_FLAGS}")
# Note. add MKL via PRIVATE not PUBLIC (MKL example shows) to avoid find_package(MKL) everywhere when link ginkgo
yhmtsai marked this conversation as resolved.
Show resolved Hide resolved
target_compile_options(ginkgo_dpcpp PRIVATE $<TARGET_PROPERTY:MKL::MKL_DPCPP,INTERFACE_COMPILE_OPTIONS>)
target_compile_features(ginkgo_dpcpp PRIVATE cxx_std_17)
target_include_directories(ginkgo_dpcpp PRIVATE $<TARGET_PROPERTY:MKL::MKL_DPCPP,INTERFACE_INCLUDE_DIRECTORIES>)
target_link_options(ginkgo_dpcpp PRIVATE -fsycl-device-lib=all)
target_link_options(ginkgo_dpcpp PRIVATE -fsycl-device-code-split=per_kernel)
# When building ginkgo as a static library, we need to use dpcpp and per_kernel
# link option when the program uses dpcpp related function.
yhmtsai marked this conversation as resolved.
Show resolved Hide resolved
if (BUILD_SHARED_LIBS)
target_link_options(ginkgo_dpcpp PRIVATE -fsycl-device-code-split=per_kernel)
else ()
target_link_options(ginkgo_dpcpp PUBLIC -fsycl-device-code-split=per_kernel)
endif()
target_link_libraries(ginkgo_dpcpp PUBLIC ginkgo_device)
target_link_libraries(ginkgo_dpcpp PRIVATE $<LINK_ONLY:MKL::MKL_DPCPP>)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why this separation into linker and compiler flags? Does MKL add SYCL-specific flags?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

what do you mean separation here?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As far as I understand it, this only links against MKL, but doesn't add any include paths or other compiler flags associated with the MKL_DPCPP target. Why is this necessary, as opposed to plain

Suggested change
target_link_libraries(ginkgo_dpcpp PRIVATE $<LINK_ONLY:MKL::MKL_DPCPP>)
target_link_libraries(ginkgo_dpcpp PRIVATE MKL::MKL_DPCPP)

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I try both version. LINK_ONLY is usually used by INTERFACE_LINK_LIBRARIES in set_targert_properties
using target_link_libraries will handle it automatically.
previous one gives $<LINK_ONLY:<$<LINK_ONLY>:MKL::MKL_DPCPP>>
updated one gives $<LINK_ONLY>:MKL::MKL_DPCPP>

if (GINKGO_DPCPP_SINGLE_MODE)
target_compile_definitions(ginkgo_dpcpp PRIVATE GINKGO_DPCPP_SINGLE_MODE=1)
endif()
Expand Down
6 changes: 6 additions & 0 deletions dpcpp/base/config.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,12 @@ struct config {
*/
using lane_mask_type = uint64;


/**
* The number of threads within a CUDA warp.
*/
static constexpr uint32 warp_size = 16;
upsj marked this conversation as resolved.
Show resolved Hide resolved

/**
* The bitmask of the entire warp.
*/
Expand Down
64 changes: 64 additions & 0 deletions dpcpp/base/helper.dp.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
/*******************************<GINKGO LICENSE>******************************
Copyright (c) 2017-2021, the Ginkgo authors
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:

1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.

2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.

3. Neither the name of the copyright holder nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
******************************<GINKGO LICENSE>*******************************/

#include <CL/sycl.hpp>


#include "dpcpp/base/helper.hpp"


namespace gko {
namespace kernels {
namespace dpcpp {


bool validate(sycl::queue *queue, unsigned int workgroup_size,
unsigned int subgroup_size)
{
{
auto device = queue->get_device();
auto subgroup_size_list =
device.get_info<cl::sycl::info::device::sub_group_sizes>();
auto max_workgroup_size =
device.get_info<sycl::info::device::max_work_group_size>();
bool allowed = false;
for (auto &i : subgroup_size_list) {
allowed |= (i == subgroup_size);
}
return allowed && (workgroup_size <= max_workgroup_size);
}
}
yhmtsai marked this conversation as resolved.
Show resolved Hide resolved


} // namespace dpcpp
} // namespace kernels
} // namespace gko
45 changes: 35 additions & 10 deletions dpcpp/base/helper.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -40,12 +40,36 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include <CL/sycl.hpp>


#include <ginkgo/core/base/exception_helpers.hpp>
#include <ginkgo/core/base/types.hpp>


#include "core/base/types.hpp"
#include "dpcpp/base/dim3.dp.hpp"


/**
* GKO_ENABLE_DEFAULT_HOST gives a default host implementation for those
* kernels which require encoded config but do not need explicit template
* parameter and share memory
yhmtsai marked this conversation as resolved.
Show resolved Hide resolved
*
* @param name_ the name of the host function with config
* @param kernel_ the kernel name
*/
#define GKO_ENABLE_DEFAULT_HOST(name_, kernel_) \
template <typename... InferredArgs> \
void name_(dim3 grid, dim3 block, size_t dynamic_shared_memory, \
yhmtsai marked this conversation as resolved.
Show resolved Hide resolved
sycl::queue *queue, InferredArgs... args) \
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For now I fail to see how the encoded config is used here? Maybe what you need to say here is give a default host implementation for those kernels which do not require a configuration or shared memory ?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, and it somehow will be replaced by another pull request

{ \
queue->submit([&](sycl::handler &cgh) { \
cgh.parallel_for(sycl_nd_range(grid, block), \
[=](sycl::nd_item<3> item_ct1) { \
kernel_(args..., item_ct1); \
}); \
}); \
}


/**
* GKO_ENABLE_DEFAULT_HOST_CONFIG gives a default host implementation for those
* kernels which require encoded config but do not need explicit template
Expand Down Expand Up @@ -115,18 +139,19 @@ namespace dpcpp {


bool validate(sycl::queue *queue, unsigned workgroup_size,
unsigned subgroup_size)
unsigned subgroup_size);


template <typename IterArr, typename Validate>
std::uint32_t get_first_cfg(IterArr &arr, Validate verify)
yhmtsai marked this conversation as resolved.
Show resolved Hide resolved
{
auto device = queue->get_device();
auto subgroup_size_list =
device.get_info<cl::sycl::info::device::sub_group_sizes>();
auto max_workgroup_size =
device.get_info<sycl::info::device::max_work_group_size>();
bool allowed = false;
for (auto &i : subgroup_size_list) {
allowed |= (i == subgroup_size);
for (auto &cfg : arr) {
if (verify(cfg)) {
return cfg;
}
Comment on lines +171 to +174
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This will match the bigger workgroup size and subgroup size first, right?

With the way Intel architectures work for now, should we do it differently? I guess that would mean changing the order of the configurations for every kernel, or making something more complex here.
I think what makes sense for Intel for now is to match a configuration with a big workgroup size but a small subgroup size: even the smaller subgroup size are enough to fill the execution unit, putting more only makes the subgroup happen in a loop. (the only case where I'm not sure about this is 16 bit floating point operations when using any)

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, if considering the usage from current dense.
It will depends on the verify function and it is changeable in the host

}
return allowed && (workgroup_size <= max_workgroup_size);
GKO_NOT_SUPPORTED(arr);
return 0;
yhmtsai marked this conversation as resolved.
Show resolved Hide resolved
}


Expand Down
128 changes: 128 additions & 0 deletions dpcpp/base/onemkl_bindings.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,128 @@
/*******************************<GINKGO LICENSE>******************************
Copyright (c) 2017-2021, the Ginkgo authors
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:

1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.

2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.

3. Neither the name of the copyright holder nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
******************************<GINKGO LICENSE>*******************************/

#ifndef GKO_DPCPP_BASE_ONEMKL_BINDINGS_HPP_
#define GKO_DPCPP_BASE_ONEMKL_BINDINGS_HPP_


#include <CL/sycl.hpp>
#include <oneapi/mkl.hpp>


namespace gko {
/**
* @brief The device specific kernels namespace.
*
* @ingroup kernels
*/
namespace kernels {
/**
* @brief The DPCPP namespace.
*
* @ingroup dpcpp
*/
namespace dpcpp {
/**
* @brief The ONEMKL namespace.
*
* @ingroup onemkl
*/
namespace onemkl {
/**
* @brief The detail namespace.
*
* @ingroup detail
*/
namespace detail {


template <typename... Args>
inline void not_implemented(Args &&...) GKO_NOT_IMPLEMENTED;


} // namespace detail


template <typename ValueType>
struct is_supported : std::false_type {};
yhmtsai marked this conversation as resolved.
Show resolved Hide resolved

template <>
struct is_supported<float> : std::true_type {};

template <>
struct is_supported<double> : std::true_type {};

template <>
struct is_supported<std::complex<float>> : std::true_type {};

template <>
struct is_supported<std::complex<double>> : std::true_type {};


#define GKO_BIND_DOT(ValueType, Name, Func) \
void Name(::cl::sycl::queue &exec_queue, std::int64_t n, \
const ValueType *x, std::int64_t incx, const ValueType *y, \
std::int64_t incy, ValueType *result) \
{ \
Func(exec_queue, n, x, incx, y, incy, result); \
} \
static_assert(true, \
"This assert is used to counter the false positive extra " \
"semi-colon warnings")

// Bind the dot for x^T * y
GKO_BIND_DOT(float, dot, oneapi::mkl::blas::row_major::dot);
GKO_BIND_DOT(double, dot, oneapi::mkl::blas::row_major::dot);
GKO_BIND_DOT(std::complex<float>, dot, oneapi::mkl::blas::row_major::dotu);
GKO_BIND_DOT(std::complex<double>, dot, oneapi::mkl::blas::row_major::dotu);
template <typename ValueType>
GKO_BIND_DOT(ValueType, dot, detail::not_implemented);

// Bind the conj_dot for x' * y
GKO_BIND_DOT(float, conj_dot, oneapi::mkl::blas::row_major::dot);
GKO_BIND_DOT(double, conj_dot, oneapi::mkl::blas::row_major::dot);
GKO_BIND_DOT(std::complex<float>, conj_dot, oneapi::mkl::blas::row_major::dotc);
GKO_BIND_DOT(std::complex<double>, conj_dot,
oneapi::mkl::blas::row_major::dotc);
template <typename ValueType>
GKO_BIND_DOT(ValueType, conj_dot, detail::not_implemented);

#undef GKO_BIND_DOT


} // namespace onemkl
} // namespace dpcpp
} // namespace kernels
} // namespace gko


#endif // GKO_DPCPP_BASE_ONEMKL_BINDINGS_HPP_
Loading