Skip to content

Commit

Permalink
[SYCL] Improve get_kernel_bundle performance (#5496)
Browse files Browse the repository at this point in the history
This patch improves the performance of the get_kernel_bundle overload that
takes a list of kernel_ids.

There are three main improvements:

    1. To speed up the search for the binary images that contain the given
    kernel ids, a special map is prebuilt during initial image registration.
    2. To avoid constructing a vector of kernel_ids for a device image each
    time, a special map is prebuilt during initial image registration.
    3. The vector of kernel_ids for device images is now shared to avoid
    unnecessary copies.
  • Loading branch information
romanovvlad committed Feb 18, 2022
1 parent 13a7455 commit 4817b3f
Show file tree
Hide file tree
Showing 7 changed files with 147 additions and 140 deletions.
2 changes: 1 addition & 1 deletion sycl/source/backend.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -187,7 +187,7 @@ make_kernel_bundle(pi_native_handle NativeHandle, const context &TargetContext,
// this by pre-building the device image and extracting kernel info. We can't
// do the same to user images, since they may contain references to undefined
// symbols (e.g. when kernel_bundle is supposed to be joined with another).
std::vector<kernel_id> KernelIDs{};
auto KernelIDs = std::make_shared<std::vector<kernel_id>>();
auto DevImgImpl = std::make_shared<device_image_impl>(
nullptr, TargetContext, Devices, State, KernelIDs, PiProgram);
device_image_plain DevImg{DevImgImpl};
Expand Down
25 changes: 17 additions & 8 deletions sycl/source/detail/device_image_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,12 @@ __SYCL_INLINE_NAMESPACE(cl) {
namespace sycl {
namespace detail {

// Ordering functor for objects of type T: the result of getSyclObjImpl for the
// left operand is compared with the result for the right operand via
// operator<.
// NOTE(review): no hash is computed in this functor itself; the ordering is
// whatever operator< means for the type getSyclObjImpl returns (presumably the
// impl pointer) - confirm against getSyclObjImpl.
template <class T> struct LessByHash {
  bool operator()(const T &LHS, const T &RHS) const {
    const auto &LHSImpl = getSyclObjImpl(LHS);
    const auto &RHSImpl = getSyclObjImpl(RHS);
    return LHSImpl < RHSImpl;
  }
};

// The class is impl counterpart for sycl::device_image
// It can represent a program in different states, kernel_id's it has and state
// of specialization constants for it
Expand All @@ -51,7 +57,8 @@ class device_image_impl {

device_image_impl(const RTDeviceBinaryImage *BinImage, context Context,
std::vector<device> Devices, bundle_state State,
std::vector<kernel_id> KernelIDs, RT::PiProgram Program)
std::shared_ptr<std::vector<kernel_id>> KernelIDs,
RT::PiProgram Program)
: MBinImage(BinImage), MContext(std::move(Context)),
MDevices(std::move(Devices)), MState(State), MProgram(Program),
MKernelIDs(std::move(KernelIDs)) {
Expand All @@ -60,17 +67,17 @@ class device_image_impl {

device_image_impl(const RTDeviceBinaryImage *BinImage, context Context,
std::vector<device> Devices, bundle_state State,
std::vector<kernel_id> KernelIDs, RT::PiProgram Program,
const SpecConstMapT &SpecConstMap,
std::shared_ptr<std::vector<kernel_id>> KernelIDs,
RT::PiProgram Program, const SpecConstMapT &SpecConstMap,
const std::vector<unsigned char> &SpecConstsBlob)
: MBinImage(BinImage), MContext(std::move(Context)),
MDevices(std::move(Devices)), MState(State), MProgram(Program),
MKernelIDs(std::move(KernelIDs)), MSpecConstsBlob(SpecConstsBlob),
MSpecConstSymMap(SpecConstMap) {}

bool has_kernel(const kernel_id &KernelIDCand) const noexcept {
return std::binary_search(MKernelIDs.begin(), MKernelIDs.end(),
KernelIDCand, LessByNameComp{});
return std::binary_search(MKernelIDs->begin(), MKernelIDs->end(),
KernelIDCand, LessByHash<kernel_id>{});
}

bool has_kernel(const kernel_id &KernelIDCand,
Expand All @@ -83,7 +90,7 @@ class device_image_impl {
}

const std::vector<kernel_id> &get_kernel_ids() const noexcept {
return MKernelIDs;
return *MKernelIDs;
}

bool has_specialization_constants() const noexcept {
Expand Down Expand Up @@ -176,7 +183,9 @@ class device_image_impl {

const context &get_context() const noexcept { return MContext; }

std::vector<kernel_id> &get_kernel_ids_ref() noexcept { return MKernelIDs; }
std::shared_ptr<std::vector<kernel_id>> &get_kernel_ids_ptr() noexcept {
return MKernelIDs;
}

std::vector<unsigned char> &get_spec_const_blob_ref() noexcept {
return MSpecConstsBlob;
Expand Down Expand Up @@ -312,7 +321,7 @@ class device_image_impl {
RT::PiProgram MProgram = nullptr;
// List of kernel ids available in this image, elements should be sorted
// according to LessByHash<kernel_id>, the comparator has_kernel uses for
// its binary search
std::vector<kernel_id> MKernelIDs;
std::shared_ptr<std::vector<kernel_id>> MKernelIDs;

// A mutex for synchronizing access to the spec constants blob. Mutable because
// it needs to be locked in the const method for getting a spec constant value.
Expand Down
6 changes: 0 additions & 6 deletions sycl/source/detail/kernel_bundle_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,12 +28,6 @@ __SYCL_INLINE_NAMESPACE(cl) {
namespace sycl {
namespace detail {

// Comparator that orders two objects of type T by applying operator< to the
// values getSyclObjImpl returns for each of them.
// NOTE(review): despite the name, no hash is computed here; the ordering is
// defined by the type getSyclObjImpl returns (presumably the impl pointer) -
// confirm against getSyclObjImpl.
template <class T> struct LessByHash {
bool operator()(const T &LHS, const T &RHS) const {
return getSyclObjImpl(LHS) < getSyclObjImpl(RHS);
}
};

static bool checkAllDevicesAreInContext(const std::vector<device> &Devices,
const context &Context) {
const std::vector<device> &ContextDevices = Context.get_devices();
Expand Down

0 comments on commit 4817b3f

Please sign in to comment.