Skip to content
Open
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
//==------ launch_config.hpp ------- SYCL kernel launch configuration -----==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===--------------------------------------------------------------------===//

#pragma once

#include <sycl/ext/oneapi/properties/properties.hpp>
#include <sycl/ext/oneapi/properties/property.hpp>

namespace sycl {
inline namespace _V1 {
template <int Dimensions> class nd_range;
template <int Dimensions> class range;

namespace ext::oneapi::experimental {
namespace detail {
struct AllowCTADTag;
// Type trait identifying the SYCL range types accepted by launch_config.
//
// The primary template yields false; the two partial specializations yield
// true for sycl::range<Dimensions> and sycl::nd_range<Dimensions> of any
// dimensionality. Only those exact types match: cv-qualified or reference
// types fall through to the primary template.
template <typename RangeT> struct is_range_or_nd_range : std::false_type {};
template <int Dimensions>
struct is_range_or_nd_range<range<Dimensions>> : std::true_type {};
template <int Dimensions>
struct is_range_or_nd_range<nd_range<Dimensions>> : std::true_type {};

// Shorthand for is_range_or_nd_range<RangeT>::value. Declared `inline` like
// the other header-defined constexpr variable templates of this extension.
template <typename RangeT>
inline constexpr bool is_range_or_nd_range_v =
    is_range_or_nd_range<RangeT>::value;

template <typename LCRangeT, typename LCPropertiesT> struct LaunchConfigAccess;

// Checks that none of the properties in the property list has compile-time
// effects on the kernel.
template <typename T>
struct NoPropertyHasCompileTimeKernelEffect : std::false_type {};
template <typename... Ts>
struct NoPropertyHasCompileTimeKernelEffect<properties_t<Ts...>> {
static constexpr bool value =
!(HasCompileTimeEffect<Ts>::value || ... || false);
};
} // namespace detail

// Available only when Range is range or nd_range
template <
typename RangeT, typename PropertiesT = empty_properties_t,
typename = std::enable_if_t<
ext::oneapi::experimental::detail::is_range_or_nd_range_v<RangeT>>>
class launch_config {
static_assert(ext::oneapi::experimental::detail::
NoPropertyHasCompileTimeKernelEffect<PropertiesT>::value,
"launch_config does not allow properties with compile-time "
"kernel effects.");

public:
launch_config(RangeT Range, PropertiesT Properties = {})
: MRange{Range}, MProperties{Properties} {}

private:
RangeT MRange;
PropertiesT MProperties;

const RangeT &getRange() const noexcept { return MRange; }

const PropertiesT &getProperties() const noexcept { return MProperties; }

template <typename LCRangeT, typename LCPropertiesT>
friend struct detail::LaunchConfigAccess;
};

#ifdef __cpp_deduction_guides
// CTAD workaround: declaring an explicit deduction guide avoids the warning
// GCC emits when a class is used with only implicit deduction guides.
launch_config(detail::AllowCTADTag)
-> launch_config<void, empty_properties_t, void>;
#endif // __cpp_deduction_guides

namespace detail {
// Helper for accessing the members of launch_config.
template <typename LCRangeT, typename LCPropertiesT> struct LaunchConfigAccess {
LaunchConfigAccess(const launch_config<LCRangeT, LCPropertiesT> &LaunchConfig)
: MLaunchConfig{LaunchConfig} {}

const launch_config<LCRangeT, LCPropertiesT> &MLaunchConfig;

const LCRangeT &getRange() const noexcept { return MLaunchConfig.getRange(); }

const LCPropertiesT &getProperties() const noexcept {
return MLaunchConfig.getProperties();
}
};
} // namespace detail
} // namespace ext::oneapi::experimental
} // namespace _V1
} // namespace sycl
144 changes: 72 additions & 72 deletions sycl/include/sycl/ext/oneapi/experimental/enqueue_functions.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,13 @@

#include <sycl/detail/common.hpp>
#include <sycl/event.hpp>
#include <sycl/ext/oneapi/experimental/detail/properties/launch_config.hpp>
#include <sycl/ext/oneapi/experimental/enqueue_types.hpp>
#include <sycl/ext/oneapi/experimental/free_function_traits.hpp>
#include <sycl/ext/oneapi/experimental/graph.hpp>
#include <sycl/ext/oneapi/properties/properties.hpp>
#include <sycl/handler.hpp>
#include <sycl/kernel_bundle.hpp>
#include <sycl/nd_range.hpp>
#include <sycl/queue.hpp>
#include <sycl/range.hpp>
Expand All @@ -25,78 +28,6 @@ inline namespace _V1 {
namespace ext::oneapi::experimental {

namespace detail {
// Trait for identifying sycl::range and sycl::nd_range.
template <typename RangeT> struct is_range_or_nd_range : std::false_type {};
template <int Dimensions>
struct is_range_or_nd_range<range<Dimensions>> : std::true_type {};
template <int Dimensions>
struct is_range_or_nd_range<nd_range<Dimensions>> : std::true_type {};

template <typename RangeT>
constexpr bool is_range_or_nd_range_v = is_range_or_nd_range<RangeT>::value;

template <typename LCRangeT, typename LCPropertiesT> struct LaunchConfigAccess;

// Checks that none of the properties in the property list has compile-time
// effects on the kernel.
template <typename T>
struct NoPropertyHasCompileTimeKernelEffect : std::false_type {};
template <typename... Ts>
struct NoPropertyHasCompileTimeKernelEffect<properties_t<Ts...>> {
static constexpr bool value =
!(HasCompileTimeEffect<Ts>::value || ... || false);
};
} // namespace detail

// Available only when Range is range or nd_range
template <
typename RangeT, typename PropertiesT = empty_properties_t,
typename = std::enable_if_t<
ext::oneapi::experimental::detail::is_range_or_nd_range_v<RangeT>>>
class launch_config {
static_assert(ext::oneapi::experimental::detail::
NoPropertyHasCompileTimeKernelEffect<PropertiesT>::value,
"launch_config does not allow properties with compile-time "
"kernel effects.");

public:
launch_config(RangeT Range, PropertiesT Properties = {})
: MRange{Range}, MProperties{Properties} {}

private:
RangeT MRange;
PropertiesT MProperties;

const RangeT &getRange() const noexcept { return MRange; }

const PropertiesT &getProperties() const noexcept { return MProperties; }

template <typename LCRangeT, typename LCPropertiesT>
friend struct detail::LaunchConfigAccess;
};

#ifdef __cpp_deduction_guides
// CTAD work-around to avoid warning from GCC when using default deduction
// guidance.
launch_config(detail::AllowCTADTag)
-> launch_config<void, empty_properties_t, void>;
#endif // __cpp_deduction_guides

namespace detail {
// Helper for accessing the members of launch_config.
template <typename LCRangeT, typename LCPropertiesT> struct LaunchConfigAccess {
LaunchConfigAccess(const launch_config<LCRangeT, LCPropertiesT> &LaunchConfig)
: MLaunchConfig{LaunchConfig} {}

const launch_config<LCRangeT, LCPropertiesT> &MLaunchConfig;

const LCRangeT &getRange() const noexcept { return MLaunchConfig.getRange(); }

const LCPropertiesT &getProperties() const noexcept {
return MLaunchConfig.getProperties();
}
};

template <typename CommandGroupFunc, typename PropertiesT>
void submit_impl(const queue &Q, PropertiesT Props, CommandGroupFunc &&CGF,
const sycl::detail::code_location &CodeLoc) {
Expand Down Expand Up @@ -357,6 +288,75 @@ void nd_launch(queue Q, launch_config<nd_range<Dimensions>, Properties> Config,
});
}

// Free function kernel enqueue functions
template <auto *Func, typename... ArgsT>
void single_task(queue Q, kernel_function_s<Func> KernelFunc, ArgsT &&...Args) {
(void)KernelFunc;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
(void)KernelFunc;

same below.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for the catch. The functions that take a queue actually use the parameter; it's the handler ones that don't.

submit(Q, [&](handler &CGH) {
single_task(CGH, KernelFunc, std::forward<ArgsT>(Args)...);
});
Comment on lines +295 to +297
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we have a submit_direct* version of this?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we have a submit_direct* version of this? Please sync with @slawekptak to implement it properly from the start rather than create more future work for him.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No, there is no submit_direct* version of this in the spec.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We can have it in detail:: still. Also, queue::* itself can act as submit_direct.

}

template <auto *Func, typename... ArgsT>
void single_task(handler &CGH, kernel_function_s<Func> KernelFunc,
ArgsT &&...Args) {
(void)KernelFunc;
queue Q = CGH.getQueue();
sycl::kernel_bundle Bndl =
get_kernel_bundle<Func, sycl::bundle_state::executable>(Q.get_context());
Comment on lines +304 to +306
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This creates and destroys two std::shared_ptrs for almost no reason. IMO, we should fix this getQueue() hack while we're in an ABI breaking window. Maybe by changing handler_impl to store a reference to the sycl::queue it was created with? handler_impl::MQueueOrGraph isn't used directly outside a few getters, so the change should be very simple.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you elaborate a bit please?
What shared pointers are you referring to?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

std::shared_ptr<detail::queue_impl> impl;
queue(std::shared_ptr<detail::queue_impl> impl) : impl(impl) {}

and similar for sycl::context.

sycl::kernel Krn = Bndl.template ext_oneapi_get_kernel<Func>();
CGH.set_args<ArgsT...>(std::forward<ArgsT>(Args)...);
CGH.single_task(Krn);
}

/// Enqueues the free function kernel \p Func on \p Q over \p Range.
///
/// Submits a command group to \p Q whose body calls the handler overload of
/// nd_launch with the same range, kernel tag and arguments.
///
/// \param Q          Queue the command group is submitted to.
/// \param Range      nd_range the kernel is launched over.
/// \param KernelFunc Tag object identifying the kernel function \p Func.
/// \param Args       Kernel arguments, perfectly forwarded.
template <auto *Func, int Dimensions, typename... ArgsT>
void nd_launch(queue Q, nd_range<Dimensions> Range,
               kernel_function_s<Func> KernelFunc, ArgsT &&...Args) {
  // KernelFunc is captured and passed on inside the lambda, so it is not an
  // unused parameter and needs no `(void)` suppression.
  submit(Q, [&](handler &CGH) {
    nd_launch(CGH, Range, KernelFunc, std::forward<ArgsT>(Args)...);
  });
}

/// Enqueues the free function kernel \p Func over \p Range through \p CGH.
///
/// Fetches the executable kernel bundle for \p Func from the context of the
/// queue returned by CGH.getQueue(), obtains the kernel from that bundle,
/// sets the kernel arguments and issues parallel_for on the handler.
///
/// \param CGH        Handler of the command group being built.
/// \param Range      nd_range the kernel is launched over.
/// \param KernelFunc Tag object; only its type (carrying \p Func) is used.
/// \param Args       Kernel arguments, perfectly forwarded to set_args.
template <auto *Func, int Dimensions, typename... ArgsT>
void nd_launch(handler &CGH, nd_range<Dimensions> Range,
               kernel_function_s<Func> KernelFunc, ArgsT &&...Args) {
  // The tag is consumed purely through the template argument Func.
  static_cast<void>(KernelFunc);

  queue SubmitQueue = CGH.getQueue();
  auto ExecBundle = get_kernel_bundle<Func, sycl::bundle_state::executable>(
      SubmitQueue.get_context());
  sycl::kernel FreeFuncKernel =
      ExecBundle.template ext_oneapi_get_kernel<Func>();

  CGH.set_args<ArgsT...>(std::forward<ArgsT>(Args)...);
  CGH.parallel_for(Range, FreeFuncKernel);
}

/// Enqueues the free function kernel \p Func on \p Q using the range and
/// launch properties held by \p Config.
///
/// Submits a command group to \p Q whose body calls the handler overload of
/// nd_launch with the same configuration, kernel tag and arguments.
///
/// \param Q          Queue the command group is submitted to.
/// \param Config     launch_config wrapping an nd_range and properties.
/// \param KernelFunc Tag object identifying the kernel function \p Func.
/// \param Args       Kernel arguments, perfectly forwarded.
template <auto *Func, int Dimensions, typename Properties, typename... ArgsT>
void nd_launch(queue Q, launch_config<nd_range<Dimensions>, Properties> Config,
               kernel_function_s<Func> KernelFunc, ArgsT &&...Args) {
  // KernelFunc is captured and passed on inside the lambda, so it is not an
  // unused parameter and needs no `(void)` suppression.
  submit(Q, [&](handler &CGH) {
    nd_launch(CGH, Config, KernelFunc, std::forward<ArgsT>(Args)...);
  });
}

/// Enqueues the free function kernel \p Func through \p CGH using the range
/// and launch properties held by \p Config.
///
/// Fetches the executable kernel bundle for \p Func from the context of the
/// queue returned by CGH.getQueue(), obtains the kernel from that bundle,
/// sets the kernel arguments, and launches through
/// sycl::detail::HandlerAccess::parallelForImpl with the range and
/// properties read out of \p Config.
///
/// \param CGH        Handler of the command group being built.
/// \param Config     launch_config wrapping an nd_range and properties.
/// \param KernelFunc Tag object; only its type (carrying \p Func) is used.
/// \param Args       Kernel arguments, perfectly forwarded to set_args.
template <auto *Func, int Dimensions, typename Properties, typename... ArgsT>
void nd_launch(handler &CGH,
               launch_config<nd_range<Dimensions>, Properties> Config,
               kernel_function_s<Func> KernelFunc, ArgsT &&...Args) {
  // The tag is consumed purely through the template argument Func.
  static_cast<void>(KernelFunc);

  queue SubmitQueue = CGH.getQueue();
  auto ExecBundle = get_kernel_bundle<Func, sycl::bundle_state::executable>(
      SubmitQueue.get_context());
  sycl::kernel FreeFuncKernel =
      ExecBundle.template ext_oneapi_get_kernel<Func>();

  // launch_config keeps its members private; read them via the accessor.
  using AccessT = detail::LaunchConfigAccess<nd_range<Dimensions>, Properties>;
  AccessT Access(Config);

  CGH.set_args<ArgsT...>(std::forward<ArgsT>(Args)...);
  sycl::detail::HandlerAccess::parallelForImpl(
      CGH, Access.getRange(), Access.getProperties(), FreeFuncKernel);
}

// Free-function form of handler::memcpy: forwards Dest, Src and NumBytes
// unchanged to CGH.memcpy.
inline void memcpy(handler &CGH, void *Dest, const void *Src, size_t NumBytes) {
CGH.memcpy(Dest, Src, NumBytes);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,10 @@ template <auto *Func> struct is_kernel {
template <auto *Func>
inline constexpr bool is_kernel_v = is_kernel<Func>::value;

// Empty tag type carrying a function pointer `Func` as a non-type template
// argument, so that a specific function can be identified by an ordinary
// function argument (overloads deduce `Func` from the argument's type).
template <auto *Func> struct kernel_function_s {};

// Ready-made constant of type kernel_function_s<Func>; pass
// `kernel_function<&f>` wherever a kernel_function_s<&f> is expected.
template <auto *Func> inline constexpr kernel_function_s<Func> kernel_function;

} // namespace ext::oneapi::experimental
} // namespace _V1
} // namespace sycl
27 changes: 27 additions & 0 deletions sycl/include/sycl/handler.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
#include <sycl/ext/oneapi/device_global/device_global.hpp>
#include <sycl/ext/oneapi/device_global/properties.hpp>
#include <sycl/ext/oneapi/experimental/cluster_group_prop.hpp>
#include <sycl/ext/oneapi/experimental/detail/properties/launch_config.hpp>
#include <sycl/ext/oneapi/experimental/graph.hpp>
#include <sycl/ext/oneapi/experimental/raw_kernel_arg.hpp>
#include <sycl/ext/oneapi/experimental/use_root_sync_prop.hpp>
Expand Down Expand Up @@ -146,6 +147,15 @@ class pipe;
}

namespace ext ::oneapi ::experimental {
// Forward declarations of the kernel_function_s tag type and of the
// handler-taking single_task/nd_launch overloads that accept it. They are
// declared ahead of class handler so that the class can name them in friend
// declarations.
// NOTE(review): presumably the friendship is only needed for access to
// handler::getQueue() -- confirm, and consider exposing it through
// sycl::detail::HandlerAccess instead.
template <auto *> struct kernel_function_s;
template <auto *Func, typename... Args>
void single_task(handler &, kernel_function_s<Func>, Args &&...);
template <auto *Func, int Dimensions, typename... Args>
void nd_launch(handler &, nd_range<Dimensions>, kernel_function_s<Func>,
Args &&...);
template <auto *Func, int Dimensions, typename Properties, typename... Args>
void nd_launch(handler &, launch_config<nd_range<Dimensions>, Properties>,
kernel_function_s<Func>, Args &&...);
Comment on lines +150 to +158
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is all of that just for handler::getQueue()? Can you extend

namespace detail {
class HandlerAccess {
public:
static void internalProfilingTagImpl(handler &Handler) {
Handler.internalProfilingTagImpl();
}
template <typename RangeT, typename PropertiesT>
static void parallelForImpl(handler &Handler, RangeT Range, PropertiesT Props,
kernel Kernel) {
Handler.parallel_for_impl(Range, Props, Kernel);
}
static void swap(handler &LHS, handler &RHS) {
std::swap(LHS.implOwner, RHS.implOwner);
std::swap(LHS.impl, RHS.impl);
std::swap(LHS.MLocalAccStorage, RHS.MLocalAccStorage);
std::swap(LHS.MStreamStorage, RHS.MStreamStorage);
std::swap(LHS.MKernelName, RHS.MKernelName);
std::swap(LHS.MKernel, RHS.MKernel);
std::swap(LHS.MSrcPtr, RHS.MSrcPtr);
std::swap(LHS.MDstPtr, RHS.MDstPtr);
std::swap(LHS.MLength, RHS.MLength);
std::swap(LHS.MPattern, RHS.MPattern);
std::swap(LHS.MHostKernel, RHS.MHostKernel);
std::swap(LHS.MCodeLoc, RHS.MCodeLoc);
}
// pre/postProcess are used only for reductions right now, but the
// abstractions they provide aren't reduction-specific. The main problem they
// solve is
//
// # User code
// q.submit([&](handler &cgh) {
// set_dependencies(cgh);
// enqueue_whatever(cgh);
// }); // single submission
//
// that needs to be implemented as multiple enqueues involving
// pre-/post-processing internally. SYCL prohibits recursive submits from
// inside control group function object (lambda above) so we need some
// internal interface to implement that.
__SYCL_EXPORT static void preProcess(handler &CGH, type_erased_cgfo_ty F);
__SYCL_EXPORT static void postProcess(handler &CGH, type_erased_cgfo_ty F);
template <class FunctorTy>
static void preProcess(handler &CGH, FunctorTy &Func) {
preProcess(CGH, type_erased_cgfo_ty{Func});
}
template <class FunctorTy>
static void postProcess(handler &CGH, FunctorTy &Func) {
postProcess(CGH, type_erased_cgfo_ty{Func});
}
};
} // namespace detail
instead?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, that has been added to access getQueue. Now that you've brought HandlerAccess to my attention, it seems like a better solution so I'll try to migrate it over there instead.

template <typename, typename> class work_group_memory;
template <typename, typename> class dynamic_work_group_memory;
struct image_descriptor;
Expand Down Expand Up @@ -3229,6 +3239,23 @@ class __SYCL_EXPORT handler {
friend const decltype(Obj::impl) &
sycl::detail::getSyclObjImpl(const Obj &SyclObject);

// Friend declarations for the handler-taking enqueue functions that accept a
// kernel_function_s tag, giving them access to non-public handler members.
// NOTE(review): presumably only getQueue() is needed -- confirm.

// single_task(handler &, kernel_function_s<Func>, Args &&...)
template <auto *Func, typename... Args>
friend void ext::oneapi::experimental::single_task(
handler &, ext::oneapi::experimental::kernel_function_s<Func>,
Args &&...);

// nd_launch(handler &, nd_range<Dimensions>, kernel_function_s<Func>, ...)
template <auto *Func, int Dimensions, typename... Args>
friend void ext::oneapi::experimental::nd_launch(
handler &, nd_range<Dimensions>,
ext::oneapi::experimental::kernel_function_s<Func>, Args &&...);

// nd_launch(handler &, launch_config<nd_range<Dimensions>, Properties>,
//           kernel_function_s<Func>, ...)
template <auto *Func, int Dimensions, typename Properties, typename... Args>
friend void ext::oneapi::experimental::nd_launch(
handler &,
ext::oneapi::experimental::launch_config<nd_range<Dimensions>,
Properties>,
ext::oneapi::experimental::kernel_function_s<Func>, Args &&...);

/// Read from a host pipe given a host address and
/// \param Name name of the host pipe to be passed into lower level runtime
/// \param Ptr host pointer of host pipe as identified by address of its const
Expand Down
1 change: 1 addition & 0 deletions sycl/include/sycl/sycl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,7 @@ can be disabled by setting SYCL_DISABLE_FSYCL_SYCLHPP_WARNING macro.")
#include <sycl/ext/oneapi/experimental/cuda/barrier.hpp>
#include <sycl/ext/oneapi/experimental/cuda/non_uniform_algorithms.hpp>
#include <sycl/ext/oneapi/experimental/current_device.hpp>
#include <sycl/ext/oneapi/experimental/detail/properties/launch_config.hpp>
#include <sycl/ext/oneapi/experimental/device_architecture.hpp>
#include <sycl/ext/oneapi/experimental/enqueue_functions.hpp>
#include <sycl/ext/oneapi/experimental/event_mode_property.hpp>
Expand Down
Loading