Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 22 additions & 22 deletions sycl/include/sycl/ext/oneapi/experimental/enqueue_functions.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@

#pragma once

#include <utility> // for std::forward
#include <utility>

#include <sycl/detail/common.hpp>
#include <sycl/event.hpp>
Expand Down Expand Up @@ -72,14 +72,20 @@ template <typename LCRangeT, typename LCPropertiesT> struct LaunchConfigAccess {
return MLaunchConfig.getProperties();
}
};

template <typename CommandGroupFunc>
void submit_impl(queue &Q, CommandGroupFunc &&CGF,
const sycl::detail::code_location &CodeLoc) {
Q.submit_without_event(std::forward<CommandGroupFunc>(CGF), CodeLoc);
}
} // namespace detail

template <typename CommandGroupFunc>
void submit(queue Q, CommandGroupFunc &&CGF,
const sycl::detail::code_location &CodeLoc =
sycl::detail::code_location::current()) {
// TODO: Use new submit without Events.
Q.submit(std::forward<CommandGroupFunc>(CGF), CodeLoc);
sycl::ext::oneapi::experimental::detail::submit_impl(
Q, std::forward<CommandGroupFunc>(CGF), CodeLoc);
}

template <typename CommandGroupFunc>
Expand Down Expand Up @@ -205,7 +211,8 @@ template <typename KernelName = sycl::detail::auto_name, int Dimensions,
void nd_launch(queue Q, nd_range<Dimensions> Range, const KernelType &KernelObj,
ReductionsT &&...Reductions) {
submit(Q, [&](handler &CGH) {
nd_launch(CGH, Range, KernelObj, std::forward<ReductionsT>(Reductions)...);
nd_launch<KernelName>(CGH, Range, KernelObj,
std::forward<ReductionsT>(Reductions)...);
});
}

Expand All @@ -228,7 +235,8 @@ template <typename KernelName = sycl::detail::auto_name, int Dimensions,
void nd_launch(queue Q, launch_config<nd_range<Dimensions>, Properties> Config,
const KernelType &KernelObj, ReductionsT &&...Reductions) {
submit(Q, [&](handler &CGH) {
nd_launch(CGH, Config, KernelObj, std::forward<ReductionsT>(Reductions)...);
nd_launch<KernelName>(CGH, Config, KernelObj,
std::forward<ReductionsT>(Reductions)...);
});
}

Expand Down Expand Up @@ -270,11 +278,9 @@ inline void memcpy(handler &CGH, void *Dest, const void *Src, size_t NumBytes) {
CGH.memcpy(Dest, Src, NumBytes);
}

inline void memcpy(queue Q, void *Dest, const void *Src, size_t NumBytes,
const sycl::detail::code_location &CodeLoc =
sycl::detail::code_location::current()) {
submit(Q, [&](handler &CGH) { memcpy(CGH, Dest, Src, NumBytes); }, CodeLoc);
}
__SYCL_EXPORT void memcpy(queue Q, void *Dest, const void *Src, size_t NumBytes,
const sycl::detail::code_location &CodeLoc =
sycl::detail::code_location::current());

template <typename T>
void copy(handler &CGH, const T *Src, T *Dest, size_t Count) {
Expand All @@ -292,11 +298,9 @@ inline void memset(handler &CGH, void *Ptr, int Value, size_t NumBytes) {
CGH.memset(Ptr, Value, NumBytes);
}

inline void memset(queue Q, void *Ptr, int Value, size_t NumBytes,
const sycl::detail::code_location &CodeLoc =
sycl::detail::code_location::current()) {
submit(Q, [&](handler &CGH) { memset(CGH, Ptr, Value, NumBytes); }, CodeLoc);
}
__SYCL_EXPORT void memset(queue Q, void *Ptr, int Value, size_t NumBytes,
const sycl::detail::code_location &CodeLoc =
sycl::detail::code_location::current());

template <typename T>
void fill(sycl::handler &CGH, T *Ptr, const T &Pattern, size_t Count) {
Expand Down Expand Up @@ -324,13 +328,9 @@ inline void mem_advise(handler &CGH, void *Ptr, size_t NumBytes, int Advice) {
CGH.mem_advise(Ptr, NumBytes, Advice);
}

inline void mem_advise(queue Q, void *Ptr, size_t NumBytes, int Advice,
const sycl::detail::code_location &CodeLoc =
sycl::detail::code_location::current()) {
submit(
Q, [&](handler &CGH) { mem_advise(CGH, Ptr, NumBytes, Advice); },
CodeLoc);
}
__SYCL_EXPORT void mem_advise(queue Q, void *Ptr, size_t NumBytes, int Advice,
const sycl::detail::code_location &CodeLoc =
sycl::detail::code_location::current());

inline void barrier(handler &CGH) { CGH.ext_oneapi_barrier(); }

Expand Down
39 changes: 39 additions & 0 deletions sycl/include/sycl/handler.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -465,6 +465,7 @@ class __SYCL_EXPORT handler {
///
/// \param Queue is a SYCL queue.
/// \param IsHost indicates if this handler is created for SYCL host device.
/// TODO: Unused. Remove with ABI break.
handler(std::shared_ptr<detail::queue_impl> Queue, bool IsHost);

/// Constructs SYCL handler from the associated queue and the submission's
Expand All @@ -476,10 +477,36 @@ class __SYCL_EXPORT handler {
/// \param SecondaryQueue is the secondary SYCL queue of the submission. This
/// is null if no secondary queue is associated with the submission.
/// \param IsHost indicates if this handler is created for SYCL host device.
/// TODO: Unused. Remove with ABI break.
handler(std::shared_ptr<detail::queue_impl> Queue,
std::shared_ptr<detail::queue_impl> PrimaryQueue,
std::shared_ptr<detail::queue_impl> SecondaryQueue, bool IsHost);

/// Constructs SYCL handler from queue.
///
/// \param Queue is a SYCL queue.
/// \param IsHost indicates if this handler is created for SYCL host device.
/// \param CallerNeedsEvent indicates if the event resulting from this handler
/// is needed by the caller.
handler(std::shared_ptr<detail::queue_impl> Queue, bool IsHost,
bool CallerNeedsEvent);

/// Constructs SYCL handler from the associated queue and the submission's
/// primary and secondary queue.
///
/// \param Queue is a SYCL queue. This is equal to either PrimaryQueue or
/// SecondaryQueue.
/// \param PrimaryQueue is the primary SYCL queue of the submission.
/// \param SecondaryQueue is the secondary SYCL queue of the submission. This
/// is null if no secondary queue is associated with the submission.
/// \param IsHost indicates if this handler is created for SYCL host device.
/// \param CallerNeedsEvent indicates if the event resulting from this handler
/// is needed by the caller.
handler(std::shared_ptr<detail::queue_impl> Queue,
std::shared_ptr<detail::queue_impl> PrimaryQueue,
std::shared_ptr<detail::queue_impl> SecondaryQueue, bool IsHost,
bool CallerNeedsEvent);

/// Constructs SYCL handler from Graph.
///
/// The hander will add the command-group as a node to the graph rather than
Expand Down Expand Up @@ -575,6 +602,16 @@ class __SYCL_EXPORT handler {
/// \return a SYCL event object representing the command group
event finalize();

/// Constructs CG object of specific type, passes it to Scheduler and
/// returns sycl::event object representing the command group.
/// It's expected that the method is the latest method executed before
/// object destruction.
/// \param CallerNeedsEvent Specifies if the caller needs an event
/// representing the work related to this handler.
///
/// \return a SYCL event object representing the command group
event finalize(bool CallerNeedsEvent);

/// Saves streams associated with this handler.
///
/// Streams are then forwarded to command group and flushed in the scheduler.
Expand Down Expand Up @@ -1184,6 +1221,8 @@ class __SYCL_EXPORT handler {
Size == 32 || Size == 64 || Size == 128;
}

bool eventNeeded() const;

template <int Dims, typename LambdaArgType> struct TransformUserItemType {
using type = std::conditional_t<
std::is_convertible_v<nd_item<Dims>, LambdaArgType>, nd_item<Dims>,
Expand Down
33 changes: 33 additions & 0 deletions sycl/include/sycl/queue.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,12 @@ namespace ext ::oneapi ::experimental {
// returned by info::queue::state
enum class queue_state { executing, recording };
struct image_descriptor;

namespace detail {
template <typename CommandGroupFunc>
void submit_impl(queue &Q, CommandGroupFunc &&CGF,
const sycl::detail::code_location &CodeLoc);
} // namespace detail
} // namespace ext::oneapi::experimental

/// Encapsulates a single SYCL queue which schedules kernels on a SYCL device.
Expand Down Expand Up @@ -2689,13 +2695,40 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase<queue> {
const detail::code_location &);
#endif

template <typename CommandGroupFunc>
friend void ext::oneapi::experimental::detail::submit_impl(
queue &Q, CommandGroupFunc &&CGF,
const sycl::detail::code_location &CodeLoc);

/// A template-free version of submit.
event submit_impl(std::function<void(handler &)> CGH,
const detail::code_location &CodeLoc);
/// A template-free version of submit.
event submit_impl(std::function<void(handler &)> CGH, queue secondQueue,
const detail::code_location &CodeLoc);

/// A template-free version of submit_without_event.
void submit_without_event_impl(std::function<void(handler &)> CGH,
const detail::code_location &CodeLoc);

/// Submits a command group function object to the queue, in order to be
/// scheduled for execution on the device.
///
/// \param CGF is a function object containing command group.
/// \param CodeLoc is the code location of the submit call (default argument)
template <typename T>
std::enable_if_t<std::is_invocable_r_v<void, T, handler &>, void>
submit_without_event(T CGF, const detail::code_location &CodeLoc) {
detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
#if __SYCL_USE_FALLBACK_ASSERT
// If post-processing is needed, fall back to the regular submit.
// TODO: Revisit whether we can avoid this.
submit(CGF, CodeLoc);
#else
submit_without_event_impl(CGF, CodeLoc);
#endif // __SYCL_USE_FALLBACK_ASSERT
}

/// Checks if the event needs to be discarded and if so, discards it and
/// returns a discarded event. Otherwise, it returns input event.
/// TODO: move to impl class in the next ABI Breaking window
Expand Down
5 changes: 3 additions & 2 deletions sycl/include/sycl/reduction.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -1178,8 +1178,9 @@ namespace reduction {
inline void finalizeHandler(handler &CGH) { CGH.finalize(); }
template <class FunctorTy> void withAuxHandler(handler &CGH, FunctorTy Func) {
event E = CGH.finalize();
handler AuxHandler(CGH.MQueue, CGH.MIsHost);
AuxHandler.depends_on(E);
handler AuxHandler(CGH.MQueue, CGH.MIsHost, CGH.eventNeeded());
if (!createSyclObjFromImpl<queue>(CGH.MQueue).is_in_order())
AuxHandler.depends_on(E);
AuxHandler.saveCodeLoc(CGH.MCodeLoc);
Func(AuxHandler);
CGH.MLastEvent = AuxHandler.finalize();
Expand Down
1 change: 1 addition & 0 deletions sycl/source/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -248,6 +248,7 @@ set(SYCL_COMMON_SOURCES
"context.cpp"
"device.cpp"
"device_selector.cpp"
"enqueue_functions.cpp"
"event.cpp"
"exception.cpp"
"exception_list.cpp"
Expand Down
9 changes: 5 additions & 4 deletions sycl/source/detail/graph_impl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -690,7 +690,8 @@ sycl::detail::pi::PiExtSyncPoint exec_graph_impl::enqueueNode(

sycl::detail::EventImplPtr Event =
sycl::detail::Scheduler::getInstance().addCG(
Node->getCGCopy(), AllocaQueue, CommandBuffer, Deps);
Node->getCGCopy(), AllocaQueue, /*EventNeeded=*/true, CommandBuffer,
Deps);

MCommandMap[Node] = Event->getCommandBufferCommand();
return Event->getSyncPoint();
Expand Down Expand Up @@ -928,7 +929,7 @@ exec_graph_impl::enqueue(const std::shared_ptr<sycl::detail::queue_impl> &Queue,
CommandBuffer, nullptr, std::move(CGData));

NewEvent = sycl::detail::Scheduler::getInstance().addCG(
std::move(CommandGroup), Queue);
std::move(CommandGroup), Queue, /*EventNeeded=*/true);
}
NewEvent->setEventFromSubmittedExecCommandBuffer(true);
} else if ((CurrentPartition->MSchedule.size() > 0) &&
Expand All @@ -946,7 +947,7 @@ exec_graph_impl::enqueue(const std::shared_ptr<sycl::detail::queue_impl> &Queue,
.MQueue = Queue;

NewEvent = sycl::detail::Scheduler::getInstance().addCG(
NodeImpl->getCGCopy(), Queue);
NodeImpl->getCGCopy(), Queue, /*EventNeeded=*/true);
} else {
std::vector<std::shared_ptr<sycl::detail::event_impl>> ScheduledEvents;
for (auto &NodeImpl : CurrentPartition->MSchedule) {
Expand Down Expand Up @@ -982,7 +983,7 @@ exec_graph_impl::enqueue(const std::shared_ptr<sycl::detail::queue_impl> &Queue,
// dependencies are propagated in findRealDeps
sycl::detail::EventImplPtr EventImpl =
sycl::detail::Scheduler::getInstance().addCG(
NodeImpl->getCGCopy(), Queue);
NodeImpl->getCGCopy(), Queue, /*EventNeeded=*/true);

ScheduledEvents.push_back(EventImpl);
}
Expand Down
10 changes: 8 additions & 2 deletions sycl/source/detail/handler_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,9 +31,11 @@ enum class HandlerSubmissionState : std::uint8_t {
class handler_impl {
public:
handler_impl(std::shared_ptr<queue_impl> SubmissionPrimaryQueue,
std::shared_ptr<queue_impl> SubmissionSecondaryQueue)
std::shared_ptr<queue_impl> SubmissionSecondaryQueue,
bool EventNeeded)
: MSubmissionPrimaryQueue(std::move(SubmissionPrimaryQueue)),
MSubmissionSecondaryQueue(std::move(SubmissionSecondaryQueue)){};
MSubmissionSecondaryQueue(std::move(SubmissionSecondaryQueue)),
MEventNeeded(EventNeeded) {};

handler_impl() = default;

Expand Down Expand Up @@ -74,6 +76,10 @@ class handler_impl {
/// submission is a fallback from a previous submission.
std::shared_ptr<queue_impl> MSubmissionSecondaryQueue;

/// Bool stores information about whether the event resulting from the
/// corresponding work is required.
bool MEventNeeded = true;

// Stores auxiliary resources used by internal operations.
std::vector<std::shared_ptr<const void>> MAuxiliaryResources;

Expand Down
Loading