From a67112154ffd5c85162db8be3ba52e23d3c6a5d7 Mon Sep 17 00:00:00 2001 From: Artur Gainullin Date: Thu, 16 May 2019 18:07:15 -0700 Subject: [PATCH] [SYCL] Stream class Stream class wraps an accessor to the underlying stream buffer. Offset in this buffer is updated atomically to allow streaming by multiple threads safely. Current implementation is not complete. It supports output of string, char and boolean types. Other operators are defined with todo in the body. Signed-off-by: Artur Gainullin --- clang/lib/Sema/SemaSYCL.cpp | 17 - sycl/CMakeLists.txt | 2 + sycl/include/CL/sycl.hpp | 1 + sycl/include/CL/sycl/accessor.hpp | 24 +- sycl/include/CL/sycl/buffer.hpp | 2 +- sycl/include/CL/sycl/detail/cg.hpp | 11 +- .../CL/sycl/detail/scheduler/commands.hpp | 2 + sycl/include/CL/sycl/detail/stream_impl.hpp | 75 ++++ sycl/include/CL/sycl/handler.hpp | 14 +- sycl/include/CL/sycl/stream.hpp | 386 ++++++++++++++++++ sycl/source/detail/scheduler/commands.cpp | 9 + sycl/source/detail/scheduler/scheduler.cpp | 37 +- sycl/source/detail/stream_impl.cpp | 44 ++ sycl/source/stream.cpp | 35 ++ sycl/test/basic_tests/stream.cpp | 128 ++++++ sycl/test/lit.cfg | 9 + 16 files changed, 747 insertions(+), 49 deletions(-) create mode 100644 sycl/include/CL/sycl/detail/stream_impl.hpp create mode 100644 sycl/include/CL/sycl/stream.hpp create mode 100644 sycl/source/detail/stream_impl.cpp create mode 100644 sycl/source/stream.cpp create mode 100644 sycl/test/basic_tests/stream.cpp diff --git a/clang/lib/Sema/SemaSYCL.cpp b/clang/lib/Sema/SemaSYCL.cpp index 2d491709f966e..28d8bf5119ca8 100644 --- a/clang/lib/Sema/SemaSYCL.cpp +++ b/clang/lib/Sema/SemaSYCL.cpp @@ -67,9 +67,6 @@ class Util { /// sampler class. static bool isSyclSamplerType(const QualType &Ty); - /// Checks whether given clang type is the SYCL stream class. - static bool isSyclStreamType(const QualType &Ty); - /// Checks whether given clang type is declared in the given hierarchy of /// declaration contexts. /// \param Ty the clang type being checked @@ -814,9 +811,6 @@ static void buildArgTys(ASTContext &Context, CXXRecordDecl *KernelObj, assert(SamplerArg && "sampler __init method must have sampler parameter"); CreateAndAddPrmDsc(Fld, SamplerArg->getType()); - } else if (Util::isSyclStreamType(ArgTy)) { - // the parameter is a SYCL stream object - llvm_unreachable("streams not supported yet"); } else if (ArgTy->isStructureOrClassType()) { if (!ArgTy->isStandardLayoutType()) { const DeclaratorDecl *V = @@ -917,9 +911,6 @@ static void populateIntHeader(SYCLIntegrationHeader &H, const StringRef Name, uint64_t Sz = Ctx.getTypeSizeInChars(SamplerArg->getType()).getQuantity(); H.addParamDesc(SYCLIntegrationHeader::kind_sampler, static_cast(Sz), static_cast(Offset)); - } else if (Util::isSyclStreamType(ArgTy)) { - // the parameter is a SYCL stream object - llvm_unreachable("streams not supported yet"); } else if (ArgTy->isStructureOrClassType() || ArgTy->isScalarType()) { // the parameter is an object of standard layout type or scalar; // the check for standard layout is done elsewhere @@ -1369,14 +1360,6 @@ bool Util::isSyclSamplerType(const QualType &Ty) { return matchQualifiedTypeName(Ty, Scopes); } -bool Util::isSyclStreamType(const QualType &Ty) { - static std::array Scopes = { - Util::DeclContextDesc{clang::Decl::Kind::Namespace, "cl"}, - Util::DeclContextDesc{clang::Decl::Kind::Namespace, "sycl"}, - Util::DeclContextDesc{clang::Decl::Kind::CXXRecord, "stream"}}; - return matchQualifiedTypeName(Ty, Scopes); -} - bool Util::matchQualifiedTypeName(const QualType &Ty, ArrayRef Scopes) { // The idea: check the declaration context chain starting from the type diff --git a/sycl/CMakeLists.txt b/sycl/CMakeLists.txt index 04169b3d9ad67..467505d16a9c8 100644 --- a/sycl/CMakeLists.txt +++ b/sycl/CMakeLists.txt @@ -142,6 +142,7 @@ add_library("${SYCLLibrary}" SHARED "${sourceRootPath}/detail/os_util.cpp" "${sourceRootPath}/detail/platform_util.cpp" "${sourceRootPath}/detail/sampler_impl.cpp" + "${sourceRootPath}/detail/stream_impl.cpp" "${sourceRootPath}/detail/scheduler/commands.cpp" "${sourceRootPath}/detail/scheduler/scheduler.cpp" "${sourceRootPath}/detail/scheduler/graph_processor.cpp" @@ -157,6 +158,7 @@ add_library("${SYCLLibrary}" SHARED "${sourceRootPath}/platform.cpp" "${sourceRootPath}/queue.cpp" "${sourceRootPath}/sampler.cpp" + "${sourceRootPath}/stream.cpp" "${sourceRootPath}/spirv_ops.cpp" ) diff --git a/sycl/include/CL/sycl.hpp b/sycl/include/CL/sycl.hpp index 9520659d8ada5..dccb6e260b961 100644 --- a/sycl/include/CL/sycl.hpp +++ b/sycl/include/CL/sycl.hpp @@ -34,5 +34,6 @@ #include #include #include +#include #include #include diff --git a/sycl/include/CL/sycl/accessor.hpp b/sycl/include/CL/sycl/accessor.hpp index 3cdffb352cc50..48ee02d661c05 100644 --- a/sycl/include/CL/sycl/accessor.hpp +++ b/sycl/include/CL/sycl/accessor.hpp @@ -514,28 +514,28 @@ class accessor : return getQualifiedPtr()[LinearIndex]; } - template < - int Dims = Dimensions, - typename = enable_if_t> - operator atomic() const { + template + operator typename std::enable_if>::type() const { const size_t LinearIndex = getLinearIndex(id()); return atomic( multi_ptr(getQualifiedPtr() + LinearIndex)); } - template < - int Dims = Dimensions, - typename = enable_if_t 0)>> - atomic operator[](id Index) const { + template + typename std::enable_if 0), + atomic>::type + operator[](id Index) const { const size_t LinearIndex = getLinearIndex(Index); return atomic( multi_ptr(getQualifiedPtr() + LinearIndex)); } - template < - int Dims = Dimensions, - typename = enable_if_t> - atomic operator[](size_t Index) const { + template + typename enable_if_t>::type + operator[](size_t Index) const { const size_t LinearIndex = getLinearIndex(id(Index)); return atomic( multi_ptr(getQualifiedPtr() + LinearIndex)); diff --git a/sycl/include/CL/sycl/buffer.hpp b/sycl/include/CL/sycl/buffer.hpp index e07822ede6f9f..f6241e376e802 100644 --- a/sycl/include/CL/sycl/buffer.hpp +++ b/sycl/include/CL/sycl/buffer.hpp @@ -158,7 +158,7 @@ class buffer { size_t get_count() const { return Range.size(); } - size_t get_size() const { return impl->get_size(); } + size_t get_size() const { return get_count() * sizeof(T); } AllocatorT get_allocator() const { return impl->get_allocator(); } diff --git a/sycl/include/CL/sycl/detail/cg.hpp b/sycl/include/CL/sycl/detail/cg.hpp index 1207b5fe2513a..1a8eb3b98e336 100644 --- a/sycl/include/CL/sycl/detail/cg.hpp +++ b/sycl/include/CL/sycl/detail/cg.hpp @@ -220,6 +220,7 @@ class HostKernel : public HostKernelBase { ~HostKernel() = default; }; +class stream_impl; // The base class for all types of command groups. class CG { public: @@ -272,6 +273,7 @@ class CGExecKernel : public CG { std::vector MArgs; std::string MKernelName; detail::OSModuleHandle MOSModuleHandle; + std::vector> MStreams; CGExecKernel(NDRDescT NDRDesc, std::unique_ptr HKernel, std::shared_ptr SyclKernel, @@ -280,15 +282,20 @@ class CGExecKernel : public CG { std::vector> SharedPtrStorage, std::vector Requirements, std::vector Args, std::string KernelName, - detail::OSModuleHandle OSModuleHandle) + detail::OSModuleHandle OSModuleHandle, + std::vector> Streams) : CG(KERNEL, std::move(ArgsStorage), std::move(AccStorage), std::move(SharedPtrStorage), std::move(Requirements)), MNDRDesc(std::move(NDRDesc)), MHostKernel(std::move(HKernel)), MSyclKernel(std::move(SyclKernel)), MArgs(std::move(Args)), - MKernelName(std::move(KernelName)), MOSModuleHandle(OSModuleHandle) {} + MKernelName(std::move(KernelName)), MOSModuleHandle(OSModuleHandle), + MStreams(std::move(Streams)) {} std::vector getArguments() const { return MArgs; } std::string getKernelName() const { return MKernelName; } + std::vector> getStreams() const { + return MStreams; + } }; // The class which represents "copy" command group. diff --git a/sycl/include/CL/sycl/detail/scheduler/commands.hpp b/sycl/include/CL/sycl/detail/scheduler/commands.hpp index 350de017908dc..d11db5a790ab4 100644 --- a/sycl/include/CL/sycl/detail/scheduler/commands.hpp +++ b/sycl/include/CL/sycl/detail/scheduler/commands.hpp @@ -229,6 +229,8 @@ class ExecCGCommand : public Command { : Command(CommandType::RUN_CG, std::move(Queue)), MCommandGroup(std::move(CommandGroup)) {} + void flushStreams(); + private: // Implementation of enqueueing of ExecCGCommand. cl_int enqueueImp() override; diff --git a/sycl/include/CL/sycl/detail/stream_impl.hpp b/sycl/include/CL/sycl/detail/stream_impl.hpp new file mode 100644 index 0000000000000..9a4ad10131d86 --- /dev/null +++ b/sycl/include/CL/sycl/detail/stream_impl.hpp @@ -0,0 +1,75 @@ +//==----------------- stream_impl.hpp - SYCL standard header file ----------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#pragma once + +#include +#include +#include + +namespace cl { +namespace sycl { + +namespace detail { +class stream_impl { +public: + using AccessorType = accessor; + + using OffsetAccessorType = + accessor; + + stream_impl(size_t BufferSize, size_t MaxStatementSize, handler &CGH); + + // Method to provide an access to the stream buffer + AccessorType getAccessor(handler &CGH) { + return Buf.get_access( + CGH, range<1>(BufferSize_), id<1>(OffsetSize)); + } + + // Method to provide an atomic access to the offset in the stream buffer + OffsetAccessorType getOffsetAccessor(handler &CGH) { + auto OffsetSubBuf = buffer(Buf, id<1>(0), range<1>(OffsetSize)); + auto ReinterpretedBuf = OffsetSubBuf.reinterpret(range<1>(1)); + return ReinterpretedBuf.get_access( + CGH, range<1>(1), id<1>(0)); + } + + // Copy stream buffer to the host and print the contents + void flush(); + + size_t get_size() const; + + size_t get_max_statement_size() const; + +private: + // Size of the stream buffer + size_t BufferSize_; + + // Maximum number of symbols which could be streamed from the beginning of a + // statement till the semicolon + size_t MaxStatementSize_; + + // Size of the variable which is used as an offset in the stream buffer. + // Additinonal memory is allocated in the beginning of the stream buffer for + // this variable. + static const size_t OffsetSize = sizeof(unsigned); + + // Vector on the host side which is used to initialize the stream buffer + std::vector Data; + + // Stream buffer + buffer Buf; +}; +} // namespace detail +} // namespace sycl +} // namespace cl + diff --git a/sycl/include/CL/sycl/handler.hpp b/sycl/include/CL/sycl/handler.hpp index 35b7ef5e58492..10f71aa8c72d6 100644 --- a/sycl/include/CL/sycl/handler.hpp +++ b/sycl/include/CL/sycl/handler.hpp @@ -109,6 +109,7 @@ DEFINE_INIT_SIZES(GlobalOffset) #endif //__SYCL_DEVICE_ONLY__ class queue_impl; +class stream_impl; template static Arg member_ptr_helper(RetType (Func::*)(Arg) const); @@ -163,6 +164,7 @@ class handler { // we exit the method they are passed in. std::vector> MArgsStorage; std::vector MAccStorage; + std::vector> MStreamStorage; std::vector> MSharedPtrStorage; // The list of arguments for the kernel. std::vector MArgs; @@ -360,8 +362,8 @@ class handler { std::move(MNDRDesc), std::move(MHostKernel), std::move(MSyclKernel), std::move(MArgsStorage), std::move(MAccStorage), std::move(MSharedPtrStorage), std::move(MRequirements), - std::move(MArgs), std::move(MKernelName), - std::move(MOSModuleHandle))); + std::move(MArgs), std::move(MKernelName), std::move(MOSModuleHandle), + std::move(MStreamStorage))); break; case detail::CG::COPY_ACC_TO_PTR: case detail::CG::COPY_PTR_TO_ACC: @@ -393,6 +395,12 @@ class handler { return EventRet; } + // Save streams associated with this handler. Streams are then forwarded to + // command group and flushed in the scheduler. + void addStream(std::shared_ptr s) { + MStreamStorage.push_back(std::move(s)); + } + ~handler() = default; bool is_host() { return MIsHost; } @@ -485,6 +493,8 @@ class handler { template friend class accessor; + // Make stream class friend to be able to keep the list of associated streams + friend class stream; public: handler(const handler &) = delete; diff --git a/sycl/include/CL/sycl/stream.hpp b/sycl/include/CL/sycl/stream.hpp new file mode 100644 index 0000000000000..ff3ebbbdb3144 --- /dev/null +++ b/sycl/include/CL/sycl/stream.hpp @@ -0,0 +1,386 @@ +//==----------------- stream.hpp - SYCL standard header file ---------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#pragma once + +#include + +namespace cl { +namespace sycl { + +enum class stream_manipulator { + dec, + hex, + oct, + noshowbase, + showbase, + noshowpos, + showpos, + endl, + fixed, + scientific, + hexfloat, + defaultfloat +}; + +const stream_manipulator dec = stream_manipulator::dec; + +const stream_manipulator hex = stream_manipulator::hex; + +const stream_manipulator oct = stream_manipulator::oct; + +const stream_manipulator noshowbase = stream_manipulator::noshowbase; + +const stream_manipulator showbase = stream_manipulator::showbase; + +const stream_manipulator noshowpos = stream_manipulator::noshowpos; + +const stream_manipulator showpos = stream_manipulator::showpos; + +const stream_manipulator endl = stream_manipulator::endl; + +const stream_manipulator fixed = stream_manipulator::fixed; + +const stream_manipulator scientific = stream_manipulator::scientific; + +const stream_manipulator hexfloat = stream_manipulator::hexfloat; + +const stream_manipulator defaultfloat = stream_manipulator::defaultfloat; + +class stream; + +class __precision_manipulator__ { + int Precision_; + +public: + __precision_manipulator__(int Precision) : Precision_(Precision) {} + friend const stream &operator<<(const stream &, + const __precision_manipulator__ &); +}; + +class __width_manipulator__ { + int Width_; + +public: + __width_manipulator__(int Width) : Width_(Width) {} + friend const stream &operator<<(const stream &, + const __width_manipulator__ &); +}; + +inline __precision_manipulator__ setprecision(int Precision) { + return __precision_manipulator__(Precision); +} + +inline __width_manipulator__ setw(int Width) { + return __width_manipulator__(Width); +} + +class stream { +public: + stream(size_t BufferSize, size_t MaxStatementSize, handler &CGH); + + size_t get_size() const; + + size_t get_max_statement_size() const; + + size_t get_precision() const { return Precision; } + + stream_manipulator get_stream_mode() const { return Manipulator; } + + bool operator==(const stream &RHS) const; + + bool operator!=(const stream &LHS) const; + +private: +#ifdef __SYCL_DEVICE_ONLY__ + char padding[sizeof(std::shared_ptr)]; +#else + std::shared_ptr impl; + template + friend decltype(T::impl) detail::getSyclObjImpl(const T &SyclObject); +#endif + + // Accessor to stream buffer + detail::stream_impl::AccessorType Acc; + + // Atomic accessor to the offset variable. It represents an offset in the + // stream buffer. + detail::stream_impl::OffsetAccessorType OffsetAcc; + mutable stream_manipulator Manipulator = defaultfloat; + + // Fields and methods to work with manipulators + + // Type used for format flags + using FmtFlags = unsigned int; + + mutable int Precision; + mutable int Width; + mutable FmtFlags Flags; + + // Mapping from stream_manipulator to FmtFlags. Each manipulator corresponds + // to the bit in FmtFlags. + static constexpr FmtFlags Dec = 0x0001; + static constexpr FmtFlags Hex = 0x0002; + static constexpr FmtFlags Oct = 0x0004; + static constexpr FmtFlags ShowBase = 0x0008; + static constexpr FmtFlags ShowPos = 0x0010; + static constexpr FmtFlags Fixed = 0x0020; + static constexpr FmtFlags Scientific = 0x0040; + + // Bitmask made of the combination of the base flags. Base flags are mutually + // exclusive, this mask is used to clean base field before setting the new + // base flag. + static constexpr FmtFlags BaseField = Dec | Hex | Oct; + + // Bitmask made of the combination of the floating point value format flags. + // Thease flags are mutually exclusive, this mask is used to clean float field + // before setting the new float flag. + static constexpr FmtFlags FloatField = Scientific | Fixed; + + void set_flag(FmtFlags FormatFlag) const { Flags |= FormatFlag; } + + void unset_flag(FmtFlags FormatFlag) const { Flags &= ~FormatFlag; } + + // This method is used to set the flag for base and float manipulators. These + // flags are mutually exclusive and base/float field needs to be cleared + // before the setting new flag. + void set_flag(FmtFlags FormatFlag, FmtFlags Mask) const { + unset_flag(Mask); + Flags |= FormatFlag & Mask; + } + + // Set the flags which correspond to the input stream manipulator. + void set_manipulator(const stream_manipulator &SM) const { + switch (SM) { + case stream_manipulator::dec: + set_flag(Dec, BaseField); + break; + case stream_manipulator::hex: + set_flag(Hex, BaseField); + break; + case stream_manipulator::oct: + set_flag(Oct, BaseField); + break; + case stream_manipulator::noshowbase: + unset_flag(ShowBase); + break; + case stream_manipulator::showbase: + set_flag(ShowBase); + break; + case stream_manipulator::noshowpos: + unset_flag(ShowPos); + break; + case stream_manipulator::showpos: + set_flag(ShowPos); + break; + case stream_manipulator::fixed: + set_flag(Fixed, FloatField); + break; + case stream_manipulator::scientific: + set_flag(Scientific, FloatField); + break; + case stream_manipulator::hexfloat: + set_flag(Fixed | Scientific, FloatField); + break; + case stream_manipulator::defaultfloat: + unset_flag(FloatField); + break; + default: + // Unknown manipulator + break; + } + } + + friend const stream &operator<<(const stream &, const char); + friend const stream &operator<<(const stream &, const char *); + template + friend typename std::enable_if::value, + const stream &>::type + operator<<(const stream &, const ValueType &); + friend const stream &operator<<(const stream &, const stream_manipulator &); + + // Helper method to update offset atomically according to the provided + // operand size of the output operator. Return true if offset is updated and + // false in case of overflow. + bool update_offset(unsigned Size, unsigned &Cur) const { + unsigned New; + do { + Cur = OffsetAcc[0].load(); + if (Acc.get_count() - Cur < Size) + // Overflow + return false; + New = Cur + Size; + } while (!OffsetAcc[0].compare_exchange_strong(Cur, New)); + return true; + } +}; + +// Character +inline const stream &operator<<(const stream &Out, const char C) { + unsigned Cur; + if (!Out.update_offset(1, Cur)) + return Out; + Out.Acc[Cur] = C; + return Out; +} + +// String +inline const stream &operator<<(const stream &Out, const char *Str) { + unsigned Len; + for (Len = 0; Str[Len] != '\0'; Len++) + ; + + unsigned Cur; + if (!Out.update_offset(Len, Cur)) + return Out; + + for (size_t i = 0; i < Len; i++) { + Out.Acc[i + Cur] = Str[i]; + } + return Out; +} + +// Boolean +inline const stream &operator<<(const stream &Out, const bool &RHS) { + Out << (RHS ? "true" : "false"); + return Out; +} + +// Integral +template +typename std::enable_if::value, + const stream &>::type +operator<<(const stream &Out, const ValueType &RHS) { + // TODO + return Out; +} + +// Floating points + +inline const stream &operator<<(const stream &Out, const float &RHS) { + // TODO + return Out; +} + +inline const stream &operator<<(const stream &Out, const double &RHS) { + // TODO + return Out; +} + +inline const stream &operator<<(const stream &Out, const half &RHS) { + // TODO + return Out; +} + +// Pointer + +template +inline const stream &operator<<(const stream &Out, + const multi_ptr &RHS) { + // TODO + return Out; +} + +template +const stream &operator<<(const stream &Out, const T *RHS) { + // TODO + return Out; +} + +// Manipulators + +inline const stream &operator<<(const stream &Out, + const __precision_manipulator__ &RHS) { + // TODO + return Out; +} + +inline const stream &operator<<(const stream &Out, + const __width_manipulator__ &RHS) { + // TODO + return Out; +} + +inline const stream &operator<<(const stream &Out, + const stream_manipulator &RHS) { + switch (RHS) { + case stream_manipulator::endl: + Out << '\n'; + break; + default: + Out.set_manipulator(RHS); + } + return Out; +} + +// Vec + +template +const stream &operator<<(const stream &Out, const vec &RHS) { + // TODO + return Out; +} + +// SYCL types + +template +inline const stream &operator<<(const stream &Out, const id &RHS) { + // TODO + return Out; +} + +template +inline const stream &operator<<(const stream &Out, + const range &RHS) { + // TODO + return Out; +} + +template +inline const stream &operator<<(const stream &Out, + const item &RHS) { + // TODO + return Out; +} + +template +inline const stream &operator<<(const stream &Out, + const nd_range &RHS) { + // TODO + return Out; +} + +template +inline const stream &operator<<(const stream &Out, + const nd_item &RHS) { + // TODO + return Out; +} + +template +inline const stream &operator<<(const stream &Out, + const group &RHS) { + // TODO + return Out; +} + +} // namespace sycl +} // namespace cl +namespace std { +template <> struct hash { + size_t operator()(const cl::sycl::stream &S) const { +#ifdef __SYCL_DEVICE_ONLY__ + return 0; +#else + return hash>()( + cl::sycl::detail::getSyclObjImpl(S)); +#endif + } +}; +} // namespace std + diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index 6ac31ac34ce58..f8459bf78cc80 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -15,6 +15,7 @@ #include #include #include +#include #include #include @@ -202,6 +203,14 @@ AllocaCommand *ExecCGCommand::getAllocaForReq(Requirement *Req) { throw runtime_error("Alloca for command not found"); } +void ExecCGCommand::flushStreams() { + assert(CommandGroup->getType() == CG::KERNEL && "Expected kernel"); + for (auto StreamImplPtr : + ((CGExecKernel *)MCommandGroup.get())->getStreams()) { + StreamImplPtr->flush(); + } +} + MemCpyCommandHost::MemCpyCommandHost(Requirement SrcReq, AllocaCommand *SrcAlloca, Requirement *DstAcc, QueueImplPtr SrcQueue, diff --git a/sycl/source/detail/scheduler/scheduler.cpp b/sycl/source/detail/scheduler/scheduler.cpp index a6db754f46811..d4e25b456880e 100644 --- a/sycl/source/detail/scheduler/scheduler.cpp +++ b/sycl/source/detail/scheduler/scheduler.cpp @@ -50,24 +50,31 @@ void Scheduler::waitForRecordToFinish(GraphBuilder::MemObjRecord *Record) { EventImplPtr Scheduler::addCG(std::unique_ptr CommandGroup, QueueImplPtr Queue) { - std::lock_guard lock(MGraphLock); - Command *NewCmd = nullptr; - switch (CommandGroup->getType()) { - case CG::UPDATE_HOST: - NewCmd = MGraphBuilder.addCGUpdateHost(std::move(CommandGroup), - DefaultHostQueue); - break; - default: - NewCmd = MGraphBuilder.addCG(std::move(CommandGroup), std::move(Queue)); + const bool IsKernel = CommandGroup->getType() == CG::KERNEL; + { + std::lock_guard Lock(MGraphLock); + + switch (CommandGroup->getType()) { + case CG::UPDATE_HOST: + NewCmd = MGraphBuilder.addCGUpdateHost(std::move(CommandGroup), + DefaultHostQueue); + break; + default: + NewCmd = MGraphBuilder.addCG(std::move(CommandGroup), std::move(Queue)); + } + + // TODO: Check if lazy mode. + Command *FailedCommand = GraphProcessor::enqueueCommand(NewCmd); + MGraphBuilder.cleanupCommands(); + if (FailedCommand) + // TODO: Reschedule commands. + throw runtime_error("Enqueue process failed."); } - // TODO: Check if lazy mode. - Command *FailedCommand = GraphProcessor::enqueueCommand(NewCmd); - MGraphBuilder.cleanupCommands(); - if (FailedCommand) - // TODO: Reschedule commands. - throw runtime_error("Enqueue process failed."); + if (IsKernel) + ((ExecCGCommand *)NewCmd)->flushStreams(); + return NewCmd->getEvent(); } diff --git a/sycl/source/detail/stream_impl.cpp b/sycl/source/detail/stream_impl.cpp new file mode 100644 index 0000000000000..b96922aeecb4f --- /dev/null +++ b/sycl/source/detail/stream_impl.cpp @@ -0,0 +1,44 @@ +//==----------------- stream_impl.cpp - SYCL standard header file ---------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include +#include + +namespace cl { +namespace sycl { +namespace detail { + +stream_impl::stream_impl(size_t BufferSize, size_t MaxStatementSize, + handler &CGH) + : BufferSize_(BufferSize), + // Allocate additional place for the offset variable and the end of line + // symbol. Initialize buffer with zeros, this is needed for two reasons: + // 1. We don't need to care about end of line when printing out streamed + // data. + // 2. Offset is properly initialized. + Data(BufferSize + OffsetSize + 1, 0), + Buf(Data.data(), range<1>(BufferSize + OffsetSize + 1), + {property::buffer::use_host_ptr()}), + MaxStatementSize_(MaxStatementSize) {} + +size_t stream_impl::get_size() const { return BufferSize_; } + +size_t stream_impl::get_max_statement_size() const { return MaxStatementSize_; } + +void stream_impl::flush() { + // Access the stream buffer on the host. This access guarantees that kernel is + // executed and buffer contains streamed data. + auto HostAcc = Buf.get_access( + range<1>(BufferSize_), id<1>(OffsetSize)); + + printf("%s", HostAcc.get_pointer()); +} +} // namespace detail +} // namespace sycl +} // namespace cl + diff --git a/sycl/source/stream.cpp b/sycl/source/stream.cpp new file mode 100644 index 0000000000000..599a03299d4c8 --- /dev/null +++ b/sycl/source/stream.cpp @@ -0,0 +1,35 @@ +//==------------------- stream.cpp - SYCL standard header file ------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include + +namespace cl { +namespace sycl { + +stream::stream(size_t BufferSize, size_t MaxStatementSize, handler &CGH) + : impl(std::make_shared(BufferSize, MaxStatementSize, + CGH)), + Acc(impl->getAccessor(CGH)), OffsetAcc(impl->getOffsetAccessor(CGH)) { + // Save stream implementation in the handler so that stream will be alive + // during kernel execution + CGH.addStream(impl); +} + +size_t stream::get_size() const { return impl->get_size(); } + +size_t stream::get_max_statement_size() const { + return impl->get_max_statement_size(); +} + +bool stream::operator==(const stream &RHS) const { return (impl == RHS.impl); } + +bool stream::operator!=(const stream &RHS) const { return !(impl == RHS.impl); } + +} // namespace sycl +} // namespace cl + diff --git a/sycl/test/basic_tests/stream.cpp b/sycl/test/basic_tests/stream.cpp new file mode 100644 index 0000000000000..700fb0701bd5b --- /dev/null +++ b/sycl/test/basic_tests/stream.cpp @@ -0,0 +1,128 @@ +// RUN: %clang -std=c++11 -fsycl -lstdc++ %s -o %t.out -lOpenCL -lsycl +// RUN: env SYCL_DEVICE_TYPE=HOST %t.out | FileCheck %s +// RUN: %CPU_RUN_PLACEHOLDER %t.out %CPU_CHECK_PLACEHOLDER +// RUN: %GPU_RUN_PLACEHOLDER %t.out %GPU_CHECK_PLACEHOLDER +// RUN: %ACC_RUN_PLACEHOLDER %t.out %ACC_CHECK_PLACEHOLDER +//==------------------ stream.cpp - SYCL stream basic test -----------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include +#include +#include + +int main() { + { + cl::sycl::default_selector Selector; + cl::sycl::queue Queue(Selector); + + // Check constructor and getters + Queue.submit([&](cl::sycl::handler &CGH) { + cl::sycl::stream Out(1024, 80, CGH); + assert(Out.get_size() == 1024); + assert(Out.get_max_statement_size() == 80); + }); + + // Check common reference semantics + cl::sycl::hash_class Hasher; + + Queue.submit([&](cl::sycl::handler &CGH) { + cl::sycl::stream Out1(1024, 80, CGH); + cl::sycl::stream Out2(Out1); + + assert(Out1 == Out2); + assert(Hasher(Out1) == Hasher(Out2)); + + cl::sycl::stream Out3(std::move(Out1)); + + assert(Out2 == Out3); + assert(Hasher(Out2) == Hasher(Out3)); + }); + + // Char type + Queue.submit([&](cl::sycl::handler &CGH) { + cl::sycl::stream Out(1024, 80, CGH); + CGH.parallel_for( + cl::sycl::range<1>(10), [=](cl::sycl::id<1> i) { Out << 'a'; }); + }); + Queue.wait(); + + // endl manipulator + // TODO: support cl::sycl::endl. According to specitification endl should be + // constant global variable in cl::sycl which is initialized with + // cl::sycl::stream_manipulator::endl. This approach doesn't currently work, + // variable is not initialized in the kernel code, it contains some garbage + // value. + Queue.submit([&](cl::sycl::handler &CGH) { + cl::sycl::stream Out(1024, 80, CGH); + CGH.single_task( + [=]() { Out << cl::sycl::stream_manipulator::endl; }); + }); + Queue.wait(); + + // String type + Queue.submit([&](cl::sycl::handler &CGH) { + cl::sycl::stream Out(1024, 80, CGH); + CGH.parallel_for( + cl::sycl::range<1>(10), + [=](cl::sycl::id<1> i) { Out << "Hello, World!\n"; }); + }); + Queue.wait(); + + // Boolean type + Queue.submit([&](cl::sycl::handler &CGH) { + cl::sycl::stream Out(1024, 80, CGH); + CGH.single_task([=]() { Out << true; }); + }); + Queue.wait(); + + Queue.submit([&](cl::sycl::handler &CGH) { + cl::sycl::stream Out(1024, 80, CGH); + CGH.single_task([=]() { Out << false; }); + }); + Queue.wait(); + + // Multiple streams in command group + Queue.submit([&](cl::sycl::handler &CGH) { + cl::sycl::stream Out1(1024, 80, CGH); + cl::sycl::stream Out2(500, 10, CGH); + CGH.parallel_for(cl::sycl::range<1>(2), + [=](cl::sycl::id<1> i) { + Out1 << "Hello, World!\n"; + Out2 << "Hello, World!\n"; + }); + }); + Queue.wait(); + + // The case when stream buffer is full. To check that there is no problem + // with end of line symbol when printing out the stream buffer. + Queue.submit([&](cl::sycl::handler &CGH) { + cl::sycl::stream Out(10, 10, CGH); + CGH.parallel_for( + cl::sycl::range<1>(2), + [=](cl::sycl::id<1> i) { Out << "aaaaaaaaa\n"; }); + }); + Queue.wait(); + } + return 0; +} +// CHECK: aaaaaaaaaa +// CHECK-NEXT: Hello, World! +// CHECK-NEXT: Hello, World! +// CHECK-NEXT: Hello, World! +// CHECK-NEXT: Hello, World! +// CHECK-NEXT: Hello, World! +// CHECK-NEXT: Hello, World! +// CHECK-NEXT: Hello, World! +// CHECK-NEXT: Hello, World! +// CHECK-NEXT: Hello, World! +// CHECK-NEXT: Hello, World! +// CHECK-NEXT: truefalseHello, World! +// CHECK-NEXT: Hello, World! +// CHECK-NEXT: Hello, World! +// CHECK-NEXT: Hello, World! +// CHECK-NEXT: aaaaaaaaa diff --git a/sycl/test/lit.cfg b/sycl/test/lit.cfg index e74ed709e9ca6..c847a50c1fadc 100644 --- a/sycl/test/lit.cfg +++ b/sycl/test/lit.cfg @@ -76,23 +76,32 @@ def getDeviceCount(device_type): cpu_run_substitute = "echo" +cpu_check_substitute = "" if getDeviceCount("cpu"): print("Found available CPU device") cpu_run_substitute = "env SYCL_DEVICE_TYPE=CPU " + cpu_check_substitute = "| FileCheck %s" config.substitutions.append( ('%CPU_RUN_PLACEHOLDER', cpu_run_substitute) ) +config.substitutions.append( ('%CPU_CHECK_PLACEHOLDER', cpu_check_substitute) ) gpu_run_substitute = "echo" +gpu_check_substitute = "" if getDeviceCount("gpu"): print("Found available GPU device") gpu_run_substitute = " env SYCL_DEVICE_TYPE=GPU " + gpu_check_substitute = "| FileCheck %s" config.available_features.add('gpu') config.substitutions.append( ('%GPU_RUN_PLACEHOLDER', gpu_run_substitute) ) +config.substitutions.append( ('%GPU_CHECK_PLACEHOLDER', gpu_check_substitute) ) acc_run_substitute = "echo" +acc_check_substitute = "" if getDeviceCount("accelerator"): print("Found available accelerator device") acc_run_substitute = " env SYCL_DEVICE_TYPE=ACC " + acc_check_substitute = "| FileCheck %s" config.substitutions.append( ('%ACC_RUN_PLACEHOLDER', acc_run_substitute) ) +config.substitutions.append( ('%ACC_CHECK_PLACEHOLDER', acc_check_substitute) ) path = config.environment['PATH'] path = os.path.pathsep.join((config.llvm_tools_dir, path))