diff --git a/clang/lib/Sema/SemaSYCL.cpp b/clang/lib/Sema/SemaSYCL.cpp index 2d491709f966e..28d8bf5119ca8 100644 --- a/clang/lib/Sema/SemaSYCL.cpp +++ b/clang/lib/Sema/SemaSYCL.cpp @@ -67,9 +67,6 @@ class Util { /// sampler class. static bool isSyclSamplerType(const QualType &Ty); - /// Checks whether given clang type is the SYCL stream class. - static bool isSyclStreamType(const QualType &Ty); - /// Checks whether given clang type is declared in the given hierarchy of /// declaration contexts. /// \param Ty the clang type being checked @@ -814,9 +811,6 @@ static void buildArgTys(ASTContext &Context, CXXRecordDecl *KernelObj, assert(SamplerArg && "sampler __init method must have sampler parameter"); CreateAndAddPrmDsc(Fld, SamplerArg->getType()); - } else if (Util::isSyclStreamType(ArgTy)) { - // the parameter is a SYCL stream object - llvm_unreachable("streams not supported yet"); } else if (ArgTy->isStructureOrClassType()) { if (!ArgTy->isStandardLayoutType()) { const DeclaratorDecl *V = @@ -917,9 +911,6 @@ static void populateIntHeader(SYCLIntegrationHeader &H, const StringRef Name, uint64_t Sz = Ctx.getTypeSizeInChars(SamplerArg->getType()).getQuantity(); H.addParamDesc(SYCLIntegrationHeader::kind_sampler, static_cast(Sz), static_cast(Offset)); - } else if (Util::isSyclStreamType(ArgTy)) { - // the parameter is a SYCL stream object - llvm_unreachable("streams not supported yet"); } else if (ArgTy->isStructureOrClassType() || ArgTy->isScalarType()) { // the parameter is an object of standard layout type or scalar; // the check for standard layout is done elsewhere @@ -1369,14 +1360,6 @@ bool Util::isSyclSamplerType(const QualType &Ty) { return matchQualifiedTypeName(Ty, Scopes); } -bool Util::isSyclStreamType(const QualType &Ty) { - static std::array Scopes = { - Util::DeclContextDesc{clang::Decl::Kind::Namespace, "cl"}, - Util::DeclContextDesc{clang::Decl::Kind::Namespace, "sycl"}, - Util::DeclContextDesc{clang::Decl::Kind::CXXRecord, "stream"}}; - return matchQualifiedTypeName(Ty, Scopes); -} - bool Util::matchQualifiedTypeName(const QualType &Ty, ArrayRef Scopes) { // The idea: check the declaration context chain starting from the type diff --git a/sycl/CMakeLists.txt b/sycl/CMakeLists.txt index 04169b3d9ad67..467505d16a9c8 100644 --- a/sycl/CMakeLists.txt +++ b/sycl/CMakeLists.txt @@ -142,6 +142,7 @@ add_library("${SYCLLibrary}" SHARED "${sourceRootPath}/detail/os_util.cpp" "${sourceRootPath}/detail/platform_util.cpp" "${sourceRootPath}/detail/sampler_impl.cpp" + "${sourceRootPath}/detail/stream_impl.cpp" "${sourceRootPath}/detail/scheduler/commands.cpp" "${sourceRootPath}/detail/scheduler/scheduler.cpp" "${sourceRootPath}/detail/scheduler/graph_processor.cpp" @@ -157,6 +158,7 @@ add_library("${SYCLLibrary}" SHARED "${sourceRootPath}/platform.cpp" "${sourceRootPath}/queue.cpp" "${sourceRootPath}/sampler.cpp" + "${sourceRootPath}/stream.cpp" "${sourceRootPath}/spirv_ops.cpp" ) diff --git a/sycl/include/CL/sycl.hpp b/sycl/include/CL/sycl.hpp index 9520659d8ada5..dccb6e260b961 100644 --- a/sycl/include/CL/sycl.hpp +++ b/sycl/include/CL/sycl.hpp @@ -34,5 +34,6 @@ #include #include #include +#include #include #include diff --git a/sycl/include/CL/sycl/accessor.hpp b/sycl/include/CL/sycl/accessor.hpp index 3cdffb352cc50..48ee02d661c05 100644 --- a/sycl/include/CL/sycl/accessor.hpp +++ b/sycl/include/CL/sycl/accessor.hpp @@ -514,28 +514,28 @@ class accessor : return getQualifiedPtr()[LinearIndex]; } - template < - int Dims = Dimensions, - typename = enable_if_t> - operator atomic() const { + template + operator typename std::enable_if>::type() const { const size_t LinearIndex = getLinearIndex(id()); return atomic( multi_ptr(getQualifiedPtr() + LinearIndex)); } - template < - int Dims = Dimensions, - typename = enable_if_t 0)>> - atomic operator[](id Index) const { + template + typename std::enable_if 0), + atomic>::type + operator[](id Index) const { const size_t LinearIndex = getLinearIndex(Index); return atomic( multi_ptr(getQualifiedPtr() + LinearIndex)); } - template < - int Dims = Dimensions, - typename = enable_if_t> - atomic operator[](size_t Index) const { + template + typename enable_if_t>::type + operator[](size_t Index) const { const size_t LinearIndex = getLinearIndex(id(Index)); return atomic( multi_ptr(getQualifiedPtr() + LinearIndex)); diff --git a/sycl/include/CL/sycl/buffer.hpp b/sycl/include/CL/sycl/buffer.hpp index e07822ede6f9f..f6241e376e802 100644 --- a/sycl/include/CL/sycl/buffer.hpp +++ b/sycl/include/CL/sycl/buffer.hpp @@ -158,7 +158,7 @@ class buffer { size_t get_count() const { return Range.size(); } - size_t get_size() const { return impl->get_size(); } + size_t get_size() const { return get_count() * sizeof(T); } AllocatorT get_allocator() const { return impl->get_allocator(); } diff --git a/sycl/include/CL/sycl/detail/cg.hpp b/sycl/include/CL/sycl/detail/cg.hpp index 1207b5fe2513a..1a8eb3b98e336 100644 --- a/sycl/include/CL/sycl/detail/cg.hpp +++ b/sycl/include/CL/sycl/detail/cg.hpp @@ -220,6 +220,7 @@ class HostKernel : public HostKernelBase { ~HostKernel() = default; }; +class stream_impl; // The base class for all types of command groups. class CG { public: @@ -272,6 +273,7 @@ class CGExecKernel : public CG { std::vector MArgs; std::string MKernelName; detail::OSModuleHandle MOSModuleHandle; + std::vector> MStreams; CGExecKernel(NDRDescT NDRDesc, std::unique_ptr HKernel, std::shared_ptr SyclKernel, @@ -280,15 +282,20 @@ class CGExecKernel : public CG { std::vector> SharedPtrStorage, std::vector Requirements, std::vector Args, std::string KernelName, - detail::OSModuleHandle OSModuleHandle) + detail::OSModuleHandle OSModuleHandle, + std::vector> Streams) : CG(KERNEL, std::move(ArgsStorage), std::move(AccStorage), std::move(SharedPtrStorage), std::move(Requirements)), MNDRDesc(std::move(NDRDesc)), MHostKernel(std::move(HKernel)), MSyclKernel(std::move(SyclKernel)), MArgs(std::move(Args)), - MKernelName(std::move(KernelName)), MOSModuleHandle(OSModuleHandle) {} + MKernelName(std::move(KernelName)), MOSModuleHandle(OSModuleHandle), + MStreams(std::move(Streams)) {} std::vector getArguments() const { return MArgs; } std::string getKernelName() const { return MKernelName; } + std::vector> getStreams() const { + return MStreams; + } }; // The class which represents "copy" command group. diff --git a/sycl/include/CL/sycl/detail/scheduler/commands.hpp b/sycl/include/CL/sycl/detail/scheduler/commands.hpp index 350de017908dc..d11db5a790ab4 100644 --- a/sycl/include/CL/sycl/detail/scheduler/commands.hpp +++ b/sycl/include/CL/sycl/detail/scheduler/commands.hpp @@ -229,6 +229,8 @@ class ExecCGCommand : public Command { : Command(CommandType::RUN_CG, std::move(Queue)), MCommandGroup(std::move(CommandGroup)) {} + void flushStreams(); + private: // Implementation of enqueueing of ExecCGCommand. cl_int enqueueImp() override; diff --git a/sycl/include/CL/sycl/detail/stream_impl.hpp b/sycl/include/CL/sycl/detail/stream_impl.hpp new file mode 100644 index 0000000000000..9a4ad10131d86 --- /dev/null +++ b/sycl/include/CL/sycl/detail/stream_impl.hpp @@ -0,0 +1,75 @@ +//==----------------- stream_impl.hpp - SYCL standard header file ----------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#pragma once + +#include +#include +#include + +namespace cl { +namespace sycl { + +namespace detail { +class stream_impl { +public: + using AccessorType = accessor; + + using OffsetAccessorType = + accessor; + + stream_impl(size_t BufferSize, size_t MaxStatementSize, handler &CGH); + + // Method to provide an access to the stream buffer + AccessorType getAccessor(handler &CGH) { + return Buf.get_access( + CGH, range<1>(BufferSize_), id<1>(OffsetSize)); + } + + // Method to provide an atomic access to the offset in the stream buffer + OffsetAccessorType getOffsetAccessor(handler &CGH) { + auto OffsetSubBuf = buffer(Buf, id<1>(0), range<1>(OffsetSize)); + auto ReinterpretedBuf = OffsetSubBuf.reinterpret(range<1>(1)); + return ReinterpretedBuf.get_access( + CGH, range<1>(1), id<1>(0)); + } + + // Copy stream buffer to the host and print the contents + void flush(); + + size_t get_size() const; + + size_t get_max_statement_size() const; + +private: + // Size of the stream buffer + size_t BufferSize_; + + // Maximum number of symbols which could be streamed from the beginning of a + // statement till the semicolon + size_t MaxStatementSize_; + + // Size of the variable which is used as an offset in the stream buffer. + // Additinonal memory is allocated in the beginning of the stream buffer for + // this variable. + static const size_t OffsetSize = sizeof(unsigned); + + // Vector on the host side which is used to initialize the stream buffer + std::vector Data; + + // Stream buffer + buffer Buf; +}; +} // namespace detail +} // namespace sycl +} // namespace cl + diff --git a/sycl/include/CL/sycl/handler.hpp b/sycl/include/CL/sycl/handler.hpp index 35b7ef5e58492..10f71aa8c72d6 100644 --- a/sycl/include/CL/sycl/handler.hpp +++ b/sycl/include/CL/sycl/handler.hpp @@ -109,6 +109,7 @@ DEFINE_INIT_SIZES(GlobalOffset) #endif //__SYCL_DEVICE_ONLY__ class queue_impl; +class stream_impl; template static Arg member_ptr_helper(RetType (Func::*)(Arg) const); @@ -163,6 +164,7 @@ class handler { // we exit the method they are passed in. std::vector> MArgsStorage; std::vector MAccStorage; + std::vector> MStreamStorage; std::vector> MSharedPtrStorage; // The list of arguments for the kernel. std::vector MArgs; @@ -360,8 +362,8 @@ class handler { std::move(MNDRDesc), std::move(MHostKernel), std::move(MSyclKernel), std::move(MArgsStorage), std::move(MAccStorage), std::move(MSharedPtrStorage), std::move(MRequirements), - std::move(MArgs), std::move(MKernelName), - std::move(MOSModuleHandle))); + std::move(MArgs), std::move(MKernelName), std::move(MOSModuleHandle), + std::move(MStreamStorage))); break; case detail::CG::COPY_ACC_TO_PTR: case detail::CG::COPY_PTR_TO_ACC: @@ -393,6 +395,12 @@ class handler { return EventRet; } + // Save streams associated with this handler. Streams are then forwarded to + // command group and flushed in the scheduler. + void addStream(std::shared_ptr s) { + MStreamStorage.push_back(std::move(s)); + } + ~handler() = default; bool is_host() { return MIsHost; } @@ -485,6 +493,8 @@ class handler { template friend class accessor; + // Make stream class friend to be able to keep the list of associated streams + friend class stream; public: handler(const handler &) = delete; diff --git a/sycl/include/CL/sycl/stream.hpp b/sycl/include/CL/sycl/stream.hpp new file mode 100644 index 0000000000000..ff3ebbbdb3144 --- /dev/null +++ b/sycl/include/CL/sycl/stream.hpp @@ -0,0 +1,386 @@ +//==----------------- stream.hpp - SYCL standard header file ---------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#pragma once + +#include + +namespace cl { +namespace sycl { + +enum class stream_manipulator { + dec, + hex, + oct, + noshowbase, + showbase, + noshowpos, + showpos, + endl, + fixed, + scientific, + hexfloat, + defaultfloat +}; + +const stream_manipulator dec = stream_manipulator::dec; + +const stream_manipulator hex = stream_manipulator::hex; + +const stream_manipulator oct = stream_manipulator::oct; + +const stream_manipulator noshowbase = stream_manipulator::noshowbase; + +const stream_manipulator showbase = stream_manipulator::showbase; + +const stream_manipulator noshowpos = stream_manipulator::noshowpos; + +const stream_manipulator showpos = stream_manipulator::showpos; + +const stream_manipulator endl = stream_manipulator::endl; + +const stream_manipulator fixed = stream_manipulator::fixed; + +const stream_manipulator scientific = stream_manipulator::scientific; + +const stream_manipulator hexfloat = stream_manipulator::hexfloat; + +const stream_manipulator defaultfloat = stream_manipulator::defaultfloat; + +class stream; + +class __precision_manipulator__ { + int Precision_; + +public: + __precision_manipulator__(int Precision) : Precision_(Precision) {} + friend const stream &operator<<(const stream &, + const __precision_manipulator__ &); +}; + +class __width_manipulator__ { + int Width_; + +public: + __width_manipulator__(int Width) : Width_(Width) {} + friend const stream &operator<<(const stream &, + const __width_manipulator__ &); +}; + +inline __precision_manipulator__ setprecision(int Precision) { + return __precision_manipulator__(Precision); +} + +inline __width_manipulator__ setw(int Width) { + return __width_manipulator__(Width); +} + +class stream { +public: + stream(size_t BufferSize, size_t MaxStatementSize, handler &CGH); + + size_t get_size() const; + + size_t get_max_statement_size() const; + + size_t get_precision() const { return Precision; } + + stream_manipulator get_stream_mode() const { return Manipulator; } + + bool operator==(const stream &RHS) const; + + bool operator!=(const stream &LHS) const; + +private: +#ifdef __SYCL_DEVICE_ONLY__ + char padding[sizeof(std::shared_ptr)]; +#else + std::shared_ptr impl; + template + friend decltype(T::impl) detail::getSyclObjImpl(const T &SyclObject); +#endif + + // Accessor to stream buffer + detail::stream_impl::AccessorType Acc; + + // Atomic accessor to the offset variable. It represents an offset in the + // stream buffer. + detail::stream_impl::OffsetAccessorType OffsetAcc; + mutable stream_manipulator Manipulator = defaultfloat; + + // Fields and methods to work with manipulators + + // Type used for format flags + using FmtFlags = unsigned int; + + mutable int Precision; + mutable int Width; + mutable FmtFlags Flags; + + // Mapping from stream_manipulator to FmtFlags. Each manipulator corresponds + // to the bit in FmtFlags. + static constexpr FmtFlags Dec = 0x0001; + static constexpr FmtFlags Hex = 0x0002; + static constexpr FmtFlags Oct = 0x0004; + static constexpr FmtFlags ShowBase = 0x0008; + static constexpr FmtFlags ShowPos = 0x0010; + static constexpr FmtFlags Fixed = 0x0020; + static constexpr FmtFlags Scientific = 0x0040; + + // Bitmask made of the combination of the base flags. Base flags are mutually + // exclusive, this mask is used to clean base field before setting the new + // base flag. + static constexpr FmtFlags BaseField = Dec | Hex | Oct; + + // Bitmask made of the combination of the floating point value format flags. + // Thease flags are mutually exclusive, this mask is used to clean float field + // before setting the new float flag. + static constexpr FmtFlags FloatField = Scientific | Fixed; + + void set_flag(FmtFlags FormatFlag) const { Flags |= FormatFlag; } + + void unset_flag(FmtFlags FormatFlag) const { Flags &= ~FormatFlag; } + + // This method is used to set the flag for base and float manipulators. These + // flags are mutually exclusive and base/float field needs to be cleared + // before the setting new flag. + void set_flag(FmtFlags FormatFlag, FmtFlags Mask) const { + unset_flag(Mask); + Flags |= FormatFlag & Mask; + } + + // Set the flags which correspond to the input stream manipulator. + void set_manipulator(const stream_manipulator &SM) const { + switch (SM) { + case stream_manipulator::dec: + set_flag(Dec, BaseField); + break; + case stream_manipulator::hex: + set_flag(Hex, BaseField); + break; + case stream_manipulator::oct: + set_flag(Oct, BaseField); + break; + case stream_manipulator::noshowbase: + unset_flag(ShowBase); + break; + case stream_manipulator::showbase: + set_flag(ShowBase); + break; + case stream_manipulator::noshowpos: + unset_flag(ShowPos); + break; + case stream_manipulator::showpos: + set_flag(ShowPos); + break; + case stream_manipulator::fixed: + set_flag(Fixed, FloatField); + break; + case stream_manipulator::scientific: + set_flag(Scientific, FloatField); + break; + case stream_manipulator::hexfloat: + set_flag(Fixed | Scientific, FloatField); + break; + case stream_manipulator::defaultfloat: + unset_flag(FloatField); + break; + default: + // Unknown manipulator + break; + } + } + + friend const stream &operator<<(const stream &, const char); + friend const stream &operator<<(const stream &, const char *); + template + friend typename std::enable_if::value, + const stream &>::type + operator<<(const stream &, const ValueType &); + friend const stream &operator<<(const stream &, const stream_manipulator &); + + // Helper method to update offset atomically according to the provided + // operand size of the output operator. Return true if offset is updated and + // false in case of overflow. + bool update_offset(unsigned Size, unsigned &Cur) const { + unsigned New; + do { + Cur = OffsetAcc[0].load(); + if (Acc.get_count() - Cur < Size) + // Overflow + return false; + New = Cur + Size; + } while (!OffsetAcc[0].compare_exchange_strong(Cur, New)); + return true; + } +}; + +// Character +inline const stream &operator<<(const stream &Out, const char C) { + unsigned Cur; + if (!Out.update_offset(1, Cur)) + return Out; + Out.Acc[Cur] = C; + return Out; +} + +// String +inline const stream &operator<<(const stream &Out, const char *Str) { + unsigned Len; + for (Len = 0; Str[Len] != '\0'; Len++) + ; + + unsigned Cur; + if (!Out.update_offset(Len, Cur)) + return Out; + + for (size_t i = 0; i < Len; i++) { + Out.Acc[i + Cur] = Str[i]; + } + return Out; +} + +// Boolean +inline const stream &operator<<(const stream &Out, const bool &RHS) { + Out << (RHS ? "true" : "false"); + return Out; +} + +// Integral +template +typename std::enable_if::value, + const stream &>::type +operator<<(const stream &Out, const ValueType &RHS) { + // TODO + return Out; +} + +// Floating points + +inline const stream &operator<<(const stream &Out, const float &RHS) { + // TODO + return Out; +} + +inline const stream &operator<<(const stream &Out, const double &RHS) { + // TODO + return Out; +} + +inline const stream &operator<<(const stream &Out, const half &RHS) { + // TODO + return Out; +} + +// Pointer + +template +inline const stream &operator<<(const stream &Out, + const multi_ptr &RHS) { + // TODO + return Out; +} + +template +const stream &operator<<(const stream &Out, const T *RHS) { + // TODO + return Out; +} + +// Manipulators + +inline const stream &operator<<(const stream &Out, + const __precision_manipulator__ &RHS) { + // TODO + return Out; +} + +inline const stream &operator<<(const stream &Out, + const __width_manipulator__ &RHS) { + // TODO + return Out; +} + +inline const stream &operator<<(const stream &Out, + const stream_manipulator &RHS) { + switch (RHS) { + case stream_manipulator::endl: + Out << '\n'; + break; + default: + Out.set_manipulator(RHS); + } + return Out; +} + +// Vec + +template +const stream &operator<<(const stream &Out, const vec &RHS) { + // TODO + return Out; +} + +// SYCL types + +template +inline const stream &operator<<(const stream &Out, const id &RHS) { + // TODO + return Out; +} + +template +inline const stream &operator<<(const stream &Out, + const range &RHS) { + // TODO + return Out; +} + +template +inline const stream &operator<<(const stream &Out, + const item &RHS) { + // TODO + return Out; +} + +template +inline const stream &operator<<(const stream &Out, + const nd_range &RHS) { + // TODO + return Out; +} + +template +inline const stream &operator<<(const stream &Out, + const nd_item &RHS) { + // TODO + return Out; +} + +template +inline const stream &operator<<(const stream &Out, + const group &RHS) { + // TODO + return Out; +} + +} // namespace sycl +} // namespace cl +namespace std { +template <> struct hash { + size_t operator()(const cl::sycl::stream &S) const { +#ifdef __SYCL_DEVICE_ONLY__ + return 0; +#else + return hash>()( + cl::sycl::detail::getSyclObjImpl(S)); +#endif + } +}; +} // namespace std + diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index 6ac31ac34ce58..f8459bf78cc80 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -15,6 +15,7 @@ #include #include #include +#include #include #include @@ -202,6 +203,14 @@ AllocaCommand *ExecCGCommand::getAllocaForReq(Requirement *Req) { throw runtime_error("Alloca for command not found"); } +void ExecCGCommand::flushStreams() { + assert(CommandGroup->getType() == CG::KERNEL && "Expected kernel"); + for (auto StreamImplPtr : + ((CGExecKernel *)MCommandGroup.get())->getStreams()) { + StreamImplPtr->flush(); + } +} + MemCpyCommandHost::MemCpyCommandHost(Requirement SrcReq, AllocaCommand *SrcAlloca, Requirement *DstAcc, QueueImplPtr SrcQueue, diff --git a/sycl/source/detail/scheduler/scheduler.cpp b/sycl/source/detail/scheduler/scheduler.cpp index a6db754f46811..d4e25b456880e 100644 --- a/sycl/source/detail/scheduler/scheduler.cpp +++ b/sycl/source/detail/scheduler/scheduler.cpp @@ -50,24 +50,31 @@ void Scheduler::waitForRecordToFinish(GraphBuilder::MemObjRecord *Record) { EventImplPtr Scheduler::addCG(std::unique_ptr CommandGroup, QueueImplPtr Queue) { - std::lock_guard lock(MGraphLock); - Command *NewCmd = nullptr; - switch (CommandGroup->getType()) { - case CG::UPDATE_HOST: - NewCmd = MGraphBuilder.addCGUpdateHost(std::move(CommandGroup), - DefaultHostQueue); - break; - default: - NewCmd = MGraphBuilder.addCG(std::move(CommandGroup), std::move(Queue)); + const bool IsKernel = CommandGroup->getType() == CG::KERNEL; + { + std::lock_guard Lock(MGraphLock); + + switch (CommandGroup->getType()) { + case CG::UPDATE_HOST: + NewCmd = MGraphBuilder.addCGUpdateHost(std::move(CommandGroup), + DefaultHostQueue); + break; + default: + NewCmd = MGraphBuilder.addCG(std::move(CommandGroup), std::move(Queue)); + } + + // TODO: Check if lazy mode. + Command *FailedCommand = GraphProcessor::enqueueCommand(NewCmd); + MGraphBuilder.cleanupCommands(); + if (FailedCommand) + // TODO: Reschedule commands. + throw runtime_error("Enqueue process failed."); } - // TODO: Check if lazy mode. - Command *FailedCommand = GraphProcessor::enqueueCommand(NewCmd); - MGraphBuilder.cleanupCommands(); - if (FailedCommand) - // TODO: Reschedule commands. - throw runtime_error("Enqueue process failed."); + if (IsKernel) + ((ExecCGCommand *)NewCmd)->flushStreams(); + return NewCmd->getEvent(); } diff --git a/sycl/source/detail/stream_impl.cpp b/sycl/source/detail/stream_impl.cpp new file mode 100644 index 0000000000000..b96922aeecb4f --- /dev/null +++ b/sycl/source/detail/stream_impl.cpp @@ -0,0 +1,44 @@ +//==----------------- stream_impl.cpp - SYCL standard header file ---------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include +#include + +namespace cl { +namespace sycl { +namespace detail { + +stream_impl::stream_impl(size_t BufferSize, size_t MaxStatementSize, + handler &CGH) + : BufferSize_(BufferSize), + // Allocate additional place for the offset variable and the end of line + // symbol. Initialize buffer with zeros, this is needed for two reasons: + // 1. We don't need to care about end of line when printing out streamed + // data. + // 2. Offset is properly initialized. + Data(BufferSize + OffsetSize + 1, 0), + Buf(Data.data(), range<1>(BufferSize + OffsetSize + 1), + {property::buffer::use_host_ptr()}), + MaxStatementSize_(MaxStatementSize) {} + +size_t stream_impl::get_size() const { return BufferSize_; } + +size_t stream_impl::get_max_statement_size() const { return MaxStatementSize_; } + +void stream_impl::flush() { + // Access the stream buffer on the host. This access guarantees that kernel is + // executed and buffer contains streamed data. + auto HostAcc = Buf.get_access( + range<1>(BufferSize_), id<1>(OffsetSize)); + + printf("%s", HostAcc.get_pointer()); +} +} // namespace detail +} // namespace sycl +} // namespace cl + diff --git a/sycl/source/stream.cpp b/sycl/source/stream.cpp new file mode 100644 index 0000000000000..599a03299d4c8 --- /dev/null +++ b/sycl/source/stream.cpp @@ -0,0 +1,35 @@ +//==------------------- stream.cpp - SYCL standard header file ------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include + +namespace cl { +namespace sycl { + +stream::stream(size_t BufferSize, size_t MaxStatementSize, handler &CGH) + : impl(std::make_shared(BufferSize, MaxStatementSize, + CGH)), + Acc(impl->getAccessor(CGH)), OffsetAcc(impl->getOffsetAccessor(CGH)) { + // Save stream implementation in the handler so that stream will be alive + // during kernel execution + CGH.addStream(impl); +} + +size_t stream::get_size() const { return impl->get_size(); } + +size_t stream::get_max_statement_size() const { + return impl->get_max_statement_size(); +} + +bool stream::operator==(const stream &RHS) const { return (impl == RHS.impl); } + +bool stream::operator!=(const stream &RHS) const { return !(impl == RHS.impl); } + +} // namespace sycl +} // namespace cl + diff --git a/sycl/test/basic_tests/stream.cpp b/sycl/test/basic_tests/stream.cpp new file mode 100644 index 0000000000000..700fb0701bd5b --- /dev/null +++ b/sycl/test/basic_tests/stream.cpp @@ -0,0 +1,128 @@ +// RUN: %clang -std=c++11 -fsycl -lstdc++ %s -o %t.out -lOpenCL -lsycl +// RUN: env SYCL_DEVICE_TYPE=HOST %t.out | FileCheck %s +// RUN: %CPU_RUN_PLACEHOLDER %t.out %CPU_CHECK_PLACEHOLDER +// RUN: %GPU_RUN_PLACEHOLDER %t.out %GPU_CHECK_PLACEHOLDER +// RUN: %ACC_RUN_PLACEHOLDER %t.out %ACC_CHECK_PLACEHOLDER +//==------------------ stream.cpp - SYCL stream basic test -----------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include +#include +#include + +int main() { + { + cl::sycl::default_selector Selector; + cl::sycl::queue Queue(Selector); + + // Check constructor and getters + Queue.submit([&](cl::sycl::handler &CGH) { + cl::sycl::stream Out(1024, 80, CGH); + assert(Out.get_size() == 1024); + assert(Out.get_max_statement_size() == 80); + }); + + // Check common reference semantics + cl::sycl::hash_class Hasher; + + Queue.submit([&](cl::sycl::handler &CGH) { + cl::sycl::stream Out1(1024, 80, CGH); + cl::sycl::stream Out2(Out1); + + assert(Out1 == Out2); + assert(Hasher(Out1) == Hasher(Out2)); + + cl::sycl::stream Out3(std::move(Out1)); + + assert(Out2 == Out3); + assert(Hasher(Out2) == Hasher(Out3)); + }); + + // Char type + Queue.submit([&](cl::sycl::handler &CGH) { + cl::sycl::stream Out(1024, 80, CGH); + CGH.parallel_for( + cl::sycl::range<1>(10), [=](cl::sycl::id<1> i) { Out << 'a'; }); + }); + Queue.wait(); + + // endl manipulator + // TODO: support cl::sycl::endl. According to specitification endl should be + // constant global variable in cl::sycl which is initialized with + // cl::sycl::stream_manipulator::endl. This approach doesn't currently work, + // variable is not initialized in the kernel code, it contains some garbage + // value. + Queue.submit([&](cl::sycl::handler &CGH) { + cl::sycl::stream Out(1024, 80, CGH); + CGH.single_task( + [=]() { Out << cl::sycl::stream_manipulator::endl; }); + }); + Queue.wait(); + + // String type + Queue.submit([&](cl::sycl::handler &CGH) { + cl::sycl::stream Out(1024, 80, CGH); + CGH.parallel_for( + cl::sycl::range<1>(10), + [=](cl::sycl::id<1> i) { Out << "Hello, World!\n"; }); + }); + Queue.wait(); + + // Boolean type + Queue.submit([&](cl::sycl::handler &CGH) { + cl::sycl::stream Out(1024, 80, CGH); + CGH.single_task([=]() { Out << true; }); + }); + Queue.wait(); + + Queue.submit([&](cl::sycl::handler &CGH) { + cl::sycl::stream Out(1024, 80, CGH); + CGH.single_task([=]() { Out << false; }); + }); + Queue.wait(); + + // Multiple streams in command group + Queue.submit([&](cl::sycl::handler &CGH) { + cl::sycl::stream Out1(1024, 80, CGH); + cl::sycl::stream Out2(500, 10, CGH); + CGH.parallel_for(cl::sycl::range<1>(2), + [=](cl::sycl::id<1> i) { + Out1 << "Hello, World!\n"; + Out2 << "Hello, World!\n"; + }); + }); + Queue.wait(); + + // The case when stream buffer is full. To check that there is no problem + // with end of line symbol when printing out the stream buffer. + Queue.submit([&](cl::sycl::handler &CGH) { + cl::sycl::stream Out(10, 10, CGH); + CGH.parallel_for( + cl::sycl::range<1>(2), + [=](cl::sycl::id<1> i) { Out << "aaaaaaaaa\n"; }); + }); + Queue.wait(); + } + return 0; +} +// CHECK: aaaaaaaaaa +// CHECK-NEXT: Hello, World! +// CHECK-NEXT: Hello, World! +// CHECK-NEXT: Hello, World! +// CHECK-NEXT: Hello, World! +// CHECK-NEXT: Hello, World! +// CHECK-NEXT: Hello, World! +// CHECK-NEXT: Hello, World! +// CHECK-NEXT: Hello, World! +// CHECK-NEXT: Hello, World! +// CHECK-NEXT: Hello, World! +// CHECK-NEXT: truefalseHello, World! +// CHECK-NEXT: Hello, World! +// CHECK-NEXT: Hello, World! +// CHECK-NEXT: Hello, World! +// CHECK-NEXT: aaaaaaaaa diff --git a/sycl/test/lit.cfg b/sycl/test/lit.cfg index e74ed709e9ca6..c847a50c1fadc 100644 --- a/sycl/test/lit.cfg +++ b/sycl/test/lit.cfg @@ -76,23 +76,32 @@ def getDeviceCount(device_type): cpu_run_substitute = "echo" +cpu_check_substitute = "" if getDeviceCount("cpu"): print("Found available CPU device") cpu_run_substitute = "env SYCL_DEVICE_TYPE=CPU " + cpu_check_substitute = "| FileCheck %s" config.substitutions.append( ('%CPU_RUN_PLACEHOLDER', cpu_run_substitute) ) +config.substitutions.append( ('%CPU_CHECK_PLACEHOLDER', cpu_check_substitute) ) gpu_run_substitute = "echo" +gpu_check_substitute = "" if getDeviceCount("gpu"): print("Found available GPU device") gpu_run_substitute = " env SYCL_DEVICE_TYPE=GPU " + gpu_check_substitute = "| FileCheck %s" config.available_features.add('gpu') config.substitutions.append( ('%GPU_RUN_PLACEHOLDER', gpu_run_substitute) ) +config.substitutions.append( ('%GPU_CHECK_PLACEHOLDER', gpu_check_substitute) ) acc_run_substitute = "echo" +acc_check_substitute = "" if getDeviceCount("accelerator"): print("Found available accelerator device") acc_run_substitute = " env SYCL_DEVICE_TYPE=ACC " + acc_check_substitute = "| FileCheck %s" config.substitutions.append( ('%ACC_RUN_PLACEHOLDER', acc_run_substitute) ) +config.substitutions.append( ('%ACC_CHECK_PLACEHOLDER', acc_check_substitute) ) path = config.environment['PATH'] path = os.path.pathsep.join((config.llvm_tools_dir, path))