diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h index 070a1174d0e98..2af95e31d3ca6 100644 --- a/clang/include/clang/Sema/Sema.h +++ b/clang/include/clang/Sema/Sema.h @@ -13182,7 +13182,7 @@ class Sema final { void checkSYCLDeviceVarDecl(VarDecl *Var); void copySYCLKernelAttrs(const CXXRecordDecl *KernelObj); void ConstructOpenCLKernel(FunctionDecl *KernelCallerFunc, MangleContext &MC); - void MarkDevice(); + void MarkDevices(); /// Emit a diagnostic about the given attribute having a deprecated name, and /// also emit a fixit hint to generate the new attribute name. diff --git a/clang/lib/Sema/Sema.cpp b/clang/lib/Sema/Sema.cpp index 4d88f67e76dff..8d0a95a4ec9cd 100644 --- a/clang/lib/Sema/Sema.cpp +++ b/clang/lib/Sema/Sema.cpp @@ -1039,7 +1039,7 @@ void Sema::ActOnEndOfTranslationUnitFragment(TUFragmentKind Kind) { SyclIntHeader->emit(getLangOpts().SYCLIntHeader); if (SyclIntFooter != nullptr) SyclIntFooter->emit(getLangOpts().SYCLIntFooter); - MarkDevice(); + MarkDevices(); } emitDeferredDiags(); diff --git a/clang/lib/Sema/SemaSYCL.cpp b/clang/lib/Sema/SemaSYCL.cpp index c7be03e5f813f..742b132c1bfbd 100644 --- a/clang/lib/Sema/SemaSYCL.cpp +++ b/clang/lib/Sema/SemaSYCL.cpp @@ -325,8 +325,8 @@ static int64_t getIntExprValue(const Expr *E, ASTContext &Ctx) { // Collect function attributes related to SYCL. static void collectSYCLAttributes(Sema &S, FunctionDecl *FD, - llvm::SmallVector &Attrs, - bool DirectlyCalled = true) { + llvm::SmallVectorImpl &Attrs, + bool DirectlyCalled) { if (!FD->hasAttrs()) return; @@ -352,30 +352,46 @@ static void collectSYCLAttributes(Sema &S, FunctionDecl *FD, FD->dropAttr(); } } + + // Attributes that should not be propagated from device functions to a kernel. + if (DirectlyCalled) { + llvm::copy_if(FD->getAttrs(), std::back_inserter(Attrs), [](Attr *A) { + return isa(A); + }); + } } -class MarkDeviceFunction : public RecursiveASTVisitor { +class DiagDeviceFunction : public RecursiveASTVisitor { // Used to keep track of the constexpr depth, so we know whether to skip // diagnostics. unsigned ConstexprDepth = 0; + Sema &SemaRef; + const llvm::SmallPtrSetImpl &RecursiveFuncs; + struct ConstexprDepthRAII { - MarkDeviceFunction &MDF; + DiagDeviceFunction &DDF; bool Increment; - ConstexprDepthRAII(MarkDeviceFunction &MDF, bool Increment = true) - : MDF(MDF), Increment(Increment) { + ConstexprDepthRAII(DiagDeviceFunction &DDF, bool Increment = true) + : DDF(DDF), Increment(Increment) { if (Increment) - ++MDF.ConstexprDepth; + ++DDF.ConstexprDepth; } ~ConstexprDepthRAII() { if (Increment) - --MDF.ConstexprDepth; + --DDF.ConstexprDepth; } }; public: - MarkDeviceFunction(Sema &S) - : RecursiveASTVisitor(), SemaRef(S) {} + DiagDeviceFunction( + Sema &S, + const llvm::SmallPtrSetImpl &RecursiveFuncs) + : RecursiveASTVisitor(), SemaRef(S), RecursiveFuncs(RecursiveFuncs) {} + + void CheckBody(Stmt *ToBeDiagnosed) { TraverseStmt(ToBeDiagnosed); } bool VisitCallExpr(CallExpr *e) { if (FunctionDecl *Callee = e->getDirectCallee()) { @@ -386,7 +402,7 @@ class MarkDeviceFunction : public RecursiveASTVisitor { // instantiation as template functions. It means that // all functions used by kernel have already been parsed and have // definitions. 
- if (RecursiveSet.count(Callee) && !ConstexprDepth) { + if (RecursiveFuncs.count(Callee) && !ConstexprDepth) { SemaRef.Diag(e->getExprLoc(), diag::err_sycl_restrict) << Sema::KernelCallRecursiveFunction; SemaRef.Diag(Callee->getSourceRange().getBegin(), @@ -479,148 +495,179 @@ class MarkDeviceFunction : public RecursiveASTVisitor { ConstexprDepthRAII R(*this); return TraverseStmt(ArrLoc.getSizeExpr()); } +}; - // The call graph for this translation unit. - CallGraph SYCLCG; - // The set of functions called by a kernel function. - llvm::SmallPtrSet KernelSet; - // The set of recursive functions identified while building the - // kernel set, this is used for error diagnostics. - llvm::SmallPtrSet RecursiveSet; - // Determines whether the function FD is recursive. - // CalleeNode is a function which is called either directly - // or indirectly from FD. If recursion is detected then create - // diagnostic notes on each function as the callstack is unwound. - void CollectKernelSet(FunctionDecl *CalleeNode, FunctionDecl *FD, - llvm::SmallPtrSet VisitedSet) { - // We're currently checking CalleeNode on a different - // trace through the CallGraph, we avoid infinite recursion - // by using KernelSet to keep track of this. - if (!KernelSet.insert(CalleeNode).second) - // Previously seen, stop recursion. - return; - if (CallGraphNode *N = SYCLCG.getNode(CalleeNode)) { - for (const CallGraphNode *CI : *N) { - if (FunctionDecl *Callee = dyn_cast(CI->getDecl())) { - Callee = Callee->getCanonicalDecl(); - if (VisitedSet.count(Callee)) { - // There's a stack frame to visit this Callee above - // this invocation. Do not recurse here. - RecursiveSet.insert(Callee); - RecursiveSet.insert(CalleeNode); - } else { - VisitedSet.insert(Callee); - CollectKernelSet(Callee, FD, VisitedSet); - VisitedSet.erase(Callee); - } - } - } - } +// This type manages the list of device functions and recursive functions, as +// well as an entry point for attribute collection, for the translation unit +// during MarkDevices. On construction, this type makes sure that all of the +// root-device functions, (that is, those marked with SYCL_EXTERNAL) are +// collected. On destruction, it manages and runs the diagnostics required. +// When processing individual kernel/external functions, the +// SingleDeviceFunctionTracker type updates this type. +class DeviceFunctionTracker { + friend class SingleDeviceFunctionTracker; + CallGraph CG; + Sema &SemaRef; + // The list of functions used on the device, kept so we can diagnose on them + // later. + llvm::SmallPtrSet DeviceFunctions; + llvm::SmallPtrSet RecursiveFunctions; + + void CollectSyclExternalFuncs() { + for (CallGraphNode::CallRecord Record : CG.getRoot()->callees()) + if (auto *FD = dyn_cast(Record.Callee->getDecl())) + if (FD->hasBody() && FD->hasAttr()) + SemaRef.addSyclDeviceDecl(FD); } - // Traverses over CallGraph to collect list of attributes applied to - // functions called by SYCLKernel (either directly and indirectly) which needs - // to be propagated down to callers and applied to SYCL kernels. - // For example, reqd_work_group_size, vec_len_hint, reqd_sub_group_size - // Attributes applied to SYCLKernel are also included - // Returns the kernel body function found during traversal. 
- FunctionDecl * - CollectPossibleKernelAttributes(FunctionDecl *SYCLKernel, - llvm::SmallVector &Attrs) { - typedef std::pair ChildParentPair; - llvm::SmallPtrSet Visited; - llvm::SmallVector WorkList; - WorkList.push_back({SYCLKernel, nullptr}); - FunctionDecl *KernelBody = nullptr; + CallGraphNode *getNodeForKernel(FunctionDecl *Kernel) { + assert(CG.getNode(Kernel) && "No call graph entry for a kernel?"); + return CG.getNode(Kernel); + } - while (!WorkList.empty()) { - FunctionDecl *FD = WorkList.back().first; - FunctionDecl *ParentFD = WorkList.back().second; - - // To implement rounding-up of a parallel-for range the - // SYCL header implementation modifies the kernel call like this: - // auto Wrapper = [=](TransformedArgType Arg) { - // if (Arg[0] >= NumWorkItems[0]) - // return; - // Arg.set_allowed_range(NumWorkItems); - // KernelFunc(Arg); - // }; - // - // This transformation leads to a condition where a kernel body - // function becomes callable from a new kernel body function. - // Hence this test. - if ((ParentFD == KernelBody) && isSYCLKernelBodyFunction(FD)) - KernelBody = FD; + void AddSingleFunction( + const llvm::SmallPtrSetImpl &DevFuncs, + const llvm::SmallPtrSetImpl &Recursive) { + DeviceFunctions.insert(DevFuncs.begin(), DevFuncs.end()); + RecursiveFunctions.insert(Recursive.begin(), Recursive.end()); + } - if ((ParentFD == SYCLKernel) && isSYCLKernelBodyFunction(FD)) { - assert(!KernelBody && "inconsistent call graph - only one kernel body " - "function can be called"); - KernelBody = FD; - } +public: + DeviceFunctionTracker(Sema &S) : SemaRef(S) { + CG.addToCallGraph(S.getASTContext().getTranslationUnitDecl()); + CollectSyclExternalFuncs(); + } - WorkList.pop_back(); - if (!Visited.insert(FD).second) - continue; // We've already seen this Decl + ~DeviceFunctionTracker() { + DiagDeviceFunction Diagnoser{SemaRef, RecursiveFunctions}; + for (const FunctionDecl *FD : DeviceFunctions) + if (const FunctionDecl *Def = FD->getDefinition()) + Diagnoser.CheckBody(Def->getBody()); + } +}; - // Gather all attributes of FD that are SYCL related. - // Some attributes are allowed only on lambda functions and function - // objects called directly from a kernel (i.e. the one passed to the - // single_task or parallel_for functions). - bool DirectlyCalled = (ParentFD == SYCLKernel); - collectSYCLAttributes(SemaRef, FD, Attrs, DirectlyCalled); - - // Attribute "loop_fuse" can be applied explicitly on kernel function. - // Attribute should not be propagated from device functions to kernel. - if (auto *A = FD->getAttr()) { - if (ParentFD == SYCLKernel) { - Attrs.push_back(A); - } - } +// This type does the heavy lifting for the management of device functions, +// recursive function detection, and attribute collection for a single +// kernel/external function. It walks the callgraph to find all functions that +// are called, marks the recursive-functions, and figures out the list of +// attributes that apply to this kernel. +// +// Upon destruction, this type updates the DeviceFunctionTracker. +class SingleDeviceFunctionTracker { + DeviceFunctionTracker &Parent; + FunctionDecl *SYCLKernel = nullptr; + FunctionDecl *KernelBody = nullptr; + llvm::SmallPtrSet DeviceFunctions; + llvm::SmallPtrSet RecursiveFunctions; + llvm::SmallVector CollectedAttributes; - // Attribute "max_concurrency" is applied to device functions only. The - // attribute is not propagated to the caller. 
- if (auto *A = FD->getAttr()) - if (ParentFD == SYCLKernel) { - Attrs.push_back(A); - } + FunctionDecl *GetFDFromNode(CallGraphNode *Node) { + FunctionDecl *FD = Node->getDecl()->getAsFunction(); + if (!FD) + return nullptr; - // Attribute "disable_loop_pipelining" can be applied explicitly on - // kernel function. Attribute should not be propagated from device - // functions to kernel. - if (auto *A = FD->getAttr()) { - if (ParentFD == SYCLKernel) { - Attrs.push_back(A); - } - } + return FD->getMostRecentDecl(); + } - // Attribute "initiation_interval" can be applied explicitly on - // kernel function. Attribute should not be propagated from device - // functions to kernel. - if (auto *A = FD->getAttr()) { - if (ParentFD == SYCLKernel) { - Attrs.push_back(A); - } - } + void VisitCallNode(CallGraphNode *Node, + llvm::SmallVectorImpl &CallStack) { + FunctionDecl *CurrentDecl = GetFDFromNode(Node); - // TODO: vec_len_hint should be handled here + // If this isn't a function, I don't think there is anything we can do here. + if (!CurrentDecl) + return; - CallGraphNode *N = SYCLCG.getNode(FD); - if (!N) - continue; + // Determine if this is a recursive function. If so, we're done. + if (llvm::is_contained(CallStack, CurrentDecl)) { + RecursiveFunctions.insert(CurrentDecl->getCanonicalDecl()); + return; + } - for (const CallGraphNode *CI : *N) { - if (auto *Callee = dyn_cast(CI->getDecl())) { - Callee = Callee->getMostRecentDecl(); - if (!Visited.count(Callee)) - WorkList.push_back({Callee, FD}); - } + // We previously thought we could skip this function if we'd seen it before, + // but if we haven't seen it before in this call graph, we can end up + // missing a recursive call. SO, we have to revisit call-graphs we've + // already seen, just in case it ALSO has recursion. For example: + // void recurse1(); + // void recurse2() { recurse1(); } + // void recurse1() { recurse2(); } + // void CallerInKernel() { recurse1(); recurse2(); } + // When checking 'recurse1', we'd have ended up 'visiting' recurse2 without + // realizing it was recursive, since we never went into the + // child-of-its-child, since THAT was recursive and exited early out of + // necessity. + // Then when we go to visit the kernel's call to recurse2, we would + // immediately escape not noticing it was recursive. SO, we have to do a + // little extra work in this case, and make sure we visit the entire call + // graph. + DeviceFunctions.insert(CurrentDecl); + + // Collect attributes for functions that aren't the root kernel. + if (!CallStack.empty()) { + bool DirectlyCalled = CallStack.size() == 1; + collectSYCLAttributes(Parent.SemaRef, CurrentDecl, CollectedAttributes, + DirectlyCalled); + } + + // Calculate the kernel body. Note the 'isSYCLKernelBodyFunction' only + // tests that it is operator(), so hopefully this doesn't get us too many + // false-positives. + if (isSYCLKernelBodyFunction(CurrentDecl)) { + // This is a direct callee of the kernel. 
+ if (CallStack.size() == 1) { + assert(!KernelBody && "inconsistent call graph - only one kernel body " + "function can be called"); + KernelBody = CurrentDecl; + } else if (CallStack.size() == 2 && KernelBody == CallStack.back()) { + // To implement rounding-up of a parallel-for range the + // SYCL header implementation modifies the kernel call like this: + // auto Wrapper = [=](TransformedArgType Arg) { + // if (Arg[0] >= NumWorkItems[0]) + // return; + // Arg.set_allowed_range(NumWorkItems); + // KernelFunc(Arg); + // }; + // + // This transformation leads to a condition where a kernel body + // function becomes callable from a new kernel body function. + // Hence this test. + // FIXME: We need to be more selective here, this can be hit by simply + // having a kernel lambda with a lambda call inside of it. + KernelBody = CurrentDecl; } } - return KernelBody; + + // Recurse. + CallStack.push_back(CurrentDecl); + for (CallGraphNode *CI : Node->callees()) { + VisitCallNode(CI, CallStack); + } + CallStack.pop_back(); } -private: - Sema &SemaRef; + // Function to walk the call graph and identify the important information. + void Init() { + CallGraphNode *KernelNode = Parent.getNodeForKernel(SYCLKernel); + llvm::SmallVector CallStack; + VisitCallNode(KernelNode, CallStack); + } + +public: + SingleDeviceFunctionTracker(DeviceFunctionTracker &P, Decl *Kernel) + : Parent(P), SYCLKernel(Kernel->getAsFunction()) { + Init(); + } + + FunctionDecl *GetSYCLKernel() { return SYCLKernel; } + + FunctionDecl *GetKernelBody() { return KernelBody; } + + llvm::SmallVectorImpl &GetCollectedAttributes() { + return CollectedAttributes; + } + + ~SingleDeviceFunctionTracker() { + Parent.AddSingleFunction(DeviceFunctions, RecursiveFunctions); + } }; class KernelBodyTransform : public TreeTransform { @@ -3411,7 +3458,7 @@ void Sema::copySYCLKernelAttrs(const CXXRecordDecl *KernelObj) { assert(KernelBody && "improper parallel_for wrap"); if (KernelBody) { llvm::SmallVector Attrs; - collectSYCLAttributes(*this, KernelBody, Attrs); + collectSYCLAttributes(*this, KernelBody, Attrs, /*DirectlyCalled*/ true); if (!Attrs.empty()) llvm::for_each(Attrs, [OpParens](Attr *A) { OpParens->addAttr(A); }); } @@ -3489,147 +3536,132 @@ void Sema::ConstructOpenCLKernel(FunctionDecl *KernelCallerFunc, } } -void Sema::MarkDevice(void) { - // Create the call graph so we can detect recursion and check the validity - // of new operator overrides. Add the kernel function itself in case - // it is recursive. - MarkDeviceFunction Marker(*this); - Marker.SYCLCG.addToCallGraph(getASTContext().getTranslationUnitDecl()); - - // Iterate through SYCL_EXTERNAL functions and add them to the device decls. - for (const auto &entry : *Marker.SYCLCG.getRoot()) { - if (auto *FD = dyn_cast(entry.Callee->getDecl())) { - if (FD->hasAttr() && !FD->hasAttr() && - FD->hasBody()) - addSyclDeviceDecl(FD); +static void PropagateAndDiagnoseDeviceAttr(Sema &S, Attr *A, + FunctionDecl *SYCLKernel, + FunctionDecl *KernelBody) { + switch (A->getKind()) { + case attr::Kind::IntelReqdSubGroupSize: { + auto *Attr = cast(A); + const auto *KBSimdAttr = + KernelBody ? 
KernelBody->getAttr() : nullptr; + if (auto *Existing = SYCLKernel->getAttr()) { + if (getIntExprValue(Existing->getValue(), S.getASTContext()) != + getIntExprValue(Attr->getValue(), S.getASTContext())) { + S.Diag(SYCLKernel->getLocation(), + diag::err_conflicting_sycl_kernel_attributes); + S.Diag(Existing->getLocation(), diag::note_conflicting_attribute); + S.Diag(Attr->getLocation(), diag::note_conflicting_attribute); + SYCLKernel->setInvalidDecl(); + } + } else if (KBSimdAttr && + (getIntExprValue(Attr->getValue(), S.getASTContext()) != 1)) { + reportConflictingAttrs(S, KernelBody, KBSimdAttr, Attr); + } else { + SYCLKernel->addAttr(A); } + break; } - - for (Decl *D : syclDeviceDecls()) { - if (auto SYCLKernel = dyn_cast(D)) { - llvm::SmallPtrSet VisitedSet; - Marker.CollectKernelSet(SYCLKernel, SYCLKernel, VisitedSet); - - // Let's propagate attributes from device functions to a SYCL kernels - llvm::SmallVector Attrs; - // This function collects all kernel attributes which might be applied to - // a device functions, but need to be propagated down to callers, i.e. - // SYCL kernels - FunctionDecl *KernelBody = - Marker.CollectPossibleKernelAttributes(SYCLKernel, Attrs); - - for (auto *A : Attrs) { - switch (A->getKind()) { - case attr::Kind::IntelReqdSubGroupSize: { - auto *Attr = cast(A); - const auto *KBSimdAttr = - KernelBody ? KernelBody->getAttr() : nullptr; - if (auto *Existing = - SYCLKernel->getAttr()) { - if (getIntExprValue(Existing->getValue(), getASTContext()) != - getIntExprValue(Attr->getValue(), getASTContext())) { - Diag(SYCLKernel->getLocation(), - diag::err_conflicting_sycl_kernel_attributes); - Diag(Existing->getLocation(), diag::note_conflicting_attribute); - Diag(Attr->getLocation(), diag::note_conflicting_attribute); - SYCLKernel->setInvalidDecl(); - } - } else if (KBSimdAttr && (getIntExprValue(Attr->getValue(), - getASTContext()) != 1)) { - reportConflictingAttrs(*this, KernelBody, KBSimdAttr, Attr); - } else { - SYCLKernel->addAttr(A); - } - break; - } - case attr::Kind::ReqdWorkGroupSize: { - auto *RWGSA = cast(A); - if (auto *Existing = SYCLKernel->getAttr()) { - ASTContext &Ctx = getASTContext(); - if (Existing->getXDimVal(Ctx) != RWGSA->getXDimVal(Ctx) || - Existing->getYDimVal(Ctx) != RWGSA->getYDimVal(Ctx) || - Existing->getZDimVal(Ctx) != RWGSA->getZDimVal(Ctx)) { - Diag(SYCLKernel->getLocation(), - diag::err_conflicting_sycl_kernel_attributes); - Diag(Existing->getLocation(), diag::note_conflicting_attribute); - Diag(RWGSA->getLocation(), diag::note_conflicting_attribute); - SYCLKernel->setInvalidDecl(); - } - } else if (auto *Existing = - SYCLKernel->getAttr()) { - ASTContext &Ctx = getASTContext(); - if (Existing->getXDimVal(Ctx) < RWGSA->getXDimVal(Ctx) || - Existing->getYDimVal(Ctx) < RWGSA->getYDimVal(Ctx) || - Existing->getZDimVal(Ctx) < RWGSA->getZDimVal(Ctx)) { - Diag(SYCLKernel->getLocation(), - diag::err_conflicting_sycl_kernel_attributes); - Diag(Existing->getLocation(), diag::note_conflicting_attribute); - Diag(RWGSA->getLocation(), diag::note_conflicting_attribute); - SYCLKernel->setInvalidDecl(); - } else { - SYCLKernel->addAttr(A); - } - } else { - SYCLKernel->addAttr(A); - } - break; - } - case attr::Kind::SYCLIntelMaxWorkGroupSize: { - auto *SIMWGSA = cast(A); - if (auto *Existing = SYCLKernel->getAttr()) { - ASTContext &Ctx = getASTContext(); - if (Existing->getXDimVal(Ctx) > SIMWGSA->getXDimVal(Ctx) || - Existing->getYDimVal(Ctx) > SIMWGSA->getYDimVal(Ctx) || - Existing->getZDimVal(Ctx) > SIMWGSA->getZDimVal(Ctx)) { - 
Diag(SYCLKernel->getLocation(), - diag::err_conflicting_sycl_kernel_attributes); - Diag(Existing->getLocation(), diag::note_conflicting_attribute); - Diag(SIMWGSA->getLocation(), diag::note_conflicting_attribute); - SYCLKernel->setInvalidDecl(); - } else { - SYCLKernel->addAttr(A); - } - } else { - SYCLKernel->addAttr(A); - } - break; - } - case attr::Kind::SYCLIntelKernelArgsRestrict: - case attr::Kind::SYCLIntelNumSimdWorkItems: - case attr::Kind::SYCLIntelSchedulerTargetFmaxMhz: - case attr::Kind::SYCLIntelMaxGlobalWorkDim: - case attr::Kind::SYCLIntelNoGlobalWorkOffset: - case attr::Kind::SYCLIntelUseStallEnableClusters: - case attr::Kind::SYCLIntelLoopFuse: - case attr::Kind::SYCLIntelFPGAMaxConcurrency: - case attr::Kind::SYCLIntelFPGADisableLoopPipelining: - case attr::Kind::SYCLIntelFPGAInitiationInterval: - case attr::Kind::SYCLSimd: { - if ((A->getKind() == attr::Kind::SYCLSimd) && KernelBody && - !KernelBody->getAttr()) { - // Usual kernel can't call ESIMD functions. - Diag(KernelBody->getLocation(), - diag::err_sycl_function_attribute_mismatch) - << A; - Diag(A->getLocation(), diag::note_attribute); - KernelBody->setInvalidDecl(); - } else - SYCLKernel->addAttr(A); - break; - } - // TODO: vec_len_hint should be handled here - default: - // Seeing this means that CollectPossibleKernelAttributes was - // updated while this switch wasn't...or something went wrong - llvm_unreachable("Unexpected attribute was collected by " - "CollectPossibleKernelAttributes"); - } + case attr::Kind::ReqdWorkGroupSize: { + auto *RWGSA = cast(A); + if (auto *Existing = SYCLKernel->getAttr()) { + ASTContext &Ctx = S.getASTContext(); + if (Existing->getXDimVal(Ctx) != RWGSA->getXDimVal(Ctx) || + Existing->getYDimVal(Ctx) != RWGSA->getYDimVal(Ctx) || + Existing->getZDimVal(Ctx) != RWGSA->getZDimVal(Ctx)) { + S.Diag(SYCLKernel->getLocation(), + diag::err_conflicting_sycl_kernel_attributes); + S.Diag(Existing->getLocation(), diag::note_conflicting_attribute); + S.Diag(RWGSA->getLocation(), diag::note_conflicting_attribute); + SYCLKernel->setInvalidDecl(); } + } else if (auto *Existing = + SYCLKernel->getAttr()) { + ASTContext &Ctx = S.getASTContext(); + if (Existing->getXDimVal(Ctx) < RWGSA->getXDimVal(Ctx) || + Existing->getYDimVal(Ctx) < RWGSA->getYDimVal(Ctx) || + Existing->getZDimVal(Ctx) < RWGSA->getZDimVal(Ctx)) { + S.Diag(SYCLKernel->getLocation(), + diag::err_conflicting_sycl_kernel_attributes); + S.Diag(Existing->getLocation(), diag::note_conflicting_attribute); + S.Diag(RWGSA->getLocation(), diag::note_conflicting_attribute); + SYCLKernel->setInvalidDecl(); + } else { + SYCLKernel->addAttr(A); + } + } else { + SYCLKernel->addAttr(A); } + break; } - for (const auto &elt : Marker.KernelSet) { - if (FunctionDecl *Def = elt->getDefinition()) - Marker.TraverseStmt(Def->getBody()); + case attr::Kind::SYCLIntelMaxWorkGroupSize: { + auto *SIMWGSA = cast(A); + if (auto *Existing = SYCLKernel->getAttr()) { + ASTContext &Ctx = S.getASTContext(); + if (Existing->getXDimVal(Ctx) > SIMWGSA->getXDimVal(Ctx) || + Existing->getYDimVal(Ctx) > SIMWGSA->getYDimVal(Ctx) || + Existing->getZDimVal(Ctx) > SIMWGSA->getZDimVal(Ctx)) { + S.Diag(SYCLKernel->getLocation(), + diag::err_conflicting_sycl_kernel_attributes); + S.Diag(Existing->getLocation(), diag::note_conflicting_attribute); + S.Diag(SIMWGSA->getLocation(), diag::note_conflicting_attribute); + SYCLKernel->setInvalidDecl(); + } else { + SYCLKernel->addAttr(A); + } + } else { + SYCLKernel->addAttr(A); + } + break; + } + case attr::Kind::SYCLSimd: + if 
(KernelBody && !KernelBody->getAttr()) {
+      // Usual kernel can't call ESIMD functions.
+      S.Diag(KernelBody->getLocation(),
+             diag::err_sycl_function_attribute_mismatch)
+          << A;
+      S.Diag(A->getLocation(), diag::note_attribute);
+      KernelBody->setInvalidDecl();
+      break;
+    }
+    LLVM_FALLTHROUGH;
+  case attr::Kind::SYCLIntelKernelArgsRestrict:
+  case attr::Kind::SYCLIntelNumSimdWorkItems:
+  case attr::Kind::SYCLIntelSchedulerTargetFmaxMhz:
+  case attr::Kind::SYCLIntelMaxGlobalWorkDim:
+  case attr::Kind::SYCLIntelNoGlobalWorkOffset:
+  case attr::Kind::SYCLIntelUseStallEnableClusters:
+  case attr::Kind::SYCLIntelLoopFuse:
+  case attr::Kind::SYCLIntelFPGAMaxConcurrency:
+  case attr::Kind::SYCLIntelFPGADisableLoopPipelining:
+  case attr::Kind::SYCLIntelFPGAInitiationInterval:
+    SYCLKernel->addAttr(A);
+    break;
+  // TODO: vec_len_hint should be handled here
+  default:
+    // Seeing this means that CollectPossibleKernelAttributes was
+    // updated while this switch wasn't...or something went wrong
+    llvm_unreachable("Unexpected attribute was collected by "
+                     "CollectPossibleKernelAttributes");
+  }
+}
+
+void Sema::MarkDevices() {
+  // This Tracker object ensures that the SyclDeviceDecls collection includes
+  // the SYCL_EXTERNAL functions, and manages the diagnostics for all of the
+  // functions in the kernel.
+  DeviceFunctionTracker Tracker(*this);
+
+  for (Decl *D : syclDeviceDecls()) {
+    auto *SYCLKernel = cast(D);
+
+    // This type does the actual analysis on a per-kernel basis. It does this to
+    // make sure that we're only ever dealing with the context of a single
+    // kernel at a time.
+    SingleDeviceFunctionTracker T{Tracker, SYCLKernel};
+
+    for (auto *A : T.GetCollectedAttributes())
+      PropagateAndDiagnoseDeviceAttr(*this, A, T.GetSYCLKernel(),
+                                     T.GetKernelBody());
   }
 }
diff --git a/clang/test/SemaSYCL/allow-constexpr-recursion.cpp b/clang/test/SemaSYCL/allow-constexpr-recursion.cpp
index 924182dd30b05..a73ff61280ce6 100644
--- a/clang/test/SemaSYCL/allow-constexpr-recursion.cpp
+++ b/clang/test/SemaSYCL/allow-constexpr-recursion.cpp
@@ -6,13 +6,11 @@
 
 sycl::queue q;
 
-// expected-note@+1{{function implemented using recursion declared here}}
 constexpr int constexpr_recurse1(int n);
 
 // expected-note@+1 3{{function implemented using recursion declared here}}
 constexpr int constexpr_recurse(int n) {
   if (n)
-    // expected-error@+1{{SYCL kernel cannot call a recursive function}}
     return constexpr_recurse1(n - 1);
   return 103;
 }
@@ -38,6 +36,21 @@ struct ConditionallyExplicitCtor {
 
 void conditionally_noexcept() noexcept(constexpr_recurse(5)) {}
 
+template
+void ConstexprIf1() {
+  if constexpr (I == 1)
+    ConstexprIf1();
+}
+
+// Same as the above, but split up so the diagnostic is more clear.
+// expected-note@+2 2{{function implemented using recursion declared here}}
+template
+void ConstexprIf2() {
+  if constexpr (I == 1)
+    // expected-error@+1{{SYCL kernel cannot call a recursive function}}
+    ConstexprIf2();
+}
+
 // All of the uses of constexpr_recurse here are forced constant expressions, so
 // they should not diagnose.
 void constexpr_recurse_test() {
@@ -64,6 +77,10 @@ void constexpr_recurse_test() {
   }
 
   ConditionallyExplicitCtor c(1);
+
+  ConstexprIf1<0>(); // Should not cause a diagnostic.
+  // expected-error@+1{{SYCL kernel cannot call a recursive function}}
+  ConstexprIf2<1>();
 }
 
 void constexpr_recurse_test_err() {
diff --git a/clang/test/SemaSYCL/check-notdirect-attribute-propagation.cpp b/clang/test/SemaSYCL/check-notdirect-attribute-propagation.cpp
index 03de6a3dbbafa..2a0fe658114f9 100644
--- a/clang/test/SemaSYCL/check-notdirect-attribute-propagation.cpp
+++ b/clang/test/SemaSYCL/check-notdirect-attribute-propagation.cpp
@@ -10,15 +10,15 @@
 }
 #else
-[[cl::reqd_work_group_size(2, 2, 2)]] void not_direct_two() {} // expected-note {{conflicting attribute is here}}
+[[cl::reqd_work_group_size(2, 2, 2)]] void not_direct_two() {} // expected-note 2 {{conflicting attribute is here}}
 
-[[intel::max_work_group_size(1, 1, 1)]] // expected-note {{conflicting attribute is here}}
+[[intel::max_work_group_size(1, 1, 1)]] // expected-note 3 {{conflicting attribute is here}}
 void func_two() {
   not_direct_two();
 }
 
-[[cl::reqd_work_group_size(4, 4, 4)]] // expected-note 2 {{conflicting attribute is here}}
+[[cl::reqd_work_group_size(4, 4, 4)]] // expected-note 1 {{conflicting attribute is here}}
 void func_three() {
   not_direct_two();
@@ -49,6 +49,6 @@ void invoke_foo2() {
     // CHECK: `-SYCLIntelNoGlobalWorkOffsetAttr {{.*}}
     parallel_for([]() {});
 #else
-    parallel_for([]() {}); // expected-error 2 {{conflicting attributes applied to a SYCL kernel or SYCL_EXTERNAL function}}
+    parallel_for([]() {}); // expected-error 3 {{conflicting attributes applied to a SYCL kernel or SYCL_EXTERNAL function}}
 #endif
 }
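
// ---------------------------------------------------------------------------
// Illustrative sketch only, not part of the patch: a standalone C++ model of
// the call-stack based recursion detection that
// SingleDeviceFunctionTracker::VisitCallNode performs above. The Node type,
// Visit() and main() below are invented for illustration; the real code walks
// clang's CallGraph and stores FunctionDecl pointers. It demonstrates the
// point the patch comment makes: nodes are NOT skipped just because an
// earlier path already visited them, otherwise mutual recursion reached
// through a second entry point (recurse2 below) would be missed.
// ---------------------------------------------------------------------------
#include <algorithm>
#include <iostream>
#include <string>
#include <unordered_set>
#include <vector>

struct Node {
  std::string Name;            // stands in for a FunctionDecl
  std::vector<Node *> Callees; // direct callees in the call graph
};

// Depth-first walk over every path from the root. A callee that already
// appears on the current call stack is recursive; the walk stops descending
// into it there, so traversal terminates.
void Visit(Node *N, std::vector<Node *> &CallStack,
           std::unordered_set<std::string> &DeviceFunctions,
           std::unordered_set<std::string> &RecursiveFunctions) {
  if (std::find(CallStack.begin(), CallStack.end(), N) != CallStack.end()) {
    RecursiveFunctions.insert(N->Name);
    return;
  }
  DeviceFunctions.insert(N->Name);
  CallStack.push_back(N);
  for (Node *Callee : N->Callees)
    Visit(Callee, CallStack, DeviceFunctions, RecursiveFunctions);
  CallStack.pop_back();
}

int main() {
  // Models the example from the patch comment:
  //   void recurse1();
  //   void recurse2() { recurse1(); }
  //   void recurse1() { recurse2(); }
  //   void CallerInKernel() { recurse1(); recurse2(); }
  Node R1{"recurse1", {}}, R2{"recurse2", {}}, Kernel{"CallerInKernel", {}};
  R1.Callees = {&R2};
  R2.Callees = {&R1};
  Kernel.Callees = {&R1, &R2};

  std::vector<Node *> Stack;
  std::unordered_set<std::string> Device, Recursive;
  Visit(&Kernel, Stack, Device, Recursive);

  for (const std::string &F : Recursive)
    std::cout << F << " is recursive\n"; // prints both recurse1 and recurse2
}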
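
// ---------------------------------------------------------------------------
// Illustrative sketch only, not part of the patch: a simplified, standalone
// model of the merge policy PropagateAndDiagnoseDeviceAttr applies for
// reqd_work_group_size. The Kernel and ReqdWorkGroupSize types below are made
// up; the real code works on clang AST attributes and calls setInvalidDecl()
// and addAttr(). The policy shown: attach the collected attribute to the
// kernel unless it already carries one with different dimensions, in which
// case diagnose a conflict -- the behavior the updated expected-error counts
// in check-notdirect-attribute-propagation.cpp exercise.
// ---------------------------------------------------------------------------
#include <array>
#include <cstdint>
#include <iostream>
#include <optional>

struct ReqdWorkGroupSize {
  std::array<std::int64_t, 3> Dims;
};

struct Kernel {
  std::optional<ReqdWorkGroupSize> WGSize;
  bool Invalid = false;
};

void PropagateReqdWGSize(Kernel &K, const ReqdWorkGroupSize &A) {
  if (K.WGSize) {
    if (K.WGSize->Dims != A.Dims) {
      // Mirrors err_conflicting_sycl_kernel_attributes + setInvalidDecl().
      std::cerr << "error: conflicting attributes applied to a SYCL kernel\n";
      K.Invalid = true;
    }
    return; // identical duplicate: nothing to do
  }
  K.WGSize = A; // mirrors SYCLKernel->addAttr(A)
}

int main() {
  Kernel K;
  PropagateReqdWGSize(K, {{2, 2, 2}});
  PropagateReqdWGSize(K, {{4, 4, 4}}); // conflicts -> kernel marked invalid
  std::cout << std::boolalpha << K.Invalid << '\n'; // prints true
}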