Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 8 additions & 5 deletions clang/include/clang/Basic/Attr.td
Original file line number Diff line number Diff line change
Expand Up @@ -1854,11 +1854,14 @@ def SYCLIntelFPGAInitiationInterval : StmtAttr {
let Documentation = [SYCLIntelFPGAInitiationIntervalAttrDocs];
}

def SYCLIntelFPGAMaxConcurrency : StmtAttr {
let Spellings = [CXX11<"intelfpga","max_concurrency">,
CXX11<"intel","max_concurrency">];
let Subjects = SubjectList<[ForStmt, CXXForRangeStmt, WhileStmt, DoStmt],
ErrorDiag, "'for', 'while', and 'do' statements">;
def SYCLIntelFPGAMaxConcurrency : DeclOrStmtAttr {
let Spellings = [
CXX11<"intelfpga", "max_concurrency">, CXX11<"intel", "max_concurrency">
];
let Subjects =
SubjectList<[ForStmt, CXXForRangeStmt, WhileStmt, DoStmt, Function],
ErrorDiag,
"'for', 'while', 'do' statements, and (device) functions">;
let Args = [ExprArgument<"NThreadsExpr">];
let LangOpts = [SYCLIsDevice, SilentlyIgnoreSYCLIsHost];
let HasCustomTypeTransform = 1;
Expand Down
11 changes: 7 additions & 4 deletions clang/include/clang/Basic/AttrDocs.td
Original file line number Diff line number Diff line change
Expand Up @@ -2799,10 +2799,10 @@ def SYCLIntelFPGAMaxConcurrencyAttrDocs : Documentation {
let Category = DocCatVariable;
let Heading = "intel::max_concurrency";
let Content = [{
This attribute applies to a loop. Indicates that the loop should allow no more
than N threads or iterations to execute it simultaneously. N must be a non
negative integer. '0' indicates the max_concurrency case to be unbounded. Cannot
be applied multiple times to the same loop.
This attribute applies to a loop or a function. Indicates that the loop/function
should allow no more than N threads or iterations to execute it simultaneously.
N must be a non negative integer. '0' indicates the max_concurrency case to be
unbounded. Cannot be applied multiple times to the same loop.

.. code-block:: c++

Expand All @@ -2811,10 +2811,13 @@ be applied multiple times to the same loop.
[[intel::max_concurrency(2)]] for (int i = 0; i != 10; ++i) a[i] = 0;
}

[[intel::max_concurrency(2)]] void foo1 { }

template<int N>
void bar() {
[[intel::max_concurrency(N)]] for(;;) { }
}
[[intel::max_concurrency(N)]] void bar1() { }
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This example is missing the template keyword that introduces N.


}];
}
Expand Down
3 changes: 2 additions & 1 deletion clang/include/clang/Basic/AttributeCommonInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -167,7 +167,8 @@ class AttributeCommonInfo {
ParsedAttr == AT_SYCLIntelMaxGlobalWorkDim ||
ParsedAttr == AT_SYCLIntelNoGlobalWorkOffset ||
ParsedAttr == AT_SYCLIntelUseStallEnableClusters ||
ParsedAttr == AT_SYCLIntelLoopFuse || ParsedAttr == AT_SYCLSimd)
ParsedAttr == AT_SYCLIntelLoopFuse || ParsedAttr == AT_SYCLSimd ||
ParsedAttr == AT_SYCLIntelFPGAMaxConcurrency)
return true;

return false;
Expand Down
4 changes: 2 additions & 2 deletions clang/include/clang/Basic/DiagnosticSemaKinds.td
Original file line number Diff line number Diff line change
Expand Up @@ -134,8 +134,8 @@ def err_intel_fpga_reg_limitations : Error <
"__builtin_intel_fpga_reg">;
def illegal_type_declared_here : Note<
"field with illegal type declared here">;
def err_sycl_loop_attr_duplication : Error<
"duplicate %select{unroll|Intel FPGA}0 loop attribute %1">;
def err_sycl_attr_duplication : Error<
"duplicate %select{unroll|Intel FPGA}0 %select{loop|function}1 attribute %2">;
def err_loop_unroll_compatibility : Error<
"incompatible loop unroll instructions: '%0' and '%1'">;
def err_pipe_attribute_arg_not_allowed : Error<
Expand Down
8 changes: 8 additions & 0 deletions clang/include/clang/Sema/Sema.h
Original file line number Diff line number Diff line change
Expand Up @@ -10241,6 +10241,9 @@ class Sema final {
IntelFPGAMaxReplicatesAttr *
MergeIntelFPGAMaxReplicatesAttr(Decl *D, const IntelFPGAMaxReplicatesAttr &A);

SYCLIntelFPGAMaxConcurrencyAttr *MergeSYCLIntelFPGAMaxConcurrencyAttr(
Decl *D, const SYCLIntelFPGAMaxConcurrencyAttr &A);

/// AddAlignedAttr - Adds an aligned attribute to a particular declaration.
void AddAlignedAttr(Decl *D, const AttributeCommonInfo &CI, Expr *E,
bool IsPackExpansion);
Expand Down Expand Up @@ -10295,6 +10298,11 @@ class Sema final {
/// declaration.
void addSYCLIntelPipeIOAttr(Decl *D, const AttributeCommonInfo &CI, Expr *ID);

/// AddSYCLIntelFPGAMaxConcurrencyAttr - Adds a max_concurrency attribute to a
/// particular declaration.
void AddSYCLIntelFPGAMaxConcurrencyAttr(Decl *D,
const AttributeCommonInfo &CI,
Expr *E);
bool checkNSReturnsRetainedReturnType(SourceLocation loc, QualType type);
bool checkAllowedSYCLInitializer(VarDecl *VD,
bool CheckValueDependent = false);
Expand Down
8 changes: 8 additions & 0 deletions clang/lib/CodeGen/CodeGenFunction.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -730,6 +730,14 @@ void CodeGenFunction::EmitOpenCLKernelMetadata(const FunctionDecl *FD,
llvm::ConstantAsMetadata::get(Builder.getInt32(1))};
Fn->setMetadata("stall_enable", llvm::MDNode::get(Context, AttrMDArgs));
}

if (const auto *A = FD->getAttr<SYCLIntelFPGAMaxConcurrencyAttr>()) {
const auto *CE = cast<ConstantExpr>(A->getNThreadsExpr());
llvm::APSInt ArgVal = CE->getResultAsAPSInt();
llvm::Metadata *AttrMDArgs[] = {
llvm::ConstantAsMetadata::get(Builder.getInt32(ArgVal.getSExtValue()))};
Fn->setMetadata("max_concurrency", llvm::MDNode::get(Context, AttrMDArgs));
}
}

/// Determine whether the function F ends with a return stmt.
Expand Down
2 changes: 2 additions & 0 deletions clang/lib/Sema/SemaDecl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2628,6 +2628,8 @@ static bool mergeDeclAttribute(Sema &S, NamedDecl *D,
NewAttr = S.MergeSYCLIntelNoGlobalWorkOffsetAttr(D, *A);
else if (const auto *A = dyn_cast<IntelFPGAMaxReplicatesAttr>(Attr))
NewAttr = S.MergeIntelFPGAMaxReplicatesAttr(D, *A);
else if (const auto *A = dyn_cast<SYCLIntelFPGAMaxConcurrencyAttr>(Attr))
NewAttr = S.MergeSYCLIntelFPGAMaxConcurrencyAttr(D, *A);
else if (Attr->shouldInheritEvenIfAlreadyPresent() || !DeclHasAttr(D, Attr))
NewAttr = cast<InheritableAttr>(Attr->clone(S.Context));

Expand Down
71 changes: 71 additions & 0 deletions clang/lib/Sema/SemaDeclAttr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6294,6 +6294,74 @@ static void handleSYCLIntelPipeIOAttr(Sema &S, Decl *D,
S.addSYCLIntelPipeIOAttr(D, Attr, E);
}

SYCLIntelFPGAMaxConcurrencyAttr *Sema::MergeSYCLIntelFPGAMaxConcurrencyAttr(
Decl *D, const SYCLIntelFPGAMaxConcurrencyAttr &A) {
// Check to see if there's a duplicate attribute with different values
// already applied to the declaration.
if (const auto *DeclAttr = D->getAttr<SYCLIntelFPGAMaxConcurrencyAttr>()) {
const auto *DeclExpr = dyn_cast<ConstantExpr>(DeclAttr->getNThreadsExpr());
const auto *MergeExpr = dyn_cast<ConstantExpr>(A.getNThreadsExpr());
if (DeclExpr && MergeExpr &&
DeclExpr->getResultAsAPSInt() != MergeExpr->getResultAsAPSInt()) {
Diag(DeclAttr->getLocation(), diag::err_sycl_attr_duplication)
<< 1 << 1 << DeclAttr;
return nullptr;
}
}
// TODO
// max_concurrency and disable_component_pipelining attributes can't be applied
// to the same function.
// if (checkAttrMutualExclusion<IntelDisableComponentPipeline>(S, D, AL))
// return;

return ::new (Context)
SYCLIntelFPGAMaxConcurrencyAttr(Context, A, A.getNThreadsExpr());
}

void Sema::AddSYCLIntelFPGAMaxConcurrencyAttr(Decl *D,
const AttributeCommonInfo &CI,
Expr *E) {
if (!E->isValueDependent()) {
llvm::APSInt ArgVal;
ExprResult Res = VerifyIntegerConstantExpression(E, &ArgVal);
if (Res.isInvalid())
return;
E = Res.get();

// This attribute requires a strictly positive value.
if (ArgVal <= 0) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Document description says non-negative values are allowed, and that zero means unbounded.

Diag(E->getExprLoc(), diag::err_attribute_requires_positive_integer)
<< CI << /*positive*/ 0;
return;
}

if (const auto *DeclAttr = D->getAttr<SYCLIntelFPGAMaxConcurrencyAttr>()) {
const auto *DeclExpr =
dyn_cast<ConstantExpr>(DeclAttr->getNThreadsExpr());
if (DeclExpr && ArgVal != DeclExpr->getResultAsAPSInt()) {
Diag(CI.getLoc(), diag::warn_duplicate_attribute) << CI;
Diag(DeclAttr->getLoc(), diag::note_previous_attribute);
return;
}
}
}

D->addAttr(::new (Context) SYCLIntelFPGAMaxConcurrencyAttr(Context, CI, E));
}

static void handleSYCLIntelFPGAMaxConcurrencyAttr(Sema &S, Decl *D,
const ParsedAttr &A) {
S.CheckDeprecatedSYCLAttributeSpelling(A);
// TODO
// max_concurrency and disable_component_pipelining attributes can't be
// applied to the same function. if
// (checkAttrMutualExclusion<IntelDisableComponentPipeline>(S, D, AL))
// return;

Expr *E = A.getArgAsExpr(0);
S.AddSYCLIntelFPGAMaxConcurrencyAttr(D, A, E);
}

namespace {
struct IntrinToName {
uint32_t Id;
Expand Down Expand Up @@ -9547,6 +9615,9 @@ static void ProcessDeclAttribute(Sema &S, Scope *scope, Decl *D,
case ParsedAttr::AT_SYCLIntelPipeIO:
handleSYCLIntelPipeIOAttr(S, D, AL);
break;
case ParsedAttr::AT_SYCLIntelFPGAMaxConcurrency:
handleSYCLIntelFPGAMaxConcurrencyAttr(S, D, AL);
break;

// Swift attributes.
case ParsedAttr::AT_SwiftAsyncName:
Expand Down
10 changes: 9 additions & 1 deletion clang/lib/Sema/SemaSYCL.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -560,6 +560,13 @@ class MarkDeviceFunction : public RecursiveASTVisitor<MarkDeviceFunction> {
}
}

// Attribute "max_concurrency" is applied to device functions only. The
// attribute is not propagated to the caller.
if (auto *A = FD->getAttr<SYCLIntelFPGAMaxConcurrencyAttr>())
if (ParentFD == SYCLKernel) {
Attrs.insert(A);
}

// TODO: vec_len_hint should be handled here

CallGraphNode *N = SYCLCG.getNode(FD);
Expand Down Expand Up @@ -3316,7 +3323,8 @@ void Sema::MarkDevice(void) {
case attr::Kind::SYCLIntelNoGlobalWorkOffset:
case attr::Kind::SYCLIntelUseStallEnableClusters:
case attr::Kind::SYCLIntelLoopFuse:
case attr::Kind::SYCLSimd: {
case attr::Kind::SYCLSimd:
case attr::Kind::SYCLIntelFPGAMaxConcurrency: {
if ((A->getKind() == attr::Kind::SYCLSimd) && KernelBody &&
!KernelBody->getAttr<SYCLSimdAttr>()) {
// Usual kernel can't call ESIMD functions.
Expand Down
4 changes: 2 additions & 2 deletions clang/lib/Sema/SemaStmtAttr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -627,8 +627,8 @@ CheckForDuplicationSYCLLoopAttribute(Sema &S,
for (const auto *I : Attrs) {
if (LoopAttr && isa<LoopAttrT>(I)) {
// Cannot specify same type of attribute twice.
S.Diag(I->getLocation(), diag::err_sycl_loop_attr_duplication)
<< isIntelFPGAAttr << LoopAttr;
S.Diag(I->getLocation(), diag::err_sycl_attr_duplication)
<< isIntelFPGAAttr << 0 << LoopAttr;
}
if (isa<LoopAttrT>(I))
LoopAttr = cast<LoopAttrT>(I);
Expand Down
18 changes: 18 additions & 0 deletions clang/lib/Sema/SemaTemplateInstantiateDecl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -694,6 +694,17 @@ static void instantiateIntelSYCLFunctionAttr(
S.addIntelSingleArgAttr<AttrName>(New, *Attr, Result.getAs<Expr>());
}

template <typename AttrName>
static void instantiateSYCLIntelFPGAMaxConcurrencyAttr(
Sema &S, const MultiLevelTemplateArgumentList &TemplateArgs,
const SYCLIntelFPGAMaxConcurrencyAttr *A, Decl *New) {
EnterExpressionEvaluationContext Unevaluated(
S, Sema::ExpressionEvaluationContext::ConstantEvaluated);
ExprResult Result = S.SubstExpr(A->getNThreadsExpr(), TemplateArgs);
if (!Result.isInvalid())
S.AddSYCLIntelFPGAMaxConcurrencyAttr(New, *A, Result.getAs<Expr>());
}

static void instantiateIntelFPGAPrivateCopiesAttr(
Sema &S, const MultiLevelTemplateArgumentList &TemplateArgs,
const IntelFPGAPrivateCopiesAttr *A, Decl *New) {
Expand Down Expand Up @@ -940,6 +951,13 @@ void Sema::InstantiateAttrs(const MultiLevelTemplateArgumentList &TemplateArgs,
*this, TemplateArgs, SYCLIntelMaxWorkGroupSize, New);
continue;
}
if (const auto *SYCLIntelMaxConcurrency =
dyn_cast<SYCLIntelFPGAMaxConcurrencyAttr>(TmplAttr)) {
instantiateSYCLIntelFPGAMaxConcurrencyAttr<
SYCLIntelFPGAMaxConcurrencyAttr>(*this, TemplateArgs,
SYCLIntelMaxConcurrency, New);
continue;
}
// Existing DLL attribute on the instantiation takes precedence.
if (TmplAttr->getKind() == attr::DLLExport ||
TmplAttr->getKind() == attr::DLLImport) {
Expand Down
16 changes: 0 additions & 16 deletions clang/test/CodeGenSYCL/intel-fpga-loops.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,6 @@
// CHECK: br label %for.cond, !llvm.loop ![[MD_DLP:[0-9]+]]
// CHECK: br label %for.cond, !llvm.loop ![[MD_II:[0-9]+]]
// CHECK: br label %for.cond2, !llvm.loop ![[MD_II_2:[0-9]+]]
// CHECK: br label %for.cond, !llvm.loop ![[MD_MC:[0-9]+]]
// CHECK: br label %for.cond2, !llvm.loop ![[MD_MC_2:[0-9]+]]
// CHECK: br label %for.cond, !llvm.loop ![[MD_LC:[0-9]+]]
// CHECK: br label %for.cond2, !llvm.loop ![[MD_LC_2:[0-9]+]]
// CHECK: br label %for.cond13, !llvm.loop ![[MD_LC_3:[0-9]+]]
Expand Down Expand Up @@ -35,19 +33,6 @@ void ii() {
a[i] = 0;
}

template <int A>
void max_concurrency() {
int a[10];
// CHECK: ![[MD_MC]] = distinct !{![[MD_MC]], ![[MP]], ![[MD_max_concurrency:[0-9]+]]}
// CHECK-NEXT: ![[MD_max_concurrency]] = !{!"llvm.loop.max_concurrency.count", i32 0}
[[intel::max_concurrency(A)]] for (int i = 0; i != 10; ++i)
a[i] = 0;
// CHECK: ![[MD_MC_2]] = distinct !{![[MD_MC_2]], ![[MP]], ![[MD_max_concurrency_2:[0-9]+]]}
// CHECK-NEXT: ![[MD_max_concurrency_2]] = !{!"llvm.loop.max_concurrency.count", i32 4}
[[intel::max_concurrency(4)]] for (int i = 0; i != 10; ++i)
a[i] = 0;
}

template <int A>
void loop_coalesce() {
int a[10];
Expand Down Expand Up @@ -100,7 +85,6 @@ int main() {
kernel_single_task<class kernel_function>([]() {
disable_loop_pipelining();
ii<4>();
max_concurrency<0>();
loop_coalesce<2>();
max_interleaving<3>();
speculated_iterations<4>();
Expand Down
89 changes: 89 additions & 0 deletions clang/test/CodeGenSYCL/max-concurrency.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
// RUN: %clang_cc1 -fsycl-is-device -internal-isystem %S/Inputs -disable-llvm-passes -triple spir64-unknown-unknown-sycldevice -sycl-std=2020 -emit-llvm -o - %s | FileCheck %s

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please add a comment here describing what the test does.

#include "sycl.hpp"

// CHECK: br label %for.cond, !llvm.loop ![[MD_MC:[0-9]+]]
// CHECK: br label %for.cond2, !llvm.loop ![[MD_MC_1:[0-9]+]]

// CHECK: define {{.*}}spir_kernel void @"{{.*}}kernel_name1"() #0 {{.*}} !max_concurrency ![[NUM1:[0-9]+]]
// CHECK: define {{.*}}spir_kernel void @"{{.*}}kernel_name3"() #0 {{.*}} !max_concurrency ![[NUM2:[0-9]+]]
// CHECK: define {{.*}}spir_kernel void @"{{.*}}kernel_name4"() #0 {{.*}} !max_concurrency ![[NUM1:[0-9]+]]

template <int A>
void max_concurrency() {
int a[10];
// CHECK: ![[MD_MC]] = distinct !{![[MD_MC]], ![[MP:[0-9]+]], ![[MD_max_concurrency:[0-9]+]]}
// CHECK-NEXT: ![[MP]] = !{!"llvm.loop.mustprogress"}
// CHECK-NEXT: ![[MD_max_concurrency]] = !{!"llvm.loop.max_concurrency.count", i32 5}
[[intel::max_concurrency(A)]] for (int i = 0; i != 10; ++i)
a[i] = 0;
// CHECK: ![[MD_MC_1]] = distinct !{![[MD_MC_1]], ![[MP]], ![[MD_max_concurrency_1:[0-9]+]]}
// CHECK-NEXT: ![[MD_max_concurrency_1]] = !{!"llvm.loop.max_concurrency.count", i32 4}
[[intel::max_concurrency(4)]] for (int i = 0; i != 10; ++i)
a[i] = 0;
}

// CHECK: ![[NUM1]] = !{i32 4}
// CHECK: ![[NUM2]] = !{i32 3}

template <typename name, typename Func>
Copy link
Contributor

@smanna12 smanna12 Mar 16, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please use updated guidelines for new FE tests:
Please refer section “Guidelines for adding DPC++ in-tree LIT tests (DPC++ Clang FE tests)” in the https://github.com/intel/llvm/blob/sycl/CONTRIBUTING.md guide for the suggested guidelines

__attribute__((sycl_kernel)) void kernel_single_task_1(const Func &kernelFunc) {
kernelFunc();
}

using namespace cl::sycl;

class Functor1 {
public:
[[intel::max_concurrency(4)]] void operator()() const {}
};

[[intel::max_concurrency(2)]] void foo() {}

class Functor2 {
public:
void operator()() const {
foo();
}
};

template <int NT>
class Functor3 {
public:
[[intel::max_concurrency(NT)]] void operator()() const {}
};

template <int NT>
[[intel::max_concurrency(NT)]] void func() {}

int main() {
queue q;

kernel_single_task_1<class kernel_function>([]() {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

New guidelines suggest using kernel_single_task from the sycl.hpp header instead.

max_concurrency<5>();
});

q.submit([&](handler &h) {
Functor1 f1;
h.single_task<class kernel_name1>(f1);

Functor2 f2;
h.single_task<class kernel_name2>(f2);

h.single_task<class kernel_name3>(
[]() [[intel::max_concurrency(3)]]{});

Functor3<4> f3;
h.single_task<class kernel_name4>(f3);

h.single_task<class kernel_name5>([]() {
func<2>();
});

});
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could you also add an example that uses '0'?



return 0;
}


Loading