diff --git a/clang/lib/CodeGen/TargetInfo.cpp b/clang/lib/CodeGen/TargetInfo.cpp
index 7bb6eaa81900..5fdd87026b1d 100644
--- a/clang/lib/CodeGen/TargetInfo.cpp
+++ b/clang/lib/CodeGen/TargetInfo.cpp
@@ -7132,6 +7132,27 @@ void NVPTXTargetCodeGenInfo::setTargetAttributes(
       // And kernel functions are not subject to inlining
       F->addFnAttr(llvm::Attribute::NoInline);
     }
+
+    // Emit "reqntid{x,y,z}" NVVM metadata when the function carries a
+    // reqd_work_group_size attribute.
+    if (const ReqdWorkGroupSizeAttr *A = FD->getAttr<ReqdWorkGroupSizeAttr>()) {
+      ASTContext &ClangCtx = FD->getASTContext();
+      Optional<llvm::APSInt> XDimVal = A->getXDimVal(ClangCtx);
+      Optional<llvm::APSInt> YDimVal = A->getYDimVal(ClangCtx);
+      Optional<llvm::APSInt> ZDimVal = A->getZDimVal(ClangCtx);
+
+      // For a SYCLDevice ReqdWorkGroupSizeAttr arguments are reversed, so X
+      // and Z trade places; Y keeps its position.
+      if (M.getLangOpts().SYCLIsDevice)
+        std::swap(XDimVal, ZDimVal);
+
+      // Create !{<func-ref>, metadata !"reqntidx", i32 <val>}
+      addNVVMMetadata(F, "reqntidx", XDimVal->getZExtValue());
+      // Create !{<func-ref>, metadata !"reqntidy", i32 <val>}
+      addNVVMMetadata(F, "reqntidy", YDimVal->getZExtValue());
+      // Create !{<func-ref>, metadata !"reqntidz", i32 <val>}
+      addNVVMMetadata(F, "reqntidz", ZDimVal->getZExtValue());
+    }
   }
 
   // Perform special handling in CUDA mode.
diff --git a/clang/test/CodeGenSYCL/ptx-reqd-work-group-size.cpp b/clang/test/CodeGenSYCL/ptx-reqd-work-group-size.cpp
new file mode 100644
index 000000000000..8807df10ca3b
--- /dev/null
+++ b/clang/test/CodeGenSYCL/ptx-reqd-work-group-size.cpp
@@ -0,0 +1,48 @@
+// RUN: %clang_cc1 -fsycl-is-device %s -emit-llvm -triple nvptx64-nvidia-cuda-sycldevice -o - | FileCheck %s
+
+// Verifies that reqd_work_group_size on a SYCL kernel shows up for NVPTX as
+// both !reqd_work_group_size function metadata and "reqntid{x,y,z}"
+// annotations, and that a kernel without the attribute gets neither.
+
+template <typename Name, typename Func>
+__attribute__((sycl_kernel)) void kernel(const Func &kernelFunc) {
+  kernelFunc();
+}
+
+int main() {
+  kernel<class kernel_no_reqd_work_size>([]() {});
+  // CHECK: define dso_local void @{{.*}}kernel_no_reqd_work_size()
+  // CHECK-NOT: !reqd_work_group_size
+
+  kernel<class kernel_reqd_work_size_1d>(
+      []() [[intel::reqd_work_group_size(32)]]{});
+  // CHECK: define dso_local void @{{.*}}kernel_reqd_work_size_1d() {{.*}} !reqd_work_group_size ![[WGSIZE1D:[0-9]+]]
+
+  kernel<class kernel_reqd_work_size_2d>(
+      []() [[intel::reqd_work_group_size(64, 32)]]{});
+  // CHECK: define dso_local void @{{.*}}kernel_reqd_work_size_2d() {{.*}} !reqd_work_group_size ![[WGSIZE2D:[0-9]+]]
+
+  kernel<class kernel_reqd_work_size_3d>(
+      []() [[intel::reqd_work_group_size(128, 64, 32)]]{});
+  // CHECK: define dso_local void @{{.*}}kernel_reqd_work_size_3d() {{.*}} !reqd_work_group_size ![[WGSIZE3D:[0-9]+]]
+}
+
+// CHECK-NOT: !{{[0-9]+}} = !{void ()* @{{.*}}kernel_no_reqd_work_size, !"reqntidx", i32 {{[0-9]+}}}
+// CHECK-NOT: !{{[0-9]+}} = !{void ()* @{{.*}}kernel_no_reqd_work_size, !"reqntidy", i32 {{[0-9]+}}}
+// CHECK-NOT: !{{[0-9]+}} = !{void ()* @{{.*}}kernel_no_reqd_work_size, !"reqntidz", i32 {{[0-9]+}}}
+
+// CHECK: !{{[0-9]+}} = !{void ()* @{{.*}}kernel_reqd_work_size_1d, !"reqntidx", i32 1}
+// CHECK: !{{[0-9]+}} = !{void ()* @{{.*}}kernel_reqd_work_size_1d, !"reqntidy", i32 1}
+// CHECK: !{{[0-9]+}} = !{void ()* @{{.*}}kernel_reqd_work_size_1d, !"reqntidz", i32 32}
+
+// CHECK: !{{[0-9]+}} = !{void ()* @{{.*}}kernel_reqd_work_size_2d, !"reqntidx", i32 1}
+// CHECK: !{{[0-9]+}} = !{void ()* @{{.*}}kernel_reqd_work_size_2d, !"reqntidy", i32 32}
+// CHECK: !{{[0-9]+}} = !{void ()* @{{.*}}kernel_reqd_work_size_2d, !"reqntidz", i32 64}
+
+// CHECK: !{{[0-9]+}} = !{void ()* @{{.*}}kernel_reqd_work_size_3d, !"reqntidx", i32 32}
+// CHECK: !{{[0-9]+}} = !{void ()* @{{.*}}kernel_reqd_work_size_3d, !"reqntidy", i32 64}
+// CHECK: !{{[0-9]+}} = !{void ()* @{{.*}}kernel_reqd_work_size_3d, !"reqntidz", i32 128}
+
+// CHECK: ![[WGSIZE1D]] = !{i32 1, i32 1, i32 32}
+// CHECK: ![[WGSIZE2D]] = !{i32 1, i32 32, i32 64}
+// CHECK: ![[WGSIZE3D]] = !{i32 32, i32 64, i32 128}