[SYCL][CUDA][HIP] Implement support for AMD and NVIDIA architectures as argument to -fsycl-targets (#7348)

* Update the if_architecture_is extension to include NVIDIA and AMD
architectures (a usage sketch follows this list).
* Move the if_architecture_is header file from intel to oneapi.
* Update experimental/sycl_ext_intel_device_architecture.asciidoc with
the contents of proposed/sycl_ext_oneapi_device_architecture.asciidoc.
* Rename sycl_ext_intel_device_architecture.asciidoc to
sycl_ext_oneapi_device_architecture.asciidoc.
* Delete proposed/sycl_ext_oneapi_device_architecture.asciidoc.
* Rename nvidia_gpu_smxx to nvidia_gpu_sm_xx.
* Remove NVIDIA architectures not supported by DPC++ (sm_20 to sm_37).
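
The renamed extension can be exercised roughly as follows. This is a minimal sketch assuming the if_architecture_is/otherwise API described in sycl_ext_oneapi_device_architecture; the queue, the USM pointer, and the stored values are invented for illustration and are not part of this commit:

#include <sycl/sycl.hpp>
namespace syclex = sycl::ext::oneapi::experimental;

void dispatch(sycl::queue &q, int *out) {  // out: USM memory visible on the device
  q.single_task([=] {
    // Branch on the architecture selected via -fsycl-targets,
    // e.g. -fsycl-targets=nvidia_gpu_sm_80 or -fsycl-targets=amd_gpu_gfx906.
    syclex::if_architecture_is<syclex::architecture::nvidia_gpu_sm_80>([=] {
      out[0] = 80;  // NVIDIA sm_80 specialization
    }).otherwise([=] {
      out[0] = 0;   // generic fallback for every other device
    });
  });
}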
mmoadeli committed Dec 10, 2022
1 parent ed3d35c commit e5de913
Showing 13 changed files with 1,182 additions and 1,049 deletions.
51 changes: 36 additions & 15 deletions clang/lib/Driver/Driver.cpp
@@ -830,19 +830,6 @@ static bool addSYCLDefaultTriple(Compilation &C,
return true;
}

// Prefix for Intel GPU specific targets used for -fsycl-targets
constexpr char IntelGPU[] = "intel_gpu_";

static llvm::Optional<StringRef> isIntelGPUTarget(StringRef Target) {
// Handle target specifications that resemble 'intel_gpu_*' here. These are
// 'spir64_gen' based.
if (Target.startswith(IntelGPU)) {
return tools::SYCL::gen::resolveGenDevice(
Target.drop_front(sizeof(IntelGPU) - 1));
}
return llvm::None;
}

void Driver::CreateOffloadingDeviceToolChains(Compilation &C,
InputList &Inputs) {

@@ -851,6 +838,7 @@ void Driver::CreateOffloadingDeviceToolChains(Compilation &C,
//
// We need to generate a CUDA/HIP toolchain if any of the inputs has a CUDA
// or HIP type. However, mixed CUDA/HIP compilation is not supported.
using namespace tools::SYCL;
bool IsCuda =
llvm::any_of(Inputs, [](std::pair<types::ID, const llvm::opt::Arg *> &I) {
return types::isCuda(I.first);
@@ -1128,12 +1116,24 @@ void Driver::CreateOffloadingDeviceToolChains(Compilation &C,

for (StringRef Val : SYCLTargetsValues->getValues()) {
StringRef UserTargetName(Val);
if (auto Device = isIntelGPUTarget(Val)) {
if (auto Device = gen::isGPUTarget<gen::IntelGPU>(Val)) {
if (Device->empty()) {
Diag(clang::diag::err_drv_invalid_sycl_target) << Val;
continue;
}
UserTargetName = "spir64_gen";
} else if (auto Device = gen::isGPUTarget<gen::NvidiaGPU>(Val)) {
if (Device->empty()) {
Diag(clang::diag::err_drv_invalid_sycl_target) << Val;
continue;
}
UserTargetName = "nvptx64-nvidia-cuda";
} else if (auto Device = gen::isGPUTarget<gen::AmdGPU>(Val)) {
if (Device->empty()) {
Diag(clang::diag::err_drv_invalid_sycl_target) << Val;
continue;
}
UserTargetName = "amdgcn-amd-amdhsa";
}

if (!isValidSYCLTriple(MakeSYCLDeviceTriple(UserTargetName))) {
@@ -5802,6 +5802,7 @@ class OffloadingActionBuilder final {
}

bool initialize() override {
using namespace tools::SYCL;
// Get the SYCL toolchains. If we don't get any, the action builder will
// know there is nothing to do related to SYCL offloading.
auto SYCLTCRange = C.getOffloadToolChains<Action::OFK_SYCL>();
@@ -5841,15 +5842,35 @@
llvm::StringMap<StringRef> FoundNormalizedTriples;
for (StringRef Val : SYCLTargetsValues->getValues()) {
StringRef UserTargetName(Val);
if (auto ValidDevice = isIntelGPUTarget(Val)) {
if (auto ValidDevice = gen::isGPUTarget<gen::IntelGPU>(Val)) {
if (ValidDevice->empty())
// Unrecognized, we have already diagnosed this earlier; skip.
continue;
// Add the proper -device value to the list.
GpuArchList.emplace_back(C.getDriver().MakeSYCLDeviceTriple(
"spir64_gen"), ValidDevice->data());
UserTargetName = "spir64_gen";
} else if (auto ValidDevice =
gen::isGPUTarget<gen::NvidiaGPU>(Val)) {
if (ValidDevice->empty())
// Unrecognized, we have already diagnosed this earlier; skip.
continue;
// Add the proper -device value to the list.
GpuArchList.emplace_back(
C.getDriver().MakeSYCLDeviceTriple("nvptx64-nvidia-cuda"),
ValidDevice->data());
UserTargetName = "nvptx64-nvidia-cuda";
} else if (auto ValidDevice = gen::isGPUTarget<gen::AmdGPU>(Val)) {
if (ValidDevice->empty())
// Unrecognized, we have already diagnosed this earlier; skip.
continue;
// Add the proper -device value to the list.
GpuArchList.emplace_back(
C.getDriver().MakeSYCLDeviceTriple("amdgcn-amd-amdhsa"),
ValidDevice->data());
UserTargetName = "amdgcn-amd-amdhsa";
}

llvm::Triple TT(C.getDriver().MakeSYCLDeviceTriple(Val));
std::string NormalizedName = TT.normalize();

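For orientation, the hunks above make the driver recognize three GPU prefixes on -fsycl-targets and pick the matching offload triple plus device name. A simplified, standalone sketch of that mapping (not the in-tree code; device-name validation via resolveGenDevice is omitted):

#include <optional>
#include <string>
#include <string_view>
#include <utility>

// "nvidia_gpu_sm_80" -> {"nvptx64-nvidia-cuda", "sm_80"}, and so on.
std::optional<std::pair<std::string, std::string>>
mapSYCLTarget(std::string_view Val) {
  constexpr std::pair<std::string_view, std::string_view> Table[] = {
      {"intel_gpu_", "spir64_gen"},
      {"nvidia_gpu_", "nvptx64-nvidia-cuda"},
      {"amd_gpu_", "amdgcn-amd-amdhsa"},
  };
  for (auto [Prefix, Triple] : Table)
    if (Val.substr(0, Prefix.size()) == Prefix)
      return std::make_pair(std::string(Triple),
                            std::string(Val.substr(Prefix.size())));
  return std::nullopt;  // not GPU-prefixed; treated as an ordinary triple
}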
6 changes: 4 additions & 2 deletions clang/lib/Driver/ToolChains/Clang.cpp
@@ -5090,10 +5090,12 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
// between device and host where we should be able to use the offloading
// arch to add the macro to the host compile.
auto addTargetMacros = [&](const llvm::Triple &Triple) {
if (!Triple.isSPIR())
if (!Triple.isSPIR() && !Triple.isNVPTX() && !Triple.isAMDGCN())
return;
SmallString<64> Macro;
if (Triple.getSubArch() == llvm::Triple::SPIRSubArch_gen) {
if ((Triple.isSPIR() &&
Triple.getSubArch() == llvm::Triple::SPIRSubArch_gen) ||
Triple.isNVPTX() || Triple.isAMDGCN()) {
StringRef Device = JA.getOffloadingArch();
if (!Device.empty()) {
Macro = "-D";
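The net effect of this Clang.cpp change is that the per-architecture preprocessor macro (built by getGenDeviceMacro in SYCL.cpp below) is now also emitted for NVPTX and AMDGCN offload targets. User code can then be guarded on it; the macro names follow the "__SYCL_TARGET_" + device + "__" pattern from the diff, while the tuning values here are invented for illustration:

// With -fsycl-targets=nvidia_gpu_sm_80 the driver adds
// -D__SYCL_TARGET_NVIDIA_GPU_SM_80__; with amd_gpu_gfx906 it adds
// -D__SYCL_TARGET_AMD_GPU_GFX906__.
#if defined(__SYCL_TARGET_NVIDIA_GPU_SM_80__)
constexpr int preferred_tile = 128;  // illustrative NVIDIA-specific choice
#elif defined(__SYCL_TARGET_AMD_GPU_GFX906__)
constexpr int preferred_tile = 64;   // illustrative AMD-specific choice
#else
constexpr int preferred_tile = 32;   // generic fallback
#endif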
182 changes: 136 additions & 46 deletions clang/lib/Driver/ToolChains/SYCL.cpp
@@ -597,60 +597,130 @@ void SYCL::gen::BackendCompiler::ConstructJob(Compilation &C,
StringRef SYCL::gen::resolveGenDevice(StringRef DeviceName) {
StringRef Device;
Device = llvm::StringSwitch<StringRef>(DeviceName)
.Cases("bdw", "8_0_0", "bdw")
.Cases("skl", "9_0_9", "skl")
.Cases("kbl", "9_1_9", "kbl")
.Cases("cfl", "9_2_9", "cfl")
.Cases("apl", "9_3_0", "apl")
.Cases("glk", "9_4_0", "glk")
.Cases("whl", "9_5_0", "whl")
.Cases("aml", "9_6_0", "aml")
.Cases("cml", "9_7_0", "cml")
.Cases("icllp", "11_0_0", "icllp")
.Cases("ehl", "11_2_0", "ehl")
.Cases("tgllp", "12_0_0", "tgllp")
.Case("rkl", "rkl")
.Case("adl_s", "adl_s")
.Case("rpl_s", "rpl_s")
.Case("adl_p", "adl_p")
.Case("adl_n", "adl_n")
.Cases("dg1", "12_10_0", "dg1")
.Case("acm_g10", "acm_g10")
.Case("acm_g11", "acm_g11")
.Case("acm_g12", "acm_g12")
.Case("pvc", "pvc")
.Cases("intel_gpu_bdw", "intel_gpu_8_0_0", "bdw")
.Cases("intel_gpu_skl", "intel_gpu_9_0_9", "skl")
.Cases("intel_gpu_kbl", "intel_gpu_9_1_9", "kbl")
.Cases("intel_gpu_cfl", "intel_gpu_9_2_9", "cfl")
.Cases("intel_gpu_apl", "intel_gpu_9_3_0", "apl")
.Cases("intel_gpu_glk", "intel_gpu_9_4_0", "glk")
.Cases("intel_gpu_whl", "intel_gpu_9_5_0", "whl")
.Cases("intel_gpu_aml", "intel_gpu_9_6_0", "aml")
.Cases("intel_gpu_cml", "intel_gpu_9_7_0", "cml")
.Cases("intel_gpu_icllp", "intel_gpu_11_0_0", "icllp")
.Cases("intel_gpu_ehl", "intel_gpu_11_2_0", "ehl")
.Cases("intel_gpu_tgllp", "intel_gpu_12_0_0", "tgllp")
.Case("intel_gpu_rkl", "rkl")
.Case("intel_gpu_adl_s", "adl_s")
.Case("intel_gpu_rpl_s", "rpl_s")
.Case("intel_gpu_adl_p", "adl_p")
.Case("intel_gpu_adl_n", "adl_n")
.Cases("intel_gpu_dg1", "intel_gpu_12_10_0", "dg1")
.Case("intel_gpu_acm_g10", "acm_g10")
.Case("intel_gpu_acm_g11", "acm_g11")
.Case("intel_gpu_acm_g12", "acm_g12")
.Case("intel_gpu_pvc", "pvc")
.Case("nvidia_gpu_sm_50", "sm_50")
.Case("nvidia_gpu_sm_52", "sm_52")
.Case("nvidia_gpu_sm_53", "sm_53")
.Case("nvidia_gpu_sm_60", "sm_60")
.Case("nvidia_gpu_sm_61", "sm_61")
.Case("nvidia_gpu_sm_62", "sm_62")
.Case("nvidia_gpu_sm_70", "sm_70")
.Case("nvidia_gpu_sm_72", "sm_72")
.Case("nvidia_gpu_sm_75", "sm_75")
.Case("nvidia_gpu_sm_80", "sm_80")
.Case("nvidia_gpu_sm_86", "sm_86")
.Case("nvidia_gpu_sm_87", "sm_87")
.Case("nvidia_gpu_sm_89", "sm_89")
.Case("nvidia_gpu_sm_90", "sm_90")
.Case("amd_gpu_gfx700", "gfx700")
.Case("amd_gpu_gfx701", "gfx701")
.Case("amd_gpu_gfx702", "gfx702")
.Case("amd_gpu_gfx801", "gfx801")
.Case("amd_gpu_gfx802", "gfx802")
.Case("amd_gpu_gfx803", "gfx803")
.Case("amd_gpu_gfx805", "gfx805")
.Case("amd_gpu_gfx810", "gfx810")
.Case("amd_gpu_gfx900", "gfx900")
.Case("amd_gpu_gfx902", "gfx902")
.Case("amd_gpu_gfx904", "gfx904")
.Case("amd_gpu_gfx906", "gfx906")
.Case("amd_gpu_gfx908", "gfx908")
.Case("amd_gpu_gfx90a", "gfx90a")
.Case("amd_gpu_gfx1010", "gfx1010")
.Case("amd_gpu_gfx1011", "gfx1011")
.Case("amd_gpu_gfx1012", "gfx1012")
.Case("amd_gpu_gfx1013", "gfx1013")
.Case("amd_gpu_gfx1030", "gfx1030")
.Case("amd_gpu_gfx1031", "gfx1031")
.Case("amd_gpu_gfx1032", "gfx1032")
.Default("");
return Device;
}

StringRef SYCL::gen::getGenDeviceMacro(StringRef DeviceName) {
SmallString<64> SYCL::gen::getGenDeviceMacro(StringRef DeviceName) {
SmallString<64> Macro;
StringRef Ext = llvm::StringSwitch<StringRef>(DeviceName)
.Case("bdw", "BDW")
.Case("skl", "SKL")
.Case("kbl", "KBL")
.Case("cfl", "CFL")
.Case("apl", "APL")
.Case("glk", "GLK")
.Case("whl", "WHL")
.Case("aml", "AML")
.Case("cml", "CML")
.Case("icllp", "ICLLP")
.Case("ehl", "EHL")
.Case("tgllp", "TGLLP")
.Case("rkl", "RKL")
.Case("adl_s", "ADL_S")
.Case("rpl_s", "RPL_S")
.Case("adl_p", "ADL_P")
.Case("adl_n", "ADL_N")
.Case("dg1", "DG1")
.Case("acm_g10", "ACM_G10")
.Case("acm_g11", "ACM_G11")
.Case("acm_g12", "ACM_G12")
.Case("pvc", "PVC")
.Case("bdw", "INTEL_GPU_BDW")
.Case("skl", "INTEL_GPU_SKL")
.Case("kbl", "INTEL_GPU_KBL")
.Case("cfl", "INTEL_GPU_CFL")
.Case("apl", "INTEL_GPU_APL")
.Case("glk", "INTEL_GPU_GLK")
.Case("whl", "INTEL_GPU_WHL")
.Case("aml", "INTEL_GPU_AML")
.Case("cml", "INTEL_GPU_CML")
.Case("icllp", "INTEL_GPU_ICLLP")
.Case("ehl", "INTEL_GPU_EHL")
.Case("tgllp", "INTEL_GPU_TGLLP")
.Case("rkl", "INTEL_GPU_RKL")
.Case("adl_s", "INTEL_GPU_ADL_S")
.Case("rpl_s", "INTEL_GPU_RPL_S")
.Case("adl_p", "INTEL_GPU_ADL_P")
.Case("adl_n", "INTEL_GPU_ADL_N")
.Case("dg1", "INTEL_GPU_DG1")
.Case("acm_g10", "INTEL_GPU_ACM_G10")
.Case("acm_g11", "INTEL_GPU_ACM_G11")
.Case("acm_g12", "INTEL_GPU_ACM_G12")
.Case("pvc", "INTEL_GPU_PVC")
.Case("sm_50", "NVIDIA_GPU_SM_50")
.Case("sm_52", "NVIDIA_GPU_SM_52")
.Case("sm_53", "NVIDIA_GPU_SM_53")
.Case("sm_60", "NVIDIA_GPU_SM_60")
.Case("sm_61", "NVIDIA_GPU_SM_61")
.Case("sm_62", "NVIDIA_GPU_SM_62")
.Case("sm_70", "NVIDIA_GPU_SM_70")
.Case("sm_72", "NVIDIA_GPU_SM_72")
.Case("sm_75", "NVIDIA_GPU_SM_75")
.Case("sm_80", "NVIDIA_GPU_SM_80")
.Case("sm_86", "NVIDIA_GPU_SM_86")
.Case("sm_87", "NVIDIA_GPU_SM_87")
.Case("sm_89", "NVIDIA_GPU_SM_89")
.Case("sm_90", "NVIDIA_GPU_SM_90")
.Case("gfx700", "AMD_GPU_GFX700")
.Case("gfx701", "AMD_GPU_GFX701")
.Case("gfx702", "AMD_GPU_GFX702")
.Case("gfx801", "AMD_GPU_GFX801")
.Case("gfx802", "AMD_GPU_GFX802")
.Case("gfx803", "AMD_GPU_GFX803")
.Case("gfx805", "AMD_GPU_GFX805")
.Case("gfx810", "AMD_GPU_GFX810")
.Case("gfx900", "AMD_GPU_GFX900")
.Case("gfx902", "AMD_GPU_GFX902")
.Case("gfx904", "AMD_GPU_GFX904")
.Case("gfx906", "AMD_GPU_GFX906")
.Case("gfx908", "AMD_GPU_GFX908")
.Case("gfx90a", "AMD_GPU_GFX90A")
.Case("gfx1010", "AMD_GPU_GFX1010")
.Case("gfx1011", "AMD_GPU_GFX1011")
.Case("gfx1012", "AMD_GPU_GFX1012")
.Case("gfx1013", "AMD_GPU_GFX1013")
.Case("gfx1030", "AMD_GPU_GFX1030")
.Case("gfx1031", "AMD_GPU_GFX1031")
.Case("gfx1032", "AMD_GPU_GFX1032")
.Default("");
if (!Ext.empty()) {
Macro = "__SYCL_TARGET_INTEL_GPU_";
Macro = "__SYCL_TARGET_";
Macro += Ext;
Macro += "__";
}
@@ -760,6 +830,25 @@ static void parseTargetOpts(StringRef ArgString, const llvm::opt::ArgList &Args,
CmdArgs.push_back(Args.MakeArgString(TA));
}

void SYCLToolChain::TranslateGPUTargetOpt(const llvm::opt::ArgList &Args,
llvm::opt::ArgStringList &CmdArgs,
OptSpecifier Opt_EQ) const {
for (auto *A : Args) {
if (A->getOption().matches(Opt_EQ)) {
if (auto GpuDevice =
tools::SYCL::gen::isGPUTarget<tools::SYCL::gen::AmdGPU>(
A->getValue())) {
StringRef ArgString;
SmallString<64> OffloadArch("--offload-arch=");
OffloadArch += GpuDevice->data();
ArgString = OffloadArch;
parseTargetOpts(ArgString, Args, CmdArgs);
A->claim();
}
}
}
}

// Expects a specific type of option (e.g. -Xsycl-target-backend) and will
// extract the arguments.
void SYCLToolChain::TranslateTargetOpt(const llvm::opt::ArgList &Args,
@@ -915,6 +1004,7 @@ void SYCLToolChain::TranslateBackendTargetArgs(
// Handle -Xsycl-target-backend.
TranslateTargetOpt(Args, CmdArgs, options::OPT_Xsycl_backend,
options::OPT_Xsycl_backend_EQ, Device);
TranslateGPUTargetOpt(Args, CmdArgs, options::OPT_fsycl_targets_EQ);
}

void SYCLToolChain::TranslateLinkerTargetArgs(
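TranslateGPUTargetOpt above is what turns an amd_gpu_* value on -fsycl-targets into a --offload-arch option for the backend job (only the amd_gpu_ prefix is translated in that hook). A minimal standalone analogue, simplified from the real code path through isGPUTarget and parseTargetOpts:

#include <string>
#include <string_view>
#include <vector>

// "amd_gpu_gfx906" contributes "--offload-arch=gfx906" to the backend args.
void addAmdOffloadArch(std::string_view SYCLTargetValue,
                       std::vector<std::string> &CmdArgs) {
  constexpr std::string_view Prefix = "amd_gpu_";
  if (SYCLTargetValue.substr(0, Prefix.size()) != Prefix)
    return;  // not an amd_gpu_* value; nothing to translate
  std::string Arch(SYCLTargetValue.substr(Prefix.size()));
  CmdArgs.push_back("--offload-arch=" + Arch);
}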
19 changes: 18 additions & 1 deletion clang/lib/Driver/ToolChains/SYCL.h
@@ -106,7 +106,21 @@ class LLVM_LIBRARY_VISIBILITY BackendCompiler : public Tool {
};

StringRef resolveGenDevice(StringRef DeviceName);
StringRef getGenDeviceMacro(StringRef DeviceName);
SmallString<64> getGenDeviceMacro(StringRef DeviceName);

// Prefix for GPU-specific targets used for -fsycl-targets
constexpr char IntelGPU[] = "intel_gpu_";
constexpr char NvidiaGPU[] = "nvidia_gpu_";
constexpr char AmdGPU[] = "amd_gpu_";

template <auto GPUArch> llvm::Optional<StringRef> isGPUTarget(StringRef Target) {
// Handle target specifications that resemble '(intel, nvidia, amd)_gpu_*'
// here.
if (Target.startswith(GPUArch)) {
return resolveGenDevice(Target);
}
return llvm::None;
}

} // end namespace gen

@@ -189,6 +203,9 @@ class LLVM_LIBRARY_VISIBILITY SYCLToolChain : public ToolChain {
llvm::opt::OptSpecifier Opt,
llvm::opt::OptSpecifier Opt_EQ,
StringRef Device) const;
void TranslateGPUTargetOpt(const llvm::opt::ArgList &Args,
llvm::opt::ArgStringList &CmdArgs,
llvm::opt::OptSpecifier Opt_EQ) const;
};

} // end namespace toolchains