[SYCL][CUDA][HIP] Implement support for AMD and NVIDIA architectures as argument to -fsycl-targets (#7348)

* Update the if_architecture_is extension to include NVIDIA and AMD
architectures (a usage sketch follows this list).
* Move the if_architecture_is header file from intel to oneapi.
* Update experimental/sycl_ext_intel_device_architecture.asciidoc with
the contents of proposed/sycl_ext_oneapi_device_architecture.asciidoc.
* Rename sycl_ext_intel_device_architecture.asciidoc to
sycl_ext_oneapi_device_architecture.asciidoc.
* Delete proposed/sycl_ext_oneapi_device_architecture.asciidoc.
* Rename nvidia_gpu_smxx to nvidia_gpu_sm_xx.
* Remove NVIDIA architectures not supported by DPC++ (sm_20 to sm_37).
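
The renamed extension can be exercised roughly as follows. This is a minimal sketch assuming the if_architecture_is/otherwise API described in sycl_ext_oneapi_device_architecture; the queue, the USM pointer, and the stored values are invented for illustration and are not part of this commit:

#include <sycl/sycl.hpp>
namespace syclex = sycl::ext::oneapi::experimental;

void dispatch(sycl::queue &q, int *out) {  // out: USM memory visible on the device
  q.single_task([=] {
    // Branch on the architecture selected via -fsycl-targets,
    // e.g. -fsycl-targets=nvidia_gpu_sm_80 or -fsycl-targets=amd_gpu_gfx906.
    syclex::if_architecture_is<syclex::architecture::nvidia_gpu_sm_80>([=] {
      out[0] = 80;  // NVIDIA sm_80 specialization
    }).otherwise([=] {
      out[0] = 0;   // generic fallback for every other device
    });
  });
}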
mmoadeli committed Dec 10, 2022
1 parent ed3d35c commit e5de913
Showing 13 changed files with 1,182 additions and 1,049 deletions.
51 changes: 36 additions & 15 deletions clang/lib/Driver/Driver.cpp
@@ -830,19 +830,6 @@ static bool addSYCLDefaultTriple(Compilation &C,
return true;
}

// Prefix for Intel GPU specific targets used for -fsycl-targets
constexpr char IntelGPU[] = "intel_gpu_";

static llvm::Optional<StringRef> isIntelGPUTarget(StringRef Target) {
// Handle target specifications that resemble 'intel_gpu_*' here. These are
// 'spir64_gen' based.
if (Target.startswith(IntelGPU)) {
return tools::SYCL::gen::resolveGenDevice(
Target.drop_front(sizeof(IntelGPU) - 1));
}
return llvm::None;
}

void Driver::CreateOffloadingDeviceToolChains(Compilation &C,
InputList &Inputs) {

@@ -851,6 +838,7 @@ void Driver::CreateOffloadingDeviceToolChains(Compilation &C,
//
// We need to generate a CUDA/HIP toolchain if any of the inputs has a CUDA
// or HIP type. However, mixed CUDA/HIP compilation is not supported.
using namespace tools::SYCL;
bool IsCuda =
llvm::any_of(Inputs, [](std::pair<types::ID, const llvm::opt::Arg *> &I) {
return types::isCuda(I.first);
@@ -1128,12 +1116,24 @@ void Driver::CreateOffloadingDeviceToolChains(Compilation &C,

for (StringRef Val : SYCLTargetsValues->getValues()) {
StringRef UserTargetName(Val);
if (auto Device = isIntelGPUTarget(Val)) {
if (auto Device = gen::isGPUTarget<gen::IntelGPU>(Val)) {
if (Device->empty()) {
Diag(clang::diag::err_drv_invalid_sycl_target) << Val;
continue;
}
UserTargetName = "spir64_gen";
} else if (auto Device = gen::isGPUTarget<gen::NvidiaGPU>(Val)) {
if (Device->empty()) {
Diag(clang::diag::err_drv_invalid_sycl_target) << Val;
continue;
}
UserTargetName = "nvptx64-nvidia-cuda";
} else if (auto Device = gen::isGPUTarget<gen::AmdGPU>(Val)) {
if (Device->empty()) {
Diag(clang::diag::err_drv_invalid_sycl_target) << Val;
continue;
}
UserTargetName = "amdgcn-amd-amdhsa";
}

if (!isValidSYCLTriple(MakeSYCLDeviceTriple(UserTargetName))) {
@@ -5802,6 +5802,7 @@ class OffloadingActionBuilder final {
}

bool initialize() override {
using namespace tools::SYCL;
// Get the SYCL toolchains. If we don't get any, the action builder will
// know there is nothing to do related to SYCL offloading.
auto SYCLTCRange = C.getOffloadToolChains<Action::OFK_SYCL>();
@@ -5841,15 +5842,35 @@
llvm::StringMap<StringRef> FoundNormalizedTriples;
for (StringRef Val : SYCLTargetsValues->getValues()) {
StringRef UserTargetName(Val);
if (auto ValidDevice = isIntelGPUTarget(Val)) {
if (auto ValidDevice = gen::isGPUTarget<gen::IntelGPU>(Val)) {
if (ValidDevice->empty())
// Unrecognized, we have already diagnosed this earlier; skip.
continue;
// Add the proper -device value to the list.
GpuArchList.emplace_back(C.getDriver().MakeSYCLDeviceTriple(
"spir64_gen"), ValidDevice->data());
UserTargetName = "spir64_gen";
} else if (auto ValidDevice =
gen::isGPUTarget<gen::NvidiaGPU>(Val)) {
if (ValidDevice->empty())
// Unrecognized, we have already diagnosed this earlier; skip.
continue;
// Add the proper -device value to the list.
GpuArchList.emplace_back(
C.getDriver().MakeSYCLDeviceTriple("nvptx64-nvidia-cuda"),
ValidDevice->data());
UserTargetName = "nvptx64-nvidia-cuda";
} else if (auto ValidDevice = gen::isGPUTarget<gen::AmdGPU>(Val)) {
if (ValidDevice->empty())
// Unrecognized, we have already diagnosed this earlier; skip.
continue;
// Add the proper -device value to the list.
GpuArchList.emplace_back(
C.getDriver().MakeSYCLDeviceTriple("amdgcn-amd-amdhsa"),
ValidDevice->data());
UserTargetName = "amdgcn-amd-amdhsa";
}

llvm::Triple TT(C.getDriver().MakeSYCLDeviceTriple(Val));
std::string NormalizedName = TT.normalize();

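For orientation, the hunks above make the driver recognize three GPU prefixes on -fsycl-targets and pick the matching offload triple plus device name. A simplified, standalone sketch of that mapping (not the in-tree code; device-name validation via resolveGenDevice is omitted):

#include <optional>
#include <string>
#include <string_view>
#include <utility>

// "nvidia_gpu_sm_80" -> {"nvptx64-nvidia-cuda", "sm_80"}, and so on.
std::optional<std::pair<std::string, std::string>>
mapSYCLTarget(std::string_view Val) {
  constexpr std::pair<std::string_view, std::string_view> Table[] = {
      {"intel_gpu_", "spir64_gen"},
      {"nvidia_gpu_", "nvptx64-nvidia-cuda"},
      {"amd_gpu_", "amdgcn-amd-amdhsa"},
  };
  for (auto [Prefix, Triple] : Table)
    if (Val.substr(0, Prefix.size()) == Prefix)
      return std::make_pair(std::string(Triple),
                            std::string(Val.substr(Prefix.size())));
  return std::nullopt;  // not GPU-prefixed; treated as an ordinary triple
}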
6 changes: 4 additions & 2 deletions clang/lib/Driver/ToolChains/Clang.cpp
@@ -5090,10 +5090,12 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
// between device and host where we should be able to use the offloading
// arch to add the macro to the host compile.
auto addTargetMacros = [&](const llvm::Triple &Triple) {
if (!Triple.isSPIR())
if (!Triple.isSPIR() && !Triple.isNVPTX() && !Triple.isAMDGCN())
return;
SmallString<64> Macro;
if (Triple.getSubArch() == llvm::Triple::SPIRSubArch_gen) {
if ((Triple.isSPIR() &&
Triple.getSubArch() == llvm::Triple::SPIRSubArch_gen) ||
Triple.isNVPTX() || Triple.isAMDGCN()) {
StringRef Device = JA.getOffloadingArch();
if (!Device.empty()) {
Macro = "-D";
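The net effect of this Clang.cpp change is that the per-architecture preprocessor macro (built by getGenDeviceMacro in SYCL.cpp below) is now also emitted for NVPTX and AMDGCN offload targets. User code can then be guarded on it; the macro names follow the "__SYCL_TARGET_" + device + "__" pattern from the diff, while the tuning values here are invented for illustration:

// With -fsycl-targets=nvidia_gpu_sm_80 the driver adds
// -D__SYCL_TARGET_NVIDIA_GPU_SM_80__; with amd_gpu_gfx906 it adds
// -D__SYCL_TARGET_AMD_GPU_GFX906__.
#if defined(__SYCL_TARGET_NVIDIA_GPU_SM_80__)
constexpr int preferred_tile = 128;  // illustrative NVIDIA-specific choice
#elif defined(__SYCL_TARGET_AMD_GPU_GFX906__)
constexpr int preferred_tile = 64;   // illustrative AMD-specific choice
#else
constexpr int preferred_tile = 32;   // generic fallback
#endif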
182 changes: 136 additions & 46 deletions clang/lib/Driver/ToolChains/SYCL.cpp
@@ -597,60 +597,130 @@ void SYCL::gen::BackendCompiler::ConstructJob(Compilation &C,
StringRef SYCL::gen::resolveGenDevice(StringRef DeviceName) {
StringRef Device;
Device = llvm::StringSwitch<StringRef>(DeviceName)
.Cases("bdw", "8_0_0", "bdw")
.Cases("skl", "9_0_9", "skl")
.Cases("kbl", "9_1_9", "kbl")
.Cases("cfl", "9_2_9", "cfl")
.Cases("apl", "9_3_0", "apl")
.Cases("glk", "9_4_0", "glk")
.Cases("whl", "9_5_0", "whl")
.Cases("aml", "9_6_0", "aml")
.Cases("cml", "9_7_0", "cml")
.Cases("icllp", "11_0_0", "icllp")
.Cases("ehl", "11_2_0", "ehl")
.Cases("tgllp", "12_0_0", "tgllp")
.Case("rkl", "rkl")
.Case("adl_s", "adl_s")
.Case("rpl_s", "rpl_s")
.Case("adl_p", "adl_p")
.Case("adl_n", "adl_n")
.Cases("dg1", "12_10_0", "dg1")
.Case("acm_g10", "acm_g10")
.Case("acm_g11", "acm_g11")
.Case("acm_g12", "acm_g12")
.Case("pvc", "pvc")
.Cases("intel_gpu_bdw", "intel_gpu_8_0_0", "bdw")
.Cases("intel_gpu_skl", "intel_gpu_9_0_9", "skl")
.Cases("intel_gpu_kbl", "intel_gpu_9_1_9", "kbl")
.Cases("intel_gpu_cfl", "intel_gpu_9_2_9", "cfl")
.Cases("intel_gpu_apl", "intel_gpu_9_3_0", "apl")
.Cases("intel_gpu_glk", "intel_gpu_9_4_0", "glk")
.Cases("intel_gpu_whl", "intel_gpu_9_5_0", "whl")
.Cases("intel_gpu_aml", "intel_gpu_9_6_0", "aml")
.Cases("intel_gpu_cml", "intel_gpu_9_7_0", "cml")
.Cases("intel_gpu_icllp", "intel_gpu_11_0_0", "icllp")
.Cases("intel_gpu_ehl", "intel_gpu_11_2_0", "ehl")
.Cases("intel_gpu_tgllp", "intel_gpu_12_0_0", "tgllp")
.Case("intel_gpu_rkl", "rkl")
.Case("intel_gpu_adl_s", "adl_s")
.Case("intel_gpu_rpl_s", "rpl_s")
.Case("intel_gpu_adl_p", "adl_p")
.Case("intel_gpu_adl_n", "adl_n")
.Cases("intel_gpu_dg1", "intel_gpu_12_10_0", "dg1")
.Case("intel_gpu_acm_g10", "acm_g10")
.Case("intel_gpu_acm_g11", "acm_g11")
.Case("intel_gpu_acm_g12", "acm_g12")
.Case("intel_gpu_pvc", "pvc")
.Case("nvidia_gpu_sm_50", "sm_50")
.Case("nvidia_gpu_sm_52", "sm_52")
.Case("nvidia_gpu_sm_53", "sm_53")
.Case("nvidia_gpu_sm_60", "sm_60")
.Case("nvidia_gpu_sm_61", "sm_61")
.Case("nvidia_gpu_sm_62", "sm_62")
.Case("nvidia_gpu_sm_70", "sm_70")
.Case("nvidia_gpu_sm_72", "sm_72")
.Case("nvidia_gpu_sm_75", "sm_75")
.Case("nvidia_gpu_sm_80", "sm_80")
.Case("nvidia_gpu_sm_86", "sm_86")
.Case("nvidia_gpu_sm_87", "sm_87")
.Case("nvidia_gpu_sm_89", "sm_89")
.Case("nvidia_gpu_sm_90", "sm_90")
.Case("amd_gpu_gfx700", "gfx700")
.Case("amd_gpu_gfx701", "gfx701")
.Case("amd_gpu_gfx702", "gfx702")
.Case("amd_gpu_gfx801", "gfx801")
.Case("amd_gpu_gfx802", "gfx802")
.Case("amd_gpu_gfx803", "gfx803")
.Case("amd_gpu_gfx805", "gfx805")
.Case("amd_gpu_gfx810", "gfx810")
.Case("amd_gpu_gfx900", "gfx900")
.Case("amd_gpu_gfx902", "gfx902")
.Case("amd_gpu_gfx904", "gfx904")
.Case("amd_gpu_gfx906", "gfx906")
.Case("amd_gpu_gfx908", "gfx908")
.Case("amd_gpu_gfx90a", "gfx90a")
.Case("amd_gpu_gfx1010", "gfx1010")
.Case("amd_gpu_gfx1011", "gfx1011")
.Case("amd_gpu_gfx1012", "gfx1012")
.Case("amd_gpu_gfx1013", "gfx1013")
.Case("amd_gpu_gfx1030", "gfx1030")
.Case("amd_gpu_gfx1031", "gfx1031")
.Case("amd_gpu_gfx1032", "gfx1032")
.Default("");
return Device;
}

StringRef SYCL::gen::getGenDeviceMacro(StringRef DeviceName) {
SmallString<64> SYCL::gen::getGenDeviceMacro(StringRef DeviceName) {
SmallString<64> Macro;
StringRef Ext = llvm::StringSwitch<StringRef>(DeviceName)
.Case("bdw", "BDW")
.Case("skl", "SKL")
.Case("kbl", "KBL")
.Case("cfl", "CFL")
.Case("apl", "APL")
.Case("glk", "GLK")
.Case("whl", "WHL")
.Case("aml", "AML")
.Case("cml", "CML")
.Case("icllp", "ICLLP")
.Case("ehl", "EHL")
.Case("tgllp", "TGLLP")
.Case("rkl", "RKL")
.Case("adl_s", "ADL_S")
.Case("rpl_s", "RPL_S")
.Case("adl_p", "ADL_P")
.Case("adl_n", "ADL_N")
.Case("dg1", "DG1")
.Case("acm_g10", "ACM_G10")
.Case("acm_g11", "ACM_G11")
.Case("acm_g12", "ACM_G12")
.Case("pvc", "PVC")
.Case("bdw", "INTEL_GPU_BDW")
.Case("skl", "INTEL_GPU_SKL")
.Case("kbl", "INTEL_GPU_KBL")
.Case("cfl", "INTEL_GPU_CFL")
.Case("apl", "INTEL_GPU_APL")
.Case("glk", "INTEL_GPU_GLK")
.Case("whl", "INTEL_GPU_WHL")
.Case("aml", "INTEL_GPU_AML")
.Case("cml", "INTEL_GPU_CML")
.Case("icllp", "INTEL_GPU_ICLLP")
.Case("ehl", "INTEL_GPU_EHL")
.Case("tgllp", "INTEL_GPU_TGLLP")
.Case("rkl", "INTEL_GPU_RKL")
.Case("adl_s", "INTEL_GPU_ADL_S")
.Case("rpl_s", "INTEL_GPU_RPL_S")
.Case("adl_p", "INTEL_GPU_ADL_P")
.Case("adl_n", "INTEL_GPU_ADL_N")
.Case("dg1", "INTEL_GPU_DG1")
.Case("acm_g10", "INTEL_GPU_ACM_G10")
.Case("acm_g11", "INTEL_GPU_ACM_G11")
.Case("acm_g12", "INTEL_GPU_ACM_G12")
.Case("pvc", "INTEL_GPU_PVC")
.Case("sm_50", "NVIDIA_GPU_SM_50")
.Case("sm_52", "NVIDIA_GPU_SM_52")
.Case("sm_53", "NVIDIA_GPU_SM_53")
.Case("sm_60", "NVIDIA_GPU_SM_60")
.Case("sm_61", "NVIDIA_GPU_SM_61")
.Case("sm_62", "NVIDIA_GPU_SM_62")
.Case("sm_70", "NVIDIA_GPU_SM_70")
.Case("sm_72", "NVIDIA_GPU_SM_72")
.Case("sm_75", "NVIDIA_GPU_SM_75")
.Case("sm_80", "NVIDIA_GPU_SM_80")
.Case("sm_86", "NVIDIA_GPU_SM_86")
.Case("sm_87", "NVIDIA_GPU_SM_87")
.Case("sm_89", "NVIDIA_GPU_SM_89")
.Case("sm_90", "NVIDIA_GPU_SM_90")
.Case("gfx700", "AMD_GPU_GFX700")
.Case("gfx701", "AMD_GPU_GFX701")
.Case("gfx702", "AMD_GPU_GFX702")
.Case("gfx801", "AMD_GPU_GFX801")
.Case("gfx802", "AMD_GPU_GFX802")
.Case("gfx803", "AMD_GPU_GFX803")
.Case("gfx805", "AMD_GPU_GFX805")
.Case("gfx810", "AMD_GPU_GFX810")
.Case("gfx900", "AMD_GPU_GFX900")
.Case("gfx902", "AMD_GPU_GFX902")
.Case("gfx904", "AMD_GPU_GFX904")
.Case("gfx906", "AMD_GPU_GFX906")
.Case("gfx908", "AMD_GPU_GFX908")
.Case("gfx90a", "AMD_GPU_GFX90A")
.Case("gfx1010", "AMD_GPU_GFX1010")
.Case("gfx1011", "AMD_GPU_GFX1011")
.Case("gfx1012", "AMD_GPU_GFX1012")
.Case("gfx1013", "AMD_GPU_GFX1013")
.Case("gfx1030", "AMD_GPU_GFX1030")
.Case("gfx1031", "AMD_GPU_GFX1031")
.Case("gfx1032", "AMD_GPU_GFX1032")
.Default("");
if (!Ext.empty()) {
Macro = "__SYCL_TARGET_INTEL_GPU_";
Macro = "__SYCL_TARGET_";
Macro += Ext;
Macro += "__";
}
@@ -760,6 +830,25 @@ static void parseTargetOpts(StringRef ArgString, const llvm::opt::ArgList &Args,
CmdArgs.push_back(Args.MakeArgString(TA));
}

void SYCLToolChain::TranslateGPUTargetOpt(const llvm::opt::ArgList &Args,
llvm::opt::ArgStringList &CmdArgs,
OptSpecifier Opt_EQ) const {
for (auto *A : Args) {
if (A->getOption().matches(Opt_EQ)) {
if (auto GpuDevice =
tools::SYCL::gen::isGPUTarget<tools::SYCL::gen::AmdGPU>(
A->getValue())) {
StringRef ArgString;
SmallString<64> OffloadArch("--offload-arch=");
OffloadArch += GpuDevice->data();
ArgString = OffloadArch;
parseTargetOpts(ArgString, Args, CmdArgs);
A->claim();
}
}
}
}

// Expects a specific type of option (e.g. -Xsycl-target-backend) and will
// extract the arguments.
void SYCLToolChain::TranslateTargetOpt(const llvm::opt::ArgList &Args,
@@ -915,6 +1004,7 @@ void SYCLToolChain::TranslateBackendTargetArgs(
// Handle -Xsycl-target-backend.
TranslateTargetOpt(Args, CmdArgs, options::OPT_Xsycl_backend,
options::OPT_Xsycl_backend_EQ, Device);
TranslateGPUTargetOpt(Args, CmdArgs, options::OPT_fsycl_targets_EQ);
}

void SYCLToolChain::TranslateLinkerTargetArgs(
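TranslateGPUTargetOpt above is what turns an amd_gpu_* value on -fsycl-targets into a --offload-arch option for the backend job (only the amd_gpu_ prefix is translated in that hook). A minimal standalone analogue, simplified from the real code path through isGPUTarget and parseTargetOpts:

#include <string>
#include <string_view>
#include <vector>

// "amd_gpu_gfx906" contributes "--offload-arch=gfx906" to the backend args.
void addAmdOffloadArch(std::string_view SYCLTargetValue,
                       std::vector<std::string> &CmdArgs) {
  constexpr std::string_view Prefix = "amd_gpu_";
  if (SYCLTargetValue.substr(0, Prefix.size()) != Prefix)
    return;  // not an amd_gpu_* value; nothing to translate
  std::string Arch(SYCLTargetValue.substr(Prefix.size()));
  CmdArgs.push_back("--offload-arch=" + Arch);
}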
19 changes: 18 additions & 1 deletion clang/lib/Driver/ToolChains/SYCL.h
@@ -106,7 +106,21 @@ class LLVM_LIBRARY_VISIBILITY BackendCompiler : public Tool {
};

StringRef resolveGenDevice(StringRef DeviceName);
StringRef getGenDeviceMacro(StringRef DeviceName);
SmallString<64> getGenDeviceMacro(StringRef DeviceName);

// Prefix for GPU-specific targets used for -fsycl-targets
constexpr char IntelGPU[] = "intel_gpu_";
constexpr char NvidiaGPU[] = "nvidia_gpu_";
constexpr char AmdGPU[] = "amd_gpu_";

template <auto GPUArch> llvm::Optional<StringRef> isGPUTarget(StringRef Target) {
// Handle target specifications that resemble '(intel, nvidia, amd)_gpu_*'
// here.
if (Target.startswith(GPUArch)) {
return resolveGenDevice(Target);
}
return llvm::None;
}

} // end namespace gen

@@ -189,6 +203,9 @@ class LLVM_LIBRARY_VISIBILITY SYCLToolChain : public ToolChain {
llvm::opt::OptSpecifier Opt,
llvm::opt::OptSpecifier Opt_EQ,
StringRef Device) const;
void TranslateGPUTargetOpt(const llvm::opt::ArgList &Args,
llvm::opt::ArgStringList &CmdArgs,
llvm::opt::OptSpecifier Opt_EQ) const;
};

} // end namespace toolchains