Skip to content

Commit

Permalink
[libc] Rework the GPU build to be a regular target
Browse files Browse the repository at this point in the history
Summary:
This is a massive patch because it reworks the entire build and
everything that depends on it. This is not split up because various bots
would fail otherwise. I will attempt to describe the necessary changes
here.

This patch completely reworks how the GPU build is built and targeted.
Previously, we used a standard runtimes build and handled both NVPTX and
AMDGPU in a single build via multi-targeting. This added a lot of
divergence in the build system and prevented us from doing various
things like building for the CPU / GPU at the same time, or exporting
the startup libraries or running tests without a full rebuild.

The new appraoch is to handle the GPU builds as strict cross-compiling
runtimes. The first step required
llvm#81557 to allow the `LIBC`
target to build for the GPU without touching the other targets. This
means that the GPU uses all the same handling as the other builds in
`libc`.

The new expected way to build the GPU libc is with
`LLVM_LIBC_RUNTIME_TARGETS=amdgcn-amd-amdhsa;nvptx64-nvidia-cuda`.

The second step was reworking how we generated the embedded GPU library
by moving it into the library install step. Where we previously had one
`libcgpu.a` we now have `libcgpu-amdgpu.a` and `libcgpu-nvptx.a`. This
patch includes the necessary clang / OpenMP changes to make that not
break the bots when this lands.

We unfortunately still require that the NVPTX target has an `internal`
target for tests. This is because the NVPTX target needs to do LTO for
the provided version (The offloading toolchain can handle it) but cannot
use it for the native toolchain which is used for making tests.

This approach is vastly suprerior in every way, allowing us to treat the
GPU as a standard cross-compiling target. We can now install the GPU
utilities to do things like use the offload tests and other fun things.

Depends on llvm#81557
  • Loading branch information
jhuber6 committed Feb 15, 2024
1 parent 95e8a7c commit 5fdaa38
Show file tree
Hide file tree
Showing 50 changed files with 564 additions and 637 deletions.
37 changes: 34 additions & 3 deletions clang/lib/Driver/ToolChains/CommonArgs.cpp
Expand Up @@ -1087,10 +1087,41 @@ static void addOpenMPDeviceLibC(const ToolChain &TC, const ArgList &Args,
"llvm-libc-decls");
bool HasLibC = llvm::sys::fs::exists(LibCDecls) &&
llvm::sys::fs::is_directory(LibCDecls);
if (Args.hasFlag(options::OPT_gpulibc, options::OPT_nogpulibc, HasLibC)) {
CmdArgs.push_back("-lcgpu");
CmdArgs.push_back("-lmgpu");
if (!Args.hasFlag(options::OPT_gpulibc, options::OPT_nogpulibc, HasLibC))
return;

// We don't have access to the offloading toolchains here, so determine from
// the arguments if we have any active NVPTX or AMDGPU toolchains.
llvm::DenseSet<const char *> Libraries;
if (const Arg *Targets = Args.getLastArg(options::OPT_fopenmp_targets_EQ)) {
if (llvm::any_of(Targets->getValues(),
[](auto S) { return llvm::Triple(S).isAMDGPU(); })) {
Libraries.insert("-lcgpu-amdgpu");
Libraries.insert("-lmgpu-amdgpu");
}
if (llvm::any_of(Targets->getValues(),
[](auto S) { return llvm::Triple(S).isNVPTX(); })) {
Libraries.insert("-lcgpu-nvptx");
Libraries.insert("-lmgpu-nvptx");
}
}

for (StringRef Arch : Args.getAllArgValues(options::OPT_offload_arch_EQ)) {
if (llvm::any_of(llvm::split(Arch, ","), [](StringRef Str) {
return IsAMDGpuArch(StringToCudaArch(Str));
})) {
Libraries.insert("-lcgpu-amdgpu");
Libraries.insert("-lmgpu-amdgpu");
}
if (llvm::any_of(llvm::split(Arch, ","), [](StringRef Str) {
return IsNVIDIAGpuArch(StringToCudaArch(Str));
})) {
Libraries.insert("-lcgpu-nvptx");
Libraries.insert("-lmgpu-nvptx");
}
}

llvm::append_range(CmdArgs, Libraries);
}

void tools::addOpenMPRuntimeLibraryPath(const ToolChain &TC,
Expand Down
14 changes: 11 additions & 3 deletions clang/test/Driver/openmp-offload-gpu.c
Expand Up @@ -394,13 +394,21 @@
// RUN: %clang -### --target=x86_64-unknown-linux-gnu -fopenmp=libomp \
// RUN: --libomptarget-nvptx-bc-path=%S/Inputs/libomptarget/libomptarget-nvptx-test.bc \
// RUN: --cuda-path=%S/Inputs/CUDA_102/usr/local/cuda \
// RUN: --offload-arch=sm_52 -gpulibc -nogpuinc %s 2>&1 \
// RUN: --offload-arch=sm_52,gfx90a -gpulibc -nogpuinc %s 2>&1 \
// RUN: | FileCheck --check-prefix=LIBC-GPU %s
// LIBC-GPU: "-lcgpu"{{.*}}"-lmgpu"
// RUN: %clang -### --target=x86_64-unknown-linux-gnu -fopenmp=libomp \
// RUN: --libomptarget-nvptx-bc-path=%S/Inputs/libomptarget/libomptarget-nvptx-test.bc \
// RUN: --cuda-path=%S/Inputs/CUDA_102/usr/local/cuda \
// RUN: -fopenmp-targets=nvptx64-nvidia-cuda,amdgcn-amd-amdhsa -gpulibc -nogpuinc %s 2>&1 \
// RUN: | FileCheck --check-prefix=LIBC-GPU %s
// LIBC-GPU-DAG: "-lcgpu-amdgpu"
// LIBC-GPU-DAG: "-lmgpu-amdgpu"
// LIBC-GPU-DAG: "-lcgpu-nvptx"
// LIBC-GPU-DAG: "-lmgpu-nvptx"

// RUN: %clang -### --target=x86_64-unknown-linux-gnu -fopenmp=libomp \
// RUN: --libomptarget-nvptx-bc-path=%S/Inputs/libomptarget/libomptarget-nvptx-test.bc \
// RUN: --cuda-path=%S/Inputs/CUDA_102/usr/local/cuda \
// RUN: --offload-arch=sm_52 -nogpulibc -nogpuinc %s 2>&1 \
// RUN: | FileCheck --check-prefix=NO-LIBC-GPU %s
// NO-LIBC-GPU-NOT: "-lcgpu"{{.*}}"-lmgpu"
// NO-LIBC-GPU-NOT: -lmgpu{{.*}}-lcgpu
20 changes: 10 additions & 10 deletions libc/CMakeLists.txt
Expand Up @@ -43,7 +43,7 @@ set(LIBC_NAMESPACE "__llvm_libc_${LLVM_VERSION_MAJOR}_${LLVM_VERSION_MINOR}_${LL
CACHE STRING "The namespace to use to enclose internal implementations. Must start with '__llvm_libc'."
)

if(LLVM_LIBC_FULL_BUILD OR LIBC_GPU_BUILD OR LIBC_GPU_ARCHITECTURES)
if(LLVM_LIBC_FULL_BUILD OR LLVM_LIBC_GPU_BUILD)
if(NOT LIBC_HDRGEN_EXE)
# We need to set up hdrgen first since other targets depend on it.
add_subdirectory(utils/LibcTableGenUtil)
Expand All @@ -65,7 +65,7 @@ if(("libc" IN_LIST LLVM_ENABLE_RUNTIMES AND NOT LLVM_RUNTIMES_BUILD) OR
# to build libc-hdrgen and return.

# Always make the RPC server availible to other projects for GPU mode.
if(LIBC_GPU_BUILD OR LIBC_GPU_ARCHITECTURES)
if(LLVM_LIBC_GPU_BUILD)
add_subdirectory(utils/gpu/server)
endif()
return()
Expand Down Expand Up @@ -105,7 +105,7 @@ if(COMMAND_RETURN_CODE EQUAL 0)
message(STATUS "Set COMPILER_RESOURCE_DIR to "
"${COMPILER_RESOURCE_DIR} using --print-resource-dir")
else()
if (LIBC_TARGET_ARCHITECTURE_IS_GPU)
if (LIBC_TARGET_OS_IS_GPU)
message(FATAL_ERROR "COMPILER_RESOURCE_DIR must be set for GPU builds")
else()
set(COMPILER_RESOURCE_DIR OFF)
Expand Down Expand Up @@ -203,11 +203,7 @@ foreach(config_path IN LISTS LIBC_CONFIG_JSON_FILE_LIST)
load_libc_config(${config_path}/config.json ${cmd_line_conf})
endforeach()

if(LIBC_TARGET_ARCHITECTURE_IS_GPU)
set(LIBC_INCLUDE_DIR ${CMAKE_CURRENT_BINARY_DIR}/include)
set(LIBC_INSTALL_INCLUDE_DIR ${CMAKE_INSTALL_INCLUDEDIR}/gpu-none-llvm)
set(LIBC_LIBRARY_DIR ${LLVM_LIBRARY_OUTPUT_INTDIR})
elseif(LLVM_ENABLE_PER_TARGET_RUNTIME_DIR AND LIBC_ENABLE_USE_BY_CLANG)
if(LLVM_ENABLE_PER_TARGET_RUNTIME_DIR AND LIBC_ENABLE_USE_BY_CLANG)
set(LIBC_INCLUDE_DIR ${LLVM_BINARY_DIR}/include/${LLVM_DEFAULT_TARGET_TRIPLE})
set(LIBC_INSTALL_INCLUDE_DIR ${CMAKE_INSTALL_INCLUDEDIR}/${LLVM_DEFAULT_TARGET_TRIPLE})
set(LIBC_LIBRARY_DIR ${LLVM_LIBRARY_OUTPUT_INTDIR}/${LLVM_DEFAULT_TARGET_TRIPLE})
Expand All @@ -222,10 +218,14 @@ else()
set(LIBC_INCLUDE_DIR ${CMAKE_BINARY_DIR}/include)
set(LIBC_LIBRARY_DIR ${CMAKE_BINARY_DIR}/lib${LLVM_LIBDIR_SUFFIX})
endif()
set(LIBC_INSTALL_INCLUDE_DIR ${CMAKE_INSTALL_INCLUDEDIR})
if(LIBC_TARGET_OS_IS_GPU)
set(LIBC_INSTALL_INCLUDE_DIR ${CMAKE_INSTALL_INCLUDEDIR}/${LLVM_DEFAULT_TARGET_TRIPLE})
else()
set(LIBC_INSTALL_INCLUDE_DIR ${CMAKE_INSTALL_INCLUDEDIR})
endif()
endif()

if(LIBC_TARGET_ARCHITECTURE_IS_GPU)
if(LIBC_TARGET_OS_IS_GPU)
include(prepare_libc_gpu_build)
set(LIBC_ENABLE_UNITTESTS OFF)
endif()
Expand Down
28 changes: 16 additions & 12 deletions libc/cmake/modules/LLVMLibCArchitectures.cmake
Expand Up @@ -6,18 +6,6 @@
# platform.
# ------------------------------------------------------------------------------

if(LIBC_GPU_BUILD OR LIBC_GPU_ARCHITECTURES)
# We set the generic target and OS to "gpu" here. More specific defintions
# for the exact target GPU are set up in prepare_libc_gpu_build.cmake.
set(LIBC_TARGET_OS "gpu")
set(LIBC_TARGET_ARCHITECTURE_IS_GPU TRUE)
set(LIBC_TARGET_ARCHITECTURE "gpu")
if(LIBC_TARGET_TRIPLE)
message(WARNING "LIBC_TARGET_TRIPLE is ignored as LIBC_GPU_BUILD is on. ")
endif()
return()
endif()

if(MSVC)
# If the compiler is visual c++ or equivalent, we will assume a host build.
set(LIBC_TARGET_OS ${CMAKE_HOST_SYSTEM_NAME})
Expand Down Expand Up @@ -59,6 +47,10 @@ function(get_arch_and_system_from_triple triple arch_var sys_var)
set(target_arch "riscv32")
elseif(target_arch MATCHES "^riscv64")
set(target_arch "riscv64")
elseif(target_arch MATCHES "^amdgcn")
set(target_arch "amdgpu")
elseif(target_arch MATCHES "^nvptx64")
set(target_arch "nvptx")
else()
return()
endif()
Expand All @@ -75,6 +67,12 @@ function(get_arch_and_system_from_triple triple arch_var sys_var)
set(target_sys "darwin")
endif()

# Setting OS name for GPU architectures.
list(GET triple_comps -1 target_sys)
if(target_sys MATCHES "^amdhsa" OR target_sys MATCHES "^cuda")
set(target_sys "gpu")
endif()

set(${sys_var} ${target_sys} PARENT_SCOPE)
endfunction(get_arch_and_system_from_triple)

Expand Down Expand Up @@ -156,6 +154,10 @@ elseif(LIBC_TARGET_ARCHITECTURE STREQUAL "riscv64")
elseif(LIBC_TARGET_ARCHITECTURE STREQUAL "riscv32")
set(LIBC_TARGET_ARCHITECTURE_IS_RISCV32 TRUE)
set(LIBC_TARGET_ARCHITECTURE "riscv")
elseif(LIBC_TARGET_ARCHITECTURE STREQUAL "amdgpu")
set(LIBC_TARGET_ARCHITECTURE_IS_AMDGPU TRUE)
elseif(LIBC_TARGET_ARCHITECTURE STREQUAL "nvptx")
set(LIBC_TARGET_ARCHITECTURE_IS_NVPTX TRUE)
else()
message(FATAL_ERROR
"Unsupported libc target architecture ${LIBC_TARGET_ARCHITECTURE}")
Expand All @@ -178,6 +180,8 @@ elseif(LIBC_TARGET_OS STREQUAL "darwin")
set(LIBC_TARGET_OS_IS_DARWIN TRUE)
elseif(LIBC_TARGET_OS STREQUAL "windows")
set(LIBC_TARGET_OS_IS_WINDOWS TRUE)
elseif(LIBC_TARGET_OS STREQUAL "gpu")
set(LIBC_TARGET_OS_IS_GPU TRUE)
else()
message(FATAL_ERROR
"Unsupported libc target operating system ${LIBC_TARGET_OS}")
Expand Down
2 changes: 1 addition & 1 deletion libc/cmake/modules/LLVMLibCCheckMPFR.cmake
Expand Up @@ -2,7 +2,7 @@ set(LLVM_LIBC_MPFR_INSTALL_PATH "" CACHE PATH "Path to where MPFR is installed (

if(LLVM_LIBC_MPFR_INSTALL_PATH)
set(LIBC_TESTS_CAN_USE_MPFR TRUE)
elseif(LIBC_TARGET_ARCHITECTURE_IS_GPU)
elseif(LIBC_TARGET_OS_IS_GPU)
set(LIBC_TESTS_CAN_USE_MPFR FALSE)
else()
try_compile(
Expand Down
2 changes: 1 addition & 1 deletion libc/cmake/modules/LLVMLibCHeaderRules.cmake
Expand Up @@ -139,7 +139,7 @@ function(add_gen_header target_name)
${hdrgen_deps}
)

if(LIBC_TARGET_ARCHITECTURE_IS_GPU)
if(LIBC_TARGET_OS_IS_GPU)
file(MAKE_DIRECTORY ${LIBC_INCLUDE_DIR}/llvm-libc-decls)
set(decl_out_file ${LIBC_INCLUDE_DIR}/llvm-libc-decls/${relative_path})
add_custom_command(
Expand Down
141 changes: 117 additions & 24 deletions libc/cmake/modules/LLVMLibCLibraryRules.cmake
Expand Up @@ -50,31 +50,9 @@ function(collect_object_file_deps target result)
endif()
endfunction(collect_object_file_deps)

# A rule to build a library from a collection of entrypoint objects.
# Usage:
# add_entrypoint_library(
# DEPENDS <list of add_entrypoint_object targets>
# )
#
# NOTE: If one wants an entrypoint to be available in a library, then they will
# have to list the entrypoint target explicitly in the DEPENDS list. Implicit
# entrypoint dependencies will not be added to the library.
function(add_entrypoint_library target_name)
cmake_parse_arguments(
"ENTRYPOINT_LIBRARY"
"" # No optional arguments
"" # No single value arguments
"DEPENDS" # Multi-value arguments
${ARGN}
)
if(NOT ENTRYPOINT_LIBRARY_DEPENDS)
message(FATAL_ERROR "'add_entrypoint_library' target requires a DEPENDS list "
"of 'add_entrypoint_object' targets.")
endif()

get_fq_deps_list(fq_deps_list ${ENTRYPOINT_LIBRARY_DEPENDS})
function(get_all_object_file_deps result fq_deps_list)
set(all_deps "")
foreach(dep IN LISTS fq_deps_list)
foreach(dep ${fq_deps_list})
get_target_property(dep_type ${dep} "TARGET_TYPE")
if(NOT ((${dep_type} STREQUAL ${ENTRYPOINT_OBJ_TARGET_TYPE}) OR
(${dep_type} STREQUAL ${ENTRYPOINT_EXT_TARGET_TYPE}) OR
Expand Down Expand Up @@ -102,6 +80,121 @@ function(add_entrypoint_library target_name)
list(APPEND all_deps ${entrypoint_target})
endforeach(dep)
list(REMOVE_DUPLICATES all_deps)
set(${result} ${all_deps} PARENT_SCOPE)
endfunction()

# A rule to build a library from a collection of entrypoint objects and bundle
# it into a GPU fatbinary. Usage is the same as 'add_entrypoint_library'.
# Usage:
# add_gpu_entrypoint_library(
# DEPENDS <list of add_entrypoint_object targets>
# )
function(add_gpu_entrypoint_library target_name)
cmake_parse_arguments(
"ENTRYPOINT_LIBRARY"
"" # No optional arguments
"" # No single value arguments
"DEPENDS" # Multi-value arguments
${ARGN}
)
if(NOT ENTRYPOINT_LIBRARY_DEPENDS)
message(FATAL_ERROR "'add_entrypoint_library' target requires a DEPENDS list "
"of 'add_entrypoint_object' targets.")
endif()

get_fq_deps_list(fq_deps_list ${ENTRYPOINT_LIBRARY_DEPENDS})
get_all_object_file_deps(all_deps "${fq_deps_list}")

# The GPU 'libc' needs to be exported in a format that can be linked with
# offloading langauges like OpenMP or CUDA. This wraps every GPU object into a
# fat binary and adds them to a static library.
set(objects "")
foreach(dep IN LISTS all_deps)
set(object $<$<STREQUAL:$<TARGET_NAME_IF_EXISTS:${dep}>,${dep}>:$<TARGET_OBJECTS:${dep}>>)
string(FIND ${dep} "." last_dot_loc REVERSE)
math(EXPR name_loc "${last_dot_loc} + 1")
string(SUBSTRING ${dep} ${name_loc} -1 name)
if(LIBC_TARGET_ARCHITECTURE_IS_NVPTX)
set(prefix --image=arch=generic,triple=nvptx64-nvidia-cuda,feature=+ptx63)
else()
set(prefix --image=arch=generic,triple=amdgcn-amd-amdhsa)
endif()

# Use the 'clang-offload-packager' to merge these files into a binary blob.
add_custom_command(
OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/binary/${name}.gpubin"
COMMAND ${CMAKE_COMMAND} -E make_directory ${CMAKE_CURRENT_BINARY_DIR}/binary
COMMAND ${LIBC_CLANG_OFFLOAD_PACKAGER}
"${prefix},file=$<JOIN:${object},,file=>" -o
${CMAKE_CURRENT_BINARY_DIR}/binary/${name}.gpubin
DEPENDS ${dep}
COMMENT "Packaging LLVM offloading binary for '${object}'"
)
add_custom_target(${dep}.__gpubin__ DEPENDS ${dep}
"${CMAKE_CURRENT_BINARY_DIR}/binary/${name}.gpubin")

# CMake does not permit setting the name on object files. In order to have
# human readable names we create an empty stub file with the entrypoint
# name. This empty file will then have the created binary blob embedded.
add_custom_command(
OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/stubs/${name}.cpp"
COMMAND ${CMAKE_COMMAND} -E make_directory ${CMAKE_CURRENT_BINARY_DIR}/stubs
COMMAND ${CMAKE_COMMAND} -E touch ${CMAKE_CURRENT_BINARY_DIR}/stubs/${name}.cpp
DEPENDS ${dep} ${dep}.__gpubin__
)
add_custom_target(${dep}.__stub__
DEPENDS ${dep}.__gpubin__ "${CMAKE_CURRENT_BINARY_DIR}/stubs/${name}.cpp")

add_library(${dep}.__fatbin__
EXCLUDE_FROM_ALL OBJECT
"${CMAKE_CURRENT_BINARY_DIR}/stubs/${name}.cpp"
)

# This is always compiled for the LLVM host triple instead of the native GPU
# triple that is used by default in the build.
target_compile_options(${dep}.__fatbin__ BEFORE PRIVATE -nostdlib)
target_compile_options(${dep}.__fatbin__ PRIVATE
--target=${LLVM_HOST_TRIPLE}
"SHELL:-Xclang -fembed-offload-object=${CMAKE_CURRENT_BINARY_DIR}/binary/${name}.gpubin")
add_dependencies(${dep}.__fatbin__ ${dep} ${dep}.__stub__ ${dep}.__gpubin__)

# Set the list of newly create fat binaries containing embedded device code.
list(APPEND objects $<TARGET_OBJECTS:${dep}.__fatbin__>)
endforeach()

add_library(
${target_name}
STATIC
${objects}
)
set_target_properties(${target_name} PROPERTIES LIBRARY_OUTPUT_DIRECTORY ${LIBC_LIBRARY_DIR})
endfunction(add_gpu_entrypoint_library)

# A rule to build a library from a collection of entrypoint objects.
# Usage:
# add_entrypoint_library(
# DEPENDS <list of add_entrypoint_object targets>
# )
#
# NOTE: If one wants an entrypoint to be available in a library, then they will
# have to list the entrypoint target explicitly in the DEPENDS list. Implicit
# entrypoint dependencies will not be added to the library.
function(add_entrypoint_library target_name)
cmake_parse_arguments(
"ENTRYPOINT_LIBRARY"
"" # No optional arguments
"" # No single value arguments
"DEPENDS" # Multi-value arguments
${ARGN}
)
if(NOT ENTRYPOINT_LIBRARY_DEPENDS)
message(FATAL_ERROR "'add_entrypoint_library' target requires a DEPENDS list "
"of 'add_entrypoint_object' targets.")
endif()

get_fq_deps_list(fq_deps_list ${ENTRYPOINT_LIBRARY_DEPENDS})
get_all_object_file_deps(all_deps "${fq_deps_list}")

set(objects "")
foreach(dep IN LISTS all_deps)
list(APPEND objects $<$<STREQUAL:$<TARGET_NAME_IF_EXISTS:${dep}>,${dep}>:$<TARGET_OBJECTS:${dep}>>)
Expand Down

0 comments on commit 5fdaa38

Please sign in to comment.