Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
177 changes: 106 additions & 71 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ tvm_option(USE_HEXAGON_SDK "Path to the Hexagon SDK root (required for Hexagon s
tvm_option(USE_HEXAGON_RPC "Enable Hexagon RPC using minRPC implementation over Android." OFF)
tvm_option(USE_HEXAGON_GTEST "Path to Hexagon specific gtest version for runtime cpp tests." /path/to/hexagon/gtest)
tvm_option(USE_HEXAGON_EXTERNAL_LIBS "Path to git repo containing external Hexagon runtime sources or libraries" OFF)

tvm_option(USE_RPC "Build with RPC" ON)
tvm_option(USE_THREADS "Build with thread support" ON)
tvm_option(USE_LLVM "Build with LLVM, can be set to specific llvm-config path" OFF)
Expand Down Expand Up @@ -112,6 +113,18 @@ include_directories(SYSTEM ${COMPILER_RT_PATH})
# initial variables
set(TVM_LINKER_LIBS "")
set(TVM_RUNTIME_LINKER_LIBS "")
# Declare the tvm_runtime_extra shared library up front, without sources, so
# that contrib cmake files can attach their object libraries with
# target_link_libraries(tvm_runtime_extra PRIVATE <object_lib>) directly.
add_library(tvm_runtime_extra SHARED)
# No sources are listed on the target itself, so the linker language is
# stated explicitly (presumably because CMake cannot infer it here).
set_property(TARGET tvm_runtime_extra PROPERTY LINKER_LANGUAGE CXX)

# Usage-requirement bundle for the OBJECT libraries that end up in
# tvm_runtime_extra. It forwards tvm_ffi_header and defines
# TVM_RUNTIME_EXPORTS and TVM_FFI_EXPORTS. On MSVC, TVM_RUNTIME_EXPORTS makes
# TVM_RUNTIME_DLL expand to __declspec(dllexport), so functions defined in
# the extra modules are exported from tvm_runtime_extra.dll.
add_library(tvm_runtime_extra_defs INTERFACE)
target_compile_definitions(tvm_runtime_extra_defs INTERFACE
  TVM_RUNTIME_EXPORTS
  TVM_FFI_EXPORTS
)
target_link_libraries(tvm_runtime_extra_defs INTERFACE tvm_ffi_header)


# Check if this is being run on its own or as a subdirectory for another project
Expand Down Expand Up @@ -328,10 +341,10 @@ tvm_file_glob(GLOB RUNTIME_SRCS
src/runtime/*.cc
src/runtime/vm/*.cc
src/runtime/memory/*.cc
src/runtime/disco/*.cc
src/runtime/minrpc/*.cc
src/runtime/vm/*.cc
)
# Note: src/runtime/disco/** moves to libtvm_runtime_extra.
# Note: src/runtime/{cuda,vulkan,opencl,metal,rocm,hexagon}/* move to per-backend DSOs.
set(TVM_RUNTIME_EXT_OBJS "")

if(BUILD_FOR_HEXAGON)
Expand All @@ -343,17 +356,11 @@ if(BUILD_FOR_HEXAGON)
add_definitions(-D_MACH_I32=int)
endif()

# The distributed disco runtime is disabled for Hexagon
if (NOT BUILD_FOR_HEXAGON)
tvm_file_glob(GLOB RUNTIME_DISCO_DISTRIBUTED_SRCS src/runtime/disco/distributed/*.cc)
list(APPEND RUNTIME_SRCS ${RUNTIME_DISCO_DISTRIBUTED_SRCS})
endif()

# Package runtime rules
if(NOT USE_RTTI)
endif()

if (INDEX_DEFAULT_I64)
if(INDEX_DEFAULT_I64)
add_definitions(-DTVM_INDEX_DEFAULT_I64=1)
endif()

Expand All @@ -362,36 +369,8 @@ if(USE_RPC)
tvm_file_glob(GLOB RUNTIME_RPC_SRCS src/runtime/rpc/*.cc)
list(APPEND RUNTIME_SRCS ${RUNTIME_RPC_SRCS})
endif(USE_RPC)

if(USE_CUDA AND USE_NCCL)
message(STATUS "Build with NCCL...")
find_nccl(${USE_NCCL})
include_directories(SYSTEM ${NCCL_INCLUDE_DIR})
tvm_file_glob(GLOB RUNTIME_NCCL_SRC src/runtime/disco/nccl/*.cc src/runtime/disco/cuda_ipc/*.cc 3rdparty/tensorrt_llm/*.cu)
set_source_files_properties(src/runtime/disco/nccl/nccl.cc PROPERTIES COMPILE_DEFINITIONS "TVM_NCCL_RCCL_SWITCH=0")
list(APPEND RUNTIME_SRCS ${RUNTIME_NCCL_SRC})
endif()

if (USE_CUDA AND USE_NVSHMEM)
message(STATUS "Build with NVSHMEM...")
find_nvshmem(${USE_NVSHMEM})
if (NOT NVSHMEM_FOUND)
message(FATAL_ERROR "Cannot find NVSHMEM, USE_NVSHMEM=" ${USE_NVSHMEM})
endif()
set(CMAKE_CUDA_SEPARABLE_COMPILATION ON)
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
tvm_file_glob(GLOB RUNTIME_NVSHMEM_SRCS src/runtime/contrib/nvshmem/*.cc src/runtime/contrib/nvshmem/*.cu)
list(APPEND RUNTIME_SRCS ${RUNTIME_NVSHMEM_SRCS})
endif()

if(USE_ROCM AND USE_RCCL)
message(STATUS "Build with RCCL...")
find_rccl(${USE_RCCL})
include_directories(SYSTEM ${RCCL_INCLUDE_DIR})
tvm_file_glob(GLOB RUNTIME_RCCL_SRC src/runtime/disco/nccl/*.cc)
set_source_files_properties(src/runtime/disco/nccl/nccl.cc PROPERTIES COMPILE_DEFINITIONS "TVM_NCCL_RCCL_SWITCH=1")
list(APPEND RUNTIME_SRCS ${RUNTIME_RCCL_SRC})
endif()
# Note: disco/**, NCCL, NVSHMEM, RCCL all move to libtvm_runtime_extra
# (assembled inline below after all contrib cmake files).

# Enable ctest if gtest is available
if(USE_GTEST)
Expand Down Expand Up @@ -471,6 +450,90 @@ include(cmake/modules/contrib/ExampleNPU.cmake)
include(cmake/modules/contrib/vllm.cmake)
include(cmake/modules/Git.cmake)

# ---- libtvm_runtime_extra assembly ----
# Core disco sources: compiled as an OBJECT library that picks up the export
# definitions from tvm_runtime_extra_defs and is then folded into the
# tvm_runtime_extra shared library.
tvm_file_glob(GLOB _disco_core_srcs
  src/runtime/disco/*.cc
)
add_library(tvm_disco_objs OBJECT ${_disco_core_srcs})
target_link_libraries(tvm_disco_objs PRIVATE tvm_runtime_extra_defs)
target_link_libraries(tvm_runtime_extra PRIVATE tvm_disco_objs)

# Distributed disco sources follow the same pattern, but are skipped when
# cross-compiling for Hexagon.
if(NOT BUILD_FOR_HEXAGON)
  tvm_file_glob(GLOB _disco_dist_srcs
    src/runtime/disco/distributed/*.cc
  )
  add_library(tvm_disco_distributed_objs OBJECT ${_disco_dist_srcs})
  target_link_libraries(tvm_disco_distributed_objs PRIVATE tvm_runtime_extra_defs)
  target_link_libraries(tvm_runtime_extra PRIVATE tvm_disco_distributed_objs)
endif()

# NCCL / cuda_ipc — requires CUDA + NCCL.
# Builds the disco NCCL and cuda_ipc sources (plus the tensorrt_llm CUDA
# sources) as an OBJECT library and links it, together with nccl, into
# tvm_runtime_extra.
if(USE_CUDA AND USE_NCCL)
  find_nccl(${USE_NCCL})
  # Directory-scoped include: visible to every target in this CMakeLists.
  include_directories(SYSTEM ${NCCL_INCLUDE_DIR})
  tvm_file_glob(GLOB _nccl_srcs
    src/runtime/disco/nccl/*.cc
    src/runtime/disco/cuda_ipc/*.cc
    3rdparty/tensorrt_llm/*.cu
  )
  # nccl.cc is shared with the RCCL build; the switch value 0 is used for NCCL.
  set_source_files_properties(src/runtime/disco/nccl/nccl.cc PROPERTIES COMPILE_DEFINITIONS "TVM_NCCL_RCCL_SWITCH=0")
  add_library(tvm_nccl_objs OBJECT ${_nccl_srcs})
  target_link_libraries(tvm_nccl_objs PRIVATE tvm_runtime_extra_defs)
  target_link_libraries(tvm_runtime_extra PRIVATE tvm_nccl_objs nccl)
  # Link librt only when it is actually found. Passing an unguarded
  # LIBRT-NOTFOUND value to target_link_libraries aborts generation with
  # CMake's generic "variables ... set to NOTFOUND" error.
  find_library(LIBRT rt)
  if(LIBRT)
    target_link_libraries(tvm_runtime_extra PRIVATE ${LIBRT})
  else()
    message(STATUS "librt not found; linking tvm_runtime_extra without it")
  endif()
endif()

# NVSHMEM — requires CUDA + NVSHMEM.
# Builds the contrib/nvshmem sources as an OBJECT library and links it, with
# the NVSHMEM host and device libraries, into tvm_runtime_extra.
if(USE_CUDA AND USE_NVSHMEM)
  find_nvshmem(${USE_NVSHMEM})
  if(NOT NVSHMEM_FOUND)
    message(FATAL_ERROR "Cannot find NVSHMEM, USE_NVSHMEM=" ${USE_NVSHMEM})
  endif()
  # These variables seed the defaults of targets created after this point,
  # so they must be set before add_library(tvm_nvshmem_objs ...).
  set(CMAKE_CUDA_SEPARABLE_COMPILATION ON)
  set(CMAKE_POSITION_INDEPENDENT_CODE ON)
  tvm_file_glob(GLOB _nvshmem_srcs
    src/runtime/contrib/nvshmem/*.cc
    src/runtime/contrib/nvshmem/*.cu
  )
  add_library(tvm_nvshmem_objs OBJECT ${_nvshmem_srcs})
  target_link_libraries(tvm_nvshmem_objs PRIVATE tvm_runtime_extra_defs)
  target_include_directories(tvm_nvshmem_objs PUBLIC ${NVSHMEM_INCLUDE_DIR})
  find_library(NVSHMEM_HOST nvshmem_host ${NVSHMEM_LIB_DIR})
  find_library(NVSHMEM_DEVICE nvshmem_device ${NVSHMEM_LIB_DIR})
  # Fail here with a specific message instead of letting a *-NOTFOUND value
  # reach target_link_libraries, which only produces CMake's generic
  # "variables ... set to NOTFOUND" error at generate time.
  if(NOT NVSHMEM_HOST)
    message(FATAL_ERROR "Cannot find the nvshmem_host library, NVSHMEM_LIB_DIR=${NVSHMEM_LIB_DIR}")
  endif()
  if(NOT NVSHMEM_DEVICE)
    message(FATAL_ERROR "Cannot find the nvshmem_device library, NVSHMEM_LIB_DIR=${NVSHMEM_LIB_DIR}")
  endif()
  target_link_libraries(tvm_runtime_extra PRIVATE tvm_nvshmem_objs ${NVSHMEM_HOST} ${NVSHMEM_DEVICE})
  set_target_properties(tvm_runtime_extra PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
endif()

# RCCL — requires ROCm + RCCL.
# Reuses the disco/nccl sources, compiled with TVM_NCCL_RCCL_SWITCH=1, as an
# OBJECT library that is linked with rccl into tvm_runtime_extra.
if(USE_ROCM AND USE_RCCL)
  find_rccl(${USE_RCCL})
  # Directory-scoped include: visible to every target in this CMakeLists.
  include_directories(SYSTEM ${RCCL_INCLUDE_DIR})
  tvm_file_glob(GLOB _rccl_srcs
    src/runtime/disco/nccl/*.cc
  )
  set_property(
    SOURCE src/runtime/disco/nccl/nccl.cc
    PROPERTY COMPILE_DEFINITIONS "TVM_NCCL_RCCL_SWITCH=1"
  )
  add_library(tvm_rccl_objs OBJECT ${_rccl_srcs})
  target_link_libraries(tvm_rccl_objs PRIVATE tvm_runtime_extra_defs)
  target_link_libraries(tvm_runtime_extra PRIVATE tvm_rccl_objs rccl)
endif()

# tvm_runtime_extra depends on the core runtime; PUBLIC so that consumers of
# tvm_runtime_extra link tvm_runtime as well.
target_link_libraries(tvm_runtime_extra PUBLIC tvm_runtime)

# Link the CUDA DSO whenever CUDA is enabled. Note the condition is USE_CUDA
# alone, although the disco/cuda_ipc sources are only globbed when USE_NCCL
# is also set.
if(USE_CUDA)
  target_link_libraries(tvm_runtime_extra PUBLIC tvm_runtime_cuda)
endif()

# CUTLASS fpA_intB_gemm and flash_attn are separate shared libs.
# -Wl,--no-as-needed is placed immediately before flash_attn on the link line.
if(USE_CUDA AND USE_CUTLASS)
  target_link_libraries(tvm_runtime_extra PRIVATE
    fpA_intB_gemm
    fpA_intB_gemm_tvm
    -Wl,--no-as-needed flash_attn
  )
endif()

# Append the project-wide visibility flag, when one is configured, to the
# link options of tvm_runtime_extra.
if(TVM_VISIBILITY_FLAG)
  set_property(TARGET tvm_runtime_extra APPEND PROPERTY LINK_OPTIONS "${TVM_VISIBILITY_FLAG}")
endif()

# Place every build artifact of tvm_runtime_extra under <build>/lib.
set_property(TARGET tvm_runtime_extra PROPERTY LIBRARY_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/lib")
set_property(TARGET tvm_runtime_extra PROPERTY RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/lib")
set_property(TARGET tvm_runtime_extra PROPERTY ARCHIVE_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/lib")

# Regular install location, plus a second install rule into "lib" when the
# Python module build is enabled.
install(TARGETS tvm_runtime_extra DESTINATION lib${LIB_SUFFIX})
if(TVM_BUILD_PYTHON_MODULE)
  install(TARGETS tvm_runtime_extra DESTINATION "lib")
endif()

add_library(tvm_objs OBJECT ${COMPILER_SRCS})
add_library(tvm_runtime_objs OBJECT ${RUNTIME_SRCS})
target_link_libraries(tvm_objs PUBLIC tvm_ffi_header)
Expand Down Expand Up @@ -762,45 +825,17 @@ dump_options_to_file("${TVM_ALL_OPTIONS}")

if(USE_CUDA AND USE_CUTLASS)
install(TARGETS fpA_intB_gemm EXPORT ${PROJECT_NAME}Targets DESTINATION lib${LIB_SUFFIX})
# fpA_intB_gemm is a separate shared library; link it into the runtime so
# the runtime exposes its kernels and tvm_compiler picks them up
# transitively at run time.
target_link_libraries(tvm_runtime PRIVATE fpA_intB_gemm)
# fpA_intB_gemm_tvm is an OBJECT library carrying the
# `fastertransformer.gemm_fp16_int` global registration. Linking it into
# both tvm_runtime and tvm_compiler causes the static initializer to run
# twice (once per shared library). Anchor it in tvm_runtime only.
target_link_libraries(tvm_runtime PRIVATE fpA_intB_gemm_tvm)

install(TARGETS flash_attn EXPORT ${PROJECT_NAME}Targets DESTINATION lib${LIB_SUFFIX})
target_link_libraries(tvm_runtime PRIVATE -Wl,--no-as-needed flash_attn)
# fpA_intB_gemm, fpA_intB_gemm_tvm, and flash_attn are linked by
# tvm_runtime_extra (see the inline assembly block above); no link needed here.
endif()

if(USE_CUDA AND USE_NVTX)
set_source_files_properties(src/runtime/nvtx.cc PROPERTIES COMPILE_DEFINITIONS "TVM_NVTX_ENABLED=1")
endif()

if(USE_CUDA AND USE_NCCL)
find_library(LIBRT rt)
# Runtime-only dependency.
target_link_libraries(tvm_runtime PRIVATE nccl ${LIBRT})
endif()


if (USE_CUDA AND USE_NVSHMEM)
target_include_directories(tvm_runtime_objs PUBLIC ${NVSHMEM_INCLUDE_DIR})
find_library(NVSHMEM_HOST nvshmem_host ${NVSHMEM_LIB_DIR})
find_library(NVSHMEM_DEVICE nvshmem_device ${NVSHMEM_LIB_DIR})
# Runtime-only dependency.
target_link_libraries(tvm_runtime PRIVATE ${NVSHMEM_HOST} ${NVSHMEM_DEVICE})
set_target_properties(tvm_runtime PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
set_target_properties(tvm_compiler PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
endif()

if(USE_ROCM AND USE_RCCL)
# Runtime-only dependency.
target_link_libraries(tvm_runtime PRIVATE rccl)
endif()
# Note: NCCL, NVSHMEM, RCCL target_link_libraries are handled in the inline
# libtvm_runtime_extra assembly block above.

# Python package installation configuration
# This section ensures that all necessary files are installed for the Python wheel
Expand Down
5 changes: 5 additions & 0 deletions ci/jenkins/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,11 @@
"build/lib/libtvm_compiler.so",
"build/lib/libtvm_runtime.so",
"build/lib/libtvm_ffi.so",
"build/lib/libtvm_runtime_cuda.so",
"build/lib/libtvm_runtime_vulkan.so",
"build/lib/libtvm_runtime_opencl.so",
"build/lib/libtvm_runtime_rocm.so",
"build/lib/libtvm_runtime_extra.so",
"build/libtvm_allvisible.so",
"build/config.cmake",
],
Expand Down
4 changes: 2 additions & 2 deletions ci/jenkins/generated/arm_jenkinsfile.groovy

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions ci/jenkins/generated/cpu_jenkinsfile.groovy

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 3 additions & 3 deletions ci/jenkins/generated/gpu_jenkinsfile.groovy

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion ci/jenkins/templates/cpu_jenkinsfile.groovy.j2
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@
)
cmake_build(ci_cpu, 'build')
make_cpp_tests(ci_cpu, 'build')
{{ m.upload_artifacts(tag='cpu', filenames=tvm_lib + cpptest) }}
{{ m.upload_artifacts(tag='cpu', bundles=["tvm_lib", "cpptest"]) }}
{% endcall %}

{% set test_method_names = [] %}
Expand Down
2 changes: 1 addition & 1 deletion ci/jenkins/templates/gpu_jenkinsfile.groovy.j2
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
) %}
sh "${docker_run} --no-gpu ${ci_gpu} ./tests/scripts/task_config_build_gpu.sh build"
cmake_build("${ci_gpu} --no-gpu", 'build')
{{ m.upload_artifacts(tag='gpu', filenames=tvm_lib + tvm_lib_gpu_extra) }}
{{ m.upload_artifacts(tag='gpu', bundles=["tvm_lib", "tvm_lib_gpu_extra"]) }}

// compiler test
sh "rm -rf build"
Expand Down
8 changes: 7 additions & 1 deletion ci/scripts/jenkins/s3.py
Original file line number Diff line number Diff line change
Expand Up @@ -170,7 +170,13 @@ def s3(source: str, destination: str, recursive: bool) -> list[str]:
if item != ".":
source = s3_path + "/" + item
recursive = False
stdout = s3(source=source, destination=item, recursive=recursive)
try:
stdout = s3(source=source, destination=item, recursive=recursive)
except Exception:
# Optional artifacts (e.g. per-backend device runtime DSOs) may not
# exist in S3 when the build config didn't produce them. Skip silently.
logging.warning(f"Download failed for {item}, skipping (may be optional)")
continue
Comment thread
tqchen marked this conversation as resolved.
files = parse_output_files(stdout)
chmod(files)
for file in files:
Expand Down
Loading
Loading