diff --git a/CMakeLists.txt b/CMakeLists.txt index e4065075cce8..46958c542e73 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -51,6 +51,7 @@ tvm_option(USE_HEXAGON_SDK "Path to the Hexagon SDK root (required for Hexagon s tvm_option(USE_HEXAGON_RPC "Enable Hexagon RPC using minRPC implementation over Android." OFF) tvm_option(USE_HEXAGON_GTEST "Path to Hexagon specific gtest version for runtime cpp tests." /path/to/hexagon/gtest) tvm_option(USE_HEXAGON_EXTERNAL_LIBS "Path to git repo containing external Hexagon runtime sources or libraries" OFF) + tvm_option(USE_RPC "Build with RPC" ON) tvm_option(USE_THREADS "Build with thread support" ON) tvm_option(USE_LLVM "Build with LLVM, can be set to specific llvm-config path" OFF) @@ -112,6 +113,18 @@ include_directories(SYSTEM ${COMPILER_RT_PATH}) # initial variables set(TVM_LINKER_LIBS "") set(TVM_RUNTIME_LINKER_LIBS "") +# Early target creation so contrib cmake files can call +# target_link_libraries(tvm_runtime_extra PRIVATE ) directly. +add_library(tvm_runtime_extra SHARED) +set_target_properties(tvm_runtime_extra PROPERTIES LINKER_LANGUAGE CXX) +# INTERFACE target carrying compile definitions for OBJECT libs that build +# into tvm_runtime_extra. On MSVC, TVM_RUNTIME_EXPORTS makes TVM_RUNTIME_DLL +# expand to __declspec(dllexport) so that functions defined in extra modules +# are properly exported from tvm_runtime_extra.dll. +add_library(tvm_runtime_extra_defs INTERFACE) +target_link_libraries(tvm_runtime_extra_defs INTERFACE tvm_ffi_header) +target_compile_definitions(tvm_runtime_extra_defs + INTERFACE TVM_RUNTIME_EXPORTS TVM_FFI_EXPORTS) # Check if this is being run on its own or as a subdirectory for another project @@ -328,10 +341,10 @@ tvm_file_glob(GLOB RUNTIME_SRCS src/runtime/*.cc src/runtime/vm/*.cc src/runtime/memory/*.cc - src/runtime/disco/*.cc src/runtime/minrpc/*.cc - src/runtime/vm/*.cc ) +# Note: src/runtime/disco/** moves to libtvm_runtime_extra. +# Note: src/runtime/{cuda,vulkan,opencl,metal,rocm,hexagon}/* move to per-backend DSOs. set(TVM_RUNTIME_EXT_OBJS "") if(BUILD_FOR_HEXAGON) @@ -343,17 +356,11 @@ if(BUILD_FOR_HEXAGON) add_definitions(-D_MACH_I32=int) endif() -# distributed disco runtime are disabled for hexagon -if (NOT BUILD_FOR_HEXAGON) - tvm_file_glob(GLOB RUNTIME_DISCO_DISTRIBUTED_SRCS src/runtime/disco/distributed/*.cc) - list(APPEND RUNTIME_SRCS ${RUNTIME_DISCO_DISTRIBUTED_SRCS}) -endif() - # Package runtime rules if(NOT USE_RTTI) endif() -if (INDEX_DEFAULT_I64) +if(INDEX_DEFAULT_I64) add_definitions(-DTVM_INDEX_DEFAULT_I64=1) endif() @@ -362,36 +369,8 @@ if(USE_RPC) tvm_file_glob(GLOB RUNTIME_RPC_SRCS src/runtime/rpc/*.cc) list(APPEND RUNTIME_SRCS ${RUNTIME_RPC_SRCS}) endif(USE_RPC) - -if(USE_CUDA AND USE_NCCL) - message(STATUS "Build with NCCL...") - find_nccl(${USE_NCCL}) - include_directories(SYSTEM ${NCCL_INCLUDE_DIR}) - tvm_file_glob(GLOB RUNTIME_NCCL_SRC src/runtime/disco/nccl/*.cc src/runtime/disco/cuda_ipc/*.cc 3rdparty/tensorrt_llm/*.cu) - set_source_files_properties(src/runtime/disco/nccl/nccl.cc PROPERTIES COMPILE_DEFINITIONS "TVM_NCCL_RCCL_SWITCH=0") - list(APPEND RUNTIME_SRCS ${RUNTIME_NCCL_SRC}) -endif() - -if (USE_CUDA AND USE_NVSHMEM) - message(STATUS "Build with NVSHMEM...") - find_nvshmem(${USE_NVSHMEM}) - if (NOT NVSHMEM_FOUND) - message(FATAL_ERROR "Cannot find NVSHMEM, USE_NVSHMEM=" ${USE_NVSHMEM}) - endif() - set(CMAKE_CUDA_SEPARABLE_COMPILATION ON) - set(CMAKE_POSITION_INDEPENDENT_CODE ON) - tvm_file_glob(GLOB RUNTIME_NVSHMEM_SRCS src/runtime/contrib/nvshmem/*.cc src/runtime/contrib/nvshmem/*.cu) - list(APPEND RUNTIME_SRCS ${RUNTIME_NVSHMEM_SRCS}) -endif() - -if(USE_ROCM AND USE_RCCL) - message(STATUS "Build with RCCL...") - find_rccl(${USE_RCCL}) - include_directories(SYSTEM ${RCCL_INCLUDE_DIR}) - tvm_file_glob(GLOB RUNTIME_RCCL_SRC src/runtime/disco/nccl/*.cc) - set_source_files_properties(src/runtime/disco/nccl/nccl.cc PROPERTIES COMPILE_DEFINITIONS "TVM_NCCL_RCCL_SWITCH=1") - list(APPEND RUNTIME_SRCS ${RUNTIME_RCCL_SRC}) -endif() +# Note: disco/**, NCCL, NVSHMEM, RCCL all move to libtvm_runtime_extra +# (assembled inline below after all contrib cmake files). # Enable ctest if gtest is available if(USE_GTEST) @@ -471,6 +450,90 @@ include(cmake/modules/contrib/ExampleNPU.cmake) include(cmake/modules/contrib/vllm.cmake) include(cmake/modules/Git.cmake) +# ---- libtvm_runtime_extra assembly ---- +# Disco core sources. +tvm_file_glob(GLOB _disco_core_srcs src/runtime/disco/*.cc) +add_library(tvm_disco_objs OBJECT ${_disco_core_srcs}) +target_link_libraries(tvm_disco_objs PRIVATE tvm_runtime_extra_defs) +target_link_libraries(tvm_runtime_extra PRIVATE tvm_disco_objs) + +# Distributed disco (disabled for Hexagon cross-compile). +if(NOT BUILD_FOR_HEXAGON) + tvm_file_glob(GLOB _disco_dist_srcs src/runtime/disco/distributed/*.cc) + add_library(tvm_disco_distributed_objs OBJECT ${_disco_dist_srcs}) + target_link_libraries(tvm_disco_distributed_objs PRIVATE tvm_runtime_extra_defs) + target_link_libraries(tvm_runtime_extra PRIVATE tvm_disco_distributed_objs) +endif() + +# NCCL / cuda_ipc — requires CUDA + NCCL. +if(USE_CUDA AND USE_NCCL) + find_nccl(${USE_NCCL}) + include_directories(SYSTEM ${NCCL_INCLUDE_DIR}) + tvm_file_glob(GLOB _nccl_srcs src/runtime/disco/nccl/*.cc src/runtime/disco/cuda_ipc/*.cc 3rdparty/tensorrt_llm/*.cu) + set_source_files_properties(src/runtime/disco/nccl/nccl.cc PROPERTIES COMPILE_DEFINITIONS "TVM_NCCL_RCCL_SWITCH=0") + add_library(tvm_nccl_objs OBJECT ${_nccl_srcs}) + target_link_libraries(tvm_nccl_objs PRIVATE tvm_runtime_extra_defs) + find_library(LIBRT rt) + target_link_libraries(tvm_runtime_extra PRIVATE tvm_nccl_objs nccl ${LIBRT}) +endif() + +# NVSHMEM. +if(USE_CUDA AND USE_NVSHMEM) + find_nvshmem(${USE_NVSHMEM}) + if(NOT NVSHMEM_FOUND) + message(FATAL_ERROR "Cannot find NVSHMEM, USE_NVSHMEM=" ${USE_NVSHMEM}) + endif() + set(CMAKE_CUDA_SEPARABLE_COMPILATION ON) + set(CMAKE_POSITION_INDEPENDENT_CODE ON) + tvm_file_glob(GLOB _nvshmem_srcs src/runtime/contrib/nvshmem/*.cc src/runtime/contrib/nvshmem/*.cu) + add_library(tvm_nvshmem_objs OBJECT ${_nvshmem_srcs}) + target_link_libraries(tvm_nvshmem_objs PRIVATE tvm_runtime_extra_defs) + target_include_directories(tvm_nvshmem_objs PUBLIC ${NVSHMEM_INCLUDE_DIR}) + find_library(NVSHMEM_HOST nvshmem_host ${NVSHMEM_LIB_DIR}) + find_library(NVSHMEM_DEVICE nvshmem_device ${NVSHMEM_LIB_DIR}) + target_link_libraries(tvm_runtime_extra PRIVATE tvm_nvshmem_objs ${NVSHMEM_HOST} ${NVSHMEM_DEVICE}) + set_target_properties(tvm_runtime_extra PROPERTIES CUDA_SEPARABLE_COMPILATION ON) +endif() + +# RCCL. +if(USE_ROCM AND USE_RCCL) + find_rccl(${USE_RCCL}) + include_directories(SYSTEM ${RCCL_INCLUDE_DIR}) + tvm_file_glob(GLOB _rccl_srcs src/runtime/disco/nccl/*.cc) + set_source_files_properties(src/runtime/disco/nccl/nccl.cc PROPERTIES COMPILE_DEFINITIONS "TVM_NCCL_RCCL_SWITCH=1") + add_library(tvm_rccl_objs OBJECT ${_rccl_srcs}) + target_link_libraries(tvm_rccl_objs PRIVATE tvm_runtime_extra_defs) + target_link_libraries(tvm_runtime_extra PRIVATE tvm_rccl_objs rccl) +endif() + +target_link_libraries(tvm_runtime_extra PUBLIC tvm_runtime) + +# If disco/cuda_ipc is included, link the CUDA DSO. +if(USE_CUDA) + target_link_libraries(tvm_runtime_extra PUBLIC tvm_runtime_cuda) +endif() + +# CUTLASS fpA_intB_gemm and flash_attn are separate shared libs. +if(USE_CUDA AND USE_CUTLASS) + target_link_libraries(tvm_runtime_extra PRIVATE fpA_intB_gemm fpA_intB_gemm_tvm) + target_link_libraries(tvm_runtime_extra PRIVATE -Wl,--no-as-needed flash_attn) +endif() + +if(TVM_VISIBILITY_FLAG) + set_property(TARGET tvm_runtime_extra APPEND PROPERTY LINK_OPTIONS "${TVM_VISIBILITY_FLAG}") +endif() + +set_target_properties(tvm_runtime_extra PROPERTIES + LIBRARY_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/lib" + RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/lib" + ARCHIVE_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/lib" +) + +install(TARGETS tvm_runtime_extra DESTINATION lib${LIB_SUFFIX}) +if(TVM_BUILD_PYTHON_MODULE) + install(TARGETS tvm_runtime_extra DESTINATION "lib") +endif() + add_library(tvm_objs OBJECT ${COMPILER_SRCS}) add_library(tvm_runtime_objs OBJECT ${RUNTIME_SRCS}) target_link_libraries(tvm_objs PUBLIC tvm_ffi_header) @@ -762,45 +825,17 @@ dump_options_to_file("${TVM_ALL_OPTIONS}") if(USE_CUDA AND USE_CUTLASS) install(TARGETS fpA_intB_gemm EXPORT ${PROJECT_NAME}Targets DESTINATION lib${LIB_SUFFIX}) - # fpA_intB_gemm is a separate shared library; link it into the runtime so - # the runtime exposes its kernels and tvm_compiler picks them up - # transitively at run time. - target_link_libraries(tvm_runtime PRIVATE fpA_intB_gemm) - # fpA_intB_gemm_tvm is an OBJECT library carrying the - # `fastertransformer.gemm_fp16_int` global registration. Linking it into - # both tvm_runtime and tvm_compiler causes the static initializer to run - # twice (once per shared library). Anchor it in tvm_runtime only. - target_link_libraries(tvm_runtime PRIVATE fpA_intB_gemm_tvm) - install(TARGETS flash_attn EXPORT ${PROJECT_NAME}Targets DESTINATION lib${LIB_SUFFIX}) - target_link_libraries(tvm_runtime PRIVATE -Wl,--no-as-needed flash_attn) + # fpA_intB_gemm, fpA_intB_gemm_tvm, and flash_attn are linked by + # tvm_runtime_extra (see the inline assembly block above); no link needed here. endif() if(USE_CUDA AND USE_NVTX) set_source_files_properties(src/runtime/nvtx.cc PROPERTIES COMPILE_DEFINITIONS "TVM_NVTX_ENABLED=1") endif() -if(USE_CUDA AND USE_NCCL) - find_library(LIBRT rt) - # Runtime-only dependency. - target_link_libraries(tvm_runtime PRIVATE nccl ${LIBRT}) -endif() - - -if (USE_CUDA AND USE_NVSHMEM) - target_include_directories(tvm_runtime_objs PUBLIC ${NVSHMEM_INCLUDE_DIR}) - find_library(NVSHMEM_HOST nvshmem_host ${NVSHMEM_LIB_DIR}) - find_library(NVSHMEM_DEVICE nvshmem_device ${NVSHMEM_LIB_DIR}) - # Runtime-only dependency. - target_link_libraries(tvm_runtime PRIVATE ${NVSHMEM_HOST} ${NVSHMEM_DEVICE}) - set_target_properties(tvm_runtime PROPERTIES CUDA_SEPARABLE_COMPILATION ON) - set_target_properties(tvm_compiler PROPERTIES CUDA_SEPARABLE_COMPILATION ON) -endif() - -if(USE_ROCM AND USE_RCCL) - # Runtime-only dependency. - target_link_libraries(tvm_runtime PRIVATE rccl) -endif() +# Note: NCCL, NVSHMEM, RCCL target_link_libraries are handled in the inline +# libtvm_runtime_extra assembly block above. # Python package installation configuration # This section ensures that all necessary files are installed for the Python wheel diff --git a/ci/jenkins/data.py b/ci/jenkins/data.py index 99e54e85330e..44cdba1d02b2 100644 --- a/ci/jenkins/data.py +++ b/ci/jenkins/data.py @@ -44,6 +44,11 @@ "build/lib/libtvm_compiler.so", "build/lib/libtvm_runtime.so", "build/lib/libtvm_ffi.so", + "build/lib/libtvm_runtime_cuda.so", + "build/lib/libtvm_runtime_vulkan.so", + "build/lib/libtvm_runtime_opencl.so", + "build/lib/libtvm_runtime_rocm.so", + "build/lib/libtvm_runtime_extra.so", "build/libtvm_allvisible.so", "build/config.cmake", ], diff --git a/ci/jenkins/generated/arm_jenkinsfile.groovy b/ci/jenkins/generated/arm_jenkinsfile.groovy index 18c77a97da5b..a457cc8ee005 100644 --- a/ci/jenkins/generated/arm_jenkinsfile.groovy +++ b/ci/jenkins/generated/arm_jenkinsfile.groovy @@ -60,7 +60,7 @@ // 'python3 jenkins/generate.py' // Note: This timestamp is here to ensure that updates to the Jenkinsfile are // always rebased on main before merging: -// Generated at 2026-04-25T15:49:49.180036 +// Generated at 2026-05-21T18:31:35.598730 import org.jenkinsci.plugins.pipeline.modeldefinition.Utils // These are set at runtime from data in ci/jenkins/docker-images.yml, update @@ -496,7 +496,7 @@ def run_build(node_type) { cmake_build(ci_arm, 'build') make_cpp_tests(ci_arm, 'build') sh( - script: "./${jenkins_scripts_root}/s3.py --action upload --bucket ${s3_bucket} --prefix ${s3_prefix}/arm --items build/lib/libtvm_compiler.so build/lib/libtvm_runtime.so build/lib/libtvm_ffi.so build/config.cmake build/cpptest build/build.ninja build/CMakeFiles/rules.ninja", + script: "./${jenkins_scripts_root}/s3.py --action upload --bucket ${s3_bucket} --prefix ${s3_prefix}/arm --bundle tvm_lib --bundle cpptest", label: 'Upload artifacts to S3', ) }) diff --git a/ci/jenkins/generated/cpu_jenkinsfile.groovy b/ci/jenkins/generated/cpu_jenkinsfile.groovy index 4c7eb8402de6..995e96662ff4 100644 --- a/ci/jenkins/generated/cpu_jenkinsfile.groovy +++ b/ci/jenkins/generated/cpu_jenkinsfile.groovy @@ -60,7 +60,7 @@ // 'python3 jenkins/generate.py' // Note: This timestamp is here to ensure that updates to the Jenkinsfile are // always rebased on main before merging: -// Generated at 2026-04-25T15:49:49.168038 +// Generated at 2026-05-21T18:31:35.583867 import org.jenkinsci.plugins.pipeline.modeldefinition.Utils // These are set at runtime from data in ci/jenkins/docker-images.yml, update @@ -496,7 +496,7 @@ def run_build(node_type) { cmake_build(ci_cpu, 'build') make_cpp_tests(ci_cpu, 'build') sh( - script: "./${jenkins_scripts_root}/s3.py --action upload --bucket ${s3_bucket} --prefix ${s3_prefix}/cpu --items build/lib/libtvm_compiler.so build/lib/libtvm_runtime.so build/lib/libtvm_ffi.so build/config.cmake build/cpptest build/build.ninja build/CMakeFiles/rules.ninja", + script: "./${jenkins_scripts_root}/s3.py --action upload --bucket ${s3_bucket} --prefix ${s3_prefix}/cpu --bundle tvm_lib --bundle cpptest", label: 'Upload artifacts to S3', ) }) diff --git a/ci/jenkins/generated/gpu_jenkinsfile.groovy b/ci/jenkins/generated/gpu_jenkinsfile.groovy index 5afd4aa0b2c2..539e379bf623 100644 --- a/ci/jenkins/generated/gpu_jenkinsfile.groovy +++ b/ci/jenkins/generated/gpu_jenkinsfile.groovy @@ -60,7 +60,7 @@ // 'python3 jenkins/generate.py' // Note: This timestamp is here to ensure that updates to the Jenkinsfile are // always rebased on main before merging: -// Generated at 2026-04-25T15:49:49.200674 +// Generated at 2026-05-21T18:31:35.612295 import org.jenkinsci.plugins.pipeline.modeldefinition.Utils // These are set at runtime from data in ci/jenkins/docker-images.yml, update @@ -492,7 +492,7 @@ def run_build(node_type) { sh "${docker_run} --no-gpu ${ci_gpu} ./tests/scripts/task_config_build_gpu.sh build" cmake_build("${ci_gpu} --no-gpu", 'build') sh( - script: "./${jenkins_scripts_root}/s3.py --action upload --bucket ${s3_bucket} --prefix ${s3_prefix}/gpu --items build/lib/libtvm_compiler.so build/lib/libtvm_runtime.so build/lib/libtvm_ffi.so build/config.cmake build/3rdparty/libflash_attn/src/libflash_attn.so build/3rdparty/cutlass_fpA_intB_gemm/cutlass_kernels/libfpA_intB_gemm.so", + script: "./${jenkins_scripts_root}/s3.py --action upload --bucket ${s3_bucket} --prefix ${s3_prefix}/gpu --bundle tvm_lib --bundle tvm_lib_gpu_extra", label: 'Upload artifacts to S3', ) @@ -502,7 +502,7 @@ def run_build(node_type) { sh "${docker_run} --no-gpu ${ci_gpu} ./tests/scripts/task_config_build_gpu_other.sh build" cmake_build("${ci_gpu} --no-gpu", 'build') sh( - script: "./${jenkins_scripts_root}/s3.py --action upload --bucket ${s3_bucket} --prefix ${s3_prefix}/gpu2 --items build/lib/libtvm_compiler.so build/lib/libtvm_runtime.so build/lib/libtvm_ffi.so build/config.cmake", + script: "./${jenkins_scripts_root}/s3.py --action upload --bucket ${s3_bucket} --prefix ${s3_prefix}/gpu2 --bundle tvm_lib", label: 'Upload artifacts to S3', ) }) diff --git a/ci/jenkins/templates/cpu_jenkinsfile.groovy.j2 b/ci/jenkins/templates/cpu_jenkinsfile.groovy.j2 index 06a1660ecb21..d2e479d5e87a 100644 --- a/ci/jenkins/templates/cpu_jenkinsfile.groovy.j2 +++ b/ci/jenkins/templates/cpu_jenkinsfile.groovy.j2 @@ -31,7 +31,7 @@ ) cmake_build(ci_cpu, 'build') make_cpp_tests(ci_cpu, 'build') - {{ m.upload_artifacts(tag='cpu', filenames=tvm_lib + cpptest) }} + {{ m.upload_artifacts(tag='cpu', bundles=["tvm_lib", "cpptest"]) }} {% endcall %} {% set test_method_names = [] %} diff --git a/ci/jenkins/templates/gpu_jenkinsfile.groovy.j2 b/ci/jenkins/templates/gpu_jenkinsfile.groovy.j2 index 2b7f5f75c98c..7ab5256419f0 100644 --- a/ci/jenkins/templates/gpu_jenkinsfile.groovy.j2 +++ b/ci/jenkins/templates/gpu_jenkinsfile.groovy.j2 @@ -27,7 +27,7 @@ ) %} sh "${docker_run} --no-gpu ${ci_gpu} ./tests/scripts/task_config_build_gpu.sh build" cmake_build("${ci_gpu} --no-gpu", 'build') - {{ m.upload_artifacts(tag='gpu', filenames=tvm_lib + tvm_lib_gpu_extra) }} + {{ m.upload_artifacts(tag='gpu', bundles=["tvm_lib", "tvm_lib_gpu_extra"]) }} // compiler test sh "rm -rf build" diff --git a/ci/scripts/jenkins/s3.py b/ci/scripts/jenkins/s3.py index eb986dec996f..e2e65dcadb8a 100755 --- a/ci/scripts/jenkins/s3.py +++ b/ci/scripts/jenkins/s3.py @@ -170,7 +170,13 @@ def s3(source: str, destination: str, recursive: bool) -> list[str]: if item != ".": source = s3_path + "/" + item recursive = False - stdout = s3(source=source, destination=item, recursive=recursive) + try: + stdout = s3(source=source, destination=item, recursive=recursive) + except Exception: + # Optional artifacts (e.g. per-backend device runtime DSOs) may not + # exist in S3 when the build config didn't produce them. Skip silently. + logging.warning(f"Download failed for {item}, skipping (may be optional)") + continue files = parse_output_files(stdout) chmod(files) for file in files: diff --git a/cmake/modules/CUDA.cmake b/cmake/modules/CUDA.cmake index a79e55883739..e56396c1a620 100644 --- a/cmake/modules/CUDA.cmake +++ b/cmake/modules/CUDA.cmake @@ -50,12 +50,6 @@ if(USE_CUDA) # [0] https://github.com/Kitware/CMake/commit/6377a438 set(CMAKE_CUDA_USE_RESPONSE_FILE_FOR_INCLUDES 0) - tvm_file_glob(GLOB RUNTIME_CUDA_SRCS src/runtime/cuda/*.cc) - list(APPEND RUNTIME_SRCS ${RUNTIME_CUDA_SRCS}) - - list(APPEND TVM_RUNTIME_LINKER_LIBS ${CUDA_CUDART_LIBRARY}) - list(APPEND TVM_RUNTIME_LINKER_LIBS ${CUDA_CUDA_LIBRARY}) - if(NOT DEFINED CMAKE_CUDA_ARCHITECTURES) if(CMAKE_VERSION VERSION_LESS "3.24") message(FATAL_ERROR "CMAKE_CUDA_ARCHITECTURES not set. Please upgrade CMake to 3.24 to use native, or set CMAKE_CUDA_ARCHITECTURES manually") @@ -63,77 +57,102 @@ if(USE_CUDA) message(STATUS "CMAKE_CUDA_ARCHITECTURES not set, using native") set(CMAKE_CUDA_ARCHITECTURES native) endif() +endif(USE_CUDA) - if(USE_CUDNN) - message(STATUS "Build with cuDNN support") - include_directories(SYSTEM ${CUDA_CUDNN_INCLUDE_DIRS}) - tvm_file_glob(GLOB CUDNN_RELAX_CONTRIB_SRC src/relax/backend/contrib/cudnn/*.cc) - list(APPEND COMPILER_SRCS ${CUDNN_RELAX_CONTRIB_SRC}) - tvm_file_glob(GLOB CONTRIB_CUDNN_SRCS src/runtime/contrib/cudnn/*.cc) - list(APPEND RUNTIME_SRCS ${CONTRIB_CUDNN_SRCS}) - list(APPEND TVM_RUNTIME_LINKER_LIBS ${CUDA_CUDNN_LIBRARY}) - endif(USE_CUDNN) - - if (USE_CUDNN_FRONTEND) - message(STATUS "Build with cuDNN Frontend support") - if (IS_DIRECTORY ${USE_CUDNN_FRONTEND}) - find_file(CUDNN_FRONTEND_HEADER cudnn_frontend.h HINTS ${USE_CUDNN_FRONTEND}/include) - include_directories(SYSTEM ${USE_CUDNN_FRONTEND}/include) - else() - find_file(CUDNN_FRONTEND_HEADER cudnn_frontend.h) - endif() - if (NOT CUDNN_FRONTEND_HEADER) - message(FATAL_ERROR "Cannot find cudnn_frontend.h, please set USE_CUDNN_FRONTEND to the path of the cuDNN frontend header") - endif() - tvm_file_glob(GLOB CONTRIB_CUDNN_FRONTEND_SRCS src/runtime/contrib/cudnn/cudnn_frontend/*.cc) - set_property(SOURCE ${CONTRIB_CUDNN_SRCS} APPEND PROPERTY COMPILE_DEFINITIONS TVM_USE_CUDNN_FRONTEND=1) - list(APPEND RUNTIME_SRCS ${CONTRIB_CUDNN_FRONTEND_SRCS}) - endif(USE_CUDNN_FRONTEND) - - if(USE_CUBLAS) - message(STATUS "Build with cuBLAS support") - tvm_file_glob(GLOB CUBLAS_CONTRIB_SRC src/relax/backend/contrib/cublas/*.cc) - list(APPEND COMPILER_SRCS ${CUBLAS_CONTRIB_SRC}) - tvm_file_glob(GLOB CONTRIB_CUBLAS_SRCS src/runtime/contrib/cublas/*.cc) - list(APPEND RUNTIME_SRCS ${CONTRIB_CUBLAS_SRCS}) - list(APPEND TVM_RUNTIME_LINKER_LIBS ${CUDA_CUBLAS_LIBRARY}) - if(NOT CUDA_CUBLASLT_LIBRARY STREQUAL "CUDA_CUBLASLT_LIBRARY-NOTFOUND") - list(APPEND TVM_RUNTIME_LINKER_LIBS ${CUDA_CUBLASLT_LIBRARY}) - endif() - endif(USE_CUBLAS) +if(USE_CUDA) + message(STATUS "Build cuda device runtime") - if(USE_THRUST) - message(STATUS "Build with Thrust support") - tvm_file_glob(GLOB CONTRIB_THRUST_SRC src/runtime/contrib/thrust/*.cu) - add_library(tvm_thrust_objs OBJECT ${CONTRIB_THRUST_SRC}) - target_link_libraries(tvm_thrust_objs PRIVATE tvm_ffi_header) - target_compile_options(tvm_thrust_objs PRIVATE $<$:--expt-extended-lambda>) - if (NOT USE_THRUST MATCHES ${IS_TRUE_PATTERN}) - find_package(CCCL REQUIRED COMPONENTS Thrust) - target_link_libraries(tvm_thrust_objs PRIVATE CCCL::Thrust) - endif() - list(APPEND TVM_RUNTIME_EXT_OBJS $) - endif(USE_THRUST) + tvm_file_glob(GLOB RUNTIME_CUDA_SRCS src/runtime/cuda/*.cc) + tvm_file_glob(GLOB VM_CUDA_BUILTIN_SRC_CC src/runtime/vm/cuda/*.cc) - if(USE_CURAND) - message(STATUS "Build with cuRAND support") - message(STATUS "${CUDA_CURAND_LIBRARY}") - tvm_file_glob(GLOB CONTRIB_CURAND_SRC_CC src/runtime/contrib/curand/*.cc) - tvm_file_glob(GLOB CONTRIB_CURAND_SRC_CU src/runtime/contrib/curand/*.cu) - list(APPEND TVM_RUNTIME_LINKER_LIBS ${CUDA_CURAND_LIBRARY}) - list(APPEND RUNTIME_SRCS ${CONTRIB_CURAND_SRC_CC}) - list(APPEND RUNTIME_SRCS ${CONTRIB_CURAND_SRC_CU}) - endif(USE_CURAND) + add_library(tvm_runtime_cuda_objs OBJECT ${RUNTIME_CUDA_SRCS} ${VM_CUDA_BUILTIN_SRC_CC}) + target_link_libraries(tvm_runtime_cuda_objs PUBLIC tvm_ffi_header) + set_target_properties(tvm_runtime_cuda_objs PROPERTIES POSITION_INDEPENDENT_CODE ON) + if(TVM_VISIBILITY_FLAG) + target_compile_options(tvm_runtime_cuda_objs PRIVATE "${TVM_VISIBILITY_FLAG}") + endif() + add_library(tvm_runtime_cuda SHARED $) + target_link_libraries(tvm_runtime_cuda PUBLIC tvm_runtime ${CUDA_CUDART_LIBRARY} ${CUDA_CUDA_LIBRARY}) + set_target_properties(tvm_runtime_cuda PROPERTIES + LIBRARY_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/lib" + RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/lib" + ARCHIVE_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/lib" + ) + install(TARGETS tvm_runtime_cuda DESTINATION lib${LIB_SUFFIX}) + if(TVM_BUILD_PYTHON_MODULE) + install(TARGETS tvm_runtime_cuda DESTINATION "lib") + endif() if(USE_NVTX) message(STATUS "Build with NVTX support") - message(STATUS "${CUDA_NVTX_LIBRARY}") - cmake_minimum_required(VERSION 3.13) # to compile CUDA code - enable_language(CUDA) - list(APPEND TVM_RUNTIME_LINKER_LIBS ${CUDA_NVTX_LIBRARY}) - endif(USE_NVTX) - - # Add CUDA builtins to RelaxVM - tvm_file_glob(GLOB VM_CUDA_BUILTIN_SRC_CC src/runtime/vm/cuda/*.cc) - list(APPEND RUNTIME_SRCS ${VM_CUDA_BUILTIN_SRC_CC}) + target_link_libraries(tvm_runtime_cuda PRIVATE ${CUDA_NVTX_LIBRARY}) + endif() endif(USE_CUDA) + +# Contrib sources gated by USE_CUDA go into libtvm_runtime_extra. +# See the RuntimeExtra assembly block in CMakeLists.txt. + +if(USE_CUDA AND USE_CUDNN) + message(STATUS "Build with cuDNN support") + include_directories(SYSTEM ${CUDA_CUDNN_INCLUDE_DIRS}) + tvm_file_glob(GLOB CUDNN_RELAX_CONTRIB_SRC src/relax/backend/contrib/cudnn/*.cc) + list(APPEND COMPILER_SRCS ${CUDNN_RELAX_CONTRIB_SRC}) + tvm_file_glob(GLOB CONTRIB_CUDNN_SRCS src/runtime/contrib/cudnn/*.cc) + add_library(tvm_cudnn_objs OBJECT ${CONTRIB_CUDNN_SRCS}) + target_link_libraries(tvm_cudnn_objs PRIVATE tvm_runtime_extra_defs) + target_link_libraries(tvm_runtime_extra PRIVATE tvm_cudnn_objs ${CUDA_CUDNN_LIBRARY}) +endif(USE_CUDNN) + +if(USE_CUDA AND USE_CUDNN_FRONTEND) + message(STATUS "Build with cuDNN Frontend support") + if(IS_DIRECTORY ${USE_CUDNN_FRONTEND}) + find_file(CUDNN_FRONTEND_HEADER cudnn_frontend.h HINTS ${USE_CUDNN_FRONTEND}/include) + include_directories(SYSTEM ${USE_CUDNN_FRONTEND}/include) + else() + find_file(CUDNN_FRONTEND_HEADER cudnn_frontend.h) + endif() + if(NOT CUDNN_FRONTEND_HEADER) + message(FATAL_ERROR "Cannot find cudnn_frontend.h, please set USE_CUDNN_FRONTEND to the path of the cuDNN frontend header") + endif() + tvm_file_glob(GLOB CONTRIB_CUDNN_FRONTEND_SRCS src/runtime/contrib/cudnn/cudnn_frontend/*.cc) + set_source_files_properties(${CONTRIB_CUDNN_SRCS} PROPERTIES COMPILE_DEFINITIONS TVM_USE_CUDNN_FRONTEND=1) + add_library(tvm_cudnn_frontend_objs OBJECT ${CONTRIB_CUDNN_FRONTEND_SRCS}) + target_link_libraries(tvm_cudnn_frontend_objs PRIVATE tvm_runtime_extra_defs) + target_link_libraries(tvm_runtime_extra PRIVATE tvm_cudnn_frontend_objs) +endif(USE_CUDA AND USE_CUDNN_FRONTEND) + +if(USE_CUDA AND USE_CUBLAS) + message(STATUS "Build with cuBLAS support") + tvm_file_glob(GLOB CUBLAS_CONTRIB_SRC src/relax/backend/contrib/cublas/*.cc) + list(APPEND COMPILER_SRCS ${CUBLAS_CONTRIB_SRC}) + tvm_file_glob(GLOB CONTRIB_CUBLAS_SRCS src/runtime/contrib/cublas/*.cc) + add_library(tvm_cublas_objs OBJECT ${CONTRIB_CUBLAS_SRCS}) + target_link_libraries(tvm_cublas_objs PRIVATE tvm_runtime_extra_defs) + target_link_libraries(tvm_runtime_extra PRIVATE tvm_cublas_objs ${CUDA_CUBLAS_LIBRARY}) + if(NOT CUDA_CUBLASLT_LIBRARY STREQUAL "CUDA_CUBLASLT_LIBRARY-NOTFOUND") + target_link_libraries(tvm_runtime_extra PRIVATE ${CUDA_CUBLASLT_LIBRARY}) + endif() +endif(USE_CUDA AND USE_CUBLAS) + +if(USE_CUDA AND USE_THRUST) + message(STATUS "Build with Thrust support") + tvm_file_glob(GLOB CONTRIB_THRUST_SRC src/runtime/contrib/thrust/*.cu) + add_library(tvm_thrust_objs OBJECT ${CONTRIB_THRUST_SRC}) + target_link_libraries(tvm_thrust_objs PRIVATE tvm_runtime_extra_defs) + target_compile_options(tvm_thrust_objs PRIVATE $<$:--expt-extended-lambda>) + if(NOT USE_THRUST MATCHES ${IS_TRUE_PATTERN}) + find_package(CCCL REQUIRED COMPONENTS Thrust) + target_link_libraries(tvm_thrust_objs PRIVATE CCCL::Thrust) + endif() + target_link_libraries(tvm_runtime_extra PRIVATE tvm_thrust_objs) +endif(USE_CUDA AND USE_THRUST) + +if(USE_CUDA AND USE_CURAND) + message(STATUS "Build with cuRAND support") + message(STATUS "${CUDA_CURAND_LIBRARY}") + tvm_file_glob(GLOB CONTRIB_CURAND_SRC_CC src/runtime/contrib/curand/*.cc) + tvm_file_glob(GLOB CONTRIB_CURAND_SRC_CU src/runtime/contrib/curand/*.cu) + add_library(tvm_curand_objs OBJECT ${CONTRIB_CURAND_SRC_CC} ${CONTRIB_CURAND_SRC_CU}) + target_link_libraries(tvm_curand_objs PRIVATE tvm_runtime_extra_defs) + target_link_libraries(tvm_runtime_extra PRIVATE tvm_curand_objs ${CUDA_CURAND_LIBRARY}) +endif(USE_CUDA AND USE_CURAND) diff --git a/cmake/modules/Hexagon.cmake b/cmake/modules/Hexagon.cmake index 59953744084f..370d968e623d 100644 --- a/cmake/modules/Hexagon.cmake +++ b/cmake/modules/Hexagon.cmake @@ -119,7 +119,8 @@ function(add_hexagon_wrapper_paths) endfunction() if(BUILD_FOR_HEXAGON) - # Common sources for TVM runtime with Hexagon support + # When building FOR Hexagon (the DSP itself), all runtime sources go into + # the single libtvm_runtime (static or shared). No per-backend DSO split. file_glob_append(RUNTIME_HEXAGON_SRCS "${TVMRT_SOURCE_DIR}/hexagon/*.cc" ) @@ -156,7 +157,7 @@ if(BUILD_FOR_HEXAGON) set(USE_CUSTOM_LOGGING ON) # To use a custom logger -# QHL support. + # QHL support. if(USE_HEXAGON_QHL) file_glob_append(TVM_QHL_WRAPPER_SRCS "${TVMRT_SOURCE_DIR}/hexagon/qhl/*.cc" @@ -201,10 +202,10 @@ if(BUILD_FOR_HEXAGON) # Include hexagon external library runtime sources if(USE_HEXAGON_EXTERNAL_LIBS) # Check if the libs are provided as an absolute path - if (EXISTS ${USE_HEXAGON_EXTERNAL_LIBS}) + if(EXISTS ${USE_HEXAGON_EXTERNAL_LIBS}) # Check if the libs are provided as a git url elseif(USE_HEXAGON_EXTERNAL_LIBS MATCHES "\.git$") - if (NOT DEFINED HEXAGON_EXTERNAL_LIBS_SHA) + if(NOT DEFINED HEXAGON_EXTERNAL_LIBS_SHA) message(FATAL_ERROR "HEXAGON_EXTERNA_LIBS_SHA must be set when " "USE_HEXAGON_EXTERNAL_LIBS is set to a git repository") endif() @@ -224,7 +225,7 @@ if(BUILD_FOR_HEXAGON) "${USE_HEXAGON_EXTERNAL_LIBS}/src/runtime/hexagon/*.cc" ) list(APPEND RUNTIME_HEXAGON_SRCS "${HEXAGON_EXTERNAL_RUNTIME_SRCS}") - if (EXISTS "${USE_HEXAGON_EXTERNAL_LIBS}/HexagonExternalCompileFlags.cmake") + if(EXISTS "${USE_HEXAGON_EXTERNAL_LIBS}/HexagonExternalCompileFlags.cmake") # External libraries will define HEXAGON_EXTERNAL_LIBS_COMPILE_FLAGS, # changing this variable name will break downstream external libraries. include("${USE_HEXAGON_EXTERNAL_LIBS}/HexagonExternalCompileFlags.cmake") @@ -329,4 +330,28 @@ if(USE_HEXAGON_RPC) endif() endif() # USE_HEXAGON_RPC -list(APPEND RUNTIME_SRCS ${RUNTIME_HEXAGON_SRCS} ${TVM_QHL_WRAPPER_SRCS}) +# When building for the Hexagon DSP itself, all sources fold into +# libtvm_runtime (static/shared). When building for a host with +# USE_HEXAGON=ON, create a separate libtvm_runtime_hexagon.so. +if(BUILD_FOR_HEXAGON) + list(APPEND RUNTIME_SRCS ${RUNTIME_HEXAGON_SRCS} ${TVM_QHL_WRAPPER_SRCS}) +elseif(USE_HEXAGON) + message(STATUS "Build hexagon device runtime") + add_library(tvm_runtime_hexagon_objs OBJECT ${RUNTIME_HEXAGON_SRCS} ${TVM_QHL_WRAPPER_SRCS}) + target_link_libraries(tvm_runtime_hexagon_objs PUBLIC tvm_ffi_header) + set_target_properties(tvm_runtime_hexagon_objs PROPERTIES POSITION_INDEPENDENT_CODE ON) + if(TVM_VISIBILITY_FLAG) + target_compile_options(tvm_runtime_hexagon_objs PRIVATE "${TVM_VISIBILITY_FLAG}") + endif() + add_library(tvm_runtime_hexagon SHARED $) + target_link_libraries(tvm_runtime_hexagon PUBLIC tvm_runtime) + set_target_properties(tvm_runtime_hexagon PROPERTIES + LIBRARY_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/lib" + RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/lib" + ARCHIVE_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/lib" + ) + install(TARGETS tvm_runtime_hexagon DESTINATION lib${LIB_SUFFIX}) + if(TVM_BUILD_PYTHON_MODULE) + install(TARGETS tvm_runtime_hexagon DESTINATION "lib") + endif() +endif() diff --git a/cmake/modules/Metal.cmake b/cmake/modules/Metal.cmake index a9f0e9dd533e..73ba1f5d6a99 100644 --- a/cmake/modules/Metal.cmake +++ b/cmake/modules/Metal.cmake @@ -16,12 +16,28 @@ # under the License. if(USE_METAL) - message(STATUS "Build with Metal support") + message(STATUS "Build metal device runtime") find_library(METAL_LIB Metal) find_library(FOUNDATION_LIB Foundation) tvm_file_glob(GLOB RUNTIME_METAL_SRCS src/runtime/metal/*.mm) - list(APPEND TVM_RUNTIME_LINKER_LIBS ${METAL_LIB} ${FOUNDATION_LIB}) - list(APPEND RUNTIME_SRCS ${RUNTIME_METAL_SRCS}) + + add_library(tvm_runtime_metal_objs OBJECT ${RUNTIME_METAL_SRCS}) + target_link_libraries(tvm_runtime_metal_objs PUBLIC tvm_ffi_header) + set_target_properties(tvm_runtime_metal_objs PROPERTIES POSITION_INDEPENDENT_CODE ON) + if(TVM_VISIBILITY_FLAG) + target_compile_options(tvm_runtime_metal_objs PRIVATE "${TVM_VISIBILITY_FLAG}") + endif() + add_library(tvm_runtime_metal SHARED $) + target_link_libraries(tvm_runtime_metal PUBLIC tvm_runtime ${METAL_LIB} ${FOUNDATION_LIB}) + set_target_properties(tvm_runtime_metal PROPERTIES + LIBRARY_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/lib" + RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/lib" + ARCHIVE_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/lib" + ) + install(TARGETS tvm_runtime_metal DESTINATION lib${LIB_SUFFIX}) + if(TVM_BUILD_PYTHON_MODULE) + install(TARGETS tvm_runtime_metal DESTINATION "lib") + endif() endif(USE_METAL) # When USE_METAL=OFF the codegen-side fallback in # src/target/metal/metal_fallback_module.cc handles construction; no opt diff --git a/cmake/modules/OpenCL.cmake b/cmake/modules/OpenCL.cmake index 3076c5f2275b..a90e9cfe1469 100644 --- a/cmake/modules/OpenCL.cmake +++ b/cmake/modules/OpenCL.cmake @@ -18,6 +18,7 @@ if(USE_OPENCL) tvm_file_glob(GLOB RUNTIME_OPENCL_SRCS src/runtime/opencl/*.cc) + set(_opencl_libs "") if(${USE_OPENCL} MATCHES ${IS_TRUE_PATTERN}) message(STATUS "Enabled runtime search for OpenCL library location") file_glob_append(RUNTIME_OPENCL_SRCS @@ -27,14 +28,33 @@ if(USE_OPENCL) else() find_opencl(${USE_OPENCL}) if(NOT OpenCL_FOUND) - message(FATAL_ERROR "Error! Cannot find specified OpenCL library") + message(FATAL_ERROR "Error! Cannot find specified OpenCL library") endif() message(STATUS "Build with OpenCL support") include_directories(SYSTEM ${OpenCL_INCLUDE_DIRS}) - list(APPEND TVM_RUNTIME_LINKER_LIBS ${OpenCL_LIBRARIES}) + list(APPEND _opencl_libs ${OpenCL_LIBRARIES}) + endif() + + message(STATUS "Build opencl device runtime") + + add_library(tvm_runtime_opencl_objs OBJECT ${RUNTIME_OPENCL_SRCS}) + target_link_libraries(tvm_runtime_opencl_objs PUBLIC tvm_ffi_header) + set_target_properties(tvm_runtime_opencl_objs PROPERTIES POSITION_INDEPENDENT_CODE ON) + if(TVM_VISIBILITY_FLAG) + target_compile_options(tvm_runtime_opencl_objs PRIVATE "${TVM_VISIBILITY_FLAG}") + endif() + add_library(tvm_runtime_opencl SHARED $) + target_link_libraries(tvm_runtime_opencl PUBLIC tvm_runtime ${_opencl_libs}) + set_target_properties(tvm_runtime_opencl PROPERTIES + LIBRARY_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/lib" + RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/lib" + ARCHIVE_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/lib" + ) + install(TARGETS tvm_runtime_opencl DESTINATION lib${LIB_SUFFIX}) + if(TVM_BUILD_PYTHON_MODULE) + install(TARGETS tvm_runtime_opencl DESTINATION "lib") endif() - list(APPEND RUNTIME_SRCS ${RUNTIME_OPENCL_SRCS}) if(USE_OPENCL_ENABLE_HOST_PTR) add_definitions(-DOPENCL_ENABLE_HOST_PTR) endif(USE_OPENCL_ENABLE_HOST_PTR) diff --git a/cmake/modules/ROCM.cmake b/cmake/modules/ROCM.cmake index 366dde3a6957..bc0159377b01 100644 --- a/cmake/modules/ROCM.cmake +++ b/cmake/modules/ROCM.cmake @@ -26,45 +26,65 @@ if(ROCM_FOUND) add_definitions(-D__HIP_PLATFORM_AMD__=1) endif(ROCM_FOUND) - if(USE_ROCM) if(NOT ROCM_FOUND) message(FATAL_ERROR "Cannot find ROCM, USE_ROCM=" ${USE_ROCM}) endif() - message(STATUS "Build with ROCM support") + message(STATUS "Build rocm device runtime") + tvm_file_glob(GLOB RUNTIME_ROCM_SRCS src/runtime/rocm/*.cc) - list(APPEND RUNTIME_SRCS ${RUNTIME_ROCM_SRCS}) - list(APPEND TVM_RUNTIME_LINKER_LIBS ${ROCM_HIPHCC_LIBRARY}) - if (ROCM_HSA_LIBRARY) - list(APPEND TVM_RUNTIME_LINKER_LIBS ${ROCM_HSA_LIBRARY}) + + set(_rocm_libs ${ROCM_HIPHCC_LIBRARY}) + if(ROCM_HSA_LIBRARY) + list(APPEND _rocm_libs ${ROCM_HSA_LIBRARY}) endif() - if(USE_HIPBLAS) - message(STATUS "Build with HIPBLAS support") - tvm_file_glob(GLOB HIPBLAS_CONTRIB_SRC src/relax/backend/contrib/hipblas/*.cc) - list(APPEND COMPILER_SRCS ${HIPBLAS_CONTRIB_SRC}) - tvm_file_glob(GLOB HIPBLAS_CONTRIB_SRCS src/runtime/contrib/hipblas/*.cc) - list(APPEND RUNTIME_SRCS ${HIPBLAS_CONTRIB_SRCS}) - list(APPEND TVM_RUNTIME_LINKER_LIBS ${ROCM_HIPBLAS_LIBRARY}) - if(NOT ROCM_HIPBLASLT_LIBRARY STREQUAL "ROCM_HIPBLASLT_LIBRARY-NOTFOUND") - list(APPEND TVM_RUNTIME_LINKER_LIBS ${ROCM_HIPBLASLT_LIBRARY}) - endif() - endif(USE_HIPBLAS) + add_library(tvm_runtime_rocm_objs OBJECT ${RUNTIME_ROCM_SRCS}) + target_link_libraries(tvm_runtime_rocm_objs PUBLIC tvm_ffi_header) + set_target_properties(tvm_runtime_rocm_objs PROPERTIES POSITION_INDEPENDENT_CODE ON) + if(TVM_VISIBILITY_FLAG) + target_compile_options(tvm_runtime_rocm_objs PRIVATE "${TVM_VISIBILITY_FLAG}") + endif() + add_library(tvm_runtime_rocm SHARED $) + target_link_libraries(tvm_runtime_rocm PUBLIC tvm_runtime ${_rocm_libs}) + set_target_properties(tvm_runtime_rocm PROPERTIES + LIBRARY_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/lib" + RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/lib" + ARCHIVE_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/lib" + ) + install(TARGETS tvm_runtime_rocm DESTINATION lib${LIB_SUFFIX}) + if(TVM_BUILD_PYTHON_MODULE) + install(TARGETS tvm_runtime_rocm DESTINATION "lib") + endif() +endif(USE_ROCM) - if(USE_THRUST) - message(STATUS "Build with rocThrust support") - # We need to override CXX to hipcc. This is required by rocthrust - if (${CMAKE_CXX_COMPILER} MATCHES "hipcc$") - message(STATUS "Using hipcc compiler to compile rocthrust code.") - else() - message(FATAL_ERROR "Set CXX=hipcc to compile rocthrust code.") - endif() +# HIPBLAS contrib goes into libtvm_runtime_extra. +if(USE_ROCM AND USE_HIPBLAS) + message(STATUS "Build with HIPBLAS support") + tvm_file_glob(GLOB HIPBLAS_CONTRIB_SRC src/relax/backend/contrib/hipblas/*.cc) + list(APPEND COMPILER_SRCS ${HIPBLAS_CONTRIB_SRC}) + tvm_file_glob(GLOB HIPBLAS_CONTRIB_SRCS src/runtime/contrib/hipblas/*.cc) + add_library(tvm_hipblas_objs OBJECT ${HIPBLAS_CONTRIB_SRCS}) + target_link_libraries(tvm_hipblas_objs PRIVATE tvm_runtime_extra_defs) + target_link_libraries(tvm_runtime_extra PRIVATE tvm_hipblas_objs ${ROCM_HIPBLAS_LIBRARY}) + if(NOT ROCM_HIPBLASLT_LIBRARY STREQUAL "ROCM_HIPBLASLT_LIBRARY-NOTFOUND") + target_link_libraries(tvm_runtime_extra PRIVATE ${ROCM_HIPBLASLT_LIBRARY}) + endif() +endif(USE_ROCM AND USE_HIPBLAS) - find_package(rocprim REQUIRED) - find_package(rocthrust REQUIRED) - set_source_files_properties(src/runtime/contrib/thrust/thrust.cu PROPERTIES LANGUAGE CXX) - list(APPEND RUNTIME_SRCS src/runtime/contrib/thrust/thrust.cu) - list(APPEND TVM_RUNTIME_LINKER_LIBS roc::rocthrust) - endif(USE_THRUST) +if(USE_ROCM AND USE_THRUST) + message(STATUS "Build with rocThrust support") + # We need to override CXX to hipcc. This is required by rocthrust + if(${CMAKE_CXX_COMPILER} MATCHES "hipcc$") + message(STATUS "Using hipcc compiler to compile rocthrust code.") + else() + message(FATAL_ERROR "Set CXX=hipcc to compile rocthrust code.") + endif() -endif(USE_ROCM) + find_package(rocprim REQUIRED) + find_package(rocthrust REQUIRED) + set_source_files_properties(src/runtime/contrib/thrust/thrust.cu PROPERTIES LANGUAGE CXX) + add_library(tvm_rocthrust_objs OBJECT src/runtime/contrib/thrust/thrust.cu) + target_link_libraries(tvm_rocthrust_objs PRIVATE tvm_runtime_extra_defs) + target_link_libraries(tvm_runtime_extra PRIVATE tvm_rocthrust_objs roc::rocthrust) +endif(USE_ROCM AND USE_THRUST) diff --git a/cmake/modules/Vulkan.cmake b/cmake/modules/Vulkan.cmake index bce4dada8802..c64e5581c9c7 100644 --- a/cmake/modules/Vulkan.cmake +++ b/cmake/modules/Vulkan.cmake @@ -22,17 +22,12 @@ if(USE_VULKAN) if(NOT Vulkan_FOUND) message(FATAL_ERROR "Cannot find Vulkan, USE_VULKAN=" ${USE_VULKAN}) endif() - if (USE_SPIRV_KHR_INTEGER_DOT_PRODUCT) + if(USE_SPIRV_KHR_INTEGER_DOT_PRODUCT) add_definitions(-DTVM_SPIRV_KHR_INTEGER_DOT_PRODUCT=1) message(STATUS "Enable SPIRV_KHR_INTEGER_DOT_PRODUCT") endif() include_directories(SYSTEM ${Vulkan_INCLUDE_DIRS}) message(STATUS "Build with Vulkan support") - tvm_file_glob(GLOB RUNTIME_VULKAN_SRCS src/runtime/vulkan/*.cc) - # SPIR-V codegen tooling lives under src/target/vulkan/ alongside the - # fallback module. The fallback module itself is always compiled (in - # CMakeLists.txt's CODEGEN_SRCS); the rest depends on spirv-tools and - # is only compiled when USE_VULKAN=ON. tvm_file_glob(GLOB COMPILER_VULKAN_SRCS src/target/vulkan/build_vulkan.cc src/target/vulkan/codegen_spirv.cc @@ -41,9 +36,31 @@ if(USE_VULKAN) src/target/vulkan/spirv_support.cc src/target/vulkan/spirv_utils.cc ) - list(APPEND RUNTIME_SRCS ${RUNTIME_VULKAN_SRCS}) list(APPEND COMPILER_SRCS ${COMPILER_VULKAN_SRCS}) list(APPEND TVM_LINKER_LIBS ${Vulkan_SPIRV_TOOLS_LIBRARY}) - list(APPEND TVM_RUNTIME_LINKER_LIBS ${Vulkan_LIBRARY}) add_definitions(-DTVM_ENABLE_SPIRV=1) endif(USE_VULKAN) + +if(USE_VULKAN) + message(STATUS "Build vulkan device runtime") + + tvm_file_glob(GLOB RUNTIME_VULKAN_SRCS src/runtime/vulkan/*.cc) + + add_library(tvm_runtime_vulkan_objs OBJECT ${RUNTIME_VULKAN_SRCS}) + target_link_libraries(tvm_runtime_vulkan_objs PUBLIC tvm_ffi_header) + set_target_properties(tvm_runtime_vulkan_objs PROPERTIES POSITION_INDEPENDENT_CODE ON) + if(TVM_VISIBILITY_FLAG) + target_compile_options(tvm_runtime_vulkan_objs PRIVATE "${TVM_VISIBILITY_FLAG}") + endif() + add_library(tvm_runtime_vulkan SHARED $) + target_link_libraries(tvm_runtime_vulkan PUBLIC tvm_runtime ${Vulkan_LIBRARY}) + set_target_properties(tvm_runtime_vulkan PROPERTIES + LIBRARY_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/lib" + RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/lib" + ARCHIVE_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/lib" + ) + install(TARGETS tvm_runtime_vulkan DESTINATION lib${LIB_SUFFIX}) + if(TVM_BUILD_PYTHON_MODULE) + install(TARGETS tvm_runtime_vulkan DESTINATION "lib") + endif() +endif(USE_VULKAN) diff --git a/cmake/modules/contrib/BLAS.cmake b/cmake/modules/contrib/BLAS.cmake index 542effb50463..cee3e2fc30e7 100644 --- a/cmake/modules/contrib/BLAS.cmake +++ b/cmake/modules/contrib/BLAS.cmake @@ -17,8 +17,9 @@ if(USE_BLAS STREQUAL "openblas") find_library(BLAS_LIBRARY openblas) - list(APPEND TVM_RUNTIME_LINKER_LIBS ${BLAS_LIBRARY}) - list(APPEND RUNTIME_SRCS src/runtime/contrib/cblas/cblas.cc) + add_library(tvm_blas_objs OBJECT src/runtime/contrib/cblas/cblas.cc) + target_link_libraries(tvm_blas_objs PRIVATE tvm_runtime_extra_defs) + target_link_libraries(tvm_runtime_extra PRIVATE tvm_blas_objs ${BLAS_LIBRARY}) message(STATUS "Using BLAS library " ${BLAS_LIBRARY}) find_path(BLAS_INCLUDE_DIR cblas.h PATH_SUFFIXES openblas) if(BLAS_INCLUDE_DIR) @@ -27,14 +28,16 @@ if(USE_BLAS STREQUAL "openblas") endif() elseif(USE_BLAS STREQUAL "atlas" OR USE_BLAS STREQUAL "blas") find_library(BLAS_LIBRARY cblas) - list(APPEND TVM_RUNTIME_LINKER_LIBS ${BLAS_LIBRARY}) - list(APPEND RUNTIME_SRCS src/runtime/contrib/cblas/cblas.cc) + add_library(tvm_blas_objs OBJECT src/runtime/contrib/cblas/cblas.cc) + target_link_libraries(tvm_blas_objs PRIVATE tvm_runtime_extra_defs) + target_link_libraries(tvm_runtime_extra PRIVATE tvm_blas_objs ${BLAS_LIBRARY}) message(STATUS "Use BLAS library " ${BLAS_LIBRARY}) elseif(USE_BLAS STREQUAL "apple") find_library(BLAS_LIBRARY Accelerate) include_directories(SYSTEM ${BLAS_LIBRARY}/Versions/Current/Frameworks/vecLib.framework/Versions/Current/Headers/) - list(APPEND TVM_RUNTIME_LINKER_LIBS ${BLAS_LIBRARY}) - list(APPEND RUNTIME_SRCS src/runtime/contrib/cblas/cblas.cc) + add_library(tvm_blas_objs OBJECT src/runtime/contrib/cblas/cblas.cc) + target_link_libraries(tvm_blas_objs PRIVATE tvm_runtime_extra_defs) + target_link_libraries(tvm_runtime_extra PRIVATE tvm_blas_objs ${BLAS_LIBRARY}) message(STATUS "Use BLAS library " ${BLAS_LIBRARY}) elseif(USE_BLAS STREQUAL "mkl") message(DEPRECATION "USE_BLAS=mkl is deprecated. Use USE_MKL=ON instead.") @@ -63,8 +66,9 @@ if(USE_MKL OR USE_MKL_PATH) find_library(BLAS_LIBRARY_MKL NAMES mkl_rt HINTS ${USE_MKL}/lib/ ${USE_MKL}/lib/intel64_win) endif() include_directories(SYSTEM ${USE_MKL}/include) - list(APPEND TVM_RUNTIME_LINKER_LIBS ${BLAS_LIBRARY_MKL}) - list(APPEND RUNTIME_SRCS src/runtime/contrib/cblas/mkl.cc) + add_library(tvm_mkl_objs OBJECT src/runtime/contrib/cblas/mkl.cc) + target_link_libraries(tvm_mkl_objs PRIVATE tvm_runtime_extra_defs) + target_link_libraries(tvm_runtime_extra PRIVATE tvm_mkl_objs ${BLAS_LIBRARY_MKL}) add_definitions(-DUSE_MKL_BLAS=1) message(STATUS "Use MKL library " ${BLAS_LIBRARY_MKL}) endif() diff --git a/cmake/modules/contrib/CLML.cmake b/cmake/modules/contrib/CLML.cmake index 34b998ef9914..1d0f0f3a50cf 100644 --- a/cmake/modules/contrib/CLML.cmake +++ b/cmake/modules/contrib/CLML.cmake @@ -24,7 +24,7 @@ if(USE_CLML) list(APPEND COMPILER_SRCS ${CLML_RUNTIME_MODULE}) endif() message(STATUS "Build with CLML support : " ${USE_CLML}) - if (NOT USE_CLML STREQUAL "ON") + if(NOT USE_CLML STREQUAL "ON") set(CLML_VERSION_HEADER "${USE_CLML}/CL/cl_qcom_ml_ops.h") if(EXISTS ${CLML_VERSION_HEADER}) file(READ ${CLML_VERSION_HEADER} ver) @@ -45,7 +45,7 @@ endif() if(USE_CLML_GRAPH_EXECUTOR) set(CLML_PATH ${CMAKE_CURRENT_SOURCE_DIR}/clml) # Detect custom CLML path. - if (NOT USE_CLML_GRAPH_EXECUTOR STREQUAL "ON") + if(NOT USE_CLML_GRAPH_EXECUTOR STREQUAL "ON") set(CLML_PATH ${USE_CLML_GRAPH_EXECUTOR}) endif() @@ -68,8 +68,14 @@ if(USE_CLML_GRAPH_EXECUTOR) list(APPEND EXTERN_CLML_COMPUTE_LIB ${CLML_PATH}/lib/libOpenCL.so ${CLML_PATH}/lib/libOpenCL_system.so) endif() endif() - list(APPEND TVM_RUNTIME_LINKER_LIBS ${EXTERN_CLML_COMPUTE_LIB}) - list(APPEND RUNTIME_SRCS ${CLML_CONTRIB_SRC}) + add_library(tvm_clml_objs OBJECT ${CLML_CONTRIB_SRC}) + target_link_libraries(tvm_clml_objs PRIVATE tvm_runtime_extra_defs) + # CLML depends on OpenCL runtime symbols — link the OpenCL DSO instead of + # duplicating sources (which would cause duplicate registrations). + target_link_libraries(tvm_runtime_extra PRIVATE tvm_clml_objs ${EXTERN_CLML_COMPUTE_LIB}) + if(TARGET tvm_runtime_opencl) + target_link_libraries(tvm_runtime_extra PRIVATE tvm_runtime_opencl) + endif() message(STATUS "Build with CLML graph runtime support: " ${EXTERN_CLML_COMPUTE_LIB}) @@ -77,8 +83,6 @@ if(USE_CLML_GRAPH_EXECUTOR) add_definitions(-DTVM_GRAPH_EXECUTOR_CLML) message(STATUS "Enable OpenCL as fallback to CLML") - file(GLOB RUNTIME_OPENCL_SRCS src/runtime/opencl/*.cc) - list(APPEND RUNTIME_SRCS ${RUNTIME_OPENCL_SRCS}) set(USE_OPENCL ${CLML_PATH}) if(USE_OPENCL_ENABLE_HOST_PTR) add_definitions(-DOPENCL_ENABLE_HOST_PTR) diff --git a/cmake/modules/contrib/CUTLASS.cmake b/cmake/modules/contrib/CUTLASS.cmake index a3a09f141c9e..7f44c2e6db0c 100644 --- a/cmake/modules/contrib/CUTLASS.cmake +++ b/cmake/modules/contrib/CUTLASS.cmake @@ -16,9 +16,6 @@ # under the License. if(USE_CUDA AND USE_CUTLASS) - set(CUTLASS_GEN_COND "$,$>") - set(CUTLASS_RUNTIME_OBJS "") - tvm_file_glob(GLOB CUTLASS_CONTRIB_SRC src/relax/backend/contrib/cutlass/*.cc ) @@ -38,12 +35,12 @@ if(USE_CUDA AND USE_CUTLASS) set(CUTLASS_FPA_INTB_RUNTIME_SRCS "") list(APPEND CUTLASS_FPA_INTB_RUNTIME_SRCS src/runtime/contrib/cutlass/weight_preprocess.cc) add_library(fpA_intB_cutlass_objs OBJECT ${CUTLASS_FPA_INTB_RUNTIME_SRCS}) - target_link_libraries(fpA_intB_cutlass_objs PRIVATE tvm_ffi_header) + target_link_libraries(fpA_intB_cutlass_objs PRIVATE tvm_runtime_extra_defs) target_include_directories(fpA_intB_cutlass_objs PRIVATE ${PROJECT_SOURCE_DIR}/3rdparty/cutlass_fpA_intB_gemm ${PROJECT_SOURCE_DIR}/3rdparty/cutlass_fpA_intB_gemm/cutlass/include ) - list(APPEND CUTLASS_RUNTIME_OBJS "$<${CUTLASS_GEN_COND}:$>") + target_link_libraries(tvm_runtime_extra PRIVATE fpA_intB_cutlass_objs) ### Build cutlass runtime objects for flash attention add_subdirectory(${PROJECT_SOURCE_DIR}/3rdparty/libflash_attn) @@ -56,13 +53,13 @@ if(USE_CUDA AND USE_CUTLASS) set(CUTLASS_DIR ${PROJECT_SOURCE_DIR}/3rdparty/cutlass) set(TVM_CUTLASS_RUNTIME_SRCS "") - if (CMAKE_CUDA_ARCHITECTURES MATCHES "90a") + if(CMAKE_CUDA_ARCHITECTURES MATCHES "90a") list(APPEND TVM_CUTLASS_RUNTIME_SRCS src/runtime/contrib/cutlass/fp16_group_gemm_sm90.cu) list(APPEND TVM_CUTLASS_RUNTIME_SRCS src/runtime/contrib/cutlass/fp8_group_gemm_sm90.cu) list(APPEND TVM_CUTLASS_RUNTIME_SRCS src/runtime/contrib/cutlass/fp8_gemm.cu) list(APPEND TVM_CUTLASS_RUNTIME_SRCS src/runtime/contrib/cutlass/fp8_groupwise_scaled_gemm_sm90.cu) endif() - if (CMAKE_CUDA_ARCHITECTURES MATCHES "100a") + if(CMAKE_CUDA_ARCHITECTURES MATCHES "100a") list(APPEND TVM_CUTLASS_RUNTIME_SRCS src/runtime/contrib/cutlass/fp16_group_gemm_sm100.cu) list(APPEND TVM_CUTLASS_RUNTIME_SRCS src/runtime/contrib/cutlass/fp8_groupwise_scaled_gemm_sm100.cu) list(APPEND TVM_CUTLASS_RUNTIME_SRCS src/runtime/contrib/cutlass/fp8_groupwise_scaled_group_gemm_sm100.cu) @@ -74,14 +71,11 @@ if(USE_CUDA AND USE_CUTLASS) ${CUTLASS_DIR}/include ${PROJECT_SOURCE_DIR}/3rdparty/cutlass_fpA_intB_gemm/cutlass_extensions/include ) - target_link_libraries(tvm_cutlass_objs PRIVATE tvm_ffi_header) + target_link_libraries(tvm_cutlass_objs PRIVATE tvm_runtime_extra_defs) # Note: enable this to get more detailed logs for cutlass kernels # target_compile_definitions(tvm_cutlass_objs PRIVATE CUTLASS_DEBUG_TRACE_LEVEL=2) - list(APPEND CUTLASS_RUNTIME_OBJS "$<${CUTLASS_GEN_COND}:$>") + target_link_libraries(tvm_runtime_extra PRIVATE tvm_cutlass_objs) endif() - ### Add cutlass objects to list of TVM runtime extension objs - list(APPEND TVM_RUNTIME_EXT_OBJS "${CUTLASS_RUNTIME_OBJS}") - message(STATUS "Build with CUTLASS") endif() diff --git a/cmake/modules/contrib/CoreML.cmake b/cmake/modules/contrib/CoreML.cmake index c530d8650fb2..94520f2b570f 100644 --- a/cmake/modules/contrib/CoreML.cmake +++ b/cmake/modules/contrib/CoreML.cmake @@ -20,6 +20,7 @@ if(USE_COREML) find_library(FOUNDATION_LIB Foundation) find_library(COREML_LIB Coreml) tvm_file_glob(GLOB COREML_CONTRIB_SRC src/runtime/contrib/coreml/*.mm) - list(APPEND TVM_RUNTIME_LINKER_LIBS ${FOUNDATION_LIB} ${COREML_LIB}) - list(APPEND RUNTIME_SRCS ${COREML_CONTRIB_SRC}) + add_library(tvm_coreml_objs OBJECT ${COREML_CONTRIB_SRC}) + target_link_libraries(tvm_coreml_objs PRIVATE tvm_runtime_extra_defs) + target_link_libraries(tvm_runtime_extra PRIVATE tvm_coreml_objs ${FOUNDATION_LIB} ${COREML_LIB}) endif(USE_COREML) diff --git a/cmake/modules/contrib/DNNL.cmake b/cmake/modules/contrib/DNNL.cmake index e3d75677b547..191b04594b80 100644 --- a/cmake/modules/contrib/DNNL.cmake +++ b/cmake/modules/contrib/DNNL.cmake @@ -17,19 +17,20 @@ if(IS_DIRECTORY ${USE_DNNL}) find_library(EXTERN_LIBRARY_DNNL NAMES dnnl HINTS ${USE_DNNL}/lib/) - if (EXTERN_LIBRARY_DNNL STREQUAL "EXTERN_LIBRARY_DNNL-NOTFOUND") + if(EXTERN_LIBRARY_DNNL STREQUAL "EXTERN_LIBRARY_DNNL-NOTFOUND") message(WARNING "Cannot find DNNL library at ${USE_DNNL}.") else() add_definitions(-DUSE_JSON_RUNTIME=1) tvm_file_glob(GLOB DNNL_CONTRIB_SRC src/relax/backend/contrib/dnnl/*.cc) list(APPEND COMPILER_SRCS ${DNNL_CONTRIB_SRC}) - list(APPEND TVM_RUNTIME_LINKER_LIBS ${EXTERN_LIBRARY_DNNL}) tvm_file_glob(GLOB DNNL_CONTRIB_SRC src/runtime/contrib/dnnl/dnnl_json_runtime.cc src/runtime/contrib/dnnl/dnnl_utils.cc src/runtime/contrib/dnnl/dnnl.cc src/runtime/contrib/cblas/dnnl_blas.cc) - list(APPEND RUNTIME_SRCS ${DNNL_CONTRIB_SRC}) + add_library(tvm_dnnl_objs OBJECT ${DNNL_CONTRIB_SRC}) + target_link_libraries(tvm_dnnl_objs PRIVATE tvm_runtime_extra_defs) + target_link_libraries(tvm_runtime_extra PRIVATE tvm_dnnl_objs ${EXTERN_LIBRARY_DNNL}) message(STATUS "Build with DNNL JSON runtime: " ${EXTERN_LIBRARY_DNNL}) endif() elseif((USE_DNNL STREQUAL "ON") OR (USE_DNNL STREQUAL "JSON")) @@ -38,20 +39,22 @@ elseif((USE_DNNL STREQUAL "ON") OR (USE_DNNL STREQUAL "JSON")) list(APPEND COMPILER_SRCS ${DNNL_CONTRIB_SRC}) find_library(EXTERN_LIBRARY_DNNL dnnl) - list(APPEND TVM_RUNTIME_LINKER_LIBS ${EXTERN_LIBRARY_DNNL}) tvm_file_glob(GLOB DNNL_CONTRIB_SRC src/runtime/contrib/dnnl/dnnl_json_runtime.cc src/runtime/contrib/dnnl/dnnl_utils.cc src/runtime/contrib/dnnl/dnnl.cc src/runtime/contrib/cblas/dnnl_blas.cc) - list(APPEND RUNTIME_SRCS ${DNNL_CONTRIB_SRC}) + add_library(tvm_dnnl_objs OBJECT ${DNNL_CONTRIB_SRC}) + target_link_libraries(tvm_dnnl_objs PRIVATE tvm_runtime_extra_defs) + target_link_libraries(tvm_runtime_extra PRIVATE tvm_dnnl_objs ${EXTERN_LIBRARY_DNNL}) message(STATUS "Build with DNNL JSON runtime: " ${EXTERN_LIBRARY_DNNL}) elseif(USE_DNNL STREQUAL "C_SRC") find_library(EXTERN_LIBRARY_DNNL dnnl) - list(APPEND TVM_RUNTIME_LINKER_LIBS ${EXTERN_LIBRARY_DNNL}) tvm_file_glob(GLOB DNNL_CONTRIB_SRC src/runtime/contrib/dnnl/dnnl.cc src/runtime/contrib/dnnl/dnnl_utils.cc src/runtime/contrib/cblas/dnnl_blas.cc) - list(APPEND RUNTIME_SRCS ${DNNL_CONTRIB_SRC}) + add_library(tvm_dnnl_objs OBJECT ${DNNL_CONTRIB_SRC}) + target_link_libraries(tvm_dnnl_objs PRIVATE tvm_runtime_extra_defs) + target_link_libraries(tvm_runtime_extra PRIVATE tvm_dnnl_objs ${EXTERN_LIBRARY_DNNL}) message(STATUS "Build with DNNL C source module: " ${EXTERN_LIBRARY_DNNL}) elseif(USE_DNNL STREQUAL "OFF") # pass diff --git a/cmake/modules/contrib/ExampleNPU.cmake b/cmake/modules/contrib/ExampleNPU.cmake index 2fc53a4dfc82..51b023dbbf0d 100644 --- a/cmake/modules/contrib/ExampleNPU.cmake +++ b/cmake/modules/contrib/ExampleNPU.cmake @@ -28,12 +28,14 @@ if(USE_EXAMPLE_NPU_CODEGEN) endif() endif() -# Example NPU Runtime +# Example NPU Runtime — goes into libtvm_runtime_extra. if(USE_EXAMPLE_NPU_RUNTIME) message(STATUS "Build with Example NPU runtime") tvm_file_glob(GLOB RUNTIME_EXAMPLE_NPU_SRCS src/runtime/contrib/example_npu/*.cc) - list(APPEND RUNTIME_SRCS ${RUNTIME_EXAMPLE_NPU_SRCS}) + add_library(tvm_example_npu_objs OBJECT ${RUNTIME_EXAMPLE_NPU_SRCS}) + target_link_libraries(tvm_example_npu_objs PRIVATE tvm_runtime_extra_defs) + target_link_libraries(tvm_runtime_extra PRIVATE tvm_example_npu_objs) add_definitions(-DTVM_GRAPH_EXECUTOR_EXAMPLE_NPU) endif() diff --git a/cmake/modules/contrib/NNAPI.cmake b/cmake/modules/contrib/NNAPI.cmake index 23eb6dd11eda..496ce96a8060 100644 --- a/cmake/modules/contrib/NNAPI.cmake +++ b/cmake/modules/contrib/NNAPI.cmake @@ -27,13 +27,14 @@ if(USE_NNAPI_CODEGEN) endif() endif() -# NNAPI Runtime +# NNAPI Runtime — goes into libtvm_runtime_extra. if(USE_NNAPI_RUNTIME) message(STATUS "Build with NNAPI runtime") tvm_file_glob(GLOB RUNTIME_NNAPI_SRCS src/runtime/contrib/nnapi/*.cc) - list(APPEND RUNTIME_SRCS ${RUNTIME_NNAPI_SRCS}) - list(APPEND TVM_RUNTIME_LINKER_LIBS neuralnetworks log) + add_library(tvm_nnapi_objs OBJECT ${RUNTIME_NNAPI_SRCS}) + target_link_libraries(tvm_nnapi_objs PRIVATE tvm_runtime_extra_defs) + target_link_libraries(tvm_runtime_extra PRIVATE tvm_nnapi_objs neuralnetworks log) add_definitions(-DTVM_GRAPH_EXECUTOR_NNAPI) endif() diff --git a/cmake/modules/contrib/Random.cmake b/cmake/modules/contrib/Random.cmake index a003de1553ba..16e699fb62a6 100644 --- a/cmake/modules/contrib/Random.cmake +++ b/cmake/modules/contrib/Random.cmake @@ -18,5 +18,7 @@ if(USE_RANDOM) message(STATUS "Build with contrib.random") tvm_file_glob(GLOB RANDOM_CONTRIB_SRC src/runtime/contrib/random/random.cc) - list(APPEND RUNTIME_SRCS ${RANDOM_CONTRIB_SRC}) + add_library(tvm_random_objs OBJECT ${RANDOM_CONTRIB_SRC}) + target_link_libraries(tvm_random_objs PRIVATE tvm_runtime_extra_defs) + target_link_libraries(tvm_runtime_extra PRIVATE tvm_random_objs) endif(USE_RANDOM) diff --git a/cmake/modules/contrib/Sort.cmake b/cmake/modules/contrib/Sort.cmake index 4e4c9781216f..2fbeedd95e30 100644 --- a/cmake/modules/contrib/Sort.cmake +++ b/cmake/modules/contrib/Sort.cmake @@ -18,5 +18,7 @@ if(USE_SORT) message(STATUS "Build with contrib.sort") tvm_file_glob(GLOB SORT_CONTRIB_SRC src/runtime/contrib/sort/*.cc) - list(APPEND RUNTIME_SRCS ${SORT_CONTRIB_SRC}) + add_library(tvm_sort_objs OBJECT ${SORT_CONTRIB_SRC}) + target_link_libraries(tvm_sort_objs PRIVATE tvm_runtime_extra_defs) + target_link_libraries(tvm_runtime_extra PRIVATE tvm_sort_objs) endif(USE_SORT) diff --git a/cmake/modules/contrib/TensorRT.cmake b/cmake/modules/contrib/TensorRT.cmake index a9729ed99656..08a841a46df1 100644 --- a/cmake/modules/contrib/TensorRT.cmake +++ b/cmake/modules/contrib/TensorRT.cmake @@ -19,7 +19,7 @@ # compilation of TensorRT modules without requiring TensorRT to be installed. The compiled modules # will only be able to be executed using a TVM built with USE_TENSORRT_RUNTIME=ON. -include (FindPackageHandleStandardArgs) +include(FindPackageHandleStandardArgs) if(USE_TENSORRT_CODEGEN) message(STATUS "Build with TensorRT codegen") @@ -33,7 +33,7 @@ if(USE_TENSORRT_CODEGEN) endif() endif() -# TensorRT Runtime +# TensorRT Runtime — goes into libtvm_runtime_extra. if(USE_TENSORRT_RUNTIME) if(IS_DIRECTORY ${USE_TENSORRT_RUNTIME}) set(TENSORRT_ROOT_DIR ${USE_TENSORRT_RUNTIME}) @@ -47,12 +47,12 @@ if(USE_TENSORRT_RUNTIME) endif() message(STATUS "TENSORRT_LIB_DIR: " ${TENSORRT_LIB_DIR}) include_directories(${TENSORRT_INCLUDE_DIR}) - list(APPEND TVM_RUNTIME_LINKER_LIBS ${TENSORRT_LIB_DIR}) - # TRT runtime sources tvm_file_glob(GLOB RUNTIME_TENSORRT_SRCS src/runtime/contrib/tensorrt/*.cc) set_source_files_properties(${RUNTIME_TENSORRT_SRCS} PROPERTIES COMPILE_FLAGS "-Wno-deprecated-declarations") - list(APPEND RUNTIME_SRCS ${RUNTIME_TENSORRT_SRCS}) + add_library(tvm_tensorrt_objs OBJECT ${RUNTIME_TENSORRT_SRCS}) + target_link_libraries(tvm_tensorrt_objs PRIVATE tvm_runtime_extra_defs) + target_link_libraries(tvm_runtime_extra PRIVATE tvm_tensorrt_objs ${TENSORRT_LIB_DIR}) # Set defines add_definitions(-DTVM_GRAPH_EXECUTOR_TENSORRT) diff --git a/cmake/modules/contrib/vllm.cmake b/cmake/modules/contrib/vllm.cmake index 4a09edd02e58..7a571ff50508 100644 --- a/cmake/modules/contrib/vllm.cmake +++ b/cmake/modules/contrib/vllm.cmake @@ -21,5 +21,7 @@ if(USE_VLLM) enable_language(CUDA) tvm_file_glob(GLOB VLLM_CONTRIB_SRC src/runtime/contrib/vllm/*.cu src/runtime/contrib/vllm/*.cc) - list(APPEND RUNTIME_SRCS ${VLLM_CONTRIB_SRC}) + add_library(tvm_vllm_objs OBJECT ${VLLM_CONTRIB_SRC}) + target_link_libraries(tvm_vllm_objs PRIVATE tvm_runtime_extra_defs) + target_link_libraries(tvm_runtime_extra PRIVATE tvm_vllm_objs) endif(USE_VLLM) diff --git a/include/tvm/runtime/memory/memory_manager.h b/include/tvm/runtime/memory/memory_manager.h index 70cd8b7b3c77..9163f58f7d62 100644 --- a/include/tvm/runtime/memory/memory_manager.h +++ b/include/tvm/runtime/memory/memory_manager.h @@ -54,7 +54,7 @@ struct Buffer { AllocatorType alloc_type; }; -class Allocator { +class TVM_RUNTIME_DLL Allocator { public: explicit Allocator(AllocatorType type) : type_(type) {} virtual ~Allocator() = default; @@ -65,8 +65,8 @@ class Allocator { * \param mem_scope The device memory scope hint. * \return The empty Tensor. */ - TVM_RUNTIME_DLL Tensor Empty(ffi::Shape shape, DLDataType dtype, Device dev, - ffi::Optional mem_scope = std::nullopt); + Tensor Empty(ffi::Shape shape, DLDataType dtype, Device dev, + ffi::Optional mem_scope = std::nullopt); /*! \brief Return the allocator type. */ inline AllocatorType type() const { return type_; } /*! \brief Allocate a buffer given a size, alignment and type. @@ -76,8 +76,7 @@ class Allocator { * \param type_hint A type hint to the allocator. * \return A sized allocation in the form of a buffer. */ - TVM_RUNTIME_DLL virtual Buffer Alloc(Device dev, size_t nbytes, size_t alignment, - DLDataType type_hint) = 0; + virtual Buffer Alloc(Device dev, size_t nbytes, size_t alignment, DLDataType type_hint) = 0; /*! \brief Allocate a buffer given a shape and type. * \param dev The device where the array is allocated. * \param shape The shape of the tensor. @@ -85,8 +84,8 @@ class Allocator { * \param mem_scope A memory scope of the buffer. * \return A sized allocation in the form of a buffer. */ - TVM_RUNTIME_DLL virtual Buffer Alloc(Device dev, ffi::Shape shape, DLDataType type_hint, - const std::string& mem_scope = ""); + virtual Buffer Alloc(Device dev, ffi::Shape shape, DLDataType type_hint, + const std::string& mem_scope = ""); /*! \brief Create a view for the buffer given a shape, type and scope. * \param buffer The existing buffer upon which we need to create a view. @@ -95,9 +94,8 @@ class Allocator { * \param mem_scope A memory scope of the view. * \return A device pointer to the created view. */ - TVM_RUNTIME_DLL virtual void* CreateView(const Buffer& buffer, ffi::Shape shape, - DLDataType type_hint, - const std::string& mem_scope = "global") { + virtual void* CreateView(const Buffer& buffer, ffi::Shape shape, DLDataType type_hint, + const std::string& mem_scope = "global") { return buffer.data; } @@ -105,22 +103,22 @@ class Allocator { * \param dev is the device where this view is created * \param data The view pointer to be freed. */ - TVM_RUNTIME_DLL virtual void FreeView(Device dev, void* data) {} + virtual void FreeView(Device dev, void* data) {} /*! \brief Free a buffer allocated by the allocator. * \param buffer The buffer to free. */ - TVM_RUNTIME_DLL virtual void Free(const Buffer& buffer) = 0; + virtual void Free(const Buffer& buffer) = 0; /*! \brief Clear the allocated memory. */ - TVM_RUNTIME_DLL virtual void Clear(); + virtual void Clear(); /*! \brief The amount of memory currently allocated. * \return The amount of memory currently allocated. */ - TVM_RUNTIME_DLL virtual size_t UsedMemory() const = 0; + virtual size_t UsedMemory() const = 0; protected: /*! \brief Check if the given memory scope is allowed to allocate by the allocator. */ - TVM_RUNTIME_DLL virtual bool AllowMemoryScope(const std::string& mem_scope) const; + virtual bool AllowMemoryScope(const std::string& mem_scope) const; private: AllocatorType type_; diff --git a/include/tvm/runtime/vm/tensor_cache_support.h b/include/tvm/runtime/vm/tensor_cache_support.h index 3580fdef2a25..ea997f0755bd 100644 --- a/include/tvm/runtime/vm/tensor_cache_support.h +++ b/include/tvm/runtime/vm/tensor_cache_support.h @@ -86,7 +86,8 @@ struct TensorCacheMetadata { /*! \brief Load the metadata from a specific directory */ TVM_RUNTIME_DLL static TensorCacheMetadata Load(const std::string& path); /*! \brief Load the metadata from a given JSON string */ - static TensorCacheMetadata LoadFromStr(const std::string& json_str, const std::string& path); + TVM_RUNTIME_DLL static TensorCacheMetadata LoadFromStr(const std::string& json_str, + const std::string& path); }; } // namespace vm diff --git a/include/tvm/s_tir/random_engine.h b/include/tvm/s_tir/random_engine.h index 0acfd50fbed2..059791be1a6f 100644 --- a/include/tvm/s_tir/random_engine.h +++ b/include/tvm/s_tir/random_engine.h @@ -61,7 +61,10 @@ class LinearCongruentialEngine { * \brief Get a device random state * \return The random state */ - static TRandState DeviceRandom() { return (std::random_device()()) % modulus; } + static TRandState DeviceRandom() { + std::random_device rd; + return rd() % modulus; + } /*! * \brief Operator to move the random state to the next and return the new random state. According diff --git a/python/tvm/base.py b/python/tvm/base.py index 40823edd843c..11601a9dcc72 100644 --- a/python/tvm/base.py +++ b/python/tvm/base.py @@ -22,6 +22,8 @@ import os import sys +from tvm_ffi.libinfo import load_lib_ctypes + from . import libinfo # ---------------------------- @@ -48,16 +50,23 @@ # compiler library is simply not present (runtime-only wheel), only the # runtime is loaded and ``_LIB`` aliases ``_LIB_RUNTIME``. _extra_lib_paths = libinfo.package_lib_paths() -_LIB_RUNTIME = libinfo.load_lib_ctypes( +_LIB_RUNTIME = load_lib_ctypes( "tvm", "tvm_runtime", "RTLD_GLOBAL", extra_lib_paths=_extra_lib_paths ) +# After libtvm_runtime.so is in the global symbol namespace, scan the same +# directory for per-backend DSOs (libtvm_runtime_cuda.so, etc.) and load each +# with RTLD_GLOBAL so their static initializers register device backends. +# Failures are swallowed silently — a missing driver just means that backend +# is unavailable, not an error. +libinfo.load_backend_libs(_LIB_RUNTIME._name) + _RUNTIME_ONLY = libinfo.use_runtime_lib() if _RUNTIME_ONLY: _LIB = _LIB_RUNTIME else: try: - _LIB = libinfo.load_lib_ctypes( + _LIB = load_lib_ctypes( "tvm", "tvm_compiler", "RTLD_LOCAL", extra_lib_paths=_extra_lib_paths ) except RuntimeError: diff --git a/python/tvm/libinfo.py b/python/tvm/libinfo.py index ae79df81f67c..136d85a49f30 100644 --- a/python/tvm/libinfo.py +++ b/python/tvm/libinfo.py @@ -18,12 +18,12 @@ from __future__ import annotations -import ctypes -import importlib.metadata as im import os import sys from pathlib import Path +from tvm_ffi.libinfo import load_lib_ctypes + def use_runtime_lib() -> bool: """Whether ``TVM_USE_RUNTIME_LIB`` requests runtime-only mode. @@ -39,108 +39,39 @@ def package_lib_paths() -> list[Path]: Anchored on this file's location (``python/tvm/libinfo.py``), the list covers the wheel-install layout (``python/tvm/lib/``) and the in-tree dev - build layouts (``/build/lib/`` and ``/lib/``). Callers + build layouts (``/build/lib/`` and ``/lib/``). + ``TVM_LIBRARY_PATH`` is prepended when set so it takes priority. Callers pick the basenames they want (e.g. ``libtvm_runtime.so``) and the load mode; this function only returns the search path. """ pkg = Path(__file__).parent # python/tvm/ - return [ + paths: list[Path] = [] + if os.environ.get("TVM_LIBRARY_PATH"): + paths.append(Path(os.environ["TVM_LIBRARY_PATH"])) + paths += [ pkg / "lib", # wheel layout pkg.parent.parent / "build" / "lib", # dev: /build/lib pkg.parent.parent / "lib", # dev: /lib ] + return paths -# Mirror of ``tvm_ffi.libinfo.{load_lib_ctypes,_find_library_by_basename}`` with -# the ``extra_lib_paths`` parameter from apache/tvm-ffi#570 so dev-mode lookups -# anchor on the *caller's* package root rather than tvm-ffi's own ``__file__``. -# Once apache/tvm-ffi#570 lands and the submodule bumps, drop these and switch -# ``base.py`` back to ``from tvm_ffi.libinfo import load_lib_ctypes``. - +_BACKEND_RUNTIME_LIBS = ["cuda", "vulkan", "opencl", "metal", "rocm", "hexagon", "extra"] -def _find_library_by_basename( - package: str, - target_name: str, - extra_lib_paths: list[Path] | None = None, -) -> Path: - """Resolve ``lib.{so,dylib,dll}`` for ``package``. - Search order: wheel-install RECORD walk → caller-supplied - ``extra_lib_paths`` → ``PATH`` / ``LD_LIBRARY_PATH`` / - ``DYLD_LIBRARY_PATH``. Raises ``RuntimeError`` listing every candidate - directory tried if nothing matches. - """ - if sys.platform.startswith("win32"): - lib_dll_names = (f"{target_name}.dll",) - elif sys.platform.startswith("darwin"): - lib_dll_names = (f"lib{target_name}.dylib", f"lib{target_name}.so") - else: - lib_dll_names = (f"lib{target_name}.so",) - - try: - dist = im.distribution(package) - record = dist.read_text("RECORD") or "" - for line in record.splitlines(): - partial_path, *_ = line.split(",") - if partial_path.endswith(lib_dll_names): - try: - path = (dist._path.parent / partial_path).resolve() - except OSError: - continue - if path.name in lib_dll_names and path.is_file(): - return path - except (im.PackageNotFoundError, OSError): - pass - - dll_paths: list[Path] = [] - if extra_lib_paths is not None: - for i, p in enumerate(extra_lib_paths): - if not isinstance(p, Path): - raise TypeError( - f"extra_lib_paths[{i}] must be a pathlib.Path, got {type(p).__name__}: {p!r}" - ) - dll_paths.extend(extra_lib_paths) - - if sys.platform.startswith("win32"): - dll_paths.extend(Path(p) for p in split_env_var("PATH", ";")) - elif sys.platform.startswith("darwin"): - dll_paths.extend(Path(p) for p in split_env_var("DYLD_LIBRARY_PATH", ":")) - dll_paths.extend(Path(p) for p in split_env_var("PATH", ":")) - else: - dll_paths.extend(Path(p) for p in split_env_var("LD_LIBRARY_PATH", ":")) - dll_paths.extend(Path(p) for p in split_env_var("PATH", ":")) - - for d in dll_paths: - for name in lib_dll_names: - try: - path = (d / name).resolve() - except OSError: - continue - if path.is_file(): - return path - - raise RuntimeError( - f"Cannot find library {', '.join(lib_dll_names)}; searched directories:\n " - + "\n ".join(str(p) for p in dll_paths) - ) - - -def load_lib_ctypes( - package: str, - target_name: str, - mode: str, - extra_lib_paths: list[Path] | None = None, -) -> ctypes.CDLL: - """Locate and ``ctypes.CDLL``-load ``lib`` for ``package``. - - ``mode`` is one of ``"RTLD_LOCAL"`` / ``"RTLD_GLOBAL"`` (resolved against - ``ctypes``). On Windows, the library's directory is registered via - ``os.add_dll_directory`` before the load. - """ - lib_path = _find_library_by_basename(package, target_name, extra_lib_paths) - if sys.platform.startswith("win32"): - os.add_dll_directory(str(lib_path.parent)) - return ctypes.CDLL(str(lib_path), getattr(ctypes, mode)) +def load_backend_libs(runtime_lib_path: str) -> None: + """Try to load each known backend runtime DSO; failures are silent.""" + runtime_dir = Path(runtime_lib_path).resolve().parent + for backend in _BACKEND_RUNTIME_LIBS: + try: + load_lib_ctypes( + package="tvm", + target_name=f"tvm_runtime_{backend}", + mode="RTLD_GLOBAL", + extra_lib_paths=[runtime_dir], + ) + except (OSError, FileNotFoundError, RuntimeError): + pass def split_env_var(env_var, split): diff --git a/python/tvm/runtime/__init__.py b/python/tvm/runtime/__init__.py index d4d4a6e5a1b4..67839fed02a2 100644 --- a/python/tvm/runtime/__init__.py +++ b/python/tvm/runtime/__init__.py @@ -44,7 +44,12 @@ load_param_dict_from_file, ) -from . import disco +try: + from . import disco +except (ImportError, ValueError): + # disco C++ runtime is in libtvm_runtime_extra which may not be present. + # Make the disco module optional. + disco = None # type: ignore[assignment] from .support import _regex_match from tvm_ffi import Shape as ShapeTuple diff --git a/tests/python/relax/test_frontend_onnx.py b/tests/python/relax/test_frontend_onnx.py index 2b0194f08578..427881243663 100644 --- a/tests/python/relax/test_frontend_onnx.py +++ b/tests/python/relax/test_frontend_onnx.py @@ -2311,9 +2311,7 @@ def test_layer_norm_with_nd_gamma_beta(): def test_rms_norm(): # Basic test: default axis=-1 - rms_norm_node = helper.make_node( - "RMSNormalization", ["input", "scale"], ["Y"], epsilon=1e-05 - ) + rms_norm_node = helper.make_node("RMSNormalization", ["input", "scale"], ["Y"], epsilon=1e-05) graph = helper.make_graph( [rms_norm_node],