diff --git a/CMakeLists.txt b/CMakeLists.txt
index e4065075cce8..46958c542e73 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -51,6 +51,7 @@ tvm_option(USE_HEXAGON_SDK "Path to the Hexagon SDK root (required for Hexagon s
 tvm_option(USE_HEXAGON_RPC "Enable Hexagon RPC using minRPC implementation over Android." OFF)
 tvm_option(USE_HEXAGON_GTEST "Path to Hexagon specific gtest version for runtime cpp tests." /path/to/hexagon/gtest)
 tvm_option(USE_HEXAGON_EXTERNAL_LIBS "Path to git repo containing external Hexagon runtime sources or libraries" OFF)
+
 tvm_option(USE_RPC "Build with RPC" ON)
 tvm_option(USE_THREADS "Build with thread support" ON)
 tvm_option(USE_LLVM "Build with LLVM, can be set to specific llvm-config path" OFF)
@@ -112,6 +113,18 @@ include_directories(SYSTEM ${COMPILER_RT_PATH})
 # initial variables
 set(TVM_LINKER_LIBS "")
 set(TVM_RUNTIME_LINKER_LIBS "")
+# Early target creation so contrib cmake files can call
+# target_link_libraries(tvm_runtime_extra PRIVATE <object_lib>) directly.
+add_library(tvm_runtime_extra SHARED)
+set_target_properties(tvm_runtime_extra PROPERTIES LINKER_LANGUAGE CXX)
+# INTERFACE target carrying compile definitions for OBJECT libs that build
+# into tvm_runtime_extra. On MSVC, TVM_RUNTIME_EXPORTS makes TVM_RUNTIME_DLL
+# expand to __declspec(dllexport) so that functions defined in extra modules
+# are properly exported from tvm_runtime_extra.dll.
+add_library(tvm_runtime_extra_defs INTERFACE)
+target_link_libraries(tvm_runtime_extra_defs INTERFACE tvm_ffi_header)
+target_compile_definitions(tvm_runtime_extra_defs
+  INTERFACE TVM_RUNTIME_EXPORTS TVM_FFI_EXPORTS)
 
 
 # Check if this is being run on its own or as a subdirectory for another project
@@ -328,10 +341,10 @@ tvm_file_glob(GLOB RUNTIME_SRCS
   src/runtime/*.cc
   src/runtime/vm/*.cc
   src/runtime/memory/*.cc
-  src/runtime/disco/*.cc
   src/runtime/minrpc/*.cc
-  src/runtime/vm/*.cc
 )
+# Note: src/runtime/disco/** moves to libtvm_runtime_extra.
+# Note: src/runtime/{cuda,vulkan,opencl,metal,rocm,hexagon}/* move to per-backend DSOs.
 set(TVM_RUNTIME_EXT_OBJS "")
 
 if(BUILD_FOR_HEXAGON)
@@ -343,17 +356,11 @@ if(BUILD_FOR_HEXAGON)
   add_definitions(-D_MACH_I32=int)
 endif()
 
-# distributed disco runtime are disabled for hexagon
-if (NOT BUILD_FOR_HEXAGON)
-  tvm_file_glob(GLOB RUNTIME_DISCO_DISTRIBUTED_SRCS src/runtime/disco/distributed/*.cc)
-  list(APPEND RUNTIME_SRCS ${RUNTIME_DISCO_DISTRIBUTED_SRCS})
-endif()
-
 # Package runtime rules
 if(NOT USE_RTTI)
 endif()
 
-if (INDEX_DEFAULT_I64)
+if(INDEX_DEFAULT_I64)
   add_definitions(-DTVM_INDEX_DEFAULT_I64=1)
 endif()
 
@@ -362,36 +369,8 @@ if(USE_RPC)
   tvm_file_glob(GLOB RUNTIME_RPC_SRCS src/runtime/rpc/*.cc)
   list(APPEND RUNTIME_SRCS ${RUNTIME_RPC_SRCS})
 endif(USE_RPC)
-
-if(USE_CUDA AND USE_NCCL)
-  message(STATUS "Build with NCCL...")
-  find_nccl(${USE_NCCL})
-  include_directories(SYSTEM ${NCCL_INCLUDE_DIR})
-  tvm_file_glob(GLOB RUNTIME_NCCL_SRC src/runtime/disco/nccl/*.cc src/runtime/disco/cuda_ipc/*.cc 3rdparty/tensorrt_llm/*.cu)
-  set_source_files_properties(src/runtime/disco/nccl/nccl.cc PROPERTIES COMPILE_DEFINITIONS "TVM_NCCL_RCCL_SWITCH=0")
-  list(APPEND RUNTIME_SRCS ${RUNTIME_NCCL_SRC})
-endif()
-
-if (USE_CUDA AND USE_NVSHMEM)
-  message(STATUS "Build with NVSHMEM...")
-  find_nvshmem(${USE_NVSHMEM})
-  if (NOT NVSHMEM_FOUND)
-    message(FATAL_ERROR "Cannot find NVSHMEM, USE_NVSHMEM=" ${USE_NVSHMEM})
-  endif()
-  set(CMAKE_CUDA_SEPARABLE_COMPILATION ON)
-  set(CMAKE_POSITION_INDEPENDENT_CODE ON)
-  tvm_file_glob(GLOB RUNTIME_NVSHMEM_SRCS src/runtime/contrib/nvshmem/*.cc src/runtime/contrib/nvshmem/*.cu)
-  list(APPEND RUNTIME_SRCS ${RUNTIME_NVSHMEM_SRCS})
-endif()
-
-if(USE_ROCM AND USE_RCCL)
-  message(STATUS "Build with RCCL...")
-  find_rccl(${USE_RCCL})
-  include_directories(SYSTEM ${RCCL_INCLUDE_DIR})
-  tvm_file_glob(GLOB RUNTIME_RCCL_SRC src/runtime/disco/nccl/*.cc)
-  set_source_files_properties(src/runtime/disco/nccl/nccl.cc PROPERTIES COMPILE_DEFINITIONS "TVM_NCCL_RCCL_SWITCH=1")
-  list(APPEND RUNTIME_SRCS ${RUNTIME_RCCL_SRC})
-endif()
+# Note: disco/**, NCCL, NVSHMEM, RCCL all move to libtvm_runtime_extra
+# (assembled inline below after all contrib cmake files).
 
 # Enable ctest if gtest is available
 if(USE_GTEST)
@@ -471,6 +450,90 @@ include(cmake/modules/contrib/ExampleNPU.cmake)
 include(cmake/modules/contrib/vllm.cmake)
 include(cmake/modules/Git.cmake)
 
+# ---- libtvm_runtime_extra assembly ----
+# Disco core sources.
+tvm_file_glob(GLOB _disco_core_srcs src/runtime/disco/*.cc)
+add_library(tvm_disco_objs OBJECT ${_disco_core_srcs})
+target_link_libraries(tvm_disco_objs PRIVATE tvm_runtime_extra_defs)
+target_link_libraries(tvm_runtime_extra PRIVATE tvm_disco_objs)
+
+# Distributed disco (disabled for Hexagon cross-compile).
+if(NOT BUILD_FOR_HEXAGON)
+  tvm_file_glob(GLOB _disco_dist_srcs src/runtime/disco/distributed/*.cc)
+  add_library(tvm_disco_distributed_objs OBJECT ${_disco_dist_srcs})
+  target_link_libraries(tvm_disco_distributed_objs PRIVATE tvm_runtime_extra_defs)
+  target_link_libraries(tvm_runtime_extra PRIVATE tvm_disco_distributed_objs)
+endif()
+
+# NCCL / cuda_ipc — requires CUDA + NCCL.
+if(USE_CUDA AND USE_NCCL)
+  find_nccl(${USE_NCCL})
+  include_directories(SYSTEM ${NCCL_INCLUDE_DIR})
+  tvm_file_glob(GLOB _nccl_srcs src/runtime/disco/nccl/*.cc src/runtime/disco/cuda_ipc/*.cc 3rdparty/tensorrt_llm/*.cu)
+  set_source_files_properties(src/runtime/disco/nccl/nccl.cc PROPERTIES COMPILE_DEFINITIONS "TVM_NCCL_RCCL_SWITCH=0")
+  add_library(tvm_nccl_objs OBJECT ${_nccl_srcs})
+  target_link_libraries(tvm_nccl_objs PRIVATE tvm_runtime_extra_defs)
+  find_library(LIBRT rt)
+  target_link_libraries(tvm_runtime_extra PRIVATE tvm_nccl_objs nccl ${LIBRT})
+endif()
+
+# NVSHMEM.
+if(USE_CUDA AND USE_NVSHMEM)
+  find_nvshmem(${USE_NVSHMEM})
+  if(NOT NVSHMEM_FOUND)
+    message(FATAL_ERROR "Cannot find NVSHMEM, USE_NVSHMEM=" ${USE_NVSHMEM})
+  endif()
+  set(CMAKE_CUDA_SEPARABLE_COMPILATION ON)
+  set(CMAKE_POSITION_INDEPENDENT_CODE ON)
+  tvm_file_glob(GLOB _nvshmem_srcs src/runtime/contrib/nvshmem/*.cc src/runtime/contrib/nvshmem/*.cu)
+  add_library(tvm_nvshmem_objs OBJECT ${_nvshmem_srcs})
+  target_link_libraries(tvm_nvshmem_objs PRIVATE tvm_runtime_extra_defs)
+  target_include_directories(tvm_nvshmem_objs PUBLIC ${NVSHMEM_INCLUDE_DIR})
+  find_library(NVSHMEM_HOST nvshmem_host ${NVSHMEM_LIB_DIR})
+  find_library(NVSHMEM_DEVICE nvshmem_device ${NVSHMEM_LIB_DIR})
+  target_link_libraries(tvm_runtime_extra PRIVATE tvm_nvshmem_objs ${NVSHMEM_HOST} ${NVSHMEM_DEVICE})
+  set_target_properties(tvm_runtime_extra PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
+endif()
+
+# RCCL.
+if(USE_ROCM AND USE_RCCL)
+  find_rccl(${USE_RCCL})
+  include_directories(SYSTEM ${RCCL_INCLUDE_DIR})
+  tvm_file_glob(GLOB _rccl_srcs src/runtime/disco/nccl/*.cc)
+  set_source_files_properties(src/runtime/disco/nccl/nccl.cc PROPERTIES COMPILE_DEFINITIONS "TVM_NCCL_RCCL_SWITCH=1")
+  add_library(tvm_rccl_objs OBJECT ${_rccl_srcs})
+  target_link_libraries(tvm_rccl_objs PRIVATE tvm_runtime_extra_defs)
+  target_link_libraries(tvm_runtime_extra PRIVATE tvm_rccl_objs rccl)
+endif()
+
+target_link_libraries(tvm_runtime_extra PUBLIC tvm_runtime)
+
+# If disco/cuda_ipc is included, link the CUDA DSO.
+if(USE_CUDA)
+  target_link_libraries(tvm_runtime_extra PUBLIC tvm_runtime_cuda)
+endif()
+
+# CUTLASS fpA_intB_gemm and flash_attn are separate shared libs.
+if(USE_CUDA AND USE_CUTLASS)
+  target_link_libraries(tvm_runtime_extra PRIVATE fpA_intB_gemm fpA_intB_gemm_tvm)
+  target_link_libraries(tvm_runtime_extra PRIVATE -Wl,--no-as-needed flash_attn)
+endif()
+
+if(TVM_VISIBILITY_FLAG)
+  set_property(TARGET tvm_runtime_extra APPEND PROPERTY LINK_OPTIONS "${TVM_VISIBILITY_FLAG}")
+endif()
+
+set_target_properties(tvm_runtime_extra PROPERTIES
+  LIBRARY_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/lib"
+  RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/lib"
+  ARCHIVE_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/lib"
+)
+
+install(TARGETS tvm_runtime_extra DESTINATION lib${LIB_SUFFIX})
+if(TVM_BUILD_PYTHON_MODULE)
+  install(TARGETS tvm_runtime_extra DESTINATION "lib")
+endif()
+
 add_library(tvm_objs OBJECT ${COMPILER_SRCS})
 add_library(tvm_runtime_objs OBJECT ${RUNTIME_SRCS})
 target_link_libraries(tvm_objs PUBLIC tvm_ffi_header)
@@ -762,45 +825,17 @@ dump_options_to_file("${TVM_ALL_OPTIONS}")
 
 if(USE_CUDA AND USE_CUTLASS)
   install(TARGETS fpA_intB_gemm EXPORT ${PROJECT_NAME}Targets DESTINATION lib${LIB_SUFFIX})
-  # fpA_intB_gemm is a separate shared library; link it into the runtime so
-  # the runtime exposes its kernels and tvm_compiler picks them up
-  # transitively at run time.
-  target_link_libraries(tvm_runtime PRIVATE fpA_intB_gemm)
-  # fpA_intB_gemm_tvm is an OBJECT library carrying the
-  # `fastertransformer.gemm_fp16_int` global registration. Linking it into
-  # both tvm_runtime and tvm_compiler causes the static initializer to run
-  # twice (once per shared library). Anchor it in tvm_runtime only.
-  target_link_libraries(tvm_runtime PRIVATE fpA_intB_gemm_tvm)
-
   install(TARGETS flash_attn EXPORT ${PROJECT_NAME}Targets DESTINATION lib${LIB_SUFFIX})
-  target_link_libraries(tvm_runtime PRIVATE -Wl,--no-as-needed flash_attn)
+  # fpA_intB_gemm, fpA_intB_gemm_tvm, and flash_attn are linked by
+  # tvm_runtime_extra (see the inline assembly block above); no link needed here.
 endif()
 
 if(USE_CUDA AND USE_NVTX)
   set_source_files_properties(src/runtime/nvtx.cc PROPERTIES COMPILE_DEFINITIONS "TVM_NVTX_ENABLED=1")
 endif()
 
-if(USE_CUDA AND USE_NCCL)
-  find_library(LIBRT rt)
-  # Runtime-only dependency.
-  target_link_libraries(tvm_runtime PRIVATE nccl ${LIBRT})
-endif()
-
-
-if (USE_CUDA AND USE_NVSHMEM)
-  target_include_directories(tvm_runtime_objs PUBLIC ${NVSHMEM_INCLUDE_DIR})
-  find_library(NVSHMEM_HOST nvshmem_host ${NVSHMEM_LIB_DIR})
-  find_library(NVSHMEM_DEVICE nvshmem_device ${NVSHMEM_LIB_DIR})
-  # Runtime-only dependency.
-  target_link_libraries(tvm_runtime PRIVATE ${NVSHMEM_HOST} ${NVSHMEM_DEVICE})
-  set_target_properties(tvm_runtime PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
-  set_target_properties(tvm_compiler PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
-endif()
-
-if(USE_ROCM AND USE_RCCL)
-  # Runtime-only dependency.
-  target_link_libraries(tvm_runtime PRIVATE rccl)
-endif()
+# Note: NCCL, NVSHMEM, RCCL target_link_libraries are handled in the inline
+# libtvm_runtime_extra assembly block above.
 
 # Python package installation configuration
 # This section ensures that all necessary files are installed for the Python wheel
diff --git a/ci/jenkins/data.py b/ci/jenkins/data.py
index 99e54e85330e..44cdba1d02b2 100644
--- a/ci/jenkins/data.py
+++ b/ci/jenkins/data.py
@@ -44,6 +44,11 @@
         "build/lib/libtvm_compiler.so",
         "build/lib/libtvm_runtime.so",
         "build/lib/libtvm_ffi.so",
+        "build/lib/libtvm_runtime_cuda.so",
+        "build/lib/libtvm_runtime_vulkan.so",
+        "build/lib/libtvm_runtime_opencl.so",
+        "build/lib/libtvm_runtime_rocm.so",
+        "build/lib/libtvm_runtime_extra.so",
         "build/libtvm_allvisible.so",
         "build/config.cmake",
     ],
diff --git a/ci/jenkins/generated/arm_jenkinsfile.groovy b/ci/jenkins/generated/arm_jenkinsfile.groovy
index 18c77a97da5b..a457cc8ee005 100644
--- a/ci/jenkins/generated/arm_jenkinsfile.groovy
+++ b/ci/jenkins/generated/arm_jenkinsfile.groovy
@@ -60,7 +60,7 @@
 // 'python3 jenkins/generate.py'
 // Note: This timestamp is here to ensure that updates to the Jenkinsfile are
 // always rebased on main before merging:
-// Generated at 2026-04-25T15:49:49.180036
+// Generated at 2026-05-21T18:31:35.598730
 
 import org.jenkinsci.plugins.pipeline.modeldefinition.Utils
 // These are set at runtime from data in ci/jenkins/docker-images.yml, update
@@ -496,7 +496,7 @@ def run_build(node_type) {
         cmake_build(ci_arm, 'build')
         make_cpp_tests(ci_arm, 'build')
         sh(
-            script: "./${jenkins_scripts_root}/s3.py --action upload --bucket ${s3_bucket} --prefix ${s3_prefix}/arm --items build/lib/libtvm_compiler.so build/lib/libtvm_runtime.so build/lib/libtvm_ffi.so build/config.cmake build/cpptest build/build.ninja build/CMakeFiles/rules.ninja",
+            script: "./${jenkins_scripts_root}/s3.py --action upload --bucket ${s3_bucket} --prefix ${s3_prefix}/arm --bundle tvm_lib --bundle cpptest",
             label: 'Upload artifacts to S3',
           )
           })
diff --git a/ci/jenkins/generated/cpu_jenkinsfile.groovy b/ci/jenkins/generated/cpu_jenkinsfile.groovy
index 4c7eb8402de6..995e96662ff4 100644
--- a/ci/jenkins/generated/cpu_jenkinsfile.groovy
+++ b/ci/jenkins/generated/cpu_jenkinsfile.groovy
@@ -60,7 +60,7 @@
 // 'python3 jenkins/generate.py'
 // Note: This timestamp is here to ensure that updates to the Jenkinsfile are
 // always rebased on main before merging:
-// Generated at 2026-04-25T15:49:49.168038
+// Generated at 2026-05-21T18:31:35.583867
 
 import org.jenkinsci.plugins.pipeline.modeldefinition.Utils
 // These are set at runtime from data in ci/jenkins/docker-images.yml, update
@@ -496,7 +496,7 @@ def run_build(node_type) {
         cmake_build(ci_cpu, 'build')
         make_cpp_tests(ci_cpu, 'build')
         sh(
-            script: "./${jenkins_scripts_root}/s3.py --action upload --bucket ${s3_bucket} --prefix ${s3_prefix}/cpu --items build/lib/libtvm_compiler.so build/lib/libtvm_runtime.so build/lib/libtvm_ffi.so build/config.cmake build/cpptest build/build.ninja build/CMakeFiles/rules.ninja",
+            script: "./${jenkins_scripts_root}/s3.py --action upload --bucket ${s3_bucket} --prefix ${s3_prefix}/cpu --bundle tvm_lib --bundle cpptest",
             label: 'Upload artifacts to S3',
           )
           })
diff --git a/ci/jenkins/generated/gpu_jenkinsfile.groovy b/ci/jenkins/generated/gpu_jenkinsfile.groovy
index 5afd4aa0b2c2..539e379bf623 100644
--- a/ci/jenkins/generated/gpu_jenkinsfile.groovy
+++ b/ci/jenkins/generated/gpu_jenkinsfile.groovy
@@ -60,7 +60,7 @@
 // 'python3 jenkins/generate.py'
 // Note: This timestamp is here to ensure that updates to the Jenkinsfile are
 // always rebased on main before merging:
-// Generated at 2026-04-25T15:49:49.200674
+// Generated at 2026-05-21T18:31:35.612295
 
 import org.jenkinsci.plugins.pipeline.modeldefinition.Utils
 // These are set at runtime from data in ci/jenkins/docker-images.yml, update
@@ -492,7 +492,7 @@ def run_build(node_type) {
             sh "${docker_run} --no-gpu ${ci_gpu} ./tests/scripts/task_config_build_gpu.sh build"
         cmake_build("${ci_gpu} --no-gpu", 'build')
         sh(
-            script: "./${jenkins_scripts_root}/s3.py --action upload --bucket ${s3_bucket} --prefix ${s3_prefix}/gpu --items build/lib/libtvm_compiler.so build/lib/libtvm_runtime.so build/lib/libtvm_ffi.so build/config.cmake build/3rdparty/libflash_attn/src/libflash_attn.so build/3rdparty/cutlass_fpA_intB_gemm/cutlass_kernels/libfpA_intB_gemm.so",
+            script: "./${jenkins_scripts_root}/s3.py --action upload --bucket ${s3_bucket} --prefix ${s3_prefix}/gpu --bundle tvm_lib --bundle tvm_lib_gpu_extra",
             label: 'Upload artifacts to S3',
           )
 
@@ -502,7 +502,7 @@ def run_build(node_type) {
         sh "${docker_run} --no-gpu ${ci_gpu} ./tests/scripts/task_config_build_gpu_other.sh build"
         cmake_build("${ci_gpu} --no-gpu", 'build')
         sh(
-            script: "./${jenkins_scripts_root}/s3.py --action upload --bucket ${s3_bucket} --prefix ${s3_prefix}/gpu2 --items build/lib/libtvm_compiler.so build/lib/libtvm_runtime.so build/lib/libtvm_ffi.so build/config.cmake",
+            script: "./${jenkins_scripts_root}/s3.py --action upload --bucket ${s3_bucket} --prefix ${s3_prefix}/gpu2 --bundle tvm_lib",
             label: 'Upload artifacts to S3',
           )
           })
diff --git a/ci/jenkins/templates/cpu_jenkinsfile.groovy.j2 b/ci/jenkins/templates/cpu_jenkinsfile.groovy.j2
index 06a1660ecb21..d2e479d5e87a 100644
--- a/ci/jenkins/templates/cpu_jenkinsfile.groovy.j2
+++ b/ci/jenkins/templates/cpu_jenkinsfile.groovy.j2
@@ -31,7 +31,7 @@
   )
   cmake_build(ci_cpu, 'build')
   make_cpp_tests(ci_cpu, 'build')
-  {{ m.upload_artifacts(tag='cpu', filenames=tvm_lib + cpptest) }}
+  {{ m.upload_artifacts(tag='cpu', bundles=["tvm_lib", "cpptest"]) }}
 {% endcall %}
 
 {% set test_method_names = [] %}
diff --git a/ci/jenkins/templates/gpu_jenkinsfile.groovy.j2 b/ci/jenkins/templates/gpu_jenkinsfile.groovy.j2
index 2b7f5f75c98c..7ab5256419f0 100644
--- a/ci/jenkins/templates/gpu_jenkinsfile.groovy.j2
+++ b/ci/jenkins/templates/gpu_jenkinsfile.groovy.j2
@@ -27,7 +27,7 @@
 ) %}
   sh "${docker_run} --no-gpu ${ci_gpu} ./tests/scripts/task_config_build_gpu.sh build"
   cmake_build("${ci_gpu} --no-gpu", 'build')
-  {{ m.upload_artifacts(tag='gpu', filenames=tvm_lib + tvm_lib_gpu_extra) }}
+  {{ m.upload_artifacts(tag='gpu', bundles=["tvm_lib", "tvm_lib_gpu_extra"]) }}
 
   // compiler test
   sh "rm -rf build"
diff --git a/ci/scripts/jenkins/s3.py b/ci/scripts/jenkins/s3.py
index eb986dec996f..e2e65dcadb8a 100755
--- a/ci/scripts/jenkins/s3.py
+++ b/ci/scripts/jenkins/s3.py
@@ -170,7 +170,13 @@ def s3(source: str, destination: str, recursive: bool) -> list[str]:
             if item != ".":
                 source = s3_path + "/" + item
                 recursive = False
-            stdout = s3(source=source, destination=item, recursive=recursive)
+            try:
+                stdout = s3(source=source, destination=item, recursive=recursive)
+            except Exception:
+                # Optional artifacts (e.g. per-backend device runtime DSOs) may not
+                # exist in S3 when the build config didn't produce them. Skip silently.
+                logging.warning(f"Download failed for {item}, skipping (may be optional)")
+                continue
             files = parse_output_files(stdout)
             chmod(files)
             for file in files:
diff --git a/cmake/modules/CUDA.cmake b/cmake/modules/CUDA.cmake
index a79e55883739..e56396c1a620 100644
--- a/cmake/modules/CUDA.cmake
+++ b/cmake/modules/CUDA.cmake
@@ -50,12 +50,6 @@ if(USE_CUDA)
   # [0] https://github.com/Kitware/CMake/commit/6377a438
   set(CMAKE_CUDA_USE_RESPONSE_FILE_FOR_INCLUDES 0)
 
-  tvm_file_glob(GLOB RUNTIME_CUDA_SRCS src/runtime/cuda/*.cc)
-  list(APPEND RUNTIME_SRCS ${RUNTIME_CUDA_SRCS})
-
-  list(APPEND TVM_RUNTIME_LINKER_LIBS ${CUDA_CUDART_LIBRARY})
-  list(APPEND TVM_RUNTIME_LINKER_LIBS ${CUDA_CUDA_LIBRARY})
-
   if(NOT DEFINED CMAKE_CUDA_ARCHITECTURES)
     if(CMAKE_VERSION VERSION_LESS "3.24")
       message(FATAL_ERROR "CMAKE_CUDA_ARCHITECTURES not set. Please upgrade CMake to 3.24 to use native, or set CMAKE_CUDA_ARCHITECTURES manually")
@@ -63,77 +57,102 @@ if(USE_CUDA)
     message(STATUS "CMAKE_CUDA_ARCHITECTURES not set, using native")
     set(CMAKE_CUDA_ARCHITECTURES native)
   endif()
+endif(USE_CUDA)
 
-  if(USE_CUDNN)
-    message(STATUS "Build with cuDNN support")
-    include_directories(SYSTEM ${CUDA_CUDNN_INCLUDE_DIRS})
-    tvm_file_glob(GLOB CUDNN_RELAX_CONTRIB_SRC src/relax/backend/contrib/cudnn/*.cc)
-    list(APPEND COMPILER_SRCS ${CUDNN_RELAX_CONTRIB_SRC})
-    tvm_file_glob(GLOB CONTRIB_CUDNN_SRCS src/runtime/contrib/cudnn/*.cc)
-    list(APPEND RUNTIME_SRCS ${CONTRIB_CUDNN_SRCS})
-    list(APPEND TVM_RUNTIME_LINKER_LIBS ${CUDA_CUDNN_LIBRARY})
-  endif(USE_CUDNN)
-
-  if (USE_CUDNN_FRONTEND)
-    message(STATUS "Build with cuDNN Frontend support")
-    if (IS_DIRECTORY ${USE_CUDNN_FRONTEND})
-      find_file(CUDNN_FRONTEND_HEADER cudnn_frontend.h HINTS ${USE_CUDNN_FRONTEND}/include)
-      include_directories(SYSTEM ${USE_CUDNN_FRONTEND}/include)
-    else()
-      find_file(CUDNN_FRONTEND_HEADER cudnn_frontend.h)
-    endif()
-    if (NOT CUDNN_FRONTEND_HEADER)
-      message(FATAL_ERROR "Cannot find cudnn_frontend.h, please set USE_CUDNN_FRONTEND to the path of the cuDNN frontend header")
-    endif()
-    tvm_file_glob(GLOB CONTRIB_CUDNN_FRONTEND_SRCS src/runtime/contrib/cudnn/cudnn_frontend/*.cc)
-    set_property(SOURCE ${CONTRIB_CUDNN_SRCS} APPEND PROPERTY COMPILE_DEFINITIONS TVM_USE_CUDNN_FRONTEND=1)
-    list(APPEND RUNTIME_SRCS ${CONTRIB_CUDNN_FRONTEND_SRCS})
-  endif(USE_CUDNN_FRONTEND)
-
-  if(USE_CUBLAS)
-    message(STATUS "Build with cuBLAS support")
-    tvm_file_glob(GLOB CUBLAS_CONTRIB_SRC src/relax/backend/contrib/cublas/*.cc)
-    list(APPEND COMPILER_SRCS ${CUBLAS_CONTRIB_SRC})
-    tvm_file_glob(GLOB CONTRIB_CUBLAS_SRCS src/runtime/contrib/cublas/*.cc)
-    list(APPEND RUNTIME_SRCS ${CONTRIB_CUBLAS_SRCS})
-    list(APPEND TVM_RUNTIME_LINKER_LIBS ${CUDA_CUBLAS_LIBRARY})
-    if(NOT CUDA_CUBLASLT_LIBRARY STREQUAL "CUDA_CUBLASLT_LIBRARY-NOTFOUND")
-      list(APPEND TVM_RUNTIME_LINKER_LIBS ${CUDA_CUBLASLT_LIBRARY})
-    endif()
-  endif(USE_CUBLAS)
+if(USE_CUDA)
+  message(STATUS "Build cuda device runtime")
 
-  if(USE_THRUST)
-    message(STATUS "Build with Thrust support")
-    tvm_file_glob(GLOB CONTRIB_THRUST_SRC src/runtime/contrib/thrust/*.cu)
-    add_library(tvm_thrust_objs OBJECT ${CONTRIB_THRUST_SRC})
-    target_link_libraries(tvm_thrust_objs PRIVATE tvm_ffi_header)
-    target_compile_options(tvm_thrust_objs PRIVATE $<$<COMPILE_LANGUAGE:CUDA>:--expt-extended-lambda>)
-    if (NOT USE_THRUST MATCHES ${IS_TRUE_PATTERN})
-      find_package(CCCL REQUIRED COMPONENTS Thrust)
-      target_link_libraries(tvm_thrust_objs PRIVATE CCCL::Thrust)
-    endif()
-    list(APPEND TVM_RUNTIME_EXT_OBJS $<TARGET_OBJECTS:tvm_thrust_objs>)
-  endif(USE_THRUST)
+  tvm_file_glob(GLOB RUNTIME_CUDA_SRCS src/runtime/cuda/*.cc)
+  tvm_file_glob(GLOB VM_CUDA_BUILTIN_SRC_CC src/runtime/vm/cuda/*.cc)
 
-  if(USE_CURAND)
-    message(STATUS "Build with cuRAND support")
-    message(STATUS "${CUDA_CURAND_LIBRARY}")
-    tvm_file_glob(GLOB CONTRIB_CURAND_SRC_CC src/runtime/contrib/curand/*.cc)
-    tvm_file_glob(GLOB CONTRIB_CURAND_SRC_CU src/runtime/contrib/curand/*.cu)
-    list(APPEND TVM_RUNTIME_LINKER_LIBS ${CUDA_CURAND_LIBRARY})
-    list(APPEND RUNTIME_SRCS ${CONTRIB_CURAND_SRC_CC})
-    list(APPEND RUNTIME_SRCS ${CONTRIB_CURAND_SRC_CU})
-  endif(USE_CURAND)
+  add_library(tvm_runtime_cuda_objs OBJECT ${RUNTIME_CUDA_SRCS} ${VM_CUDA_BUILTIN_SRC_CC})
+  target_link_libraries(tvm_runtime_cuda_objs PUBLIC tvm_ffi_header)
+  set_target_properties(tvm_runtime_cuda_objs PROPERTIES POSITION_INDEPENDENT_CODE ON)
+  if(TVM_VISIBILITY_FLAG)
+    target_compile_options(tvm_runtime_cuda_objs PRIVATE "${TVM_VISIBILITY_FLAG}")
+  endif()
+  add_library(tvm_runtime_cuda SHARED $<TARGET_OBJECTS:tvm_runtime_cuda_objs>)
+  target_link_libraries(tvm_runtime_cuda PUBLIC tvm_runtime ${CUDA_CUDART_LIBRARY} ${CUDA_CUDA_LIBRARY})
+  set_target_properties(tvm_runtime_cuda PROPERTIES
+    LIBRARY_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/lib"
+    RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/lib"
+    ARCHIVE_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/lib"
+  )
+  install(TARGETS tvm_runtime_cuda DESTINATION lib${LIB_SUFFIX})
+  if(TVM_BUILD_PYTHON_MODULE)
+    install(TARGETS tvm_runtime_cuda DESTINATION "lib")
+  endif()
 
   if(USE_NVTX)
     message(STATUS "Build with NVTX support")
-    message(STATUS "${CUDA_NVTX_LIBRARY}")
-    cmake_minimum_required(VERSION 3.13) # to compile CUDA code
-    enable_language(CUDA)
-    list(APPEND TVM_RUNTIME_LINKER_LIBS ${CUDA_NVTX_LIBRARY})
-  endif(USE_NVTX)
-
-  # Add CUDA builtins to RelaxVM
-  tvm_file_glob(GLOB VM_CUDA_BUILTIN_SRC_CC src/runtime/vm/cuda/*.cc)
-  list(APPEND RUNTIME_SRCS ${VM_CUDA_BUILTIN_SRC_CC})
+    target_link_libraries(tvm_runtime_cuda PRIVATE ${CUDA_NVTX_LIBRARY})
+  endif()
 endif(USE_CUDA)
+
+# Contrib sources gated by USE_CUDA go into libtvm_runtime_extra.
+# See the RuntimeExtra assembly block in CMakeLists.txt.
+
+if(USE_CUDA AND USE_CUDNN)
+  message(STATUS "Build with cuDNN support")
+  include_directories(SYSTEM ${CUDA_CUDNN_INCLUDE_DIRS})
+  tvm_file_glob(GLOB CUDNN_RELAX_CONTRIB_SRC src/relax/backend/contrib/cudnn/*.cc)
+  list(APPEND COMPILER_SRCS ${CUDNN_RELAX_CONTRIB_SRC})
+  tvm_file_glob(GLOB CONTRIB_CUDNN_SRCS src/runtime/contrib/cudnn/*.cc)
+  add_library(tvm_cudnn_objs OBJECT ${CONTRIB_CUDNN_SRCS})
+  target_link_libraries(tvm_cudnn_objs PRIVATE tvm_runtime_extra_defs)
+  target_link_libraries(tvm_runtime_extra PRIVATE tvm_cudnn_objs ${CUDA_CUDNN_LIBRARY})
+endif(USE_CUDNN)
+
+if(USE_CUDA AND USE_CUDNN_FRONTEND)
+  message(STATUS "Build with cuDNN Frontend support")
+  if(IS_DIRECTORY ${USE_CUDNN_FRONTEND})
+    find_file(CUDNN_FRONTEND_HEADER cudnn_frontend.h HINTS ${USE_CUDNN_FRONTEND}/include)
+    include_directories(SYSTEM ${USE_CUDNN_FRONTEND}/include)
+  else()
+    find_file(CUDNN_FRONTEND_HEADER cudnn_frontend.h)
+  endif()
+  if(NOT CUDNN_FRONTEND_HEADER)
+    message(FATAL_ERROR "Cannot find cudnn_frontend.h, please set USE_CUDNN_FRONTEND to the path of the cuDNN frontend header")
+  endif()
+  tvm_file_glob(GLOB CONTRIB_CUDNN_FRONTEND_SRCS src/runtime/contrib/cudnn/cudnn_frontend/*.cc)
+  set_source_files_properties(${CONTRIB_CUDNN_SRCS} PROPERTIES COMPILE_DEFINITIONS TVM_USE_CUDNN_FRONTEND=1)
+  add_library(tvm_cudnn_frontend_objs OBJECT ${CONTRIB_CUDNN_FRONTEND_SRCS})
+  target_link_libraries(tvm_cudnn_frontend_objs PRIVATE tvm_runtime_extra_defs)
+  target_link_libraries(tvm_runtime_extra PRIVATE tvm_cudnn_frontend_objs)
+endif(USE_CUDA AND USE_CUDNN_FRONTEND)
+
+if(USE_CUDA AND USE_CUBLAS)
+  message(STATUS "Build with cuBLAS support")
+  tvm_file_glob(GLOB CUBLAS_CONTRIB_SRC src/relax/backend/contrib/cublas/*.cc)
+  list(APPEND COMPILER_SRCS ${CUBLAS_CONTRIB_SRC})
+  tvm_file_glob(GLOB CONTRIB_CUBLAS_SRCS src/runtime/contrib/cublas/*.cc)
+  add_library(tvm_cublas_objs OBJECT ${CONTRIB_CUBLAS_SRCS})
+  target_link_libraries(tvm_cublas_objs PRIVATE tvm_runtime_extra_defs)
+  target_link_libraries(tvm_runtime_extra PRIVATE tvm_cublas_objs ${CUDA_CUBLAS_LIBRARY})
+  if(NOT CUDA_CUBLASLT_LIBRARY STREQUAL "CUDA_CUBLASLT_LIBRARY-NOTFOUND")
+    target_link_libraries(tvm_runtime_extra PRIVATE ${CUDA_CUBLASLT_LIBRARY})
+  endif()
+endif(USE_CUDA AND USE_CUBLAS)
+
+if(USE_CUDA AND USE_THRUST)
+  message(STATUS "Build with Thrust support")
+  tvm_file_glob(GLOB CONTRIB_THRUST_SRC src/runtime/contrib/thrust/*.cu)
+  add_library(tvm_thrust_objs OBJECT ${CONTRIB_THRUST_SRC})
+  target_link_libraries(tvm_thrust_objs PRIVATE tvm_runtime_extra_defs)
+  target_compile_options(tvm_thrust_objs PRIVATE $<$<COMPILE_LANGUAGE:CUDA>:--expt-extended-lambda>)
+  if(NOT USE_THRUST MATCHES ${IS_TRUE_PATTERN})
+    find_package(CCCL REQUIRED COMPONENTS Thrust)
+    target_link_libraries(tvm_thrust_objs PRIVATE CCCL::Thrust)
+  endif()
+  target_link_libraries(tvm_runtime_extra PRIVATE tvm_thrust_objs)
+endif(USE_CUDA AND USE_THRUST)
+
+if(USE_CUDA AND USE_CURAND)
+  message(STATUS "Build with cuRAND support")
+  message(STATUS "${CUDA_CURAND_LIBRARY}")
+  tvm_file_glob(GLOB CONTRIB_CURAND_SRC_CC src/runtime/contrib/curand/*.cc)
+  tvm_file_glob(GLOB CONTRIB_CURAND_SRC_CU src/runtime/contrib/curand/*.cu)
+  add_library(tvm_curand_objs OBJECT ${CONTRIB_CURAND_SRC_CC} ${CONTRIB_CURAND_SRC_CU})
+  target_link_libraries(tvm_curand_objs PRIVATE tvm_runtime_extra_defs)
+  target_link_libraries(tvm_runtime_extra PRIVATE tvm_curand_objs ${CUDA_CURAND_LIBRARY})
+endif(USE_CUDA AND USE_CURAND)
diff --git a/cmake/modules/Hexagon.cmake b/cmake/modules/Hexagon.cmake
index 59953744084f..370d968e623d 100644
--- a/cmake/modules/Hexagon.cmake
+++ b/cmake/modules/Hexagon.cmake
@@ -119,7 +119,8 @@ function(add_hexagon_wrapper_paths)
 endfunction()
 
 if(BUILD_FOR_HEXAGON)
-  # Common sources for TVM runtime with Hexagon support
+  # When building FOR Hexagon (the DSP itself), all runtime sources go into
+  # the single libtvm_runtime (static or shared). No per-backend DSO split.
   file_glob_append(RUNTIME_HEXAGON_SRCS
     "${TVMRT_SOURCE_DIR}/hexagon/*.cc"
   )
@@ -156,7 +157,7 @@ if(BUILD_FOR_HEXAGON)
 
   set(USE_CUSTOM_LOGGING ON) # To use a custom logger
 
-# QHL support.
+  # QHL support.
   if(USE_HEXAGON_QHL)
     file_glob_append(TVM_QHL_WRAPPER_SRCS
       "${TVMRT_SOURCE_DIR}/hexagon/qhl/*.cc"
@@ -201,10 +202,10 @@ if(BUILD_FOR_HEXAGON)
   # Include hexagon external library runtime sources
   if(USE_HEXAGON_EXTERNAL_LIBS)
     # Check if the libs are provided as an absolute path
-    if (EXISTS ${USE_HEXAGON_EXTERNAL_LIBS})
+    if(EXISTS ${USE_HEXAGON_EXTERNAL_LIBS})
     # Check if the libs are provided as a git url
     elseif(USE_HEXAGON_EXTERNAL_LIBS MATCHES "\.git$")
-      if (NOT DEFINED HEXAGON_EXTERNAL_LIBS_SHA)
+      if(NOT DEFINED HEXAGON_EXTERNAL_LIBS_SHA)
         message(FATAL_ERROR "HEXAGON_EXTERNA_LIBS_SHA must be set when "
           "USE_HEXAGON_EXTERNAL_LIBS is set to a git repository")
       endif()
@@ -224,7 +225,7 @@ if(BUILD_FOR_HEXAGON)
       "${USE_HEXAGON_EXTERNAL_LIBS}/src/runtime/hexagon/*.cc"
     )
     list(APPEND RUNTIME_HEXAGON_SRCS "${HEXAGON_EXTERNAL_RUNTIME_SRCS}")
-    if (EXISTS "${USE_HEXAGON_EXTERNAL_LIBS}/HexagonExternalCompileFlags.cmake")
+    if(EXISTS "${USE_HEXAGON_EXTERNAL_LIBS}/HexagonExternalCompileFlags.cmake")
       # External libraries will define HEXAGON_EXTERNAL_LIBS_COMPILE_FLAGS,
       # changing this variable name will break downstream external libraries.
       include("${USE_HEXAGON_EXTERNAL_LIBS}/HexagonExternalCompileFlags.cmake")
@@ -329,4 +330,28 @@ if(USE_HEXAGON_RPC)
   endif()
 endif()   # USE_HEXAGON_RPC
 
-list(APPEND RUNTIME_SRCS ${RUNTIME_HEXAGON_SRCS} ${TVM_QHL_WRAPPER_SRCS})
+# When building for the Hexagon DSP itself, all sources fold into
+# libtvm_runtime (static/shared). When building for a host with
+# USE_HEXAGON=ON, create a separate libtvm_runtime_hexagon.so.
+if(BUILD_FOR_HEXAGON)
+  list(APPEND RUNTIME_SRCS ${RUNTIME_HEXAGON_SRCS} ${TVM_QHL_WRAPPER_SRCS})
+elseif(USE_HEXAGON)
+  message(STATUS "Build hexagon device runtime")
+  add_library(tvm_runtime_hexagon_objs OBJECT ${RUNTIME_HEXAGON_SRCS} ${TVM_QHL_WRAPPER_SRCS})
+  target_link_libraries(tvm_runtime_hexagon_objs PUBLIC tvm_ffi_header)
+  set_target_properties(tvm_runtime_hexagon_objs PROPERTIES POSITION_INDEPENDENT_CODE ON)
+  if(TVM_VISIBILITY_FLAG)
+    target_compile_options(tvm_runtime_hexagon_objs PRIVATE "${TVM_VISIBILITY_FLAG}")
+  endif()
+  add_library(tvm_runtime_hexagon SHARED $<TARGET_OBJECTS:tvm_runtime_hexagon_objs>)
+  target_link_libraries(tvm_runtime_hexagon PUBLIC tvm_runtime)
+  set_target_properties(tvm_runtime_hexagon PROPERTIES
+    LIBRARY_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/lib"
+    RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/lib"
+    ARCHIVE_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/lib"
+  )
+  install(TARGETS tvm_runtime_hexagon DESTINATION lib${LIB_SUFFIX})
+  if(TVM_BUILD_PYTHON_MODULE)
+    install(TARGETS tvm_runtime_hexagon DESTINATION "lib")
+  endif()
+endif()
diff --git a/cmake/modules/Metal.cmake b/cmake/modules/Metal.cmake
index a9f0e9dd533e..73ba1f5d6a99 100644
--- a/cmake/modules/Metal.cmake
+++ b/cmake/modules/Metal.cmake
@@ -16,12 +16,28 @@
 # under the License.
 
 if(USE_METAL)
-  message(STATUS "Build with Metal support")
+  message(STATUS "Build metal device runtime")
   find_library(METAL_LIB Metal)
   find_library(FOUNDATION_LIB Foundation)
   tvm_file_glob(GLOB RUNTIME_METAL_SRCS src/runtime/metal/*.mm)
-  list(APPEND TVM_RUNTIME_LINKER_LIBS ${METAL_LIB} ${FOUNDATION_LIB})
-  list(APPEND RUNTIME_SRCS ${RUNTIME_METAL_SRCS})
+
+  add_library(tvm_runtime_metal_objs OBJECT ${RUNTIME_METAL_SRCS})
+  target_link_libraries(tvm_runtime_metal_objs PUBLIC tvm_ffi_header)
+  set_target_properties(tvm_runtime_metal_objs PROPERTIES POSITION_INDEPENDENT_CODE ON)
+  if(TVM_VISIBILITY_FLAG)
+    target_compile_options(tvm_runtime_metal_objs PRIVATE "${TVM_VISIBILITY_FLAG}")
+  endif()
+  add_library(tvm_runtime_metal SHARED $<TARGET_OBJECTS:tvm_runtime_metal_objs>)
+  target_link_libraries(tvm_runtime_metal PUBLIC tvm_runtime ${METAL_LIB} ${FOUNDATION_LIB})
+  set_target_properties(tvm_runtime_metal PROPERTIES
+    LIBRARY_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/lib"
+    RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/lib"
+    ARCHIVE_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/lib"
+  )
+  install(TARGETS tvm_runtime_metal DESTINATION lib${LIB_SUFFIX})
+  if(TVM_BUILD_PYTHON_MODULE)
+    install(TARGETS tvm_runtime_metal DESTINATION "lib")
+  endif()
 endif(USE_METAL)
 # When USE_METAL=OFF the codegen-side fallback in
 # src/target/metal/metal_fallback_module.cc handles construction; no opt
diff --git a/cmake/modules/OpenCL.cmake b/cmake/modules/OpenCL.cmake
index 3076c5f2275b..a90e9cfe1469 100644
--- a/cmake/modules/OpenCL.cmake
+++ b/cmake/modules/OpenCL.cmake
@@ -18,6 +18,7 @@
 if(USE_OPENCL)
   tvm_file_glob(GLOB RUNTIME_OPENCL_SRCS src/runtime/opencl/*.cc)
 
+  set(_opencl_libs "")
   if(${USE_OPENCL} MATCHES ${IS_TRUE_PATTERN})
     message(STATUS "Enabled runtime search for OpenCL library location")
     file_glob_append(RUNTIME_OPENCL_SRCS
@@ -27,14 +28,33 @@ if(USE_OPENCL)
   else()
     find_opencl(${USE_OPENCL})
     if(NOT OpenCL_FOUND)
-        message(FATAL_ERROR "Error! Cannot find specified OpenCL library")
+      message(FATAL_ERROR "Error! Cannot find specified OpenCL library")
     endif()
     message(STATUS "Build with OpenCL support")
     include_directories(SYSTEM ${OpenCL_INCLUDE_DIRS})
-    list(APPEND TVM_RUNTIME_LINKER_LIBS ${OpenCL_LIBRARIES})
+    list(APPEND _opencl_libs ${OpenCL_LIBRARIES})
+  endif()
+
+  message(STATUS "Build opencl device runtime")
+
+  add_library(tvm_runtime_opencl_objs OBJECT ${RUNTIME_OPENCL_SRCS})
+  target_link_libraries(tvm_runtime_opencl_objs PUBLIC tvm_ffi_header)
+  set_target_properties(tvm_runtime_opencl_objs PROPERTIES POSITION_INDEPENDENT_CODE ON)
+  if(TVM_VISIBILITY_FLAG)
+    target_compile_options(tvm_runtime_opencl_objs PRIVATE "${TVM_VISIBILITY_FLAG}")
+  endif()
+  add_library(tvm_runtime_opencl SHARED $<TARGET_OBJECTS:tvm_runtime_opencl_objs>)
+  target_link_libraries(tvm_runtime_opencl PUBLIC tvm_runtime ${_opencl_libs})
+  set_target_properties(tvm_runtime_opencl PROPERTIES
+    LIBRARY_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/lib"
+    RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/lib"
+    ARCHIVE_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/lib"
+  )
+  install(TARGETS tvm_runtime_opencl DESTINATION lib${LIB_SUFFIX})
+  if(TVM_BUILD_PYTHON_MODULE)
+    install(TARGETS tvm_runtime_opencl DESTINATION "lib")
   endif()
 
-  list(APPEND RUNTIME_SRCS ${RUNTIME_OPENCL_SRCS})
   if(USE_OPENCL_ENABLE_HOST_PTR)
     add_definitions(-DOPENCL_ENABLE_HOST_PTR)
   endif(USE_OPENCL_ENABLE_HOST_PTR)
diff --git a/cmake/modules/ROCM.cmake b/cmake/modules/ROCM.cmake
index 366dde3a6957..bc0159377b01 100644
--- a/cmake/modules/ROCM.cmake
+++ b/cmake/modules/ROCM.cmake
@@ -26,45 +26,65 @@ if(ROCM_FOUND)
   add_definitions(-D__HIP_PLATFORM_AMD__=1)
 endif(ROCM_FOUND)
 
-
 if(USE_ROCM)
   if(NOT ROCM_FOUND)
     message(FATAL_ERROR "Cannot find ROCM, USE_ROCM=" ${USE_ROCM})
   endif()
-  message(STATUS "Build with ROCM support")
+  message(STATUS "Build rocm device runtime")
+
   tvm_file_glob(GLOB RUNTIME_ROCM_SRCS src/runtime/rocm/*.cc)
-  list(APPEND RUNTIME_SRCS ${RUNTIME_ROCM_SRCS})
-  list(APPEND TVM_RUNTIME_LINKER_LIBS ${ROCM_HIPHCC_LIBRARY})
-  if (ROCM_HSA_LIBRARY)
-    list(APPEND TVM_RUNTIME_LINKER_LIBS ${ROCM_HSA_LIBRARY})
+
+  set(_rocm_libs ${ROCM_HIPHCC_LIBRARY})
+  if(ROCM_HSA_LIBRARY)
+    list(APPEND _rocm_libs ${ROCM_HSA_LIBRARY})
   endif()
 
-  if(USE_HIPBLAS)
-    message(STATUS "Build with HIPBLAS support")
-    tvm_file_glob(GLOB HIPBLAS_CONTRIB_SRC src/relax/backend/contrib/hipblas/*.cc)
-    list(APPEND COMPILER_SRCS ${HIPBLAS_CONTRIB_SRC})
-    tvm_file_glob(GLOB HIPBLAS_CONTRIB_SRCS src/runtime/contrib/hipblas/*.cc)
-    list(APPEND RUNTIME_SRCS ${HIPBLAS_CONTRIB_SRCS})
-    list(APPEND TVM_RUNTIME_LINKER_LIBS ${ROCM_HIPBLAS_LIBRARY})
-    if(NOT ROCM_HIPBLASLT_LIBRARY STREQUAL "ROCM_HIPBLASLT_LIBRARY-NOTFOUND")
-      list(APPEND TVM_RUNTIME_LINKER_LIBS ${ROCM_HIPBLASLT_LIBRARY})
-    endif()
-  endif(USE_HIPBLAS)
+  add_library(tvm_runtime_rocm_objs OBJECT ${RUNTIME_ROCM_SRCS})
+  target_link_libraries(tvm_runtime_rocm_objs PUBLIC tvm_ffi_header)
+  set_target_properties(tvm_runtime_rocm_objs PROPERTIES POSITION_INDEPENDENT_CODE ON)
+  if(TVM_VISIBILITY_FLAG)
+    target_compile_options(tvm_runtime_rocm_objs PRIVATE "${TVM_VISIBILITY_FLAG}")
+  endif()
+  add_library(tvm_runtime_rocm SHARED $<TARGET_OBJECTS:tvm_runtime_rocm_objs>)
+  target_link_libraries(tvm_runtime_rocm PUBLIC tvm_runtime ${_rocm_libs})
+  set_target_properties(tvm_runtime_rocm PROPERTIES
+    LIBRARY_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/lib"
+    RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/lib"
+    ARCHIVE_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/lib"
+  )
+  install(TARGETS tvm_runtime_rocm DESTINATION lib${LIB_SUFFIX})
+  if(TVM_BUILD_PYTHON_MODULE)
+    install(TARGETS tvm_runtime_rocm DESTINATION "lib")
+  endif()
+endif(USE_ROCM)
 
-  if(USE_THRUST)
-    message(STATUS "Build with rocThrust support")
-    # We need to override CXX to hipcc. This is required by rocthrust
-    if (${CMAKE_CXX_COMPILER} MATCHES "hipcc$")
-      message(STATUS "Using hipcc compiler to compile rocthrust code.")
-    else()
-      message(FATAL_ERROR "Set CXX=hipcc to compile rocthrust code.")
-    endif()
+# HIPBLAS contrib goes into libtvm_runtime_extra.
+if(USE_ROCM AND USE_HIPBLAS)
+  message(STATUS "Build with HIPBLAS support")
+  tvm_file_glob(GLOB HIPBLAS_CONTRIB_SRC src/relax/backend/contrib/hipblas/*.cc)
+  list(APPEND COMPILER_SRCS ${HIPBLAS_CONTRIB_SRC})
+  tvm_file_glob(GLOB HIPBLAS_CONTRIB_SRCS src/runtime/contrib/hipblas/*.cc)
+  add_library(tvm_hipblas_objs OBJECT ${HIPBLAS_CONTRIB_SRCS})
+  target_link_libraries(tvm_hipblas_objs PRIVATE tvm_runtime_extra_defs)
+  target_link_libraries(tvm_runtime_extra PRIVATE tvm_hipblas_objs ${ROCM_HIPBLAS_LIBRARY})
+  if(NOT ROCM_HIPBLASLT_LIBRARY STREQUAL "ROCM_HIPBLASLT_LIBRARY-NOTFOUND")
+    target_link_libraries(tvm_runtime_extra PRIVATE ${ROCM_HIPBLASLT_LIBRARY})
+  endif()
+endif(USE_ROCM AND USE_HIPBLAS)
 
-    find_package(rocprim REQUIRED)
-    find_package(rocthrust REQUIRED)
-    set_source_files_properties(src/runtime/contrib/thrust/thrust.cu PROPERTIES LANGUAGE CXX)
-    list(APPEND RUNTIME_SRCS src/runtime/contrib/thrust/thrust.cu)
-    list(APPEND TVM_RUNTIME_LINKER_LIBS roc::rocthrust)
-  endif(USE_THRUST)
+if(USE_ROCM AND USE_THRUST)
+  message(STATUS "Build with rocThrust support")
+  # We need to override CXX to hipcc. This is required by rocthrust
+  if(${CMAKE_CXX_COMPILER} MATCHES "hipcc$")
+    message(STATUS "Using hipcc compiler to compile rocthrust code.")
+  else()
+    message(FATAL_ERROR "Set CXX=hipcc to compile rocthrust code.")
+  endif()
 
-endif(USE_ROCM)
+  find_package(rocprim REQUIRED)
+  find_package(rocthrust REQUIRED)
+  set_source_files_properties(src/runtime/contrib/thrust/thrust.cu PROPERTIES LANGUAGE CXX)
+  add_library(tvm_rocthrust_objs OBJECT src/runtime/contrib/thrust/thrust.cu)
+  target_link_libraries(tvm_rocthrust_objs PRIVATE tvm_runtime_extra_defs)
+  target_link_libraries(tvm_runtime_extra PRIVATE tvm_rocthrust_objs roc::rocthrust)
+endif(USE_ROCM AND USE_THRUST)
diff --git a/cmake/modules/Vulkan.cmake b/cmake/modules/Vulkan.cmake
index bce4dada8802..c64e5581c9c7 100644
--- a/cmake/modules/Vulkan.cmake
+++ b/cmake/modules/Vulkan.cmake
@@ -22,17 +22,12 @@ if(USE_VULKAN)
   if(NOT Vulkan_FOUND)
     message(FATAL_ERROR "Cannot find Vulkan, USE_VULKAN=" ${USE_VULKAN})
   endif()
-  if (USE_SPIRV_KHR_INTEGER_DOT_PRODUCT)
+  if(USE_SPIRV_KHR_INTEGER_DOT_PRODUCT)
     add_definitions(-DTVM_SPIRV_KHR_INTEGER_DOT_PRODUCT=1)
     message(STATUS "Enable SPIRV_KHR_INTEGER_DOT_PRODUCT")
   endif()
   include_directories(SYSTEM ${Vulkan_INCLUDE_DIRS})
   message(STATUS "Build with Vulkan support")
-  tvm_file_glob(GLOB RUNTIME_VULKAN_SRCS src/runtime/vulkan/*.cc)
-  # SPIR-V codegen tooling lives under src/target/vulkan/ alongside the
-  # fallback module.  The fallback module itself is always compiled (in
-  # CMakeLists.txt's CODEGEN_SRCS); the rest depends on spirv-tools and
-  # is only compiled when USE_VULKAN=ON.
   tvm_file_glob(GLOB COMPILER_VULKAN_SRCS
     src/target/vulkan/build_vulkan.cc
     src/target/vulkan/codegen_spirv.cc
@@ -41,9 +36,31 @@ if(USE_VULKAN)
     src/target/vulkan/spirv_support.cc
     src/target/vulkan/spirv_utils.cc
   )
-  list(APPEND RUNTIME_SRCS ${RUNTIME_VULKAN_SRCS})
   list(APPEND COMPILER_SRCS ${COMPILER_VULKAN_SRCS})
   list(APPEND TVM_LINKER_LIBS ${Vulkan_SPIRV_TOOLS_LIBRARY})
-  list(APPEND TVM_RUNTIME_LINKER_LIBS ${Vulkan_LIBRARY})
   add_definitions(-DTVM_ENABLE_SPIRV=1)
 endif(USE_VULKAN)
+
+if(USE_VULKAN)
+  message(STATUS "Build vulkan device runtime")
+
+  tvm_file_glob(GLOB RUNTIME_VULKAN_SRCS src/runtime/vulkan/*.cc)
+
+  add_library(tvm_runtime_vulkan_objs OBJECT ${RUNTIME_VULKAN_SRCS})
+  target_link_libraries(tvm_runtime_vulkan_objs PUBLIC tvm_ffi_header)
+  set_target_properties(tvm_runtime_vulkan_objs PROPERTIES POSITION_INDEPENDENT_CODE ON)
+  if(TVM_VISIBILITY_FLAG)
+    target_compile_options(tvm_runtime_vulkan_objs PRIVATE "${TVM_VISIBILITY_FLAG}")
+  endif()
+  add_library(tvm_runtime_vulkan SHARED $<TARGET_OBJECTS:tvm_runtime_vulkan_objs>)
+  target_link_libraries(tvm_runtime_vulkan PUBLIC tvm_runtime ${Vulkan_LIBRARY})
+  set_target_properties(tvm_runtime_vulkan PROPERTIES
+    LIBRARY_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/lib"
+    RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/lib"
+    ARCHIVE_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/lib"
+  )
+  install(TARGETS tvm_runtime_vulkan DESTINATION lib${LIB_SUFFIX})
+  if(TVM_BUILD_PYTHON_MODULE)
+    install(TARGETS tvm_runtime_vulkan DESTINATION "lib")
+  endif()
+endif(USE_VULKAN)
diff --git a/cmake/modules/contrib/BLAS.cmake b/cmake/modules/contrib/BLAS.cmake
index 542effb50463..cee3e2fc30e7 100644
--- a/cmake/modules/contrib/BLAS.cmake
+++ b/cmake/modules/contrib/BLAS.cmake
@@ -17,8 +17,9 @@
 
 if(USE_BLAS STREQUAL "openblas")
   find_library(BLAS_LIBRARY openblas)
-  list(APPEND TVM_RUNTIME_LINKER_LIBS ${BLAS_LIBRARY})
-  list(APPEND RUNTIME_SRCS src/runtime/contrib/cblas/cblas.cc)
+  add_library(tvm_blas_objs OBJECT src/runtime/contrib/cblas/cblas.cc)
+  target_link_libraries(tvm_blas_objs PRIVATE tvm_runtime_extra_defs)
+  target_link_libraries(tvm_runtime_extra PRIVATE tvm_blas_objs ${BLAS_LIBRARY})
   message(STATUS "Using BLAS library " ${BLAS_LIBRARY})
   find_path(BLAS_INCLUDE_DIR cblas.h PATH_SUFFIXES openblas)
   if(BLAS_INCLUDE_DIR)
@@ -27,14 +28,16 @@ if(USE_BLAS STREQUAL "openblas")
   endif()
 elseif(USE_BLAS STREQUAL "atlas" OR USE_BLAS STREQUAL "blas")
   find_library(BLAS_LIBRARY cblas)
-  list(APPEND TVM_RUNTIME_LINKER_LIBS ${BLAS_LIBRARY})
-  list(APPEND RUNTIME_SRCS src/runtime/contrib/cblas/cblas.cc)
+  add_library(tvm_blas_objs OBJECT src/runtime/contrib/cblas/cblas.cc)
+  target_link_libraries(tvm_blas_objs PRIVATE tvm_runtime_extra_defs)
+  target_link_libraries(tvm_runtime_extra PRIVATE tvm_blas_objs ${BLAS_LIBRARY})
   message(STATUS "Use BLAS library " ${BLAS_LIBRARY})
 elseif(USE_BLAS STREQUAL "apple")
   find_library(BLAS_LIBRARY Accelerate)
   include_directories(SYSTEM ${BLAS_LIBRARY}/Versions/Current/Frameworks/vecLib.framework/Versions/Current/Headers/)
-  list(APPEND TVM_RUNTIME_LINKER_LIBS ${BLAS_LIBRARY})
-  list(APPEND RUNTIME_SRCS src/runtime/contrib/cblas/cblas.cc)
+  add_library(tvm_blas_objs OBJECT src/runtime/contrib/cblas/cblas.cc)
+  target_link_libraries(tvm_blas_objs PRIVATE tvm_runtime_extra_defs)
+  target_link_libraries(tvm_runtime_extra PRIVATE tvm_blas_objs ${BLAS_LIBRARY})
   message(STATUS "Use BLAS library " ${BLAS_LIBRARY})
 elseif(USE_BLAS STREQUAL "mkl")
   message(DEPRECATION "USE_BLAS=mkl is deprecated. Use USE_MKL=ON instead.")
@@ -63,8 +66,9 @@ if(USE_MKL OR USE_MKL_PATH)
     find_library(BLAS_LIBRARY_MKL NAMES mkl_rt HINTS ${USE_MKL}/lib/ ${USE_MKL}/lib/intel64_win)
   endif()
   include_directories(SYSTEM ${USE_MKL}/include)
-  list(APPEND TVM_RUNTIME_LINKER_LIBS ${BLAS_LIBRARY_MKL})
-  list(APPEND RUNTIME_SRCS src/runtime/contrib/cblas/mkl.cc)
+  add_library(tvm_mkl_objs OBJECT src/runtime/contrib/cblas/mkl.cc)
+  target_link_libraries(tvm_mkl_objs PRIVATE tvm_runtime_extra_defs)
+  target_link_libraries(tvm_runtime_extra PRIVATE tvm_mkl_objs ${BLAS_LIBRARY_MKL})
   add_definitions(-DUSE_MKL_BLAS=1)
   message(STATUS "Use MKL library " ${BLAS_LIBRARY_MKL})
 endif()
diff --git a/cmake/modules/contrib/CLML.cmake b/cmake/modules/contrib/CLML.cmake
index 34b998ef9914..1d0f0f3a50cf 100644
--- a/cmake/modules/contrib/CLML.cmake
+++ b/cmake/modules/contrib/CLML.cmake
@@ -24,7 +24,7 @@ if(USE_CLML)
         list(APPEND COMPILER_SRCS ${CLML_RUNTIME_MODULE})
     endif()
     message(STATUS "Build with CLML support : " ${USE_CLML})
-    if (NOT USE_CLML STREQUAL "ON")
+    if(NOT USE_CLML STREQUAL "ON")
         set(CLML_VERSION_HEADER "${USE_CLML}/CL/cl_qcom_ml_ops.h")
         if(EXISTS ${CLML_VERSION_HEADER})
             file(READ ${CLML_VERSION_HEADER} ver)
@@ -45,7 +45,7 @@ endif()
 if(USE_CLML_GRAPH_EXECUTOR)
     set(CLML_PATH ${CMAKE_CURRENT_SOURCE_DIR}/clml)
     # Detect custom CLML path.
-    if (NOT USE_CLML_GRAPH_EXECUTOR STREQUAL "ON")
+    if(NOT USE_CLML_GRAPH_EXECUTOR STREQUAL "ON")
         set(CLML_PATH ${USE_CLML_GRAPH_EXECUTOR})
     endif()
 
@@ -68,8 +68,14 @@ if(USE_CLML_GRAPH_EXECUTOR)
             list(APPEND EXTERN_CLML_COMPUTE_LIB ${CLML_PATH}/lib/libOpenCL.so ${CLML_PATH}/lib/libOpenCL_system.so)
         endif()
     endif()
-    list(APPEND TVM_RUNTIME_LINKER_LIBS ${EXTERN_CLML_COMPUTE_LIB})
-    list(APPEND RUNTIME_SRCS ${CLML_CONTRIB_SRC})
+    add_library(tvm_clml_objs OBJECT ${CLML_CONTRIB_SRC})
+    target_link_libraries(tvm_clml_objs PRIVATE tvm_runtime_extra_defs)
+    # CLML depends on OpenCL runtime symbols — link the OpenCL DSO instead of
+    # duplicating sources (which would cause duplicate registrations).
+    target_link_libraries(tvm_runtime_extra PRIVATE tvm_clml_objs ${EXTERN_CLML_COMPUTE_LIB})
+    if(TARGET tvm_runtime_opencl)
+      target_link_libraries(tvm_runtime_extra PRIVATE tvm_runtime_opencl)
+    endif()
     message(STATUS "Build with CLML graph runtime support: "
             ${EXTERN_CLML_COMPUTE_LIB})
 
@@ -77,8 +83,6 @@ if(USE_CLML_GRAPH_EXECUTOR)
     add_definitions(-DTVM_GRAPH_EXECUTOR_CLML)
 
     message(STATUS "Enable OpenCL as fallback to CLML")
-    file(GLOB RUNTIME_OPENCL_SRCS src/runtime/opencl/*.cc)
-    list(APPEND RUNTIME_SRCS ${RUNTIME_OPENCL_SRCS})
     set(USE_OPENCL ${CLML_PATH})
     if(USE_OPENCL_ENABLE_HOST_PTR)
         add_definitions(-DOPENCL_ENABLE_HOST_PTR)
diff --git a/cmake/modules/contrib/CUTLASS.cmake b/cmake/modules/contrib/CUTLASS.cmake
index a3a09f141c9e..7f44c2e6db0c 100644
--- a/cmake/modules/contrib/CUTLASS.cmake
+++ b/cmake/modules/contrib/CUTLASS.cmake
@@ -16,9 +16,6 @@
 # under the License.
 
 if(USE_CUDA AND USE_CUTLASS)
-  set(CUTLASS_GEN_COND "$<AND:$<BOOL:${USE_CUDA}>,$<BOOL:${USE_CUTLASS}>>")
-  set(CUTLASS_RUNTIME_OBJS "")
-
   tvm_file_glob(GLOB CUTLASS_CONTRIB_SRC
     src/relax/backend/contrib/cutlass/*.cc
   )
@@ -38,12 +35,12 @@ if(USE_CUDA AND USE_CUTLASS)
   set(CUTLASS_FPA_INTB_RUNTIME_SRCS "")
   list(APPEND CUTLASS_FPA_INTB_RUNTIME_SRCS src/runtime/contrib/cutlass/weight_preprocess.cc)
   add_library(fpA_intB_cutlass_objs OBJECT ${CUTLASS_FPA_INTB_RUNTIME_SRCS})
-  target_link_libraries(fpA_intB_cutlass_objs PRIVATE tvm_ffi_header)
+  target_link_libraries(fpA_intB_cutlass_objs PRIVATE tvm_runtime_extra_defs)
   target_include_directories(fpA_intB_cutlass_objs PRIVATE
     ${PROJECT_SOURCE_DIR}/3rdparty/cutlass_fpA_intB_gemm
     ${PROJECT_SOURCE_DIR}/3rdparty/cutlass_fpA_intB_gemm/cutlass/include
   )
-  list(APPEND CUTLASS_RUNTIME_OBJS "$<${CUTLASS_GEN_COND}:$<TARGET_OBJECTS:fpA_intB_cutlass_objs>>")
+  target_link_libraries(tvm_runtime_extra PRIVATE fpA_intB_cutlass_objs)
 
   ### Build cutlass runtime objects for flash attention
   add_subdirectory(${PROJECT_SOURCE_DIR}/3rdparty/libflash_attn)
@@ -56,13 +53,13 @@ if(USE_CUDA AND USE_CUTLASS)
   set(CUTLASS_DIR ${PROJECT_SOURCE_DIR}/3rdparty/cutlass)
   set(TVM_CUTLASS_RUNTIME_SRCS "")
 
-  if (CMAKE_CUDA_ARCHITECTURES MATCHES "90a")
+  if(CMAKE_CUDA_ARCHITECTURES MATCHES "90a")
     list(APPEND TVM_CUTLASS_RUNTIME_SRCS src/runtime/contrib/cutlass/fp16_group_gemm_sm90.cu)
     list(APPEND TVM_CUTLASS_RUNTIME_SRCS src/runtime/contrib/cutlass/fp8_group_gemm_sm90.cu)
     list(APPEND TVM_CUTLASS_RUNTIME_SRCS src/runtime/contrib/cutlass/fp8_gemm.cu)
     list(APPEND TVM_CUTLASS_RUNTIME_SRCS src/runtime/contrib/cutlass/fp8_groupwise_scaled_gemm_sm90.cu)
   endif()
-  if (CMAKE_CUDA_ARCHITECTURES MATCHES "100a")
+  if(CMAKE_CUDA_ARCHITECTURES MATCHES "100a")
     list(APPEND TVM_CUTLASS_RUNTIME_SRCS src/runtime/contrib/cutlass/fp16_group_gemm_sm100.cu)
     list(APPEND TVM_CUTLASS_RUNTIME_SRCS src/runtime/contrib/cutlass/fp8_groupwise_scaled_gemm_sm100.cu)
     list(APPEND TVM_CUTLASS_RUNTIME_SRCS src/runtime/contrib/cutlass/fp8_groupwise_scaled_group_gemm_sm100.cu)
@@ -74,14 +71,11 @@ if(USE_CUDA AND USE_CUTLASS)
       ${CUTLASS_DIR}/include
       ${PROJECT_SOURCE_DIR}/3rdparty/cutlass_fpA_intB_gemm/cutlass_extensions/include
     )
-    target_link_libraries(tvm_cutlass_objs PRIVATE tvm_ffi_header)
+    target_link_libraries(tvm_cutlass_objs PRIVATE tvm_runtime_extra_defs)
     # Note: enable this to get more detailed logs for cutlass kernels
     # target_compile_definitions(tvm_cutlass_objs PRIVATE CUTLASS_DEBUG_TRACE_LEVEL=2)
-    list(APPEND CUTLASS_RUNTIME_OBJS "$<${CUTLASS_GEN_COND}:$<TARGET_OBJECTS:tvm_cutlass_objs>>")
+    target_link_libraries(tvm_runtime_extra PRIVATE tvm_cutlass_objs)
   endif()
 
-  ### Add cutlass objects to list of TVM runtime extension objs
-  list(APPEND TVM_RUNTIME_EXT_OBJS "${CUTLASS_RUNTIME_OBJS}")
-
   message(STATUS "Build with CUTLASS")
 endif()
diff --git a/cmake/modules/contrib/CoreML.cmake b/cmake/modules/contrib/CoreML.cmake
index c530d8650fb2..94520f2b570f 100644
--- a/cmake/modules/contrib/CoreML.cmake
+++ b/cmake/modules/contrib/CoreML.cmake
@@ -20,6 +20,7 @@ if(USE_COREML)
   find_library(FOUNDATION_LIB Foundation)
   find_library(COREML_LIB Coreml)
   tvm_file_glob(GLOB COREML_CONTRIB_SRC src/runtime/contrib/coreml/*.mm)
-  list(APPEND TVM_RUNTIME_LINKER_LIBS ${FOUNDATION_LIB} ${COREML_LIB})
-  list(APPEND RUNTIME_SRCS ${COREML_CONTRIB_SRC})
+  add_library(tvm_coreml_objs OBJECT ${COREML_CONTRIB_SRC})
+  target_link_libraries(tvm_coreml_objs PRIVATE tvm_runtime_extra_defs)
+  target_link_libraries(tvm_runtime_extra PRIVATE tvm_coreml_objs ${FOUNDATION_LIB} ${COREML_LIB})
 endif(USE_COREML)
diff --git a/cmake/modules/contrib/DNNL.cmake b/cmake/modules/contrib/DNNL.cmake
index e3d75677b547..191b04594b80 100644
--- a/cmake/modules/contrib/DNNL.cmake
+++ b/cmake/modules/contrib/DNNL.cmake
@@ -17,19 +17,20 @@
 
 if(IS_DIRECTORY ${USE_DNNL})
   find_library(EXTERN_LIBRARY_DNNL NAMES dnnl HINTS ${USE_DNNL}/lib/)
-  if (EXTERN_LIBRARY_DNNL STREQUAL "EXTERN_LIBRARY_DNNL-NOTFOUND")
+  if(EXTERN_LIBRARY_DNNL STREQUAL "EXTERN_LIBRARY_DNNL-NOTFOUND")
     message(WARNING "Cannot find DNNL library at ${USE_DNNL}.")
   else()
     add_definitions(-DUSE_JSON_RUNTIME=1)
     tvm_file_glob(GLOB DNNL_CONTRIB_SRC src/relax/backend/contrib/dnnl/*.cc)
     list(APPEND COMPILER_SRCS ${DNNL_CONTRIB_SRC})
 
-    list(APPEND TVM_RUNTIME_LINKER_LIBS ${EXTERN_LIBRARY_DNNL})
     tvm_file_glob(GLOB DNNL_CONTRIB_SRC src/runtime/contrib/dnnl/dnnl_json_runtime.cc
                                         src/runtime/contrib/dnnl/dnnl_utils.cc
                                         src/runtime/contrib/dnnl/dnnl.cc
                                         src/runtime/contrib/cblas/dnnl_blas.cc)
-    list(APPEND RUNTIME_SRCS ${DNNL_CONTRIB_SRC})
+    add_library(tvm_dnnl_objs OBJECT ${DNNL_CONTRIB_SRC})
+    target_link_libraries(tvm_dnnl_objs PRIVATE tvm_runtime_extra_defs)
+    target_link_libraries(tvm_runtime_extra PRIVATE tvm_dnnl_objs ${EXTERN_LIBRARY_DNNL})
     message(STATUS "Build with DNNL JSON runtime: " ${EXTERN_LIBRARY_DNNL})
   endif()
 elseif((USE_DNNL STREQUAL "ON") OR (USE_DNNL STREQUAL "JSON"))
@@ -38,20 +39,22 @@ elseif((USE_DNNL STREQUAL "ON") OR (USE_DNNL STREQUAL "JSON"))
   list(APPEND COMPILER_SRCS ${DNNL_CONTRIB_SRC})
 
   find_library(EXTERN_LIBRARY_DNNL dnnl)
-  list(APPEND TVM_RUNTIME_LINKER_LIBS ${EXTERN_LIBRARY_DNNL})
   tvm_file_glob(GLOB DNNL_CONTRIB_SRC src/runtime/contrib/dnnl/dnnl_json_runtime.cc
                                       src/runtime/contrib/dnnl/dnnl_utils.cc
                                       src/runtime/contrib/dnnl/dnnl.cc
                                       src/runtime/contrib/cblas/dnnl_blas.cc)
-  list(APPEND RUNTIME_SRCS ${DNNL_CONTRIB_SRC})
+  add_library(tvm_dnnl_objs OBJECT ${DNNL_CONTRIB_SRC})
+  target_link_libraries(tvm_dnnl_objs PRIVATE tvm_runtime_extra_defs)
+  target_link_libraries(tvm_runtime_extra PRIVATE tvm_dnnl_objs ${EXTERN_LIBRARY_DNNL})
   message(STATUS "Build with DNNL JSON runtime: " ${EXTERN_LIBRARY_DNNL})
 elseif(USE_DNNL STREQUAL "C_SRC")
   find_library(EXTERN_LIBRARY_DNNL dnnl)
-  list(APPEND TVM_RUNTIME_LINKER_LIBS ${EXTERN_LIBRARY_DNNL})
   tvm_file_glob(GLOB DNNL_CONTRIB_SRC src/runtime/contrib/dnnl/dnnl.cc
                                       src/runtime/contrib/dnnl/dnnl_utils.cc
                                       src/runtime/contrib/cblas/dnnl_blas.cc)
-  list(APPEND RUNTIME_SRCS ${DNNL_CONTRIB_SRC})
+  add_library(tvm_dnnl_objs OBJECT ${DNNL_CONTRIB_SRC})
+  target_link_libraries(tvm_dnnl_objs PRIVATE tvm_runtime_extra_defs)
+  target_link_libraries(tvm_runtime_extra PRIVATE tvm_dnnl_objs ${EXTERN_LIBRARY_DNNL})
   message(STATUS "Build with DNNL C source module: " ${EXTERN_LIBRARY_DNNL})
 elseif(USE_DNNL STREQUAL "OFF")
   # pass
diff --git a/cmake/modules/contrib/ExampleNPU.cmake b/cmake/modules/contrib/ExampleNPU.cmake
index 2fc53a4dfc82..51b023dbbf0d 100644
--- a/cmake/modules/contrib/ExampleNPU.cmake
+++ b/cmake/modules/contrib/ExampleNPU.cmake
@@ -28,12 +28,14 @@ if(USE_EXAMPLE_NPU_CODEGEN)
     endif()
 endif()
 
-# Example NPU Runtime
+# Example NPU Runtime — goes into libtvm_runtime_extra.
 if(USE_EXAMPLE_NPU_RUNTIME)
     message(STATUS "Build with Example NPU runtime")
 
     tvm_file_glob(GLOB RUNTIME_EXAMPLE_NPU_SRCS src/runtime/contrib/example_npu/*.cc)
-    list(APPEND RUNTIME_SRCS ${RUNTIME_EXAMPLE_NPU_SRCS})
+    add_library(tvm_example_npu_objs OBJECT ${RUNTIME_EXAMPLE_NPU_SRCS})
+    target_link_libraries(tvm_example_npu_objs PRIVATE tvm_runtime_extra_defs)
+    target_link_libraries(tvm_runtime_extra PRIVATE tvm_example_npu_objs)
 
     add_definitions(-DTVM_GRAPH_EXECUTOR_EXAMPLE_NPU)
 endif()
diff --git a/cmake/modules/contrib/NNAPI.cmake b/cmake/modules/contrib/NNAPI.cmake
index 23eb6dd11eda..496ce96a8060 100644
--- a/cmake/modules/contrib/NNAPI.cmake
+++ b/cmake/modules/contrib/NNAPI.cmake
@@ -27,13 +27,14 @@ if(USE_NNAPI_CODEGEN)
     endif()
 endif()
 
-# NNAPI Runtime
+# NNAPI Runtime — goes into libtvm_runtime_extra.
 if(USE_NNAPI_RUNTIME)
     message(STATUS "Build with NNAPI runtime")
 
     tvm_file_glob(GLOB RUNTIME_NNAPI_SRCS src/runtime/contrib/nnapi/*.cc)
-    list(APPEND RUNTIME_SRCS ${RUNTIME_NNAPI_SRCS})
-    list(APPEND TVM_RUNTIME_LINKER_LIBS neuralnetworks log)
+    add_library(tvm_nnapi_objs OBJECT ${RUNTIME_NNAPI_SRCS})
+    target_link_libraries(tvm_nnapi_objs PRIVATE tvm_runtime_extra_defs)
+    target_link_libraries(tvm_runtime_extra PRIVATE tvm_nnapi_objs neuralnetworks log)
 
     add_definitions(-DTVM_GRAPH_EXECUTOR_NNAPI)
 endif()
diff --git a/cmake/modules/contrib/Random.cmake b/cmake/modules/contrib/Random.cmake
index a003de1553ba..16e699fb62a6 100644
--- a/cmake/modules/contrib/Random.cmake
+++ b/cmake/modules/contrib/Random.cmake
@@ -18,5 +18,7 @@
 if(USE_RANDOM)
   message(STATUS "Build with contrib.random")
   tvm_file_glob(GLOB RANDOM_CONTRIB_SRC src/runtime/contrib/random/random.cc)
-  list(APPEND RUNTIME_SRCS ${RANDOM_CONTRIB_SRC})
+  add_library(tvm_random_objs OBJECT ${RANDOM_CONTRIB_SRC})
+  target_link_libraries(tvm_random_objs PRIVATE tvm_runtime_extra_defs)
+  target_link_libraries(tvm_runtime_extra PRIVATE tvm_random_objs)
 endif(USE_RANDOM)
diff --git a/cmake/modules/contrib/Sort.cmake b/cmake/modules/contrib/Sort.cmake
index 4e4c9781216f..2fbeedd95e30 100644
--- a/cmake/modules/contrib/Sort.cmake
+++ b/cmake/modules/contrib/Sort.cmake
@@ -18,5 +18,7 @@
 if(USE_SORT)
   message(STATUS "Build with contrib.sort")
   tvm_file_glob(GLOB SORT_CONTRIB_SRC src/runtime/contrib/sort/*.cc)
-  list(APPEND RUNTIME_SRCS ${SORT_CONTRIB_SRC})
+  add_library(tvm_sort_objs OBJECT ${SORT_CONTRIB_SRC})
+  target_link_libraries(tvm_sort_objs PRIVATE tvm_runtime_extra_defs)
+  target_link_libraries(tvm_runtime_extra PRIVATE tvm_sort_objs)
 endif(USE_SORT)
diff --git a/cmake/modules/contrib/TensorRT.cmake b/cmake/modules/contrib/TensorRT.cmake
index a9729ed99656..08a841a46df1 100644
--- a/cmake/modules/contrib/TensorRT.cmake
+++ b/cmake/modules/contrib/TensorRT.cmake
@@ -19,7 +19,7 @@
 # compilation of TensorRT modules without requiring TensorRT to be installed. The compiled modules
 # will only be able to be executed using a TVM built with USE_TENSORRT_RUNTIME=ON.
 
-include (FindPackageHandleStandardArgs)
+include(FindPackageHandleStandardArgs)
 
 if(USE_TENSORRT_CODEGEN)
     message(STATUS "Build with TensorRT codegen")
@@ -33,7 +33,7 @@ if(USE_TENSORRT_CODEGEN)
     endif()
 endif()
 
-# TensorRT Runtime
+# TensorRT Runtime — goes into libtvm_runtime_extra.
 if(USE_TENSORRT_RUNTIME)
     if(IS_DIRECTORY ${USE_TENSORRT_RUNTIME})
         set(TENSORRT_ROOT_DIR ${USE_TENSORRT_RUNTIME})
@@ -47,12 +47,12 @@ if(USE_TENSORRT_RUNTIME)
     endif()
     message(STATUS "TENSORRT_LIB_DIR: " ${TENSORRT_LIB_DIR})
     include_directories(${TENSORRT_INCLUDE_DIR})
-    list(APPEND TVM_RUNTIME_LINKER_LIBS ${TENSORRT_LIB_DIR})
-
     # TRT runtime sources
     tvm_file_glob(GLOB RUNTIME_TENSORRT_SRCS src/runtime/contrib/tensorrt/*.cc)
     set_source_files_properties(${RUNTIME_TENSORRT_SRCS} PROPERTIES COMPILE_FLAGS "-Wno-deprecated-declarations")
-    list(APPEND RUNTIME_SRCS ${RUNTIME_TENSORRT_SRCS})
+    add_library(tvm_tensorrt_objs OBJECT ${RUNTIME_TENSORRT_SRCS})
+    target_link_libraries(tvm_tensorrt_objs PRIVATE tvm_runtime_extra_defs)
+    target_link_libraries(tvm_runtime_extra PRIVATE tvm_tensorrt_objs ${TENSORRT_LIB_DIR})
 
     # Set defines
     add_definitions(-DTVM_GRAPH_EXECUTOR_TENSORRT)
diff --git a/cmake/modules/contrib/vllm.cmake b/cmake/modules/contrib/vllm.cmake
index 4a09edd02e58..7a571ff50508 100644
--- a/cmake/modules/contrib/vllm.cmake
+++ b/cmake/modules/contrib/vllm.cmake
@@ -21,5 +21,7 @@ if(USE_VLLM)
   enable_language(CUDA)
 
   tvm_file_glob(GLOB VLLM_CONTRIB_SRC src/runtime/contrib/vllm/*.cu src/runtime/contrib/vllm/*.cc)
-  list(APPEND RUNTIME_SRCS ${VLLM_CONTRIB_SRC})
+  add_library(tvm_vllm_objs OBJECT ${VLLM_CONTRIB_SRC})
+  target_link_libraries(tvm_vllm_objs PRIVATE tvm_runtime_extra_defs)
+  target_link_libraries(tvm_runtime_extra PRIVATE tvm_vllm_objs)
 endif(USE_VLLM)
diff --git a/include/tvm/runtime/memory/memory_manager.h b/include/tvm/runtime/memory/memory_manager.h
index 70cd8b7b3c77..9163f58f7d62 100644
--- a/include/tvm/runtime/memory/memory_manager.h
+++ b/include/tvm/runtime/memory/memory_manager.h
@@ -54,7 +54,7 @@ struct Buffer {
   AllocatorType alloc_type;
 };
 
-class Allocator {
+class TVM_RUNTIME_DLL Allocator {
  public:
   explicit Allocator(AllocatorType type) : type_(type) {}
   virtual ~Allocator() = default;
@@ -65,8 +65,8 @@ class Allocator {
    *  \param mem_scope The device memory scope hint.
    *  \return The empty Tensor.
    */
-  TVM_RUNTIME_DLL Tensor Empty(ffi::Shape shape, DLDataType dtype, Device dev,
-                               ffi::Optional<ffi::String> mem_scope = std::nullopt);
+  Tensor Empty(ffi::Shape shape, DLDataType dtype, Device dev,
+               ffi::Optional<ffi::String> mem_scope = std::nullopt);
   /*! \brief Return the allocator type. */
   inline AllocatorType type() const { return type_; }
   /*! \brief Allocate a buffer given a size, alignment and type.
@@ -76,8 +76,7 @@ class Allocator {
    *  \param type_hint A type hint to the allocator.
    *  \return A sized allocation in the form of a buffer.
    */
-  TVM_RUNTIME_DLL virtual Buffer Alloc(Device dev, size_t nbytes, size_t alignment,
-                                       DLDataType type_hint) = 0;
+  virtual Buffer Alloc(Device dev, size_t nbytes, size_t alignment, DLDataType type_hint) = 0;
   /*! \brief Allocate a buffer given a shape and type.
    *  \param dev The device where the array is allocated.
    *  \param shape The shape of the tensor.
@@ -85,8 +84,8 @@ class Allocator {
    *  \param mem_scope A memory scope of the buffer.
    *  \return A sized allocation in the form of a buffer.
    */
-  TVM_RUNTIME_DLL virtual Buffer Alloc(Device dev, ffi::Shape shape, DLDataType type_hint,
-                                       const std::string& mem_scope = "");
+  virtual Buffer Alloc(Device dev, ffi::Shape shape, DLDataType type_hint,
+                       const std::string& mem_scope = "");
 
   /*! \brief Create a view for the buffer given a shape, type and scope.
    *  \param buffer The existing buffer upon which we need to create a view.
@@ -95,9 +94,8 @@ class Allocator {
    *  \param mem_scope A memory scope of the view.
    *  \return A device pointer to the created view.
    */
-  TVM_RUNTIME_DLL virtual void* CreateView(const Buffer& buffer, ffi::Shape shape,
-                                           DLDataType type_hint,
-                                           const std::string& mem_scope = "global") {
+  virtual void* CreateView(const Buffer& buffer, ffi::Shape shape, DLDataType type_hint,
+                           const std::string& mem_scope = "global") {
     return buffer.data;
   }
 
@@ -105,22 +103,22 @@ class Allocator {
    *  \param dev is the device where this view is created
    *  \param data The view pointer to be freed.
    */
-  TVM_RUNTIME_DLL virtual void FreeView(Device dev, void* data) {}
+  virtual void FreeView(Device dev, void* data) {}
 
   /*! \brief Free a buffer allocated by the allocator.
    *  \param buffer The buffer to free.
    */
-  TVM_RUNTIME_DLL virtual void Free(const Buffer& buffer) = 0;
+  virtual void Free(const Buffer& buffer) = 0;
   /*! \brief Clear the allocated memory. */
-  TVM_RUNTIME_DLL virtual void Clear();
+  virtual void Clear();
   /*! \brief The amount of memory currently allocated.
    *  \return The amount of memory currently allocated.
    */
-  TVM_RUNTIME_DLL virtual size_t UsedMemory() const = 0;
+  virtual size_t UsedMemory() const = 0;
 
  protected:
   /*! \brief Check if the given memory scope is allowed to allocate by the allocator. */
-  TVM_RUNTIME_DLL virtual bool AllowMemoryScope(const std::string& mem_scope) const;
+  virtual bool AllowMemoryScope(const std::string& mem_scope) const;
 
  private:
   AllocatorType type_;
diff --git a/include/tvm/runtime/vm/tensor_cache_support.h b/include/tvm/runtime/vm/tensor_cache_support.h
index 3580fdef2a25..ea997f0755bd 100644
--- a/include/tvm/runtime/vm/tensor_cache_support.h
+++ b/include/tvm/runtime/vm/tensor_cache_support.h
@@ -86,7 +86,8 @@ struct TensorCacheMetadata {
   /*! \brief Load the metadata from a specific directory */
   TVM_RUNTIME_DLL static TensorCacheMetadata Load(const std::string& path);
   /*! \brief Load the metadata from a given JSON string */
-  static TensorCacheMetadata LoadFromStr(const std::string& json_str, const std::string& path);
+  TVM_RUNTIME_DLL static TensorCacheMetadata LoadFromStr(const std::string& json_str,
+                                                         const std::string& path);
 };
 
 }  // namespace vm
diff --git a/include/tvm/s_tir/random_engine.h b/include/tvm/s_tir/random_engine.h
index 0acfd50fbed2..059791be1a6f 100644
--- a/include/tvm/s_tir/random_engine.h
+++ b/include/tvm/s_tir/random_engine.h
@@ -61,7 +61,10 @@ class LinearCongruentialEngine {
    * \brief Get a device random state
    * \return The random state
    */
-  static TRandState DeviceRandom() { return (std::random_device()()) % modulus; }
+  static TRandState DeviceRandom() {
+    std::random_device rd;
+    return rd() % modulus;
+  }
 
   /*!
    * \brief Operator to move the random state to the next and return the new random state. According
diff --git a/python/tvm/base.py b/python/tvm/base.py
index 40823edd843c..11601a9dcc72 100644
--- a/python/tvm/base.py
+++ b/python/tvm/base.py
@@ -22,6 +22,8 @@
 import os
 import sys
 
+from tvm_ffi.libinfo import load_lib_ctypes
+
 from . import libinfo
 
 # ----------------------------
@@ -48,16 +50,23 @@
 # compiler library is simply not present (runtime-only wheel), only the
 # runtime is loaded and ``_LIB`` aliases ``_LIB_RUNTIME``.
 _extra_lib_paths = libinfo.package_lib_paths()
-_LIB_RUNTIME = libinfo.load_lib_ctypes(
+_LIB_RUNTIME = load_lib_ctypes(
     "tvm", "tvm_runtime", "RTLD_GLOBAL", extra_lib_paths=_extra_lib_paths
 )
 
+# After libtvm_runtime.so is in the global symbol namespace, scan the same
+# directory for per-backend DSOs (libtvm_runtime_cuda.so, etc.) and load each
+# with RTLD_GLOBAL so their static initializers register device backends.
+# Failures are swallowed silently — a missing driver just means that backend
+# is unavailable, not an error.
+libinfo.load_backend_libs(_LIB_RUNTIME._name)
+
 _RUNTIME_ONLY = libinfo.use_runtime_lib()
 if _RUNTIME_ONLY:
     _LIB = _LIB_RUNTIME
 else:
     try:
-        _LIB = libinfo.load_lib_ctypes(
+        _LIB = load_lib_ctypes(
             "tvm", "tvm_compiler", "RTLD_LOCAL", extra_lib_paths=_extra_lib_paths
         )
     except RuntimeError:
diff --git a/python/tvm/libinfo.py b/python/tvm/libinfo.py
index ae79df81f67c..136d85a49f30 100644
--- a/python/tvm/libinfo.py
+++ b/python/tvm/libinfo.py
@@ -18,12 +18,12 @@
 
 from __future__ import annotations
 
-import ctypes
-import importlib.metadata as im
 import os
 import sys
 from pathlib import Path
 
+from tvm_ffi.libinfo import load_lib_ctypes
+
 
 def use_runtime_lib() -> bool:
     """Whether ``TVM_USE_RUNTIME_LIB`` requests runtime-only mode.
@@ -39,108 +39,39 @@ def package_lib_paths() -> list[Path]:
 
     Anchored on this file's location (``python/tvm/libinfo.py``), the list
     covers the wheel-install layout (``python/tvm/lib/``) and the in-tree dev
-    build layouts (``<worktree>/build/lib/`` and ``<worktree>/lib/``). Callers
+    build layouts (``<worktree>/build/lib/`` and ``<worktree>/lib/``).
+    ``TVM_LIBRARY_PATH`` is prepended when set so it takes priority. Callers
     pick the basenames they want (e.g. ``libtvm_runtime.so``) and the load
     mode; this function only returns the search path.
     """
     pkg = Path(__file__).parent  # python/tvm/
-    return [
+    paths: list[Path] = []
+    if os.environ.get("TVM_LIBRARY_PATH"):
+        paths.append(Path(os.environ["TVM_LIBRARY_PATH"]))
+    paths += [
         pkg / "lib",  # wheel layout
         pkg.parent.parent / "build" / "lib",  # dev: <worktree>/build/lib
         pkg.parent.parent / "lib",  # dev: <worktree>/lib
     ]
+    return paths
 
 
-# Mirror of ``tvm_ffi.libinfo.{load_lib_ctypes,_find_library_by_basename}`` with
-# the ``extra_lib_paths`` parameter from apache/tvm-ffi#570 so dev-mode lookups
-# anchor on the *caller's* package root rather than tvm-ffi's own ``__file__``.
-# Once apache/tvm-ffi#570 lands and the submodule bumps, drop these and switch
-# ``base.py`` back to ``from tvm_ffi.libinfo import load_lib_ctypes``.
-
+_BACKEND_RUNTIME_LIBS = ["cuda", "vulkan", "opencl", "metal", "rocm", "hexagon", "extra"]
 
-def _find_library_by_basename(
-    package: str,
-    target_name: str,
-    extra_lib_paths: list[Path] | None = None,
-) -> Path:
-    """Resolve ``lib<target_name>.{so,dylib,dll}`` for ``package``.
 
-    Search order: wheel-install RECORD walk → caller-supplied
-    ``extra_lib_paths`` → ``PATH`` / ``LD_LIBRARY_PATH`` /
-    ``DYLD_LIBRARY_PATH``. Raises ``RuntimeError`` listing every candidate
-    directory tried if nothing matches.
-    """
-    if sys.platform.startswith("win32"):
-        lib_dll_names = (f"{target_name}.dll",)
-    elif sys.platform.startswith("darwin"):
-        lib_dll_names = (f"lib{target_name}.dylib", f"lib{target_name}.so")
-    else:
-        lib_dll_names = (f"lib{target_name}.so",)
-
-    try:
-        dist = im.distribution(package)
-        record = dist.read_text("RECORD") or ""
-        for line in record.splitlines():
-            partial_path, *_ = line.split(",")
-            if partial_path.endswith(lib_dll_names):
-                try:
-                    path = (dist._path.parent / partial_path).resolve()
-                except OSError:
-                    continue
-                if path.name in lib_dll_names and path.is_file():
-                    return path
-    except (im.PackageNotFoundError, OSError):
-        pass
-
-    dll_paths: list[Path] = []
-    if extra_lib_paths is not None:
-        for i, p in enumerate(extra_lib_paths):
-            if not isinstance(p, Path):
-                raise TypeError(
-                    f"extra_lib_paths[{i}] must be a pathlib.Path, got {type(p).__name__}: {p!r}"
-                )
-        dll_paths.extend(extra_lib_paths)
-
-    if sys.platform.startswith("win32"):
-        dll_paths.extend(Path(p) for p in split_env_var("PATH", ";"))
-    elif sys.platform.startswith("darwin"):
-        dll_paths.extend(Path(p) for p in split_env_var("DYLD_LIBRARY_PATH", ":"))
-        dll_paths.extend(Path(p) for p in split_env_var("PATH", ":"))
-    else:
-        dll_paths.extend(Path(p) for p in split_env_var("LD_LIBRARY_PATH", ":"))
-        dll_paths.extend(Path(p) for p in split_env_var("PATH", ":"))
-
-    for d in dll_paths:
-        for name in lib_dll_names:
-            try:
-                path = (d / name).resolve()
-            except OSError:
-                continue
-            if path.is_file():
-                return path
-
-    raise RuntimeError(
-        f"Cannot find library {', '.join(lib_dll_names)}; searched directories:\n  "
-        + "\n  ".join(str(p) for p in dll_paths)
-    )
-
-
-def load_lib_ctypes(
-    package: str,
-    target_name: str,
-    mode: str,
-    extra_lib_paths: list[Path] | None = None,
-) -> ctypes.CDLL:
-    """Locate and ``ctypes.CDLL``-load ``lib<target_name>`` for ``package``.
-
-    ``mode`` is one of ``"RTLD_LOCAL"`` / ``"RTLD_GLOBAL"`` (resolved against
-    ``ctypes``). On Windows, the library's directory is registered via
-    ``os.add_dll_directory`` before the load.
-    """
-    lib_path = _find_library_by_basename(package, target_name, extra_lib_paths)
-    if sys.platform.startswith("win32"):
-        os.add_dll_directory(str(lib_path.parent))
-    return ctypes.CDLL(str(lib_path), getattr(ctypes, mode))
+def load_backend_libs(runtime_lib_path: str) -> None:
+    """Try to load each known backend runtime DSO; failures are silent."""
+    runtime_dir = Path(runtime_lib_path).resolve().parent
+    for backend in _BACKEND_RUNTIME_LIBS:
+        try:
+            load_lib_ctypes(
+                package="tvm",
+                target_name=f"tvm_runtime_{backend}",
+                mode="RTLD_GLOBAL",
+                extra_lib_paths=[runtime_dir],
+            )
+        except (OSError, FileNotFoundError, RuntimeError):
+            pass
 
 
 def split_env_var(env_var, split):
diff --git a/python/tvm/runtime/__init__.py b/python/tvm/runtime/__init__.py
index d4d4a6e5a1b4..67839fed02a2 100644
--- a/python/tvm/runtime/__init__.py
+++ b/python/tvm/runtime/__init__.py
@@ -44,7 +44,12 @@
     load_param_dict_from_file,
 )
 
-from . import disco
+try:
+    from . import disco
+except (ImportError, ValueError):
+    # disco C++ runtime is in libtvm_runtime_extra which may not be present.
+    # Make the disco module optional.
+    disco = None  # type: ignore[assignment]
 
 from .support import _regex_match
 from tvm_ffi import Shape as ShapeTuple
diff --git a/tests/python/relax/test_frontend_onnx.py b/tests/python/relax/test_frontend_onnx.py
index 2b0194f08578..427881243663 100644
--- a/tests/python/relax/test_frontend_onnx.py
+++ b/tests/python/relax/test_frontend_onnx.py
@@ -2311,9 +2311,7 @@ def test_layer_norm_with_nd_gamma_beta():
 
 def test_rms_norm():
     # Basic test: default axis=-1
-    rms_norm_node = helper.make_node(
-        "RMSNormalization", ["input", "scale"], ["Y"], epsilon=1e-05
-    )
+    rms_norm_node = helper.make_node("RMSNormalization", ["input", "scale"], ["Y"], epsilon=1e-05)
 
     graph = helper.make_graph(
         [rms_norm_node],