diff --git a/CMakeLists.txt b/CMakeLists.txt index 6c35af4b95eb..82794b185c3c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -834,7 +834,7 @@ if(USE_CUDA AND USE_CUTLASS) endif() if(USE_CUDA AND USE_NVTX) - set_source_files_properties(src/runtime/nvtx.cc PROPERTIES COMPILE_DEFINITIONS "TVM_NVTX_ENABLED=1") + add_compile_definitions(TVM_NVTX_ENABLED=1) endif() # Note: NCCL, NVSHMEM, RCCL target_link_libraries are handled in the inline diff --git a/include/tvm/runtime/nvtx.h b/include/tvm/support/cuda/nvtx.h similarity index 56% rename from include/tvm/runtime/nvtx.h rename to include/tvm/support/cuda/nvtx.h index 2dbaeb9257a0..ef9083cfcdd3 100644 --- a/include/tvm/runtime/nvtx.h +++ b/include/tvm/support/cuda/nvtx.h @@ -16,14 +16,29 @@ * specific language governing permissions and limitations * under the License. */ -#ifndef TVM_RUNTIME_NVTX_H_ -#define TVM_RUNTIME_NVTX_H_ - -#include +/*! + * \file tvm/support/cuda/nvtx.h + * \brief NVTX scoped range utility (header-only). + * + * Provides NVTXScopedRange: a lightweight RAII wrapper over + * nvtxRangePush/Pop. When TVM_NVTX_ENABLED is not defined or is 0, + * all methods are no-ops compiled away by the optimizer. + */ +#ifndef TVM_SUPPORT_CUDA_NVTX_H_ +#define TVM_SUPPORT_CUDA_NVTX_H_ #include + +#ifndef TVM_NVTX_ENABLED +#define TVM_NVTX_ENABLED 0 +#endif + +#if TVM_NVTX_ENABLED +#include +#endif // TVM_NVTX_ENABLED + namespace tvm { -namespace runtime { +namespace support { /*! * \brief A class to create a NVTX range. No-op if TVM is not built against NVTX. @@ -31,11 +46,19 @@ namespace runtime { class NVTXScopedRange { public: /*! \brief Enter an NVTX scoped range */ - TVM_RUNTIME_DLL explicit NVTXScopedRange(const char* name); +#if TVM_NVTX_ENABLED + explicit NVTXScopedRange(const char* name) { nvtxRangePush(name); } +#else + explicit NVTXScopedRange(const char* name) {} +#endif // TVM_NVTX_ENABLED /*! \brief Enter an NVTX scoped range */ explicit NVTXScopedRange(const std::string& name) : NVTXScopedRange(name.c_str()) {} - /*! \brief Exist an NVTX scoped range */ - TVM_RUNTIME_DLL ~NVTXScopedRange(); + /*! \brief Exit an NVTX scoped range */ +#if TVM_NVTX_ENABLED + ~NVTXScopedRange() { nvtxRangePop(); } +#else + ~NVTXScopedRange() {} +#endif // TVM_NVTX_ENABLED NVTXScopedRange(const NVTXScopedRange& other) = delete; NVTXScopedRange(NVTXScopedRange&& other) = delete; NVTXScopedRange& operator=(const NVTXScopedRange& other) = delete; @@ -43,12 +66,13 @@ class NVTXScopedRange { }; #ifdef _MSC_VER -#define TVM_NVTX_FUNC_SCOPE() NVTXScopedRange _nvtx_func_scope_(__FUNCSIG__); +#define TVM_NVTX_FUNC_SCOPE() ::tvm::support::NVTXScopedRange _nvtx_func_scope_(__FUNCSIG__); #else -#define TVM_NVTX_FUNC_SCOPE() NVTXScopedRange _nvtx_func_scope_(__PRETTY_FUNCTION__); +#define TVM_NVTX_FUNC_SCOPE() \ + ::tvm::support::NVTXScopedRange _nvtx_func_scope_(__PRETTY_FUNCTION__); #endif -} // namespace runtime +} // namespace support } // namespace tvm -#endif // TVM_RUNTIME_NVTX_H_ +#endif // TVM_SUPPORT_CUDA_NVTX_H_ diff --git a/src/runtime/nvtx.cc b/src/runtime/nvtx.cc deleted file mode 100644 index 9cfd788714a2..000000000000 --- a/src/runtime/nvtx.cc +++ /dev/null @@ -1,42 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -#include -#ifndef TVM_NVTX_ENABLED -#define TVM_NVTX_ENABLED 0 -#endif - -#if TVM_NVTX_ENABLED -#include -#endif // TVM_NVTX_ENABLED - -#include - -namespace tvm { -namespace runtime { - -#if TVM_NVTX_ENABLED -NVTXScopedRange::NVTXScopedRange(const char* name) { nvtxRangePush(name); } -NVTXScopedRange::~NVTXScopedRange() { nvtxRangePop(); } -#else -NVTXScopedRange::NVTXScopedRange(const char* name) {} -NVTXScopedRange::~NVTXScopedRange() {} -#endif // TVM_NVTX_ENABLED - -} // namespace runtime -} // namespace tvm diff --git a/src/runtime/vm/attn_utils.h b/src/runtime/vm/attn_utils.h index 9f46a2d2eccd..2ee86bb075b7 100644 --- a/src/runtime/vm/attn_utils.h +++ b/src/runtime/vm/attn_utils.h @@ -27,8 +27,8 @@ #include #include #include -#include #include +#include #include #include diff --git a/src/runtime/vm/paged_kv_cache.cc b/src/runtime/vm/paged_kv_cache.cc index 6e54f0bce092..e5c4576e01c1 100644 --- a/src/runtime/vm/paged_kv_cache.cc +++ b/src/runtime/vm/paged_kv_cache.cc @@ -27,8 +27,8 @@ #include #include #include -#include #include +#include #include #include @@ -2306,7 +2306,7 @@ class PagedAttentionKVCacheObj : public AttentionKVCacheObj { * invoked before running attention computation on device. */ void SyncAuxArrayToDevice() { - NVTXScopedRange range("SyncAuxArrayToDevice"); + support::NVTXScopedRange range("SyncAuxArrayToDevice"); TVM_FFI_ICHECK(dtype_aux_.bits == 32 && dtype_aux_.code == kDLInt); int64_t total_append_length = 0; int num_sequences = cur_append_lengths_.size(); diff --git a/src/runtime/vm/vm.cc b/src/runtime/vm/vm.cc index d6ffab9be018..0d84e64c7a02 100644 --- a/src/runtime/vm/vm.cc +++ b/src/runtime/vm/vm.cc @@ -25,8 +25,8 @@ #include #include #include -#include #include +#include #include @@ -547,7 +547,7 @@ void VirtualMachineImpl::InvokeClosurePacked(const ffi::ObjectRef& closure_or_pa packed_args[0] = static_cast(static_cast(this)); std::copy(args.data(), args.data() + args.size(), packed_args.begin() + 1); { - NVTXScopedRange scope("RelaxVM: " + clo->func_name); + support::NVTXScopedRange scope("RelaxVM: " + clo->func_name); clo->impl.CallPacked(ffi::PackedArgs(packed_args.data(), packed_args.size()), rv); } } diff --git a/web/emcc/wasm_runtime.cc b/web/emcc/wasm_runtime.cc index d2bfe326e1e9..b2b9a470be7e 100644 --- a/web/emcc/wasm_runtime.cc +++ b/web/emcc/wasm_runtime.cc @@ -63,7 +63,6 @@ #include "3rdparty/tvm-ffi/src/ffi/tensor.cc" #include "3rdparty/tvm-ffi/src/ffi/testing/testing.cc" #include "src/runtime/memory/memory_manager.cc" -#include "src/runtime/nvtx.cc" #include "src/runtime/vm/attn_backend.cc" #include "src/runtime/vm/builtin.cc" #include "src/runtime/vm/bytecode.cc"