Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -834,7 +834,7 @@ if(USE_CUDA AND USE_CUTLASS)
endif()

if(USE_CUDA AND USE_NVTX)
set_source_files_properties(src/runtime/nvtx.cc PROPERTIES COMPILE_DEFINITIONS "TVM_NVTX_ENABLED=1")
add_compile_definitions(TVM_NVTX_ENABLED=1)
endif()

# Note: NCCL, NVSHMEM, RCCL target_link_libraries are handled in the inline
Expand Down
48 changes: 36 additions & 12 deletions include/tvm/runtime/nvtx.h → include/tvm/support/cuda/nvtx.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,39 +16,63 @@
* specific language governing permissions and limitations
* under the License.
*/
#ifndef TVM_RUNTIME_NVTX_H_
#define TVM_RUNTIME_NVTX_H_

#include <tvm/runtime/base.h>
/*!
* \file tvm/support/cuda/nvtx.h
* \brief NVTX scoped range utility (header-only).
*
* Provides NVTXScopedRange: a lightweight RAII wrapper over
* nvtxRangePush/Pop. When TVM_NVTX_ENABLED is not defined or is 0,
* the constructor and destructor have empty bodies and make no NVTX calls.
*/
#ifndef TVM_SUPPORT_CUDA_NVTX_H_
#define TVM_SUPPORT_CUDA_NVTX_H_

#include <string>

#ifndef TVM_NVTX_ENABLED
#define TVM_NVTX_ENABLED 0
#endif

#if TVM_NVTX_ENABLED
#include <nvtx3/nvToolsExt.h>
#endif // TVM_NVTX_ENABLED

namespace tvm {
namespace runtime {
namespace support {

/*!
* \brief A class to create an NVTX range. No-op if TVM is not built against NVTX.
*/
class NVTXScopedRange {
public:
/*! \brief Enter an NVTX scoped range */
TVM_RUNTIME_DLL explicit NVTXScopedRange(const char* name);
#if TVM_NVTX_ENABLED
explicit NVTXScopedRange(const char* name) { nvtxRangePush(name); }
#else
explicit NVTXScopedRange(const char* name) {}
#endif // TVM_NVTX_ENABLED
Comment thread
tqchen marked this conversation as resolved.
/*! \brief Enter an NVTX scoped range */
explicit NVTXScopedRange(const std::string& name) : NVTXScopedRange(name.c_str()) {}
/*! \brief Exist an NVTX scoped range */
TVM_RUNTIME_DLL ~NVTXScopedRange();
/*! \brief Exit an NVTX scoped range */
#if TVM_NVTX_ENABLED
~NVTXScopedRange() { nvtxRangePop(); }
#else
~NVTXScopedRange() {}
#endif // TVM_NVTX_ENABLED
NVTXScopedRange(const NVTXScopedRange& other) = delete;
NVTXScopedRange(NVTXScopedRange&& other) = delete;
NVTXScopedRange& operator=(const NVTXScopedRange& other) = delete;
NVTXScopedRange& operator=(NVTXScopedRange&& other) = delete;
};

#ifdef _MSC_VER
#define TVM_NVTX_FUNC_SCOPE() NVTXScopedRange _nvtx_func_scope_(__FUNCSIG__);
#define TVM_NVTX_FUNC_SCOPE() ::tvm::support::NVTXScopedRange _nvtx_func_scope_(__FUNCSIG__);
#else
#define TVM_NVTX_FUNC_SCOPE() NVTXScopedRange _nvtx_func_scope_(__PRETTY_FUNCTION__);
#define TVM_NVTX_FUNC_SCOPE() \
::tvm::support::NVTXScopedRange _nvtx_func_scope_(__PRETTY_FUNCTION__);
#endif

} // namespace runtime
} // namespace support
} // namespace tvm

#endif // TVM_RUNTIME_NVTX_H_
#endif // TVM_SUPPORT_CUDA_NVTX_H_
42 changes: 0 additions & 42 deletions src/runtime/nvtx.cc

This file was deleted.

2 changes: 1 addition & 1 deletion src/runtime/vm/attn_utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,8 @@
#include <tvm/ffi/container/map.h>
#include <tvm/ffi/container/shape.h>
#include <tvm/runtime/logging.h>
#include <tvm/runtime/nvtx.h>
#include <tvm/runtime/tensor.h>
#include <tvm/support/cuda/nvtx.h>

#include <algorithm>
#include <limits>
Expand Down
4 changes: 2 additions & 2 deletions src/runtime/vm/paged_kv_cache.cc
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,8 @@
#include <tvm/runtime/device_api.h>
#include <tvm/runtime/disco/disco_worker.h>
#include <tvm/runtime/memory/memory_manager.h>
#include <tvm/runtime/nvtx.h>
#include <tvm/runtime/tensor.h>
#include <tvm/support/cuda/nvtx.h>

#include <algorithm>
#include <numeric>
Expand Down Expand Up @@ -2306,7 +2306,7 @@ class PagedAttentionKVCacheObj : public AttentionKVCacheObj {
* invoked before running attention computation on device.
*/
void SyncAuxArrayToDevice() {
NVTXScopedRange range("SyncAuxArrayToDevice");
support::NVTXScopedRange range("SyncAuxArrayToDevice");
TVM_FFI_ICHECK(dtype_aux_.bits == 32 && dtype_aux_.code == kDLInt);
int64_t total_append_length = 0;
int num_sequences = cur_append_lengths_.size();
Expand Down
4 changes: 2 additions & 2 deletions src/runtime/vm/vm.cc
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,8 @@
#include <tvm/ffi/function.h>
#include <tvm/runtime/logging.h>
#include <tvm/runtime/memory/memory_manager.h>
#include <tvm/runtime/nvtx.h>
#include <tvm/runtime/vm/vm.h>
#include <tvm/support/cuda/nvtx.h>

#include <thread>

Expand Down Expand Up @@ -547,7 +547,7 @@ void VirtualMachineImpl::InvokeClosurePacked(const ffi::ObjectRef& closure_or_pa
packed_args[0] = static_cast<void*>(static_cast<VirtualMachine*>(this));
std::copy(args.data(), args.data() + args.size(), packed_args.begin() + 1);
{
NVTXScopedRange scope("RelaxVM: " + clo->func_name);
support::NVTXScopedRange scope("RelaxVM: " + clo->func_name);
clo->impl.CallPacked(ffi::PackedArgs(packed_args.data(), packed_args.size()), rv);
}
}
Expand Down
1 change: 0 additions & 1 deletion web/emcc/wasm_runtime.cc
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,6 @@
#include "3rdparty/tvm-ffi/src/ffi/tensor.cc"
#include "3rdparty/tvm-ffi/src/ffi/testing/testing.cc"
#include "src/runtime/memory/memory_manager.cc"
#include "src/runtime/nvtx.cc"
#include "src/runtime/vm/attn_backend.cc"
#include "src/runtime/vm/builtin.cc"
#include "src/runtime/vm/bytecode.cc"
Expand Down
Loading