Skip to content

Commit

Permalink
[Target] Support obtain l2 cache size from target (#16039)
Browse files Browse the repository at this point in the history
* finished

* 1106
  • Loading branch information
Ubospica committed Nov 7, 2023
1 parent 2f20264 commit 448de89
Show file tree
Hide file tree
Showing 4 changed files with 24 additions and 2 deletions.
11 changes: 11 additions & 0 deletions include/tvm/target/tag.h
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,12 @@ class TargetTagRegEntry {
* \param config The config dict for target creation
*/
inline TargetTagRegEntry& set_config(Map<String, ObjectRef> config);
/*!
* \brief Add a key-value pair to the config dict
* \param key The attribute name
* \param value The attribute value
*/
inline TargetTagRegEntry& with_config(String key, ObjectRef value);
/*! \brief Set name of the TargetTag to be the same as registry if it is empty */
inline TargetTagRegEntry& set_name();
/*!
Expand Down Expand Up @@ -131,6 +137,11 @@ inline TargetTagRegEntry& TargetTagRegEntry::set_config(Map<String, ObjectRef> c
return *this;
}

// Insert (or overwrite) one key/value pair in the tag's config dict and
// return *this so registrations can chain further calls fluently.
inline TargetTagRegEntry& TargetTagRegEntry::with_config(String key, ObjectRef value) {
  auto& config = tag_->config;
  config.Set(key, value);
  return *this;
}

inline TargetTagRegEntry& TargetTagRegEntry::set_name() {
if (tag_->name.empty()) {
tag_->name = name;
Expand Down
7 changes: 5 additions & 2 deletions src/target/tag.cc
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,7 @@ TVM_REGISTER_TARGET_TAG("nvidia/jetson-orin-nano")
{"max_threads_per_block", Integer(1024)}, \
{"thread_warp_size", Integer(32)}, \
{"registers_per_block", Integer(RegPerBlock)}, \
});
})

// Naming convention for CUDA tags see https://developer.nvidia.com/cuda-gpus
// Parameters see Table 15. Technical Specifications per Compute Capability
Expand All @@ -129,7 +129,8 @@ TVM_REGISTER_CUDA_TAG("nvidia/tesla-k20", "sm_35", 49152, 65536);
TVM_REGISTER_CUDA_TAG("nvidia/tesla-c2075", "sm_20", 49152, 32768);
TVM_REGISTER_CUDA_TAG("nvidia/tesla-c2050", "sm_20", 49152, 32768);
TVM_REGISTER_CUDA_TAG("nvidia/tesla-c2070", "sm_20", 49152, 32768);
TVM_REGISTER_CUDA_TAG("nvidia/nvidia-a100", "sm_80", 49152, 65536);
TVM_REGISTER_CUDA_TAG("nvidia/nvidia-a100", "sm_80", 49152, 65536)
.with_config("l2_cache_size_bytes", Integer(41943040));
TVM_REGISTER_CUDA_TAG("nvidia/nvidia-a40", "sm_86", 49152, 65536);
TVM_REGISTER_CUDA_TAG("nvidia/nvidia-a30", "sm_80", 49152, 65536);
TVM_REGISTER_CUDA_TAG("nvidia/nvidia-a10", "sm_86", 49152, 65536);
Expand Down Expand Up @@ -231,6 +232,8 @@ TVM_REGISTER_CUDA_TAG("nvidia/nvidia-nvs-310", "sm_21", 49152, 32768);
TVM_REGISTER_CUDA_TAG("nvidia/nvs-5400m", "sm_21", 49152, 32768);
TVM_REGISTER_CUDA_TAG("nvidia/nvs-5200m", "sm_21", 49152, 32768);
TVM_REGISTER_CUDA_TAG("nvidia/nvs-4200m", "sm_21", 49152, 32768);
TVM_REGISTER_CUDA_TAG("nvidia/geforce-rtx-4090", "sm_89", 49152, 65536)
.with_config("l2_cache_size_bytes", Integer(75497472));
TVM_REGISTER_CUDA_TAG("nvidia/geforce-rtx-3090-ti", "sm_86", 49152, 65536);
TVM_REGISTER_CUDA_TAG("nvidia/geforce-rtx-3090", "sm_86", 49152, 65536);
TVM_REGISTER_CUDA_TAG("nvidia/geforce-rtx-3080-ti", "sm_86", 49152, 65536);
Expand Down
1 change: 1 addition & 0 deletions src/target/target_kind.cc
Original file line number Diff line number Diff line change
Expand Up @@ -334,6 +334,7 @@ TVM_REGISTER_TARGET_KIND("cuda", kDLCUDA)
.add_attr_option<Integer>("max_threads_per_block")
.add_attr_option<Integer>("thread_warp_size", Integer(32))
.add_attr_option<Integer>("registers_per_block")
.add_attr_option<Integer>("l2_cache_size_bytes")
.add_attr_option<Integer>("max_num_threads", Integer(1024)) // TODO(@zxybazh): deprecate it
.set_default_keys({"cuda", "gpu"})
.set_target_parser(UpdateCUDAAttrs);
Expand Down
7 changes: 7 additions & 0 deletions tests/python/unittest/test_target_target.py
Original file line number Diff line number Diff line change
Expand Up @@ -478,6 +478,13 @@ def test_target_attr_bool_value():
assert target3.attrs["supports_float16"] == 0


def test_target_attr_l2_cache_size_bytes():
    """Target tags carrying an ``l2_cache_size_bytes`` config expose it as an attr."""
    # Values are registered per-tag via with_config("l2_cache_size_bytes", ...)
    # in src/target/tag.cc; see this change's tag.cc hunk.
    a100 = Target("nvidia/nvidia-a100")
    assert a100.l2_cache_size_bytes == 41943040
    rtx_4090 = Target("nvidia/geforce-rtx-4090")
    assert rtx_4090.l2_cache_size_bytes == 75497472


def test_target_features():
target_no_features = Target("cuda")
assert target_no_features.features
Expand Down

0 comments on commit 448de89

Please sign in to comment.