From 327a274ccf9a0d3495f47f3fbfcc9d91cb3fb91f Mon Sep 17 00:00:00 2001 From: bachelor-dou <15529241576@163.com> Date: Fri, 25 Apr 2025 16:26:49 +0800 Subject: [PATCH 1/9] Simplify the environment variable setting to specify the memory pool type. --- ggml/src/ggml-cann/ggml-cann.cpp | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/ggml/src/ggml-cann/ggml-cann.cpp b/ggml/src/ggml-cann/ggml-cann.cpp index 605b6a73c3a13..1421b4d9e1840 100644 --- a/ggml/src/ggml-cann/ggml-cann.cpp +++ b/ggml/src/ggml-cann/ggml-cann.cpp @@ -213,7 +213,7 @@ struct ggml_cann_pool_buf_prio : public ggml_cann_pool { * @param device The device ID to associate with this buffer pool. */ explicit ggml_cann_pool_buf_prio(int device) : device(device) { - disable_clean = getenv("GGML_CANN_DISABLE_BUF_POOL_CLEAN") != nullptr; + disable_clean = getenv("GGML_CANN_POOL_DISABLE_CLEAN") != nullptr; } /** @@ -409,7 +409,7 @@ struct ggml_cann_pool_buf : public ggml_cann_pool { * @param device The device ID to associate with this buffer pool. */ explicit ggml_cann_pool_buf(int device) : device(device) { - disable_clean = getenv("GGML_CANN_DISABLE_BUF_POOL_CLEAN") != nullptr; + disable_clean = getenv("GGML_CANN_POOL_DISABLE_CLEAN") != nullptr; } /** @@ -730,16 +730,23 @@ struct ggml_cann_pool_vmm : public ggml_cann_pool { */ std::unique_ptr ggml_backend_cann_context::new_pool_for_device( int device) { - bool disable_vmm = (getenv("GGML_CANN_DISABLE_VMM_POOL") != nullptr); - if (!disable_vmm && ggml_cann_info().devices[device].vmm) { - GGML_LOG_INFO("%s: device %d use vmm pool\n", __func__, device); - return std::unique_ptr(new ggml_cann_pool_vmm(device)); - } - bool enable_buf_prio = (getenv("GGML_CANN_ENABLE_BUF_PRIO_POOL") != nullptr); - if (enable_buf_prio) { + const char* env_var = getenv("GGML_CANN_MEM_POOL"); + std::string mem_pool_type(env_var ? env_var : ""); + std::transform(mem_pool_type.begin(), mem_pool_type.end(), mem_pool_type.begin(), ::tolower); + + if (mem_pool_type == "prio") { GGML_LOG_INFO("%s: device %d use buffer pool with priority queue\n", __func__, device); return std::unique_ptr(new ggml_cann_pool_buf_prio(device)); } + + if (mem_pool_type.empty() && ggml_cann_info().devices[device].vmm) { + GGML_LOG_INFO("%s: device %d use vmm pool\n", __func__, device); + return std::unique_ptr(new ggml_cann_pool_vmm(device)); + }else{ + GGML_LOG_INFO("%s: device %d use buffer pool\n", __func__, device); + return std::unique_ptr(new ggml_cann_pool_buf(device)); + } + GGML_LOG_INFO("%s: device %d use buffer pool\n", __func__, device); return std::unique_ptr(new ggml_cann_pool_buf(device)); } From 30a1dc4708e1646cacd4504aac2eb16514397951 Mon Sep 17 00:00:00 2001 From: bachelor-dou <15529241576@163.com> Date: Fri, 25 Apr 2025 17:49:10 +0800 Subject: [PATCH 2/9] Adjust the GGML_CANN_ASYNC_MODE setting to accept yes, enable, 1, or on (case-insensitive) as valid options. --- ggml/src/ggml-cann/common.h | 8 +++++++- ggml/src/ggml-cann/ggml-cann.cpp | 15 +++++++++++++-- 2 files changed, 20 insertions(+), 3 deletions(-) diff --git a/ggml/src/ggml-cann/common.h b/ggml/src/ggml-cann/common.h index 7ef80a4793314..aba281bf49d0b 100644 --- a/ggml/src/ggml-cann/common.h +++ b/ggml/src/ggml-cann/common.h @@ -37,6 +37,7 @@ #include #include #include +#include #include "../include/ggml-cann.h" #include "../include/ggml.h" @@ -103,6 +104,8 @@ const ggml_cann_device_info& ggml_cann_info(); void ggml_cann_set_device(int32_t device); int32_t ggml_cann_get_device(); +static std::string to_lower_case(const char* env_var); + /** * @brief Abstract base class for memory pools used by CANN. */ @@ -354,7 +357,10 @@ struct ggml_backend_cann_context { : device(device), name("CANN" + std::to_string(device)), task_queue(1024, device) { ggml_cann_set_device(device); description = aclrtGetSocName(); - async_mode = (getenv("GGML_CANN_ASYNC_MODE") != nullptr); + + std::string value = to_lower_case(getenv("GGML_CANN_ASYNC_MODE")); + std::set valid_values = {"on", "1", "yes", "enable"}; + async_mode = valid_values.find(value) != valid_values.end(); GGML_LOG_INFO("%s: device %d async operator submission is %s\n", __func__, device, async_mode ? "ON" : "OFF"); } diff --git a/ggml/src/ggml-cann/ggml-cann.cpp b/ggml/src/ggml-cann/ggml-cann.cpp index 1421b4d9e1840..2adb1d068e736 100644 --- a/ggml/src/ggml-cann/ggml-cann.cpp +++ b/ggml/src/ggml-cann/ggml-cann.cpp @@ -92,6 +92,18 @@ int32_t ggml_cann_get_device() { return id; } +/** + * @brief Convert the value obtained from getenv to a lowercase std::string. + * + * @param env_var C-style string(char*) + * @return A string of type std::stringD. + */ +static std::string to_lower_case(const char* env_var){ + std::string mem_pool_type(env_var ? env_var : ""); + std::transform(mem_pool_type.begin(), mem_pool_type.end(), mem_pool_type.begin(), ::tolower); + return mem_pool_type; +} + /** * @brief Initialize the CANN device information. * @@ -731,8 +743,7 @@ struct ggml_cann_pool_vmm : public ggml_cann_pool { std::unique_ptr ggml_backend_cann_context::new_pool_for_device( int device) { const char* env_var = getenv("GGML_CANN_MEM_POOL"); - std::string mem_pool_type(env_var ? env_var : ""); - std::transform(mem_pool_type.begin(), mem_pool_type.end(), mem_pool_type.begin(), ::tolower); + std::string mem_pool_type = to_lower_case(env_var); if (mem_pool_type == "prio") { GGML_LOG_INFO("%s: device %d use buffer pool with priority queue\n", __func__, device); From ed225c0c8b4b27004409efa2f9192e969c11455b Mon Sep 17 00:00:00 2001 From: bachelor-dou <15529241576@163.com> Date: Sun, 27 Apr 2025 09:15:56 +0800 Subject: [PATCH 3/9] update --- ggml/src/ggml-cann/common.h | 4 ++-- ggml/src/ggml-cann/ggml-cann.cpp | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/ggml/src/ggml-cann/common.h b/ggml/src/ggml-cann/common.h index aba281bf49d0b..68849430c3b57 100644 --- a/ggml/src/ggml-cann/common.h +++ b/ggml/src/ggml-cann/common.h @@ -359,10 +359,10 @@ struct ggml_backend_cann_context { description = aclrtGetSocName(); std::string value = to_lower_case(getenv("GGML_CANN_ASYNC_MODE")); - std::set valid_values = {"on", "1", "yes", "enable"}; + std::set valid_values = {"on", "1", "yes", "y", "enable"}; async_mode = valid_values.find(value) != valid_values.end(); GGML_LOG_INFO("%s: device %d async operator submission is %s\n", __func__, - device, async_mode ? "ON" : "OFF"); + device, async_mode ? "ON" : "OFF");git } /** diff --git a/ggml/src/ggml-cann/ggml-cann.cpp b/ggml/src/ggml-cann/ggml-cann.cpp index 2adb1d068e736..c7b690e886de7 100644 --- a/ggml/src/ggml-cann/ggml-cann.cpp +++ b/ggml/src/ggml-cann/ggml-cann.cpp @@ -96,7 +96,7 @@ int32_t ggml_cann_get_device() { * @brief Convert the value obtained from getenv to a lowercase std::string. * * @param env_var C-style string(char*) - * @return A string of type std::stringD. + * @return A string of type std::string. */ static std::string to_lower_case(const char* env_var){ std::string mem_pool_type(env_var ? env_var : ""); From 8cf4d1e52c317c3e0076074abd278077044736aa Mon Sep 17 00:00:00 2001 From: bachelor-dou <15529241576@163.com> Date: Sun, 27 Apr 2025 09:18:18 +0800 Subject: [PATCH 4/9] fix CI --- ggml/src/ggml-cann/common.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ggml/src/ggml-cann/common.h b/ggml/src/ggml-cann/common.h index 68849430c3b57..a4b6cca1f8b71 100644 --- a/ggml/src/ggml-cann/common.h +++ b/ggml/src/ggml-cann/common.h @@ -362,7 +362,7 @@ struct ggml_backend_cann_context { std::set valid_values = {"on", "1", "yes", "y", "enable"}; async_mode = valid_values.find(value) != valid_values.end(); GGML_LOG_INFO("%s: device %d async operator submission is %s\n", __func__, - device, async_mode ? "ON" : "OFF");git + device, async_mode ? "ON" : "OFF"); } /** From 66a4e163d40f57bb8d4c07b37ee235b7e358a925 Mon Sep 17 00:00:00 2001 From: bachelor-dou <15529241576@163.com> Date: Tue, 13 May 2025 17:22:29 +0800 Subject: [PATCH 5/9] update --- ggml/src/ggml-cann/common.h | 11 ++++----- ggml/src/ggml-cann/ggml-cann.cpp | 39 +++++++++++++++++++------------- 2 files changed, 28 insertions(+), 22 deletions(-) diff --git a/ggml/src/ggml-cann/common.h b/ggml/src/ggml-cann/common.h index a4b6cca1f8b71..eda0e4cb7aeba 100644 --- a/ggml/src/ggml-cann/common.h +++ b/ggml/src/ggml-cann/common.h @@ -37,7 +37,7 @@ #include #include #include -#include +#include #include "../include/ggml-cann.h" #include "../include/ggml.h" @@ -104,7 +104,8 @@ const ggml_cann_device_info& ggml_cann_info(); void ggml_cann_set_device(int32_t device); int32_t ggml_cann_get_device(); -static std::string to_lower_case(const char* env_var); +std::optional get_env(const std::string& name); +bool parse_bool(const std::string& value); /** * @brief Abstract base class for memory pools used by CANN. @@ -358,11 +359,9 @@ struct ggml_backend_cann_context { ggml_cann_set_device(device); description = aclrtGetSocName(); - std::string value = to_lower_case(getenv("GGML_CANN_ASYNC_MODE")); - std::set valid_values = {"on", "1", "yes", "y", "enable"}; - async_mode = valid_values.find(value) != valid_values.end(); + bool async_mode = parse_bool(get_env("GGML_CANN_ASYNC_MODE").value_or("")); GGML_LOG_INFO("%s: device %d async operator submission is %s\n", __func__, - device, async_mode ? "ON" : "OFF"); + device, async_mode ? "ON" : "OFF"); } /** diff --git a/ggml/src/ggml-cann/ggml-cann.cpp b/ggml/src/ggml-cann/ggml-cann.cpp index c7b690e886de7..0f40ea04ba87f 100644 --- a/ggml/src/ggml-cann/ggml-cann.cpp +++ b/ggml/src/ggml-cann/ggml-cann.cpp @@ -31,6 +31,8 @@ #include #include #include +#include +#include #include "ggml-impl.h" #include "ggml-backend-impl.h" @@ -93,15 +95,23 @@ int32_t ggml_cann_get_device() { } /** - * @brief Convert the value obtained from getenv to a lowercase std::string. - * - * @param env_var C-style string(char*) - * @return A string of type std::string. + * @brief Get the value of the specified environment variable (name). + * if not empty, return a std::string object */ -static std::string to_lower_case(const char* env_var){ - std::string mem_pool_type(env_var ? env_var : ""); - std::transform(mem_pool_type.begin(), mem_pool_type.end(), mem_pool_type.begin(), ::tolower); - return mem_pool_type; +std::optional get_env(const std::string& name) { + const char* val = std::getenv(name.c_str()); + if (!val) return std::nullopt; + return std::string(val); +} + +/** + * @brief Verify whether the environment variable is a valid value. + */ +bool parse_bool(const std::string& value) { + std::string res = value; + std::transform(res.begin(), res.end(), res.begin(), ::tolower); + std::set valid_values = {"on", "1", "yes", "y", "enable", "true"}; + return valid_values.find(res) != valid_values.end(); } /** @@ -225,7 +235,7 @@ struct ggml_cann_pool_buf_prio : public ggml_cann_pool { * @param device The device ID to associate with this buffer pool. */ explicit ggml_cann_pool_buf_prio(int device) : device(device) { - disable_clean = getenv("GGML_CANN_POOL_DISABLE_CLEAN") != nullptr; + disable_clean = parse_bool(get_env("GGML_CANN_DISABLE_BUF_POOL_CLEAN").value_or("")); } /** @@ -421,7 +431,7 @@ struct ggml_cann_pool_buf : public ggml_cann_pool { * @param device The device ID to associate with this buffer pool. */ explicit ggml_cann_pool_buf(int device) : device(device) { - disable_clean = getenv("GGML_CANN_POOL_DISABLE_CLEAN") != nullptr; + disable_clean = parse_bool(get_env("GGML_CANN_DISABLE_BUF_POOL_CLEAN").value_or("")); } /** @@ -742,20 +752,17 @@ struct ggml_cann_pool_vmm : public ggml_cann_pool { */ std::unique_ptr ggml_backend_cann_context::new_pool_for_device( int device) { - const char* env_var = getenv("GGML_CANN_MEM_POOL"); - std::string mem_pool_type = to_lower_case(env_var); + std::string mem_pool_type = get_env("GGML_CANN_MEM_POOL").value_or(""); + std::transform(mem_pool_type.begin(), mem_pool_type.end(), mem_pool_type.begin(), ::tolower); if (mem_pool_type == "prio") { GGML_LOG_INFO("%s: device %d use buffer pool with priority queue\n", __func__, device); return std::unique_ptr(new ggml_cann_pool_buf_prio(device)); } - if (mem_pool_type.empty() && ggml_cann_info().devices[device].vmm) { + if (ggml_cann_info().devices[device].vmm && mem_pool_type != "leg") { GGML_LOG_INFO("%s: device %d use vmm pool\n", __func__, device); return std::unique_ptr(new ggml_cann_pool_vmm(device)); - }else{ - GGML_LOG_INFO("%s: device %d use buffer pool\n", __func__, device); - return std::unique_ptr(new ggml_cann_pool_buf(device)); } GGML_LOG_INFO("%s: device %d use buffer pool\n", __func__, device); From e720075bd959d5f73bdf5b4da2b818285bda579a Mon Sep 17 00:00:00 2001 From: bachelor-dou <15529241576@163.com> Date: Tue, 13 May 2025 17:25:04 +0800 Subject: [PATCH 6/9] delete whitespace --- ggml/src/ggml-cann/common.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ggml/src/ggml-cann/common.h b/ggml/src/ggml-cann/common.h index eda0e4cb7aeba..ba2cef0c25fb2 100644 --- a/ggml/src/ggml-cann/common.h +++ b/ggml/src/ggml-cann/common.h @@ -361,7 +361,7 @@ struct ggml_backend_cann_context { bool async_mode = parse_bool(get_env("GGML_CANN_ASYNC_MODE").value_or("")); GGML_LOG_INFO("%s: device %d async operator submission is %s\n", __func__, - device, async_mode ? "ON" : "OFF"); + device, async_mode ? "ON" : "OFF"); } /** From 8c3df5e6df21c4b72de345f33501918441e77597 Mon Sep 17 00:00:00 2001 From: bachelor-dou <15529241576@163.com> Date: Tue, 20 May 2025 10:24:42 +0800 Subject: [PATCH 7/9] fix according to review --- ggml/src/ggml-cann/ggml-cann.cpp | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/ggml/src/ggml-cann/ggml-cann.cpp b/ggml/src/ggml-cann/ggml-cann.cpp index 0f40ea04ba87f..360d3ae85f775 100644 --- a/ggml/src/ggml-cann/ggml-cann.cpp +++ b/ggml/src/ggml-cann/ggml-cann.cpp @@ -31,7 +31,7 @@ #include #include #include -#include +#include #include #include "ggml-impl.h" @@ -101,17 +101,17 @@ int32_t ggml_cann_get_device() { std::optional get_env(const std::string& name) { const char* val = std::getenv(name.c_str()); if (!val) return std::nullopt; - return std::string(val); + std::string res = std::string(val); + std::transform(res.begin(), res.end(), res.begin(), ::tolower); + return res; } /** * @brief Verify whether the environment variable is a valid value. */ bool parse_bool(const std::string& value) { - std::string res = value; - std::transform(res.begin(), res.end(), res.begin(), ::tolower); - std::set valid_values = {"on", "1", "yes", "y", "enable", "true"}; - return valid_values.find(res) != valid_values.end(); + std::unordered_set valid_values = {"on", "1", "yes", "y", "enable", "true"}; + return valid_values.find(value) != valid_values.end(); } /** @@ -753,7 +753,6 @@ struct ggml_cann_pool_vmm : public ggml_cann_pool { std::unique_ptr ggml_backend_cann_context::new_pool_for_device( int device) { std::string mem_pool_type = get_env("GGML_CANN_MEM_POOL").value_or(""); - std::transform(mem_pool_type.begin(), mem_pool_type.end(), mem_pool_type.begin(), ::tolower); if (mem_pool_type == "prio") { GGML_LOG_INFO("%s: device %d use buffer pool with priority queue\n", __func__, device); From f66e1528c0cf07b6c9d99d45f9e48957b266f4f2 Mon Sep 17 00:00:00 2001 From: bachelor-dou <15529241576@163.com> Date: Tue, 20 May 2025 11:22:45 +0800 Subject: [PATCH 8/9] update CANN.md --- docs/backend/CANN.md | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/docs/backend/CANN.md b/docs/backend/CANN.md index e172ec5c2a69e..5452333555218 100644 --- a/docs/backend/CANN.md +++ b/docs/backend/CANN.md @@ -8,6 +8,7 @@ - [DataType Supports](#datatype-supports) - [Docker](#docker) - [Linux](#linux) + - [Environment variable setup](#environment-variable-setup) - [TODO](#todo) @@ -281,5 +282,29 @@ cmake --build build --config release Please add the **[CANN]** prefix/tag in issues/PRs titles to help the CANN-team check/address them without delay. +## Environment variable setup + +### GGML_CANN_ASYNC_MODE + + `GGML_CANN_ASYNC_MODE` controls whether asynchronous commit mode is enabled(default closed state), which can help speed up model execution. Yes, enable, y, 1, on ,true(case insensitive) are all valid values to enable `GGML_CANN_ASYNC_MODE`, such as `export GGML_CANN_ASYNC_MODE=yEs`. + + + +### GGML_CANN_MEM_POOL + +three cases here: + +- By setting `export GGML_CANN_MEM_POOL=pRio` (the value is case-insensitive), you specify the use of a priority queue-based memory pool. + +- Legacy memory pools are enabled when VMM is not available or when `export GGML_CANN_MEM_POOL=leg` is set. + +- default VMM + +### GGML_CANN_DISABLE_BUF_POOL_CLEAN + +`GGML_CANN_DISABLE_BUF_POOL_CLEAN` is used to disable the buffer pool cleaning feature. + The values **yes**, **enable**, **y**, **1**, **on**, and **true** (case-insensitive) are all valid to enable `GGML_CANN_DISABLE_BUF_POOL_CLEAN`. such as `export GGML_CANN_DISABLE_BUF_POOL_CLEAN=yEs`. + + ## TODO - Support more models and data types. From cb7c13d3338c8fcc5bd5b0e7720d89d7d56bbd00 Mon Sep 17 00:00:00 2001 From: bachelor-dou <15529241576@163.com> Date: Fri, 23 May 2025 17:50:46 +0800 Subject: [PATCH 9/9] update CANN.md --- docs/backend/CANN.md | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/docs/backend/CANN.md b/docs/backend/CANN.md index 5452333555218..0a0d0d4dc10fd 100644 --- a/docs/backend/CANN.md +++ b/docs/backend/CANN.md @@ -286,25 +286,20 @@ Please add the **[CANN]** prefix/tag in issues/PRs titles to help the CANN-team ### GGML_CANN_ASYNC_MODE - `GGML_CANN_ASYNC_MODE` controls whether asynchronous commit mode is enabled(default closed state), which can help speed up model execution. Yes, enable, y, 1, on ,true(case insensitive) are all valid values to enable `GGML_CANN_ASYNC_MODE`, such as `export GGML_CANN_ASYNC_MODE=yEs`. - - +Enables asynchronous operator submission. Disabled by default. ### GGML_CANN_MEM_POOL -three cases here: +Specifies the memory pool management strategy: -- By setting `export GGML_CANN_MEM_POOL=pRio` (the value is case-insensitive), you specify the use of a priority queue-based memory pool. +- vmm: Utilizes a virtual memory manager pool. If hardware support for VMM is unavailable, falls back to the legacy (leg) memory pool. -- Legacy memory pools are enabled when VMM is not available or when `export GGML_CANN_MEM_POOL=leg` is set. - -- default VMM +- prio: Employs a priority queue-based memory pool management. +- leg: Uses a fixed-size buffer pool. ### GGML_CANN_DISABLE_BUF_POOL_CLEAN -`GGML_CANN_DISABLE_BUF_POOL_CLEAN` is used to disable the buffer pool cleaning feature. - The values **yes**, **enable**, **y**, **1**, **on**, and **true** (case-insensitive) are all valid to enable `GGML_CANN_DISABLE_BUF_POOL_CLEAN`. such as `export GGML_CANN_DISABLE_BUF_POOL_CLEAN=yEs`. - +Controls automatic cleanup of the memory pool. This option is only effective when using the prio or leg memory pool strategies. ## TODO - Support more models and data types.