Added cache_size to the Python binding of the scheduler config.

iefode · May 28, 2024 · 28af66d
1 parent 65a793a
commit 28af66d
Show file tree

Hide file tree

Showing 3 changed files with 3 additions and 2 deletions.
diff --git a/text_generation/causal_lm/cpp/continuous_batching/python/python.cpp b/text_generation/causal_lm/cpp/continuous_batching/python/python.cpp
@@ -90,6 +90,7 @@ PYBIND11_MODULE(py_continuous_batching, m) {
         .def_readwrite("max_num_batched_tokens", &SchedulerConfig::max_num_batched_tokens)
         .def_readwrite("num_kv_blocks", &SchedulerConfig::num_kv_blocks)
         .def_readwrite("block_size", &SchedulerConfig::block_size)
+        .def_readwrite("cache_size", &SchedulerConfig::cache_size)
         .def_readwrite("dynamic_split_fuse", &SchedulerConfig::dynamic_split_fuse)
         .def_readwrite("max_num_seqs", &SchedulerConfig::max_num_seqs);
 

diff --git a/text_generation/causal_lm/cpp/continuous_batching/python/tests/common.py b/text_generation/causal_lm/cpp/continuous_batching/python/tests/common.py
@@ -91,7 +91,7 @@ def get_scheduler_config(scheduler_params: dict = None) -> SchedulerConfig:
         # vLLM specific
         scheduler_config.max_num_batched_tokens = 256
         scheduler_config.max_num_seqs = 256
-        scheduler_config.num_kv_blocks = 500
+        scheduler_config.cache_size = 2
     else:
         for param, value in scheduler_params.items():
             setattr(scheduler_config, param, value)

diff --git a/text_generation/causal_lm/cpp/continuous_batching/python/tests/test_sampling.py b/text_generation/causal_lm/cpp/continuous_batching/python/tests/test_sampling.py
@@ -11,7 +11,7 @@
     generate_and_compare_with_reference_text, get_greedy, get_beam_search, get_multinomial_temperature, \
     get_multinomial_temperature_and_top_k, get_multinomial_temperature_and_top_p, \
     get_multinomial_temperature_top_p_and_top_k, DEFAULT_SCHEDULER_CONFIG, get_greedy_with_repetition_penalty, \
-    generate_and_compare_with_hf, get_multinomial_temperature_and_repetition_penalty
+    generate_and_compare_with_hf, get_multinomial_temperature_and_repetition_penalty, get_scheduler_config
 
 
 @pytest.mark.precommit