Skip to content

Commit

Permalink
Added cache_size to the Python binding of the scheduler config.
Browse files (browse the repository at this point in the history)
  • Loading branch information
popovaan committed May 28, 2024
1 parent 65a793a commit 28af66d
Show file tree
Hide file tree
Showing 3 changed files with 3 additions and 2 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,7 @@ PYBIND11_MODULE(py_continuous_batching, m) {
.def_readwrite("max_num_batched_tokens", &SchedulerConfig::max_num_batched_tokens)
.def_readwrite("num_kv_blocks", &SchedulerConfig::num_kv_blocks)
.def_readwrite("block_size", &SchedulerConfig::block_size)
.def_readwrite("cache_size", &SchedulerConfig::cache_size)
.def_readwrite("dynamic_split_fuse", &SchedulerConfig::dynamic_split_fuse)
.def_readwrite("max_num_seqs", &SchedulerConfig::max_num_seqs);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ def get_scheduler_config(scheduler_params: dict = None) -> SchedulerConfig:
# vLLM specific
scheduler_config.max_num_batched_tokens = 256
scheduler_config.max_num_seqs = 256
scheduler_config.num_kv_blocks = 500
scheduler_config.cache_size = 2
else:
for param, value in scheduler_params.items():
setattr(scheduler_config, param, value)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
generate_and_compare_with_reference_text, get_greedy, get_beam_search, get_multinomial_temperature, \
get_multinomial_temperature_and_top_k, get_multinomial_temperature_and_top_p, \
get_multinomial_temperature_top_p_and_top_k, DEFAULT_SCHEDULER_CONFIG, get_greedy_with_repetition_penalty, \
generate_and_compare_with_hf, get_multinomial_temperature_and_repetition_penalty
generate_and_compare_with_hf, get_multinomial_temperature_and_repetition_penalty, get_scheduler_config


@pytest.mark.precommit
Expand Down

0 comments on commit 28af66d

Please sign in to comment.