From 1c99226e301d0c196a3a964c7df0c3768e0a73be Mon Sep 17 00:00:00 2001 From: michaelfeil <63565275+michaelfeil@users.noreply.github.com> Date: Tue, 25 Nov 2025 05:22:29 +0000 Subject: [PATCH 1/2] smarter --- router/src/lib.rs | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/router/src/lib.rs b/router/src/lib.rs index d83bd95c..338d150e 100644 --- a/router/src/lib.rs +++ b/router/src/lib.rs @@ -215,8 +215,10 @@ pub async fn run( tracing::info!("Maximum number of tokens per request: {max_input_length}"); - let tokenization_workers = tokenization_workers.unwrap_or_else(num_cpus::get); - + // Fall back to num_cpus - 1 to leave some CPU for the backend, capped at 64 workers. + let tokenization_workers = + tokenization_workers.unwrap_or_else(|| std::cmp::min(std::cmp::max(2, num_cpus::get() - 1), 64)); + // Try to load new ST Config let mut new_st_config: Option = None; let config_path = model_root.join("config_sentence_transformers.json"); From 7b4d4d21abf1b49e9ec8ffb08a543ef3e9c283d0 Mon Sep 17 00:00:00 2001 From: michaelfeil <63565275+michaelfeil@users.noreply.github.com> Date: Wed, 26 Nov 2025 08:04:05 +0000 Subject: [PATCH 2/2] max 1 --- router/src/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/router/src/lib.rs b/router/src/lib.rs index 338d150e..360c6ebc 100644 --- a/router/src/lib.rs +++ b/router/src/lib.rs @@ -217,7 +217,7 @@ pub async fn run( // Fall back to num_cpus - 1 to leave some CPU for the backend, capped at 64 workers. let tokenization_workers = - tokenization_workers.unwrap_or_else(|| std::cmp::min(std::cmp::max(2, num_cpus::get() - 1), 64)); + tokenization_workers.unwrap_or_else(|| std::cmp::min(std::cmp::max(1, num_cpus::get() - 1), 64)); // Try to load new ST Config let mut new_st_config: Option = None;