diff --git a/docs/source/basic_tutorials/launcher.md b/docs/source/basic_tutorials/launcher.md index de7c995dc1..1e5b6fd230 100644 --- a/docs/source/basic_tutorials/launcher.md +++ b/docs/source/basic_tutorials/launcher.md @@ -162,7 +162,7 @@ Options: This setting is only applied if there is room in the batch as defined by `max_batch_total_tokens`. [env: WAITING_SERVED_RATIO=] - [default: 1.2] + [default: 0.3] ``` ## MAX_BATCH_PREFILL_TOKENS diff --git a/launcher/src/main.rs b/launcher/src/main.rs index f264e0000d..28226fb426 100644 --- a/launcher/src/main.rs +++ b/launcher/src/main.rs @@ -251,7 +251,7 @@ struct Args { /// /// This setting is only applied if there is room in the batch /// as defined by `max_batch_total_tokens`. - #[clap(default_value = "1.2", long, env)] + #[clap(default_value = "0.3", long, env)] waiting_served_ratio: f32, /// Limits the number of tokens for the prefill operation.