Skip to content

Commit

Permalink
Adding some wiggle room.
Browse files Browse the repository at this point in the history
  • Loading branch information
Narsil committed Apr 12, 2024
1 parent 9176ecb commit 289b072
Show file tree
Hide file tree
Showing 3 changed files with 11 additions and 3 deletions.
4 changes: 3 additions & 1 deletion launcher/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1340,7 +1340,9 @@ fn main() -> Result<(), LauncherError> {
let value: u32 = if let Some(max_batch_size) = args.max_batch_size {
max_batch_size * max_input_tokens
} else {
max_input_tokens
// Add some margin to account for potential block_size alignment
// issues.
max_input_tokens + 50
} as u32;
tracing::info!("Default `max_batch_prefill_tokens` to {value}");
value
Expand Down
6 changes: 6 additions & 0 deletions router/src/queue.rs
Original file line number Diff line number Diff line change
Expand Up @@ -190,12 +190,14 @@ impl State {
token_budget: u32,
) -> Option<NextBatch> {
if self.entries.is_empty() {
tracing::debug!("No queue");
return None;
}

// Check if we have enough entries
if let Some(min_size) = min_size {
if self.entries.len() < min_size {
tracing::debug!("Not enough entries");
return None;
}
}
Expand All @@ -218,6 +220,7 @@ impl State {
// was dropped by the client)
if entry.response_tx.is_closed() {
metrics::increment_counter!("tgi_request_failure", "err" => "dropped");
tracing::debug!("Dropping entry");
continue;
}

Expand Down Expand Up @@ -254,10 +257,12 @@ impl State {
{
// Entry is over budget
// Add it back to the front
tracing::debug!("Over budget: prefill_tokens={prefill_tokens} > {prefill_token_budget} || {prefill_tokens} + {decode_tokens} + {} > {token_budget}", self.speculate);
self.entries.push_front((id, entry));
break;
}

tracing::debug!("Accepting entry");
// Create a new span to link the batch back to this entry
let entry_batch_span = info_span!(parent: &entry.span, "infer");
// Add relationships
Expand Down Expand Up @@ -288,6 +293,7 @@ impl State {

// Empty batch
if batch_requests.is_empty() {
tracing::debug!("Filterered out all entries");
return None;
}

Expand Down
4 changes: 2 additions & 2 deletions router/src/validation.rs
Original file line number Diff line number Diff line change
Expand Up @@ -161,13 +161,13 @@ impl Validation {
} else {
return Err(ValidationError::UnsetMaxNewTokens);
};
let input_length = truncate.unwrap_or(self.max_input_length);
let mut input_length = truncate.unwrap_or(self.max_input_length);

// We don't have a tokenizer, so we have no idea how long the query is; let
// it through and hope for the best.
// Validate MaxNewTokens
if (input_length as u32 + max_new_tokens) > self.max_total_tokens as u32 {
// input_length = input_length.saturating_sub(max_new_tokens as usize);
input_length = input_length.saturating_sub(max_new_tokens as usize);
// return Err(ValidationError::MaxNewTokens(
// self.max_total_tokens - self.max_input_length,
// max_new_tokens,
Expand Down

0 comments on commit 289b072

Please sign in to comment.