diff --git a/core/src/infer.rs b/core/src/infer.rs index a2ff22c5..6a41e98c 100644 --- a/core/src/infer.rs +++ b/core/src/infer.rs @@ -550,8 +550,8 @@ async fn backend_task(backend: Backend, mut embed_receiver: mpsc::Receiver { let results = backend.predict(batch.1).await; - // Handle sending responses in another thread to avoid starving the backend - std::thread::spawn(move || match results { + // Handle sending responses in a blocking task to avoid starving the backend + tokio::task::spawn_blocking(move || match results { Ok((mut predictions, inference_duration)) => { batch.0.into_iter().enumerate().for_each(|(i, m)| { let infer_metadata = InferMetadata { @@ -581,8 +581,8 @@ async fn backend_task(backend: Backend, mut embed_receiver: mpsc::Receiver { let results = backend.embed(batch.1).await; - // Handle sending responses in another thread to avoid starving the backend - std::thread::spawn(move || match results { + // Handle sending responses in a blocking task to avoid starving the backend + tokio::task::spawn_blocking(move || match results { Ok((mut embeddings, inference_duration)) => { batch.0.into_iter().enumerate().for_each(|(i, m)| { let metadata = InferMetadata {