From 1d71d3ac5849ed46db782aa11ad95b1ee2f08915 Mon Sep 17 00:00:00 2001 From: vishal Date: Fri, 4 Sep 2020 18:44:07 -0400 Subject: [PATCH 1/3] Update serve.py --- pkg/workloads/cortex/serve/serve.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/workloads/cortex/serve/serve.py b/pkg/workloads/cortex/serve/serve.py index c64df9191b..716ed7f9cd 100644 --- a/pkg/workloads/cortex/serve/serve.py +++ b/pkg/workloads/cortex/serve/serve.py @@ -214,7 +214,7 @@ def predict(request: Request): if util.has_method(predictor_impl, "post_predict"): kwargs = build_post_predict_kwargs(prediction, request) - tasks.add_task(predictor_impl.post_predict, **kwargs) + request_thread_pool.submit(predictor_impl.post_predict, **kwargs) if len(tasks.tasks) > 0: response.background = tasks From 8bf472ba9d87c357f5aa6474044a1e2029bac219 Mon Sep 17 00:00:00 2001 From: vishal Date: Wed, 23 Sep 2020 11:38:49 -0400 Subject: [PATCH 2/3] Update predictors.md --- docs/deployments/realtime-api/predictors.md | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/docs/deployments/realtime-api/predictors.md b/docs/deployments/realtime-api/predictors.md index 61beff0773..afd00ffbd0 100644 --- a/docs/deployments/realtime-api/predictors.md +++ b/docs/deployments/realtime-api/predictors.md @@ -86,6 +86,9 @@ class PythonPredictor: Useful for tasks that the client doesn't need to wait on before receiving a response such as recording metrics or storing results. + It is recommended to specify multiple threads `threads_per_process` + in the api configuration yaml if this function is specified. + Args: response (optional): The response as returned by the predict method. payload (optional): The request payload (see below for the possible @@ -245,6 +248,9 @@ class TensorFlowPredictor: Useful for tasks that the client doesn't need to wait on before receiving a response such as recording metrics or storing results. 
+ It is recommended to specify multiple threads `threads_per_process` + in the api configuration yaml if this function is specified. + Args: response (optional): The response as returned by the predict method. payload (optional): The request payload (see below for the possible @@ -353,6 +359,9 @@ class ONNXPredictor: Useful for tasks that the client doesn't need to wait on before receiving a response such as recording metrics or storing results. + It is recommended to specify multiple threads `threads_per_process` + in the api configuration yaml if this function is specified. + Args: response (optional): The response as returned by the predict method. payload (optional): The request payload (see below for the possible From 5822b14f2fe6466ab38f1dd28ba1facbb6fdbff7 Mon Sep 17 00:00:00 2001 From: vishal Date: Wed, 23 Sep 2020 14:49:53 -0400 Subject: [PATCH 3/3] Update predictors.md --- docs/deployments/realtime-api/predictors.md | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/docs/deployments/realtime-api/predictors.md b/docs/deployments/realtime-api/predictors.md index afd00ffbd0..0dcba3529d 100644 --- a/docs/deployments/realtime-api/predictors.md +++ b/docs/deployments/realtime-api/predictors.md @@ -86,8 +86,9 @@ class PythonPredictor: Useful for tasks that the client doesn't need to wait on before receiving a response such as recording metrics or storing results. - It is recommended to specify multiple threads `threads_per_process` - in the api configuration yaml if this function is specified. + Note: post_predict() and predict() run in the same thread pool. The + size of the thread pool can be increased by updating + `threads_per_process` in the api configuration yaml. Args: response (optional): The response as returned by the predict method. @@ -248,8 +249,9 @@ class TensorFlowPredictor: Useful for tasks that the client doesn't need to wait on before receiving a response such as recording metrics or storing results. 
- It is recommended to specify multiple threads `threads_per_process` - in the api configuration yaml if this function is specified. + Note: post_predict() and predict() run in the same thread pool. The + size of the thread pool can be increased by updating + `threads_per_process` in the api configuration yaml. Args: response (optional): The response as returned by the predict method. @@ -359,8 +361,9 @@ class ONNXPredictor: Useful for tasks that the client doesn't need to wait on before receiving a response such as recording metrics or storing results. - It is recommended to specify multiple threads `threads_per_process` - in the api configuration yaml if this function is specified. + Note: post_predict() and predict() run in the same thread pool. The + size of the thread pool can be increased by updating + `threads_per_process` in the api configuration yaml. Args: response (optional): The response as returned by the predict method.