diff --git a/docs/deployments/realtime-api/predictors.md b/docs/deployments/realtime-api/predictors.md index 97d89e702d..1869a5a133 100644 --- a/docs/deployments/realtime-api/predictors.md +++ b/docs/deployments/realtime-api/predictors.md @@ -532,3 +532,20 @@ def predict(self, payload): content=data, media_type="text/plain") return response ``` + +## Chaining APIs + +It is possible to make requests from one API to another within a Cortex cluster. All running APIs are accessible from within the predictor at `http://api-<api_name>:8888/predict`, where `<api_name>` is the name of the API you are making a request to. + +For example, if there is an API named `text-generator` running in the cluster, you could make a request to it from a different API by using: + +```python +import requests + +class PythonPredictor: + def predict(self, payload): + response = requests.post("http://api-text-generator:8888/predict", json={"text": "machine learning is"}) + # ... +``` + +Note that the autoscaling configuration (i.e. `target_replica_concurrency`) for the API that is making the request should be modified with the understanding that requests will still be considered "in-flight" with the first API as the request is being fulfilled in the second API (during which it will also be considered "in-flight" with the second API). See more details in the [autoscaling docs](autoscaling.md). 
diff --git a/pkg/workloads/cortex/serve/serve.py b/pkg/workloads/cortex/serve/serve.py index c584156616..2d4265743d 100644 --- a/pkg/workloads/cortex/serve/serve.py +++ b/pkg/workloads/cortex/serve/serve.py @@ -21,6 +21,7 @@ from concurrent.futures import ThreadPoolExecutor import threading import math +import uuid import asyncio from typing import Any @@ -121,7 +122,10 @@ async def register_request(request: Request, call_next): try: if is_prediction_request(request): if local_cache["provider"] != "local": - request_id = request.headers["x-request-id"] + if "x-request-id" in request.headers: + request_id = request.headers["x-request-id"] + else: + request_id = uuid.uuid1() file_id = f"/mnt/requests/{request_id}" open(file_id, "a").close()