In [None]:
import asyncio
import httpx

# Inference endpoint exposed by the containerized API.
# Adjust host, port and path to match the actual deployment.
API_URL: str = "http://localhost:8000/inference"

# JSON body that is posted with every request.
PAYLOAD: dict = dict(input="This is a test input for the model.")

# How many requests are issued concurrently in one run.
NUM_REQUESTS: int = 10


async def send_request(client: httpx.AsyncClient, request_id: int) -> None:
    """Send a single POST request to the API and print the outcome.

    Posts ``PAYLOAD`` as JSON to ``API_URL``. On a received response, prints
    the status code together with the decoded JSON body, or the raw text body
    when the response is not valid JSON. Any exception is reported on stdout
    instead of being propagated to the caller.

    Args:
        client: Async HTTP client used to issue the request.
        request_id: Label identifying this request in the printed output.
    """
    try:
        response = await client.post(API_URL, json=PAYLOAD)
        try:
            body = response.json()
        except ValueError:
            # Non-JSON body (e.g. an HTML or plain-text error page): still
            # report the status code instead of masking it as a failure.
            body = response.text
        print(f"Request {request_id}: Status {response.status_code}, Response: {body}")
    except Exception as e:
        # Deliberate best-effort boundary: report and carry on.
        # The class name is included because some transport errors (such as
        # timeouts) can have an empty message.
        print(f"Request {request_id}: Failed with error: {type(e).__name__}: {e}")


async def main():
    """Issue NUM_REQUESTS POST requests concurrently over one shared client.

    Opens a single ``httpx.AsyncClient``, creates one ``send_request``
    coroutine per request id in ``range(NUM_REQUESTS)``, and waits for all of
    them to finish before the client is closed.
    """
    async with httpx.AsyncClient() as http_client:
        pending = (send_request(http_client, idx) for idx in range(NUM_REQUESTS))
        await asyncio.gather(*pending)


if __name__ == "__main__":
    try:
        running_loop = asyncio.get_running_loop()
    except RuntimeError:
        # No event loop is running (plain `python script.py`): start one.
        asyncio.run(main())
    else:
        # An event loop is already running (e.g. inside a Jupyter cell), where
        # asyncio.run() would raise RuntimeError. Schedule main() on that loop
        # instead; keep a reference so the task is not garbage-collected
        # before it completes.
        _main_task = running_loop.create_task(main())
