diff --git a/.release-please-manifest.json b/.release-please-manifest.json
index 46b9b6b2..3b005e52 100644
--- a/.release-please-manifest.json
+++ b/.release-please-manifest.json
@@ -1,3 +1,3 @@
{
- ".": "0.1.0-alpha.9"
+ ".": "0.1.0-alpha.10"
}
\ No newline at end of file
diff --git a/.stats.yml b/.stats.yml
index fcc630a7..ce83998c 100644
--- a/.stats.yml
+++ b/.stats.yml
@@ -1,4 +1,4 @@
configured_endpoints: 76
openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/digitalocean%2Fgradientai-e8b3cbc80e18e4f7f277010349f25e1319156704f359911dc464cc21a0d077a6.yml
openapi_spec_hash: c773d792724f5647ae25a5ae4ccec208
-config_hash: 0bc3af28d4abd9be8bcc81f615bc832d
+config_hash: 9b44ce3fd39c43f2001bc11934e6b1b0
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 4d5ad5ba..2969f29b 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,15 @@
# Changelog
+## 0.1.0-alpha.10 (2025-06-28)
+
+Full Changelog: [v0.1.0-alpha.9...v0.1.0-alpha.10](https://github.com/digitalocean/gradientai-python/compare/v0.1.0-alpha.9...v0.1.0-alpha.10)
+
+### Features
+
+* **api:** manual updates ([0e5effc](https://github.com/digitalocean/gradientai-python/commit/0e5effc727cebe88ea38f0ec4c3fcb45ffeb4924))
+* **api:** manual updates ([d510ae0](https://github.com/digitalocean/gradientai-python/commit/d510ae03f13669af7f47093af06a00609e9b7c07))
+* **api:** manual updates ([c5bc3ca](https://github.com/digitalocean/gradientai-python/commit/c5bc3caa477945dc19bbf90661ffeea86370189d))
+
## 0.1.0-alpha.9 (2025-06-28)
Full Changelog: [v0.1.0-alpha.8...v0.1.0-alpha.9](https://github.com/digitalocean/gradientai-python/compare/v0.1.0-alpha.8...v0.1.0-alpha.9)
diff --git a/README.md b/README.md
index d62c11da..fa88a1e0 100644
--- a/README.md
+++ b/README.md
@@ -39,12 +39,13 @@ print(api_client.agents.list())
completion = inference_client.chat.completions.create(
messages=[
{
- "content": "string",
- "role": "system",
+ "role": "user",
+ "content": "What is the capital of France?",
}
],
- model="llama3-8b-instruct",
+ model="llama3.3-70b-instruct",
)
+
print(completion.choices[0].message)
```
@@ -72,13 +73,13 @@ async def main() -> None:
completion = await client.agents.chat.completions.create(
messages=[
{
- "content": "string",
- "role": "system",
+ "role": "user",
+ "content": "What is the capital of France?",
}
],
- model="llama3-8b-instruct",
+ model="llama3.3-70b-instruct",
)
- print(completion.id)
+ print(completion.choices)
asyncio.run(main())
@@ -114,41 +115,61 @@ async def main() -> None:
completion = await client.agents.chat.completions.create(
messages=[
{
- "content": "string",
- "role": "system",
+ "role": "user",
+ "content": "What is the capital of France?",
}
],
- model="llama3-8b-instruct",
+ model="llama3.3-70b-instruct",
)
- print(completion.id)
+ print(completion.choices)
asyncio.run(main())
```
-## Streaming
-Support for streaming responses are available by Server Side Events (SSE) for Serverless Inference and Agents.
-```
-import os
+## Streaming responses
+
+We provide support for streaming responses using Server-Sent Events (SSE).
+
+```python
from gradientai import GradientAI
-client = GradientAI(
- inference_key=os.environ.get("GRADIENTAI_INFERENCE_KEY")
-)
+client = GradientAI()
-response = client.chat.completions.create(
+stream = client.agents.chat.completions.create(
+ messages=[
+ {
+ "role": "user",
+ "content": "What is the capital of France?",
+ }
+ ],
model="llama3.3-70b-instruct",
- messages=[{ "role": "user", "content": "Write a story about a brave squirrel."}],
stream=True,
)
+for completion in stream:
+ print(completion.choices)
+```
-for chunk in response:
- if len(chunk.choices) > 0:
- if chunk.choices[0].delta.content:
- print(chunk.choices[0].delta.content, end="", flush=True)
+The async client uses the exact same interface.
-```
+```python
+from gradientai import AsyncGradientAI
+
+client = AsyncGradientAI()
+stream = await client.agents.chat.completions.create(
+ messages=[
+ {
+ "role": "user",
+ "content": "What is the capital of France?",
+ }
+ ],
+ model="llama3.3-70b-instruct",
+ stream=True,
+)
+async for completion in stream:
+ print(completion.choices)
+```
## Using types
@@ -197,8 +218,14 @@ from gradientai import GradientAI
client = GradientAI()
try:
- client.agents.versions.list(
- uuid="REPLACE_ME",
+ client.agents.chat.completions.create(
+ messages=[
+ {
+ "role": "user",
+ "content": "What is the capital of France?",
+ }
+ ],
+ model="llama3.3-70b-instruct",
)
except gradientai.APIConnectionError as e:
print("The server could not be reached")
@@ -242,8 +269,14 @@ client = GradientAI(
)
# Or, configure per-request:
-client.with_options(max_retries=5).agents.versions.list(
- uuid="REPLACE_ME",
+client.with_options(max_retries=5).agents.chat.completions.create(
+ messages=[
+ {
+ "role": "user",
+ "content": "What is the capital of France?",
+ }
+ ],
+ model="llama3.3-70b-instruct",
)
```
@@ -267,8 +300,14 @@ client = GradientAI(
)
# Override per-request:
-client.with_options(timeout=5.0).agents.versions.list(
- uuid="REPLACE_ME",
+client.with_options(timeout=5.0).agents.chat.completions.create(
+ messages=[
+ {
+ "role": "user",
+ "content": "What is the capital of France?",
+ }
+ ],
+ model="llama3.3-70b-instruct",
)
```
@@ -310,13 +349,17 @@ The "raw" Response object can be accessed by prefixing `.with_raw_response.` to
from gradientai import GradientAI
client = GradientAI()
-response = client.agents.versions.with_raw_response.list(
- uuid="REPLACE_ME",
+response = client.agents.chat.completions.with_raw_response.create(
+ messages=[{
+ "role": "user",
+ "content": "What is the capital of France?",
+ }],
+ model="llama3.3-70b-instruct",
)
print(response.headers.get('X-My-Header'))
-version = response.parse() # get the object that `agents.versions.list()` would have returned
-print(version.agent_versions)
+completion = response.parse() # get the object that `agents.chat.completions.create()` would have returned
+print(completion.choices)
```
These methods return an [`APIResponse`](https://github.com/digitalocean/gradientai-python/tree/main/src/gradientai/_response.py) object.
@@ -330,8 +373,14 @@ The above interface eagerly reads the full response body when you make the reque
To stream the response body, use `.with_streaming_response` instead, which requires a context manager and only reads the response body once you call `.read()`, `.text()`, `.json()`, `.iter_bytes()`, `.iter_text()`, `.iter_lines()` or `.parse()`. In the async client, these are async methods.
```python
-with client.agents.versions.with_streaming_response.list(
- uuid="REPLACE_ME",
+with client.agents.chat.completions.with_streaming_response.create(
+ messages=[
+ {
+ "role": "user",
+ "content": "What is the capital of France?",
+ }
+ ],
+ model="llama3.3-70b-instruct",
) as response:
print(response.headers.get("X-My-Header"))
diff --git a/api.md b/api.md
index 52551f1f..fc8d20eb 100644
--- a/api.md
+++ b/api.md
@@ -253,6 +253,20 @@ Methods:
- client.agents.routes.add(path_child_agent_uuid, \*, path_parent_agent_uuid, \*\*params) -> RouteAddResponse
- client.agents.routes.view(uuid) -> RouteViewResponse
+# Chat
+
+## Completions
+
+Types:
+
+```python
+from gradientai.types.chat import ChatCompletionChunk, CompletionCreateResponse
+```
+
+Methods:
+
+- client.chat.completions.create(\*\*params) -> CompletionCreateResponse
+
# ModelProviders
## Anthropic
@@ -389,20 +403,6 @@ Methods:
- client.knowledge_bases.indexing_jobs.retrieve_data_sources(indexing_job_uuid) -> IndexingJobRetrieveDataSourcesResponse
- client.knowledge_bases.indexing_jobs.update_cancel(path_uuid, \*\*params) -> IndexingJobUpdateCancelResponse
-# Chat
-
-## Completions
-
-Types:
-
-```python
-from gradientai.types.chat import ChatCompletionChunk, CompletionCreateResponse
-```
-
-Methods:
-
-- client.chat.completions.create(\*\*params) -> CompletionCreateResponse
-
# Inference
## APIKeys
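The api.md hunks above only relocate the Chat/Completions section next to the other top-level resources; the documented surface is unchanged: `client.chat.completions.create(**params) -> CompletionCreateResponse`. A hedged sketch of calling that top-level resource directly (parameters borrowed from the README examples; the `.choices[0].message` access assumes the response shape shown there):

```python
from gradientai import GradientAI
from gradientai.types.chat import CompletionCreateResponse

client = GradientAI()

completion = client.chat.completions.create(
    messages=[{"role": "user", "content": "What is the capital of France?"}],
    model="llama3.3-70b-instruct",
)

# Return type as documented in api.md; the .choices access mirrors the README.
assert isinstance(completion, CompletionCreateResponse)
print(completion.choices[0].message)
```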
diff --git a/pyproject.toml b/pyproject.toml
index 87c4aeeb..1a2e8c01 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
[project]
name = "c63a5cfe-b235-4fbe-8bbb-82a9e02a482a-python"
-version = "0.1.0-alpha.9"
+version = "0.1.0-alpha.10"
description = "The official Python library for GradientAI"
dynamic = ["readme"]
license = "Apache-2.0"
diff --git a/src/gradientai/_client.py b/src/gradientai/_client.py
index 939d8c6f..c9fe6733 100644
--- a/src/gradientai/_client.py
+++ b/src/gradientai/_client.py
@@ -125,6 +125,12 @@ def agents(self) -> AgentsResource:
return AgentsResource(self)
+ @cached_property
+ def chat(self) -> ChatResource:
+ from .resources.chat import ChatResource
+
+ return ChatResource(self)
+
@cached_property
def model_providers(self) -> ModelProvidersResource:
from .resources.model_providers import ModelProvidersResource
@@ -143,12 +149,6 @@ def knowledge_bases(self) -> KnowledgeBasesResource:
return KnowledgeBasesResource(self)
- @cached_property
- def chat(self) -> ChatResource:
- from .resources.chat import ChatResource
-
- return ChatResource(self)
-
@cached_property
def inference(self) -> InferenceResource:
from .resources.inference import InferenceResource
@@ -365,6 +365,12 @@ def agents(self) -> AsyncAgentsResource:
return AsyncAgentsResource(self)
+ @cached_property
+ def chat(self) -> AsyncChatResource:
+ from .resources.chat import AsyncChatResource
+
+ return AsyncChatResource(self)
+
@cached_property
def model_providers(self) -> AsyncModelProvidersResource:
from .resources.model_providers import AsyncModelProvidersResource
@@ -383,12 +389,6 @@ def knowledge_bases(self) -> AsyncKnowledgeBasesResource:
return AsyncKnowledgeBasesResource(self)
- @cached_property
- def chat(self) -> AsyncChatResource:
- from .resources.chat import AsyncChatResource
-
- return AsyncChatResource(self)
-
@cached_property
def inference(self) -> AsyncInferenceResource:
from .resources.inference import AsyncInferenceResource
@@ -545,6 +545,12 @@ def agents(self) -> agents.AgentsResourceWithRawResponse:
return AgentsResourceWithRawResponse(self._client.agents)
+ @cached_property
+ def chat(self) -> chat.ChatResourceWithRawResponse:
+ from .resources.chat import ChatResourceWithRawResponse
+
+ return ChatResourceWithRawResponse(self._client.chat)
+
@cached_property
def model_providers(self) -> model_providers.ModelProvidersResourceWithRawResponse:
from .resources.model_providers import ModelProvidersResourceWithRawResponse
@@ -563,12 +569,6 @@ def knowledge_bases(self) -> knowledge_bases.KnowledgeBasesResourceWithRawRespon
return KnowledgeBasesResourceWithRawResponse(self._client.knowledge_bases)
- @cached_property
- def chat(self) -> chat.ChatResourceWithRawResponse:
- from .resources.chat import ChatResourceWithRawResponse
-
- return ChatResourceWithRawResponse(self._client.chat)
-
@cached_property
def inference(self) -> inference.InferenceResourceWithRawResponse:
from .resources.inference import InferenceResourceWithRawResponse
@@ -594,6 +594,12 @@ def agents(self) -> agents.AsyncAgentsResourceWithRawResponse:
return AsyncAgentsResourceWithRawResponse(self._client.agents)
+ @cached_property
+ def chat(self) -> chat.AsyncChatResourceWithRawResponse:
+ from .resources.chat import AsyncChatResourceWithRawResponse
+
+ return AsyncChatResourceWithRawResponse(self._client.chat)
+
@cached_property
def model_providers(self) -> model_providers.AsyncModelProvidersResourceWithRawResponse:
from .resources.model_providers import AsyncModelProvidersResourceWithRawResponse
@@ -612,12 +618,6 @@ def knowledge_bases(self) -> knowledge_bases.AsyncKnowledgeBasesResourceWithRawR
return AsyncKnowledgeBasesResourceWithRawResponse(self._client.knowledge_bases)
- @cached_property
- def chat(self) -> chat.AsyncChatResourceWithRawResponse:
- from .resources.chat import AsyncChatResourceWithRawResponse
-
- return AsyncChatResourceWithRawResponse(self._client.chat)
-
@cached_property
def inference(self) -> inference.AsyncInferenceResourceWithRawResponse:
from .resources.inference import AsyncInferenceResourceWithRawResponse
@@ -643,6 +643,12 @@ def agents(self) -> agents.AgentsResourceWithStreamingResponse:
return AgentsResourceWithStreamingResponse(self._client.agents)
+ @cached_property
+ def chat(self) -> chat.ChatResourceWithStreamingResponse:
+ from .resources.chat import ChatResourceWithStreamingResponse
+
+ return ChatResourceWithStreamingResponse(self._client.chat)
+
@cached_property
def model_providers(self) -> model_providers.ModelProvidersResourceWithStreamingResponse:
from .resources.model_providers import ModelProvidersResourceWithStreamingResponse
@@ -661,12 +667,6 @@ def knowledge_bases(self) -> knowledge_bases.KnowledgeBasesResourceWithStreaming
return KnowledgeBasesResourceWithStreamingResponse(self._client.knowledge_bases)
- @cached_property
- def chat(self) -> chat.ChatResourceWithStreamingResponse:
- from .resources.chat import ChatResourceWithStreamingResponse
-
- return ChatResourceWithStreamingResponse(self._client.chat)
-
@cached_property
def inference(self) -> inference.InferenceResourceWithStreamingResponse:
from .resources.inference import InferenceResourceWithStreamingResponse
@@ -692,6 +692,12 @@ def agents(self) -> agents.AsyncAgentsResourceWithStreamingResponse:
return AsyncAgentsResourceWithStreamingResponse(self._client.agents)
+ @cached_property
+ def chat(self) -> chat.AsyncChatResourceWithStreamingResponse:
+ from .resources.chat import AsyncChatResourceWithStreamingResponse
+
+ return AsyncChatResourceWithStreamingResponse(self._client.chat)
+
@cached_property
def model_providers(self) -> model_providers.AsyncModelProvidersResourceWithStreamingResponse:
from .resources.model_providers import AsyncModelProvidersResourceWithStreamingResponse
@@ -710,12 +716,6 @@ def knowledge_bases(self) -> knowledge_bases.AsyncKnowledgeBasesResourceWithStre
return AsyncKnowledgeBasesResourceWithStreamingResponse(self._client.knowledge_bases)
- @cached_property
- def chat(self) -> chat.AsyncChatResourceWithStreamingResponse:
- from .resources.chat import AsyncChatResourceWithStreamingResponse
-
- return AsyncChatResourceWithStreamingResponse(self._client.chat)
-
@cached_property
def inference(self) -> inference.AsyncInferenceResourceWithStreamingResponse:
from .resources.inference import AsyncInferenceResourceWithStreamingResponse
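In `_client.py` the diff only inserts `chat` as another `cached_property` (and removes the old copy further down); every resource follows the same lazy pattern: a deferred import inside the property body, with the constructed resource cached after the first access. A standalone sketch of that pattern, not the SDK's own code:

```python
from functools import cached_property


class ChatResource:
    """Stand-in for gradientai.resources.chat.ChatResource."""

    def __init__(self, client: "MiniClient") -> None:
        self._client = client


class MiniClient:
    @cached_property
    def chat(self) -> ChatResource:
        # The real client does `from .resources.chat import ChatResource` here,
        # so the submodule is only imported when the attribute is first used.
        return ChatResource(self)


client = MiniClient()
assert client.chat is client.chat  # cached_property reuses the same instance
```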
diff --git a/src/gradientai/_version.py b/src/gradientai/_version.py
index 5cd8ca49..f1fdf3c0 100644
--- a/src/gradientai/_version.py
+++ b/src/gradientai/_version.py
@@ -1,4 +1,4 @@
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
__title__ = "gradientai"
-__version__ = "0.1.0-alpha.9" # x-release-please-version
+__version__ = "0.1.0-alpha.10" # x-release-please-version
diff --git a/src/gradientai/resources/__init__.py b/src/gradientai/resources/__init__.py
index 785bf1ac..b56e7e4c 100644
--- a/src/gradientai/resources/__init__.py
+++ b/src/gradientai/resources/__init__.py
@@ -64,6 +64,12 @@
"AsyncAgentsResourceWithRawResponse",
"AgentsResourceWithStreamingResponse",
"AsyncAgentsResourceWithStreamingResponse",
+ "ChatResource",
+ "AsyncChatResource",
+ "ChatResourceWithRawResponse",
+ "AsyncChatResourceWithRawResponse",
+ "ChatResourceWithStreamingResponse",
+ "AsyncChatResourceWithStreamingResponse",
"ModelProvidersResource",
"AsyncModelProvidersResource",
"ModelProvidersResourceWithRawResponse",
@@ -82,12 +88,6 @@
"AsyncKnowledgeBasesResourceWithRawResponse",
"KnowledgeBasesResourceWithStreamingResponse",
"AsyncKnowledgeBasesResourceWithStreamingResponse",
- "ChatResource",
- "AsyncChatResource",
- "ChatResourceWithRawResponse",
- "AsyncChatResourceWithRawResponse",
- "ChatResourceWithStreamingResponse",
- "AsyncChatResourceWithStreamingResponse",
"InferenceResource",
"AsyncInferenceResource",
"InferenceResourceWithRawResponse",
diff --git a/tests/test_client.py b/tests/test_client.py
index 137fabed..16220895 100644
--- a/tests/test_client.py
+++ b/tests/test_client.py
@@ -819,20 +819,36 @@ def test_parse_retry_after_header(self, remaining_retries: int, retry_after: str
@mock.patch("gradientai._base_client.BaseClient._calculate_retry_timeout", _low_retry_timeout)
@pytest.mark.respx(base_url=base_url)
def test_retrying_timeout_errors_doesnt_leak(self, respx_mock: MockRouter, client: GradientAI) -> None:
- respx_mock.get("/v2/gen-ai/agents/uuid/versions").mock(side_effect=httpx.TimeoutException("Test timeout error"))
+ respx_mock.post("/chat/completions").mock(side_effect=httpx.TimeoutException("Test timeout error"))
with pytest.raises(APITimeoutError):
- client.agents.versions.with_streaming_response.list(uuid="uuid").__enter__()
+ client.agents.chat.completions.with_streaming_response.create(
+ messages=[
+ {
+ "content": "string",
+ "role": "system",
+ }
+ ],
+ model="llama3-8b-instruct",
+ ).__enter__()
assert _get_open_connections(self.client) == 0
@mock.patch("gradientai._base_client.BaseClient._calculate_retry_timeout", _low_retry_timeout)
@pytest.mark.respx(base_url=base_url)
def test_retrying_status_errors_doesnt_leak(self, respx_mock: MockRouter, client: GradientAI) -> None:
- respx_mock.get("/v2/gen-ai/agents/uuid/versions").mock(return_value=httpx.Response(500))
+ respx_mock.post("/chat/completions").mock(return_value=httpx.Response(500))
with pytest.raises(APIStatusError):
- client.agents.versions.with_streaming_response.list(uuid="uuid").__enter__()
+ client.agents.chat.completions.with_streaming_response.create(
+ messages=[
+ {
+ "content": "string",
+ "role": "system",
+ }
+ ],
+ model="llama3-8b-instruct",
+ ).__enter__()
assert _get_open_connections(self.client) == 0
@pytest.mark.parametrize("failures_before_success", [0, 2, 4])
@@ -859,9 +875,17 @@ def retry_handler(_request: httpx.Request) -> httpx.Response:
return httpx.Response(500)
return httpx.Response(200)
- respx_mock.get("/v2/gen-ai/agents/uuid/versions").mock(side_effect=retry_handler)
+ respx_mock.post("/chat/completions").mock(side_effect=retry_handler)
- response = client.agents.versions.with_raw_response.list(uuid="uuid")
+ response = client.agents.chat.completions.with_raw_response.create(
+ messages=[
+ {
+ "content": "string",
+ "role": "system",
+ }
+ ],
+ model="llama3-8b-instruct",
+ )
assert response.retries_taken == failures_before_success
assert int(response.http_request.headers.get("x-stainless-retry-count")) == failures_before_success
@@ -883,10 +907,17 @@ def retry_handler(_request: httpx.Request) -> httpx.Response:
return httpx.Response(500)
return httpx.Response(200)
- respx_mock.get("/v2/gen-ai/agents/uuid/versions").mock(side_effect=retry_handler)
+ respx_mock.post("/chat/completions").mock(side_effect=retry_handler)
- response = client.agents.versions.with_raw_response.list(
- uuid="uuid", extra_headers={"x-stainless-retry-count": Omit()}
+ response = client.agents.chat.completions.with_raw_response.create(
+ messages=[
+ {
+ "content": "string",
+ "role": "system",
+ }
+ ],
+ model="llama3-8b-instruct",
+ extra_headers={"x-stainless-retry-count": Omit()},
)
assert len(response.http_request.headers.get_list("x-stainless-retry-count")) == 0
@@ -908,10 +939,17 @@ def retry_handler(_request: httpx.Request) -> httpx.Response:
return httpx.Response(500)
return httpx.Response(200)
- respx_mock.get("/v2/gen-ai/agents/uuid/versions").mock(side_effect=retry_handler)
+ respx_mock.post("/chat/completions").mock(side_effect=retry_handler)
- response = client.agents.versions.with_raw_response.list(
- uuid="uuid", extra_headers={"x-stainless-retry-count": "42"}
+ response = client.agents.chat.completions.with_raw_response.create(
+ messages=[
+ {
+ "content": "string",
+ "role": "system",
+ }
+ ],
+ model="llama3-8b-instruct",
+ extra_headers={"x-stainless-retry-count": "42"},
)
assert response.http_request.headers.get("x-stainless-retry-count") == "42"
@@ -1734,10 +1772,18 @@ async def test_parse_retry_after_header(self, remaining_retries: int, retry_afte
async def test_retrying_timeout_errors_doesnt_leak(
self, respx_mock: MockRouter, async_client: AsyncGradientAI
) -> None:
- respx_mock.get("/v2/gen-ai/agents/uuid/versions").mock(side_effect=httpx.TimeoutException("Test timeout error"))
+ respx_mock.post("/chat/completions").mock(side_effect=httpx.TimeoutException("Test timeout error"))
with pytest.raises(APITimeoutError):
- await async_client.agents.versions.with_streaming_response.list(uuid="uuid").__aenter__()
+ await async_client.agents.chat.completions.with_streaming_response.create(
+ messages=[
+ {
+ "content": "string",
+ "role": "system",
+ }
+ ],
+ model="llama3-8b-instruct",
+ ).__aenter__()
assert _get_open_connections(self.client) == 0
@@ -1746,10 +1792,18 @@ async def test_retrying_timeout_errors_doesnt_leak(
async def test_retrying_status_errors_doesnt_leak(
self, respx_mock: MockRouter, async_client: AsyncGradientAI
) -> None:
- respx_mock.get("/v2/gen-ai/agents/uuid/versions").mock(return_value=httpx.Response(500))
+ respx_mock.post("/chat/completions").mock(return_value=httpx.Response(500))
with pytest.raises(APIStatusError):
- await async_client.agents.versions.with_streaming_response.list(uuid="uuid").__aenter__()
+ await async_client.agents.chat.completions.with_streaming_response.create(
+ messages=[
+ {
+ "content": "string",
+ "role": "system",
+ }
+ ],
+ model="llama3-8b-instruct",
+ ).__aenter__()
assert _get_open_connections(self.client) == 0
@pytest.mark.parametrize("failures_before_success", [0, 2, 4])
@@ -1777,9 +1831,17 @@ def retry_handler(_request: httpx.Request) -> httpx.Response:
return httpx.Response(500)
return httpx.Response(200)
- respx_mock.get("/v2/gen-ai/agents/uuid/versions").mock(side_effect=retry_handler)
+ respx_mock.post("/chat/completions").mock(side_effect=retry_handler)
- response = await client.agents.versions.with_raw_response.list(uuid="uuid")
+ response = await client.agents.chat.completions.with_raw_response.create(
+ messages=[
+ {
+ "content": "string",
+ "role": "system",
+ }
+ ],
+ model="llama3-8b-instruct",
+ )
assert response.retries_taken == failures_before_success
assert int(response.http_request.headers.get("x-stainless-retry-count")) == failures_before_success
@@ -1802,10 +1864,17 @@ def retry_handler(_request: httpx.Request) -> httpx.Response:
return httpx.Response(500)
return httpx.Response(200)
- respx_mock.get("/v2/gen-ai/agents/uuid/versions").mock(side_effect=retry_handler)
+ respx_mock.post("/chat/completions").mock(side_effect=retry_handler)
- response = await client.agents.versions.with_raw_response.list(
- uuid="uuid", extra_headers={"x-stainless-retry-count": Omit()}
+ response = await client.agents.chat.completions.with_raw_response.create(
+ messages=[
+ {
+ "content": "string",
+ "role": "system",
+ }
+ ],
+ model="llama3-8b-instruct",
+ extra_headers={"x-stainless-retry-count": Omit()},
)
assert len(response.http_request.headers.get_list("x-stainless-retry-count")) == 0
@@ -1828,10 +1897,17 @@ def retry_handler(_request: httpx.Request) -> httpx.Response:
return httpx.Response(500)
return httpx.Response(200)
- respx_mock.get("/v2/gen-ai/agents/uuid/versions").mock(side_effect=retry_handler)
-
- response = await client.agents.versions.with_raw_response.list(
- uuid="uuid", extra_headers={"x-stainless-retry-count": "42"}
+ respx_mock.post("/chat/completions").mock(side_effect=retry_handler)
+
+ response = await client.agents.chat.completions.with_raw_response.create(
+ messages=[
+ {
+ "content": "string",
+ "role": "system",
+ }
+ ],
+ model="llama3-8b-instruct",
+ extra_headers={"x-stainless-retry-count": "42"},
)
assert response.http_request.headers.get("x-stainless-retry-count") == "42"
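The test changes above repoint the existing retry/leak tests from `GET /v2/gen-ai/agents/uuid/versions` to `POST /chat/completions`. A hedged, self-contained sketch of the same respx pattern outside the SDK's fixtures; the base URL is a placeholder and plain `httpx` stands in for the SDK client's retry loop, purely to show how the mocked handler counts attempts:

```python
import httpx
import respx

BASE_URL = "http://127.0.0.1:4010"  # placeholder; the real tests use a shared base_url fixture


@respx.mock(base_url=BASE_URL)
def test_chat_completions_retries(respx_mock: respx.MockRouter) -> None:
    attempts = 0

    def retry_handler(_request: httpx.Request) -> httpx.Response:
        nonlocal attempts
        attempts += 1
        if attempts <= 2:
            return httpx.Response(500)  # first two attempts fail
        return httpx.Response(200, json={"choices": []})

    respx_mock.post("/chat/completions").mock(side_effect=retry_handler)

    # Crude stand-in for the SDK client's built-in retry behaviour.
    with httpx.Client(base_url=BASE_URL) as http:
        for _ in range(3):
            resp = http.post("/chat/completions", json={"model": "llama3-8b-instruct"})
            if resp.status_code < 500:
                break

    assert attempts == 3
    assert resp.status_code == 200
```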