diff --git a/.release-please-manifest.json b/.release-please-manifest.json index 46b9b6b2..3b005e52 100644 --- a/.release-please-manifest.json +++ b/.release-please-manifest.json @@ -1,3 +1,3 @@ { - ".": "0.1.0-alpha.9" + ".": "0.1.0-alpha.10" } \ No newline at end of file diff --git a/.stats.yml b/.stats.yml index fcc630a7..ce83998c 100644 --- a/.stats.yml +++ b/.stats.yml @@ -1,4 +1,4 @@ configured_endpoints: 76 openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/digitalocean%2Fgradientai-e8b3cbc80e18e4f7f277010349f25e1319156704f359911dc464cc21a0d077a6.yml openapi_spec_hash: c773d792724f5647ae25a5ae4ccec208 -config_hash: 0bc3af28d4abd9be8bcc81f615bc832d +config_hash: 9b44ce3fd39c43f2001bc11934e6b1b0 diff --git a/CHANGELOG.md b/CHANGELOG.md index 4d5ad5ba..2969f29b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,15 @@ # Changelog +## 0.1.0-alpha.10 (2025-06-28) + +Full Changelog: [v0.1.0-alpha.9...v0.1.0-alpha.10](https://github.com/digitalocean/gradientai-python/compare/v0.1.0-alpha.9...v0.1.0-alpha.10) + +### Features + +* **api:** manual updates ([0e5effc](https://github.com/digitalocean/gradientai-python/commit/0e5effc727cebe88ea38f0ec4c3fcb45ffeb4924)) +* **api:** manual updates ([d510ae0](https://github.com/digitalocean/gradientai-python/commit/d510ae03f13669af7f47093af06a00609e9b7c07)) +* **api:** manual updates ([c5bc3ca](https://github.com/digitalocean/gradientai-python/commit/c5bc3caa477945dc19bbf90661ffeea86370189d)) + ## 0.1.0-alpha.9 (2025-06-28) Full Changelog: [v0.1.0-alpha.8...v0.1.0-alpha.9](https://github.com/digitalocean/gradientai-python/compare/v0.1.0-alpha.8...v0.1.0-alpha.9) diff --git a/README.md b/README.md index d62c11da..fa88a1e0 100644 --- a/README.md +++ b/README.md @@ -39,12 +39,13 @@ print(api_client.agents.list()) completion = inference_client.chat.completions.create( messages=[ { - "content": "string", - "role": "system", + "role": "user", + "content": "What is the capital of France?", } ], - model="llama3-8b-instruct", + model="llama3.3-70b-instruct", ) + print(completion.choices[0].message) ``` @@ -72,13 +73,13 @@ async def main() -> None: completion = await client.agents.chat.completions.create( messages=[ { - "content": "string", - "role": "system", + "role": "user", + "content": "What is the capital of France?", } ], - model="llama3-8b-instruct", + model="llama3.3-70b-instruct", ) - print(completion.id) + print(completion.choices) asyncio.run(main()) @@ -114,41 +115,61 @@ async def main() -> None: completion = await client.agents.chat.completions.create( messages=[ { - "content": "string", - "role": "system", + "role": "user", + "content": "What is the capital of France?", } ], - model="llama3-8b-instruct", + model="llama3.3-70b-instruct", ) - print(completion.id) + print(completion.choices) asyncio.run(main()) ``` -## Streaming -Support for streaming responses are available by Server Side Events (SSE) for Serverless Inference and Agents. -``` -import os +## Streaming responses + +We provide support for streaming responses using Server Side Events (SSE). 
+ +```python from gradientai import GradientAI -client = GradientAI( - inference_key=os.environ.get("GRADIENTAI_INFERENCE_KEY") -) +client = GradientAI() -response = client.chat.completions.create( +stream = client.agents.chat.completions.create( + messages=[ + { + "role": "user", + "content": "What is the capital of France?", + } + ], model="llama3.3-70b-instruct", - messages=[{ "role": "user", "content": "Write a story about a brave squirrel."}], stream=True, ) +for completion in stream: + print(completion.choices) +``` -for chunk in response: - if len(chunk.choices) > 0: - if chunk.choices[0].delta.content: - print(chunk.choices[0].delta.content, end="", flush=True) +The async client uses the exact same interface. -``` +```python +from gradientai import AsyncGradientAI + +client = AsyncGradientAI() +stream = await client.agents.chat.completions.create( + messages=[ + { + "role": "user", + "content": "What is the capital of France?", + } + ], + model="llama3.3-70b-instruct", + stream=True, +) +async for completion in stream: + print(completion.choices) +``` ## Using types @@ -197,8 +218,14 @@ from gradientai import GradientAI client = GradientAI() try: - client.agents.versions.list( - uuid="REPLACE_ME", + client.agents.chat.completions.create( + messages=[ + { + "role": "user", + "content": "What is the capital of France?", + } + ], + model="llama3.3-70b-instruct", ) except gradientai.APIConnectionError as e: print("The server could not be reached") @@ -242,8 +269,14 @@ client = GradientAI( ) # Or, configure per-request: -client.with_options(max_retries=5).agents.versions.list( - uuid="REPLACE_ME", +client.with_options(max_retries=5).agents.chat.completions.create( + messages=[ + { + "role": "user", + "content": "What is the capital of France?", + } + ], + model="llama3.3-70b-instruct", ) ``` @@ -267,8 +300,14 @@ client = GradientAI( ) # Override per-request: -client.with_options(timeout=5.0).agents.versions.list( - uuid="REPLACE_ME", +client.with_options(timeout=5.0).agents.chat.completions.create( + messages=[ + { + "role": "user", + "content": "What is the capital of France?", + } + ], + model="llama3.3-70b-instruct", ) ``` @@ -310,13 +349,17 @@ The "raw" Response object can be accessed by prefixing `.with_raw_response.` to from gradientai import GradientAI client = GradientAI() -response = client.agents.versions.with_raw_response.list( - uuid="REPLACE_ME", +response = client.agents.chat.completions.with_raw_response.create( + messages=[{ + "role": "user", + "content": "What is the capital of France?", + }], + model="llama3.3-70b-instruct", ) print(response.headers.get('X-My-Header')) -version = response.parse() # get the object that `agents.versions.list()` would have returned -print(version.agent_versions) +completion = response.parse() # get the object that `agents.chat.completions.create()` would have returned +print(completion.choices) ``` These methods return an [`APIResponse`](https://github.com/digitalocean/gradientai-python/tree/main/src/gradientai/_response.py) object. @@ -330,8 +373,14 @@ The above interface eagerly reads the full response body when you make the reque To stream the response body, use `.with_streaming_response` instead, which requires a context manager and only reads the response body once you call `.read()`, `.text()`, `.json()`, `.iter_bytes()`, `.iter_text()`, `.iter_lines()` or `.parse()`. In the async client, these are async methods. 
```python -with client.agents.versions.with_streaming_response.list( - uuid="REPLACE_ME", +with client.agents.chat.completions.with_streaming_response.create( + messages=[ + { + "role": "user", + "content": "What is the capital of France?", + } + ], + model="llama3.3-70b-instruct", ) as response: print(response.headers.get("X-My-Header")) diff --git a/api.md b/api.md index 52551f1f..fc8d20eb 100644 --- a/api.md +++ b/api.md @@ -253,6 +253,20 @@ Methods: - client.agents.routes.add(path_child_agent_uuid, \*, path_parent_agent_uuid, \*\*params) -> RouteAddResponse - client.agents.routes.view(uuid) -> RouteViewResponse +# Chat + +## Completions + +Types: + +```python +from gradientai.types.chat import ChatCompletionChunk, CompletionCreateResponse +``` + +Methods: + +- client.chat.completions.create(\*\*params) -> CompletionCreateResponse + # ModelProviders ## Anthropic @@ -389,20 +403,6 @@ Methods: - client.knowledge_bases.indexing_jobs.retrieve_data_sources(indexing_job_uuid) -> IndexingJobRetrieveDataSourcesResponse - client.knowledge_bases.indexing_jobs.update_cancel(path_uuid, \*\*params) -> IndexingJobUpdateCancelResponse -# Chat - -## Completions - -Types: - -```python -from gradientai.types.chat import ChatCompletionChunk, CompletionCreateResponse -``` - -Methods: - -- client.chat.completions.create(\*\*params) -> CompletionCreateResponse - # Inference ## APIKeys diff --git a/pyproject.toml b/pyproject.toml index 87c4aeeb..1a2e8c01 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "c63a5cfe-b235-4fbe-8bbb-82a9e02a482a-python" -version = "0.1.0-alpha.9" +version = "0.1.0-alpha.10" description = "The official Python library for GradientAI" dynamic = ["readme"] license = "Apache-2.0" diff --git a/src/gradientai/_client.py b/src/gradientai/_client.py index 939d8c6f..c9fe6733 100644 --- a/src/gradientai/_client.py +++ b/src/gradientai/_client.py @@ -125,6 +125,12 @@ def agents(self) -> AgentsResource: return AgentsResource(self) + @cached_property + def chat(self) -> ChatResource: + from .resources.chat import ChatResource + + return ChatResource(self) + @cached_property def model_providers(self) -> ModelProvidersResource: from .resources.model_providers import ModelProvidersResource @@ -143,12 +149,6 @@ def knowledge_bases(self) -> KnowledgeBasesResource: return KnowledgeBasesResource(self) - @cached_property - def chat(self) -> ChatResource: - from .resources.chat import ChatResource - - return ChatResource(self) - @cached_property def inference(self) -> InferenceResource: from .resources.inference import InferenceResource @@ -365,6 +365,12 @@ def agents(self) -> AsyncAgentsResource: return AsyncAgentsResource(self) + @cached_property + def chat(self) -> AsyncChatResource: + from .resources.chat import AsyncChatResource + + return AsyncChatResource(self) + @cached_property def model_providers(self) -> AsyncModelProvidersResource: from .resources.model_providers import AsyncModelProvidersResource @@ -383,12 +389,6 @@ def knowledge_bases(self) -> AsyncKnowledgeBasesResource: return AsyncKnowledgeBasesResource(self) - @cached_property - def chat(self) -> AsyncChatResource: - from .resources.chat import AsyncChatResource - - return AsyncChatResource(self) - @cached_property def inference(self) -> AsyncInferenceResource: from .resources.inference import AsyncInferenceResource @@ -545,6 +545,12 @@ def agents(self) -> agents.AgentsResourceWithRawResponse: return AgentsResourceWithRawResponse(self._client.agents) + @cached_property + def chat(self) -> 
chat.ChatResourceWithRawResponse: + from .resources.chat import ChatResourceWithRawResponse + + return ChatResourceWithRawResponse(self._client.chat) + @cached_property def model_providers(self) -> model_providers.ModelProvidersResourceWithRawResponse: from .resources.model_providers import ModelProvidersResourceWithRawResponse @@ -563,12 +569,6 @@ def knowledge_bases(self) -> knowledge_bases.KnowledgeBasesResourceWithRawRespon return KnowledgeBasesResourceWithRawResponse(self._client.knowledge_bases) - @cached_property - def chat(self) -> chat.ChatResourceWithRawResponse: - from .resources.chat import ChatResourceWithRawResponse - - return ChatResourceWithRawResponse(self._client.chat) - @cached_property def inference(self) -> inference.InferenceResourceWithRawResponse: from .resources.inference import InferenceResourceWithRawResponse @@ -594,6 +594,12 @@ def agents(self) -> agents.AsyncAgentsResourceWithRawResponse: return AsyncAgentsResourceWithRawResponse(self._client.agents) + @cached_property + def chat(self) -> chat.AsyncChatResourceWithRawResponse: + from .resources.chat import AsyncChatResourceWithRawResponse + + return AsyncChatResourceWithRawResponse(self._client.chat) + @cached_property def model_providers(self) -> model_providers.AsyncModelProvidersResourceWithRawResponse: from .resources.model_providers import AsyncModelProvidersResourceWithRawResponse @@ -612,12 +618,6 @@ def knowledge_bases(self) -> knowledge_bases.AsyncKnowledgeBasesResourceWithRawR return AsyncKnowledgeBasesResourceWithRawResponse(self._client.knowledge_bases) - @cached_property - def chat(self) -> chat.AsyncChatResourceWithRawResponse: - from .resources.chat import AsyncChatResourceWithRawResponse - - return AsyncChatResourceWithRawResponse(self._client.chat) - @cached_property def inference(self) -> inference.AsyncInferenceResourceWithRawResponse: from .resources.inference import AsyncInferenceResourceWithRawResponse @@ -643,6 +643,12 @@ def agents(self) -> agents.AgentsResourceWithStreamingResponse: return AgentsResourceWithStreamingResponse(self._client.agents) + @cached_property + def chat(self) -> chat.ChatResourceWithStreamingResponse: + from .resources.chat import ChatResourceWithStreamingResponse + + return ChatResourceWithStreamingResponse(self._client.chat) + @cached_property def model_providers(self) -> model_providers.ModelProvidersResourceWithStreamingResponse: from .resources.model_providers import ModelProvidersResourceWithStreamingResponse @@ -661,12 +667,6 @@ def knowledge_bases(self) -> knowledge_bases.KnowledgeBasesResourceWithStreaming return KnowledgeBasesResourceWithStreamingResponse(self._client.knowledge_bases) - @cached_property - def chat(self) -> chat.ChatResourceWithStreamingResponse: - from .resources.chat import ChatResourceWithStreamingResponse - - return ChatResourceWithStreamingResponse(self._client.chat) - @cached_property def inference(self) -> inference.InferenceResourceWithStreamingResponse: from .resources.inference import InferenceResourceWithStreamingResponse @@ -692,6 +692,12 @@ def agents(self) -> agents.AsyncAgentsResourceWithStreamingResponse: return AsyncAgentsResourceWithStreamingResponse(self._client.agents) + @cached_property + def chat(self) -> chat.AsyncChatResourceWithStreamingResponse: + from .resources.chat import AsyncChatResourceWithStreamingResponse + + return AsyncChatResourceWithStreamingResponse(self._client.chat) + @cached_property def model_providers(self) -> model_providers.AsyncModelProvidersResourceWithStreamingResponse: from 
.resources.model_providers import AsyncModelProvidersResourceWithStreamingResponse @@ -710,12 +716,6 @@ def knowledge_bases(self) -> knowledge_bases.AsyncKnowledgeBasesResourceWithStre return AsyncKnowledgeBasesResourceWithStreamingResponse(self._client.knowledge_bases) - @cached_property - def chat(self) -> chat.AsyncChatResourceWithStreamingResponse: - from .resources.chat import AsyncChatResourceWithStreamingResponse - - return AsyncChatResourceWithStreamingResponse(self._client.chat) - @cached_property def inference(self) -> inference.AsyncInferenceResourceWithStreamingResponse: from .resources.inference import AsyncInferenceResourceWithStreamingResponse diff --git a/src/gradientai/_version.py b/src/gradientai/_version.py index 5cd8ca49..f1fdf3c0 100644 --- a/src/gradientai/_version.py +++ b/src/gradientai/_version.py @@ -1,4 +1,4 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. __title__ = "gradientai" -__version__ = "0.1.0-alpha.9" # x-release-please-version +__version__ = "0.1.0-alpha.10" # x-release-please-version diff --git a/src/gradientai/resources/__init__.py b/src/gradientai/resources/__init__.py index 785bf1ac..b56e7e4c 100644 --- a/src/gradientai/resources/__init__.py +++ b/src/gradientai/resources/__init__.py @@ -64,6 +64,12 @@ "AsyncAgentsResourceWithRawResponse", "AgentsResourceWithStreamingResponse", "AsyncAgentsResourceWithStreamingResponse", + "ChatResource", + "AsyncChatResource", + "ChatResourceWithRawResponse", + "AsyncChatResourceWithRawResponse", + "ChatResourceWithStreamingResponse", + "AsyncChatResourceWithStreamingResponse", "ModelProvidersResource", "AsyncModelProvidersResource", "ModelProvidersResourceWithRawResponse", @@ -82,12 +88,6 @@ "AsyncKnowledgeBasesResourceWithRawResponse", "KnowledgeBasesResourceWithStreamingResponse", "AsyncKnowledgeBasesResourceWithStreamingResponse", - "ChatResource", - "AsyncChatResource", - "ChatResourceWithRawResponse", - "AsyncChatResourceWithRawResponse", - "ChatResourceWithStreamingResponse", - "AsyncChatResourceWithStreamingResponse", "InferenceResource", "AsyncInferenceResource", "InferenceResourceWithRawResponse", diff --git a/tests/test_client.py b/tests/test_client.py index 137fabed..16220895 100644 --- a/tests/test_client.py +++ b/tests/test_client.py @@ -819,20 +819,36 @@ def test_parse_retry_after_header(self, remaining_retries: int, retry_after: str @mock.patch("gradientai._base_client.BaseClient._calculate_retry_timeout", _low_retry_timeout) @pytest.mark.respx(base_url=base_url) def test_retrying_timeout_errors_doesnt_leak(self, respx_mock: MockRouter, client: GradientAI) -> None: - respx_mock.get("/v2/gen-ai/agents/uuid/versions").mock(side_effect=httpx.TimeoutException("Test timeout error")) + respx_mock.post("/chat/completions").mock(side_effect=httpx.TimeoutException("Test timeout error")) with pytest.raises(APITimeoutError): - client.agents.versions.with_streaming_response.list(uuid="uuid").__enter__() + client.agents.chat.completions.with_streaming_response.create( + messages=[ + { + "content": "string", + "role": "system", + } + ], + model="llama3-8b-instruct", + ).__enter__() assert _get_open_connections(self.client) == 0 @mock.patch("gradientai._base_client.BaseClient._calculate_retry_timeout", _low_retry_timeout) @pytest.mark.respx(base_url=base_url) def test_retrying_status_errors_doesnt_leak(self, respx_mock: MockRouter, client: GradientAI) -> None: - respx_mock.get("/v2/gen-ai/agents/uuid/versions").mock(return_value=httpx.Response(500)) + 
respx_mock.post("/chat/completions").mock(return_value=httpx.Response(500)) with pytest.raises(APIStatusError): - client.agents.versions.with_streaming_response.list(uuid="uuid").__enter__() + client.agents.chat.completions.with_streaming_response.create( + messages=[ + { + "content": "string", + "role": "system", + } + ], + model="llama3-8b-instruct", + ).__enter__() assert _get_open_connections(self.client) == 0 @pytest.mark.parametrize("failures_before_success", [0, 2, 4]) @@ -859,9 +875,17 @@ def retry_handler(_request: httpx.Request) -> httpx.Response: return httpx.Response(500) return httpx.Response(200) - respx_mock.get("/v2/gen-ai/agents/uuid/versions").mock(side_effect=retry_handler) + respx_mock.post("/chat/completions").mock(side_effect=retry_handler) - response = client.agents.versions.with_raw_response.list(uuid="uuid") + response = client.agents.chat.completions.with_raw_response.create( + messages=[ + { + "content": "string", + "role": "system", + } + ], + model="llama3-8b-instruct", + ) assert response.retries_taken == failures_before_success assert int(response.http_request.headers.get("x-stainless-retry-count")) == failures_before_success @@ -883,10 +907,17 @@ def retry_handler(_request: httpx.Request) -> httpx.Response: return httpx.Response(500) return httpx.Response(200) - respx_mock.get("/v2/gen-ai/agents/uuid/versions").mock(side_effect=retry_handler) + respx_mock.post("/chat/completions").mock(side_effect=retry_handler) - response = client.agents.versions.with_raw_response.list( - uuid="uuid", extra_headers={"x-stainless-retry-count": Omit()} + response = client.agents.chat.completions.with_raw_response.create( + messages=[ + { + "content": "string", + "role": "system", + } + ], + model="llama3-8b-instruct", + extra_headers={"x-stainless-retry-count": Omit()}, ) assert len(response.http_request.headers.get_list("x-stainless-retry-count")) == 0 @@ -908,10 +939,17 @@ def retry_handler(_request: httpx.Request) -> httpx.Response: return httpx.Response(500) return httpx.Response(200) - respx_mock.get("/v2/gen-ai/agents/uuid/versions").mock(side_effect=retry_handler) + respx_mock.post("/chat/completions").mock(side_effect=retry_handler) - response = client.agents.versions.with_raw_response.list( - uuid="uuid", extra_headers={"x-stainless-retry-count": "42"} + response = client.agents.chat.completions.with_raw_response.create( + messages=[ + { + "content": "string", + "role": "system", + } + ], + model="llama3-8b-instruct", + extra_headers={"x-stainless-retry-count": "42"}, ) assert response.http_request.headers.get("x-stainless-retry-count") == "42" @@ -1734,10 +1772,18 @@ async def test_parse_retry_after_header(self, remaining_retries: int, retry_afte async def test_retrying_timeout_errors_doesnt_leak( self, respx_mock: MockRouter, async_client: AsyncGradientAI ) -> None: - respx_mock.get("/v2/gen-ai/agents/uuid/versions").mock(side_effect=httpx.TimeoutException("Test timeout error")) + respx_mock.post("/chat/completions").mock(side_effect=httpx.TimeoutException("Test timeout error")) with pytest.raises(APITimeoutError): - await async_client.agents.versions.with_streaming_response.list(uuid="uuid").__aenter__() + await async_client.agents.chat.completions.with_streaming_response.create( + messages=[ + { + "content": "string", + "role": "system", + } + ], + model="llama3-8b-instruct", + ).__aenter__() assert _get_open_connections(self.client) == 0 @@ -1746,10 +1792,18 @@ async def test_retrying_timeout_errors_doesnt_leak( async def test_retrying_status_errors_doesnt_leak( 
self, respx_mock: MockRouter, async_client: AsyncGradientAI ) -> None: - respx_mock.get("/v2/gen-ai/agents/uuid/versions").mock(return_value=httpx.Response(500)) + respx_mock.post("/chat/completions").mock(return_value=httpx.Response(500)) with pytest.raises(APIStatusError): - await async_client.agents.versions.with_streaming_response.list(uuid="uuid").__aenter__() + await async_client.agents.chat.completions.with_streaming_response.create( + messages=[ + { + "content": "string", + "role": "system", + } + ], + model="llama3-8b-instruct", + ).__aenter__() assert _get_open_connections(self.client) == 0 @pytest.mark.parametrize("failures_before_success", [0, 2, 4]) @@ -1777,9 +1831,17 @@ def retry_handler(_request: httpx.Request) -> httpx.Response: return httpx.Response(500) return httpx.Response(200) - respx_mock.get("/v2/gen-ai/agents/uuid/versions").mock(side_effect=retry_handler) + respx_mock.post("/chat/completions").mock(side_effect=retry_handler) - response = await client.agents.versions.with_raw_response.list(uuid="uuid") + response = await client.agents.chat.completions.with_raw_response.create( + messages=[ + { + "content": "string", + "role": "system", + } + ], + model="llama3-8b-instruct", + ) assert response.retries_taken == failures_before_success assert int(response.http_request.headers.get("x-stainless-retry-count")) == failures_before_success @@ -1802,10 +1864,17 @@ def retry_handler(_request: httpx.Request) -> httpx.Response: return httpx.Response(500) return httpx.Response(200) - respx_mock.get("/v2/gen-ai/agents/uuid/versions").mock(side_effect=retry_handler) + respx_mock.post("/chat/completions").mock(side_effect=retry_handler) - response = await client.agents.versions.with_raw_response.list( - uuid="uuid", extra_headers={"x-stainless-retry-count": Omit()} + response = await client.agents.chat.completions.with_raw_response.create( + messages=[ + { + "content": "string", + "role": "system", + } + ], + model="llama3-8b-instruct", + extra_headers={"x-stainless-retry-count": Omit()}, ) assert len(response.http_request.headers.get_list("x-stainless-retry-count")) == 0 @@ -1828,10 +1897,17 @@ def retry_handler(_request: httpx.Request) -> httpx.Response: return httpx.Response(500) return httpx.Response(200) - respx_mock.get("/v2/gen-ai/agents/uuid/versions").mock(side_effect=retry_handler) - - response = await client.agents.versions.with_raw_response.list( - uuid="uuid", extra_headers={"x-stainless-retry-count": "42"} + respx_mock.post("/chat/completions").mock(side_effect=retry_handler) + + response = await client.agents.chat.completions.with_raw_response.create( + messages=[ + { + "content": "string", + "role": "system", + } + ], + model="llama3-8b-instruct", + extra_headers={"x-stainless-retry-count": "42"}, ) assert response.http_request.headers.get("x-stainless-retry-count") == "42"
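
For reference, a minimal usage sketch of the `chat` surface this change touches. It is assembled from the updated README examples and the `client.chat.completions.create(**params)` method listed in api.md, not from the generated code in this diff itself; the model name and prompt are placeholders copied from the README, and streaming on the top-level endpoint is assumed to accept the same `stream=True` flag as the agent-scoped example.

```python
# Sketch only: mirrors the README examples in this release.
# The model name and prompt are placeholders from the README, not required values.
from gradientai import GradientAI

client = GradientAI()  # constructed as in the README streaming example

# Agent-scoped completions, as exercised by the updated README and tests.
completion = client.agents.chat.completions.create(
    messages=[{"role": "user", "content": "What is the capital of France?"}],
    model="llama3.3-70b-instruct",
)
print(completion.choices)

# Top-level completions, per `client.chat.completions.create(**params)` in api.md.
# Assumption: stream=True behaves like the agent-scoped streaming example above.
stream = client.chat.completions.create(
    messages=[{"role": "user", "content": "What is the capital of France?"}],
    model="llama3.3-70b-instruct",
    stream=True,
)
for chunk in stream:
    print(chunk.choices)
```

Note that the `chat` resource itself is not new in this release: the diff only moves its registration so it sits immediately after `agents` on the sync and async clients (and reorders the matching api.md section and `__init__.py` exports), while the README and tests switch their examples from `agents.versions.list` to the chat completions endpoints.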