diff --git a/.release-please-manifest.json b/.release-please-manifest.json
index 4f9005ea..b5db7ce1 100644
--- a/.release-please-manifest.json
+++ b/.release-please-manifest.json
@@ -1,3 +1,3 @@
{
- ".": "0.1.0-alpha.6"
+ ".": "0.1.0-alpha.7"
}
\ No newline at end of file
diff --git a/.stats.yml b/.stats.yml
index a1e73eb0..79a36ab0 100644
--- a/.stats.yml
+++ b/.stats.yml
@@ -1,4 +1,4 @@
-configured_endpoints: 77
+configured_endpoints: 76
openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/digitalocean%2Fgradientai-e8b3cbc80e18e4f7f277010349f25e1319156704f359911dc464cc21a0d077a6.yml
openapi_spec_hash: c773d792724f5647ae25a5ae4ccec208
-config_hash: 9c2519464cf5de240e34bd89b9f65706
+config_hash: f0976fbc552ea878bb527447b5e663c9
diff --git a/CHANGELOG.md b/CHANGELOG.md
index d9b29735..15fec91a 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,16 @@
# Changelog
+## 0.1.0-alpha.7 (2025-06-27)
+
+Full Changelog: [v0.1.0-alpha.6...v0.1.0-alpha.7](https://github.com/digitalocean/gradientai-python/compare/v0.1.0-alpha.6...v0.1.0-alpha.7)
+
+### Features
+
+* **api:** manual updates ([63b9ec0](https://github.com/digitalocean/gradientai-python/commit/63b9ec02a646dad258afbd048db8db1af8d4401b))
+* **api:** manual updates ([5247aee](https://github.com/digitalocean/gradientai-python/commit/5247aee6d6052f6380fbe892d7c2bd9a8d0a32c0))
+* **api:** manual updates ([aa9e2c7](https://github.com/digitalocean/gradientai-python/commit/aa9e2c78956162f6195fdbaa1c95754ee4af207e))
+* **client:** add agent_domain option ([b4b6260](https://github.com/digitalocean/gradientai-python/commit/b4b62609a12a1dfa0b505e9ec54334b776fb0515))
+
## 0.1.0-alpha.6 (2025-06-27)
Full Changelog: [v0.1.0-alpha.5...v0.1.0-alpha.6](https://github.com/digitalocean/gradientai-python/compare/v0.1.0-alpha.5...v0.1.0-alpha.6)
diff --git a/README.md b/README.md
index 24b0975b..d5bd9c97 100644
--- a/README.md
+++ b/README.md
@@ -31,7 +31,7 @@ client = GradientAI(
api_key=os.environ.get("GRADIENTAI_API_KEY"), # This is the default and can be omitted
)
-completion = client.chat.completions.create(
+completion = client.agents.chat.completions.create(
messages=[
{
"content": "string",
@@ -63,7 +63,7 @@ client = AsyncGradientAI(
async def main() -> None:
- completion = await client.chat.completions.create(
+ completion = await client.agents.chat.completions.create(
messages=[
{
"content": "string",
@@ -105,7 +105,7 @@ async def main() -> None:
api_key=os.environ.get("GRADIENTAI_API_KEY"), # This is the default and can be omitted
http_client=DefaultAioHttpClient(),
) as client:
- completion = await client.chat.completions.create(
+ completion = await client.agents.chat.completions.create(
messages=[
{
"content": "string",
@@ -138,7 +138,7 @@ from gradientai import GradientAI
client = GradientAI()
-completion = client.chat.completions.create(
+completion = client.agents.chat.completions.create(
messages=[
{
"content": "string",
diff --git a/api.md b/api.md
index 9a2dd757..dc48f7b3 100644
--- a/api.md
+++ b/api.md
@@ -1,7 +1,7 @@
# Shared Types
```python
-from gradientai.types import APILinks, APIMeta
+from gradientai.types import APILinks, APIMeta, ChatCompletionTokenLogprob
```
# Agents
@@ -58,6 +58,20 @@ Methods:
- client.agents.api_keys.delete(api_key_uuid, \*, agent_uuid) -> APIKeyDeleteResponse
- client.agents.api_keys.regenerate(api_key_uuid, \*, agent_uuid) -> APIKeyRegenerateResponse
+## Chat
+
+### Completions
+
+Types:
+
+```python
+from gradientai.types.agents.chat import CompletionCreateResponse
+```
+
+Methods:
+
+- client.agents.chat.completions.create(\*\*params) -> CompletionCreateResponse
+
## EvaluationMetrics
Types:
@@ -382,7 +396,7 @@ Methods:
Types:
```python
-from gradientai.types.chat import ChatCompletionTokenLogprob, CompletionCreateResponse
+from gradientai.types.chat import CompletionCreateResponse
```
Methods:
@@ -419,10 +433,9 @@ Methods:
Types:
```python
-from gradientai.types import APIAgreement, APIModel, APIModelVersion, Model, ModelListResponse
+from gradientai.types import APIAgreement, APIModel, APIModelVersion, ModelListResponse
```
Methods:
-- client.models.retrieve(model) -> Model
-- client.models.list() -> ModelListResponse
+- client.models.list(\*\*params) -> ModelListResponse
diff --git a/pyproject.toml b/pyproject.toml
index 0f04322b..29531941 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
[project]
name = "c63a5cfe-b235-4fbe-8bbb-82a9e02a482a-python"
-version = "0.1.0-alpha.6"
+version = "0.1.0-alpha.7"
description = "The official Python library for GradientAI"
dynamic = ["readme"]
license = "Apache-2.0"
diff --git a/src/gradientai/_client.py b/src/gradientai/_client.py
index 0020ed16..327273c9 100644
--- a/src/gradientai/_client.py
+++ b/src/gradientai/_client.py
@@ -57,12 +57,14 @@ class GradientAI(SyncAPIClient):
# client options
api_key: str | None
inference_key: str | None
+ agent_domain: str | None
def __init__(
self,
*,
api_key: str | None = None,
inference_key: str | None = None,
+ agent_domain: str | None = None,
base_url: str | httpx.URL | None = None,
timeout: Union[float, Timeout, None, NotGiven] = NOT_GIVEN,
max_retries: int = DEFAULT_MAX_RETRIES,
@@ -96,6 +98,8 @@ def __init__(
inference_key = os.environ.get("GRADIENTAI_INFERENCE_KEY")
self.inference_key = inference_key
+ self.agent_domain = agent_domain
+
if base_url is None:
base_url = os.environ.get("GRADIENT_AI_BASE_URL")
self._base_url_overridden = base_url is not None
@@ -201,6 +205,7 @@ def copy(
*,
api_key: str | None = None,
inference_key: str | None = None,
+ agent_domain: str | None = None,
base_url: str | httpx.URL | None = None,
timeout: float | Timeout | None | NotGiven = NOT_GIVEN,
http_client: httpx.Client | None = None,
@@ -236,6 +241,7 @@ def copy(
client = self.__class__(
api_key=api_key or self.api_key,
inference_key=inference_key or self.inference_key,
+ agent_domain=agent_domain or self.agent_domain,
base_url=base_url or self.base_url,
timeout=self.timeout if isinstance(timeout, NotGiven) else timeout,
http_client=http_client,
@@ -289,12 +295,14 @@ class AsyncGradientAI(AsyncAPIClient):
# client options
api_key: str | None
inference_key: str | None
+ agent_domain: str | None
def __init__(
self,
*,
api_key: str | None = None,
inference_key: str | None = None,
+ agent_domain: str | None = None,
base_url: str | httpx.URL | None = None,
timeout: Union[float, Timeout, None, NotGiven] = NOT_GIVEN,
max_retries: int = DEFAULT_MAX_RETRIES,
@@ -328,6 +336,8 @@ def __init__(
inference_key = os.environ.get("GRADIENTAI_INFERENCE_KEY")
self.inference_key = inference_key
+ self.agent_domain = agent_domain
+
if base_url is None:
base_url = os.environ.get("GRADIENT_AI_BASE_URL")
self._base_url_overridden = base_url is not None
@@ -433,6 +443,7 @@ def copy(
*,
api_key: str | None = None,
inference_key: str | None = None,
+ agent_domain: str | None = None,
base_url: str | httpx.URL | None = None,
timeout: float | Timeout | None | NotGiven = NOT_GIVEN,
http_client: httpx.AsyncClient | None = None,
@@ -468,6 +479,7 @@ def copy(
client = self.__class__(
api_key=api_key or self.api_key,
inference_key=inference_key or self.inference_key,
+ agent_domain=agent_domain or self.agent_domain,
base_url=base_url or self.base_url,
timeout=self.timeout if isinstance(timeout, NotGiven) else timeout,
http_client=http_client,
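
A hedged sketch of how the new `agent_domain` option could be supplied, based only on the constructor and `copy()` signatures added in this file; the domain values are made-up placeholders, and how the client routes requests with them is not shown in this diff:

```python
import os

from gradientai import GradientAI

# `agent_domain` can be passed at construction time, alongside the existing keys...
client = GradientAI(
    api_key=os.environ.get("GRADIENTAI_API_KEY"),
    agent_domain="my-agent.agents.do-ai.run",  # placeholder domain
)

# ...or overridden on a per-use copy, mirroring how api_key/inference_key behave.
scoped = client.copy(agent_domain="other-agent.agents.do-ai.run")  # placeholder domain
```
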
diff --git a/src/gradientai/_version.py b/src/gradientai/_version.py
index b8ef5fc0..d4e6dde6 100644
--- a/src/gradientai/_version.py
+++ b/src/gradientai/_version.py
@@ -1,4 +1,4 @@
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
__title__ = "gradientai"
-__version__ = "0.1.0-alpha.6" # x-release-please-version
+__version__ = "0.1.0-alpha.7" # x-release-please-version
diff --git a/src/gradientai/resources/agents/__init__.py b/src/gradientai/resources/agents/__init__.py
index f5423f00..51075283 100644
--- a/src/gradientai/resources/agents/__init__.py
+++ b/src/gradientai/resources/agents/__init__.py
@@ -1,5 +1,13 @@
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+from .chat import (
+ ChatResource,
+ AsyncChatResource,
+ ChatResourceWithRawResponse,
+ AsyncChatResourceWithRawResponse,
+ ChatResourceWithStreamingResponse,
+ AsyncChatResourceWithStreamingResponse,
+)
from .agents import (
AgentsResource,
AsyncAgentsResource,
@@ -88,6 +96,12 @@
"AsyncAPIKeysResourceWithRawResponse",
"APIKeysResourceWithStreamingResponse",
"AsyncAPIKeysResourceWithStreamingResponse",
+ "ChatResource",
+ "AsyncChatResource",
+ "ChatResourceWithRawResponse",
+ "AsyncChatResourceWithRawResponse",
+ "ChatResourceWithStreamingResponse",
+ "AsyncChatResourceWithStreamingResponse",
"EvaluationMetricsResource",
"AsyncEvaluationMetricsResource",
"EvaluationMetricsResourceWithRawResponse",
diff --git a/src/gradientai/resources/agents/agents.py b/src/gradientai/resources/agents/agents.py
index 0a6e183c..200e9fc0 100644
--- a/src/gradientai/resources/agents/agents.py
+++ b/src/gradientai/resources/agents/agents.py
@@ -41,6 +41,14 @@
AsyncVersionsResourceWithStreamingResponse,
)
from ..._compat import cached_property
+from .chat.chat import (
+ ChatResource,
+ AsyncChatResource,
+ ChatResourceWithRawResponse,
+ AsyncChatResourceWithRawResponse,
+ ChatResourceWithStreamingResponse,
+ AsyncChatResourceWithStreamingResponse,
+)
from .functions import (
FunctionsResource,
AsyncFunctionsResource,
@@ -114,6 +122,10 @@ class AgentsResource(SyncAPIResource):
def api_keys(self) -> APIKeysResource:
return APIKeysResource(self._client)
+ @cached_property
+ def chat(self) -> ChatResource:
+ return ChatResource(self._client)
+
@cached_property
def evaluation_metrics(self) -> EvaluationMetricsResource:
return EvaluationMetricsResource(self._client)
@@ -498,6 +510,10 @@ class AsyncAgentsResource(AsyncAPIResource):
def api_keys(self) -> AsyncAPIKeysResource:
return AsyncAPIKeysResource(self._client)
+ @cached_property
+ def chat(self) -> AsyncChatResource:
+ return AsyncChatResource(self._client)
+
@cached_property
def evaluation_metrics(self) -> AsyncEvaluationMetricsResource:
return AsyncEvaluationMetricsResource(self._client)
@@ -904,6 +920,10 @@ def __init__(self, agents: AgentsResource) -> None:
def api_keys(self) -> APIKeysResourceWithRawResponse:
return APIKeysResourceWithRawResponse(self._agents.api_keys)
+ @cached_property
+ def chat(self) -> ChatResourceWithRawResponse:
+ return ChatResourceWithRawResponse(self._agents.chat)
+
@cached_property
def evaluation_metrics(self) -> EvaluationMetricsResourceWithRawResponse:
return EvaluationMetricsResourceWithRawResponse(self._agents.evaluation_metrics)
@@ -964,6 +984,10 @@ def __init__(self, agents: AsyncAgentsResource) -> None:
def api_keys(self) -> AsyncAPIKeysResourceWithRawResponse:
return AsyncAPIKeysResourceWithRawResponse(self._agents.api_keys)
+ @cached_property
+ def chat(self) -> AsyncChatResourceWithRawResponse:
+ return AsyncChatResourceWithRawResponse(self._agents.chat)
+
@cached_property
def evaluation_metrics(self) -> AsyncEvaluationMetricsResourceWithRawResponse:
return AsyncEvaluationMetricsResourceWithRawResponse(self._agents.evaluation_metrics)
@@ -1024,6 +1048,10 @@ def __init__(self, agents: AgentsResource) -> None:
def api_keys(self) -> APIKeysResourceWithStreamingResponse:
return APIKeysResourceWithStreamingResponse(self._agents.api_keys)
+ @cached_property
+ def chat(self) -> ChatResourceWithStreamingResponse:
+ return ChatResourceWithStreamingResponse(self._agents.chat)
+
@cached_property
def evaluation_metrics(self) -> EvaluationMetricsResourceWithStreamingResponse:
return EvaluationMetricsResourceWithStreamingResponse(self._agents.evaluation_metrics)
@@ -1084,6 +1112,10 @@ def __init__(self, agents: AsyncAgentsResource) -> None:
def api_keys(self) -> AsyncAPIKeysResourceWithStreamingResponse:
return AsyncAPIKeysResourceWithStreamingResponse(self._agents.api_keys)
+ @cached_property
+ def chat(self) -> AsyncChatResourceWithStreamingResponse:
+ return AsyncChatResourceWithStreamingResponse(self._agents.chat)
+
@cached_property
def evaluation_metrics(self) -> AsyncEvaluationMetricsResourceWithStreamingResponse:
return AsyncEvaluationMetricsResourceWithStreamingResponse(self._agents.evaluation_metrics)
diff --git a/src/gradientai/resources/agents/chat/__init__.py b/src/gradientai/resources/agents/chat/__init__.py
new file mode 100644
index 00000000..ec960eb4
--- /dev/null
+++ b/src/gradientai/resources/agents/chat/__init__.py
@@ -0,0 +1,33 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from .chat import (
+ ChatResource,
+ AsyncChatResource,
+ ChatResourceWithRawResponse,
+ AsyncChatResourceWithRawResponse,
+ ChatResourceWithStreamingResponse,
+ AsyncChatResourceWithStreamingResponse,
+)
+from .completions import (
+ CompletionsResource,
+ AsyncCompletionsResource,
+ CompletionsResourceWithRawResponse,
+ AsyncCompletionsResourceWithRawResponse,
+ CompletionsResourceWithStreamingResponse,
+ AsyncCompletionsResourceWithStreamingResponse,
+)
+
+__all__ = [
+ "CompletionsResource",
+ "AsyncCompletionsResource",
+ "CompletionsResourceWithRawResponse",
+ "AsyncCompletionsResourceWithRawResponse",
+ "CompletionsResourceWithStreamingResponse",
+ "AsyncCompletionsResourceWithStreamingResponse",
+ "ChatResource",
+ "AsyncChatResource",
+ "ChatResourceWithRawResponse",
+ "AsyncChatResourceWithRawResponse",
+ "ChatResourceWithStreamingResponse",
+ "AsyncChatResourceWithStreamingResponse",
+]
diff --git a/src/gradientai/resources/agents/chat/chat.py b/src/gradientai/resources/agents/chat/chat.py
new file mode 100644
index 00000000..c87bd158
--- /dev/null
+++ b/src/gradientai/resources/agents/chat/chat.py
@@ -0,0 +1,102 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from ...._compat import cached_property
+from .completions import (
+ CompletionsResource,
+ AsyncCompletionsResource,
+ CompletionsResourceWithRawResponse,
+ AsyncCompletionsResourceWithRawResponse,
+ CompletionsResourceWithStreamingResponse,
+ AsyncCompletionsResourceWithStreamingResponse,
+)
+from ...._resource import SyncAPIResource, AsyncAPIResource
+
+__all__ = ["ChatResource", "AsyncChatResource"]
+
+
+class ChatResource(SyncAPIResource):
+ @cached_property
+ def completions(self) -> CompletionsResource:
+ return CompletionsResource(self._client)
+
+ @cached_property
+ def with_raw_response(self) -> ChatResourceWithRawResponse:
+ """
+ This property can be used as a prefix for any HTTP method call to return
+ the raw response object instead of the parsed content.
+
+ For more information, see https://www.github.com/digitalocean/gradientai-python#accessing-raw-response-data-eg-headers
+ """
+ return ChatResourceWithRawResponse(self)
+
+ @cached_property
+ def with_streaming_response(self) -> ChatResourceWithStreamingResponse:
+ """
+ An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+ For more information, see https://www.github.com/digitalocean/gradientai-python#with_streaming_response
+ """
+ return ChatResourceWithStreamingResponse(self)
+
+
+class AsyncChatResource(AsyncAPIResource):
+ @cached_property
+ def completions(self) -> AsyncCompletionsResource:
+ return AsyncCompletionsResource(self._client)
+
+ @cached_property
+ def with_raw_response(self) -> AsyncChatResourceWithRawResponse:
+ """
+ This property can be used as a prefix for any HTTP method call to return
+ the raw response object instead of the parsed content.
+
+ For more information, see https://www.github.com/digitalocean/gradientai-python#accessing-raw-response-data-eg-headers
+ """
+ return AsyncChatResourceWithRawResponse(self)
+
+ @cached_property
+ def with_streaming_response(self) -> AsyncChatResourceWithStreamingResponse:
+ """
+ An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+ For more information, see https://www.github.com/digitalocean/gradientai-python#with_streaming_response
+ """
+ return AsyncChatResourceWithStreamingResponse(self)
+
+
+class ChatResourceWithRawResponse:
+ def __init__(self, chat: ChatResource) -> None:
+ self._chat = chat
+
+ @cached_property
+ def completions(self) -> CompletionsResourceWithRawResponse:
+ return CompletionsResourceWithRawResponse(self._chat.completions)
+
+
+class AsyncChatResourceWithRawResponse:
+ def __init__(self, chat: AsyncChatResource) -> None:
+ self._chat = chat
+
+ @cached_property
+ def completions(self) -> AsyncCompletionsResourceWithRawResponse:
+ return AsyncCompletionsResourceWithRawResponse(self._chat.completions)
+
+
+class ChatResourceWithStreamingResponse:
+ def __init__(self, chat: ChatResource) -> None:
+ self._chat = chat
+
+ @cached_property
+ def completions(self) -> CompletionsResourceWithStreamingResponse:
+ return CompletionsResourceWithStreamingResponse(self._chat.completions)
+
+
+class AsyncChatResourceWithStreamingResponse:
+ def __init__(self, chat: AsyncChatResource) -> None:
+ self._chat = chat
+
+ @cached_property
+ def completions(self) -> AsyncCompletionsResourceWithStreamingResponse:
+ return AsyncCompletionsResourceWithStreamingResponse(self._chat.completions)
diff --git a/src/gradientai/resources/agents/chat/completions.py b/src/gradientai/resources/agents/chat/completions.py
new file mode 100644
index 00000000..a213bf05
--- /dev/null
+++ b/src/gradientai/resources/agents/chat/completions.py
@@ -0,0 +1,385 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Dict, List, Union, Iterable, Optional
+
+import httpx
+
+from ...._types import NOT_GIVEN, Body, Query, Headers, NotGiven
+from ...._utils import maybe_transform, async_maybe_transform
+from ...._compat import cached_property
+from ...._resource import SyncAPIResource, AsyncAPIResource
+from ...._response import (
+ to_raw_response_wrapper,
+ to_streamed_response_wrapper,
+ async_to_raw_response_wrapper,
+ async_to_streamed_response_wrapper,
+)
+from ...._base_client import make_request_options
+from ....types.agents.chat import completion_create_params
+from ....types.agents.chat.completion_create_response import CompletionCreateResponse
+
+__all__ = ["CompletionsResource", "AsyncCompletionsResource"]
+
+
+class CompletionsResource(SyncAPIResource):
+ @cached_property
+ def with_raw_response(self) -> CompletionsResourceWithRawResponse:
+ """
+ This property can be used as a prefix for any HTTP method call to return
+ the raw response object instead of the parsed content.
+
+ For more information, see https://www.github.com/digitalocean/gradientai-python#accessing-raw-response-data-eg-headers
+ """
+ return CompletionsResourceWithRawResponse(self)
+
+ @cached_property
+ def with_streaming_response(self) -> CompletionsResourceWithStreamingResponse:
+ """
+ An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+ For more information, see https://www.github.com/digitalocean/gradientai-python#with_streaming_response
+ """
+ return CompletionsResourceWithStreamingResponse(self)
+
+ def create(
+ self,
+ *,
+ messages: Iterable[completion_create_params.Message],
+ model: str,
+ frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN,
+ logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN,
+ logprobs: Optional[bool] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ metadata: Optional[Dict[str, str]] | NotGiven = NOT_GIVEN,
+ n: Optional[int] | NotGiven = NOT_GIVEN,
+ presence_penalty: Optional[float] | NotGiven = NOT_GIVEN,
+ stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN,
+ stream: Optional[bool] | NotGiven = NOT_GIVEN,
+ stream_options: Optional[completion_create_params.StreamOptions] | NotGiven = NOT_GIVEN,
+ temperature: Optional[float] | NotGiven = NOT_GIVEN,
+ top_logprobs: Optional[int] | NotGiven = NOT_GIVEN,
+ top_p: Optional[float] | NotGiven = NOT_GIVEN,
+ user: str | NotGiven = NOT_GIVEN,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> CompletionCreateResponse:
+ """
+ Creates a model response for the given chat conversation.
+
+ Args:
+ messages: A list of messages comprising the conversation so far.
+
+ model: Model ID used to generate the response.
+
+ frequency_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on their
+ existing frequency in the text so far, decreasing the model's likelihood to
+ repeat the same line verbatim.
+
+ logit_bias: Modify the likelihood of specified tokens appearing in the completion.
+
+ Accepts a JSON object that maps tokens (specified by their token ID in the
+ tokenizer) to an associated bias value from -100 to 100. Mathematically, the
+ bias is added to the logits generated by the model prior to sampling. The exact
+ effect will vary per model, but values between -1 and 1 should decrease or
+ increase likelihood of selection; values like -100 or 100 should result in a ban
+ or exclusive selection of the relevant token.
+
+ logprobs: Whether to return log probabilities of the output tokens or not. If true,
+ returns the log probabilities of each output token returned in the `content` of
+ `message`.
+
+ max_completion_tokens: The maximum number of completion tokens that may be used over the course of the
+ run. The run will make a best effort to use only the number of completion tokens
+ specified, across multiple turns of the run.
+
+ max_tokens: The maximum number of tokens that can be generated in the completion.
+
+ The token count of your prompt plus `max_tokens` cannot exceed the model's
+ context length.
+
+ metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
+ for storing additional information about the object in a structured format, and
+ querying for objects via API or the dashboard.
+
+ Keys are strings with a maximum length of 64 characters. Values are strings with
+ a maximum length of 512 characters.
+
+ n: How many chat completion choices to generate for each input message. Note that
+ you will be charged based on the number of generated tokens across all of the
+ choices. Keep `n` as `1` to minimize costs.
+
+ presence_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on
+ whether they appear in the text so far, increasing the model's likelihood to
+ talk about new topics.
+
+ stop: Up to 4 sequences where the API will stop generating further tokens. The
+ returned text will not contain the stop sequence.
+
+ stream: If set to true, the model response data will be streamed to the client as it is
+ generated using server-sent events.
+
+ stream_options: Options for streaming response. Only set this when you set `stream: true`.
+
+ temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
+ make the output more random, while lower values like 0.2 will make it more
+ focused and deterministic. We generally recommend altering this or `top_p` but
+ not both.
+
+ top_logprobs: An integer between 0 and 20 specifying the number of most likely tokens to
+ return at each token position, each with an associated log probability.
+ `logprobs` must be set to `true` if this parameter is used.
+
+ top_p: An alternative to sampling with temperature, called nucleus sampling, where the
+ model considers the results of the tokens with top_p probability mass. So 0.1
+ means only the tokens comprising the top 10% probability mass are considered.
+
+ We generally recommend altering this or `temperature` but not both.
+
+ user: A unique identifier representing your end-user, which can help DigitalOcean to
+ monitor and detect abuse.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ return self._post(
+ "/chat/completions"
+ if self._client._base_url_overridden
+ else "https://inference.do-ai.run/v1/chat/completions",
+ body=maybe_transform(
+ {
+ "messages": messages,
+ "model": model,
+ "frequency_penalty": frequency_penalty,
+ "logit_bias": logit_bias,
+ "logprobs": logprobs,
+ "max_completion_tokens": max_completion_tokens,
+ "max_tokens": max_tokens,
+ "metadata": metadata,
+ "n": n,
+ "presence_penalty": presence_penalty,
+ "stop": stop,
+ "stream": stream,
+ "stream_options": stream_options,
+ "temperature": temperature,
+ "top_logprobs": top_logprobs,
+ "top_p": top_p,
+ "user": user,
+ },
+ completion_create_params.CompletionCreateParams,
+ ),
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=CompletionCreateResponse,
+ )
+
+
+class AsyncCompletionsResource(AsyncAPIResource):
+ @cached_property
+ def with_raw_response(self) -> AsyncCompletionsResourceWithRawResponse:
+ """
+ This property can be used as a prefix for any HTTP method call to return
+ the raw response object instead of the parsed content.
+
+ For more information, see https://www.github.com/digitalocean/gradientai-python#accessing-raw-response-data-eg-headers
+ """
+ return AsyncCompletionsResourceWithRawResponse(self)
+
+ @cached_property
+ def with_streaming_response(self) -> AsyncCompletionsResourceWithStreamingResponse:
+ """
+ An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+ For more information, see https://www.github.com/digitalocean/gradientai-python#with_streaming_response
+ """
+ return AsyncCompletionsResourceWithStreamingResponse(self)
+
+ async def create(
+ self,
+ *,
+ messages: Iterable[completion_create_params.Message],
+ model: str,
+ frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN,
+ logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN,
+ logprobs: Optional[bool] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ metadata: Optional[Dict[str, str]] | NotGiven = NOT_GIVEN,
+ n: Optional[int] | NotGiven = NOT_GIVEN,
+ presence_penalty: Optional[float] | NotGiven = NOT_GIVEN,
+ stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN,
+ stream: Optional[bool] | NotGiven = NOT_GIVEN,
+ stream_options: Optional[completion_create_params.StreamOptions] | NotGiven = NOT_GIVEN,
+ temperature: Optional[float] | NotGiven = NOT_GIVEN,
+ top_logprobs: Optional[int] | NotGiven = NOT_GIVEN,
+ top_p: Optional[float] | NotGiven = NOT_GIVEN,
+ user: str | NotGiven = NOT_GIVEN,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> CompletionCreateResponse:
+ """
+ Creates a model response for the given chat conversation.
+
+ Args:
+ messages: A list of messages comprising the conversation so far.
+
+ model: Model ID used to generate the response.
+
+ frequency_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on their
+ existing frequency in the text so far, decreasing the model's likelihood to
+ repeat the same line verbatim.
+
+ logit_bias: Modify the likelihood of specified tokens appearing in the completion.
+
+ Accepts a JSON object that maps tokens (specified by their token ID in the
+ tokenizer) to an associated bias value from -100 to 100. Mathematically, the
+ bias is added to the logits generated by the model prior to sampling. The exact
+ effect will vary per model, but values between -1 and 1 should decrease or
+ increase likelihood of selection; values like -100 or 100 should result in a ban
+ or exclusive selection of the relevant token.
+
+ logprobs: Whether to return log probabilities of the output tokens or not. If true,
+ returns the log probabilities of each output token returned in the `content` of
+ `message`.
+
+ max_completion_tokens: The maximum number of completion tokens that may be used over the course of the
+ run. The run will make a best effort to use only the number of completion tokens
+ specified, across multiple turns of the run.
+
+ max_tokens: The maximum number of tokens that can be generated in the completion.
+
+ The token count of your prompt plus `max_tokens` cannot exceed the model's
+ context length.
+
+ metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
+ for storing additional information about the object in a structured format, and
+ querying for objects via API or the dashboard.
+
+ Keys are strings with a maximum length of 64 characters. Values are strings with
+ a maximum length of 512 characters.
+
+ n: How many chat completion choices to generate for each input message. Note that
+ you will be charged based on the number of generated tokens across all of the
+ choices. Keep `n` as `1` to minimize costs.
+
+ presence_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on
+ whether they appear in the text so far, increasing the model's likelihood to
+ talk about new topics.
+
+ stop: Up to 4 sequences where the API will stop generating further tokens. The
+ returned text will not contain the stop sequence.
+
+ stream: If set to true, the model response data will be streamed to the client as it is
+ generated using server-sent events.
+
+ stream_options: Options for streaming response. Only set this when you set `stream: true`.
+
+ temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
+ make the output more random, while lower values like 0.2 will make it more
+ focused and deterministic. We generally recommend altering this or `top_p` but
+ not both.
+
+ top_logprobs: An integer between 0 and 20 specifying the number of most likely tokens to
+ return at each token position, each with an associated log probability.
+ `logprobs` must be set to `true` if this parameter is used.
+
+ top_p: An alternative to sampling with temperature, called nucleus sampling, where the
+ model considers the results of the tokens with top_p probability mass. So 0.1
+ means only the tokens comprising the top 10% probability mass are considered.
+
+ We generally recommend altering this or `temperature` but not both.
+
+ user: A unique identifier representing your end-user, which can help DigitalOcean to
+ monitor and detect abuse.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ return await self._post(
+ "/chat/completions"
+ if self._client._base_url_overridden
+ else "https://inference.do-ai.run/v1/chat/completions",
+ body=await async_maybe_transform(
+ {
+ "messages": messages,
+ "model": model,
+ "frequency_penalty": frequency_penalty,
+ "logit_bias": logit_bias,
+ "logprobs": logprobs,
+ "max_completion_tokens": max_completion_tokens,
+ "max_tokens": max_tokens,
+ "metadata": metadata,
+ "n": n,
+ "presence_penalty": presence_penalty,
+ "stop": stop,
+ "stream": stream,
+ "stream_options": stream_options,
+ "temperature": temperature,
+ "top_logprobs": top_logprobs,
+ "top_p": top_p,
+ "user": user,
+ },
+ completion_create_params.CompletionCreateParams,
+ ),
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=CompletionCreateResponse,
+ )
+
+
+class CompletionsResourceWithRawResponse:
+ def __init__(self, completions: CompletionsResource) -> None:
+ self._completions = completions
+
+ self.create = to_raw_response_wrapper(
+ completions.create,
+ )
+
+
+class AsyncCompletionsResourceWithRawResponse:
+ def __init__(self, completions: AsyncCompletionsResource) -> None:
+ self._completions = completions
+
+ self.create = async_to_raw_response_wrapper(
+ completions.create,
+ )
+
+
+class CompletionsResourceWithStreamingResponse:
+ def __init__(self, completions: CompletionsResource) -> None:
+ self._completions = completions
+
+ self.create = to_streamed_response_wrapper(
+ completions.create,
+ )
+
+
+class AsyncCompletionsResourceWithStreamingResponse:
+ def __init__(self, completions: AsyncCompletionsResource) -> None:
+ self._completions = completions
+
+ self.create = async_to_streamed_response_wrapper(
+ completions.create,
+ )
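
As the docstrings above note, `with_raw_response` and `with_streaming_response` prefix a method call to expose the HTTP response itself. A sketch following the usual Stainless wrapper pattern (`.parse()`, `.iter_lines()`); the message and model values are placeholders:

```python
from gradientai import GradientAI

client = GradientAI()

# Raw response: inspect headers/status, then parse into the typed model.
response = client.agents.chat.completions.with_raw_response.create(
    messages=[{"role": "user", "content": "Hello!"}],
    model="example-model",  # placeholder
)
print(response.headers.get("content-type"))
completion = response.parse()

# Streaming response: the body is not read eagerly.
with client.agents.chat.completions.with_streaming_response.create(
    messages=[{"role": "user", "content": "Hello!"}],
    model="example-model",  # placeholder
) as streamed:
    for line in streamed.iter_lines():
        print(line)
```
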
diff --git a/src/gradientai/resources/models.py b/src/gradientai/resources/models.py
index da5462ae..c8e78b9b 100644
--- a/src/gradientai/resources/models.py
+++ b/src/gradientai/resources/models.py
@@ -2,9 +2,14 @@
from __future__ import annotations
+from typing import List
+from typing_extensions import Literal
+
import httpx
+from ..types import model_list_params
from .._types import NOT_GIVEN, Body, Query, Headers, NotGiven
+from .._utils import maybe_transform, async_maybe_transform
from .._compat import cached_property
from .._resource import SyncAPIResource, AsyncAPIResource
from .._response import (
@@ -13,7 +18,6 @@
async_to_raw_response_wrapper,
async_to_streamed_response_wrapper,
)
-from ..types.model import Model
from .._base_client import make_request_options
from ..types.model_list_response import ModelListResponse
@@ -40,22 +44,52 @@ def with_streaming_response(self) -> ModelsResourceWithStreamingResponse:
"""
return ModelsResourceWithStreamingResponse(self)
- def retrieve(
+ def list(
self,
- model: str,
*,
+ page: int | NotGiven = NOT_GIVEN,
+ per_page: int | NotGiven = NOT_GIVEN,
+ public_only: bool | NotGiven = NOT_GIVEN,
+ usecases: List[
+ Literal[
+ "MODEL_USECASE_UNKNOWN",
+ "MODEL_USECASE_AGENT",
+ "MODEL_USECASE_FINETUNED",
+ "MODEL_USECASE_KNOWLEDGEBASE",
+ "MODEL_USECASE_GUARDRAIL",
+ "MODEL_USECASE_REASONING",
+ "MODEL_USECASE_SERVERLESS",
+ ]
+ ]
+ | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
extra_query: Query | None = None,
extra_body: Body | None = None,
timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
- ) -> Model:
+ ) -> ModelListResponse:
"""
- Retrieves a model instance, providing basic information about the model such as
- the owner and permissioning.
+ To list all models, send a GET request to `/v2/gen-ai/models`.
Args:
+ page: page number.
+
+ per_page: items per page.
+
+ public_only: only include models that are publicly available.
+
+ usecases: include only models defined for the listed usecases.
+
+ - MODEL_USECASE_UNKNOWN: The use case of the model is unknown
+ - MODEL_USECASE_AGENT: The model may be used in an agent
+ - MODEL_USECASE_FINETUNED: The model may be used for fine tuning
+ - MODEL_USECASE_KNOWLEDGEBASE: The model may be used for knowledge bases
+ (embedding models)
+ - MODEL_USECASE_GUARDRAIL: The model may be used for guardrails
+ - MODEL_USECASE_REASONING: The model usecase for reasoning
+ - MODEL_USECASE_SERVERLESS: The model usecase for serverless inference
+
extra_headers: Send extra headers
extra_query: Add additional query parameters to the request
@@ -64,36 +98,24 @@ def retrieve(
timeout: Override the client-level default timeout for this request, in seconds
"""
- if not model:
- raise ValueError(f"Expected a non-empty value for `model` but received {model!r}")
return self._get(
- f"/models/{model}"
+ "/v2/gen-ai/models"
if self._client._base_url_overridden
- else f"https://inference.do-ai.run/v1/models/{model}",
+ else "https://api.digitalocean.com/v2/gen-ai/models",
options=make_request_options(
- extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
- ),
- cast_to=Model,
- )
-
- def list(
- self,
- *,
- # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
- # The extra values given here take precedence over values defined on the client or passed to this method.
- extra_headers: Headers | None = None,
- extra_query: Query | None = None,
- extra_body: Body | None = None,
- timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
- ) -> ModelListResponse:
- """
- Lists the currently available models, and provides basic information about each
- one such as the owner and availability.
- """
- return self._get(
- "/models" if self._client._base_url_overridden else "https://inference.do-ai.run/v1/models",
- options=make_request_options(
- extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ extra_headers=extra_headers,
+ extra_query=extra_query,
+ extra_body=extra_body,
+ timeout=timeout,
+ query=maybe_transform(
+ {
+ "page": page,
+ "per_page": per_page,
+ "public_only": public_only,
+ "usecases": usecases,
+ },
+ model_list_params.ModelListParams,
+ ),
),
cast_to=ModelListResponse,
)
@@ -119,22 +141,52 @@ def with_streaming_response(self) -> AsyncModelsResourceWithStreamingResponse:
"""
return AsyncModelsResourceWithStreamingResponse(self)
- async def retrieve(
+ async def list(
self,
- model: str,
*,
+ page: int | NotGiven = NOT_GIVEN,
+ per_page: int | NotGiven = NOT_GIVEN,
+ public_only: bool | NotGiven = NOT_GIVEN,
+ usecases: List[
+ Literal[
+ "MODEL_USECASE_UNKNOWN",
+ "MODEL_USECASE_AGENT",
+ "MODEL_USECASE_FINETUNED",
+ "MODEL_USECASE_KNOWLEDGEBASE",
+ "MODEL_USECASE_GUARDRAIL",
+ "MODEL_USECASE_REASONING",
+ "MODEL_USECASE_SERVERLESS",
+ ]
+ ]
+ | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
extra_query: Query | None = None,
extra_body: Body | None = None,
timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
- ) -> Model:
+ ) -> ModelListResponse:
"""
- Retrieves a model instance, providing basic information about the model such as
- the owner and permissioning.
+ To list all models, send a GET request to `/v2/gen-ai/models`.
Args:
+ page: page number.
+
+ per_page: items per page.
+
+ public_only: only include models that are publicly available.
+
+ usecases: include only models defined for the listed usecases.
+
+ - MODEL_USECASE_UNKNOWN: The use case of the model is unknown
+ - MODEL_USECASE_AGENT: The model may be used in an agent
+ - MODEL_USECASE_FINETUNED: The model may be used for fine tuning
+ - MODEL_USECASE_KNOWLEDGEBASE: The model may be used for knowledge bases
+ (embedding models)
+ - MODEL_USECASE_GUARDRAIL: The model may be used for guardrails
+ - MODEL_USECASE_REASONING: The model usecase for reasoning
+ - MODEL_USECASE_SERVERLESS: The model usecase for serverless inference
+
extra_headers: Send extra headers
extra_query: Add additional query parameters to the request
@@ -143,36 +195,24 @@ async def retrieve(
timeout: Override the client-level default timeout for this request, in seconds
"""
- if not model:
- raise ValueError(f"Expected a non-empty value for `model` but received {model!r}")
return await self._get(
- f"/models/{model}"
+ "/v2/gen-ai/models"
if self._client._base_url_overridden
- else f"https://inference.do-ai.run/v1/models/{model}",
- options=make_request_options(
- extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
- ),
- cast_to=Model,
- )
-
- async def list(
- self,
- *,
- # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
- # The extra values given here take precedence over values defined on the client or passed to this method.
- extra_headers: Headers | None = None,
- extra_query: Query | None = None,
- extra_body: Body | None = None,
- timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
- ) -> ModelListResponse:
- """
- Lists the currently available models, and provides basic information about each
- one such as the owner and availability.
- """
- return await self._get(
- "/models" if self._client._base_url_overridden else "https://inference.do-ai.run/v1/models",
+ else "https://api.digitalocean.com/v2/gen-ai/models",
options=make_request_options(
- extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ extra_headers=extra_headers,
+ extra_query=extra_query,
+ extra_body=extra_body,
+ timeout=timeout,
+ query=await async_maybe_transform(
+ {
+ "page": page,
+ "per_page": per_page,
+ "public_only": public_only,
+ "usecases": usecases,
+ },
+ model_list_params.ModelListParams,
+ ),
),
cast_to=ModelListResponse,
)
@@ -182,9 +222,6 @@ class ModelsResourceWithRawResponse:
def __init__(self, models: ModelsResource) -> None:
self._models = models
- self.retrieve = to_raw_response_wrapper(
- models.retrieve,
- )
self.list = to_raw_response_wrapper(
models.list,
)
@@ -194,9 +231,6 @@ class AsyncModelsResourceWithRawResponse:
def __init__(self, models: AsyncModelsResource) -> None:
self._models = models
- self.retrieve = async_to_raw_response_wrapper(
- models.retrieve,
- )
self.list = async_to_raw_response_wrapper(
models.list,
)
@@ -206,9 +240,6 @@ class ModelsResourceWithStreamingResponse:
def __init__(self, models: ModelsResource) -> None:
self._models = models
- self.retrieve = to_streamed_response_wrapper(
- models.retrieve,
- )
self.list = to_streamed_response_wrapper(
models.list,
)
@@ -218,9 +249,6 @@ class AsyncModelsResourceWithStreamingResponse:
def __init__(self, models: AsyncModelsResource) -> None:
self._models = models
- self.retrieve = async_to_streamed_response_wrapper(
- models.retrieve,
- )
self.list = async_to_streamed_response_wrapper(
models.list,
)
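
A sketch of the reworked listing call; the filter values come from the `Literal` above, while the pagination numbers are arbitrary examples:

```python
from gradientai import GradientAI

client = GradientAI()

# Models are now listed from /v2/gen-ai/models with optional paging and filters.
page = client.models.list(
    page=1,
    per_page=25,
    public_only=True,
    usecases=["MODEL_USECASE_AGENT", "MODEL_USECASE_REASONING"],
)
for model in page.models or []:
    print(model.name, model.uuid)
```
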
diff --git a/src/gradientai/types/__init__.py b/src/gradientai/types/__init__.py
index 626c3840..4ec63b92 100644
--- a/src/gradientai/types/__init__.py
+++ b/src/gradientai/types/__init__.py
@@ -2,14 +2,15 @@
from __future__ import annotations
-from .model import Model as Model
-from .shared import APIMeta as APIMeta, APILinks as APILinks
+from .shared import APIMeta as APIMeta, APILinks as APILinks, ChatCompletionTokenLogprob as ChatCompletionTokenLogprob
from .api_agent import APIAgent as APIAgent
+from .api_model import APIModel as APIModel
from .api_agreement import APIAgreement as APIAgreement
from .api_workspace import APIWorkspace as APIWorkspace
from .api_agent_model import APIAgentModel as APIAgentModel
from .agent_list_params import AgentListParams as AgentListParams
from .api_model_version import APIModelVersion as APIModelVersion
+from .model_list_params import ModelListParams as ModelListParams
from .api_knowledge_base import APIKnowledgeBase as APIKnowledgeBase
from .region_list_params import RegionListParams as RegionListParams
from .agent_create_params import AgentCreateParams as AgentCreateParams
diff --git a/src/gradientai/types/agents/chat/__init__.py b/src/gradientai/types/agents/chat/__init__.py
new file mode 100644
index 00000000..9384ac14
--- /dev/null
+++ b/src/gradientai/types/agents/chat/__init__.py
@@ -0,0 +1,6 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from .completion_create_params import CompletionCreateParams as CompletionCreateParams
+from .completion_create_response import CompletionCreateResponse as CompletionCreateResponse
diff --git a/src/gradientai/types/agents/chat/completion_create_params.py b/src/gradientai/types/agents/chat/completion_create_params.py
new file mode 100644
index 00000000..11d032ff
--- /dev/null
+++ b/src/gradientai/types/agents/chat/completion_create_params.py
@@ -0,0 +1,185 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Dict, List, Union, Iterable, Optional
+from typing_extensions import Literal, Required, TypeAlias, TypedDict
+
+__all__ = [
+ "CompletionCreateParams",
+ "Message",
+ "MessageChatCompletionRequestSystemMessage",
+ "MessageChatCompletionRequestDeveloperMessage",
+ "MessageChatCompletionRequestUserMessage",
+ "MessageChatCompletionRequestAssistantMessage",
+ "StreamOptions",
+]
+
+
+class CompletionCreateParams(TypedDict, total=False):
+ messages: Required[Iterable[Message]]
+ """A list of messages comprising the conversation so far."""
+
+ model: Required[str]
+ """Model ID used to generate the response."""
+
+ frequency_penalty: Optional[float]
+ """Number between -2.0 and 2.0.
+
+ Positive values penalize new tokens based on their existing frequency in the
+ text so far, decreasing the model's likelihood to repeat the same line verbatim.
+ """
+
+ logit_bias: Optional[Dict[str, int]]
+ """Modify the likelihood of specified tokens appearing in the completion.
+
+ Accepts a JSON object that maps tokens (specified by their token ID in the
+ tokenizer) to an associated bias value from -100 to 100. Mathematically, the
+ bias is added to the logits generated by the model prior to sampling. The exact
+ effect will vary per model, but values between -1 and 1 should decrease or
+ increase likelihood of selection; values like -100 or 100 should result in a ban
+ or exclusive selection of the relevant token.
+ """
+
+ logprobs: Optional[bool]
+ """Whether to return log probabilities of the output tokens or not.
+
+ If true, returns the log probabilities of each output token returned in the
+ `content` of `message`.
+ """
+
+ max_completion_tokens: Optional[int]
+ """
+ The maximum number of completion tokens that may be used over the course of the
+ run. The run will make a best effort to use only the number of completion tokens
+ specified, across multiple turns of the run.
+ """
+
+ max_tokens: Optional[int]
+ """The maximum number of tokens that can be generated in the completion.
+
+ The token count of your prompt plus `max_tokens` cannot exceed the model's
+ context length.
+ """
+
+ metadata: Optional[Dict[str, str]]
+ """Set of 16 key-value pairs that can be attached to an object.
+
+ This can be useful for storing additional information about the object in a
+ structured format, and querying for objects via API or the dashboard.
+
+ Keys are strings with a maximum length of 64 characters. Values are strings with
+ a maximum length of 512 characters.
+ """
+
+ n: Optional[int]
+ """How many chat completion choices to generate for each input message.
+
+ Note that you will be charged based on the number of generated tokens across all
+ of the choices. Keep `n` as `1` to minimize costs.
+ """
+
+ presence_penalty: Optional[float]
+ """Number between -2.0 and 2.0.
+
+ Positive values penalize new tokens based on whether they appear in the text so
+ far, increasing the model's likelihood to talk about new topics.
+ """
+
+ stop: Union[Optional[str], List[str], None]
+ """Up to 4 sequences where the API will stop generating further tokens.
+
+ The returned text will not contain the stop sequence.
+ """
+
+ stream: Optional[bool]
+ """
+ If set to true, the model response data will be streamed to the client as it is
+ generated using server-sent events.
+ """
+
+ stream_options: Optional[StreamOptions]
+ """Options for streaming response. Only set this when you set `stream: true`."""
+
+ temperature: Optional[float]
+ """What sampling temperature to use, between 0 and 2.
+
+ Higher values like 0.8 will make the output more random, while lower values like
+ 0.2 will make it more focused and deterministic. We generally recommend altering
+ this or `top_p` but not both.
+ """
+
+ top_logprobs: Optional[int]
+ """
+ An integer between 0 and 20 specifying the number of most likely tokens to
+ return at each token position, each with an associated log probability.
+ `logprobs` must be set to `true` if this parameter is used.
+ """
+
+ top_p: Optional[float]
+ """
+ An alternative to sampling with temperature, called nucleus sampling, where the
+ model considers the results of the tokens with top_p probability mass. So 0.1
+ means only the tokens comprising the top 10% probability mass are considered.
+
+ We generally recommend altering this or `temperature` but not both.
+ """
+
+ user: str
+ """
+ A unique identifier representing your end-user, which can help DigitalOcean to
+ monitor and detect abuse.
+ """
+
+
+class MessageChatCompletionRequestSystemMessage(TypedDict, total=False):
+ content: Required[Union[str, List[str]]]
+ """The contents of the system message."""
+
+ role: Required[Literal["system"]]
+ """The role of the messages author, in this case `system`."""
+
+
+class MessageChatCompletionRequestDeveloperMessage(TypedDict, total=False):
+ content: Required[Union[str, List[str]]]
+ """The contents of the developer message."""
+
+ role: Required[Literal["developer"]]
+ """The role of the messages author, in this case `developer`."""
+
+
+class MessageChatCompletionRequestUserMessage(TypedDict, total=False):
+ content: Required[Union[str, List[str]]]
+ """The contents of the user message."""
+
+ role: Required[Literal["user"]]
+ """The role of the messages author, in this case `user`."""
+
+
+class MessageChatCompletionRequestAssistantMessage(TypedDict, total=False):
+ role: Required[Literal["assistant"]]
+ """The role of the messages author, in this case `assistant`."""
+
+ content: Union[str, List[str], None]
+ """The contents of the assistant message."""
+
+
+Message: TypeAlias = Union[
+ MessageChatCompletionRequestSystemMessage,
+ MessageChatCompletionRequestDeveloperMessage,
+ MessageChatCompletionRequestUserMessage,
+ MessageChatCompletionRequestAssistantMessage,
+]
+
+
+class StreamOptions(TypedDict, total=False):
+ include_usage: bool
+ """If set, an additional chunk will be streamed before the `data: [DONE]` message.
+
+ The `usage` field on this chunk shows the token usage statistics for the entire
+ request, and the `choices` field will always be an empty array.
+
+ All other chunks will also include a `usage` field, but with a null value.
+ **NOTE:** If the stream is interrupted, you may not receive the final usage
+ chunk which contains the total token usage for the request.
+ """
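
Because `Message` is a union of role-specific TypedDicts, request params can be written as plain dictionaries. A small sketch that type-checks against `CompletionCreateParams`; the model slug and message text are illustrative only:

```python
from gradientai.types.agents.chat import CompletionCreateParams

params: CompletionCreateParams = {
    "messages": [
        {"role": "system", "content": "You are a terse assistant."},
        {"role": "user", "content": "Summarize the release notes."},
    ],
    "model": "example-model",  # placeholder model slug
    "max_completion_tokens": 256,
    "stream": False,
}
```
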
diff --git a/src/gradientai/types/agents/chat/completion_create_response.py b/src/gradientai/types/agents/chat/completion_create_response.py
new file mode 100644
index 00000000..f2860c31
--- /dev/null
+++ b/src/gradientai/types/agents/chat/completion_create_response.py
@@ -0,0 +1,81 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import List, Optional
+from typing_extensions import Literal
+
+from ...._models import BaseModel
+from ...shared.chat_completion_token_logprob import ChatCompletionTokenLogprob
+
+__all__ = ["CompletionCreateResponse", "Choice", "ChoiceLogprobs", "ChoiceMessage", "Usage"]
+
+
+class ChoiceLogprobs(BaseModel):
+ content: Optional[List[ChatCompletionTokenLogprob]] = None
+ """A list of message content tokens with log probability information."""
+
+ refusal: Optional[List[ChatCompletionTokenLogprob]] = None
+ """A list of message refusal tokens with log probability information."""
+
+
+class ChoiceMessage(BaseModel):
+ content: Optional[str] = None
+ """The contents of the message."""
+
+ refusal: Optional[str] = None
+ """The refusal message generated by the model."""
+
+ role: Literal["assistant"]
+ """The role of the author of this message."""
+
+
+class Choice(BaseModel):
+ finish_reason: Literal["stop", "length"]
+ """The reason the model stopped generating tokens.
+
+ This will be `stop` if the model hit a natural stop point or a provided stop
+ sequence, or `length` if the maximum number of tokens specified in the request
+ was reached.
+ """
+
+ index: int
+ """The index of the choice in the list of choices."""
+
+ logprobs: Optional[ChoiceLogprobs] = None
+ """Log probability information for the choice."""
+
+ message: ChoiceMessage
+ """A chat completion message generated by the model."""
+
+
+class Usage(BaseModel):
+ completion_tokens: int
+ """Number of tokens in the generated completion."""
+
+ prompt_tokens: int
+ """Number of tokens in the prompt."""
+
+ total_tokens: int
+ """Total number of tokens used in the request (prompt + completion)."""
+
+
+class CompletionCreateResponse(BaseModel):
+ id: str
+ """A unique identifier for the chat completion."""
+
+ choices: List[Choice]
+ """A list of chat completion choices.
+
+ Can be more than one if `n` is greater than 1.
+ """
+
+ created: int
+ """The Unix timestamp (in seconds) of when the chat completion was created."""
+
+ model: str
+ """The model used for the chat completion."""
+
+ object: Literal["chat.completion"]
+ """The object type, which is always `chat.completion`."""
+
+ usage: Optional[Usage] = None
+ """Usage statistics for the completion request."""
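
A short helper sketching how the fields of the new response model fit together; it assumes a `CompletionCreateResponse` obtained from the create call and exists purely for illustration:

```python
from gradientai.types.agents.chat import CompletionCreateResponse


def summarize(completion: CompletionCreateResponse) -> None:
    """Print the key fields of a chat completion response."""
    choice = completion.choices[0]
    print(choice.finish_reason)    # "stop" or "length"
    print(choice.message.content)  # generated text, may be None

    if choice.logprobs and choice.logprobs.content:
        print(f"{len(choice.logprobs.content)} token logprobs returned")

    if completion.usage:
        print(f"{completion.usage.total_tokens} tokens used in total")
```
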
diff --git a/src/gradientai/types/api_model.py b/src/gradientai/types/api_model.py
new file mode 100644
index 00000000..c2bc1edd
--- /dev/null
+++ b/src/gradientai/types/api_model.py
@@ -0,0 +1,32 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Optional
+from datetime import datetime
+
+from .._models import BaseModel
+from .api_agreement import APIAgreement
+from .api_model_version import APIModelVersion
+
+__all__ = ["APIModel"]
+
+
+class APIModel(BaseModel):
+ agreement: Optional[APIAgreement] = None
+
+ created_at: Optional[datetime] = None
+
+ is_foundational: Optional[bool] = None
+
+ name: Optional[str] = None
+
+ parent_uuid: Optional[str] = None
+
+ updated_at: Optional[datetime] = None
+
+ upload_complete: Optional[bool] = None
+
+ url: Optional[str] = None
+
+ uuid: Optional[str] = None
+
+ version: Optional[APIModelVersion] = None
diff --git a/src/gradientai/types/chat/__init__.py b/src/gradientai/types/chat/__init__.py
index 59553f68..9384ac14 100644
--- a/src/gradientai/types/chat/__init__.py
+++ b/src/gradientai/types/chat/__init__.py
@@ -4,4 +4,3 @@
from .completion_create_params import CompletionCreateParams as CompletionCreateParams
from .completion_create_response import CompletionCreateResponse as CompletionCreateResponse
-from .chat_completion_token_logprob import ChatCompletionTokenLogprob as ChatCompletionTokenLogprob
diff --git a/src/gradientai/types/chat/completion_create_response.py b/src/gradientai/types/chat/completion_create_response.py
index 1ac59a28..1791373b 100644
--- a/src/gradientai/types/chat/completion_create_response.py
+++ b/src/gradientai/types/chat/completion_create_response.py
@@ -4,7 +4,7 @@
from typing_extensions import Literal
from ..._models import BaseModel
-from .chat_completion_token_logprob import ChatCompletionTokenLogprob
+from ..shared.chat_completion_token_logprob import ChatCompletionTokenLogprob
__all__ = ["CompletionCreateResponse", "Choice", "ChoiceLogprobs", "ChoiceMessage", "Usage"]
diff --git a/src/gradientai/types/model.py b/src/gradientai/types/model.py
deleted file mode 100644
index 2631ee8d..00000000
--- a/src/gradientai/types/model.py
+++ /dev/null
@@ -1,21 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from typing_extensions import Literal
-
-from .._models import BaseModel
-
-__all__ = ["Model"]
-
-
-class Model(BaseModel):
- id: str
- """The model identifier, which can be referenced in the API endpoints."""
-
- created: int
- """The Unix timestamp (in seconds) when the model was created."""
-
- object: Literal["model"]
- """The object type, which is always "model"."""
-
- owned_by: str
- """The organization that owns the model."""
diff --git a/src/gradientai/types/model_list_params.py b/src/gradientai/types/model_list_params.py
new file mode 100644
index 00000000..4abc1dc1
--- /dev/null
+++ b/src/gradientai/types/model_list_params.py
@@ -0,0 +1,42 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import List
+from typing_extensions import Literal, TypedDict
+
+__all__ = ["ModelListParams"]
+
+
+class ModelListParams(TypedDict, total=False):
+ page: int
+ """page number."""
+
+ per_page: int
+ """items per page."""
+
+ public_only: bool
+ """only include models that are publicly available."""
+
+ usecases: List[
+ Literal[
+ "MODEL_USECASE_UNKNOWN",
+ "MODEL_USECASE_AGENT",
+ "MODEL_USECASE_FINETUNED",
+ "MODEL_USECASE_KNOWLEDGEBASE",
+ "MODEL_USECASE_GUARDRAIL",
+ "MODEL_USECASE_REASONING",
+ "MODEL_USECASE_SERVERLESS",
+ ]
+ ]
+ """include only models defined for the listed usecases.
+
+ - MODEL_USECASE_UNKNOWN: The use case of the model is unknown
+ - MODEL_USECASE_AGENT: The model maybe used in an agent
+ - MODEL_USECASE_FINETUNED: The model maybe used for fine tuning
+ - MODEL_USECASE_KNOWLEDGEBASE: The model maybe used for knowledge bases
+ (embedding models)
+ - MODEL_USECASE_GUARDRAIL: The model maybe used for guardrails
+ - MODEL_USECASE_REASONING: The model usecase for reasoning
+ - MODEL_USECASE_SERVERLESS: The model usecase for serverless inference
+ """
diff --git a/src/gradientai/types/model_list_response.py b/src/gradientai/types/model_list_response.py
index 8f835449..47651759 100644
--- a/src/gradientai/types/model_list_response.py
+++ b/src/gradientai/types/model_list_response.py
@@ -1,15 +1,18 @@
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-from typing import List
-from typing_extensions import Literal
+from typing import List, Optional
-from .model import Model
from .._models import BaseModel
+from .api_model import APIModel
+from .shared.api_meta import APIMeta
+from .shared.api_links import APILinks
__all__ = ["ModelListResponse"]
class ModelListResponse(BaseModel):
- data: List[Model]
+ links: Optional[APILinks] = None
- object: Literal["list"]
+ meta: Optional[APIMeta] = None
+
+ models: Optional[List[APIModel]] = None
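
Putting the new `ModelListParams` and this reshaped `ModelListResponse` together, a minimal usage sketch follows. It assumes the `client.models.list` entry point shown in the updated tests further down and uses only the fields defined in this diff.

```python
# Sketch under the assumption that client.models.list accepts the
# ModelListParams fields above and returns this ModelListResponse.
from gradientai import GradientAI

client = GradientAI()

response = client.models.list(
    page=1,
    per_page=25,
    public_only=True,
    usecases=["MODEL_USECASE_AGENT"],
)

for model in response.models or []:  # `models` is Optional[List[APIModel]]
    print(model.name, model.uuid)

print(response.meta, response.links)  # pagination metadata, when present
```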
diff --git a/src/gradientai/types/shared/__init__.py b/src/gradientai/types/shared/__init__.py
index 5f02d62f..dc71bdd3 100644
--- a/src/gradientai/types/shared/__init__.py
+++ b/src/gradientai/types/shared/__init__.py
@@ -2,3 +2,4 @@
from .api_meta import APIMeta as APIMeta
from .api_links import APILinks as APILinks
+from .chat_completion_token_logprob import ChatCompletionTokenLogprob as ChatCompletionTokenLogprob
diff --git a/src/gradientai/types/chat/chat_completion_token_logprob.py b/src/gradientai/types/shared/chat_completion_token_logprob.py
similarity index 100%
rename from src/gradientai/types/chat/chat_completion_token_logprob.py
rename to src/gradientai/types/shared/chat_completion_token_logprob.py
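
With the rename above, the logprob type is re-exported from the shared namespace rather than `gradientai.types.chat`; a sketch of the corresponding import change, assuming only the re-exports visible in this diff:

```python
# Old import path, removed from gradientai/types/chat/__init__.py above:
# from gradientai.types.chat import ChatCompletionTokenLogprob

# New import path after the move to the shared namespace:
from gradientai.types.shared import ChatCompletionTokenLogprob
```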
diff --git a/tests/api_resources/agents/chat/__init__.py b/tests/api_resources/agents/chat/__init__.py
new file mode 100644
index 00000000..fd8019a9
--- /dev/null
+++ b/tests/api_resources/agents/chat/__init__.py
@@ -0,0 +1 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
diff --git a/tests/api_resources/agents/chat/test_completions.py b/tests/api_resources/agents/chat/test_completions.py
new file mode 100644
index 00000000..89d531a5
--- /dev/null
+++ b/tests/api_resources/agents/chat/test_completions.py
@@ -0,0 +1,186 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+import os
+from typing import Any, cast
+
+import pytest
+
+from gradientai import GradientAI, AsyncGradientAI
+from tests.utils import assert_matches_type
+from gradientai.types.agents.chat import CompletionCreateResponse
+
+base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010")
+
+
+class TestCompletions:
+ parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"])
+
+ @pytest.mark.skip()
+ @parametrize
+ def test_method_create(self, client: GradientAI) -> None:
+ completion = client.agents.chat.completions.create(
+ messages=[
+ {
+ "content": "string",
+ "role": "system",
+ }
+ ],
+ model="llama3-8b-instruct",
+ )
+ assert_matches_type(CompletionCreateResponse, completion, path=["response"])
+
+ @pytest.mark.skip()
+ @parametrize
+ def test_method_create_with_all_params(self, client: GradientAI) -> None:
+ completion = client.agents.chat.completions.create(
+ messages=[
+ {
+ "content": "string",
+ "role": "system",
+ }
+ ],
+ model="llama3-8b-instruct",
+ frequency_penalty=-2,
+ logit_bias={"foo": 0},
+ logprobs=True,
+ max_completion_tokens=256,
+ max_tokens=0,
+ metadata={"foo": "string"},
+ n=1,
+ presence_penalty=-2,
+ stop="\n",
+ stream=True,
+ stream_options={"include_usage": True},
+ temperature=1,
+ top_logprobs=0,
+ top_p=1,
+ user="user-1234",
+ )
+ assert_matches_type(CompletionCreateResponse, completion, path=["response"])
+
+ @pytest.mark.skip()
+ @parametrize
+ def test_raw_response_create(self, client: GradientAI) -> None:
+ response = client.agents.chat.completions.with_raw_response.create(
+ messages=[
+ {
+ "content": "string",
+ "role": "system",
+ }
+ ],
+ model="llama3-8b-instruct",
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ completion = response.parse()
+ assert_matches_type(CompletionCreateResponse, completion, path=["response"])
+
+ @pytest.mark.skip()
+ @parametrize
+ def test_streaming_response_create(self, client: GradientAI) -> None:
+ with client.agents.chat.completions.with_streaming_response.create(
+ messages=[
+ {
+ "content": "string",
+ "role": "system",
+ }
+ ],
+ model="llama3-8b-instruct",
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ completion = response.parse()
+ assert_matches_type(CompletionCreateResponse, completion, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+
+class TestAsyncCompletions:
+ parametrize = pytest.mark.parametrize(
+ "async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"]
+ )
+
+ @pytest.mark.skip()
+ @parametrize
+ async def test_method_create(self, async_client: AsyncGradientAI) -> None:
+ completion = await async_client.agents.chat.completions.create(
+ messages=[
+ {
+ "content": "string",
+ "role": "system",
+ }
+ ],
+ model="llama3-8b-instruct",
+ )
+ assert_matches_type(CompletionCreateResponse, completion, path=["response"])
+
+ @pytest.mark.skip()
+ @parametrize
+ async def test_method_create_with_all_params(self, async_client: AsyncGradientAI) -> None:
+ completion = await async_client.agents.chat.completions.create(
+ messages=[
+ {
+ "content": "string",
+ "role": "system",
+ }
+ ],
+ model="llama3-8b-instruct",
+ frequency_penalty=-2,
+ logit_bias={"foo": 0},
+ logprobs=True,
+ max_completion_tokens=256,
+ max_tokens=0,
+ metadata={"foo": "string"},
+ n=1,
+ presence_penalty=-2,
+ stop="\n",
+ stream=True,
+ stream_options={"include_usage": True},
+ temperature=1,
+ top_logprobs=0,
+ top_p=1,
+ user="user-1234",
+ )
+ assert_matches_type(CompletionCreateResponse, completion, path=["response"])
+
+ @pytest.mark.skip()
+ @parametrize
+ async def test_raw_response_create(self, async_client: AsyncGradientAI) -> None:
+ response = await async_client.agents.chat.completions.with_raw_response.create(
+ messages=[
+ {
+ "content": "string",
+ "role": "system",
+ }
+ ],
+ model="llama3-8b-instruct",
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ completion = await response.parse()
+ assert_matches_type(CompletionCreateResponse, completion, path=["response"])
+
+ @pytest.mark.skip()
+ @parametrize
+ async def test_streaming_response_create(self, async_client: AsyncGradientAI) -> None:
+ async with async_client.agents.chat.completions.with_streaming_response.create(
+ messages=[
+ {
+ "content": "string",
+ "role": "system",
+ }
+ ],
+ model="llama3-8b-instruct",
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ completion = await response.parse()
+ assert_matches_type(CompletionCreateResponse, completion, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
diff --git a/tests/api_resources/test_models.py b/tests/api_resources/test_models.py
index afee0c1f..5e119f71 100644
--- a/tests/api_resources/test_models.py
+++ b/tests/api_resources/test_models.py
@@ -9,7 +9,7 @@
from gradientai import GradientAI, AsyncGradientAI
from tests.utils import assert_matches_type
-from gradientai.types import Model, ModelListResponse
+from gradientai.types import ModelListResponse
base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010")
@@ -19,50 +19,19 @@ class TestModels:
@pytest.mark.skip()
@parametrize
- def test_method_retrieve(self, client: GradientAI) -> None:
- model = client.models.retrieve(
- "llama3-8b-instruct",
- )
- assert_matches_type(Model, model, path=["response"])
+ def test_method_list(self, client: GradientAI) -> None:
+ model = client.models.list()
+ assert_matches_type(ModelListResponse, model, path=["response"])
@pytest.mark.skip()
@parametrize
- def test_raw_response_retrieve(self, client: GradientAI) -> None:
- response = client.models.with_raw_response.retrieve(
- "llama3-8b-instruct",
+ def test_method_list_with_all_params(self, client: GradientAI) -> None:
+ model = client.models.list(
+ page=0,
+ per_page=0,
+ public_only=True,
+ usecases=["MODEL_USECASE_UNKNOWN"],
)
-
- assert response.is_closed is True
- assert response.http_request.headers.get("X-Stainless-Lang") == "python"
- model = response.parse()
- assert_matches_type(Model, model, path=["response"])
-
- @pytest.mark.skip()
- @parametrize
- def test_streaming_response_retrieve(self, client: GradientAI) -> None:
- with client.models.with_streaming_response.retrieve(
- "llama3-8b-instruct",
- ) as response:
- assert not response.is_closed
- assert response.http_request.headers.get("X-Stainless-Lang") == "python"
-
- model = response.parse()
- assert_matches_type(Model, model, path=["response"])
-
- assert cast(Any, response.is_closed) is True
-
- @pytest.mark.skip()
- @parametrize
- def test_path_params_retrieve(self, client: GradientAI) -> None:
- with pytest.raises(ValueError, match=r"Expected a non-empty value for `model` but received ''"):
- client.models.with_raw_response.retrieve(
- "",
- )
-
- @pytest.mark.skip()
- @parametrize
- def test_method_list(self, client: GradientAI) -> None:
- model = client.models.list()
assert_matches_type(ModelListResponse, model, path=["response"])
@pytest.mark.skip()
@@ -95,50 +64,19 @@ class TestAsyncModels:
@pytest.mark.skip()
@parametrize
- async def test_method_retrieve(self, async_client: AsyncGradientAI) -> None:
- model = await async_client.models.retrieve(
- "llama3-8b-instruct",
- )
- assert_matches_type(Model, model, path=["response"])
+ async def test_method_list(self, async_client: AsyncGradientAI) -> None:
+ model = await async_client.models.list()
+ assert_matches_type(ModelListResponse, model, path=["response"])
@pytest.mark.skip()
@parametrize
- async def test_raw_response_retrieve(self, async_client: AsyncGradientAI) -> None:
- response = await async_client.models.with_raw_response.retrieve(
- "llama3-8b-instruct",
+ async def test_method_list_with_all_params(self, async_client: AsyncGradientAI) -> None:
+ model = await async_client.models.list(
+ page=0,
+ per_page=0,
+ public_only=True,
+ usecases=["MODEL_USECASE_UNKNOWN"],
)
-
- assert response.is_closed is True
- assert response.http_request.headers.get("X-Stainless-Lang") == "python"
- model = await response.parse()
- assert_matches_type(Model, model, path=["response"])
-
- @pytest.mark.skip()
- @parametrize
- async def test_streaming_response_retrieve(self, async_client: AsyncGradientAI) -> None:
- async with async_client.models.with_streaming_response.retrieve(
- "llama3-8b-instruct",
- ) as response:
- assert not response.is_closed
- assert response.http_request.headers.get("X-Stainless-Lang") == "python"
-
- model = await response.parse()
- assert_matches_type(Model, model, path=["response"])
-
- assert cast(Any, response.is_closed) is True
-
- @pytest.mark.skip()
- @parametrize
- async def test_path_params_retrieve(self, async_client: AsyncGradientAI) -> None:
- with pytest.raises(ValueError, match=r"Expected a non-empty value for `model` but received ''"):
- await async_client.models.with_raw_response.retrieve(
- "",
- )
-
- @pytest.mark.skip()
- @parametrize
- async def test_method_list(self, async_client: AsyncGradientAI) -> None:
- model = await async_client.models.list()
assert_matches_type(ModelListResponse, model, path=["response"])
@pytest.mark.skip()