diff --git a/.release-please-manifest.json b/.release-please-manifest.json index 4f9005ea..b5db7ce1 100644 --- a/.release-please-manifest.json +++ b/.release-please-manifest.json @@ -1,3 +1,3 @@ { - ".": "0.1.0-alpha.6" + ".": "0.1.0-alpha.7" } \ No newline at end of file diff --git a/.stats.yml b/.stats.yml index a1e73eb0..79a36ab0 100644 --- a/.stats.yml +++ b/.stats.yml @@ -1,4 +1,4 @@ -configured_endpoints: 77 +configured_endpoints: 76 openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/digitalocean%2Fgradientai-e8b3cbc80e18e4f7f277010349f25e1319156704f359911dc464cc21a0d077a6.yml openapi_spec_hash: c773d792724f5647ae25a5ae4ccec208 -config_hash: 9c2519464cf5de240e34bd89b9f65706 +config_hash: f0976fbc552ea878bb527447b5e663c9 diff --git a/CHANGELOG.md b/CHANGELOG.md index d9b29735..15fec91a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,16 @@ # Changelog +## 0.1.0-alpha.7 (2025-06-27) + +Full Changelog: [v0.1.0-alpha.6...v0.1.0-alpha.7](https://github.com/digitalocean/gradientai-python/compare/v0.1.0-alpha.6...v0.1.0-alpha.7) + +### Features + +* **api:** manual updates ([63b9ec0](https://github.com/digitalocean/gradientai-python/commit/63b9ec02a646dad258afbd048db8db1af8d4401b)) +* **api:** manual updates ([5247aee](https://github.com/digitalocean/gradientai-python/commit/5247aee6d6052f6380fbe892d7c2bd9a8d0a32c0)) +* **api:** manual updates ([aa9e2c7](https://github.com/digitalocean/gradientai-python/commit/aa9e2c78956162f6195fdbaa1c95754ee4af207e)) +* **client:** add agent_domain option ([b4b6260](https://github.com/digitalocean/gradientai-python/commit/b4b62609a12a1dfa0b505e9ec54334b776fb0515)) + ## 0.1.0-alpha.6 (2025-06-27) Full Changelog: [v0.1.0-alpha.5...v0.1.0-alpha.6](https://github.com/digitalocean/gradientai-python/compare/v0.1.0-alpha.5...v0.1.0-alpha.6) diff --git a/README.md b/README.md index 24b0975b..d5bd9c97 100644 --- a/README.md +++ b/README.md @@ -31,7 +31,7 @@ client = GradientAI( api_key=os.environ.get("GRADIENTAI_API_KEY"), # This is the default and can be omitted ) -completion = client.chat.completions.create( +completion = client.agents.chat.completions.create( messages=[ { "content": "string", @@ -63,7 +63,7 @@ client = AsyncGradientAI( async def main() -> None: - completion = await client.chat.completions.create( + completion = await client.agents.chat.completions.create( messages=[ { "content": "string", @@ -105,7 +105,7 @@ async def main() -> None: api_key=os.environ.get("GRADIENTAI_API_KEY"), # This is the default and can be omitted http_client=DefaultAioHttpClient(), ) as client: - completion = await client.chat.completions.create( + completion = await client.agents.chat.completions.create( messages=[ { "content": "string", @@ -138,7 +138,7 @@ from gradientai import GradientAI client = GradientAI() -completion = client.chat.completions.create( +completion = client.agents.chat.completions.create( messages=[ { "content": "string", diff --git a/api.md b/api.md index 9a2dd757..dc48f7b3 100644 --- a/api.md +++ b/api.md @@ -1,7 +1,7 @@ # Shared Types ```python -from gradientai.types import APILinks, APIMeta +from gradientai.types import APILinks, APIMeta, ChatCompletionTokenLogprob ``` # Agents @@ -58,6 +58,20 @@ Methods: - client.agents.api_keys.delete(api_key_uuid, \*, agent_uuid) -> APIKeyDeleteResponse - client.agents.api_keys.regenerate(api_key_uuid, \*, agent_uuid) -> APIKeyRegenerateResponse +## Chat + +### Completions + +Types: + +```python +from gradientai.types.agents.chat import CompletionCreateResponse +``` + +Methods: 
+ +- client.agents.chat.completions.create(\*\*params) -> CompletionCreateResponse + ## EvaluationMetrics Types: @@ -382,7 +396,7 @@ Methods: Types: ```python -from gradientai.types.chat import ChatCompletionTokenLogprob, CompletionCreateResponse +from gradientai.types.chat import CompletionCreateResponse ``` Methods: @@ -419,10 +433,9 @@ Methods: Types: ```python -from gradientai.types import APIAgreement, APIModel, APIModelVersion, Model, ModelListResponse +from gradientai.types import APIAgreement, APIModel, APIModelVersion, ModelListResponse ``` Methods: -- client.models.retrieve(model) -> Model -- client.models.list() -> ModelListResponse +- client.models.list(\*\*params) -> ModelListResponse diff --git a/pyproject.toml b/pyproject.toml index 0f04322b..29531941 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "c63a5cfe-b235-4fbe-8bbb-82a9e02a482a-python" -version = "0.1.0-alpha.6" +version = "0.1.0-alpha.7" description = "The official Python library for GradientAI" dynamic = ["readme"] license = "Apache-2.0" diff --git a/src/gradientai/_client.py b/src/gradientai/_client.py index 0020ed16..327273c9 100644 --- a/src/gradientai/_client.py +++ b/src/gradientai/_client.py @@ -57,12 +57,14 @@ class GradientAI(SyncAPIClient): # client options api_key: str | None inference_key: str | None + agent_domain: str | None def __init__( self, *, api_key: str | None = None, inference_key: str | None = None, + agent_domain: str | None = None, base_url: str | httpx.URL | None = None, timeout: Union[float, Timeout, None, NotGiven] = NOT_GIVEN, max_retries: int = DEFAULT_MAX_RETRIES, @@ -96,6 +98,8 @@ def __init__( inference_key = os.environ.get("GRADIENTAI_INFERENCE_KEY") self.inference_key = inference_key + self.agent_domain = agent_domain + if base_url is None: base_url = os.environ.get("GRADIENT_AI_BASE_URL") self._base_url_overridden = base_url is not None @@ -201,6 +205,7 @@ def copy( *, api_key: str | None = None, inference_key: str | None = None, + agent_domain: str | None = None, base_url: str | httpx.URL | None = None, timeout: float | Timeout | None | NotGiven = NOT_GIVEN, http_client: httpx.Client | None = None, @@ -236,6 +241,7 @@ def copy( client = self.__class__( api_key=api_key or self.api_key, inference_key=inference_key or self.inference_key, + agent_domain=agent_domain or self.agent_domain, base_url=base_url or self.base_url, timeout=self.timeout if isinstance(timeout, NotGiven) else timeout, http_client=http_client, @@ -289,12 +295,14 @@ class AsyncGradientAI(AsyncAPIClient): # client options api_key: str | None inference_key: str | None + agent_domain: str | None def __init__( self, *, api_key: str | None = None, inference_key: str | None = None, + agent_domain: str | None = None, base_url: str | httpx.URL | None = None, timeout: Union[float, Timeout, None, NotGiven] = NOT_GIVEN, max_retries: int = DEFAULT_MAX_RETRIES, @@ -328,6 +336,8 @@ def __init__( inference_key = os.environ.get("GRADIENTAI_INFERENCE_KEY") self.inference_key = inference_key + self.agent_domain = agent_domain + if base_url is None: base_url = os.environ.get("GRADIENT_AI_BASE_URL") self._base_url_overridden = base_url is not None @@ -433,6 +443,7 @@ def copy( *, api_key: str | None = None, inference_key: str | None = None, + agent_domain: str | None = None, base_url: str | httpx.URL | None = None, timeout: float | Timeout | None | NotGiven = NOT_GIVEN, http_client: httpx.AsyncClient | None = None, @@ -468,6 +479,7 @@ def copy( client = self.__class__( api_key=api_key or 
self.api_key, inference_key=inference_key or self.inference_key, + agent_domain=agent_domain or self.agent_domain, base_url=base_url or self.base_url, timeout=self.timeout if isinstance(timeout, NotGiven) else timeout, http_client=http_client, diff --git a/src/gradientai/_version.py b/src/gradientai/_version.py index b8ef5fc0..d4e6dde6 100644 --- a/src/gradientai/_version.py +++ b/src/gradientai/_version.py @@ -1,4 +1,4 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. __title__ = "gradientai" -__version__ = "0.1.0-alpha.6" # x-release-please-version +__version__ = "0.1.0-alpha.7" # x-release-please-version diff --git a/src/gradientai/resources/agents/__init__.py b/src/gradientai/resources/agents/__init__.py index f5423f00..51075283 100644 --- a/src/gradientai/resources/agents/__init__.py +++ b/src/gradientai/resources/agents/__init__.py @@ -1,5 +1,13 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. +from .chat import ( + ChatResource, + AsyncChatResource, + ChatResourceWithRawResponse, + AsyncChatResourceWithRawResponse, + ChatResourceWithStreamingResponse, + AsyncChatResourceWithStreamingResponse, +) from .agents import ( AgentsResource, AsyncAgentsResource, @@ -88,6 +96,12 @@ "AsyncAPIKeysResourceWithRawResponse", "APIKeysResourceWithStreamingResponse", "AsyncAPIKeysResourceWithStreamingResponse", + "ChatResource", + "AsyncChatResource", + "ChatResourceWithRawResponse", + "AsyncChatResourceWithRawResponse", + "ChatResourceWithStreamingResponse", + "AsyncChatResourceWithStreamingResponse", "EvaluationMetricsResource", "AsyncEvaluationMetricsResource", "EvaluationMetricsResourceWithRawResponse", diff --git a/src/gradientai/resources/agents/agents.py b/src/gradientai/resources/agents/agents.py index 0a6e183c..200e9fc0 100644 --- a/src/gradientai/resources/agents/agents.py +++ b/src/gradientai/resources/agents/agents.py @@ -41,6 +41,14 @@ AsyncVersionsResourceWithStreamingResponse, ) from ..._compat import cached_property +from .chat.chat import ( + ChatResource, + AsyncChatResource, + ChatResourceWithRawResponse, + AsyncChatResourceWithRawResponse, + ChatResourceWithStreamingResponse, + AsyncChatResourceWithStreamingResponse, +) from .functions import ( FunctionsResource, AsyncFunctionsResource, @@ -114,6 +122,10 @@ class AgentsResource(SyncAPIResource): def api_keys(self) -> APIKeysResource: return APIKeysResource(self._client) + @cached_property + def chat(self) -> ChatResource: + return ChatResource(self._client) + @cached_property def evaluation_metrics(self) -> EvaluationMetricsResource: return EvaluationMetricsResource(self._client) @@ -498,6 +510,10 @@ class AsyncAgentsResource(AsyncAPIResource): def api_keys(self) -> AsyncAPIKeysResource: return AsyncAPIKeysResource(self._client) + @cached_property + def chat(self) -> AsyncChatResource: + return AsyncChatResource(self._client) + @cached_property def evaluation_metrics(self) -> AsyncEvaluationMetricsResource: return AsyncEvaluationMetricsResource(self._client) @@ -904,6 +920,10 @@ def __init__(self, agents: AgentsResource) -> None: def api_keys(self) -> APIKeysResourceWithRawResponse: return APIKeysResourceWithRawResponse(self._agents.api_keys) + @cached_property + def chat(self) -> ChatResourceWithRawResponse: + return ChatResourceWithRawResponse(self._agents.chat) + @cached_property def evaluation_metrics(self) -> EvaluationMetricsResourceWithRawResponse: return EvaluationMetricsResourceWithRawResponse(self._agents.evaluation_metrics) @@ -964,6 +984,10 
@@ def __init__(self, agents: AsyncAgentsResource) -> None: def api_keys(self) -> AsyncAPIKeysResourceWithRawResponse: return AsyncAPIKeysResourceWithRawResponse(self._agents.api_keys) + @cached_property + def chat(self) -> AsyncChatResourceWithRawResponse: + return AsyncChatResourceWithRawResponse(self._agents.chat) + @cached_property def evaluation_metrics(self) -> AsyncEvaluationMetricsResourceWithRawResponse: return AsyncEvaluationMetricsResourceWithRawResponse(self._agents.evaluation_metrics) @@ -1024,6 +1048,10 @@ def __init__(self, agents: AgentsResource) -> None: def api_keys(self) -> APIKeysResourceWithStreamingResponse: return APIKeysResourceWithStreamingResponse(self._agents.api_keys) + @cached_property + def chat(self) -> ChatResourceWithStreamingResponse: + return ChatResourceWithStreamingResponse(self._agents.chat) + @cached_property def evaluation_metrics(self) -> EvaluationMetricsResourceWithStreamingResponse: return EvaluationMetricsResourceWithStreamingResponse(self._agents.evaluation_metrics) @@ -1084,6 +1112,10 @@ def __init__(self, agents: AsyncAgentsResource) -> None: def api_keys(self) -> AsyncAPIKeysResourceWithStreamingResponse: return AsyncAPIKeysResourceWithStreamingResponse(self._agents.api_keys) + @cached_property + def chat(self) -> AsyncChatResourceWithStreamingResponse: + return AsyncChatResourceWithStreamingResponse(self._agents.chat) + @cached_property def evaluation_metrics(self) -> AsyncEvaluationMetricsResourceWithStreamingResponse: return AsyncEvaluationMetricsResourceWithStreamingResponse(self._agents.evaluation_metrics) diff --git a/src/gradientai/resources/agents/chat/__init__.py b/src/gradientai/resources/agents/chat/__init__.py new file mode 100644 index 00000000..ec960eb4 --- /dev/null +++ b/src/gradientai/resources/agents/chat/__init__.py @@ -0,0 +1,33 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from .chat import ( + ChatResource, + AsyncChatResource, + ChatResourceWithRawResponse, + AsyncChatResourceWithRawResponse, + ChatResourceWithStreamingResponse, + AsyncChatResourceWithStreamingResponse, +) +from .completions import ( + CompletionsResource, + AsyncCompletionsResource, + CompletionsResourceWithRawResponse, + AsyncCompletionsResourceWithRawResponse, + CompletionsResourceWithStreamingResponse, + AsyncCompletionsResourceWithStreamingResponse, +) + +__all__ = [ + "CompletionsResource", + "AsyncCompletionsResource", + "CompletionsResourceWithRawResponse", + "AsyncCompletionsResourceWithRawResponse", + "CompletionsResourceWithStreamingResponse", + "AsyncCompletionsResourceWithStreamingResponse", + "ChatResource", + "AsyncChatResource", + "ChatResourceWithRawResponse", + "AsyncChatResourceWithRawResponse", + "ChatResourceWithStreamingResponse", + "AsyncChatResourceWithStreamingResponse", +] diff --git a/src/gradientai/resources/agents/chat/chat.py b/src/gradientai/resources/agents/chat/chat.py new file mode 100644 index 00000000..c87bd158 --- /dev/null +++ b/src/gradientai/resources/agents/chat/chat.py @@ -0,0 +1,102 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
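For reference, a minimal usage sketch of the two user-facing changes so far — the chat-completions namespace moving under `agents`, and the new `agent_domain` client option. The domain value below is a hypothetical placeholder; this diff only shows the option being stored on the client:

```python
import os

from gradientai import GradientAI

# Construct the client; `agent_domain` is a new, optional client option.
client = GradientAI(
    api_key=os.environ.get("GRADIENTAI_API_KEY"),
    agent_domain="example-agent.ondigitalocean.app",  # hypothetical value
)

# Chat completions are now reached through the agents namespace.
completion = client.agents.chat.completions.create(
    messages=[
        {
            "content": "string",
            "role": "user",
        }
    ],
    model="llama3-8b-instruct",
)
print(completion.choices[0].message.content)
```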
+ +from __future__ import annotations + +from ...._compat import cached_property +from .completions import ( + CompletionsResource, + AsyncCompletionsResource, + CompletionsResourceWithRawResponse, + AsyncCompletionsResourceWithRawResponse, + CompletionsResourceWithStreamingResponse, + AsyncCompletionsResourceWithStreamingResponse, +) +from ...._resource import SyncAPIResource, AsyncAPIResource + +__all__ = ["ChatResource", "AsyncChatResource"] + + +class ChatResource(SyncAPIResource): + @cached_property + def completions(self) -> CompletionsResource: + return CompletionsResource(self._client) + + @cached_property + def with_raw_response(self) -> ChatResourceWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/digitalocean/gradientai-python#accessing-raw-response-data-eg-headers + """ + return ChatResourceWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> ChatResourceWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/digitalocean/gradientai-python#with_streaming_response + """ + return ChatResourceWithStreamingResponse(self) + + +class AsyncChatResource(AsyncAPIResource): + @cached_property + def completions(self) -> AsyncCompletionsResource: + return AsyncCompletionsResource(self._client) + + @cached_property + def with_raw_response(self) -> AsyncChatResourceWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/digitalocean/gradientai-python#accessing-raw-response-data-eg-headers + """ + return AsyncChatResourceWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> AsyncChatResourceWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. 
+ + For more information, see https://www.github.com/digitalocean/gradientai-python#with_streaming_response + """ + return AsyncChatResourceWithStreamingResponse(self) + + +class ChatResourceWithRawResponse: + def __init__(self, chat: ChatResource) -> None: + self._chat = chat + + @cached_property + def completions(self) -> CompletionsResourceWithRawResponse: + return CompletionsResourceWithRawResponse(self._chat.completions) + + +class AsyncChatResourceWithRawResponse: + def __init__(self, chat: AsyncChatResource) -> None: + self._chat = chat + + @cached_property + def completions(self) -> AsyncCompletionsResourceWithRawResponse: + return AsyncCompletionsResourceWithRawResponse(self._chat.completions) + + +class ChatResourceWithStreamingResponse: + def __init__(self, chat: ChatResource) -> None: + self._chat = chat + + @cached_property + def completions(self) -> CompletionsResourceWithStreamingResponse: + return CompletionsResourceWithStreamingResponse(self._chat.completions) + + +class AsyncChatResourceWithStreamingResponse: + def __init__(self, chat: AsyncChatResource) -> None: + self._chat = chat + + @cached_property + def completions(self) -> AsyncCompletionsResourceWithStreamingResponse: + return AsyncCompletionsResourceWithStreamingResponse(self._chat.completions) diff --git a/src/gradientai/resources/agents/chat/completions.py b/src/gradientai/resources/agents/chat/completions.py new file mode 100644 index 00000000..a213bf05 --- /dev/null +++ b/src/gradientai/resources/agents/chat/completions.py @@ -0,0 +1,385 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Dict, List, Union, Iterable, Optional + +import httpx + +from ...._types import NOT_GIVEN, Body, Query, Headers, NotGiven +from ...._utils import maybe_transform, async_maybe_transform +from ...._compat import cached_property +from ...._resource import SyncAPIResource, AsyncAPIResource +from ...._response import ( + to_raw_response_wrapper, + to_streamed_response_wrapper, + async_to_raw_response_wrapper, + async_to_streamed_response_wrapper, +) +from ...._base_client import make_request_options +from ....types.agents.chat import completion_create_params +from ....types.agents.chat.completion_create_response import CompletionCreateResponse + +__all__ = ["CompletionsResource", "AsyncCompletionsResource"] + + +class CompletionsResource(SyncAPIResource): + @cached_property + def with_raw_response(self) -> CompletionsResourceWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/digitalocean/gradientai-python#accessing-raw-response-data-eg-headers + """ + return CompletionsResourceWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> CompletionsResourceWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. 
+ + For more information, see https://www.github.com/digitalocean/gradientai-python#with_streaming_response + """ + return CompletionsResourceWithStreamingResponse(self) + + def create( + self, + *, + messages: Iterable[completion_create_params.Message], + model: str, + frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN, + logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN, + logprobs: Optional[bool] | NotGiven = NOT_GIVEN, + max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, + max_tokens: Optional[int] | NotGiven = NOT_GIVEN, + metadata: Optional[Dict[str, str]] | NotGiven = NOT_GIVEN, + n: Optional[int] | NotGiven = NOT_GIVEN, + presence_penalty: Optional[float] | NotGiven = NOT_GIVEN, + stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN, + stream: Optional[bool] | NotGiven = NOT_GIVEN, + stream_options: Optional[completion_create_params.StreamOptions] | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + top_logprobs: Optional[int] | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + user: str | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> CompletionCreateResponse: + """ + Creates a model response for the given chat conversation. + + Args: + messages: A list of messages comprising the conversation so far. + + model: Model ID used to generate the response. + + frequency_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on their + existing frequency in the text so far, decreasing the model's likelihood to + repeat the same line verbatim. + + logit_bias: Modify the likelihood of specified tokens appearing in the completion. + + Accepts a JSON object that maps tokens (specified by their token ID in the + tokenizer) to an associated bias value from -100 to 100. Mathematically, the + bias is added to the logits generated by the model prior to sampling. The exact + effect will vary per model, but values between -1 and 1 should decrease or + increase likelihood of selection; values like -100 or 100 should result in a ban + or exclusive selection of the relevant token. + + logprobs: Whether to return log probabilities of the output tokens or not. If true, + returns the log probabilities of each output token returned in the `content` of + `message`. + + max_completion_tokens: The maximum number of completion tokens that may be used over the course of the + run. The run will make a best effort to use only the number of completion tokens + specified, across multiple turns of the run. + + max_tokens: The maximum number of tokens that can be generated in the completion. + + The token count of your prompt plus `max_tokens` cannot exceed the model's + context length. + + metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. + + n: How many chat completion choices to generate for each input message. 
Note that + you will be charged based on the number of generated tokens across all of the + choices. Keep `n` as `1` to minimize costs. + + presence_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on + whether they appear in the text so far, increasing the model's likelihood to + talk about new topics. + + stop: Up to 4 sequences where the API will stop generating further tokens. The + returned text will not contain the stop sequence. + + stream: If set to true, the model response data will be streamed to the client as it is + generated using server-sent events. + + stream_options: Options for streaming response. Only set this when you set `stream: true`. + + temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will + make the output more random, while lower values like 0.2 will make it more + focused and deterministic. We generally recommend altering this or `top_p` but + not both. + + top_logprobs: An integer between 0 and 20 specifying the number of most likely tokens to + return at each token position, each with an associated log probability. + `logprobs` must be set to `true` if this parameter is used. + + top_p: An alternative to sampling with temperature, called nucleus sampling, where the + model considers the results of the tokens with top_p probability mass. So 0.1 + means only the tokens comprising the top 10% probability mass are considered. + + We generally recommend altering this or `temperature` but not both. + + user: A unique identifier representing your end-user, which can help DigitalOcean to + monitor and detect abuse. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + return self._post( + "/chat/completions" + if self._client._base_url_overridden + else "https://inference.do-ai.run/v1/chat/completions", + body=maybe_transform( + { + "messages": messages, + "model": model, + "frequency_penalty": frequency_penalty, + "logit_bias": logit_bias, + "logprobs": logprobs, + "max_completion_tokens": max_completion_tokens, + "max_tokens": max_tokens, + "metadata": metadata, + "n": n, + "presence_penalty": presence_penalty, + "stop": stop, + "stream": stream, + "stream_options": stream_options, + "temperature": temperature, + "top_logprobs": top_logprobs, + "top_p": top_p, + "user": user, + }, + completion_create_params.CompletionCreateParams, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=CompletionCreateResponse, + ) + + +class AsyncCompletionsResource(AsyncAPIResource): + @cached_property + def with_raw_response(self) -> AsyncCompletionsResourceWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/digitalocean/gradientai-python#accessing-raw-response-data-eg-headers + """ + return AsyncCompletionsResourceWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> AsyncCompletionsResourceWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. 
+ + For more information, see https://www.github.com/digitalocean/gradientai-python#with_streaming_response + """ + return AsyncCompletionsResourceWithStreamingResponse(self) + + async def create( + self, + *, + messages: Iterable[completion_create_params.Message], + model: str, + frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN, + logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN, + logprobs: Optional[bool] | NotGiven = NOT_GIVEN, + max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, + max_tokens: Optional[int] | NotGiven = NOT_GIVEN, + metadata: Optional[Dict[str, str]] | NotGiven = NOT_GIVEN, + n: Optional[int] | NotGiven = NOT_GIVEN, + presence_penalty: Optional[float] | NotGiven = NOT_GIVEN, + stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN, + stream: Optional[bool] | NotGiven = NOT_GIVEN, + stream_options: Optional[completion_create_params.StreamOptions] | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + top_logprobs: Optional[int] | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + user: str | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> CompletionCreateResponse: + """ + Creates a model response for the given chat conversation. + + Args: + messages: A list of messages comprising the conversation so far. + + model: Model ID used to generate the response. + + frequency_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on their + existing frequency in the text so far, decreasing the model's likelihood to + repeat the same line verbatim. + + logit_bias: Modify the likelihood of specified tokens appearing in the completion. + + Accepts a JSON object that maps tokens (specified by their token ID in the + tokenizer) to an associated bias value from -100 to 100. Mathematically, the + bias is added to the logits generated by the model prior to sampling. The exact + effect will vary per model, but values between -1 and 1 should decrease or + increase likelihood of selection; values like -100 or 100 should result in a ban + or exclusive selection of the relevant token. + + logprobs: Whether to return log probabilities of the output tokens or not. If true, + returns the log probabilities of each output token returned in the `content` of + `message`. + + max_completion_tokens: The maximum number of completion tokens that may be used over the course of the + run. The run will make a best effort to use only the number of completion tokens + specified, across multiple turns of the run. + + max_tokens: The maximum number of tokens that can be generated in the completion. + + The token count of your prompt plus `max_tokens` cannot exceed the model's + context length. + + metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. + + n: How many chat completion choices to generate for each input message. 
Note that + you will be charged based on the number of generated tokens across all of the + choices. Keep `n` as `1` to minimize costs. + + presence_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on + whether they appear in the text so far, increasing the model's likelihood to + talk about new topics. + + stop: Up to 4 sequences where the API will stop generating further tokens. The + returned text will not contain the stop sequence. + + stream: If set to true, the model response data will be streamed to the client as it is + generated using server-sent events. + + stream_options: Options for streaming response. Only set this when you set `stream: true`. + + temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will + make the output more random, while lower values like 0.2 will make it more + focused and deterministic. We generally recommend altering this or `top_p` but + not both. + + top_logprobs: An integer between 0 and 20 specifying the number of most likely tokens to + return at each token position, each with an associated log probability. + `logprobs` must be set to `true` if this parameter is used. + + top_p: An alternative to sampling with temperature, called nucleus sampling, where the + model considers the results of the tokens with top_p probability mass. So 0.1 + means only the tokens comprising the top 10% probability mass are considered. + + We generally recommend altering this or `temperature` but not both. + + user: A unique identifier representing your end-user, which can help DigitalOcean to + monitor and detect abuse. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + return await self._post( + "/chat/completions" + if self._client._base_url_overridden + else "https://inference.do-ai.run/v1/chat/completions", + body=await async_maybe_transform( + { + "messages": messages, + "model": model, + "frequency_penalty": frequency_penalty, + "logit_bias": logit_bias, + "logprobs": logprobs, + "max_completion_tokens": max_completion_tokens, + "max_tokens": max_tokens, + "metadata": metadata, + "n": n, + "presence_penalty": presence_penalty, + "stop": stop, + "stream": stream, + "stream_options": stream_options, + "temperature": temperature, + "top_logprobs": top_logprobs, + "top_p": top_p, + "user": user, + }, + completion_create_params.CompletionCreateParams, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=CompletionCreateResponse, + ) + + +class CompletionsResourceWithRawResponse: + def __init__(self, completions: CompletionsResource) -> None: + self._completions = completions + + self.create = to_raw_response_wrapper( + completions.create, + ) + + +class AsyncCompletionsResourceWithRawResponse: + def __init__(self, completions: AsyncCompletionsResource) -> None: + self._completions = completions + + self.create = async_to_raw_response_wrapper( + completions.create, + ) + + +class CompletionsResourceWithStreamingResponse: + def __init__(self, completions: CompletionsResource) -> None: + self._completions = completions + + self.create = to_streamed_response_wrapper( + completions.create, + ) + + +class AsyncCompletionsResourceWithStreamingResponse: + def __init__(self, completions: AsyncCompletionsResource) -> 
None: + self._completions = completions + + self.create = async_to_streamed_response_wrapper( + completions.create, + ) diff --git a/src/gradientai/resources/models.py b/src/gradientai/resources/models.py index da5462ae..c8e78b9b 100644 --- a/src/gradientai/resources/models.py +++ b/src/gradientai/resources/models.py @@ -2,9 +2,14 @@ from __future__ import annotations +from typing import List +from typing_extensions import Literal + import httpx +from ..types import model_list_params from .._types import NOT_GIVEN, Body, Query, Headers, NotGiven +from .._utils import maybe_transform, async_maybe_transform from .._compat import cached_property from .._resource import SyncAPIResource, AsyncAPIResource from .._response import ( @@ -13,7 +18,6 @@ async_to_raw_response_wrapper, async_to_streamed_response_wrapper, ) -from ..types.model import Model from .._base_client import make_request_options from ..types.model_list_response import ModelListResponse @@ -40,22 +44,52 @@ def with_streaming_response(self) -> ModelsResourceWithStreamingResponse: """ return ModelsResourceWithStreamingResponse(self) - def retrieve( + def list( self, - model: str, *, + page: int | NotGiven = NOT_GIVEN, + per_page: int | NotGiven = NOT_GIVEN, + public_only: bool | NotGiven = NOT_GIVEN, + usecases: List[ + Literal[ + "MODEL_USECASE_UNKNOWN", + "MODEL_USECASE_AGENT", + "MODEL_USECASE_FINETUNED", + "MODEL_USECASE_KNOWLEDGEBASE", + "MODEL_USECASE_GUARDRAIL", + "MODEL_USECASE_REASONING", + "MODEL_USECASE_SERVERLESS", + ] + ] + | NotGiven = NOT_GIVEN, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, extra_query: Query | None = None, extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> Model: + ) -> ModelListResponse: """ - Retrieves a model instance, providing basic information about the model such as - the owner and permissioning. + To list all models, send a GET request to `/v2/gen-ai/models`. Args: + page: page number. + + per_page: items per page. + + public_only: only include models that are publicly available. + + usecases: include only models defined for the listed usecases. 
+ + - MODEL_USECASE_UNKNOWN: The use case of the model is unknown + - MODEL_USECASE_AGENT: The model maybe used in an agent + - MODEL_USECASE_FINETUNED: The model maybe used for fine tuning + - MODEL_USECASE_KNOWLEDGEBASE: The model maybe used for knowledge bases + (embedding models) + - MODEL_USECASE_GUARDRAIL: The model maybe used for guardrails + - MODEL_USECASE_REASONING: The model usecase for reasoning + - MODEL_USECASE_SERVERLESS: The model usecase for serverless inference + extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -64,36 +98,24 @@ def retrieve( timeout: Override the client-level default timeout for this request, in seconds """ - if not model: - raise ValueError(f"Expected a non-empty value for `model` but received {model!r}") return self._get( - f"/models/{model}" + "/v2/gen-ai/models" if self._client._base_url_overridden - else f"https://inference.do-ai.run/v1/models/{model}", + else "https://api.digitalocean.com/v2/gen-ai/models", options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout - ), - cast_to=Model, - ) - - def list( - self, - *, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> ModelListResponse: - """ - Lists the currently available models, and provides basic information about each - one such as the owner and availability. - """ - return self._get( - "/models" if self._client._base_url_overridden else "https://inference.do-ai.run/v1/models", - options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + query=maybe_transform( + { + "page": page, + "per_page": per_page, + "public_only": public_only, + "usecases": usecases, + }, + model_list_params.ModelListParams, + ), ), cast_to=ModelListResponse, ) @@ -119,22 +141,52 @@ def with_streaming_response(self) -> AsyncModelsResourceWithStreamingResponse: """ return AsyncModelsResourceWithStreamingResponse(self) - async def retrieve( + async def list( self, - model: str, *, + page: int | NotGiven = NOT_GIVEN, + per_page: int | NotGiven = NOT_GIVEN, + public_only: bool | NotGiven = NOT_GIVEN, + usecases: List[ + Literal[ + "MODEL_USECASE_UNKNOWN", + "MODEL_USECASE_AGENT", + "MODEL_USECASE_FINETUNED", + "MODEL_USECASE_KNOWLEDGEBASE", + "MODEL_USECASE_GUARDRAIL", + "MODEL_USECASE_REASONING", + "MODEL_USECASE_SERVERLESS", + ] + ] + | NotGiven = NOT_GIVEN, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, extra_query: Query | None = None, extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> Model: + ) -> ModelListResponse: """ - Retrieves a model instance, providing basic information about the model such as - the owner and permissioning. + To list all models, send a GET request to `/v2/gen-ai/models`. Args: + page: page number. 
+ + per_page: items per page. + + public_only: only include models that are publicly available. + + usecases: include only models defined for the listed usecases. + + - MODEL_USECASE_UNKNOWN: The use case of the model is unknown + - MODEL_USECASE_AGENT: The model maybe used in an agent + - MODEL_USECASE_FINETUNED: The model maybe used for fine tuning + - MODEL_USECASE_KNOWLEDGEBASE: The model maybe used for knowledge bases + (embedding models) + - MODEL_USECASE_GUARDRAIL: The model maybe used for guardrails + - MODEL_USECASE_REASONING: The model usecase for reasoning + - MODEL_USECASE_SERVERLESS: The model usecase for serverless inference + extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -143,36 +195,24 @@ async def retrieve( timeout: Override the client-level default timeout for this request, in seconds """ - if not model: - raise ValueError(f"Expected a non-empty value for `model` but received {model!r}") return await self._get( - f"/models/{model}" + "/v2/gen-ai/models" if self._client._base_url_overridden - else f"https://inference.do-ai.run/v1/models/{model}", - options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout - ), - cast_to=Model, - ) - - async def list( - self, - *, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> ModelListResponse: - """ - Lists the currently available models, and provides basic information about each - one such as the owner and availability. 
- """ - return await self._get( - "/models" if self._client._base_url_overridden else "https://inference.do-ai.run/v1/models", + else "https://api.digitalocean.com/v2/gen-ai/models", options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + query=await async_maybe_transform( + { + "page": page, + "per_page": per_page, + "public_only": public_only, + "usecases": usecases, + }, + model_list_params.ModelListParams, + ), ), cast_to=ModelListResponse, ) @@ -182,9 +222,6 @@ class ModelsResourceWithRawResponse: def __init__(self, models: ModelsResource) -> None: self._models = models - self.retrieve = to_raw_response_wrapper( - models.retrieve, - ) self.list = to_raw_response_wrapper( models.list, ) @@ -194,9 +231,6 @@ class AsyncModelsResourceWithRawResponse: def __init__(self, models: AsyncModelsResource) -> None: self._models = models - self.retrieve = async_to_raw_response_wrapper( - models.retrieve, - ) self.list = async_to_raw_response_wrapper( models.list, ) @@ -206,9 +240,6 @@ class ModelsResourceWithStreamingResponse: def __init__(self, models: ModelsResource) -> None: self._models = models - self.retrieve = to_streamed_response_wrapper( - models.retrieve, - ) self.list = to_streamed_response_wrapper( models.list, ) @@ -218,9 +249,6 @@ class AsyncModelsResourceWithStreamingResponse: def __init__(self, models: AsyncModelsResource) -> None: self._models = models - self.retrieve = async_to_streamed_response_wrapper( - models.retrieve, - ) self.list = async_to_streamed_response_wrapper( models.list, ) diff --git a/src/gradientai/types/__init__.py b/src/gradientai/types/__init__.py index 626c3840..4ec63b92 100644 --- a/src/gradientai/types/__init__.py +++ b/src/gradientai/types/__init__.py @@ -2,14 +2,15 @@ from __future__ import annotations -from .model import Model as Model -from .shared import APIMeta as APIMeta, APILinks as APILinks +from .shared import APIMeta as APIMeta, APILinks as APILinks, ChatCompletionTokenLogprob as ChatCompletionTokenLogprob from .api_agent import APIAgent as APIAgent +from .api_model import APIModel as APIModel from .api_agreement import APIAgreement as APIAgreement from .api_workspace import APIWorkspace as APIWorkspace from .api_agent_model import APIAgentModel as APIAgentModel from .agent_list_params import AgentListParams as AgentListParams from .api_model_version import APIModelVersion as APIModelVersion +from .model_list_params import ModelListParams as ModelListParams from .api_knowledge_base import APIKnowledgeBase as APIKnowledgeBase from .region_list_params import RegionListParams as RegionListParams from .agent_create_params import AgentCreateParams as AgentCreateParams diff --git a/src/gradientai/types/agents/chat/__init__.py b/src/gradientai/types/agents/chat/__init__.py new file mode 100644 index 00000000..9384ac14 --- /dev/null +++ b/src/gradientai/types/agents/chat/__init__.py @@ -0,0 +1,6 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from __future__ import annotations + +from .completion_create_params import CompletionCreateParams as CompletionCreateParams +from .completion_create_response import CompletionCreateResponse as CompletionCreateResponse diff --git a/src/gradientai/types/agents/chat/completion_create_params.py b/src/gradientai/types/agents/chat/completion_create_params.py new file mode 100644 index 00000000..11d032ff --- /dev/null +++ b/src/gradientai/types/agents/chat/completion_create_params.py @@ -0,0 +1,185 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Dict, List, Union, Iterable, Optional +from typing_extensions import Literal, Required, TypeAlias, TypedDict + +__all__ = [ + "CompletionCreateParams", + "Message", + "MessageChatCompletionRequestSystemMessage", + "MessageChatCompletionRequestDeveloperMessage", + "MessageChatCompletionRequestUserMessage", + "MessageChatCompletionRequestAssistantMessage", + "StreamOptions", +] + + +class CompletionCreateParams(TypedDict, total=False): + messages: Required[Iterable[Message]] + """A list of messages comprising the conversation so far.""" + + model: Required[str] + """Model ID used to generate the response.""" + + frequency_penalty: Optional[float] + """Number between -2.0 and 2.0. + + Positive values penalize new tokens based on their existing frequency in the + text so far, decreasing the model's likelihood to repeat the same line verbatim. + """ + + logit_bias: Optional[Dict[str, int]] + """Modify the likelihood of specified tokens appearing in the completion. + + Accepts a JSON object that maps tokens (specified by their token ID in the + tokenizer) to an associated bias value from -100 to 100. Mathematically, the + bias is added to the logits generated by the model prior to sampling. The exact + effect will vary per model, but values between -1 and 1 should decrease or + increase likelihood of selection; values like -100 or 100 should result in a ban + or exclusive selection of the relevant token. + """ + + logprobs: Optional[bool] + """Whether to return log probabilities of the output tokens or not. + + If true, returns the log probabilities of each output token returned in the + `content` of `message`. + """ + + max_completion_tokens: Optional[int] + """ + The maximum number of completion tokens that may be used over the course of the + run. The run will make a best effort to use only the number of completion tokens + specified, across multiple turns of the run. + """ + + max_tokens: Optional[int] + """The maximum number of tokens that can be generated in the completion. + + The token count of your prompt plus `max_tokens` cannot exceed the model's + context length. + """ + + metadata: Optional[Dict[str, str]] + """Set of 16 key-value pairs that can be attached to an object. + + This can be useful for storing additional information about the object in a + structured format, and querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. + """ + + n: Optional[int] + """How many chat completion choices to generate for each input message. + + Note that you will be charged based on the number of generated tokens across all + of the choices. Keep `n` as `1` to minimize costs. + """ + + presence_penalty: Optional[float] + """Number between -2.0 and 2.0. 
+ + Positive values penalize new tokens based on whether they appear in the text so + far, increasing the model's likelihood to talk about new topics. + """ + + stop: Union[Optional[str], List[str], None] + """Up to 4 sequences where the API will stop generating further tokens. + + The returned text will not contain the stop sequence. + """ + + stream: Optional[bool] + """ + If set to true, the model response data will be streamed to the client as it is + generated using server-sent events. + """ + + stream_options: Optional[StreamOptions] + """Options for streaming response. Only set this when you set `stream: true`.""" + + temperature: Optional[float] + """What sampling temperature to use, between 0 and 2. + + Higher values like 0.8 will make the output more random, while lower values like + 0.2 will make it more focused and deterministic. We generally recommend altering + this or `top_p` but not both. + """ + + top_logprobs: Optional[int] + """ + An integer between 0 and 20 specifying the number of most likely tokens to + return at each token position, each with an associated log probability. + `logprobs` must be set to `true` if this parameter is used. + """ + + top_p: Optional[float] + """ + An alternative to sampling with temperature, called nucleus sampling, where the + model considers the results of the tokens with top_p probability mass. So 0.1 + means only the tokens comprising the top 10% probability mass are considered. + + We generally recommend altering this or `temperature` but not both. + """ + + user: str + """ + A unique identifier representing your end-user, which can help DigitalOcean to + monitor and detect abuse. + """ + + +class MessageChatCompletionRequestSystemMessage(TypedDict, total=False): + content: Required[Union[str, List[str]]] + """The contents of the system message.""" + + role: Required[Literal["system"]] + """The role of the messages author, in this case `system`.""" + + +class MessageChatCompletionRequestDeveloperMessage(TypedDict, total=False): + content: Required[Union[str, List[str]]] + """The contents of the developer message.""" + + role: Required[Literal["developer"]] + """The role of the messages author, in this case `developer`.""" + + +class MessageChatCompletionRequestUserMessage(TypedDict, total=False): + content: Required[Union[str, List[str]]] + """The contents of the user message.""" + + role: Required[Literal["user"]] + """The role of the messages author, in this case `user`.""" + + +class MessageChatCompletionRequestAssistantMessage(TypedDict, total=False): + role: Required[Literal["assistant"]] + """The role of the messages author, in this case `assistant`.""" + + content: Union[str, List[str], None] + """The contents of the assistant message.""" + + +Message: TypeAlias = Union[ + MessageChatCompletionRequestSystemMessage, + MessageChatCompletionRequestDeveloperMessage, + MessageChatCompletionRequestUserMessage, + MessageChatCompletionRequestAssistantMessage, +] + + +class StreamOptions(TypedDict, total=False): + include_usage: bool + """If set, an additional chunk will be streamed before the `data: [DONE]` message. + + The `usage` field on this chunk shows the token usage statistics for the entire + request, and the `choices` field will always be an empty array. + + All other chunks will also include a `usage` field, but with a null value. + **NOTE:** If the stream is interrupted, you may not receive the final usage + chunk which contains the total token usage for the request. 
+ """ diff --git a/src/gradientai/types/agents/chat/completion_create_response.py b/src/gradientai/types/agents/chat/completion_create_response.py new file mode 100644 index 00000000..f2860c31 --- /dev/null +++ b/src/gradientai/types/agents/chat/completion_create_response.py @@ -0,0 +1,81 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import List, Optional +from typing_extensions import Literal + +from ...._models import BaseModel +from ...shared.chat_completion_token_logprob import ChatCompletionTokenLogprob + +__all__ = ["CompletionCreateResponse", "Choice", "ChoiceLogprobs", "ChoiceMessage", "Usage"] + + +class ChoiceLogprobs(BaseModel): + content: Optional[List[ChatCompletionTokenLogprob]] = None + """A list of message content tokens with log probability information.""" + + refusal: Optional[List[ChatCompletionTokenLogprob]] = None + """A list of message refusal tokens with log probability information.""" + + +class ChoiceMessage(BaseModel): + content: Optional[str] = None + """The contents of the message.""" + + refusal: Optional[str] = None + """The refusal message generated by the model.""" + + role: Literal["assistant"] + """The role of the author of this message.""" + + +class Choice(BaseModel): + finish_reason: Literal["stop", "length"] + """The reason the model stopped generating tokens. + + This will be `stop` if the model hit a natural stop point or a provided stop + sequence, or `length` if the maximum number of tokens specified in the request + was reached. + """ + + index: int + """The index of the choice in the list of choices.""" + + logprobs: Optional[ChoiceLogprobs] = None + """Log probability information for the choice.""" + + message: ChoiceMessage + """A chat completion message generated by the model.""" + + +class Usage(BaseModel): + completion_tokens: int + """Number of tokens in the generated completion.""" + + prompt_tokens: int + """Number of tokens in the prompt.""" + + total_tokens: int + """Total number of tokens used in the request (prompt + completion).""" + + +class CompletionCreateResponse(BaseModel): + id: str + """A unique identifier for the chat completion.""" + + choices: List[Choice] + """A list of chat completion choices. + + Can be more than one if `n` is greater than 1. + """ + + created: int + """The Unix timestamp (in seconds) of when the chat completion was created.""" + + model: str + """The model used for the chat completion.""" + + object: Literal["chat.completion"] + """The object type, which is always `chat.completion`.""" + + usage: Optional[Usage] = None + """Usage statistics for the completion request.""" diff --git a/src/gradientai/types/api_model.py b/src/gradientai/types/api_model.py new file mode 100644 index 00000000..c2bc1edd --- /dev/null +++ b/src/gradientai/types/api_model.py @@ -0,0 +1,32 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from typing import Optional +from datetime import datetime + +from .._models import BaseModel +from .api_agreement import APIAgreement +from .api_model_version import APIModelVersion + +__all__ = ["APIModel"] + + +class APIModel(BaseModel): + agreement: Optional[APIAgreement] = None + + created_at: Optional[datetime] = None + + is_foundational: Optional[bool] = None + + name: Optional[str] = None + + parent_uuid: Optional[str] = None + + updated_at: Optional[datetime] = None + + upload_complete: Optional[bool] = None + + url: Optional[str] = None + + uuid: Optional[str] = None + + version: Optional[APIModelVersion] = None diff --git a/src/gradientai/types/chat/__init__.py b/src/gradientai/types/chat/__init__.py index 59553f68..9384ac14 100644 --- a/src/gradientai/types/chat/__init__.py +++ b/src/gradientai/types/chat/__init__.py @@ -4,4 +4,3 @@ from .completion_create_params import CompletionCreateParams as CompletionCreateParams from .completion_create_response import CompletionCreateResponse as CompletionCreateResponse -from .chat_completion_token_logprob import ChatCompletionTokenLogprob as ChatCompletionTokenLogprob diff --git a/src/gradientai/types/chat/completion_create_response.py b/src/gradientai/types/chat/completion_create_response.py index 1ac59a28..1791373b 100644 --- a/src/gradientai/types/chat/completion_create_response.py +++ b/src/gradientai/types/chat/completion_create_response.py @@ -4,7 +4,7 @@ from typing_extensions import Literal from ..._models import BaseModel -from .chat_completion_token_logprob import ChatCompletionTokenLogprob +from ..shared.chat_completion_token_logprob import ChatCompletionTokenLogprob __all__ = ["CompletionCreateResponse", "Choice", "ChoiceLogprobs", "ChoiceMessage", "Usage"] diff --git a/src/gradientai/types/model.py b/src/gradientai/types/model.py deleted file mode 100644 index 2631ee8d..00000000 --- a/src/gradientai/types/model.py +++ /dev/null @@ -1,21 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - -from typing_extensions import Literal - -from .._models import BaseModel - -__all__ = ["Model"] - - -class Model(BaseModel): - id: str - """The model identifier, which can be referenced in the API endpoints.""" - - created: int - """The Unix timestamp (in seconds) when the model was created.""" - - object: Literal["model"] - """The object type, which is always "model".""" - - owned_by: str - """The organization that owns the model.""" diff --git a/src/gradientai/types/model_list_params.py b/src/gradientai/types/model_list_params.py new file mode 100644 index 00000000..4abc1dc1 --- /dev/null +++ b/src/gradientai/types/model_list_params.py @@ -0,0 +1,42 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import List +from typing_extensions import Literal, TypedDict + +__all__ = ["ModelListParams"] + + +class ModelListParams(TypedDict, total=False): + page: int + """page number.""" + + per_page: int + """items per page.""" + + public_only: bool + """only include models that are publicly available.""" + + usecases: List[ + Literal[ + "MODEL_USECASE_UNKNOWN", + "MODEL_USECASE_AGENT", + "MODEL_USECASE_FINETUNED", + "MODEL_USECASE_KNOWLEDGEBASE", + "MODEL_USECASE_GUARDRAIL", + "MODEL_USECASE_REASONING", + "MODEL_USECASE_SERVERLESS", + ] + ] + """include only models defined for the listed usecases. 
+ + - MODEL_USECASE_UNKNOWN: The use case of the model is unknown + - MODEL_USECASE_AGENT: The model maybe used in an agent + - MODEL_USECASE_FINETUNED: The model maybe used for fine tuning + - MODEL_USECASE_KNOWLEDGEBASE: The model maybe used for knowledge bases + (embedding models) + - MODEL_USECASE_GUARDRAIL: The model maybe used for guardrails + - MODEL_USECASE_REASONING: The model usecase for reasoning + - MODEL_USECASE_SERVERLESS: The model usecase for serverless inference + """ diff --git a/src/gradientai/types/model_list_response.py b/src/gradientai/types/model_list_response.py index 8f835449..47651759 100644 --- a/src/gradientai/types/model_list_response.py +++ b/src/gradientai/types/model_list_response.py @@ -1,15 +1,18 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. -from typing import List -from typing_extensions import Literal +from typing import List, Optional -from .model import Model from .._models import BaseModel +from .api_model import APIModel +from .shared.api_meta import APIMeta +from .shared.api_links import APILinks __all__ = ["ModelListResponse"] class ModelListResponse(BaseModel): - data: List[Model] + links: Optional[APILinks] = None - object: Literal["list"] + meta: Optional[APIMeta] = None + + models: Optional[List[APIModel]] = None diff --git a/src/gradientai/types/shared/__init__.py b/src/gradientai/types/shared/__init__.py index 5f02d62f..dc71bdd3 100644 --- a/src/gradientai/types/shared/__init__.py +++ b/src/gradientai/types/shared/__init__.py @@ -2,3 +2,4 @@ from .api_meta import APIMeta as APIMeta from .api_links import APILinks as APILinks +from .chat_completion_token_logprob import ChatCompletionTokenLogprob as ChatCompletionTokenLogprob diff --git a/src/gradientai/types/chat/chat_completion_token_logprob.py b/src/gradientai/types/shared/chat_completion_token_logprob.py similarity index 100% rename from src/gradientai/types/chat/chat_completion_token_logprob.py rename to src/gradientai/types/shared/chat_completion_token_logprob.py diff --git a/tests/api_resources/agents/chat/__init__.py b/tests/api_resources/agents/chat/__init__.py new file mode 100644 index 00000000..fd8019a9 --- /dev/null +++ b/tests/api_resources/agents/chat/__init__.py @@ -0,0 +1 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. diff --git a/tests/api_resources/agents/chat/test_completions.py b/tests/api_resources/agents/chat/test_completions.py new file mode 100644 index 00000000..89d531a5 --- /dev/null +++ b/tests/api_resources/agents/chat/test_completions.py @@ -0,0 +1,186 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
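With `Model` removed and `ModelListResponse` reworked around `APIModel`, `client.models.list()` is now the only models surface in this diff. A minimal sketch of the new call, assuming a configured client; the page size and usecase filter are illustrative values, and every `APIModel` field is optional so the loop guards against `None`.

```python
from gradientai import GradientAI

client = GradientAI()

# Illustrative pagination and filtering values.
resp = client.models.list(
    page=1,
    per_page=25,
    public_only=True,
    usecases=["MODEL_USECASE_AGENT"],
)

for m in resp.models or []:
    # APIModel fields are all Optional.
    print(m.name, m.uuid, m.is_foundational)

# Pagination metadata and links are optional on the response.
if resp.meta is not None:
    print(resp.meta)
if resp.links is not None:
    print(resp.links)
```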
+ +from __future__ import annotations + +import os +from typing import Any, cast + +import pytest + +from gradientai import GradientAI, AsyncGradientAI +from tests.utils import assert_matches_type +from gradientai.types.agents.chat import CompletionCreateResponse + +base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010") + + +class TestCompletions: + parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"]) + + @pytest.mark.skip() + @parametrize + def test_method_create(self, client: GradientAI) -> None: + completion = client.agents.chat.completions.create( + messages=[ + { + "content": "string", + "role": "system", + } + ], + model="llama3-8b-instruct", + ) + assert_matches_type(CompletionCreateResponse, completion, path=["response"]) + + @pytest.mark.skip() + @parametrize + def test_method_create_with_all_params(self, client: GradientAI) -> None: + completion = client.agents.chat.completions.create( + messages=[ + { + "content": "string", + "role": "system", + } + ], + model="llama3-8b-instruct", + frequency_penalty=-2, + logit_bias={"foo": 0}, + logprobs=True, + max_completion_tokens=256, + max_tokens=0, + metadata={"foo": "string"}, + n=1, + presence_penalty=-2, + stop="\n", + stream=True, + stream_options={"include_usage": True}, + temperature=1, + top_logprobs=0, + top_p=1, + user="user-1234", + ) + assert_matches_type(CompletionCreateResponse, completion, path=["response"]) + + @pytest.mark.skip() + @parametrize + def test_raw_response_create(self, client: GradientAI) -> None: + response = client.agents.chat.completions.with_raw_response.create( + messages=[ + { + "content": "string", + "role": "system", + } + ], + model="llama3-8b-instruct", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + completion = response.parse() + assert_matches_type(CompletionCreateResponse, completion, path=["response"]) + + @pytest.mark.skip() + @parametrize + def test_streaming_response_create(self, client: GradientAI) -> None: + with client.agents.chat.completions.with_streaming_response.create( + messages=[ + { + "content": "string", + "role": "system", + } + ], + model="llama3-8b-instruct", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + completion = response.parse() + assert_matches_type(CompletionCreateResponse, completion, path=["response"]) + + assert cast(Any, response.is_closed) is True + + +class TestAsyncCompletions: + parametrize = pytest.mark.parametrize( + "async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"] + ) + + @pytest.mark.skip() + @parametrize + async def test_method_create(self, async_client: AsyncGradientAI) -> None: + completion = await async_client.agents.chat.completions.create( + messages=[ + { + "content": "string", + "role": "system", + } + ], + model="llama3-8b-instruct", + ) + assert_matches_type(CompletionCreateResponse, completion, path=["response"]) + + @pytest.mark.skip() + @parametrize + async def test_method_create_with_all_params(self, async_client: AsyncGradientAI) -> None: + completion = await async_client.agents.chat.completions.create( + messages=[ + { + "content": "string", + "role": "system", + } + ], + model="llama3-8b-instruct", + frequency_penalty=-2, + logit_bias={"foo": 0}, + logprobs=True, + max_completion_tokens=256, + max_tokens=0, + metadata={"foo": "string"}, + n=1, + presence_penalty=-2, 
+ stop="\n", + stream=True, + stream_options={"include_usage": True}, + temperature=1, + top_logprobs=0, + top_p=1, + user="user-1234", + ) + assert_matches_type(CompletionCreateResponse, completion, path=["response"]) + + @pytest.mark.skip() + @parametrize + async def test_raw_response_create(self, async_client: AsyncGradientAI) -> None: + response = await async_client.agents.chat.completions.with_raw_response.create( + messages=[ + { + "content": "string", + "role": "system", + } + ], + model="llama3-8b-instruct", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + completion = await response.parse() + assert_matches_type(CompletionCreateResponse, completion, path=["response"]) + + @pytest.mark.skip() + @parametrize + async def test_streaming_response_create(self, async_client: AsyncGradientAI) -> None: + async with async_client.agents.chat.completions.with_streaming_response.create( + messages=[ + { + "content": "string", + "role": "system", + } + ], + model="llama3-8b-instruct", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + completion = await response.parse() + assert_matches_type(CompletionCreateResponse, completion, path=["response"]) + + assert cast(Any, response.is_closed) is True diff --git a/tests/api_resources/test_models.py b/tests/api_resources/test_models.py index afee0c1f..5e119f71 100644 --- a/tests/api_resources/test_models.py +++ b/tests/api_resources/test_models.py @@ -9,7 +9,7 @@ from gradientai import GradientAI, AsyncGradientAI from tests.utils import assert_matches_type -from gradientai.types import Model, ModelListResponse +from gradientai.types import ModelListResponse base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010") @@ -19,50 +19,19 @@ class TestModels: @pytest.mark.skip() @parametrize - def test_method_retrieve(self, client: GradientAI) -> None: - model = client.models.retrieve( - "llama3-8b-instruct", - ) - assert_matches_type(Model, model, path=["response"]) + def test_method_list(self, client: GradientAI) -> None: + model = client.models.list() + assert_matches_type(ModelListResponse, model, path=["response"]) @pytest.mark.skip() @parametrize - def test_raw_response_retrieve(self, client: GradientAI) -> None: - response = client.models.with_raw_response.retrieve( - "llama3-8b-instruct", + def test_method_list_with_all_params(self, client: GradientAI) -> None: + model = client.models.list( + page=0, + per_page=0, + public_only=True, + usecases=["MODEL_USECASE_UNKNOWN"], ) - - assert response.is_closed is True - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - model = response.parse() - assert_matches_type(Model, model, path=["response"]) - - @pytest.mark.skip() - @parametrize - def test_streaming_response_retrieve(self, client: GradientAI) -> None: - with client.models.with_streaming_response.retrieve( - "llama3-8b-instruct", - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - model = response.parse() - assert_matches_type(Model, model, path=["response"]) - - assert cast(Any, response.is_closed) is True - - @pytest.mark.skip() - @parametrize - def test_path_params_retrieve(self, client: GradientAI) -> None: - with pytest.raises(ValueError, match=r"Expected a non-empty value for `model` but received ''"): - client.models.with_raw_response.retrieve( - "", - ) - - @pytest.mark.skip() - 
@parametrize - def test_method_list(self, client: GradientAI) -> None: - model = client.models.list() assert_matches_type(ModelListResponse, model, path=["response"]) @pytest.mark.skip() @@ -95,50 +64,19 @@ class TestAsyncModels: @pytest.mark.skip() @parametrize - async def test_method_retrieve(self, async_client: AsyncGradientAI) -> None: - model = await async_client.models.retrieve( - "llama3-8b-instruct", - ) - assert_matches_type(Model, model, path=["response"]) + async def test_method_list(self, async_client: AsyncGradientAI) -> None: + model = await async_client.models.list() + assert_matches_type(ModelListResponse, model, path=["response"]) @pytest.mark.skip() @parametrize - async def test_raw_response_retrieve(self, async_client: AsyncGradientAI) -> None: - response = await async_client.models.with_raw_response.retrieve( - "llama3-8b-instruct", + async def test_method_list_with_all_params(self, async_client: AsyncGradientAI) -> None: + model = await async_client.models.list( + page=0, + per_page=0, + public_only=True, + usecases=["MODEL_USECASE_UNKNOWN"], ) - - assert response.is_closed is True - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - model = await response.parse() - assert_matches_type(Model, model, path=["response"]) - - @pytest.mark.skip() - @parametrize - async def test_streaming_response_retrieve(self, async_client: AsyncGradientAI) -> None: - async with async_client.models.with_streaming_response.retrieve( - "llama3-8b-instruct", - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - model = await response.parse() - assert_matches_type(Model, model, path=["response"]) - - assert cast(Any, response.is_closed) is True - - @pytest.mark.skip() - @parametrize - async def test_path_params_retrieve(self, async_client: AsyncGradientAI) -> None: - with pytest.raises(ValueError, match=r"Expected a non-empty value for `model` but received ''"): - await async_client.models.with_raw_response.retrieve( - "", - ) - - @pytest.mark.skip() - @parametrize - async def test_method_list(self, async_client: AsyncGradientAI) -> None: - model = await async_client.models.list() assert_matches_type(ModelListResponse, model, path=["response"]) @pytest.mark.skip()
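The generated tests above also exercise the wrapped response accessors. The following is a condensed sketch of that same pattern outside the test harness, under the same assumptions as before (configured client; prompt and model name are placeholders): `with_raw_response` exposes HTTP details before parsing into the typed model, and `with_streaming_response` is used as a context manager that closes the connection on exit.

```python
from gradientai import GradientAI

client = GradientAI()

# Raw-response access: inspect the HTTP request, then parse the typed model.
response = client.agents.chat.completions.with_raw_response.create(
    messages=[{"content": "Say hello.", "role": "user"}],
    model="llama3-8b-instruct",
)
print(response.http_request.headers.get("X-Stainless-Lang"))
completion = response.parse()
print(completion.id, completion.object)

# Streaming-response access: parse inside the context manager.
with client.agents.chat.completions.with_streaming_response.create(
    messages=[{"content": "Say hello.", "role": "user"}],
    model="llama3-8b-instruct",
) as streamed:
    completion = streamed.parse()
    print(completion.choices[0].finish_reason)
```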