From 997e0e7e750120abd01b64df5046683aa5fc6500 Mon Sep 17 00:00:00 2001 From: "stainless-app[bot]" <142633134+stainless-app[bot]@users.noreply.github.com> Date: Thu, 9 Jan 2025 03:09:49 +0000 Subject: [PATCH] feat(api): api update --- .stats.yml | 2 +- src/groq/resources/chat/completions.py | 16 ++++++++++++++-- src/groq/types/chat/completion_create_params.py | 8 +++++++- tests/api_resources/chat/test_completions.py | 4 ++-- 4 files changed, 24 insertions(+), 6 deletions(-) diff --git a/.stats.yml b/.stats.yml index c917b4ee..ba76caea 100644 --- a/.stats.yml +++ b/.stats.yml @@ -1,2 +1,2 @@ configured_endpoints: 7 -openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/groqcloud%2Fgroqcloud-b33d3547dd14387df0951ff9638519f7c1a125db265801f63e9136cfa5e4578b.yml +openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/groqcloud%2Fgroqcloud-33be0d612b9f1153b86f53e95bf7c571af2f1e466bda2330b632e6c05832e2a6.yml diff --git a/src/groq/resources/chat/completions.py b/src/groq/resources/chat/completions.py index ccdcc1b3..7840189b 100644 --- a/src/groq/resources/chat/completions.py +++ b/src/groq/resources/chat/completions.py @@ -66,7 +66,7 @@ def create( presence_penalty: Optional[float] | NotGiven = NOT_GIVEN, response_format: Optional[completion_create_params.ResponseFormat] | NotGiven = NOT_GIVEN, seed: Optional[int] | NotGiven = NOT_GIVEN, - service_tier: Optional[Literal["on_demand", "flex"]] | NotGiven = NOT_GIVEN, + service_tier: Optional[Literal["auto", "on_demand", "flex"]] | NotGiven = NOT_GIVEN, stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN, stream: Optional[bool] | NotGiven = NOT_GIVEN, temperature: Optional[float] | NotGiven = NOT_GIVEN, @@ -144,6 +144,12 @@ def create( the same result. Determinism is not guaranteed, and you should refer to the `system_fingerprint` response parameter to monitor changes in the backend. + service_tier: The service tier to use for the request. Defaults to `on_demand`. + + - `auto` will automatically select the highest tier available within the rate + limits of your organization. + - `flex` uses the flex tier, which will succeed or fail quickly. + stop: Up to 4 sequences where the API will stop generating further tokens. The returned text will not contain the stop sequence. @@ -264,7 +270,7 @@ async def create( presence_penalty: Optional[float] | NotGiven = NOT_GIVEN, response_format: Optional[completion_create_params.ResponseFormat] | NotGiven = NOT_GIVEN, seed: Optional[int] | NotGiven = NOT_GIVEN, - service_tier: Optional[Literal["on_demand", "flex"]] | NotGiven = NOT_GIVEN, + service_tier: Optional[Literal["auto", "on_demand", "flex"]] | NotGiven = NOT_GIVEN, stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN, stream: Optional[bool] | NotGiven = NOT_GIVEN, temperature: Optional[float] | NotGiven = NOT_GIVEN, @@ -342,6 +348,12 @@ async def create( the same result. Determinism is not guaranteed, and you should refer to the `system_fingerprint` response parameter to monitor changes in the backend. + service_tier: The service tier to use for the request. Defaults to `on_demand`. + + - `auto` will automatically select the highest tier available within the rate + limits of your organization. + - `flex` uses the flex tier, which will succeed or fail quickly. + stop: Up to 4 sequences where the API will stop generating further tokens. The returned text will not contain the stop sequence. diff --git a/src/groq/types/chat/completion_create_params.py b/src/groq/types/chat/completion_create_params.py index e77aa0c0..770578ed 100644 --- a/src/groq/types/chat/completion_create_params.py +++ b/src/groq/types/chat/completion_create_params.py @@ -106,7 +106,13 @@ class CompletionCreateParams(TypedDict, total=False): `system_fingerprint` response parameter to monitor changes in the backend. """ - service_tier: Optional[Literal["on_demand", "flex"]] + service_tier: Optional[Literal["auto", "on_demand", "flex"]] + """The service tier to use for the request. Defaults to `on_demand`. + + - `auto` will automatically select the highest tier available within the rate + limits of your organization. + - `flex` uses the flex tier, which will succeed or fail quickly. + """ stop: Union[Optional[str], List[str], None] """Up to 4 sequences where the API will stop generating further tokens. diff --git a/tests/api_resources/chat/test_completions.py b/tests/api_resources/chat/test_completions.py index 45dcc7c0..53bbc1a8 100644 --- a/tests/api_resources/chat/test_completions.py +++ b/tests/api_resources/chat/test_completions.py @@ -58,7 +58,7 @@ def test_method_create_with_all_params(self, client: Groq) -> None: presence_penalty=-2, response_format={"type": "text"}, seed=0, - service_tier="on_demand", + service_tier="auto", stop="\n", stream=True, temperature=1, @@ -160,7 +160,7 @@ async def test_method_create_with_all_params(self, async_client: AsyncGroq) -> N presence_penalty=-2, response_format={"type": "text"}, seed=0, - service_tier="on_demand", + service_tier="auto", stop="\n", stream=True, temperature=1,