From aa5137b0f26cd449860c2c45a55e9f3c829a6d16 Mon Sep 17 00:00:00 2001 From: Graden Rea Date: Wed, 14 Feb 2024 15:08:48 -0800 Subject: [PATCH 1/6] Add examples --- examples/chat_completion.py | 55 +++++++++++++++++++ examples/chat_completion_async.py | 60 +++++++++++++++++++++ examples/chat_completion_async_streaming.py | 58 ++++++++++++++++++++ examples/chat_completion_stop.py | 58 ++++++++++++++++++++ examples/chat_completion_streaming.py | 56 +++++++++++++++++++ 5 files changed, 287 insertions(+) create mode 100644 examples/chat_completion.py create mode 100644 examples/chat_completion_async.py create mode 100644 examples/chat_completion_async_streaming.py create mode 100644 examples/chat_completion_stop.py create mode 100644 examples/chat_completion_streaming.py diff --git a/examples/chat_completion.py b/examples/chat_completion.py new file mode 100644 index 00000000..18ea48ec --- /dev/null +++ b/examples/chat_completion.py @@ -0,0 +1,55 @@ +from groq import Groq + +client = Groq() + +chat_completion = client.chat.completions.create( + # + # Required parameters + # + messages=[ + # Set an optional system message. This sets the behavior of the + # assistant and can be used to provide specific instructions for + # how it should behave throughout the conversation. + { + "role": "system", + "content": "you are a helpful assistant." + }, + # Set a user message for the assistant to respond to. + { + "role": "user", + "content": "Explain the importance of low latency LLMs", + } + ], + + # The language model which will generate the completion. + model="mixtral-8x7b-32768", + + # + # Optional parameters + # + + # Controls randomness: lowering results in less random completions. + # As the temperature approaches zero, the model will become deterministic + # and repetitive. + temperature=0.5, + + # The maximum number of tokens to generate. Requests can use up to + # 2048 tokens shared between prompt and completion. + max_tokens=1024, + + # Controls diversity via nucleus sampling: 0.5 means half of all + # likelihood-weighted options are considered. + top_p=1, + + # A stop sequence is a predefined or user-specified text string that + # signals an AI to stop generating content, ensuring its responses + # remain focused and concise. Examples include punctuation marks and + # markers like "[end]". + stop=None, + + # If set, partial message deltas will be sent. + stream=False, +) + +# Print the completion returned by the LLM. +print(chat_completion.choices[0].message.content) diff --git a/examples/chat_completion_async.py b/examples/chat_completion_async.py new file mode 100644 index 00000000..58b1974b --- /dev/null +++ b/examples/chat_completion_async.py @@ -0,0 +1,60 @@ +import asyncio + +from groq import AsyncGroq + +async def main(): + client = AsyncGroq() + + chat_completion = await client.chat.completions.create( + # + # Required parameters + # + messages=[ + # Set an optional system message. This sets the behavior of the + # assistant and can be used to provide specific instructions for + # how it should behave throughout the conversation. + { + "role": "system", + "content": "you are a helpful assistant." + }, + # Set a user message for the assistant to respond to. + { + "role": "user", + "content": "Explain the importance of low latency LLMs", + } + ], + + # The language model which will generate the completion. + model="mixtral-8x7b-32768", + + # + # Optional parameters + # + + # Controls randomness: lowering results in less random completions. 
+ # As the temperature approaches zero, the model will become + # deterministic and repetitive. + temperature=0.5, + + # The maximum number of tokens to generate. Requests can use up to + # 2048 tokens shared between prompt and completion. + max_tokens=1024, + + # Controls diversity via nucleus sampling: 0.5 means half of all + # likelihood-weighted options are considered. + top_p=1, + + # A stop sequence is a predefined or user-specified text string that + # signals an AI to stop generating content, ensuring its responses + # remain focused and concise. Examples include punctuation marks and + # markers like "[end]". + stop=None, + + # If set, partial message deltas will be sent. + stream=False, + ) + + # Print the completion returned by the LLM. + print(chat_completion.choices[0].message.content) + +asyncio.run(main()) diff --git a/examples/chat_completion_async_streaming.py b/examples/chat_completion_async_streaming.py new file mode 100644 index 00000000..1e71f737 --- /dev/null +++ b/examples/chat_completion_async_streaming.py @@ -0,0 +1,58 @@ +import asyncio + +from groq import AsyncGroq + +async def main(): + client = AsyncGroq() + + stream = await client.chat.completions.create( + # + # Required parameters + # + messages=[ + # Set an optional system message. This sets the behavior of the + # assistant and can be used to provide specific instructions for + # how it should behave throughout the conversation. + { + "role": "system", + "content": "you are a helpful assistant." + }, + # Set a user message for the assistant to respond to. + { + "role": "user", + "content": "Explain the importance of low latency LLMs", + } + ], + + # The language model which will generate the completion. + model="mixtral-8x7b-32768", + + # + # Optional parameters + # + + # Controls randomness: lowering results in less random completions. + # As the temperature approaches zero, the model will become + # deterministic and repetitive. + temperature=0.5, + + # The maximum number of tokens to generate. Requests can use up to + # 2048 tokens shared between prompt and completion. + max_tokens=1024, + + # A stop sequence is a predefined or user-specified text string that + # signals an AI to stop generating content, ensuring its responses + # remain focused and concise. Examples include punctuation marks and + # markers like "[end]". + stop=None, + + # Controls diversity via nucleus sampling: 0.5 means half of all + # likelihood-weighted options are considered. + stream=True, + ) + + # Print the incremental deltas returned by the LLM. + async for chunk in stream: + print(chunk.choices[0].delta.content, end="") + +asyncio.run(main()) diff --git a/examples/chat_completion_stop.py b/examples/chat_completion_stop.py new file mode 100644 index 00000000..1c749bca --- /dev/null +++ b/examples/chat_completion_stop.py @@ -0,0 +1,58 @@ +from groq import Groq + +client = Groq() + +chat_completion = client.chat.completions.create( + # + # Required parameters + # + messages=[ + # Set an optional system message. This sets the behavior of the + # assistant and can be used to provide specific instructions for + # how it should behave throughout the conversation. + { + "role": "system", + "content": "you are a helpful assistant." + }, + # Set a user message for the assistant to respond to. + { + "role": "user", + "content": "Count to 10. Your response must begin with \"1, \". example: 1, 2, 3, ...", + } + ], + + # The language model which will generate the completion. 
+ model="mixtral-8x7b-32768", + + # + # Optional parameters + # + + # Controls randomness: lowering results in less random completions. + # As the temperature approaches zero, the model will become deterministic + # and repetitive. + temperature=0.5, + + # The maximum number of tokens to generate. Requests can use up to + # 2048 tokens shared between prompt and completion. + max_tokens=1024, + + # Controls diversity via nucleus sampling: 0.5 means half of all + # likelihood-weighted options are considered. + top_p=1, + + # A stop sequence is a predefined or user-specified text string that + # signals an AI to stop generating content, ensuring its responses + # remain focused and concise. Examples include punctuation marks and + # markers like "[end]". + # For this example, we will use ", 6" so that the llm stops counting at 5. + # If multiple stop values are needed, an array of string may be passed, + # stop=[", 6", ", six", ", Six"] + stop=", 6", + + # If set, partial message deltas will be sent. + stream=False, +) + +# Print the completion returned by the LLM. +print(chat_completion.choices[0].message.content) diff --git a/examples/chat_completion_streaming.py b/examples/chat_completion_streaming.py new file mode 100644 index 00000000..dd3edbcd --- /dev/null +++ b/examples/chat_completion_streaming.py @@ -0,0 +1,56 @@ +from groq import Groq + +client = Groq() + +stream = client.chat.completions.create( + # + # Required parameters + # + messages=[ + # Set an optional system message. This sets the behavior of the + # assistant and can be used to provide specific instructions for + # how it should behave throughout the conversation. + { + "role": "system", + "content": "you are a helpful assistant." + }, + # Set a user message for the assistant to respond to. + { + "role": "user", + "content": "Explain the importance of low latency LLMs", + } + ], + + # The language model which will generate the completion. + model="mixtral-8x7b-32768", + + # + # Optional parameters + # + + # Controls randomness: lowering results in less random completions. + # As the temperature approaches zero, the model will become deterministic + # and repetitive. + temperature=0.5, + + # The maximum number of tokens to generate. Requests can use up to + # 2048 tokens shared between prompt and completion. + max_tokens=1024, + + # Controls diversity via nucleus sampling: 0.5 means half of all + # likelihood-weighted options are considered. + top_p=1, + + # A stop sequence is a predefined or user-specified text string that + # signals an AI to stop generating content, ensuring its responses + # remain focused and concise. Examples include punctuation marks and + # markers like "[end]". + stop=None, + + # If set, partial message deltas will be sent. + stream=True, +) + +# Print the incremental deltas returned by the LLM. 
+for chunk in stream: + print(chunk.choices[0].delta.content, end="") From 3907c478a623074d39528a0109318df4af61a777 Mon Sep 17 00:00:00 2001 From: Graden Rea Date: Wed, 14 Feb 2024 15:40:49 -0800 Subject: [PATCH 2/6] Add streaming support --- src/groq/_streaming.py | 4 + src/groq/resources/chat/completions.py | 198 ++++++++++++++++++++++++- 2 files changed, 199 insertions(+), 3 deletions(-) diff --git a/src/groq/_streaming.py b/src/groq/_streaming.py index ac0ea8aa..2769874c 100644 --- a/src/groq/_streaming.py +++ b/src/groq/_streaming.py @@ -53,6 +53,8 @@ def __stream__(self) -> Iterator[_T]: iterator = self._iter_events() for sse in iterator: + if sse.data.startswith("[DONE]"): + break yield process_data(data=sse.json(), cast_to=cast_to, response=response) # Ensure the entire stream is consumed @@ -106,6 +108,8 @@ async def __aiter__(self) -> AsyncIterator[_T]: async def _iter_events(self) -> AsyncIterator[ServerSentEvent]: async for sse in self._decoder.aiter(self.response.aiter_lines()): + if sse.data.startswith("[DONE]"): + break yield sse async def __stream__(self) -> AsyncIterator[_T]: diff --git a/src/groq/resources/chat/completions.py b/src/groq/resources/chat/completions.py index 9f332f5a..26618039 100644 --- a/src/groq/resources/chat/completions.py +++ b/src/groq/resources/chat/completions.py @@ -2,10 +2,11 @@ from __future__ import annotations -from typing import Dict, List, Union, Iterable, Optional +from typing import Dict, List, Literal, Union, Iterable, Optional, overload import httpx +from ...lib.chat_completion_chunk import ChatCompletionChunk from ..._types import NOT_GIVEN, Body, Query, Headers, NotGiven from ..._utils import maybe_transform from ..._compat import cached_property @@ -16,6 +17,7 @@ async_to_raw_response_wrapper, async_to_streamed_response_wrapper, ) +from ..._streaming import AsyncStream, Stream from ...types.chat import ChatCompletion, completion_create_params from ..._base_client import ( make_request_options, @@ -33,6 +35,7 @@ def with_raw_response(self) -> CompletionsWithRawResponse: def with_streaming_response(self) -> CompletionsWithStreamingResponse: return CompletionsWithStreamingResponse(self) + @overload def create( self, *, @@ -47,7 +50,7 @@ def create( response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN, seed: int | NotGiven = NOT_GIVEN, stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN, - stream: bool | NotGiven = NOT_GIVEN, + stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN, temperature: float | NotGiven = NOT_GIVEN, tool_choice: completion_create_params.ToolChoice | NotGiven = NOT_GIVEN, tools: Iterable[completion_create_params.Tool] | NotGiven = NOT_GIVEN, @@ -61,6 +64,98 @@ def create( extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> ChatCompletion: + ... 
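(Note on the `_streaming.py` hunk earlier in this patch: it breaks out of the event loop as soon as a server-sent event's data is the literal `[DONE]` marker — the conventional end-of-stream sentinel in OpenAI-style streaming APIs — so the sentinel is never parsed as a chunk. A rough standalone sketch of that idea, with hypothetical event payloads that are not taken from the Groq API:)

```python
# Rough sketch: stop consuming server-sent events at the "[DONE]" sentinel.
# The event payloads below are hypothetical, not taken from the Groq API.
import json
from typing import Iterator


def iter_chunks(sse_data_lines: Iterator[str]) -> Iterator[dict]:
    for data in sse_data_lines:
        if data.startswith("[DONE]"):  # end-of-stream marker: stop, don't parse
            break
        yield json.loads(data)


events = iter(['{"delta": "Hello"}', '{"delta": " world"}', "[DONE]"])
for chunk in iter_chunks(events):
    print(chunk["delta"], end="")
```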
+ + @overload + def create( + self, + *, + frequency_penalty: float | NotGiven = NOT_GIVEN, + logit_bias: Dict[str, int] | NotGiven = NOT_GIVEN, + logprobs: bool | NotGiven = NOT_GIVEN, + max_tokens: int | NotGiven = NOT_GIVEN, + messages: Iterable[completion_create_params.Message] | NotGiven = NOT_GIVEN, + model: str | NotGiven = NOT_GIVEN, + n: int | NotGiven = NOT_GIVEN, + presence_penalty: float | NotGiven = NOT_GIVEN, + response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN, + seed: int | NotGiven = NOT_GIVEN, + stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN, + stream: Literal[True], + temperature: float | NotGiven = NOT_GIVEN, + tool_choice: completion_create_params.ToolChoice | NotGiven = NOT_GIVEN, + tools: Iterable[completion_create_params.Tool] | NotGiven = NOT_GIVEN, + top_logprobs: int | NotGiven = NOT_GIVEN, + top_p: float | NotGiven = NOT_GIVEN, + user: str | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Stream[ChatCompletionChunk]: + ... + + @overload + def create( + self, + *, + frequency_penalty: float | NotGiven = NOT_GIVEN, + logit_bias: Dict[str, int] | NotGiven = NOT_GIVEN, + logprobs: bool | NotGiven = NOT_GIVEN, + max_tokens: int | NotGiven = NOT_GIVEN, + messages: Iterable[completion_create_params.Message] | NotGiven = NOT_GIVEN, + model: str | NotGiven = NOT_GIVEN, + n: int | NotGiven = NOT_GIVEN, + presence_penalty: float | NotGiven = NOT_GIVEN, + response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN, + seed: int | NotGiven = NOT_GIVEN, + stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN, + stream: bool, + temperature: float | NotGiven = NOT_GIVEN, + tool_choice: completion_create_params.ToolChoice | NotGiven = NOT_GIVEN, + tools: Iterable[completion_create_params.Tool] | NotGiven = NOT_GIVEN, + top_logprobs: int | NotGiven = NOT_GIVEN, + top_p: float | NotGiven = NOT_GIVEN, + user: str | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> ChatCompletion | Stream[ChatCompletionChunk]: + ... 
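(The three `@overload` declarations above exist so that type checkers can narrow the return type from the value passed for `stream` — `Literal[False]` yields `ChatCompletion`, `Literal[True]` yields `Stream[ChatCompletionChunk]`, plain `bool` yields the union — while a single runtime implementation, which follows, does the actual work. A minimal, self-contained sketch of the same pattern, with placeholder classes standing in for the SDK types; illustrative only, not code from this patch:)

```python
# Standalone sketch of the overload pattern used above (illustrative only;
# the class names are placeholders for ChatCompletion / Stream[ChatCompletionChunk]).
from typing import Literal, Union, overload


class Completion:
    """Stand-in for ChatCompletion."""


class ChunkStream:
    """Stand-in for Stream[ChatCompletionChunk]."""


@overload
def create(*, stream: Literal[False] = False) -> Completion: ...
@overload
def create(*, stream: Literal[True]) -> ChunkStream: ...
@overload
def create(*, stream: bool) -> Union[Completion, ChunkStream]: ...


def create(*, stream: bool = False) -> Union[Completion, ChunkStream]:
    # Single runtime implementation; only the value of `stream` decides
    # which kind of object comes back.
    return ChunkStream() if stream else Completion()


blocking = create()              # type checkers infer Completion here
streaming = create(stream=True)  # type checkers infer ChunkStream here
```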
+ + def create( + self, + *, + frequency_penalty: float | NotGiven = NOT_GIVEN, + logit_bias: Dict[str, int] | NotGiven = NOT_GIVEN, + logprobs: bool | NotGiven = NOT_GIVEN, + max_tokens: int | NotGiven = NOT_GIVEN, + messages: Iterable[completion_create_params.Message] | NotGiven = NOT_GIVEN, + model: str | NotGiven = NOT_GIVEN, + n: int | NotGiven = NOT_GIVEN, + presence_penalty: float | NotGiven = NOT_GIVEN, + response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN, + seed: int | NotGiven = NOT_GIVEN, + stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN, + stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN, + temperature: float | NotGiven = NOT_GIVEN, + tool_choice: completion_create_params.ToolChoice | NotGiven = NOT_GIVEN, + tools: Iterable[completion_create_params.Tool] | NotGiven = NOT_GIVEN, + top_logprobs: int | NotGiven = NOT_GIVEN, + top_p: float | NotGiven = NOT_GIVEN, + user: str | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> ChatCompletion | Stream[ChatCompletionChunk]: """ Creates a completion for a chat prompt @@ -105,6 +200,8 @@ def create( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout ), cast_to=ChatCompletion, + stream=stream or False, + stream_cls=Stream[ChatCompletionChunk], ) @@ -117,6 +214,7 @@ def with_raw_response(self) -> AsyncCompletionsWithRawResponse: def with_streaming_response(self) -> AsyncCompletionsWithStreamingResponse: return AsyncCompletionsWithStreamingResponse(self) + @overload async def create( self, *, @@ -131,7 +229,7 @@ async def create( response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN, seed: int | NotGiven = NOT_GIVEN, stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN, - stream: bool | NotGiven = NOT_GIVEN, + stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN, temperature: float | NotGiven = NOT_GIVEN, tool_choice: completion_create_params.ToolChoice | NotGiven = NOT_GIVEN, tools: Iterable[completion_create_params.Tool] | NotGiven = NOT_GIVEN, @@ -145,6 +243,98 @@ async def create( extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> ChatCompletion: + ... 
+ + @overload + async def create( + self, + *, + frequency_penalty: float | NotGiven = NOT_GIVEN, + logit_bias: Dict[str, int] | NotGiven = NOT_GIVEN, + logprobs: bool | NotGiven = NOT_GIVEN, + max_tokens: int | NotGiven = NOT_GIVEN, + messages: Iterable[completion_create_params.Message] | NotGiven = NOT_GIVEN, + model: str | NotGiven = NOT_GIVEN, + n: int | NotGiven = NOT_GIVEN, + presence_penalty: float | NotGiven = NOT_GIVEN, + response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN, + seed: int | NotGiven = NOT_GIVEN, + stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN, + stream: Literal[True], + temperature: float | NotGiven = NOT_GIVEN, + tool_choice: completion_create_params.ToolChoice | NotGiven = NOT_GIVEN, + tools: Iterable[completion_create_params.Tool] | NotGiven = NOT_GIVEN, + top_logprobs: int | NotGiven = NOT_GIVEN, + top_p: float | NotGiven = NOT_GIVEN, + user: str | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> AsyncStream[ChatCompletionChunk]: + ... + + @overload + async def create( + self, + *, + frequency_penalty: float | NotGiven = NOT_GIVEN, + logit_bias: Dict[str, int] | NotGiven = NOT_GIVEN, + logprobs: bool | NotGiven = NOT_GIVEN, + max_tokens: int | NotGiven = NOT_GIVEN, + messages: Iterable[completion_create_params.Message] | NotGiven = NOT_GIVEN, + model: str | NotGiven = NOT_GIVEN, + n: int | NotGiven = NOT_GIVEN, + presence_penalty: float | NotGiven = NOT_GIVEN, + response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN, + seed: int | NotGiven = NOT_GIVEN, + stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN, + stream: bool, + temperature: float | NotGiven = NOT_GIVEN, + tool_choice: completion_create_params.ToolChoice | NotGiven = NOT_GIVEN, + tools: Iterable[completion_create_params.Tool] | NotGiven = NOT_GIVEN, + top_logprobs: int | NotGiven = NOT_GIVEN, + top_p: float | NotGiven = NOT_GIVEN, + user: str | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> ChatCompletion | AsyncStream[ChatCompletionChunk]: + ... 
+ + async def create( + self, + *, + frequency_penalty: float | NotGiven = NOT_GIVEN, + logit_bias: Dict[str, int] | NotGiven = NOT_GIVEN, + logprobs: bool | NotGiven = NOT_GIVEN, + max_tokens: int | NotGiven = NOT_GIVEN, + messages: Iterable[completion_create_params.Message] | NotGiven = NOT_GIVEN, + model: str | NotGiven = NOT_GIVEN, + n: int | NotGiven = NOT_GIVEN, + presence_penalty: float | NotGiven = NOT_GIVEN, + response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN, + seed: int | NotGiven = NOT_GIVEN, + stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN, + stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN, + temperature: float | NotGiven = NOT_GIVEN, + tool_choice: completion_create_params.ToolChoice | NotGiven = NOT_GIVEN, + tools: Iterable[completion_create_params.Tool] | NotGiven = NOT_GIVEN, + top_logprobs: int | NotGiven = NOT_GIVEN, + top_p: float | NotGiven = NOT_GIVEN, + user: str | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> ChatCompletion | AsyncStream[ChatCompletionChunk]: """ Creates a completion for a chat prompt @@ -189,6 +379,8 @@ async def create( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout ), cast_to=ChatCompletion, + stream=stream or False, + stream_cls=AsyncStream[ChatCompletionChunk], ) From af9a838e240bb0f7385bc33fb18ce246427ca2f7 Mon Sep 17 00:00:00 2001 From: "stainless-app[bot]" <142633134+stainless-app[bot]@users.noreply.github.com> Date: Thu, 15 Feb 2024 23:04:59 +0000 Subject: [PATCH 3/6] chore: go live (#5) --- .github/workflows/ci.yml | 4 +- README.md | 4 +- bin/check-release-environment | 4 +- bin/check-test-server | 0 bin/test | 0 examples/chat_completion.py | 14 +- examples/chat_completion_async.py | 16 +- examples/chat_completion_async_streaming.py | 15 +- examples/chat_completion_stop.py | 16 +- examples/chat_completion_streaming.py | 14 +- src/groq/_streaming.py | 4 - src/groq/resources/chat/completions.py | 214 +----------------- src/groq/types/chat/chat_completion.py | 16 +- .../types/chat/completion_create_params.py | 16 +- tests/api_resources/chat/test_completions.py | 124 ++++++++-- 15 files changed, 158 insertions(+), 303 deletions(-) mode change 100644 => 100755 bin/check-test-server mode change 100644 => 100755 bin/test diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index ebfa5cee..aea1868f 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -2,10 +2,10 @@ name: CI on: push: branches: - - main + - stainless pull_request: branches: - - main + - stainless jobs: lint: diff --git a/README.md b/README.md index 0c14ce0e..5c0abddb 100644 --- a/README.md +++ b/README.md @@ -261,9 +261,9 @@ completion = response.parse() # get the object that `chat.completions.create()` print(completion.id) ``` -These methods return an [`APIResponse`](https://github.com/groq/groq-python/tree/main/src/groq/_response.py) object. +These methods return an [`APIResponse`](https://github.com/groq/groq-python/tree/stainless/src/groq/_response.py) object. 
-The async client returns an [`AsyncAPIResponse`](https://github.com/groq/groq-python/tree/main/src/groq/_response.py) with the same structure, the only difference being `await`able methods for reading the response content. +The async client returns an [`AsyncAPIResponse`](https://github.com/groq/groq-python/tree/stainless/src/groq/_response.py) with the same structure, the only difference being `await`able methods for reading the response content. #### `.with_streaming_response` diff --git a/bin/check-release-environment b/bin/check-release-environment index 29306d89..e35a3711 100644 --- a/bin/check-release-environment +++ b/bin/check-release-environment @@ -6,9 +6,9 @@ if [ -z "${PYPI_TOKEN}" ]; then errors+=("The GROQ_PYPI_TOKEN secret has not been set. Please set it in either this repository's secrets or your organization secrets.") fi -len=${#errors[@]} +lenErrors=${#errors[@]} -if [[ len -gt 0 ]]; then +if [[ lenErrors -gt 0 ]]; then echo -e "Found the following errors in the release environment:\n" for error in "${errors[@]}"; do diff --git a/bin/check-test-server b/bin/check-test-server old mode 100644 new mode 100755 diff --git a/bin/test b/bin/test old mode 100644 new mode 100755 diff --git a/examples/chat_completion.py b/examples/chat_completion.py index 18ea48ec..06e7664a 100644 --- a/examples/chat_completion.py +++ b/examples/chat_completion.py @@ -10,43 +10,33 @@ # Set an optional system message. This sets the behavior of the # assistant and can be used to provide specific instructions for # how it should behave throughout the conversation. - { - "role": "system", - "content": "you are a helpful assistant." - }, + {"role": "system", "content": "you are a helpful assistant."}, # Set a user message for the assistant to respond to. { "role": "user", "content": "Explain the importance of low latency LLMs", - } + }, ], - # The language model which will generate the completion. model="mixtral-8x7b-32768", - # # Optional parameters # - # Controls randomness: lowering results in less random completions. # As the temperature approaches zero, the model will become deterministic # and repetitive. temperature=0.5, - # The maximum number of tokens to generate. Requests can use up to # 2048 tokens shared between prompt and completion. max_tokens=1024, - # Controls diversity via nucleus sampling: 0.5 means half of all # likelihood-weighted options are considered. top_p=1, - # A stop sequence is a predefined or user-specified text string that # signals an AI to stop generating content, ensuring its responses # remain focused and concise. Examples include punctuation marks and # markers like "[end]". stop=None, - # If set, partial message deltas will be sent. stream=False, ) diff --git a/examples/chat_completion_async.py b/examples/chat_completion_async.py index 58b1974b..b1f0fde3 100644 --- a/examples/chat_completion_async.py +++ b/examples/chat_completion_async.py @@ -2,6 +2,7 @@ from groq import AsyncGroq + async def main(): client = AsyncGroq() @@ -13,43 +14,33 @@ async def main(): # Set an optional system message. This sets the behavior of the # assistant and can be used to provide specific instructions for # how it should behave throughout the conversation. - { - "role": "system", - "content": "you are a helpful assistant." - }, + {"role": "system", "content": "you are a helpful assistant."}, # Set a user message for the assistant to respond to. 
{ "role": "user", "content": "Explain the importance of low latency LLMs", - } + }, ], - # The language model which will generate the completion. model="mixtral-8x7b-32768", - # # Optional parameters # - # Controls randomness: lowering results in less random completions. # As the temperature approaches zero, the model will become # deterministic and repetitive. temperature=0.5, - # The maximum number of tokens to generate. Requests can use up to # 2048 tokens shared between prompt and completion. max_tokens=1024, - # Controls diversity via nucleus sampling: 0.5 means half of all # likelihood-weighted options are considered. top_p=1, - # A stop sequence is a predefined or user-specified text string that # signals an AI to stop generating content, ensuring its responses # remain focused and concise. Examples include punctuation marks and # markers like "[end]". stop=None, - # If set, partial message deltas will be sent. stream=False, ) @@ -57,4 +48,5 @@ async def main(): # Print the completion returned by the LLM. print(chat_completion.choices[0].message.content) + asyncio.run(main()) diff --git a/examples/chat_completion_async_streaming.py b/examples/chat_completion_async_streaming.py index 1e71f737..594d61af 100644 --- a/examples/chat_completion_async_streaming.py +++ b/examples/chat_completion_async_streaming.py @@ -2,6 +2,7 @@ from groq import AsyncGroq + async def main(): client = AsyncGroq() @@ -13,39 +14,30 @@ async def main(): # Set an optional system message. This sets the behavior of the # assistant and can be used to provide specific instructions for # how it should behave throughout the conversation. - { - "role": "system", - "content": "you are a helpful assistant." - }, + {"role": "system", "content": "you are a helpful assistant."}, # Set a user message for the assistant to respond to. { "role": "user", "content": "Explain the importance of low latency LLMs", - } + }, ], - # The language model which will generate the completion. model="mixtral-8x7b-32768", - # # Optional parameters # - # Controls randomness: lowering results in less random completions. # As the temperature approaches zero, the model will become # deterministic and repetitive. temperature=0.5, - # The maximum number of tokens to generate. Requests can use up to # 2048 tokens shared between prompt and completion. max_tokens=1024, - # A stop sequence is a predefined or user-specified text string that # signals an AI to stop generating content, ensuring its responses # remain focused and concise. Examples include punctuation marks and # markers like "[end]". stop=None, - # Controls diversity via nucleus sampling: 0.5 means half of all # likelihood-weighted options are considered. stream=True, @@ -55,4 +47,5 @@ async def main(): async for chunk in stream: print(chunk.choices[0].delta.content, end="") + asyncio.run(main()) diff --git a/examples/chat_completion_stop.py b/examples/chat_completion_stop.py index 1c749bca..d68a90df 100644 --- a/examples/chat_completion_stop.py +++ b/examples/chat_completion_stop.py @@ -10,37 +10,28 @@ # Set an optional system message. This sets the behavior of the # assistant and can be used to provide specific instructions for # how it should behave throughout the conversation. - { - "role": "system", - "content": "you are a helpful assistant." - }, + {"role": "system", "content": "you are a helpful assistant."}, # Set a user message for the assistant to respond to. { "role": "user", - "content": "Count to 10. Your response must begin with \"1, \". 
example: 1, 2, 3, ...", - } + "content": 'Count to 10. Your response must begin with "1, ". example: 1, 2, 3, ...', + }, ], - # The language model which will generate the completion. model="mixtral-8x7b-32768", - # # Optional parameters # - # Controls randomness: lowering results in less random completions. # As the temperature approaches zero, the model will become deterministic # and repetitive. temperature=0.5, - # The maximum number of tokens to generate. Requests can use up to # 2048 tokens shared between prompt and completion. max_tokens=1024, - # Controls diversity via nucleus sampling: 0.5 means half of all # likelihood-weighted options are considered. top_p=1, - # A stop sequence is a predefined or user-specified text string that # signals an AI to stop generating content, ensuring its responses # remain focused and concise. Examples include punctuation marks and @@ -49,7 +40,6 @@ # If multiple stop values are needed, an array of string may be passed, # stop=[", 6", ", six", ", Six"] stop=", 6", - # If set, partial message deltas will be sent. stream=False, ) diff --git a/examples/chat_completion_streaming.py b/examples/chat_completion_streaming.py index dd3edbcd..5bae3a79 100644 --- a/examples/chat_completion_streaming.py +++ b/examples/chat_completion_streaming.py @@ -10,43 +10,33 @@ # Set an optional system message. This sets the behavior of the # assistant and can be used to provide specific instructions for # how it should behave throughout the conversation. - { - "role": "system", - "content": "you are a helpful assistant." - }, + {"role": "system", "content": "you are a helpful assistant."}, # Set a user message for the assistant to respond to. { "role": "user", "content": "Explain the importance of low latency LLMs", - } + }, ], - # The language model which will generate the completion. model="mixtral-8x7b-32768", - # # Optional parameters # - # Controls randomness: lowering results in less random completions. # As the temperature approaches zero, the model will become deterministic # and repetitive. temperature=0.5, - # The maximum number of tokens to generate. Requests can use up to # 2048 tokens shared between prompt and completion. max_tokens=1024, - # Controls diversity via nucleus sampling: 0.5 means half of all # likelihood-weighted options are considered. top_p=1, - # A stop sequence is a predefined or user-specified text string that # signals an AI to stop generating content, ensuring its responses # remain focused and concise. Examples include punctuation marks and # markers like "[end]". stop=None, - # If set, partial message deltas will be sent. 
stream=True, ) diff --git a/src/groq/_streaming.py b/src/groq/_streaming.py index 2769874c..ac0ea8aa 100644 --- a/src/groq/_streaming.py +++ b/src/groq/_streaming.py @@ -53,8 +53,6 @@ def __stream__(self) -> Iterator[_T]: iterator = self._iter_events() for sse in iterator: - if sse.data.startswith("[DONE]"): - break yield process_data(data=sse.json(), cast_to=cast_to, response=response) # Ensure the entire stream is consumed @@ -108,8 +106,6 @@ async def __aiter__(self) -> AsyncIterator[_T]: async def _iter_events(self) -> AsyncIterator[ServerSentEvent]: async for sse in self._decoder.aiter(self.response.aiter_lines()): - if sse.data.startswith("[DONE]"): - break yield sse async def __stream__(self) -> AsyncIterator[_T]: diff --git a/src/groq/resources/chat/completions.py b/src/groq/resources/chat/completions.py index 26618039..f1c5a023 100644 --- a/src/groq/resources/chat/completions.py +++ b/src/groq/resources/chat/completions.py @@ -2,11 +2,10 @@ from __future__ import annotations -from typing import Dict, List, Literal, Union, Iterable, Optional, overload +from typing import Dict, List, Union, Iterable, Optional import httpx -from ...lib.chat_completion_chunk import ChatCompletionChunk from ..._types import NOT_GIVEN, Body, Query, Headers, NotGiven from ..._utils import maybe_transform from ..._compat import cached_property @@ -17,7 +16,6 @@ async_to_raw_response_wrapper, async_to_streamed_response_wrapper, ) -from ..._streaming import AsyncStream, Stream from ...types.chat import ChatCompletion, completion_create_params from ..._base_client import ( make_request_options, @@ -35,22 +33,21 @@ def with_raw_response(self) -> CompletionsWithRawResponse: def with_streaming_response(self) -> CompletionsWithStreamingResponse: return CompletionsWithStreamingResponse(self) - @overload def create( self, *, + messages: Iterable[completion_create_params.Message], + model: str, frequency_penalty: float | NotGiven = NOT_GIVEN, logit_bias: Dict[str, int] | NotGiven = NOT_GIVEN, logprobs: bool | NotGiven = NOT_GIVEN, max_tokens: int | NotGiven = NOT_GIVEN, - messages: Iterable[completion_create_params.Message] | NotGiven = NOT_GIVEN, - model: str | NotGiven = NOT_GIVEN, n: int | NotGiven = NOT_GIVEN, presence_penalty: float | NotGiven = NOT_GIVEN, response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN, seed: int | NotGiven = NOT_GIVEN, stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN, - stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN, + stream: bool | NotGiven = NOT_GIVEN, temperature: float | NotGiven = NOT_GIVEN, tool_choice: completion_create_params.ToolChoice | NotGiven = NOT_GIVEN, tools: Iterable[completion_create_params.Tool] | NotGiven = NOT_GIVEN, @@ -64,98 +61,6 @@ def create( extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> ChatCompletion: - ... 
- - @overload - def create( - self, - *, - frequency_penalty: float | NotGiven = NOT_GIVEN, - logit_bias: Dict[str, int] | NotGiven = NOT_GIVEN, - logprobs: bool | NotGiven = NOT_GIVEN, - max_tokens: int | NotGiven = NOT_GIVEN, - messages: Iterable[completion_create_params.Message] | NotGiven = NOT_GIVEN, - model: str | NotGiven = NOT_GIVEN, - n: int | NotGiven = NOT_GIVEN, - presence_penalty: float | NotGiven = NOT_GIVEN, - response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN, - seed: int | NotGiven = NOT_GIVEN, - stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN, - stream: Literal[True], - temperature: float | NotGiven = NOT_GIVEN, - tool_choice: completion_create_params.ToolChoice | NotGiven = NOT_GIVEN, - tools: Iterable[completion_create_params.Tool] | NotGiven = NOT_GIVEN, - top_logprobs: int | NotGiven = NOT_GIVEN, - top_p: float | NotGiven = NOT_GIVEN, - user: str | NotGiven = NOT_GIVEN, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> Stream[ChatCompletionChunk]: - ... - - @overload - def create( - self, - *, - frequency_penalty: float | NotGiven = NOT_GIVEN, - logit_bias: Dict[str, int] | NotGiven = NOT_GIVEN, - logprobs: bool | NotGiven = NOT_GIVEN, - max_tokens: int | NotGiven = NOT_GIVEN, - messages: Iterable[completion_create_params.Message] | NotGiven = NOT_GIVEN, - model: str | NotGiven = NOT_GIVEN, - n: int | NotGiven = NOT_GIVEN, - presence_penalty: float | NotGiven = NOT_GIVEN, - response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN, - seed: int | NotGiven = NOT_GIVEN, - stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN, - stream: bool, - temperature: float | NotGiven = NOT_GIVEN, - tool_choice: completion_create_params.ToolChoice | NotGiven = NOT_GIVEN, - tools: Iterable[completion_create_params.Tool] | NotGiven = NOT_GIVEN, - top_logprobs: int | NotGiven = NOT_GIVEN, - top_p: float | NotGiven = NOT_GIVEN, - user: str | NotGiven = NOT_GIVEN, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> ChatCompletion | Stream[ChatCompletionChunk]: - ... 
- - def create( - self, - *, - frequency_penalty: float | NotGiven = NOT_GIVEN, - logit_bias: Dict[str, int] | NotGiven = NOT_GIVEN, - logprobs: bool | NotGiven = NOT_GIVEN, - max_tokens: int | NotGiven = NOT_GIVEN, - messages: Iterable[completion_create_params.Message] | NotGiven = NOT_GIVEN, - model: str | NotGiven = NOT_GIVEN, - n: int | NotGiven = NOT_GIVEN, - presence_penalty: float | NotGiven = NOT_GIVEN, - response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN, - seed: int | NotGiven = NOT_GIVEN, - stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN, - stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN, - temperature: float | NotGiven = NOT_GIVEN, - tool_choice: completion_create_params.ToolChoice | NotGiven = NOT_GIVEN, - tools: Iterable[completion_create_params.Tool] | NotGiven = NOT_GIVEN, - top_logprobs: int | NotGiven = NOT_GIVEN, - top_p: float | NotGiven = NOT_GIVEN, - user: str | NotGiven = NOT_GIVEN, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> ChatCompletion | Stream[ChatCompletionChunk]: """ Creates a completion for a chat prompt @@ -175,12 +80,12 @@ def create( "/openai/v1/chat/completions", body=maybe_transform( { + "messages": messages, + "model": model, "frequency_penalty": frequency_penalty, "logit_bias": logit_bias, "logprobs": logprobs, "max_tokens": max_tokens, - "messages": messages, - "model": model, "n": n, "presence_penalty": presence_penalty, "response_format": response_format, @@ -200,8 +105,6 @@ def create( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout ), cast_to=ChatCompletion, - stream=stream or False, - stream_cls=Stream[ChatCompletionChunk], ) @@ -214,22 +117,21 @@ def with_raw_response(self) -> AsyncCompletionsWithRawResponse: def with_streaming_response(self) -> AsyncCompletionsWithStreamingResponse: return AsyncCompletionsWithStreamingResponse(self) - @overload async def create( self, *, + messages: Iterable[completion_create_params.Message], + model: str, frequency_penalty: float | NotGiven = NOT_GIVEN, logit_bias: Dict[str, int] | NotGiven = NOT_GIVEN, logprobs: bool | NotGiven = NOT_GIVEN, max_tokens: int | NotGiven = NOT_GIVEN, - messages: Iterable[completion_create_params.Message] | NotGiven = NOT_GIVEN, - model: str | NotGiven = NOT_GIVEN, n: int | NotGiven = NOT_GIVEN, presence_penalty: float | NotGiven = NOT_GIVEN, response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN, seed: int | NotGiven = NOT_GIVEN, stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN, - stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN, + stream: bool | NotGiven = NOT_GIVEN, temperature: float | NotGiven = NOT_GIVEN, tool_choice: completion_create_params.ToolChoice | NotGiven = NOT_GIVEN, tools: Iterable[completion_create_params.Tool] | NotGiven = NOT_GIVEN, @@ -243,98 +145,6 @@ async def create( extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> ChatCompletion: - ... 
- - @overload - async def create( - self, - *, - frequency_penalty: float | NotGiven = NOT_GIVEN, - logit_bias: Dict[str, int] | NotGiven = NOT_GIVEN, - logprobs: bool | NotGiven = NOT_GIVEN, - max_tokens: int | NotGiven = NOT_GIVEN, - messages: Iterable[completion_create_params.Message] | NotGiven = NOT_GIVEN, - model: str | NotGiven = NOT_GIVEN, - n: int | NotGiven = NOT_GIVEN, - presence_penalty: float | NotGiven = NOT_GIVEN, - response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN, - seed: int | NotGiven = NOT_GIVEN, - stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN, - stream: Literal[True], - temperature: float | NotGiven = NOT_GIVEN, - tool_choice: completion_create_params.ToolChoice | NotGiven = NOT_GIVEN, - tools: Iterable[completion_create_params.Tool] | NotGiven = NOT_GIVEN, - top_logprobs: int | NotGiven = NOT_GIVEN, - top_p: float | NotGiven = NOT_GIVEN, - user: str | NotGiven = NOT_GIVEN, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> AsyncStream[ChatCompletionChunk]: - ... - - @overload - async def create( - self, - *, - frequency_penalty: float | NotGiven = NOT_GIVEN, - logit_bias: Dict[str, int] | NotGiven = NOT_GIVEN, - logprobs: bool | NotGiven = NOT_GIVEN, - max_tokens: int | NotGiven = NOT_GIVEN, - messages: Iterable[completion_create_params.Message] | NotGiven = NOT_GIVEN, - model: str | NotGiven = NOT_GIVEN, - n: int | NotGiven = NOT_GIVEN, - presence_penalty: float | NotGiven = NOT_GIVEN, - response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN, - seed: int | NotGiven = NOT_GIVEN, - stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN, - stream: bool, - temperature: float | NotGiven = NOT_GIVEN, - tool_choice: completion_create_params.ToolChoice | NotGiven = NOT_GIVEN, - tools: Iterable[completion_create_params.Tool] | NotGiven = NOT_GIVEN, - top_logprobs: int | NotGiven = NOT_GIVEN, - top_p: float | NotGiven = NOT_GIVEN, - user: str | NotGiven = NOT_GIVEN, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> ChatCompletion | AsyncStream[ChatCompletionChunk]: - ... 
- - async def create( - self, - *, - frequency_penalty: float | NotGiven = NOT_GIVEN, - logit_bias: Dict[str, int] | NotGiven = NOT_GIVEN, - logprobs: bool | NotGiven = NOT_GIVEN, - max_tokens: int | NotGiven = NOT_GIVEN, - messages: Iterable[completion_create_params.Message] | NotGiven = NOT_GIVEN, - model: str | NotGiven = NOT_GIVEN, - n: int | NotGiven = NOT_GIVEN, - presence_penalty: float | NotGiven = NOT_GIVEN, - response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN, - seed: int | NotGiven = NOT_GIVEN, - stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN, - stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN, - temperature: float | NotGiven = NOT_GIVEN, - tool_choice: completion_create_params.ToolChoice | NotGiven = NOT_GIVEN, - tools: Iterable[completion_create_params.Tool] | NotGiven = NOT_GIVEN, - top_logprobs: int | NotGiven = NOT_GIVEN, - top_p: float | NotGiven = NOT_GIVEN, - user: str | NotGiven = NOT_GIVEN, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> ChatCompletion | AsyncStream[ChatCompletionChunk]: """ Creates a completion for a chat prompt @@ -354,12 +164,12 @@ async def create( "/openai/v1/chat/completions", body=maybe_transform( { + "messages": messages, + "model": model, "frequency_penalty": frequency_penalty, "logit_bias": logit_bias, "logprobs": logprobs, "max_tokens": max_tokens, - "messages": messages, - "model": model, "n": n, "presence_penalty": presence_penalty, "response_format": response_format, @@ -379,8 +189,6 @@ async def create( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout ), cast_to=ChatCompletion, - stream=stream or False, - stream_cls=AsyncStream[ChatCompletionChunk], ) diff --git a/src/groq/types/chat/chat_completion.py b/src/groq/types/chat/chat_completion.py index 77365e4a..1ff1f5d5 100644 --- a/src/groq/types/chat/chat_completion.py +++ b/src/groq/types/chat/chat_completion.py @@ -54,21 +54,21 @@ class ChoiceMessageToolCall(BaseModel): class ChoiceMessage(BaseModel): - content: Optional[str] = None + content: str - role: Optional[str] = None + role: str tool_calls: Optional[List[ChoiceMessageToolCall]] = None class Choice(BaseModel): - finish_reason: Optional[str] = None + finish_reason: str - index: Optional[int] = None + index: int - logprobs: Optional[ChoiceLogprobs] = None + logprobs: ChoiceLogprobs - message: Optional[ChoiceMessage] = None + message: ChoiceMessage class Usage(BaseModel): @@ -86,9 +86,9 @@ class Usage(BaseModel): class ChatCompletion(BaseModel): - id: Optional[str] = None + choices: List[Choice] - choices: Optional[List[Choice]] = None + id: Optional[str] = None created: Optional[int] = None diff --git a/src/groq/types/chat/completion_create_params.py b/src/groq/types/chat/completion_create_params.py index 1aea7838..0f9712bc 100644 --- a/src/groq/types/chat/completion_create_params.py +++ b/src/groq/types/chat/completion_create_params.py @@ -3,7 +3,7 @@ from __future__ import annotations from typing import Dict, List, Union, Iterable, Optional -from typing_extensions import Annotated, TypedDict +from typing_extensions import Required, Annotated, TypedDict from 
..._utils import PropertyInfo @@ -22,6 +22,10 @@ class CompletionCreateParams(TypedDict, total=False): + messages: Required[Iterable[Message]] + + model: Required[str] + frequency_penalty: float logit_bias: Dict[str, int] @@ -30,10 +34,6 @@ class CompletionCreateParams(TypedDict, total=False): max_tokens: int - messages: Iterable[Message] - - model: str - n: int presence_penalty: float @@ -78,11 +78,11 @@ class MessageToolCall(TypedDict, total=False): class Message(TypedDict, total=False): - content: str + content: Required[str] - name: str + role: Required[str] - role: str + name: str tool_call_id: str """ToolMessage Fields""" diff --git a/tests/api_resources/chat/test_completions.py b/tests/api_resources/chat/test_completions.py index 1eabfe70..1fdfc34b 100644 --- a/tests/api_resources/chat/test_completions.py +++ b/tests/api_resources/chat/test_completions.py @@ -19,16 +19,28 @@ class TestCompletions: @parametrize def test_method_create(self, client: Groq) -> None: - completion = client.chat.completions.create() + completion = client.chat.completions.create( + messages=[ + { + "content": "string", + "role": "string", + }, + { + "content": "string", + "role": "string", + }, + { + "content": "string", + "role": "string", + }, + ], + model="string", + ) assert_matches_type(ChatCompletion, completion, path=["response"]) @parametrize def test_method_create_with_all_params(self, client: Groq) -> None: completion = client.chat.completions.create( - frequency_penalty=0, - logit_bias={"foo": 0}, - logprobs=True, - max_tokens=0, messages=[ { "content": "string", @@ -128,6 +140,10 @@ def test_method_create_with_all_params(self, client: Groq) -> None: }, ], model="string", + frequency_penalty=0, + logit_bias={"foo": 0}, + logprobs=True, + max_tokens=0, n=0, presence_penalty=0, response_format={"type": "string"}, @@ -176,7 +192,23 @@ def test_method_create_with_all_params(self, client: Groq) -> None: @parametrize def test_raw_response_create(self, client: Groq) -> None: - response = client.chat.completions.with_raw_response.create() + response = client.chat.completions.with_raw_response.create( + messages=[ + { + "content": "string", + "role": "string", + }, + { + "content": "string", + "role": "string", + }, + { + "content": "string", + "role": "string", + }, + ], + model="string", + ) assert response.is_closed is True assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -185,7 +217,23 @@ def test_raw_response_create(self, client: Groq) -> None: @parametrize def test_streaming_response_create(self, client: Groq) -> None: - with client.chat.completions.with_streaming_response.create() as response: + with client.chat.completions.with_streaming_response.create( + messages=[ + { + "content": "string", + "role": "string", + }, + { + "content": "string", + "role": "string", + }, + { + "content": "string", + "role": "string", + }, + ], + model="string", + ) as response: assert not response.is_closed assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -200,16 +248,28 @@ class TestAsyncCompletions: @parametrize async def test_method_create(self, async_client: AsyncGroq) -> None: - completion = await async_client.chat.completions.create() + completion = await async_client.chat.completions.create( + messages=[ + { + "content": "string", + "role": "string", + }, + { + "content": "string", + "role": "string", + }, + { + "content": "string", + "role": "string", + }, + ], + model="string", + ) assert_matches_type(ChatCompletion, completion, path=["response"]) 
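(In the `completion_create_params.py` hunk above, marking `messages` and `model` with `Required[...]` inside a `total=False` TypedDict is what makes just those two keys mandatory while every other field stays optional, which is why the updated tests in this patch now always pass both. A small standalone sketch of that pattern, with simplified placeholder field types; illustrative only, not code from this patch:)

```python
# Minimal sketch of the Required[...] pattern adopted above (illustrative only;
# the field types are simplified placeholders).
from typing_extensions import Required, TypedDict


class ExampleCreateParams(TypedDict, total=False):
    messages: Required[list]  # must always be supplied
    model: Required[str]      # must always be supplied
    max_tokens: int           # still optional because of total=False


ok: ExampleCreateParams = {"messages": [], "model": "example-model"}
# missing: ExampleCreateParams = {"model": "example-model"}
#   ^ a type checker reports that the required key "messages" is absent
```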
@parametrize async def test_method_create_with_all_params(self, async_client: AsyncGroq) -> None: completion = await async_client.chat.completions.create( - frequency_penalty=0, - logit_bias={"foo": 0}, - logprobs=True, - max_tokens=0, messages=[ { "content": "string", @@ -309,6 +369,10 @@ async def test_method_create_with_all_params(self, async_client: AsyncGroq) -> N }, ], model="string", + frequency_penalty=0, + logit_bias={"foo": 0}, + logprobs=True, + max_tokens=0, n=0, presence_penalty=0, response_format={"type": "string"}, @@ -357,7 +421,23 @@ async def test_method_create_with_all_params(self, async_client: AsyncGroq) -> N @parametrize async def test_raw_response_create(self, async_client: AsyncGroq) -> None: - response = await async_client.chat.completions.with_raw_response.create() + response = await async_client.chat.completions.with_raw_response.create( + messages=[ + { + "content": "string", + "role": "string", + }, + { + "content": "string", + "role": "string", + }, + { + "content": "string", + "role": "string", + }, + ], + model="string", + ) assert response.is_closed is True assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -366,7 +446,23 @@ async def test_raw_response_create(self, async_client: AsyncGroq) -> None: @parametrize async def test_streaming_response_create(self, async_client: AsyncGroq) -> None: - async with async_client.chat.completions.with_streaming_response.create() as response: + async with async_client.chat.completions.with_streaming_response.create( + messages=[ + { + "content": "string", + "role": "string", + }, + { + "content": "string", + "role": "string", + }, + { + "content": "string", + "role": "string", + }, + ], + model="string", + ) as response: assert not response.is_closed assert response.http_request.headers.get("X-Stainless-Lang") == "python" From b9b55b41cb158efd155f9cda829808c877493afd Mon Sep 17 00:00:00 2001 From: "stainless-app[bot]" <142633134+stainless-app[bot]@users.noreply.github.com> Date: Wed, 21 Feb 2024 01:49:56 +0000 Subject: [PATCH 4/6] chore: update branch (#8) --- .devcontainer/Dockerfile | 2 +- .github/workflows/ci.yml | 6 ++-- .github/workflows/publish-pypi.yml | 2 +- README.md | 8 ++--- pyproject.toml | 2 +- requirements-dev.lock | 48 ++++++++++++++++++++++++++++-- requirements.lock | 21 +++++++++++++ 7 files changed, 77 insertions(+), 12 deletions(-) diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile index 6eb00725..dd939620 100644 --- a/.devcontainer/Dockerfile +++ b/.devcontainer/Dockerfile @@ -3,7 +3,7 @@ FROM mcr.microsoft.com/vscode/devcontainers/python:0-${VARIANT} USER vscode -RUN curl -sSf https://rye-up.com/get | RYE_VERSION="0.15.2" RYE_INSTALL_OPTION="--yes" bash +RUN curl -sSf https://rye-up.com/get | RYE_VERSION="0.24.0" RYE_INSTALL_OPTION="--yes" bash ENV PATH=/home/vscode/.rye/shims:$PATH RUN echo "[[ -d .venv ]] && source .venv/bin/activate" >> /home/vscode/.bashrc diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index aea1868f..fca066ea 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -2,10 +2,10 @@ name: CI on: push: branches: - - stainless + - main pull_request: branches: - - stainless + - main jobs: lint: @@ -21,7 +21,7 @@ jobs: curl -sSf https://rye-up.com/get | bash echo "$HOME/.rye/shims" >> $GITHUB_PATH env: - RYE_VERSION: 0.15.2 + RYE_VERSION: 0.24.0 RYE_INSTALL_OPTION: "--yes" - name: Install dependencies diff --git a/.github/workflows/publish-pypi.yml b/.github/workflows/publish-pypi.yml index 14ddce27..962b8c4f 100644 
--- a/.github/workflows/publish-pypi.yml +++ b/.github/workflows/publish-pypi.yml @@ -21,7 +21,7 @@ jobs: curl -sSf https://rye-up.com/get | bash echo "$HOME/.rye/shims" >> $GITHUB_PATH env: - RYE_VERSION: 0.15.2 + RYE_VERSION: 0.24.0 RYE_INSTALL_OPTION: "--yes" - name: Publish to PyPI diff --git a/README.md b/README.md index 5c0abddb..449ef182 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # Groq Python API library -[![PyPI version](https://img.shields.io/pypi/v/groq.svg)](https://pypi.org/project/groq/) +[![PyPI version](https://img.shields.io/pypi/v/groq-sdk.svg)](https://pypi.org/project/groq-sdk/) The Groq Python library provides convenient access to the Groq REST API from any Python 3.7+ application. The library includes type definitions for all request params and response fields, @@ -13,7 +13,7 @@ The REST API documentation can be found [on console.groq.com](https://console.gr ## Installation ```sh -pip install groq +pip install groq-sdk ``` ## Usage @@ -261,9 +261,9 @@ completion = response.parse() # get the object that `chat.completions.create()` print(completion.id) ``` -These methods return an [`APIResponse`](https://github.com/groq/groq-python/tree/stainless/src/groq/_response.py) object. +These methods return an [`APIResponse`](https://github.com/groq/groq-python/tree/main/src/groq/_response.py) object. -The async client returns an [`AsyncAPIResponse`](https://github.com/groq/groq-python/tree/stainless/src/groq/_response.py) with the same structure, the only difference being `await`able methods for reading the response content. +The async client returns an [`AsyncAPIResponse`](https://github.com/groq/groq-python/tree/main/src/groq/_response.py) with the same structure, the only difference being `await`able methods for reading the response content. #### `.with_streaming_response` diff --git a/pyproject.toml b/pyproject.toml index f3c156bb..9a9918fe 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,5 +1,5 @@ [project] -name = "groq" +name = "groq-sdk" version = "0.1.0" description = "The official Python library for the groq API" readme = "README.md" diff --git a/requirements-dev.lock b/requirements-dev.lock index a0134d65..36fa001d 100644 --- a/requirements-dev.lock +++ b/requirements-dev.lock @@ -5,48 +5,92 @@ # pre: false # features: [] # all-features: true +# with-sources: false -e file:. 
annotated-types==0.6.0 + # via pydantic anyio==4.1.0 + # via groq-sdk + # via httpx argcomplete==3.1.2 + # via nox attrs==23.1.0 + # via pytest certifi==2023.7.22 + # via httpcore + # via httpx colorlog==6.7.0 + # via nox dirty-equals==0.6.0 distlib==0.3.7 + # via virtualenv distro==1.8.0 + # via groq-sdk exceptiongroup==1.1.3 + # via anyio filelock==3.12.4 + # via virtualenv h11==0.14.0 + # via httpcore httpcore==1.0.2 + # via httpx httpx==0.25.2 + # via groq-sdk + # via respx idna==3.4 + # via anyio + # via httpx importlib-metadata==7.0.0 iniconfig==2.0.0 + # via pytest mypy==1.7.1 mypy-extensions==1.0.0 + # via mypy nodeenv==1.8.0 + # via pyright nox==2023.4.22 packaging==23.2 + # via nox + # via pytest platformdirs==3.11.0 + # via virtualenv pluggy==1.3.0 + # via pytest py==1.11.0 + # via pytest pydantic==2.4.2 + # via groq-sdk pydantic-core==2.10.1 + # via pydantic pyright==1.1.332 pytest==7.1.1 + # via pytest-asyncio pytest-asyncio==0.21.1 python-dateutil==2.8.2 + # via time-machine pytz==2023.3.post1 + # via dirty-equals respx==0.20.2 ruff==0.1.9 +setuptools==68.2.2 + # via nodeenv six==1.16.0 + # via python-dateutil sniffio==1.3.0 + # via anyio + # via groq-sdk + # via httpx time-machine==2.9.0 tomli==2.0.1 + # via mypy + # via pytest typing-extensions==4.8.0 + # via groq-sdk + # via mypy + # via pydantic + # via pydantic-core virtualenv==20.24.5 + # via nox zipp==3.17.0 -# The following packages are considered to be unsafe in a requirements file: -setuptools==68.2.2 + # via importlib-metadata diff --git a/requirements.lock b/requirements.lock index 2022a5c5..579ab697 100644 --- a/requirements.lock +++ b/requirements.lock @@ -5,18 +5,39 @@ # pre: false # features: [] # all-features: true +# with-sources: false -e file:. annotated-types==0.6.0 + # via pydantic anyio==4.1.0 + # via groq-sdk + # via httpx certifi==2023.7.22 + # via httpcore + # via httpx distro==1.8.0 + # via groq-sdk exceptiongroup==1.1.3 + # via anyio h11==0.14.0 + # via httpcore httpcore==1.0.2 + # via httpx httpx==0.25.2 + # via groq-sdk idna==3.4 + # via anyio + # via httpx pydantic==2.4.2 + # via groq-sdk pydantic-core==2.10.1 + # via pydantic sniffio==1.3.0 + # via anyio + # via groq-sdk + # via httpx typing-extensions==4.8.0 + # via groq-sdk + # via pydantic + # via pydantic-core From 0c0d20405a96167f060a03a2b8a58a49d9a1c7c8 Mon Sep 17 00:00:00 2001 From: "stainless-app[bot]" <142633134+stainless-app[bot]@users.noreply.github.com> Date: Wed, 21 Feb 2024 01:52:55 +0000 Subject: [PATCH 5/6] feat: update via SDK Studio (#10) --- .github/workflows/ci.yml | 4 ++-- README.md | 8 ++++---- pyproject.toml | 2 +- requirements-dev.lock | 12 ++++++------ requirements.lock | 12 ++++++------ 5 files changed, 19 insertions(+), 19 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index fca066ea..dfb911fb 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -2,10 +2,10 @@ name: CI on: push: branches: - - main + - stainless pull_request: branches: - - main + - stainless jobs: lint: diff --git a/README.md b/README.md index 449ef182..5c0abddb 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # Groq Python API library -[![PyPI version](https://img.shields.io/pypi/v/groq-sdk.svg)](https://pypi.org/project/groq-sdk/) +[![PyPI version](https://img.shields.io/pypi/v/groq.svg)](https://pypi.org/project/groq/) The Groq Python library provides convenient access to the Groq REST API from any Python 3.7+ application. 
The library includes type definitions for all request params and response fields, @@ -13,7 +13,7 @@ The REST API documentation can be found [on console.groq.com](https://console.gr ## Installation ```sh -pip install groq-sdk +pip install groq ``` ## Usage @@ -261,9 +261,9 @@ completion = response.parse() # get the object that `chat.completions.create()` print(completion.id) ``` -These methods return an [`APIResponse`](https://github.com/groq/groq-python/tree/main/src/groq/_response.py) object. +These methods return an [`APIResponse`](https://github.com/groq/groq-python/tree/stainless/src/groq/_response.py) object. -The async client returns an [`AsyncAPIResponse`](https://github.com/groq/groq-python/tree/main/src/groq/_response.py) with the same structure, the only difference being `await`able methods for reading the response content. +The async client returns an [`AsyncAPIResponse`](https://github.com/groq/groq-python/tree/stainless/src/groq/_response.py) with the same structure, the only difference being `await`able methods for reading the response content. #### `.with_streaming_response` diff --git a/pyproject.toml b/pyproject.toml index 9a9918fe..f3c156bb 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,5 +1,5 @@ [project] -name = "groq-sdk" +name = "groq" version = "0.1.0" description = "The official Python library for the groq API" readme = "README.md" diff --git a/requirements-dev.lock b/requirements-dev.lock index 36fa001d..fb76b532 100644 --- a/requirements-dev.lock +++ b/requirements-dev.lock @@ -11,7 +11,7 @@ annotated-types==0.6.0 # via pydantic anyio==4.1.0 - # via groq-sdk + # via groq # via httpx argcomplete==3.1.2 # via nox @@ -26,7 +26,7 @@ dirty-equals==0.6.0 distlib==0.3.7 # via virtualenv distro==1.8.0 - # via groq-sdk + # via groq exceptiongroup==1.1.3 # via anyio filelock==3.12.4 @@ -36,7 +36,7 @@ h11==0.14.0 httpcore==1.0.2 # via httpx httpx==0.25.2 - # via groq-sdk + # via groq # via respx idna==3.4 # via anyio @@ -60,7 +60,7 @@ pluggy==1.3.0 py==1.11.0 # via pytest pydantic==2.4.2 - # via groq-sdk + # via groq pydantic-core==2.10.1 # via pydantic pyright==1.1.332 @@ -79,14 +79,14 @@ six==1.16.0 # via python-dateutil sniffio==1.3.0 # via anyio - # via groq-sdk + # via groq # via httpx time-machine==2.9.0 tomli==2.0.1 # via mypy # via pytest typing-extensions==4.8.0 - # via groq-sdk + # via groq # via mypy # via pydantic # via pydantic-core diff --git a/requirements.lock b/requirements.lock index 579ab697..991544f0 100644 --- a/requirements.lock +++ b/requirements.lock @@ -11,13 +11,13 @@ annotated-types==0.6.0 # via pydantic anyio==4.1.0 - # via groq-sdk + # via groq # via httpx certifi==2023.7.22 # via httpcore # via httpx distro==1.8.0 - # via groq-sdk + # via groq exceptiongroup==1.1.3 # via anyio h11==0.14.0 @@ -25,19 +25,19 @@ h11==0.14.0 httpcore==1.0.2 # via httpx httpx==0.25.2 - # via groq-sdk + # via groq idna==3.4 # via anyio # via httpx pydantic==2.4.2 - # via groq-sdk + # via groq pydantic-core==2.10.1 # via pydantic sniffio==1.3.0 # via anyio - # via groq-sdk + # via groq # via httpx typing-extensions==4.8.0 - # via groq-sdk + # via groq # via pydantic # via pydantic-core From 214765e7581616d4a2761933340548276e8f25d6 Mon Sep 17 00:00:00 2001 From: "stainless-app[bot]" <142633134+stainless-app[bot]@users.noreply.github.com> Date: Wed, 21 Feb 2024 01:53:04 +0000 Subject: [PATCH 6/6] release: 0.2.0 --- .release-please-manifest.json | 2 +- CHANGELOG.md | 19 +++++++++++++++++++ pyproject.toml | 2 +- src/groq/_version.py | 2 +- 4 files changed, 22 
insertions(+), 3 deletions(-) diff --git a/.release-please-manifest.json b/.release-please-manifest.json index 3d2ac0bd..10f30916 100644 --- a/.release-please-manifest.json +++ b/.release-please-manifest.json @@ -1,3 +1,3 @@ { - ".": "0.1.0" + ".": "0.2.0" } \ No newline at end of file diff --git a/CHANGELOG.md b/CHANGELOG.md index f643576b..50dc1242 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,24 @@ # Changelog +## 0.2.0 (2024-02-21) + +Full Changelog: [v0.1.0...v0.2.0](https://github.com/groq/groq-python/compare/v0.1.0...v0.2.0) + +### Features + +* Add initial Stainless SDK ([d5a8512](https://github.com/groq/groq-python/commit/d5a851262e04e625dde130367ed91d8f95683599)) +* Add initial Stainless SDK ([316de2c](https://github.com/groq/groq-python/commit/316de2ccfeb76e36fe34bb8656ea90a8d42a7d00)) +* create default branch ([7e00266](https://github.com/groq/groq-python/commit/7e00266e3c691d92d508e753e2c14c03297c09f9)) +* update via SDK Studio ([#10](https://github.com/groq/groq-python/issues/10)) ([0c0d204](https://github.com/groq/groq-python/commit/0c0d20405a96167f060a03a2b8a58a49d9a1c7c8)) +* update via SDK Studio ([#3](https://github.com/groq/groq-python/issues/3)) ([8d92c08](https://github.com/groq/groq-python/commit/8d92c086e320c2715e02bc79807ff872e84c0b0f)) + + +### Chores + +* go live ([#2](https://github.com/groq/groq-python/issues/2)) ([ba81c42](https://github.com/groq/groq-python/commit/ba81c42d6d0fd6d47819e0d58962235cb70ca4f1)) +* go live ([#5](https://github.com/groq/groq-python/issues/5)) ([af9a838](https://github.com/groq/groq-python/commit/af9a838e240bb0f7385bc33fb18ce246427ca2f7)) +* update branch ([#8](https://github.com/groq/groq-python/issues/8)) ([b9b55b4](https://github.com/groq/groq-python/commit/b9b55b41cb158efd155f9cda829808c877493afd)) + ## 0.1.0 (2024-02-10) Full Changelog: [v0.0.1...v0.1.0](https://github.com/definitive-io/groqcloud-python/compare/v0.0.1...v0.1.0) diff --git a/pyproject.toml b/pyproject.toml index f3c156bb..d65c5213 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "groq" -version = "0.1.0" +version = "0.2.0" description = "The official Python library for the groq API" readme = "README.md" license = "Apache-2.0" diff --git a/src/groq/_version.py b/src/groq/_version.py index 0282ec9b..40c5272b 100644 --- a/src/groq/_version.py +++ b/src/groq/_version.py @@ -1,4 +1,4 @@ # File generated from our OpenAPI spec by Stainless. __title__ = "groq" -__version__ = "0.1.0" # x-release-please-version +__version__ = "0.2.0" # x-release-please-version
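For reference, the `.with_raw_response` pattern exercised by the new async tests and described in the README hunks above can be used as follows (a minimal sketch, assuming the API key is configured in the environment as in the bundled examples):

```python
from groq import Groq

client = Groq()

# Ask for the raw HTTP response instead of the parsed object.
response = client.chat.completions.with_raw_response.create(
    messages=[{"role": "user", "content": "Say hello in one sentence."}],
    model="mixtral-8x7b-32768",
)

# Recover the object that `chat.completions.create()` would have returned.
completion = response.parse()
print(completion.id)
```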
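The release commit itself only bumps version metadata; once 0.2.0 is installed, the bump can be confirmed from the generated version module (a quick check, assuming a standard `pip install groq`):

```python
from groq._version import __title__, __version__

print(__title__, __version__)  # expected output after this release: groq 0.2.0
```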