From aa5137b0f26cd449860c2c45a55e9f3c829a6d16 Mon Sep 17 00:00:00 2001 From: Graden Rea Date: Wed, 14 Feb 2024 15:08:48 -0800 Subject: [PATCH 1/6] Add examples --- examples/chat_completion.py | 55 +++++++++++++++++++ examples/chat_completion_async.py | 60 +++++++++++++++++++++ examples/chat_completion_async_streaming.py | 58 ++++++++++++++++++++ examples/chat_completion_stop.py | 58 ++++++++++++++++++++ examples/chat_completion_streaming.py | 56 +++++++++++++++++++ 5 files changed, 287 insertions(+) create mode 100644 examples/chat_completion.py create mode 100644 examples/chat_completion_async.py create mode 100644 examples/chat_completion_async_streaming.py create mode 100644 examples/chat_completion_stop.py create mode 100644 examples/chat_completion_streaming.py diff --git a/examples/chat_completion.py b/examples/chat_completion.py new file mode 100644 index 00000000..18ea48ec --- /dev/null +++ b/examples/chat_completion.py @@ -0,0 +1,55 @@ +from groq import Groq + +client = Groq() + +chat_completion = client.chat.completions.create( + # + # Required parameters + # + messages=[ + # Set an optional system message. This sets the behavior of the + # assistant and can be used to provide specific instructions for + # how it should behave throughout the conversation. + { + "role": "system", + "content": "you are a helpful assistant." + }, + # Set a user message for the assistant to respond to. + { + "role": "user", + "content": "Explain the importance of low latency LLMs", + } + ], + + # The language model which will generate the completion. + model="mixtral-8x7b-32768", + + # + # Optional parameters + # + + # Controls randomness: lowering results in less random completions. + # As the temperature approaches zero, the model will become deterministic + # and repetitive. + temperature=0.5, + + # The maximum number of tokens to generate. Requests can use up to + # 2048 tokens shared between prompt and completion. + max_tokens=1024, + + # Controls diversity via nucleus sampling: 0.5 means half of all + # likelihood-weighted options are considered. + top_p=1, + + # A stop sequence is a predefined or user-specified text string that + # signals an AI to stop generating content, ensuring its responses + # remain focused and concise. Examples include punctuation marks and + # markers like "[end]". + stop=None, + + # If set, partial message deltas will be sent. + stream=False, +) + +# Print the completion returned by the LLM. +print(chat_completion.choices[0].message.content) diff --git a/examples/chat_completion_async.py b/examples/chat_completion_async.py new file mode 100644 index 00000000..58b1974b --- /dev/null +++ b/examples/chat_completion_async.py @@ -0,0 +1,60 @@ +import asyncio + +from groq import AsyncGroq + +async def main(): + client = AsyncGroq() + + chat_completion = await client.chat.completions.create( + # + # Required parameters + # + messages=[ + # Set an optional system message. This sets the behavior of the + # assistant and can be used to provide specific instructions for + # how it should behave throughout the conversation. + { + "role": "system", + "content": "you are a helpful assistant." + }, + # Set a user message for the assistant to respond to. + { + "role": "user", + "content": "Explain the importance of low latency LLMs", + } + ], + + # The language model which will generate the completion. + model="mixtral-8x7b-32768", + + # + # Optional parameters + # + + # Controls randomness: lowering results in less random completions. 
+ # As the temperature approaches zero, the model will become + # deterministic and repetitive. + temperature=0.5, + + # The maximum number of tokens to generate. Requests can use up to + # 2048 tokens shared between prompt and completion. + max_tokens=1024, + + # Controls diversity via nucleus sampling: 0.5 means half of all + # likelihood-weighted options are considered. + top_p=1, + + # A stop sequence is a predefined or user-specified text string that + # signals an AI to stop generating content, ensuring its responses + # remain focused and concise. Examples include punctuation marks and + # markers like "[end]". + stop=None, + + # If set, partial message deltas will be sent. + stream=False, + ) + + # Print the completion returned by the LLM. + print(chat_completion.choices[0].message.content) + +asyncio.run(main()) diff --git a/examples/chat_completion_async_streaming.py b/examples/chat_completion_async_streaming.py new file mode 100644 index 00000000..1e71f737 --- /dev/null +++ b/examples/chat_completion_async_streaming.py @@ -0,0 +1,58 @@ +import asyncio + +from groq import AsyncGroq + +async def main(): + client = AsyncGroq() + + stream = await client.chat.completions.create( + # + # Required parameters + # + messages=[ + # Set an optional system message. This sets the behavior of the + # assistant and can be used to provide specific instructions for + # how it should behave throughout the conversation. + { + "role": "system", + "content": "you are a helpful assistant." + }, + # Set a user message for the assistant to respond to. + { + "role": "user", + "content": "Explain the importance of low latency LLMs", + } + ], + + # The language model which will generate the completion. + model="mixtral-8x7b-32768", + + # + # Optional parameters + # + + # Controls randomness: lowering results in less random completions. + # As the temperature approaches zero, the model will become + # deterministic and repetitive. + temperature=0.5, + + # The maximum number of tokens to generate. Requests can use up to + # 2048 tokens shared between prompt and completion. + max_tokens=1024, + + # A stop sequence is a predefined or user-specified text string that + # signals an AI to stop generating content, ensuring its responses + # remain focused and concise. Examples include punctuation marks and + # markers like "[end]". + stop=None, + + # Controls diversity via nucleus sampling: 0.5 means half of all + # likelihood-weighted options are considered. + stream=True, + ) + + # Print the incremental deltas returned by the LLM. + async for chunk in stream: + print(chunk.choices[0].delta.content, end="") + +asyncio.run(main()) diff --git a/examples/chat_completion_stop.py b/examples/chat_completion_stop.py new file mode 100644 index 00000000..1c749bca --- /dev/null +++ b/examples/chat_completion_stop.py @@ -0,0 +1,58 @@ +from groq import Groq + +client = Groq() + +chat_completion = client.chat.completions.create( + # + # Required parameters + # + messages=[ + # Set an optional system message. This sets the behavior of the + # assistant and can be used to provide specific instructions for + # how it should behave throughout the conversation. + { + "role": "system", + "content": "you are a helpful assistant." + }, + # Set a user message for the assistant to respond to. + { + "role": "user", + "content": "Count to 10. Your response must begin with \"1, \". example: 1, 2, 3, ...", + } + ], + + # The language model which will generate the completion. 
+ model="mixtral-8x7b-32768", + + # + # Optional parameters + # + + # Controls randomness: lowering results in less random completions. + # As the temperature approaches zero, the model will become deterministic + # and repetitive. + temperature=0.5, + + # The maximum number of tokens to generate. Requests can use up to + # 2048 tokens shared between prompt and completion. + max_tokens=1024, + + # Controls diversity via nucleus sampling: 0.5 means half of all + # likelihood-weighted options are considered. + top_p=1, + + # A stop sequence is a predefined or user-specified text string that + # signals an AI to stop generating content, ensuring its responses + # remain focused and concise. Examples include punctuation marks and + # markers like "[end]". + # For this example, we will use ", 6" so that the llm stops counting at 5. + # If multiple stop values are needed, an array of string may be passed, + # stop=[", 6", ", six", ", Six"] + stop=", 6", + + # If set, partial message deltas will be sent. + stream=False, +) + +# Print the completion returned by the LLM. +print(chat_completion.choices[0].message.content) diff --git a/examples/chat_completion_streaming.py b/examples/chat_completion_streaming.py new file mode 100644 index 00000000..dd3edbcd --- /dev/null +++ b/examples/chat_completion_streaming.py @@ -0,0 +1,56 @@ +from groq import Groq + +client = Groq() + +stream = client.chat.completions.create( + # + # Required parameters + # + messages=[ + # Set an optional system message. This sets the behavior of the + # assistant and can be used to provide specific instructions for + # how it should behave throughout the conversation. + { + "role": "system", + "content": "you are a helpful assistant." + }, + # Set a user message for the assistant to respond to. + { + "role": "user", + "content": "Explain the importance of low latency LLMs", + } + ], + + # The language model which will generate the completion. + model="mixtral-8x7b-32768", + + # + # Optional parameters + # + + # Controls randomness: lowering results in less random completions. + # As the temperature approaches zero, the model will become deterministic + # and repetitive. + temperature=0.5, + + # The maximum number of tokens to generate. Requests can use up to + # 2048 tokens shared between prompt and completion. + max_tokens=1024, + + # Controls diversity via nucleus sampling: 0.5 means half of all + # likelihood-weighted options are considered. + top_p=1, + + # A stop sequence is a predefined or user-specified text string that + # signals an AI to stop generating content, ensuring its responses + # remain focused and concise. Examples include punctuation marks and + # markers like "[end]". + stop=None, + + # If set, partial message deltas will be sent. + stream=True, +) + +# Print the incremental deltas returned by the LLM. 
+for chunk in stream: + print(chunk.choices[0].delta.content, end="") From 3907c478a623074d39528a0109318df4af61a777 Mon Sep 17 00:00:00 2001 From: Graden Rea Date: Wed, 14 Feb 2024 15:40:49 -0800 Subject: [PATCH 2/6] Add streaming support --- src/groq/_streaming.py | 4 + src/groq/resources/chat/completions.py | 198 ++++++++++++++++++++++++- 2 files changed, 199 insertions(+), 3 deletions(-) diff --git a/src/groq/_streaming.py b/src/groq/_streaming.py index ac0ea8aa..2769874c 100644 --- a/src/groq/_streaming.py +++ b/src/groq/_streaming.py @@ -53,6 +53,8 @@ def __stream__(self) -> Iterator[_T]: iterator = self._iter_events() for sse in iterator: + if sse.data.startswith("[DONE]"): + break yield process_data(data=sse.json(), cast_to=cast_to, response=response) # Ensure the entire stream is consumed @@ -106,6 +108,8 @@ async def __aiter__(self) -> AsyncIterator[_T]: async def _iter_events(self) -> AsyncIterator[ServerSentEvent]: async for sse in self._decoder.aiter(self.response.aiter_lines()): + if sse.data.startswith("[DONE]"): + break yield sse async def __stream__(self) -> AsyncIterator[_T]: diff --git a/src/groq/resources/chat/completions.py b/src/groq/resources/chat/completions.py index 9f332f5a..26618039 100644 --- a/src/groq/resources/chat/completions.py +++ b/src/groq/resources/chat/completions.py @@ -2,10 +2,11 @@ from __future__ import annotations -from typing import Dict, List, Union, Iterable, Optional +from typing import Dict, List, Literal, Union, Iterable, Optional, overload import httpx +from ...lib.chat_completion_chunk import ChatCompletionChunk from ..._types import NOT_GIVEN, Body, Query, Headers, NotGiven from ..._utils import maybe_transform from ..._compat import cached_property @@ -16,6 +17,7 @@ async_to_raw_response_wrapper, async_to_streamed_response_wrapper, ) +from ..._streaming import AsyncStream, Stream from ...types.chat import ChatCompletion, completion_create_params from ..._base_client import ( make_request_options, @@ -33,6 +35,7 @@ def with_raw_response(self) -> CompletionsWithRawResponse: def with_streaming_response(self) -> CompletionsWithStreamingResponse: return CompletionsWithStreamingResponse(self) + @overload def create( self, *, @@ -47,7 +50,7 @@ def create( response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN, seed: int | NotGiven = NOT_GIVEN, stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN, - stream: bool | NotGiven = NOT_GIVEN, + stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN, temperature: float | NotGiven = NOT_GIVEN, tool_choice: completion_create_params.ToolChoice | NotGiven = NOT_GIVEN, tools: Iterable[completion_create_params.Tool] | NotGiven = NOT_GIVEN, @@ -61,6 +64,98 @@ def create( extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> ChatCompletion: + ... 
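(Note on the `_streaming.py` hunk earlier in this patch: it breaks out of the event loop as soon as a server-sent event's data is the literal `[DONE]` marker — the conventional end-of-stream sentinel in OpenAI-style streaming APIs — so the sentinel is never parsed as a chunk. A rough standalone sketch of that idea, with hypothetical event payloads that are not taken from the Groq API:)

```python
# Rough sketch: stop consuming server-sent events at the "[DONE]" sentinel.
# The event payloads below are hypothetical, not taken from the Groq API.
import json
from typing import Iterator


def iter_chunks(sse_data_lines: Iterator[str]) -> Iterator[dict]:
    for data in sse_data_lines:
        if data.startswith("[DONE]"):  # end-of-stream marker: stop, don't parse
            break
        yield json.loads(data)


events = iter(['{"delta": "Hello"}', '{"delta": " world"}', "[DONE]"])
for chunk in iter_chunks(events):
    print(chunk["delta"], end="")
```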
+ + @overload + def create( + self, + *, + frequency_penalty: float | NotGiven = NOT_GIVEN, + logit_bias: Dict[str, int] | NotGiven = NOT_GIVEN, + logprobs: bool | NotGiven = NOT_GIVEN, + max_tokens: int | NotGiven = NOT_GIVEN, + messages: Iterable[completion_create_params.Message] | NotGiven = NOT_GIVEN, + model: str | NotGiven = NOT_GIVEN, + n: int | NotGiven = NOT_GIVEN, + presence_penalty: float | NotGiven = NOT_GIVEN, + response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN, + seed: int | NotGiven = NOT_GIVEN, + stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN, + stream: Literal[True], + temperature: float | NotGiven = NOT_GIVEN, + tool_choice: completion_create_params.ToolChoice | NotGiven = NOT_GIVEN, + tools: Iterable[completion_create_params.Tool] | NotGiven = NOT_GIVEN, + top_logprobs: int | NotGiven = NOT_GIVEN, + top_p: float | NotGiven = NOT_GIVEN, + user: str | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Stream[ChatCompletionChunk]: + ... + + @overload + def create( + self, + *, + frequency_penalty: float | NotGiven = NOT_GIVEN, + logit_bias: Dict[str, int] | NotGiven = NOT_GIVEN, + logprobs: bool | NotGiven = NOT_GIVEN, + max_tokens: int | NotGiven = NOT_GIVEN, + messages: Iterable[completion_create_params.Message] | NotGiven = NOT_GIVEN, + model: str | NotGiven = NOT_GIVEN, + n: int | NotGiven = NOT_GIVEN, + presence_penalty: float | NotGiven = NOT_GIVEN, + response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN, + seed: int | NotGiven = NOT_GIVEN, + stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN, + stream: bool, + temperature: float | NotGiven = NOT_GIVEN, + tool_choice: completion_create_params.ToolChoice | NotGiven = NOT_GIVEN, + tools: Iterable[completion_create_params.Tool] | NotGiven = NOT_GIVEN, + top_logprobs: int | NotGiven = NOT_GIVEN, + top_p: float | NotGiven = NOT_GIVEN, + user: str | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> ChatCompletion | Stream[ChatCompletionChunk]: + ... 
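(The three `@overload` declarations above exist so that type checkers can narrow the return type from the value passed for `stream` — `Literal[False]` yields `ChatCompletion`, `Literal[True]` yields `Stream[ChatCompletionChunk]`, plain `bool` yields the union — while a single runtime implementation, which follows, does the actual work. A minimal, self-contained sketch of the same pattern, with placeholder classes standing in for the SDK types; illustrative only, not code from this patch:)

```python
# Standalone sketch of the overload pattern used above (illustrative only;
# the class names are placeholders for ChatCompletion / Stream[ChatCompletionChunk]).
from typing import Literal, Union, overload


class Completion:
    """Stand-in for ChatCompletion."""


class ChunkStream:
    """Stand-in for Stream[ChatCompletionChunk]."""


@overload
def create(*, stream: Literal[False] = False) -> Completion: ...
@overload
def create(*, stream: Literal[True]) -> ChunkStream: ...
@overload
def create(*, stream: bool) -> Union[Completion, ChunkStream]: ...


def create(*, stream: bool = False) -> Union[Completion, ChunkStream]:
    # Single runtime implementation; only the value of `stream` decides
    # which kind of object comes back.
    return ChunkStream() if stream else Completion()


blocking = create()              # type checkers infer Completion here
streaming = create(stream=True)  # type checkers infer ChunkStream here
```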
+ + def create( + self, + *, + frequency_penalty: float | NotGiven = NOT_GIVEN, + logit_bias: Dict[str, int] | NotGiven = NOT_GIVEN, + logprobs: bool | NotGiven = NOT_GIVEN, + max_tokens: int | NotGiven = NOT_GIVEN, + messages: Iterable[completion_create_params.Message] | NotGiven = NOT_GIVEN, + model: str | NotGiven = NOT_GIVEN, + n: int | NotGiven = NOT_GIVEN, + presence_penalty: float | NotGiven = NOT_GIVEN, + response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN, + seed: int | NotGiven = NOT_GIVEN, + stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN, + stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN, + temperature: float | NotGiven = NOT_GIVEN, + tool_choice: completion_create_params.ToolChoice | NotGiven = NOT_GIVEN, + tools: Iterable[completion_create_params.Tool] | NotGiven = NOT_GIVEN, + top_logprobs: int | NotGiven = NOT_GIVEN, + top_p: float | NotGiven = NOT_GIVEN, + user: str | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> ChatCompletion | Stream[ChatCompletionChunk]: """ Creates a completion for a chat prompt @@ -105,6 +200,8 @@ def create( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout ), cast_to=ChatCompletion, + stream=stream or False, + stream_cls=Stream[ChatCompletionChunk], ) @@ -117,6 +214,7 @@ def with_raw_response(self) -> AsyncCompletionsWithRawResponse: def with_streaming_response(self) -> AsyncCompletionsWithStreamingResponse: return AsyncCompletionsWithStreamingResponse(self) + @overload async def create( self, *, @@ -131,7 +229,7 @@ async def create( response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN, seed: int | NotGiven = NOT_GIVEN, stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN, - stream: bool | NotGiven = NOT_GIVEN, + stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN, temperature: float | NotGiven = NOT_GIVEN, tool_choice: completion_create_params.ToolChoice | NotGiven = NOT_GIVEN, tools: Iterable[completion_create_params.Tool] | NotGiven = NOT_GIVEN, @@ -145,6 +243,98 @@ async def create( extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> ChatCompletion: + ... 
+ + @overload + async def create( + self, + *, + frequency_penalty: float | NotGiven = NOT_GIVEN, + logit_bias: Dict[str, int] | NotGiven = NOT_GIVEN, + logprobs: bool | NotGiven = NOT_GIVEN, + max_tokens: int | NotGiven = NOT_GIVEN, + messages: Iterable[completion_create_params.Message] | NotGiven = NOT_GIVEN, + model: str | NotGiven = NOT_GIVEN, + n: int | NotGiven = NOT_GIVEN, + presence_penalty: float | NotGiven = NOT_GIVEN, + response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN, + seed: int | NotGiven = NOT_GIVEN, + stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN, + stream: Literal[True], + temperature: float | NotGiven = NOT_GIVEN, + tool_choice: completion_create_params.ToolChoice | NotGiven = NOT_GIVEN, + tools: Iterable[completion_create_params.Tool] | NotGiven = NOT_GIVEN, + top_logprobs: int | NotGiven = NOT_GIVEN, + top_p: float | NotGiven = NOT_GIVEN, + user: str | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> AsyncStream[ChatCompletionChunk]: + ... + + @overload + async def create( + self, + *, + frequency_penalty: float | NotGiven = NOT_GIVEN, + logit_bias: Dict[str, int] | NotGiven = NOT_GIVEN, + logprobs: bool | NotGiven = NOT_GIVEN, + max_tokens: int | NotGiven = NOT_GIVEN, + messages: Iterable[completion_create_params.Message] | NotGiven = NOT_GIVEN, + model: str | NotGiven = NOT_GIVEN, + n: int | NotGiven = NOT_GIVEN, + presence_penalty: float | NotGiven = NOT_GIVEN, + response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN, + seed: int | NotGiven = NOT_GIVEN, + stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN, + stream: bool, + temperature: float | NotGiven = NOT_GIVEN, + tool_choice: completion_create_params.ToolChoice | NotGiven = NOT_GIVEN, + tools: Iterable[completion_create_params.Tool] | NotGiven = NOT_GIVEN, + top_logprobs: int | NotGiven = NOT_GIVEN, + top_p: float | NotGiven = NOT_GIVEN, + user: str | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> ChatCompletion | AsyncStream[ChatCompletionChunk]: + ... 
+ + async def create( + self, + *, + frequency_penalty: float | NotGiven = NOT_GIVEN, + logit_bias: Dict[str, int] | NotGiven = NOT_GIVEN, + logprobs: bool | NotGiven = NOT_GIVEN, + max_tokens: int | NotGiven = NOT_GIVEN, + messages: Iterable[completion_create_params.Message] | NotGiven = NOT_GIVEN, + model: str | NotGiven = NOT_GIVEN, + n: int | NotGiven = NOT_GIVEN, + presence_penalty: float | NotGiven = NOT_GIVEN, + response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN, + seed: int | NotGiven = NOT_GIVEN, + stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN, + stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN, + temperature: float | NotGiven = NOT_GIVEN, + tool_choice: completion_create_params.ToolChoice | NotGiven = NOT_GIVEN, + tools: Iterable[completion_create_params.Tool] | NotGiven = NOT_GIVEN, + top_logprobs: int | NotGiven = NOT_GIVEN, + top_p: float | NotGiven = NOT_GIVEN, + user: str | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> ChatCompletion | AsyncStream[ChatCompletionChunk]: """ Creates a completion for a chat prompt @@ -189,6 +379,8 @@ async def create( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout ), cast_to=ChatCompletion, + stream=stream or False, + stream_cls=AsyncStream[ChatCompletionChunk], ) From af9a838e240bb0f7385bc33fb18ce246427ca2f7 Mon Sep 17 00:00:00 2001 From: "stainless-app[bot]" <142633134+stainless-app[bot]@users.noreply.github.com> Date: Thu, 15 Feb 2024 23:04:59 +0000 Subject: [PATCH 3/6] chore: go live (#5) --- .github/workflows/ci.yml | 4 +- README.md | 4 +- bin/check-release-environment | 4 +- bin/check-test-server | 0 bin/test | 0 examples/chat_completion.py | 14 +- examples/chat_completion_async.py | 16 +- examples/chat_completion_async_streaming.py | 15 +- examples/chat_completion_stop.py | 16 +- examples/chat_completion_streaming.py | 14 +- src/groq/_streaming.py | 4 - src/groq/resources/chat/completions.py | 214 +----------------- src/groq/types/chat/chat_completion.py | 16 +- .../types/chat/completion_create_params.py | 16 +- tests/api_resources/chat/test_completions.py | 124 ++++++++-- 15 files changed, 158 insertions(+), 303 deletions(-) mode change 100644 => 100755 bin/check-test-server mode change 100644 => 100755 bin/test diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index ebfa5cee..aea1868f 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -2,10 +2,10 @@ name: CI on: push: branches: - - main + - stainless pull_request: branches: - - main + - stainless jobs: lint: diff --git a/README.md b/README.md index 0c14ce0e..5c0abddb 100644 --- a/README.md +++ b/README.md @@ -261,9 +261,9 @@ completion = response.parse() # get the object that `chat.completions.create()` print(completion.id) ``` -These methods return an [`APIResponse`](https://github.com/groq/groq-python/tree/main/src/groq/_response.py) object. +These methods return an [`APIResponse`](https://github.com/groq/groq-python/tree/stainless/src/groq/_response.py) object. 
-The async client returns an [`AsyncAPIResponse`](https://github.com/groq/groq-python/tree/main/src/groq/_response.py) with the same structure, the only difference being `await`able methods for reading the response content. +The async client returns an [`AsyncAPIResponse`](https://github.com/groq/groq-python/tree/stainless/src/groq/_response.py) with the same structure, the only difference being `await`able methods for reading the response content. #### `.with_streaming_response` diff --git a/bin/check-release-environment b/bin/check-release-environment index 29306d89..e35a3711 100644 --- a/bin/check-release-environment +++ b/bin/check-release-environment @@ -6,9 +6,9 @@ if [ -z "${PYPI_TOKEN}" ]; then errors+=("The GROQ_PYPI_TOKEN secret has not been set. Please set it in either this repository's secrets or your organization secrets.") fi -len=${#errors[@]} +lenErrors=${#errors[@]} -if [[ len -gt 0 ]]; then +if [[ lenErrors -gt 0 ]]; then echo -e "Found the following errors in the release environment:\n" for error in "${errors[@]}"; do diff --git a/bin/check-test-server b/bin/check-test-server old mode 100644 new mode 100755 diff --git a/bin/test b/bin/test old mode 100644 new mode 100755 diff --git a/examples/chat_completion.py b/examples/chat_completion.py index 18ea48ec..06e7664a 100644 --- a/examples/chat_completion.py +++ b/examples/chat_completion.py @@ -10,43 +10,33 @@ # Set an optional system message. This sets the behavior of the # assistant and can be used to provide specific instructions for # how it should behave throughout the conversation. - { - "role": "system", - "content": "you are a helpful assistant." - }, + {"role": "system", "content": "you are a helpful assistant."}, # Set a user message for the assistant to respond to. { "role": "user", "content": "Explain the importance of low latency LLMs", - } + }, ], - # The language model which will generate the completion. model="mixtral-8x7b-32768", - # # Optional parameters # - # Controls randomness: lowering results in less random completions. # As the temperature approaches zero, the model will become deterministic # and repetitive. temperature=0.5, - # The maximum number of tokens to generate. Requests can use up to # 2048 tokens shared between prompt and completion. max_tokens=1024, - # Controls diversity via nucleus sampling: 0.5 means half of all # likelihood-weighted options are considered. top_p=1, - # A stop sequence is a predefined or user-specified text string that # signals an AI to stop generating content, ensuring its responses # remain focused and concise. Examples include punctuation marks and # markers like "[end]". stop=None, - # If set, partial message deltas will be sent. stream=False, ) diff --git a/examples/chat_completion_async.py b/examples/chat_completion_async.py index 58b1974b..b1f0fde3 100644 --- a/examples/chat_completion_async.py +++ b/examples/chat_completion_async.py @@ -2,6 +2,7 @@ from groq import AsyncGroq + async def main(): client = AsyncGroq() @@ -13,43 +14,33 @@ async def main(): # Set an optional system message. This sets the behavior of the # assistant and can be used to provide specific instructions for # how it should behave throughout the conversation. - { - "role": "system", - "content": "you are a helpful assistant." - }, + {"role": "system", "content": "you are a helpful assistant."}, # Set a user message for the assistant to respond to. 
{ "role": "user", "content": "Explain the importance of low latency LLMs", - } + }, ], - # The language model which will generate the completion. model="mixtral-8x7b-32768", - # # Optional parameters # - # Controls randomness: lowering results in less random completions. # As the temperature approaches zero, the model will become # deterministic and repetitive. temperature=0.5, - # The maximum number of tokens to generate. Requests can use up to # 2048 tokens shared between prompt and completion. max_tokens=1024, - # Controls diversity via nucleus sampling: 0.5 means half of all # likelihood-weighted options are considered. top_p=1, - # A stop sequence is a predefined or user-specified text string that # signals an AI to stop generating content, ensuring its responses # remain focused and concise. Examples include punctuation marks and # markers like "[end]". stop=None, - # If set, partial message deltas will be sent. stream=False, ) @@ -57,4 +48,5 @@ async def main(): # Print the completion returned by the LLM. print(chat_completion.choices[0].message.content) + asyncio.run(main()) diff --git a/examples/chat_completion_async_streaming.py b/examples/chat_completion_async_streaming.py index 1e71f737..594d61af 100644 --- a/examples/chat_completion_async_streaming.py +++ b/examples/chat_completion_async_streaming.py @@ -2,6 +2,7 @@ from groq import AsyncGroq + async def main(): client = AsyncGroq() @@ -13,39 +14,30 @@ async def main(): # Set an optional system message. This sets the behavior of the # assistant and can be used to provide specific instructions for # how it should behave throughout the conversation. - { - "role": "system", - "content": "you are a helpful assistant." - }, + {"role": "system", "content": "you are a helpful assistant."}, # Set a user message for the assistant to respond to. { "role": "user", "content": "Explain the importance of low latency LLMs", - } + }, ], - # The language model which will generate the completion. model="mixtral-8x7b-32768", - # # Optional parameters # - # Controls randomness: lowering results in less random completions. # As the temperature approaches zero, the model will become # deterministic and repetitive. temperature=0.5, - # The maximum number of tokens to generate. Requests can use up to # 2048 tokens shared between prompt and completion. max_tokens=1024, - # A stop sequence is a predefined or user-specified text string that # signals an AI to stop generating content, ensuring its responses # remain focused and concise. Examples include punctuation marks and # markers like "[end]". stop=None, - # Controls diversity via nucleus sampling: 0.5 means half of all # likelihood-weighted options are considered. stream=True, @@ -55,4 +47,5 @@ async def main(): async for chunk in stream: print(chunk.choices[0].delta.content, end="") + asyncio.run(main()) diff --git a/examples/chat_completion_stop.py b/examples/chat_completion_stop.py index 1c749bca..d68a90df 100644 --- a/examples/chat_completion_stop.py +++ b/examples/chat_completion_stop.py @@ -10,37 +10,28 @@ # Set an optional system message. This sets the behavior of the # assistant and can be used to provide specific instructions for # how it should behave throughout the conversation. - { - "role": "system", - "content": "you are a helpful assistant." - }, + {"role": "system", "content": "you are a helpful assistant."}, # Set a user message for the assistant to respond to. { "role": "user", - "content": "Count to 10. Your response must begin with \"1, \". 
example: 1, 2, 3, ...", - } + "content": 'Count to 10. Your response must begin with "1, ". example: 1, 2, 3, ...', + }, ], - # The language model which will generate the completion. model="mixtral-8x7b-32768", - # # Optional parameters # - # Controls randomness: lowering results in less random completions. # As the temperature approaches zero, the model will become deterministic # and repetitive. temperature=0.5, - # The maximum number of tokens to generate. Requests can use up to # 2048 tokens shared between prompt and completion. max_tokens=1024, - # Controls diversity via nucleus sampling: 0.5 means half of all # likelihood-weighted options are considered. top_p=1, - # A stop sequence is a predefined or user-specified text string that # signals an AI to stop generating content, ensuring its responses # remain focused and concise. Examples include punctuation marks and @@ -49,7 +40,6 @@ # If multiple stop values are needed, an array of string may be passed, # stop=[", 6", ", six", ", Six"] stop=", 6", - # If set, partial message deltas will be sent. stream=False, ) diff --git a/examples/chat_completion_streaming.py b/examples/chat_completion_streaming.py index dd3edbcd..5bae3a79 100644 --- a/examples/chat_completion_streaming.py +++ b/examples/chat_completion_streaming.py @@ -10,43 +10,33 @@ # Set an optional system message. This sets the behavior of the # assistant and can be used to provide specific instructions for # how it should behave throughout the conversation. - { - "role": "system", - "content": "you are a helpful assistant." - }, + {"role": "system", "content": "you are a helpful assistant."}, # Set a user message for the assistant to respond to. { "role": "user", "content": "Explain the importance of low latency LLMs", - } + }, ], - # The language model which will generate the completion. model="mixtral-8x7b-32768", - # # Optional parameters # - # Controls randomness: lowering results in less random completions. # As the temperature approaches zero, the model will become deterministic # and repetitive. temperature=0.5, - # The maximum number of tokens to generate. Requests can use up to # 2048 tokens shared between prompt and completion. max_tokens=1024, - # Controls diversity via nucleus sampling: 0.5 means half of all # likelihood-weighted options are considered. top_p=1, - # A stop sequence is a predefined or user-specified text string that # signals an AI to stop generating content, ensuring its responses # remain focused and concise. Examples include punctuation marks and # markers like "[end]". stop=None, - # If set, partial message deltas will be sent. 
stream=True, ) diff --git a/src/groq/_streaming.py b/src/groq/_streaming.py index 2769874c..ac0ea8aa 100644 --- a/src/groq/_streaming.py +++ b/src/groq/_streaming.py @@ -53,8 +53,6 @@ def __stream__(self) -> Iterator[_T]: iterator = self._iter_events() for sse in iterator: - if sse.data.startswith("[DONE]"): - break yield process_data(data=sse.json(), cast_to=cast_to, response=response) # Ensure the entire stream is consumed @@ -108,8 +106,6 @@ async def __aiter__(self) -> AsyncIterator[_T]: async def _iter_events(self) -> AsyncIterator[ServerSentEvent]: async for sse in self._decoder.aiter(self.response.aiter_lines()): - if sse.data.startswith("[DONE]"): - break yield sse async def __stream__(self) -> AsyncIterator[_T]: diff --git a/src/groq/resources/chat/completions.py b/src/groq/resources/chat/completions.py index 26618039..f1c5a023 100644 --- a/src/groq/resources/chat/completions.py +++ b/src/groq/resources/chat/completions.py @@ -2,11 +2,10 @@ from __future__ import annotations -from typing import Dict, List, Literal, Union, Iterable, Optional, overload +from typing import Dict, List, Union, Iterable, Optional import httpx -from ...lib.chat_completion_chunk import ChatCompletionChunk from ..._types import NOT_GIVEN, Body, Query, Headers, NotGiven from ..._utils import maybe_transform from ..._compat import cached_property @@ -17,7 +16,6 @@ async_to_raw_response_wrapper, async_to_streamed_response_wrapper, ) -from ..._streaming import AsyncStream, Stream from ...types.chat import ChatCompletion, completion_create_params from ..._base_client import ( make_request_options, @@ -35,22 +33,21 @@ def with_raw_response(self) -> CompletionsWithRawResponse: def with_streaming_response(self) -> CompletionsWithStreamingResponse: return CompletionsWithStreamingResponse(self) - @overload def create( self, *, + messages: Iterable[completion_create_params.Message], + model: str, frequency_penalty: float | NotGiven = NOT_GIVEN, logit_bias: Dict[str, int] | NotGiven = NOT_GIVEN, logprobs: bool | NotGiven = NOT_GIVEN, max_tokens: int | NotGiven = NOT_GIVEN, - messages: Iterable[completion_create_params.Message] | NotGiven = NOT_GIVEN, - model: str | NotGiven = NOT_GIVEN, n: int | NotGiven = NOT_GIVEN, presence_penalty: float | NotGiven = NOT_GIVEN, response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN, seed: int | NotGiven = NOT_GIVEN, stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN, - stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN, + stream: bool | NotGiven = NOT_GIVEN, temperature: float | NotGiven = NOT_GIVEN, tool_choice: completion_create_params.ToolChoice | NotGiven = NOT_GIVEN, tools: Iterable[completion_create_params.Tool] | NotGiven = NOT_GIVEN, @@ -64,98 +61,6 @@ def create( extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> ChatCompletion: - ... 
- - @overload - def create( - self, - *, - frequency_penalty: float | NotGiven = NOT_GIVEN, - logit_bias: Dict[str, int] | NotGiven = NOT_GIVEN, - logprobs: bool | NotGiven = NOT_GIVEN, - max_tokens: int | NotGiven = NOT_GIVEN, - messages: Iterable[completion_create_params.Message] | NotGiven = NOT_GIVEN, - model: str | NotGiven = NOT_GIVEN, - n: int | NotGiven = NOT_GIVEN, - presence_penalty: float | NotGiven = NOT_GIVEN, - response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN, - seed: int | NotGiven = NOT_GIVEN, - stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN, - stream: Literal[True], - temperature: float | NotGiven = NOT_GIVEN, - tool_choice: completion_create_params.ToolChoice | NotGiven = NOT_GIVEN, - tools: Iterable[completion_create_params.Tool] | NotGiven = NOT_GIVEN, - top_logprobs: int | NotGiven = NOT_GIVEN, - top_p: float | NotGiven = NOT_GIVEN, - user: str | NotGiven = NOT_GIVEN, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> Stream[ChatCompletionChunk]: - ... - - @overload - def create( - self, - *, - frequency_penalty: float | NotGiven = NOT_GIVEN, - logit_bias: Dict[str, int] | NotGiven = NOT_GIVEN, - logprobs: bool | NotGiven = NOT_GIVEN, - max_tokens: int | NotGiven = NOT_GIVEN, - messages: Iterable[completion_create_params.Message] | NotGiven = NOT_GIVEN, - model: str | NotGiven = NOT_GIVEN, - n: int | NotGiven = NOT_GIVEN, - presence_penalty: float | NotGiven = NOT_GIVEN, - response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN, - seed: int | NotGiven = NOT_GIVEN, - stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN, - stream: bool, - temperature: float | NotGiven = NOT_GIVEN, - tool_choice: completion_create_params.ToolChoice | NotGiven = NOT_GIVEN, - tools: Iterable[completion_create_params.Tool] | NotGiven = NOT_GIVEN, - top_logprobs: int | NotGiven = NOT_GIVEN, - top_p: float | NotGiven = NOT_GIVEN, - user: str | NotGiven = NOT_GIVEN, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> ChatCompletion | Stream[ChatCompletionChunk]: - ... 
- - def create( - self, - *, - frequency_penalty: float | NotGiven = NOT_GIVEN, - logit_bias: Dict[str, int] | NotGiven = NOT_GIVEN, - logprobs: bool | NotGiven = NOT_GIVEN, - max_tokens: int | NotGiven = NOT_GIVEN, - messages: Iterable[completion_create_params.Message] | NotGiven = NOT_GIVEN, - model: str | NotGiven = NOT_GIVEN, - n: int | NotGiven = NOT_GIVEN, - presence_penalty: float | NotGiven = NOT_GIVEN, - response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN, - seed: int | NotGiven = NOT_GIVEN, - stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN, - stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN, - temperature: float | NotGiven = NOT_GIVEN, - tool_choice: completion_create_params.ToolChoice | NotGiven = NOT_GIVEN, - tools: Iterable[completion_create_params.Tool] | NotGiven = NOT_GIVEN, - top_logprobs: int | NotGiven = NOT_GIVEN, - top_p: float | NotGiven = NOT_GIVEN, - user: str | NotGiven = NOT_GIVEN, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> ChatCompletion | Stream[ChatCompletionChunk]: """ Creates a completion for a chat prompt @@ -175,12 +80,12 @@ def create( "/openai/v1/chat/completions", body=maybe_transform( { + "messages": messages, + "model": model, "frequency_penalty": frequency_penalty, "logit_bias": logit_bias, "logprobs": logprobs, "max_tokens": max_tokens, - "messages": messages, - "model": model, "n": n, "presence_penalty": presence_penalty, "response_format": response_format, @@ -200,8 +105,6 @@ def create( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout ), cast_to=ChatCompletion, - stream=stream or False, - stream_cls=Stream[ChatCompletionChunk], ) @@ -214,22 +117,21 @@ def with_raw_response(self) -> AsyncCompletionsWithRawResponse: def with_streaming_response(self) -> AsyncCompletionsWithStreamingResponse: return AsyncCompletionsWithStreamingResponse(self) - @overload async def create( self, *, + messages: Iterable[completion_create_params.Message], + model: str, frequency_penalty: float | NotGiven = NOT_GIVEN, logit_bias: Dict[str, int] | NotGiven = NOT_GIVEN, logprobs: bool | NotGiven = NOT_GIVEN, max_tokens: int | NotGiven = NOT_GIVEN, - messages: Iterable[completion_create_params.Message] | NotGiven = NOT_GIVEN, - model: str | NotGiven = NOT_GIVEN, n: int | NotGiven = NOT_GIVEN, presence_penalty: float | NotGiven = NOT_GIVEN, response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN, seed: int | NotGiven = NOT_GIVEN, stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN, - stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN, + stream: bool | NotGiven = NOT_GIVEN, temperature: float | NotGiven = NOT_GIVEN, tool_choice: completion_create_params.ToolChoice | NotGiven = NOT_GIVEN, tools: Iterable[completion_create_params.Tool] | NotGiven = NOT_GIVEN, @@ -243,98 +145,6 @@ async def create( extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> ChatCompletion: - ... 
- - @overload - async def create( - self, - *, - frequency_penalty: float | NotGiven = NOT_GIVEN, - logit_bias: Dict[str, int] | NotGiven = NOT_GIVEN, - logprobs: bool | NotGiven = NOT_GIVEN, - max_tokens: int | NotGiven = NOT_GIVEN, - messages: Iterable[completion_create_params.Message] | NotGiven = NOT_GIVEN, - model: str | NotGiven = NOT_GIVEN, - n: int | NotGiven = NOT_GIVEN, - presence_penalty: float | NotGiven = NOT_GIVEN, - response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN, - seed: int | NotGiven = NOT_GIVEN, - stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN, - stream: Literal[True], - temperature: float | NotGiven = NOT_GIVEN, - tool_choice: completion_create_params.ToolChoice | NotGiven = NOT_GIVEN, - tools: Iterable[completion_create_params.Tool] | NotGiven = NOT_GIVEN, - top_logprobs: int | NotGiven = NOT_GIVEN, - top_p: float | NotGiven = NOT_GIVEN, - user: str | NotGiven = NOT_GIVEN, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> AsyncStream[ChatCompletionChunk]: - ... - - @overload - async def create( - self, - *, - frequency_penalty: float | NotGiven = NOT_GIVEN, - logit_bias: Dict[str, int] | NotGiven = NOT_GIVEN, - logprobs: bool | NotGiven = NOT_GIVEN, - max_tokens: int | NotGiven = NOT_GIVEN, - messages: Iterable[completion_create_params.Message] | NotGiven = NOT_GIVEN, - model: str | NotGiven = NOT_GIVEN, - n: int | NotGiven = NOT_GIVEN, - presence_penalty: float | NotGiven = NOT_GIVEN, - response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN, - seed: int | NotGiven = NOT_GIVEN, - stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN, - stream: bool, - temperature: float | NotGiven = NOT_GIVEN, - tool_choice: completion_create_params.ToolChoice | NotGiven = NOT_GIVEN, - tools: Iterable[completion_create_params.Tool] | NotGiven = NOT_GIVEN, - top_logprobs: int | NotGiven = NOT_GIVEN, - top_p: float | NotGiven = NOT_GIVEN, - user: str | NotGiven = NOT_GIVEN, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> ChatCompletion | AsyncStream[ChatCompletionChunk]: - ... 
- - async def create( - self, - *, - frequency_penalty: float | NotGiven = NOT_GIVEN, - logit_bias: Dict[str, int] | NotGiven = NOT_GIVEN, - logprobs: bool | NotGiven = NOT_GIVEN, - max_tokens: int | NotGiven = NOT_GIVEN, - messages: Iterable[completion_create_params.Message] | NotGiven = NOT_GIVEN, - model: str | NotGiven = NOT_GIVEN, - n: int | NotGiven = NOT_GIVEN, - presence_penalty: float | NotGiven = NOT_GIVEN, - response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN, - seed: int | NotGiven = NOT_GIVEN, - stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN, - stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN, - temperature: float | NotGiven = NOT_GIVEN, - tool_choice: completion_create_params.ToolChoice | NotGiven = NOT_GIVEN, - tools: Iterable[completion_create_params.Tool] | NotGiven = NOT_GIVEN, - top_logprobs: int | NotGiven = NOT_GIVEN, - top_p: float | NotGiven = NOT_GIVEN, - user: str | NotGiven = NOT_GIVEN, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> ChatCompletion | AsyncStream[ChatCompletionChunk]: """ Creates a completion for a chat prompt @@ -354,12 +164,12 @@ async def create( "/openai/v1/chat/completions", body=maybe_transform( { + "messages": messages, + "model": model, "frequency_penalty": frequency_penalty, "logit_bias": logit_bias, "logprobs": logprobs, "max_tokens": max_tokens, - "messages": messages, - "model": model, "n": n, "presence_penalty": presence_penalty, "response_format": response_format, @@ -379,8 +189,6 @@ async def create( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout ), cast_to=ChatCompletion, - stream=stream or False, - stream_cls=AsyncStream[ChatCompletionChunk], ) diff --git a/src/groq/types/chat/chat_completion.py b/src/groq/types/chat/chat_completion.py index 77365e4a..1ff1f5d5 100644 --- a/src/groq/types/chat/chat_completion.py +++ b/src/groq/types/chat/chat_completion.py @@ -54,21 +54,21 @@ class ChoiceMessageToolCall(BaseModel): class ChoiceMessage(BaseModel): - content: Optional[str] = None + content: str - role: Optional[str] = None + role: str tool_calls: Optional[List[ChoiceMessageToolCall]] = None class Choice(BaseModel): - finish_reason: Optional[str] = None + finish_reason: str - index: Optional[int] = None + index: int - logprobs: Optional[ChoiceLogprobs] = None + logprobs: ChoiceLogprobs - message: Optional[ChoiceMessage] = None + message: ChoiceMessage class Usage(BaseModel): @@ -86,9 +86,9 @@ class Usage(BaseModel): class ChatCompletion(BaseModel): - id: Optional[str] = None + choices: List[Choice] - choices: Optional[List[Choice]] = None + id: Optional[str] = None created: Optional[int] = None diff --git a/src/groq/types/chat/completion_create_params.py b/src/groq/types/chat/completion_create_params.py index 1aea7838..0f9712bc 100644 --- a/src/groq/types/chat/completion_create_params.py +++ b/src/groq/types/chat/completion_create_params.py @@ -3,7 +3,7 @@ from __future__ import annotations from typing import Dict, List, Union, Iterable, Optional -from typing_extensions import Annotated, TypedDict +from typing_extensions import Required, Annotated, TypedDict from 
..._utils import PropertyInfo @@ -22,6 +22,10 @@ class CompletionCreateParams(TypedDict, total=False): + messages: Required[Iterable[Message]] + + model: Required[str] + frequency_penalty: float logit_bias: Dict[str, int] @@ -30,10 +34,6 @@ class CompletionCreateParams(TypedDict, total=False): max_tokens: int - messages: Iterable[Message] - - model: str - n: int presence_penalty: float @@ -78,11 +78,11 @@ class MessageToolCall(TypedDict, total=False): class Message(TypedDict, total=False): - content: str + content: Required[str] - name: str + role: Required[str] - role: str + name: str tool_call_id: str """ToolMessage Fields""" diff --git a/tests/api_resources/chat/test_completions.py b/tests/api_resources/chat/test_completions.py index 1eabfe70..1fdfc34b 100644 --- a/tests/api_resources/chat/test_completions.py +++ b/tests/api_resources/chat/test_completions.py @@ -19,16 +19,28 @@ class TestCompletions: @parametrize def test_method_create(self, client: Groq) -> None: - completion = client.chat.completions.create() + completion = client.chat.completions.create( + messages=[ + { + "content": "string", + "role": "string", + }, + { + "content": "string", + "role": "string", + }, + { + "content": "string", + "role": "string", + }, + ], + model="string", + ) assert_matches_type(ChatCompletion, completion, path=["response"]) @parametrize def test_method_create_with_all_params(self, client: Groq) -> None: completion = client.chat.completions.create( - frequency_penalty=0, - logit_bias={"foo": 0}, - logprobs=True, - max_tokens=0, messages=[ { "content": "string", @@ -128,6 +140,10 @@ def test_method_create_with_all_params(self, client: Groq) -> None: }, ], model="string", + frequency_penalty=0, + logit_bias={"foo": 0}, + logprobs=True, + max_tokens=0, n=0, presence_penalty=0, response_format={"type": "string"}, @@ -176,7 +192,23 @@ def test_method_create_with_all_params(self, client: Groq) -> None: @parametrize def test_raw_response_create(self, client: Groq) -> None: - response = client.chat.completions.with_raw_response.create() + response = client.chat.completions.with_raw_response.create( + messages=[ + { + "content": "string", + "role": "string", + }, + { + "content": "string", + "role": "string", + }, + { + "content": "string", + "role": "string", + }, + ], + model="string", + ) assert response.is_closed is True assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -185,7 +217,23 @@ def test_raw_response_create(self, client: Groq) -> None: @parametrize def test_streaming_response_create(self, client: Groq) -> None: - with client.chat.completions.with_streaming_response.create() as response: + with client.chat.completions.with_streaming_response.create( + messages=[ + { + "content": "string", + "role": "string", + }, + { + "content": "string", + "role": "string", + }, + { + "content": "string", + "role": "string", + }, + ], + model="string", + ) as response: assert not response.is_closed assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -200,16 +248,28 @@ class TestAsyncCompletions: @parametrize async def test_method_create(self, async_client: AsyncGroq) -> None: - completion = await async_client.chat.completions.create() + completion = await async_client.chat.completions.create( + messages=[ + { + "content": "string", + "role": "string", + }, + { + "content": "string", + "role": "string", + }, + { + "content": "string", + "role": "string", + }, + ], + model="string", + ) assert_matches_type(ChatCompletion, completion, path=["response"]) 
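(In the `completion_create_params.py` hunk above, marking `messages` and `model` with `Required[...]` inside a `total=False` TypedDict is what makes just those two keys mandatory while every other field stays optional, which is why the updated tests in this patch now always pass both. A small standalone sketch of that pattern, with simplified placeholder field types; illustrative only, not code from this patch:)

```python
# Minimal sketch of the Required[...] pattern adopted above (illustrative only;
# the field types are simplified placeholders).
from typing_extensions import Required, TypedDict


class ExampleCreateParams(TypedDict, total=False):
    messages: Required[list]  # must always be supplied
    model: Required[str]      # must always be supplied
    max_tokens: int           # still optional because of total=False


ok: ExampleCreateParams = {"messages": [], "model": "example-model"}
# missing: ExampleCreateParams = {"model": "example-model"}
#   ^ a type checker reports that the required key "messages" is absent
```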
@parametrize async def test_method_create_with_all_params(self, async_client: AsyncGroq) -> None: completion = await async_client.chat.completions.create( - frequency_penalty=0, - logit_bias={"foo": 0}, - logprobs=True, - max_tokens=0, messages=[ { "content": "string", @@ -309,6 +369,10 @@ async def test_method_create_with_all_params(self, async_client: AsyncGroq) -> N }, ], model="string", + frequency_penalty=0, + logit_bias={"foo": 0}, + logprobs=True, + max_tokens=0, n=0, presence_penalty=0, response_format={"type": "string"}, @@ -357,7 +421,23 @@ async def test_method_create_with_all_params(self, async_client: AsyncGroq) -> N @parametrize async def test_raw_response_create(self, async_client: AsyncGroq) -> None: - response = await async_client.chat.completions.with_raw_response.create() + response = await async_client.chat.completions.with_raw_response.create( + messages=[ + { + "content": "string", + "role": "string", + }, + { + "content": "string", + "role": "string", + }, + { + "content": "string", + "role": "string", + }, + ], + model="string", + ) assert response.is_closed is True assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -366,7 +446,23 @@ async def test_raw_response_create(self, async_client: AsyncGroq) -> None: @parametrize async def test_streaming_response_create(self, async_client: AsyncGroq) -> None: - async with async_client.chat.completions.with_streaming_response.create() as response: + async with async_client.chat.completions.with_streaming_response.create( + messages=[ + { + "content": "string", + "role": "string", + }, + { + "content": "string", + "role": "string", + }, + { + "content": "string", + "role": "string", + }, + ], + model="string", + ) as response: assert not response.is_closed assert response.http_request.headers.get("X-Stainless-Lang") == "python" From b9b55b41cb158efd155f9cda829808c877493afd Mon Sep 17 00:00:00 2001 From: "stainless-app[bot]" <142633134+stainless-app[bot]@users.noreply.github.com> Date: Wed, 21 Feb 2024 01:49:56 +0000 Subject: [PATCH 4/6] chore: update branch (#8) --- .devcontainer/Dockerfile | 2 +- .github/workflows/ci.yml | 6 ++-- .github/workflows/publish-pypi.yml | 2 +- README.md | 8 ++--- pyproject.toml | 2 +- requirements-dev.lock | 48 ++++++++++++++++++++++++++++-- requirements.lock | 21 +++++++++++++ 7 files changed, 77 insertions(+), 12 deletions(-) diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile index 6eb00725..dd939620 100644 --- a/.devcontainer/Dockerfile +++ b/.devcontainer/Dockerfile @@ -3,7 +3,7 @@ FROM mcr.microsoft.com/vscode/devcontainers/python:0-${VARIANT} USER vscode -RUN curl -sSf https://rye-up.com/get | RYE_VERSION="0.15.2" RYE_INSTALL_OPTION="--yes" bash +RUN curl -sSf https://rye-up.com/get | RYE_VERSION="0.24.0" RYE_INSTALL_OPTION="--yes" bash ENV PATH=/home/vscode/.rye/shims:$PATH RUN echo "[[ -d .venv ]] && source .venv/bin/activate" >> /home/vscode/.bashrc diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index aea1868f..fca066ea 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -2,10 +2,10 @@ name: CI on: push: branches: - - stainless + - main pull_request: branches: - - stainless + - main jobs: lint: @@ -21,7 +21,7 @@ jobs: curl -sSf https://rye-up.com/get | bash echo "$HOME/.rye/shims" >> $GITHUB_PATH env: - RYE_VERSION: 0.15.2 + RYE_VERSION: 0.24.0 RYE_INSTALL_OPTION: "--yes" - name: Install dependencies diff --git a/.github/workflows/publish-pypi.yml b/.github/workflows/publish-pypi.yml index 14ddce27..962b8c4f 100644 
--- a/.github/workflows/publish-pypi.yml +++ b/.github/workflows/publish-pypi.yml @@ -21,7 +21,7 @@ jobs: curl -sSf https://rye-up.com/get | bash echo "$HOME/.rye/shims" >> $GITHUB_PATH env: - RYE_VERSION: 0.15.2 + RYE_VERSION: 0.24.0 RYE_INSTALL_OPTION: "--yes" - name: Publish to PyPI diff --git a/README.md b/README.md index 5c0abddb..449ef182 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # Groq Python API library -[![PyPI version](https://img.shields.io/pypi/v/groq.svg)](https://pypi.org/project/groq/) +[![PyPI version](https://img.shields.io/pypi/v/groq-sdk.svg)](https://pypi.org/project/groq-sdk/) The Groq Python library provides convenient access to the Groq REST API from any Python 3.7+ application. The library includes type definitions for all request params and response fields, @@ -13,7 +13,7 @@ The REST API documentation can be found [on console.groq.com](https://console.gr ## Installation ```sh -pip install groq +pip install groq-sdk ``` ## Usage @@ -261,9 +261,9 @@ completion = response.parse() # get the object that `chat.completions.create()` print(completion.id) ``` -These methods return an [`APIResponse`](https://github.com/groq/groq-python/tree/stainless/src/groq/_response.py) object. +These methods return an [`APIResponse`](https://github.com/groq/groq-python/tree/main/src/groq/_response.py) object. -The async client returns an [`AsyncAPIResponse`](https://github.com/groq/groq-python/tree/stainless/src/groq/_response.py) with the same structure, the only difference being `await`able methods for reading the response content. +The async client returns an [`AsyncAPIResponse`](https://github.com/groq/groq-python/tree/main/src/groq/_response.py) with the same structure, the only difference being `await`able methods for reading the response content. #### `.with_streaming_response` diff --git a/pyproject.toml b/pyproject.toml index f3c156bb..9a9918fe 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,5 +1,5 @@ [project] -name = "groq" +name = "groq-sdk" version = "0.1.0" description = "The official Python library for the groq API" readme = "README.md" diff --git a/requirements-dev.lock b/requirements-dev.lock index a0134d65..36fa001d 100644 --- a/requirements-dev.lock +++ b/requirements-dev.lock @@ -5,48 +5,92 @@ # pre: false # features: [] # all-features: true +# with-sources: false -e file:. 
annotated-types==0.6.0 + # via pydantic anyio==4.1.0 + # via groq-sdk + # via httpx argcomplete==3.1.2 + # via nox attrs==23.1.0 + # via pytest certifi==2023.7.22 + # via httpcore + # via httpx colorlog==6.7.0 + # via nox dirty-equals==0.6.0 distlib==0.3.7 + # via virtualenv distro==1.8.0 + # via groq-sdk exceptiongroup==1.1.3 + # via anyio filelock==3.12.4 + # via virtualenv h11==0.14.0 + # via httpcore httpcore==1.0.2 + # via httpx httpx==0.25.2 + # via groq-sdk + # via respx idna==3.4 + # via anyio + # via httpx importlib-metadata==7.0.0 iniconfig==2.0.0 + # via pytest mypy==1.7.1 mypy-extensions==1.0.0 + # via mypy nodeenv==1.8.0 + # via pyright nox==2023.4.22 packaging==23.2 + # via nox + # via pytest platformdirs==3.11.0 + # via virtualenv pluggy==1.3.0 + # via pytest py==1.11.0 + # via pytest pydantic==2.4.2 + # via groq-sdk pydantic-core==2.10.1 + # via pydantic pyright==1.1.332 pytest==7.1.1 + # via pytest-asyncio pytest-asyncio==0.21.1 python-dateutil==2.8.2 + # via time-machine pytz==2023.3.post1 + # via dirty-equals respx==0.20.2 ruff==0.1.9 +setuptools==68.2.2 + # via nodeenv six==1.16.0 + # via python-dateutil sniffio==1.3.0 + # via anyio + # via groq-sdk + # via httpx time-machine==2.9.0 tomli==2.0.1 + # via mypy + # via pytest typing-extensions==4.8.0 + # via groq-sdk + # via mypy + # via pydantic + # via pydantic-core virtualenv==20.24.5 + # via nox zipp==3.17.0 -# The following packages are considered to be unsafe in a requirements file: -setuptools==68.2.2 + # via importlib-metadata diff --git a/requirements.lock b/requirements.lock index 2022a5c5..579ab697 100644 --- a/requirements.lock +++ b/requirements.lock @@ -5,18 +5,39 @@ # pre: false # features: [] # all-features: true +# with-sources: false -e file:. annotated-types==0.6.0 + # via pydantic anyio==4.1.0 + # via groq-sdk + # via httpx certifi==2023.7.22 + # via httpcore + # via httpx distro==1.8.0 + # via groq-sdk exceptiongroup==1.1.3 + # via anyio h11==0.14.0 + # via httpcore httpcore==1.0.2 + # via httpx httpx==0.25.2 + # via groq-sdk idna==3.4 + # via anyio + # via httpx pydantic==2.4.2 + # via groq-sdk pydantic-core==2.10.1 + # via pydantic sniffio==1.3.0 + # via anyio + # via groq-sdk + # via httpx typing-extensions==4.8.0 + # via groq-sdk + # via pydantic + # via pydantic-core From 0c0d20405a96167f060a03a2b8a58a49d9a1c7c8 Mon Sep 17 00:00:00 2001 From: "stainless-app[bot]" <142633134+stainless-app[bot]@users.noreply.github.com> Date: Wed, 21 Feb 2024 01:52:55 +0000 Subject: [PATCH 5/6] feat: update via SDK Studio (#10) --- .github/workflows/ci.yml | 4 ++-- README.md | 8 ++++---- pyproject.toml | 2 +- requirements-dev.lock | 12 ++++++------ requirements.lock | 12 ++++++------ 5 files changed, 19 insertions(+), 19 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index fca066ea..dfb911fb 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -2,10 +2,10 @@ name: CI on: push: branches: - - main + - stainless pull_request: branches: - - main + - stainless jobs: lint: diff --git a/README.md b/README.md index 449ef182..5c0abddb 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # Groq Python API library -[![PyPI version](https://img.shields.io/pypi/v/groq-sdk.svg)](https://pypi.org/project/groq-sdk/) +[![PyPI version](https://img.shields.io/pypi/v/groq.svg)](https://pypi.org/project/groq/) The Groq Python library provides convenient access to the Groq REST API from any Python 3.7+ application. 
The library includes type definitions for all request params and response fields, @@ -13,7 +13,7 @@ The REST API documentation can be found [on console.groq.com](https://console.gr ## Installation ```sh -pip install groq-sdk +pip install groq ``` ## Usage @@ -261,9 +261,9 @@ completion = response.parse() # get the object that `chat.completions.create()` print(completion.id) ``` -These methods return an [`APIResponse`](https://github.com/groq/groq-python/tree/main/src/groq/_response.py) object. +These methods return an [`APIResponse`](https://github.com/groq/groq-python/tree/stainless/src/groq/_response.py) object. -The async client returns an [`AsyncAPIResponse`](https://github.com/groq/groq-python/tree/main/src/groq/_response.py) with the same structure, the only difference being `await`able methods for reading the response content. +The async client returns an [`AsyncAPIResponse`](https://github.com/groq/groq-python/tree/stainless/src/groq/_response.py) with the same structure, the only difference being `await`able methods for reading the response content. #### `.with_streaming_response` diff --git a/pyproject.toml b/pyproject.toml index 9a9918fe..f3c156bb 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,5 +1,5 @@ [project] -name = "groq-sdk" +name = "groq" version = "0.1.0" description = "The official Python library for the groq API" readme = "README.md" diff --git a/requirements-dev.lock b/requirements-dev.lock index 36fa001d..fb76b532 100644 --- a/requirements-dev.lock +++ b/requirements-dev.lock @@ -11,7 +11,7 @@ annotated-types==0.6.0 # via pydantic anyio==4.1.0 - # via groq-sdk + # via groq # via httpx argcomplete==3.1.2 # via nox @@ -26,7 +26,7 @@ dirty-equals==0.6.0 distlib==0.3.7 # via virtualenv distro==1.8.0 - # via groq-sdk + # via groq exceptiongroup==1.1.3 # via anyio filelock==3.12.4 @@ -36,7 +36,7 @@ h11==0.14.0 httpcore==1.0.2 # via httpx httpx==0.25.2 - # via groq-sdk + # via groq # via respx idna==3.4 # via anyio @@ -60,7 +60,7 @@ pluggy==1.3.0 py==1.11.0 # via pytest pydantic==2.4.2 - # via groq-sdk + # via groq pydantic-core==2.10.1 # via pydantic pyright==1.1.332 @@ -79,14 +79,14 @@ six==1.16.0 # via python-dateutil sniffio==1.3.0 # via anyio - # via groq-sdk + # via groq # via httpx time-machine==2.9.0 tomli==2.0.1 # via mypy # via pytest typing-extensions==4.8.0 - # via groq-sdk + # via groq # via mypy # via pydantic # via pydantic-core diff --git a/requirements.lock b/requirements.lock index 579ab697..991544f0 100644 --- a/requirements.lock +++ b/requirements.lock @@ -11,13 +11,13 @@ annotated-types==0.6.0 # via pydantic anyio==4.1.0 - # via groq-sdk + # via groq # via httpx certifi==2023.7.22 # via httpcore # via httpx distro==1.8.0 - # via groq-sdk + # via groq exceptiongroup==1.1.3 # via anyio h11==0.14.0 @@ -25,19 +25,19 @@ h11==0.14.0 httpcore==1.0.2 # via httpx httpx==0.25.2 - # via groq-sdk + # via groq idna==3.4 # via anyio # via httpx pydantic==2.4.2 - # via groq-sdk + # via groq pydantic-core==2.10.1 # via pydantic sniffio==1.3.0 # via anyio - # via groq-sdk + # via groq # via httpx typing-extensions==4.8.0 - # via groq-sdk + # via groq # via pydantic # via pydantic-core From 214765e7581616d4a2761933340548276e8f25d6 Mon Sep 17 00:00:00 2001 From: "stainless-app[bot]" <142633134+stainless-app[bot]@users.noreply.github.com> Date: Wed, 21 Feb 2024 01:53:04 +0000 Subject: [PATCH 6/6] release: 0.2.0 --- .release-please-manifest.json | 2 +- CHANGELOG.md | 19 +++++++++++++++++++ pyproject.toml | 2 +- src/groq/_version.py | 2 +- 4 files changed, 22 
insertions(+), 3 deletions(-) diff --git a/.release-please-manifest.json b/.release-please-manifest.json index 3d2ac0bd..10f30916 100644 --- a/.release-please-manifest.json +++ b/.release-please-manifest.json @@ -1,3 +1,3 @@ { - ".": "0.1.0" + ".": "0.2.0" } \ No newline at end of file diff --git a/CHANGELOG.md b/CHANGELOG.md index f643576b..50dc1242 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,24 @@ # Changelog +## 0.2.0 (2024-02-21) + +Full Changelog: [v0.1.0...v0.2.0](https://github.com/groq/groq-python/compare/v0.1.0...v0.2.0) + +### Features + +* Add initial Stainless SDK ([d5a8512](https://github.com/groq/groq-python/commit/d5a851262e04e625dde130367ed91d8f95683599)) +* Add initial Stainless SDK ([316de2c](https://github.com/groq/groq-python/commit/316de2ccfeb76e36fe34bb8656ea90a8d42a7d00)) +* create default branch ([7e00266](https://github.com/groq/groq-python/commit/7e00266e3c691d92d508e753e2c14c03297c09f9)) +* update via SDK Studio ([#10](https://github.com/groq/groq-python/issues/10)) ([0c0d204](https://github.com/groq/groq-python/commit/0c0d20405a96167f060a03a2b8a58a49d9a1c7c8)) +* update via SDK Studio ([#3](https://github.com/groq/groq-python/issues/3)) ([8d92c08](https://github.com/groq/groq-python/commit/8d92c086e320c2715e02bc79807ff872e84c0b0f)) + + +### Chores + +* go live ([#2](https://github.com/groq/groq-python/issues/2)) ([ba81c42](https://github.com/groq/groq-python/commit/ba81c42d6d0fd6d47819e0d58962235cb70ca4f1)) +* go live ([#5](https://github.com/groq/groq-python/issues/5)) ([af9a838](https://github.com/groq/groq-python/commit/af9a838e240bb0f7385bc33fb18ce246427ca2f7)) +* update branch ([#8](https://github.com/groq/groq-python/issues/8)) ([b9b55b4](https://github.com/groq/groq-python/commit/b9b55b41cb158efd155f9cda829808c877493afd)) + ## 0.1.0 (2024-02-10) Full Changelog: [v0.0.1...v0.1.0](https://github.com/definitive-io/groqcloud-python/compare/v0.0.1...v0.1.0) diff --git a/pyproject.toml b/pyproject.toml index f3c156bb..d65c5213 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "groq" -version = "0.1.0" +version = "0.2.0" description = "The official Python library for the groq API" readme = "README.md" license = "Apache-2.0" diff --git a/src/groq/_version.py b/src/groq/_version.py index 0282ec9b..40c5272b 100644 --- a/src/groq/_version.py +++ b/src/groq/_version.py @@ -1,4 +1,4 @@ # File generated from our OpenAPI spec by Stainless. __title__ = "groq" -__version__ = "0.1.0" # x-release-please-version +__version__ = "0.2.0" # x-release-please-version
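For reference, the `.with_raw_response` pattern exercised by the new async tests and described in the README hunks above can be used as follows (a minimal sketch, assuming the API key is configured in the environment as in the bundled examples):

```python
from groq import Groq

client = Groq()

# Ask for the raw HTTP response instead of the parsed object.
response = client.chat.completions.with_raw_response.create(
    messages=[{"role": "user", "content": "Say hello in one sentence."}],
    model="mixtral-8x7b-32768",
)

# Recover the object that `chat.completions.create()` would have returned.
completion = response.parse()
print(completion.id)
```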
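The release commit itself only bumps version metadata; once 0.2.0 is installed, the bump can be confirmed from the generated version module (a quick check, assuming a standard `pip install groq`):

```python
from groq._version import __title__, __version__

print(__title__, __version__)  # expected output after this release: groq 0.2.0
```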