diff --git a/.release-please-manifest.json b/.release-please-manifest.json
index 4f9005ea..b5db7ce1 100644
--- a/.release-please-manifest.json
+++ b/.release-please-manifest.json
@@ -1,3 +1,3 @@
{
- ".": "0.1.0-alpha.6"
+ ".": "0.1.0-alpha.7"
}
\ No newline at end of file
diff --git a/.stats.yml b/.stats.yml
index a1e73eb0..79a36ab0 100644
--- a/.stats.yml
+++ b/.stats.yml
@@ -1,4 +1,4 @@
-configured_endpoints: 77
+configured_endpoints: 76
openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/digitalocean%2Fgradientai-e8b3cbc80e18e4f7f277010349f25e1319156704f359911dc464cc21a0d077a6.yml
openapi_spec_hash: c773d792724f5647ae25a5ae4ccec208
-config_hash: 9c2519464cf5de240e34bd89b9f65706
+config_hash: f0976fbc552ea878bb527447b5e663c9
diff --git a/CHANGELOG.md b/CHANGELOG.md
index d9b29735..15fec91a 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,16 @@
# Changelog
+## 0.1.0-alpha.7 (2025-06-27)
+
+Full Changelog: [v0.1.0-alpha.6...v0.1.0-alpha.7](https://github.com/digitalocean/gradientai-python/compare/v0.1.0-alpha.6...v0.1.0-alpha.7)
+
+### Features
+
+* **api:** manual updates ([63b9ec0](https://github.com/digitalocean/gradientai-python/commit/63b9ec02a646dad258afbd048db8db1af8d4401b))
+* **api:** manual updates ([5247aee](https://github.com/digitalocean/gradientai-python/commit/5247aee6d6052f6380fbe892d7c2bd9a8d0a32c0))
+* **api:** manual updates ([aa9e2c7](https://github.com/digitalocean/gradientai-python/commit/aa9e2c78956162f6195fdbaa1c95754ee4af207e))
+* **client:** add agent_domain option ([b4b6260](https://github.com/digitalocean/gradientai-python/commit/b4b62609a12a1dfa0b505e9ec54334b776fb0515))
+
## 0.1.0-alpha.6 (2025-06-27)
Full Changelog: [v0.1.0-alpha.5...v0.1.0-alpha.6](https://github.com/digitalocean/gradientai-python/compare/v0.1.0-alpha.5...v0.1.0-alpha.6)
diff --git a/README.md b/README.md
index 24b0975b..d5bd9c97 100644
--- a/README.md
+++ b/README.md
@@ -31,7 +31,7 @@ client = GradientAI(
api_key=os.environ.get("GRADIENTAI_API_KEY"), # This is the default and can be omitted
)
-completion = client.chat.completions.create(
+completion = client.agents.chat.completions.create(
messages=[
{
"content": "string",
@@ -63,7 +63,7 @@ client = AsyncGradientAI(
async def main() -> None:
- completion = await client.chat.completions.create(
+ completion = await client.agents.chat.completions.create(
messages=[
{
"content": "string",
@@ -105,7 +105,7 @@ async def main() -> None:
api_key=os.environ.get("GRADIENTAI_API_KEY"), # This is the default and can be omitted
http_client=DefaultAioHttpClient(),
) as client:
- completion = await client.chat.completions.create(
+ completion = await client.agents.chat.completions.create(
messages=[
{
"content": "string",
@@ -138,7 +138,7 @@ from gradientai import GradientAI
client = GradientAI()
-completion = client.chat.completions.create(
+completion = client.agents.chat.completions.create(
messages=[
{
"content": "string",
diff --git a/api.md b/api.md
index 9a2dd757..dc48f7b3 100644
--- a/api.md
+++ b/api.md
@@ -1,7 +1,7 @@
# Shared Types
```python
-from gradientai.types import APILinks, APIMeta
+from gradientai.types import APILinks, APIMeta, ChatCompletionTokenLogprob
```
# Agents
@@ -58,6 +58,20 @@ Methods:
- client.agents.api_keys.delete(api_key_uuid, \*, agent_uuid) -> APIKeyDeleteResponse
- client.agents.api_keys.regenerate(api_key_uuid, \*, agent_uuid) -> APIKeyRegenerateResponse
+## Chat
+
+### Completions
+
+Types:
+
+```python
+from gradientai.types.agents.chat import CompletionCreateResponse
+```
+
+Methods:
+
+- client.agents.chat.completions.create(\*\*params) -> CompletionCreateResponse
+
## EvaluationMetrics
Types:
@@ -382,7 +396,7 @@ Methods:
Types:
```python
-from gradientai.types.chat import ChatCompletionTokenLogprob, CompletionCreateResponse
+from gradientai.types.chat import CompletionCreateResponse
```
Methods:
@@ -419,10 +433,9 @@ Methods:
Types:
```python
-from gradientai.types import APIAgreement, APIModel, APIModelVersion, Model, ModelListResponse
+from gradientai.types import APIAgreement, APIModel, APIModelVersion, ModelListResponse
```
Methods:
-- client.models.retrieve(model) -> Model
-- client.models.list() -> ModelListResponse
+- client.models.list(\*\*params) -> ModelListResponse
diff --git a/pyproject.toml b/pyproject.toml
index 0f04322b..29531941 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
[project]
name = "c63a5cfe-b235-4fbe-8bbb-82a9e02a482a-python"
-version = "0.1.0-alpha.6"
+version = "0.1.0-alpha.7"
description = "The official Python library for GradientAI"
dynamic = ["readme"]
license = "Apache-2.0"
diff --git a/src/gradientai/_client.py b/src/gradientai/_client.py
index 0020ed16..327273c9 100644
--- a/src/gradientai/_client.py
+++ b/src/gradientai/_client.py
@@ -57,12 +57,14 @@ class GradientAI(SyncAPIClient):
# client options
api_key: str | None
inference_key: str | None
+ agent_domain: str | None
def __init__(
self,
*,
api_key: str | None = None,
inference_key: str | None = None,
+ agent_domain: str | None = None,
base_url: str | httpx.URL | None = None,
timeout: Union[float, Timeout, None, NotGiven] = NOT_GIVEN,
max_retries: int = DEFAULT_MAX_RETRIES,
@@ -96,6 +98,8 @@ def __init__(
inference_key = os.environ.get("GRADIENTAI_INFERENCE_KEY")
self.inference_key = inference_key
+ self.agent_domain = agent_domain
+
if base_url is None:
base_url = os.environ.get("GRADIENT_AI_BASE_URL")
self._base_url_overridden = base_url is not None
@@ -201,6 +205,7 @@ def copy(
*,
api_key: str | None = None,
inference_key: str | None = None,
+ agent_domain: str | None = None,
base_url: str | httpx.URL | None = None,
timeout: float | Timeout | None | NotGiven = NOT_GIVEN,
http_client: httpx.Client | None = None,
@@ -236,6 +241,7 @@ def copy(
client = self.__class__(
api_key=api_key or self.api_key,
inference_key=inference_key or self.inference_key,
+ agent_domain=agent_domain or self.agent_domain,
base_url=base_url or self.base_url,
timeout=self.timeout if isinstance(timeout, NotGiven) else timeout,
http_client=http_client,
@@ -289,12 +295,14 @@ class AsyncGradientAI(AsyncAPIClient):
# client options
api_key: str | None
inference_key: str | None
+ agent_domain: str | None
def __init__(
self,
*,
api_key: str | None = None,
inference_key: str | None = None,
+ agent_domain: str | None = None,
base_url: str | httpx.URL | None = None,
timeout: Union[float, Timeout, None, NotGiven] = NOT_GIVEN,
max_retries: int = DEFAULT_MAX_RETRIES,
@@ -328,6 +336,8 @@ def __init__(
inference_key = os.environ.get("GRADIENTAI_INFERENCE_KEY")
self.inference_key = inference_key
+ self.agent_domain = agent_domain
+
if base_url is None:
base_url = os.environ.get("GRADIENT_AI_BASE_URL")
self._base_url_overridden = base_url is not None
@@ -433,6 +443,7 @@ def copy(
*,
api_key: str | None = None,
inference_key: str | None = None,
+ agent_domain: str | None = None,
base_url: str | httpx.URL | None = None,
timeout: float | Timeout | None | NotGiven = NOT_GIVEN,
http_client: httpx.AsyncClient | None = None,
@@ -468,6 +479,7 @@ def copy(
client = self.__class__(
api_key=api_key or self.api_key,
inference_key=inference_key or self.inference_key,
+ agent_domain=agent_domain or self.agent_domain,
base_url=base_url or self.base_url,
timeout=self.timeout if isinstance(timeout, NotGiven) else timeout,
http_client=http_client,
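
A hedged sketch of how the new `agent_domain` option could be supplied, based only on the constructor and `copy()` signatures added in this file; the domain values are made-up placeholders, and how the client routes requests with them is not shown in this diff:

```python
import os

from gradientai import GradientAI

# `agent_domain` can be passed at construction time, alongside the existing keys...
client = GradientAI(
    api_key=os.environ.get("GRADIENTAI_API_KEY"),
    agent_domain="my-agent.agents.do-ai.run",  # placeholder domain
)

# ...or overridden on a per-use copy, mirroring how api_key/inference_key behave.
scoped = client.copy(agent_domain="other-agent.agents.do-ai.run")  # placeholder domain
```
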
diff --git a/src/gradientai/_version.py b/src/gradientai/_version.py
index b8ef5fc0..d4e6dde6 100644
--- a/src/gradientai/_version.py
+++ b/src/gradientai/_version.py
@@ -1,4 +1,4 @@
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
__title__ = "gradientai"
-__version__ = "0.1.0-alpha.6" # x-release-please-version
+__version__ = "0.1.0-alpha.7" # x-release-please-version
diff --git a/src/gradientai/resources/agents/__init__.py b/src/gradientai/resources/agents/__init__.py
index f5423f00..51075283 100644
--- a/src/gradientai/resources/agents/__init__.py
+++ b/src/gradientai/resources/agents/__init__.py
@@ -1,5 +1,13 @@
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+from .chat import (
+ ChatResource,
+ AsyncChatResource,
+ ChatResourceWithRawResponse,
+ AsyncChatResourceWithRawResponse,
+ ChatResourceWithStreamingResponse,
+ AsyncChatResourceWithStreamingResponse,
+)
from .agents import (
AgentsResource,
AsyncAgentsResource,
@@ -88,6 +96,12 @@
"AsyncAPIKeysResourceWithRawResponse",
"APIKeysResourceWithStreamingResponse",
"AsyncAPIKeysResourceWithStreamingResponse",
+ "ChatResource",
+ "AsyncChatResource",
+ "ChatResourceWithRawResponse",
+ "AsyncChatResourceWithRawResponse",
+ "ChatResourceWithStreamingResponse",
+ "AsyncChatResourceWithStreamingResponse",
"EvaluationMetricsResource",
"AsyncEvaluationMetricsResource",
"EvaluationMetricsResourceWithRawResponse",
diff --git a/src/gradientai/resources/agents/agents.py b/src/gradientai/resources/agents/agents.py
index 0a6e183c..200e9fc0 100644
--- a/src/gradientai/resources/agents/agents.py
+++ b/src/gradientai/resources/agents/agents.py
@@ -41,6 +41,14 @@
AsyncVersionsResourceWithStreamingResponse,
)
from ..._compat import cached_property
+from .chat.chat import (
+ ChatResource,
+ AsyncChatResource,
+ ChatResourceWithRawResponse,
+ AsyncChatResourceWithRawResponse,
+ ChatResourceWithStreamingResponse,
+ AsyncChatResourceWithStreamingResponse,
+)
from .functions import (
FunctionsResource,
AsyncFunctionsResource,
@@ -114,6 +122,10 @@ class AgentsResource(SyncAPIResource):
def api_keys(self) -> APIKeysResource:
return APIKeysResource(self._client)
+ @cached_property
+ def chat(self) -> ChatResource:
+ return ChatResource(self._client)
+
@cached_property
def evaluation_metrics(self) -> EvaluationMetricsResource:
return EvaluationMetricsResource(self._client)
@@ -498,6 +510,10 @@ class AsyncAgentsResource(AsyncAPIResource):
def api_keys(self) -> AsyncAPIKeysResource:
return AsyncAPIKeysResource(self._client)
+ @cached_property
+ def chat(self) -> AsyncChatResource:
+ return AsyncChatResource(self._client)
+
@cached_property
def evaluation_metrics(self) -> AsyncEvaluationMetricsResource:
return AsyncEvaluationMetricsResource(self._client)
@@ -904,6 +920,10 @@ def __init__(self, agents: AgentsResource) -> None:
def api_keys(self) -> APIKeysResourceWithRawResponse:
return APIKeysResourceWithRawResponse(self._agents.api_keys)
+ @cached_property
+ def chat(self) -> ChatResourceWithRawResponse:
+ return ChatResourceWithRawResponse(self._agents.chat)
+
@cached_property
def evaluation_metrics(self) -> EvaluationMetricsResourceWithRawResponse:
return EvaluationMetricsResourceWithRawResponse(self._agents.evaluation_metrics)
@@ -964,6 +984,10 @@ def __init__(self, agents: AsyncAgentsResource) -> None:
def api_keys(self) -> AsyncAPIKeysResourceWithRawResponse:
return AsyncAPIKeysResourceWithRawResponse(self._agents.api_keys)
+ @cached_property
+ def chat(self) -> AsyncChatResourceWithRawResponse:
+ return AsyncChatResourceWithRawResponse(self._agents.chat)
+
@cached_property
def evaluation_metrics(self) -> AsyncEvaluationMetricsResourceWithRawResponse:
return AsyncEvaluationMetricsResourceWithRawResponse(self._agents.evaluation_metrics)
@@ -1024,6 +1048,10 @@ def __init__(self, agents: AgentsResource) -> None:
def api_keys(self) -> APIKeysResourceWithStreamingResponse:
return APIKeysResourceWithStreamingResponse(self._agents.api_keys)
+ @cached_property
+ def chat(self) -> ChatResourceWithStreamingResponse:
+ return ChatResourceWithStreamingResponse(self._agents.chat)
+
@cached_property
def evaluation_metrics(self) -> EvaluationMetricsResourceWithStreamingResponse:
return EvaluationMetricsResourceWithStreamingResponse(self._agents.evaluation_metrics)
@@ -1084,6 +1112,10 @@ def __init__(self, agents: AsyncAgentsResource) -> None:
def api_keys(self) -> AsyncAPIKeysResourceWithStreamingResponse:
return AsyncAPIKeysResourceWithStreamingResponse(self._agents.api_keys)
+ @cached_property
+ def chat(self) -> AsyncChatResourceWithStreamingResponse:
+ return AsyncChatResourceWithStreamingResponse(self._agents.chat)
+
@cached_property
def evaluation_metrics(self) -> AsyncEvaluationMetricsResourceWithStreamingResponse:
return AsyncEvaluationMetricsResourceWithStreamingResponse(self._agents.evaluation_metrics)
diff --git a/src/gradientai/resources/agents/chat/__init__.py b/src/gradientai/resources/agents/chat/__init__.py
new file mode 100644
index 00000000..ec960eb4
--- /dev/null
+++ b/src/gradientai/resources/agents/chat/__init__.py
@@ -0,0 +1,33 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from .chat import (
+ ChatResource,
+ AsyncChatResource,
+ ChatResourceWithRawResponse,
+ AsyncChatResourceWithRawResponse,
+ ChatResourceWithStreamingResponse,
+ AsyncChatResourceWithStreamingResponse,
+)
+from .completions import (
+ CompletionsResource,
+ AsyncCompletionsResource,
+ CompletionsResourceWithRawResponse,
+ AsyncCompletionsResourceWithRawResponse,
+ CompletionsResourceWithStreamingResponse,
+ AsyncCompletionsResourceWithStreamingResponse,
+)
+
+__all__ = [
+ "CompletionsResource",
+ "AsyncCompletionsResource",
+ "CompletionsResourceWithRawResponse",
+ "AsyncCompletionsResourceWithRawResponse",
+ "CompletionsResourceWithStreamingResponse",
+ "AsyncCompletionsResourceWithStreamingResponse",
+ "ChatResource",
+ "AsyncChatResource",
+ "ChatResourceWithRawResponse",
+ "AsyncChatResourceWithRawResponse",
+ "ChatResourceWithStreamingResponse",
+ "AsyncChatResourceWithStreamingResponse",
+]
diff --git a/src/gradientai/resources/agents/chat/chat.py b/src/gradientai/resources/agents/chat/chat.py
new file mode 100644
index 00000000..c87bd158
--- /dev/null
+++ b/src/gradientai/resources/agents/chat/chat.py
@@ -0,0 +1,102 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from ...._compat import cached_property
+from .completions import (
+ CompletionsResource,
+ AsyncCompletionsResource,
+ CompletionsResourceWithRawResponse,
+ AsyncCompletionsResourceWithRawResponse,
+ CompletionsResourceWithStreamingResponse,
+ AsyncCompletionsResourceWithStreamingResponse,
+)
+from ...._resource import SyncAPIResource, AsyncAPIResource
+
+__all__ = ["ChatResource", "AsyncChatResource"]
+
+
+class ChatResource(SyncAPIResource):
+ @cached_property
+ def completions(self) -> CompletionsResource:
+ return CompletionsResource(self._client)
+
+ @cached_property
+ def with_raw_response(self) -> ChatResourceWithRawResponse:
+ """
+ This property can be used as a prefix for any HTTP method call to return
+ the raw response object instead of the parsed content.
+
+ For more information, see https://www.github.com/digitalocean/gradientai-python#accessing-raw-response-data-eg-headers
+ """
+ return ChatResourceWithRawResponse(self)
+
+ @cached_property
+ def with_streaming_response(self) -> ChatResourceWithStreamingResponse:
+ """
+ An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+ For more information, see https://www.github.com/digitalocean/gradientai-python#with_streaming_response
+ """
+ return ChatResourceWithStreamingResponse(self)
+
+
+class AsyncChatResource(AsyncAPIResource):
+ @cached_property
+ def completions(self) -> AsyncCompletionsResource:
+ return AsyncCompletionsResource(self._client)
+
+ @cached_property
+ def with_raw_response(self) -> AsyncChatResourceWithRawResponse:
+ """
+ This property can be used as a prefix for any HTTP method call to return
+ the raw response object instead of the parsed content.
+
+ For more information, see https://www.github.com/digitalocean/gradientai-python#accessing-raw-response-data-eg-headers
+ """
+ return AsyncChatResourceWithRawResponse(self)
+
+ @cached_property
+ def with_streaming_response(self) -> AsyncChatResourceWithStreamingResponse:
+ """
+ An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+ For more information, see https://www.github.com/digitalocean/gradientai-python#with_streaming_response
+ """
+ return AsyncChatResourceWithStreamingResponse(self)
+
+
+class ChatResourceWithRawResponse:
+ def __init__(self, chat: ChatResource) -> None:
+ self._chat = chat
+
+ @cached_property
+ def completions(self) -> CompletionsResourceWithRawResponse:
+ return CompletionsResourceWithRawResponse(self._chat.completions)
+
+
+class AsyncChatResourceWithRawResponse:
+ def __init__(self, chat: AsyncChatResource) -> None:
+ self._chat = chat
+
+ @cached_property
+ def completions(self) -> AsyncCompletionsResourceWithRawResponse:
+ return AsyncCompletionsResourceWithRawResponse(self._chat.completions)
+
+
+class ChatResourceWithStreamingResponse:
+ def __init__(self, chat: ChatResource) -> None:
+ self._chat = chat
+
+ @cached_property
+ def completions(self) -> CompletionsResourceWithStreamingResponse:
+ return CompletionsResourceWithStreamingResponse(self._chat.completions)
+
+
+class AsyncChatResourceWithStreamingResponse:
+ def __init__(self, chat: AsyncChatResource) -> None:
+ self._chat = chat
+
+ @cached_property
+ def completions(self) -> AsyncCompletionsResourceWithStreamingResponse:
+ return AsyncCompletionsResourceWithStreamingResponse(self._chat.completions)
diff --git a/src/gradientai/resources/agents/chat/completions.py b/src/gradientai/resources/agents/chat/completions.py
new file mode 100644
index 00000000..a213bf05
--- /dev/null
+++ b/src/gradientai/resources/agents/chat/completions.py
@@ -0,0 +1,385 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Dict, List, Union, Iterable, Optional
+
+import httpx
+
+from ...._types import NOT_GIVEN, Body, Query, Headers, NotGiven
+from ...._utils import maybe_transform, async_maybe_transform
+from ...._compat import cached_property
+from ...._resource import SyncAPIResource, AsyncAPIResource
+from ...._response import (
+ to_raw_response_wrapper,
+ to_streamed_response_wrapper,
+ async_to_raw_response_wrapper,
+ async_to_streamed_response_wrapper,
+)
+from ...._base_client import make_request_options
+from ....types.agents.chat import completion_create_params
+from ....types.agents.chat.completion_create_response import CompletionCreateResponse
+
+__all__ = ["CompletionsResource", "AsyncCompletionsResource"]
+
+
+class CompletionsResource(SyncAPIResource):
+ @cached_property
+ def with_raw_response(self) -> CompletionsResourceWithRawResponse:
+ """
+ This property can be used as a prefix for any HTTP method call to return
+ the raw response object instead of the parsed content.
+
+ For more information, see https://www.github.com/digitalocean/gradientai-python#accessing-raw-response-data-eg-headers
+ """
+ return CompletionsResourceWithRawResponse(self)
+
+ @cached_property
+ def with_streaming_response(self) -> CompletionsResourceWithStreamingResponse:
+ """
+ An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+ For more information, see https://www.github.com/digitalocean/gradientai-python#with_streaming_response
+ """
+ return CompletionsResourceWithStreamingResponse(self)
+
+ def create(
+ self,
+ *,
+ messages: Iterable[completion_create_params.Message],
+ model: str,
+ frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN,
+ logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN,
+ logprobs: Optional[bool] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ metadata: Optional[Dict[str, str]] | NotGiven = NOT_GIVEN,
+ n: Optional[int] | NotGiven = NOT_GIVEN,
+ presence_penalty: Optional[float] | NotGiven = NOT_GIVEN,
+ stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN,
+ stream: Optional[bool] | NotGiven = NOT_GIVEN,
+ stream_options: Optional[completion_create_params.StreamOptions] | NotGiven = NOT_GIVEN,
+ temperature: Optional[float] | NotGiven = NOT_GIVEN,
+ top_logprobs: Optional[int] | NotGiven = NOT_GIVEN,
+ top_p: Optional[float] | NotGiven = NOT_GIVEN,
+ user: str | NotGiven = NOT_GIVEN,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> CompletionCreateResponse:
+ """
+ Creates a model response for the given chat conversation.
+
+ Args:
+ messages: A list of messages comprising the conversation so far.
+
+ model: Model ID used to generate the response.
+
+ frequency_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on their
+ existing frequency in the text so far, decreasing the model's likelihood to
+ repeat the same line verbatim.
+
+ logit_bias: Modify the likelihood of specified tokens appearing in the completion.
+
+ Accepts a JSON object that maps tokens (specified by their token ID in the
+ tokenizer) to an associated bias value from -100 to 100. Mathematically, the
+ bias is added to the logits generated by the model prior to sampling. The exact
+ effect will vary per model, but values between -1 and 1 should decrease or
+ increase likelihood of selection; values like -100 or 100 should result in a ban
+ or exclusive selection of the relevant token.
+
+ logprobs: Whether to return log probabilities of the output tokens or not. If true,
+ returns the log probabilities of each output token returned in the `content` of
+ `message`.
+
+ max_completion_tokens: The maximum number of completion tokens that may be used over the course of the
+ run. The run will make a best effort to use only the number of completion tokens
+ specified, across multiple turns of the run.
+
+ max_tokens: The maximum number of tokens that can be generated in the completion.
+
+ The token count of your prompt plus `max_tokens` cannot exceed the model's
+ context length.
+
+ metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
+ for storing additional information about the object in a structured format, and
+ querying for objects via API or the dashboard.
+
+ Keys are strings with a maximum length of 64 characters. Values are strings with
+ a maximum length of 512 characters.
+
+ n: How many chat completion choices to generate for each input message. Note that
+ you will be charged based on the number of generated tokens across all of the
+ choices. Keep `n` as `1` to minimize costs.
+
+ presence_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on
+ whether they appear in the text so far, increasing the model's likelihood to
+ talk about new topics.
+
+ stop: Up to 4 sequences where the API will stop generating further tokens. The
+ returned text will not contain the stop sequence.
+
+ stream: If set to true, the model response data will be streamed to the client as it is
+ generated using server-sent events.
+
+ stream_options: Options for streaming response. Only set this when you set `stream: true`.
+
+ temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
+ make the output more random, while lower values like 0.2 will make it more
+ focused and deterministic. We generally recommend altering this or `top_p` but
+ not both.
+
+ top_logprobs: An integer between 0 and 20 specifying the number of most likely tokens to
+ return at each token position, each with an associated log probability.
+ `logprobs` must be set to `true` if this parameter is used.
+
+ top_p: An alternative to sampling with temperature, called nucleus sampling, where the
+ model considers the results of the tokens with top_p probability mass. So 0.1
+ means only the tokens comprising the top 10% probability mass are considered.
+
+ We generally recommend altering this or `temperature` but not both.
+
+ user: A unique identifier representing your end-user, which can help DigitalOcean to
+ monitor and detect abuse.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ return self._post(
+ "/chat/completions"
+ if self._client._base_url_overridden
+ else "https://inference.do-ai.run/v1/chat/completions",
+ body=maybe_transform(
+ {
+ "messages": messages,
+ "model": model,
+ "frequency_penalty": frequency_penalty,
+ "logit_bias": logit_bias,
+ "logprobs": logprobs,
+ "max_completion_tokens": max_completion_tokens,
+ "max_tokens": max_tokens,
+ "metadata": metadata,
+ "n": n,
+ "presence_penalty": presence_penalty,
+ "stop": stop,
+ "stream": stream,
+ "stream_options": stream_options,
+ "temperature": temperature,
+ "top_logprobs": top_logprobs,
+ "top_p": top_p,
+ "user": user,
+ },
+ completion_create_params.CompletionCreateParams,
+ ),
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=CompletionCreateResponse,
+ )
+
+
+class AsyncCompletionsResource(AsyncAPIResource):
+ @cached_property
+ def with_raw_response(self) -> AsyncCompletionsResourceWithRawResponse:
+ """
+ This property can be used as a prefix for any HTTP method call to return
+ the raw response object instead of the parsed content.
+
+ For more information, see https://www.github.com/digitalocean/gradientai-python#accessing-raw-response-data-eg-headers
+ """
+ return AsyncCompletionsResourceWithRawResponse(self)
+
+ @cached_property
+ def with_streaming_response(self) -> AsyncCompletionsResourceWithStreamingResponse:
+ """
+ An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+ For more information, see https://www.github.com/digitalocean/gradientai-python#with_streaming_response
+ """
+ return AsyncCompletionsResourceWithStreamingResponse(self)
+
+ async def create(
+ self,
+ *,
+ messages: Iterable[completion_create_params.Message],
+ model: str,
+ frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN,
+ logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN,
+ logprobs: Optional[bool] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ metadata: Optional[Dict[str, str]] | NotGiven = NOT_GIVEN,
+ n: Optional[int] | NotGiven = NOT_GIVEN,
+ presence_penalty: Optional[float] | NotGiven = NOT_GIVEN,
+ stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN,
+ stream: Optional[bool] | NotGiven = NOT_GIVEN,
+ stream_options: Optional[completion_create_params.StreamOptions] | NotGiven = NOT_GIVEN,
+ temperature: Optional[float] | NotGiven = NOT_GIVEN,
+ top_logprobs: Optional[int] | NotGiven = NOT_GIVEN,
+ top_p: Optional[float] | NotGiven = NOT_GIVEN,
+ user: str | NotGiven = NOT_GIVEN,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> CompletionCreateResponse:
+ """
+ Creates a model response for the given chat conversation.
+
+ Args:
+ messages: A list of messages comprising the conversation so far.
+
+ model: Model ID used to generate the response.
+
+ frequency_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on their
+ existing frequency in the text so far, decreasing the model's likelihood to
+ repeat the same line verbatim.
+
+ logit_bias: Modify the likelihood of specified tokens appearing in the completion.
+
+ Accepts a JSON object that maps tokens (specified by their token ID in the
+ tokenizer) to an associated bias value from -100 to 100. Mathematically, the
+ bias is added to the logits generated by the model prior to sampling. The exact
+ effect will vary per model, but values between -1 and 1 should decrease or
+ increase likelihood of selection; values like -100 or 100 should result in a ban
+ or exclusive selection of the relevant token.
+
+ logprobs: Whether to return log probabilities of the output tokens or not. If true,
+ returns the log probabilities of each output token returned in the `content` of
+ `message`.
+
+ max_completion_tokens: The maximum number of completion tokens that may be used over the course of the
+ run. The run will make a best effort to use only the number of completion tokens
+ specified, across multiple turns of the run.
+
+ max_tokens: The maximum number of tokens that can be generated in the completion.
+
+ The token count of your prompt plus `max_tokens` cannot exceed the model's
+ context length.
+
+ metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
+ for storing additional information about the object in a structured format, and
+ querying for objects via API or the dashboard.
+
+ Keys are strings with a maximum length of 64 characters. Values are strings with
+ a maximum length of 512 characters.
+
+ n: How many chat completion choices to generate for each input message. Note that
+ you will be charged based on the number of generated tokens across all of the
+ choices. Keep `n` as `1` to minimize costs.
+
+ presence_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on
+ whether they appear in the text so far, increasing the model's likelihood to
+ talk about new topics.
+
+ stop: Up to 4 sequences where the API will stop generating further tokens. The
+ returned text will not contain the stop sequence.
+
+ stream: If set to true, the model response data will be streamed to the client as it is
+ generated using server-sent events.
+
+ stream_options: Options for streaming response. Only set this when you set `stream: true`.
+
+ temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
+ make the output more random, while lower values like 0.2 will make it more
+ focused and deterministic. We generally recommend altering this or `top_p` but
+ not both.
+
+ top_logprobs: An integer between 0 and 20 specifying the number of most likely tokens to
+ return at each token position, each with an associated log probability.
+ `logprobs` must be set to `true` if this parameter is used.
+
+ top_p: An alternative to sampling with temperature, called nucleus sampling, where the
+ model considers the results of the tokens with top_p probability mass. So 0.1
+ means only the tokens comprising the top 10% probability mass are considered.
+
+ We generally recommend altering this or `temperature` but not both.
+
+ user: A unique identifier representing your end-user, which can help DigitalOcean to
+ monitor and detect abuse.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ return await self._post(
+ "/chat/completions"
+ if self._client._base_url_overridden
+ else "https://inference.do-ai.run/v1/chat/completions",
+ body=await async_maybe_transform(
+ {
+ "messages": messages,
+ "model": model,
+ "frequency_penalty": frequency_penalty,
+ "logit_bias": logit_bias,
+ "logprobs": logprobs,
+ "max_completion_tokens": max_completion_tokens,
+ "max_tokens": max_tokens,
+ "metadata": metadata,
+ "n": n,
+ "presence_penalty": presence_penalty,
+ "stop": stop,
+ "stream": stream,
+ "stream_options": stream_options,
+ "temperature": temperature,
+ "top_logprobs": top_logprobs,
+ "top_p": top_p,
+ "user": user,
+ },
+ completion_create_params.CompletionCreateParams,
+ ),
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=CompletionCreateResponse,
+ )
+
+
+class CompletionsResourceWithRawResponse:
+ def __init__(self, completions: CompletionsResource) -> None:
+ self._completions = completions
+
+ self.create = to_raw_response_wrapper(
+ completions.create,
+ )
+
+
+class AsyncCompletionsResourceWithRawResponse:
+ def __init__(self, completions: AsyncCompletionsResource) -> None:
+ self._completions = completions
+
+ self.create = async_to_raw_response_wrapper(
+ completions.create,
+ )
+
+
+class CompletionsResourceWithStreamingResponse:
+ def __init__(self, completions: CompletionsResource) -> None:
+ self._completions = completions
+
+ self.create = to_streamed_response_wrapper(
+ completions.create,
+ )
+
+
+class AsyncCompletionsResourceWithStreamingResponse:
+ def __init__(self, completions: AsyncCompletionsResource) -> None:
+ self._completions = completions
+
+ self.create = async_to_streamed_response_wrapper(
+ completions.create,
+ )
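
As the docstrings above note, `with_raw_response` and `with_streaming_response` prefix a method call to expose the HTTP response itself. A sketch following the usual Stainless wrapper pattern (`.parse()`, `.iter_lines()`); the message and model values are placeholders:

```python
from gradientai import GradientAI

client = GradientAI()

# Raw response: inspect headers/status, then parse into the typed model.
response = client.agents.chat.completions.with_raw_response.create(
    messages=[{"role": "user", "content": "Hello!"}],
    model="example-model",  # placeholder
)
print(response.headers.get("content-type"))
completion = response.parse()

# Streaming response: the body is not read eagerly.
with client.agents.chat.completions.with_streaming_response.create(
    messages=[{"role": "user", "content": "Hello!"}],
    model="example-model",  # placeholder
) as streamed:
    for line in streamed.iter_lines():
        print(line)
```
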
diff --git a/src/gradientai/resources/models.py b/src/gradientai/resources/models.py
index da5462ae..c8e78b9b 100644
--- a/src/gradientai/resources/models.py
+++ b/src/gradientai/resources/models.py
@@ -2,9 +2,14 @@
from __future__ import annotations
+from typing import List
+from typing_extensions import Literal
+
import httpx
+from ..types import model_list_params
from .._types import NOT_GIVEN, Body, Query, Headers, NotGiven
+from .._utils import maybe_transform, async_maybe_transform
from .._compat import cached_property
from .._resource import SyncAPIResource, AsyncAPIResource
from .._response import (
@@ -13,7 +18,6 @@
async_to_raw_response_wrapper,
async_to_streamed_response_wrapper,
)
-from ..types.model import Model
from .._base_client import make_request_options
from ..types.model_list_response import ModelListResponse
@@ -40,22 +44,52 @@ def with_streaming_response(self) -> ModelsResourceWithStreamingResponse:
"""
return ModelsResourceWithStreamingResponse(self)
- def retrieve(
+ def list(
self,
- model: str,
*,
+ page: int | NotGiven = NOT_GIVEN,
+ per_page: int | NotGiven = NOT_GIVEN,
+ public_only: bool | NotGiven = NOT_GIVEN,
+ usecases: List[
+ Literal[
+ "MODEL_USECASE_UNKNOWN",
+ "MODEL_USECASE_AGENT",
+ "MODEL_USECASE_FINETUNED",
+ "MODEL_USECASE_KNOWLEDGEBASE",
+ "MODEL_USECASE_GUARDRAIL",
+ "MODEL_USECASE_REASONING",
+ "MODEL_USECASE_SERVERLESS",
+ ]
+ ]
+ | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
extra_query: Query | None = None,
extra_body: Body | None = None,
timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
- ) -> Model:
+ ) -> ModelListResponse:
"""
- Retrieves a model instance, providing basic information about the model such as
- the owner and permissioning.
+ To list all models, send a GET request to `/v2/gen-ai/models`.
Args:
+ page: page number.
+
+ per_page: items per page.
+
+ public_only: only include models that are publicly available.
+
+ usecases: include only models defined for the listed usecases.
+
+ - MODEL_USECASE_UNKNOWN: The use case of the model is unknown
+ - MODEL_USECASE_AGENT: The model may be used in an agent
+ - MODEL_USECASE_FINETUNED: The model may be used for fine tuning
+ - MODEL_USECASE_KNOWLEDGEBASE: The model may be used for knowledge bases
+ (embedding models)
+ - MODEL_USECASE_GUARDRAIL: The model may be used for guardrails
+ - MODEL_USECASE_REASONING: The model usecase for reasoning
+ - MODEL_USECASE_SERVERLESS: The model usecase for serverless inference
+
extra_headers: Send extra headers
extra_query: Add additional query parameters to the request
@@ -64,36 +98,24 @@ def retrieve(
timeout: Override the client-level default timeout for this request, in seconds
"""
- if not model:
- raise ValueError(f"Expected a non-empty value for `model` but received {model!r}")
return self._get(
- f"/models/{model}"
+ "/v2/gen-ai/models"
if self._client._base_url_overridden
- else f"https://inference.do-ai.run/v1/models/{model}",
+ else "https://api.digitalocean.com/v2/gen-ai/models",
options=make_request_options(
- extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
- ),
- cast_to=Model,
- )
-
- def list(
- self,
- *,
- # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
- # The extra values given here take precedence over values defined on the client or passed to this method.
- extra_headers: Headers | None = None,
- extra_query: Query | None = None,
- extra_body: Body | None = None,
- timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
- ) -> ModelListResponse:
- """
- Lists the currently available models, and provides basic information about each
- one such as the owner and availability.
- """
- return self._get(
- "/models" if self._client._base_url_overridden else "https://inference.do-ai.run/v1/models",
- options=make_request_options(
- extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ extra_headers=extra_headers,
+ extra_query=extra_query,
+ extra_body=extra_body,
+ timeout=timeout,
+ query=maybe_transform(
+ {
+ "page": page,
+ "per_page": per_page,
+ "public_only": public_only,
+ "usecases": usecases,
+ },
+ model_list_params.ModelListParams,
+ ),
),
cast_to=ModelListResponse,
)
@@ -119,22 +141,52 @@ def with_streaming_response(self) -> AsyncModelsResourceWithStreamingResponse:
"""
return AsyncModelsResourceWithStreamingResponse(self)
- async def retrieve(
+ async def list(
self,
- model: str,
*,
+ page: int | NotGiven = NOT_GIVEN,
+ per_page: int | NotGiven = NOT_GIVEN,
+ public_only: bool | NotGiven = NOT_GIVEN,
+ usecases: List[
+ Literal[
+ "MODEL_USECASE_UNKNOWN",
+ "MODEL_USECASE_AGENT",
+ "MODEL_USECASE_FINETUNED",
+ "MODEL_USECASE_KNOWLEDGEBASE",
+ "MODEL_USECASE_GUARDRAIL",
+ "MODEL_USECASE_REASONING",
+ "MODEL_USECASE_SERVERLESS",
+ ]
+ ]
+ | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
extra_query: Query | None = None,
extra_body: Body | None = None,
timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
- ) -> Model:
+ ) -> ModelListResponse:
"""
- Retrieves a model instance, providing basic information about the model such as
- the owner and permissioning.
+ To list all models, send a GET request to `/v2/gen-ai/models`.
Args:
+ page: page number.
+
+ per_page: items per page.
+
+ public_only: only include models that are publicly available.
+
+ usecases: include only models defined for the listed usecases.
+
+ - MODEL_USECASE_UNKNOWN: The use case of the model is unknown
+ - MODEL_USECASE_AGENT: The model may be used in an agent
+ - MODEL_USECASE_FINETUNED: The model may be used for fine tuning
+ - MODEL_USECASE_KNOWLEDGEBASE: The model may be used for knowledge bases
+ (embedding models)
+ - MODEL_USECASE_GUARDRAIL: The model may be used for guardrails
+ - MODEL_USECASE_REASONING: The model usecase for reasoning
+ - MODEL_USECASE_SERVERLESS: The model usecase for serverless inference
+
extra_headers: Send extra headers
extra_query: Add additional query parameters to the request
@@ -143,36 +195,24 @@ async def retrieve(
timeout: Override the client-level default timeout for this request, in seconds
"""
- if not model:
- raise ValueError(f"Expected a non-empty value for `model` but received {model!r}")
return await self._get(
- f"/models/{model}"
+ "/v2/gen-ai/models"
if self._client._base_url_overridden
- else f"https://inference.do-ai.run/v1/models/{model}",
- options=make_request_options(
- extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
- ),
- cast_to=Model,
- )
-
- async def list(
- self,
- *,
- # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
- # The extra values given here take precedence over values defined on the client or passed to this method.
- extra_headers: Headers | None = None,
- extra_query: Query | None = None,
- extra_body: Body | None = None,
- timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
- ) -> ModelListResponse:
- """
- Lists the currently available models, and provides basic information about each
- one such as the owner and availability.
- """
- return await self._get(
- "/models" if self._client._base_url_overridden else "https://inference.do-ai.run/v1/models",
+ else "https://api.digitalocean.com/v2/gen-ai/models",
options=make_request_options(
- extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ extra_headers=extra_headers,
+ extra_query=extra_query,
+ extra_body=extra_body,
+ timeout=timeout,
+ query=await async_maybe_transform(
+ {
+ "page": page,
+ "per_page": per_page,
+ "public_only": public_only,
+ "usecases": usecases,
+ },
+ model_list_params.ModelListParams,
+ ),
),
cast_to=ModelListResponse,
)
@@ -182,9 +222,6 @@ class ModelsResourceWithRawResponse:
def __init__(self, models: ModelsResource) -> None:
self._models = models
- self.retrieve = to_raw_response_wrapper(
- models.retrieve,
- )
self.list = to_raw_response_wrapper(
models.list,
)
@@ -194,9 +231,6 @@ class AsyncModelsResourceWithRawResponse:
def __init__(self, models: AsyncModelsResource) -> None:
self._models = models
- self.retrieve = async_to_raw_response_wrapper(
- models.retrieve,
- )
self.list = async_to_raw_response_wrapper(
models.list,
)
@@ -206,9 +240,6 @@ class ModelsResourceWithStreamingResponse:
def __init__(self, models: ModelsResource) -> None:
self._models = models
- self.retrieve = to_streamed_response_wrapper(
- models.retrieve,
- )
self.list = to_streamed_response_wrapper(
models.list,
)
@@ -218,9 +249,6 @@ class AsyncModelsResourceWithStreamingResponse:
def __init__(self, models: AsyncModelsResource) -> None:
self._models = models
- self.retrieve = async_to_streamed_response_wrapper(
- models.retrieve,
- )
self.list = async_to_streamed_response_wrapper(
models.list,
)
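
A sketch of the reworked listing call; the filter values come from the `Literal` above, while the pagination numbers are arbitrary examples:

```python
from gradientai import GradientAI

client = GradientAI()

# Models are now listed from /v2/gen-ai/models with optional paging and filters.
page = client.models.list(
    page=1,
    per_page=25,
    public_only=True,
    usecases=["MODEL_USECASE_AGENT", "MODEL_USECASE_REASONING"],
)
for model in page.models or []:
    print(model.name, model.uuid)
```
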
diff --git a/src/gradientai/types/__init__.py b/src/gradientai/types/__init__.py
index 626c3840..4ec63b92 100644
--- a/src/gradientai/types/__init__.py
+++ b/src/gradientai/types/__init__.py
@@ -2,14 +2,15 @@
from __future__ import annotations
-from .model import Model as Model
-from .shared import APIMeta as APIMeta, APILinks as APILinks
+from .shared import APIMeta as APIMeta, APILinks as APILinks, ChatCompletionTokenLogprob as ChatCompletionTokenLogprob
from .api_agent import APIAgent as APIAgent
+from .api_model import APIModel as APIModel
from .api_agreement import APIAgreement as APIAgreement
from .api_workspace import APIWorkspace as APIWorkspace
from .api_agent_model import APIAgentModel as APIAgentModel
from .agent_list_params import AgentListParams as AgentListParams
from .api_model_version import APIModelVersion as APIModelVersion
+from .model_list_params import ModelListParams as ModelListParams
from .api_knowledge_base import APIKnowledgeBase as APIKnowledgeBase
from .region_list_params import RegionListParams as RegionListParams
from .agent_create_params import AgentCreateParams as AgentCreateParams
diff --git a/src/gradientai/types/agents/chat/__init__.py b/src/gradientai/types/agents/chat/__init__.py
new file mode 100644
index 00000000..9384ac14
--- /dev/null
+++ b/src/gradientai/types/agents/chat/__init__.py
@@ -0,0 +1,6 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from .completion_create_params import CompletionCreateParams as CompletionCreateParams
+from .completion_create_response import CompletionCreateResponse as CompletionCreateResponse
diff --git a/src/gradientai/types/agents/chat/completion_create_params.py b/src/gradientai/types/agents/chat/completion_create_params.py
new file mode 100644
index 00000000..11d032ff
--- /dev/null
+++ b/src/gradientai/types/agents/chat/completion_create_params.py
@@ -0,0 +1,185 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Dict, List, Union, Iterable, Optional
+from typing_extensions import Literal, Required, TypeAlias, TypedDict
+
+__all__ = [
+ "CompletionCreateParams",
+ "Message",
+ "MessageChatCompletionRequestSystemMessage",
+ "MessageChatCompletionRequestDeveloperMessage",
+ "MessageChatCompletionRequestUserMessage",
+ "MessageChatCompletionRequestAssistantMessage",
+ "StreamOptions",
+]
+
+
+class CompletionCreateParams(TypedDict, total=False):
+ messages: Required[Iterable[Message]]
+ """A list of messages comprising the conversation so far."""
+
+ model: Required[str]
+ """Model ID used to generate the response."""
+
+ frequency_penalty: Optional[float]
+ """Number between -2.0 and 2.0.
+
+ Positive values penalize new tokens based on their existing frequency in the
+ text so far, decreasing the model's likelihood to repeat the same line verbatim.
+ """
+
+ logit_bias: Optional[Dict[str, int]]
+ """Modify the likelihood of specified tokens appearing in the completion.
+
+ Accepts a JSON object that maps tokens (specified by their token ID in the
+ tokenizer) to an associated bias value from -100 to 100. Mathematically, the
+ bias is added to the logits generated by the model prior to sampling. The exact
+ effect will vary per model, but values between -1 and 1 should decrease or
+ increase likelihood of selection; values like -100 or 100 should result in a ban
+ or exclusive selection of the relevant token.
+ """
+
+ logprobs: Optional[bool]
+ """Whether to return log probabilities of the output tokens or not.
+
+ If true, returns the log probabilities of each output token returned in the
+ `content` of `message`.
+ """
+
+ max_completion_tokens: Optional[int]
+ """
+ The maximum number of completion tokens that may be used over the course of the
+ run. The run will make a best effort to use only the number of completion tokens
+ specified, across multiple turns of the run.
+ """
+
+ max_tokens: Optional[int]
+ """The maximum number of tokens that can be generated in the completion.
+
+ The token count of your prompt plus `max_tokens` cannot exceed the model's
+ context length.
+ """
+
+ metadata: Optional[Dict[str, str]]
+ """Set of 16 key-value pairs that can be attached to an object.
+
+ This can be useful for storing additional information about the object in a
+ structured format, and querying for objects via API or the dashboard.
+
+ Keys are strings with a maximum length of 64 characters. Values are strings with
+ a maximum length of 512 characters.
+ """
+
+ n: Optional[int]
+ """How many chat completion choices to generate for each input message.
+
+ Note that you will be charged based on the number of generated tokens across all
+ of the choices. Keep `n` as `1` to minimize costs.
+ """
+
+ presence_penalty: Optional[float]
+ """Number between -2.0 and 2.0.
+
+ Positive values penalize new tokens based on whether they appear in the text so
+ far, increasing the model's likelihood to talk about new topics.
+ """
+
+ stop: Union[Optional[str], List[str], None]
+ """Up to 4 sequences where the API will stop generating further tokens.
+
+ The returned text will not contain the stop sequence.
+ """
+
+ stream: Optional[bool]
+ """
+ If set to true, the model response data will be streamed to the client as it is
+ generated using server-sent events.
+ """
+
+ stream_options: Optional[StreamOptions]
+ """Options for streaming response. Only set this when you set `stream: true`."""
+
+ temperature: Optional[float]
+ """What sampling temperature to use, between 0 and 2.
+
+ Higher values like 0.8 will make the output more random, while lower values like
+ 0.2 will make it more focused and deterministic. We generally recommend altering
+ this or `top_p` but not both.
+ """
+
+ top_logprobs: Optional[int]
+ """
+ An integer between 0 and 20 specifying the number of most likely tokens to
+ return at each token position, each with an associated log probability.
+ `logprobs` must be set to `true` if this parameter is used.
+ """
+
+ top_p: Optional[float]
+ """
+ An alternative to sampling with temperature, called nucleus sampling, where the
+ model considers the results of the tokens with top_p probability mass. So 0.1
+ means only the tokens comprising the top 10% probability mass are considered.
+
+ We generally recommend altering this or `temperature` but not both.
+ """
+
+ user: str
+ """
+ A unique identifier representing your end-user, which can help DigitalOcean to
+ monitor and detect abuse.
+ """
+
+
+class MessageChatCompletionRequestSystemMessage(TypedDict, total=False):
+ content: Required[Union[str, List[str]]]
+ """The contents of the system message."""
+
+ role: Required[Literal["system"]]
+ """The role of the messages author, in this case `system`."""
+
+
+class MessageChatCompletionRequestDeveloperMessage(TypedDict, total=False):
+ content: Required[Union[str, List[str]]]
+ """The contents of the developer message."""
+
+ role: Required[Literal["developer"]]
+ """The role of the messages author, in this case `developer`."""
+
+
+class MessageChatCompletionRequestUserMessage(TypedDict, total=False):
+ content: Required[Union[str, List[str]]]
+ """The contents of the user message."""
+
+ role: Required[Literal["user"]]
+ """The role of the messages author, in this case `user`."""
+
+
+class MessageChatCompletionRequestAssistantMessage(TypedDict, total=False):
+ role: Required[Literal["assistant"]]
+ """The role of the messages author, in this case `assistant`."""
+
+ content: Union[str, List[str], None]
+ """The contents of the assistant message."""
+
+
+Message: TypeAlias = Union[
+ MessageChatCompletionRequestSystemMessage,
+ MessageChatCompletionRequestDeveloperMessage,
+ MessageChatCompletionRequestUserMessage,
+ MessageChatCompletionRequestAssistantMessage,
+]
+
+
+class StreamOptions(TypedDict, total=False):
+ include_usage: bool
+ """If set, an additional chunk will be streamed before the `data: [DONE]` message.
+
+ The `usage` field on this chunk shows the token usage statistics for the entire
+ request, and the `choices` field will always be an empty array.
+
+ All other chunks will also include a `usage` field, but with a null value.
+ **NOTE:** If the stream is interrupted, you may not receive the final usage
+ chunk which contains the total token usage for the request.
+ """
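
Because `Message` is a union of role-specific TypedDicts, request params can be written as plain dictionaries. A small sketch that type-checks against `CompletionCreateParams`; the model slug and message text are illustrative only:

```python
from gradientai.types.agents.chat import CompletionCreateParams

params: CompletionCreateParams = {
    "messages": [
        {"role": "system", "content": "You are a terse assistant."},
        {"role": "user", "content": "Summarize the release notes."},
    ],
    "model": "example-model",  # placeholder model slug
    "max_completion_tokens": 256,
    "stream": False,
}
```
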
diff --git a/src/gradientai/types/agents/chat/completion_create_response.py b/src/gradientai/types/agents/chat/completion_create_response.py
new file mode 100644
index 00000000..f2860c31
--- /dev/null
+++ b/src/gradientai/types/agents/chat/completion_create_response.py
@@ -0,0 +1,81 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import List, Optional
+from typing_extensions import Literal
+
+from ...._models import BaseModel
+from ...shared.chat_completion_token_logprob import ChatCompletionTokenLogprob
+
+__all__ = ["CompletionCreateResponse", "Choice", "ChoiceLogprobs", "ChoiceMessage", "Usage"]
+
+
+class ChoiceLogprobs(BaseModel):
+ content: Optional[List[ChatCompletionTokenLogprob]] = None
+ """A list of message content tokens with log probability information."""
+
+ refusal: Optional[List[ChatCompletionTokenLogprob]] = None
+ """A list of message refusal tokens with log probability information."""
+
+
+class ChoiceMessage(BaseModel):
+ content: Optional[str] = None
+ """The contents of the message."""
+
+ refusal: Optional[str] = None
+ """The refusal message generated by the model."""
+
+ role: Literal["assistant"]
+ """The role of the author of this message."""
+
+
+class Choice(BaseModel):
+ finish_reason: Literal["stop", "length"]
+ """The reason the model stopped generating tokens.
+
+ This will be `stop` if the model hit a natural stop point or a provided stop
+ sequence, or `length` if the maximum number of tokens specified in the request
+ was reached.
+ """
+
+ index: int
+ """The index of the choice in the list of choices."""
+
+ logprobs: Optional[ChoiceLogprobs] = None
+ """Log probability information for the choice."""
+
+ message: ChoiceMessage
+ """A chat completion message generated by the model."""
+
+
+class Usage(BaseModel):
+ completion_tokens: int
+ """Number of tokens in the generated completion."""
+
+ prompt_tokens: int
+ """Number of tokens in the prompt."""
+
+ total_tokens: int
+ """Total number of tokens used in the request (prompt + completion)."""
+
+
+class CompletionCreateResponse(BaseModel):
+ id: str
+ """A unique identifier for the chat completion."""
+
+ choices: List[Choice]
+ """A list of chat completion choices.
+
+ Can be more than one if `n` is greater than 1.
+ """
+
+ created: int
+ """The Unix timestamp (in seconds) of when the chat completion was created."""
+
+ model: str
+ """The model used for the chat completion."""
+
+ object: Literal["chat.completion"]
+ """The object type, which is always `chat.completion`."""
+
+ usage: Optional[Usage] = None
+ """Usage statistics for the completion request."""
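
A short helper sketching how the fields of the new response model fit together; it assumes a `CompletionCreateResponse` obtained from the create call and exists purely for illustration:

```python
from gradientai.types.agents.chat import CompletionCreateResponse


def summarize(completion: CompletionCreateResponse) -> None:
    """Print the key fields of a chat completion response."""
    choice = completion.choices[0]
    print(choice.finish_reason)    # "stop" or "length"
    print(choice.message.content)  # generated text, may be None

    if choice.logprobs and choice.logprobs.content:
        print(f"{len(choice.logprobs.content)} token logprobs returned")

    if completion.usage:
        print(f"{completion.usage.total_tokens} tokens used in total")
```
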
diff --git a/src/gradientai/types/api_model.py b/src/gradientai/types/api_model.py
new file mode 100644
index 00000000..c2bc1edd
--- /dev/null
+++ b/src/gradientai/types/api_model.py
@@ -0,0 +1,32 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Optional
+from datetime import datetime
+
+from .._models import BaseModel
+from .api_agreement import APIAgreement
+from .api_model_version import APIModelVersion
+
+__all__ = ["APIModel"]
+
+
+class APIModel(BaseModel):
+ agreement: Optional[APIAgreement] = None
+
+ created_at: Optional[datetime] = None
+
+ is_foundational: Optional[bool] = None
+
+ name: Optional[str] = None
+
+ parent_uuid: Optional[str] = None
+
+ updated_at: Optional[datetime] = None
+
+ upload_complete: Optional[bool] = None
+
+ url: Optional[str] = None
+
+ uuid: Optional[str] = None
+
+ version: Optional[APIModelVersion] = None
diff --git a/src/gradientai/types/chat/__init__.py b/src/gradientai/types/chat/__init__.py
index 59553f68..9384ac14 100644
--- a/src/gradientai/types/chat/__init__.py
+++ b/src/gradientai/types/chat/__init__.py
@@ -4,4 +4,3 @@
from .completion_create_params import CompletionCreateParams as CompletionCreateParams
from .completion_create_response import CompletionCreateResponse as CompletionCreateResponse
-from .chat_completion_token_logprob import ChatCompletionTokenLogprob as ChatCompletionTokenLogprob
diff --git a/src/gradientai/types/chat/completion_create_response.py b/src/gradientai/types/chat/completion_create_response.py
index 1ac59a28..1791373b 100644
--- a/src/gradientai/types/chat/completion_create_response.py
+++ b/src/gradientai/types/chat/completion_create_response.py
@@ -4,7 +4,7 @@
from typing_extensions import Literal
from ..._models import BaseModel
-from .chat_completion_token_logprob import ChatCompletionTokenLogprob
+from ..shared.chat_completion_token_logprob import ChatCompletionTokenLogprob
__all__ = ["CompletionCreateResponse", "Choice", "ChoiceLogprobs", "ChoiceMessage", "Usage"]
diff --git a/src/gradientai/types/model.py b/src/gradientai/types/model.py
deleted file mode 100644
index 2631ee8d..00000000
--- a/src/gradientai/types/model.py
+++ /dev/null
@@ -1,21 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from typing_extensions import Literal
-
-from .._models import BaseModel
-
-__all__ = ["Model"]
-
-
-class Model(BaseModel):
- id: str
- """The model identifier, which can be referenced in the API endpoints."""
-
- created: int
- """The Unix timestamp (in seconds) when the model was created."""
-
- object: Literal["model"]
- """The object type, which is always "model"."""
-
- owned_by: str
- """The organization that owns the model."""
diff --git a/src/gradientai/types/model_list_params.py b/src/gradientai/types/model_list_params.py
new file mode 100644
index 00000000..4abc1dc1
--- /dev/null
+++ b/src/gradientai/types/model_list_params.py
@@ -0,0 +1,42 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import List
+from typing_extensions import Literal, TypedDict
+
+__all__ = ["ModelListParams"]
+
+
+class ModelListParams(TypedDict, total=False):
+ page: int
+ """page number."""
+
+ per_page: int
+ """items per page."""
+
+ public_only: bool
+ """only include models that are publicly available."""
+
+ usecases: List[
+ Literal[
+ "MODEL_USECASE_UNKNOWN",
+ "MODEL_USECASE_AGENT",
+ "MODEL_USECASE_FINETUNED",
+ "MODEL_USECASE_KNOWLEDGEBASE",
+ "MODEL_USECASE_GUARDRAIL",
+ "MODEL_USECASE_REASONING",
+ "MODEL_USECASE_SERVERLESS",
+ ]
+ ]
+ """include only models defined for the listed usecases.
+
+ - MODEL_USECASE_UNKNOWN: The use case of the model is unknown
+ - MODEL_USECASE_AGENT: The model maybe used in an agent
+ - MODEL_USECASE_FINETUNED: The model maybe used for fine tuning
+ - MODEL_USECASE_KNOWLEDGEBASE: The model maybe used for knowledge bases
+ (embedding models)
+ - MODEL_USECASE_GUARDRAIL: The model maybe used for guardrails
+ - MODEL_USECASE_REASONING: The model usecase for reasoning
+ - MODEL_USECASE_SERVERLESS: The model usecase for serverless inference
+ """
diff --git a/src/gradientai/types/model_list_response.py b/src/gradientai/types/model_list_response.py
index 8f835449..47651759 100644
--- a/src/gradientai/types/model_list_response.py
+++ b/src/gradientai/types/model_list_response.py
@@ -1,15 +1,18 @@
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-from typing import List
-from typing_extensions import Literal
+from typing import List, Optional
-from .model import Model
from .._models import BaseModel
+from .api_model import APIModel
+from .shared.api_meta import APIMeta
+from .shared.api_links import APILinks
__all__ = ["ModelListResponse"]
class ModelListResponse(BaseModel):
- data: List[Model]
+ links: Optional[APILinks] = None
- object: Literal["list"]
+ meta: Optional[APIMeta] = None
+
+ models: Optional[List[APIModel]] = None
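
Putting the new `ModelListParams` and this reshaped `ModelListResponse` together, a minimal usage sketch follows. It assumes the `client.models.list` entry point shown in the updated tests further down and uses only the fields defined in this diff.

```python
# Sketch under the assumption that client.models.list accepts the
# ModelListParams fields above and returns this ModelListResponse.
from gradientai import GradientAI

client = GradientAI()

response = client.models.list(
    page=1,
    per_page=25,
    public_only=True,
    usecases=["MODEL_USECASE_AGENT"],
)

for model in response.models or []:  # `models` is Optional[List[APIModel]]
    print(model.name, model.uuid)

print(response.meta, response.links)  # pagination metadata, when present
```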
diff --git a/src/gradientai/types/shared/__init__.py b/src/gradientai/types/shared/__init__.py
index 5f02d62f..dc71bdd3 100644
--- a/src/gradientai/types/shared/__init__.py
+++ b/src/gradientai/types/shared/__init__.py
@@ -2,3 +2,4 @@
from .api_meta import APIMeta as APIMeta
from .api_links import APILinks as APILinks
+from .chat_completion_token_logprob import ChatCompletionTokenLogprob as ChatCompletionTokenLogprob
diff --git a/src/gradientai/types/chat/chat_completion_token_logprob.py b/src/gradientai/types/shared/chat_completion_token_logprob.py
similarity index 100%
rename from src/gradientai/types/chat/chat_completion_token_logprob.py
rename to src/gradientai/types/shared/chat_completion_token_logprob.py
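
With the rename above, the logprob type is re-exported from the shared namespace rather than `gradientai.types.chat`; a sketch of the corresponding import change, assuming only the re-exports visible in this diff:

```python
# Old import path, removed from gradientai/types/chat/__init__.py above:
# from gradientai.types.chat import ChatCompletionTokenLogprob

# New import path after the move to the shared namespace:
from gradientai.types.shared import ChatCompletionTokenLogprob
```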
diff --git a/tests/api_resources/agents/chat/__init__.py b/tests/api_resources/agents/chat/__init__.py
new file mode 100644
index 00000000..fd8019a9
--- /dev/null
+++ b/tests/api_resources/agents/chat/__init__.py
@@ -0,0 +1 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
diff --git a/tests/api_resources/agents/chat/test_completions.py b/tests/api_resources/agents/chat/test_completions.py
new file mode 100644
index 00000000..89d531a5
--- /dev/null
+++ b/tests/api_resources/agents/chat/test_completions.py
@@ -0,0 +1,186 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+import os
+from typing import Any, cast
+
+import pytest
+
+from gradientai import GradientAI, AsyncGradientAI
+from tests.utils import assert_matches_type
+from gradientai.types.agents.chat import CompletionCreateResponse
+
+base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010")
+
+
+class TestCompletions:
+ parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"])
+
+ @pytest.mark.skip()
+ @parametrize
+ def test_method_create(self, client: GradientAI) -> None:
+ completion = client.agents.chat.completions.create(
+ messages=[
+ {
+ "content": "string",
+ "role": "system",
+ }
+ ],
+ model="llama3-8b-instruct",
+ )
+ assert_matches_type(CompletionCreateResponse, completion, path=["response"])
+
+ @pytest.mark.skip()
+ @parametrize
+ def test_method_create_with_all_params(self, client: GradientAI) -> None:
+ completion = client.agents.chat.completions.create(
+ messages=[
+ {
+ "content": "string",
+ "role": "system",
+ }
+ ],
+ model="llama3-8b-instruct",
+ frequency_penalty=-2,
+ logit_bias={"foo": 0},
+ logprobs=True,
+ max_completion_tokens=256,
+ max_tokens=0,
+ metadata={"foo": "string"},
+ n=1,
+ presence_penalty=-2,
+ stop="\n",
+ stream=True,
+ stream_options={"include_usage": True},
+ temperature=1,
+ top_logprobs=0,
+ top_p=1,
+ user="user-1234",
+ )
+ assert_matches_type(CompletionCreateResponse, completion, path=["response"])
+
+ @pytest.mark.skip()
+ @parametrize
+ def test_raw_response_create(self, client: GradientAI) -> None:
+ response = client.agents.chat.completions.with_raw_response.create(
+ messages=[
+ {
+ "content": "string",
+ "role": "system",
+ }
+ ],
+ model="llama3-8b-instruct",
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ completion = response.parse()
+ assert_matches_type(CompletionCreateResponse, completion, path=["response"])
+
+ @pytest.mark.skip()
+ @parametrize
+ def test_streaming_response_create(self, client: GradientAI) -> None:
+ with client.agents.chat.completions.with_streaming_response.create(
+ messages=[
+ {
+ "content": "string",
+ "role": "system",
+ }
+ ],
+ model="llama3-8b-instruct",
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ completion = response.parse()
+ assert_matches_type(CompletionCreateResponse, completion, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+
+class TestAsyncCompletions:
+ parametrize = pytest.mark.parametrize(
+ "async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"]
+ )
+
+ @pytest.mark.skip()
+ @parametrize
+ async def test_method_create(self, async_client: AsyncGradientAI) -> None:
+ completion = await async_client.agents.chat.completions.create(
+ messages=[
+ {
+ "content": "string",
+ "role": "system",
+ }
+ ],
+ model="llama3-8b-instruct",
+ )
+ assert_matches_type(CompletionCreateResponse, completion, path=["response"])
+
+ @pytest.mark.skip()
+ @parametrize
+ async def test_method_create_with_all_params(self, async_client: AsyncGradientAI) -> None:
+ completion = await async_client.agents.chat.completions.create(
+ messages=[
+ {
+ "content": "string",
+ "role": "system",
+ }
+ ],
+ model="llama3-8b-instruct",
+ frequency_penalty=-2,
+ logit_bias={"foo": 0},
+ logprobs=True,
+ max_completion_tokens=256,
+ max_tokens=0,
+ metadata={"foo": "string"},
+ n=1,
+ presence_penalty=-2,
+ stop="\n",
+ stream=True,
+ stream_options={"include_usage": True},
+ temperature=1,
+ top_logprobs=0,
+ top_p=1,
+ user="user-1234",
+ )
+ assert_matches_type(CompletionCreateResponse, completion, path=["response"])
+
+ @pytest.mark.skip()
+ @parametrize
+ async def test_raw_response_create(self, async_client: AsyncGradientAI) -> None:
+ response = await async_client.agents.chat.completions.with_raw_response.create(
+ messages=[
+ {
+ "content": "string",
+ "role": "system",
+ }
+ ],
+ model="llama3-8b-instruct",
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ completion = await response.parse()
+ assert_matches_type(CompletionCreateResponse, completion, path=["response"])
+
+ @pytest.mark.skip()
+ @parametrize
+ async def test_streaming_response_create(self, async_client: AsyncGradientAI) -> None:
+ async with async_client.agents.chat.completions.with_streaming_response.create(
+ messages=[
+ {
+ "content": "string",
+ "role": "system",
+ }
+ ],
+ model="llama3-8b-instruct",
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ completion = await response.parse()
+ assert_matches_type(CompletionCreateResponse, completion, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
diff --git a/tests/api_resources/test_models.py b/tests/api_resources/test_models.py
index afee0c1f..5e119f71 100644
--- a/tests/api_resources/test_models.py
+++ b/tests/api_resources/test_models.py
@@ -9,7 +9,7 @@
from gradientai import GradientAI, AsyncGradientAI
from tests.utils import assert_matches_type
-from gradientai.types import Model, ModelListResponse
+from gradientai.types import ModelListResponse
base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010")
@@ -19,50 +19,19 @@ class TestModels:
@pytest.mark.skip()
@parametrize
- def test_method_retrieve(self, client: GradientAI) -> None:
- model = client.models.retrieve(
- "llama3-8b-instruct",
- )
- assert_matches_type(Model, model, path=["response"])
+ def test_method_list(self, client: GradientAI) -> None:
+ model = client.models.list()
+ assert_matches_type(ModelListResponse, model, path=["response"])
@pytest.mark.skip()
@parametrize
- def test_raw_response_retrieve(self, client: GradientAI) -> None:
- response = client.models.with_raw_response.retrieve(
- "llama3-8b-instruct",
+ def test_method_list_with_all_params(self, client: GradientAI) -> None:
+ model = client.models.list(
+ page=0,
+ per_page=0,
+ public_only=True,
+ usecases=["MODEL_USECASE_UNKNOWN"],
)
-
- assert response.is_closed is True
- assert response.http_request.headers.get("X-Stainless-Lang") == "python"
- model = response.parse()
- assert_matches_type(Model, model, path=["response"])
-
- @pytest.mark.skip()
- @parametrize
- def test_streaming_response_retrieve(self, client: GradientAI) -> None:
- with client.models.with_streaming_response.retrieve(
- "llama3-8b-instruct",
- ) as response:
- assert not response.is_closed
- assert response.http_request.headers.get("X-Stainless-Lang") == "python"
-
- model = response.parse()
- assert_matches_type(Model, model, path=["response"])
-
- assert cast(Any, response.is_closed) is True
-
- @pytest.mark.skip()
- @parametrize
- def test_path_params_retrieve(self, client: GradientAI) -> None:
- with pytest.raises(ValueError, match=r"Expected a non-empty value for `model` but received ''"):
- client.models.with_raw_response.retrieve(
- "",
- )
-
- @pytest.mark.skip()
- @parametrize
- def test_method_list(self, client: GradientAI) -> None:
- model = client.models.list()
assert_matches_type(ModelListResponse, model, path=["response"])
@pytest.mark.skip()
@@ -95,50 +64,19 @@ class TestAsyncModels:
@pytest.mark.skip()
@parametrize
- async def test_method_retrieve(self, async_client: AsyncGradientAI) -> None:
- model = await async_client.models.retrieve(
- "llama3-8b-instruct",
- )
- assert_matches_type(Model, model, path=["response"])
+ async def test_method_list(self, async_client: AsyncGradientAI) -> None:
+ model = await async_client.models.list()
+ assert_matches_type(ModelListResponse, model, path=["response"])
@pytest.mark.skip()
@parametrize
- async def test_raw_response_retrieve(self, async_client: AsyncGradientAI) -> None:
- response = await async_client.models.with_raw_response.retrieve(
- "llama3-8b-instruct",
+ async def test_method_list_with_all_params(self, async_client: AsyncGradientAI) -> None:
+ model = await async_client.models.list(
+ page=0,
+ per_page=0,
+ public_only=True,
+ usecases=["MODEL_USECASE_UNKNOWN"],
)
-
- assert response.is_closed is True
- assert response.http_request.headers.get("X-Stainless-Lang") == "python"
- model = await response.parse()
- assert_matches_type(Model, model, path=["response"])
-
- @pytest.mark.skip()
- @parametrize
- async def test_streaming_response_retrieve(self, async_client: AsyncGradientAI) -> None:
- async with async_client.models.with_streaming_response.retrieve(
- "llama3-8b-instruct",
- ) as response:
- assert not response.is_closed
- assert response.http_request.headers.get("X-Stainless-Lang") == "python"
-
- model = await response.parse()
- assert_matches_type(Model, model, path=["response"])
-
- assert cast(Any, response.is_closed) is True
-
- @pytest.mark.skip()
- @parametrize
- async def test_path_params_retrieve(self, async_client: AsyncGradientAI) -> None:
- with pytest.raises(ValueError, match=r"Expected a non-empty value for `model` but received ''"):
- await async_client.models.with_raw_response.retrieve(
- "",
- )
-
- @pytest.mark.skip()
- @parametrize
- async def test_method_list(self, async_client: AsyncGradientAI) -> None:
- model = await async_client.models.list()
assert_matches_type(ModelListResponse, model, path=["response"])
@pytest.mark.skip()