Skip to content

Commit a6b3286

Browse files
authored
Add v1/messages and v1/embeddings (#1161)
1 parent d88ced4 commit a6b3286

27 files changed

Lines changed: 716 additions & 0 deletions

specification/DigitalOcean-public.v2.yaml

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -281,6 +281,12 @@ tags:
281281
description: |-
282282
Droplet autoscale pools manage automatic horizontal scaling for your applications based on resource usage (CPU, memory, or both) or a static configuration.
283283
284+
- name: Embeddings
285+
description: |-
286+
Create text embedding vectors via `POST /v1/embeddings` on the
287+
[Serverless Inference](https://docs.digitalocean.com/reference/api/api-reference/#tag/Serverless-Inference) base URL
288+
`https://inference.do-ai.run` (bearer model access key).
289+
284290
- name: Firewalls
285291
description: |-
286292
[DigitalOcean Cloud Firewalls](https://docs.digitalocean.com/products/networking/firewalls/)
@@ -2841,6 +2847,14 @@ paths:
28412847
post:
28422848
$ref: 'resources/inference/inference_create_chat_completion.yml'
28432849

2850+
/v1/messages:
2851+
post:
2852+
$ref: 'resources/inference/inference_create_messages.yml'
2853+
2854+
/v1/embeddings:
2855+
post:
2856+
$ref: 'resources/inference/inference_create_embeddings.yml'
2857+
28442858
/api/v1/chat/completions:
28452859
post:
28462860
$ref: 'resources/inference/agent_inference_create_chat_completion.yml'
@@ -2958,6 +2972,14 @@ components:
29582972
curl -X POST -H "Authorization: Bearer $MODEL_ACCESS_KEY" "https://inference.do-ai.run/v1/chat/completions"
29592973
```
29602974
2975+
```
2976+
curl -X POST -H "Content-Type: application/json" -H "Authorization: Bearer $MODEL_ACCESS_KEY" -d '{"model":"claude-opus-4-6","max_tokens":1024,"messages":[{"role":"user","content":"Hello"}]}' "https://inference.do-ai.run/v1/messages"
2977+
```
2978+
2979+
```
2980+
curl -X POST -H "Content-Type: application/json" -H "Authorization: Bearer $MODEL_ACCESS_KEY" -d '{"model":"qwen3-embedding-0.6b","input":["hello world","goodbye world"],"encoding_format":"float","user":"user-1234"}' "https://inference.do-ai.run/v1/embeddings"
2981+
```
2982+
29612983
**Agent Inference:**
29622984
29632985
```
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
lang: cURL
2+
source: |-
3+
curl -X POST \
4+
-H "Content-Type: application/json" \
5+
-H "Authorization: Bearer $MODEL_ACCESS_KEY" \
6+
-d '{"model":"qwen3-embedding-0.6b","input":["hello world","goodbye world"],"encoding_format":"float","user":"user-1234"}' \
7+
"https://inference.do-ai.run/v1/embeddings"
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
lang: cURL
2+
source: |-
3+
curl -X POST \
4+
-H "Content-Type: application/json" \
5+
-H "Authorization: Bearer $MODEL_ACCESS_KEY" \
6+
-d '{"model": "claude-opus-4-6", "max_tokens": 1024, "messages": [{"role": "user", "content": "What is the capital of Portugal?"}]}' \
7+
"https://inference.do-ai.run/v1/messages"
Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
operationId: inference_create_embedding
2+
summary: Create embedding
3+
description: >
4+
Create vector embeddings for one or more text inputs. OpenAI-compatible request and
5+
response. Unknown fields in the request body are rejected. There is no streaming
6+
response for this endpoint.
7+
tags:
8+
- Serverless Inference
9+
- Embeddings
10+
servers:
11+
- url: "https://inference.do-ai.run"
12+
description: production
13+
requestBody:
14+
required: true
15+
content:
16+
application/json:
17+
schema:
18+
$ref: "models/embeddings_request.yml"
19+
responses:
20+
"200":
21+
description: Embeddings and usage for the given `input`, in order.
22+
headers:
23+
ratelimit-limit:
24+
$ref: '../../shared/headers.yml#/ratelimit-limit'
25+
ratelimit-remaining:
26+
$ref: '../../shared/headers.yml#/ratelimit-remaining'
27+
ratelimit-reset:
28+
$ref: '../../shared/headers.yml#/ratelimit-reset'
29+
content:
30+
application/json:
31+
schema:
32+
$ref: "models/embeddings_response.yml"
33+
"401":
34+
$ref: '../../shared/responses/unauthorized.yml'
35+
"429":
36+
$ref: '../../shared/responses/too_many_requests.yml'
37+
"500":
38+
$ref: '../../shared/responses/server_error.yml'
39+
default:
40+
$ref: '../../shared/responses/unexpected_error.yml'
41+
x-codeSamples:
42+
- $ref: 'examples/curl/inference_create_embeddings.yml'
43+
security:
44+
- inference_bearer_auth: []
Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
operationId: inference_create_messages
2+
summary: Create the next assistant message
3+
description: >
4+
Send a structured list of input messages with text and/or image content, and the model
5+
will generate the next message in the conversation.
6+
tags:
7+
- Serverless Inference
8+
servers:
9+
- url: "https://inference.do-ai.run"
10+
description: production
11+
x-inference-base-url: "https://inference.do-ai.run"
12+
requestBody:
13+
required: true
14+
content:
15+
application/json:
16+
schema:
17+
$ref: "models/messages_create_request.yml"
18+
responses:
19+
"200":
20+
description: >
21+
Successful generation. When `stream` is true, the body is `text/event-stream` with
22+
server-sent event (SSE) payloads; otherwise `application/json` with
23+
`CreateMessageResponse`.
24+
headers:
25+
ratelimit-limit:
26+
$ref: '../../shared/headers.yml#/ratelimit-limit'
27+
ratelimit-remaining:
28+
$ref: '../../shared/headers.yml#/ratelimit-remaining'
29+
ratelimit-reset:
30+
$ref: '../../shared/headers.yml#/ratelimit-reset'
31+
content:
32+
application/json:
33+
schema:
34+
$ref: "models/messages_create_response.yml"
35+
text/event-stream:
36+
schema:
37+
$ref: "models/messages_stream_event.yml"
38+
"400":
39+
description: Invalid request body, validation error, or policy rejection.
40+
headers:
41+
ratelimit-limit:
42+
$ref: '../../shared/headers.yml#/ratelimit-limit'
43+
ratelimit-remaining:
44+
$ref: '../../shared/headers.yml#/ratelimit-remaining'
45+
ratelimit-reset:
46+
$ref: '../../shared/headers.yml#/ratelimit-reset'
47+
content:
48+
application/json:
49+
schema:
50+
$ref: "models/messages_create_error_response.yml"
51+
"401":
52+
$ref: '../../shared/responses/unauthorized.yml'
53+
"429":
54+
$ref: '../../shared/responses/too_many_requests.yml'
55+
"500":
56+
$ref: '../../shared/responses/server_error.yml'
57+
default:
58+
$ref: '../../shared/responses/unexpected_error.yml'
59+
x-codeSamples:
60+
- $ref: 'examples/curl/inference_create_messages.yml'
61+
security:
62+
- inference_bearer_auth: []
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
type: object
2+
description: One row in the embeddings `data` array, aligned with a single `input` item.
3+
required:
4+
- index
5+
- object
6+
- embedding
7+
properties:
8+
index:
9+
type: integer
10+
description: Zero-based index of the corresponding `input` item (0 when `input` is a string).
11+
example: 0
12+
object:
13+
type: string
14+
description: The object type, which is always `embedding`.
15+
enum:
16+
- embedding
17+
example: embedding
18+
embedding:
19+
description: The embedding vector, or a base64-encoded string when the request sets `encoding_format` to `base64`.
20+
example: [0.0123, -0.0456, 0.0001]
21+
oneOf:
22+
- type: array
23+
description: Float vector when encoding_format is float or omitted.
24+
items:
25+
type: number
26+
example: [0.0123, -0.0456, 0.0001]
27+
- type: string
28+
description: Base64 payload when encoding_format is base64.
29+
example: AGZ...encoded...
Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
type: object
2+
description: Request body for `POST /v1/embeddings` (OpenAI-compatible). Extra fields are rejected.
3+
required:
4+
- model
5+
- input
6+
additionalProperties: false
7+
properties:
8+
model:
9+
type: string
10+
description: Model id to use for embeddings. Must match a model your account can access.
11+
example: qwen3-embedding-0.6b
12+
input:
13+
description: A single string or 1–2048 strings; each string produces one row in `data`, in order.
14+
example: hello world
15+
oneOf:
16+
- type: string
17+
example: hello world
18+
- type: array
19+
minItems: 1
20+
maxItems: 2048
21+
items:
22+
type: string
23+
example: ["hello world", "goodbye world"]
24+
user:
25+
type: string
26+
description: Optional end-user identifier to help with abuse monitoring.
27+
example: user-1234
28+
encoding_format:
29+
type: string
30+
description: How embedding values are returned in each `data[].embedding` field.
31+
enum:
32+
- float
33+
- base64
34+
default: float
35+
example: float
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
type: object
2+
description: OpenAI-style embeddings response.
3+
required:
4+
- object
5+
- model
6+
- data
7+
- usage
8+
properties:
9+
object:
10+
type: string
11+
description: The object type, which is always the string `list`.
12+
enum:
13+
- list
14+
example: list
15+
model:
16+
type: string
17+
description: The embedding model that produced the vectors.
18+
example: qwen3-embedding-0.6b
19+
data:
20+
type: array
21+
description: One entry for each `input` string, in the same order.
22+
items:
23+
$ref: embedding_data_item.yml
24+
usage:
25+
$ref: embeddings_usage.yml
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
type: object
2+
description: Token usage for the embeddings request.
3+
required:
4+
- prompt_tokens
5+
- total_tokens
6+
properties:
7+
prompt_tokens:
8+
type: integer
9+
description: Number of input tokens used for the embedding.
10+
example: 6
11+
total_tokens:
12+
type: integer
13+
description: Total billable tokens for the request.
14+
example: 6
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
type: object
2+
description: >
3+
One turn in the conversation. Roles are `user` or `assistant` (no `system` role; use the
4+
top-level `system` field). Content may be a string (equivalent to a single text block) or
5+
an array of content blocks.
6+
required:
7+
- role
8+
- content
9+
properties:
10+
role:
11+
type: string
12+
description: Speaker role for this message.
13+
enum:
14+
- user
15+
- assistant
16+
example: user
17+
content:
18+
description: Message body as plain text or structured blocks.
19+
example: What is the capital of Portugal?
20+
oneOf:
21+
- type: string
22+
- type: array
23+
items:
24+
$ref: messages_request_content_block_param.yml

0 commit comments

Comments
 (0)