Skip to content

Commit a6b3286

Browse files
authored
Add v1/messages and v1/embeddings (#1161)
1 parent d88ced4 commit a6b3286

27 files changed

Lines changed: 716 additions & 0 deletions

specification/DigitalOcean-public.v2.yaml

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -281,6 +281,12 @@ tags:
281281
description: |-
282282
Droplet autoscale pools manage automatic horizontal scaling for your applications based on resource usage (CPU, memory, or both) or a static configuration.
283283
284+
- name: Embeddings
285+
description: |-
286+
Create text embedding vectors via `POST /v1/embeddings` on the
287+
[Serverless Inference](https://docs.digitalocean.com/reference/api/api-reference/#tag/Serverless-Inference) base URL
288+
`https://inference.do-ai.run` (bearer model access key).
289+
284290
- name: Firewalls
285291
description: |-
286292
[DigitalOcean Cloud Firewalls](https://docs.digitalocean.com/products/networking/firewalls/)
@@ -2841,6 +2847,14 @@ paths:
28412847
post:
28422848
$ref: 'resources/inference/inference_create_chat_completion.yml'
28432849

2850+
/v1/messages:
2851+
post:
2852+
$ref: 'resources/inference/inference_create_messages.yml'
2853+
2854+
/v1/embeddings:
2855+
post:
2856+
$ref: 'resources/inference/inference_create_embeddings.yml'
2857+
28442858
/api/v1/chat/completions:
28452859
post:
28462860
$ref: 'resources/inference/agent_inference_create_chat_completion.yml'
@@ -2958,6 +2972,14 @@ components:
29582972
curl -X POST -H "Authorization: Bearer $MODEL_ACCESS_KEY" "https://inference.do-ai.run/v1/chat/completions"
29592973
```
29602974
2975+
```
2976+
curl -X POST -H "Content-Type: application/json" -H "Authorization: Bearer $MODEL_ACCESS_KEY" -d '{"model":"claude-opus-4-6","max_tokens":1024,"messages":[{"role":"user","content":"Hello"}]}' "https://inference.do-ai.run/v1/messages"
2977+
```
2978+
2979+
```
2980+
curl -X POST -H "Content-Type: application/json" -H "Authorization: Bearer $MODEL_ACCESS_KEY" -d '{"model":"qwen3-embedding-0.6b","input":["hello world","goodbye world"],"encoding_format":"float","user":"user-1234"}' "https://inference.do-ai.run/v1/embeddings"
2981+
```
2982+
29612983
**Agent Inference:**
29622984
29632985
```
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
lang: cURL
2+
source: |-
3+
curl -X POST \
4+
-H "Content-Type: application/json" \
5+
-H "Authorization: Bearer $MODEL_ACCESS_KEY" \
6+
-d '{"model":"qwen3-embedding-0.6b","input":["hello world","goodbye world"],"encoding_format":"float","user":"user-1234"}' \
7+
"https://inference.do-ai.run/v1/embeddings"
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
lang: cURL
2+
source: |-
3+
curl -X POST \
4+
-H "Content-Type: application/json" \
5+
-H "Authorization: Bearer $MODEL_ACCESS_KEY" \
6+
-d '{"model": "claude-opus-4-6", "max_tokens": 1024, "messages": [{"role": "user", "content": "What is the capital of Portugal?"}]}' \
7+
"https://inference.do-ai.run/v1/messages"
Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
operationId: inference_create_embedding
2+
summary: Create embedding
3+
description: >
4+
Create vector embeddings for one or more text inputs. OpenAI-compatible request and
5+
response. Unknown fields in the request body are rejected. There is no streaming
6+
response for this endpoint.
7+
tags:
8+
- Serverless Inference
9+
- Embeddings
10+
servers:
11+
- url: "https://inference.do-ai.run"
12+
description: production
13+
requestBody:
14+
required: true
15+
content:
16+
application/json:
17+
schema:
18+
$ref: "models/embeddings_request.yml"
19+
responses:
20+
"200":
21+
description: Embeddings and usage for the given `input`, in order.
22+
headers:
23+
ratelimit-limit:
24+
$ref: '../../shared/headers.yml#/ratelimit-limit'
25+
ratelimit-remaining:
26+
$ref: '../../shared/headers.yml#/ratelimit-remaining'
27+
ratelimit-reset:
28+
$ref: '../../shared/headers.yml#/ratelimit-reset'
29+
content:
30+
application/json:
31+
schema:
32+
$ref: "models/embeddings_response.yml"
33+
"401":
34+
$ref: '../../shared/responses/unauthorized.yml'
35+
"429":
36+
$ref: '../../shared/responses/too_many_requests.yml'
37+
"500":
38+
$ref: '../../shared/responses/server_error.yml'
39+
default:
40+
$ref: '../../shared/responses/unexpected_error.yml'
41+
x-codeSamples:
42+
- $ref: 'examples/curl/inference_create_embeddings.yml'
43+
security:
44+
- inference_bearer_auth: []
Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
operationId: inference_create_messages
2+
summary: Create the next assistant message
3+
description: >
4+
Send a structured list of input messages with text and/or image content, and the model
5+
will generate the next message in the conversation.
6+
tags:
7+
- Serverless Inference
8+
servers:
9+
- url: "https://inference.do-ai.run"
10+
description: production
11+
x-inference-base-url: "https://inference.do-ai.run"
12+
requestBody:
13+
required: true
14+
content:
15+
application/json:
16+
schema:
17+
$ref: "models/messages_create_request.yml"
18+
responses:
19+
"200":
20+
description: >
21+
Successful generation. When `stream` is true, the body is `text/event-stream` with
22+
server-sent event (SSE) payloads; otherwise `application/json` with
23+
`CreateMessageResponse`.
24+
headers:
25+
ratelimit-limit:
26+
$ref: '../../shared/headers.yml#/ratelimit-limit'
27+
ratelimit-remaining:
28+
$ref: '../../shared/headers.yml#/ratelimit-remaining'
29+
ratelimit-reset:
30+
$ref: '../../shared/headers.yml#/ratelimit-reset'
31+
content:
32+
application/json:
33+
schema:
34+
$ref: "models/messages_create_response.yml"
35+
text/event-stream:
36+
schema:
37+
$ref: "models/messages_stream_event.yml"
38+
"400":
39+
description: Invalid request body, validation error, or policy rejection.
40+
headers:
41+
ratelimit-limit:
42+
$ref: '../../shared/headers.yml#/ratelimit-limit'
43+
ratelimit-remaining:
44+
$ref: '../../shared/headers.yml#/ratelimit-remaining'
45+
ratelimit-reset:
46+
$ref: '../../shared/headers.yml#/ratelimit-reset'
47+
content:
48+
application/json:
49+
schema:
50+
$ref: "models/messages_create_error_response.yml"
51+
"401":
52+
$ref: '../../shared/responses/unauthorized.yml'
53+
"429":
54+
$ref: '../../shared/responses/too_many_requests.yml'
55+
"500":
56+
$ref: '../../shared/responses/server_error.yml'
57+
default:
58+
$ref: '../../shared/responses/unexpected_error.yml'
59+
x-codeSamples:
60+
- $ref: 'examples/curl/inference_create_messages.yml'
61+
security:
62+
- inference_bearer_auth: []
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
type: object
2+
description: One row in the embeddings `data` array, aligned with a single `input` item.
3+
required:
4+
- index
5+
- object
6+
- embedding
7+
properties:
8+
index:
9+
type: integer
10+
description: Zero-based index of the corresponding `input` item (0 when `input` is a string).
11+
example: 0
12+
object:
13+
type: string
14+
description: The object type, which is always `embedding`.
15+
enum:
16+
- embedding
17+
example: embedding
18+
embedding:
19+
description: The embedding vector, or a base64-encoded string when the request sets `encoding_format` to `base64`.
20+
example: [0.0123, -0.0456, 0.0001]
21+
oneOf:
22+
- type: array
23+
description: Float vector when encoding_format is float or omitted.
24+
items:
25+
type: number
26+
example: [0.0123, -0.0456, 0.0001]
27+
- type: string
28+
description: Base64 payload when encoding_format is base64.
29+
example: AGZ...encoded...
Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
type: object
2+
description: Request body for `POST /v1/embeddings` (OpenAI-compatible). Extra fields are rejected.
3+
required:
4+
- model
5+
- input
6+
additionalProperties: false
7+
properties:
8+
model:
9+
type: string
10+
description: Model id to use for embeddings. Must match a model your account can access.
11+
example: qwen3-embedding-0.6b
12+
input:
13+
description: A single string or 1–2048 strings; each string produces one row in `data`, in order.
14+
example: hello world
15+
oneOf:
16+
- type: string
17+
example: hello world
18+
- type: array
19+
minItems: 1
20+
maxItems: 2048
21+
items:
22+
type: string
23+
example: ["hello world", "goodbye world"]
24+
user:
25+
type: string
26+
description: Optional end-user identifier to help with abuse monitoring.
27+
example: user-1234
28+
encoding_format:
29+
type: string
30+
description: How embedding values are returned in each `data[].embedding` field.
31+
enum:
32+
- float
33+
- base64
34+
default: float
35+
example: float
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
type: object
2+
description: OpenAI-style embeddings response.
3+
required:
4+
- object
5+
- model
6+
- data
7+
- usage
8+
properties:
9+
object:
10+
type: string
11+
description: The object type, which is always the string `list`.
12+
enum:
13+
- list
14+
example: list
15+
model:
16+
type: string
17+
description: The embedding model that produced the vectors.
18+
example: qwen3-embedding-0.6b
19+
data:
20+
type: array
21+
description: One entry for each `input` string, in the same order.
22+
items:
23+
$ref: embedding_data_item.yml
24+
usage:
25+
$ref: embeddings_usage.yml
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
type: object
2+
description: Token usage for the embeddings request.
3+
required:
4+
- prompt_tokens
5+
- total_tokens
6+
properties:
7+
prompt_tokens:
8+
type: integer
9+
description: Number of input tokens used for the embedding.
10+
example: 6
11+
total_tokens:
12+
type: integer
13+
description: Total billable tokens for the request.
14+
example: 6
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
type: object
2+
description: >
3+
One turn in the conversation. Roles are `user` or `assistant` (no `system` role; use the
4+
top-level `system` field). Content may be a string (equivalent to a single text block) or
5+
an array of content blocks.
6+
required:
7+
- role
8+
- content
9+
properties:
10+
role:
11+
type: string
12+
description: Speaker role for this message.
13+
enum:
14+
- user
15+
- assistant
16+
example: user
17+
content:
18+
description: Message body as plain text or structured blocks.
19+
example: What is the capital of Portugal?
20+
oneOf:
21+
- type: string
22+
- type: array
23+
items:
24+
$ref: messages_request_content_block_param.yml

0 commit comments

Comments
 (0)