Skip to content

Commit 49946b8

Browse files
authored
Added Batch Inference endpoints (#1164)
* Added Batch Inference endpoints * lint fix
1 parent a3ffbf2 commit 49946b8

24 files changed

Lines changed: 872 additions & 2 deletions

specification/DigitalOcean-public.v2.yaml

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -634,6 +634,19 @@ tags:
634634
**Note:** The Agent Inference API uses a customer-specific base URL (the agent endpoint)
635635
and is independent of the main DigitalOcean control-plane API (`https://api.digitalocean.com`).
636636
637+
- name: Batch Inference
638+
description: |-
639+
Batch Inference is an asynchronous processing capability designed to help
640+
you scale high-volume AI projects more efficiently. It is ideal for heavy-duty
641+
workloads like large-scale data classification, evaluations, and content
642+
enrichment. You can submit thousands or even millions of requests in a
643+
single job with a guaranteed results window of 24 hours. By utilizing
644+
off-peak GPU capacity, Batch Inference provides high-performance LLM
645+
access at a significantly reduced price point compared to standard
646+
synchronous APIs, making it a cost-effective choice for non-interactive
647+
workloads.
648+
649+
637650
x-tagGroups:
638651
- name: Public APIs
639652
tags:
@@ -692,6 +705,7 @@ x-tagGroups:
692705
tags:
693706
- Inference Introduction
694707
- Agent Inference
708+
- Batch Inference
695709
- Embeddings
696710
- Serverless Inference
697711

@@ -2906,6 +2920,32 @@ paths:
29062920
post:
29072921
$ref: 'resources/inference/inference_async_invoke.yml'
29082922

2923+
/v1/batches/files:
2924+
post:
2925+
$ref: 'resources/inference/inference_create_batch_file.yml'
2926+
2927+
/<upload_url>:
2928+
put:
2929+
$ref: 'resources/inference/inference_upload_batch_file.yml'
2930+
2931+
/v1/batches:
2932+
get:
2933+
$ref: 'resources/inference/inference_list_batches.yml'
2934+
post:
2935+
$ref: 'resources/inference/inference_create_batch.yml'
2936+
2937+
/v1/batches/{batch_id}:
2938+
get:
2939+
$ref: 'resources/inference/inference_get_batch.yml'
2940+
2941+
/v1/batches/{batch_id}/cancel:
2942+
post:
2943+
$ref: 'resources/inference/inference_cancel_batch.yml'
2944+
2945+
/v1/batches/{batch_id}/results:
2946+
get:
2947+
$ref: 'resources/inference/inference_get_batch_results.yml'
2948+
29092949
components:
29102950
securitySchemes:
29112951
bearer_auth:
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
lang: cURL
2+
source: |-
3+
curl -sS -X POST "https://inference.do-ai.run/v1/batches/0e9d1d35-3d1e-4d66-9a2f-8c7e0f6b3e21/cancel" \
4+
-H "Authorization: Bearer $MODEL_ACCESS_KEY" \
5+
-H "Content-Type: application/json" | jq
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
lang: cURL
2+
source: |-
3+
# OpenAI provider - Chat Completions
4+
curl -sS -X POST "https://inference.do-ai.run/v1/batches" \
5+
-H "Authorization: Bearer $MODEL_ACCESS_KEY" \
6+
-H "Content-Type: application/json" \
7+
-d '{
8+
"file_id": "a1b2c3d4-e5f6-4789-90ab-cdef12345678",
9+
"provider": "openai",
10+
"endpoint": "/v1/chat/completions",
11+
"completion_window": "24h",
12+
"request_id": "c7e3ad1e-20c3-4e47-9bf2-6f2a4d6a2f11"
13+
}'
14+
15+
# Anthropic provider - Messages
16+
curl -sS -X POST "https://inference.do-ai.run/v1/batches" \
17+
-H "Authorization: Bearer $MODEL_ACCESS_KEY" \
18+
-H "Content-Type: application/json" \
19+
-d '{
20+
"file_id": "a1b2c3d4-e5f6-4789-90ab-cdef12345678",
21+
"provider": "anthropic",
22+
"endpoint": "/v1/messages",
23+
"completion_window": "24h",
24+
"request_id": "2f1a7d9e-8c03-4d2c-9b7e-6f8e2b1a4c77"
25+
}'
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
lang: cURL
2+
source: |-
3+
curl -sS -X POST "https://inference.do-ai.run/v1/batches/files" \
4+
-H "Authorization: Bearer $MODEL_ACCESS_KEY" \
5+
-H "Content-Type: application/json" \
6+
-d '{
7+
"file_name": "batch_requests.jsonl"
8+
}' | jq
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
lang: cURL
2+
source: |-
3+
curl -sS -X GET "https://inference.do-ai.run/v1/batches/0e9d1d35-3d1e-4d66-9a2f-8c7e0f6b3e21" \
4+
-H "Authorization: Bearer $MODEL_ACCESS_KEY" \
5+
-H "Content-Type: application/json"
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
lang: cURL
2+
source: |-
3+
curl -sS -X GET "https://inference.do-ai.run/v1/batches/0e9d1d35-3d1e-4d66-9a2f-8c7e0f6b3e21/results" \
4+
-H "Authorization: Bearer $MODEL_ACCESS_KEY" \
5+
-H "Content-Type: application/json" | jq
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
lang: cURL
2+
source: |-
3+
curl -sS -X GET "https://inference.do-ai.run/v1/batches?limit=20" \
4+
-H "Authorization: Bearer $MODEL_ACCESS_KEY" \
5+
-H "Content-Type: application/json" | jq
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
lang: cURL
2+
source: |-
3+
# UPLOAD_URL is the exact upload_url returned by POST /v1/batches/files.
4+
# Use it verbatim; do not modify the host, path, or query string.
5+
curl -X PUT "$UPLOAD_URL" \
6+
-H "Content-Type: application/jsonl" \
7+
--data-binary "@eval_prompts_v1.jsonl"
Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
operationId: inference_cancel_batch
2+
summary: Cancel a Batch Inference Job
3+
description: >
4+
Requests cancellation of a batch job. The job transitions to `cancelling`
5+
and, once in-flight requests drain, to `cancelled`. Jobs already in a
6+
terminal state (`completed`, `failed`, `expired`, `cancelled`) cannot be
7+
cancelled; such requests are rejected with `409 Conflict`.
8+
9+
10+
Partial results produced before cancellation remain available via
11+
`GET /v1/batches/{batch_id}/results`.
12+
tags:
13+
- Batch Inference
14+
servers:
15+
- url: "https://inference.do-ai.run"
16+
description: production
17+
x-inference-base-url: "https://inference.do-ai.run"
18+
parameters:
19+
- in: path
20+
name: batch_id
21+
description: The batch job identifier.
22+
required: true
23+
schema:
24+
type: string
25+
format: uuid
26+
example: "0e9d1d35-3d1e-4d66-9a2f-8c7e0f6b3e21"
27+
responses:
28+
"200":
29+
description: Cancellation accepted. Returns the updated batch job.
30+
headers:
31+
ratelimit-limit:
32+
$ref: '../../shared/headers.yml#/ratelimit-limit'
33+
ratelimit-remaining:
34+
$ref: '../../shared/headers.yml#/ratelimit-remaining'
35+
ratelimit-reset:
36+
$ref: '../../shared/headers.yml#/ratelimit-reset'
37+
content:
38+
application/json:
39+
schema:
40+
$ref: "models/batch.yml"
41+
"401":
42+
$ref: '../../shared/responses/unauthorized.yml'
43+
"403":
44+
$ref: '../../shared/responses/forbidden.yml'
45+
"404":
46+
$ref: '../../shared/responses/not_found.yml'
47+
"409":
48+
$ref: '../../shared/responses/conflict.yml'
49+
"429":
50+
$ref: '../../shared/responses/too_many_requests.yml'
51+
"500":
52+
$ref: '../../shared/responses/server_error.yml'
53+
default:
54+
$ref: '../../shared/responses/unexpected_error.yml'
55+
x-codeSamples:
56+
- $ref: 'examples/curl/inference_cancel_batch.yml'
57+
security:
58+
- inference_bearer_auth: []
Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,84 @@
1+
operationId: inference_create_batch
2+
summary: Create a Batch Inference Job
3+
description: >
4+
Submits a batch job against a previously uploaded JSONL input file.
5+
The upload must have completed before this call; otherwise the request
6+
is rejected.
7+
8+
9+
Supply a unique `request_id` to make the submission idempotent —
10+
retries with the same value return the existing job. When `provider`
11+
is `openai`, the `url` on each JSONL line must match `endpoint`.
12+
tags:
13+
- Batch Inference
14+
servers:
15+
- url: "https://inference.do-ai.run"
16+
description: production
17+
x-inference-base-url: "https://inference.do-ai.run"
18+
requestBody:
19+
required: true
20+
content:
21+
application/json:
22+
schema:
23+
$ref: "models/batch_create_request.yml"
24+
examples:
25+
OpenAI Chat Completions:
26+
value:
27+
file_id: "a1b2c3d4-e5f6-4789-90ab-cdef12345678"
28+
provider: "openai"
29+
endpoint: "/v1/chat/completions"
30+
completion_window: "24h"
31+
request_id: "c7e3ad1e-20c3-4e47-9bf2-6f2a4d6a2f11"
32+
OpenAI Embeddings:
33+
value:
34+
file_id: "a1b2c3d4-e5f6-4789-90ab-cdef12345678"
35+
provider: "openai"
36+
endpoint: "/v1/embeddings"
37+
completion_window: "24h"
38+
request_id: "9f7b9d4a-4e6c-4a27-8e35-1b0e4c5a9a12"
39+
Anthropic Messages:
40+
value:
41+
file_id: "a1b2c3d4-e5f6-4789-90ab-cdef12345678"
42+
provider: "anthropic"
43+
endpoint: "/v1/messages"
44+
completion_window: "24h"
45+
request_id: "2f1a7d9e-8c03-4d2c-9b7e-6f8e2b1a4c77"
46+
metadata:
47+
team: "ml-eval"
48+
dataset: "prompts_v1"
49+
responses:
50+
"201":
51+
description: Batch job accepted. Poll `GET /v1/batches/{batch_id}` for status.
52+
headers:
53+
ratelimit-limit:
54+
$ref: '../../shared/headers.yml#/ratelimit-limit'
55+
ratelimit-remaining:
56+
$ref: '../../shared/headers.yml#/ratelimit-remaining'
57+
ratelimit-reset:
58+
$ref: '../../shared/headers.yml#/ratelimit-reset'
59+
content:
60+
application/json:
61+
schema:
62+
$ref: "models/batch.yml"
63+
"400":
64+
$ref: '../../shared/responses/bad_request.yml'
65+
"401":
66+
$ref: '../../shared/responses/unauthorized.yml'
67+
"403":
68+
$ref: '../../shared/responses/forbidden.yml'
69+
"404":
70+
$ref: '../../shared/responses/not_found.yml'
71+
"409":
72+
$ref: '../../shared/responses/conflict.yml'
73+
"422":
74+
$ref: '../../shared/responses/unprocessable_entity.yml'
75+
"429":
76+
$ref: '../../shared/responses/too_many_requests.yml'
77+
"500":
78+
$ref: '../../shared/responses/server_error.yml'
79+
default:
80+
$ref: '../../shared/responses/unexpected_error.yml'
81+
x-codeSamples:
82+
- $ref: 'examples/curl/inference_create_batch.yml'
83+
security:
84+
- inference_bearer_auth: []

0 commit comments

Comments
 (0)