Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 40 additions & 0 deletions specification/DigitalOcean-public.v2.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -634,6 +634,19 @@ tags:
**Note:** The Agent Inference API uses a customer-specific base URL (the agent endpoint)
and is independent of the main DigitalOcean control-plane API (`https://api.digitalocean.com`).

- name: Batch Inference
description: |-
Batch Inference is an asynchronous processing capability designed to help
you scale high-volume AI projects more efficiently. Ideal for heavy-duty
workloads like large-scale data classification, evaluations, and content
enrichment, you can submit thousands or even millions of requests in a
single job with a guaranteed results window of 24 hours. By utilizing
off-peak GPU capacity, Batch Inference provides high-performance LLM
access at a significantly reduced price point compared to standard
synchronous APIs, making it a cost-effective choice for non-interactive
workloads.


x-tagGroups:
- name: Public APIs
tags:
Expand Down Expand Up @@ -692,6 +705,7 @@ x-tagGroups:
tags:
- Inference Introduction
- Agent Inference
- Batch Inference
- Embeddings
- Serverless Inference

Expand Down Expand Up @@ -2906,6 +2920,32 @@ paths:
post:
$ref: 'resources/inference/inference_async_invoke.yml'

/v1/batches/files:
post:
$ref: 'resources/inference/inference_create_batch_file.yml'

/<upload_url>:
put:
$ref: 'resources/inference/inference_upload_batch_file.yml'

/v1/batches:
get:
$ref: 'resources/inference/inference_list_batches.yml'
post:
$ref: 'resources/inference/inference_create_batch.yml'

/v1/batches/{batch_id}:
get:
$ref: 'resources/inference/inference_get_batch.yml'

/v1/batches/{batch_id}/cancel:
post:
$ref: 'resources/inference/inference_cancel_batch.yml'

/v1/batches/{batch_id}/results:
get:
$ref: 'resources/inference/inference_get_batch_results.yml'

components:
securitySchemes:
bearer_auth:
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# x-codeSamples snippet: cancel a running batch job by ID.
# MODEL_ACCESS_KEY is the inference bearer token; `| jq` pretty-prints the JSON response.
lang: cURL
source: |-
  curl -sS -X POST "https://inference.do-ai.run/v1/batches/0e9d1d35-3d1e-4d66-9a2f-8c7e0f6b3e21/cancel" \
    -H "Authorization: Bearer $MODEL_ACCESS_KEY" \
    -H "Content-Type: application/json" | jq
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# x-codeSamples snippet: submit a batch job, one request per supported provider.
# The `#` lines inside the block scalar are part of the rendered sample shown
# to users, not YAML comments.
lang: cURL
source: |-
  # OpenAI provider - Chat Completions
  curl -sS -X POST "https://inference.do-ai.run/v1/batches" \
    -H "Authorization: Bearer $MODEL_ACCESS_KEY" \
    -H "Content-Type: application/json" \
    -d '{
      "file_id": "a1b2c3d4-e5f6-4789-90ab-cdef12345678",
      "provider": "openai",
      "endpoint": "/v1/chat/completions",
      "completion_window": "24h",
      "request_id": "c7e3ad1e-20c3-4e47-9bf2-6f2a4d6a2f11"
    }'

  # Anthropic provider - Messages
  curl -sS -X POST "https://inference.do-ai.run/v1/batches" \
    -H "Authorization: Bearer $MODEL_ACCESS_KEY" \
    -H "Content-Type: application/json" \
    -d '{
      "file_id": "a1b2c3d4-e5f6-4789-90ab-cdef12345678",
      "provider": "anthropic",
      "endpoint": "/v1/messages",
      "completion_window": "24h",
      "request_id": "2f1a7d9e-8c03-4d2c-9b7e-6f8e2b1a4c77"
    }'
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# x-codeSamples snippet: create a batch input-file record.
# The response includes a `file_id` and a presigned upload URL (see the
# operation description in inference_create_batch_file.yml).
lang: cURL
source: |-
  curl -sS -X POST "https://inference.do-ai.run/v1/batches/files" \
    -H "Authorization: Bearer $MODEL_ACCESS_KEY" \
    -H "Content-Type: application/json" \
    -d '{
      "file_name": "batch_requests.jsonl"
    }' | jq
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# x-codeSamples snippet: fetch the current state of one batch job by ID.
lang: cURL
source: |-
  curl -sS -X GET "https://inference.do-ai.run/v1/batches/0e9d1d35-3d1e-4d66-9a2f-8c7e0f6b3e21" \
    -H "Authorization: Bearer $MODEL_ACCESS_KEY" \
    -H "Content-Type: application/json"
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# x-codeSamples snippet: download results for a batch job by ID.
lang: cURL
source: |-
  curl -sS -X GET "https://inference.do-ai.run/v1/batches/0e9d1d35-3d1e-4d66-9a2f-8c7e0f6b3e21/results" \
    -H "Authorization: Bearer $MODEL_ACCESS_KEY" \
    -H "Content-Type: application/json" | jq
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# x-codeSamples snippet: list batch jobs, capped at 20 per page via `limit`.
lang: cURL
source: |-
  curl -sS -X GET "https://inference.do-ai.run/v1/batches?limit=20" \
    -H "Authorization: Bearer $MODEL_ACCESS_KEY" \
    -H "Content-Type: application/json" | jq
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# x-codeSamples snippet: upload the raw JSONL payload to the presigned URL.
# Note: no Authorization header here — the presigned URL itself carries the
# credentials, so the sample intentionally omits the bearer token.
lang: cURL
source: |-
  # UPLOAD_URL is the exact upload_url returned by POST /v1/batches/files.
  # Use it verbatim; do not modify the host, path, or query string.
  curl -X PUT "$UPLOAD_URL" \
    -H "Content-Type: application/jsonl" \
    --data-binary "@eval_prompts_v1.jsonl"
58 changes: 58 additions & 0 deletions specification/resources/inference/inference_cancel_batch.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
# OpenAPI operation: POST /v1/batches/{batch_id}/cancel
# Referenced via $ref from the main spec's paths section.
operationId: inference_cancel_batch
summary: Cancel a Batch Inference Job
description: >
  Requests cancellation of a batch job. The job transitions to `cancelling`
  and, once in-flight requests drain, to `cancelled`. Jobs already in a
  terminal state (`completed`, `failed`, `expired`, `cancelled`) cannot be
  cancelled and return `409 Conflict`.


  Partial results produced before cancellation remain available via
  `GET /v1/batches/{batch_id}/results`.
tags:
  - Batch Inference
# Per-operation server override: batch endpoints are served from the
# inference host, not the main control-plane API.
servers:
  - url: "https://inference.do-ai.run"
    description: production
x-inference-base-url: "https://inference.do-ai.run"
parameters:
  - in: path
    name: batch_id
    description: The batch job identifier.
    required: true
    schema:
      type: string
      format: uuid
      example: "0e9d1d35-3d1e-4d66-9a2f-8c7e0f6b3e21"
responses:
  "200":
    description: Cancellation accepted. Returns the updated batch job.
    headers:
      # Standard rate-limit headers shared across the public API.
      ratelimit-limit:
        $ref: '../../shared/headers.yml#/ratelimit-limit'
      ratelimit-remaining:
        $ref: '../../shared/headers.yml#/ratelimit-remaining'
      ratelimit-reset:
        $ref: '../../shared/headers.yml#/ratelimit-reset'
    content:
      application/json:
        schema:
          $ref: "models/batch.yml"
  "401":
    $ref: '../../shared/responses/unauthorized.yml'
  "403":
    $ref: '../../shared/responses/forbidden.yml'
  "404":
    $ref: '../../shared/responses/not_found.yml'
  # 409: the job is already in a terminal state and cannot be cancelled.
  "409":
    $ref: '../../shared/responses/conflict.yml'
  "429":
    $ref: '../../shared/responses/too_many_requests.yml'
  "500":
    $ref: '../../shared/responses/server_error.yml'
  default:
    $ref: '../../shared/responses/unexpected_error.yml'
x-codeSamples:
  - $ref: 'examples/curl/inference_cancel_batch.yml'
security:
  - inference_bearer_auth: []
84 changes: 84 additions & 0 deletions specification/resources/inference/inference_create_batch.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
# OpenAPI operation: POST /v1/batches
# Submits a batch job against a previously uploaded JSONL input file.
operationId: inference_create_batch
summary: Create a Batch Inference Job
description: >
  Submits a batch job against a previously uploaded JSONL input file.
  The upload must have completed before this call; otherwise the request
  is rejected.


  Supply a unique `request_id` to make the submission idempotent —
  retries with the same value return the existing job. When `provider`
  is `openai`, the `url` on each JSONL line must match `endpoint`.
tags:
  - Batch Inference
# Per-operation server override: batch endpoints are served from the
# inference host, not the main control-plane API.
servers:
  - url: "https://inference.do-ai.run"
    description: production
x-inference-base-url: "https://inference.do-ai.run"
requestBody:
  required: true
  content:
    application/json:
      schema:
        $ref: "models/batch_create_request.yml"
      # One named example per provider/endpoint combination documented above.
      examples:
        OpenAI Chat Completions:
          value:
            file_id: "a1b2c3d4-e5f6-4789-90ab-cdef12345678"
            provider: "openai"
            endpoint: "/v1/chat/completions"
            completion_window: "24h"
            request_id: "c7e3ad1e-20c3-4e47-9bf2-6f2a4d6a2f11"
        OpenAI Embeddings:
          value:
            file_id: "a1b2c3d4-e5f6-4789-90ab-cdef12345678"
            provider: "openai"
            endpoint: "/v1/embeddings"
            completion_window: "24h"
            request_id: "9f7b9d4a-4e6c-4a27-8e35-1b0e4c5a9a12"
        Anthropic Messages:
          value:
            file_id: "a1b2c3d4-e5f6-4789-90ab-cdef12345678"
            provider: "anthropic"
            endpoint: "/v1/messages"
            completion_window: "24h"
            request_id: "2f1a7d9e-8c03-4d2c-9b7e-6f8e2b1a4c77"
            # Optional free-form metadata attached to the job.
            metadata:
              team: "ml-eval"
              dataset: "prompts_v1"
responses:
  "201":
    description: Batch job accepted. Poll `GET /v1/batches/{batch_id}` for status.
    headers:
      # Standard rate-limit headers shared across the public API.
      ratelimit-limit:
        $ref: '../../shared/headers.yml#/ratelimit-limit'
      ratelimit-remaining:
        $ref: '../../shared/headers.yml#/ratelimit-remaining'
      ratelimit-reset:
        $ref: '../../shared/headers.yml#/ratelimit-reset'
    content:
      application/json:
        schema:
          $ref: "models/batch.yml"
  "400":
    $ref: '../../shared/responses/bad_request.yml'
  "401":
    $ref: '../../shared/responses/unauthorized.yml'
  "403":
    $ref: '../../shared/responses/forbidden.yml'
  "404":
    $ref: '../../shared/responses/not_found.yml'
  "409":
    $ref: '../../shared/responses/conflict.yml'
  "422":
    $ref: '../../shared/responses/unprocessable_entity.yml'
  "429":
    $ref: '../../shared/responses/too_many_requests.yml'
  "500":
    $ref: '../../shared/responses/server_error.yml'
  default:
    $ref: '../../shared/responses/unexpected_error.yml'
x-codeSamples:
  - $ref: 'examples/curl/inference_create_batch.yml'
security:
  - inference_bearer_auth: []
55 changes: 55 additions & 0 deletions specification/resources/inference/inference_create_batch_file.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
# OpenAPI operation: POST /v1/batches/files
# Step 1 of the batch workflow: register an input file and obtain a
# presigned upload URL; the JSONL payload is then sent with a raw HTTP PUT.
operationId: inference_create_batch_file
summary: Create a Batch Inference Input File
description: >
  Creates a file record and returns a `file_id` plus a short-lived
  presigned `PUT` URL (typically valid for ~15 minutes). Upload the raw
  JSONL bytes to the returned `upload_url` (see the `PUT <upload_url>`
  upload operation) before calling `POST /v1/batches`.
tags:
  - Batch Inference
# Per-operation server override: batch endpoints are served from the
# inference host, not the main control-plane API.
servers:
  - url: "https://inference.do-ai.run"
    description: production
x-inference-base-url: "https://inference.do-ai.run"
requestBody:
  required: true
  content:
    application/json:
      schema:
        $ref: "models/batch_file_create_request.yml"
      examples:
        Default:
          value:
            file_name: "batch_requests.jsonl"
responses:
  "201":
    description: File intent created.
    headers:
      # Standard rate-limit headers shared across the public API.
      ratelimit-limit:
        $ref: '../../shared/headers.yml#/ratelimit-limit'
      ratelimit-remaining:
        $ref: '../../shared/headers.yml#/ratelimit-remaining'
      ratelimit-reset:
        $ref: '../../shared/headers.yml#/ratelimit-reset'
    content:
      application/json:
        schema:
          $ref: "models/batch_file_create_response.yml"
  "400":
    $ref: '../../shared/responses/bad_request.yml'
  "401":
    $ref: '../../shared/responses/unauthorized.yml'
  "403":
    $ref: '../../shared/responses/forbidden.yml'
  "422":
    $ref: '../../shared/responses/unprocessable_entity.yml'
  "429":
    $ref: '../../shared/responses/too_many_requests.yml'
  "500":
    $ref: '../../shared/responses/server_error.yml'
  default:
    $ref: '../../shared/responses/unexpected_error.yml'
x-codeSamples:
  - $ref: 'examples/curl/inference_create_batch_file.yml'
security:
  - inference_bearer_auth: []
50 changes: 50 additions & 0 deletions specification/resources/inference/inference_get_batch.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
# OpenAPI operation: GET /v1/batches/{batch_id}
# Status-polling endpoint for a submitted batch job.
operationId: inference_get_batch
summary: Retrieve a Batch Inference Job
description: >
  Returns the current state of a batch job. Poll until `status` reaches a
  terminal value (`completed`, `failed`, `expired`, or `cancelled`).
tags:
  - Batch Inference
# Per-operation server override: batch endpoints are served from the
# inference host, not the main control-plane API.
servers:
  - url: "https://inference.do-ai.run"
    description: production
x-inference-base-url: "https://inference.do-ai.run"
parameters:
  - in: path
    name: batch_id
    description: The batch job identifier.
    required: true
    schema:
      type: string
      format: uuid
      example: "0e9d1d35-3d1e-4d66-9a2f-8c7e0f6b3e21"
responses:
  "200":
    description: The batch job.
    headers:
      # Standard rate-limit headers shared across the public API.
      ratelimit-limit:
        $ref: '../../shared/headers.yml#/ratelimit-limit'
      ratelimit-remaining:
        $ref: '../../shared/headers.yml#/ratelimit-remaining'
      ratelimit-reset:
        $ref: '../../shared/headers.yml#/ratelimit-reset'
    content:
      application/json:
        schema:
          $ref: "models/batch.yml"
  "401":
    $ref: '../../shared/responses/unauthorized.yml'
  "403":
    $ref: '../../shared/responses/forbidden.yml'
  "404":
    $ref: '../../shared/responses/not_found.yml'
  "429":
    $ref: '../../shared/responses/too_many_requests.yml'
  "500":
    $ref: '../../shared/responses/server_error.yml'
  default:
    $ref: '../../shared/responses/unexpected_error.yml'
x-codeSamples:
  - $ref: 'examples/curl/inference_get_batch.yml'
security:
  - inference_bearer_auth: []
Loading
Loading