Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 40 additions & 0 deletions specification/DigitalOcean-public.v2.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -634,6 +634,19 @@ tags:
**Note:** The Agent Inference API uses a customer-specific base URL (the agent endpoint)
and is independent of the main DigitalOcean control-plane API (`https://api.digitalocean.com`).

- name: Batch Inference
description: |-
Batch Inference is an asynchronous processing capability designed to help
you scale high-volume AI projects more efficiently. Ideal for heavy-duty
workloads like large-scale data classification, evaluations, and content
enrichment, you can submit thousands or even millions of requests in a
single job with a guaranteed results window of 24 hours. By utilizing
off-peak GPU capacity, Batch Inference provides high-performance LLM
access at a significantly reduced price point compared to standard
synchronous APIs, making it a cost-effective choice for non-interactive
workloads.


x-tagGroups:
- name: Public APIs
tags:
Expand Down Expand Up @@ -692,6 +705,7 @@ x-tagGroups:
tags:
- Inference Introduction
- Agent Inference
- Batch Inference
- Embeddings
- Serverless Inference

Expand Down Expand Up @@ -2906,6 +2920,32 @@ paths:
post:
$ref: 'resources/inference/inference_async_invoke.yml'

/v1/batches/files:
post:
$ref: 'resources/inference/inference_create_batch_file.yml'

/<upload_url>:
put:
$ref: 'resources/inference/inference_upload_batch_file.yml'

/v1/batches:
get:
$ref: 'resources/inference/inference_list_batches.yml'
post:
$ref: 'resources/inference/inference_create_batch.yml'

/v1/batches/{batch_id}:
get:
$ref: 'resources/inference/inference_get_batch.yml'

/v1/batches/{batch_id}/cancel:
post:
$ref: 'resources/inference/inference_cancel_batch.yml'

/v1/batches/{batch_id}/results:
get:
$ref: 'resources/inference/inference_get_batch_results.yml'

components:
securitySchemes:
bearer_auth:
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# x-codeSamples snippet: cancel a running batch job by ID.
# MODEL_ACCESS_KEY is the inference bearer token; `| jq` pretty-prints the JSON response.
lang: cURL
source: |-
  curl -sS -X POST "https://inference.do-ai.run/v1/batches/0e9d1d35-3d1e-4d66-9a2f-8c7e0f6b3e21/cancel" \
    -H "Authorization: Bearer $MODEL_ACCESS_KEY" \
    -H "Content-Type: application/json" | jq
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# x-codeSamples snippet: submit a batch job, one request per supported provider.
# The `#` lines inside the block scalar are part of the rendered sample shown
# to users, not YAML comments.
lang: cURL
source: |-
  # OpenAI provider - Chat Completions
  curl -sS -X POST "https://inference.do-ai.run/v1/batches" \
    -H "Authorization: Bearer $MODEL_ACCESS_KEY" \
    -H "Content-Type: application/json" \
    -d '{
      "file_id": "a1b2c3d4-e5f6-4789-90ab-cdef12345678",
      "provider": "openai",
      "endpoint": "/v1/chat/completions",
      "completion_window": "24h",
      "request_id": "c7e3ad1e-20c3-4e47-9bf2-6f2a4d6a2f11"
    }'

  # Anthropic provider - Messages
  curl -sS -X POST "https://inference.do-ai.run/v1/batches" \
    -H "Authorization: Bearer $MODEL_ACCESS_KEY" \
    -H "Content-Type: application/json" \
    -d '{
      "file_id": "a1b2c3d4-e5f6-4789-90ab-cdef12345678",
      "provider": "anthropic",
      "endpoint": "/v1/messages",
      "completion_window": "24h",
      "request_id": "2f1a7d9e-8c03-4d2c-9b7e-6f8e2b1a4c77"
    }'
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# x-codeSamples snippet: create a batch input-file record.
# The response includes a `file_id` and a presigned upload URL (see the
# operation description in inference_create_batch_file.yml).
lang: cURL
source: |-
  curl -sS -X POST "https://inference.do-ai.run/v1/batches/files" \
    -H "Authorization: Bearer $MODEL_ACCESS_KEY" \
    -H "Content-Type: application/json" \
    -d '{
      "file_name": "batch_requests.jsonl"
    }' | jq
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# x-codeSamples snippet: fetch the current state of one batch job by ID.
lang: cURL
source: |-
  curl -sS -X GET "https://inference.do-ai.run/v1/batches/0e9d1d35-3d1e-4d66-9a2f-8c7e0f6b3e21" \
    -H "Authorization: Bearer $MODEL_ACCESS_KEY" \
    -H "Content-Type: application/json"
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# x-codeSamples snippet: download results for a batch job by ID.
lang: cURL
source: |-
  curl -sS -X GET "https://inference.do-ai.run/v1/batches/0e9d1d35-3d1e-4d66-9a2f-8c7e0f6b3e21/results" \
    -H "Authorization: Bearer $MODEL_ACCESS_KEY" \
    -H "Content-Type: application/json" | jq
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# x-codeSamples snippet: list batch jobs, capped at 20 per page via `limit`.
lang: cURL
source: |-
  curl -sS -X GET "https://inference.do-ai.run/v1/batches?limit=20" \
    -H "Authorization: Bearer $MODEL_ACCESS_KEY" \
    -H "Content-Type: application/json" | jq
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# x-codeSamples snippet: upload the raw JSONL payload to the presigned URL.
# Note: no Authorization header here — the presigned URL itself carries the
# credentials, so the sample intentionally omits the bearer token.
lang: cURL
source: |-
  # UPLOAD_URL is the exact upload_url returned by POST /v1/batches/files.
  # Use it verbatim; do not modify the host, path, or query string.
  curl -X PUT "$UPLOAD_URL" \
    -H "Content-Type: application/jsonl" \
    --data-binary "@eval_prompts_v1.jsonl"
58 changes: 58 additions & 0 deletions specification/resources/inference/inference_cancel_batch.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
# OpenAPI operation: POST /v1/batches/{batch_id}/cancel
# Referenced via $ref from the main spec's paths section.
operationId: inference_cancel_batch
summary: Cancel a Batch Inference Job
description: >
  Requests cancellation of a batch job. The job transitions to `cancelling`
  and, once in-flight requests drain, to `cancelled`. Jobs already in a
  terminal state (`completed`, `failed`, `expired`, `cancelled`) cannot be
  cancelled and return `409 Conflict`.


  Partial results produced before cancellation remain available via
  `GET /v1/batches/{batch_id}/results`.
tags:
  - Batch Inference
# Per-operation server override: batch endpoints are served from the
# inference host, not the main control-plane API.
servers:
  - url: "https://inference.do-ai.run"
    description: production
x-inference-base-url: "https://inference.do-ai.run"
parameters:
  - in: path
    name: batch_id
    description: The batch job identifier.
    required: true
    schema:
      type: string
      format: uuid
      example: "0e9d1d35-3d1e-4d66-9a2f-8c7e0f6b3e21"
responses:
  "200":
    description: Cancellation accepted. Returns the updated batch job.
    headers:
      # Standard rate-limit headers shared across the public API.
      ratelimit-limit:
        $ref: '../../shared/headers.yml#/ratelimit-limit'
      ratelimit-remaining:
        $ref: '../../shared/headers.yml#/ratelimit-remaining'
      ratelimit-reset:
        $ref: '../../shared/headers.yml#/ratelimit-reset'
    content:
      application/json:
        schema:
          $ref: "models/batch.yml"
  "401":
    $ref: '../../shared/responses/unauthorized.yml'
  "403":
    $ref: '../../shared/responses/forbidden.yml'
  "404":
    $ref: '../../shared/responses/not_found.yml'
  # 409: the job is already in a terminal state and cannot be cancelled.
  "409":
    $ref: '../../shared/responses/conflict.yml'
  "429":
    $ref: '../../shared/responses/too_many_requests.yml'
  "500":
    $ref: '../../shared/responses/server_error.yml'
  default:
    $ref: '../../shared/responses/unexpected_error.yml'
x-codeSamples:
  - $ref: 'examples/curl/inference_cancel_batch.yml'
security:
  - inference_bearer_auth: []
84 changes: 84 additions & 0 deletions specification/resources/inference/inference_create_batch.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
# OpenAPI operation: POST /v1/batches
# Submits a batch job against a previously uploaded JSONL input file.
operationId: inference_create_batch
summary: Create a Batch Inference Job
description: >
  Submits a batch job against a previously uploaded JSONL input file.
  The upload must have completed before this call; otherwise the request
  is rejected.


  Supply a unique `request_id` to make the submission idempotent —
  retries with the same value return the existing job. When `provider`
  is `openai`, the `url` on each JSONL line must match `endpoint`.
tags:
  - Batch Inference
# Per-operation server override: batch endpoints are served from the
# inference host, not the main control-plane API.
servers:
  - url: "https://inference.do-ai.run"
    description: production
x-inference-base-url: "https://inference.do-ai.run"
requestBody:
  required: true
  content:
    application/json:
      schema:
        $ref: "models/batch_create_request.yml"
      # One named example per provider/endpoint combination documented above.
      examples:
        OpenAI Chat Completions:
          value:
            file_id: "a1b2c3d4-e5f6-4789-90ab-cdef12345678"
            provider: "openai"
            endpoint: "/v1/chat/completions"
            completion_window: "24h"
            request_id: "c7e3ad1e-20c3-4e47-9bf2-6f2a4d6a2f11"
        OpenAI Embeddings:
          value:
            file_id: "a1b2c3d4-e5f6-4789-90ab-cdef12345678"
            provider: "openai"
            endpoint: "/v1/embeddings"
            completion_window: "24h"
            request_id: "9f7b9d4a-4e6c-4a27-8e35-1b0e4c5a9a12"
        Anthropic Messages:
          value:
            file_id: "a1b2c3d4-e5f6-4789-90ab-cdef12345678"
            provider: "anthropic"
            endpoint: "/v1/messages"
            completion_window: "24h"
            request_id: "2f1a7d9e-8c03-4d2c-9b7e-6f8e2b1a4c77"
            # Optional free-form metadata attached to the job.
            metadata:
              team: "ml-eval"
              dataset: "prompts_v1"
responses:
  "201":
    description: Batch job accepted. Poll `GET /v1/batches/{batch_id}` for status.
    headers:
      # Standard rate-limit headers shared across the public API.
      ratelimit-limit:
        $ref: '../../shared/headers.yml#/ratelimit-limit'
      ratelimit-remaining:
        $ref: '../../shared/headers.yml#/ratelimit-remaining'
      ratelimit-reset:
        $ref: '../../shared/headers.yml#/ratelimit-reset'
    content:
      application/json:
        schema:
          $ref: "models/batch.yml"
  "400":
    $ref: '../../shared/responses/bad_request.yml'
  "401":
    $ref: '../../shared/responses/unauthorized.yml'
  "403":
    $ref: '../../shared/responses/forbidden.yml'
  "404":
    $ref: '../../shared/responses/not_found.yml'
  "409":
    $ref: '../../shared/responses/conflict.yml'
  "422":
    $ref: '../../shared/responses/unprocessable_entity.yml'
  "429":
    $ref: '../../shared/responses/too_many_requests.yml'
  "500":
    $ref: '../../shared/responses/server_error.yml'
  default:
    $ref: '../../shared/responses/unexpected_error.yml'
x-codeSamples:
  - $ref: 'examples/curl/inference_create_batch.yml'
security:
  - inference_bearer_auth: []
55 changes: 55 additions & 0 deletions specification/resources/inference/inference_create_batch_file.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
# OpenAPI operation: POST /v1/batches/files
# Step 1 of the batch workflow: register an input file and obtain a
# presigned upload URL; the JSONL payload is then sent with a raw HTTP PUT.
operationId: inference_create_batch_file
summary: Create a Batch Inference Input File
description: >
  Creates a file record and returns a `file_id` plus a short-lived
  presigned `PUT` URL (typically valid for ~15 minutes). Upload the raw
  JSONL bytes to the returned `upload_url` (see the `PUT <upload_url>`
  upload operation) before calling `POST /v1/batches`.
tags:
  - Batch Inference
# Per-operation server override: batch endpoints are served from the
# inference host, not the main control-plane API.
servers:
  - url: "https://inference.do-ai.run"
    description: production
x-inference-base-url: "https://inference.do-ai.run"
requestBody:
  required: true
  content:
    application/json:
      schema:
        $ref: "models/batch_file_create_request.yml"
      examples:
        Default:
          value:
            file_name: "batch_requests.jsonl"
responses:
  "201":
    description: File intent created.
    headers:
      # Standard rate-limit headers shared across the public API.
      ratelimit-limit:
        $ref: '../../shared/headers.yml#/ratelimit-limit'
      ratelimit-remaining:
        $ref: '../../shared/headers.yml#/ratelimit-remaining'
      ratelimit-reset:
        $ref: '../../shared/headers.yml#/ratelimit-reset'
    content:
      application/json:
        schema:
          $ref: "models/batch_file_create_response.yml"
  "400":
    $ref: '../../shared/responses/bad_request.yml'
  "401":
    $ref: '../../shared/responses/unauthorized.yml'
  "403":
    $ref: '../../shared/responses/forbidden.yml'
  "422":
    $ref: '../../shared/responses/unprocessable_entity.yml'
  "429":
    $ref: '../../shared/responses/too_many_requests.yml'
  "500":
    $ref: '../../shared/responses/server_error.yml'
  default:
    $ref: '../../shared/responses/unexpected_error.yml'
x-codeSamples:
  - $ref: 'examples/curl/inference_create_batch_file.yml'
security:
  - inference_bearer_auth: []
50 changes: 50 additions & 0 deletions specification/resources/inference/inference_get_batch.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
# OpenAPI operation: GET /v1/batches/{batch_id}
# Status-polling endpoint for a submitted batch job.
operationId: inference_get_batch
summary: Retrieve a Batch Inference Job
description: >
  Returns the current state of a batch job. Poll until `status` reaches a
  terminal value (`completed`, `failed`, `expired`, or `cancelled`).
tags:
  - Batch Inference
# Per-operation server override: batch endpoints are served from the
# inference host, not the main control-plane API.
servers:
  - url: "https://inference.do-ai.run"
    description: production
x-inference-base-url: "https://inference.do-ai.run"
parameters:
  - in: path
    name: batch_id
    description: The batch job identifier.
    required: true
    schema:
      type: string
      format: uuid
      example: "0e9d1d35-3d1e-4d66-9a2f-8c7e0f6b3e21"
responses:
  "200":
    description: The batch job.
    headers:
      # Standard rate-limit headers shared across the public API.
      ratelimit-limit:
        $ref: '../../shared/headers.yml#/ratelimit-limit'
      ratelimit-remaining:
        $ref: '../../shared/headers.yml#/ratelimit-remaining'
      ratelimit-reset:
        $ref: '../../shared/headers.yml#/ratelimit-reset'
    content:
      application/json:
        schema:
          $ref: "models/batch.yml"
  "401":
    $ref: '../../shared/responses/unauthorized.yml'
  "403":
    $ref: '../../shared/responses/forbidden.yml'
  "404":
    $ref: '../../shared/responses/not_found.yml'
  "429":
    $ref: '../../shared/responses/too_many_requests.yml'
  "500":
    $ref: '../../shared/responses/server_error.yml'
  default:
    $ref: '../../shared/responses/unexpected_error.yml'
x-codeSamples:
  - $ref: 'examples/curl/inference_get_batch.yml'
security:
  - inference_bearer_auth: []
Loading
Loading