From c926afb8d8748bdc75673f665cc1416f8d98baa1 Mon Sep 17 00:00:00 2001 From: Bigint <69431456+bigint@users.noreply.github.com> Date: Fri, 22 May 2026 19:24:51 +0530 Subject: [PATCH 01/11] docs: beautify README and fix broken configuration table --- README.md | 88 +++++++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 72 insertions(+), 16 deletions(-) diff --git a/README.md b/README.md index 2b6d1e6c..b1234e4c 100644 --- a/README.md +++ b/README.md @@ -1,8 +1,22 @@ +
+ # bigRAG -Open-source, self-hostable RAG platform with Turbopuffer-backed search. Upload documents, auto-chunk, embed, and retrieve through semantic, keyword, and hybrid modes behind a simple REST API. +**Open-source, self-hostable RAG platform with Turbopuffer-backed search.** + +Upload documents, auto-chunk, embed, and retrieve through semantic, keyword, and hybrid search — all behind one clean REST API. + +[![PyPI version](https://img.shields.io/pypi/v/bigrag?style=flat-square&logo=pypi&logoColor=white&label=PyPI)](https://pypi.org/project/bigrag/) +[![npm version](https://img.shields.io/npm/v/%40bigrag%2Fclient?style=flat-square&logo=npm&logoColor=white&label=npm)](https://www.npmjs.com/package/@bigrag/client) +[![Docker image](https://img.shields.io/docker/v/yoginth/bigrag-api?style=flat-square&logo=docker&logoColor=white&label=Docker&sort=semver)](https://hub.docker.com/r/yoginth/bigrag-api) +[![License: MIT](https://img.shields.io/badge/License-MIT-blue?style=flat-square)](LICENSE) +[![GitHub stars](https://img.shields.io/github/stars/bigint/bigrag?style=flat-square&logo=github&label=Stars)](https://github.com/bigint/bigrag) -[![License](https://img.shields.io/badge/license-MIT-blue.svg)](LICENSE) +[Quick Start](#quick-start) · [Architecture](#architecture) · [API Reference](#api-reference) · [SDKs](#sdks) · [MCP Server](#mcp-server) · [Configuration](#configuration) + +
+ +--- ## Features @@ -32,7 +46,12 @@ Open-source, self-hostable RAG platform with Turbopuffer-backed search. Upload d docker compose up -d ``` -This starts bigRAG API, worker, admin UI, Postgres, and Redis. Configure Turbopuffer from onboarding before ingesting or querying collections. Open http://localhost:3000 for the admin UI or http://localhost:4000/docs for the interactive API docs. +This starts the bigRAG API, worker, admin UI, Postgres, and Redis. Open **[localhost:3000](http://localhost:3000)** for the admin UI or **[localhost:4000/docs](http://localhost:4000/docs)** for the interactive API docs. + +> [!IMPORTANT] +> Configure Turbopuffer from onboarding before ingesting or querying collections. + +Once Turbopuffer is configured, you can drive everything over HTTP: ```bash # Create a collection @@ -63,8 +82,7 @@ docker pull yoginth/bigrag-api:2026.4.30 docker pull yoginth/bigrag-ui:2026.4.30 ``` -Release artifacts use CalVer (`YYYY.M.D`). Docker also publishes `latest`; the -Python SDK publishes dated PyPI releases. +Release artifacts use CalVer (`YYYY.M.D`). Docker also publishes `latest`; the Python SDK publishes dated PyPI releases. ## Architecture @@ -244,7 +262,7 @@ const { results } = await client.queries.query("docs", { query: "What is RAG?" } ### Python ```bash -pip install bigrag==2026.5.7 +pip install bigrag==2026.5.22 ``` ```python @@ -259,7 +277,7 @@ doc = await client.documents.upload("docs", "/path/to/paper.pdf") result = await client.queries.query("docs", {"query": "What is RAG?"}) ``` -## MCP server +## MCP Server Expose bigRAG to Claude Desktop, Cursor, and any MCP-aware runtime: @@ -289,24 +307,35 @@ Full-workspace keys expose 8 tools — `list_collections`, `get_collection`, `ge ## Configuration -Bootstrap settings use the `BIGRAG_` prefix as environment variables, or configure via `bigrag.toml`. -Backend logging defaults to `debug` / `text` for local development. 
Use `BIGRAG_LOG_LEVEL=info` and `BIGRAG_LOG_FORMAT=json` for production log collection. Configure Turbopuffer from the admin UI; it is stored in Postgres with the other instance settings. +Bootstrap settings use the `BIGRAG_` prefix as environment variables, or configure them in `bigrag.toml`. Backend logging defaults to `debug` / `text` for local development — use `BIGRAG_LOG_LEVEL=info` and `BIGRAG_LOG_FORMAT=json` for production log collection. Turbopuffer is configured from the admin UI and stored in Postgres alongside the other instance settings. + +#### Server | Variable | Description | Default | |----------|-------------|---------| | `BIGRAG_PORT` | Server port | `4000` | | `BIGRAG_HOST` | Bind address | `127.0.0.1` | | `BIGRAG_WORKERS` | API worker processes | `1` | +| `BIGRAG_ENV` | `dev` or `prod` (prod enables startup safety checks) | `dev` | | `BIGRAG_LOG_LEVEL` | Backend log level: `debug`, `info`, `warning`, or `error` | `debug` | | `BIGRAG_LOG_FORMAT` | Backend log renderer: `text` or `json` | `text` | | `BIGRAG_CORS_ORIGINS` | JSON array of allowed browser origins | `[]` | +| `BIGRAG_TRUSTED_PROXIES` | JSON array of trusted proxy CIDRs used to honor `X-Forwarded-For` for audit and access logs | `[]` | + +#### Database & Redis + +| Variable | Description | Default | +|----------|-------------|---------| | `BIGRAG_DATABASE_URL` | Postgres URL (`postgres:5432` inside docker-compose, `localhost:5432` for bare-metal dev) | `postgres://bigrag:bigrag@localhost:5432/bigrag?sslmode=disable` | | `BIGRAG_DB_POOL_MIN` | Min Postgres pool size | `5` | | `BIGRAG_DB_POOL_MAX` | Max Postgres pool size | `50` | | `BIGRAG_MIGRATION_TIMEOUT_SECONDS` | Startup migration check timeout (`0` disables the timeout) | `60` | | `BIGRAG_REDIS_URL` | Redis URL | `redis://localhost:6379/0` | -| `BIGRAG_ENV` | `dev` or `prod` (prod enables startup safety checks) | `dev` | -| `BIGRAG_TRUSTED_PROXIES` | JSON array of trusted proxy CIDRs used to honor `X-Forwarded-For` for audit and 
access logs | `[]` | + +#### Sessions & Auth + +| Variable | Description | Default | +|----------|-------------|---------| | `BIGRAG_SESSION_EXPIRY_HOURS` | Session cookie lifetime | `168` | | `BIGRAG_SESSION_COOKIE_NAME` | Session cookie name | `bigrag_session` | | `BIGRAG_SESSION_COOKIE_SECURE` | HTTPS-only session cookies | `false` | @@ -314,11 +343,13 @@ Backend logging defaults to `debug` / `text` for local development. Use `BIGRAG_ | `BIGRAG_SESSION_COOKIE_DOMAIN` | Optional session cookie domain | — | | `BIGRAG_AUTH_PRINCIPAL_CACHE_TTL` | Principal cache TTL in seconds | `60` | -`./dev.sh` and the default Docker Compose setup allow the local admin UI origin -`http://localhost:3000`. For production, set `BIGRAG_CORS_ORIGINS` to the exact -admin UI origin. Cross-site admin UI deployments also need -`BIGRAG_SESSION_COOKIE_SECURE=true` and usually -`BIGRAG_SESSION_COOKIE_SAMESITE=none`. +> [!TIP] +> `./dev.sh` and the default Docker Compose setup allow the local admin UI origin `http://localhost:3000`. For production, set `BIGRAG_CORS_ORIGINS` to the exact admin UI origin. Cross-site admin UI deployments also need `BIGRAG_SESSION_COOKIE_SECURE=true` and usually `BIGRAG_SESSION_COOKIE_SAMESITE=none`. + +#### Embedding + +| Variable | Description | Default | +|----------|-------------|---------| | `BIGRAG_EMBEDDING_API_KEY` | Default embedding API key | — | | `BIGRAG_EMBEDDING_PROVIDER` | Default embedding provider | `openai` | | `BIGRAG_EMBEDDING_MODEL` | Default embedding model | `text-embedding-3-small` | @@ -327,6 +358,11 @@ admin UI origin. 
Cross-site admin UI deployments also need | `BIGRAG_EMBEDDING_CONCURRENCY` | Max concurrent embedding requests | `8` | | `BIGRAG_ALLOWED_EMBEDDING_BASE_URLS` | JSON allow-list for embedding base URLs | `[]` | | `BIGRAG_ALLOW_PRIVATE_EMBEDDING_BASE_URLS` | Allow private-network embedding endpoints | `false` | + +#### Chat + +| Variable | Description | Default | +|----------|-------------|---------| | `BIGRAG_CHAT_PROVIDER` | Chat provider | `openai` | | `BIGRAG_CHAT_MODEL` | Default chat model | `gpt-4o-mini` | | `BIGRAG_CHAT_BASE_URL` | Base URL for OpenAI-compatible chat endpoints | — | @@ -334,8 +370,18 @@ admin UI origin. Cross-site admin UI deployments also need | `BIGRAG_CHAT_MAX_CONTEXT_CHARS` | Max retrieved-context characters per chat call | `120000` | | `BIGRAG_ALLOWED_CHAT_BASE_URLS` | JSON allow-list for chat base URLs | `[]` | | `BIGRAG_ALLOW_PRIVATE_CHAT_BASE_URLS` | Allow private-network chat endpoints | `false` | + +#### Security + +| Variable | Description | Default | +|----------|-------------|---------| | `BIGRAG_MASTER_KEY` | Fernet key that encrypts provider credentials, embedding cache rows, and Redis cache payloads (required in `prod`) | — | | `BIGRAG_MASTER_KEY_PREVIOUS` | JSON array of old Fernet keys for staged rotation | `[]` | + +#### Ingestion & Uploads + +| Variable | Description | Default | +|----------|-------------|---------| | `BIGRAG_UPLOAD_DIR` | Local upload directory | `./data/uploads` | | `BIGRAG_INGESTION_WORKERS` | Ingestion concurrency target | `4` | | `BIGRAG_MAX_UPLOAD_SIZE_MB` | Max single-file upload size | `64` | @@ -344,11 +390,21 @@ admin UI origin. 
Cross-site admin UI deployments also need | `BIGRAG_CONVERSION_TIMEOUT` | Docling conversion timeout in seconds | `300` | | `BIGRAG_CONVERSION_PDF_OCR_ENABLED` | Enable OCR for scanned PDFs | `true` | | `BIGRAG_QUEUE_MAX_DEPTH` | Max pending jobs in the ingestion queue | `10000` | + +#### Caching + +| Variable | Description | Default | +|----------|-------------|---------| | `BIGRAG_COLLECTION_CACHE_TTL` | Collection metadata cache TTL in seconds | `30` | | `BIGRAG_QUERY_EMBEDDING_CACHE_TTL` | Query embedding cache TTL in seconds | `300` | | `BIGRAG_QUERY_RESULT_CACHE_TTL` | Exact query-result cache TTL in seconds | `30` | | `BIGRAG_EMBEDDING_CACHE_MODE` | Persistent chunk embedding cache mode (`encrypted` or `disabled`) | `encrypted` | | `BIGRAG_EMBEDDING_CACHE_RETENTION_DAYS` | Days to keep persistent embedding-cache rows after last use | `30` | + +#### Webhooks + +| Variable | Description | Default | +|----------|-------------|---------| | `BIGRAG_WEBHOOK_DELIVERY_TIMEOUT` | Webhook HTTP timeout in seconds | `10` | | `BIGRAG_WEBHOOK_RETRY_DELAYS` | JSON array of webhook retry delays in seconds | `[10,30,90]` | | `BIGRAG_WEBHOOK_MAX_COUNT` | Max configured webhooks | `50` | From 83427ba5373ee46ecc55952a4c88fe6822976e90 Mon Sep 17 00:00:00 2001 From: Bigint <69431456+bigint@users.noreply.github.com> Date: Fri, 22 May 2026 19:31:30 +0530 Subject: [PATCH 02/11] docs: simplify api reference with collapsible endpoints --- .../docs/api-reference/admin-realtime.mdx | 14 +- .../content/docs/api-reference/api-keys.mdx | 35 ++-- website/content/docs/api-reference/audit.mdx | 50 +++--- .../docs/api-reference/authentication.mdx | 4 +- .../content/docs/api-reference/backups.mdx | 52 ++++-- website/content/docs/api-reference/chat.mdx | 39 +++-- .../docs/api-reference/collections.mdx | 112 ++++++------- .../content/docs/api-reference/connectors.mdx | 30 +++- .../content/docs/api-reference/documents.mdx | 153 +++++++++--------- .../docs/api-reference/embedding-presets.mdx | 37 
+++-- .../content/docs/api-reference/evaluation.mdx | 18 ++- website/content/docs/api-reference/health.mdx | 59 ++++--- .../docs/api-reference/instance-settings.mdx | 63 ++++---- .../docs/api-reference/mcp-servers.mdx | 61 ++++--- .../docs/api-reference/preferences.mdx | 10 +- website/content/docs/api-reference/query.mdx | 38 ++--- website/content/docs/api-reference/usage.mdx | 2 + website/content/docs/api-reference/users.mdx | 26 ++- .../content/docs/api-reference/vectors.mdx | 31 ++-- .../content/docs/api-reference/webhooks.mdx | 67 ++++---- 20 files changed, 540 insertions(+), 361 deletions(-) diff --git a/website/content/docs/api-reference/admin-realtime.mdx b/website/content/docs/api-reference/admin-realtime.mdx index 9bb38781..81e10787 100644 --- a/website/content/docs/api-reference/admin-realtime.mdx +++ b/website/content/docs/api-reference/admin-realtime.mdx @@ -3,6 +3,8 @@ title: Admin Realtime description: Session-cookie SSE snapshot streams used by the admin UI. --- +import { Callout } from "fumadocs-ui/components/callout"; + Admin realtime endpoints are Server-Sent Events streams for the bundled admin UI. They require an admin session cookie and do not accept API-key bearer tokens or `?token=` query authentication. Each stream sends an immediate `snapshot` event whose `payload` matches the related REST response shape, then sends later `snapshot` events when the backing data changes or when the server-side refresh tick fires. 
@@ -22,9 +24,9 @@ Streams also send heartbeat comments (`: heartbeat`) to keep proxies from closin | Endpoint | Payload shape | Lifecycle | |----------|---------------|-----------| | `GET /v1/admin/realtime/collections/{name}/documents` | `GET /v1/collections/{name}/documents` | stays open | -| `GET /v1/admin/realtime/collections/{name}/documents/{document_id}` | `GET /v1/collections/{name}/documents/{document_id}` | closes when the document is `ready` or `failed` | +| `GET /v1/admin/realtime/collections/{name}/documents/{document_id}` | `GET /v1/collections/{name}/documents/{document_id}` | closes when `ready` or `failed` | | `GET /v1/admin/realtime/collections/{name}/documents/batch-status?document_ids=...` | `POST /v1/collections/{name}/documents/batch/status` | closes when all watched documents are `ready` or `failed` | -| `GET /v1/admin/realtime/collections/{name}/upload-sessions/{session_id}` | `GET /v1/collections/{name}/upload-sessions/{session_id}` | closes when the upload session is `complete`, `failed`, or `canceled` | +| `GET /v1/admin/realtime/collections/{name}/upload-sessions/{session_id}` | `GET /v1/collections/{name}/upload-sessions/{session_id}` | closes when `complete`, `failed`, or `canceled` | | `GET /v1/admin/realtime/collections/{name}/stats` | `GET /v1/collections/{name}/stats` | stays open | Document list streams accept the same `q`, `status`, `sort`, `order`, `limit`, and `offset` filters as the REST list. Batch streams accept repeated or comma-separated `document_ids` values, up to 100 IDs. @@ -52,10 +54,10 @@ Connector source streams accept `collection`. Sync-job streams accept `collectio | `GET /v1/admin/realtime/platform/stats` | `GET /v1/stats` | | `GET /v1/admin/realtime/platform/readiness` | `GET /health/ready` | -These streams support the same query parameters as their REST counterparts. Where no domain event source exists yet, the backend emits full snapshots from a server-side refresh tick so browsers do not need to poll. 
- -Backup streams refresh quickly while jobs are pending or running. +These streams support the same query parameters as their REST counterparts. Where no domain event source exists yet, the backend emits full snapshots from a server-side refresh tick so browsers do not need to poll. Backup streams refresh quickly while jobs are pending or running. ## API Client Fallback -External clients should continue to use the REST endpoints and may poll document status or batch status until documents become `ready` or `failed`. Collection-wide ingestion events remain available through `GET /v1/collections/{name}/events` for API-key clients that need event-level progress. + + External clients should continue to use the REST endpoints and may poll document status or batch status until documents become `ready` or `failed`. Collection-wide ingestion events remain available through `GET /v1/collections/{name}/events` for API-key clients that need event-level progress. + diff --git a/website/content/docs/api-reference/api-keys.mdx b/website/content/docs/api-reference/api-keys.mdx index 10246d7f..2d3e9450 100644 --- a/website/content/docs/api-reference/api-keys.mdx +++ b/website/content/docs/api-reference/api-keys.mdx @@ -3,15 +3,18 @@ title: API Keys description: Mint, list, update, rotate, and revoke bigrag_sk_ keys. --- -Base path: `/v1/admin/api-keys` +import { Callout } from "fumadocs-ui/components/callout"; +import { Accordion, Accordions } from "fumadocs-ui/components/accordion"; -API keys are the credential that SDKs and backend services use against bigRAG. Plaintext values are returned **once** on creation or rotation and then never re-retrievable; only the prefix and a keyed hash are persisted. When `BIGRAG_MASTER_KEY` is unset, older development keys may use the legacy SHA-256 hash format. +Base path: `/v1/admin/api-keys` — Session-only. You cannot mint new keys using an existing key. 
+ +Plaintext values are returned **once** on creation or rotation and are never re-retrievable — only the prefix and a keyed hash are persisted. When `BIGRAG_MASTER_KEY` is unset, older development keys may use the legacy SHA-256 hash format. During master-key rotation, API-key lookup checks `BIGRAG_MASTER_KEY` plus `BIGRAG_MASTER_KEY_PREVIOUS`, so existing keys continue to authenticate while you rotate and rewrite stored hashes. -Session-only: every endpoint below requires a session cookie. You cannot mint new keys using an existing key. + -## List Keys + ``` GET /v1/admin/api-keys?limit=50&offset=0 @@ -39,7 +42,9 @@ GET /v1/admin/api-keys?limit=50&offset=0 } ``` -## Create Key + + + ``` POST /v1/admin/api-keys @@ -78,9 +83,13 @@ POST /v1/admin/api-keys } ``` -Store `key` immediately — it is not retrievable later. + + Store `key` immediately — it is not retrievable later. + -## Update Key + + + ``` PATCH /v1/admin/api-keys/{key_id} @@ -94,7 +103,9 @@ Any combination of `name`, `active`, `expires_at`, `scopes`, and `collection` is **Response** `200`: updated key (without `key`). -## Rotate Key + + + ``` POST /v1/admin/api-keys/{key_id}/rotate @@ -104,7 +115,9 @@ Rotates the plaintext secret, resets `last_used_at`, re-enables the key, and inv **Response** `200`: same shape as create, including one-time `key`. -## Delete Key + + + ``` DELETE /v1/admin/api-keys/{key_id} @@ -113,3 +126,7 @@ DELETE /v1/admin/api-keys/{key_id} Permanently revokes the key. Any in-flight request that has already been authenticated still completes; subsequent requests return `401`. **Response** `200`: `{"status": "ok", "message": "API key deleted"}`. + + + + diff --git a/website/content/docs/api-reference/audit.mdx b/website/content/docs/api-reference/audit.mdx index 9c0db67d..6d207a18 100644 --- a/website/content/docs/api-reference/audit.mdx +++ b/website/content/docs/api-reference/audit.mdx @@ -3,19 +3,19 @@ title: Audit Log description: Read admin audit trails and RAG access telemetry. 
--- -Base path: `/v1/admin` +import { Callout } from "fumadocs-ui/components/callout"; +import { Accordion, Accordions } from "fumadocs-ui/components/accordion"; -Session-only. Audit records are immutable. +Base path: `/v1/admin` — Session-only. Audit records are immutable. -bigRAG separates two operational trails: +bigRAG maintains two separate operational trails: -- `audit_log` records privileged state changes and selected operator/retrieval actions such as API-key creation, collection mutations, evaluation runs, and vector writes. -- `access_log` records RAG calls with actor, auth method, endpoint, status, collection context, - and latency. Query, batch query, vector mutation, and evaluation calls are included; admin UI - refreshes and admin/control-plane reads are excluded. Request bodies, raw prompts, document - content, secrets, and credentials are not stored. +- **`audit_log`** — privileged state changes and selected operator/retrieval actions: API-key creation, collection mutations, evaluation runs, and vector writes. +- **`access_log`** — RAG calls with actor, auth method, endpoint, status, collection context, and latency. Includes query, batch query, vector mutation, and evaluation calls; excludes admin UI refreshes and admin/control-plane reads. Request bodies, raw prompts, document content, secrets, and credentials are never stored. -## List Audit Entries + + + ``` GET /v1/admin/audit @@ -31,7 +31,9 @@ GET /v1/admin/audit | `limit` | integer | 1–1,000, default 100 | | `offset` | integer | Default 0 | -Date-range filtering isn't yet supported — paginate and filter `created_at` client-side if you need a time window. + + Date-range filtering isn't yet supported — paginate and filter `created_at` client-side if you need a time window. 
+ **Response** `200`: @@ -56,13 +58,13 @@ Date-range filtering isn't yet supported — paginate and filter `created_at` cl } ``` -### Field notes +**Field notes:** -- `actor_id` / `actor_email` identify the admin account that performed the action. Both are `null` for system-initiated events (e.g. cleanup workers). -- `api_key_id` is populated when the action was performed via an API key — the field holds the key's UUID, and `actor_id` is the admin who owns the key. For session-cookie (admin UI) calls, `api_key_id` is `null`. -- `metadata` is per-action context (never credentials or document content). Shape varies by `action` — e.g. `api_key.create` includes `name` and `scopes`. +- `actor_id` / `actor_email` — the admin account that performed the action. Both are `null` for system-initiated events (e.g. cleanup workers). +- `api_key_id` — populated when the action was performed via an API key; holds the key's UUID, and `actor_id` is the owning admin. `null` for session-cookie (admin UI) calls. +- `metadata` — per-action context (never credentials or document content). Shape varies by `action` — e.g. `api_key.create` includes `name` and `scopes`. -### Currently emitted actions +**Currently emitted actions:** | Action family | Emitted by | |---------------|------------| @@ -78,16 +80,18 @@ Date-range filtering isn't yet supported — paginate and filter `created_at` cl | `backup.requested` | Readable backup request | | `query.*`, `vectors.*`, `analytics.read`, `evaluation.run`, `chat.*` | Retrieval, vector, analytics, evaluation, and chat actions | -Use the structured application log stream alongside the audit log when you need full request diagnostics, raw route timing, or operational events that are intentionally outside the audit action vocabulary. +Use the structured application log stream alongside the audit log when you need full request diagnostics, raw route timing, or operational events outside the audit action vocabulary. 
+ + -## Access Overview + + +Session-only admin endpoint. ``` GET /v1/admin/access/overview ``` -Session-only admin endpoint. - **Query parameters:** | Parameter | Type | Notes | @@ -96,7 +100,9 @@ Session-only admin endpoint. **Response** `200` includes total RAG events, success/error rate, average and p95 latency, query event count, action buckets, latency-by-action buckets, a timeline, and the most recent access rows. -## List Access Logs + + + ``` GET /v1/admin/access/logs @@ -117,3 +123,7 @@ GET /v1/admin/access/logs | `offset` | integer | Default 0 | Access-log rows include actor/API-key identity when available, route/path, status code, latency, IP, user-agent, request ID, collection context, and safe metadata such as query hash, query length, top-k, result count, evaluation metrics, and cache use. + + + + diff --git a/website/content/docs/api-reference/authentication.mdx b/website/content/docs/api-reference/authentication.mdx index ed37f304..30471e4f 100644 --- a/website/content/docs/api-reference/authentication.mdx +++ b/website/content/docs/api-reference/authentication.mdx @@ -47,8 +47,6 @@ curl -X POST http://localhost:4000/v1/auth/login \ curl http://localhost:4000/v1/auth/me -b cookies.txt ``` -Session endpoints: - | Method | Path | Purpose | |--------|------|---------| | `POST` | `/v1/auth/login` | Email + password → session cookie | @@ -111,7 +109,7 @@ Collection SSE accepts bearer headers or short-lived event tokens. Long-lived AP ### Scopes -Scopes use `resource:action` with `*` as a wildcard. Common shapes: +Scopes use `resource:action` with `*` as a wildcard. 
| Scope | Grants | |-------|--------| diff --git a/website/content/docs/api-reference/backups.mdx b/website/content/docs/api-reference/backups.mdx index ccb416c3..90d3b1b2 100644 --- a/website/content/docs/api-reference/backups.mdx +++ b/website/content/docs/api-reference/backups.mdx @@ -3,19 +3,26 @@ title: Backups description: Admin endpoints for readable S3-compatible full-instance backups. --- -Readable backups export Postgres rows, vector-store points, and uploaded source files to an S3-compatible bucket. Vector-store points are written to JSONL as each provider page arrives, so backup memory usage does not grow with the full vector collection size. They are backup-only; bigRAG does not expose restore endpoints. - -Backups are plain JSON, JSONL, SQL, and raw files. They are not client-side encrypted, so the destination bucket must be treated as sensitive. Secret-bearing database columns and embedding-cache vectors are redacted during export. +import { Callout } from "fumadocs-ui/components/callout"; +import { Accordion, Accordions } from "fumadocs-ui/components/accordion"; Base path: `/v1/admin/backups` -## List backups +Readable backups export Postgres rows, vector-store points, and uploaded source files to an S3-compatible bucket. Vector-store points are written to JSONL as each provider page arrives, so backup memory usage does not grow with the full vector collection size. bigRAG does not expose restore endpoints. + + + Backups are plain JSON, JSONL, SQL, and raw files — not client-side encrypted. Treat the destination bucket as sensitive. Secret-bearing database columns and embedding-cache vectors are redacted during export. 
+ + + + + ```http GET /v1/admin/backups ``` -Query parameters: +**Query parameters:** | Name | Type | Default | Notes | |------|------|---------|-------| @@ -24,7 +31,7 @@ Query parameters: | `cursor` | string | — | Cursor returned as `next_cursor` | | `include_total` | boolean | `false` | Include the total backup count | -Response: +**Response:** ```json { @@ -51,7 +58,9 @@ Response: } ``` -## Get backup + + + ```http GET /v1/admin/backups/{backup_id} @@ -59,7 +68,9 @@ GET /v1/admin/backups/{backup_id} Returns one backup job or `404` if it does not exist. -## Start backup + + + ```http POST /v1/admin/backups @@ -70,23 +81,34 @@ Content-Type: application/json Creates a backup job and enqueues it on the Dramatiq backup worker. Only one backup can be pending or running at a time. -While the job runs, bigRAG acquires a maintenance lock. Mutating API requests are rejected with `423`, new ingestion jobs are rejected, Dramatiq ingestion and connector actors delay themselves, and scheduled Google Drive syncs do not start. +While the job runs, bigRAG acquires a maintenance lock: + +- Mutating API requests are rejected with `423` +- New ingestion jobs are rejected +- Dramatiq ingestion and connector actors delay themselves +- Scheduled Google Drive syncs do not start The backup fails if: -- The backup bucket settings are incomplete or invalid. -- Turbopuffer cannot be reached. -- A stored upload file referenced by a document row is missing. +- The backup bucket settings are incomplete or invalid +- Turbopuffer cannot be reached +- A stored upload file referenced by a document row is missing + +Returns `409` when another backup or maintenance lock is active. -Starting a backup returns `409` when another backup or maintenance lock is active. + -## Realtime stream + ```http GET /v1/admin/realtime/backups ``` -Session-cookie SSE stream used by the admin UI. Each `snapshot` payload has the same shape as `GET /v1/admin/backups`. +Session-cookie SSE stream used by the admin UI. 
Each `snapshot` payload has the same shape as `GET /v1/admin/backups`. Refreshes quickly while jobs are pending or running. + + + + ## Object layout diff --git a/website/content/docs/api-reference/chat.mdx b/website/content/docs/api-reference/chat.mdx index 8b48a7e2..2825df08 100644 --- a/website/content/docs/api-reference/chat.mdx +++ b/website/content/docs/api-reference/chat.mdx @@ -3,24 +3,35 @@ title: Chat description: Stateless playground chat API with retrieval, generated answers, and citations. --- +import { Callout } from "fumadocs-ui/components/callout"; +import { Accordion, Accordions } from "fumadocs-ui/components/accordion"; + Base path: `/v1/chat` -The chat API retrieves chunks from a collection, builds the grounded prompt on the backend, calls the configured chat provider, and returns cited answers for the current turn. Source context includes document IDs, filenames, chunk labels, document metadata, and chunk text so the model can answer metadata questions as well as content questions. It does not store chat messages or conversations. It supports session auth and API-key auth. Scoped API keys can use chat when granted `chat:write`; collection-pinned keys can only chat against their pinned collection. +The chat API retrieves chunks from a collection, builds the grounded prompt on the backend, calls the configured chat provider, and returns cited answers. Source context includes document IDs, filenames, chunk labels, document metadata, and chunk text. It does not store chat messages or conversations. + +Accepts session auth and API-key auth. Scoped API keys can use chat when granted `chat:write`; collection-pinned keys can only chat against their pinned collection. -Supported providers are `openai` and `openai_compatible`. Save the provider key in the admin UI at `chat.openai_key`. API clients can also pass `provider_api_key` per request when they do not want to use the user's saved key. 
+**Provider configuration:** -`BIGRAG_CHAT_API_KEY` is an instance-level fallback for the default OpenAI chat endpoint only. If `chat_base_url` or request `provider_base_url` points to a non-default OpenAI-compatible endpoint, chat requests must use a saved chat key or `provider_api_key`; the instance fallback key is rejected before any provider call. +- Supported providers: `openai` and `openai_compatible`. Save the provider key in the admin UI at `chat.openai_key`. +- API clients can also pass `provider_api_key` per request to bypass the saved key. +- `BIGRAG_CHAT_API_KEY` is an instance-level fallback for the default OpenAI chat endpoint only. If `chat_base_url` or request `provider_base_url` points to a non-default OpenAI-compatible endpoint, a saved chat key or `provider_api_key` is required — the instance fallback key is rejected before any provider call. -## Question Suggestions + + + ``` GET /v1/chat/question-suggestions?collection=handbook POST /v1/chat/question-suggestions ``` -`GET` returns the last generated starter-question set for the collection, or an empty set when none has been generated. `POST` uses the saved chat/OpenAI key from the current user's preferences or the instance fallback when the default OpenAI endpoint is in use. With a non-default chat base URL, question generation requires a saved chat key because it cannot accept a per-request provider key. +`GET` returns the last generated starter-question set for the collection, or an empty set when none has been generated. + +`POST` uses the saved chat/OpenAI key from the current user's preferences or the instance fallback when the default OpenAI endpoint is in use. With a non-default chat base URL, question generation requires a saved chat key because it cannot accept a per-request provider key. 
-POST body: +**POST body:** ```json { @@ -30,7 +41,7 @@ POST body: } ``` -Response: +**Response:** ```json { @@ -47,13 +58,15 @@ Response: } ``` -## Create Chat Turn + + + ``` POST /v1/chat ``` -Request body: +**Request body:** ```json { @@ -74,9 +87,7 @@ Set `multimodal: true` to include retrieved image content for vision-capable mod When `search_mode` is `semantic` and the message contains identifier-like tokens such as UUIDs, chat uses hybrid retrieval for that turn so exact IDs and error codes are matched lexically. The response retrieval payload includes both `requested_search_mode` and the effective `search_mode`. If the message names a document UUID in the selected collection, the source list can include a metadata-only source for that document. -## Stream Chat Turn - -Set `stream: true` to receive Server-Sent Events: +**Streaming:** Set `stream: true` to receive Server-Sent Events: ```text event: user_message @@ -98,3 +109,7 @@ data: [DONE] ``` If provider generation fails after streaming starts, the API emits an `error` event followed by `data: [DONE]`. + + + + diff --git a/website/content/docs/api-reference/collections.mdx b/website/content/docs/api-reference/collections.mdx index 35465f83..35ec44ea 100644 --- a/website/content/docs/api-reference/collections.mdx +++ b/website/content/docs/api-reference/collections.mdx @@ -3,17 +3,16 @@ title: Collections description: API endpoints for managing collections. --- -Base path: `/v1/collections` +import { Accordion, Accordions } from "fumadocs-ui/components/accordion"; -All endpoints require [authentication](/docs/api-reference/authentication) — a session cookie or an API key with `collection:read` (for reads) or `collection:write` (for create / update / delete). +Base path: `/v1/collections` -## List Collections +All endpoints require [authentication](/docs/api-reference/authentication) — a session cookie or an API key with `collection:read` (reads) or `collection:write` (create / update / delete). 
-``` -GET /v1/collections -``` +## CRUD -**Query parameters:** + + | Parameter | Type | Default | Description | |-----------|------|---------|-------------| @@ -58,11 +57,9 @@ GET /v1/collections } ``` -## Create Collection + -``` -POST /v1/collections -``` + **Request body:** @@ -98,8 +95,8 @@ POST /v1/collections | `description` | string | no | `""` | — | | `embedding_provider` | string | no | Server default | `openai`, `cohere`, `voyage`, `openai_compatible` | | `embedding_model` | string | no | Server default | Model name | -| `embedding_preset_id` | string | no | — | Link the collection to a saved embedding preset. The collection inherits the preset's provider, model, dimension, base URL, and key live | -| `embedding_api_key` | string | no | — | Required when `embedding_preset_id` is not provided; validated against the provider before the collection is created. Use any non-empty value for local OpenAI-compatible gateways without auth. Ignored when a preset is supplied — the collection inherits the preset's key live | +| `embedding_preset_id` | string | no | — | Link to a saved embedding preset; inherits provider, model, dimension, base URL, and key live | +| `embedding_api_key` | string | no | — | Required when `embedding_preset_id` is not provided; validated against the provider before the collection is created. Use any non-empty value for local OpenAI-compatible gateways without auth. 
Ignored when a preset is supplied | | `embedding_base_url` | string | no | — | Required for `openai_compatible` (Ollama, vLLM, TEI, LiteLLM, Azure, Bedrock) | | `dimension` | integer | no | Provider default | Must match the model | | `chunk_size` | integer | no | `512` | 64–10,000 | @@ -125,40 +122,15 @@ POST /v1/collections - `409` — Collection name already exists, or the Turbopuffer namespace already exists with a different vector dimension - `502` — Vector store rejected collection provisioning -## Get Collection + -``` -GET /v1/collections/{name} -``` + **Response** `200`: Full collection object. **Errors:** `404`. -## Get Collection Stats - -``` -GET /v1/collections/{name}/stats -``` - -Lightweight endpoint returning document and chunk counts. - -**Response** `200`: - -```json -{ - "collection": "research_papers", - "document_count": 15, - "total_chunks": 482, - "total_tokens": 125000, - "total_size_bytes": 52428800, - "status_counts": { "ready": 12, "pending": 2, "processing": 1, "failed": 0 } -} -``` + -## Update Collection - -``` -PUT /v1/collections/{name} -``` + Only the fields below are mutable. Embedding provider / model / dimension, chunk size / overlap, and `tenant_field` are fixed at creation. @@ -180,11 +152,9 @@ Only the fields below are mutable. Embedding provider / model / dimension, chunk **Response** `200`: Updated collection object. **Errors:** `404`, `422` (when `embedding_api_key` is empty or rejected by the provider). -## Delete Collection + -``` -DELETE /v1/collections/{name} -``` + Deletes the collection, every document, every vector, and every stored file. @@ -196,11 +166,32 @@ Deletes the collection, every document, every vector, and every stored file. **Errors:** `404`. -## Truncate Collection + + +## Stats & operations + + + + +Lightweight endpoint returning document and chunk counts. 
+ +**Response** `200`: + +```json +{ + "collection": "research_papers", + "document_count": 15, + "total_chunks": 482, + "total_tokens": 125000, + "total_size_bytes": 52428800, + "status_counts": { "ready": 12, "pending": 2, "processing": 1, "failed": 0 } +} ``` -POST /v1/collections/{name}/truncate -``` + + + + Deletes every document, vector, and file in the collection but keeps the collection configuration. @@ -212,11 +203,9 @@ Deletes every document, vector, and file in the collection but keeps the collect **Errors:** `404`. -## Re-embed Collection + -``` -POST /v1/collections/{name}/reembed -``` + Re-queues every `ready` or `failed` document for conversion, chunking, and embedding using the collection's current immutable embedding configuration. Vectors already cached by `content_hash + model_key` are reused automatically. @@ -228,15 +217,17 @@ Re-queues every `ready` or `failed` document for conversion, chunking, and embed **Errors:** `404`. -## Stream Collection Events (SSE) + + -``` -GET /v1/collections/{name}/events -``` +## Streams + + + -Server-Sent Events stream for **every** document event in the collection — useful for admin UI integrations, dashboards, and multi-document status panels. +Server-Sent Events stream for every document event in the collection — useful for admin UI integrations, dashboards, and multi-document status panels. -SSE clients that can't set headers should first mint a short-lived event token and then pass it as `?token=...`. +SSE clients that can't set headers should first mint a short-lived event token and then pass it as `?token=...`: ```http POST /v1/collections/{name}/events/token @@ -264,3 +255,6 @@ Only short-lived event tokens are accepted in the query string. Long-lived API k ``` `step` values: `received`, `converted`, `embedded`, `indexed`, `completed`, `failed`. 
+ + + diff --git a/website/content/docs/api-reference/connectors.mdx b/website/content/docs/api-reference/connectors.mdx index 3b1b8f17..a102dc6f 100644 --- a/website/content/docs/api-reference/connectors.mdx +++ b/website/content/docs/api-reference/connectors.mdx @@ -3,18 +3,24 @@ title: Connectors description: Provider-neutral connector configuration, OAuth, source selection, and resync APIs. --- +import { Accordion, Accordions } from "fumadocs-ui/components/accordion"; + Cloud connectors import remote files into the normal bigRAG document pipeline. The connector core owns provider config, user accounts, sources, sync jobs, manifests, scheduling, document replacement, and remote deletion handling. Provider adapters own OAuth details, remote listing, export/download behavior, and provider-specific metadata. Google Drive is the first provider and uses the `google` route slug with `google_drive` as the stored provider id. Future providers such as SharePoint or OneDrive should plug into the same route shape instead of cloning the source and sync job flow. -## Admin Provider Config + + + + +Session-only admin endpoints. For Google Drive, use `/v1/admin/connectors/google`. ``` GET /v1/admin/connectors/{provider} PUT /v1/admin/connectors/{provider} ``` -Session-only admin endpoints. For Google Drive, use `/v1/admin/connectors/google`. The `PUT` body stores OAuth credentials: +The `PUT` body stores OAuth credentials: ```json { @@ -26,7 +32,11 @@ Session-only admin endpoints. For Google Drive, use `/v1/admin/connectors/google Responses mask secret values and include `callback_url`, which must be registered with the provider. Google Drive returns a callback URL ending in `/v1/connectors/google/oauth/callback`. -## User Connection + + + + +These endpoints require session authentication. 
``` GET /v1/connectors/{provider}/account @@ -36,9 +46,11 @@ GET /v1/connectors/{provider}/oauth/callback POST /v1/connectors/{provider}/disconnect ``` -These endpoints require session authentication. For Google Drive, the OAuth flow uses `openid email profile` and `https://www.googleapis.com/auth/drive.readonly` so bigRAG can render an in-app Drive browser and sync selected files/folders. +For Google Drive, the OAuth flow uses `openid email profile` and `https://www.googleapis.com/auth/drive.readonly` so bigRAG can render an in-app Drive browser and sync selected files/folders. -## Drive Browser + + + ``` GET /v1/connectors/{provider}/files?parent_id=root @@ -47,7 +59,9 @@ GET /v1/connectors/{provider}/files?query=handbook Returns the connected user's visible remote files and folders, plus whether each item is supported by the ingestion pipeline. For Google Drive, native Docs, Sheets, and Slides are exported to DOCX, XLSX, and PPTX before ingestion. -## Sources + + + ``` GET /v1/connectors/{provider}/sources?collection=docs @@ -95,3 +109,7 @@ Sync jobs can be filtered by `collection`, `source_id`, and `limit`. While a Goo ``` Terminal `complete` jobs mean Drive files were scanned, downloaded or updated, remote deletions were applied, and changed documents were queued for the normal ingestion pipeline. Conversion, chunking, embedding, and indexing continue through document status progress. + + + + diff --git a/website/content/docs/api-reference/documents.mdx b/website/content/docs/api-reference/documents.mdx index 7a3fe795..270eda25 100644 --- a/website/content/docs/api-reference/documents.mdx +++ b/website/content/docs/api-reference/documents.mdx @@ -3,19 +3,21 @@ title: Documents description: API endpoints for uploading, managing, and monitoring documents. 
--- +import { Callout } from "fumadocs-ui/components/callout"; +import { Accordion, Accordions } from "fumadocs-ui/components/accordion"; + Base path: `/v1/collections/{collection_name}/documents` All endpoints require [authentication](/docs/api-reference/authentication). API keys need: -- `document:upload` for upload, batch upload, and reprocess -- `document:read` for list/get/download/chunks/elements and batch get/status -- `document:delete` for delete and batch delete +- `document:upload` — upload, batch upload, reprocess +- `document:read` — list, get, download, chunks, elements, batch get/status +- `document:delete` — delete, batch delete -## Upload Document +## Core -``` -POST /v1/collections/{collection_name}/documents -``` + + Upload a document for ingestion. Uses `multipart/form-data`. @@ -60,11 +62,9 @@ curl -X POST http://localhost:4000/v1/collections/research/documents \ **Errors:** `400` — Unsupported file type or blocked by metadata-schema checks, `404` — Collection not found, `413` — File too large, `503` — Queue unavailable (document is stored as `failed`) -## List Documents + -``` -GET /v1/collections/{collection_name}/documents -``` + | Parameter | Type | Default | Constraints | |-----------|------|---------|-------------| @@ -100,21 +100,17 @@ GET /v1/collections/{collection_name}/documents } ``` -## Get Document + -``` -GET /v1/collections/{collection_name}/documents/{document_id} -``` + **Response** `200`: Full document object. **Errors:** `404` — Document or collection not found -## Delete Document + -``` -DELETE /v1/collections/{collection_name}/documents/{document_id} -``` + Deletes the document and its associated vectors. @@ -126,11 +122,9 @@ Deletes the document and its associated vectors. **Errors:** `404` — Document or collection not found -## Reprocess Document + -``` -POST /v1/collections/{collection_name}/documents/{document_id}/reprocess -``` + Re-parse, re-chunk, and re-embed a document. 
@@ -140,15 +134,17 @@ Re-parse, re-chunk, and re-embed a document. { "status": "ok", "message": "Document reprocessing started" } ``` -## Get Chunks + -``` -GET /v1/collections/{collection_name}/documents/{document_id}/chunks -``` + + +Download the original uploaded file. Returns binary content with appropriate `Content-Type`. -Get chunks for a processed document. If the document exists but its -Turbopuffer namespace has not been created yet or was already removed, the -endpoint returns an empty chunk list with `total: 0`. + + + + +Get chunks for a processed document. If the document exists but its Turbopuffer namespace has not been created yet or was already removed, the endpoint returns an empty chunk list with `total: 0`. | Parameter | Type | Default | Constraints | |-----------|------|---------|-------------| @@ -174,13 +170,11 @@ endpoint returns an empty chunk list with `total: 0`. Each chunk returns `document_id` as a top-level field. `metadata` contains the remaining chunk/document payload metadata and omits fixed fields such as `text`, `chunk_index`, and `document_id`. -## Get Elements + -``` -GET /v1/collections/{collection_name}/documents/{document_id}/elements -``` + -List stored multimodal document elements. This returns an empty list unless the collection had `multimodal_enabled` when the document was ingested. +List stored multimodal document elements. Returns an empty list unless the collection had `multimodal_enabled` when the document was ingested. | Parameter | Type | Default | Constraints | |-----------|------|---------|-------------| @@ -220,19 +214,24 @@ List stored multimodal document elements. This returns an empty list unless the `kind` is one of `text`, `heading`, `table`, `image`, `equation`, or `unknown`. `enrichment_status` is `not_requested`, `pending`, `ready`, or `failed`. -## Download File + + + + +Convenience read endpoints that resolve the document by UUID without the caller supplying the collection name. 
They still require `document:read`; collection-pinned API keys can only use them for documents that belong to their pinned collection. ``` -GET /v1/collections/{collection_name}/documents/{document_id}/file +GET /v1/documents/{document_id} +GET /v1/documents/{document_id}/chunks ``` -Download the original uploaded file. Returns binary content with appropriate `Content-Type`. + + -## Batch Upload +## Batch & sessions -``` -POST /v1/collections/{collection_name}/documents/batch/upload -``` + + Upload up to 100 files in one request. Each file goes through the same extension sniffing, metadata-schema validation, and `content_hash` deduplication pipeline as the single-file upload endpoint. @@ -268,10 +267,14 @@ curl -X POST http://localhost:4000/v1/collections/docs/documents/batch/upload \ If queueing fails for an item, that document is still returned but with `status: "failed"` plus an `error_message`, rather than being left stuck in `pending`. -## Upload Sessions + + + Upload sessions are the large-upload path for thousands of local files. They create one durable session, then upload files one request at a time so browsers and proxies never build a giant multipart body. +**Create session:** + ``` POST /v1/collections/{collection_name}/upload-sessions ``` @@ -300,12 +303,12 @@ POST /v1/collections/{collection_name}/upload-sessions } ``` +**Upload one file:** + ``` POST /v1/collections/{collection_name}/upload-sessions/{session_id}/files ``` -Upload one file with `multipart/form-data`: - ```bash curl -X POST http://localhost:4000/v1/collections/docs/upload-sessions/$SESSION_ID/files \ -H "Authorization: Bearer $BIGRAG_API_KEY" \ @@ -315,33 +318,21 @@ curl -X POST http://localhost:4000/v1/collections/docs/upload-sessions/$SESSION_ Each accepted file is validated, deduped by `content_hash`, stored, converted into a `Document`, and queued for ingestion. Failed files are recorded as upload-session items and do not abort the session. 
+**Session lifecycle endpoints:** + ``` -GET /v1/collections/{collection_name}/upload-sessions/{session_id} +GET /v1/collections/{collection_name}/upload-sessions/{session_id} POST /v1/collections/{collection_name}/upload-sessions/{session_id}/complete POST /v1/collections/{collection_name}/upload-sessions/{session_id}/cancel ``` -`GET` returns aggregate counts plus the latest active or failed items. `complete` marks the upload phase closed; ingestion can still continue until all linked documents are `ready` or `failed`. If a linked document is deleted after upload, its session item is reported as `canceled` instead of `queued`. `cancel` closes the session and cancels active queued documents created by that session. - -## Batch Status - -``` -POST /v1/collections/{collection_name}/documents/batch/status -``` +- `GET` — returns aggregate counts plus the latest active or failed items. +- `complete` — marks the upload phase closed; ingestion can still continue until all linked documents are `ready` or `failed`. If a linked document is deleted after upload, its session item is reported as `canceled` instead of `queued`. +- `cancel` — closes the session and cancels active queued documents created by that session. -Check status of up to 100 documents. + -```json -{ "document_ids": ["doc-id-1", "doc-id-2"] } -``` - -Returns `id`, `status`, `chunk_count`, `error_message`, and the latest `progress` snapshot for each found document. API clients can poll this endpoint after batch upload until every requested document is `ready` or `failed`; the admin UI uses the session-cookie admin realtime batch stream instead. - -## Batch Get - -``` -POST /v1/collections/{collection_name}/documents/batch/get -``` + Fetch full metadata for up to 100 documents by ID. @@ -376,11 +367,9 @@ Fetch full metadata for up to 100 documents by ID. 
Unlike Batch Status (which returns only `id`, `status`, `error_message`, `chunk_count`), this endpoint returns the complete document object including `filename`, `file_size`, `file_type`, and `metadata`. -## Batch Delete + -``` -POST /v1/collections/{collection_name}/documents/batch/delete -``` + Delete up to 100 documents. Partial success is supported — missing or failed deletes are returned in `errors` rather than aborting the whole batch. @@ -400,18 +389,27 @@ Delete up to 100 documents. Partial success is supported — missing or failed d } ``` -## Global Document Lookup + + -``` -GET /v1/documents/{document_id} -GET /v1/documents/{document_id}/chunks +## Status & polling + + + + +Check status of up to 100 documents. + +```json +{ "document_ids": ["doc-id-1", "doc-id-2"] } ``` -Convenience read endpoints that resolve the document by UUID without the caller supplying the collection name. They still require `document:read`, and collection-pinned API keys can only use them for documents that belong to their pinned collection. +Returns `id`, `status`, `chunk_count`, `error_message`, and the latest `progress` snapshot for each found document. API clients can poll this endpoint after batch upload until every requested document is `ready` or `failed`; the admin UI uses the session-cookie admin realtime batch stream instead. + + -## API-Client Status Polling + -Use `GET /v1/collections/{collection_name}/documents/{document_id}` for a single document, or `POST /v1/collections/{collection_name}/documents/batch/status` for multiple documents. API clients can poll while `status` is `pending` or `processing`, then stop once it reaches `ready` or `failed`. The admin UI uses [admin realtime SSE streams](/docs/api-reference/admin-realtime) for the same full payloads. +Use `GET /v1/collections/{collection_name}/documents/{document_id}` for a single document. Poll while `status` is `pending` or `processing`, then stop once it reaches `ready` or `failed`. 
The admin UI uses [admin realtime SSE streams](/docs/api-reference/admin-realtime) for the same full payloads. Document status responses include a `progress` object with the latest ingestion event snapshot: @@ -428,3 +426,6 @@ Document status responses include a `progress` object with the latest ingestion } } ``` + + + diff --git a/website/content/docs/api-reference/embedding-presets.mdx b/website/content/docs/api-reference/embedding-presets.mdx index 089bc9a3..9236bde3 100644 --- a/website/content/docs/api-reference/embedding-presets.mdx +++ b/website/content/docs/api-reference/embedding-presets.mdx @@ -3,13 +3,18 @@ title: Embedding Presets description: Reusable embedding provider + model configurations. --- -Base path: `/v1/admin/embedding-presets` +import { Callout } from "fumadocs-ui/components/callout"; +import { Accordion, Accordions } from "fumadocs-ui/components/accordion"; -Presets let you save an embedding provider, model, dimension, base URL, and API key once and reference them when creating collections. Collections linked with `embedding_preset_id` read the preset live, so preset updates affect those collections and invalidate their cached config. +Base path: `/v1/admin/embedding-presets` — Session-only. -Session-only. The `api_key` column is Fernet-encrypted at rest with `BIGRAG_MASTER_KEY` — see [Encryption at rest](/docs/deployment/encryption). +Presets save an embedding provider, model, dimension, base URL, and API key once, then let you reference them when creating collections. Collections linked with `embedding_preset_id` read the preset live — preset updates affect those collections and invalidate their cached config. -## List Presets +The `api_key` column is Fernet-encrypted at rest with `BIGRAG_MASTER_KEY` — see [Encryption at rest](/docs/deployment/encryption). 
+ + + + ``` GET /v1/admin/embedding-presets?limit=100&offset=0 @@ -38,7 +43,9 @@ GET /v1/admin/embedding-presets?limit=100&offset=0 The raw `api_key` is never returned; only `has_api_key` is surfaced. -## Create Preset + + + ``` POST /v1/admin/embedding-presets @@ -65,15 +72,21 @@ POST /v1/admin/embedding-presets **Response** `201`: preset without `api_key`. -## Update Preset + + + ``` PATCH /v1/admin/embedding-presets/{preset_id} ``` -Partial — any combination of `name`, `provider`, `model`, `base_url`, `dimension`, `api_key`. Updating credentials propagates immediately to every collection linked via `embedding_preset_id`; their cached config is invalidated as part of the request. +Partial update — any combination of `name`, `provider`, `model`, `base_url`, `dimension`, `api_key`. Updating credentials propagates immediately to every collection linked via `embedding_preset_id`; their cached config is invalidated as part of the request. + + -## Test Preset Credentials + + +Two forms: ``` POST /v1/admin/embedding-presets/test @@ -93,10 +106,16 @@ The first form tests plaintext form values before create/update: The saved-preset form tests the encrypted key already stored for an existing preset. Both return `{"status":"ok","message":"Embedding provider connection succeeded"}` on success. -## Delete Preset + + + ``` DELETE /v1/admin/embedding-presets/{preset_id} ``` Removes the preset. Returns `409` when any collection still references it via `embedding_preset_id` — switch those collections to a per-collection key (or another preset) first. + + + + diff --git a/website/content/docs/api-reference/evaluation.mdx b/website/content/docs/api-reference/evaluation.mdx index a516d022..cf558ca4 100644 --- a/website/content/docs/api-reference/evaluation.mdx +++ b/website/content/docs/api-reference/evaluation.mdx @@ -3,6 +3,8 @@ title: Evaluation description: Measure retrieval quality against a labelled query set. 
--- +import { Accordion, Accordions } from "fumadocs-ui/components/accordion"; + ``` POST /v1/evaluation ``` @@ -11,7 +13,13 @@ Runs a batch of queries against a collection and scores the retrieved chunks aga Accepts a session cookie or an API key with `query:read`. -## Request +The admin UI exposes the same runner under **Settings → Evaluation**. + + + + + +**Request:** ```json { @@ -48,7 +56,7 @@ Accepts a session cookie or an API key with `query:read`. The collection's saved reranking config is applied automatically — there is no `rerank` flag. -## Response +**Response:** ```json { @@ -70,8 +78,12 @@ The collection's saved reranking config is applied automatically — there is no } ``` +**Metrics:** + - **Recall@k** — fraction of `relevant_ids` that appear anywhere in the top-k hits - **MRR** — `1 / rank` of the first relevant hit, averaged across cases - **nDCG@k** — rank-discounted relevance, averaged across cases -The admin UI exposes the same runner under **Settings → Evaluation**. + + + diff --git a/website/content/docs/api-reference/health.mdx b/website/content/docs/api-reference/health.mdx index 6324606c..99debcd6 100644 --- a/website/content/docs/api-reference/health.mdx +++ b/website/content/docs/api-reference/health.mdx @@ -3,27 +3,27 @@ title: Health & Stats description: Health checks and platform statistics endpoints. --- -## Liveness Check +import { Callout } from "fumadocs-ui/components/callout"; +import { Accordion, Accordions } from "fumadocs-ui/components/accordion"; -``` -GET /health -``` +## Health + + + -No authentication required. Returns 200 if the server is running. +No authentication required. Returns `200` if the server is running. ```json { "status": "ok", "version": "2026.4.30" } ``` -## Readiness Check + -``` -GET /health/ready -``` + No authentication required. Tests connectivity to Postgres, Turbopuffer, Redis, and the configured embedding provider. 
-**Response** `200`: +**Response** `200` (healthy): ```json { @@ -37,7 +37,7 @@ No authentication required. Tests connectivity to Postgres, Turbopuffer, Redis, } ``` -When a dependency is unreachable or misconfigured: +**Response** `503` (degraded): ```json { @@ -55,11 +55,13 @@ When a dependency is unreachable or misconfigured: Returns HTTP `503` with `"status": "degraded"` when any dependency is unhealthy. Dependency errors are category labels only: `auth_failed`, `rate_limited`, `timeout`, `unreachable`, `misconfigured`, or `unknown`. The embedding health check result is cached for 60 seconds. -## Platform Stats + + -``` -GET /v1/stats -``` +## Stats & models + + + Platform-wide statistics including collections, documents, and queue status. @@ -104,15 +106,21 @@ Platform-wide statistics including collections, documents, and queue status. } ``` -`status` summarizes the queue and worker state. `ok` means no queue risk was detected, `degraded` means the worker or queue needs attention but is not blocked, and `down` means queued or processing work exists while the worker heartbeat is offline. `queue_health.reasons` can include `worker_offline`, `worker_offline_with_active_queue`, `dead_lettered_jobs`, `stale_processing_jobs`, and `retrying_jobs`. When stats detect expired or unrefreshed processing leases, the worker marks those documents pending again and requeues them through the active ingestion queue instead of leaving them as permanently processing. +`status` summarizes the queue and worker state: -Every HTTP response includes an `X-Request-ID` header. Send your own value to correlate client logs with API logs, or let bigRAG generate one. 
+| Value | Meaning | +|-------|---------| +| `ok` | No queue risk detected | +| `degraded` | Worker or queue needs attention but is not blocked | +| `down` | Queued or processing work exists while the worker heartbeat is offline | -## Embedding Models +`queue_health.reasons` can include: `worker_offline`, `worker_offline_with_active_queue`, `dead_lettered_jobs`, `stale_processing_jobs`, `retrying_jobs`. -``` -GET /v1/embeddings/models -``` +When stats detect expired or unrefreshed processing leases, the worker marks those documents pending again and requeues them through the active ingestion queue instead of leaving them as permanently processing. + + + + Requires authentication. Lists all available embedding models and providers. @@ -171,7 +179,10 @@ Requires authentication. Lists all available embedding models and providers. } ``` -## Error Codes + + + +## Error codes All API errors return a JSON body with a `detail` field: @@ -188,3 +199,7 @@ All API errors return a JSON body with a `detail` field: | `409` | Conflict | Duplicate name | | `413` | Payload Too Large | File exceeds `BIGRAG_MAX_UPLOAD_SIZE_MB` | | `500` | Internal Server Error | Server-side error | + + +Every HTTP response includes an `X-Request-ID` header. Send your own value to correlate client logs with API logs, or let bigRAG generate one. + diff --git a/website/content/docs/api-reference/instance-settings.mdx b/website/content/docs/api-reference/instance-settings.mdx index 277f7aef..1ac71173 100644 --- a/website/content/docs/api-reference/instance-settings.mdx +++ b/website/content/docs/api-reference/instance-settings.mdx @@ -3,27 +3,27 @@ title: Instance Settings description: Admin-only runtime configuration registry. --- +import { Callout } from "fumadocs-ui/components/callout"; +import { Accordion, Accordions } from "fumadocs-ui/components/accordion"; + Base path: `/v1/admin/settings` Session admin only. 
These endpoints manage the database-backed runtime settings registry used by the API and admin UI. The admin UI shows a curated subset of this registry; deployment-sensitive settings such as CORS origins and session-cookie policy are intentionally managed through bootstrap config, this API, or SDK automation. Secret settings are Fernet-encrypted at rest and redacted on read. -## Admin UI vs API Registry - -The Settings UI is optimized for day-to-day operators. It does not show every registry key. +## Admin UI vs API registry | Surface | Use it for | |---------|------------| -| `/settings → Security` | Trusted proxy ranges, outbound provider URL policy, private-network escape hatches, local webhook policy, and embedding-cache posture. | -| `/settings?tab=storage` | Uploaded source-file storage for local disk and S3-compatible object stores. | -| `/settings?tab=backups` | Readable backup destination settings, export controls, and backup history. | -| Deployment config or this API | CORS origins and session-cookie flags: `cors_origins`, `session_cookie_secure`, `session_cookie_samesite`, and `session_cookie_domain`. These can lock admins out when changed incorrectly, so they are best handled deliberately during deployment. | -| Other Settings pages | Ingestion, queue, retention, webhooks, model defaults, and Turbopuffer search settings. | +| `/settings → Security` | Trusted proxy ranges, outbound provider URL policy, private-network escape hatches, local webhook policy, and embedding-cache posture | +| `/settings?tab=storage` | Uploaded source-file storage for local disk and S3-compatible object stores | +| `/settings?tab=backups` | Readable backup destination settings, export controls, and backup history | +| Deployment config or this API | CORS origins and session-cookie flags: `cors_origins`, `session_cookie_secure`, `session_cookie_samesite`, and `session_cookie_domain`. 
These can lock admins out when changed incorrectly, so they are best handled deliberately during deployment | +| Other Settings pages | Ingestion, queue, retention, webhooks, model defaults, and Turbopuffer search settings | -## List Settings +## Endpoints -``` -GET /v1/admin/settings -``` + + Returns the setting registry plus current public values. @@ -71,11 +71,9 @@ Returns the setting registry plus current public values. Secret values return `value: null` and `has_value: true` when saved. Secret specs also return `default: null`, even when a bootstrap or runtime default exists. -## Update Settings + -``` -PUT /v1/admin/settings -``` + ```json { @@ -102,17 +100,19 @@ Deployment-managed security keys use the same endpoint when you intentionally au } ``` -Security hardening settings include `allow_public_bind_in_prod`. Ingestion settings include raw vector API request caps (`max_vector_upsert_count`, `max_vector_delete_count`, `max_vector_text_chars`, `max_vector_metadata_bytes`). Turbopuffer settings include `turbopuffer_api_key`, `turbopuffer_base_url`, `turbopuffer_region`, and `turbopuffer_namespace_prefix`; the admin UI renders `turbopuffer_region` as a public-region dropdown while the API accepts the stored string value. UI-visible settings can be changed from the admin UI. Registry keys that are not rendered in the UI remain available through this API and the SDK admin settings resources. +Other notable settings groups: -Storage and Turbopuffer connection changes are validated before they are saved and apply to the running API. Custom `turbopuffer_base_url` values must be normalized HTTPS roots that pass outbound URL safety checks. 
+- **Security hardening:** `allow_public_bind_in_prod` +- **Ingestion caps:** `max_vector_upsert_count`, `max_vector_delete_count`, `max_vector_text_chars`, `max_vector_metadata_bytes` +- **Turbopuffer:** `turbopuffer_api_key`, `turbopuffer_base_url`, `turbopuffer_region`, `turbopuffer_namespace_prefix` (the admin UI renders `turbopuffer_region` as a public-region dropdown while the API accepts the stored string value) -## Test Settings +Storage and Turbopuffer connection changes are validated before they are saved and apply to the running API. Custom `turbopuffer_base_url` values must be normalized HTTPS roots that pass outbound URL safety checks. UI-visible settings can be changed from the admin UI; registry keys not rendered in the UI remain available through this API and the SDK admin settings resources. -``` -POST /v1/admin/settings/test -``` + + + -Validates the submitted values without saving them. Storage settings run a lightweight backend construction/probe when S3 is selected. Backup settings run a lightweight S3-compatible bucket probe. Turbopuffer settings run the same connectivity validation used by save operations. This endpoint remains useful for SDKs and automation that want a dry run. +Validates submitted values without saving them. Storage settings run a lightweight backend construction/probe when S3 is selected. Backup settings run a lightweight S3-compatible bucket probe. Turbopuffer settings run the same connectivity validation used by save operations. Useful for SDKs and automation that want a dry run. ```json { "values": { "storage_backend": "s3", "storage_s3_bucket": "bigrag-documents" } } @@ -126,23 +126,19 @@ Validates the submitted values without saving them. 
Storage settings run a light { "values": { "turbopuffer_api_key": "tpuf_...", "turbopuffer_region": "aws-us-east-1", "turbopuffer_namespace_prefix": "prod_" } } ``` -## Reset Settings + -``` -POST /v1/admin/settings/reset -``` + ```json { "keys": ["trusted_proxies", "embedding_cache_mode"] } ``` -An empty `keys` array resets every registered instance setting back to its default/bootstrap value, including settings not shown in the admin UI. Successful resets are audited as `instance_settings.reset`. This endpoint remains available to API clients, but the admin UI does not expose a general Reset button for settings panels. +An empty `keys` array resets every registered instance setting back to its default/bootstrap value, including settings not shown in the admin UI. Successful resets are audited as `instance_settings.reset`. The admin UI does not expose a general Reset button for settings panels. -## Purge Embedding Cache + -``` -POST /v1/admin/settings/embedding-cache/purge -``` + Deletes every row from the persistent embedding cache. Use this after key rotation, before handing a backup to another environment, or when an operator wants to force all future ingestion to re-embed. @@ -151,3 +147,6 @@ Deletes every row from the persistent embedding cache. Use this after key rotati ```json { "status": "ok", "message": "Purged 128 embedding cache rows" } ``` + + + diff --git a/website/content/docs/api-reference/mcp-servers.mdx b/website/content/docs/api-reference/mcp-servers.mdx index 4526cba0..add2a572 100644 --- a/website/content/docs/api-reference/mcp-servers.mdx +++ b/website/content/docs/api-reference/mcp-servers.mdx @@ -3,22 +3,25 @@ title: MCP Servers description: Mint and manage Model Context Protocol server credentials for Claude Desktop, Cursor, and other MCP clients. 
--- -Base path: `/v1/admin/mcp-servers` +import { Callout } from "fumadocs-ui/components/callout"; +import { Accordion, Accordions } from "fumadocs-ui/components/accordion"; -Each MCP server is an API key with extra metadata pinned to it (`title`, `server_name`, optional `collection` scope). MCP keys are read/query-only by default with `collection:read`, `document:read`, and `query:read`. Under the hood it reuses the same `ApiKey` row, authentication, scope, and audit/access-logging machinery as ordinary API keys — but the `/v1/admin/api-keys` endpoints filter MCP-tagged keys out, so they don't pollute that list. +Base path: `/v1/admin/mcp-servers` — Session-only (admin UI). -Session-only (admin UI). +Each MCP server is an API key with extra metadata (`title`, `server_name`, optional `collection` scope). MCP keys are read/query-only by default with `collection:read`, `document:read`, and `query:read`. They reuse the same `ApiKey` row, auth, scope, and audit machinery as ordinary API keys — but `/v1/admin/api-keys` filters MCP-tagged keys out to keep that list clean. -A delete on the underlying API key cascades through. Generic API keys minted via `/v1/admin/api-keys` and MCP servers minted via this endpoint share a uniqueness constraint on `key_hash`. +A delete on the underlying API key cascades through. Generic API keys and MCP servers share a uniqueness constraint on `key_hash`. -## List MCP Servers + + + + +Returns only MCP-tagged keys owned by the calling admin. ``` GET /v1/admin/mcp-servers ``` -Returns only MCP-tagged keys owned by the calling admin. - **Response** `200`: ```json @@ -40,21 +43,23 @@ Returns only MCP-tagged keys owned by the calling admin. } ``` -## Create MCP Server + + + + +Mints a fresh API key bound to the new server. The plaintext token is returned **once** in `api_key` — store it before the response leaves the wire. ``` POST /v1/admin/mcp-servers ``` -Mints a fresh API key bound to the new server. 
The plaintext token is returned **once** in `api_key` — store it before the response leaves the wire. - -**Request:** +**Request fields:** | Field | Type | Required | Notes | |-------|------|----------|-------| | `title` | string | yes | 1–80 chars. Display name in the admin UI and the client's connector list. | -| `server_name` | string | yes | 1–60 chars. Lowercase alphanumeric and dashes (`^[a-z0-9][a-z0-9-]*$`). Used as the key under `mcpServers` in the client config and must be unique per owner. | -| `collection` | string | no | Pin the credential to a single collection. If set, the collection must exist. Omit (or set to empty) for a global key. | +| `server_name` | string | yes | 1–60 chars. Lowercase alphanumeric and dashes (`^[a-z0-9][a-z0-9-]*$`). Used as the key under `mcpServers` in the client config; must be unique per owner. | +| `collection` | string | no | Pin the credential to a single collection. If set, the collection must exist. Omit or set to empty for a global key. | **Response** `201`: @@ -73,16 +78,18 @@ Mints a fresh API key bound to the new server. The plaintext token is returned * } ``` -`409` if `server_name` already exists for this user. `400` if `collection` does not exist. +**Errors:** `409` if `server_name` already exists for this user. `400` if `collection` does not exist. -## Update MCP Server + + + + +Edit `title`, `server_name`, and/or `collection`. Send only the fields you want to change. ``` PATCH /v1/admin/mcp-servers/{server_id} ``` -Edit `title`, `server_name`, and/or `collection`. Send only the fields you want to change. - | Field | Type | Notes | |-------|------|-------| | `title` | string | Optional | @@ -91,30 +98,38 @@ Edit `title`, `server_name`, and/or `collection`. Send only the fields you want **Response** `200`: same shape as the list entry above. -## Rotate Credential + + + + +Mints a new API key for an existing server and invalidates the old one immediately. Use this if the previous URL leaked or was lost. 
``` POST /v1/admin/mcp-servers/{server_id}/rotate ``` -Mint a new API key for an existing server and invalidate the old one immediately. Use this if the previous URL leaked or was lost. - **Response** `200`: same shape as create — includes a one-shot `api_key`. -## Delete MCP Server + + + + +Removes the server and its underlying API key. Existing connector URLs stop working on the next request. ``` DELETE /v1/admin/mcp-servers/{server_id} ``` -Removes the server and its underlying API key. Existing connector URLs stop working on the next request. - **Response** `200`: ```json { "status": "ok", "message": "MCP server deleted" } ``` + + + + ## See also - [MCP usage guide](/docs/sdks/mcp) — wire a server into Claude Desktop, Cursor, or a custom client. diff --git a/website/content/docs/api-reference/preferences.mdx b/website/content/docs/api-reference/preferences.mdx index 5943998a..afdf878e 100644 --- a/website/content/docs/api-reference/preferences.mdx +++ b/website/content/docs/api-reference/preferences.mdx @@ -3,9 +3,11 @@ title: User Preferences description: Per-user JSON blob for admin UI state. --- -Base path: `/v1/auth/preferences` +import { Callout } from "fumadocs-ui/components/callout"; -Session-only. This is a JSON scratch-pad scoped to the logged-in user — the admin UI uses it to persist sidebar state, default collection filter, preferred search mode, and similar client-side prefs. The server only interprets documented sensitive paths so it can encrypt and redact secrets. +Base path: `/v1/auth/preferences` — Session-only. + +A JSON scratch-pad scoped to the logged-in user. The admin UI uses it to persist sidebar state, default collection filter, preferred search mode, and similar client-side prefs. The server only interprets documented sensitive paths so it can encrypt and redact secrets. ## Get Preferences @@ -45,4 +47,6 @@ The body may be either `{"data": {...}}` or a bare object — both are accepted. 
Sensitive paths such as `chat.openai_key` are Fernet-encrypted at rest when `BIGRAG_MASTER_KEY` is configured. Preference responses do not return the key; they return `chat.has_openai_key` instead. -There is no size validation beyond the standard request-body limit, but keep payloads small (<64 KB) — this isn't a document store. + + There is no size validation beyond the standard request-body limit, but keep payloads small (<64 KB) — this isn't a document store. + diff --git a/website/content/docs/api-reference/query.mdx b/website/content/docs/api-reference/query.mdx index 0a1019d1..8e50821d 100644 --- a/website/content/docs/api-reference/query.mdx +++ b/website/content/docs/api-reference/query.mdx @@ -3,13 +3,14 @@ title: Query & Search description: Turbopuffer-backed API endpoints for semantic, keyword, and hybrid search. --- -All endpoints require [authentication](/docs/api-reference/authentication) — a session cookie or an API key with `query:read`. Queries run against Turbopuffer namespaces, using semantic vectors, BM25 keyword search, or hybrid fusion depending on `search_mode`. +import { Accordion, Accordions } from "fumadocs-ui/components/accordion"; -## Single Collection Query +All endpoints require [authentication](/docs/api-reference/authentication) — a session cookie or an API key with `query:read`. Queries run against Turbopuffer namespaces using semantic vectors, BM25 keyword search, or hybrid fusion depending on `search_mode`. -``` -POST /v1/collections/{collection_name}/query -``` +## Search + + + Run a search against one collection's Turbopuffer namespace. Semantic and hybrid modes embed the query with the collection's configured embedding model; keyword mode skips embedding. @@ -83,7 +84,7 @@ Run a search against one collection's Turbopuffer namespace. 
Semantic and hybrid } ``` -### Response fields +**Response fields:** | Field | Type | Notes | |-------|------|-------| @@ -103,11 +104,9 @@ Run a search against one collection's Turbopuffer namespace. Semantic and hybrid **Errors:** `404` — collection not found. -## Multi-Collection Query + -``` -POST /v1/query -``` + Search across multiple collections and merge the results by score. @@ -140,11 +139,9 @@ curl -X POST http://localhost:4000/v1/query \ -d '{"query":"machine learning","collections":["docs","papers"],"top_k":20}' ``` -## Batch Query + -``` -POST /v1/batch/query -``` + Run up to 20 independent queries in parallel. Each query item accepts the full single-collection body. @@ -160,11 +157,13 @@ Run up to 20 independent queries in parallel. Each query item accepts the full s Response: `{"results": [...]}` — an array of query responses in the same order as the input. -## Collection Analytics + + -``` -GET /v1/collections/{name}/analytics -``` +## Analytics + + + Rolling 24-hour, 7-day, and 30-day query statistics, cached for five minutes. @@ -181,3 +180,6 @@ Rolling 24-hour, 7-day, and 30-day query statistics, cached for five minutes. ] } ``` + + + diff --git a/website/content/docs/api-reference/usage.mdx b/website/content/docs/api-reference/usage.mdx index 7496c9a8..f83c662c 100644 --- a/website/content/docs/api-reference/usage.mdx +++ b/website/content/docs/api-reference/usage.mdx @@ -3,6 +3,8 @@ title: Usage description: Aggregate per-collection usage statistics. --- +import { Callout } from "fumadocs-ui/components/callout"; + ``` GET /v1/usage ``` diff --git a/website/content/docs/api-reference/users.mdx b/website/content/docs/api-reference/users.mdx index ea00c16d..4ecf7123 100644 --- a/website/content/docs/api-reference/users.mdx +++ b/website/content/docs/api-reference/users.mdx @@ -3,11 +3,15 @@ title: Users description: Admin endpoints for managing accounts. 
--- +import { Accordion, Accordions } from "fumadocs-ui/components/accordion"; + Base path: `/v1/admin/users` All endpoints require a **session** (they reject API keys) and the caller must be an admin. The first admin is created via [`POST /v1/auth/setup`](/docs/api-reference/authentication); this resource mints every admin after that. -## List Users + + + ``` GET /v1/admin/users @@ -39,7 +43,9 @@ GET /v1/admin/users } ``` -## Create User + + + ``` POST /v1/admin/users @@ -65,7 +71,9 @@ POST /v1/admin/users **Errors:** `422` (short password or invalid body), `409` (email taken). -## Update User + + + ``` PATCH /v1/admin/users/{user_id} @@ -88,13 +96,17 @@ Changing `password` revokes every existing session for that user. **Errors:** `400` (last-admin demotion), `409` (email taken), `404`. -## Delete User + + + ``` DELETE /v1/admin/users/{user_id} ``` -Deletes a user along with their sessions, API keys, and audit footprint. Rejected when: +Deletes a user along with their sessions, API keys, and audit footprint. + +Rejected when: - The caller is trying to delete themselves - The target is the only remaining admin @@ -102,3 +114,7 @@ Deletes a user along with their sessions, API keys, and audit footprint. Rejecte **Response** `200`: `{"status": "ok", "message": "..."}`. **Errors:** `400` (last-admin / self-delete), `404`. + + + + diff --git a/website/content/docs/api-reference/vectors.mdx b/website/content/docs/api-reference/vectors.mdx index ceade4c6..3c9bad8d 100644 --- a/website/content/docs/api-reference/vectors.mdx +++ b/website/content/docs/api-reference/vectors.mdx @@ -4,18 +4,23 @@ description: Direct Turbopuffer-backed vector operations. --- import { Callout } from "fumadocs-ui/components/callout"; +import { Accordion, Accordions } from "fumadocs-ui/components/accordion"; -For advanced use cases, you can directly manage Turbopuffer vectors without going through the document ingestion pipeline. 
This is useful for custom embeddings or integrating with external embedding services. +Direct vector operations for custom embeddings or external embedding services, bypassing the document ingestion pipeline. Scoped API keys need `vector:write` for upsert and `vector:delete` for delete. `collection:write` no longer grants raw vector mutation. -## Admin Vector Storage Overview + + + + +Session-only. Returns Turbopuffer health plus collection and vector totals for admin diagnostics. ``` GET /v1/admin/vector-storage/overview ``` -Session-only. Returns Turbopuffer health plus collection and vector totals for admin diagnostics. +**Response** `200`: ```json { @@ -37,14 +42,16 @@ Session-only. Returns Turbopuffer health plus collection and vector totals for a } ``` -## Upsert Vectors + + + + +Insert or update vectors directly in the collection's Turbopuffer namespace. ``` POST /v1/collections/{collection_name}/vectors/upsert ``` -Insert or update vectors directly in the collection's Turbopuffer namespace. - **Request body:** ```json @@ -85,14 +92,16 @@ Default request safety caps reject requests above 1,000 vectors per upsert, 100, - `413` — Too many vectors, too much vector text, or too much vector metadata - `404` — Collection not found -## Delete Vectors + + + + +Delete vectors by their IDs. ``` POST /v1/collections/{collection_name}/vectors/delete ``` -Delete vectors by their IDs. - The default request safety cap rejects requests above 10,000 IDs per delete. Admins can tune this in instance settings. **Request body:** @@ -109,6 +118,10 @@ The default request safety cap rejects requests above 10,000 IDs per delete. 
Adm **Errors:** `404` — Collection not found + + + + ## Example: Custom Embeddings ```bash diff --git a/website/content/docs/api-reference/webhooks.mdx b/website/content/docs/api-reference/webhooks.mdx index 4a7453dc..c317877f 100644 --- a/website/content/docs/api-reference/webhooks.mdx +++ b/website/content/docs/api-reference/webhooks.mdx @@ -3,15 +3,17 @@ title: Webhooks description: API endpoints for managing webhook registrations. --- +import { Callout } from "fumadocs-ui/components/callout"; +import { Accordion, Accordions } from "fumadocs-ui/components/accordion"; + Base path: `/v1/admin/webhooks` Session-only admin endpoints. API-key bearer tokens are rejected. -## Register Webhook +## Management -``` -POST /v1/admin/webhooks -``` + + | Field | Type | Required | Description | |-------|------|----------|-------------| @@ -33,7 +35,9 @@ curl -X POST http://localhost:4000/v1/admin/webhooks \ Supported event types: `document.processing`, `document.ready`, `document.failed`, `document.deleted`, `collection.created`, `collection.updated`, `collection.deleted`, `collection.truncated`, `collection.reembed.queued`, `connector.sync.started`, `connector.sync.completed`, `connector.sync.failed`, `connector.sync.needs_reauth`, `backup.started`, `backup.succeeded`, `backup.failed`. -## List Webhooks + + + ``` GET /v1/admin/webhooks?limit=50&offset=0 @@ -50,27 +54,31 @@ GET /v1/admin/webhooks?limit=50&offset=0 `total` is the total number of configured webhooks before pagination. -## Get Webhook + -``` -GET /v1/admin/webhooks/{id} -``` + -## Update Webhook +Returns the full webhook object. -``` -PUT /v1/admin/webhooks/{id} -``` + + + Updatable fields: `url`, `events`, `collections`, `active`. -## Delete Webhook + -``` -DELETE /v1/admin/webhooks/{id} -``` + + +Deletes the webhook registration. 
+ + + -## List Deliveries +## Deliveries + + + ``` GET /v1/admin/webhooks/{id}/deliveries?limit=50&offset=0 @@ -78,19 +86,15 @@ GET /v1/admin/webhooks/{id}/deliveries?limit=50&offset=0 Returns delivery history for a webhook, useful for debugging. -## Test Webhook + -``` -POST /v1/admin/webhooks/{id}/test -``` + Sends a `webhook.test` event to verify the endpoint is reachable. -## Replay Delivery + -``` -POST /v1/admin/webhooks/{id}/deliveries/{delivery_id}/replay -``` + Re-sends the original stored delivery payload to the webhook URL and records the replay attempt in the audit log. Use this after fixing a subscriber outage or endpoint bug. @@ -104,9 +108,12 @@ Re-sends the original stored delivery payload to the webhook URL and records the } ``` -## Webhook Payload + + -The existing document processing events keep their original flat payload for compatibility: +## Payload format + +Document processing events use a flat payload for compatibility: ```json { @@ -148,7 +155,7 @@ New data-operation events use a common envelope: Admin-plane events (`user.create`, `api_key.*`, `webhook.*`, settings changes) remain in the [audit log](/docs/api-reference/audit). Per-query success webhooks are intentionally not emitted; use access logs and analytics for high-RPS query monitoring. -## Headers +## Headers & retry Each delivery includes these headers: @@ -161,6 +168,4 @@ X-BigRAG-Delivery: The signature is `HMAC-SHA256(secret, "{timestamp}.{raw_body}")`, where `timestamp` is the exact `X-BigRAG-Timestamp` value and `raw_body` is the delivered JSON body bytes decoded as UTF-8. -## Retry Policy - Failed deliveries remain in the delivery outbox as `pending` with `next_retry_at` set for the next attempt. The dispatcher claims due rows, updates `attempts`, and marks the delivery `delivered` or `failed` once the retry budget is exhausted. 
From 8f39a29add6fb1bfbccbdee0e7f7eab4fbc235cc Mon Sep 17 00:00:00 2001 From: Bigint <69431456+bigint@users.noreply.github.com> Date: Fri, 22 May 2026 19:31:30 +0530 Subject: [PATCH 03/11] docs: simplify concept pages with tables and callouts --- .../content/docs/concepts/architecture.mdx | 171 ++++++------------ website/content/docs/concepts/collections.mdx | 14 +- website/content/docs/concepts/documents.mdx | 48 +++-- website/content/docs/concepts/embeddings.mdx | 35 +++- website/content/docs/concepts/search.mdx | 10 +- website/content/docs/concepts/webhooks.mdx | 6 +- 6 files changed, 115 insertions(+), 169 deletions(-) diff --git a/website/content/docs/concepts/architecture.mdx b/website/content/docs/concepts/architecture.mdx index 6ad308bf..19419ef3 100644 --- a/website/content/docs/concepts/architecture.mdx +++ b/website/content/docs/concepts/architecture.mdx @@ -5,62 +5,35 @@ description: Why Turbopuffer + Postgres + Redis + Docling, how requests flow end import { Callout } from "fumadocs-ui/components/callout"; +bigRAG uses four distinct stores, each with a specific role, wired together through a FastAPI backend and Dramatiq worker queue. + ## Four-store design -bigRAG intentionally keeps four distinct stores instead of shoving -everything into Postgres with pgvector. Each one earns its keep: - -- **Turbopuffer search** — dense embeddings, chunk text, payload - metadata, BM25 full-text search, filtered semantic search, and - collection namespaces. -- **Postgres** — metadata and control plane. Collections, documents, - users, API keys, webhooks, query logs, audit trail, embedding cache. - Transactions keep ingestion consistent across stores. -- **Redis** — Dramatiq broker storage for ingestion, connector, - webhook, backup, and maintenance actors; processing leases, - cancellation epochs, idempotency-key cache, auth/collection/query - hot-path caches, batched document-progress lookups, and short-lived - platform/health caches. 
-- **Local disk** — raw uploaded documents. Needed to produce - citations back into source files and to support re-embedding without - re-uploading. +| Store | Role | +|-------|------| +| **Turbopuffer** | Dense embeddings, chunk text, payload metadata, BM25 full-text search, filtered semantic search, collection namespaces | +| **Postgres** | Metadata and control plane: collections, documents, users, API keys, webhooks, query logs, audit trail, embedding cache. Transactions keep ingestion consistent across stores. | +| **Redis** | Dramatiq broker for ingestion, connector, webhook, backup, and maintenance actors; processing leases, cancellation epochs, idempotency-key cache, auth/collection/query hot-path caches, batched document-progress lookups, short-lived platform/health caches | +| **Local disk** | Raw uploaded documents. Required for citations back into source files and for re-embedding without re-uploading. | -bigRAG runs Postgres and Redis locally with `docker compose up`, while -Turbopuffer provides the managed vector and full-text search backend. +Postgres and Redis run locally via `docker compose up`. Turbopuffer is the managed vector and full-text search backend. -The backend talks to Turbopuffer through the official Python client for -namespace writes, deletes, health listing, vector queries, and BM25 -queries. +The backend talks to Turbopuffer through the official Python client for namespace writes, deletes, health listing, vector queries, and BM25 queries. -The admin UI leans on TanStack Router's built-in route code splitting -instead of wrapping dashboard pages in extra `React.lazy` boundaries, -so each route transition pays for one route chunk, not a route chunk -plus an immediately nested page chunk. On the backend, Postgres keeps -composite newest-first indexes for collection document lists and common -access-log filters so admin timelines avoid full sorts as data grows. 
+The admin UI uses TanStack Router's built-in route code splitting — each route transition pays for one route chunk, not a route chunk plus a nested page chunk. Postgres uses composite newest-first indexes for collection document lists and common access-log filters to avoid full sorts as data grows. ## Identifier model -bigRAG-owned records use native Postgres `uuid` columns and are generated -by the API as UUIDv7 values before insert. That gives users, sessions, -API keys, collections, documents, upload sessions, backup jobs, -connectors, webhooks, query logs, embedding presets, audit logs, and -access logs one canonical identifier shape while keeping IDs time-ordered -for healthier newest-first indexes. - -Existing UUIDv4 rows remain valid because UUIDv4 and UUIDv7 share the -same Postgres `uuid` type. Foreign keys store raw UUID values internally, -and the REST API plus SDKs expose them as strings. - -Not every field ending in `id` is a bigRAG record ID. External provider -identifiers such as Google Drive file IDs, caller-provided vector IDs, -collection names, settings keys, idempotency keys, OAuth states, session -tokens, API keys, and webhook secrets remain opaque strings. Vector-store -point IDs are backend-safe deterministic UUIDv5 values derived from the -caller-facing vector or chunk ID, so Turbopuffer writes, deletes, and -exports can use stable point IDs without changing API-visible IDs. +bigRAG-owned records use native Postgres `uuid` columns generated as UUIDv7 values before insert. This covers users, sessions, API keys, collections, documents, upload sessions, backup jobs, connectors, webhooks, query logs, embedding presets, audit logs, and access logs — one canonical identifier shape, time-ordered for healthier newest-first indexes. + +Existing UUIDv4 rows remain valid because UUIDv4 and UUIDv7 share the same Postgres `uuid` type. Foreign keys store raw UUID values internally; the REST API and SDKs expose them as strings. 
+ +Not every field ending in `id` is a bigRAG record ID: + +- External provider identifiers such as Google Drive file IDs, as well as caller-provided vector IDs, collection names, settings keys, idempotency keys, OAuth states, session tokens, API keys, and webhook secrets, remain opaque strings. +- Vector-store point IDs are deterministic UUIDv5 values derived from the caller-facing vector or chunk ID, so Turbopuffer writes, deletes, and exports use stable point IDs without changing API-visible IDs. ## Request flow — `POST /v1/collections/{name}/documents` @@ -90,10 +63,7 @@ Webhook outbox actor │ data-operation fan-out through pending delivery rows ``` -Failure at any step marks the document `failed` with a typed error -message. Active workers renew Redis leases while long jobs run. Transient -failures are rescheduled as delayed Dramatiq messages, and exhausted jobs -are retained in the dead-letter list for operator visibility. +Failure at any step marks the document `failed` with a typed error message. Active workers renew Redis leases while long jobs run. Transient failures are rescheduled as delayed Dramatiq messages; exhausted jobs are retained in the dead-letter list for operator visibility. ## Request flow — `POST /v1/collections/{name}/query` @@ -109,86 +79,51 @@ Client → FastAPI Response JSON with results and timings ``` -Keyword mode skips embeddings. Semantic and hybrid mode use the -collection's configured embedding model. Keyword search runs BM25 over -Turbopuffer full-text chunk data, and hybrid search fuses Turbopuffer -ANN results with BM25 results using reciprocal rank fusion. -Query-result cache hits return the cached chunks but report the current -cache lookup latency, not the original uncached retrieval timings. +- **Keyword mode**: skips embeddings; runs BM25 over Turbopuffer full-text chunk data. +- **Semantic mode**: uses the collection's configured embedding model. +- **Hybrid mode**: uses the collection's configured embedding model, then fuses Turbopuffer ANN results with BM25 results via reciprocal rank fusion. 
+- **Cache hits**: return the cached chunks with the current cache lookup latency, not the original uncached retrieval timings. ## Why Docling -Alternatives considered and why we didn't pick them: +Alternatives considered: -- **Unstructured.io** — great coverage but requires network calls and - a hosted inference server by default. bigRAG needs to run offline. -- **PyMuPDF + LangChain** — OK for PDFs but nothing else, and loses - layout structure. -- **LlamaParse** — hosted only. +| Alternative | Why not | +|-------------|---------| +| Unstructured.io | Requires network calls and a hosted inference server by default; bigRAG needs to run offline | +| PyMuPDF + LangChain | OK for PDFs only; loses layout structure | +| LlamaParse | Hosted only | -Docling runs entirely locally, handles DOCX / PPTX / HTML / Markdown / -images, and exposes layout provenance we can thread into citation metadata -(`page_no`, `bbox`). PDFs with embedded text use a faster direct extractor; -local OCR for scanned PDFs is enabled by default. +Docling runs entirely locally, handles DOCX / PPTX / HTML / Markdown / images, and exposes layout provenance for citation metadata (`page_no`, `bbox`). PDFs with embedded text use a faster direct extractor; local OCR for scanned PDFs is enabled by default. ## Why Turbopuffer -pgvector is tempting — one less external service. 
But at scale the gap -widens: +| Capability | Why it matters | +|------------|----------------| +| Managed vector engine | Keeps ANN search and payload filtering outside the transactional database; Postgres stays the metadata and control plane | +| Full-text plus vectors | Stores chunk text with full-text search enabled, so keyword search runs as BM25 and hybrid search fuses BM25 with semantic results | +| Namespace isolation | Each collection maps to one namespace; deletion, truncation, export, and raw vector writes are scoped to one backend object | -- **Managed vector engine**: Turbopuffer keeps ANN search and payload - filtering outside the transactional database, so Postgres remains the - metadata and control plane. -- **Full-text plus vectors**: Turbopuffer stores chunk text with - full-text search enabled, which lets keyword search run as BM25 and - hybrid search fuse BM25 with semantic results. -- **Namespace isolation**: each collection maps to a namespace, keeping - collection deletion, truncation, export, and raw vector writes scoped - to one backend object. - -For single-tenant deployments under ~1M vectors, pgvector would work -well. bigRAG keeps Postgres focused on durable metadata and uses -Turbopuffer for retrieval-specific storage and query execution. +For single-tenant deployments under ~1M vectors, pgvector would work well. bigRAG keeps Postgres focused on durable metadata and uses Turbopuffer for retrieval-specific storage and query execution. ## Scaling notes -- **Horizontal API workers**: FastAPI is stateless past the lifespan. - Run N replicas behind a load balancer; they share Postgres, Redis, - and the configured vector-store clients. API replicas enqueue work but do not - run background jobs in-process. -- **Dramatiq workers**: run one or more `bigrag-worker` processes for - ingestion, connector syncs, webhook outbox delivery, backups, and - cleanup. Worker heartbeat and queue state are visible through - `/v1/stats`. 
Each worker process initializes runtime dependencies - before reserving jobs, aborts boot if initialization fails, refreshes - queue-scoped heartbeats while idle, and recovers stale processing - leases on startup. Periodic maintenance and webhook outbox ticks are - seeded after boot through the same Redis scheduler guards used by the - actors, so restarts and replicas do not create duplicate delayed - messages. -- **Vector backend**: Turbopuffer namespaces are created on first write - and store bigRAG vector IDs as payload attributes while using - backend-safe IDs internally for writes, deletes, and exports. -- **Postgres replication**: the control plane is read-light; a - warm-standby suffices for failover. -- **Redis persistence**: enable `appendonly yes` (already set in the - shipped compose file). Pending Dramatiq messages, delayed retries, - processing leases, and dead-letter entries survive process restarts - within the Redis durability window. +- **Horizontal API workers**: FastAPI is stateless past the lifespan. Run N replicas behind a load balancer; they share Postgres, Redis, and the configured vector-store clients. API replicas enqueue work but do not run background jobs in-process. +- **Dramatiq workers**: run one or more `bigrag-worker` processes for ingestion, connector syncs, webhook outbox delivery, backups, and cleanup. Worker heartbeat and queue state are visible through `/v1/stats`. Each worker initializes runtime dependencies before reserving jobs, aborts boot on failure, refreshes queue-scoped heartbeats while idle, and recovers stale processing leases on startup. Periodic maintenance and webhook outbox ticks are seeded after boot through Redis scheduler guards, so restarts and replicas do not create duplicate delayed messages. +- **Vector backend**: Turbopuffer namespaces are created on first write and store bigRAG vector IDs as payload attributes while using backend-safe IDs internally for writes, deletes, and exports. 
+- **Postgres replication**: the control plane is read-light; a warm-standby suffices for failover. +- **Redis persistence**: enable `appendonly yes` (already set in the shipped compose file). Pending Dramatiq messages, delayed retries, processing leases, and dead-letter entries survive process restarts within the Redis durability window. ## Connector service layout -Connector routes stay provider-neutral through `connector_registry.py`. -Reusable connector behavior lives in `bigrag.services.connectors`: -account/config management, source and sync-job persistence, progress -payloads, manifest updates, sync status handling, and document handoff -to the ingestion actor. The ingestion queue keeps Redis lease/recovery -state separate from per-document processing so retry, dead-letter, and -webhook fanout behavior can evolve without changing route-level APIs. -Scheduled connector ticks run as Dramatiq maintenance actors and claim -due sources with row locks so multiple worker replicas do not create -duplicate scheduled sync jobs. Provider modules layer on top of those -shared helpers; the Google -Drive provider keeps OAuth/account handling, Drive API calls, source -helpers, and sync adapter wiring in separate modules while preserving -`bigrag.services.google_drive` as the public import surface. +Connector routes stay provider-neutral through `connector_registry.py`. Reusable connector behavior lives in `bigrag.services.connectors`: + +- Account/config management +- Source and sync-job persistence +- Progress payloads and manifest updates +- Sync status handling +- Document handoff to the ingestion actor + +The ingestion queue keeps Redis lease/recovery state separate from per-document processing so retry, dead-letter, and webhook fanout behavior can evolve without changing route-level APIs. + +Scheduled connector ticks run as Dramatiq maintenance actors and claim due sources with row locks so multiple worker replicas do not create duplicate scheduled sync jobs. 
The Google Drive provider keeps OAuth/account handling, Drive API calls, source helpers, and sync adapter wiring in separate modules while preserving `bigrag.services.google_drive` as the public import surface. diff --git a/website/content/docs/concepts/collections.mdx b/website/content/docs/concepts/collections.mdx index 7f65babe..d133ef91 100644 --- a/website/content/docs/concepts/collections.mdx +++ b/website/content/docs/concepts/collections.mdx @@ -60,11 +60,13 @@ curl -X POST http://localhost:4000/v1/collections \ | `multimodal_enrichment_enabled` | boolean | no | `false` | Queue VLM summaries for image, table, and equation elements; requires `multimodal_enabled` | | `metadata` | object | no | `{}` | Arbitrary key-value pairs | -Collection creation validates the saved Turbopuffer connection and any existing namespace schema for the collection name. The namespace is created on the first document or vector write, and chunk text is stored with full-text search enabled so semantic, keyword, and hybrid retrieval use the same backend. If the namespace already uses a different vector dimension, create a new collection name, delete or truncate the collection, or use a different Turbopuffer namespace prefix before switching embedding dimensions. +Collection creation validates the saved Turbopuffer connection and any existing namespace schema for the collection name. The namespace is created on the first document or vector write, and chunk text is stored with full-text search enabled so semantic, keyword, and hybrid retrieval use the same backend. -## Collection Stats + +If the namespace already uses a different vector dimension, create a new collection name, delete or truncate the collection, or use a different Turbopuffer namespace prefix before switching embedding dimensions. 
+ -Get a lightweight summary of a collection's document and chunk counts without fetching platform-wide stats: +## Collection Stats ```bash curl http://localhost:4000/v1/collections/research_papers/stats \ @@ -117,14 +119,12 @@ You can override reranking per query by passing `"rerank": true` or `"rerank": f ## Multimodal Elements -Set `multimodal_enabled: true` on a collection to preserve document structure alongside chunk vectors. Ingestion stores first-class element records for headings, tables, equations, images, page numbers, bounding boxes, captions, nearby context, and asset references where available. Query and chat responses remain backward compatible; multimodal provenance appears in `multimodal_elements` when a retrieved chunk overlaps stored elements. +Set `multimodal_enabled: true` to preserve document structure alongside chunk vectors. Ingestion stores first-class element records for headings, tables, equations, images, page numbers, bounding boxes, captions, nearby context, and asset references where available. Query and chat responses remain backward compatible; multimodal provenance appears in `multimodal_elements` when a retrieved chunk overlaps stored elements. Set `multimodal_enrichment_enabled: true` to queue asynchronous VLM summaries for image, table, and equation elements using the configured OpenAI-compatible chat provider. Core document readiness is still based on parse/chunk/embed completion; enrichment can finish later or fail independently. ## Updating a Collection -You can update a collection's description, metadata, reranking settings, and default query settings: - ```bash curl -X PUT http://localhost:4000/v1/collections/research_papers \ -H "Authorization: Bearer $BIGRAG_API_KEY" \ @@ -156,7 +156,7 @@ curl -X PUT http://localhost:4000/v1/collections/research_papers \ All fields are optional — only provided fields are updated. - Embedding configuration (provider, model, dimension) and chunk settings cannot be changed after creation. 
+Embedding configuration (provider, model, dimension) and chunk settings cannot be changed after creation. ## Deleting a Collection diff --git a/website/content/docs/concepts/documents.mdx b/website/content/docs/concepts/documents.mdx index a09c13f7..a93e0a98 100644 --- a/website/content/docs/concepts/documents.mdx +++ b/website/content/docs/concepts/documents.mdx @@ -5,15 +5,15 @@ description: How bigRAG ingests, parses, chunks, embeds, and stores documents in import { Callout } from "fumadocs-ui/components/callout"; -Documents are files uploaded to a collection. When uploaded, they are automatically parsed, chunked, embedded, and written to the collection's Turbopuffer namespace for semantic, keyword, and hybrid search. +Documents are files uploaded to a collection. Once uploaded, they are automatically parsed, chunked, embedded, and written to the collection's Turbopuffer namespace for semantic, keyword, and hybrid search. ## Supported Formats -bigRAG extracts embedded text from PDFs directly, and uses [Docling](https://github.com/DS4SD/docling) for other rich document parsing: +bigRAG extracts embedded text from PDFs directly and uses [Docling](https://github.com/DS4SD/docling) for other rich document parsing: | Format | Extensions | Notes | |--------|------------|-------| -| PDF | `.pdf` | Fast embedded-text extraction; scanned-PDF OCR is enabled by default | +| PDF | `.pdf` | Fast embedded-text extraction; scanned-PDF OCR enabled by default | | Microsoft Word | `.docx` | Full layout support | | Microsoft PowerPoint | `.pptx` | Slide content extraction | | Microsoft Excel | `.xlsx` | Table data extraction | @@ -27,10 +27,6 @@ bigRAG extracts embedded text from PDFs directly, and uses [Docling](https://git ## Ingestion Methods -### File Upload - -Upload documents directly via the API or SDKs: - | Method | Best for | |--------|----------| | Single upload | One interactive file | @@ -38,29 +34,27 @@ Upload documents directly via the API or SDKs: | Upload session |
Large local folder/file imports up to the configured session limits | | Connector sync | Cloud files managed by a provider such as Google Drive | -Upload sessions accept one file per request under a durable session ID. The admin UI uses this path for local files and folders so it can keep browser memory bounded, retry individual failures, and restore progress after navigation. +Upload sessions accept one file per request under a durable session ID. The admin UI uses this path for local files and folders to keep browser memory bounded, retry individual failures, and restore progress after navigation. ## Ingestion Pipeline -Regardless of ingestion method, every document goes through the same pipeline: +Every document goes through the same pipeline regardless of ingestion method: -1. **Store** — file is saved to the local upload directory -2. **Queue** — document is sent to the Dramatiq Redis broker with status `pending` -3. **Parse** — a Dramatiq worker picks up the document and extracts text directly for text PDFs or parses richer formats with Docling -4. **Elements** — multimodal collections store document elements such as headings, tables, equations, images, page bounds, captions, and nearby context -5. **Chunk** — extracted text is split into chunks based on the collection's `chunk_size` and `chunk_overlap` -6. **Embed** — each chunk is embedded using the collection's configured embedding model -7. **Store** — embeddings, chunk text, and metadata are batch-inserted into Turbopuffer -8. **Ready** — document status is updated to `ready` with the chunk count +1. **Store** — file saved to the local upload directory +2. **Queue** — document sent to the Dramatiq Redis broker with status `pending` +3. **Parse** — worker extracts text directly for text PDFs, or parses richer formats with Docling +4. **Elements** — multimodal collections store document elements: headings, tables, equations, images, page bounds, captions, and nearby context +5. 
**Chunk** — extracted text split into chunks based on the collection's `chunk_size` and `chunk_overlap` +6. **Embed** — each chunk embedded using the collection's configured model +7. **Store** — embeddings, chunk text, and metadata batch-inserted into Turbopuffer +8. **Ready** — document status updated to `ready` with the chunk count -Workers renew processing leases while a document is active. Transient failures are rescheduled as delayed Dramatiq messages before the next attempt; exhausted jobs are marked `failed` and retained in the dead-letter list. +Workers renew processing leases while a document is active. Transient failures are rescheduled as delayed Dramatiq messages; exhausted jobs are marked `failed` and retained in the dead-letter list. When `multimodal_enrichment_enabled` is set on the collection, a follow-up worker job generates summaries for image, table, and equation elements. This enrichment is asynchronous and does not block the document from becoming searchable. ## Processing Status -Documents transition through these states: - | Status | Description | |--------|-------------| | `pending` | Queued, waiting for a worker | @@ -84,13 +78,13 @@ Chunking splits document text into overlapping segments for embedding and retrie | `chunk_size` | 512 | 64–10,000 | Maximum characters per chunk | | `chunk_overlap` | 50 | 0–5,000 | Overlap characters between adjacent chunks | -- **Smaller chunks** (256–512) are better for precise answers and factual retrieval -- **Larger chunks** (1,000–2,000) provide more context per result -- **Overlap** ensures important content at chunk boundaries is not lost +- **Smaller chunks** (256–512) — better for precise answers and factual retrieval +- **Larger chunks** (1,000–2,000) — more context per result +- **Overlap** — ensures important content at chunk boundaries is not lost ## API-Client Status Polling -API clients can monitor document processing by polling the document record: +Poll the document record to monitor 
processing: ```javascript let document = await client.documents.get("research", "DOC_ID"); @@ -118,14 +112,14 @@ The document response includes: ## Document Elements -For collections created or updated with `multimodal_enabled: true`, use: +For collections created or updated with `multimodal_enabled: true`: ```bash curl "http://localhost:4000/v1/collections/research/documents/DOC_ID/elements" \ -H "Authorization: Bearer $BIGRAG_API_KEY" ``` -Each element includes `kind`, extracted `text`, optional `summary`, `caption`, `asset_path`, `page_no`, `bbox`, character offsets, nearby context, enrichment status, and source metadata. Existing text-only retrieval still works; the element records add provenance for query and chat clients that want richer context. +Each element includes `kind`, extracted `text`, optional `summary`, `caption`, `asset_path`, `page_no`, `bbox`, character offsets, nearby context, enrichment status, and source metadata. Existing text-only retrieval still works; element records add provenance for query and chat clients that want richer context. ## Batch Operations @@ -159,7 +153,7 @@ curl -X POST http://localhost:4000/v1/collections/docs/documents/batch/delete \ ``` - Batch operations support up to 100 items per request. API clients can poll batch status after upload to read each document's latest progress snapshot until every document is ready or failed. Partial success is supported for batch delete — failed items are reported in the `errors` array. +Batch operations support up to 100 items per request. API clients can poll batch status after upload to read each document's latest progress snapshot until every document is ready or failed. Partial success is supported for batch delete — failed items are reported in the `errors` array. 
## Large Upload Sessions diff --git a/website/content/docs/concepts/embeddings.mdx b/website/content/docs/concepts/embeddings.mdx index 540e2252..e0265e61 100644 --- a/website/content/docs/concepts/embeddings.mdx +++ b/website/content/docs/concepts/embeddings.mdx @@ -10,10 +10,12 @@ bigRAG supports four provider families. Each collection picks one at creation ti ## Providers -- **`openai`** — the OpenAI API -- **`cohere`** — the Cohere Embed API -- **`voyage`** — the Voyage AI Embed API (general-purpose, code, finance, and legal models) -- **`openai_compatible`** — any HTTP endpoint that implements the OpenAI `/embeddings` shape (Ollama, vLLM, TEI / HuggingFace Text Embedding Inference, Infinity, LiteLLM, Azure OpenAI, Bedrock via LiteLLM, self-hosted models, …) +| Provider | Description | +|----------|-------------| +| `openai` | The OpenAI API | +| `cohere` | The Cohere Embed API | +| `voyage` | The Voyage AI Embed API (general-purpose, code, finance, and legal models) | +| `openai_compatible` | Any HTTP endpoint that implements the OpenAI `/embeddings` shape — Ollama, vLLM, TEI / HuggingFace Text Embedding Inference, Infinity, LiteLLM, Azure OpenAI, Bedrock via LiteLLM, self-hosted models, and more | ## Managed models @@ -42,6 +44,7 @@ curl http://localhost:4000/v1/embeddings/models \ + ```bash curl -X POST http://localhost:4000/v1/collections \ -H "Authorization: Bearer $BIGRAG_API_KEY" \ @@ -54,8 +57,10 @@ curl -X POST http://localhost:4000/v1/collections \ "dimension": 1536 }' ``` + + ```bash curl -X POST http://localhost:4000/v1/collections \ -H "Authorization: Bearer $BIGRAG_API_KEY" \ @@ -68,8 +73,10 @@ curl -X POST http://localhost:4000/v1/collections \ "dimension": 1024 }' ``` + + ```bash curl -X POST http://localhost:4000/v1/collections \ -H "Authorization: Bearer $BIGRAG_API_KEY" \ @@ -84,8 +91,10 @@ curl -X POST http://localhost:4000/v1/collections \ ``` `voyage-3-large`, `voyage-3.5`, `voyage-3.5-lite`, and `voyage-code-3` accept Matryoshka 
dimensions (256, 512, 1024, 2048). Pass `dimension` accordingly. `voyage-finance-2` and `voyage-law-2` are fixed at 1024. + + ```bash curl -X POST http://localhost:4000/v1/collections \ -H "Authorization: Bearer $BIGRAG_API_KEY" \ @@ -99,8 +108,10 @@ curl -X POST http://localhost:4000/v1/collections \ "dimension": 768 }' ``` + + ```bash curl -X POST http://localhost:4000/v1/collections \ -H "Authorization: Bearer $BIGRAG_API_KEY" \ @@ -114,6 +125,7 @@ curl -X POST http://localhost:4000/v1/collections \ "dimension": 1024 }' ``` + @@ -121,7 +133,7 @@ curl -X POST http://localhost:4000/v1/collections \ ### OpenAI-compatible endpoints in practice -Any gateway that speaks the OpenAI `/embeddings` shape works. Common choices: +Any gateway that speaks the OpenAI `/embeddings` shape works: | Tool | Typical `embedding_base_url` | |------|------------------------------| @@ -141,15 +153,22 @@ Any gateway that speaks the OpenAI `/embeddings` shape works. Common choices: 4. New vectors are batched at `BIGRAG_INGESTION_BATCH_SIZE` and written to the collection's Turbopuffer namespace. 5. Queries embed the same way, with short-lived Redis caching for repeated query embeddings. -The cache key is `(content_hash, provider, model, dimension)`. A new collection that uses a different model gets a separate cache key space, so unchanged chunks under that model start cold while previous model cache entries remain available to collections that still use them. +The cache key is `(content_hash, provider, model, dimension)`. A new collection using a different model starts cold for that model while previous cache entries remain available to collections still using the old model. -Persistent embedding-cache rows are encrypted by default with `BIGRAG_MASTER_KEY`. Set `embedding_cache_mode` to `disabled` in `/settings` when you prefer provider cost over storing reusable vectors. The default retention window is 30 days after last use, and admins can purge the cache from the Security settings tab. 
+Persistent embedding-cache rows are encrypted by default with `BIGRAG_MASTER_KEY`. Set `embedding_cache_mode` to `disabled` in `/settings` to prefer provider cost over storing reusable vectors. The default retention window is 30 days after last use; admins can purge the cache from the Security settings tab. Redis query caches are also encrypted when `BIGRAG_MASTER_KEY` is configured. The default query embedding TTL is 300 seconds; set `query_embedding_cache_ttl` to `0` to disable it. ## Concurrency & throughput -Embedding requests are guarded by a semaphore (`BIGRAG_EMBEDDING_CONCURRENCY`, default 8). Raise it for high-QPS providers; lower it if your embedding provider throttles requests. When a provider returns a rate limit with `Retry-After`, `retry-after-ms`, or a message such as `Please try again in 37ms`, ingestion records a short Redis cooldown for that provider/model, waits for the hint, and retries the same batch without consuming the generic transient retry budget. Repeated rate limits are capped per batch, so sustained TPM pressure should still be handled by lowering concurrency or batch size. Token counting uses `tiktoken` where the provider ships a tokenizer and falls back to a 4-character-per-token heuristic otherwise. +Embedding requests are guarded by a semaphore (`BIGRAG_EMBEDDING_CONCURRENCY`, default 8): + +- Raise it for high-QPS providers. +- Lower it if your embedding provider throttles requests. + +When a provider returns a rate limit with `Retry-After`, `retry-after-ms`, or a message such as `Please try again in 37ms`, ingestion records a short Redis cooldown for that provider/model, waits for the hint, and retries the same batch without consuming the generic transient retry budget. Repeated rate limits are capped per batch, so sustained TPM pressure should be handled by lowering concurrency or batch size. + +Token counting uses `tiktoken` where the provider ships a tokenizer and falls back to a 4-character-per-token heuristic otherwise. 
## Reranking diff --git a/website/content/docs/concepts/search.mdx b/website/content/docs/concepts/search.mdx index 15c9767d..99d3f96e 100644 --- a/website/content/docs/concepts/search.mdx +++ b/website/content/docs/concepts/search.mdx @@ -3,7 +3,7 @@ title: Search description: Turbopuffer-backed semantic, keyword, and hybrid search. --- -bigRAG exposes three search modes on Turbopuffer and an optional rerank pass. Keep query-time choices narrow: pick a mode, set `top_k` and filters, and enable reranking only when a collection is configured for it. +bigRAG exposes three search modes on Turbopuffer and an optional rerank pass. Pick a mode, set `top_k` and filters, and enable reranking only when a collection is configured for it. ## Modes @@ -63,7 +63,7 @@ Each bigRAG collection maps to one Turbopuffer namespace. Document chunks are wr ## Filters -Pass a plain value for exact match, or use operators for more control. +Pass a plain value for exact match, or use operators for more control: ```json { "filters": { "author": "Smith", "year": 2026 } } @@ -89,7 +89,7 @@ Pass a plain value for exact match, or use operators for more control. } ``` -Multiple filters are combined with AND. When a collection was created with `tenant_field`, bigRAG configures that field for backend filtering and requires the tenant field in every query and chat filter. Missing tenant filters return `400`. +Multiple filters are combined with AND. When a collection was created with `tenant_field`, bigRAG configures that field for backend filtering and requires it in every query and chat filter. Missing tenant filters return `400`. Keyword search uses Turbopuffer BM25 over the chunk `text` field. Hybrid search runs Turbopuffer ANN and BM25 queries, then merges the two result sets with reciprocal rank fusion before optional reranking. 
@@ -112,9 +112,7 @@ Every response includes a `timings` breakdown: } ``` -When a query-result cache entry is reused, `cache_hit` is `true`, `cache_ms` -contains the Redis lookup time, and the embed/search/rerank timings stay at -zero instead of replaying the original uncached request's latencies. +When a query-result cache entry is reused, `cache_hit` is `true`, `cache_ms` contains the Redis lookup time, and the embed/search/rerank timings stay at zero instead of replaying the original uncached request's latencies. ## Multi-Collection Query diff --git a/website/content/docs/concepts/webhooks.mdx b/website/content/docs/concepts/webhooks.mdx index ec326112..f2f661e5 100644 --- a/website/content/docs/concepts/webhooks.mdx +++ b/website/content/docs/concepts/webhooks.mdx @@ -48,12 +48,12 @@ curl -X POST http://localhost:4000/v1/admin/webhooks \ | `collections` | string[] | no | Filter by collection names (`null` = all) | - The response includes a `secret` field that is shown only once. Store it — it's used to verify webhook signatures. +The response includes a `secret` field that is shown only once. Store it — it's used to verify webhook signatures. ## Payload Format -The existing `document.processing`, `document.ready`, and `document.failed` events keep their original payload shape: +The `document.processing`, `document.ready`, and `document.failed` events use this shape: ```json { @@ -120,7 +120,7 @@ def verify(payload: bytes, secret: str, timestamp: str, signature: str) -> bool: ## Retry Policy -Failed deliveries are stored in the delivery outbox before the first attempt. Retries use the configured exponential backoff window and remain `pending` with `next_retry_at` until the next due attempt, so process restarts can resume delivery. After all attempts, the delivery is marked as `failed`. +Failed deliveries are stored in the delivery outbox before the first attempt. 
Retries use the configured exponential backoff window and remain `pending` with `next_retry_at` until the next due attempt, so process restarts can resume delivery. After all attempts, the delivery is marked `failed`. Check delivery history: From 5fbd239ed291dd6a4029ae4ea158b5d3bc70ccee Mon Sep 17 00:00:00 2001 From: Bigint <69431456+bigint@users.noreply.github.com> Date: Fri, 22 May 2026 19:31:31 +0530 Subject: [PATCH 04/11] docs: simplify deployment guides and break up dense sections --- website/content/docs/deployment/docker.mdx | 14 +- .../content/docs/deployment/encryption.mdx | 21 +- .../content/docs/deployment/production.mdx | 180 +++++++++--------- website/content/docs/deployment/railway.mdx | 57 ++++-- 4 files changed, 152 insertions(+), 120 deletions(-) diff --git a/website/content/docs/deployment/docker.mdx b/website/content/docs/deployment/docker.mdx index fe9fefe4..13c01f54 100644 --- a/website/content/docs/deployment/docker.mdx +++ b/website/content/docs/deployment/docker.mdx @@ -27,7 +27,7 @@ BIGRAG_UI_IMAGE=yoginth/bigrag-ui:2026.4.30 \ docker compose up -d --no-build ``` -This starts bigRAG API, worker, admin UI, Postgres, and Redis. Turbopuffer runs as the managed vector, keyword, and hybrid search backend. +This starts the bigRAG API, worker, admin UI, Postgres, and Redis. Turbopuffer runs as the managed vector, keyword, and hybrid search backend. ### Verify @@ -40,7 +40,10 @@ curl http://localhost:4000/health ## Docker Compose -The default `docker-compose.yml` runs the API, worker, admin UI, Postgres, and Redis. Turbopuffer is configured from the admin UI and stored in Postgres; it does not run as a local Compose service. When hacking from a checkout, plain `docker compose up -d` builds local API and UI images. When deploying published images, set the image variables shown above and use `--no-build`. +The default `docker-compose.yml` runs the API, worker, admin UI, Postgres, and Redis.
Turbopuffer is configured from the admin UI and stored in Postgres; it does not run as a local Compose service. + +- When hacking from a checkout, plain `docker compose up -d` builds local API and UI images. +- When deploying published images, set the image variables shown above and use `--no-build`. ```yaml services: @@ -180,8 +183,6 @@ Set `BIGRAG_CORS_ORIGINS='["http://localhost:3000"]'` on the API when running th ## Image split -Build and publish two images by default: - - `bigrag-api` — backend API and the `bigrag-worker` entrypoint. - `bigrag-ui` — static admin UI served by Nginx. @@ -216,10 +217,7 @@ docker exec bigrag-postgres pg_isready -U bigrag docker exec bigrag-redis redis-cli ping ``` -The worker container healthcheck reads the worker heartbeat from Redis rather -than calling the API HTTP server. If you split workers by queue, set -`BIGRAG_WORKER_HEALTHCHECK_KEY` to the matching queue heartbeat key, such as -`bigrag:dramatiq:worker:heartbeat:webhooks`. +The worker container healthcheck reads the worker heartbeat from Redis rather than calling the API HTTP server. If you split workers by queue, set `BIGRAG_WORKER_HEALTHCHECK_KEY` to the matching queue heartbeat key, such as `bigrag:dramatiq:worker:heartbeat:webhooks`. `/health/ready` also validates the embedding provider. If no collection has an `embedding_api_key` configured, it returns `503`. Use `/health` if you just need to check the server is running. diff --git a/website/content/docs/deployment/encryption.mdx b/website/content/docs/deployment/encryption.mdx index f783a68c..37d6c1ba 100644 --- a/website/content/docs/deployment/encryption.mdx +++ b/website/content/docs/deployment/encryption.mdx @@ -44,12 +44,12 @@ export BIGRAG_MASTER_KEY="" In `BIGRAG_ENV=dev`, an unset key leaves secret encryption unavailable and disables the persistent embedding cache. `dev.sh` ships a fixed dev key so fresh clones start cleanly. 
- Losing `BIGRAG_MASTER_KEY` means the encrypted columns above become permanently unreadable. Back it up the same way you back up your Postgres root credentials — offline, split across owners. + Losing `BIGRAG_MASTER_KEY` means the encrypted columns above become permanently unreadable. Back it up the same way you back up your Postgres root credentials — offline, split across owners. ## What the operator must encrypt -bigRAG delegates the bulk of at-rest protection to your infrastructure. For a production deployment you must configure at least the following: +bigRAG delegates the bulk of at-rest protection to your infrastructure. For a production deployment, configure at least the following: ### Postgres @@ -59,12 +59,17 @@ bigRAG delegates the bulk of at-rest protection to your infrastructure. For a pr ### Turbopuffer -- Turbopuffer stores vectors, full-text fields, payload attributes, and indexes in its managed service. Select the production region deliberately and keep the API key scoped to the bigRAG service. -- Save the Turbopuffer API key in the admin UI so it is encrypted in the `instance_settings` table with `BIGRAG_MASTER_KEY`. Rotate it on the same cadence as other provider credentials, and restrict outbound network access where your platform allows it. +Turbopuffer stores vectors, full-text fields, payload attributes, and indexes in its managed service. + +- Select the production region deliberately. +- Keep the API key scoped to the bigRAG service. +- Save the Turbopuffer API key in the admin UI so it is encrypted in the `instance_settings` table with `BIGRAG_MASTER_KEY`. +- Rotate it on the same cadence as other provider credentials, and restrict outbound network access where your platform allows it. ### Redis -- Redis holds the ingestion queue, the event bus, idempotency responses, and short-lived platform caches. bigRAG encrypts cache values when `BIGRAG_MASTER_KEY` is configured, but queue and event-bus payloads still need infrastructure protection. 
+Redis holds the ingestion queue, the event bus, idempotency responses, and short-lived platform caches. bigRAG encrypts cache values when `BIGRAG_MASTER_KEY` is configured, but queue and event-bus payloads still need infrastructure protection. + - Disable RDB/AOF persistence in prod, or put the persistence directory on an encrypted volume. - Use TLS (`rediss://`) between bigRAG and Redis. @@ -74,7 +79,9 @@ The upload directory (default `./data/uploads`) must live on an encrypted volume ### Readable backups -Readable backups created from `/settings?tab=backups` are intentionally not client-side encrypted. They keep logs, Turbopuffer payload exports, and raw uploaded files inspectable, but redact provider keys, OAuth tokens, webhook secrets, S3 credentials, API key hashes, session hashes, and embedding-cache vectors. Put the backup bucket behind strict IAM/R2 token policy, bucket encryption, private networking where available, lifecycle retention, and access logging. +Readable backups created from `/settings?tab=backups` are intentionally not client-side encrypted. They keep logs, Turbopuffer payload exports, and raw uploaded files inspectable, but redact provider keys, OAuth tokens, webhook secrets, S3 credentials, API key hashes, session hashes, and embedding-cache vectors. + +Protect the backup bucket with strict IAM/R2 token policy, bucket encryption, private networking where available, lifecycle retention, and access logging. ## Rotation @@ -89,7 +96,7 @@ Manual process: A built-in `bigrag crypto rotate` command is not available yet; use a controlled maintenance script for step 3. 
-## What's intentionally **not** app-layer encrypted +## What's intentionally not app-layer encrypted | Data | Why | |------|-----| diff --git a/website/content/docs/deployment/production.mdx b/website/content/docs/deployment/production.mdx index 83110652..7c5afef4 100644 --- a/website/content/docs/deployment/production.mdx +++ b/website/content/docs/deployment/production.mdx @@ -4,6 +4,7 @@ description: Configuration and best practices for production deployments. --- import { Callout } from "fumadocs-ui/components/callout"; +import { Accordions, Accordion } from "fumadocs-ui/components/accordion"; ## Production hardening checklist @@ -12,33 +13,21 @@ Tick these off before pointing real traffic at a bigRAG deployment: - [ ] `BIGRAG_ENV=prod` — enables the startup guard. - [ ] `BIGRAG_MASTER_KEY` set to a 32-byte Fernet key (see [Encryption at rest](/docs/deployment/encryption)). - [ ] Rotate `POSTGRES_PASSWORD` from the shipped default (`bigrag`). -- [ ] `BIGRAG_SESSION_COOKIE_SECURE=true` and terminate TLS at your - reverse proxy (nginx, Caddy, Traefik, Cloudflare). -- [ ] `BIGRAG_LOG_LEVEL=info` and `BIGRAG_LOG_FORMAT=json` for production log - collection. Local `dev.sh` also uses `info` by default for readable progress. -- [ ] `BIGRAG_CORS_ORIGINS` set to an explicit list of your admin UI / - app origins — no `*`. -- [ ] `BIGRAG_TRUSTED_PROXIES` set to the CIDRs of your own reverse - proxies if bigRAG sits behind nginx, Caddy, Traefik, or a load balancer. -- [ ] Redis with `requirepass` set and `appendonly yes` persisted to - a mounted volume. +- [ ] `BIGRAG_SESSION_COOKIE_SECURE=true` and terminate TLS at your reverse proxy (nginx, Caddy, Traefik, Cloudflare). +- [ ] `BIGRAG_LOG_LEVEL=info` and `BIGRAG_LOG_FORMAT=json` for production log collection. Local `dev.sh` also uses `info` by default for readable progress. +- [ ] `BIGRAG_CORS_ORIGINS` set to an explicit list of your admin UI / app origins — no `*`. 
+- [ ] `BIGRAG_TRUSTED_PROXIES` set to the CIDRs of your own reverse proxies if bigRAG sits behind nginx, Caddy, Traefik, or a load balancer. +- [ ] Redis with `requirepass` set and `appendonly yes` persisted to a mounted volume. - [ ] Turbopuffer API key and region saved from the admin UI for managed vector and full-text search. - [ ] Postgres warm-standby replica for failover. -- [ ] Backup strategy: configure readable S3/R2 backups from `/settings?tab=backups`, - protect that bucket as sensitive, and separately keep encrypted infrastructure - snapshots for Postgres, Turbopuffer exports, Redis, and uploads. -- [ ] Set `metadata_schema` on collections that accept untrusted - metadata so uploads with invalid shape are rejected at the edge. +- [ ] Backup strategy: configure readable S3/R2 backups from `/settings?tab=backups`, protect that bucket as sensitive, and separately keep encrypted infrastructure snapshots for Postgres, Turbopuffer exports, Redis, and uploads. +- [ ] Set `metadata_schema` on collections that accept untrusted metadata so uploads with invalid shape are rejected at the edge. - [ ] Wire `GET /v1/admin/audit` into your SIEM / log pipeline. -- [ ] Configure webhooks to your monitoring stack for - `document.failed`, `connector.sync.failed`, and `backup.failed` events so - data-operation errors don't sit silently. +- [ ] Configure webhooks to your monitoring stack for `document.failed`, `connector.sync.failed`, and `backup.failed` events so data-operation errors don't sit silently. ## Startup safety guard -bigRAG refuses to boot when `BIGRAG_ENV=prod` is set **and** any of the following -insecure defaults are still active. Setting `BIGRAG_ENV=prod` in production catches -config mistakes that would otherwise ship silently: +bigRAG refuses to boot when `BIGRAG_ENV=prod` is set **and** any of the following insecure defaults are still active: - `BIGRAG_SESSION_COOKIE_SECURE` is `false` — set to `true` so cookies are HTTPS-only. 
- `BIGRAG_DATABASE_URL` still uses the shipped `bigrag:bigrag` credentials — rotate the Postgres password. @@ -46,9 +35,7 @@ config mistakes that would otherwise ship silently: - `BIGRAG_HOST` binds to `0.0.0.0` or `::` without `BIGRAG_ALLOW_PUBLIC_BIND_IN_PROD=true` after you have confirmed the service sits behind TLS and a network boundary. - `BIGRAG_SESSION_COOKIE_DOMAIN` is set while `BIGRAG_TRUSTED_PROXIES` is empty. -When the guard trips, it logs every violation before exiting so you can fix the whole -list in one edit. If you intentionally want to run with the development defaults (e.g. -on a private network behind a VPN), leave `BIGRAG_ENV` unset or set it to `dev`. +When the guard trips, it logs every violation before exiting so you can fix the whole list in one edit. To run with development defaults (e.g. on a private network behind a VPN), leave `BIGRAG_ENV` unset or set it to `dev`. ## Production Docker Compose @@ -183,9 +170,7 @@ volumes: redis_data: ``` -Set `REDIS_PASSWORD` and a matching `BIGRAG_REDIS_URL` in the Compose -environment. If the password contains `@`, `:`, `/`, `?`, `#`, or other -URL-reserved characters, URL-encode it in `BIGRAG_REDIS_URL`. +Set `REDIS_PASSWORD` and a matching `BIGRAG_REDIS_URL` in the Compose environment. If the password contains `@`, `:`, `/`, `?`, `#`, or other URL-reserved characters, URL-encode it in `BIGRAG_REDIS_URL`. ## Recommended Settings @@ -240,66 +225,62 @@ upload_dir = "./data/uploads" ## Readable backups -The admin UI can create backup-only full-instance exports from `/settings?tab=backups`. -The destination is S3-compatible, so AWS S3, Cloudflare R2, and MinIO all work. -Use a bucket or prefix dedicated to backups rather than reusing the upload bucket. +The admin UI creates backup-only full-instance exports from `/settings?tab=backups`. The destination is S3-compatible — AWS S3, Cloudflare R2, and MinIO all work. Use a bucket or prefix dedicated to backups rather than reusing the upload bucket. 
-Readable backups are plain JSON, JSONL, SQL, and raw files. They include Postgres -rows, vector-store points and payloads, uploaded files, and logs. Provider -secrets, token/hash columns, S3 credentials, and embedding-cache vectors are -redacted. They are useful for inspection and data portability, but they are not a -substitute for encrypted infrastructure snapshots or tested disaster recovery -runbooks. Turbopuffer point exports stream provider pages into JSONL instead of -materializing a full collection in API memory. +**What's included:** + +- Postgres rows, vector-store points and payloads, uploaded files, and logs — all in plain JSON, JSONL, SQL, and raw files. +- Turbopuffer point exports stream provider pages into JSONL instead of materializing a full collection in API memory. + +**What's redacted:** + +- Provider secrets, token/hash columns, S3 credentials, and embedding-cache vectors. + + + Readable backups are useful for inspection and data portability, but they are not a substitute for encrypted infrastructure snapshots or tested disaster recovery runbooks. + ## Logging -API and worker logs are emitted through `structlog` with stable event names and -key-value fields for request, vector-store, webhook, and auth activity. -Text logs are colored by default with fixed columns for time, level, logger, -event, and fields. Text rendering escapes control characters in event and field -values before writing to stdout, so request paths and headers cannot create -extra terminal lines or emit terminal control sequences. Dependency loggers such -as Dramatiq, Alembic, OpenAI, Uvicorn access, HTTPX, and Turbopuffer are kept at warning level -so local terminals show bigRAG actions instead of library startup chatter. Text -logs at `info` render each API request as one concise method/path/status/latency -line. 
Worker logs include the Dramatiq process label and PID, such as -`worker=worker-1 pid=12345`, so concurrent worker output can be traced back to -the active process. Ingestion and RAG reads also log concise one-line progress messages; -internal chunk, cache, provider, job, request, and document identifiers stay out -of the default text terminal stream. JSON logs and `debug` request logs retain -structured method, path, sanitized query params, selected request headers, -client IP, route, endpoint/action context, status, `first_byte_ms`, and total -`elapsed_ms`. Raw request bodies, prompts, document -content, cookies, authorization headers, and secret-like query params are not -rendered. URL-valued headers such as `Referer` are logged without URL userinfo, -with secret-like query params and fragments redacted. Set -`BIGRAG_LOG_FORMAT=json` when Docker, Railway, Fluent Bit, or another log -shipper will parse stdout. Use `BIGRAG_LOG_LEVEL=debug` only for short-lived -diagnostics. - -Every HTTP response includes `X-Request-ID`. If a client sends that header, -bigRAG preserves it; otherwise the API generates one and includes it in request -JSON logs and access-log rows. Secrets are redacted before log rendering. +API and worker logs are emitted through `structlog` with stable event names and key-value fields. + +**What's logged:** + +- Request activity: method, path, sanitized query params, selected request headers, client IP, route, endpoint/action context, status, `first_byte_ms`, and total `elapsed_ms` — in JSON and `debug` request logs. +- Vector-store, webhook, and auth activity. +- Worker output: Dramatiq process label and PID (e.g. `worker=worker-1 pid=12345`) so concurrent output can be traced back to the active process. +- Ingestion and RAG reads as concise one-line progress messages. +- Every HTTP response includes `X-Request-ID`. 
If a client sends that header, bigRAG preserves it; otherwise the API generates one and includes it in request JSON logs and access-log rows. + +**What's redacted:** + +- Raw request bodies, prompts, document content, cookies, authorization headers, and secret-like query params. +- URL-valued headers such as `Referer` are logged without URL userinfo, with secret-like query params and fragments stripped. +- Secrets are redacted before log rendering. + +**Format and level notes:** + +- Text logs use colored output with fixed columns for time, level, logger, event, and fields. Control characters in event and field values are escaped before writing to stdout, so request paths and headers cannot emit terminal control sequences. +- Dependency loggers (Dramatiq, Alembic, OpenAI, Uvicorn access, HTTPX, Turbopuffer) are kept at warning level so local terminals show bigRAG actions instead of library startup chatter. +- Text logs at `info` render each API request as one concise method/path/status/latency line. Internal chunk, cache, provider, job, request, and document identifiers stay out of the default text terminal stream. +- Set `BIGRAG_LOG_FORMAT=json` when Docker, Railway, Fluent Bit, or another log shipper will parse stdout. +- Use `BIGRAG_LOG_LEVEL=debug` only for short-lived diagnostics. ## Basic operations checklist Use this quick path before deeper debugging: -1. `GET /health` confirms the API process is alive. -2. `GET /health/ready` confirms Postgres, Redis, Turbopuffer search, and the - embedding provider are reachable. Dependency errors are category labels such - as `timeout`, `unreachable`, `auth_failed`, `misconfigured`, or `unknown`. -3. `GET /v1/stats` confirms document counts, queue depth, queue health, and the - latest `bigrag-worker` heartbeat. -4. The admin UI Health tab shows the same dependency and queue state without - opening logs. -5. 
If documents stop moving, check worker logs, then inspect `queue_health`, - `workers.status`, `dead_lettered`, `retrying`, and `stale_processing`. +1. `GET /health` — confirms the API process is alive. +2. `GET /health/ready` — confirms Postgres, Redis, Turbopuffer search, and the embedding provider are reachable. Dependency errors are category labels such as `timeout`, `unreachable`, `auth_failed`, `misconfigured`, or `unknown`. +3. `GET /v1/stats` — confirms document counts, queue depth, queue health, and the latest `bigrag-worker` heartbeat. +4. The admin UI Health tab shows the same dependency and queue state without opening logs. +5. If documents stop moving, check worker logs, then inspect `queue_health`, `workers.status`, `dead_lettered`, `retrying`, and `stale_processing`. ## Troubleshooting -### Connection refused on startup + + + Ensure all infrastructure services are running and healthy: @@ -308,39 +289,54 @@ docker exec bigrag-postgres pg_isready -U bigrag docker exec bigrag-redis redis-cli ping ``` -The worker container healthcheck reads the worker heartbeat from Redis rather -than calling the API HTTP server. If you split workers by queue, set -`BIGRAG_WORKER_HEALTHCHECK_KEY` to the matching queue heartbeat key, such as -`bigrag:dramatiq:worker:heartbeat:webhooks`. +The worker container healthcheck reads the worker heartbeat from Redis rather than calling the API HTTP server. If you split workers by queue, set `BIGRAG_WORKER_HEALTHCHECK_KEY` to the matching queue heartbeat key, such as `bigrag:dramatiq:worker:heartbeat:webhooks`. + + + + -### Documents stuck in "pending" +- Verify Redis is running and accessible. +- Verify the `bigrag-worker` process is running. +- Check worker logs for ingestion or connector actor errors. +- Check `/v1/stats` for worker heartbeat and queue depth. 
-- Verify Redis is running and accessible -- Verify the `bigrag-worker` process is running -- Check worker logs for ingestion or connector actor errors -- Check `/v1/stats` for worker heartbeat and queue depth + -### Worker-dependent actions are disabled in the admin UI + Uploads, document reprocessing, backups, webhook deliveries, and Google Drive sync all depend on `bigrag-worker`. If the admin UI shows `bigrag-worker is offline`, start or restart the worker service, then confirm `/v1/stats` reports `"workers": { "online": true }` with a recent `heartbeat_at`. -### Dimension mismatch on query + + + The query embedding dimension doesn't match the collection's configured dimension. Ensure you're querying with the same embedding model. -### File upload returns 413 + + + File exceeds the max upload size. Increase `BIGRAG_MAX_UPLOAD_SIZE_MB`. -### `/health/ready` returns 503 but services are running + + + The readiness endpoint also checks Turbopuffer and the embedding provider. Check `/health` first to confirm the server itself is running, then save Turbopuffer settings in the admin UI and create a collection with a valid embedding key. -### Document uploads fail with embedding errors + + + Ensure the collection has a valid `embedding_api_key` set. You can provide it when creating the collection or set `BIGRAG_EMBEDDING_API_KEY` as a server-wide default. -### Slow embedding performance + + + + +- Increase `BIGRAG_INGESTION_BATCH_SIZE` for better throughput. +- Increase `BIGRAG_EMBEDDING_CONCURRENCY` for more parallelism. 
+ + -- Increase `BIGRAG_INGESTION_BATCH_SIZE` for better throughput -- Increase `BIGRAG_EMBEDDING_CONCURRENCY` for more parallelism + diff --git a/website/content/docs/deployment/railway.mdx b/website/content/docs/deployment/railway.mdx index 06326bf3..40c6b0e5 100644 --- a/website/content/docs/deployment/railway.mdx +++ b/website/content/docs/deployment/railway.mdx @@ -4,17 +4,20 @@ description: Deploy bigRAG to Railway with managed Postgres, Redis, and Turbopuf --- import { Callout } from "fumadocs-ui/components/callout"; +import { Accordions, Accordion } from "fumadocs-ui/components/accordion"; -This guide deploys the API, Dramatiq worker, admin UI, Postgres, and Redis in one Railway project. Turbopuffer provides managed vector, keyword, and hybrid search. +Deploy the API, Dramatiq worker, admin UI, Postgres, and Redis in one Railway project. Turbopuffer provides managed vector, keyword, and hybrid search. ## What you get -- **API** — FastAPI backend, public domain. -- **Worker** — Dramatiq background jobs for ingestion, connectors, webhooks, backups, and cleanup. -- **App** — admin UI, public domain. -- **Postgres 17** — Railway plugin, exposes `DATABASE_URL`. -- **Redis 7** — Railway plugin, exposes `REDIS_URL`. -- **Turbopuffer** — managed vector, keyword, and hybrid search, configured from the admin UI. +| Service | Notes | +|---------|-------| +| **API** | FastAPI backend, public domain | +| **Worker** | Dramatiq background jobs for ingestion, connectors, webhooks, backups, and cleanup | +| **App** | Admin UI, public domain | +| **Postgres 17** | Railway plugin, exposes `DATABASE_URL` | +| **Redis 7** | Railway plugin, exposes `REDIS_URL` | +| **Turbopuffer** | Managed vector, keyword, and hybrid search, configured from the admin UI | ## Prerequisites @@ -113,14 +116,42 @@ Back the master key up immediately. 
It encrypts provider secrets, webhook secret ## Troubleshooting -**`/health/ready` reports degraded Turbopuffer search.** Check the Turbopuffer settings in the admin UI and any outbound network restrictions. + -**API exits with `ImportError: Can't find Python file .../site-packages/alembic/env.py`.** Rebuild from the latest repo revision and clear Railway's build cache if it keeps reusing an old image. The API image bundles bigRAG's Alembic migration environment into the installed package; this error means the deployed image was built without that migration environment. + -**App loads but API calls fail with CORS errors.** Check `BIGRAG_CORS_ORIGINS` on `API`; it must match the app public domain exactly. +Check the Turbopuffer settings in the admin UI and any outbound network restrictions. -**Documents stay pending.** Check the Worker service logs and confirm `/v1/stats` shows a recent worker heartbeat. + -**Admin UI says `bigrag-worker is offline`.** Uploads, document reprocessing, backups, webhook deliveries, and Google Drive sync require the Worker service. Restart the Worker service and confirm `/v1/stats` reports `"workers": { "online": true }` with a recent `heartbeat_at`. + -**App build fails with workspace errors.** Confirm the root directory is `/`, the build command uses `pnpm --filter @bigrag/app build`, and the start command uses `pnpm --filter @bigrag/app start`. +Rebuild from the latest repo revision and clear Railway's build cache if it keeps reusing an old image. The API image bundles bigRAG's Alembic migration environment into the installed package; this error means the deployed image was built without that migration environment. + + + + + +Check `BIGRAG_CORS_ORIGINS` on `API`; it must match the app public domain exactly. + + + + + +Check the Worker service logs and confirm `/v1/stats` shows a recent worker heartbeat. + + + + + +Uploads, document reprocessing, backups, webhook deliveries, and Google Drive sync require the Worker service. 
Restart the Worker service and confirm `/v1/stats` reports `"workers": { "online": true }` with a recent `heartbeat_at`. + + + + + +Confirm the root directory is `/`, the build command uses `pnpm --filter @bigrag/app build`, and the start command uses `pnpm --filter @bigrag/app start`. + + + + From 21ab43c2e05d2521c470473a6242373e520f3b1d Mon Sep 17 00:00:00 2001 From: Bigint <69431456+bigint@users.noreply.github.com> Date: Fri, 22 May 2026 19:31:31 +0530 Subject: [PATCH 05/11] docs: simplify getting started pages --- .../docs/getting-started/configuration.mdx | 47 ++++++++++++------- .../docs/getting-started/installation.mdx | 23 ++++----- .../docs/getting-started/quickstart.mdx | 22 ++++++++- 3 files changed, 61 insertions(+), 31 deletions(-) diff --git a/website/content/docs/getting-started/configuration.mdx b/website/content/docs/getting-started/configuration.mdx index 803fd241..20e4ed4c 100644 --- a/website/content/docs/getting-started/configuration.mdx +++ b/website/content/docs/getting-started/configuration.mdx @@ -30,13 +30,25 @@ Bootstrap config is the small set of values the API needs before it can connect | `BIGRAG_MASTER_KEY_PREVIOUS` | JSON array of old Fernet keys during staged key rotation. | `[]` | | `BIGRAG_UPLOAD_DIR` | Local document upload directory when storage backend is `local`. | `./data/uploads` | -`BIGRAG_DATABASE_URL` preserves Postgres `sslmode` settings. Use `sslmode=require`, `verify-ca`, or `verify-full` for hosted Postgres TLS; use `sslmode=disable` only for local or trusted private networks. +**Two variables to note:** -`BIGRAG_MASTER_KEY_PREVIOUS` is dual-read during rotation for encrypted secrets and API-key hash lookup. Keep old keys there until encrypted rows and minted API keys have been rewritten or replaced. +- `BIGRAG_DATABASE_URL` preserves Postgres `sslmode` settings. Use `sslmode=require`, `verify-ca`, or `verify-full` for hosted Postgres TLS; use `sslmode=disable` only for local or trusted private networks. 
+- `BIGRAG_MASTER_KEY_PREVIOUS` is dual-read during rotation for encrypted secrets and API-key hash lookup. Keep old keys there until encrypted rows and minted API keys have been rewritten or replaced. The admin UI has one frontend bootstrap value: point it at the API with `VITE_BIGRAG_URL` at build/dev time or `BIGRAG_URL` in the app container when frontend and backend are deployed separately. -Run background jobs with `bigrag-worker`. The worker reads the same `BIGRAG_` environment and accepts `--processes`, `--threads`, `--queues`, and `--config` for deployment-specific concurrency and TOML bootstrap. The worker parent runs database migrations once before spawning Dramatiq processes, and child processes skip migration bootstrap. The local dev script and Docker Compose default to `BIGRAG_WORKER_PROCESSES=5` and `BIGRAG_WORKER_THREADS=8`. +## Worker + +Run background jobs with `bigrag-worker`. The worker reads the same `BIGRAG_` environment and accepts these flags: + +| Flag | Purpose | +|------|---------| +| `--processes` | Number of Dramatiq worker processes | +| `--threads` | Threads per process | +| `--queues` | Queue names to consume | +| `--config` | Path to a TOML bootstrap file | + +The worker parent runs database migrations once before spawning Dramatiq processes; child processes skip migration bootstrap. The local dev script and Docker Compose default to `BIGRAG_WORKER_PROCESSES=5` and `BIGRAG_WORKER_THREADS=8`. ## TOML bootstrap file @@ -63,24 +75,27 @@ Environment variables override TOML values. CLI flags passed to `python -m bigra ## Runtime settings -After the first admin account exists, the admin UI sends first-run operators to `/onboarding` to create one verified embedding preset and save Turbopuffer settings for collection work. The Turbopuffer step's **Save and Finish** action saves the vector settings and opens `/overview`. 
Later, go to `/settings` to manage platform runtime settings, uploaded source-file storage, and backups, and `/models` to manage model presets and model runtime settings. Turbopuffer is configured from the UI, is the search backend for every collection, and is stored in Postgres in the `instance_settings` table. Secret settings are encrypted with `BIGRAG_MASTER_KEY` and redacted on read. +After the first admin account exists, the admin UI sends first-run operators to `/onboarding` to create one verified embedding preset and save Turbopuffer settings. The Turbopuffer step's **Save and Finish** action saves the vector settings and opens `/overview`. -The same settings are available through [`/v1/admin/settings`](/docs/api-reference/instance-settings). +Later, manage settings at: -| Area | Examples | -|------|----------| -| Security | Trusted proxies, provider URL allow-lists, embedding-cache mode, and private-network escape hatches. CORS origins and session-cookie policy are still supported, but you manage them from deployment config or the instance settings API instead of the Security tab. | -| Data | Upload limits, OCR, ingestion workers, queue depth, webhook delivery limits, and retention windows. | -| Storage (`/settings?tab=storage`) | Storage backend, S3-compatible bucket, endpoint, region, prefix, credentials, and path-style behavior. | -| Models (`/models`) | Default embedding provider, model, dimension, key, base URL, cache TTLs, chat provider, chat model, temperature, history count, and context budget. | -| Turbopuffer search | API key, public-region dropdown, namespace prefix, and optional compatible base URL for vector, keyword, and hybrid retrieval. | -| Backups (`/settings?tab=backups`) | S3-compatible readable backup bucket, endpoint, region, prefix, credentials, export controls, and backup history. 
| +| Location | What you configure | +|----------|--------------------| +| `/settings` | Platform runtime settings, uploaded source-file storage, and backups | +| `/settings?tab=account` | Account settings | +| `/settings?tab=health` | Dependency health | +| `/settings?tab=security` | Trusted proxies, provider URL allow-lists, embedding-cache mode, and private-network escape hatches. CORS origins and session-cookie policy are still supported, but managed from deployment config or the instance settings API instead of the Security tab. | +| `/settings?tab=data` | Upload limits, OCR, ingestion workers, queue depth, webhook delivery limits, and retention windows. | +| `/settings?tab=storage` | Storage backend, S3-compatible bucket, endpoint, region, prefix, credentials, and path-style behavior. | +| `/settings?tab=vector_store` | Turbopuffer API key, public-region dropdown, namespace prefix, and optional compatible base URL for vector, keyword, and hybrid retrieval. | +| `/settings?tab=backups` | S3-compatible readable backup bucket, endpoint, region, prefix, credentials, export controls, and backup history. | +| `/models` | Default embedding provider, model, dimension, key, base URL, cache TTLs, chat provider, chat model, temperature, history count, and context budget. | -`BIGRAG_CHAT_API_KEY` can seed a default instance chat credential for OpenAI's standard API endpoint. For non-default `chat_base_url` values, configure a saved chat key in the admin UI or pass `provider_api_key` on chat requests so the instance fallback key is never sent to a custom provider. +Turbopuffer is the search backend for every collection. Settings are stored in Postgres in the `instance_settings` table; secret settings are encrypted with `BIGRAG_MASTER_KEY` and redacted on read. 
-Settings only accepts current tab values: `/settings?tab=account`, `/settings?tab=health`, `/settings?tab=security`, `/settings?tab=data`, `/settings?tab=storage`, `/settings?tab=vector_store`, and `/settings?tab=backups`. +`BIGRAG_CHAT_API_KEY` can seed a default instance chat credential for OpenAI's standard API endpoint. For non-default `chat_base_url` values, configure a saved chat key in the admin UI or pass `provider_api_key` on chat requests so the instance fallback key is never sent to a custom provider. -Database-backed admin settings apply immediately on save. Storage, backup, and Turbopuffer connection changes validate the new target before saving, then swap the runtime client in place where applicable. +The same settings are available through [`/v1/admin/settings`](/docs/api-reference/instance-settings). Database-backed admin settings apply immediately on save. Storage, backup, and Turbopuffer connection changes validate the new target before saving, then swap the runtime client in place where applicable. Turbopuffer settings are not bootstrap environment variables. Save them from the admin UI so the API and worker read the same database-backed instance settings before creating collections. diff --git a/website/content/docs/getting-started/installation.mdx b/website/content/docs/getting-started/installation.mdx index e7ccbdb0..d16dbd63 100644 --- a/website/content/docs/getting-started/installation.mdx +++ b/website/content/docs/getting-started/installation.mdx @@ -87,20 +87,15 @@ The easiest way to start everything for development: This script: -1. Kills stale processes on port 4000 and old local `bigrag-worker` processes -2. Validates required commands (`docker`, `curl`, `uv`) -3. Starts Docker services (Postgres and Redis) and waits for readiness -4. Installs Python dependencies with `uv` -5. Starts the backend with auto-reload -6. Starts `bigrag-worker` with five Dramatiq processes on Redis -7. 
Gracefully stops everything on Ctrl+C - -If the admin UI shows `bigrag-worker is offline`, check the terminal for the -colored `work` log stream. Each worker-side log line includes the active -Dramatiq process label and PID, such as `worker=worker-1 pid=12345`. Override the local process count with -`BIGRAG_WORKER_PROCESSES` and thread count with `BIGRAG_WORKER_THREADS`. A -healthy local worker should keep running and `/v1/stats` should report a recent -worker heartbeat for the signed-in admin session. +1. Kills stale processes on port 4000 and old local `bigrag-worker` processes. +2. Validates required commands (`docker`, `curl`, `uv`). +3. Starts Docker services (Postgres and Redis) and waits for readiness. +4. Installs Python dependencies with `uv`. +5. Starts the backend with auto-reload. +6. Starts `bigrag-worker` with five Dramatiq processes on Redis. +7. Gracefully stops everything on Ctrl+C. + +If the admin UI shows `bigrag-worker is offline`, check the terminal for the colored `work` log stream. Each worker-side log line includes the active Dramatiq process label and PID, such as `worker=worker-1 pid=12345`. Override the local process count with `BIGRAG_WORKER_PROCESSES` and thread count with `BIGRAG_WORKER_THREADS`. A healthy local worker should keep running and `/v1/stats` should report a recent worker heartbeat for the signed-in admin session. ## Verify diff --git a/website/content/docs/getting-started/quickstart.mdx b/website/content/docs/getting-started/quickstart.mdx index 39ae4e99..ff7a5147 100644 --- a/website/content/docs/getting-started/quickstart.mdx +++ b/website/content/docs/getting-started/quickstart.mdx @@ -48,7 +48,11 @@ The response sets a `bigrag_session` cookie. The admin UI exposes the same flow ### Complete provider onboarding -In the admin UI, finish `/onboarding` by adding one embedding preset and saving Turbopuffer connection details. A successful Turbopuffer save sends you to `/overview`. 
You can skip Turbopuffer during onboarding for a read-only tour, but collections cannot ingest or query until the backend has a working Turbopuffer connection. +In the admin UI, finish `/onboarding` by adding one embedding preset and saving Turbopuffer connection details. A successful Turbopuffer save sends you to `/overview`. + + + You can skip Turbopuffer during onboarding for a read-only tour, but collections cannot ingest or query until the backend has a working Turbopuffer connection. + @@ -76,6 +80,7 @@ A collection groups documents that share the same embedding configuration and Tu + ```bash curl -X POST $BASE/v1/collections \ -H "Authorization: Bearer $BIGRAG_API_KEY" \ @@ -90,8 +95,10 @@ curl -X POST $BASE/v1/collections \ "chunk_overlap": 50 }' ``` + + ```typescript import { BigRAG } from "@bigrag/client"; @@ -110,6 +117,7 @@ const collection = await client.collections.create({ chunk_overlap: 50, }); ``` + @@ -123,6 +131,7 @@ No managed embedding key handy? Use `embedding_provider: "openai_compatible"` wi + ```bash curl -X POST $BASE/v1/collections/knowledge_base/documents \ -H "Authorization: Bearer $BIGRAG_API_KEY" \ @@ -132,8 +141,10 @@ curl -X POST $BASE/v1/collections/knowledge_base/documents \ export DOC_ID=$(jq -r .id /tmp/document.json) ``` + + ```typescript const doc = await client.documents.upload( "knowledge_base", @@ -141,6 +152,7 @@ const doc = await client.documents.upload( { department: "engineering" } ); ``` + @@ -152,6 +164,7 @@ The document starts as `pending` and transitions to `processing` then `ready`. + ```bash while true; do doc=$(curl -s "$BASE/v1/collections/knowledge_base/documents/$DOC_ID" \ @@ -162,8 +175,10 @@ while true; do sleep 2 done ``` + + ```typescript let current = doc; while (current.status === "pending" || current.status === "processing") { @@ -172,6 +187,7 @@ while (current.status === "pending" || current.status === "processing") { console.log(current.progress?.message ?? 
current.status); } ``` + @@ -183,6 +199,7 @@ Once documents are `ready`, you can query Turbopuffer-backed semantic search: + ```bash curl -X POST $BASE/v1/collections/knowledge_base/query \ -H "Authorization: Bearer $BIGRAG_API_KEY" \ @@ -192,8 +209,10 @@ curl -X POST $BASE/v1/collections/knowledge_base/query \ "top_k": 5 }' ``` + + ```typescript const { results, timings } = await client.queries.query("knowledge_base", { query: "What is the PTO policy?", @@ -205,6 +224,7 @@ for (const result of results) { } console.log(`embed=${timings.embed_ms}ms search=${timings.search_ms}ms total=${timings.total_ms}ms`); ``` + From dd1d50183cd2feda8e4587a4f1e5f7f6738cd98c Mon Sep 17 00:00:00 2001 From: Bigint <69431456+bigint@users.noreply.github.com> Date: Fri, 22 May 2026 19:31:31 +0530 Subject: [PATCH 06/11] docs: simplify sdk pages for scannability --- website/content/docs/sdks/mcp.mdx | 112 ++++++++++------------- website/content/docs/sdks/python.mdx | 108 +++++++++++++++------- website/content/docs/sdks/typescript.mdx | 68 ++++++++++---- 3 files changed, 173 insertions(+), 115 deletions(-) diff --git a/website/content/docs/sdks/mcp.mdx b/website/content/docs/sdks/mcp.mdx index 6e2974b6..1799bb67 100644 --- a/website/content/docs/sdks/mcp.mdx +++ b/website/content/docs/sdks/mcp.mdx @@ -3,52 +3,50 @@ title: MCP Server description: Expose a bigRAG instance over the Model Context Protocol so Claude Desktop, Cursor, and other MCP clients can retrieve from your collections natively. --- -bigRAG ships an [MCP](https://modelcontextprotocol.io) server in two -shapes: +import { Callout } from "fumadocs-ui/components/callout"; +import { Steps, Step } from "fumadocs-ui/components/steps"; -- **Remote HTTP** — mounted on the bigRAG API at `/mcp`, for - streamable-http clients that can send an `Authorization: Bearer ...` - header. No local install. -- **Local stdio** — the `bigrag-mcp` CLI, for Claude Desktop's - `config.json` and older Cursor setups. 
+bigRAG ships an [MCP](https://modelcontextprotocol.io) server in two shapes: -Both derive scope (all-collections vs pinned to one collection) from the -API key. Local stdio adjusts its registered toolset at startup; remote -HTTP always advertises the full tool list and enforces scope at call time. +- **Remote HTTP** — mounted on the bigRAG API at `/mcp`, for streamable-http clients that can send an `Authorization: Bearer ...` header. No local install. +- **Local stdio** — the `bigrag-mcp` CLI, for Claude Desktop's `config.json` and older Cursor setups. -## Admin UI credentials +Both derive scope (all-collections vs pinned to one collection) from the API key. Local stdio adjusts its registered toolset at startup; remote HTTP always advertises the full tool list and enforces scope at call time. + +## Admin UI Setup The admin UI's **MCP** page (`/mcp`) is the canonical way to set one up. -Creating an MCP there: -1. mints a fresh bigRAG API key dedicated to that MCP (not shared - with `/api-keys`) scoped to `collection:read`, `document:read`, and - `query:read`; -2. shows the full API key exactly once — copy it straight into your - MCP client's bearer header or Claude Desktop config; -3. persists the MCP as a first-class server-side resource (title, - server name, collection scope, key prefix) that you can later - rotate or delete. + + + +Mint a fresh bigRAG API key dedicated to that MCP (not shared with `/api-keys`), scoped to `collection:read`, `document:read`, and `query:read`. + + + +Copy the full API key — it is shown exactly once. Paste it directly into your MCP client's bearer header or Claude Desktop config. + + + +The MCP is persisted as a first-class server-side resource (title, server name, collection scope, key prefix) that you can later rotate or delete. + + + -You do **not** pick from existing keys on `/api-keys` — MCP credentials -live in their own namespace. If a key leaks or is lost, click **Rotate -key** on the MCP's detail dialog. 
The previous key stops working -immediately on the next request. + + You do not pick from existing keys on `/api-keys` — MCP credentials live in their own namespace. If a key leaks or is lost, click **Rotate key** on the MCP's detail dialog. The previous key stops working immediately on the next request. + -## Scoping to a single collection +## Scoping to a Single Collection -When creating the MCP in the admin UI, set **Collection scope** to pin it to -one collection. The bigRAG API blocks cross-collection endpoints for -scoped MCPs with a 403. +When creating the MCP in the admin UI, set **Collection scope** to pin it to one collection. The bigRAG API blocks cross-collection endpoints for scoped MCPs with a 403. | MCP scope | Local stdio tools | Remote HTTP tools | |-----------|-------------------|-------------------| | **All collections** | 8 tools including `list_collections` and `multi_collection_query` | 8 tools | | **Pinned to `X`** | 6 tools, all with `collection` pre-bound to `X`; `list_collections` and `multi_collection_query` are hidden | 8 tools advertised; cross-collection calls return 403 | -There is no separate `BIGRAG_COLLECTION` env var — the key itself -carries its scope. To change scope, delete the MCP and create a new -one (or rotate after editing via the REST API). +There is no separate `BIGRAG_COLLECTION` env var — the key itself carries its scope. To change scope, delete the MCP and create a new one (or rotate after editing via the REST API). ## Remote HTTP @@ -64,14 +62,13 @@ Every request must send the MCP key in an HTTP authorization header: Authorization: Bearer bigrag_sk_... ``` -Use remote clients that can attach bearer headers. If your client only -supports URL-only remote servers, use the local stdio bridge below -until OAuth support is available. 
+ + Under the hood the endpoint is a FastMCP server with `stateless_http=True` and `json_response=True`; every request carries the bearer key and is dispatched through the bigRAG REST API, so auth and scope rules apply uniformly. + -Under the hood the endpoint is a FastMCP server with -`stateless_http=True` and `json_response=True`; every request carries -the bearer key and is dispatched through the bigRAG REST API, so auth and -scope rules apply uniformly. + + If your client only supports URL-only remote servers, use the local stdio bridge below until OAuth support is available. + ## Local stdio (Claude Desktop, older Cursor) @@ -90,7 +87,7 @@ BIGRAG_API_KEY=bigrag_sk_... \ bigrag-mcp ``` -Flags: +### Flags | Flag | Default | Notes | |------|---------|-------| @@ -101,14 +98,11 @@ Flags: For `streamable-http`, `--port` sets the FastMCP bind port. The stdio transport ignores it. -Scope is discovered on startup via `GET /v1/auth/whoami`. If the key -is pinned to a collection, the CLI registers the scoped toolset -automatically. +Scope is discovered on startup via `GET /v1/auth/whoami`. If the key is pinned to a collection, the CLI registers the scoped toolset automatically. -### Claude Desktop config +### Claude Desktop Config -Add to `~/Library/Application Support/Claude/claude_desktop_config.json` -(macOS) or `%APPDATA%\Claude\claude_desktop_config.json` (Windows): +Add to `~/Library/Application Support/Claude/claude_desktop_config.json` (macOS) or `%APPDATA%\Claude\claude_desktop_config.json` (Windows): ```json { @@ -124,13 +118,11 @@ Add to `~/Library/Application Support/Claude/claude_desktop_config.json` } ``` -Restart Claude Desktop. The server appears under the tools icon with -the tools registered for that key's scope. +Restart Claude Desktop. The server appears under the tools icon with the tools registered for that key's scope. ## Tools -All tools are thin wrappers around `/v1/*` endpoints. They honor the -same auth and scopes as any other API client. 
+All tools are thin wrappers around `/v1/*` endpoints. They honor the same auth and scopes as any other API client. ### Full-workspace key @@ -147,30 +139,20 @@ same auth and scopes as any other API client. ### Collection-pinned key -Local stdio registers six tools minus `list_collections` and -`multi_collection_query`, with the `collection` / `name` argument removed -because it is pre-bound from the key's scope. Remote HTTP advertises the -same eight tool names as a full-workspace key, but scoped-key calls that -would cross collection boundaries fail with 403. +Local stdio registers six tools — the full set minus `list_collections` and `multi_collection_query` — with the `collection` / `name` argument removed because it is pre-bound from the key's scope. Remote HTTP advertises the same eight tool names as a full-workspace key, but scoped-key calls that would cross collection boundaries fail with 403. -## Example interaction +## Example Interaction Once configured: > **User:** What does our runbook say about Postgres failover? > -> **Model:** _(invokes `query` with `collection: runbooks`,_ -> _`query: "Postgres failover procedure"`, `top_k: 5`)_ +> **Model:** _(invokes `query` with `collection: runbooks`, `query: "Postgres failover procedure"`, `top_k: 5`)_ > > _(receives top-5 chunks; synthesises an answer citing `document_id`s)_ ## Limitations -- Ingestion (upload and webhooks) is intentionally not exposed — - MCP tools target retrieval workflows. Use the HTTP API, a language - SDK, or the admin UI for writes. -- Streaming SSE endpoints (collection events) are not wrapped — MCP tools - are request/response only. -- OAuth 2.0 flows are not implemented. Remote HTTP clients must send - `Authorization: Bearer ...`; URL query-token authentication is not - accepted on `/mcp`. +- Ingestion (upload and webhooks) is intentionally not exposed — MCP tools target retrieval workflows. Use the HTTP API, a language SDK, or the admin UI for writes. 
+- Streaming SSE endpoints (collection events) are not wrapped — MCP tools are request/response only. +- OAuth 2.0 flows are not implemented. Remote HTTP clients must send `Authorization: Bearer ...`; URL query-token authentication is not accepted on `/mcp`. diff --git a/website/content/docs/sdks/python.mdx b/website/content/docs/sdks/python.mdx index 447334fc..83b2f48a 100644 --- a/website/content/docs/sdks/python.mdx +++ b/website/content/docs/sdks/python.mdx @@ -5,6 +5,7 @@ description: Async Python client for the bigRAG API with full type hints and zer import { Tabs, Tab } from "fumadocs-ui/components/tabs"; import { Callout } from "fumadocs-ui/components/callout"; +import { Accordions, Accordion } from "fumadocs-ui/components/accordion"; ## Installation @@ -61,8 +62,6 @@ The client reads `BIGRAG_API_KEY` from the environment if `api_key` is not passe ## Resource Namespaces -The SDK follows a resource namespace pattern: - | Namespace | Description | |---|---| | `client.collections` | Collection CRUD, stats, event tokens, and event streams | @@ -117,7 +116,9 @@ async for event in client.collections.stream_events("docs", token=token["token"] print(event) ``` -Set `embedding_api_key` or `reranking_api_key` to `None` in update calls to clear the stored key. + + Set `embedding_api_key` or `reranking_api_key` to `None` in update calls to clear the stored key. 
+ ## Documents @@ -132,14 +133,24 @@ doc = await client.documents.upload( "docs", "/path/to/file.pdf", metadata={"department": "engineering"}, ) +``` + + + + -# Batch upload +```python result = await client.documents.batch_upload("docs", [ "/path/to/a.pdf", "/path/to/b.docx", ]) +``` + + -# Large upload session + + +```python session = await client.documents.create_upload_session( "docs", total_files=2, @@ -153,7 +164,13 @@ await client.documents.upload_session_file( client_item_id="000001", ) session = await client.documents.complete_upload_session("docs", session["id"]) +``` + + + + +```python # List documents docs = await client.documents.list("docs", status="ready", limit=20) @@ -161,7 +178,13 @@ docs = await client.documents.list("docs", status="ready", limit=20) doc = await client.documents.get("docs", "doc-id") await client.documents.delete("docs", "doc-id") await client.documents.reprocess("docs", "doc-id") +``` + + + + +```python # Get chunks chunks = await client.documents.get_chunks("docs", "doc-id") @@ -170,17 +193,31 @@ elements = await client.documents.get_elements("docs", "doc-id") # Get file download URL url = client.documents.get_file_url("docs", "doc-id") +``` + + -# Batch operations + + +```python statuses = await client.documents.batch_get_status("docs", ["id1", "id2"]) docs = await client.documents.batch_get("docs", ["id1", "id2"]) result = await client.documents.batch_delete("docs", ["id1", "id2"]) +``` + + + + -# Global document access (without collection scope) +```python doc = await client.documents.get_by_id("doc-id") chunks = await client.documents.get_chunks_by_id("doc-id") ``` + + + + ## Chat ```python @@ -273,9 +310,11 @@ deliveries = await client.webhooks.list_deliveries("webhook-id", limit=10) Webhook events cover document, collection, connector sync, and backup data-operation changes. Examples: `document.ready`, `document.deleted`, `collection.truncated`, `connector.sync.failed`, and `backup.succeeded`. 
-Webhook management calls `/v1/admin/webhooks` and requires session-cookie admin auth; API-key clients receive `403`. + + Webhook management calls `/v1/admin/webhooks` and requires session-cookie admin auth. API-key clients receive `403`. + -## API-Client Status Polling +## Status Polling Track document processing with REST polling: @@ -305,31 +344,6 @@ stats = await col.stats() analytics = await col.analytics() ``` -## Error Handling - -```python -from bigrag import ( - BigRAGError, # base for all errors - APIError, # any HTTP error - BadRequestError, # 400 - AuthenticationError, # 401 - NotFoundError, # 404 - RateLimitError, # 429 - InternalServerError, # 500 - APIConnectionError, # network errors - APITimeoutError, # timeout -) - -try: - await client.collections.get("missing") -except NotFoundError as e: - print(f"Not found: {e} (status={e.status})") -except APIConnectionError: - print("Cannot reach the API server") -except APITimeoutError: - print("Request timed out") -``` - ## Admin ```python @@ -375,9 +389,35 @@ models = await client.list_embedding_models() analytics = await client.collections.analytics("docs") ``` +## Error Handling + +```python +from bigrag import ( + BigRAGError, # base for all errors + APIError, # any HTTP error + BadRequestError, # 400 + AuthenticationError, # 401 + NotFoundError, # 404 + RateLimitError, # 429 + InternalServerError, # 500 + APIConnectionError, # network errors + APITimeoutError, # timeout +) + +try: + await client.collections.get("missing") +except NotFoundError as e: + print(f"Not found: {e} (status={e.status})") +except APIConnectionError: + print("Cannot reach the API server") +except APITimeoutError: + print("Request timed out") +``` + ## Retry Behavior The SDK automatically retries on: + - HTTP 429 from proxy or infrastructure layers - HTTP 5xx (server errors) - Connection errors and timeouts diff --git a/website/content/docs/sdks/typescript.mdx b/website/content/docs/sdks/typescript.mdx index 402cd9c4..118be515 
100644 --- a/website/content/docs/sdks/typescript.mdx +++ b/website/content/docs/sdks/typescript.mdx @@ -5,6 +5,7 @@ description: Zero-dependency TypeScript client for Node.js, browsers, Deno, Bun, import { Tabs, Tab } from "fumadocs-ui/components/tabs"; import { Callout } from "fumadocs-ui/components/callout"; +import { Accordions, Accordion } from "fumadocs-ui/components/accordion"; ## Installation @@ -101,36 +102,61 @@ client.collections.createEventToken(name) client.collections.streamEvents(name, { token? }) // AsyncIterable of collection events (SSE) ``` -For update calls, send `embedding_api_key: null` or `reranking_api_key: null` to clear the stored key. + + For update calls, send `embedding_api_key: null` or `reranking_api_key: null` to clear the stored key. + ## Documents ```typescript client.documents.upload(collection, file, metadata?) client.documents.batchUpload(collection, files, metadata?) -client.documents.createUploadSession(collection, { total_files, total_bytes, metadata? }) -client.documents.uploadSessionFile(collection, sessionId, file, { clientItemId?, filename? }) -client.documents.getUploadSession(collection, sessionId) -client.documents.completeUploadSession(collection, sessionId) -client.documents.cancelUploadSession(collection, sessionId) client.documents.list(collection, { q?, status?, sort?, order?, limit?, offset? }) client.documents.listAll(collection, { q?, status?, sort?, order?, limit? }) // AsyncGenerator client.documents.get(collection, documentId) client.documents.delete(collection, documentId) -client.documents.batchGetStatus(collection, documentIds) -client.documents.batchGet(collection, documentIds) -client.documents.batchDelete(collection, documentIds) client.documents.reprocess(collection, documentId) client.documents.getChunks(collection, documentId, { limit?, offset? }?) client.documents.getElements(collection, documentId, { limit?, offset? }?) 
client.documents.getFileUrl(collection, documentId) // returns URL string +``` + +File uploads accept `File`, `Blob`, `Buffer`, `Uint8Array`, or `{ path: string; name?: string }`. + + + + + +```typescript +client.documents.createUploadSession(collection, { total_files, total_bytes, metadata? }) +client.documents.uploadSessionFile(collection, sessionId, file, { clientItemId?, filename? }) +client.documents.getUploadSession(collection, sessionId) +client.documents.completeUploadSession(collection, sessionId) +client.documents.cancelUploadSession(collection, sessionId) +``` + + -// Global document access (without collection scope) + + +```typescript +client.documents.batchGetStatus(collection, documentIds) +client.documents.batchGet(collection, documentIds) +client.documents.batchDelete(collection, documentIds) +``` + + + + + +```typescript client.documents.getById(documentId) client.documents.getChunksById(documentId, { limit?, offset? }?) ``` -File uploads accept `File`, `Blob`, `Buffer`, `Uint8Array`, or `{ path: string; name?: string }`. + + + ## Chat @@ -197,7 +223,9 @@ client.webhooks.replayDelivery(id, deliveryId) Webhook events cover document, collection, connector sync, and backup data-operation changes. Examples: `document.ready`, `document.deleted`, `collection.truncated`, `connector.sync.failed`, and `backup.succeeded`. -Webhook management calls `/v1/admin/webhooks` and requires session-cookie admin auth; API-key clients receive `403`. + + Webhook management calls `/v1/admin/webhooks` and requires session-cookie admin auth. API-key clients receive `403`. 
+ ## Auth @@ -261,6 +289,10 @@ client.admin.mcpServers.rotate(serverId) client.admin.mcpServers.delete(serverId) ``` + + + + ```typescript for await (const event of client.admin.realtime.backups({ limit: 20 })) { if (event.event === "snapshot") { @@ -273,6 +305,14 @@ for await (const event of client.admin.realtime.platformReadiness()) { } ``` + + + + + + Admin endpoints (`/v1/admin/*`, most of `/v1/auth/*`, and Google Drive connector OAuth flows) are session-only. API keys remain the right fit for read/query/ingest workloads. + + ## Connectors ```typescript @@ -310,10 +350,6 @@ client.getUsage({ windowDays? }) // GET /v1/usage client.collections.analytics(collection) // GET /v1/collections/{name}/analytics ``` - -Admin endpoints (`/v1/admin/*`, most of `/v1/auth/*`, and Google Drive connector OAuth flows) are session-only. API keys remain the right fit for read/query/ingest workloads. - - ## Error Handling ```typescript From 2a7751f2091a5af7c56c5387160970b2cc71c9d2 Mon Sep 17 00:00:00 2001 From: Bigint <69431456+bigint@users.noreply.github.com> Date: Fri, 22 May 2026 19:31:32 +0530 Subject: [PATCH 07/11] docs: simplify overview, comparison, admin ui, and guides --- website/content/docs/admin-ui.mdx | 31 +++++++------- website/content/docs/comparison.mdx | 6 +-- .../docs/cookbook/multi-tenant-saas.mdx | 37 ++++------------- website/content/docs/index.mdx | 2 +- .../content/docs/migration/from-pinecone.mdx | 40 +++++-------------- 5 files changed, 35 insertions(+), 81 deletions(-) diff --git a/website/content/docs/admin-ui.mdx b/website/content/docs/admin-ui.mdx index 4a68c7d8..69f385e4 100644 --- a/website/content/docs/admin-ui.mdx +++ b/website/content/docs/admin-ui.mdx @@ -7,11 +7,10 @@ import { Callout } from "fumadocs-ui/components/callout"; The admin UI is the Vite + TanStack Router single-page app packaged with bigRAG. 
Every control is also a REST API, so the admin UI is optional — you can run bigRAG headless — but it's the fastest way to bootstrap, prototype, and operate the platform. -The admin UI supports light, dark, and system themes. The selection is stored in the browser, applies before the React app loads, and is available from both the login/setup screens and the signed-in account menu. - -Submitted operator forms use TanStack Form for client-side draft state, validation, resets, and submit handling while keeping the existing Base UI controls. This covers setup, login, collection creation and search, eval runs, API keys, MCP servers, account password changes, runtime settings, embedding presets, webhook creation, and connector credential forms. - -Collection creation keeps customer/tenant filter enforcement and JSON metadata-schema validation inside Advanced safeguards. Most collections can leave these disabled; enable customer separation only when one collection contains data for multiple customers, and enable metadata validation only when uploads need a fixed metadata shape. +Key UI behaviors: +- Supports light, dark, and system themes; selection is stored in the browser and applies before the React app loads. +- Operator forms use TanStack Form for client-side draft state, validation, resets, and submit handling with existing Base UI controls. This covers setup, login, collection creation and search, eval runs, API keys, MCP servers, account password changes, runtime settings, embedding presets, webhook creation, and connector credential forms. +- Collection creation keeps customer/tenant filter enforcement and JSON metadata-schema validation inside Advanced safeguards. Most collections can leave these disabled; enable customer separation only when one collection holds data for multiple customers, and enable metadata validation only when uploads need a fixed metadata shape. 
## Running the Admin UI @@ -21,7 +20,7 @@ pnpm dev:app # admin UI on http://localhost:3000 The admin UI calls the FastAPI backend directly. The default API URL is `http://localhost:4000`; override it with `VITE_BIGRAG_URL` at build/dev time or `BIGRAG_URL` in the app container. Configure browser origins through deployment config or the [instance settings API](/docs/api-reference/instance-settings) when running a split admin UI. -For cross-site production deployments, enable secure session cookies and usually set `SameSite=none` through `BIGRAG_SESSION_COOKIE_*` bootstrap values or the instance settings API. Same-site subdomain deployments can usually keep `SameSite=lax`. +For cross-site production deployments, enable secure session cookies and set `SameSite=none` through `BIGRAG_SESSION_COOKIE_*` bootstrap values or the instance settings API. Same-site subdomain deployments can usually keep `SameSite=lax`. ## Route map @@ -63,13 +62,11 @@ The `/connectors` page renders provider tabs above the selected provider setup p The Settings Backups tab renders backup destination settings directly above export controls backed by `GET /v1/admin/backups`, `GET /v1/admin/realtime/backups`, and `POST /v1/admin/backups`. Backup starts are disabled when the worker heartbeat is offline or no bucket is configured. -The Settings Storage tab manages uploaded source-file storage. It uses `GET /v1/admin/settings` and `PUT /v1/admin/settings` for local disk and S3-compatible object-store settings. +The Settings Storage tab manages uploaded source-file storage via `GET /v1/admin/settings` and `PUT /v1/admin/settings` for local disk and S3-compatible object-store settings. The Settings Vector storage tab exposes the saved Turbopuffer API key state, public-region dropdown, namespace prefix, and optional base URL through the same instance settings API. -Settings are grouped into Account, Health, Security, Data, Storage, Vector storage, and Backups. 
Security covers trusted proxies, outbound URL policy, and embedding-cache posture. Data covers ingestion, queue, retention, and webhook runtime settings. Storage, Vector storage, and Backups own their matching runtime settings and validation checks. - -Settings only accepts current tab values: `/settings?tab=account`, `/settings?tab=health`, `/settings?tab=security`, `/settings?tab=data`, `/settings?tab=storage`, `/settings?tab=vector_store`, and `/settings?tab=backups`. Legacy `/data-storage` and `/backups` URLs redirect to their Settings tabs. +Settings accepts only the following `tab` query-parameter values: `/settings?tab=account`, `/settings?tab=health`, `/settings?tab=security`, `/settings?tab=data`, `/settings?tab=storage`, `/settings?tab=vector_store`, and `/settings?tab=backups`. Legacy `/data-storage` and `/backups` URLs redirect to their Settings tabs. | Group | Section | Reads from | Writes to | |-------|---------|-----------|-----------| @@ -83,15 +80,15 @@ Settings only accepts current tab values: `/settings?tab=account`, `/settings?ta ## Realtime updates -The admin UI uses session-cookie [admin realtime SSE streams](/docs/api-reference/admin-realtime) for live document progress, batch upload progress, collection stats, connector sync state, access logs, paginated audit log pages, usage, platform stats, and readiness. Each stream sends an immediate full `snapshot` payload that matches the related REST response, then sends later snapshots as data changes. +The admin UI uses session-cookie [admin realtime SSE streams](/docs/api-reference/admin-realtime) for live document progress, batch upload progress, collection stats, connector sync state, access logs, paginated audit log pages, usage, platform stats, and readiness. Each stream sends an immediate full `snapshot` payload matching the related REST response, then sends later snapshots as data changes. 
-The expensive overview surfaces also keep short-lived backend caches behind the readiness and access-overview loaders so several open admin sessions do not rerun the same aggregate or dependency checks on every interval tick. +Expensive overview surfaces keep short-lived backend caches behind the readiness and access-overview loaders so several open admin sessions do not rerun the same aggregate or dependency checks on every interval tick. -The same platform stats stream powers the worker-offline UX. When `workers.online` is `false`, the admin UI shows `bigrag-worker is offline`, includes the last heartbeat age when available, and disables actions that would enqueue uploads, document reprocessing, backups, Google Drive source sync, or manual Drive resync. Missing worker stats are treated as unknown, so actions remain available until an offline heartbeat is confirmed. +**Worker-offline UX:** The same platform stats stream drives the offline indicator. When `workers.online` is `false`, the admin UI shows `bigrag-worker is offline`, includes the last heartbeat age when available, and disables actions that would enqueue uploads, document reprocessing, backups, Google Drive source sync, or manual Drive resync. Missing worker stats are treated as unknown, so actions remain available until an offline heartbeat is confirmed. -After a large local upload starts, the collection documents page opens `GET /v1/admin/realtime/collections/{name}/upload-sessions/{session_id}` as a high-priority realtime stream. The stream returns received, queued, ingesting, completed, failed, and canceled counts; the panel renders the active summary plus a small item window that prioritizes active and failed files before older completed rows, so 10,000-file uploads do not require a 10,000-ID status stream. If the browser cannot keep an SSE connection open, the panel falls back to two-second polling. 
If a linked document is deleted while the upload session remains visible, the item is treated as canceled rather than active queued work. Terminal sessions are removed from the page automatically once no active or failed work remains. +**Large upload progress:** After a large local upload starts, the collection documents page opens `GET /v1/admin/realtime/collections/{name}/upload-sessions/{session_id}` as a high-priority realtime stream. The stream returns received, queued, ingesting, completed, failed, and canceled counts. The panel renders the active summary plus a small item window that prioritizes active and failed files before older completed rows, so 10,000-file uploads do not require a 10,000-ID status stream. If the browser cannot keep an SSE connection open, the panel falls back to two-second polling. If a linked document is deleted while the upload session remains visible, the item is treated as canceled rather than active queued work. Terminal sessions are removed automatically once no active or failed work remains. -The Google Drive connector page opens `GET /v1/admin/realtime/google/sync-jobs?collection={name}` beside the source stream. The sync monitor shows Drive scanning, download/update progress, remote deletion cleanup, and queued-for-ingestion completion; document conversion and indexing continue through the normal document progress surfaces. +**Google Drive:** The Drive connector page opens `GET /v1/admin/realtime/google/sync-jobs?collection={name}` beside the source stream. The sync monitor shows Drive scanning, download/update progress, remote deletion cleanup, and queued-for-ingestion completion; document conversion and indexing continue through the normal document progress surfaces. Upload progress restoration and the current Drive browser selection are browser-local admin UI state. 
Upload session IDs are persisted in local storage so a collection's upload panel can reconnect after navigation; stale upload-session IDs are cleared without a failure alert when the API returns `404`. Chat turns are in-memory only and are not persisted in browser storage or the REST API. @@ -101,13 +98,13 @@ Realtime pages only evaluate terminal-state predicates after the first snapshot ## Chat specifics -The chat UI sends each turn to `POST /v1/chat`. Retrieval, prompt assembly, provider calls, and citations happen on the backend, but chat messages and conversations are not persisted anywhere. The saved chat provider key stays on the backend; preference responses only expose whether a key is configured. Preferences such as model, temperature, `top_k`, search mode, rerank preference, and system prompt persist via `/v1/auth/preferences`. +The chat UI sends each turn to `POST /v1/chat`. Retrieval, prompt assembly, provider calls, and citations happen on the backend; chat messages and conversations are not persisted anywhere. The saved chat provider key stays on the backend — preference responses only expose whether a key is configured. Preferences such as model, temperature, `top_k`, search mode, rerank preference, and system prompt persist via `/v1/auth/preferences`. Playground answers render safe CommonMark/GFM Markdown, including emphasis, lists, task lists, code, blockquotes, links, and tables. Raw HTML is not enabled, and source citations remain clickable inside rendered Markdown. Source cards keep long extracted snippets inside the chat column, including compact JSON, IDs, and other unbroken tokens from synthetic or machine-generated documents. -The empty chat screen no longer shows generic default prompts. After a collection is selected, operators can generate five random starter questions using the saved chat provider key. 
The backend samples that collection's ready documents and chunks, saves the latest generated set for the collection, and returns that saved set after reloads. +The empty chat screen does not show generic default prompts. After a collection is selected, operators can generate five random starter questions using the saved chat provider key. The backend samples that collection's ready documents and chunks, saves the latest generated set for the collection, and returns that saved set after reloads. ### Citations diff --git a/website/content/docs/comparison.mdx b/website/content/docs/comparison.mdx index 9bdd79af..0d5b2fab 100644 --- a/website/content/docs/comparison.mdx +++ b/website/content/docs/comparison.mdx @@ -169,9 +169,9 @@ Frameworks give you maximum flexibility but require writing code to build a work ## When to Use bigRAG -bigRAG is built for developers who want a **production-ready RAG API** without assembling a pipeline from scratch or deploying a heavy platform with features they don't need. +bigRAG is built for developers who want a **production-ready RAG API** without assembling a pipeline from scratch or deploying a heavy platform. 
-bigRAG is a great fit when you: +**bigRAG is a great fit when you:** - Need a **REST API** for document ingestion and retrieval in your own application - Want **self-hosted** deployment with full control over your data @@ -181,7 +181,7 @@ bigRAG is a great fit when you: - Prefer a **lean, focused platform** with an optional admin UI instead of a feature-heavy end-user chat product - Need a **TypeScript SDK** for frontend or Node.js integration -Consider an alternative when you: +**Consider an alternative when you:** - Need a **ready-made end-user chat UI** for non-technical users → OpenRAG, RAGFlow, or AnythingLLM - Require **fully offline** operation with local models → PrivateGPT diff --git a/website/content/docs/cookbook/multi-tenant-saas.mdx b/website/content/docs/cookbook/multi-tenant-saas.mdx index f6a83bbd..02b38387 100644 --- a/website/content/docs/cookbook/multi-tenant-saas.mdx +++ b/website/content/docs/cookbook/multi-tenant-saas.mdx @@ -5,16 +5,11 @@ description: Patterns for serving many customers from one bigRAG deployment — import { Callout } from "fumadocs-ui/components/callout"; -This recipe turns bigRAG into a per-tenant RAG backbone for a typical -SaaS app — one collection per customer (or one collection shared with -tenant filtering), scoped API keys, usage dashboards, and cost caps. -Python snippets assume the async SDK is running inside an async function with -an initialized `client`. +This recipe turns bigRAG into a per-tenant RAG backbone for a typical SaaS app — one collection per customer (or one collection shared with tenant filtering), scoped API keys, usage dashboards, and cost caps. Python snippets assume the async SDK is running inside an async function with an initialized `client`. ## Pattern A — collection per tenant -Simplest isolation model. Each tenant's data lives in its own -collection; queries are physically scoped by the collection name. +Simplest isolation model. 
Each tenant's data lives in its own collection; queries are physically scoped by the collection name. ### Provisioning @@ -37,8 +32,7 @@ await client.collections.create({ ### Per-tenant API key -Mint a scoped key so the tenant's browser / mobile app can query but -can't touch other collections or admin APIs: +Mint a scoped key so the tenant's browser/mobile app can query but can't touch other collections or admin APIs: ```python key = await client.admin.api_keys.create({ @@ -52,17 +46,12 @@ tenant_api_key = key["key"] The middleware enforces both the scopes and the collection pin, so a leaked tenant key can only reach its own collection. -Collections-per-tenant is simple but creates operational overhead as -tenant count grows. Above small/medium tenant counts, move to Pattern B -so one Turbopuffer namespace can use tenant metadata filters. +Collections-per-tenant is simple but creates operational overhead as tenant count grows. Above small/medium tenant counts, move to Pattern B so one Turbopuffer namespace can use tenant metadata filters. ## Pattern B — shared collection with tenant filters -One collection, tenant isolation via a required metadata filter. Setting -`tenant_field` tells bigRAG to configure that field for backend filtering -and reject uploads, raw vector upserts, queries, and chat calls that omit -the tenant field. +One collection, tenant isolation via a required metadata filter. Setting `tenant_field` tells bigRAG to configure that field for backend filtering and reject uploads, raw vector upserts, queries, and chat calls that omit the tenant field. ```python await client.collections.create({ @@ -100,17 +89,11 @@ usage = await client.get_usage(window_days=30) # } ``` -Join this with your billing system's `tenant_id ↔ collection_name` -map and you have a per-tenant invoice line. For pattern B, the built-in -usage endpoint is collection-level; keep per-tenant counters in your own -application when multiple tenants share one collection. 
+Join this with your billing system's `tenant_id ↔ collection_name` map and you have a per-tenant invoice line. For Pattern B, the built-in usage endpoint is collection-level; keep per-tenant counters in your own application when multiple tenants share one collection. ## Audit trail for compliance -Privileged actions such as key creation, webhook config, user updates, -collection mutations, and document mutations are in `audit_log`. Surface -them in internal tooling by filtering on `actor_id`, `resource_type`, or -the relevant `resource_id`: +Privileged actions such as key creation, webhook config, user updates, collection mutations, and document mutations are in `audit_log`. Surface them in internal tooling by filtering on `actor_id`, `resource_type`, or the relevant `resource_id`: ```bash curl -b cookies.txt \ @@ -129,8 +112,4 @@ await client.collections.delete(f"tenant_{tenant_id}") await client.documents.batch_delete("shared_tenants", document_ids) ``` -For shared collections, keep a tenant-owned document ID index in your -application so offboarding can delete the right documents. If you use raw -vector upserts directly, keep vector IDs and call `client.vectors.delete`. -Revoke the tenant's API keys via `client.admin.api_keys.delete(key_id)` and -archive the relevant audit rows for your records. +For shared collections, keep a tenant-owned document ID index in your application so offboarding can delete the right documents. If you use raw vector upserts directly, keep vector IDs and call `client.vectors.delete`. Revoke the tenant's API keys via `client.admin.api_keys.delete(key_id)` and archive the relevant audit rows for your records. 
diff --git a/website/content/docs/index.mdx b/website/content/docs/index.mdx index 0dc5a6e6..6dbf9e69 100644 --- a/website/content/docs/index.mdx +++ b/website/content/docs/index.mdx @@ -7,7 +7,7 @@ full: true import { Cards, Card } from "fumadocs-ui/components/card"; import { Mermaid } from "@/components/mermaid"; -bigRAG is an open-source RAG (Retrieval-Augmented Generation) platform with Turbopuffer at the retrieval layer. It provides a complete pipeline for document ingestion, chunking, embedding, semantic search, keyword search, and hybrid search behind a REST API, TypeScript / Python SDKs, an admin UI, and an MCP server for Claude Desktop, Cursor, and any MCP-aware runtime. +bigRAG is an open-source RAG platform with Turbopuffer at the retrieval layer. It provides a complete pipeline for document ingestion, chunking, embedding, and search behind a REST API, TypeScript/Python SDKs, an admin UI, and an MCP server. ## Key Features diff --git a/website/content/docs/migration/from-pinecone.mdx b/website/content/docs/migration/from-pinecone.mdx index f450c3f0..8fc5967b 100644 --- a/website/content/docs/migration/from-pinecone.mdx +++ b/website/content/docs/migration/from-pinecone.mdx @@ -5,10 +5,7 @@ description: Move an existing Pinecone index to bigRAG with one importer script import { Callout } from "fumadocs-ui/components/callout"; -bigRAG's data model is a superset of Pinecone's — every Pinecone -concept has a direct equivalent, plus a few extras (persistent chunk -text, ingestion pipeline, webhooks). Migrating is primarily a data -copy and an API-layer search-and-replace. +bigRAG's data model is a superset of Pinecone's — every Pinecone concept has a direct equivalent, plus a few extras (persistent chunk text, ingestion pipeline, webhooks). Migrating is primarily a data copy and an API-layer search-and-replace. ## Concept mapping @@ -23,9 +20,7 @@ copy and an API-layer search-and-replace. 
| `describe_index_stats` | `GET /v1/collections/{name}/stats` | -bigRAG stores chunk **text** alongside the vector — so results carry -the source passage natively. Pinecone only keeps metadata; if your -pipeline previously embedded text elsewhere, you can drop that storage. +bigRAG stores chunk **text** alongside the vector — so results carry the source passage natively. Pinecone only keeps metadata; if your pipeline previously embedded text elsewhere, you can drop that storage. ## One-shot importer script @@ -76,36 +71,19 @@ asyncio.run(main()) ## Two migration strategies -**Lift and shift** — the script above copies vectors as-is. Fastest, -keeps existing retrieval quality identical. Good when you're just -moving off Pinecone's hosting cost. +**Lift and shift** — the script above copies vectors as-is. Fastest, keeps existing retrieval quality identical. Good when you're just moving off Pinecone's hosting cost. -**Re-ingest from source** — upload the original documents through -`POST /v1/collections/{name}/documents`. Slower (you pay the embedding -bill again) but unlocks features Pinecone doesn't have: citation -provenance (`page_no`, `char_start`), chunk-level re-embedding, the -persistent embedding cache, Docling-parsed tables and images. -Recommended if you're in the middle of a chunking rethink anyway. +**Re-ingest from source** — upload the original documents through `POST /v1/collections/{name}/documents`. Slower (you pay the embedding bill again) but unlocks features Pinecone doesn't have: citation provenance (`page_no`, `char_start`), chunk-level re-embedding, the persistent embedding cache, Docling-parsed tables and images. Recommended if you're in the middle of a chunking rethink anyway. ## API differences to watch -- **Filters stay as JSON objects**. Pass a dict like - `{"department": "sales"}` and bigRAG translates it into Turbopuffer - filters. -- **Namespaces → metadata filters**. 
Pinecone's `namespace="docs"` - becomes `metadata.namespace = "docs"`. Set - `tenant_field: "namespace"` on the collection when that field should - become required on uploads, raw vector upserts, queries, and chat - requests. -- **Hybrid search is built-in**. `search_mode: "hybrid"` runs keyword - BM25 + vector search in parallel and fuses with reciprocal rank fusion. - No separate "sparse" vectors to maintain. +- **Filters stay as JSON objects.** Pass a dict like `{"department": "sales"}` and bigRAG translates it into Turbopuffer filters. +- **Namespaces → metadata filters.** Pinecone's `namespace="docs"` becomes `metadata.namespace = "docs"`. Set `tenant_field: "namespace"` on the collection when that field should become required on uploads, raw vector upserts, queries, and chat requests. +- **Hybrid search is built-in.** `search_mode: "hybrid"` runs keyword BM25 + vector search in parallel and fuses with reciprocal rank fusion. No separate "sparse" vectors to maintain. ## Cut-over 1. Run the importer with `BATCH=100` overnight. -2. Update your application's search code to hit bigRAG. Filters, top_k, - and metadata pass through. +2. Update your application's search code to hit bigRAG. Filters, top_k, and metadata pass through. 3. Dual-read for 24h (query both, log discrepancies) to verify parity. -4. Flip reads fully to bigRAG, stop writes to Pinecone, and once dust - settles, delete the Pinecone index. +4. Flip reads fully to bigRAG, stop writes to Pinecone, and once dust settles, delete the Pinecone index. 
From 9710258afb88d191672a374cb5700975e0b9609f Mon Sep 17 00:00:00 2001 From: Bigint <69431456+bigint@users.noreply.github.com> Date: Fri, 22 May 2026 19:34:43 +0530 Subject: [PATCH 08/11] refactor: remove unused ingestion queue methods --- api/bigrag/services/queue.py | 30 ------------------------------ 1 file changed, 30 deletions(-) diff --git a/api/bigrag/services/queue.py b/api/bigrag/services/queue.py index 7e0845a0..ecfbc697 100644 --- a/api/bigrag/services/queue.py +++ b/api/bigrag/services/queue.py @@ -81,9 +81,6 @@ async def _recover_stuck_jobs(self) -> int: return 0 return await queue_recovery.recover_stuck_jobs(self._redis) - async def _epoch_value(self, key: str) -> int: - return await queue_state.epoch_value(self._redis, key) - async def _collection_epoch(self, collection_name: str) -> int: return await queue_state.collection_epoch(self._redis, collection_name) @@ -116,14 +113,6 @@ async def enqueue(self, job: IngestionJob) -> None: await self._redis.hincrby(STATS_KEY, "queued", 1) logger.info(f"{job.collection_name} | queued | {pending + 1} pending") - async def flush_collection(self, collection_name: str) -> int: - if not self._redis: - return 0 - removed = await queue_state.flush_collection_jobs(self._redis, collection_name) - if removed: - logger.info("queue flushed jobs", collection=collection_name, removed=removed) - return int(removed) - async def cancel_collection(self, collection_name: str) -> int: if not self._redis: return 0 @@ -241,25 +230,6 @@ async def _fanout_webhook_event(self, event: IngestionEvent) -> None: error=repr(exc), ) - async def _ocr_scanned_pdf( - self, - *, - file_data: bytes, - suffix: str, - job: IngestionJob, - prefix: str, - start_time: float, - ) -> str: - return await queue_conversion.ocr_scanned_pdf( - file_data=file_data, - suffix=suffix, - job=job, - prefix=prefix, - start_time=start_time, - emit=self._emit, - ensure_job_current=self._ensure_job_current, - ) - async def _convert_document(self, job: IngestionJob, 
prefix: str) -> ParsedDocument: return await queue_conversion.convert_document( job, From 58cdd97151c7de69b79b72b653beb5eecfc57a2a Mon Sep 17 00:00:00 2001 From: Bigint <69431456+bigint@users.noreply.github.com> Date: Fri, 22 May 2026 19:34:44 +0530 Subject: [PATCH 09/11] refactor: remove unused service helper functions --- api/bigrag/services/access_log/flusher.py | 4 ---- api/bigrag/services/audit.py | 15 --------------- .../connectors/google_drive_sources.py | 19 ------------------- api/bigrag/services/event_bus/types.py | 14 -------------- 4 files changed, 52 deletions(-) diff --git a/api/bigrag/services/access_log/flusher.py b/api/bigrag/services/access_log/flusher.py index 97f1c2c9..e81a13ff 100644 --- a/api/bigrag/services/access_log/flusher.py +++ b/api/bigrag/services/access_log/flusher.py @@ -21,10 +21,6 @@ _access_log_flusher_task: asyncio.Task | None = None -def get_queue() -> asyncio.Queue[dict[str, Any]] | None: - return _access_log_queue - - async def _drain_batch(queue: asyncio.Queue[dict[str, Any]]) -> list[dict[str, Any]]: try: first = await asyncio.wait_for(queue.get(), timeout=_ACCESS_LOG_FLUSH_INTERVAL) diff --git a/api/bigrag/services/audit.py b/api/bigrag/services/audit.py index 999adb90..552ba358 100644 --- a/api/bigrag/services/audit.py +++ b/api/bigrag/services/audit.py @@ -164,21 +164,6 @@ async def stop_audit_flusher() -> None: _audit_queue = None -async def flush_audit_logs() -> None: - queue = _audit_queue - if queue is None: - return - while not queue.empty(): - batch: list[dict[str, Any]] = [] - while len(batch) < _AUDIT_BATCH_MAX: - try: - batch.append(queue.get_nowait()) - except asyncio.QueueEmpty: - break - if batch: - await _flush_batch(batch) - - def record( request: Request, *, diff --git a/api/bigrag/services/connectors/google_drive_sources.py b/api/bigrag/services/connectors/google_drive_sources.py index 4b48c2d0..3e592de6 100644 --- a/api/bigrag/services/connectors/google_drive_sources.py +++ 
b/api/bigrag/services/connectors/google_drive_sources.py @@ -6,7 +6,6 @@ from bigrag.services.connector_core import ( configured, create_source, - create_sync_job, delete_source, list_sources, source_public, @@ -83,24 +82,6 @@ async def create_google_source( ) -async def create_google_sync_job( - session, - *, - source: ConnectorSource, - trigger: str, - user_id: str | None, - commit: bool = True, -) -> ConnectorSyncJob: - return await create_sync_job( - session, - provider=GOOGLE_PROVIDER, - source=source, - trigger=trigger, - user_id=user_id, - commit=commit, - ) - - def start_google_sync_job(job_id: str) -> None: from bigrag.services.jobs.actors import enqueue_google_drive_sync diff --git a/api/bigrag/services/event_bus/types.py b/api/bigrag/services/event_bus/types.py index a049a00a..7bf2df94 100644 --- a/api/bigrag/services/event_bus/types.py +++ b/api/bigrag/services/event_bus/types.py @@ -30,20 +30,6 @@ class IngestionEvent: detail: dict = field(default_factory=dict) collection_name: str = "" - def to_sse(self) -> str: - data = { - "document_id": self.document_id, - "collection_name": self.collection_name, - "step": self.step, - "status": self.status, - "message": self.message, - "progress": self.progress, - **self.detail, - } - return ( - f"id: {next_sse_id()}\nretry: {SSE_RETRY_MS}\ndata: {orjson.dumps(data).decode()}\n\n" - ) - def serialize(self) -> bytes: return orjson.dumps(asdict(self)) From 7e30899938e9c4d960a210de91302f883163e839 Mon Sep 17 00:00:00 2001 From: Bigint <69431456+bigint@users.noreply.github.com> Date: Fri, 22 May 2026 19:36:20 +0530 Subject: [PATCH 10/11] refactor: remove unreachable retry-promotion code --- api/bigrag/services/queue.py | 13 ------------ api/bigrag/services/queue_state.py | 32 ------------------------------ 2 files changed, 45 deletions(-) diff --git a/api/bigrag/services/queue.py b/api/bigrag/services/queue.py index ecfbc697..752e4788 100644 --- a/api/bigrag/services/queue.py +++ b/api/bigrag/services/queue.py @@ 
-21,7 +21,6 @@ QUEUE_KEY = queue_state.QUEUE_KEY PROCESSING_KEY = queue_state.PROCESSING_KEY DEAD_LETTER_KEY = queue_state.DEAD_LETTER_KEY -RETRY_KEY = queue_state.RETRY_KEY STATS_KEY = queue_state.STATS_KEY LEASE_KEY_PREFIX = queue_state.LEASE_KEY_PREFIX COLLECTION_EPOCH_KEY_PREFIX = queue_state.COLLECTION_EPOCH_KEY_PREFIX @@ -150,18 +149,6 @@ async def stats(self) -> dict: ) return stats - async def _promote_due_retries(self) -> int: - from bigrag.services.runtime_settings import get_value - - queue_max_depth = await get_value("queue_max_depth") - promoted = await queue_state.promote_due_retries( - self._redis, - queue_max_depth=queue_max_depth, - ) - if promoted: - logger.info("queue promoted retry jobs", count=promoted) - return promoted - async def _renew_lease(self, job_id: str) -> None: lease_key = _lease_key(job_id) while True: diff --git a/api/bigrag/services/queue_state.py b/api/bigrag/services/queue_state.py index b706c79f..daa48d90 100644 --- a/api/bigrag/services/queue_state.py +++ b/api/bigrag/services/queue_state.py @@ -16,23 +16,6 @@ LEASE_TTL_SECONDS = 30 * 60 LEASE_RENEW_INTERVAL_SECONDS = 60 LEASE_ACTIVE_MIN_TTL_SECONDS = LEASE_TTL_SECONDS - LEASE_RENEW_INTERVAL_SECONDS * 4 -RETRY_PROMOTION_LIMIT = 100 - -PROMOTE_RETRIES_LUA = """ -local promoted = 0 -local due = redis.call('ZRANGEBYSCORE', KEYS[1], '-inf', ARGV[1], 'LIMIT', 0, tonumber(ARGV[3])) -for _, raw in ipairs(due) do - local depth = redis.call('LLEN', KEYS[2]) - if depth >= tonumber(ARGV[2]) then - break - end - if redis.call('ZREM', KEYS[1], raw) == 1 then - redis.call('LPUSH', KEYS[2], raw) - promoted = promoted + 1 - end -end -return promoted -""" FLUSH_LUA = """ local items = redis.call('LRANGE', KEYS[1], 0, -1) @@ -136,21 +119,6 @@ async def ensure_job_current(redis, job: IngestionJob) -> None: raise IngestionCancelledError(f"Ingestion cancelled for document '{job.document_id}'") -async def promote_due_retries(redis, *, queue_max_depth: int, now: int | None = None) -> int: - due_at 
= int(time_seconds() if now is None else now) - return int( - await redis.eval( - PROMOTE_RETRIES_LUA, - 2, - RETRY_KEY, - QUEUE_KEY, - due_at, - queue_max_depth, - RETRY_PROMOTION_LIMIT, - ) - ) - - async def flush_collection_jobs(redis, collection_name: str) -> int: removed = await redis.eval( FLUSH_LUA, From 90edc3b32e37f21fd2cd2d430419fee53642db91 Mon Sep 17 00:00:00 2001 From: Bigint <69431456+bigint@users.noreply.github.com> Date: Fri, 22 May 2026 19:36:21 +0530 Subject: [PATCH 11/11] refactor: remove unused backup manifest verification --- api/bigrag/services/backup/manifest.py | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/api/bigrag/services/backup/manifest.py b/api/bigrag/services/backup/manifest.py index 2661364c..515c43a0 100644 --- a/api/bigrag/services/backup/manifest.py +++ b/api/bigrag/services/backup/manifest.py @@ -67,18 +67,3 @@ def _manifest( body = orjson.dumps(payload, option=orjson.OPT_SORT_KEYS) payload["hmac_sha256"] = hmac.new(key, body, hashlib.sha256).hexdigest() return payload - - -def verify_manifest(manifest: dict[str, Any]) -> bool: - signature = manifest.get("hmac_sha256") - if not signature: - return False - key = _signing_key() - if key is None: - return False - body = orjson.dumps( - {k: v for k, v in manifest.items() if k != "hmac_sha256"}, - option=orjson.OPT_SORT_KEYS, - ) - expected = hmac.new(key, body, hashlib.sha256).hexdigest() - return hmac.compare_digest(expected, signature)