diff --git a/src/lib/navigation.ts b/src/lib/navigation.ts
index bbd60ee7..c44f4774 100644
--- a/src/lib/navigation.ts
+++ b/src/lib/navigation.ts
@@ -39,7 +39,20 @@ export const tabNavigation: NavTab[] = [
icon: 'rocket',
items: [
{ title: 'Introduction', href: '/docs' },
- { title: 'Self-Hosting', href: '/docs/self-hosting', badge: 'New' },
+ {
+ title: 'Self-Hosting',
+ badge: 'New',
+ items: [
+ { title: 'Overview', href: '/docs/self-hosting' },
+ { title: 'Requirements', href: '/docs/self-hosting/requirements' },
+ { title: 'Docker Compose', href: '/docs/self-hosting/docker-compose' },
+ { title: 'Environment Variables', href: '/docs/self-hosting/environment' },
+ { title: 'System Configuration', href: '/docs/self-hosting/configuration' },
+ { title: 'User Management', href: '/docs/self-hosting/user-management' },
+ { title: 'Production', href: '/docs/self-hosting/production' },
+ { title: 'Troubleshooting', href: '/docs/self-hosting/troubleshooting' },
+ ]
+ },
{
title: 'Quickstart',
icon: 'rocket',
diff --git a/src/pages/docs/self-hosting.mdx b/src/pages/docs/self-hosting.mdx
index fcc41f5a..9eb7c337 100644
--- a/src/pages/docs/self-hosting.mdx
+++ b/src/pages/docs/self-hosting.mdx
@@ -1,214 +1,102 @@
---
title: "Self-Hosting"
-description: "Deploy the full Future AGI platform on your own infrastructure with Docker Compose."
+description: "Run the complete Future AGI platform on your own infrastructure."
---
## About
-Future AGI is the complete platform to test, guard, and monitor AI agents. Build self-improving agents that ship smarter with every version. Self-hosting runs the entire platform on your infrastructure. The backend is built on Django, the frontend on React + Vite. All data stays on your machines.
+Future AGI is fully open-source. Self-hosting runs the entire stack on your machines — all traces, datasets, evaluations, and model calls stay within your network. Backend is Django, frontend is React + Vite, LLM gateway is Go.
-## When to self-host
+Not sure if you need this? The hosted version at [app.futureagi.com](https://app.futureagi.com) is easier to operate. Self-host when you need **data residency**, **air-gapped environments**, **cost control at scale**, or **deep customization**.
-- **Data residency or compliance**: Traces, datasets, and evaluation outputs never leave your network.
-- **Air-gapped environments**: Run without outbound internet access (air-gapped support is coming; Docker Compose today assumes outbound access for pulling base images and LLM APIs).
-- **Cost control at scale**: For high-volume workloads, run your own infrastructure instead of the hosted plan.
-- **Customization**: Modify any part of the platform to fit internal systems.
-
-If none of these apply, the hosted version at [app.futureagi.com](https://app.futureagi.com) is easier to operate.
-
-### Deployment options
-
-| Option | Status |
-|---|---|
-| Docker Compose | Available |
-| Helm Charts (Kubernetes) | Coming soon |
-| Air-gapped | Coming soon |
-
-## Prerequisites
-
-| Requirement | Minimum | Recommended |
-|---|---|---|
-| RAM | 8 GB | 16 GB |
-| Disk | 20 GB free | 50 GB+ |
-| CPU | 4 cores | 8 cores |
-| Docker | 24.0+ | Latest stable |
-| Docker Compose | v2.20+ | Latest stable |
-| Python | 3.11 | 3.11 |
-
-## Install Docker and Compose
-
-
-
-
-Install [Docker Desktop for Mac](https://docs.docker.com/desktop/setup/install/mac-install/), or use [Colima](https://github.com/abiosoft/colima) for a lighter alternative:
+## Quick start
```bash
-brew install docker docker-compose colima
-colima start --cpu 4 --memory 8 --disk 64
-```
-
-If using Docker Desktop, go to Settings > Resources and set RAM to at least **8 GB** and disk to **64 GB**.
-
-
-
-
-```bash
-sudo apt-get update
-sudo apt-get install -y docker.io docker-compose-v2
-sudo systemctl start docker && sudo systemctl enable docker
-sudo usermod -aG docker $USER
-```
-
-Log out and back in for the group change to take effect.
-
-
-
-
-Install [Docker Desktop for Windows](https://docs.docker.com/desktop/setup/install/windows-install/) with WSL 2 backend enabled.
-
-
-
-
-Verify:
-
-```bash
-docker --version # 24.0+
-docker compose version # v2.20+
-```
-
-## Clone the repository
-
-1. [Fork the repository](https://github.com/future-agi/future-agi/fork) on GitHub.
-2. Clone your fork:
-
-```bash
-git clone https://github.com/YOUR_USERNAME/future-agi.git
+git clone https://github.com/future-agi/future-agi.git
cd future-agi
cp .env.example .env
-```
-
-## Configure environment
-
-Replace every `CHANGEME` value in `.env`:
-
-```bash
-openssl rand -hex 32 # for SECRET_KEY and AGENTCC_INTERNAL_API_KEY
-openssl rand -base64 24 # for PG_PASSWORD
-```
-
-| Variable | What it's for |
-|---|---|
-| `SECRET_KEY` | Django session signing and CSRF |
-| `PG_PASSWORD` | PostgreSQL password |
-| `AGENTCC_INTERNAL_API_KEY` | Shared secret between backend and gateway |
-
-
-For a quick local test, the stack boots fine without changing anything. The `CHANGEME` values only matter when you expose the instance to others.
-
-
-## Run the Stack
-
-```bash
+docker pull futureagi/future-agi:v1.8.19_base
docker compose up
```
-To run in the background:
-
-```bash
-docker compose up -d
-docker compose logs -f backend # watch startup progress
-```
-
-Once all services are up, open the frontend and backend:
+First boot builds from source (~10–15 min). After `Application startup complete`:
-- **Frontend**: [http://localhost:3031](http://localhost:3031)
-- **Backend API**: [http://localhost:8000](http://localhost:8000)
-
-Verify all services are healthy:
-
-```bash
-docker compose ps
-```
-
-## Create your account
-
-The email registration flow requires Mailgun credentials (`MAILGUN_API_KEY` and `MAILGUN_SENDER_DOMAIN` in `.env`). Without Mailgun, create a user via the Django shell:
-
-```bash
-docker compose exec backend python manage.py shell -c "
-from django.contrib.auth.hashers import make_password
-from accounts.models import User
-User.objects.create(email='you@example.com', password=make_password('your-password'))
-"
-```
+| Service | URL |
+|---|---|
+| Frontend | http://localhost:3000 |
+| Backend API | http://localhost:8000 |
+| PeerDB UI | http://localhost:3001 — `peerdb` / `peerdb` |
-Log in with those credentials.
+## Deployment options
-## Stop, reset, and upgrade
+| Option | Status |
+|---|---|
+| Docker Compose | Available |
+| Helm / Kubernetes | Coming soon |
+| Air-gapped | Coming soon |
-```bash
-# Stop (data persists in Docker volumes)
-docker compose down
+## Architecture
-# Stop and wipe all data (fresh start)
-docker compose down -v
+21 containers across four layers.
-# Upgrade to latest
-git pull
-docker compose build
-docker compose up -d
```
-
-
-Migrations run automatically on boot. If a migration fails after an upgrade, run `docker compose exec backend python manage.py migrate` manually and check the release notes.
-
-
-## Development mode
-
-Use the dev overlay for hot reload and per-queue workers:
-
-```bash
-docker compose -f docker-compose.yml -f docker-compose.dev.yml up
+Browser
+ └─ frontend (React/nginx)
+ └─ backend (Django) ──── gateway (Go) ──── OpenAI · Anthropic · Gemini · Bedrock
+ ├── postgres primary DB + WAL replication
+ ├── clickhouse analytics store
+ ├── redis cache / pub-sub
+ ├── minio object storage
+ └── temporal ──── worker background jobs / eval pipelines
+
+postgres ──── PeerDB CDC ──── clickhouse (continuous replication)
```
-This adds hot reload for backend code, per-queue Temporal workers, exposed database ports, and a Temporal UI.
-
-## Production checklist
+**Application** — `frontend` · `backend` · `worker` · `gateway` · `serving` · `code-executor`
-Before exposing to users:
+**Data** — `postgres` · `clickhouse` · `redis` · `minio`
-- [ ] Replace every `CHANGEME` value in `.env`
-- [ ] Set `ENV_TYPE=prod` and `FAST_STARTUP=false`
-- [ ] Put a reverse proxy (Caddy, Nginx, or Traefik) in front for TLS
-- [ ] Replace Compose databases with managed equivalents (RDS, ClickHouse Cloud, ElastiCache, S3)
-- [ ] Increase backend workers (`GRANIAN_WORKERS` in `.env`) to match your CPU count
-- [ ] Set up backups for PostgreSQL (`pg_dump`) and ClickHouse (`BACKUP`)
-- [ ] Use a secrets manager instead of a plain `.env` file
+**Workflow** — `temporal`
-## Troubleshooting
-
-| Problem | Fix |
-|---|---|
-| First build takes 10+ minutes | Normal. Images build from source. Subsequent starts are under 30 seconds. |
-| Container keeps restarting | Run `docker compose logs SERVICE_NAME`. Usually a missing env var or port conflict. All ports are configurable in `.env`. |
-| Backend never shows startup complete | Allocate at least 8 GB RAM to Docker. Check with `docker info`. |
-| Frontend shows blank page | The `VITE_HOST_API` variable in `.env` must match the backend URL accessible from your browser. Changing it requires rebuilding: `docker compose build frontend`. |
+**CDC (PeerDB)** — `peerdb-catalog` · `peerdb-temporal` · `peerdb-minio` · `peerdb-flow-api` · `peerdb-flow-worker` · `peerdb-flow-snapshot-worker` · `peerdb-server` · `peerdb-ui` · `peerdb-temporal-init` · `peerdb-init`
-For anything not listed, open an issue at [github.com/future-agi/future-agi/issues](https://github.com/future-agi/future-agi/issues) with the output of `docker compose logs`.
+| Layer | Service | Purpose |
+|---|---|---|
+| App | `frontend` | React SPA served by nginx |
+| App | `backend` | Django REST + gRPC + WebSocket API |
+| App | `worker` | Temporal worker — evals, agent loops, data jobs |
+| App | `gateway` | Go LLM proxy — routing, retries, rate limits, logging |
+| App | `serving` | Embeddings and small model inference |
+| App | `code-executor` | nsjail-sandboxed eval code runner (`privileged: true` required) |
+| Data | `postgres` | Primary DB — users, traces, datasets, evals, prompts |
+| Data | `clickhouse` | Analytics DB — replicated from Postgres via PeerDB |
+| Data | `redis` | Cache, rate limits, WebSocket pub/sub |
+| Data | `minio` | S3-compatible object storage (swap for S3 in prod) |
+| Workflow | `temporal` | Durable workflow engine — shares main Postgres |
+| CDC | PeerDB stack | Continuous Postgres → ClickHouse replication (10 services) |
## Next Steps
-
- Set up traceAI to observe your AI application.
+
+ Hardware tiers, platform compatibility, ports reference.
+
+
+ Setup, deployment modes, day-to-day operations.
+
+
+ Full `.env` reference — secrets, ports, flags, keys.
+
+
+ LLM gateway providers, PeerDB mirrors, Temporal workers.
-
- Evaluate your AI agent outputs.
+
+ Create accounts via email or Django shell.
-
- Route, cache, and guard LLM requests with Agent Command Center.
+
+ Hardening, backups, monitoring, upgrades.
-
- Set up the development environment and contribute.
+
+ Solutions for every known error.
diff --git a/src/pages/docs/self-hosting/configuration.mdx b/src/pages/docs/self-hosting/configuration.mdx
new file mode 100644
index 00000000..db2f7215
--- /dev/null
+++ b/src/pages/docs/self-hosting/configuration.mdx
@@ -0,0 +1,139 @@
+---
+title: "System Configuration"
+description: "Complete the configuration for the LLM gateway, PeerDB CDC mirrors, and Temporal workers."
+---
+
+## About
+
+Configure the moving parts that aren't covered by `.env` alone: provider entries in the LLM gateway's `config.yaml`, the PeerDB Postgres → ClickHouse replication mirrors, and Temporal worker concurrency.
+
+## LLM gateway
+
+
+The LLM gateway requires additional configuration before model calls will work. You must create a `config.yaml` and provide your provider API keys — see the setup steps below.
+
+
+The gateway is a Go LLM proxy that routes all model calls. It ships with `config.example.yaml` — OpenAI enabled by default.
+
+### Setup
+
+```bash
+# 1. Copy the example
+cp futureagi/agentcc-gateway/config.example.yaml \
+ futureagi/agentcc-gateway/config.yaml
+
+# 2. Edit config.yaml — uncomment providers, set keys via ${VAR} interpolation
+# 3. Set matching keys in .env (OPENAI_API_KEY, ANTHROPIC_API_KEY, etc.)
+
+# 4. Point the gateway volume at your config.yaml (in docker-compose.yml)
+# volumes:
+# - ./futureagi/agentcc-gateway/config.yaml:/app/config.yaml:ro
+
+# 5. Restart
+docker compose up -d --force-recreate gateway
+```
+
+`config.yaml` is gitignored. Treat it as a secret.
+
+### Provider config examples
+
+
+
+```yaml
+providers:
+ openai:
+ api_key: "${OPENAI_API_KEY}"
+ api_format: "openai"
+ models: [gpt-4o, gpt-4o-mini]
+
+ anthropic:
+ api_key: "${ANTHROPIC_API_KEY}"
+ api_format: "anthropic"
+ models: [claude-opus-4-5, claude-sonnet-4-5]
+
+ gemini:
+ api_key: "${GOOGLE_API_KEY}"
+ api_format: "gemini"
+ models: [gemini-2.0-flash, gemini-1.5-pro]
+```
+
+
+```yaml
+providers:
+ bedrock:
+ api_key: "${AWS_SECRET_ACCESS_KEY}"
+ api_format: "bedrock"
+ region: "${AWS_REGION}"
+ access_key: "${AWS_ACCESS_KEY_ID}"
+ models: [anthropic.claude-3-5-sonnet-20241022-v2:0]
+```
+
+
+```yaml
+providers:
+ vertex:
+ base_url: "https://us-central1-aiplatform.googleapis.com"
+ api_key: "${GOOGLE_ACCESS_TOKEN}"
+ api_format: "gemini"
+ headers:
+ x-gcp-project: "${GCP_PROJECT_ID}"
+ x-gcp-location: "us-central1"
+ models: [gemini-2.0-flash-001]
+```
+Vertex uses a Bearer token, not an API key. Rotate `GOOGLE_ACCESS_TOKEN` via a sidecar calling `gcloud auth print-access-token`.
+
+
+
+For routing rules, rate limits, caching, and the full config reference — see [Agent Command Center → Self-hosted](/docs/command-center/deployment/self-hosted).
+
+---
+
+## PeerDB (Postgres → ClickHouse CDC)
+
+PeerDB continuously replicates Postgres tables into ClickHouse so trace and eval analytics stay fast.
+
+**First-boot timing issue**: `peerdb-init` runs immediately on startup, before Django migrations may have completed. If mirrors show "not started" in the PeerDB UI:
+
+```bash
+# 1. Wait until backend logs "Application startup complete"
+docker compose logs -f backend
+
+# 2. Re-run init
+docker compose run --rm peerdb-init bash /setup.sh
+```
+
+Verify at [http://localhost:3001](http://localhost:3001) — mirrors should show `running` within seconds.
+
+After upgrades that touch replicated tables, re-run the same init command.
+
+---
+
+## Temporal workers
+
+**Default (all-queue)** — one worker polls all task queues. Controlled by `TEMPORAL_ALL_QUEUES=true` in `.env`. Good for self-hosted deployments.
+
+**Per-queue workers** (dev mode) — six dedicated workers via the dev overlay:
+
+| Service name | Queue | Typical concurrency |
+|---|---|---|
+| `worker-default` | `default` | 100 |
+| `worker-tasks-s` | `tasks_s` | 200 |
+| `worker-tasks-l` | `tasks_l` | 50 |
+| `worker-tasks-xl` | `tasks_xl` | 10 |
+| `worker-trace-ingestion` | `trace_ingestion` | 100 |
+| `worker-agent-compass` | `agent_compass` | 50 |
+
+Tune concurrency in `.env` via `TEMPORAL_MAX_CONCURRENT_ACTIVITIES` and `TEMPORAL_MAX_CONCURRENT_WORKFLOW_TASKS`.
+
+Temporal UI (dev mode): [http://localhost:8085](http://localhost:8085)
+
+## Next Steps
+
+
+
+ Hardening, backups, and monitoring before going live.
+
+
+ Solutions for common configuration errors.
+
+
diff --git a/src/pages/docs/self-hosting/docker-compose.mdx b/src/pages/docs/self-hosting/docker-compose.mdx
new file mode 100644
index 00000000..a4098a84
--- /dev/null
+++ b/src/pages/docs/self-hosting/docker-compose.mdx
@@ -0,0 +1,127 @@
+---
+title: "Docker Compose"
+description: "Clone, configure, and run the full Future AGI stack. Covers all three deployment modes."
+---
+
+## About
+
+Docker Compose is the supported way to run a self-hosted Future AGI instance. This page covers the full-stack deployment (all 21 services), the dev overlay with hot reload and per-queue workers, and a frontend-only mode for pointing the UI at a remote backend.
+
+## Setup
+
+```bash
+git clone https://github.com/future-agi/future-agi.git
+cd future-agi
+cp .env.example .env
+docker pull futureagi/future-agi:v1.8.19_base
+docker compose up
+```
+
+First boot builds from source (~10–15 min). When the backend logs `Application startup complete`:
+
+- **Frontend** — [http://localhost:3000](http://localhost:3000)
+- **Backend API** — [http://localhost:8000](http://localhost:8000)
+- **PeerDB UI** — [http://localhost:3001](http://localhost:3001) · `peerdb` / `peerdb`
+
+Replace `CHANGEME` secrets in `.env` before sharing the instance with others. See [Environment Variables](/docs/self-hosting/environment).
+
+---
+
+## Deployment modes
+
+### Mode 1 — Full stack (default)
+
+```bash
+docker compose up -d # detached
+docker compose ps # check health
+docker compose logs -f backend
+```
+
+Starts all 21 services. Frontend binds on `0.0.0.0:3000`; all data stores bind on `127.0.0.1`. For production, put a reverse proxy (Caddy, nginx, Traefik) in front for HTTPS.
+
+### Mode 2 — Dev overlay
+
+```bash
+docker compose -f docker-compose.yml -f docker-compose.dev.yml up
+```
+
+| What changes | Detail |
+|---|---|
+| Hot reload | `./futureagi` volume-mounted into backend and workers — Python changes reload without rebuild. Frontend also supports hot-reload in dev mode. |
+| Per-queue workers | 6 workers (`worker-default`, `worker-tasks-s`, `worker-tasks-l`, `worker-tasks-xl`, `worker-trace-ingestion`, `worker-agent-compass`) instead of one all-queue worker |
+| Public DB ports | Postgres, ClickHouse, Redis, MinIO, Temporal all bind on `0.0.0.0` for host tool access |
+| Temporal UI | [http://localhost:8085](http://localhost:8085) |
+| `FAST_STARTUP=true` | Migrations skipped on restart — run manually: `docker compose exec -it backend bash -c "python manage.py migrate"` |
+
+The base `worker` service is disabled in dev mode (moved to the `oss-only` profile) to prevent duplicate queue polling.
+
+### Mode 3 — Frontend only
+
+For pointing the UI at a remote backend (another Compose project, a VM, or Future AGI Cloud).
+
+```bash
+VITE_HOST_API=https://api.your-backend.example.com \
+ docker compose -f docker-compose.frontend.yml up --build
+```
+
+
+`VITE_HOST_API` is baked into the JS bundle at build time. Changing it requires a rebuild: `docker compose -f docker-compose.frontend.yml build --no-cache frontend`
+
+
+---
+
+## Operations
+
+```bash
+# Logs
+docker compose logs -f backend worker
+
+# Shell into a container
+docker compose exec backend bash
+docker compose exec postgres psql -U futureagi -d futureagi
+
+# Stop (data persists)
+docker compose down
+
+# Wipe all data and restart fresh
+docker compose down -v
+```
+
+---
+
+## Upgrading
+
+```bash
+git pull
+docker compose build
+docker compose up -d
+```
+
+Migrations run automatically on startup. If a migration fails:
+
+```bash
+docker compose exec backend python manage.py migrate
+```
+
+If the release notes mention PeerDB mirror changes, re-run init after migrations complete:
+
+```bash
+docker compose run --rm peerdb-init bash /setup.sh
+```
+
+## Next Steps
+
+
+
+ Configure secrets, ports, and runtime flags in `.env`.
+
+
+ Set up LLM gateway providers and Temporal workers.
+
+
+ Create your first account and configure email delivery.
+
+
+ Hardening checklist before exposing to users.
+
+
diff --git a/src/pages/docs/self-hosting/environment.mdx b/src/pages/docs/self-hosting/environment.mdx
new file mode 100644
index 00000000..0c63ef95
--- /dev/null
+++ b/src/pages/docs/self-hosting/environment.mdx
@@ -0,0 +1,118 @@
+---
+title: "Environment Variables"
+description: "Full .env reference — secrets, ports, runtime flags, LLM keys, and frontend config."
+---
+
+## About
+
+Reference for every environment variable the stack reads from `.env`. Grouped by purpose: secrets, database credentials, runtime flags, LLM provider keys, email, and frontend build-time config.
+
+```bash
+cp .env.example .env
+```
+
+The stack boots fine with defaults. Replace `CHANGEME` secrets before sharing with others.
+
+## Required secrets
+
+| Variable | Generate with | Used by |
+|---|---|---|
+| `SECRET_KEY` | `openssl rand -hex 32` | Django sessions, CSRF, password reset |
+| `PG_PASSWORD` | `openssl rand -base64 24` | PostgreSQL auth |
+| `MINIO_ROOT_PASSWORD` | `openssl rand -base64 24` | MinIO object storage auth |
+| `AGENTCC_INTERNAL_API_KEY` | `openssl rand -hex 32` | Backend ↔ gateway shared secret |
+
+## Database credentials
+
+| Variable | Default | Notes |
+|---|---|---|
+| `PG_USER` | `futureagi` | PostgreSQL username |
+| `PG_PASSWORD` | `CHANGEME` | **Must change** |
+| `PG_DB` | `futureagi` | PostgreSQL database name |
+| `MINIO_ROOT_USER` | `futureagi` | MinIO username |
+| `MINIO_ROOT_PASSWORD` | `CHANGEME` | **Must change** |
+| `CH_USE_REPLICATED_ENGINES` | `false` | `true` only for multi-node ClickHouse |
+
+## Ports
+
+All configurable. See [Requirements → Ports reference](/docs/self-hosting/requirements#ports-reference) for the full table with defaults and exposure scope.
+
+## Backend runtime
+
+| Variable | Default | Description |
+|---|---|---|
+| `ENV_TYPE` | `development` | `development` · `staging` · `prod` — prod mode disables debug output, enables `check --deploy` |
+| `FAST_STARTUP` | `false` | Skip migrations on restart (dev only). Always `false` in production. |
+| `GRANIAN_WORKERS` | `1` | ASGI worker processes. Set to CPU count in production. |
+| `GRANIAN_THREADS` | `2` | Threads per worker. |
+| `ENABLE_GRPC` | `true` | Enable gRPC endpoint. |
+| `ENABLE_HTTP` | `true` | Enable HTTP/REST endpoint. |
+
+## Temporal worker
+
+| Variable | Default | Description |
+|---|---|---|
+| `TEMPORAL_NAMESPACE` | `default` | Temporal namespace. |
+| `TEMPORAL_ALL_QUEUES` | `true` | Single worker polls all queues. Set `false` + use dev overlay for per-queue workers. |
+| `TEMPORAL_MAX_CONCURRENT_ACTIVITIES` | `50` | Max concurrent activity tasks. |
+| `TEMPORAL_MAX_CONCURRENT_WORKFLOW_TASKS` | `50` | Max concurrent workflow tasks. |
+
+## LLM gateway
+
+| Variable | Default | Description |
+|---|---|---|
+| `AGENTCC_INTERNAL_API_KEY` | `CHANGEME` | **Must change.** Backend authenticates gateway calls with this. |
+
+## LLM provider keys
+
+Leave blank for providers you're not using.
+
+| Variable | Provider |
+|---|---|
+| `OPENAI_API_KEY` | OpenAI |
+| `ANTHROPIC_API_KEY` | Anthropic |
+| `GOOGLE_API_KEY` | Google Gemini |
+| `AWS_ACCESS_KEY_ID` / `AWS_SECRET_ACCESS_KEY` / `AWS_REGION` | AWS Bedrock + S3 |
+
+## Email (Mailgun)
+
+Required for email-based sign-up and password reset. Without these, create users via the Django shell — see [User Management](/docs/self-hosting/user-management).
+
+| Variable | Description |
+|---|---|
+| `MAILGUN_API_KEY` | Mailgun private API key |
+| `MAILGUN_SENDER_DOMAIN` | Verified Mailgun sending domain |
+| `DEFAULT_FROM_EMAIL` | `From:` address for outbound emails |
+| `SERVER_EMAIL` | Django admin error emails |
+
+## Frontend build-time
+
+
+These are baked into the JS bundle at Vite build time. Changing them requires rebuilding: `docker compose build frontend`
+
+
+| Variable | Default | Description |
+|---|---|---|
+| `VITE_HOST_API` | `http://localhost:8000` | Backend URL as seen by the browser. In production: your public backend URL. |
+| `VITE_ENVIRONMENT` | `development` | Frontend analytics and feature flags. |
+
+## Optional
+
+| Variable | Default | Description |
+|---|---|---|
+| `RECAPTCHA_ENABLED` | `false` | Enable reCAPTCHA on registration. |
+| `RECAPTCHA_SECRET_KEY` | — | reCAPTCHA v2/v3 server-side key. |
+| `VITE_GOOGLE_SITE_KEY` | — | reCAPTCHA client-side key (requires frontend rebuild). |
+| `FUTURE_AGI_CLOUD_API_KEY` | — | EE-tier Cloud features only. Leave blank for OSS. |
+| `FUTURE_AGI_CLOUD_API_URL` | `https://api.futureagi.com` | Do not change. |
+
+## Next Steps
+
+
+
+ Set up LLM gateway providers and PeerDB mirrors.
+
+
+ Hardening checklist for exposing the stack to users.
+
+
diff --git a/src/pages/docs/self-hosting/production.mdx b/src/pages/docs/self-hosting/production.mdx
new file mode 100644
index 00000000..bfb2180b
--- /dev/null
+++ b/src/pages/docs/self-hosting/production.mdx
@@ -0,0 +1,137 @@
+---
+title: "Production"
+description: "Hardening checklist, backups, monitoring, and upgrade runbook."
+---
+
+## About
+
+Run through this before exposing the stack to real users. Covers secrets, TLS, swapping in managed data stores, backup commands for Postgres/ClickHouse/MinIO, Prometheus monitoring, and the upgrade and rollback runbook.
+
+## Hardening checklist
+
+**Secrets** — replace all `CHANGEME` values before going live:
+
+```bash
+openssl rand -hex 32 # SECRET_KEY, AGENTCC_INTERNAL_API_KEY
+openssl rand -base64 24 # PG_PASSWORD, MINIO_ROOT_PASSWORD
+```
+
+**Runtime flags** in `.env`:
+- `ENV_TYPE=prod`
+- `FAST_STARTUP=false`
+- `GRANIAN_WORKERS=<number of CPU cores>`
+
+**TLS** — the frontend and backend don't terminate TLS. Put Caddy, nginx, or Traefik in front:
+
+```
+# Caddyfile (simplest — auto-issues Let's Encrypt certs)
+app.yourcompany.com { reverse_proxy localhost:3000 }
+api.yourcompany.com { reverse_proxy localhost:8000 }
+```
+
+After setting up TLS, set `VITE_HOST_API=https://api.yourcompany.com` in `.env` and rebuild:
+
+```bash
+docker compose build frontend && docker compose up -d frontend
+```
+
+**Managed data stores** — for production, replace compose-managed services:
+
+| Replace | With | Change |
+|---|---|---|
+| `postgres` | RDS / Aurora / Cloud SQL | Set `PG_*` vars to managed endpoint |
+| `clickhouse` | ClickHouse Cloud | Set `CH_HOST`, `CH_PORT`, etc. |
+| `redis` | ElastiCache / Upstash | Set `REDIS_URL` |
+| `minio` | AWS S3 | Set `S3_ENDPOINT_URL=https://s3.amazonaws.com` + AWS creds |
+
+
+`code-executor` requires `privileged: true`. Run on EC2 / GCE instances — not Fargate or Cloud Run.
+
+
+**Secrets manager** — use AWS Secrets Manager, HashiCorp Vault, or GCP Secret Manager instead of a plain `.env` file.
+
+---
+
+## Backups
+
+### PostgreSQL
+
+```bash
+# Backup
+docker compose exec -T postgres \
+ pg_dump -U futureagi -d futureagi --format=custom \
+ > backup-$(date +%F).dump
+
+# Restore
+docker compose exec -T postgres \
+ pg_restore -U futureagi -d futureagi --clean --if-exists \
+ < backup-2026-04-22.dump
+```
+
+Volumes: `future-agi_postgres-data` · `future-agi_clickhouse-data` · `future-agi_redis-data` · `future-agi_minio-data` · `future-agi_peerdb-catalog-data` · `future-agi_peerdb-minio-data`
+
+### ClickHouse
+
+```sql
+BACKUP TABLE default.traces TO S3('s3://your-bucket/ch-backup/', 'KEY', 'SECRET');
+```
+
+ClickHouse data can also be rebuilt from scratch by re-running PeerDB init since it replicates from Postgres.
+
+### MinIO
+
+```bash
+mc alias set local http://localhost:9005 futureagi "$MINIO_ROOT_PASSWORD"
+mc alias set s3 https://s3.amazonaws.com "$AWS_ACCESS_KEY_ID" "$AWS_SECRET_ACCESS_KEY"
+mc mirror local/ s3/your-bucket/
+```
+
+---
+
+## Monitoring
+
+Backend exposes Prometheus metrics at `http://localhost:8000/metrics`. Add a scraper:
+
+```yaml
+# prometheus.yml
+scrape_configs:
+ - job_name: futureagi
+ static_configs:
+ - targets: ['localhost:8000']
+ metrics_path: /metrics
+```
+
+Key signals: backend error rate, Temporal workflow success/failure, Postgres WAL lag (PeerDB health), ClickHouse query latency, PeerDB mirror status at [localhost:3001](http://localhost:3001).
+
+---
+
+## Upgrades
+
+```bash
+git pull
+docker compose build
+docker compose up -d
+```
+
+Migrations run automatically. If a migration fails: `docker compose exec backend python manage.py migrate`
+
+If release notes mention PeerDB changes: `docker compose run --rm peerdb-init bash /setup.sh`
+
+**Rollback:**
+
+```bash
+git log --oneline -5
+git checkout <previous-commit-sha>
+docker compose build && docker compose up -d
+```
+
+## Next Steps
+
+
+
+ Symptoms, causes, and fixes for common errors.
+
+
+ Tune the LLM gateway, PeerDB mirrors, and Temporal workers.
+
+
diff --git a/src/pages/docs/self-hosting/requirements.mdx b/src/pages/docs/self-hosting/requirements.mdx
new file mode 100644
index 00000000..a096bbf7
--- /dev/null
+++ b/src/pages/docs/self-hosting/requirements.mdx
@@ -0,0 +1,96 @@
+---
+title: "Requirements"
+description: "Hardware sizing, platform compatibility, OS support, and network requirements."
+---
+
+## About
+
+Hardware tiers, supported platforms, and the network ports each service uses. Read this first to size your environment before running [Docker Compose](/docs/self-hosting/docker-compose).
+
+## Hardware tiers
+
+| Tier | Use case | CPU | RAM | Disk |
+|---|---|---|---|---|
+| **Evaluation** | Local trial, single user | 4 cores | 8 GB | 20 GB |
+| **Team** | 1–20 users, regular eval runs | 8 cores | 16 GB | 50 GB |
+| **Production** | 20+ users, high throughput | 16+ cores | 32+ GB | 200 GB+ SSD |
+
+Resource drivers: ClickHouse and Temporal worker each hold ~1 GB RAM at steady state. First image build is ~6 GB disk. ClickHouse grows with trace volume; Postgres stays small.
+
+
+Docker Desktop (Mac/Windows): Settings → Resources → set RAM ≥ 8 GB, disk ≥ 64 GB. The defaults (2–4 GB RAM) will OOM-kill ClickHouse or the backend.
+
+
+## Software
+
+| Requirement | Minimum | Verify |
+|---|---|---|
+| Docker Engine | 24.0+ | `docker --version` |
+| Docker Compose | v2.20+ | `docker compose version` |
+
+
+
+```bash
+brew install docker docker-compose colima
+colima start --cpu 4 --memory 8 --disk 64
+```
+Or install [Docker Desktop for Mac](https://docs.docker.com/desktop/setup/install/mac-install/) and allocate ≥ 8 GB RAM in Settings → Resources.
+
+
+```bash
+sudo apt-get install -y docker.io docker-compose-v2
+sudo systemctl enable --now docker
+sudo usermod -aG docker $USER # log out and back in
+```
+
+
+Install [Docker Desktop for Windows](https://docs.docker.com/desktop/setup/install/windows-install/) with WSL 2 backend. Allocate ≥ 8 GB RAM in Settings → Resources.
+
+
+
+## Platform compatibility
+
+The `code-executor` service requires `privileged: true`. Platforms that block it will crash the service; the rest of the stack still runs.
+
+| Platform | Supported | Notes |
+|---|---|---|
+| Linux bare metal / EC2 / GCE / Azure VM | Yes | Full support |
+| GKE / EKS with privileged enabled | Yes | Requires PodSecurityPolicy exception |
+| ECS Fargate | No | `privileged: true` not supported |
+| Google Cloud Run | No | Same |
+| Render / Railway / Fly.io | No | Managed platforms block privileged mode |
+
+## Ports reference
+
+All ports are configurable via `.env`.
+
+| Service | Default | Exposed to | `.env` key |
+|---|---|---|---|
+| Frontend | `3000` | `0.0.0.0` | `FRONTEND_PORT` |
+| Backend API | `8000` | `0.0.0.0` | `BACKEND_PORT` |
+| Gateway | `8090` | Internal only | `GATEWAY_PORT` |
+| Model serving | `8080` | Internal only | `SERVING_PORT` |
+| Code executor | `8060` | Internal only | `CODE_EXECUTOR_PORT` |
+| Postgres | `5432` | `127.0.0.1` (dev: public) | `PG_PORT` |
+| ClickHouse HTTP | `8123` | `127.0.0.1` (dev: public) | `CH_HTTP_PORT` |
+| ClickHouse TCP | `9000` | `127.0.0.1` (dev: public) | `CH_PORT` |
+| Redis | `6379` | `127.0.0.1` (dev: public) | `REDIS_PORT` |
+| MinIO API | `9005` | `127.0.0.1` | `MINIO_API_PORT` |
+| MinIO console | `9006` | `127.0.0.1` | `MINIO_CONSOLE_PORT` |
+| Temporal | `7233` | `127.0.0.1` (dev: public) | `TEMPORAL_PORT` |
+| Temporal UI | `8085` | Dev mode only | `TEMPORAL_UI_PORT` |
+| PeerDB server | `9900` | `127.0.0.1` | `PEERDB_PORT` |
+| PeerDB UI | `3001` | `0.0.0.0` | `PEERDB_UI_PORT` |
+
+In production, only the frontend and backend ports should be internet-facing, and only behind a TLS-terminating reverse proxy.
+
+## Next Steps
+
+
+
+ Clone, configure, and run the full stack.
+
+
+ Set secrets and tune runtime flags before first boot.
+
+
diff --git a/src/pages/docs/self-hosting/troubleshooting.mdx b/src/pages/docs/self-hosting/troubleshooting.mdx
new file mode 100644
index 00000000..184f2f97
--- /dev/null
+++ b/src/pages/docs/self-hosting/troubleshooting.mdx
@@ -0,0 +1,146 @@
+---
+title: "Troubleshooting"
+description: "Debugging guide with symptoms, causes, and fixes for the most common self-hosting errors."
+---
+
+## About
+
+Symptoms, causes, and fixes for the errors most commonly hit when self-hosting. Grouped by where they show up: startup, network, PeerDB, Temporal, and post-upgrade.
+
+## Start here
+
+```bash
+docker compose ps # what's running / what's restarting
+docker compose logs -f backend # most informative starting point
+docker compose exec backend bash # shell into any container
+```
+
+---
+
+## Startup errors
+
+**`Cannot connect to the Docker daemon`**
+Docker isn't running. Start Docker Desktop (Mac/Windows) or `sudo systemctl start docker` (Linux).
+
+---
+
+**First build takes 15+ min or hangs on `uv pip install`**
+Normal on first boot. If stuck >20 min, cancel and retry:
+```bash
+docker compose build --no-cache backend
+```
+
+---
+
+**`ERROR: not enough free space`**
+Docker Desktop's virtual disk is full. Settings → Resources → Disk image size → raise to 100 GB+. Or prune: `docker system prune -af && docker builder prune -af`
+
+---
+
+**Port already in use**
+```bash
+lsof -i :3000 # find the conflicting process
+# or override in .env:
+FRONTEND_PORT=3100
+BACKEND_PORT=8100
+```
+
+---
+
+**Backend never reaches `Application startup complete`**
+- Check RAM: `docker info | grep -i memory` — Docker needs ≥ 8 GB
+- Check for migration errors: `docker compose logs backend | grep -i error`
+- Run migrations manually: `docker compose exec backend python manage.py migrate`
+
+---
+
+**`FATAL: password authentication failed for user "futureagi"`**
+`PG_PASSWORD` was changed after the Postgres volume was initialized. Postgres sets the password only on first boot.
+- Option 1: revert `PG_PASSWORD` to the original value
+- Option 2 (data loss): `docker compose down -v && docker compose up -d`
+
+---
+
+**`code-executor` crashes with `clone: Operation not permitted`**
+Host platform blocks `privileged: true`. Won't work on Fargate, Cloud Run, or restricted Kubernetes. Use EC2, GCE, or bare metal. The rest of the stack runs — only code-based eval features are unavailable.
+
+---
+
+## Network and UI errors
+
+**Frontend blank page or CORS errors**
+`VITE_HOST_API` in `.env` doesn't match the current backend URL. Rebuild:
+```bash
+docker compose build --no-cache frontend
+docker compose up -d frontend
+```
+
+---
+
+**API calls fail with 502**
+Backend isn't healthy. Check: `docker compose logs backend` and `docker compose ps backend`.
+
+---
+
+## PeerDB errors
+
+**Mirrors show "not started" or don't appear**
+PeerDB init ran before Django migrations completed. Fix:
+```bash
+docker compose logs -f backend # wait for "Application startup complete"
+docker compose run --rm peerdb-init bash /setup.sh
+```
+Verify at [http://localhost:3001](http://localhost:3001) — mirrors should show `running`.
+
+---
+
+**Analytics data is stale**
+PeerDB replication has fallen behind. Check mirror lag in the PeerDB UI. Re-run init if a mirror shows an error:
+```bash
+docker compose run --rm peerdb-init bash /setup.sh
+```
+
+---
+
+## Temporal errors
+
+**`temporal-server` keeps restarting**
+Almost always a Postgres issue. Check: `docker compose logs postgres`. If Postgres is being OOM-killed, raise Docker RAM to ≥ 8 GB. If Postgres is healthy: `docker compose restart postgres temporal`
+
+---
+
+## After an upgrade
+
+**Migration fails after `git pull`**
+```bash
+docker compose exec backend python manage.py migrate
+```
+If a conflict persists, check the release notes for manual steps.
+
+**Everything worked before the upgrade, now it doesn't**
+```bash
+git log --oneline -5
+git checkout <last-known-good-commit>   # pick a hash from the log above
+docker compose build && docker compose up -d
+```
+
+---
+
+## Still stuck?
+
+Open an issue at [github.com/future-agi/future-agi/issues](https://github.com/future-agi/future-agi/issues) with:
+```bash
+docker compose logs > all-logs.txt 2>&1
+docker compose ps >> all-logs.txt
+```
+
+## Next Steps
+
+
+
+ Verify your platform and resources meet the minimums.
+
+
+ Hardening, backups, and monitoring once the stack is stable.
+
+
diff --git a/src/pages/docs/self-hosting/user-management.mdx b/src/pages/docs/self-hosting/user-management.mdx
new file mode 100644
index 00000000..1bb87d2d
--- /dev/null
+++ b/src/pages/docs/self-hosting/user-management.mdx
@@ -0,0 +1,77 @@
+---
+title: "User Management"
+description: "Create accounts, configure email delivery, and manage users via the Django admin shell."
+---
+
+## About
+
+Create accounts, reset passwords, and manage roles. The email-based sign-up flow needs Mailgun; without it, the Django shell is the fastest path to a first user.
+
+## Create your first user
+
+### With Mailgun (recommended)
+
+Set these in `.env` and restart the backend:
+
+```bash
+MAILGUN_API_KEY=key-...
+MAILGUN_SENDER_DOMAIN=mail.yourcompany.com
+DEFAULT_FROM_EMAIL=no-reply@yourcompany.com
+```
+
+```bash
+docker compose restart backend
+```
+
+Then sign up via [http://localhost:3000](http://localhost:3000).
+
+### Without Mailgun — Django shell
+
+```bash
+docker compose exec backend python manage.py shell -c "
+from django.contrib.auth.hashers import make_password
+from accounts.models import User
+User.objects.create(email='you@example.com', password=make_password('your-password'))
+"
+```
+
+Log in at [http://localhost:3000](http://localhost:3000) with those credentials.
+
+## Superuser
+
+```bash
+docker compose exec backend python manage.py createsuperuser
+```
+
+Superusers can access the Django admin at `http://localhost:8000/admin/`.
+
+## Reset a password
+
+```bash
+docker compose exec backend python manage.py shell -c "
+from django.contrib.auth.hashers import make_password
+from accounts.models import User
+u = User.objects.get(email='you@example.com')
+u.password = make_password('new-password')
+u.save()
+"
+```
+
+## Roles and permissions
+
+Manage workspace roles and permissions in the platform UI under **Settings → User Management** and **Settings → Roles & Permissions**. See [Roles & Permissions](/docs/roles-and-permissions) for the full model.
+
+## SSO / SAML2
+
+Future AGI includes a `saml2_auth` module for SAML2 SSO (Okta, Azure AD, Google Workspace). Configuration requires a SAML2 metadata file and environment variables mounted into the backend container. For setup details, open a discussion at [github.com/future-agi/future-agi](https://github.com/future-agi/future-agi).
+
+## Next Steps
+
+
+
+ Hardening checklist before exposing to real users.
+
+
+ Workspace roles and permission model.
+
+