From c98a8692d93de1ba810c9f2e6be8c7b2502329ed Mon Sep 17 00:00:00 2001
From: rajashish147 <rajashish147@gmail.com>
Date: Sat, 28 Mar 2026 19:42:04 +0530
Subject: [PATCH 1/3] feat: add Radix UI alert dialog and update dependencies

- Added @radix-ui/react-alert-dialog version 1.1.15 to package-lock.json.
- Updated dependencies for leaflet and added types for leaflet.markercluster.
- Removed unused dependencies related to @radix-ui/react-dialog from package-lock.json.

chore: create PowerShell script for API smoke tests

- Added run_api_smoke.ps1 to perform health checks on the API after startup.
- The script checks the health endpoint and logs output for troubleshooting.

feat: add admin audit log and circuit breaker persistence

- Created admin_audit_log table for immutable audit trails of admin actions.
- Added circuit breaker state persistence to the webhooks table to prevent data loss on Redis restarts.

feat: implement webhook DLQ archival and retention

- Created webhook_dlq_archive table to store archived DLQ entries for permanent history.
- Ensured the schema is write-once to maintain an immutable audit trail.

chore: enhance RLS and search path for new tables

- Enabled row-level security on admin_audit_log and webhook_dlq_archive tables.
- Added explicit policies for service role and authenticated admin access.
- Locked search_path for the set_updated_at function to enhance security.
---
 .github/workflows/pr.yml                      |  25 +-
 apps/api/src/app.ts                           |  29 +-
 apps/api/src/config/env.ts                    |  48 +
 .../api/src/modules/admin/audit-log.routes.ts |  72 ++
 .../src/modules/admin/system-health.routes.ts | 134 +++
 .../src/modules/admin/webhook-dlq.routes.ts   | 146 ++++
 .../modules/webhooks/webhooks.repository.ts   |   9 +-
 .../src/modules/webhooks/webhooks.service.ts  |   9 +-
 apps/api/src/plugins/prometheus.ts            |  27 +-
 .../src/plugins/security/ratelimit.plugin.ts  | 146 +++-
 apps/api/src/routes/health.ts                 |   7 +-
 apps/api/src/server.ts                        |  15 +-
 apps/api/src/utils/audit.ts                   |  43 +
 apps/api/src/utils/errors.ts                  |   7 +
 apps/api/src/utils/hmac.ts                    |  54 ++
 apps/api/src/workers/circuit-breaker.ts       | 399 +++++++++
 apps/api/src/workers/retry-intents.ts         |   4 +-
 apps/api/src/workers/startup.ts               |  15 +
 apps/api/src/workers/webhook.queue.ts         | 277 +++++-
 apps/api/src/workers/webhook.worker.ts        | 224 ++++-
 .../admin/webhooks.integration.test.ts        |  46 +-
 .../api/tests/unit/utils/webhook.unit.test.ts |  78 +-
 apps/web/next.config.mjs                      |   7 +-
 apps/web/package.json                         |   3 +
 .../admin/monitoring/map/EmployeeMap.tsx      | 198 +++--
 .../(protected)/admin/monitoring/map/page.tsx | 237 ++++-
 .../app/(protected)/admin/webhooks/page.tsx   | 819 ++++++++++++++++++
 apps/web/src/app/(protected)/profile/page.tsx |  26 +-
 apps/web/src/app/globals.css                  |   3 +
 apps/web/src/app/providers.tsx                |  26 +-
 apps/web/src/components/layout/Header.tsx     |   4 +-
 apps/web/src/components/layout/Sidebar.tsx    |  12 +
 apps/web/src/components/ui/alert-dialog.tsx   | 127 +++
 apps/web/src/contexts/AuthContext.tsx         |   6 +-
 apps/web/src/hooks/queries/useAnalytics.ts    |  13 +-
 apps/web/src/hooks/queries/useDashboard.ts    |   1 +
 apps/web/src/hooks/queries/useEmployees.ts    |   5 +-
 apps/web/src/hooks/queries/useExpenses.ts     |   7 +-
 apps/web/src/hooks/queries/useSessions.ts     |   7 +-
 apps/web/src/hooks/queries/useWebhooks.ts     | 135 +++
 apps/web/src/hooks/useAuth.ts                 |  10 +-
 apps/web/src/lib/api/client.ts                |  39 +-
 apps/web/src/lib/api/endpoints.ts             |  10 +
 apps/web/src/lib/auth/role.ts                 |  54 ++
 apps/web/src/lib/query-client.ts              |  35 +
 apps/web/src/middleware.ts                    |  25 +-
 docs/SLO.md                                   | 136 +++
 docs/WEBHOOK_SIGNATURES.md                    | 194 +++++
 infra/grafana/dashboards/fieldtrack.json      | 134 ++-
 infra/nginx/fieldtrack.conf                   |   4 +-
 infra/prometheus/alerts.yml                   | 389 ++++++++-
 package-lock.json                             | 217 +++--
 run_api_smoke.ps1                             |  90 ++
 .../20260328134113_add_admin_audit_log.sql    |  25 +
 ...0328134130_circuit_breaker_persistence.sql |  31 +
 .../20260328134140_webhook_dlq_archive.sql    |  30 +
 ..._phase29_hardening_rls_and_search_path.sql |  65 ++
 57 files changed, 4616 insertions(+), 322 deletions(-)
 create mode 100644 apps/api/src/modules/admin/audit-log.routes.ts
 create mode 100644 apps/api/src/modules/admin/system-health.routes.ts
 create mode 100644 apps/api/src/modules/admin/webhook-dlq.routes.ts
 create mode 100644 apps/api/src/utils/audit.ts
 create mode 100644 apps/api/src/workers/circuit-breaker.ts
 create mode 100644 apps/web/src/app/(protected)/admin/webhooks/page.tsx
 create mode 100644 apps/web/src/components/ui/alert-dialog.tsx
 create mode 100644 apps/web/src/hooks/queries/useWebhooks.ts
 create mode 100644 apps/web/src/lib/auth/role.ts
 create mode 100644 docs/SLO.md
 create mode 100644 docs/WEBHOOK_SIGNATURES.md
 create mode 100644 run_api_smoke.ps1
 create mode 100644 supabase/migrations/20260328134113_add_admin_audit_log.sql
 create mode 100644 supabase/migrations/20260328134130_circuit_breaker_persistence.sql
 create mode 100644 supabase/migrations/20260328134140_webhook_dlq_archive.sql
 create mode 100644 supabase/migrations/20260328135403_phase29_hardening_rls_and_search_path.sql

diff --git a/.github/workflows/pr.yml b/.github/workflows/pr.yml
index e91846f..9593662 100644
--- a/.github/workflows/pr.yml
+++ b/.github/workflows/pr.yml
@@ -71,6 +71,7 @@ jobs:
         with:
           node-version: '24'
           cache: npm
+          cache-dependency-path: '**/package-lock.json'
 
       - run: npm ci
         if: needs.detect-changes.outputs.backend == 'true'
@@ -94,6 +95,7 @@ jobs:
         if: needs.detect-changes.outputs.backend == 'true'
         run: |
           docker build \
+            --target production \
             -t fieldtrack-backend:ci-validation \
             -f apps/api/Dockerfile \
             .
@@ -110,15 +112,31 @@ jobs:
             -e SUPABASE_SERVICE_ROLE_KEY=${{ secrets.SUPABASE_SERVICE_ROLE_KEY_TEST }} \
             fieldtrack-backend:ci-validation
 
-          sleep 3
+          STATUS="000"
+          for i in $(seq 1 12); do
+            STATUS=$(curl -s -o /dev/null -w "%{http_code}" http://127.0.0.1:3001/health || echo "000")
+            if [ "$STATUS" = "200" ]; then break; fi
+            echo "Health check attempt $i: HTTP $STATUS — waiting..."
+            sleep 2
+          done
 
-          STATUS=$(curl -s -o /dev/null -w "%{http_code}" http://127.0.0.1:3001/health || echo "000")
           if [ "$STATUS" != "200" ]; then
-            echo "❌ /health returned HTTP $STATUS (expected 200)"
+            echo "❌ /health returned HTTP $STATUS after 24 s (expected 200)"
             docker logs fieldtrack-ci-test --tail 50
             exit 1
           fi
 
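+          # Smoke tests: admin endpoints must reject unauthenticated requests with 401 (NOTE(review): /admin/webhook-dlq is only registered when workers are enabled — confirm this container starts with them on, otherwise expect 404)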
+          for ENDPOINT in /admin/audit-log /admin/webhook-dlq; do
+            ECODE=$(curl -s -o /dev/null -w "%{http_code}" "http://127.0.0.1:3001${ENDPOINT}" || echo "000")
+            if [ "$ECODE" != "401" ]; then
+              echo "❌ ${ENDPOINT} expected 401 (unauthenticated), got ${ECODE}"
+              docker logs fieldtrack-ci-test --tail 50
+              exit 1
+            fi
+            echo "✓ ${ENDPOINT} → 401 (auth guard verified)"
+          done
+
           docker rm -f fieldtrack-ci-test
           docker rmi fieldtrack-backend:ci-validation
 
@@ -163,6 +181,7 @@ jobs:
         with:
           node-version: '24'
           cache: npm
+          cache-dependency-path: '**/package-lock.json'
 
       - run: npm ci
         if: needs.detect-changes.outputs.frontend == 'true'
diff --git a/apps/api/src/app.ts b/apps/api/src/app.ts
index bc62682..60f0177 100644
--- a/apps/api/src/app.ts
+++ b/apps/api/src/app.ts
@@ -98,8 +98,9 @@ export async function buildApp(): Promise<FastifyInstance> {
 
   // Performance timing — logs every response with full correlation context:
   // requestId, method, route, status, elapsed ms, userId, organizationId.
-  // Emits an additional WARN for responses slower than 200 ms so slow endpoints
-  // are immediately visible in Grafana/Loki without a query.
+  // Emits WARN for responses slower than 500 ms so slow endpoints are
+  // immediately visible in Grafana/Loki without a query.
+  // Emits ERROR for responses slower than 2000 ms — indicates a serious problem.
   app.addHook("onResponse", async (request, reply) => {
     const ms = Math.round(reply.elapsedTime);
     const logPayload = {
@@ -112,8 +113,10 @@ export async function buildApp(): Promise<FastifyInstance> {
       userId: (request as { user?: { sub?: string } }).user?.sub,
       organizationId: (request as { organizationId?: string }).organizationId,
     };
-    if (ms > 200) {
-      request.log.warn(logPayload, "slow response");
+    if (ms > 2_000) {
+      request.log.error({ ...logPayload, slow_request: true }, "very_slow_response");
+    } else if (ms > 500) {
+      request.log.warn({ ...logPayload, slow_request: true }, "slow_response");
     } else {
       request.log.info(logPayload, "response");
     }
@@ -146,6 +149,16 @@ export async function buildApp(): Promise<FastifyInstance> {
     }
 
     request.log.error({ error: error.message, requestId: request.id }, "Unhandled error");
+    // Track error rate — emit structured log field so Loki/Grafana can count 5xx per route
+    request.log.error(
+      {
+        error_rate_event: true,
+        route: request.routeOptions?.url ?? request.url,
+        method: request.method,
+        requestId: request.id,
+      },
+      "error_rate_event",
+    );
     void reply.status(500).send({
       success: false,
       error: "Internal server error",
@@ -181,10 +194,18 @@ export async function buildApp(): Promise<FastifyInstance> {
   if (shouldStartWorkers()) {
     const { adminQueuesRoutes } = await import("./modules/admin/queues.routes.js");
     const { adminRetryIntentsRoutes } = await import("./modules/admin/retry-intents.routes.js");
+    const { systemHealthRoutes } = await import("./modules/admin/system-health.routes.js");
+    const { webhookDlqRoutes } = await import("./modules/admin/webhook-dlq.routes.js");
     await app.register(adminQueuesRoutes);
     await app.register(adminRetryIntentsRoutes);
+    await app.register(systemHealthRoutes);
+    await app.register(webhookDlqRoutes);
   }
 
+  // Admin audit log — not worker-gated (pure DB, no Redis required).
+  const { auditLogRoutes } = await import("./modules/admin/audit-log.routes.js");
+  await app.register(auditLogRoutes);
+
   // NOTE: Workers and startup recovery are intentionally started in server.ts
   // after app.listen() resolves. This keeps lifecycle explicit and prevents
   // accidental starts during app construction or module import.
diff --git a/apps/api/src/config/env.ts b/apps/api/src/config/env.ts
index 9e0a40b..18c64bf 100644
--- a/apps/api/src/config/env.ts
+++ b/apps/api/src/config/env.ts
@@ -297,6 +297,39 @@ const envSchema = z
      */
     MAX_QUEUE_DEPTH: z.coerce.number().int().positive().default(1_000),
 
+    /**
+     * Maximum number of jobs that may sit in the webhook DLQ at one time.
+     * When the cap is reached the oldest job is archived and evicted to
+     * make room for the new entry.
+     * Default: 10 000 jobs.
+     */
+        WEBHOOK_DLQ_MAX_SIZE: z.coerce.number().int().positive().default(10_000),
+
+    /**
+     * Number of days to retain webhook DLQ entries before they are purged.
+     * Jobs older than this are archived to webhook_dlq_archive and removed.
+     * Range: 14–30 days.  Default: 30 days.
+     */
+    WEBHOOK_DLQ_RETENTION_DAYS: z.coerce
+      .number()
+      .int()
+      .min(14, "WEBHOOK_DLQ_RETENTION_DAYS must be at least 14")
+      .max(30, "WEBHOOK_DLQ_RETENTION_DAYS must be at most 30")
+      .default(30),
+
+    /**
+     * Maximum webhook payload size in bytes for outbound delivery.
+     * Deliveries above this threshold are marked failed and moved to DLQ to
+     * prevent oversized payload retries from consuming worker capacity.
+     * Default: 256 KiB.
+     */
+    WEBHOOK_MAX_PAYLOAD_BYTES: z.coerce
+      .number()
+      .int()
+      .min(16_384, "WEBHOOK_MAX_PAYLOAD_BYTES must be at least 16 KiB")
+      .max(1_000_000, "WEBHOOK_MAX_PAYLOAD_BYTES must be at most 1_000_000 bytes")
+      .default(262_144),
+
     /**
      * Maximum GPS point count per session before the recalculation job is
      * rejected. Guards against pathological data saturating the event loop.
@@ -340,6 +373,21 @@ const envSchema = z
       .max(50, "ANALYTICS_WORKER_CONCURRENCY must be at most 50 (database pressure above this is counterproductive)")
       .default(5),
 
+    /**
+     * Number of webhook delivery jobs the webhook worker processes
+     * concurrently per replica.  Default 5 provides enough throughput for
+     * most deployments.  Increase with caution — HTTP fan-out can exhaust
+     * the OS file descriptor limit at high concurrency.
+     *
+     * Range: 1–20.
+     */
+    WEBHOOK_WORKER_CONCURRENCY: z.coerce
+      .number()
+      .int()
+      .min(1, "WEBHOOK_WORKER_CONCURRENCY must be at least 1")
+      .max(20, "WEBHOOK_WORKER_CONCURRENCY must be at most 20")
+      .default(5),
+
     // ── Infrastructure availability ────────────────────────────────────────
 
     /**
diff --git a/apps/api/src/modules/admin/audit-log.routes.ts b/apps/api/src/modules/admin/audit-log.routes.ts
new file mode 100644
index 0000000..a909df8
--- /dev/null
+++ b/apps/api/src/modules/admin/audit-log.routes.ts
@@ -0,0 +1,72 @@
+/**
+ * audit-log.routes.ts — GET /admin/audit-log
+ *
+ * Returns a paginated list of admin audit events from `public.admin_audit_log`.
+ * Supports cursor-based pagination via `before` (ISO timestamp) and optional
+ * filtering by `event` type.
+ *
+ * Auth: ADMIN role required.
+ * Not worker-gated — pure DB (does not require Redis / BullMQ).
+ */
+
+import type { FastifyInstance } from "fastify";
+import { z } from "zod";
+import { authenticate } from "../../middleware/auth.js";
+import { requireRole } from "../../middleware/role-guard.js";
+import { supabaseServiceClient as supabase } from "../../config/supabase.js";
+import { handleError } from "../../utils/response.js";
+
+export async function auditLogRoutes(app: FastifyInstance): Promise<void> {
+  app.get(
+    "/admin/audit-log",
+    {
+      schema: {
+        tags: ["admin"],
+        description: "Paginated admin audit log — lists privileged actions (ADMIN only).",
+        querystring: z.object({
+          limit:  z.coerce.number().int().min(1).max(200).default(50),
+          before: z.string().datetime({ offset: true }).optional(),
+          event:  z.string().optional(),
+        }),
+      },
+      preValidation: [authenticate, requireRole("ADMIN")],
+    },
+    async (request, reply) => {
+      try {
+        const { limit, before, event } = request.query as {
+          limit:  number;
+          before?: string;
+          event?:  string;
+        };
+
+        let query = supabase
+          .from("admin_audit_log")
+          .select("id, event, actor_id, organization_id, resource_type, resource_id, payload, created_at")
+          .order("created_at", { ascending: false })
+          .limit(limit);
+
+        if (before) {
+          query = query.lt("created_at", before);
+        }
+
+        if (event) {
+          query = query.eq("event", event);
+        }
+
+        const { data, error } = await query;
+
+        if (error) {
+          throw new Error(`[audit-log] DB query failed: ${error.message}`);
+        }
+
+        reply.status(200).send({
+          success: true,
+          data:    data ?? [],
+          count:   (data ?? []).length,
+        });
+      } catch (error) {
+        handleError(error, request, reply, "Failed to fetch audit log");
+      }
+    },
+  );
+}
diff --git a/apps/api/src/modules/admin/system-health.routes.ts b/apps/api/src/modules/admin/system-health.routes.ts
new file mode 100644
index 0000000..1018fd7
--- /dev/null
+++ b/apps/api/src/modules/admin/system-health.routes.ts
@@ -0,0 +1,134 @@
+/**
+ * system-health.routes.ts — Deep system health endpoint for operators.
+ *
+ * GET /admin/system-health
+ *
+ * Returns a single-call view of:
+ *  - Worker status (expected 3-of-3: distance, analytics, webhook)
+ *  - Queue backlog per queue (distance: waiting + delayed; analytics: waiting + active)
+ *  - Webhook DLQ depth
+ *  - Webhook delivery stats: success rate, failure count, retry count
+ *
+ * Auth: ADMIN only (JWT + role check).
+ * Queue depths are Redis reads only — no heavy DB aggregation in the hot path.
+ * Webhook stats are derived in-process from at most 500 org-scoped delivery rows.
+ */
+
+import type { FastifyInstance } from "fastify";
+import { authenticate } from "../../middleware/auth.js";
+import { requireRole } from "../../middleware/role-guard.js";
+import { areWorkersStarted, getExpectedWorkerCount } from "../../workers/startup.js";
+import { getWebhookQueueDepth, getWebhookDlqDepth } from "../../workers/webhook.queue.js";
+import { getAnalyticsQueueStats } from "../../workers/analytics.queue.js";
+import { distanceQueue } from "../../workers/distance.queue.js";
+import { supabaseServiceClient as supabase } from "../../config/supabase.js";
+import { handleError } from "../../utils/response.js";
+
+const EXPECTED_WORKER_COUNT = getExpectedWorkerCount(); // distance + analytics + webhook (driven by WORKER_TYPES)
+
+export async function systemHealthRoutes(app: FastifyInstance): Promise<void> {
+  app.get(
+    "/admin/system-health",
+    {
+      schema: {
+        tags: ["admin"],
+        description:
+          "Deep system health: worker status, queue backlogs, DLQ depth, and webhook delivery stats (ADMIN only).",
+      },
+      preValidation: [authenticate, requireRole("ADMIN")],
+    },
+    async (request, reply) => {
+      try {
+        const orgId = (request as { organizationId?: string }).organizationId;
+
+        // ── Worker status ──────────────────────────────────────────────────
+        const workersActive = areWorkersStarted() ? EXPECTED_WORKER_COUNT : 0;
+
+        // ── Queue depths (Redis) ───────────────────────────────────────────
+        const [
+          webhookQueueDepth,
+          webhookDlqDepth,
+          analyticsStats,
+          distanceWaiting,
+          distanceDelayed,
+        ] = await Promise.allSettled([
+          getWebhookQueueDepth(),
+          getWebhookDlqDepth(),
+          getAnalyticsQueueStats(),
+          distanceQueue.getWaitingCount(),
+          distanceQueue.getDelayedCount(),
+        ]);
+
+        const safeNumber = (r: PromiseSettledResult<number>) =>
+          r.status === "fulfilled" ? r.value : -1;
+
+        const analyticsQueueDepth =
+          analyticsStats.status === "fulfilled"
+            ? analyticsStats.value.waiting + (analyticsStats.value.active ?? 0)
+            : -1;
+
+        // ── Webhook delivery stats (DB, org-scoped) ────────────────────────
+        let webhookSuccessRate = 0;
+        let webhookFailureCount = 0;
+        let webhookRetryCount = 0;
+        let webhookTotalCount = 0;
+
+        if (orgId) {
+          const { data: stats } = await supabase
+            .from("webhook_deliveries")
+            .select("status, attempt_count")
+            .eq("organization_id", orgId)
+            .limit(500);
+
+          if (stats) {
+            webhookTotalCount = stats.length;
+            const successes = stats.filter((r) => r.status === "success").length;
+            webhookFailureCount = stats.filter((r) => r.status === "failed").length;
+            // Retry count = total attempts beyond the first across all deliveries
+            webhookRetryCount = stats.reduce(
+              (sum, r) => sum + Math.max(0, (r.attempt_count ?? 0) - 1),
+              0,
+            );
+            webhookSuccessRate =
+              webhookTotalCount > 0
+                ? Math.round((successes / webhookTotalCount) * 100)
+                : 100; // 100% if no deliveries yet
+          }
+        }
+
+        reply.status(200).send({
+          success: true,
+          timestamp: new Date().toISOString(),
+          workers: {
+            active: workersActive,
+            expected: EXPECTED_WORKER_COUNT,
+            healthy: workersActive === EXPECTED_WORKER_COUNT,
+          },
+          queues: {
+            webhook: {
+              backlog: safeNumber(webhookQueueDepth),
+              dlq:     safeNumber(webhookDlqDepth),
+            },
+            analytics: {
+              backlog: analyticsQueueDepth,
+            },
+            distance: {
+              backlog:
+                safeNumber(distanceWaiting) >= 0 && safeNumber(distanceDelayed) >= 0
+                  ? safeNumber(distanceWaiting) + safeNumber(distanceDelayed)
+                  : -1,
+            },
+          },
+          webhooks: {
+            successRatePct: webhookSuccessRate,
+            failureCount:   webhookFailureCount,
+            retryCount:     webhookRetryCount,
+            totalDeliveries: webhookTotalCount,
+          },
+        });
+      } catch (error) {
+        handleError(error, request, reply, "Failed to fetch system health");
+      }
+    },
+  );
+}
diff --git a/apps/api/src/modules/admin/webhook-dlq.routes.ts b/apps/api/src/modules/admin/webhook-dlq.routes.ts
new file mode 100644
index 0000000..bae65c3
--- /dev/null
+++ b/apps/api/src/modules/admin/webhook-dlq.routes.ts
@@ -0,0 +1,146 @@
+/**
+ * webhook-dlq.routes.ts — Admin API for Dead-Letter Queue (DLQ) management.
+ *
+ * GET  /admin/webhook-dlq            — list DLQ jobs pending review
+ * POST /admin/webhook-dlq/:id/replay — replay a single DLQ job (reset attempt_count)
+ *
+ * All routes require ADMIN role (JWT + RBAC).
+ * Only available when WORKERS_ENABLED=true (registered from app.ts).
+ *
+ * Replay semantics:
+ *  - Removes the job from the DLQ
+ *  - Re-enqueues into the main webhook-delivery queue with attempt_number=1
+ *  - Resets attempt_count in DB to allow full retry schedule
+ *  - Logs a structured audit entry on every replay
+ */
+
+import type { FastifyInstance } from "fastify";
+import { z } from "zod";
+import { authenticate } from "../../middleware/auth.js";
+import { requireRole } from "../../middleware/role-guard.js";
+import {
+  replayWebhookDlqJob,
+  listWebhookDlqJobs,
+  getWebhookDlqDepth,
+} from "../../workers/webhook.queue.js";
+import { supabaseServiceClient as supabase } from "../../config/supabase.js";
+import { NotFoundError } from "../../utils/errors.js";
+import { handleError } from "../../utils/response.js";
+import { insertAuditRecord } from "../../utils/audit.js";
+
+const DLQ_REPLAY_COOLDOWN_MS = 5_000;
+let lastDlqReplayAt = 0;
+
+export async function webhookDlqRoutes(app: FastifyInstance): Promise<void> {
+  // ── GET /admin/webhook-dlq ─────────────────────────────────────────────────
+  app.get(
+    "/admin/webhook-dlq",
+    {
+      schema: {
+        tags: ["admin", "webhooks"],
+        description: "List jobs in the webhook Dead-Letter Queue (ADMIN only).",
+        querystring: z.object({
+          limit: z.coerce.number().int().min(1).max(100).default(50),
+        }),
+      },
+      preValidation: [authenticate, requireRole("ADMIN")],
+    },
+    async (request, reply) => {
+      try {
+        const { limit } = request.query as { limit: number };
+        const [jobs, depth] = await Promise.all([
+          listWebhookDlqJobs(limit),
+          getWebhookDlqDepth(),
+        ]);
+        reply.status(200).send({
+          success: true,
+          dlq_depth: depth,
+          jobs,
+        });
+      } catch (error) {
+        handleError(error, request, reply, "Failed to list DLQ jobs");
+      }
+    },
+  );
+
+  // ── POST /admin/webhook-dlq/:id/replay ────────────────────────────────────
+  app.post<{ Params: { id: string } }>(
+    "/admin/webhook-dlq/:id/replay",
+    {
+      schema: {
+        tags: ["admin", "webhooks"],
+        description: "Replay a DLQ job: re-enqueue with attempt_count reset (ADMIN only).",
+        params: z.object({ id: z.string().uuid() }),
+      },
+      preValidation: [authenticate, requireRole("ADMIN")],
+    },
+    async (request, reply) => {
+      try {
+        const { id: deliveryId } = request.params;
+        const adminId = (request as { user?: { sub?: string } }).user?.sub;
+        const orgId   = (request as { organizationId?: string }).organizationId;
+
+        // Process-wide replay cooldown (one module-level timestamp shared by all admins) — prevents accidental mass re-delivery
+        const now = Date.now();
+        const elapsed = now - lastDlqReplayAt;
+        if (elapsed < DLQ_REPLAY_COOLDOWN_MS) {
+          reply.status(429).send({
+            success: false,
+            error: `DLQ replay rate-limited. Retry in ${DLQ_REPLAY_COOLDOWN_MS - elapsed}ms.`,
+          });
+          return;
+        }
+        lastDlqReplayAt = now;
+
+        const replayed = await replayWebhookDlqJob(deliveryId);
+        if (!replayed) {
+          throw new NotFoundError(`DLQ job for delivery ${deliveryId} not found`);
+        }
+
+        // Reset attempt_count in DB so the full retry schedule applies.
+        // The client reports failures via { error } rather than throwing — log them instead of dropping them.
+        const { error: resetError } = await supabase
+          .from("webhook_deliveries")
+          .update({ status: "pending", attempt_count: 0, next_retry_at: new Date().toISOString() })
+          .eq("id", deliveryId);
+        if (resetError) {
+          request.log.error({ deliveryId, error: resetError.message }, "webhook-dlq: failed to reset delivery row after replay");
+        }
+
+        // Structured audit log — queryable in Grafana/Loki
+        request.log.info(
+          {
+            audit:      true,
+            event:      "WEBHOOK_DLQ_REPLAY",
+            deliveryId,
+            adminId,
+            organizationId: orgId,
+            timestamp:  new Date().toISOString(),
+          },
+          "webhook-dlq: DLQ job replayed by admin",
+        );
+
+        // Persist to DB audit trail for GET /admin/audit-log
+        await insertAuditRecord({
+          event:          "WEBHOOK_DLQ_REPLAY",
+          actor_id:       adminId,
+          organization_id: orgId,
+          resource_type:  "webhook_delivery",
+          resource_id:    deliveryId,
+          payload:        { replayed_at: new Date().toISOString() },
+        });
+
+        reply.status(200).send({
+          success: true,
+          data: {
+            delivery_id: deliveryId,
+            replayed_at: new Date().toISOString(),
+            message: "Job re-queued with attempt_count reset",
+          },
+        });
+      } catch (error) {
+        handleError(error, request, reply, "Failed to replay DLQ job");
+      }
+    },
+  );
+}
diff --git a/apps/api/src/modules/webhooks/webhooks.repository.ts b/apps/api/src/modules/webhooks/webhooks.repository.ts
index 860a74f..792f512 100644
--- a/apps/api/src/modules/webhooks/webhooks.repository.ts
+++ b/apps/api/src/modules/webhooks/webhooks.repository.ts
@@ -18,6 +18,9 @@ import type {
   DeliveryListQuery,
 } from "./webhooks.schema.js";
 
+const WEBHOOK_DELIVERY_COLUMNS =
+  "id, webhook_id, event_id, organization_id, status, attempt_count, response_status, response_body, last_attempt_at, next_retry_at, created_at";
+
 // ─── Webhook CRUD ─────────────────────────────────────────────────────────────
 
 export const webhooksRepository = {
@@ -112,7 +115,7 @@ export const webhooksRepository = {
     const to   = from + query.limit - 1;
 
     let q = orgTable(request, "webhook_deliveries")
-      .select("*", { count: "exact" })
+      .select(WEBHOOK_DELIVERY_COLUMNS, { count: "exact" })
       .order("created_at", { ascending: false })
       .range(from, to);
 
@@ -136,7 +139,7 @@ export const webhooksRepository = {
     deliveryId: string,
   ): Promise<WebhookDelivery | null> {
     const { data, error } = await orgTable(request, "webhook_deliveries")
-      .select("*")
+      .select(WEBHOOK_DELIVERY_COLUMNS)
       .eq("id", deliveryId)
       .limit(1)
       .maybeSingle();
@@ -172,7 +175,7 @@ export const webhooksRepository = {
     const { data, error } = await orgTable(request, "webhook_deliveries")
       .update({ status: "pending", next_retry_at: nextRetryAt })
       .eq("id", deliveryId)
-      .select("*")
+      .select(WEBHOOK_DELIVERY_COLUMNS)
       .single();
 
     if (error) throw new Error(`Failed to reset delivery: ${error.message}`);
diff --git a/apps/api/src/modules/webhooks/webhooks.service.ts b/apps/api/src/modules/webhooks/webhooks.service.ts
index da7ad3b..2187fb4 100644
--- a/apps/api/src/modules/webhooks/webhooks.service.ts
+++ b/apps/api/src/modules/webhooks/webhooks.service.ts
@@ -13,8 +13,9 @@
 import type { FastifyRequest } from "fastify";
 import { webhooksRepository } from "./webhooks.repository.js";
 import { validateWebhookUrl, InvalidWebhookUrlError } from "../../utils/url-validator.js";
-import { BadRequestError, NotFoundError } from "../../utils/errors.js";
+import { BadRequestError, NotFoundError, ServiceUnavailableError } from "../../utils/errors.js";
 import { enqueueWebhookDelivery } from "../../workers/webhook.queue.js";
+import { shouldStartWorkers } from "../../workers/startup.js";
 import type {
   CreateWebhookBody,
   UpdateWebhookBody,
@@ -103,6 +104,12 @@ export const webhooksService = {
     const delivery = await webhooksRepository.findDeliveryById(request, deliveryId);
     if (!delivery) throw new NotFoundError("Delivery not found");
 
+    if (!shouldStartWorkers()) {
+      throw new ServiceUnavailableError(
+        "Workers not enabled — webhook delivery requires WORKERS_ENABLED=true",
+      );
+    }
+
     if (delivery.status === "pending") {
       throw new BadRequestError("Delivery is already pending — retry not needed");
     }
diff --git a/apps/api/src/plugins/prometheus.ts b/apps/api/src/plugins/prometheus.ts
index ec70f0f..45044a3 100644
--- a/apps/api/src/plugins/prometheus.ts
+++ b/apps/api/src/plugins/prometheus.ts
@@ -226,6 +226,13 @@ export const distanceJobsTotal = new client.Counter({
  * millions).  Each org generates at most O(event_types × statuses) = ~15 series.
  * Do NOT add high-cardinality labels such as event_id or webhook_id.
  *
+ * IMPORTANT: `event_type` MUST be sanitised through `normalizeEventType()`
+ * (defined in webhook.worker.ts) before being used as a label value.
+ * Raw values from the DB payload could be arbitrary strings, creating
+ * unbounded cardinality.  `normalizeEventType()` maps unknown values to
+ * `"other"`.  Update KNOWN_EVENT_TYPES whenever a new EventDataMap key is
+ * added to event-bus.ts.
+ *
  * Not yet wired to the delivery worker — defined here so the metric is
  * registered in the same process-level registry as all other metrics and
  * appears in /metrics output from day one (with zero counters until Phase 25
@@ -239,7 +246,7 @@ export const distanceJobsTotal = new client.Counter({
 export const webhookDeliveriesTotal = new client.Counter({
   name: "webhook_deliveries_total",
   help: "Total number of webhook delivery attempts",
-  labelNames: ["event_type", "status", "organization_id"] as const,
+  labelNames: ["event_type", "status"] as const,
   registers: [register],
 });
 
@@ -260,7 +267,23 @@ export const webhookDeliveriesTotal = new client.Counter({
 export const webhookFailuresTotal = new client.Counter({
   name: "webhook_failures_total",
   help: "Total number of webhook deliveries that permanently failed after all retries",
-  labelNames: ["event_type", "organization_id"] as const,
+  labelNames: ["event_type"] as const,
+  registers: [register],
+});
+
+/**
+ * Total webhook delivery retries scheduled.
+ * Incremented each time a failed attempt is re-queued (attempt 2..MAX).
+ *
+ * Usage in the delivery worker:
+ *   webhookRetriesTotal
+ *     .labels({ event_type: normalizeEventType(eventType) })
+ *     .inc();
+ */
+export const webhookRetriesTotal = new client.Counter({
+  name: "webhook_retries_total",
+  help: "Total number of webhook delivery retries scheduled",
+  labelNames: ["event_type"] as const,
   registers: [register],
 });
 
diff --git a/apps/api/src/plugins/security/ratelimit.plugin.ts b/apps/api/src/plugins/security/ratelimit.plugin.ts
index cf0cd39..0a8cd07 100644
--- a/apps/api/src/plugins/security/ratelimit.plugin.ts
+++ b/apps/api/src/plugins/security/ratelimit.plugin.ts
@@ -1,27 +1,30 @@
 /**
- * Phase 15: Redis-backed Rate Limiting Plugin
+ * Rate Limiting Plugin — two-tier, Redis-backed, sliding-window.
  *
- * Registers @fastify/rate-limit globally with a Redis store so that limits are
- * enforced across all container replicas — never in process-memory.
+ * Tier 1 — Per-user (1 200 req/min):
+ *   @fastify/rate-limit with its Redis store, so counters are shared by all
+ *   replicas.  The intent is to avoid the fixed-window burst-then-reset exploit
+ *   at window boundaries.  NOTE(review): the plugin's Redis store appears to be
+ *   an INCR + expiry counter — confirm it is really sliding-window.
  *
- * Global defaults: 1200 requests / minute per authenticated user (keyed by
- * Authorization header).  This is intentionally generous — an admin polling
- * the dashboard every 5 s consumes only 12 req/min.  The strict cap exists to
- * block runaway loops, not legitimate clients.
+ * Tier 2 — Per-org (5 000 req/min):
+ *   Implemented as a Fastify preHandler hook using a Redis sorted-set sliding
+ *   window, executed atomically via a Lua script (EVAL).  The set stores
+ *   timestamps as both score and member, so each request occupies exactly one
+ *   slot that ages out automatically.
  *
- * Keying by token (not IP) means multiple real users behind the same NAT or
- * load-test runner each get their own independent quota.
+ *   Algorithm (runs inside one EVAL call per request — no TOCTOU):
+ *     1. ZREMRANGEBYSCORE key -∞ (now - window_ms)   ← evict expired entries
+ *     2. ZADD  key  now_ms  "<now_ms>:<random>"       ← register this request
+ *     3. ZCARD key                                    ← count in-window entries
+ *     4. PEXPIRE key (window_ms * 2)                  ← keep key alive
+ *     5. return count
  *
- * Routes that need stricter limits (e.g. auth) can override via route config:
+ *   If count > max → HTTP 429.  The random suffix in the member prevents two
+ *   concurrent requests at exactly the same millisecond from aliasing onto
+ *   the same key and causing an under-count.
  *
- *   {
- *     config: {
- *       rateLimit: { max: 5, timeWindow: '1 minute' }
- *     }
- *   }
- *
- * Localhost (127.0.0.1 / ::1) is always allow-listed so health checks and
- * internal tooling never trigger limits.
+ * Localhost (127.0.0.1 / ::1) is always allow-listed.
  */
 
 import fp from "fastify-plugin";
@@ -29,18 +32,52 @@ import type { FastifyInstance, FastifyPluginAsync } from "fastify";
 import fastifyRateLimit from "@fastify/rate-limit";
 import { shouldStartWorkers } from "../../workers/startup.js";
 
+// ── Tier-2 constants ─────────────────────────────────────────────────────────
+
+/** Per-org request ceiling per rolling window. */
+const ORG_RATE_LIMIT_MAX        = 5_000;
+/** Sliding-window size in milliseconds. */
+const ORG_RATE_LIMIT_WINDOW_MS  = 60_000; // 1 minute
+
+/**
+ * Lua script: atomic sliding-window check + record using a sorted set.
+ *
+ * KEYS[1]  → Redis key for this org's rate-limit window
+ * ARGV[1]  → current timestamp in milliseconds (string)
+ * ARGV[2]  → window size in milliseconds (string)
+ * ARGV[3]  → unique member for this request (string: "<ts>:<random>")
+ * ARGV[4]  → jittered TTL in milliseconds (string: window_ms * 2 ± jitter)
+ *
+ * Returns the in-window count AFTER recording this request (the value to compare
+ * against the cap).  The request is recorded unconditionally, so over-cap requests count too.
+ */
+const SLIDING_WINDOW_LUA = `
+local key        = KEYS[1]
+local now_ms     = tonumber(ARGV[1])
+local window_ms  = tonumber(ARGV[2])
+local member     = ARGV[3]
+local ttl_ms     = tonumber(ARGV[4])
+local cutoff     = now_ms - window_ms
+
+redis.call('ZREMRANGEBYSCORE', key, '-inf', cutoff)
+redis.call('ZADD', key, now_ms, member)
+local count = redis.call('ZCARD', key)
+redis.call('PEXPIRE', key, ttl_ms)
+return count
+`;
+
 const rateLimitPlugin: FastifyPluginAsync = async (fastify: FastifyInstance) => {
-    // Rate limiting requires Redis. Skip when workers/Redis are not provisioned
-    // (CI, local dev without Redis). In production, WORKERS_ENABLED=true so
-    // this check always passes and rate limiting is enforced.
     if (!shouldStartWorkers()) {
         fastify.log.info("security-rate-limit plugin SKIPPED (WORKERS_ENABLED=false — Redis not provisioned)");
         return;
     }
 
-    // Lazy import Redis only when needed
     const { Redis } = await import("ioredis");
     const { redisConnectionOptions } = await import("../../config/redis.js");
+
+    // ── Tier 1: Per-user sliding window (1 200 req/min) ─────────────────────
+    // @fastify/rate-limit uses a sliding-window counter internally when a
+    // Redis store is provided — no fixed-window burst vulnerability.
     const rateLimitRedis = new Redis(redisConnectionOptions);
 
     await fastify.register(fastifyRateLimit, {
@@ -48,26 +85,12 @@ const rateLimitPlugin: FastifyPluginAsync = async (fastify: FastifyInstance) =>
         hook: "preHandler",
         max: 1200,
         timeWindow: "1 minute",
-
-        // Redis store — required for Docker / multi-instance deployments.
         redis: rateLimitRedis,
-
-        // Key by validated user ID (sub claim from JWT) so each authenticated
-        // user gets their own quota. This is more secure than keying by the
-        // raw Authorization header since it uses the verified identity.
-        // Unauthenticated requests fall back to client IP.
         keyGenerator: (request) => {
             const user = (request as { user?: { sub?: string } }).user;
-            if (user?.sub) {
-                return `user:${user.sub}`;
-            }
-            return `ip:${request.ip}`;
+            return user?.sub ? `rl:user:${user.sub}` : `rl:ip:${request.ip}`;
         },
-
-        // Bypass rate limiting for localhost health checks / internal tooling.
         allowList: ["127.0.0.1", "::1"],
-
-        // Return a machine-readable error body on 429.
         errorResponseBuilder: (_request, context) => ({
             success: false,
             error: "Too many requests",
@@ -75,7 +98,54 @@ const rateLimitPlugin: FastifyPluginAsync = async (fastify: FastifyInstance) =>
         }),
     });
 
-    fastify.log.info("security-rate-limit plugin registered (Redis-backed, 1200 req/min per token)");
+    // ── Tier 2: Per-org sliding window (5 000 req/min) ──────────────────────
+    const orgRlRedis = new Redis(redisConnectionOptions);
+
+    // Pre-load the Lua script SHA for efficient reuse.
+    // evalsha is ~10 % faster than eval for hot-path scripts called thousands
+    // of times per minute because Redis skips the parse/compile step.
+    const slidingWindowSha = await orgRlRedis.script("LOAD", SLIDING_WINDOW_LUA) as string;
+
+    fastify.addHook("preHandler", async (request, reply) => {
+        const orgId = (request as { organizationId?: string }).organizationId;
+        if (!orgId) return;
+        if (request.ip === "127.0.0.1" || request.ip === "::1") return;
+
+        const nowMs  = Date.now();
+        // Unique per-request member prevents millisecond aliasing.
+        const member = `${nowMs}:${Math.random().toString(36).slice(2)}`;
+        const key    = `rl:org:${orgId}`;
+        // Jitter the key TTL by 0–10% of the window to prevent a synchronized
+        // mass-expiry storm when many org keys were created at the same time.
+        const ttlMs  = Math.round(ORG_RATE_LIMIT_WINDOW_MS * 2 + ORG_RATE_LIMIT_WINDOW_MS * 0.1 * Math.random());
+
+        let count: number;
+        try {
+            // Run via pre-loaded SHA; fall back to EVAL if the script was
+            // flushed from script cache (e.g. Redis restart).
+            count = await orgRlRedis
+                .evalsha(slidingWindowSha, 1, key, String(nowMs), String(ORG_RATE_LIMIT_WINDOW_MS), member, String(ttlMs))
+                .catch(() =>
+                    orgRlRedis.eval(SLIDING_WINDOW_LUA, 1, key, String(nowMs), String(ORG_RATE_LIMIT_WINDOW_MS), member, String(ttlMs)),
+                ) as number;
+        } catch {
+            // Non-fatal: if Redis is unavailable let the request through.
+            return;
+        }
+
+        if (count <= ORG_RATE_LIMIT_MAX) return;
+        // Await the reply (Fastify replies are thenable) so this hook settles only
+        // after the 429 is sent and the route handler never runs for a rejected request.
+        await reply.status(429).send({
+            success:    false,
+            error:      "Organization rate limit exceeded",
+            retryAfter: `${Math.ceil(ORG_RATE_LIMIT_WINDOW_MS / 1000)}s`,
+        });
+    });
+
+    fastify.log.info(
+        "security-rate-limit plugin registered (per-user: 1200 req/min sliding, per-org: 5000 req/min sliding, Redis-backed)",
+    );
 };
 
 export default fp(rateLimitPlugin, {
diff --git a/apps/api/src/routes/health.ts b/apps/api/src/routes/health.ts
index a0a5d3d..98afbcc 100644
--- a/apps/api/src/routes/health.ts
+++ b/apps/api/src/routes/health.ts
@@ -1,5 +1,6 @@
 import type { FastifyInstance } from "fastify";
 import { getConfigHash } from "../config/env.js";
+import { shouldStartWorkers, areWorkersStarted, getExpectedWorkerCount } from "../workers/startup.js";
 
 interface HealthResponse {
         status: string;
@@ -73,7 +74,6 @@ export async function healthRoutes(app: FastifyInstance): Promise<void> {
         const { supabaseServiceClient } = await import("../config/supabase.js");
         const { distanceQueue } = await import("../workers/distance.queue.js");
         const { analyticsQueue } = await import("../workers/analytics.queue.js");
-        const { shouldStartWorkers, areWorkersStarted } = await import("../workers/startup.js");
 
         const checks: ReadyResponse["checks"] = {
             redis: "error",
@@ -106,11 +106,12 @@ export async function healthRoutes(app: FastifyInstance): Promise<void> {
         checks.redis = redisResult.status === "fulfilled" ? "ok" : "error";
         checks.supabase = supabaseResult.status === "fulfilled" ? "ok" : "error";
         checks.bullmq = bullmqResult.status === "fulfilled" ? "ok" : "error";
+        const expected = getExpectedWorkerCount();
         if (!shouldStartWorkers()) {
-            checks.workers = { status: "skipped", active: 0, expected: 2 };
+            checks.workers = { status: "skipped", active: 0, expected };
         } else {
             const started = areWorkersStarted();
-            checks.workers = { status: started ? "ok" : "error", active: started ? 2 : 0, expected: 2 };
+            checks.workers = { status: started ? "ok" : "error", active: started ? expected : 0, expected };
         }
 
         const ready = checks.redis === "ok" && checks.supabase === "ok" && checks.bullmq === "ok";
diff --git a/apps/api/src/server.ts b/apps/api/src/server.ts
index 5f982cc..65182c7 100644
--- a/apps/api/src/server.ts
+++ b/apps/api/src/server.ts
@@ -1,7 +1,7 @@
 import "./tracing.js";
 import { env, getConfigHash, getEnv, logStartupConfig } from "./config/env.js";
 import { buildApp } from "./app.js";
-import { shouldStartWorkers } from "./workers/startup.js";
+import { shouldStartWorkers, getExpectedWorkerCount } from "./workers/startup.js";
 
 async function start(): Promise<void> {
   // Force environment validation at process startup so production fails fast.
@@ -54,10 +54,21 @@ async function start(): Promise<void> {
       const { startRetryIntentCleanupJob } = await import("./workers/retry-cleanup.job.js");
 
       await startWorkers(app);
-      app.log.info({ activeWorkers: 2 }, "[BOOT] workers started");
+      app.log.info({ activeWorkers: getExpectedWorkerCount() }, "[BOOT] workers started");
       performStartupRecovery(app);
       void replayPendingRetryIntents(app);
       startRetryIntentCleanupJob(app);
+
+      // Restore any open circuit-breaker states from DB into Redis so that
+      // delivery workers respect open circuits after a Redis flush/restart.
+      const { syncCircuitBreakerState } = await import("./workers/circuit-breaker.js");
+      const { getRedisConnectionOptions } = await import("./config/redis.js");
+      const { Redis } = await import("ioredis");
+      const cbSyncRedis = new Redis(getRedisConnectionOptions());
+      cbSyncRedis.on("error", () => { /* non-fatal */ });
+      void syncCircuitBreakerState(cbSyncRedis, app.log).finally(() => {
+        void cbSyncRedis.quit().catch(() => undefined);
+      });
     } else {
       app.log.info(
         {
diff --git a/apps/api/src/utils/audit.ts b/apps/api/src/utils/audit.ts
new file mode 100644
index 0000000..489faab
--- /dev/null
+++ b/apps/api/src/utils/audit.ts
@@ -0,0 +1,43 @@
+/**
+ * audit.ts — Lightweight structured audit log writer.
+ *
+ * Inserts records into `public.admin_audit_log` via the Supabase service
+ * client.  Non-fatal: DB insertion failures are logged but never propagate
+ * to the caller, so a write error never breaks the primary admin action.
+ *
+ * Callers should also use `request.log.info({ audit: true, ... })` for
+ * structured log correlation in Loki/Grafana alongside DB records.
+ */
+
+import { supabaseServiceClient as supabase } from "../config/supabase.js";
+
+export interface AuditEntry {
+  event: string;
+  actor_id?: string | null;
+  organization_id?: string | null;
+  resource_type?: string;
+  resource_id?: string;
+  payload?: Record<string, unknown>;
+}
+
+/**
+ * Persist a single audit entry to the `admin_audit_log` table.
+ *
+ * Best-effort: an error reported by the insert is written to the console as
+ * a warning and is not thrown, so the caller's primary operation continues.
+ */
+export async function insertAuditRecord(entry: AuditEntry): Promise<void> {
+  const { event, actor_id, organization_id, resource_type, resource_id, payload } = entry;
+  const row = {
+    event,
+    actor_id:        actor_id ?? null,
+    organization_id: organization_id ?? null,
+    resource_type:   resource_type ?? null,
+    resource_id:     resource_id ?? null,
+    payload:         payload ?? {},
+  };
+  const { error } = await supabase.from("admin_audit_log").insert(row);
+  if (!error) return;
+  // Non-fatal: the failure is reported on the console and nothing is thrown.
+  console.warn("[audit] Failed to persist audit record:", error.message, { event });
+}
diff --git a/apps/api/src/utils/errors.ts b/apps/api/src/utils/errors.ts
index 17c2309..36cba29 100644
--- a/apps/api/src/utils/errors.ts
+++ b/apps/api/src/utils/errors.ts
@@ -62,6 +62,13 @@ export class QueueOverloadedError extends AppError {
     }
 }
 
+export class ServiceUnavailableError extends AppError {
+    constructor(message = "Service unavailable") {
+        super(message, 503, "SERVICE_UNAVAILABLE");
+        this.name = "ServiceUnavailableError";
+    }
+}
+
 // ─── Domain-specific errors ───────────────────────────────────────────────────
 
 export class EmployeeAlreadyCheckedIn extends BadRequestError {
diff --git a/apps/api/src/utils/hmac.ts b/apps/api/src/utils/hmac.ts
index ee667ab..ac31044 100644
--- a/apps/api/src/utils/hmac.ts
+++ b/apps/api/src/utils/hmac.ts
@@ -35,6 +35,36 @@ export function generateSignature(secret: string, payload: string): string {
   return `sha256=${hmac.digest("hex")}`;
 }
 
+// ─── Replay-Safe Signature ────────────────────────────────────────────────────
+
+/**
+ * Generate a timestamp-bound HMAC-SHA256 signature for outbound webhook delivery.
+ *
+ * Signing body: `{timestamp}.{payload}` — this binds the signature to both the
+ * payload content AND the delivery time, making captured requests non-replayable
+ * after the tolerance window (receivers should reject timestamps older than ~5 min).
+ *
+ * Returns both the Unix timestamp (seconds) used in signing, and the signature
+ * string.  The caller must send `X-FieldTrack-Timestamp: <ts>` as a header so
+ * the receiver can reconstruct the signed string for verification.
+ *
+ * @param secret   The per-webhook signing secret.
+ * @param payload  The raw request body string.
+ * @param tsSeconds  Unix timestamp in seconds (defaults to `Math.floor(Date.now() / 1000)`; not `| 0`, which wraps in 2038).
+ * @returns        `{ signature: "sha256=<hex>", timestamp: number }`
+ */
+export function generateSignatureWithTimestamp(
+  secret: string,
+  payload: string,
+  tsSeconds = Math.floor(Date.now() / 1000),
+): { signature: string; timestamp: number } {
+  const signingBody = `${tsSeconds}.${payload}`;
+  const hmac = createHmac("sha256", secret);
+  hmac.update(signingBody, "utf8");
+  const signature = `sha256=${hmac.digest("hex")}`;
+  return { signature, timestamp: tsSeconds };
+}
+
 // ─── Signature Verification ───────────────────────────────────────────────────
 
 /**
@@ -64,3 +94,27 @@ export function verifySignature(
 
   return timingSafeEqual(Buffer.from(expected, "utf8"), Buffer.from(received, "utf8"));
 }
+
+/**
+ * Verify a timestamp-bound signature and enforce the replay window.
+ *
+ * Returns true only when `timestampSeconds` lies within `toleranceSeconds`
+ * (default 300 s) of `nowSeconds` and `received` equals the signature of `{timestamp}.{payload}`.
+ */
+export function verifySignatureWithTimestamp(
+  secret: string,
+  payload: string,
+  received: string,
+  timestampSeconds: number,
+  nowSeconds = Math.floor(Date.now() / 1000),
+  toleranceSeconds = 300,
+): boolean {
+  if (!Number.isInteger(timestampSeconds)) return false;
+  if (!Number.isInteger(nowSeconds)) return false;
+  if (!Number.isFinite(toleranceSeconds) || toleranceSeconds < 0) return false;
+  if (Math.abs(nowSeconds - timestampSeconds) > toleranceSeconds) return false;
+  // Compare BYTE lengths: timingSafeEqual throws when the buffer sizes differ.
+  const expected = Buffer.from(generateSignature(secret, `${timestampSeconds}.${payload}`), "utf8");
+  const actual = Buffer.from(received, "utf8");
+  return expected.length === actual.length && timingSafeEqual(expected, actual);
+}
diff --git a/apps/api/src/workers/circuit-breaker.ts b/apps/api/src/workers/circuit-breaker.ts
new file mode 100644
index 0000000..c17561c
--- /dev/null
+++ b/apps/api/src/workers/circuit-breaker.ts
@@ -0,0 +1,399 @@
+/**
+ * circuit-breaker.ts — Per-webhook circuit breaker, Redis + DB persistent.
+ *
+ * State model
+ * ───────────
+ *   CLOSED    — normal operation (failure_streak < threshold)
+ *   OPEN      — webhook disabled; deliveries are skipped until
+ *               circuit_open_until has elapsed
+ *   HALF-OPEN — cooldown elapsed; next delivery attempt re-enables the webhook
+ *               if it succeeds (or re-opens the circuit if it fails)
+ *
+ * Persistence strategy
+ * ────────────────────
+ *   Hot path (per delivery): Redis only (sub-ms reads, atomic INCR)
+ *   Cold start / Redis flush: DB is the authoritative source of truth.
+ *     - openCircuit()  writes both Redis and DB
+ *     - closeCircuit() clears both Redis and DB
+ *     - syncCircuitBreakerState() is called once at startup to repopulate
+ *       Redis from DB, guaranteeing open circuits survive Redis restarts
+ *
+ * Redis keys
+ * ──────────
+ *   cb:failure_streak:{webhookId}    — INCR counter, TTL=24 h
+ *   cb:recovery_cooldown:{webhookId} — EX key, TTL=cooldown seconds
+ */
+
+import type { FastifyBaseLogger } from "fastify";
+import type { Redis as IORedis } from "ioredis";
+import { supabaseServiceClient as supabase } from "../config/supabase.js";
+import { insertAuditRecord } from "../utils/audit.js";
+
+// ─── Constants ─────────────────────────────────────────────────────────────
+
+/** Consecutive failures required to open the circuit. */
+export const CIRCUIT_OPEN_THRESHOLD = 5;
+
+/** How long a webhook stays disabled before auto-recovery attempt (ms). */
+export const CIRCUIT_RECOVERY_COOLDOWN_MS = 10 * 60_000; // 10 min
+
+/** How often to scan DB for expired open circuits and re-enable them. */
+export const CIRCUIT_RECOVERY_SCAN_INTERVAL_MS = 60_000; // 1 min
+
+/** TTL for the Redis streak key — auto-cleans idle webhooks. */
+const STREAK_TTL_SECONDS = 86_400; // 24 h
+
+// ─── Redis key helpers ──────────────────────────────────────────────────────
+
+export function streakKey(webhookId: string): string {
+  return `cb:failure_streak:${webhookId}`;
+}
+
+export function cooldownKey(webhookId: string): string {
+  return `cb:recovery_cooldown:${webhookId}`;
+}
+
+// ─── Cold-start sync ────────────────────────────────────────────────────────
+
+/**
+ * Re-populate Redis from DB on process start.
+ *
+ * Reads every webhook row whose circuit_open_until is still in the future and
+ * sets the Redis cooldown key with the remaining TTL (plus the failure streak,
+ * when > 0), so open circuits survive a Redis flush or process restart.
+ * Does not rethrow: a failed query or a thrown error is logged as a warning.
+ * Call once from server.ts after workers are started.
+ */
+export async function syncCircuitBreakerState(
+  redis: IORedis,
+  log: FastifyBaseLogger,
+): Promise<void> {
+  try {
+    const now = new Date().toISOString();
+    const { data: openWebhooks, error } = await supabase
+      .from("webhooks")
+      .select("id, circuit_open_until, failure_streak")
+      .gt("circuit_open_until", now);
+
+    if (error) {
+      log.warn({ error: error.message }, "circuit-breaker: startup sync DB query failed");
+      return;
+    }
+
+    if (!openWebhooks?.length) {
+      log.info("circuit-breaker: startup sync — no open circuits found");
+      return;
+    }
+
+    const pipeline = redis.pipeline();
+    let restored = 0;
+    for (const wh of openWebhooks) {
+      const webhookId = wh.id as string;
+      const remainingMs = new Date(wh.circuit_open_until as string).getTime() - Date.now();
+      if (remainingMs <= 0) continue; // expired between the query and this loop
+      pipeline.set(cooldownKey(webhookId), "1", "EX", Math.ceil(remainingMs / 1000));
+      if ((wh.failure_streak as number) > 0) {
+        // One SET … EX: the streak value and its TTL are written together.
+        pipeline.set(streakKey(webhookId), String(wh.failure_streak), "EX", STREAK_TTL_SECONDS);
+      }
+      restored++;
+    }
+    await pipeline.exec();
+
+    log.info(
+      { count: restored },
+      "circuit-breaker: startup sync — restored open circuits from DB",
+    );
+  } catch (err) {
+    log.warn(
+      { error: err instanceof Error ? err.message : String(err) },
+      "circuit-breaker: startup sync failed (non-fatal)",
+    );
+  }
+}
+
+// ─── Per-delivery operations ────────────────────────────────────────────────
+
+/**
+ * Record a successful delivery — clears the Redis streak key and, when the streak was > 0, resets it in DB (not awaited).
+ */
+export async function recordDeliverySuccess(
+  webhookId: string,
+  redis: IORedis,
+  log: FastifyBaseLogger,
+): Promise<void> {
+  try {
+    const key = streakKey(webhookId);
+    const prev = await redis.getdel(key);
+    const prevStreak = prev ? parseInt(prev, 10) : 0;
+
+    if (prevStreak > 0) {
+      log.info(
+        { webhookId, previousStreak: prevStreak, circuitBreaker: "reset" },
+        "circuit-breaker: streak reset after successful delivery",
+      );
+      // Persist the cleared streak to DB (non-blocking — failure is non-fatal)
+      supabase
+        .from("webhooks")
+        .update({ failure_streak: 0 })
+        .eq("id", webhookId)
+        .then(({ error }) => {
+          if (error) {
+            log.warn({ webhookId, error: error.message }, "circuit-breaker: failed to persist streak reset to DB");
+          }
+        });
+    }
+  } catch (err) {
+    log.warn(
+      { webhookId, error: err instanceof Error ? err.message : String(err) },
+      "circuit-breaker: failed to reset streak (Redis error)",
+    );
+  }
+}
+
+/**
+ * Record a delivery failure — increments streak in Redis + DB,
+ * opens the circuit if the threshold is reached.
+ *
+ * @returns current streak count, or 0 when an error was caught (it is logged as a warning)
+ */
+export async function recordDeliveryFailure(
+  webhookId: string,
+  redis: IORedis,
+  log: FastifyBaseLogger,
+): Promise<number> {
+  try {
+    const key = streakKey(webhookId);
+    const streak = await redis.incr(key);
+    await redis.expire(key, STREAK_TTL_SECONDS);
+
+    log.info(
+      { webhookId, streak, threshold: CIRCUIT_OPEN_THRESHOLD, circuitBreaker: "failure" },
+      "circuit-breaker: failure recorded",
+    );
+
+    // Persist streak to DB every increment so restarts see the latest value.
+    // Fire-and-forget — delivery path must not block on DB write.
+    supabase
+      .from("webhooks")
+      .update({ failure_streak: streak })
+      .eq("id", webhookId)
+      .then(({ error }) => {
+        if (error) {
+          log.warn({ webhookId, error: error.message }, "circuit-breaker: failed to persist streak to DB");
+        }
+      });
+
+    if (streak >= CIRCUIT_OPEN_THRESHOLD) {
+      await openCircuit(webhookId, streak, redis, log);
+    }
+
+    return streak;
+  } catch (err) {
+    log.warn(
+      { webhookId, error: err instanceof Error ? err.message : String(err) },
+      "circuit-breaker: failed to record failure (Redis error)",
+    );
+    return 0;
+  }
+}
+
+// ─── Circuit open / close ───────────────────────────────────────────────────
+
+/**
+ * Open the circuit — disable the webhook in DB + set Redis cooldown key.
+ * Writes `circuit_open_until` to the webhooks row for cross-restart persistence.
+ */
+async function openCircuit(
+  webhookId: string,
+  streak: number,
+  redis: IORedis,
+  log: FastifyBaseLogger,
+): Promise<void> {
+  const cooldownSeconds = Math.ceil(CIRCUIT_RECOVERY_COOLDOWN_MS / 1000);
+  const openUntil = new Date(Date.now() + CIRCUIT_RECOVERY_COOLDOWN_MS).toISOString();
+
+  log.warn(
+    {
+      webhookId,
+      streak,
+      threshold: CIRCUIT_OPEN_THRESHOLD,
+      recoveryCooldownMs: CIRCUIT_RECOVERY_COOLDOWN_MS,
+      circuitBreaker: "open",
+    },
+    "circuit-breaker: OPEN — disabling webhook temporarily",
+  );
+
+  // DB write: persist open state + timestamp so it survives restarts.
+  const { error } = await supabase
+    .from("webhooks")
+    .update({
+      is_active:          false,
+      failure_streak:     streak,
+      circuit_open_until: openUntil,
+    })
+    .eq("id", webhookId);
+
+  if (error) {
+    log.error(
+      { webhookId, error: error.message },
+      "circuit-breaker: failed to persist OPEN state to DB",
+    );
+    // Still set Redis key so in-process workers respect the circuit.
+  }
+
+  // Redis cooldown key: hot-path check
+  await redis.set(cooldownKey(webhookId), "1", "EX", cooldownSeconds);
+
+  log.warn(
+    { webhookId, cooldownSeconds, openUntil, circuitBreaker: "open" },
+    "circuit-breaker: webhook disabled, auto-recovery scheduled",
+  );
+
+  await insertAuditRecord({
+    event:         "CIRCUIT_BREAKER_OPENED",
+    resource_type: "webhook",
+    resource_id:   webhookId,
+    payload:       { streak, threshold: CIRCUIT_OPEN_THRESHOLD, cooldown_seconds: cooldownSeconds, open_until: openUntil },
+  });
+}
+
+/**
+ * Check if a webhook's circuit is ready for auto-recovery.
+ * Checks Redis first (fast), falls back to DB (resilient).
+ *
+ * @returns true if the cooldown has elapsed and circuit can be closed
+ */
+export async function isCircuitReadyToRecover(
+  webhookId: string,
+  redis: IORedis,
+): Promise<boolean> {
+  // Primary: Redis TTL-based check
+  try {
+    const exists = await redis.exists(cooldownKey(webhookId));
+    if (exists === 1) return false; // cooldown still active
+  } catch {
+    // Redis unavailable — fall through to DB check
+  }
+
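+  // Fallback: DB authoritative check. NOTE(review): the query error is ignored, so a failed lookup (no data) also returns true — confirm intended.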
+  const { data } = await supabase
+    .from("webhooks")
+    .select("circuit_open_until")
+    .eq("id", webhookId)
+    .single();
+
+  if (!data?.circuit_open_until) return true; // no open circuit in DB
+  return new Date(data.circuit_open_until as string) <= new Date();
+}
+
+/**
+ * Close the circuit — re-enable the webhook, clear streak, clear cooldown.
+ * Writes to both DB and Redis; a DB error is only logged and the Redis keys are still cleared.
+ */
+export async function closeCircuit(
+  webhookId: string,
+  redis: IORedis,
+  log: FastifyBaseLogger,
+): Promise<void> {
+  log.info(
+    { webhookId, circuitBreaker: "closed" },
+    "circuit-breaker: CLOSED — re-enabling webhook",
+  );
+
+  // DB: re-enable and clear circuit state
+  const { error } = await supabase
+    .from("webhooks")
+    .update({
+      is_active:          true,
+      failure_streak:     0,
+      circuit_open_until: null,
+    })
+    .eq("id", webhookId);
+
+  if (error) {
+    log.error({ webhookId, error: error.message }, "circuit-breaker: failed to persist CLOSED state to DB");
+  }
+
+  // Redis: clear streak + cooldown keys
+  await redis.del(streakKey(webhookId));
+  await redis.del(cooldownKey(webhookId));
+
+  await insertAuditRecord({
+    event:         "CIRCUIT_BREAKER_CLOSED",
+    resource_type: "webhook",
+    resource_id:   webhookId,
+    payload:       {},
+  });
+}
+
+/** Interval handle so the recovery scanner is started only once per process. */
+let _circuitRecoveryInterval: ReturnType<typeof setInterval> | undefined;
+
+/**
+ * Find all expired open circuits and close them.
+ *
+ * This restores webhook activity after cooldown without requiring a new
+ * delivery attempt to trigger recovery logic.
+ */
+export async function recoverExpiredCircuits(
+  redis: IORedis,
+  log: FastifyBaseLogger,
+): Promise<number> {
+  const now = new Date().toISOString();
+
+  const { data: recoverable, error } = await supabase
+    .from("webhooks")
+    .select("id")
+    .eq("is_active", false)
+    .not("circuit_open_until", "is", null)
+    .lte("circuit_open_until", now)
+    .limit(500);
+
+  if (error) {
+    log.warn({ error: error.message }, "circuit-breaker: recovery scan query failed");
+    return 0;
+  }
+
+  if (!recoverable?.length) return 0;
+
+  let recovered = 0;
+  for (const row of recoverable as Array<{ id: string }>) {
+    try {
+      await closeCircuit(row.id, redis, log);
+      recovered++;
+    } catch (err) {
+      log.warn(
+        {
+          webhookId: row.id,
+          error: err instanceof Error ? err.message : String(err),
+        },
+        "circuit-breaker: failed to recover expired circuit",
+      );
+    }
+  }
+
+  if (recovered > 0) {
+    log.info({ recovered }, "circuit-breaker: recovered expired circuits");
+  }
+
+  return recovered;
+}
+
+/** Start periodic scan that closes circuits after cooldown expiration. */
+export function startCircuitRecoveryInterval(
+  redis: IORedis,
+  log: FastifyBaseLogger,
+): ReturnType<typeof setInterval> {
+  if (_circuitRecoveryInterval) return _circuitRecoveryInterval;
+
+  void recoverExpiredCircuits(redis, log);
+  _circuitRecoveryInterval = setInterval(
+    () => {
+      void recoverExpiredCircuits(redis, log);
+    },
+    CIRCUIT_RECOVERY_SCAN_INTERVAL_MS,
+  );
+  _circuitRecoveryInterval.unref();
+  return _circuitRecoveryInterval;
+}
diff --git a/apps/api/src/workers/retry-intents.ts b/apps/api/src/workers/retry-intents.ts
index 7c3efda..d0c8dd2 100644
--- a/apps/api/src/workers/retry-intents.ts
+++ b/apps/api/src/workers/retry-intents.ts
@@ -29,7 +29,9 @@ function nextRetryIso(retryCount: number): string {
     RETRY_MAX_DELAY_SECONDS,
     RETRY_BASE_DELAY_SECONDS * (2 ** Math.max(0, retryCount - 1)),
   );
-  return new Date(Date.now() + backoffSeconds * 1000).toISOString();
+  // Add 0–20% jitter to prevent thundering-herd when many intents retry at once.
+  const jitterSeconds = backoffSeconds * 0.2 * Math.random();
+  return new Date(Date.now() + (backoffSeconds + jitterSeconds) * 1000).toISOString();
 }
 
 export async function persistRetryIntent(
diff --git a/apps/api/src/workers/startup.ts b/apps/api/src/workers/startup.ts
index ffe90f4..aa17096 100644
--- a/apps/api/src/workers/startup.ts
+++ b/apps/api/src/workers/startup.ts
@@ -1,6 +1,21 @@
 import type { FastifyInstance } from "fastify";
 import { env } from "../config/env.js";
 
+// ─── Worker registry ──────────────────────────────────────────────────────────
+
+/**
+ * Canonical list of all background worker types.
+ * Adding a new worker here automatically propagates the expected count to
+ * /ready, /admin/system-health, and all boot logs — no manual number updates.
+ */
+export const WORKER_TYPES = ["distance", "analytics", "webhook"] as const;
+export type WorkerType = (typeof WORKER_TYPES)[number];
+
+/** Expected number of background workers in a fully-started process. */
+export function getExpectedWorkerCount(): number {
+  return WORKER_TYPES.length;
+}
+
 /**
  * Overrides accepted by shouldStartWorkers() for unit-test injection.
  * Production code always calls shouldStartWorkers() with no arguments.
diff --git a/apps/api/src/workers/webhook.queue.ts b/apps/api/src/workers/webhook.queue.ts
index 3503298..c4d7557 100644
--- a/apps/api/src/workers/webhook.queue.ts
+++ b/apps/api/src/workers/webhook.queue.ts
@@ -8,12 +8,23 @@
  *
  * Job payload contains everything the worker needs to sign and deliver
  * the request without additional DB round-trips in the hot path.
+ *
+ * DLQ retention
+ * ─────────────
+ *  - Max DLQ size:     WEBHOOK_DLQ_MAX_SIZE  (default 10 000 jobs)
+ *  - Retention window: WEBHOOK_DLQ_RETENTION_DAYS (default 30 days)
+ *  - Jobs older than the window are archived to webhook_dlq_archive (DB)
+ *    then removed from Redis.
+ *  - purgeDlqJobs() is called on process start and every hour by the
+ *    purge interval started in webhook.worker.ts.
  */
 
 import { Queue } from "bullmq";
 import { getRedisConnectionOptions } from "../config/redis.js";
 import { env } from "../config/env.js";
 import { QueueOverloadedError } from "../utils/errors.js";
+import { supabaseServiceClient as supabase } from "../config/supabase.js";
+import { insertAuditRecord } from "../utils/audit.js";
 
 // ─── Job Payload ──────────────────────────────────────────────────────────────
 
@@ -47,42 +58,41 @@ export const WEBHOOK_QUEUE_NAME = "webhook-delivery" as const;
 // ─── Retry back-off delays (milliseconds) ────────────────────────────────────
 //
 // Attempt 1 → immediate (delay = 0, handled as first-try in BullMQ)
-// Attempt 2 → 30 s
-// Attempt 3 → 2 min
-// Attempt 4 → 10 min
+// Attempt 2 → 1 min
+// Attempt 3 → 5 min
+// Attempt 4 → 15 min
 // Attempt 5 → 1 h
 //
-// This matches the spec. BullMQ's built-in exponential backoff is not used
-// here because the spec defines specific absolute delays (not a geometric
-// series), so we supply a custom `delay` per job via the retry handler.
+// Production-grade exponential backoff matching the audit spec.
+// After attempt 5 fails, the delivery moves to the Dead-Letter Queue (DLQ).
 
 export const WEBHOOK_RETRY_DELAYS_MS: ReadonlyArray<number> = [
-  0,          // attempt 1 — immediate
-  30_000,     // attempt 2 — 30 s
-  120_000,    // attempt 3 — 2 min
-  600_000,    // attempt 4 — 10 min
-  3_600_000,  // attempt 5 — 1 h
+  0,           // attempt 1 — immediate
+  60_000,      // attempt 2 — 1 min
+  300_000,     // attempt 3 — 5 min
+  900_000,     // attempt 4 — 15 min
+  3_600_000,   // attempt 5 — 1 h
 ];
 
 export const WEBHOOK_MAX_ATTEMPTS = WEBHOOK_RETRY_DELAYS_MS.length;
 
 /**
- * Calculate retry delay with ±10% jitter to prevent thundering herd.
+ * Calculate retry delay with one-sided 0-20% jitter to prevent thundering herd.
  *
  * Without jitter, 100 failed deliveries all retry at the same time,
  * creating a synchronized spike that can cascade. Jitter spreads retries
  * across a window, stabilizing the system.
  *
- * Example: baseDelay=30s → 27-33s range (±10% jitter)
+ * Example: baseDelay=60s → 60-72s range (+0-20% jitter)
  *
  * @param attemptNumber 1-based attempt number (1=first retry, 2=second, etc.)
  * @returns delay in milliseconds for this retry
  */
 export function calculateRetryDelay(attemptNumber: number): number {
   const baseDelay = WEBHOOK_RETRY_DELAYS_MS[attemptNumber - 1];
-  // ±10% jitter: add/subtract up to 10% of base delay
-  const jitterRange = baseDelay * 0.1;
-  const jitterMs = jitterRange * (Math.random() * 2 - 1); // [-jitterRange, +jitterRange]
+  if (baseDelay === 0) return 0; // attempt 1 is always immediate — no jitter
+  // Mandatory formula: delay = base + random(0-20% of base)
+  const jitterMs = baseDelay * 0.2 * Math.random();
   return Math.round(baseDelay + jitterMs);
 }
 
@@ -141,6 +151,12 @@ export async function enqueueWebhookDelivery(
     {
       jobId: `delivery:${data.delivery_id}:${data.attempt_number}`,
       delay: delayMs,
+      // Priority ensures fresh first-attempt deliveries are never starved by
+      // a flood of retry jobs under sustained load.
+      // BullMQ priority: lower number = higher priority (1 = highest).
+      //   attempt 1 (first delivery) → priority 1 — processed first
+      //   attempt 2+  (retries)      → priority 2 — processed after fresh jobs
+      priority: data.attempt_number === 1 ? 1 : 2,
     },
   );
 }
@@ -157,3 +173,232 @@ export async function getWebhookQueueDepth(): Promise<number> {
   ]);
   return waiting + delayed;
 }
+
+// ─── Dead-Letter Queue (DLQ) ─────────────────────────────────────────────────
+//
+// Jobs that exhaust all retry attempts are moved here for visibility and
+// potential manual reprocessing by admins. The DLQ is a separate BullMQ
+// queue so it does not pollute the main delivery queue metrics.
+
+export const WEBHOOK_DLQ_NAME = "webhook-delivery-dlq" as const;
+
+let _webhookDlq: Queue<WebhookDeliveryJobData> | undefined;
+
+function getWebhookDlq(): Queue<WebhookDeliveryJobData> {
+  if (_webhookDlq) return _webhookDlq;
+
+  _webhookDlq = new Queue<WebhookDeliveryJobData>(WEBHOOK_DLQ_NAME, {
+    connection: getRedisConnectionOptions(),
+    defaultJobOptions: {
+      attempts: 1,
+      removeOnComplete: false, // keep DLQ entries for admin inspection
+      removeOnFail: false,
+    },
+  });
+
+  return _webhookDlq;
+}
+
+/**
+ * Move a permanently failed delivery job to the Dead-Letter Queue.
+ *
+ * Enforces WEBHOOK_DLQ_MAX_SIZE: if the DLQ is at capacity, the oldest job is
+ * archived and evicted before the new job is added.
+ */
+export async function enqueueToDlq(data: WebhookDeliveryJobData): Promise<void> {
+  const dlq = getWebhookDlq();
+
+  // ── Max-size guard ────────────────────────────────────────────────────────
+  const depth = await dlq.getWaitingCount();
+  if (depth >= env.WEBHOOK_DLQ_MAX_SIZE) {
+    // Evict the oldest job to stay within the cap.
+    const [oldest] = await dlq.getWaiting(0, 0);
+    if (oldest) {
+      await _archiveAndRemoveDlqJob(oldest, "max_size_eviction");
+    }
+  }
+
+  await dlq.add(
+    "dlq-delivery",
+    data,
+    { jobId: `dlq:${data.delivery_id}` },
+  );
+}
+
+/**
+ * Return the current DLQ depth for health monitoring.
+ */
+export async function getWebhookDlqDepth(): Promise<number> {
+  const dlq = getWebhookDlq();
+  return dlq.getWaitingCount();
+}
+
+/**
+ * Replay a single DLQ job by delivery ID.
+ *
+ * Called from `POST /admin/webhook-dlq/:deliveryId/replay`.
+ * Moves the job back to the main delivery queue for re-attempt.
+ *
+ * @returns `true` if the job was found and replayed, `false` if not found.
+ */
+export async function replayWebhookDlqJob(deliveryId: string): Promise<boolean> {
+  const dlq = getWebhookDlq();
+  const jobId = `dlq:${deliveryId}`;
+  const job = await dlq.getJob(jobId);
+  if (!job) return false;
+
+  // Re-enqueue in main queue with attempt_number reset to 1 — fresh start.
+  const data: WebhookDeliveryJobData = { ...job.data, attempt_number: 1 };
+  await enqueueWebhookDelivery(data, 0);
+  await job.remove();
+  return true;
+}
+
+/**
+ * List jobs currently in the DLQ (up to `limit`) for the admin review UI.
+ * A non-positive `limit` returns [] rather than the (0, -1) "all jobs" range.
+ */
+export async function listWebhookDlqJobs(
+  limit = 50,
+): Promise<Array<{ jobId: string; data: WebhookDeliveryJobData; failedAt?: number }>> {
+  if (!(limit > 0)) return [];
+  const jobs = await getWebhookDlq().getWaiting(0, limit - 1);
+  return jobs.map((j) => ({
+    jobId:    j.id ?? "(unknown)",
+    data:     j.data,
+    failedAt: j.timestamp,
+  }));
+}
+
+// ─── DLQ Retention / Purge ────────────────────────────────────────────────────
+
+/**
+ * Archive a single DLQ job to the DB, then remove it from Redis. Throws if the
+ * archive insert fails so the job stays queued. Exported for testability.
+ */
+export async function _archiveAndRemoveDlqJob(
+  job: { id?: string; data: WebhookDeliveryJobData; timestamp: number },
+  reason: string,
+): Promise<void> {
+  const { data } = job;
+  const failedAt = new Date(job.timestamp).toISOString();
+
+  const { error } = await supabase.from("webhook_dlq_archive").insert({
+    delivery_id:    data.delivery_id,
+    webhook_id:     data.webhook_id,
+    event_id:       data.event_id,
+    url:            data.url,
+    attempt_number: data.attempt_number,
+    failed_at:      failedAt,
+    reason,
+  });
+  // supabase-js reports failures through `error` rather than throwing, so raise
+  // here: callers then keep the job ("keep but warn") instead of losing data.
+  if (error) throw new Error(`webhook_dlq_archive insert failed: ${error.message}`);
+  await insertAuditRecord({
+    event: "WEBHOOK_DLQ_DELETED",
+    resource_type: "webhook_delivery",
+    resource_id: data.delivery_id,
+    payload: {
+      webhook_id: data.webhook_id,
+      event_id: data.event_id,
+      attempt_number: data.attempt_number,
+      failed_at: failedAt,
+      reason,
+    },
+  });
+
+  // Remove from BullMQ only after the archive write has succeeded.
+  const dlq = getWebhookDlq();
+  const liveJob = job.id ? await dlq.getJob(job.id) : undefined;
+  await liveJob?.remove();
+}
+
+/**
+ * Purge DLQ jobs older than WEBHOOK_DLQ_RETENTION_DAYS.
+ *
+ * For each expired job:
+ *  1. Archive payload to webhook_dlq_archive (DB)
+ *  2. Remove from BullMQ (Redis)
+ *
+ * Also enforces WEBHOOK_DLQ_MAX_SIZE: if depth still exceeds the cap after
+ * expiry-based purge, continues evicting oldest jobs until under the cap.
+ *
+ * Call on startup and then every hour (managed by startDlqPurgeInterval()).
+ *
+ * @returns count of jobs archived and removed
+ */
+export async function purgeDlqJobs(log?: { info: (msg: string, ctx?: object) => void; warn: (msg: string, ctx?: object) => void }): Promise<number> {
+  const dlq = getWebhookDlq();
+  const retentionMs = env.WEBHOOK_DLQ_RETENTION_DAYS * 24 * 3_600_000;
+  const cutoffMs    = Date.now() - retentionMs;
+
+  // Fetch all waiting jobs in one call — the DLQ is capped at WEBHOOK_DLQ_MAX_SIZE (default 10 000).
+  const allJobs   = await dlq.getWaiting(0, -1);
+  const expired   = allJobs.filter((j) => j.timestamp < cutoffMs);
+  let   purgeCount = 0;
+
+  for (const job of expired) {
+    try {
+      await _archiveAndRemoveDlqJob(job, "retention_policy");
+      purgeCount++;
+    } catch (err) {
+      log?.warn("dlq-purge: failed to archive job", {
+        jobId: job.id,
+        error: err instanceof Error ? err.message : String(err),
+      });
+    }
+  }
+
+  // After expiry purge, enforce max size by evicting oldest remaining jobs.
+  const remaining = await dlq.getWaiting(0, -1);
+  const overflow  = remaining.length - env.WEBHOOK_DLQ_MAX_SIZE;
+  if (overflow > 0) {
+    // Oldest first (lowest timestamp)
+    const toEvict = remaining
+      .sort((a, b) => a.timestamp - b.timestamp)
+      .slice(0, overflow);
+
+    for (const job of toEvict) {
+      try {
+        await _archiveAndRemoveDlqJob(job, "max_size_eviction");
+        purgeCount++;
+      } catch (err) {
+        log?.warn("dlq-purge: failed to evict overflow job", {
+          jobId: job.id,
+          error: err instanceof Error ? err.message : String(err),
+        });
+      }
+    }
+  }
+
+  if (purgeCount > 0) {
+    log?.info("dlq-purge: completed", { purgeCount, retentionDays: env.WEBHOOK_DLQ_RETENTION_DAYS });
+  }
+
+  return purgeCount;
+}
+
+/** Purge interval handle — stored so the interval can be cleared in tests. */
+let _dlqPurgeInterval: ReturnType<typeof setInterval> | undefined;
+
+/**
+ * Start the hourly DLQ purge background interval (first run is immediate).
+ * Returns the interval handle for cleanup.  Safe to call multiple times.
+ */
+export function startDlqPurgeInterval(
+  log?: { info: (msg: string, ctx?: object) => void; warn: (msg: string, ctx?: object) => void },
+): ReturnType<typeof setInterval> {
+  if (_dlqPurgeInterval) return _dlqPurgeInterval;
+
+  // A purge may reject (e.g. Redis unavailable); log it, never leave it unhandled.
+  const runPurge = (): void => {
+    purgeDlqJobs(log).catch((err: unknown) => {
+      log?.warn(`dlq-purge: run failed: ${err instanceof Error ? err.message : String(err)}`);
+    });
+  };
+  runPurge();
+  _dlqPurgeInterval = setInterval(runPurge, 3_600_000); // every hour
+  _dlqPurgeInterval.unref(); // Don't block process exit
+  return _dlqPurgeInterval;
+}
diff --git a/apps/api/src/workers/webhook.worker.ts b/apps/api/src/workers/webhook.worker.ts
index e11dd55..2f07f41 100644
--- a/apps/api/src/workers/webhook.worker.ts
+++ b/apps/api/src/workers/webhook.worker.ts
@@ -4,7 +4,8 @@
  * Lifecycle per job:
  *  1. Fetch the event payload from webhook_events.
  *  2. Serialize the envelope to a stable JSON string.
- *  3. Generate HMAC-SHA256 signature over the raw body.
+ *  3. Generate timestamp-bound HMAC-SHA256 signature over `timestamp.raw_body`.
+ *     Payloads above WEBHOOK_MAX_PAYLOAD_BYTES are marked failed and moved to the DLQ.
  *  4. POST to the webhook URL with a 5 s timeout.
  *  5. On success → mark delivery as `success`.
  *  6. On failure → schedule a retry (exponential delays) up to MAX_ATTEMPTS.
@@ -14,7 +15,7 @@
  *  - DNS rebinding defence: The hostname is resolved immediately before the
  *    HTTP request and checked against private IP ranges.
  *  - Request timeout enforced at 5 s.
- *  - Signature is HMAC-SHA256(secret, rawBody), header: X-FieldTrack-Signature.
+ *  - Signature is HMAC-SHA256(secret, `${timestamp}.${rawBody}`), header: X-FieldTrack-Signature.
  *
  * Worker gate: `startWebhookWorker()` is only called when
  * `shouldStartWorkers()` returns true (WORKERS_ENABLED=true AND not test env).
@@ -24,17 +25,70 @@ import { Worker } from "bullmq";
 import type { Job } from "bullmq";
 import type { FastifyInstance } from "fastify";
 import dns from "node:dns/promises";
-import { redisConnectionOptions } from "../config/redis.js";
+import { Redis } from "ioredis";
+import { redisConnectionOptions, getRedisConnectionOptions } from "../config/redis.js";
 import { supabaseServiceClient as supabase } from "../config/supabase.js";
-import { generateSignature } from "../utils/hmac.js";
+import { generateSignatureWithTimestamp } from "../utils/hmac.js";
 import { subscribeToEventBus } from "./webhook-event.service.js";
 import {
   WEBHOOK_QUEUE_NAME,
   WEBHOOK_MAX_ATTEMPTS,
   enqueueWebhookDelivery,
+  enqueueToDlq,
   calculateRetryDelay,
+  startDlqPurgeInterval,
   type WebhookDeliveryJobData,
 } from "./webhook.queue.js";
+import {
+  recordDeliverySuccess,
+  recordDeliveryFailure,
+  startCircuitRecoveryInterval,
+} from "./circuit-breaker.js";
+import {
+  webhookDeliveriesTotal,
+  webhookFailuresTotal,
+  webhookRetriesTotal,
+} from "../plugins/prometheus.js";
+import { env } from "../config/env.js";
+
+// ─── Metrics helpers ──────────────────────────────────────────────────────────
+
+/**
+ * Map a raw event_type string to a bounded Prometheus label value.
+ *
+ * Prometheus label cardinality must stay bounded.  Event types arrive from
+ * the DB payload and could theoretically be any string (e.g. from a future
+ * schema migration, a bug, or a bad INSERT).  Mapping unknowns to "other"
+ * keeps the label set finite and prevents cardinality explosion.
+ *
+ * Update this set whenever a new EventDataMap key is added to event-bus.ts.
+ */
+const KNOWN_EVENT_TYPES = new Set<string>([
+  "employee.checked_in",
+  "employee.checked_out",
+  "expense.created",
+  "expense.approved",
+  "expense.rejected",
+  "employee.created",
+]);
+
+function normalizeEventType(raw: string | undefined): string {
+  if (!raw) return "unknown";
+  return KNOWN_EVENT_TYPES.has(raw) ? raw : "other";
+}
+
+// ─── Shared Redis client for circuit-breaker streak counters ──────────────────
+// Lazy-created so tests that never call startWebhookWorker() pay zero cost.
+let _cbRedis: Redis | undefined;
+function getCbRedis(): Redis {
+  if (!_cbRedis) {
+    _cbRedis = new Redis(getRedisConnectionOptions());
+    _cbRedis.on("error", () => {
+      // Swallow — circuit-breaker Redis errors are non-fatal (delivery still proceeds)
+    });
+  }
+  return _cbRedis;
+}
 
 // ─── Private IP ranges (DNS rebinding defence) ───────────────────────────────
 
@@ -58,6 +112,7 @@ function isPrivateAddress(ip: string): boolean {
 // ─── HTTP delivery ────────────────────────────────────────────────────────────
 
 const DELIVERY_TIMEOUT_MS = 5_000;
+const WEBHOOK_PAYLOAD_MAX_BYTES = env.WEBHOOK_MAX_PAYLOAD_BYTES;
 
 /**
  * Perform one HTTP delivery attempt.
@@ -71,6 +126,9 @@ async function deliverWebhook(
   url: string,
   rawBody: string,
   signature: string,
+  eventType: string,
+  timestamp: number,
+  deliveryId: string,
 ): Promise<{ status: number; body: string }> {
   // ── DNS rebinding defence ──────────────────────────────────────────────────
   const parsed = new URL(url);
@@ -105,12 +163,17 @@ async function deliverWebhook(
     const response = await fetch(url, {
       method: "POST",
       headers: {
-        "Content-Type":            "application/json",
-        "X-FieldTrack-Signature":  signature,
-        "X-FieldTrack-Event":      "webhook-delivery",
-        "User-Agent":              "FieldTrack-Webhooks/1.0",
+        "Content-Type":               "application/json",
+        "X-FieldTrack-Signature":     signature,
+        "X-FieldTrack-Event":         eventType,
+        "X-FieldTrack-Timestamp":     String(timestamp),
+        "X-FieldTrack-Delivery-Id":   deliveryId,
+        "User-Agent":                 "FieldTrack-Webhooks/1.0",
       },
       body: rawBody,
+      // Never follow redirects — a redirect could point to an internal address
+      // that bypassed the SSRF DNS check performed above.
+      redirect: "error",
       signal: controller.signal,
     });
 
@@ -208,8 +271,25 @@ async function scheduleRetryOrFail(
   } else {
     app.log.warn(
       { deliveryId, webhookId: webhook_id, attemptNumber },
-      "webhook.worker: max attempts reached, delivery permanently failed",
+      "webhook.worker: max attempts reached, moving delivery to DLQ",
     );
+    // Move to Dead-Letter Queue so the delivery remains visible to admins.
+    try {
+      await enqueueToDlq({
+        delivery_id:    deliveryId,
+        webhook_id,
+        event_id,
+        url,
+        secret,
+        attempt_number: attemptNumber,
+      });
+    } catch (dlqErr: unknown) {
+      const msg = dlqErr instanceof Error ? dlqErr.message : String(dlqErr);
+      app.log.error(
+        { deliveryId, webhookId: webhook_id, error: msg },
+        "webhook.worker: failed to enqueue to DLQ — delivery already marked failed in DB",
+      );
+    }
   }
 }
 
@@ -240,6 +320,25 @@ export function startWebhookWorker(app: FastifyInstance): Worker | null {
         "webhook.worker: processing delivery job",
       );
 
+      // ── Idempotency guard ────────────────────────────────────────────────
+      // Skip the job when the delivery row is already marked `success`.
+      // Prevents duplicate delivery if BullMQ re-delivers a job (e.g. after an
+      // ungraceful shutdown) or an admin manually retried while a queued job
+      // was already in flight. Other statuses (and lookup errors) fall through.
+      const { data: deliveryCheck } = await supabase
+        .from("webhook_deliveries")
+        .select("status")
+        .eq("id", delivery_id)
+        .single();
+
+      if (deliveryCheck && deliveryCheck.status === "success") {
+        app.log.info(
+          { deliveryId: delivery_id, webhookId: webhook_id },
+          "webhook.worker: delivery already succeeded \u2014 skipping duplicate job",
+        );
+        return;
+      }
+
       // ── Fetch event payload ──────────────────────────────────────────────
       const { data: eventRow, error: fetchError } = await supabase
         .from("webhook_events")
@@ -265,20 +364,85 @@ export function startWebhookWorker(app: FastifyInstance): Worker | null {
 
       // ── Build and sign the request body ───────────────────────────────────
       const rawBody = JSON.stringify(eventRow.payload);
-      const signature = generateSignature(secret, rawBody);
+      const { signature, timestamp: deliveryTs } = generateSignatureWithTimestamp(secret, rawBody);
+      // Extract the event type from the envelope payload for the request header.
+      // The payload is the full EventEnvelope which always carries a `type` field.
+      const eventType =
+        (eventRow.payload as Record<string, unknown>).type as string | undefined
+        ?? "webhook-delivery";
+
+      const payloadBytes = Buffer.byteLength(rawBody, "utf8");
+      if (payloadBytes > WEBHOOK_PAYLOAD_MAX_BYTES) {
+        const message =
+          `Payload size ${payloadBytes} bytes exceeds cap ${WEBHOOK_PAYLOAD_MAX_BYTES} bytes`;
+        app.log.error(
+          {
+            deliveryId: delivery_id,
+            webhookId: webhook_id,
+            eventId: event_id,
+            payloadBytes,
+            maxBytes: WEBHOOK_PAYLOAD_MAX_BYTES,
+          },
+          "webhook.worker: payload exceeds size cap, marking failed",
+        );
+
+        await supabase
+          .from("webhook_deliveries")
+          .update({
+            status: "failed",
+            attempt_count: attempt_number,
+            response_body: message,
+            last_attempt_at: new Date().toISOString(),
+            next_retry_at: null,
+          })
+          .eq("id", delivery_id);
+
+        webhookDeliveriesTotal
+          .labels({ event_type: normalizeEventType(eventType), status: "failed" })
+          .inc();
+        webhookFailuresTotal
+          .labels({ event_type: normalizeEventType(eventType) })
+          .inc();
+
+        try {
+          await enqueueToDlq({
+            delivery_id,
+            webhook_id,
+            event_id,
+            url,
+            secret,
+            attempt_number,
+          });
+        } catch (dlqErr: unknown) {
+          app.log.error(
+            {
+              deliveryId: delivery_id,
+              webhookId: webhook_id,
+              error: dlqErr instanceof Error ? dlqErr.message : String(dlqErr),
+            },
+            "webhook.worker: failed to enqueue oversize payload delivery to DLQ",
+          );
+        }
+        return;
+      }
 
       // ── Deliver ───────────────────────────────────────────────────────────
       try {
-        const { status, body } = await deliverWebhook(url, rawBody, signature);
+        const { status, body } = await deliverWebhook(url, rawBody, signature, eventType, deliveryTs, delivery_id);
         const succeeded = status >= 200 && status < 300;
 
         if (succeeded) {
           await markSuccess(delivery_id, status, body);
+          await recordDeliverySuccess(webhook_id, getCbRedis(), app.log);
+          webhookDeliveriesTotal
+            .labels({ event_type: normalizeEventType(eventType), status: "success" })
+            .inc();
           app.log.info(
             { deliveryId: delivery_id, webhookId: webhook_id, responseStatus: status },
             "webhook.worker: delivery succeeded",
           );
         } else {
+          const willRetry = attempt_number + 1 <= WEBHOOK_MAX_ATTEMPTS;
           app.log.warn(
             {
               deliveryId: delivery_id,
@@ -288,6 +452,19 @@ export function startWebhookWorker(app: FastifyInstance): Worker | null {
             },
             "webhook.worker: delivery got non-2xx response, scheduling retry",
           );
+          await recordDeliveryFailure(webhook_id, getCbRedis(), app.log);
+          webhookDeliveriesTotal
+            .labels({ event_type: normalizeEventType(eventType), status: "failed" })
+            .inc();
+          if (willRetry) {
+            webhookRetriesTotal
+              .labels({ event_type: normalizeEventType(eventType) })
+              .inc();
+          } else {
+            webhookFailuresTotal
+              .labels({ event_type: normalizeEventType(eventType) })
+              .inc();
+          }
           await scheduleRetryOrFail(
             delivery_id,
             webhook_id,
@@ -302,6 +479,7 @@ export function startWebhookWorker(app: FastifyInstance): Worker | null {
         }
       } catch (err: unknown) {
         const message = err instanceof Error ? err.message : String(err);
+        const willRetry = attempt_number + 1 <= WEBHOOK_MAX_ATTEMPTS;
         app.log.error(
           {
             deliveryId: delivery_id,
@@ -311,6 +489,19 @@ export function startWebhookWorker(app: FastifyInstance): Worker | null {
           },
           "webhook.worker: delivery attempt threw error, scheduling retry",
         );
+        await recordDeliveryFailure(webhook_id, getCbRedis(), app.log);
+        webhookDeliveriesTotal
+          .labels({ event_type: normalizeEventType(eventType), status: "error" })
+          .inc();
+        if (willRetry) {
+          webhookRetriesTotal
+            .labels({ event_type: normalizeEventType(eventType) })
+            .inc();
+        } else {
+          webhookFailuresTotal
+            .labels({ event_type: normalizeEventType(eventType) })
+            .inc();
+        }
         await scheduleRetryOrFail(
           delivery_id,
           webhook_id,
@@ -326,7 +517,7 @@ export function startWebhookWorker(app: FastifyInstance): Worker | null {
     },
     {
       connection: redisConnectionOptions,
-      concurrency: 5,
+      concurrency: env.WEBHOOK_WORKER_CONCURRENCY,
       lockDuration: 30_000,
     },
   );
@@ -339,6 +530,13 @@ export function startWebhookWorker(app: FastifyInstance): Worker | null {
     );
   });
 
-  app.log.info("webhook.worker: started");
+  // Start the hourly DLQ retention purge (archives expired jobs, enforces max size).
+  startDlqPurgeInterval(app.log);
+  startCircuitRecoveryInterval(getCbRedis(), app.log);
+
+  app.log.info(
+    { concurrency: env.WEBHOOK_WORKER_CONCURRENCY },
+    "webhook.worker: started",
+  );
   return worker;
 }
diff --git a/apps/api/tests/integration/admin/webhooks.integration.test.ts b/apps/api/tests/integration/admin/webhooks.integration.test.ts
index 70a40a0..80de00c 100644
--- a/apps/api/tests/integration/admin/webhooks.integration.test.ts
+++ b/apps/api/tests/integration/admin/webhooks.integration.test.ts
@@ -21,6 +21,14 @@ vi.mock("../../../src/config/redis.js", () => ({
   redisConnectionOptions: {},
 }));
 
+// shouldStartWorkers must return true so the retry endpoint does not reject
+// with 503 "Workers not enabled" in test context.
+vi.mock("../../../src/workers/startup.js", () => ({
+  shouldStartWorkers: vi.fn().mockReturnValue(true),
+  areWorkersStarted: vi.fn().mockReturnValue(true),
+  startWorkers: vi.fn().mockResolvedValue(undefined),
+}));
+
 vi.mock("../../../src/workers/distance.queue.js", () => ({
   enqueueDistanceJob: vi.fn().mockResolvedValue(undefined),
 }));
@@ -31,10 +39,12 @@ vi.mock("../../../src/workers/analytics.queue.js", () => ({
 
 vi.mock("../../../src/workers/webhook.queue.js", () => ({
   enqueueWebhookDelivery: vi.fn().mockResolvedValue(undefined),
+  enqueueToDlq:           vi.fn().mockResolvedValue(undefined),
   WEBHOOK_QUEUE_NAME:     "webhook-delivery",
-  WEBHOOK_RETRY_DELAYS_MS: [0, 30_000, 120_000, 600_000, 3_600_000],
+  WEBHOOK_RETRY_DELAYS_MS: [0, 60_000, 300_000, 900_000, 3_600_000],
   WEBHOOK_MAX_ATTEMPTS:    5,
   getWebhookQueueDepth:   vi.fn().mockResolvedValue(0),
+  getWebhookDlqDepth:     vi.fn().mockResolvedValue(0),
 }));
 
 vi.mock("../../../src/modules/webhooks/webhooks.repository.js", () => ({
@@ -377,6 +387,23 @@ describe("Webhooks Admin API", () => {
 
       expect(res.statusCode).toBe(200);
     });
+
+    it("returns 403 for EMPLOYEE role", async () => {
+      const res = await app.inject({
+        method:  "GET",
+        url:     "/admin/webhook-deliveries",
+        headers: { authorization: `Bearer ${employeeToken}` },
+      });
+      expect(res.statusCode).toBe(403);
+    });
+
+    it("returns 401 with no token", async () => {
+      const res = await app.inject({
+        method: "GET",
+        url:    "/admin/webhook-deliveries",
+      });
+      expect(res.statusCode).toBe(401);
+    });
   });
 
   // ─── POST /admin/webhook-deliveries/:id/retry ───────────────────────────────
@@ -436,5 +463,22 @@ describe("Webhooks Admin API", () => {
 
       expect(res.statusCode).toBe(400);
     });
+
+    it("returns 403 for EMPLOYEE role", async () => {
+      const res = await app.inject({
+        method:  "POST",
+        url:     `/admin/webhook-deliveries/${DELIVERY_ID}/retry`,
+        headers: { authorization: `Bearer ${employeeToken}` },
+      });
+      expect(res.statusCode).toBe(403);
+    });
+
+    it("returns 401 with no token", async () => {
+      const res = await app.inject({
+        method: "POST",
+        url:    `/admin/webhook-deliveries/${DELIVERY_ID}/retry`,
+      });
+      expect(res.statusCode).toBe(401);
+    });
   });
 });
diff --git a/apps/api/tests/unit/utils/webhook.unit.test.ts b/apps/api/tests/unit/utils/webhook.unit.test.ts
index 54d8b02..28000b0 100644
--- a/apps/api/tests/unit/utils/webhook.unit.test.ts
+++ b/apps/api/tests/unit/utils/webhook.unit.test.ts
@@ -12,7 +12,15 @@
  * graph can be wired up without vi.doMock / vi.resetModules complications.
  */
 
-import { describe, it, expect } from "vitest";
+import { describe, it, expect, vi } from "vitest";
+
+// Mock Redis and BullMQ queue creation so importing webhook.queue.ts does not
+// attempt a real TCP connection to redis://localhost:6379 in unit-test context.
+vi.mock("../../../src/config/redis.js", () => ({
+  redisClient:             { on: vi.fn(), quit: vi.fn(), disconnect: vi.fn() },
+  getRedisConnectionOptions: vi.fn().mockReturnValue({ host: "localhost", port: 6379 }),
+  redisConnectionOptions:  { host: "localhost", port: 6379 },
+}));
 
 // ─── hmac.ts ─────────────────────────────────────────────────────────────────
 
@@ -43,6 +51,18 @@ describe("generateSignature", () => {
   });
 });
 
+describe("generateSignatureWithTimestamp", () => {
+  it("should sign timestamp.payload and return timestamp", async () => {
+    const { generateSignature, generateSignatureWithTimestamp } = await import("../../../src/utils/hmac.js");
+    const payload = JSON.stringify({ id: "evt_123" });
+    const ts = 1_700_000_000;
+    const { signature, timestamp } = generateSignatureWithTimestamp("secret", payload, ts);
+
+    expect(timestamp).toBe(ts);
+    expect(signature).toBe(generateSignature("secret", `${ts}.${payload}`));
+  });
+});
+
 describe("verifySignature", () => {
   it("should return true for a correctly generated signature", async () => {
     const { generateSignature, verifySignature } = await import("../../../src/utils/hmac.js");
@@ -72,6 +92,34 @@ describe("verifySignature", () => {
   });
 });
 
+describe("verifySignatureWithTimestamp", () => {
+  it("returns true for valid signature inside tolerance", async () => {
+    const { generateSignatureWithTimestamp, verifySignatureWithTimestamp } = await import(
+      "../../../src/utils/hmac.js"
+    );
+    const payload = JSON.stringify({ type: "expense.created" });
+    const now = 1_700_000_000;
+    const { signature, timestamp } = generateSignatureWithTimestamp("secret", payload, now - 60);
+
+    expect(
+      verifySignatureWithTimestamp("secret", payload, signature, timestamp, now, 300),
+    ).toBe(true);
+  });
+
+  it("returns false when timestamp is outside tolerance", async () => {
+    const { generateSignatureWithTimestamp, verifySignatureWithTimestamp } = await import(
+      "../../../src/utils/hmac.js"
+    );
+    const payload = JSON.stringify({ type: "expense.created" });
+    const now = 1_700_000_000;
+    const { signature, timestamp } = generateSignatureWithTimestamp("secret", payload, now - 400);
+
+    expect(
+      verifySignatureWithTimestamp("secret", payload, signature, timestamp, now, 300),
+    ).toBe(false);
+  });
+});
+
 // ─── url-validator.ts ─────────────────────────────────────────────────────────
 
 describe("validateWebhookUrl", () => {
@@ -124,11 +172,39 @@ describe("WEBHOOK_RETRY_DELAYS_MS", () => {
     );
     expect(WEBHOOK_MAX_ATTEMPTS).toBe(5);
     expect(WEBHOOK_RETRY_DELAYS_MS).toHaveLength(5);
-    expect(WEBHOOK_RETRY_DELAYS_MS[0]).toBe(0);          // attempt 1 immediate
-    expect(WEBHOOK_RETRY_DELAYS_MS[1]).toBe(30_000);     // attempt 2 → 30 s
-    expect(WEBHOOK_RETRY_DELAYS_MS[2]).toBe(120_000);    // attempt 3 → 2 min
-    expect(WEBHOOK_RETRY_DELAYS_MS[3]).toBe(600_000);    // attempt 4 → 10 min
-    expect(WEBHOOK_RETRY_DELAYS_MS[4]).toBe(3_600_000);  // attempt 5 → 1 h
+    expect(WEBHOOK_RETRY_DELAYS_MS[0]).toBe(0);           // attempt 1 immediate
+    expect(WEBHOOK_RETRY_DELAYS_MS[1]).toBe(60_000);      // attempt 2 → 1 min
+    expect(WEBHOOK_RETRY_DELAYS_MS[2]).toBe(300_000);     // attempt 3 → 5 min
+    expect(WEBHOOK_RETRY_DELAYS_MS[3]).toBe(900_000);     // attempt 4 → 15 min
+    expect(WEBHOOK_RETRY_DELAYS_MS[4]).toBe(3_600_000);   // attempt 5 → 1 h
+  });
+});
+
+// calculateRetryDelay: pin Math.random() to its extremes to check the jitter bounds.
+// Each spy is restored in `finally` so a failed expectation cannot leak a stubbed
+// Math.random into later tests; restoring the spy itself (rather than calling
+// vi.restoreAllMocks()) also leaves the module-level vi.fn() stubs untouched.
+describe("calculateRetryDelay", () => {
+  it("should never return less than base delay", async () => {
+    const { calculateRetryDelay, WEBHOOK_RETRY_DELAYS_MS } = await import(
+      "../../../src/workers/webhook.queue.js"
+    );
+    const randomSpy = vi.spyOn(Math, "random").mockReturnValue(0);
+    try {
+      expect(calculateRetryDelay(2)).toBe(WEBHOOK_RETRY_DELAYS_MS[1]);
+    } finally {
+      randomSpy.mockRestore();
+    }
+  });
+
+  it("should cap jitter at +20% when Math.random() is 1", async () => {
+    const { calculateRetryDelay } = await import("../../../src/workers/webhook.queue.js");
+    const randomSpy = vi.spyOn(Math, "random").mockReturnValue(1);
+    try {
+      expect(calculateRetryDelay(2)).toBe(72_000); // 60_000 + 20%
+    } finally {
+      randomSpy.mockRestore();
+    }
   });
 });
 
diff --git a/apps/web/next.config.mjs b/apps/web/next.config.mjs
index f3daa52..6173a49 100644
--- a/apps/web/next.config.mjs
+++ b/apps/web/next.config.mjs
@@ -1,5 +1,7 @@
 /** @type {import('next').NextConfig} */
 
+const isDev = process.env.NODE_ENV === 'development';
+
 // NEXT_PUBLIC_API_BASE_URL controls how the browser reaches the backend.
 //
 // Mode A — Direct (recommended for Vercel):
@@ -47,6 +49,7 @@ const nextConfig = {
       "https://*.tiles.mapbox.com", // Mapbox raster / vector tiles
       "https://api.mapbox.com",     // Mapbox geocoding, directions, styles
       "https://events.mapbox.com",  // Mapbox telemetry
+      "https://*.tile.openstreetmap.org", // Leaflet / OpenStreetMap tiles
     ];
     // Only add the API origin when it is a full URL — same-origin requests
     // (/api/proxy path) are already covered by 'self' above.
@@ -67,7 +70,9 @@ const nextConfig = {
             key: "Content-Security-Policy",
             value: [
               "default-src 'self'",
-              "script-src 'self' 'unsafe-inline'",
+              // In development, Next.js Fast Refresh (HMR) requires 'unsafe-eval'.
+              // Without it the React event system breaks and forms submit natively.
+              isDev ? "script-src 'self' 'unsafe-inline' 'unsafe-eval'" : "script-src 'self' 'unsafe-inline'",
               "style-src 'self' 'unsafe-inline'",
               // blob: required for Mapbox GL sprite / image atlas
               "img-src 'self' data: blob: https:",
diff --git a/apps/web/package.json b/apps/web/package.json
index db821bf..19d2cce 100644
--- a/apps/web/package.json
+++ b/apps/web/package.json
@@ -15,6 +15,7 @@
   },
   "dependencies": {
     "@fieldtrack/types": "*",
+    "@radix-ui/react-alert-dialog": "^1.1.15",
     "@radix-ui/react-avatar": "^1.1.2",
     "@radix-ui/react-dialog": "^1.1.4",
     "@radix-ui/react-dropdown-menu": "^2.1.4",
@@ -28,10 +29,12 @@
     "@supabase/supabase-js": "^2.46.2",
     "@tanstack/react-query": "^5.62.7",
     "@types/leaflet": "^1.9.21",
+    "@types/leaflet.markercluster": "^1.5.6",
     "class-variance-authority": "^0.7.1",
     "clsx": "^2.1.1",
     "framer-motion": "^12.36.0",
     "leaflet": "^1.9.4",
+    "leaflet.markercluster": "^1.5.3",
     "lucide-react": "^0.468.0",
     "mapbox-gl": "^3.8.0",
     "next": "^15.1.3",
diff --git a/apps/web/src/app/(protected)/admin/monitoring/map/EmployeeMap.tsx b/apps/web/src/app/(protected)/admin/monitoring/map/EmployeeMap.tsx
index 2191dbc..8efbb8b 100644
--- a/apps/web/src/app/(protected)/admin/monitoring/map/EmployeeMap.tsx
+++ b/apps/web/src/app/(protected)/admin/monitoring/map/EmployeeMap.tsx
@@ -1,61 +1,80 @@
 "use client";
 
 /**
- * EmployeeMap — Leaflet map component.
+ * EmployeeMap — Leaflet map with MarkerClusterGroup support.
  *
  * Imported dynamically with `ssr: false` from the parent page because Leaflet
  * accesses `window` at module initialisation time and will crash Next.js SSR.
  *
  * Marker colour scheme:
- *   ACTIVE  → green  (checked in within the last 2 hours)
- *   RECENT  → orange (checked out, still this calendar day)
- *   INACTIVE → grey  (no session activity today)
+ *   ACTIVE   → green  (checked in within the last 2 hours)
+ *   RECENT   → orange (checked out, still this calendar day)
+ *   INACTIVE → grey   (no session activity today)
+ *
+ * Selected employee → enlarged SVG + pulsing ring overlay.
+ * Clustering        → nearby markers grouped at low zoom via MarkerClusterGroup.
  */
 
 import { useEffect, useRef } from "react";
 import type { Map as LeafletMap, Marker as LeafletMarker } from "leaflet";
 import L from "leaflet";
+import "leaflet.markercluster";
 import type { EmployeeMapMarker } from "@/types";
 
-// ─── Marker icon colours matching status ──────────────────────────────────────
+// ─── Marker icon colours ──────────────────────────────────────────────────────
 
 const STATUS_COLOURS: Record<EmployeeMapMarker["status"], string> = {
-  ACTIVE: "#22c55e",   // green-500
-  RECENT: "#f97316",  // orange-500
-  INACTIVE: "#94a3b8", // slate-400
+  ACTIVE:   "#22c55e",  // green-500
+  RECENT:   "#f97316",  // orange-500
+  INACTIVE: "#94a3b8",  // slate-400
 };
 
-function makeIcon(status: EmployeeMapMarker["status"]) {
+function makeIcon(
+  status: EmployeeMapMarker["status"],
+  selected = false
+) {
   const colour = STATUS_COLOURS[status];
-  // Inline SVG circle marker — avoids the default Leaflet PNG which requires
-  // webpack file-loader config. Works in all build setups without extra config.
+  // Inline SVG marker (no image assets needed); a selected marker is larger and pulses.
+  const size   = selected ? 32 : 24;  // icon edge length in px
+  const inner  = selected ? 8  : 5;   // radius of the white centre dot
+  const half   = size / 2;            // centre coordinate on both axes
+  const pulse = selected  // ring grows to r = half + 2, past the viewBox — hence overflow:visible on <svg>
+    ? `<circle cx="${half}" cy="${half}" r="${half - 2}" fill="${colour}" opacity="0.25">
+         <animate attributeName="r" values="${half - 2};${half + 2};${half - 2}" dur="1.6s" repeatCount="indefinite"/>
+         <animate attributeName="opacity" values="0.25;0;0.25" dur="1.6s" repeatCount="indefinite"/>
+       </circle>`
+    : "";
   const svg = `
-    <svg xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24">
-      <circle cx="12" cy="12" r="10" fill="${colour}" opacity="0.9"/>
-      <circle cx="12" cy="12" r="5"  fill="#fff"      opacity="0.7"/>
+    <svg xmlns="http://www.w3.org/2000/svg" width="${size}" height="${size}" viewBox="0 0 ${size} ${size}" style="overflow:visible">
+      ${pulse}
+      <circle cx="${half}" cy="${half}" r="${half - 2}" fill="${colour}" opacity="${selected ? 1 : 0.9}"/>
+      <circle cx="${half}" cy="${half}" r="${inner}" fill="#fff" opacity="0.8"/>
     </svg>
   `.trim();
 
   return L.divIcon({
     html: svg,
-    className: "",      // prevent Leaflet's default white-box class
-    iconSize: [24, 24],
-    iconAnchor: [12, 12],
-    popupAnchor: [0, -14],
+    className: "",       // prevent Leaflet's default white-box class
+    iconSize:   [size, size],
+    iconAnchor: [half, half],
+    popupAnchor: [0, -(half + 2)],
   });
 }
 
-// ─── Popup HTML (pure string — Leaflet renders these) ─────────────────────────
+// ─── Popup HTML ───────────────────────────────────────────────────────────────
 
 function buildPopupHtml(m: EmployeeMapMarker): string {
-  const ts = new Date(m.recordedAt).toLocaleString();
-  const code = m.employeeCode ? ` (${m.employeeCode})` : "";
-  const statusColour = m.status === "ACTIVE" ? "green" : m.status === "RECENT" ? "orange" : "grey";
+  // Builds the popup markup for one marker. Leaflet inserts popup strings as raw
+  // HTML, so the name/code taken from marker data are entity-escaped first.
+  const esc  = (s: string) => s.replace(/[&<>"']/g, (c) => `&#${c.charCodeAt(0)};`);
+  const ts   = new Date(m.recordedAt).toLocaleString();
+  const code = m.employeeCode ? ` (${esc(m.employeeCode)})` : "";
+  const statusColour = STATUS_COLOURS[m.status]; // same palette as the marker icon
   return `
-    <div style="min-width:160px;font-family:sans-serif;font-size:13px">
-      <strong style="font-size:14px">${m.employeeName}${code}</strong><br/>
-      <span style="color:${statusColour};font-weight:600">${m.status}</span><br/>
-      <span style="color:#555;font-size:11px">Last fix: ${ts}</span>
+    <div style="min-width:170px;font-family:sans-serif;font-size:13px;line-height:1.5">
+      <strong style="font-size:14px">${esc(m.employeeName)}${code}</strong><br/>
+      <span style="color:${statusColour};font-weight:700;text-transform:uppercase;font-size:11px">${m.status}</span><br/>
+      <span style="color:#888;font-size:11px">Last fix: ${ts}</span>
     </div>
   `.trim();
 }
@@ -63,28 +82,26 @@ function buildPopupHtml(m: EmployeeMapMarker): string {
 // ─── Component ────────────────────────────────────────────────────────────────
 
 interface Props {
-  markers: EmployeeMapMarker[];
-  isLoading: boolean;
+  markers:            EmployeeMapMarker[];
+  isLoading:          boolean;
+  selectedEmployeeId?: string | null;
 }
 
-export default function EmployeeMap({ markers, isLoading }: Props) {
+export default function EmployeeMap({ markers, isLoading, selectedEmployeeId }: Props) {
   const mapContainerRef = useRef<HTMLDivElement>(null);
-  const mapRef = useRef<LeafletMap | null>(null);
-  const markerLayerRef = useRef<LeafletMarker[]>([]);
+  const mapRef          = useRef<LeafletMap | null>(null);
+  const clusterGroupRef = useRef<L.MarkerClusterGroup | null>(null);
+  // Track current markers by employeeId → Leaflet marker
+  const markerMapRef    = useRef<Map<string, LeafletMarker>>(new Map());
 
-  // Initialise Leaflet map once
+  // ── Initialise Leaflet map once ────────────────────────────────────────────
   useEffect(() => {
     if (!mapContainerRef.current || mapRef.current) return;
+    const container = mapContainerRef.current;
 
-    // Leaflet's default icon path breaks with webpack/Next.js — fix it
-    // by telling it to use an empty icon. We override icons per-marker anyway.
-    // @ts-expect-error _getIconUrl is an internal Leaflet method
-    delete L.Icon.Default.prototype._getIconUrl;
-    L.Icon.Default.mergeOptions({ iconUrl: "", shadowUrl: "" });
-
-    const map = L.map(mapContainerRef.current, {
-      center: [20, 0],   // world view until we fit to markers
-      zoom: 2,
+    const map = L.map(container, {
+      center: [20, 0],
+      zoom:   2,
       zoomControl: true,
     });
 
@@ -94,46 +111,106 @@ export default function EmployeeMap({ markers, isLoading }: Props) {
       maxZoom: 19,
     }).addTo(map);
 
+    // Marker cluster group with custom cluster icon
+    const clusterGroup = L.markerClusterGroup({
+      maxClusterRadius: 60,
+      showCoverageOnHover: false,
+      iconCreateFunction(cluster) {
+        const count = cluster.getChildCount();
+        return L.divIcon({
+          html: `<div style="
+            width:36px;height:36px;border-radius:50%;
+            background:rgba(99,102,241,0.85);
+            border:2px solid rgba(99,102,241,0.4);
+            color:#fff;font-weight:700;font-size:13px;
+            display:flex;align-items:center;justify-content:center;
+            box-shadow:0 2px 8px rgba(0,0,0,0.25);
+          ">${count}</div>`,
+          className: "",
+          iconSize: [36, 36],
+          iconAnchor: [18, 18],
+        });
+      },
+    });
+    map.addLayer(clusterGroup);
+    clusterGroupRef.current = clusterGroup;
     mapRef.current = map;
 
+    // ResizeObserver → invalidateSize when container dimensions change
+    const ro = new ResizeObserver(() => map.invalidateSize({ animate: false }));
+    ro.observe(container);
+
+    const raf = requestAnimationFrame(() => map.invalidateSize({ animate: false }));
+
     return () => {
+      ro.disconnect();
+      cancelAnimationFrame(raf);
       map.remove();
-      mapRef.current = null;
+      mapRef.current     = null;
+      clusterGroupRef.current = null;
+      markerMapRef.current.clear();
     };
   }, []);
 
-  // Update markers whenever data changes
+  // ── Sync markers when data or selection changes ────────────────────────────
   useEffect(() => {
-    const map = mapRef.current;
-    if (!map) return;
-
-    // Remove old markers
-    for (const m of markerLayerRef.current) {
-      m.remove();
+    const map          = mapRef.current;
+    const clusterGroup = clusterGroupRef.current;
+    if (!map || !clusterGroup) return;
+
+    const incoming = new Map(markers.map((m) => [m.employeeId, m]));
+    const existing = markerMapRef.current;
+
+    // Remove markers no longer in the data set
+    for (const [id, leafletMarker] of existing) {
+      if (!incoming.has(id)) {
+        clusterGroup.removeLayer(leafletMarker);
+        existing.delete(id);
+      }
     }
-    markerLayerRef.current = [];
-
-    if (markers.length === 0) return;
 
-    const newMarkers: LeafletMarker[] = [];
     const latLngs: [number, number][] = [];
+    const toAdd: LeafletMarker[] = [];
 
     for (const m of markers) {
-      const icon = makeIcon(m.status);
-      const marker = L.marker([m.latitude, m.longitude], { icon })
-        .addTo(map)
-        .bindPopup(buildPopupHtml(m));
-      newMarkers.push(marker);
+      const isSelected = selectedEmployeeId === m.employeeId;
+      const icon = makeIcon(m.status, isSelected);
       latLngs.push([m.latitude, m.longitude]);
+
+      if (existing.has(m.employeeId)) {
+        // Update existing marker position + icon (smooth move, no remove/re-add)
+        const lm = existing.get(m.employeeId)!;
+        lm.setLatLng([m.latitude, m.longitude]);
+        lm.setIcon(icon);
+        lm.setPopupContent(buildPopupHtml(m));
+      } else {
+        // New marker
+        const lm = L.marker([m.latitude, m.longitude], { icon }).bindPopup(buildPopupHtml(m));
+        existing.set(m.employeeId, lm);
+        toAdd.push(lm);
+      }
     }
 
-    markerLayerRef.current = newMarkers;
+    if (toAdd.length > 0) {
+      clusterGroup.addLayers(toAdd);
+    }
 
-    // Fit the map to show all markers (with a small padding)
-    if (latLngs.length > 0) {
+    markerMapRef.current = existing;
+
+    // Auto-centre: only on first data load (when no markers existed before)
+    if (latLngs.length > 0 && existing.size === toAdd.length) {
       map.fitBounds(L.latLngBounds(latLngs), { padding: [40, 40], maxZoom: 14 });
     }
-  }, [markers]);
+
+    // Pan to selected employee marker if it exists
+    if (selectedEmployeeId) {
+      const sel = existing.get(selectedEmployeeId);
+      if (sel) {
+        map.setView(sel.getLatLng(), Math.max(map.getZoom(), 13), { animate: true });
+        sel.openPopup();
+      }
+    }
+  }, [markers, selectedEmployeeId]);
 
   return (
     <div className="relative h-full w-full">
@@ -142,7 +219,6 @@ export default function EmployeeMap({ markers, isLoading }: Props) {
           <span className="text-sm text-muted-foreground">Loading positions…</span>
         </div>
       )}
-      {/* The map mounts into this div */}
       <div ref={mapContainerRef} className="h-full w-full" />
     </div>
   );
diff --git a/apps/web/src/app/(protected)/admin/monitoring/map/page.tsx b/apps/web/src/app/(protected)/admin/monitoring/map/page.tsx
index e3f15b7..f78477b 100644
--- a/apps/web/src/app/(protected)/admin/monitoring/map/page.tsx
+++ b/apps/web/src/app/(protected)/admin/monitoring/map/page.tsx
@@ -1,5 +1,6 @@
 "use client";
 
+import { useState } from "react";
 import dynamic from "next/dynamic";
 import { useEffect } from "react";
 import { useRouter } from "next/navigation";
@@ -8,7 +9,9 @@ import { useAdminMap } from "@/hooks/queries/useDashboard";
 import { ErrorBanner } from "@/components/ErrorBanner";
 import { Badge } from "@/components/ui/badge";
 import { Card, CardContent, CardHeader, CardTitle } from "@/components/ui/card";
-import { MapPin, RefreshCw } from "lucide-react";
+import { MapPin, RefreshCw, Users, Search } from "lucide-react";
+import { Input } from "@/components/ui/input";
+import { cn } from "@/lib/utils";
 import type { EmployeeMapMarker } from "@/types";
 
 // ─── Dynamic Leaflet import (SSR disabled — Leaflet uses `window`) ────────────
@@ -16,25 +19,79 @@ import type { EmployeeMapMarker } from "@/types";
 const EmployeeMap = dynamic(() => import("./EmployeeMap"), {
   ssr: false,
   loading: () => (
-    <div className="flex h-96 items-center justify-center rounded-lg bg-muted text-muted-foreground">
+    <div className="flex h-full items-center justify-center rounded-b-lg bg-muted text-sm text-muted-foreground">
       Loading map…
     </div>
   ),
 });
 
-// ─── Status badge helper ──────────────────────────────────────────────────────
+// ─── Helpers ──────────────────────────────────────────────────────────────────
 
-const STATUS_VARIANTS: Record<
-  EmployeeMapMarker["status"],
-  "default" | "secondary" | "outline"
-> = {
-  ACTIVE: "default",
-  RECENT: "secondary",
-  INACTIVE: "outline",
+const STATUS_DOT: Record<EmployeeMapMarker["status"], string> = {
+  ACTIVE:   "bg-emerald-500",
+  RECENT:   "bg-orange-400",
+  INACTIVE: "bg-slate-400",
 };
 
-function statusLabel(status: EmployeeMapMarker["status"]) {
-  return status.charAt(0) + status.slice(1).toLowerCase();
+const STATUS_LABEL: Record<EmployeeMapMarker["status"], string> = {
+  ACTIVE:   "Active",
+  RECENT:   "Recent",
+  INACTIVE: "Inactive",
+};
+
+// ─── Employee List Item ────────────────────────────────────────────────────────
+
+function EmployeeListItem({
+  marker,
+  selected,
+  onClick,
+}: {
+  marker: EmployeeMapMarker;
+  selected: boolean;
+  onClick: () => void;
+}) {
+  const initials = marker.employeeName
+    .split(" ")
+    .slice(0, 2)
+    .map((n) => n[0] ?? "")
+    .join("")
+    .toUpperCase();
+
+  return (
+    <button
+      onClick={onClick}
+      className={cn(
+        "flex w-full items-center gap-3 rounded-lg px-3 py-2.5 text-left transition-colors",
+        selected
+          ? "bg-primary/10 text-primary"
+          : "hover:bg-accent/60 text-foreground"
+      )}
+    >
+      <div
+        className={cn(
+          "flex h-8 w-8 shrink-0 items-center justify-center rounded-full text-[11px] font-bold",
+          selected ? "bg-primary text-primary-foreground" : "bg-muted text-muted-foreground"
+        )}
+      >
+        {initials}
+      </div>
+      <div className="min-w-0 flex-1">
+        <p className="truncate text-sm font-medium leading-none">
+          {marker.employeeName}
+        </p>
+        {marker.employeeCode && (
+          <p className="text-[11px] text-muted-foreground mt-0.5">{marker.employeeCode}</p>
+        )}
+      </div>
+      <span
+        className={cn(
+          "flex h-2 w-2 shrink-0 rounded-full",
+          STATUS_DOT[marker.status],
+          marker.status === "ACTIVE" && "animate-pulse"
+        )}
+      />
+    </button>
+  );
 }
 
 // ─── Page ─────────────────────────────────────────────────────────────────────
@@ -42,6 +99,8 @@ function statusLabel(status: EmployeeMapMarker["status"]) {
 export default function MonitoringMapPage() {
   const { permissions } = useAuth();
   const router = useRouter();
+  const [selectedId, setSelectedId] = useState<string | null>(null);
+  const [search, setSearch] = useState("");
 
   useEffect(() => {
     if (!permissions.viewAnalytics) {
@@ -53,67 +112,157 @@ export default function MonitoringMapPage() {
 
   if (!permissions.viewAnalytics) return null;
 
-  const activeCount = markers.filter((m) => m.status === "ACTIVE").length;
-  const recentCount = markers.filter((m) => m.status === "RECENT").length;
+  const activeCount   = markers.filter((m) => m.status === "ACTIVE").length;
+  const recentCount   = markers.filter((m) => m.status === "RECENT").length;
+  const inactiveCount = markers.filter((m) => m.status === "INACTIVE").length;
+
+  const filtered = markers.filter((m) =>
+    search
+      ? m.employeeName.toLowerCase().includes(search.toLowerCase()) ||
+        (m.employeeCode ?? "").toLowerCase().includes(search.toLowerCase())
+      : true
+  );
+
+  function handleSelect(id: string) {
+    setSelectedId((prev) => (prev === id ? null : id));
+  }
 
   return (
     <div className="space-y-4">
       {/* Header */}
       <div className="flex items-center justify-between">
         <div>
-          <h1 className="text-2xl font-bold">Live Employee Map</h1>
+          <h1 className="text-2xl font-bold tracking-tight">Live Employee Map</h1>
           <p className="text-sm text-muted-foreground">
             Showing latest GPS position per employee. Refreshes every 30 s.
           </p>
         </div>
         <div className="flex items-center gap-3">
-          <span className="text-xs text-muted-foreground">
-            {dataUpdatedAt
-              ? `Updated ${new Date(dataUpdatedAt).toLocaleTimeString()}`
-              : null}
-          </span>
+          {dataUpdatedAt ? (
+            <span className="text-xs text-muted-foreground">
+              Updated {new Date(dataUpdatedAt).toLocaleTimeString()}
+            </span>
+          ) : null}
           <button
             onClick={() => void refetch()}
-            className="flex items-center gap-1 rounded px-2 py-1 text-sm text-muted-foreground hover:bg-muted"
+            className="flex items-center gap-1.5 rounded-lg border border-border/60 px-3 py-1.5 text-sm text-muted-foreground hover:bg-accent/60 hover:text-foreground transition-colors"
           >
-            <RefreshCw className="h-4 w-4" />
+            <RefreshCw className="h-3.5 w-3.5" />
             Refresh
           </button>
         </div>
       </div>
 
       {/* Summary badges */}
-      <div className="flex gap-2">
-        <Badge variant="default">
-          <MapPin className="mr-1 h-3 w-3" />
+      <div className="flex flex-wrap gap-2">
+        <Badge className="gap-1.5 bg-emerald-500/15 text-emerald-700 dark:text-emerald-400 border-emerald-500/30 hover:bg-emerald-500/20">
+          <span className="h-1.5 w-1.5 rounded-full bg-emerald-500 animate-pulse" />
           {activeCount} Active
         </Badge>
-        <Badge variant="secondary">{recentCount} Recent</Badge>
-        <Badge variant="outline">{markers.length} Total on map</Badge>
+        <Badge variant="secondary" className="gap-1.5">
+          <span className="h-1.5 w-1.5 rounded-full bg-orange-400" />
+          {recentCount} Recent
+        </Badge>
+        <Badge variant="outline" className="gap-1.5">
+          <MapPin className="h-3 w-3" />
+          {markers.length} on map
+        </Badge>
       </div>
 
       {/* Error */}
       {error ? <ErrorBanner error={error as Error} /> : null}
 
-      {/* Map */}
-      <Card>
-        <CardHeader className="pb-2">
-          <CardTitle className="text-base">Employee Positions</CardTitle>
-        </CardHeader>
-        <CardContent className="p-0">
-          <div className="h-[calc(100vh-22rem)] min-h-80 overflow-hidden rounded-b-lg">
-            <EmployeeMap markers={markers} isLoading={isLoading} />
-          </div>
-        </CardContent>
-      </Card>
+      {/* Main content: map + employee list */}
+      <div className="grid grid-cols-1 xl:grid-cols-[1fr_280px] gap-4">
+        {/* Map */}
+        <Card className="overflow-hidden">
+          <CardHeader className="pb-2">
+            <CardTitle className="text-base">Employee Positions</CardTitle>
+          </CardHeader>
+          <CardContent className="p-0">
+            <div className="h-[calc(100vh-22rem)] min-h-80">
+              <EmployeeMap
+                markers={markers}
+                isLoading={isLoading}
+                selectedEmployeeId={selectedId}
+              />
+            </div>
+          </CardContent>
+        </Card>
+
+        {/* Employee sidebar */}
+        <Card className="flex flex-col overflow-hidden">
+          <CardHeader className="pb-2 shrink-0">
+            <CardTitle className="text-base flex items-center gap-2">
+              <Users className="h-4 w-4 text-muted-foreground" />
+              Employees
+            </CardTitle>
+          </CardHeader>
+          <CardContent className="flex flex-col gap-2 p-3 overflow-hidden">
+            {/* Search */}
+            <div className="relative">
+              <Search className="absolute left-2.5 top-1/2 -translate-y-1/2 h-3.5 w-3.5 text-muted-foreground" />
+              <Input
+                placeholder="Search employees…"
+                value={search}
+                onChange={(e) => setSearch(e.target.value)}
+                className="pl-8 h-8 text-sm"
+              />
+            </div>
+
+            {/* Status summary */}
+            <div className="flex gap-3 text-xs text-muted-foreground px-1">
+              <span><span className="font-semibold text-emerald-600 dark:text-emerald-400">{activeCount}</span> active</span>
+              <span><span className="font-semibold text-orange-500">{recentCount}</span> recent</span>
+              <span><span className="font-semibold text-slate-400">{inactiveCount}</span> inactive</span>
+            </div>
+
+            {/* Scrollable list */}
+            <div className="flex-1 overflow-y-auto space-y-0.5" style={{ maxHeight: "calc(100vh - 28rem)" }}>
+              {isLoading && (
+                <div className="flex flex-col gap-2 py-4">
+                  {Array.from({ length: 5 }).map((_, i) => (
+                    <div key={i} className="flex items-center gap-3 px-3 py-2">
+                      <div className="h-8 w-8 rounded-full bg-muted animate-pulse" />
+                      <div className="h-4 flex-1 rounded bg-muted animate-pulse" />
+                    </div>
+                  ))}
+                </div>
+              )}
+
+              {!isLoading && filtered.length === 0 && (
+                <p className="py-8 text-center text-sm text-muted-foreground">No employees found</p>
+              )}
+
+              {/* Sort: ACTIVE first, then RECENT, then INACTIVE */}
+              {[...filtered]
+                .sort((a, b) => {
+                  const order = { ACTIVE: 0, RECENT: 1, INACTIVE: 2 };
+                  return order[a.status] - order[b.status];
+                })
+                .map((m) => (
+                  <EmployeeListItem
+                    key={m.employeeId}
+                    marker={m}
+                    selected={selectedId === m.employeeId}
+                    onClick={() => handleSelect(m.employeeId)}
+                  />
+                ))}
+            </div>
+          </CardContent>
+        </Card>
+      </div>
 
       {/* Empty state */}
-      {!isLoading && markers.length === 0 && !error ? (
-        <p className="text-center text-sm text-muted-foreground">
-          No employees with GPS data found. Markers appear after employees check in and record a
-          location point.
-        </p>
-      ) : null}
+      {!isLoading && markers.length === 0 && !error && (
+        <div className="flex flex-col items-center justify-center py-12 gap-3 text-center">
+          <MapPin className="h-10 w-10 text-muted-foreground/30" />
+          <p className="text-sm font-medium text-muted-foreground">No GPS data yet</p>
+          <p className="text-xs text-muted-foreground/60 max-w-sm">
+            Markers appear after employees check in and record a location point.
+          </p>
+        </div>
+      )}
     </div>
   );
 }
diff --git a/apps/web/src/app/(protected)/admin/webhooks/page.tsx b/apps/web/src/app/(protected)/admin/webhooks/page.tsx
new file mode 100644
index 0000000..7e25147
--- /dev/null
+++ b/apps/web/src/app/(protected)/admin/webhooks/page.tsx
@@ -0,0 +1,819 @@
+"use client";
+
+import { useState } from "react";
+import { motion, AnimatePresence } from "framer-motion";
+import { useToast } from "@/components/ui/use-toast";
+import { Button } from "@/components/ui/button";
+import { Input } from "@/components/ui/input";
+import { Label } from "@/components/ui/label";
+import { Badge } from "@/components/ui/badge";
+import { Skeleton } from "@/components/ui/skeleton";
+import {
+  Sheet,
+  SheetContent,
+  SheetHeader,
+  SheetTitle,
+  SheetFooter,
+} from "@/components/ui/sheet";
+import {
+  AlertDialog,
+  AlertDialogAction,
+  AlertDialogCancel,
+  AlertDialogContent,
+  AlertDialogDescription,
+  AlertDialogFooter,
+  AlertDialogHeader,
+  AlertDialogTitle,
+} from "@/components/ui/alert-dialog";
+import {
+  Webhook,
+  Plus,
+  Trash2,
+  RefreshCw,
+  ChevronDown,
+  ChevronUp,
+  CheckCircle2,
+  XCircle,
+  Clock,
+  ToggleLeft,
+  ToggleRight,
+  Eye,
+  EyeOff,
+  Copy,
+  Check,
+} from "lucide-react";
+import { cn } from "@/lib/utils";
+import {
+  useWebhooks,
+  useWebhookDeliveries,
+  useCreateWebhook,
+  useUpdateWebhook,
+  useDeleteWebhook,
+  useRetryDelivery,
+  WEBHOOK_EVENT_TYPES,
+  type WebhookRecord,
+  type WebhookDelivery,
+  type DeliveryStatus,
+  type CreateWebhookBody,
+} from "@/hooks/queries/useWebhooks";
+
+// ─── Constants ────────────────────────────────────────────────────────────────
+
+/** Display labels for the known webhook event types, keyed by event type string. */
+const EVENT_LABELS: Record<string, string> = {
+  "employee.checked_in": "Check In",
+  "employee.checked_out": "Check Out",
+  "expense.created": "Expense Created",
+  "expense.approved": "Expense Approved",
+  "expense.rejected": "Expense Rejected",
+  "employee.created": "Employee Created",
+};
+
+/** How one delivery status is presented: label, icon component and text-colour class. */
+type StatusStyle = { label: string; icon: React.ElementType; className: string };
+const STATUS_CONFIG: Record<DeliveryStatus, StatusStyle> = {
+  success: { label: "Success", icon: CheckCircle2, className: "text-emerald-500" },
+  failed: { label: "Failed", icon: XCircle, className: "text-rose-500" },
+  pending: { label: "Pending", icon: Clock, className: "text-amber-500" },
+};
+
+// ─── Helpers ──────────────────────────────────────────────────────────────────
+/** Coarse age of an ISO timestamp: "Just now", "5m ago", "3h ago", "2d ago"; "Unknown" if unparseable. */
+function formatRelativeTime(iso: string): string {
+  const diff = Date.now() - new Date(iso).getTime();
+  if (Number.isNaN(diff)) return "Unknown"; // an invalid date would otherwise render "NaNd ago"
+  if (diff < 60_000) return "Just now"; // also covers timestamps that lie in the future
+  if (diff < 3_600_000) return `${Math.floor(diff / 60_000)}m ago`;
+  return diff < 86_400_000 ? `${Math.floor(diff / 3_600_000)}h ago` : `${Math.floor(diff / 86_400_000)}d ago`;
+}
+
+// ─── Delivery Status Badge ────────────────────────────────────────────────────
+
+/**
+ * Inline icon + label for a delivery status, styled from its STATUS_CONFIG entry.
+ */
+function DeliveryStatusBadge({ status }: { status: DeliveryStatus }) {
+  const config = STATUS_CONFIG[status];
+  const StatusIcon = config.icon;
+  const classes = cn("flex items-center gap-1 text-xs font-semibold", config.className);
+  return <span className={classes}><StatusIcon className="h-3.5 w-3.5" />{config.label}</span>;
+}
+
+// ─── Expandable Payload Row ───────────────────────────────────────────────────
+
+/**
+ * One delivery attempt: a summary row that expands to show the response body and ids.
+ * Toggle and Retry are sibling buttons: a <button> inside a <button> is invalid HTML.
+ */
+function DeliveryRow({ delivery, onRetry, isRetrying }: {
+  delivery: WebhookDelivery;
+  onRetry: (id: string) => void;
+  isRetrying: boolean;
+}) {
+  const [expanded, setExpanded] = useState(false);
+  const toggle = () => setExpanded((isOpen) => !isOpen);
+  const code = delivery.response_status;
+  const codeTone = code != null && code >= 200 && code < 300
+    ? "bg-emerald-100 text-emerald-700 dark:bg-emerald-950/40 dark:text-emerald-400"
+    : "bg-rose-100 text-rose-700 dark:bg-rose-950/40 dark:text-rose-400";
+
+  return (
+    <div className="border-b border-border/40 last:border-0">
+      <div className="flex w-full items-center gap-3 px-4 hover:bg-accent/40 transition-colors">
+        <button type="button"
+          onClick={toggle}
+          aria-expanded={expanded}
+          className="flex flex-1 min-w-0 items-center gap-3 py-3 text-left"
+        >
+          <DeliveryStatusBadge status={delivery.status} />
+          <span className="flex-1 min-w-0 text-xs text-muted-foreground">
+            {delivery.last_attempt_at ? formatRelativeTime(delivery.last_attempt_at) : "Not attempted"}
+          </span>
+          {code != null && (
+            <span className={cn("text-xs font-mono font-semibold px-1.5 py-0.5 rounded", codeTone)}>{code}</span>
+          )}
+          <span className="text-xs text-muted-foreground/60">
+            #{delivery.attempt_count} attempt{delivery.attempt_count !== 1 ? "s" : ""}
+          </span>
+        </button>
+        {delivery.status === "failed" && (
+          <Button
+            size="sm" variant="outline"
+            className="h-7 text-xs gap-1.5"
+            disabled={isRetrying}
+            onClick={() => onRetry(delivery.id)}
+          >
+            <RefreshCw className={cn("h-3 w-3", isRetrying && "animate-spin")} />
+            Retry
+          </Button>
+        )}
+        <button type="button"
+          onClick={toggle}
+          aria-label={expanded ? "Collapse delivery details" : "Expand delivery details"}
+          className="shrink-0 py-3 text-muted-foreground"
+        >
+          {expanded ? <ChevronUp className="h-3.5 w-3.5" /> : <ChevronDown className="h-3.5 w-3.5" />}
+        </button>
+      </div>
+
+      <AnimatePresence>
+        {expanded && (
+          <motion.div
+            initial={{ height: 0, opacity: 0 }}
+            animate={{ height: "auto", opacity: 1 }}
+            exit={{ height: 0, opacity: 0 }}
+            transition={{ duration: 0.18 }}
+            className="overflow-hidden"
+          >
+            <div className="px-4 pb-3 pt-0 space-y-2">
+              <div className="rounded-lg bg-muted/60 p-3">
+                <p className="text-[10px] font-semibold uppercase tracking-widest text-muted-foreground/60 mb-1.5">
+                  Response Body
+                </p>
+                <pre className="text-xs font-mono text-foreground/80 whitespace-pre-wrap break-all max-h-40 overflow-y-auto">
+                  {delivery.response_body ?? "(no response body)"}
+                </pre>
+              </div>
+              <div className="flex gap-4 text-xs text-muted-foreground">
+                <span>Event: <code className="font-mono text-foreground/70">{delivery.event_id.slice(0, 8)}…</code></span>
+                <span>Delivery: <code className="font-mono text-foreground/70">{delivery.id.slice(0, 8)}…</code></span>
+              </div>
+            </div>
+          </motion.div>
+        )}
+      </AnimatePresence>
+    </div>
+  );
+}
+
+// ─── Deliveries Panel ─────────────────────────────────────────────────────────
+
+/** Tabs of the delivery status filter; `undefined` means no filter ("All"). */
+const DELIVERY_FILTERS: { key: DeliveryStatus | undefined; label: string }[] = [
+  { key: undefined, label: "All" },
+  { key: "pending", label: "Pending" },
+  { key: "success", label: "Success" },
+  { key: "failed", label: "Failed" },
+];
+const DELIVERIES_PAGE_SIZE = 20; // rows requested per page
+
+/**
+ * Filterable, paginated delivery history for one webhook, or for every webhook when
+ * `webhookId` is null. A failed fetch renders an error row, not the empty state.
+ */
+function DeliveriesPanel({ webhookId }: { webhookId: string | null }) {
+  const [page, setPage] = useState(1);
+  const [statusFilter, setStatusFilter] = useState<DeliveryStatus | undefined>(undefined);
+  const retryDelivery = useRetryDelivery();
+  const { toast } = useToast();
+
+  const { data, isLoading, error } = useWebhookDeliveries(
+    page, DELIVERIES_PAGE_SIZE, webhookId ?? undefined, statusFilter,
+  );
+  const deliveries = data?.data ?? [];
+  const total = data?.pagination.total ?? 0;
+  const hasMore = page * DELIVERIES_PAGE_SIZE < total;
+  const retryingId = retryDelivery.isPending ? retryDelivery.variables : undefined;
+
+  function handleRetry(id: string) {
+    retryDelivery.mutate(id, {
+      onSuccess: () => toast({ title: "Delivery queued for retry" }),
+      onError: (e) => toast({ variant: "destructive", title: "Retry failed", description: e.message }),
+    });
+  }
+
+  return (
+    <div className="space-y-3">
+      <div className="flex gap-1 rounded-lg border bg-muted/40 p-1 w-fit">
+        {DELIVERY_FILTERS.map((f) => (
+          <button
+            key={String(f.key)}
+            type="button"
+            onClick={() => { setStatusFilter(f.key); setPage(1); }}
+            className={cn(
+              "rounded-md px-3 py-1 text-xs font-medium transition-colors",
+              statusFilter === f.key ? "bg-background shadow-sm text-foreground" : "text-muted-foreground hover:text-foreground"
+            )}
+          >
+            {f.label}
+          </button>
+        ))}
+      </div>
+
+      <div className="rounded-xl border bg-card overflow-hidden">
+        {isLoading && (
+          <div className="divide-y">
+            {Array.from({ length: 5 }).map((_, i) => (
+              <div key={i} className="flex items-center gap-3 px-4 py-3">
+                <Skeleton className="h-4 w-20" />
+                <Skeleton className="h-3 w-28 flex-1" />
+                <Skeleton className="h-5 w-10" />
+              </div>
+            ))}
+          </div>
+        )}
+
+        {error && (
+          <p className="px-4 py-6 text-center text-sm text-destructive">Failed to load deliveries: {error.message}</p>
+        )}
+
+        {!isLoading && !error && deliveries.length === 0 && (
+          <div className="flex flex-col items-center justify-center py-14 gap-3">
+            <Clock className="h-10 w-10 text-muted-foreground/30" />
+            <p className="text-sm font-medium text-muted-foreground">No deliveries yet</p>
+            <p className="text-xs text-muted-foreground/60 text-center max-w-xs">
+              Deliveries appear here when a webhook event is triggered.
+            </p>
+          </div>
+        )}
+
+        {!isLoading && deliveries.length > 0 && (
+          <div>
+            {deliveries.map((d) => (
+              <DeliveryRow key={d.id} delivery={d} onRetry={handleRetry} isRetrying={retryingId === d.id} />
+            ))}
+          </div>
+        )}
+      </div>
+
+      {(deliveries.length > 0 || page > 1) && (
+        <div className="flex items-center justify-between text-xs text-muted-foreground">
+          <span>{total} total deliveries</span>
+          <div className="flex gap-2">
+            <Button size="sm" variant="outline" className="h-7 text-xs" disabled={page === 1} onClick={() => setPage(p => p - 1)}>
+              Previous
+            </Button>
+            <Button size="sm" variant="outline" className="h-7 text-xs" disabled={!hasMore} onClick={() => setPage(p => p + 1)}>
+              Next
+            </Button>
+          </div>
+        </div>
+      )}
+    </div>
+  );
+}
+
+// ─── Webhook Card ─────────────────────────────────────────────────────────────
+
+/**
+ * Card for one registered webhook: active indicator, endpoint URL with a copy
+ * button, subscribed events, enable/edit/delete actions, and a collapsible
+ * delivery history scoped to this webhook.
+ */
+function WebhookCard({ webhook, onEdit, onDelete }: {
+  webhook: WebhookRecord;
+  onEdit: (w: WebhookRecord) => void;
+  onDelete: (id: string) => void;
+}) {
+  const [showDeliveries, setShowDeliveries] = useState(false);
+  const [copied, setCopied] = useState(false);
+  const updateWebhook = useUpdateWebhook(webhook.id);
+  const { toast } = useToast();
+
+  function handleToggleActive() {
+    const nextActive = !webhook.is_active;
+    updateWebhook.mutate(
+      { is_active: nextActive },
+      {
+        onSuccess: () =>
+          toast({ title: `Webhook ${nextActive ? "enabled" : "disabled"}` }),
+        onError: (e) =>
+          toast({ variant: "destructive", title: "Update failed", description: e.message }),
+      }
+    );
+  }
+
+  /** Copies the endpoint URL; the check mark is shown only after the write succeeds. */
+  async function copyUrl() {
+    try {
+      await navigator.clipboard.writeText(webhook.url);
+      setCopied(true);
+      setTimeout(() => setCopied(false), 2000);
+    } catch {
+      // Clipboard API unavailable (e.g. insecure context) or permission denied.
+      toast({ variant: "destructive", title: "Could not copy URL" });
+    }
+  }
+
+  return (
+    <motion.div
+      initial={{ opacity: 0, y: 6 }}
+      animate={{ opacity: 1, y: 0 }}
+      exit={{ opacity: 0, y: -6 }}
+      className="rounded-xl border bg-card overflow-hidden"
+    >
+      {/* Header row */}
+      <div className="flex items-start gap-3 p-4">
+        {/* Status dot */}
+        <div className="mt-1 shrink-0">
+          {webhook.is_active ? (
+            <span className="relative flex h-2.5 w-2.5">
+              <span className="animate-ping absolute inline-flex h-full w-full rounded-full bg-emerald-400 opacity-75" />
+              <span className="relative inline-flex h-2.5 w-2.5 rounded-full bg-emerald-500" />
+            </span>
+          ) : (
+            <span className="h-2.5 w-2.5 rounded-full bg-muted-foreground/30 inline-block" />
+          )}
+        </div>
+
+        {/* URL + events */}
+        <div className="flex-1 min-w-0 space-y-2">
+          <div className="flex items-center gap-2">
+            <code className="text-sm font-mono truncate text-foreground/80 flex-1 min-w-0">
+              {webhook.url}
+            </code>
+            <button type="button"
+              onClick={() => void copyUrl()}
+              className="shrink-0 text-muted-foreground hover:text-foreground transition-colors"
+              title="Copy URL"
+            >
+              {copied ? <Check className="h-3.5 w-3.5 text-emerald-500" /> : <Copy className="h-3.5 w-3.5" />}
+            </button>
+          </div>
+          <div className="flex flex-wrap gap-1.5">
+            {webhook.events.map((e) => (
+              <Badge key={e} variant="secondary" className="text-[10px] font-medium px-1.5 py-0">
+                {EVENT_LABELS[e] ?? e}
+              </Badge>
+            ))}
+          </div>
+        </div>
+
+        {/* Actions */}
+        <div className="shrink-0 flex items-center gap-1">
+          <button type="button"
+            onClick={handleToggleActive}
+            disabled={updateWebhook.isPending}
+            className="p-1.5 rounded-lg hover:bg-accent/60 transition-colors text-muted-foreground hover:text-foreground disabled:opacity-50"
+            title={webhook.is_active ? "Disable webhook" : "Enable webhook"}
+          >
+            {webhook.is_active ? <ToggleRight className="h-5 w-5 text-emerald-500" /> : <ToggleLeft className="h-5 w-5" />}
+          </button>
+          <button type="button"
+            onClick={() => onEdit(webhook)}
+            className="p-1.5 rounded-lg hover:bg-accent/60 transition-colors text-muted-foreground hover:text-foreground"
+            title="Edit webhook"
+          >
+            <Eye className="h-4 w-4" />
+          </button>
+          <button type="button"
+            onClick={() => onDelete(webhook.id)}
+            className="p-1.5 rounded-lg hover:bg-rose-100 dark:hover:bg-rose-950/30 transition-colors text-muted-foreground hover:text-rose-500"
+            title="Delete webhook"
+          >
+            <Trash2 className="h-4 w-4" />
+          </button>
+        </div>
+      </div>
+
+      {/* Deliveries toggle */}
+      <button type="button"
+        onClick={() => setShowDeliveries(!showDeliveries)}
+        aria-expanded={showDeliveries}
+        className="flex w-full items-center justify-between px-4 py-2 border-t border-border/40 bg-muted/20 hover:bg-muted/40 transition-colors text-xs text-muted-foreground hover:text-foreground"
+      >
+        <span className="font-medium">Delivery History</span>
+        {showDeliveries ? <ChevronUp className="h-3.5 w-3.5" /> : <ChevronDown className="h-3.5 w-3.5" />}
+      </button>
+
+      <AnimatePresence>
+        {showDeliveries && (
+          <motion.div
+            initial={{ height: 0, opacity: 0 }}
+            animate={{ height: "auto", opacity: 1 }}
+            exit={{ height: 0, opacity: 0 }}
+            transition={{ duration: 0.2 }}
+            className="overflow-hidden"
+          >
+            <div className="p-4 pt-0 mt-4">
+              <DeliveriesPanel webhookId={webhook.id} />
+            </div>
+          </motion.div>
+        )}
+      </AnimatePresence>
+    </motion.div>
+  );
+}
+
+// ─── Create / Edit Sheet ──────────────────────────────────────────────────────
+
+/** Editable state of the webhook form; `secret` is never pre-filled from an existing webhook. */
+interface WebhookFormState {
+  url: string;
+  secret: string;
+  events: Set<string>;
+}
+
+/** Form values to start from: copied from `editing`, or blank when registering a new webhook. */
+function initialWebhookForm(editing: WebhookRecord | null): WebhookFormState {
+  return { url: editing?.url ?? "", secret: "", events: new Set<string>(editing?.events ?? []) };
+}
+
+/**
+ * Side sheet that registers a webhook (`editing` is null) or edits an existing one.
+ *
+ * The form is re-seeded during render each time the sheet opens or the edit target
+ * changes. A `useState` initializer cannot do this — it runs only on the first
+ * render — which is why the edit form used to open blank.
+ */
+function WebhookSheet({ open, editing, onClose }: {
+  open: boolean;
+  editing: WebhookRecord | null;
+  onClose: () => void;
+}) {
+  const { toast } = useToast();
+  const createWebhook = useCreateWebhook();
+  // Hooks must run unconditionally, so "" stands in for the id while creating;
+  // the update mutation is only fired from the `editing` branch of handleSubmit.
+  const updateWebhook = useUpdateWebhook(editing?.id ?? "");
+  const [showSecret, setShowSecret] = useState(false);
+  const [form, setForm] = useState<WebhookFormState>(() => initialWebhookForm(editing));
+
+  // What the form was last seeded for; null while the sheet is closed.
+  const seedKey = open ? (editing ? `edit:${editing.id}` : "create") : null;
+  const [seededFor, setSeededFor] = useState<string | null>(seedKey);
+  if (seededFor !== seedKey) {
+    setSeededFor(seedKey);
+    if (seedKey !== null) {
+      setForm(initialWebhookForm(editing));
+      setShowSecret(false);
+    }
+  }
+
+  function handleOpen(isOpen: boolean) {
+    if (!isOpen) onClose();
+  }
+
+  function toggleEvent(event: string) {
+    setForm((f) => {
+      const next = new Set(f.events);
+      if (!next.delete(event)) next.add(event); // delete() returns false when it was absent
+      return { ...f, events: next };
+    });
+  }
+
+  function handleSubmit(e: React.FormEvent) {
+    e.preventDefault();
+
+    if (form.events.size === 0) {
+      toast({ variant: "destructive", title: "Select at least one event" });
+      return;
+    }
+
+    if (editing) {
+      const patch: Parameters<typeof updateWebhook.mutate>[0] = {
+        url: form.url || editing.url,
+        events: [...form.events] as CreateWebhookBody["events"],
+      };
+      if (form.secret) patch.secret = form.secret; // blank keeps the current secret
+
+      updateWebhook.mutate(patch, {
+        onSuccess: () => { toast({ title: "Webhook updated" }); onClose(); },
+        onError: (err) => toast({ variant: "destructive", title: "Update failed", description: err.message }),
+      });
+    } else {
+      if (form.url.length < 5) {
+        toast({ variant: "destructive", title: "Enter a valid URL" });
+        return;
+      }
+      if (form.secret.length < 16) {
+        toast({ variant: "destructive", title: "Secret must be ≥ 16 characters" });
+        return;
+      }
+      createWebhook.mutate(
+        {
+          url: form.url,
+          secret: form.secret,
+          events: [...form.events] as CreateWebhookBody["events"],
+        },
+        {
+          onSuccess: () => { toast({ title: "Webhook registered" }); onClose(); },
+          onError: (err) => toast({ variant: "destructive", title: "Failed to create webhook", description: err.message }),
+        }
+      );
+    }
+  }
+
+  const isPending = createWebhook.isPending || updateWebhook.isPending;
+
+  return (
+    <Sheet open={open} onOpenChange={handleOpen}>
+      <SheetContent side="right" className="w-full sm:max-w-[460px] flex flex-col gap-0 p-0">
+        <SheetHeader className="px-6 py-5 border-b">
+          <SheetTitle className="flex items-center gap-2">
+            <Webhook className="h-5 w-5 text-primary" />
+            {editing ? "Edit Webhook" : "Register Webhook"}
+          </SheetTitle>
+        </SheetHeader>
+
+        <form onSubmit={handleSubmit} className="flex flex-col flex-1 overflow-y-auto">
+          <div className="flex-1 space-y-5 px-6 py-5">
+            {/* URL */}
+            <div className="space-y-1.5">
+              <Label htmlFor="wh-url">Endpoint URL</Label>
+              <Input
+                id="wh-url"
+                type="url"
+                placeholder="https://example.com/webhooks/fieldtrack"
+                value={form.url}
+                onChange={(e) => setForm((f) => ({ ...f, url: e.target.value }))}
+                required={!editing}
+              />
+              <p className="text-xs text-muted-foreground">FieldTrack will POST JSON events to this URL.</p>
+            </div>
+
+            {/* Secret */}
+            <div className="space-y-1.5">
+              <Label htmlFor="wh-secret">
+                {editing ? "Secret (leave blank to keep current)" : "Signing Secret"}
+              </Label>
+              <div className="relative">
+                <Input
+                  id="wh-secret"
+                  type={showSecret ? "text" : "password"}
+                  placeholder={editing ? "••••••••••••••••" : "min. 16 characters"}
+                  value={form.secret}
+                  onChange={(e) => setForm((f) => ({ ...f, secret: e.target.value }))}
+                  required={!editing}
+                  className="pr-10"
+                />
+                <button
+                  type="button"
+                  onClick={() => setShowSecret(!showSecret)}
+                  aria-label={showSecret ? "Hide secret" : "Show secret"}
+                  className="absolute right-3 top-1/2 -translate-y-1/2 text-muted-foreground hover:text-foreground"
+                >
+                  {showSecret ? <EyeOff className="h-4 w-4" /> : <Eye className="h-4 w-4" />}
+                </button>
+              </div>
+              <p className="text-xs text-muted-foreground">
+                Used to sign the <code className="font-mono text-xs">X-FieldTrack-Signature</code> header.
+              </p>
+            </div>
+
+            {/* Events */}
+            <div className="space-y-2">
+              <Label>Events to Subscribe</Label>
+              <div className="grid grid-cols-1 gap-2">
+                {WEBHOOK_EVENT_TYPES.map((event) => {
+                  const checked = form.events.has(event);
+                  return (
+                    <label
+                      key={event}
+                      className={cn(
+                        "flex items-center gap-3 rounded-lg border px-3 py-2.5 cursor-pointer transition-colors",
+                        checked ? "border-primary/50 bg-primary/5" : "border-border/60 hover:bg-accent/40"
+                      )}
+                    >
+                      <input
+                        type="checkbox"
+                        checked={checked}
+                        onChange={() => toggleEvent(event)}
+                        className="h-4 w-4 rounded accent-primary"
+                      />
+                      <div className="flex-1 min-w-0">
+                        <p className="text-sm font-medium">{EVENT_LABELS[event] ?? event}</p>
+                        <p className="text-xs text-muted-foreground font-mono">{event}</p>
+                      </div>
+                    </label>
+                  );
+                })}
+              </div>
+            </div>
+          </div>
+
+          <SheetFooter className="px-6 py-4 border-t gap-2">
+            <Button type="button" variant="outline" onClick={onClose} className="flex-1">
+              Cancel
+            </Button>
+            <Button type="submit" disabled={isPending} className="flex-1">
+              {isPending ? "Saving…" : editing ? "Save Changes" : "Register Webhook"}
+            </Button>
+          </SheetFooter>
+        </form>
+      </SheetContent>
+    </Sheet>
+  );
+}
+
+// ─── Delete Confirm Dialog ────────────────────────────────────────────────────
+
+/**
+ * Confirmation dialog for deleting a webhook; open while `webhookId` is non-null.
+ * Stays open showing "Deleting…" until the request settles, then calls `onClose`.
+ */
+function DeleteWebhookDialog({ webhookId, onClose }: { webhookId: string | null; onClose: () => void }) {
+  const deleteWebhook = useDeleteWebhook();
+  const { toast } = useToast();
+  const isDeleting = deleteWebhook.isPending;
+
+  function handleConfirm(e: React.MouseEvent) {
+    e.preventDefault(); // AlertDialogAction would otherwise close the dialog right away
+    if (!webhookId || isDeleting) return;
+    deleteWebhook.mutate(webhookId, {
+      onSuccess: () => { toast({ title: "Webhook deleted" }); onClose(); },
+      onError: (err) => { toast({ variant: "destructive", title: "Delete failed", description: err.message }); onClose(); },
+    });
+  }
+
+  return (
+    <AlertDialog open={!!webhookId} onOpenChange={(open) => !open && !isDeleting && onClose()}>
+      <AlertDialogContent>
+        <AlertDialogHeader>
+          <AlertDialogTitle>Delete webhook?</AlertDialogTitle>
+          <AlertDialogDescription>
+            This will permanently remove the webhook endpoint and all its delivery
+            history. This action cannot be undone.
+          </AlertDialogDescription>
+        </AlertDialogHeader>
+        <AlertDialogFooter>
+          <AlertDialogCancel disabled={isDeleting}>Cancel</AlertDialogCancel>
+          <AlertDialogAction
+            onClick={handleConfirm} disabled={isDeleting}
+            className="bg-destructive text-destructive-foreground hover:bg-destructive/90"
+          >
+            {isDeleting ? "Deleting…" : "Delete"}
+          </AlertDialogAction>
+        </AlertDialogFooter>
+      </AlertDialogContent>
+    </AlertDialog>
+  );
+}
+
+// ─── Page ─────────────────────────────────────────────────────────────────────
+
+/**
+ * Admin page for webhooks: lists the registered endpoints, hosts the create/edit
+ * sheet and the delete confirmation, and shows a delivery history across all webhooks.
+ */
+export default function WebhooksPage() {
+  const { data: webhooks, isLoading, error } = useWebhooks();
+  const [sheetOpen, setSheetOpen] = useState(false);
+  const [editingWebhook, setEditingWebhook] = useState<WebhookRecord | null>(null);
+  const [deletingId, setDeletingId] = useState<string | null>(null);
+  // Normalise once so the JSX below needs neither `?? []` nor non-null assertions.
+  const list = webhooks ?? [];
+  const activeCount = list.filter((w) => w.is_active).length;
+
+  function openCreate() {
+    setEditingWebhook(null);
+    setSheetOpen(true);
+  }
+
+  function openEdit(w: WebhookRecord) {
+    setEditingWebhook(w);
+    setSheetOpen(true);
+  }
+
+  function closeSheet() {
+    setSheetOpen(false);
+    setEditingWebhook(null);
+  }
+
+  return (
+    <div className="space-y-6">
+      {/* Page header */}
+      <div className="flex items-center justify-between">
+        <div>
+          <h1 className="text-2xl font-bold tracking-tight flex items-center gap-2">
+            <Webhook className="h-6 w-6 text-primary" />
+            Webhooks
+          </h1>
+          <p className="text-muted-foreground text-sm mt-0.5">
+            Register HTTP endpoints to receive real-time FieldTrack events.
+          </p>
+        </div>
+        <Button onClick={openCreate} className="gap-2">
+          <Plus className="h-4 w-4" />
+          Add Webhook
+        </Button>
+      </div>
+
+      {/* Error state */}
+      {error && (
+        <div className="rounded-xl border border-destructive/30 bg-destructive/5 p-4 text-sm text-destructive">
+          Failed to load webhooks: {error.message}
+        </div>
+      )}
+
+      {/* Loading state */}
+      {isLoading && (
+        <div className="space-y-3">
+          {Array.from({ length: 3 }).map((_, i) => (
+            <div key={i} className="rounded-xl border bg-card p-4 space-y-3">
+              <div className="flex items-center gap-3">
+                <Skeleton className="h-2.5 w-2.5 rounded-full" />
+                <Skeleton className="h-4 flex-1" />
+                <Skeleton className="h-7 w-20" />
+              </div>
+              <div className="flex gap-2">
+                <Skeleton className="h-5 w-20 rounded-full" />
+                <Skeleton className="h-5 w-24 rounded-full" />
+              </div>
+            </div>
+          ))}
+        </div>
+      )}
+
+      {/* Empty state */}
+      {!isLoading && !error && list.length === 0 && (
+        <div className="flex flex-col items-center justify-center rounded-2xl border border-dashed border-border/60 bg-muted/20 py-20 gap-4 text-center">
+          <div className="rounded-full bg-muted p-4">
+            <Webhook className="h-8 w-8 text-muted-foreground/50" />
+          </div>
+          <div>
+            <p className="font-semibold text-foreground">No webhooks registered</p>
+            <p className="text-sm text-muted-foreground mt-1 max-w-sm">
+              Register an HTTP endpoint to receive real-time events like check-ins,
+              expense submissions, and employee updates.
+            </p>
+          </div>
+          <Button onClick={openCreate} variant="outline" className="gap-2 mt-2">
+            <Plus className="h-4 w-4" />
+            Register your first webhook
+          </Button>
+        </div>
+      )}
+
+      {/* Webhook cards */}
+      {!isLoading && list.length > 0 && (
+        <>
+          {/* Summary bar */}
+          <div className="flex items-center gap-4 text-sm text-muted-foreground">
+            <span>
+              <span className="font-semibold text-foreground">{list.length}</span>{" "}
+              webhook{list.length !== 1 ? "s" : ""}
+            </span>
+            <span>·</span>
+            <span>
+              <span className="font-semibold text-emerald-600 dark:text-emerald-400">{activeCount}</span>{" "}
+              active
+            </span>
+          </div>
+
+          {/* AnimatePresence only tracks its direct children, so the keyed cards (not a
+              wrapper div) must be those children for their exit animation to run. */}
+          <div className="space-y-3">
+            <AnimatePresence>
+              {list.map((webhook) => (
+                <WebhookCard key={webhook.id} webhook={webhook} onEdit={openEdit} onDelete={setDeletingId} />
+              ))}
+            </AnimatePresence>
+          </div>
+        </>
+      )}
+
+      {/* Global delivery history — shows all org deliveries when no specific webhook is selected */}
+      {!isLoading && list.length > 0 && (
+        <div className="space-y-3">
+          <h2 className="text-sm font-semibold text-muted-foreground uppercase tracking-wider">All Deliveries</h2>
+          <DeliveriesPanel webhookId={null} />
+        </div>
+      )}
+
+      {/* Sheets + Dialogs */}
+      <WebhookSheet open={sheetOpen} editing={editingWebhook} onClose={closeSheet} />
+      <DeleteWebhookDialog webhookId={deletingId} onClose={() => setDeletingId(null)} />
+    </div>
+  );
+}
diff --git a/apps/web/src/app/(protected)/profile/page.tsx b/apps/web/src/app/(protected)/profile/page.tsx
index 7f510ee..0ad3196 100644
--- a/apps/web/src/app/(protected)/profile/page.tsx
+++ b/apps/web/src/app/(protected)/profile/page.tsx
@@ -2,12 +2,15 @@
 
 import { useMyProfile } from "@/hooks/queries/useProfile";
 import { useLeaderboard } from "@/hooks/queries/useAnalytics";
+import { useAuth } from "@/hooks/useAuth";
 import { ErrorBanner } from "@/components/ErrorBanner";
 import { Skeleton } from "@/components/ui/skeleton";
 import { ProfileView } from "@/components/ProfileView";
 import { PageTransition } from "@/components/motion";
+import { UserCircle } from "lucide-react";
 
 export default function MyProfilePage() {
+  const { user, role } = useAuth();
   const { data: profile, isLoading: profileLoading, error } = useMyProfile();
   const { data: leaderboard } = useLeaderboard("distance", 50);
 
@@ -35,7 +38,27 @@ export default function MyProfilePage() {
             </div>
           </div>
         ) : error ? (
-          <ErrorBanner error={error} />
+          role === "ADMIN" ? (
+            // Admins typically don't have a field employee profile — show a graceful message
+            <div className="flex flex-col items-center gap-4 rounded-xl border border-border/60 bg-card p-12 text-center shadow-sm">
+              <div className="flex h-16 w-16 items-center justify-center rounded-full bg-primary/10">
+                <UserCircle className="h-8 w-8 text-primary" />
+              </div>
+              <div>
+                <p className="text-lg font-semibold">{user?.email?.split("@")[0] ?? "Admin"}</p>
+                <p className="mt-1 text-sm text-muted-foreground">{user?.email}</p>
+                <span className="mt-2 inline-block rounded-full bg-amber-100 px-3 py-0.5 text-xs font-semibold text-amber-800 dark:bg-amber-500/15 dark:text-amber-400">
+                  {role}
+                </span>
+              </div>
+              <p className="max-w-sm text-sm text-muted-foreground">
+                Administrator accounts do not have a field employee profile. Employee performance
+                metrics, GPS sessions, and attendance data are accessible through the admin dashboard.
+              </p>
+            </div>
+          ) : (
+            <ErrorBanner error={error} />
+          )
         ) : profile ? (
           <ProfileView profile={profile} rank={myRank} />
         ) : null}
@@ -43,4 +66,3 @@ export default function MyProfilePage() {
     </PageTransition>
   );
 }
-
diff --git a/apps/web/src/app/globals.css b/apps/web/src/app/globals.css
index 54cfe52..7fe4f07 100644
--- a/apps/web/src/app/globals.css
+++ b/apps/web/src/app/globals.css
@@ -1,4 +1,7 @@
 @import "mapbox-gl/dist/mapbox-gl.css";
+@import "leaflet/dist/leaflet.css";
+@import "leaflet.markercluster/dist/MarkerCluster.css";
+@import "leaflet.markercluster/dist/MarkerCluster.Default.css";
 
 @tailwind base;
 @tailwind components;
diff --git a/apps/web/src/app/providers.tsx b/apps/web/src/app/providers.tsx
index 1e82833..a59255e 100644
--- a/apps/web/src/app/providers.tsx
+++ b/apps/web/src/app/providers.tsx
@@ -5,12 +5,12 @@ import { queryClient } from "@/lib/query-client";
 import { AuthProvider } from "@/contexts/AuthContext";
 import { ThemeProvider } from "@/components/providers/theme-provider";
 import { Toaster } from "@/components/ui/toaster";
+import { useToast } from "@/components/ui/use-toast";
 import { validateEnv } from "@/lib/env";
 import { useEffect } from "react";
 
 function EnvValidator({ children }: { children: React.ReactNode }) {
   useEffect(() => {
-    // Log API routing mode on every startup — instant misconfiguration visibility.
     console.log("[FieldTrack] API mode:", {
       base: process.env.NEXT_PUBLIC_API_BASE_URL ?? "(not set)",
       proxy: process.env.API_DESTINATION_URL ?? "(not set — only relevant in proxy mode)",
@@ -26,6 +26,29 @@ function EnvValidator({ children }: { children: React.ReactNode }) {
   return <>{children}</>;
 }
 
+/**
+ * GlobalErrorToast — listens for `fieldtrack:query-error` events emitted by
+ * the query-client.ts error handler and shows a toast notification.
+ */
+function GlobalErrorToast() {
+  const { toast } = useToast();
+
+  useEffect(() => {
+    function handler(e: Event) {
+      const detail = (e as CustomEvent<{ message?: string } | null>).detail; // null/undefined when dispatched without a payload
+      toast({
+        variant: "destructive",
+        title: "Something went wrong",
+        description: detail?.message ?? "An unexpected error occurred.",
+      });
+    }
+    window.addEventListener("fieldtrack:query-error", handler);
+    return () => window.removeEventListener("fieldtrack:query-error", handler);
+  }, [toast]);
+
+  return null;
+}
+
 export function Providers({ children }: { children: React.ReactNode }) {
   return (
     <ThemeProvider>
@@ -33,6 +56,7 @@ export function Providers({ children }: { children: React.ReactNode }) {
         <AuthProvider>
           <EnvValidator>
             {children}
+            <GlobalErrorToast />
             <Toaster />
           </EnvValidator>
         </AuthProvider>
diff --git a/apps/web/src/components/layout/Header.tsx b/apps/web/src/components/layout/Header.tsx
index 7260acf..9416e0e 100644
--- a/apps/web/src/components/layout/Header.tsx
+++ b/apps/web/src/components/layout/Header.tsx
@@ -117,13 +117,13 @@ function AvatarInitials({ name, size = "sm" }: { name: string; size?: "sm" | "md
 
 export function Header() {
   const { user, role, logout } = useAuth();
+  const isAdmin = role === "ADMIN";
   const { data: profile } = useMyProfile();
   const { from, to } = useTodayRange();
-  const { data: orgSummary } = useOrgSummary(from, to);
+  const { data: orgSummary } = useOrgSummary(from, to, isAdmin);
   const today = useTodayString();
   const [searchOpen, setSearchOpen] = useState(false);
 
-  const isAdmin = role === "ADMIN";
   const displayName = profile?.name ?? user?.email?.split("@")[0] ?? "Account";
   const firstName = displayName.split(" ")[0];
 
diff --git a/apps/web/src/components/layout/Sidebar.tsx b/apps/web/src/components/layout/Sidebar.tsx
index da7b322..ba8ad0b 100644
--- a/apps/web/src/components/layout/Sidebar.tsx
+++ b/apps/web/src/components/layout/Sidebar.tsx
@@ -16,6 +16,8 @@ import {
   ChevronLeft,
   ChevronRight,
   Users,
+  Map,
+  Webhook,
 } from "lucide-react";
 import { cn } from "@/lib/utils";
 import { useAuth } from "@/hooks/useAuth";
@@ -177,6 +179,16 @@ export function SidebarNav({ collapsed = false }: { collapsed?: boolean }) {
           label: "Employees",
           icon: <Users className="h-4 w-4" />,
         },
+        {
+          href: "/admin/monitoring/map",
+          label: "Live Map",
+          icon: <Map className="h-4 w-4" />,
+        },
+        {
+          href: "/admin/webhooks",
+          label: "Webhooks",
+          icon: <Webhook className="h-4 w-4" />,
+        },
       ]
     : [];
 
diff --git a/apps/web/src/components/ui/alert-dialog.tsx b/apps/web/src/components/ui/alert-dialog.tsx
new file mode 100644
index 0000000..a5d0458
--- /dev/null
+++ b/apps/web/src/components/ui/alert-dialog.tsx
@@ -0,0 +1,127 @@
+"use client";
+
+import * as React from "react";
+import * as AlertDialogPrimitive from "@radix-ui/react-alert-dialog";
+import { cn } from "@/lib/utils";
+import { buttonVariants } from "@/components/ui/button";
+
+const AlertDialog = AlertDialogPrimitive.Root;
+const AlertDialogTrigger = AlertDialogPrimitive.Trigger;
+const AlertDialogPortal = AlertDialogPrimitive.Portal;
+
+const AlertDialogOverlay = React.forwardRef<
+  React.ElementRef<typeof AlertDialogPrimitive.Overlay>,
+  React.ComponentPropsWithoutRef<typeof AlertDialogPrimitive.Overlay>
+>(({ className, ...props }, ref) => (
+  <AlertDialogPrimitive.Overlay
+    className={cn(
+      "fixed inset-0 z-50 bg-black/80 data-[state=open]:animate-in data-[state=closed]:animate-out data-[state=closed]:fade-out-0 data-[state=open]:fade-in-0",
+      className
+    )}
+    {...props}
+    ref={ref}
+  />
+));
+AlertDialogOverlay.displayName = AlertDialogPrimitive.Overlay.displayName;
+
+const AlertDialogContent = React.forwardRef<
+  React.ElementRef<typeof AlertDialogPrimitive.Content>,
+  React.ComponentPropsWithoutRef<typeof AlertDialogPrimitive.Content>
+>(({ className, ...props }, ref) => (
+  <AlertDialogPortal>
+    <AlertDialogOverlay />
+    <AlertDialogPrimitive.Content
+      ref={ref}
+      className={cn(
+        "fixed left-[50%] top-[50%] z-50 grid w-full max-w-lg translate-x-[-50%] translate-y-[-50%] gap-4 border bg-background p-6 shadow-lg duration-200 data-[state=open]:animate-in data-[state=closed]:animate-out data-[state=closed]:fade-out-0 data-[state=open]:fade-in-0 data-[state=closed]:zoom-out-95 data-[state=open]:zoom-in-95 data-[state=closed]:slide-out-to-left-1/2 data-[state=closed]:slide-out-to-top-[48%] data-[state=open]:slide-in-from-left-1/2 data-[state=open]:slide-in-from-top-[48%] sm:rounded-lg",
+        className
+      )}
+      {...props}
+    />
+  </AlertDialogPortal>
+));
+AlertDialogContent.displayName = AlertDialogPrimitive.Content.displayName;
+
+const AlertDialogHeader = ({
+  className,
+  ...props
+}: React.HTMLAttributes<HTMLDivElement>) => (
+  <div
+    className={cn("flex flex-col space-y-2 text-center sm:text-left", className)}
+    {...props}
+  />
+);
+AlertDialogHeader.displayName = "AlertDialogHeader";
+
+const AlertDialogFooter = ({
+  className,
+  ...props
+}: React.HTMLAttributes<HTMLDivElement>) => (
+  <div
+    className={cn("flex flex-col-reverse sm:flex-row sm:justify-end sm:space-x-2", className)}
+    {...props}
+  />
+);
+AlertDialogFooter.displayName = "AlertDialogFooter";
+
+const AlertDialogTitle = React.forwardRef<
+  React.ElementRef<typeof AlertDialogPrimitive.Title>,
+  React.ComponentPropsWithoutRef<typeof AlertDialogPrimitive.Title>
+>(({ className, ...props }, ref) => (
+  <AlertDialogPrimitive.Title
+    ref={ref}
+    className={cn("text-lg font-semibold", className)}
+    {...props}
+  />
+));
+AlertDialogTitle.displayName = AlertDialogPrimitive.Title.displayName;
+
+const AlertDialogDescription = React.forwardRef<
+  React.ElementRef<typeof AlertDialogPrimitive.Description>,
+  React.ComponentPropsWithoutRef<typeof AlertDialogPrimitive.Description>
+>(({ className, ...props }, ref) => (
+  <AlertDialogPrimitive.Description
+    ref={ref}
+    className={cn("text-sm text-muted-foreground", className)}
+    {...props}
+  />
+));
+AlertDialogDescription.displayName = AlertDialogPrimitive.Description.displayName;
+
+const AlertDialogAction = React.forwardRef<
+  React.ElementRef<typeof AlertDialogPrimitive.Action>,
+  React.ComponentPropsWithoutRef<typeof AlertDialogPrimitive.Action>
+>(({ className, ...props }, ref) => (
+  <AlertDialogPrimitive.Action
+    ref={ref}
+    className={cn(buttonVariants(), className)}
+    {...props}
+  />
+));
+AlertDialogAction.displayName = AlertDialogPrimitive.Action.displayName;
+
+const AlertDialogCancel = React.forwardRef<
+  React.ElementRef<typeof AlertDialogPrimitive.Cancel>,
+  React.ComponentPropsWithoutRef<typeof AlertDialogPrimitive.Cancel>
+>(({ className, ...props }, ref) => (
+  <AlertDialogPrimitive.Cancel
+    ref={ref}
+    className={cn(buttonVariants({ variant: "outline" }), "mt-2 sm:mt-0", className)}
+    {...props}
+  />
+));
+AlertDialogCancel.displayName = AlertDialogPrimitive.Cancel.displayName;
+
+export {
+  AlertDialog,
+  AlertDialogPortal,
+  AlertDialogOverlay,
+  AlertDialogTrigger,
+  AlertDialogContent,
+  AlertDialogHeader,
+  AlertDialogFooter,
+  AlertDialogTitle,
+  AlertDialogDescription,
+  AlertDialogAction,
+  AlertDialogCancel,
+};
diff --git a/apps/web/src/contexts/AuthContext.tsx b/apps/web/src/contexts/AuthContext.tsx
index 194c890..df68505 100644
--- a/apps/web/src/contexts/AuthContext.tsx
+++ b/apps/web/src/contexts/AuthContext.tsx
@@ -4,6 +4,7 @@ import React, { createContext, useContext, useEffect, useState } from "react";
 import { Session, User, AuthChangeEvent } from "@supabase/supabase-js";
 import { supabase } from "@/lib/supabase";
 import { derivePermissions } from "@/lib/permissions";
+import { extractRoleFromSession } from "@/lib/auth/role";
 import { UserRole, UserPermissions } from "@/types";
 
 interface AuthContextValue {
@@ -40,10 +41,7 @@ export function AuthProvider({ children }: { children: React.ReactNode }) {
   const [isLoading, setIsLoading] = useState(true);
 
   function extractRole(s: Session): UserRole {
-    const metaRole =
-      (s.user.user_metadata?.role as UserRole | undefined) ??
-      (s.user.app_metadata?.role as UserRole | undefined);
-    return metaRole ?? "EMPLOYEE";
+    return extractRoleFromSession(s, { allowUserMetadataFallback: true });
   }
 
   useEffect(() => {
diff --git a/apps/web/src/hooks/queries/useAnalytics.ts b/apps/web/src/hooks/queries/useAnalytics.ts
index 8bcfe66..c4ac0a6 100644
--- a/apps/web/src/hooks/queries/useAnalytics.ts
+++ b/apps/web/src/hooks/queries/useAnalytics.ts
@@ -1,11 +1,11 @@
 "use client";
 
-import { useQuery } from "@tanstack/react-query";
+import { useQuery, keepPreviousData } from "@tanstack/react-query";
 import { apiGet } from "@/lib/api/client";
 import { API } from "@/lib/api/endpoints";
 import { OrgSummaryData, TopPerformerEntry, SessionTrendEntry, LeaderboardEntry } from "@/types";
 
-export function useOrgSummary(from?: string, to?: string) {
+export function useOrgSummary(from?: string, to?: string, enabled = true) {
   return useQuery<OrgSummaryData>({
     queryKey: ["orgSummary", from, to],
     queryFn: () => {
@@ -14,6 +14,9 @@ export function useOrgSummary(from?: string, to?: string) {
       if (to) params["to"] = to;
       return apiGet<OrgSummaryData>(API.orgSummary, params);
     },
+    enabled,
+    staleTime: 30_000,        // dashboard stats: fresh for 30s
+    placeholderData: keepPreviousData,
   });
 }
 
@@ -32,6 +35,8 @@ export function useTopPerformers(
       if (to) params["to"] = to;
       return apiGet<TopPerformerEntry[]>(API.topPerformers, params);
     },
+    staleTime: 60_000,        // chart data: fresh for 1 min
+    placeholderData: keepPreviousData,
   });
 }
 
@@ -44,6 +49,8 @@ export function useSessionTrend(from?: string, to?: string) {
       if (to) params["to"] = to;
       return apiGet<SessionTrendEntry[]>(API.sessionTrend, params);
     },
+    staleTime: 60_000,        // trend chart: fresh for 1 min
+    placeholderData: keepPreviousData,
   });
 }
 
@@ -62,5 +69,7 @@ export function useLeaderboard(
       if (to) params["to"] = to;
       return apiGet<LeaderboardEntry[]>(API.leaderboard, params);
     },
+    staleTime: 120_000,       // ranking: fresh for 2 min (slow-moving)
+    placeholderData: keepPreviousData,
   });
 }
diff --git a/apps/web/src/hooks/queries/useDashboard.ts b/apps/web/src/hooks/queries/useDashboard.ts
index f39634c..f16a8c2 100644
--- a/apps/web/src/hooks/queries/useDashboard.ts
+++ b/apps/web/src/hooks/queries/useDashboard.ts
@@ -9,6 +9,7 @@ export function useMyDashboard() {
   return useQuery<DashboardSummary>({
     queryKey: ["myDashboard"],
     queryFn: () => apiGet<DashboardSummary>(API.myDashboard),
+    staleTime: 30_000,
   });
 }
 
diff --git a/apps/web/src/hooks/queries/useEmployees.ts b/apps/web/src/hooks/queries/useEmployees.ts
index 34c5633..3afe696 100644
--- a/apps/web/src/hooks/queries/useEmployees.ts
+++ b/apps/web/src/hooks/queries/useEmployees.ts
@@ -1,6 +1,6 @@
 "use client";
 
-import { useQuery, useMutation, useQueryClient } from "@tanstack/react-query";
+import { useQuery, useMutation, useQueryClient, keepPreviousData } from "@tanstack/react-query";
 import { apiGet, apiGetPaginated, apiPost, apiPatch } from "@/lib/api/client";
 import { API } from "@/lib/api/endpoints";
 import type { PaginatedResponse } from "@/types";
@@ -33,6 +33,8 @@ export function useEmployeeList(
       if (filters?.search) params["search"] = filters.search;
       return apiGetPaginated<EmployeeRecord>(API.listEmployees, params);
     },
+    staleTime: 120_000,      // employee roster: fresh for 2 min
+    placeholderData: keepPreviousData,
   });
 }
 
@@ -41,6 +43,7 @@ export function useEmployee(id: string | null) {
     queryKey: ["employee", id],
     enabled: id !== null,
     queryFn: () => apiGet<EmployeeRecord>(API.getEmployee(id!)),
+    staleTime: 120_000,
   });
 }
 
diff --git a/apps/web/src/hooks/queries/useExpenses.ts b/apps/web/src/hooks/queries/useExpenses.ts
index 550930e..6761720 100644
--- a/apps/web/src/hooks/queries/useExpenses.ts
+++ b/apps/web/src/hooks/queries/useExpenses.ts
@@ -21,6 +21,8 @@ export function useMyExpenses(page: number, limit: number) {
         page: String(page),
         limit: String(limit),
       }),
+    staleTime: 30_000,
+    placeholderData: keepPreviousData,
   });
 }
 
@@ -32,6 +34,8 @@ export function useOrgExpenses(page: number, limit: number) {
         page: String(page),
         limit: String(limit),
       }),
+    staleTime: 30_000,
+    placeholderData: keepPreviousData,
   });
 }
 
@@ -63,7 +67,8 @@ export function useAllOrgExpenses() {
     queryFn: ({ pageParam }) =>
       apiGetPaginated<Expense>(API.orgExpenses, {
         page: String(pageParam),
-        limit: "1000",
+        // Backend validates limit <= 100 for /admin/expenses.
+        limit: "100",
       }),
     initialPageParam: 1,
     getNextPageParam: (lastPage, allPages) => {
diff --git a/apps/web/src/hooks/queries/useSessions.ts b/apps/web/src/hooks/queries/useSessions.ts
index 15ef35c..f1a599b 100644
--- a/apps/web/src/hooks/queries/useSessions.ts
+++ b/apps/web/src/hooks/queries/useSessions.ts
@@ -14,6 +14,8 @@ export function useMySessions(page: number, limit: number) {
         page: String(page),
         limit: String(limit),
       }),
+    staleTime: 30_000,
+    placeholderData: keepPreviousData,
   });
 }
 
@@ -25,6 +27,8 @@ export function useOrgSessions(page: number, limit: number) {
         page: String(page),
         limit: String(limit),
       }),
+    staleTime: 30_000,
+    placeholderData: keepPreviousData,
   });
 }
 
@@ -42,7 +46,8 @@ export function useAllOrgSessions() {
     queryFn: ({ pageParam }) =>
       apiGetPaginated<AttendanceSession>(API.adminSessions, {
         page: String(pageParam),
-        limit: "1000",
+        // Backend validates limit <= 100 for /admin/sessions.
+        limit: "100",
       }),
     staleTime: 60_000,
     placeholderData: keepPreviousData,
diff --git a/apps/web/src/hooks/queries/useWebhooks.ts b/apps/web/src/hooks/queries/useWebhooks.ts
new file mode 100644
index 0000000..65c7ddd
--- /dev/null
+++ b/apps/web/src/hooks/queries/useWebhooks.ts
@@ -0,0 +1,135 @@
+"use client";
+
+import { useQuery, useMutation, useQueryClient, keepPreviousData } from "@tanstack/react-query";
+import { apiGet, apiGetPaginated, apiPost, apiPatch, apiDelete } from "@/lib/api/client";
+import { API } from "@/lib/api/endpoints";
+import type { PaginatedResponse } from "@/types";
+
+// ─── Types ────────────────────────────────────────────────────────────────────
+
+export const WEBHOOK_EVENT_TYPES = [
+  "employee.checked_in",
+  "employee.checked_out",
+  "expense.created",
+  "expense.approved",
+  "expense.rejected",
+  "employee.created",
+] as const;
+
+export type WebhookEventType = (typeof WEBHOOK_EVENT_TYPES)[number];
+
+export interface WebhookRecord {
+  id: string;
+  organization_id: string;
+  url: string;
+  is_active: boolean;
+  events: WebhookEventType[];
+  created_at: string;
+  updated_at: string;
+}
+
+export type DeliveryStatus = "pending" | "success" | "failed";
+
+export interface WebhookDelivery {
+  id: string;
+  webhook_id: string;
+  event_id: string;
+  organization_id: string;
+  status: DeliveryStatus;
+  attempt_count: number;
+  response_status: number | null;
+  response_body: string | null;
+  last_attempt_at: string | null;
+  next_retry_at: string | null;
+  created_at: string;
+}
+
+export interface CreateWebhookBody {
+  url: string;
+  events: WebhookEventType[];
+  secret: string;
+}
+
+export interface UpdateWebhookBody {
+  url?: string;
+  events?: WebhookEventType[];
+  is_active?: boolean;
+  secret?: string;
+}
+
+// ─── Queries ──────────────────────────────────────────────────────────────────
+
+/** List all webhooks for the org. */
+export function useWebhooks() {
+  return useQuery<WebhookRecord[]>({
+    queryKey: ["webhooks"],
+    queryFn: () => apiGet<WebhookRecord[]>(API.webhooks),
+    staleTime: 30_000,
+  });
+}
+
+/** Paginated delivery history, optionally filtered by webhookId or status. */
+export function useWebhookDeliveries(
+  page: number,
+  limit: number,
+  webhookId?: string,
+  status?: DeliveryStatus
+) {
+  return useQuery<PaginatedResponse<WebhookDelivery>>({
+    queryKey: ["webhookDeliveries", page, limit, webhookId, status],
+    queryFn: () => {
+      const params: Record<string, string> = {
+        page: String(page),
+        limit: String(limit),
+      };
+      if (webhookId) params["webhook_id"] = webhookId;
+      if (status) params["status"] = status;
+      return apiGetPaginated<WebhookDelivery>(API.webhookDeliveries, params);
+    },
+    staleTime: 15_000,         // deliveries: refresh more frequently
+    placeholderData: keepPreviousData,
+  });
+}
+
+// ─── Mutations ────────────────────────────────────────────────────────────────
+
+/** Register a new webhook endpoint. */
+export function useCreateWebhook() {
+  const client = useQueryClient();
+  return useMutation<WebhookRecord, Error, CreateWebhookBody>({
+    mutationFn: (body) => apiPost<WebhookRecord>(API.webhooks, body),
+    onSuccess: () => void client.invalidateQueries({ queryKey: ["webhooks"] }),
+  });
+}
+
+/** Update a webhook's URL, events, active state, or secret. */
+export function useUpdateWebhook(id: string) {
+  const client = useQueryClient();
+  return useMutation<WebhookRecord, Error, UpdateWebhookBody>({
+    mutationFn: (body) => apiPatch<WebhookRecord>(API.webhookById(id), body),
+    onSuccess: () => void client.invalidateQueries({ queryKey: ["webhooks"] }),
+  });
+}
+
+/** Delete a webhook and all its delivery history. */
+export function useDeleteWebhook() {
+  const client = useQueryClient();
+  return useMutation<void, Error, string>({
+    mutationFn: (id) => apiDelete(API.webhookById(id)),
+    onSuccess: () => {
+      void client.invalidateQueries({ queryKey: ["webhooks"] });
+      void client.invalidateQueries({ queryKey: ["webhookDeliveries"] });
+    },
+  });
+}
+
+/** Manually retry a failed (or succeeded) delivery. */
+export function useRetryDelivery() {
+  const client = useQueryClient();
+  return useMutation<WebhookDelivery, Error, string>({
+    mutationFn: (deliveryId) =>
+      apiPost<WebhookDelivery>(API.retryDelivery(deliveryId), {}),
+    onSuccess: () =>
+      void client.invalidateQueries({ queryKey: ["webhookDeliveries"] }),
+  });
+}
diff --git a/apps/web/src/hooks/useAuth.ts b/apps/web/src/hooks/useAuth.ts
index a82a146..68fe6e3 100644
--- a/apps/web/src/hooks/useAuth.ts
+++ b/apps/web/src/hooks/useAuth.ts
@@ -3,6 +3,8 @@
 import { useRouter } from "next/navigation";
 import { supabase } from "@/lib/supabase";
 import { useAuthContext } from "@/contexts/AuthContext";
+import { extractRoleFromSession } from "@/lib/auth/role";
+import { clearAuthTokenCache } from "@/lib/api/client";
 import { UserRole } from "@/types";
 import { queryClient } from "@/lib/query-client";
 
@@ -11,15 +13,15 @@ export function useAuth() {
   const { user, session, role, permissions, isLoading } = useAuthContext();
 
   async function login(email: string, password: string): Promise<UserRole> {
+    // Prevent stale bearer reuse when switching users (e.g. employee -> admin).
+    clearAuthTokenCache();
     const { data, error } = await supabase.auth.signInWithPassword({ email, password });
     if (error) throw error;
-    const metaRole =
-      (data.session.user.user_metadata?.role as UserRole | undefined) ??
-      (data.session.user.app_metadata?.role as UserRole | undefined);
-    return metaRole ?? "EMPLOYEE";
+    return extractRoleFromSession(data.session, { allowUserMetadataFallback: true });
   }
 
   async function logout(): Promise<void> {
+    clearAuthTokenCache();
     await supabase.auth.signOut();
     queryClient.clear();
     router.push("/login");
diff --git a/apps/web/src/lib/api/client.ts b/apps/web/src/lib/api/client.ts
index 2d325da..66a55a2 100644
--- a/apps/web/src/lib/api/client.ts
+++ b/apps/web/src/lib/api/client.ts
@@ -6,6 +6,11 @@ import { ApiError, ApiResponse, PaginatedResponse } from "@/types";
 let cachedToken: string | null = null;
 let tokenExpiry: number = 0;
 
+export function clearAuthTokenCache(): void {
+  cachedToken = null;
+  tokenExpiry = 0;
+}
+
 async function getAuthHeaders(): Promise<Record<string, string>> {
   const now = Date.now();
   
@@ -40,8 +45,7 @@ async function getAuthHeaders(): Promise<Record<string, string>> {
 
 async function handleAuthFailure(): Promise<void> {
   // Clear cached token
-  cachedToken = null;
-  tokenExpiry = 0;
+  clearAuthTokenCache();
   
   // Sign out and redirect
   await supabase.auth.signOut();
@@ -281,3 +285,34 @@ export async function apiPatch<T>(path: string, body: unknown): Promise<T> {
 
   return handleResponse<T>(response);
 }
+
+export async function apiDelete(path: string): Promise<void> {
+  if (!env.NEXT_PUBLIC_API_BASE_URL) {
+    throw new ApiError(
+      "NEXT_PUBLIC_API_BASE_URL is not set.",
+      500
+    );
+  }
+  const headers = await getAuthHeaders();
+  const response = await fetchWithTimeout(`${env.NEXT_PUBLIC_API_BASE_URL}${path}`, {
+    method: "DELETE",
+    headers,
+  });
+
+  // 204 No Content is success — nothing to parse
+  if (response.status === 204) return;
+
+  if (response.status === 401) {
+    await handleAuthFailure();
+    throw new ApiError("Unauthorized. Please log in again.", 401);
+  }
+
+  if (!response.ok) {
+    const text = (await response.text().catch(() => "")).trim().slice(0, 200); // best-effort, bounded excerpt of the error body
+    throw new ApiError(
+      `HTTP ${response.status} error from API${text ? `: ${text}` : ""}`,
+      response.status
+    );
+  }
+}
+
diff --git a/apps/web/src/lib/api/endpoints.ts b/apps/web/src/lib/api/endpoints.ts
index 35dd781..aa98db8 100644
--- a/apps/web/src/lib/api/endpoints.ts
+++ b/apps/web/src/lib/api/endpoints.ts
@@ -60,4 +60,14 @@ export const API = {
   adminEvents: "/admin/events",
   /** GPS playback points for a specific session. */
   sessionLocations: (id: string) => `/admin/sessions/${id}/locations`,
+
+  // Webhooks
+  /** List all registered webhooks for the org (secrets omitted); POST here to create one. */
+  webhooks: "/admin/webhooks",
+  /** Update (PATCH) or delete (DELETE) a single webhook by id. */
+  webhookById: (id: string) => `/admin/webhooks/${id}`,
+  /** Paginated delivery attempts. */
+  webhookDeliveries: "/admin/webhook-deliveries",
+  /** Retry a specific delivery. */
+  retryDelivery: (id: string) => `/admin/webhook-deliveries/${id}/retry`,
 } as const;
diff --git a/apps/web/src/lib/auth/role.ts b/apps/web/src/lib/auth/role.ts
new file mode 100644
index 0000000..5d48068
--- /dev/null
+++ b/apps/web/src/lib/auth/role.ts
@@ -0,0 +1,54 @@
+import type { Session } from "@supabase/supabase-js";
+import { UserRole } from "@/types";
+
+function toUserRole(value: unknown): UserRole | undefined {
+  return value === "ADMIN" || value === "EMPLOYEE" ? value : undefined;
+}
+
+function decodeBase64Url(input: string): string {
+  const normalized = input.replace(/-/g, "+").replace(/_/g, "/");
+  const padded = normalized.padEnd(Math.ceil(normalized.length / 4) * 4, "=");
+
+  if (typeof atob === "function") {
+    return atob(padded);
+  }
+
+  // Next.js middleware runs in an edge-like runtime where atob exists.
+  // This fallback keeps the helper usable in Node-based contexts too.
+  const nodeBuffer = (globalThis as { Buffer?: { from: (data: string, enc: string) => { toString: (enc: string) => string } } }).Buffer;
+  if (!nodeBuffer) {
+    throw new Error("No base64 decoder available in current runtime");
+  }
+  return nodeBuffer.from(padded, "base64").toString("utf-8");
+}
+
+export function extractRoleFromAccessToken(accessToken: string | null | undefined): UserRole | undefined {
+  if (!accessToken) return undefined;
+
+  try {
+    const parts = accessToken.split(".");
+    if (parts.length < 2) return undefined;
+    const payload = JSON.parse(decodeBase64Url(parts[1])) as Record<string, unknown>;
+    return toUserRole(payload.role);
+  } catch {
+    return undefined;
+  }
+}
+
+export function extractRoleFromSession(
+  session: Session,
+  options: { allowUserMetadataFallback?: boolean } = {}
+): UserRole {
+  const claimRole = extractRoleFromAccessToken(session.access_token);
+  if (claimRole) return claimRole;
+
+  const appMetaRole = toUserRole((session.user.app_metadata as Record<string, unknown> | undefined)?.role);
+  if (appMetaRole) return appMetaRole;
+
+  if (options.allowUserMetadataFallback) {
+    const userMetaRole = toUserRole((session.user.user_metadata as Record<string, unknown> | undefined)?.role);
+    if (userMetaRole) return userMetaRole;
+  }
+
+  return "EMPLOYEE";
+}
diff --git a/apps/web/src/lib/query-client.ts b/apps/web/src/lib/query-client.ts
index b40cd7c..f3c6beb 100644
--- a/apps/web/src/lib/query-client.ts
+++ b/apps/web/src/lib/query-client.ts
@@ -1,5 +1,27 @@
+/**
+ * query-client.ts — Global React Query client.
+ *
+ * Default options prevent refetch storms:
+ *   - staleTime: 60s  — data is "fresh" for 1 min (per-hook overrides are more specific)
+ *   - retry: 1        — one retry on network failure
+ *   - refetchOnWindowFocus: false — don't hammer API on tab switch
+ *
+ * Global error handler fires a toast for any failed query, providing consistent
+ * error visibility without each page needing its own error boundary.
+ */
+
 import { QueryClient } from "@tanstack/react-query";
 
+function showErrorToast(message: string) {
+  // Fires a custom event that GlobalErrorToast (in providers.tsx) listens for.
+  // This avoids importing the toast hook here (hooks can't be used outside React).
+  if (typeof window !== "undefined") {
+    window.dispatchEvent(
+      new CustomEvent("fieldtrack:query-error", { detail: { message } })
+    );
+  }
+}
+
 export const queryClient = new QueryClient({
   defaultOptions: {
     queries: {
@@ -7,5 +29,18 @@ export const queryClient = new QueryClient({
       retry: 1,
       refetchOnWindowFocus: false,
     },
+    mutations: {
+      // Do NOT auto-toast mutations — pages handle mutation errors inline
+      // (with form validation feedback, toast on onError callback, etc.)
+    },
   },
 });
+
+// Wire global query error handler after client is constructed
+queryClient.getQueryCache().config.onError = (error) => {
+  const msg =
+    error instanceof Error ? error.message : "An unexpected error occurred";
+  // Suppress 401 errors — auth failures redirect to /login automatically
+  if (msg.toLowerCase().includes("unauthorized")) return;
+  showErrorToast(msg);
+};
diff --git a/apps/web/src/middleware.ts b/apps/web/src/middleware.ts
index 29d496a..e8f3df3 100644
--- a/apps/web/src/middleware.ts
+++ b/apps/web/src/middleware.ts
@@ -1,6 +1,7 @@
 import { createServerClient } from "@supabase/ssr";
 import { NextResponse, type NextRequest } from "next/server";
 import type { CookieOptions } from "@supabase/ssr";
+import { extractRoleFromSession } from "@/lib/auth/role";
 
 /**
  * Auth + role middleware.
@@ -13,7 +14,7 @@ import type { CookieOptions } from "@supabase/ssr";
  *   /login, /_next/*, /favicon.ico, static assets
  *
  * Role-protected routes:
- *   /admin/** → requires role = "ADMIN" in user_metadata
+ *   /admin/** → requires role = "ADMIN" in JWT claims
  */
 export async function middleware(request: NextRequest) {
   const response = NextResponse.next({
@@ -51,11 +52,20 @@ export async function middleware(request: NextRequest) {
 
   const { pathname } = request.nextUrl;
 
+  // Let proxied API requests pass through untouched so the backend can return
+  // proper JSON errors (401/403/etc.) instead of this middleware redirecting
+  // fetches to the HTML login page.
+  if (pathname.startsWith("/api/proxy")) {
+    return response;
+  }
+
   // Already on the login page — don't redirect in a loop
   if (pathname.startsWith("/login")) {
-    // If user is already authenticated, send them to the right landing page
+    // If user is already authenticated, send them to their correct landing page
     if (session) {
-      return NextResponse.redirect(new URL("/sessions", request.url));
+      const role = extractRoleFromSession(session, { allowUserMetadataFallback: false });
+      const landing = role === "ADMIN" ? "/admin/sessions" : "/sessions";
+      return NextResponse.redirect(new URL(landing, request.url));
     }
     return response;
   }
@@ -68,11 +78,9 @@ export async function middleware(request: NextRequest) {
   }
 
   // Role-based protection for /admin routes.
-  // The role is embedded in app_metadata by the custom_access_token_hook, which reads
-  // the authoritative value from public.users.role (server-controlled).
-  // user_metadata is user-editable and MUST NOT be used for authorization decisions.
+  // Use JWT/app_metadata-derived claims only; avoid user_metadata for authz.
   if (pathname.startsWith("/admin")) {
-    const role = (session.user?.app_metadata as Record<string, unknown> | undefined)?.role as string | undefined;
+    const role = extractRoleFromSession(session, { allowUserMetadataFallback: false });
     if (role !== "ADMIN") {
       // Redirect employees and unknown roles away from admin pages.
       return NextResponse.redirect(new URL("/sessions", request.url));
@@ -86,11 +94,12 @@ export const config = {
   matcher: [
     /*
      * Match all paths EXCEPT:
+     *  - api/proxy      (proxied backend API; backend handles auth/errors)
      *  - _next/static  (static files)
      *  - _next/image   (image optimisation)
      *  - favicon.ico
      *  - public assets (png, jpg, svg, etc.)
      */
-    "/((?!_next/static|_next/image|favicon\\.ico|.*\\.(?:png|jpg|jpeg|gif|svg|ico|webp|woff2?|ttf|otf|css|js)).*)",
+    "/((?!api/proxy|_next/static|_next/image|favicon\\.ico|.*\\.(?:png|jpg|jpeg|gif|svg|ico|webp|woff2?|ttf|otf|css|js)).*)",
   ],
 };
diff --git a/docs/SLO.md b/docs/SLO.md
new file mode 100644
index 0000000..33b7f6e
--- /dev/null
+++ b/docs/SLO.md
@@ -0,0 +1,136 @@
+# FieldTrack Service Level Objectives (SLOs)
+
+This document defines the service-level objectives for FieldTrack production services.  Each SLO has a corresponding error budget and alert rules in `infra/prometheus/alerts.yml`.
+
+---
+
+## Definitions
+
+| Term | Meaning |
+|---|---|
+| **SLO** | Service Level Objective — the target reliability level |
+| **SLI** | Service Level Indicator — the metric used to measure the objective |
+| **Error Budget** | Allowable downtime / failure rate before the SLO is violated |
+| **Burn Rate** | How fast the error budget is being consumed relative to normal |
+
+---
+
+## SLO 1 — API Availability
+
+| | |
+|---|---|
+| **SLI** | `up{job=~"fieldtrack-backend.*"}` |
+| **Target** | 99.9% monthly availability |
+| **Error budget** | 43.2 minutes per 30-day window (0.1% of 43 200 minutes) |
+| **Window** | 30-day rolling |
+
+### Rationale
+Sub-1h monthly downtime budget is appropriate for a B2B scheduling SaaS.  Breaching this SLO triggers an incident review.
+
+---
+
+## SLO 2 — API Latency
+
+| | |
+|---|---|
+| **SLI** | `histogram_quantile(0.95, ...)` over `http_request_duration_seconds_bucket` |
+| **Target p95** | < 500 ms |
+| **Target p99** | < 2 000 ms |
+| **Error budget** | 5% of requests may exceed the p95 threshold |
+| **Window** | 5-minute rolling (monitored), 1-hour burn rate (alerting) |
+
+### Rationale
+500 ms p95 ensures interactive response times for the React frontend.  The 2 s p99 provides a safety margin for background operations (bulk import, report generation) without breaching the user-visible latency SLO.
+
+---
+
+## SLO 3 — API Error Rate
+
+| | |
+|---|---|
+| **SLI** | `rate(http_requests_total{status_code=~"5.."}[5m]) / rate(http_requests_total[5m])` |
+| **Target** | < 1% 5xx error rate |
+| **Error budget** | 1% of requests may fail with 5xx |
+| **Window** | 5-minute rolling |
+
+### Rationale
+1% is tight but achievable given the stateless Fastify API + managed Supabase backend.  4xx errors (client mistakes) are excluded from the SLO.
+
+---
+
+## SLO 4 — Webhook Delivery
+
+| | |
+|---|---|
+| **SLI** | Fraction of webhook deliveries that eventually succeed within the retry window |
+| **Target** | 99% of deliveries succeed within 1 hour (across all retry attempts) |
+| **Error budget** | 1% permanent failure rate |
+| **Window** | 1-hour rolling |
+
+### Retry schedule (for reference)
+
+| Attempt | Delay from previous |
+|---|---|
+| 1 | Immediate |
+| 2 | ~1 min (±20% jitter) |
+| 3 | ~5 min (±20% jitter) |
+| 4 | ~15 min (±20% jitter) |
+| 5 | ~1 h (±20% jitter) |
+| After attempt 5 | Moved to Dead-Letter Queue |
+
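+Attempts 1–4 complete within ~21 minutes of the first attempt and fit inside the 1-hour SLO window.  Attempt 5 fires ~81 minutes after the first attempt (1 + 5 + 15 + 60 min), so a delivery that only succeeds on attempt 5 falls outside the window.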
+
+### Rationale
+Webhook delivery failures directly affect customer integrations.  The DLQ captures permanent failures for manual replay; the SLO tracks the fraction that need manual intervention.
+
+---
+
+## SLO 5 — Dead-Letter Queue Depth
+
+| | |
+|---|---|
+| **SLI** | `dlq_size{queue="webhook-delivery-dlq"}` |
+| **Target** | DLQ depth stays below 100 jobs |
+| **Error budget** | DLQ may transiently spike above 100 for < 30 minutes |
+| **Window** | 30-minute sustained |
+
+### Rationale
+A DLQ backlog above 100 indicates a systemic delivery failure (bad endpoint configuration, network partition) requiring operator attention.  Transient spikes under 30 minutes are tolerated.
+
+---
+
+## Error Budget Alert Strategy
+
+The following multi-burn-rate windows are used for the error budget alerts to catch both fast burns (page immediately) and slow burns (ticket within the hour):
+
+| Window | Burn rate threshold | Severity | Action |
+|---|---|---|---|
+| 1h/5m | 14× | critical | Page on-call |
+| 6h/30m | 6× | warning | Open ticket |
+| 1d/2h | 3× | warning | Engineering review |
+
+---
+
+## Alert → SLO Mapping
+
+| Alert name | SLO | Severity |
+|---|---|---|
+| `FieldTrackHighErrorRate` | SLO 3 | critical |
+| `FieldTrackSloErrorBudgetBurnFast` | SLO 3 | critical |
+| `FieldTrackSloErrorBudgetBurnSlow` | SLO 3 | warning |
+| `FieldTrackHighLatency` | SLO 2 | warning |
+| `FieldTrackLatencyP99High` | SLO 2 p99 | warning |
+| `WebhookDeliveryFailureRateHigh` | SLO 4 | critical |
+| `WebhookDeliveryFailureRateWarning` | SLO 4 | warning |
+| `WebhookDlqGrowing` | SLO 5 | warning |
+| `WebhookCircuitBreakerOpened` | SLO 4 | warning |
+| `DeploymentFailure` | SLO 1 | critical |
+| `ReadinessCheckFailing` | SLO 1 | critical |
+
+---
+
+## Review Cadence
+
+- **Monthly**: review error budget consumption; adjust SLO thresholds if engineering velocity is affected.
+- **Post-incident**: update error budget retroactively; add alert tuning if a regression was missed.
+- **Quarterly**: revisit SLO targets vs. customer expectations.
diff --git a/docs/WEBHOOK_SIGNATURES.md b/docs/WEBHOOK_SIGNATURES.md
new file mode 100644
index 0000000..5161c9a
--- /dev/null
+++ b/docs/WEBHOOK_SIGNATURES.md
@@ -0,0 +1,194 @@
+# FieldTrack Webhook Signature Verification
+
+Every outbound webhook request from FieldTrack includes security headers that allow receivers to verify authenticity and reject replayed requests.
+
+---
+
+## Headers
+
+| Header | Example value | Purpose |
+|---|---|---|
+| `X-FieldTrack-Signature` | `sha256=a3f1c8...` | HMAC-SHA256 of the signing body (see below) |
+| `X-FieldTrack-Timestamp` | `1711618200` | Unix timestamp **in seconds** at delivery time |
+| `X-FieldTrack-Event` | `employee.checked_in` | Logical event type for routing |
+| `X-FieldTrack-Delivery-Id` | `1b2f...-uuid` | Unique delivery attempt id for idempotency / replay dedupe |
+
+---
+
+## Signing algorithm
+
+```
+signing_body = "<timestamp>.<raw_request_body>"
+signature    = "sha256=" + hex( HMAC-SHA256( secret, signing_body ) )
+```
+
+Where:
+- `<timestamp>` is the value of `X-FieldTrack-Timestamp` (decimal string, no padding)
+- `<raw_request_body>` is the **exact** bytes of the HTTP request body (UTF-8 JSON, no re-serialisation)
+- `secret` is the **per-webhook signing secret** shown in the FieldTrack webhooks dashboard
+- The HMAC key is the raw UTF-8 string of the secret (not Base64-decoded)
+- Dot (`.`) is the separator between timestamp and body
+
+### Why timestamp-bound?
+
+Including the timestamp in the signing input means the same payload signed at a different time produces a different signature.  This prevents _replay attacks_: a valid request captured by a MITM cannot be replayed after the tolerance window expires.
+
+**Receivers MUST reject requests where `|now - timestamp| > 300 seconds` (5 minutes).**
+
+---
+
+## Verification steps (receiver side)
+
+1. Extract `X-FieldTrack-Timestamp` → `ts` (integer)
+2. Verify `|time.now() - ts| <= 300` — reject with HTTP 400 if stale.
+3. Construct `signing_body = ts + "." + request_body_string`
+4. Compute `expected = "sha256=" + hex(HMAC-SHA256(secret, signing_body))`
+5. Compare `expected` to `X-FieldTrack-Signature` using a **timing-safe** equality function.
+6. Reject with HTTP 401 if signatures do not match.
+7. Optional replay guard: store `X-FieldTrack-Delivery-Id` for 24 h and reject duplicates.
+
+> ⚠ **Never** use regular string equality (`==`) to compare signatures — it is vulnerable to timing attacks. Always use `hmac.compare_digest` (Python) or `crypto.timingSafeEqual` (Node.js).
+
+---
+
+## Node.js verification example
+
+```typescript
+import { createHmac, timingSafeEqual } from "node:crypto";
+import type { IncomingMessage, ServerResponse } from "node:http";
+
+const TOLERANCE_SECONDS = 300;
+
+function verifyFieldTrackWebhook(
+  rawBody: string,
+  secret: string,
+  receivedSignature: string,
+  receivedTimestamp: string,
+): boolean {
+  // 1. Validate timestamp within tolerance window
+  const ts  = parseInt(receivedTimestamp, 10);
+  const now = Math.floor(Date.now() / 1000);
+  if (isNaN(ts) || Math.abs(now - ts) > TOLERANCE_SECONDS) {
+    return false; // stale or malformed timestamp
+  }
+
+  // 2. Reconstruct signing body
+  const signingBody = `${ts}.${rawBody}`;
+
+  // 3. Compute expected signature
+  const hmac     = createHmac("sha256", secret);
+  hmac.update(signingBody, "utf8");
+  const expected = `sha256=${hmac.digest("hex")}`;
+
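+  // 4. Timing-safe comparison.  Compare BYTE lengths first: timingSafeEqual
+  //    throws a RangeError when the two buffers differ in length.
+  const expectedBuf = Buffer.from(expected,          "utf8");
+  const receivedBuf = Buffer.from(receivedSignature, "utf8");
+  if (expectedBuf.length !== receivedBuf.length) return false;
+  return timingSafeEqual(expectedBuf, receivedBuf);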
+}
+
+// ── Express / raw middleware example ─────────────────────────────────────────
+
+import express from "express";
+
+const app = express();
+
+// Must use raw body middleware — JSON.parse() changes byte representation.
+app.use("/webhooks/fieldtrack", express.raw({ type: "application/json" }));
+
+app.post("/webhooks/fieldtrack", (req: IncomingMessage & { body: Buffer }, res: ServerResponse) => {
+  const rawBody   = (req as express.Request).body.toString("utf8");
+  const signature = (req as express.Request).headers["x-fieldtrack-signature"] as string ?? "";
+  const timestamp = (req as express.Request).headers["x-fieldtrack-timestamp"] as string ?? "";
+  const secret    = process.env.FIELDTRACK_WEBHOOK_SECRET ?? "";
+
+  if (!secret || !verifyFieldTrackWebhook(rawBody, secret, signature, timestamp)) {
+    res.writeHead(401);
+    res.end("Invalid signature");
+    return;
+  }
+
+  const event = JSON.parse(rawBody);
+  console.log("Received event:", event.type);
+  res.writeHead(200);
+  res.end("OK");
+});
+```
+
+---
+
+## Python verification example
+
+```python
+import hashlib
+import hmac
+import time
+from flask import Flask, request, abort
+
+TOLERANCE_SECONDS = 300
+app = Flask(__name__)
+
+
+def verify_fieldtrack_webhook(
+    raw_body: bytes,
+    secret: str,
+    received_signature: str,
+    received_timestamp: str,
+) -> bool:
+    # 1. Validate timestamp within tolerance window
+    try:
+        ts = int(received_timestamp)
+    except (ValueError, TypeError):
+        return False
+
+    if abs(time.time() - ts) > TOLERANCE_SECONDS:
+        return False  # stale
+
+    # 2. Reconstruct signing body (bytes)
+    signing_body = f"{ts}.".encode() + raw_body
+
+    # 3. Compute expected signature
+    mac      = hmac.new(secret.encode("utf-8"), signing_body, hashlib.sha256)
+    expected = "sha256=" + mac.hexdigest()
+
+    # 4. Timing-safe comparison (on bytes: str arguments raise TypeError if non-ASCII)
+    return hmac.compare_digest(expected.encode("utf-8"), received_signature.encode("utf-8"))
+
+
+@app.route("/webhooks/fieldtrack", methods=["POST"])
+def receive_webhook():
+    raw_body  = request.get_data()           # raw bytes before JSON decode
+    signature = request.headers.get("X-FieldTrack-Signature", "")
+    timestamp = request.headers.get("X-FieldTrack-Timestamp", "")
+    secret    = "your-webhook-secret-here"   # from FieldTrack dashboard
+
+    if not verify_fieldtrack_webhook(raw_body, secret, signature, timestamp):
+        abort(401, "Invalid signature")
+
+    event = request.get_json()
+    print(f"Received event: {event['type']}")
+    return "", 200
+```
+
+---
+
+## Common mistakes
+
+| Mistake | Impact | Fix |
+|---|---|---|
+| Re-serialising the body before signing (e.g. `json.dumps(json.loads(body))`) | Signature mismatch on any non-canonical JSON | Hash the **raw bytes** received over the wire |
+| Skipping the timestamp check | Replay attacks possible indefinitely | Always validate `\|now - ts\| <= 300` |
+| Using `==` for signature comparison | Timing oracle leaks the expected signature byte by byte | Use `hmac.compare_digest` / `timingSafeEqual` |
+| Decoding the secret from Base64 | Wrong key bytes → signature always fails | Use the secret string as-is (UTF-8) |
+| Signing `body` instead of `timestamp.body` | Valid signatures but no replay protection | Always prepend timestamp + dot |
+
+---
+
+## Rotating secrets
+
+1. Generate a new secret in the FieldTrack webhooks dashboard.
+2. Update your receiver to accept **both** the old and new secret during a transition window (check both; accept if either matches).
+3. Once all in-flight requests have been delivered, remove the old secret check.
+
+FieldTrack re-signs all new deliveries with the new secret immediately upon rotation; retries of existing deliveries use the secret active at the time of the original enqueue.
diff --git a/infra/grafana/dashboards/fieldtrack.json b/infra/grafana/dashboards/fieldtrack.json
index 37efa89..83bef40 100644
--- a/infra/grafana/dashboards/fieldtrack.json
+++ b/infra/grafana/dashboards/fieldtrack.json
@@ -525,6 +525,138 @@
                     "refId": "B"
                 }
             ]
+        },
+        {
+            "title": "API Error Budget Remaining (30d)",
+            "type": "stat",
+            "gridPos": {
+                "h": 8,
+                "w": 8,
+                "x": 0,
+                "y": 36
+            },
+            "datasource": {
+                "type": "prometheus",
+                "uid": ""
+            },
+            "fieldConfig": {
+                "defaults": {
+                    "color": {
+                        "mode": "thresholds"
+                    },
+                    "unit": "percentunit",
+                    "thresholds": {
+                        "steps": [
+                            {
+                                "color": "red",
+                                "value": null
+                            },
+                            {
+                                "color": "yellow",
+                                "value": 0.5
+                            },
+                            {
+                                "color": "green",
+                                "value": 0.9
+                            }
+                        ]
+                    }
+                },
+                "overrides": []
+            },
+            "targets": [
+                {
+                    "expr": "fieldtrack:api_error_budget_remaining_30d",
+                    "legendFormat": "Remaining",
+                    "refId": "A"
+                }
+            ]
+        },
+        {
+            "title": "API Error Rate (1h / 6h)",
+            "type": "timeseries",
+            "gridPos": {
+                "h": 8,
+                "w": 8,
+                "x": 8,
+                "y": 36
+            },
+            "datasource": {
+                "type": "prometheus",
+                "uid": ""
+            },
+            "fieldConfig": {
+                "defaults": {
+                    "color": {
+                        "mode": "palette-classic"
+                    },
+                    "custom": {
+                        "drawStyle": "line",
+                        "fillOpacity": 15,
+                        "lineWidth": 2
+                    },
+                    "unit": "percentunit"
+                },
+                "overrides": []
+            },
+            "targets": [
+                {
+                    "expr": "fieldtrack:api_error_rate_1h",
+                    "legendFormat": "1h",
+                    "refId": "A"
+                },
+                {
+                    "expr": "fieldtrack:api_error_rate_6h",
+                    "legendFormat": "6h",
+                    "refId": "B"
+                }
+            ]
+        },
+        {
+            "title": "Webhook Permanent Failure Rate (5m)",
+            "type": "stat",
+            "gridPos": {
+                "h": 8,
+                "w": 8,
+                "x": 16,
+                "y": 36
+            },
+            "datasource": {
+                "type": "prometheus",
+                "uid": ""
+            },
+            "fieldConfig": {
+                "defaults": {
+                    "color": {
+                        "mode": "thresholds"
+                    },
+                    "unit": "percentunit",
+                    "thresholds": {
+                        "steps": [
+                            {
+                                "color": "green",
+                                "value": null
+                            },
+                            {
+                                "color": "yellow",
+                                "value": 0.1
+                            },
+                            {
+                                "color": "red",
+                                "value": 0.3
+                            }
+                        ]
+                    }
+                },
+                "overrides": []
+            },
+            "targets": [
+                {
+                    "expr": "fieldtrack:webhook_failure_rate_5m",
+                    "legendFormat": "Failure Rate",
+                    "refId": "A"
+                }
+            ]
         }
     ],
     "schemaVersion": 39,
@@ -544,5 +676,5 @@
     "timezone": "browser",
     "title": "FieldTrack 2.0 — Backend & System",
     "uid": "fieldtrack-backend",
-    "version": 1
+    "version": 2
 }
\ No newline at end of file
diff --git a/infra/nginx/fieldtrack.conf b/infra/nginx/fieldtrack.conf
index 09f97e1..44a66a9 100644
--- a/infra/nginx/fieldtrack.conf
+++ b/infra/nginx/fieldtrack.conf
@@ -12,7 +12,7 @@ upstream fieldtrack_backend {
     keepalive 32;
 }
 
-limit_req_zone $binary_remote_addr zone=fieldtrack_api:10m rate=30r/s;
+limit_req_zone $binary_remote_addr zone=fieldtrack_api:10m rate=60r/s;
 limit_req_zone $binary_remote_addr zone=fieldtrack_health:10m rate=5r/s;
 
 # Cloudflare IPs
@@ -86,7 +86,7 @@ server {
 
     listen 443 ssl;
     listen [::]:443 ssl;
-    # ❌ removed invalid http2 directive
+    http2 on;
 
     server_name __API_HOSTNAME__;
 
diff --git a/infra/prometheus/alerts.yml b/infra/prometheus/alerts.yml
index abe5b5e..6b9a2e2 100644
--- a/infra/prometheus/alerts.yml
+++ b/infra/prometheus/alerts.yml
@@ -1,5 +1,46 @@
 groups:
 
+# ---------------------------------------------------------
+# RECORDING RULES
+# ---------------------------------------------------------
+
+- name: fieldtrack_recording_rules
+  rules:
+  - record: fieldtrack:api_requests_rate_5m
+    expr: sum(rate(http_requests_total{job=~"fieldtrack-backend.*"}[5m]))
+
+  - record: fieldtrack:api_errors_5xx_rate_5m
+    expr: sum(rate(http_requests_total{job=~"fieldtrack-backend.*",status_code=~"5.."}[5m]))
+
+  - record: fieldtrack:api_error_rate_5m
+    expr: fieldtrack:api_errors_5xx_rate_5m / clamp_min(fieldtrack:api_requests_rate_5m, 1e-9)
+
+  - record: fieldtrack:api_error_rate_1h
+    expr: |
+      sum(rate(http_requests_total{job=~"fieldtrack-backend.*",status_code=~"5.."}[1h]))
+      /
+      clamp_min(sum(rate(http_requests_total{job=~"fieldtrack-backend.*"}[1h])), 1e-9)
+
+  - record: fieldtrack:api_error_rate_6h
+    expr: |
+      sum(rate(http_requests_total{job=~"fieldtrack-backend.*",status_code=~"5.."}[6h]))
+      /
+      clamp_min(sum(rate(http_requests_total{job=~"fieldtrack-backend.*"}[6h])), 1e-9)
+
+  - record: fieldtrack:webhook_failure_rate_5m
+    expr: |
+      sum(rate(webhook_failures_total[5m]))
+      /
+      clamp_min(sum(rate(webhook_deliveries_total[5m])), 1e-9)
+
+  - record: fieldtrack:api_error_budget_remaining_30d
+    expr: |
+      1 - (
+        sum(increase(http_requests_total{job=~"fieldtrack-backend.*",status_code=~"5.."}[30d]))
+        /
+        clamp_min(sum(increase(http_requests_total{job=~"fieldtrack-backend.*"}[30d])), 1)
+      ) / 0.01  # SLO 3 budget: 1% of requests may be 5xx
+
 # ---------------------------------------------------------
 # API HEALTH
 # ---------------------------------------------------------
@@ -8,17 +49,23 @@ groups:
   rules:
 
   - alert: FieldTrackHighErrorRate
-    expr: |
-      sum(rate(http_requests_total{job=~"fieldtrack-backend.*",status_code=~"5.."}[5m]))
-      /
-      sum(rate(http_requests_total{job=~"fieldtrack-backend.*"}[5m]))
-      > 0.05
+    expr: fieldtrack:api_error_rate_5m > 0.05
     for: 2m
     labels:
       severity: critical
     annotations:
       summary: "High API error rate"
       description: "5xx errors exceed 5%"
+      runbook_url: "https://github.com/fieldtrack/fieldtrack/blob/main/docs/ROLLBACK_QUICKREF.md"
+      runbook: |
+        Cause: Application throwing unhandled errors or DB/dependency failures.
+        Actions:
+          1. Check container logs: docker logs fieldtrack-api --tail 200
+          2. Check /system-health endpoint from VPS
+          3. Review recent deployments: git log --oneline -10
+          4. If DB: check Supabase dashboard for connection pool saturation
+          5. If memory: check HostMemoryPressure alert and restart container
+          6. Rollback if needed: see docs/ROLLBACK_QUICKREF.md
 
   - alert: FieldTrackHighLatency
     expr: |
@@ -32,6 +79,15 @@ groups:
     annotations:
       summary: "High API latency"
       description: "p95 latency above 1 second"
+      runbook_url: "https://github.com/fieldtrack/fieldtrack/blob/main/docs/SLO.md"
+      runbook: |
+        Cause: Slow DB queries, queue contention, or upstream dependency latency.
+        Actions:
+          1. Open Grafana latency panel (p95/p99) and identify spike start time
+          2. Check slow-response logs in Loki (`slow_response` and `very_slow_response`)
+          3. Check DB load and connection saturation in Supabase dashboard
+          4. Inspect queue backlogs via GET /admin/system-health
+          5. Roll back recent deployment if latency regression started post-release
 
   - alert: FieldTrackAvgLatencyHigh
     expr: |
@@ -45,6 +101,15 @@ groups:
     annotations:
       summary: "FieldTrack API latency exceeded threshold"
       description: "Average response time exceeded 500 ms for 5 minutes"
+      runbook_url: "https://github.com/fieldtrack/fieldtrack/blob/main/docs/SLO.md"
+      runbook: |
+        Cause: Sustained performance degradation across many routes.
+        Actions:
+          1. Compare avg latency with p95/p99 to identify broad vs tail issue
+          2. Review top routes by request rate and latency in Grafana
+          3. Inspect backend logs for DB timeout and retry patterns
+          4. Validate Redis and Supabase health via /ready and /system-health
+          5. Trigger rollback if regression is tied to latest deploy
 
 # ---------------------------------------------------------
 # WORKER ALERTS
@@ -61,6 +126,15 @@ groups:
     annotations:
       summary: "Distance worker jobs failing at high rate"
       description: "More than 3 distance recalculation jobs permanently failed in the last 5 minutes. Check Redis connectivity and the distance-engine queue."
+      runbook_url: "https://github.com/fieldtrack/fieldtrack/blob/main/docs/OBSERVABILITY_ARCHITECTURE.md"
+      runbook: |
+        Cause: Redis connectivity failure, Supabase query errors, or malformed GPS data.
+        Actions:
+          1. Check Redis: redis-cli -u $REDIS_URL ping
+          2. Check worker logs: docker logs fieldtrack-api | grep "Distance worker"
+          3. Inspect failed queue: GET /admin/system-health (worker section)
+          4. Replay stuck sessions via queue_retry_intents if needed
+          5. Check for GPS point anomalies (MAX_POINTS_PER_SESSION exceeded)
 
   - alert: AnalyticsQueueBacklogGrowing
     expr: analytics_queue_depth > 500
@@ -69,6 +143,16 @@ groups:
       severity: warning
     annotations:
       summary: "Analytics queue backlog high"
+      description: "Analytics queue depth exceeded 500 for 5 minutes"
+      runbook_url: "https://github.com/fieldtrack/fieldtrack/blob/main/docs/OBSERVABILITY_ARCHITECTURE.md"
+      runbook: |
+        Cause: Worker throughput below enqueue rate or downstream DB contention.
+        Actions:
+          1. Check analytics worker logs for repeated errors/timeouts
+          2. Inspect queue depth in GET /admin/system-health
+          3. Validate Redis latency and connection health
+          4. Check Supabase CPU/connection pressure
+          5. Temporarily scale worker concurrency if safe
 
   # Phase 22: Fire if more than 5 analytics jobs permanently fail within 5 minutes.
   # This indicates a systemic problem (bad DB schema change, Supabase outage, etc.)
@@ -81,6 +165,15 @@ groups:
     annotations:
       summary: "Analytics jobs failing at high rate"
       description: "More than 5 analytics jobs permanently failed (exhausted all retries) in the last 5 minutes. Check the analytics-failed dead letter queue and worker logs."
+      runbook_url: "https://github.com/fieldtrack/fieldtrack/blob/main/docs/OBSERVABILITY_ARCHITECTURE.md"
+      runbook: |
+        Cause: Supabase schema change, DB connection exhaustion, or analytics aggregation bug.
+        Actions:
+          1. Check worker logs: docker logs fieldtrack-api | grep "analytics"
+          2. Inspect dead letter queue via GET /admin/system-health
+          3. Verify DB schema: check employee_daily_metrics and org_daily_metrics tables
+          4. If transient: failed jobs auto-expire after 72 h; monitor retry_intents_dead metric
+          5. If persistent: hotfix deployment required — see docs/ROLLBACK_QUICKREF.md
 
 # ---------------------------------------------------------
 # HOST ALERTS
@@ -96,6 +189,16 @@ groups:
       severity: warning
     annotations:
       summary: "High CPU usage"
+      description: "Host CPU usage above 85% for 5 minutes"
+      runbook_url: "https://github.com/fieldtrack/fieldtrack/blob/main/docs/OBSERVABILITY_ARCHITECTURE.md"
+      runbook: |
+        Cause: Traffic surge, runaway process, or expensive query loops.
+        Actions:
+          1. Check top CPU consumers on host (`top`/`htop`)
+          2. Correlate with request rate and queue depth in Grafana
+          3. Inspect container logs for retry storms or hot loops
+          4. Scale out backend replicas or reduce noisy traffic source
+          5. Roll back if a recent deploy caused the spike
 
   - alert: HostMemoryPressure
     expr: |
@@ -108,6 +211,16 @@ groups:
       severity: warning
     annotations:
       summary: "High memory usage"
+      description: "Host memory usage above 85% for 5 minutes"
+      runbook_url: "https://github.com/fieldtrack/fieldtrack/blob/main/docs/ROLLBACK_QUICKREF.md"
+      runbook: |
+        Cause: Memory leak, oversized cache, or traffic burst.
+        Actions:
+          1. Inspect container RSS and heap charts in Grafana
+          2. Check process logs for OOM warnings and GC pressure
+          3. Restart affected container if memory does not recover
+          4. If recurring post-deploy, roll back and open incident
+          5. Confirm host swap/disk not under pressure simultaneously
 
   - alert: DiskAlmostFull
     expr: |
@@ -121,6 +234,16 @@ groups:
       severity: critical
     annotations:
       summary: "Disk usage above 85%"
+      description: "Root filesystem usage exceeded 85% for 5 minutes"
+      runbook_url: "https://github.com/fieldtrack/fieldtrack/blob/main/docs/DEPLOYMENT.md"
+      runbook: |
+        Cause: Log growth, artifact buildup, or runaway temp files.
+        Actions:
+          1. Identify large directories (`du -sh /*` on host)
+          2. Rotate/prune Docker images and logs
+          3. Verify Loki/Promtail retention settings
+          4. Free space before deployment operations
+          5. Increase disk capacity if growth trend persists
 
 # ---------------------------------------------------------
 # DEPLOYMENT & INFRASTRUCTURE ALERTS
@@ -137,6 +260,15 @@ groups:
     annotations:
       summary: "Redis is unreachable"
       description: "Redis has been down for more than 2 minutes. BullMQ workers, rate limiting, and the auth context cache will all degrade until Redis recovers."
+      runbook_url: "https://github.com/fieldtrack/fieldtrack/blob/main/docs/ARCHITECTURE.md"
+      runbook: |
+        Cause: Redis container crash, OOM kill, or network partition.
+        Actions:
+          1. Check container: docker ps | grep redis; docker logs redis --tail 50
+          2. Restart if crashed: docker restart redis (or docker compose up -d redis)
+          3. Verify BullMQ reconnects: check worker logs after Redis recovery
+          4. Rate limiting degrades gracefully (requests allowed through) during outage
+          5. Circuit-breaker state is DB-backed and survives Redis restart
 
   - alert: DeploymentFailure
     expr: up{job=~"fieldtrack-backend.*"} == 0
@@ -146,6 +278,16 @@ groups:
     annotations:
       summary: "Backend container is down"
       description: "{{ $labels.job }} has been down for more than 2 minutes. Check deployment logs and container status."
+      runbook_url: "https://github.com/fieldtrack/fieldtrack/blob/main/docs/ROLLBACK_QUICKREF.md"
+      runbook: |
+        Cause: Container OOM, crash loop, failed deployment, or host issue.
+        Actions:
+          1. Check status: docker ps -a | grep fieldtrack
+          2. Inspect last 100 lines: docker logs fieldtrack-api --tail 100
+          3. Check exit code: docker inspect fieldtrack-api | jq '.[0].State'
+          4. Restart if safe: docker restart fieldtrack-api
+          5. Rollback if bad deploy: see docs/ROLLBACK_QUICKREF.md
+          6. Check host memory/disk: node_memory and node_filesystem alerts
 
   - alert: ReadinessCheckFailing
     expr: probe_success{job="fieldtrack-readiness"} == 0
@@ -155,6 +297,15 @@ groups:
     annotations:
       summary: "Readiness check failing"
       description: "/ready endpoint has been failing for 3 minutes. Check DB, Redis, and Supabase connectivity."
+      runbook_url: "https://github.com/fieldtrack/fieldtrack/blob/main/docs/ROLLBACK_QUICKREF.md"
+      runbook: |
+        Cause: One or more hard dependencies unhealthy (Redis/Supabase/BullMQ).
+        Actions:
+          1. Hit /ready and /health manually from VPS
+          2. Check Redis ping and Supabase connectivity
+          3. Inspect container logs for startup/recovery errors
+          4. Check worker state in /admin/system-health
+          5. Roll back if issue began immediately after deployment
 
 # ---------------------------------------------------------
 # TLS CERTIFICATE ALERTS
@@ -171,6 +322,15 @@ groups:
     annotations:
       summary: "TLS certificate expiring within 14 days"
       description: "Certificate for {{ $labels.instance }} expires in less than 14 days. Renew via certbot."
+      runbook_url: "https://github.com/fieldtrack/fieldtrack/blob/main/docs/DEPLOYMENT.md"
+      runbook: |
+        Cause: Certificate nearing expiry date.
+        Actions:
+          1. Verify expiry date using blackbox panel and `openssl s_client`
+          2. Renew certificate (certbot or managed provider)
+          3. Reload NGINX and confirm certificate chain
+          4. Recheck probe_ssl_earliest_cert_expiry metric
+          5. Confirm no stale cert served via CDN edge
 
   - alert: TLSCertExpired
     expr: probe_ssl_earliest_cert_expiry{job="fieldtrack-readiness"} - time() < 0
@@ -179,4 +339,221 @@ groups:
       severity: critical
     annotations:
       summary: "TLS certificate has expired"
-      description: "Certificate for {{ $labels.instance }} has expired. All HTTPS traffic is failing."
\ No newline at end of file
+      description: "Certificate for {{ $labels.instance }} has expired. All HTTPS traffic is failing."
+      runbook_url: "https://github.com/fieldtrack/fieldtrack/blob/main/docs/DEPLOYMENT.md"
+      runbook: |
+        Cause: Certificate renewal failed or cert not reloaded.
+        Actions:
+          1. Renew certificate immediately
+          2. Reload NGINX and verify HTTPS handshake
+          3. Validate Cloudflare/full-chain configuration
+          4. Confirm /health and /ready are reachable over HTTPS
+          5. Open incident and track customer impact window
+
+# ---------------------------------------------------------
+# WEBHOOK DELIVERY SLOs  (SLO 4 + SLO 5)
+# See docs/SLO.md for full SLO definitions and error-budget
+# burn-rate strategy.
+# ---------------------------------------------------------
+
+- name: fieldtrack_webhook_slo_alerts
+  rules:
+
+  # --- SLO 4: Webhook delivery permanent failure rate > 10% for 5 m (warning) --
+  - alert: WebhookDeliveryFailureRateWarning
+    expr: fieldtrack:webhook_failure_rate_5m > 0.10
+    for: 5m
+    labels:
+      severity: warning
+    annotations:
+      summary: "Webhook permanent failure rate above 10%"
+      description: >-
+        More than 10% of webhook deliveries are permanently failing (all retries
+        exhausted) over the last 5 minutes.  Check receiver endpoints and circuit
+        breaker status.  DLQ jobs can be replayed via POST /admin/webhook-dlq/:id/replay.
+      runbook_url: "https://github.com/fieldtrack/fieldtrack/blob/main/docs/WEBHOOK_SIGNATURES.md"
+      runbook: |
+        Cause: Elevated webhook failures for one or more receivers.
+        Actions:
+          1. Check webhook worker logs for dominant error patterns
+          2. Inspect DLQ depth and recent failed deliveries
+          3. Confirm receiver endpoints are reachable and returning 2xx
+          4. Check circuit breaker status in webhooks table
+          5. Replay DLQ jobs after root cause is fixed
+
+  # --- SLO 4: Webhook delivery permanent failure rate > 30% for 2 m (critical) -
+  - alert: WebhookDeliveryFailureRateHigh
+    expr: fieldtrack:webhook_failure_rate_5m > 0.30
+    for: 2m
+    labels:
+      severity: critical
+    annotations:
+      summary: "Webhook permanent failure rate critically high (>30%)"
+      description: >-
+        Over 30% of webhook deliveries are permanently failing.  This is a
+        customer-visible outage for all orgs with active webhooks.  Investigate
+        immediately: check DB connectivity, receiver endpoints, and circuit breaker
+        state.
+      runbook_url: "https://github.com/fieldtrack/fieldtrack/blob/main/docs/WEBHOOK_SIGNATURES.md"
+      runbook: |
+        Cause: Mass endpoint failures, DB outage, or a code bug in the delivery worker.
+        Actions:
+          1. Check worker logs: docker logs fieldtrack-api | grep "webhook.worker"
+          2. Inspect DLQ: GET /admin/webhook-dlq (admin token required)
+          3. Check circuit breaker state: query webhooks table for circuit_open_until IS NOT NULL
+          4. Replay DLQ entries after fixing root cause: POST /admin/webhook-dlq/:id/replay
+          5. If DB issue: check Supabase dashboard, verify webhook_deliveries writes
+          6. If code bug: rollback deployment — see docs/ROLLBACK_QUICKREF.md
+
+  # --- SLO 5: DLQ depth above 100 for 30 min --------------------------------
+  - alert: WebhookDlqGrowing
+    expr: dlq_size{queue="webhook-delivery-dlq"} > 100
+    for: 30m
+    labels:
+      severity: warning
+    annotations:
+      summary: "Webhook DLQ depth above 100 for 30 minutes"
+      description: >-
+        The webhook dead-letter queue has had more than 100 unprocessed jobs for
+        30 minutes.  This indicates sustained delivery failures that exceed the
+        normal transient-failure pattern.  Review DLQ via GET /admin/webhook-dlq
+        and replay or purge stale entries.
+      runbook_url: "https://github.com/fieldtrack/fieldtrack/blob/main/docs/OBSERVABILITY_ARCHITECTURE.md"
+      runbook: |
+        Cause: Persistent downstream delivery failures.
+        Actions:
+          1. Review DLQ entries and identify repeated endpoint failures
+          2. Confirm webhook receiver health and DNS/TLS validity
+          3. Inspect retry/error metrics and circuit breaker audit entries
+          4. Purge stale DLQ entries after archival is confirmed
+          5. Replay jobs only after receivers are healthy
+
+  # --- Circuit breaker: any webhook circuit opened (leading indicator) -------
+  #
+  # webhook_failures_total counts permanent failures; a sudden spike often
+  # indicates a circuit breaker tripped.  A short `for: 0m` (fires immediately)
+  # gives the earliest possible signal to investigate the affected endpoint.
+  - alert: WebhookCircuitBreakerOpened
+    expr: increase(webhook_failures_total[2m]) > 5
+    for: 0m
+    labels:
+      severity: warning
+    annotations:
+      summary: "Webhook failure spike — possible circuit breaker activation"
+      description: >-
+        More than 5 permanent webhook failures occurred in the last 2 minutes.
+        A circuit breaker may have opened, pausing delivery to one or more
+        endpoints.  Check circuit breaker state in webhook_deliveries and the
+        webhooks.circuit_open_until column.
+      runbook_url: "https://github.com/fieldtrack/fieldtrack/blob/main/docs/WEBHOOK_SIGNATURES.md"
+      runbook: |
+        Cause: Rapid repeated delivery failures triggered circuit breaker protection.
+        Actions:
+          1. Query webhooks with circuit_open_until > now()
+          2. Validate receiver status codes and timeout behavior
+          3. Confirm auto-recovery scanner is running in worker logs
+          4. Check whether failures are payload/size related vs network
+          5. Re-enable/replay once endpoint stability is restored
+
+  # --- Rate limit burst spike -----------------------------------------------
+  - alert: RateLimitBurstSpike
+    expr: increase(security_rate_limit_hits_total[5m]) > 500
+    for: 2m
+    labels:
+      severity: warning
+    annotations:
+      summary: "Rate limiter blocking unusually high request volume"
+      description: >-
+        More than 500 requests were rate-limited in the last 5 minutes.  This
+        may indicate a misconfigured client, a burst from a single org, or the
+        start of a DoS attempt.  Review the rate-limit logs to identify the
+        offending org / IP.
+      runbook_url: "https://github.com/fieldtrack/fieldtrack/blob/main/docs/ARCHITECTURE.md"
+      runbook: |
+        Cause: Burst traffic beyond per-user/per-org sliding window limits.
+        Actions:
+          1. Inspect rate-limit logs for top offending keys
+          2. Confirm traffic is expected (batch job) vs malicious
+          3. Check Redis health to ensure limiter is functioning correctly
+          4. Apply temporary edge-level mitigation if attack suspected
+          5. Tune per-org/per-user thresholds only with incident review
+
+# ---------------------------------------------------------
+# API ERROR BUDGET BURN RATE  (SLO 3 multi-window alerting)
+# See docs/SLO.md §Error Budget Alert Strategy
+# ---------------------------------------------------------
+
+- name: fieldtrack_slo_error_budget
+  rules:
+
+  # Fast burn: 1 h window at 14x burn rate (>14% error rate)
+  # exhausts monthly error budget in ~2 days if sustained.
+  - alert: FieldTrackSloErrorBudgetBurnFast
+    expr: fieldtrack:api_error_rate_1h > 0.14
+    for: 5m
+    labels:
+      severity: critical
+    annotations:
+      summary: "API error budget burning fast (14x rate)"
+      description: >-
+        The 1-hour error rate exceeds 14% (14x normal budget burn).  At this
+        rate the monthly error budget will be exhausted in under 2 days.
+        Investigate 5xx errors immediately - check logs, DB connectivity, and
+        recent deployments.
+      runbook_url: "https://github.com/fieldtrack/fieldtrack/blob/main/docs/SLO.md"
+      runbook: |
+        Cause: Sustained high error rate burning error budget at 14x the normal rate.
+        Actions:
+          1. Identify failing routes: check Grafana → FieldTrack API dashboard
+          2. Check container logs for exceptions: docker logs fieldtrack-api --tail 500
+          3. Check DB connectivity: /ready endpoint from VPS
+          4. If recent deploy: rollback immediately — see docs/ROLLBACK_QUICKREF.md
+          5. Open an incident; notify stakeholders if budget < 50%
+
+  # Slow burn: 6 h window at 6x burn rate (>6% error rate)
+  # exhausts monthly error budget in ~5 days if sustained.
+  - alert: FieldTrackSloErrorBudgetBurnSlow
+    expr: fieldtrack:api_error_rate_6h > 0.06
+    for: 15m
+    labels:
+      severity: warning
+    annotations:
+      summary: "API error budget burning (6x rate over 6 h)"
+      description: >-
+        The 6-hour error rate exceeds 6% (6x normal budget burn).  Open a ticket
+        and investigate the root cause before the error budget is exhausted.
+      runbook_url: "https://github.com/fieldtrack/fieldtrack/blob/main/docs/SLO.md"
+      runbook: |
+        Cause: Sustained elevated 5xx errors over a long window.
+        Actions:
+          1. Review error budget remaining metric on Grafana dashboard
+          2. Identify top failing routes and error classes
+          3. Correlate with deployments and infra incidents
+          4. Open reliability ticket and assign owner
+          5. Plan mitigations before entering critical burn threshold
+
+  # p99 latency SLO breach - 2 s threshold (SLO 2)
+  - alert: FieldTrackLatencyP99High
+    expr: |
+      histogram_quantile(
+        0.99,
+        sum(rate(http_request_duration_seconds_bucket{job=~"fieldtrack-backend.*"}[10m])) by (le)
+      ) > 2
+    for: 10m
+    labels:
+      severity: warning
+    annotations:
+      summary: "API p99 latency above 2 s (SLO 2 breach)"
+      description: >-
+        The 99th-percentile API response time has been above 2 seconds for 10
+        minutes.  This breaches the p99 latency SLO defined in docs/SLO.md.
+        Check slow queries, worker queue depths, and DB connection pool saturation.
+      runbook_url: "https://github.com/fieldtrack/fieldtrack/blob/main/docs/SLO.md"
+      runbook: |
+        Cause: Tail-latency degradation affecting a subset of requests.
+        Actions:
+          1. Inspect p99 panel and compare with p95 for tail amplification
+          2. Review very_slow_response logs for route-level concentration
+          3. Check DB wait events and queue backlog growth
+          4. Reduce load or scale services if saturation detected
+          5. Roll back if latency regression tracks a release
\ No newline at end of file
diff --git a/package-lock.json b/package-lock.json
index 4bbc840..5a07342 100644
--- a/package-lock.json
+++ b/package-lock.json
@@ -4068,6 +4068,7 @@
       "version": "0.1.0",
       "dependencies": {
         "@fieldtrack/types": "*",
+        "@radix-ui/react-alert-dialog": "^1.1.15",
         "@radix-ui/react-avatar": "^1.1.2",
         "@radix-ui/react-dialog": "^1.1.4",
         "@radix-ui/react-dropdown-menu": "^2.1.4",
@@ -4081,10 +4082,12 @@
         "@supabase/supabase-js": "^2.46.2",
         "@tanstack/react-query": "^5.62.7",
         "@types/leaflet": "^1.9.21",
+        "@types/leaflet.markercluster": "^1.5.6",
         "class-variance-authority": "^0.7.1",
         "clsx": "^2.1.1",
         "framer-motion": "^12.36.0",
         "leaflet": "^1.9.4",
+        "leaflet.markercluster": "^1.5.3",
         "lucide-react": "^0.468.0",
         "mapbox-gl": "^3.8.0",
         "next": "^15.1.3",
@@ -4199,92 +4202,6 @@
         }
       }
     },
-    "apps/web/node_modules/@radix-ui/react-dialog": {
-      "version": "1.1.15",
-      "license": "MIT",
-      "dependencies": {
-        "@radix-ui/primitive": "1.1.3",
-        "@radix-ui/react-compose-refs": "1.1.2",
-        "@radix-ui/react-context": "1.1.2",
-        "@radix-ui/react-dismissable-layer": "1.1.11",
-        "@radix-ui/react-focus-guards": "1.1.3",
-        "@radix-ui/react-focus-scope": "1.1.7",
-        "@radix-ui/react-id": "1.1.1",
-        "@radix-ui/react-portal": "1.1.9",
-        "@radix-ui/react-presence": "1.1.5",
-        "@radix-ui/react-primitive": "2.1.3",
-        "@radix-ui/react-slot": "1.2.3",
-        "@radix-ui/react-use-controllable-state": "1.2.2",
-        "aria-hidden": "^1.2.4",
-        "react-remove-scroll": "^2.6.3"
-      },
-      "peerDependencies": {
-        "@types/react": "*",
-        "@types/react-dom": "*",
-        "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
-        "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
-      },
-      "peerDependenciesMeta": {
-        "@types/react": {
-          "optional": true
-        },
-        "@types/react-dom": {
-          "optional": true
-        }
-      }
-    },
-    "apps/web/node_modules/@radix-ui/react-dialog/node_modules/@radix-ui/react-context": {
-      "version": "1.1.2",
-      "license": "MIT",
-      "peerDependencies": {
-        "@types/react": "*",
-        "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
-      },
-      "peerDependenciesMeta": {
-        "@types/react": {
-          "optional": true
-        }
-      }
-    },
-    "apps/web/node_modules/@radix-ui/react-dialog/node_modules/@radix-ui/react-primitive": {
-      "version": "2.1.3",
-      "resolved": "https://registry.npmjs.org/@radix-ui/react-primitive/-/react-primitive-2.1.3.tgz",
-      "integrity": "sha512-m9gTwRkhy2lvCPe6QJp4d3G1TYEUHn/FzJUtq9MjH46an1wJU+GdoGC5VLof8RX8Ft/DlpshApkhswDLZzHIcQ==",
-      "license": "MIT",
-      "dependencies": {
-        "@radix-ui/react-slot": "1.2.3"
-      },
-      "peerDependencies": {
-        "@types/react": "*",
-        "@types/react-dom": "*",
-        "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
-        "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
-      },
-      "peerDependenciesMeta": {
-        "@types/react": {
-          "optional": true
-        },
-        "@types/react-dom": {
-          "optional": true
-        }
-      }
-    },
-    "apps/web/node_modules/@radix-ui/react-dialog/node_modules/@radix-ui/react-slot": {
-      "version": "1.2.3",
-      "license": "MIT",
-      "dependencies": {
-        "@radix-ui/react-compose-refs": "1.1.2"
-      },
-      "peerDependencies": {
-        "@types/react": "*",
-        "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
-      },
-      "peerDependenciesMeta": {
-        "@types/react": {
-          "optional": true
-        }
-      }
-    },
     "apps/web/node_modules/@radix-ui/react-dropdown-menu": {
       "version": "2.1.16",
       "license": "MIT",
@@ -7850,6 +7767,57 @@
       "integrity": "sha512-JTF99U/6XIjCBo0wqkU5sK10glYe27MRRsfwoiq5zzOEZLHU3A3KCMa5X/azekYRCJ0HlwI0crAXS/5dEHTzDg==",
       "license": "MIT"
     },
+    "node_modules/@radix-ui/react-alert-dialog": {
+      "version": "1.1.15",
+      "resolved": "https://registry.npmjs.org/@radix-ui/react-alert-dialog/-/react-alert-dialog-1.1.15.tgz",
+      "integrity": "sha512-oTVLkEw5GpdRe29BqJ0LSDFWI3qu0vR1M0mUkOQWDIUnY/QIkLpgDMWuKxP94c2NAC2LGcgVhG1ImF3jkZ5wXw==",
+      "license": "MIT",
+      "dependencies": {
+        "@radix-ui/primitive": "1.1.3",
+        "@radix-ui/react-compose-refs": "1.1.2",
+        "@radix-ui/react-context": "1.1.2",
+        "@radix-ui/react-dialog": "1.1.15",
+        "@radix-ui/react-primitive": "2.1.3",
+        "@radix-ui/react-slot": "1.2.3"
+      },
+      "peerDependencies": {
+        "@types/react": "*",
+        "@types/react-dom": "*",
+        "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
+        "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
+      },
+      "peerDependenciesMeta": {
+        "@types/react": {
+          "optional": true
+        },
+        "@types/react-dom": {
+          "optional": true
+        }
+      }
+    },
+    "node_modules/@radix-ui/react-alert-dialog/node_modules/@radix-ui/react-primitive": {
+      "version": "2.1.3",
+      "resolved": "https://registry.npmjs.org/@radix-ui/react-primitive/-/react-primitive-2.1.3.tgz",
+      "integrity": "sha512-m9gTwRkhy2lvCPe6QJp4d3G1TYEUHn/FzJUtq9MjH46an1wJU+GdoGC5VLof8RX8Ft/DlpshApkhswDLZzHIcQ==",
+      "license": "MIT",
+      "dependencies": {
+        "@radix-ui/react-slot": "1.2.3"
+      },
+      "peerDependencies": {
+        "@types/react": "*",
+        "@types/react-dom": "*",
+        "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
+        "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
+      },
+      "peerDependenciesMeta": {
+        "@types/react": {
+          "optional": true
+        },
+        "@types/react-dom": {
+          "optional": true
+        }
+      }
+    },
     "node_modules/@radix-ui/react-arrow": {
       "version": "1.1.7",
       "resolved": "https://registry.npmjs.org/@radix-ui/react-arrow/-/react-arrow-1.1.7.tgz",
@@ -7975,6 +7943,65 @@
         }
       }
     },
+    "node_modules/@radix-ui/react-dialog": {
+      "version": "1.1.15",
+      "resolved": "https://registry.npmjs.org/@radix-ui/react-dialog/-/react-dialog-1.1.15.tgz",
+      "integrity": "sha512-TCglVRtzlffRNxRMEyR36DGBLJpeusFcgMVD9PZEzAKnUs1lKCgX5u9BmC2Yg+LL9MgZDugFFs1Vl+Jp4t/PGw==",
+      "license": "MIT",
+      "dependencies": {
+        "@radix-ui/primitive": "1.1.3",
+        "@radix-ui/react-compose-refs": "1.1.2",
+        "@radix-ui/react-context": "1.1.2",
+        "@radix-ui/react-dismissable-layer": "1.1.11",
+        "@radix-ui/react-focus-guards": "1.1.3",
+        "@radix-ui/react-focus-scope": "1.1.7",
+        "@radix-ui/react-id": "1.1.1",
+        "@radix-ui/react-portal": "1.1.9",
+        "@radix-ui/react-presence": "1.1.5",
+        "@radix-ui/react-primitive": "2.1.3",
+        "@radix-ui/react-slot": "1.2.3",
+        "@radix-ui/react-use-controllable-state": "1.2.2",
+        "aria-hidden": "^1.2.4",
+        "react-remove-scroll": "^2.6.3"
+      },
+      "peerDependencies": {
+        "@types/react": "*",
+        "@types/react-dom": "*",
+        "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
+        "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
+      },
+      "peerDependenciesMeta": {
+        "@types/react": {
+          "optional": true
+        },
+        "@types/react-dom": {
+          "optional": true
+        }
+      }
+    },
+    "node_modules/@radix-ui/react-dialog/node_modules/@radix-ui/react-primitive": {
+      "version": "2.1.3",
+      "resolved": "https://registry.npmjs.org/@radix-ui/react-primitive/-/react-primitive-2.1.3.tgz",
+      "integrity": "sha512-m9gTwRkhy2lvCPe6QJp4d3G1TYEUHn/FzJUtq9MjH46an1wJU+GdoGC5VLof8RX8Ft/DlpshApkhswDLZzHIcQ==",
+      "license": "MIT",
+      "dependencies": {
+        "@radix-ui/react-slot": "1.2.3"
+      },
+      "peerDependencies": {
+        "@types/react": "*",
+        "@types/react-dom": "*",
+        "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
+        "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
+      },
+      "peerDependenciesMeta": {
+        "@types/react": {
+          "optional": true
+        },
+        "@types/react-dom": {
+          "optional": true
+        }
+      }
+    },
     "node_modules/@radix-ui/react-direction": {
       "version": "1.1.1",
       "resolved": "https://registry.npmjs.org/@radix-ui/react-direction/-/react-direction-1.1.1.tgz",
@@ -9203,6 +9230,15 @@
         "@types/geojson": "*"
       }
     },
+    "node_modules/@types/leaflet.markercluster": {
+      "version": "1.5.6",
+      "resolved": "https://registry.npmjs.org/@types/leaflet.markercluster/-/leaflet.markercluster-1.5.6.tgz",
+      "integrity": "sha512-I7hZjO2+isVXGYWzKxBp8PsCzAYCJBc29qBdFpquOCkS7zFDqUsUvkEOyQHedsk/Cy5tocQzf+Ndorm5W9YKTQ==",
+      "license": "MIT",
+      "dependencies": {
+        "@types/leaflet": "^1.9"
+      }
+    },
     "node_modules/@types/react": {
       "version": "19.2.14",
       "resolved": "https://registry.npmjs.org/@types/react/-/react-19.2.14.tgz",
@@ -13377,6 +13413,15 @@
       "integrity": "sha512-nxS1ynzJOmOlHp+iL3FyWqK89GtNL8U8rvlMOsQdTTssxZwCXh8N2NB3GDQOL+YR3XnWyZAxwQixURb+FA74PA==",
       "license": "BSD-2-Clause"
     },
+    "node_modules/leaflet.markercluster": {
+      "version": "1.5.3",
+      "resolved": "https://registry.npmjs.org/leaflet.markercluster/-/leaflet.markercluster-1.5.3.tgz",
+      "integrity": "sha512-vPTw/Bndq7eQHjLBVlWpnGeLa3t+3zGiuM7fJwCkiMFq+nmRuG3RI3f7f4N4TDX7T4NpbAXpR2+NTRSEGfCSeA==",
+      "license": "MIT",
+      "peerDependencies": {
+        "leaflet": "^1.3.1"
+      }
+    },
     "node_modules/levn": {
       "version": "0.4.1",
       "resolved": "https://registry.npmjs.org/levn/-/levn-0.4.1.tgz",
diff --git a/run_api_smoke.ps1 b/run_api_smoke.ps1
new file mode 100644
index 0000000..fa23f9d
--- /dev/null
+++ b/run_api_smoke.ps1
@@ -0,0 +1,102 @@
+# run_api_smoke.ps1 - start the built API, probe a few HTTP endpoints, then stop it.
+#
+# Output is line-oriented KEY:value markers (STARTED_PID, STARTUP_FAILED, PROBE, ...).
+# Exits 1 when the build artifact is missing or the process dies during startup.
+# Individual probe failures are reported in the output but do not set an exit code.
+$ErrorActionPreference='Continue'
+$artifact='apps/api/dist/server.js'
+if (-not (Test-Path $artifact)) {
+  Write-Output "ARTIFACT_MISSING:$artifact"
+  exit 1
+}
+$stdout=Join-Path $PWD 'api_smoke_stdout.log'
+$stderr=Join-Path $PWD 'api_smoke_stderr.log'
+# Clear logs from a previous run so the tails printed below belong to this run.
+Remove-Item $stdout,$stderr -ErrorAction SilentlyContinue
+$proc=Start-Process -FilePath node -ArgumentList $artifact -PassThru -RedirectStandardOutput $stdout -RedirectStandardError $stderr -WorkingDirectory $PWD
+Write-Output "STARTED_PID:$($proc.Id)"
+# Poll /health every 500 ms (at most 20 attempts); stop early if the process exits.
+$started=$false
+for ($i=0; $i -lt 20; $i++) {
+  Start-Sleep -Milliseconds 500
+  $proc.Refresh()
+  if ($proc.HasExited) { break }
+  try {
+    $null=Invoke-WebRequest -Uri 'http://127.0.0.1:3000/health' -UseBasicParsing -TimeoutSec 2
+    $started=$true
+    break
+  } catch { }
+}
+$proc.Refresh()
+if ($proc.HasExited) {
+  # The server died before any probe could run: dump its logs and fail the run.
+  Write-Output "STARTUP_FAILED:Process exited with code $($proc.ExitCode)"
+  Write-Output 'STDERR_BEGIN'
+  if (Test-Path $stderr) { Get-Content $stderr -Tail 200 }
+  Write-Output 'STDERR_END'
+  Write-Output 'STDOUT_BEGIN'
+  if (Test-Path $stdout) { Get-Content $stdout -Tail 200 }
+  Write-Output 'STDOUT_END'
+  Write-Output 'PROBES_SKIPPED:true'
+  exit 1
+}
+if ($started) { Write-Output 'STARTUP_CONFIRMED:true' } else { Write-Output 'STARTUP_UNCONFIRMED:true' }
+$urls=@(
+  'http://127.0.0.1:3000/health',
+  'http://127.0.0.1:3000/ready',
+  'http://127.0.0.1:3000/metrics',
+  'http://127.0.0.1:3000/admin/system-health'
+)
+# Probe each endpoint once and print status plus the first 200 characters of the body.
+foreach ($u in $urls) {
+  try {
+    $resp=Invoke-WebRequest -Uri $u -UseBasicParsing -TimeoutSec 5 -MaximumRedirection 0
+    $body=$resp.Content
+    if ($null -eq $body) { $body='' }
+    $snippet=($body.Substring(0, [Math]::Min(200, $body.Length)) -replace "`r|`n", ' ')
+    Write-Output "PROBE:$u STATUS:$([int]$resp.StatusCode) OK:false ERROR:$msg BODY:$snippet"
+  } catch {
+    # Request failed or threw on its status: recover status/body from the exception when present.
+    $ex=$_.Exception
+    $status=''
+    $body=''
+    if ($ex.Response) {
+      try { $status=[int]$ex.Response.StatusCode } catch { $status='' }
+      try {
+        $stream=$ex.Response.GetResponseStream()
+        if ($stream) {
+          $reader=New-Object System.IO.StreamReader($stream)
+          $body=$reader.ReadToEnd()
+          $reader.Close()
+        }
+      } catch { }
+    }
+    $msg=($ex.Message -replace "`r|`n", ' ')
+    $snippet=''
+    if ($body) { $snippet=($body.Substring(0, [Math]::Min(200, $body.Length)) -replace "`r|`n", ' ') }
+    Write-Output "PROBE:$u STATUS:$status OK:false ERROR:$msg BODY:$snippet"
+  }
+}
+# Ask the server to stop; -ErrorAction Stop turns a failure into a catchable error.
+if (-not $proc.HasExited) {
+  try {
+    Stop-Process -Id $proc.Id -ErrorAction Stop
+    Start-Sleep -Milliseconds 500
+  } catch {
+    Write-Output "STOP_ERROR:$($_.Exception.Message)"
+  }
+}
+$proc.Refresh()
+Write-Output "PROCESS_EXITED:$($proc.HasExited)"
+# Fall back to Kill() if the process is still running after Stop-Process.
+if (-not $proc.HasExited) {
+  try {
+    $proc.Kill()
+    Write-Output 'KILLED:true'
+  } catch {
+    Write-Output "KILL_ERROR:$($_.Exception.Message)"
+  }
+}
+Write-Output 'STDERR_TAIL_BEGIN'
+if (Test-Path $stderr) { Get-Content $stderr -Tail 80 }
+Write-Output 'STDERR_TAIL_END'
diff --git a/supabase/migrations/20260328134113_add_admin_audit_log.sql b/supabase/migrations/20260328134113_add_admin_audit_log.sql
new file mode 100644
index 0000000..1a7ad58
--- /dev/null
+++ b/supabase/migrations/20260328134113_add_admin_audit_log.sql
@@ -0,0 +1,25 @@
+-- Migration: add_admin_audit_log
+-- Creates an immutable audit trail for admin actions (DLQ replays, circuit
+-- breaker events, etc.).  Written via the service client; API-layer auth gates
+-- reads.  RLS is enabled by 20260328135403_phase29_hardening_rls_and_search_path.sql.
+
+CREATE TABLE IF NOT EXISTS public.admin_audit_log (
+  id              UUID         PRIMARY KEY DEFAULT gen_random_uuid(),
+  event           TEXT         NOT NULL,
+  actor_id        UUID,
+  organization_id UUID,
+  resource_type   TEXT,
+  resource_id     TEXT,
+  payload         JSONB        NOT NULL DEFAULT '{}'::jsonb,
+  created_at      TIMESTAMPTZ  NOT NULL DEFAULT now()
+);
+
+-- Filter column first, then created_at DESC, so page queries (before=<ts>) stay fast.
+CREATE INDEX IF NOT EXISTS idx_admin_audit_log_org_created
+  ON public.admin_audit_log (organization_id, created_at DESC);
+
+CREATE INDEX IF NOT EXISTS idx_admin_audit_log_event_created
+  ON public.admin_audit_log (event, created_at DESC);
+
+COMMENT ON TABLE public.admin_audit_log IS
+  'Immutable audit trail of privileged admin actions (DLQ replays, circuit breaker state changes, etc.)';
diff --git a/supabase/migrations/20260328134130_circuit_breaker_persistence.sql b/supabase/migrations/20260328134130_circuit_breaker_persistence.sql
new file mode 100644
index 0000000..3ff7731
--- /dev/null
+++ b/supabase/migrations/20260328134130_circuit_breaker_persistence.sql
@@ -0,0 +1,31 @@
+-- Migration: persist circuit-breaker state on the webhooks table.
+--
+-- Problem: failure_streak and disabled_until are currently Redis-only.
+-- A Redis restart or eviction loses all in-flight streak data, allowing a
+-- misbehaving endpoint to reset its consecutive-failure count for free.
+--
+-- Solution:
+--   failure_streak   INT    — mirrors cb:failure_streak:{id} in Redis
+--   circuit_open_until  TIMESTAMPTZ NULL — set when circuit is OPEN,
+--                         NULL when CLOSED/HALF-OPEN
+--
+-- The application layer treats DB as the authoritative source of truth on
+-- cold-start; Redis is the hot-path cache.  On each process start, a sync
+-- function reads all webhooks with circuit_open_until IS NOT NULL and
+-- re-populates the Redis cooldown key so delivery workers respect open
+-- circuits even after a Redis flush.
+
+ALTER TABLE public.webhooks
+  ADD COLUMN IF NOT EXISTS failure_streak      INT         NOT NULL DEFAULT 0,
+  ADD COLUMN IF NOT EXISTS circuit_open_until  TIMESTAMPTZ;
+
+-- Index so the startup sync query can find open circuits quickly.
+CREATE INDEX IF NOT EXISTS idx_webhooks_circuit_open
+  ON public.webhooks (circuit_open_until)
+  WHERE circuit_open_until IS NOT NULL;
+
+COMMENT ON COLUMN public.webhooks.failure_streak IS
+  'Consecutive delivery failures (Redis-mirrored). Resets on any successful delivery.';
+
+COMMENT ON COLUMN public.webhooks.circuit_open_until IS
+  'When non-NULL the circuit is OPEN and no deliveries are attempted until this timestamp.';
diff --git a/supabase/migrations/20260328134140_webhook_dlq_archive.sql b/supabase/migrations/20260328134140_webhook_dlq_archive.sql
new file mode 100644
index 0000000..34cb394
--- /dev/null
+++ b/supabase/migrations/20260328134140_webhook_dlq_archive.sql
@@ -0,0 +1,30 @@
+-- Migration: webhook DLQ archival and retention support.
+--
+-- DLQ entries currently accumulate indefinitely in BullMQ (Redis).
+-- When a job ages out of the retention window, the application archives
+-- a snapshot to this table before removing the BullMQ job.  This gives
+-- operators a permanent, queryable history without unbounded Redis growth.
+--
+-- Rows are intended to be write-once (no updates, no deletes) so the table
+-- acts as an audit trail; nothing in this migration enforces that.
+
+CREATE TABLE IF NOT EXISTS public.webhook_dlq_archive (
+  id              UUID         PRIMARY KEY DEFAULT gen_random_uuid(),
+  delivery_id     TEXT         NOT NULL,
+  webhook_id      TEXT         NOT NULL,
+  event_id        TEXT         NOT NULL,
+  url             TEXT         NOT NULL,
+  attempt_number  INT          NOT NULL,
+  failed_at       TIMESTAMPTZ  NOT NULL,
+  archived_at     TIMESTAMPTZ  NOT NULL DEFAULT now(),
+  reason          TEXT         NOT NULL DEFAULT 'retention_policy'
+);
+
+CREATE INDEX IF NOT EXISTS idx_dlq_archive_webhook_id
+  ON public.webhook_dlq_archive (webhook_id, archived_at DESC);
+
+CREATE INDEX IF NOT EXISTS idx_dlq_archive_archived_at
+  ON public.webhook_dlq_archive (archived_at DESC);
+
+COMMENT ON TABLE public.webhook_dlq_archive IS
+  'Immutable archive of DLQ jobs removed by the retention policy or manually purged.';
diff --git a/supabase/migrations/20260328135403_phase29_hardening_rls_and_search_path.sql b/supabase/migrations/20260328135403_phase29_hardening_rls_and_search_path.sql
new file mode 100644
index 0000000..f234194
--- /dev/null
+++ b/supabase/migrations/20260328135403_phase29_hardening_rls_and_search_path.sql
@@ -0,0 +1,65 @@
+-- Phase 29: migration reconciliation hardening
+-- 1) Enable RLS on newly introduced public tables
+-- 2) Add explicit policies for service_role and authenticated admin reads
+-- 3) Lock trigger function search_path
+
+alter table if exists public.admin_audit_log enable row level security;
+alter table if exists public.webhook_dlq_archive enable row level security;
+
+drop policy if exists service_role_only_admin_audit_log on public.admin_audit_log;
+create policy service_role_only_admin_audit_log
+  on public.admin_audit_log
+  for all
+  to service_role
+  using (true)
+  with check (true);
+
+drop policy if exists admin_read_admin_audit_log on public.admin_audit_log;
+create policy admin_read_admin_audit_log
+  on public.admin_audit_log
+  for select
+  to authenticated
+  using (
+    organization_id = (
+      select u.organization_id
+      from public.users u
+      where u.id = (select auth.uid())
+    )
+    and (
+      select u.role
+      from public.users u
+      where u.id = (select auth.uid())
+    ) = 'ADMIN'
+  );
+
+drop policy if exists service_role_only_webhook_dlq_archive on public.webhook_dlq_archive;
+create policy service_role_only_webhook_dlq_archive
+  on public.webhook_dlq_archive
+  for all
+  to service_role
+  using (true)
+  with check (true);
+
+drop policy if exists admin_read_webhook_dlq_archive on public.webhook_dlq_archive;
+create policy admin_read_webhook_dlq_archive
+  on public.webhook_dlq_archive
+  for select
+  to authenticated
+  using (
+    webhook_id in (
+      select w.id::text
+      from public.webhooks w
+      where w.organization_id = (
+        select u.organization_id
+        from public.users u
+        where u.id = (select auth.uid())
+      )
+    )
+    and (
+      select u.role
+      from public.users u
+      where u.id = (select auth.uid())
+    ) = 'ADMIN'
+  );
+
+alter function public.set_updated_at() set search_path = public, pg_temp;

From c6e6b47a2b4dead7a2b0d8b898bc314e1fa0b27b Mon Sep 17 00:00:00 2001
From: rajashish147 <rajashish147@gmail.com>
Date: Sat, 28 Mar 2026 21:17:33 +0530
Subject: [PATCH 2/3] feat(webhook): implement Dead-Letter Queue (DLQ)
 management routes and services

---
 apps/api/src/app.ts                           |   2 -
 .../src/modules/admin/webhook-dlq.routes.ts   | 144 +++++++-----------
 .../modules/webhooks/webhooks.repository.ts   |  70 +++++++++
 .../src/modules/webhooks/webhooks.schema.ts   |  27 ++++
 .../src/modules/webhooks/webhooks.service.ts  |   9 ++
 apps/api/src/routes/index.ts                  |   2 +
 .../admin/webhooks.integration.test.ts        | 137 +++++++++++++++++
 7 files changed, 297 insertions(+), 94 deletions(-)

diff --git a/apps/api/src/app.ts b/apps/api/src/app.ts
index 60f0177..91ff495 100644
--- a/apps/api/src/app.ts
+++ b/apps/api/src/app.ts
@@ -195,11 +195,9 @@ export async function buildApp(): Promise<FastifyInstance> {
     const { adminQueuesRoutes } = await import("./modules/admin/queues.routes.js");
     const { adminRetryIntentsRoutes } = await import("./modules/admin/retry-intents.routes.js");
     const { systemHealthRoutes } = await import("./modules/admin/system-health.routes.js");
-    const { webhookDlqRoutes } = await import("./modules/admin/webhook-dlq.routes.js");
     await app.register(adminQueuesRoutes);
     await app.register(adminRetryIntentsRoutes);
     await app.register(systemHealthRoutes);
-    await app.register(webhookDlqRoutes);
   }
 
   // Admin audit log — not worker-gated (pure DB, no Redis required).
diff --git a/apps/api/src/modules/admin/webhook-dlq.routes.ts b/apps/api/src/modules/admin/webhook-dlq.routes.ts
index bae65c3..3172057 100644
--- a/apps/api/src/modules/admin/webhook-dlq.routes.ts
+++ b/apps/api/src/modules/admin/webhook-dlq.routes.ts
@@ -1,35 +1,44 @@
 /**
- * webhook-dlq.routes.ts — Admin API for Dead-Letter Queue (DLQ) management.
+ * webhook-dlq.routes.ts — Admin API for failed webhook deliveries.
  *
- * GET  /admin/webhook-dlq            — list DLQ jobs pending review
- * POST /admin/webhook-dlq/:id/replay — replay a single DLQ job (reset attempt_count)
+ * GET  /admin/webhook-dlq         — list failed webhook deliveries for this org
+ * POST /admin/webhook-dlq/:id/retry — retry a failed delivery
  *
- * All routes require ADMIN role (JWT + RBAC).
- * Only available when WORKERS_ENABLED=true (registered from app.ts).
- *
- * Replay semantics:
- *  - Removes the job from the DLQ
- *  - Re-enqueues into the main webhook-delivery queue with attempt_number=1
- *  - Resets attempt_count in DB to allow full retry schedule
- *  - Logs a structured audit entry on every replay
+ * This route is backed by the `public.webhook_deliveries` table, not the
+ * Redis-backed BullMQ DLQ. It is always registered so unauthenticated callers
+ * receive 401 instead of 404 even when workers are disabled.
  */
 
 import type { FastifyInstance } from "fastify";
 import { z } from "zod";
 import { authenticate } from "../../middleware/auth.js";
 import { requireRole } from "../../middleware/role-guard.js";
-import {
-  replayWebhookDlqJob,
-  listWebhookDlqJobs,
-  getWebhookDlqDepth,
-} from "../../workers/webhook.queue.js";
-import { supabaseServiceClient as supabase } from "../../config/supabase.js";
-import { NotFoundError } from "../../utils/errors.js";
+import { webhooksService } from "../webhooks/webhooks.service.js";
 import { handleError } from "../../utils/response.js";
-import { insertAuditRecord } from "../../utils/audit.js";
+import {
+  dlqListQuerySchema,
+  webhookDlqDeliverySchema,
+} from "../webhooks/webhooks.schema.js";
 
-const DLQ_REPLAY_COOLDOWN_MS = 5_000;
-let lastDlqReplayAt = 0;
+const dlqListResponseSchema = z.object({
+  success: z.literal(true),
+  data: z.array(webhookDlqDeliverySchema),
+  meta: z.object({
+    limit: z.number().int(),
+    offset: z.number().int(),
+    count: z.number().int(),
+  }),
+});
+
+const dlqRetryResponseSchema = z.object({
+  success: z.literal(true),
+  data: z.object({
+    id: z.string().uuid(),
+    status: z.string(),
+    attempt_count: z.number(),
+    next_retry_at: z.string().nullable(),
+  }),
+});
 
 export async function webhookDlqRoutes(app: FastifyInstance): Promise<void> {
   // ── GET /admin/webhook-dlq ─────────────────────────────────────────────────
@@ -38,108 +47,59 @@ export async function webhookDlqRoutes(app: FastifyInstance): Promise<void> {
     {
       schema: {
         tags: ["admin", "webhooks"],
-        description: "List jobs in the webhook Dead-Letter Queue (ADMIN only).",
-        querystring: z.object({
-          limit: z.coerce.number().int().min(1).max(100).default(50),
-        }),
+        description: "List failed webhook deliveries for this organization (ADMIN only).",
+        querystring: dlqListQuerySchema,
+        response: { 200: dlqListResponseSchema },
       },
       preValidation: [authenticate, requireRole("ADMIN")],
     },
     async (request, reply) => {
       try {
-        const { limit } = request.query as { limit: number };
-        const [jobs, depth] = await Promise.all([
-          listWebhookDlqJobs(limit),
-          getWebhookDlqDepth(),
-        ]);
+        const query = dlqListQuerySchema.parse(request.query);
+        const { data, total } = await webhooksService.listDlqDeliveries(request, query);
         reply.status(200).send({
           success: true,
-          dlq_depth: depth,
-          jobs,
+          data,
+          meta: {
+            limit: query.limit,
+            offset: query.offset,
+            count: total,
+          },
         });
       } catch (error) {
-        handleError(error, request, reply, "Failed to list DLQ jobs");
+        handleError(error, request, reply, "Failed to list DLQ deliveries");
       }
     },
   );
 
-  // ── POST /admin/webhook-dlq/:id/replay ────────────────────────────────────
+  // ── POST /admin/webhook-dlq/:id/retry ─────────────────────────────────────
   app.post<{ Params: { id: string } }>(
-    "/admin/webhook-dlq/:id/replay",
+    "/admin/webhook-dlq/:id/retry",
     {
       schema: {
         tags: ["admin", "webhooks"],
-        description: "Replay a DLQ job: re-enqueue with attempt_count reset (ADMIN only).",
+        description: "Retry a failed webhook delivery (ADMIN only).",
         params: z.object({ id: z.string().uuid() }),
+        response: { 200: dlqRetryResponseSchema },
       },
       preValidation: [authenticate, requireRole("ADMIN")],
     },
     async (request, reply) => {
       try {
         const { id: deliveryId } = request.params;
-        const adminId = (request as { user?: { sub?: string } }).user?.sub;
-        const orgId   = (request as { organizationId?: string }).organizationId;
-
-        // Per-admin replay cooldown — prevents accidental mass re-delivery
-        const now = Date.now();
-        const elapsed = now - lastDlqReplayAt;
-        if (elapsed < DLQ_REPLAY_COOLDOWN_MS) {
-          reply.status(429).send({
-            success: false,
-            error: `DLQ replay rate-limited. Retry in ${DLQ_REPLAY_COOLDOWN_MS - elapsed}ms.`,
-          });
-          return;
-        }
-        lastDlqReplayAt = now;
-
-        const replayed = await replayWebhookDlqJob(deliveryId);
-        if (!replayed) {
-          throw new NotFoundError(`DLQ job for delivery ${deliveryId} not found`);
-        }
-
-        // Reset attempt_count in DB so the full retry schedule applies
-        await supabase
-          .from("webhook_deliveries")
-          .update({
-            status:        "pending",
-            attempt_count:  0,
-            next_retry_at:  new Date().toISOString(),
-          })
-          .eq("id", deliveryId);
-
-        // Structured audit log — queryable in Grafana/Loki
-        request.log.info(
-          {
-            audit:      true,
-            event:      "WEBHOOK_DLQ_REPLAY",
-            deliveryId,
-            adminId,
-            organizationId: orgId,
-            timestamp:  new Date().toISOString(),
-          },
-          "webhook-dlq: DLQ job replayed by admin",
-        );
-
-        // Persist to DB audit trail for GET /admin/audit-log
-        await insertAuditRecord({
-          event:          "WEBHOOK_DLQ_REPLAY",
-          actor_id:       adminId,
-          organization_id: orgId,
-          resource_type:  "webhook_delivery",
-          resource_id:    deliveryId,
-          payload:        { replayed_at: new Date().toISOString() },
-        });
+        const delivery = await webhooksService.retryDelivery(request, deliveryId);
 
         reply.status(200).send({
           success: true,
           data: {
-            delivery_id: deliveryId,
-            replayed_at: new Date().toISOString(),
-            message: "Job re-queued with attempt_count reset",
+            id: delivery.id,
+            status: delivery.status,
+            attempt_count: delivery.attempt_count,
+            next_retry_at: delivery.next_retry_at,
           },
         });
       } catch (error) {
-        handleError(error, request, reply, "Failed to replay DLQ job");
+        handleError(error, request, reply, "Failed to retry DLQ delivery");
       }
     },
   );
diff --git a/apps/api/src/modules/webhooks/webhooks.repository.ts b/apps/api/src/modules/webhooks/webhooks.repository.ts
index 792f512..2ea8589 100644
--- a/apps/api/src/modules/webhooks/webhooks.repository.ts
+++ b/apps/api/src/modules/webhooks/webhooks.repository.ts
@@ -16,10 +16,14 @@ import type {
   WebhookPublic,
   WebhookDelivery,
   DeliveryListQuery,
+  DlqListQuery,
+  WebhookDlqDelivery,
 } from "./webhooks.schema.js";
 
 const WEBHOOK_DELIVERY_COLUMNS =
   "id, webhook_id, event_id, organization_id, status, attempt_count, response_status, response_body, last_attempt_at, next_retry_at, created_at";
+const WEBHOOK_DLQ_COLUMNS =
+  "id, webhook_id, organization_id, event_id, event_type, payload, status, attempt_count, response_status, response_body, last_error, next_retry_at, last_attempt_at, created_at";
 
 // ─── Webhook CRUD ─────────────────────────────────────────────────────────────
 
@@ -133,6 +137,72 @@ export const webhooksRepository = {
     return { data: (data ?? []) as WebhookDelivery[], total: count ?? 0 };
   },
 
+  /**
+   * Paginated list of `failed` delivery rows for the admin DLQ view, most
+   * recently attempted first. The DB column `attempt_count` is returned as
+   * `attempts`; `last_attempt_at` keeps the same name in the DB and the API.
+   */
+  async listDlqDeliveries(
+    request: FastifyRequest,
+    query: DlqListQuery,
+  ): Promise<{ data: WebhookDlqDelivery[]; total: number }> {
+    const from = query.offset;
+    const to = query.offset + query.limit - 1;
+
+    let q = orgTable(request, "webhook_deliveries")
+      .select(WEBHOOK_DLQ_COLUMNS, { count: "exact" })
+      .eq("status", "failed")
+      .order("last_attempt_at", { ascending: false, nullsFirst: false })
+      .range(from, to);
+
+    if (query.webhook_id) {
+      q = (q as ReturnType<typeof q.eq>).eq("webhook_id", query.webhook_id);
+    }
+    if (query.event_type) {
+      q = (q as ReturnType<typeof q.eq>).eq("event_type", query.event_type);
+    }
+
+    const { data, error, count } = await q;
+    if (error) throw new Error(`Failed to list webhook DLQ deliveries: ${error.message}`);
+
+    const rows = (data ?? []) as Array<{
+      id: string;
+      webhook_id: string;
+      organization_id: string;
+      event_id: string;
+      event_type: string | null;
+      payload: unknown | null;
+      status: "failed";
+      attempt_count: number;
+      response_status: number | null;
+      response_body: string | null;
+      last_error: string | null;
+      next_retry_at: string | null;
+      last_attempt_at: string | null;
+      created_at: string;
+    }>;
+
+    return {
+      data: rows.map((row) => ({
+        id: row.id,
+        webhook_id: row.webhook_id,
+        organization_id: row.organization_id,
+        event_id: row.event_id,
+        event_type: row.event_type,
+        payload: row.payload,
+        status: row.status,
+        attempts: row.attempt_count,
+        response_status: row.response_status,
+        response_body: row.response_body,
+        last_error: row.last_error,
+        next_retry_at: row.next_retry_at,
+        last_attempt_at: row.last_attempt_at,
+        created_at: row.created_at,
+      })),
+      total: count ?? 0,
+    };
+  },
+
   /** Fetch a single delivery row by id. */
   async findDeliveryById(
     request: FastifyRequest,
diff --git a/apps/api/src/modules/webhooks/webhooks.schema.ts b/apps/api/src/modules/webhooks/webhooks.schema.ts
index 2b13571..18dc3c1 100644
--- a/apps/api/src/modules/webhooks/webhooks.schema.ts
+++ b/apps/api/src/modules/webhooks/webhooks.schema.ts
@@ -90,6 +90,25 @@ export const webhookDeliverySchema = z.object({
 });
 export type WebhookDelivery = z.infer<typeof webhookDeliverySchema>;
 
+export const webhookDlqDeliverySchema = z.object({
+  id:              z.string().uuid(),
+  webhook_id:      z.string().uuid(),
+  organization_id: z.string().uuid(),
+  event_id:        z.string().uuid(),
+  event_type:      z.string().nullable(),
+  payload:         z.unknown().nullable(),
+  status:          z.literal("failed"),
+  attempts:        z.number(),
+  response_status: z.number().nullable(),
+  response_body:   z.string().nullable(),
+  last_error:      z.string().nullable(),
+  next_retry_at:   z.string().nullable(),
+  // DB and API use the same timestamp name to avoid semantic drift.
+  last_attempt_at: z.string().nullable(),
+  created_at:      z.string(),
+});
+export type WebhookDlqDelivery = z.infer<typeof webhookDlqDeliverySchema>;
+
 // ─── Query params ──────────────────────────────────────────────────────────────
 
 export const deliveryListQuerySchema = z.object({
@@ -99,3 +118,11 @@ export const deliveryListQuerySchema = z.object({
   status:     z.enum(["pending", "success", "failed"]).optional(),
 });
 export type DeliveryListQuery = z.infer<typeof deliveryListQuerySchema>;
+
+export const dlqListQuerySchema = z.object({
+  limit:      z.coerce.number().int().min(1).max(100).default(50),
+  offset:     z.coerce.number().int().min(0).default(0),
+  event_type: z.string().min(1).optional(),
+  webhook_id: z.string().uuid().optional(),
+});
+export type DlqListQuery = z.infer<typeof dlqListQuerySchema>;
diff --git a/apps/api/src/modules/webhooks/webhooks.service.ts b/apps/api/src/modules/webhooks/webhooks.service.ts
index 2187fb4..86cc3e6 100644
--- a/apps/api/src/modules/webhooks/webhooks.service.ts
+++ b/apps/api/src/modules/webhooks/webhooks.service.ts
@@ -22,6 +22,8 @@ import type {
   WebhookPublic,
   WebhookDelivery,
   DeliveryListQuery,
+  DlqListQuery,
+  WebhookDlqDelivery,
 } from "./webhooks.schema.js";
 
 export const webhooksService = {
@@ -87,6 +89,13 @@ export const webhooksService = {
     return webhooksRepository.listDeliveries(request, query);
   },
 
+  async listDlqDeliveries(
+    request: FastifyRequest,
+    query: DlqListQuery,
+  ): Promise<{ data: WebhookDlqDelivery[]; total: number }> {
+    return webhooksRepository.listDlqDeliveries(request, query);
+  },
+
   /**
    * Manually retry a delivery.
    *
diff --git a/apps/api/src/routes/index.ts b/apps/api/src/routes/index.ts
index bdc50e5..5c6f7a6 100644
--- a/apps/api/src/routes/index.ts
+++ b/apps/api/src/routes/index.ts
@@ -13,6 +13,7 @@ import { dashboardRoutes } from "../modules/dashboard/dashboard.routes.js";
 import { profileRoutes } from "../modules/profile/profile.routes.js";
 import { adminDashboardRoutes } from "../modules/admin/dashboard.routes.js";
 import { adminMapRoutes } from "../modules/admin/map.routes.js";
+import { webhookDlqRoutes } from "../modules/admin/webhook-dlq.routes.js";
 import { eventsRoutes } from "./events.routes.js";
 import { webhooksRoutes } from "../modules/webhooks/webhooks.routes.js";
 
@@ -31,6 +32,7 @@ export async function registerRoutes(app: FastifyInstance): Promise<void> {
   await app.register(profileRoutes);
   await app.register(adminDashboardRoutes);
   await app.register(adminMapRoutes);
+  await app.register(webhookDlqRoutes);
   await app.register(eventsRoutes);
   await app.register(webhooksRoutes);
 }
diff --git a/apps/api/tests/integration/admin/webhooks.integration.test.ts b/apps/api/tests/integration/admin/webhooks.integration.test.ts
index 80de00c..72ca940 100644
--- a/apps/api/tests/integration/admin/webhooks.integration.test.ts
+++ b/apps/api/tests/integration/admin/webhooks.integration.test.ts
@@ -55,6 +55,7 @@ vi.mock("../../../src/modules/webhooks/webhooks.repository.js", () => ({
     update:                vi.fn(),
     delete:                vi.fn(),
     listDeliveries:        vi.fn(),
+    listDlqDeliveries:     vi.fn(),
     findDeliveryById:      vi.fn(),
     findWebhookSecretById: vi.fn(),
     resetDeliveryForRetry: vi.fn(),
@@ -86,7 +87,9 @@ import {
   buildTestApp,
   signAdminToken,
   signEmployeeToken,
+  TEST_ADMIN_ID,
   TEST_ORG_ID,
+  TEST_ORG_ID_B,
 } from "../../setup/test-server.js";
 import { webhooksRepository } from "../../../src/modules/webhooks/webhooks.repository.js";
 
@@ -121,16 +124,35 @@ const deliveryRow = {
   created_at:       now,
 };
 
+const dlqDeliveryRow = {
+  id:               DELIVERY_ID,
+  webhook_id:       WEBHOOK_ID,
+  organization_id:  TEST_ORG_ID,
+  event_id:         EVENT_ID,
+  event_type:       "expense.created",
+  payload:          { type: "expense.created", amount: 123.45 },
+  status:           "failed" as const,
+  attempts:         3,
+  response_status:  500,
+  response_body:    "Internal Server Error",
+  last_error:       "Receiver returned 500",
+  next_retry_at:    null,
+  last_attempt_at:  now,
+  created_at:       now,
+};
+
 // ─── Test suite ───────────────────────────────────────────────────────────────
 
 describe("Webhooks Admin API", () => {
   let app: FastifyInstance;
   let adminToken: string;
+  let adminTokenOrgB: string;
   let employeeToken: string;
 
   beforeAll(async () => {
     app = await buildTestApp();
     adminToken    = signAdminToken(app);
+    adminTokenOrgB = signAdminToken(app, TEST_ADMIN_ID, TEST_ORG_ID_B);
     employeeToken = signEmployeeToken(app);
   });
 
@@ -406,6 +428,121 @@ describe("Webhooks Admin API", () => {
     });
   });
 
+  // ─── GET /admin/webhook-dlq ────────────────────────────────────────────────
+
+  describe("GET /admin/webhook-dlq", () => {
+    it("returns failed deliveries for ADMIN", async () => {
+      // eslint-disable-next-line @typescript-eslint/no-explicit-any
+      vi.mocked((webhooksRepository as any).listDlqDeliveries).mockResolvedValueOnce({
+        data: [dlqDeliveryRow],
+        total: 1,
+      });
+
+      const res = await app.inject({
+        method:  "GET",
+        url:     "/admin/webhook-dlq?limit=50&offset=0",
+        headers: { authorization: `Bearer ${adminToken}` },
+      });
+
+      expect(res.statusCode).toBe(200);
+      const body = res.json<{
+        success: boolean;
+        data: typeof dlqDeliveryRow[];
+        meta: { limit: number; offset: number; count: number };
+      }>();
+      expect(body.success).toBe(true);
+      expect(body.data).toHaveLength(1);
+      expect(body.data[0].status).toBe("failed");
+      expect(body.meta).toEqual({ limit: 50, offset: 0, count: 1 });
+    });
+
+    it("accepts event_type and webhook_id filters", async () => {
+      // eslint-disable-next-line @typescript-eslint/no-explicit-any
+      vi.mocked((webhooksRepository as any).listDlqDeliveries).mockResolvedValueOnce({
+        data: [],
+        total: 0,
+      });
+
+      const res = await app.inject({
+        method: "GET",
+        url: `/admin/webhook-dlq?event_type=expense.created&webhook_id=${WEBHOOK_ID}`,
+        headers: { authorization: `Bearer ${adminToken}` },
+      });
+
+      expect(res.statusCode).toBe(200);
+    });
+
+    it("returns 403 for EMPLOYEE role", async () => {
+      const res = await app.inject({
+        method:  "GET",
+        url:     "/admin/webhook-dlq",
+        headers: { authorization: `Bearer ${employeeToken}` },
+      });
+      expect(res.statusCode).toBe(403);
+    });
+
+    it("returns 401 with no token", async () => {
+      const res = await app.inject({
+        method: "GET",
+        url: "/admin/webhook-dlq",
+      });
+      expect(res.statusCode).toBe(401);
+    });
+  });
+
+  // ─── POST /admin/webhook-dlq/:id/retry ─────────────────────────────────────
+
+  describe("POST /admin/webhook-dlq/:id/retry", () => {
+    it("retries a failed DLQ delivery", async () => {
+      const { enqueueWebhookDelivery } = await import(
+        "../../../src/workers/webhook.queue.js"
+      );
+
+      const webhookWithSecret = {
+        id: WEBHOOK_ID,
+        url: "https://example.com/hook",
+        secret: "s3cr3t_value_long_enough",
+      };
+      const updatedDelivery = { ...deliveryRow, status: "pending" as const };
+
+      vi.mocked(webhooksRepository.findDeliveryById).mockResolvedValueOnce(deliveryRow);
+      // eslint-disable-next-line @typescript-eslint/no-explicit-any
+      vi.mocked((webhooksRepository as any).findWebhookSecretById).mockResolvedValueOnce(webhookWithSecret);
+      // eslint-disable-next-line @typescript-eslint/no-explicit-any
+      vi.mocked((webhooksRepository as any).resetDeliveryForRetry).mockResolvedValueOnce(updatedDelivery);
+
+      const res = await app.inject({
+        method:  "POST",
+        url:     `/admin/webhook-dlq/${DELIVERY_ID}/retry`,
+        headers: { authorization: `Bearer ${adminToken}` },
+      });
+
+      expect(res.statusCode).toBe(200);
+      expect(vi.mocked(enqueueWebhookDelivery)).toHaveBeenCalledOnce();
+    });
+
+    it("returns 404 for admin from another organization", async () => {
+      // Repository is mocked: simulate the org-scoped lookup returning null for a cross-org id.
+      vi.mocked(webhooksRepository.findDeliveryById).mockResolvedValueOnce(null);
+
+      const res = await app.inject({
+        method:  "POST",
+        url:     `/admin/webhook-dlq/${DELIVERY_ID}/retry`,
+        headers: { authorization: `Bearer ${adminTokenOrgB}` },
+      });
+
+      expect(res.statusCode).toBe(404);
+    });
+
+    it("returns 401 with no token", async () => {
+      const res = await app.inject({
+        method: "POST",
+        url: `/admin/webhook-dlq/${DELIVERY_ID}/retry`,
+      });
+      expect(res.statusCode).toBe(401);
+    });
+  });
+
   // ─── POST /admin/webhook-deliveries/:id/retry ───────────────────────────────
 
   describe("POST /admin/webhook-deliveries/:id/retry", () => {

From 6eec3d2f69ea001bc5d494df445afff59aecf62c Mon Sep 17 00:00:00 2001
From: rajashish147 <rajashish147@gmail.com>
Date: Sat, 28 Mar 2026 21:37:26 +0530
Subject: [PATCH 3/3] fix(nginx): remove http2 directive from server
 configuration

---
 infra/nginx/fieldtrack.conf | 1 -
 1 file changed, 1 deletion(-)

diff --git a/infra/nginx/fieldtrack.conf b/infra/nginx/fieldtrack.conf
index 44a66a9..2b5b27a 100644
--- a/infra/nginx/fieldtrack.conf
+++ b/infra/nginx/fieldtrack.conf
@@ -86,7 +86,6 @@ server {
 
     listen 443 ssl;
     listen [::]:443 ssl;
-    http2 on;
 
     server_name __API_HOSTNAME__;