AI analytics #1339
base: dev
Changes from all commits

apps/backend/src/app/api/latest/ai/query/[mode]/route.ts
nams1570 marked this conversation as resolved.

@@ -1,21 +1,44 @@
import { logMcpCall } from "@/lib/ai/mcp-logger";
import {
  assertProjectAccess,
  handleGenerateMode,
  handleStreamMode,
} from "@/lib/ai/ai-query-handlers";
import type { CommonLogFields, ModeContext } from "@/lib/ai/types";
import { selectModel } from "@/lib/ai/models";
import { getFullSystemPrompt } from "@/lib/ai/prompts";
import { reviewMcpCall } from "@/lib/ai/qa-reviewer";
import { getFullSystemPrompt, type SystemPromptId } from "@/lib/ai/prompts";
import { requestBodySchema } from "@/lib/ai/schema";
import { getTools } from "@/lib/ai/tools";
import { getVerifiedQaContext } from "@/lib/ai/verified-qa";
import { listManagedProjectIds } from "@/lib/projects";
import { getVerifiedQaContext } from "@/lib/ai/qa/verified-qa";
import { SmartResponse } from "@/route-handlers/smart-response";
import { createSmartRouteHandler } from "@/route-handlers/smart-route-handler";
import { runAsynchronouslyAndWaitUntil } from "@/utils/background-tasks";
import { validateImageAttachments } from "@stackframe/stack-shared/dist/ai/image-limits";
import { ChatContent } from "@stackframe/stack-shared/dist/interface/admin-interface";
import { KnownErrors } from "@stackframe/stack-shared/dist/known-errors";
import { yupMixed, yupObject, yupString } from "@stackframe/stack-shared/dist/schema-fields";
import { getEnvVariable } from "@stackframe/stack-shared/dist/utils/env";
import { StatusError } from "@stackframe/stack-shared/dist/utils/errors";
import { Json } from "@stackframe/stack-shared/dist/utils/json";
import { generateText, stepCountIs, streamText, type ModelMessage } from "ai";
import type { ModelMessage } from "ai";

function getStepLimit(systemPromptId: SystemPromptId, hasTools: boolean): number {
  if (!hasTools) return 1;
  if (systemPromptId === "docs-ask-ai" || systemPromptId === "command-center-ask-ai") return 50;
  if (systemPromptId === "create-dashboard") return 12;
  return 5;
}

async function buildSystemPrompt(systemPromptId: SystemPromptId): Promise<string> {
  let systemPrompt = getFullSystemPrompt(systemPromptId);
  const isDocsOrSearch = systemPromptId === "docs-ask-ai" || systemPromptId === "command-center-ask-ai";
  if (isDocsOrSearch) {
    // Stuffing the entire verified QA corpus into the system prompt on every
    // request is intentionally naive — it grows monotonically with each new
    // QA pair and re-fetches/re-sends content that's unchanged across
    // requests. Once the corpus is large enough to matter, we should swap to
    // a retriever-based system (e.g. an embedding-based retriever, top-k by
    // query similarity) and/or cache the assembled context. For the current
    // corpus size, though, this is fine and lets the model see everything.
    systemPrompt += await getVerifiedQaContext();
  }
  return systemPrompt;
}

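Editor's note: the comment above sketches the eventual direction. For concreteness, a minimal top-k retriever along those lines might look like the sketch below. It assumes the AI SDK's `embed`/`embedMany`/`cosineSimilarity` helpers; `QaPair` and the idea of passing the corpus and model in as parameters are illustrative, not names from this PR.

```ts
// A minimal sketch (not part of this PR) of an embedding-based top-k retriever.
import { cosineSimilarity, embed, embedMany, type EmbeddingModel } from "ai";

type QaPair = { question: string, answer: string };

async function getTopKQaContext(
  model: EmbeddingModel<string>,
  query: string,
  pairs: QaPair[],
  k = 8,
): Promise<string> {
  // In production the corpus embeddings should be computed once and cached;
  // re-embedding on every request would recreate the cost problem the comment
  // above describes.
  const { embedding: queryEmbedding } = await embed({ model, value: query });
  const { embeddings } = await embedMany({ model, values: pairs.map((p) => p.question) });
  return pairs
    .map((pair, i) => ({ pair, score: cosineSimilarity(queryEmbedding, embeddings[i]) }))
    .sort((a, b) => b.score - a.score)
    .slice(0, k)
    .map(({ pair }) => `Q: ${pair.question}\nA: ${pair.answer}`)
    .join("\n\n");
}
```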
export const POST = createSmartRouteHandler({
  metadata: {

@@ -34,144 +57,81 @@ export const POST = createSmartRouteHandler({
    const { quality, speed, systemPrompt: systemPromptId, tools: toolNames, messages, projectId } = body;

    if (projectId != null) {
      if (fullReq.auth?.project.id !== "internal") {
        throw new StatusError(StatusError.Forbidden, "You do not have access to this project");
      }
      const user = fullReq.auth.user;
      if (user == null) {
        throw new StatusError(StatusError.Forbidden, "You do not have access to this project");
      }
      const managedProjectIds = await listManagedProjectIds(user);
      if (!managedProjectIds.includes(projectId)) {
        throw new StatusError(StatusError.Forbidden, "You do not have access to this project");
      }
      await assertProjectAccess(projectId, fullReq.auth);
    }

    const imageValidationResult = validateImageAttachments(messages);
    if (!imageValidationResult.ok) {
      throw new StatusError(StatusError.BadRequest, imageValidationResult.reason);
      const { failure } = imageValidationResult;
      switch (failure.code) {
        case "too_many": {
          throw new KnownErrors.TooManyImageAttachments(failure.maxImages);
        }
        case "too_large": {
          throw new KnownErrors.ImageAttachmentTooLarge(failure.maxBytes, failure.actualBytes);
        }
      }
    }

    const authenticatedApiKey = isAuthenticated
      ? getEnvVariable("STACK_OPENROUTER_AUTHENTICATED_API_KEY", "")
      : "";
    const model = selectModel(quality, speed, isAuthenticated, authenticatedApiKey || undefined);
    const isDocsOrSearch = systemPromptId === "docs-ask-ai" || systemPromptId === "command-center-ask-ai";
    let systemPrompt = getFullSystemPrompt(systemPromptId);
    if (isDocsOrSearch) {
      systemPrompt += await getVerifiedQaContext();
    }
    const systemPrompt = await buildSystemPrompt(systemPromptId);
    const tools = await getTools(toolNames, { auth: fullReq.auth, targetProjectId: projectId });
    const toolsArg = Object.keys(tools).length > 0 ? tools : undefined;
    const isCreateDashboard = systemPromptId === "create-dashboard";
    const isBuildAnalyticsQuery = systemPromptId === "build-analytics-query";
    const stepLimit = toolsArg == null
      ? 1
      : isDocsOrSearch
        ? 50
        : isCreateDashboard
          ? 12
          : isBuildAnalyticsQuery
            ? 5
            : 5;
    const stepLimit = getStepLimit(systemPromptId, toolsArg != null);

    const correlationId = crypto.randomUUID();
    const conversationIdForLog = body.mcpCallMetadata
      ? body.mcpCallMetadata.conversationId ?? crypto.randomUUID()
      : undefined;
    const common: CommonLogFields = {
      correlationId,
      mode,
Comment on lines +89 to +90
Contributor

**`mcpCorrelationId` is always equal to `correlationId`**

`correlationId` is the UUID freshly minted for this AI query, and `mcpCorrelationId` is set to that same value when `mcpCallMetadata` is present. In `logIfMcpToolCall`, the MCP call log row is also written with the same `correlationId`. So `ai_query_log.mcpCorrelationId` and `ai_query_log.correlationId` carry identical values, making the field redundant. The field name implies a _different_ ID (the MCP log's own correlation key), which will confuse future maintainers. Either give the MCP call log its own distinct UUID and store that here, or remove the `mcpCorrelationId` column and rely on the shared `correlationId`.
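Editor's note: for the reviewer's first option, a concise sketch (names assumed, not from this PR):

```ts
// Hypothetical sketch: give the MCP call log its own UUID so
// ai_query_log.mcpCorrelationId points at a distinct record instead of
// duplicating the query's correlationId.
const correlationId = crypto.randomUUID(); // this AI query's own ID
const mcpLogCorrelationId = body.mcpCallMetadata ? crypto.randomUUID() : undefined;

// in `common`:
//   mcpCorrelationId: mcpLogCorrelationId,
// and the same mcpLogCorrelationId would be passed to logMcpCall as the
// correlationId it writes its row under.
```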
      systemPromptId,
      quality,
      speed,
      modelId: String(model.modelId),
      isAuthenticated,
      projectId: projectId ?? undefined,
      userId: fullReq.auth?.user?.id,
      requestedToolsJson: JSON.stringify(toolNames),
      messagesJson: JSON.stringify(messages),
      mcpCorrelationId: body.mcpCallMetadata ? correlationId : undefined,
      conversationId: conversationIdForLog,

aadesh18 marked this conversation as resolved.

    };
    const startedAt = performance.now();

    const isAnthropic = model.modelId.startsWith("anthropic/");
    // Can be optimized: only opt into prompt caching for routes that are hit
    // frequently enough to amortize the write.
    const systemMessage: ModelMessage = {
      role: "system",
      content: systemPrompt,
      ...(isAnthropic && {
        providerOptions: {
          openrouter: { cacheControl: { type: "ephemeral" } },
        },
      }),
    };
Comment on lines +110 to +116
Collaborator

I want to point out a few things. Here, we're caching for only 5 min (https://openrouter.ai/docs/guides/best-practices/prompt-caching#cache-ttl-options).

Collaborator (Author)

As we discussed, most of the routes are being hit at least once every 5 minutes. However, in the future we should think about not enabling the cache for routes that aren't hit often enough. Added a comment explaining the same.
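Editor's note: a minimal sketch of that follow-up, assuming the gating would key off `systemPromptId`; the set of "hot" prompt IDs below is invented for illustration.

```ts
// Hypothetical follow-up: only opt into OpenRouter's ephemeral prompt cache
// for prompts requested at least once per ~5-minute TTL, so cache writes pay off.
const CACHE_WORTHY_PROMPTS: ReadonlySet<string> = new Set([
  "docs-ask-ai",
  "command-center-ask-ai",
]);

function shouldCachePrompt(systemPromptId: string, modelId: string): boolean {
  // Anthropic models are the ones routed through OpenRouter's cacheControl here.
  return modelId.startsWith("anthropic/") && CACHE_WORTHY_PROMPTS.has(systemPromptId);
}

// Usage in the systemMessage construction:
//   ...(shouldCachePrompt(systemPromptId, String(model.modelId)) && {
//     providerOptions: { openrouter: { cacheControl: { type: "ephemeral" } } },
//   }),
```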
    // Cast: the schema narrows role and leaves content as unknown, but the
    // AI SDK accepts a superset (role: "system" etc.). We've intentionally
    // excluded `system` at the schema layer to prevent prompt injection via
    // client-supplied system messages — see schema.ts.
    const modelMessages = messages as unknown as ModelMessage[];
    const cachedMessages: ModelMessage[] = [systemMessage, ...modelMessages];

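Editor's note: for reference, the schema-layer exclusion the comment mentions could look roughly like this, using the yup helpers already imported above; the actual shape in schema.ts may differ.

```ts
// Rough sketch of the guard described above: the message schema only admits
// non-system roles, so a client-supplied system message fails validation.
const messageSchema = yupObject({
  role: yupString().oneOf(["user", "assistant", "tool"]).defined(),
  content: yupMixed().defined(),
});
```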
| if (mode === "stream") { | ||
| const result = streamText({ | ||
| model, | ||
| system: systemPrompt, | ||
| messages: modelMessages, | ||
| tools: toolsArg, | ||
| stopWhen: stepCountIs(stepLimit), | ||
| }); | ||
| return { | ||
| statusCode: 200, | ||
| bodyType: "response" as const, | ||
| body: result.toUIMessageStreamResponse(), | ||
| }; | ||
| } else { | ||
| const startedAt = Date.now(); | ||
| const controller = new AbortController(); | ||
| const timeoutId = setTimeout(() => controller.abort(), 120_000); | ||
| const result = await generateText({ | ||
| model, | ||
| system: systemPrompt, | ||
| messages: modelMessages, | ||
| tools: toolsArg, | ||
| abortSignal: controller.signal, | ||
| stopWhen: stepCountIs(stepLimit), | ||
| }).finally(() => clearTimeout(timeoutId)); | ||
|
|
||
      const content: ChatContent = result.steps.flatMap((step) => {
        const blocks: ChatContent = [];
        if (step.text) {
          blocks.push({ type: "text", text: step.text });
        }
        const outById = new Map(step.toolResults.map((r) => [r.toolCallId, r.output as Json]));
        for (const call of step.toolCalls) {
          blocks.push({
            type: "tool-call",
            toolName: call.toolName,
            toolCallId: call.toolCallId,
            args: call.input as Json,
            argsText: JSON.stringify(call.input),
            result: outById.get(call.toolCallId) ?? null,
          });
        }
        return blocks;
      });

      let responseConversationId: string | undefined;
      if (body.mcpCallMetadata != null) {
        const correlationId = crypto.randomUUID();
        const conversationId = body.mcpCallMetadata.conversationId ?? crypto.randomUUID();
        responseConversationId = conversationId;
        const firstUserMessage = messages.find(m => m.role === "user");
        const question = typeof firstUserMessage?.content === "string"
          ? firstUserMessage.content
          : JSON.stringify(firstUserMessage?.content ?? "");

        const innerToolCallsJson = JSON.stringify(content.filter(b => b.type === "tool-call"));
    const ctx: ModeContext = { model, cachedMessages, toolsArg, stepLimit, common, startedAt };
    const extras = {
      messages,
      mcpCallMetadata: body.mcpCallMetadata ?? undefined,
      correlationId,
      conversationIdForLog,
    };

        const logPromise = logMcpCall({
          correlationId,
          toolName: body.mcpCallMetadata.toolName,
          reason: body.mcpCallMetadata.reason,
          userPrompt: body.mcpCallMetadata.userPrompt,
          conversationId,
          question,
          response: result.text,
          stepCount: result.steps.length,
          innerToolCallsJson,
          durationMs: BigInt(Date.now() - startedAt),
          modelId: String(model.modelId),
          errorMessage: undefined,
        });
        runAsynchronouslyAndWaitUntil(logPromise);

        runAsynchronouslyAndWaitUntil(reviewMcpCall({
          logPromise,
          correlationId,
          question,
          reason: body.mcpCallMetadata.reason,
          response: result.text,
        }));
      }

      return {
        statusCode: 200,
        bodyType: "json" as const,
        body: {
          content,
          finalText: result.text,
          conversationId: responseConversationId ?? null,
        },
      };
    if (mode === "stream") {
      return handleStreamMode({ ...ctx, ...extras });
    }
    return await handleGenerateMode({ ...ctx, ...extras });
  },
});
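Editor's note: for anyone testing the branch, a hypothetical call against the generate mode of this route. The URL is inferred from the file path and the body fields from the destructuring at the top of the handler; the `quality`/`speed`/`systemPrompt` values are guesses, not documented enums.

```ts
// Hypothetical smoke test of the generate mode of
// app/api/latest/ai/query/[mode]/route.ts (field values are illustrative).
const res = await fetch("/api/latest/ai/query/generate", {
  method: "POST",
  headers: { "content-type": "application/json" },
  body: JSON.stringify({
    quality: "high",
    speed: "fast",
    systemPrompt: "docs-ask-ai",
    tools: [],
    messages: [{ role: "user", content: "How do I rotate an API key?" }],
  }),
});
const { content, finalText, conversationId } = await res.json();
```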