From 23fc62f3c209a7d60faaa2089fd4d52839de9496 Mon Sep 17 00:00:00 2001 From: Ashish Kumar Singh Date: Fri, 21 Nov 2025 01:46:01 -0500 Subject: [PATCH 1/3] refactor: split agent config by deployment type and reduce resource limits Split AGENT_CONFIG into platform-specific and vanilla deployments based on PLATFORM_MODEL_PROVIDERS environment variable. Add documentation comments explaining config usage. Reduce deepDebugger max tool calling depth from 100 to 50 and conversation compactification threshold from 100k to 50k tokens. Add Google Vertex AI models (GPT OSS 120B and Kimi K2 Thinking) to model catalog. Enhance blueprint system prompt to emphasize product quality, completeness and polish. --- worker/agents/constants.ts | 2 +- worker/agents/inferutils/config.ts | 105 +++++++++++++++++- worker/agents/inferutils/config.types.ts | 44 ++++---- .../operations/UserConversationProcessor.ts | 2 +- worker/agents/planning/blueprint.ts | 11 +- 5 files changed, 135 insertions(+), 29 deletions(-) diff --git a/worker/agents/constants.ts b/worker/agents/constants.ts index 907f3a84..3dc6e686 100644 --- a/worker/agents/constants.ts +++ b/worker/agents/constants.ts @@ -113,7 +113,7 @@ export const MAX_TOOL_CALLING_DEPTH_DEFAULT = 7; export const getMaxToolCallingDepth = (agentActionKey: AgentActionKey | 'testModelConfig') => { switch (agentActionKey) { case 'deepDebugger': - return 100; + return 50; default: return MAX_TOOL_CALLING_DEPTH_DEFAULT; } diff --git a/worker/agents/inferutils/config.ts b/worker/agents/inferutils/config.ts index bd8b21a2..97e75ac0 100644 --- a/worker/agents/inferutils/config.ts +++ b/worker/agents/inferutils/config.ts @@ -7,8 +7,19 @@ import { LiteModels, RegularModels, } from "./config.types"; +import { env } from 'cloudflare:workers'; -export const AGENT_CONFIG: AgentConfig = env.PLATFORM_MODEL_PROVIDERS ? +//====================================================================================== +// ATTENTION! 
This config is will most likely NOT work right away! +// It requires specific API keys and Cloudflare AI Gateway setup. +//====================================================================================== +/* +These are the configs we use at build.cloudflare.dev +You may need to provide API keys for these models in your environment or use Cloudflare AI Gateway unified billing +for seamless model access without managing multiple keys. +*/ +{ templateSelection: { name: AIModels.GEMINI_2_5_FLASH_LITE, max_tokens: 2000, @@ -73,7 +84,98 @@ export const AGENT_CONFIG: AgentConfig = { }, // Not used right now screenshotAnalysis: { + name: AIModels.DISABLED, + reasoning_effort: 'medium', + max_tokens: 8000, + temperature: 1, + fallbackModel: AIModels.GEMINI_2_5_FLASH, + }, + realtimeCodeFixer: { + name: AIModels.DISABLED, + reasoning_effort: 'low', + max_tokens: 32000, + temperature: 1, + fallbackModel: AIModels.GEMINI_2_5_FLASH, + }, + // Not used right now + fastCodeFixer: { + name: AIModels.DISABLED, + reasoning_effort: undefined, + max_tokens: 64000, + temperature: 0.0, + fallbackModel: AIModels.GEMINI_2_5_PRO, + }, +} : +//====================================================================================== +// ATTENTION! 
This is the most likely config being used in your deployment +//====================================================================================== +/* These are the default out-of-the box gemini-only models used when PLATFORM_MODEL_PROVIDERS is not set */ +{ + templateSelection: { + name: AIModels.GEMINI_2_5_FLASH_LITE, + max_tokens: 2000, + fallbackModel: AIModels.GEMINI_2_5_FLASH, + temperature: 0.6, + }, + blueprint: { + name: AIModels.GEMINI_2_5_PRO, + reasoning_effort: 'medium', + max_tokens: 64000, + fallbackModel: AIModels.GEMINI_2_5_FLASH, + temperature: 0.7, + }, + projectSetup: { + name: AIModels.GEMINI_2_5_PRO, + reasoning_effort: 'low', + max_tokens: 10000, + temperature: 0.2, + fallbackModel: AIModels.GEMINI_2_5_PRO, + }, + phaseGeneration: { name: AIModels.GEMINI_2_5_PRO, + reasoning_effort: 'low', + max_tokens: 32000, + temperature: 0.2, + fallbackModel: AIModels.GEMINI_2_5_FLASH, + }, + firstPhaseImplementation: { + name: AIModels.GEMINI_2_5_PRO, + reasoning_effort: 'low', + max_tokens: 64000, + temperature: 0.2, + fallbackModel: AIModels.GEMINI_2_5_PRO, + }, + phaseImplementation: { + name: AIModels.GEMINI_2_5_PRO, + reasoning_effort: 'low', + max_tokens: 64000, + temperature: 0.2, + fallbackModel: AIModels.GEMINI_2_5_PRO, + }, + conversationalResponse: { + name: AIModels.GEMINI_2_5_FLASH, + reasoning_effort: 'low', + max_tokens: 4000, + temperature: 0, + fallbackModel: AIModels.GEMINI_2_5_PRO, + }, + deepDebugger: { + name: AIModels.GEMINI_2_5_PRO, + reasoning_effort: 'high', + max_tokens: 8000, + temperature: 0.5, + fallbackModel: AIModels.GEMINI_2_5_FLASH, + }, + fileRegeneration: { + name: AIModels.GEMINI_2_5_PRO, + reasoning_effort: 'low', + max_tokens: 32000, + temperature: 0, + fallbackModel: AIModels.GEMINI_2_5_FLASH, + }, + // Not used right now + screenshotAnalysis: { + name: AIModels.DISABLED, reasoning_effort: 'medium', max_tokens: 8000, temperature: 1, @@ -96,6 +198,7 @@ export const AGENT_CONFIG: AgentConfig = { }, }; + export 
const AGENT_CONSTRAINTS: Map = new Map([ ['fastCodeFixer', { allowedModels: new Set([AIModels.DISABLED]), diff --git a/worker/agents/inferutils/config.types.ts b/worker/agents/inferutils/config.types.ts index 63cd2a05..8e94fbee 100644 --- a/worker/agents/inferutils/config.types.ts +++ b/worker/agents/inferutils/config.types.ts @@ -252,28 +252,28 @@ const MODELS_MASTER = { nonReasoning: true, } }, - // GROK_4_1_FAST: { - // id: 'grok/grok-4.1-fast', - // config: { - // name: 'Grok 4.1 Fast', - // size: ModelSize.LITE, - // provider: 'grok', - // creditCost: 0.8, // $0.20 - // contextSize: 2_000_000, // 2M Context - // nonReasoning: true, - // } - // }, - // GROQ_GPT_120_OSS: { - // id: 'groq/gpt-oss-120b', - // config: { - // name: 'GROQ GPT 120B OSS', - // size: ModelSize.LITE, - // provider: 'groq', - // creditCost: 0.4, // $0.25 - // contextSize: 131072, // 128K Context - // nonReasoning: true, - // } - // }, + + // --- Vertex Models --- + VERTEX_GPT_OSS_120: { + id: 'google-vertex-ai/openai/gpt-oss-120b', + config: { + name: 'Google Vertex GPT OSS 120B', + size: ModelSize.LITE, + provider: 'google-vertex-ai', + creditCost: 0.36, // $0.09 + contextSize: 131072, // 128K Context + } + }, + VERTEX_KIMI_THINKING: { + id: 'google-vertex-ai/moonshotai/kimi-k2-thinking', + config: { + name: 'Google Vertex Kimi K2 Thinking', + size: ModelSize.LITE, + provider: 'google-vertex-ai', + creditCost: 2, // $0.50 + contextSize: 262144, // 256K Context + } + }, } as const; /** diff --git a/worker/agents/operations/UserConversationProcessor.ts b/worker/agents/operations/UserConversationProcessor.ts index 43ebaafe..f04e5572 100644 --- a/worker/agents/operations/UserConversationProcessor.ts +++ b/worker/agents/operations/UserConversationProcessor.ts @@ -25,7 +25,7 @@ const CHUNK_SIZE = 64; // Compactification thresholds const COMPACTIFICATION_CONFIG = { MAX_TURNS: 40, // Trigger after 50 conversation turns - MAX_ESTIMATED_TOKENS: 100000, + MAX_ESTIMATED_TOKENS: 50000, 
PRESERVE_RECENT_MESSAGES: 10, // Always keep last 10 messages uncompacted CHARS_PER_TOKEN: 4, // Rough estimation: 1 token ≈ 4 characters } as const; diff --git a/worker/agents/planning/blueprint.ts b/worker/agents/planning/blueprint.ts index 5f3148f9..63482b78 100644 --- a/worker/agents/planning/blueprint.ts +++ b/worker/agents/planning/blueprint.ts @@ -19,22 +19,23 @@ const SYSTEM_PROMPT = ` - You are tasked with creating a detailed yet concise, information-dense blueprint (PRD) for a web application project for our client: designing and outlining the frontend UI/UX and core functionality of the application with exceptional focus on visual appeal and user experience. + You are tasked with creating a detailed yet concise, information-dense blueprint (PRD) for a web application project for our client: designing and outlining the frontend UI/UX (user interface, user experience) and core functionality of the application with exceptional focus on visual appeal, user experience, product quality, completion and polish. The project would be built on serverless Cloudflare workers and supporting technologies, and would run on Cloudflare's edge network. The project would be seeded with a starting template. - Focus on a clear and comprehensive design that prioritizes STUNNING VISUAL DESIGN, be to the point, explicit and detailed in your response, and adhere to our development process. + Focus on a clear and comprehensive design that prioritizes STUNNING VISUAL DESIGN, polish and depth, be to the point, explicit and detailed in your response, and adhere to our development process. Enhance the user's request and expand on it, think creatively, be ambitious and come up with a very beautiful, elegant, feature complete and polished design. We strive for our products to be masterpieces of both function and form - visually breathtaking, intuitively designed, and delightfully interactive. - **REMEMBER: This is not a toy or educational project. 
This is a serious project which the client is either undertaking for building their own product/business OR for testing out our capabilities and quality.** + **REMEMBER: This is not a toy or educational project. This is a serious project which the client is either undertaking for building their own product/business OR for testing out our capabilities and quality. We do not just expect an MVP, We expect a production-ready, polished, and exceptional solution** Design the product described by the client and come up with a really nice and professional name for the product. Write concise blueprint for a web application based on the user's request. Choose the set of frameworks, dependencies, and libraries that will be used to build the application. - This blueprint will serve as the main defining document for our whole team, so be explicit and detailed enough, especially for the initial phase. + This blueprint will serve as the main defining and guiding document for our whole team, so be explicit and detailed enough, especially for the initial phase. Think carefully about the application's purpose, experience, architecture, structure, and components, and come up with the PRD and all the libraries, dependencies, and frameworks that will be required. **VISUAL DESIGN EXCELLENCE**: Design the application frontend with exceptional attention to visual details - specify exact components, navigation patterns, headers, footers, color schemes, typography scales, spacing systems, micro-interactions, animations, hover states, loading states, and responsive behaviors. **USER EXPERIENCE FOCUS**: Plan intuitive user flows, clear information hierarchy, accessible design patterns, and delightful interactions that make users want to use the application. Build upon the provided template. Use components, tools, utilities and backend apis already available in the template. 
+ Think and **BREAKDOWN** The project into multiple incremental phases that build upon each other to create a complete, polished product following our . @@ -77,6 +78,7 @@ const SYSTEM_PROMPT = ` • **TEMPLATE ENHANCEMENT:** Build upon the while significantly elevating its visual appeal. Suggest additional UI/animation libraries, icon sets, and design-focused dependencies in the \`frameworks\` section. - Enhance existing project patterns with beautiful visual treatments - Add sophisticated styling and interaction libraries as needed + - Be aware of template design/layout short-comings and take it into account during your planning and in pitfalls. ## Important use case specific instructions: {{usecaseSpecificInstructions}} @@ -101,6 +103,7 @@ const SYSTEM_PROMPT = ` + • **Ultra think:** Do thorough thinking internally first before writing the blueprint. Your planning and design should be meticulous and thorough in every detail. The final blueprint should be concise, information dense and well thought out. • **Completeness is Crucial:** The AI coder relies *solely* on this blueprint. Leave no ambiguity. • **Precision in UI/Layout:** Define visual structure explicitly. Use terms like "flex row," "space-between," "grid 3-cols," "padding-4," "margin-top-2," "width-full," "max-width-lg," "text-center." Specify responsive behavior. • **Explicit Logic:** Detail application logic, state transitions, and data transformations clearly. From a89044068d6aae24e77dfb16fa370f2c18942bb6 Mon Sep 17 00:00:00 2001 From: Ashish Kumar Singh Date: Fri, 21 Nov 2025 03:26:08 -0500 Subject: [PATCH 2/3] refactor: adjust blueprint and implementation agent model configurations Switch blueprint agent from Gemini 3 Pro Preview to OpenAI 5 Mini with reduced max tokens (32k). Update firstPhaseImplementation and phaseImplementation agents to use Gemini 2.5 Pro instead of Gemini 3 Pro Preview. Lower firstPhaseImplementation temperature from 1.0 to 0.2 for more consistent output. 
Update blueprint system prompt to clarify project seriousness by replacing "toy or educational" with "toy or demo". --- worker/agents/inferutils/config.ts | 10 +++++----- worker/agents/planning/blueprint.ts | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/worker/agents/inferutils/config.ts b/worker/agents/inferutils/config.ts index 97e75ac0..57e5e634 100644 --- a/worker/agents/inferutils/config.ts +++ b/worker/agents/inferutils/config.ts @@ -27,9 +27,9 @@ for seamless model access without managing multiple keys. temperature: 0.6, }, blueprint: { - name: AIModels.GEMINI_3_PRO_PREVIEW, + name: AIModels.OPENAI_5_MINI, reasoning_effort: 'medium', - max_tokens: 64000, + max_tokens: 32000, fallbackModel: AIModels.GEMINI_2_5_FLASH, temperature: 1.0, }, @@ -48,14 +48,14 @@ for seamless model access without managing multiple keys. fallbackModel: AIModels.GEMINI_2_5_FLASH, }, firstPhaseImplementation: { - name: AIModels.GEMINI_3_PRO_PREVIEW, + name: AIModels.GEMINI_2_5_PRO, reasoning_effort: 'low', max_tokens: 48000, - temperature: 1, + temperature: 0.2, fallbackModel: AIModels.GEMINI_2_5_PRO, }, phaseImplementation: { - name: AIModels.GEMINI_3_PRO_PREVIEW, + name: AIModels.GEMINI_2_5_PRO, reasoning_effort: 'low', max_tokens: 48000, temperature: 0.2, diff --git a/worker/agents/planning/blueprint.ts b/worker/agents/planning/blueprint.ts index 63482b78..f347cd7d 100644 --- a/worker/agents/planning/blueprint.ts +++ b/worker/agents/planning/blueprint.ts @@ -24,7 +24,7 @@ const SYSTEM_PROMPT = ` Focus on a clear and comprehensive design that prioritizes STUNNING VISUAL DESIGN, polish and depth, be to the point, explicit and detailed in your response, and adhere to our development process. Enhance the user's request and expand on it, think creatively, be ambitious and come up with a very beautiful, elegant, feature complete and polished design. 
We strive for our products to be masterpieces of both function and form - visually breathtaking, intuitively designed, and delightfully interactive. - **REMEMBER: This is not a toy or educational project. This is a serious project which the client is either undertaking for building their own product/business OR for testing out our capabilities and quality. We do not just expect an MVP, We expect a production-ready, polished, and exceptional solution** + **REMEMBER: This is not a toy or demo project. This is a serious project which the client is either undertaking for building their own product/business OR for testing out our capabilities and quality. We do not just expect an MVP, We expect a production-ready, polished, and exceptional solution** From e65d6152de7cf301746580c301811c3954a4edc3 Mon Sep 17 00:00:00 2001 From: Ashish Kumar Singh Date: Fri, 21 Nov 2025 04:02:33 -0500 Subject: [PATCH 3/3] refactor: extract shared agent configs and improve config organization Note: this is not a pure refactor for the default (Gemini-only) config. By spreading SHARED_IMPLEMENTATION_CONFIG, projectSetup max_tokens changes from 10000 to 48000, phaseGeneration max_tokens changes from 32000 to 48000 and its fallbackModel from GEMINI_2_5_FLASH to GEMINI_2_5_PRO, and firstPhaseImplementation/phaseImplementation max_tokens change from 64000 to 48000. --- worker/agents/inferutils/config.ts | 147 +++++++++++------------------ 1 file changed, 57 insertions(+), 90 deletions(-) diff --git a/worker/agents/inferutils/config.ts b/worker/agents/inferutils/config.ts index 57e5e634..2e0a97f6 100644 --- a/worker/agents/inferutils/config.ts +++ b/worker/agents/inferutils/config.ts @@ -9,23 +9,54 @@ import { } from "./config.types"; import { env } from 'cloudflare:workers'; -export const AGENT_CONFIG: AgentConfig = env.PLATFORM_MODEL_PROVIDERS ? -//====================================================================================== -// ATTENTION! This config is will most likely NOT work right away! -// It requires specific API keys and Cloudflare AI Gateway setup. 
-//====================================================================================== -/* -These are the configs we use at build.cloudflare.dev -You may need to provide API keys for these models in your environment or use Cloudflare AI Gateway unified billing -for seamless model access without managing multiple keys. -*/ -{ +// Common configs - these are good defaults +const COMMON_AGENT_CONFIGS = { templateSelection: { name: AIModels.GEMINI_2_5_FLASH_LITE, max_tokens: 2000, fallbackModel: AIModels.GEMINI_2_5_FLASH, temperature: 0.6, }, + screenshotAnalysis: { + name: AIModels.DISABLED, + reasoning_effort: 'medium' as const, + max_tokens: 8000, + temperature: 1, + fallbackModel: AIModels.GEMINI_2_5_FLASH, + }, + realtimeCodeFixer: { + name: AIModels.DISABLED, + reasoning_effort: 'low' as const, + max_tokens: 32000, + temperature: 1, + fallbackModel: AIModels.GEMINI_2_5_FLASH, + }, + fastCodeFixer: { + name: AIModels.DISABLED, + reasoning_effort: undefined, + max_tokens: 64000, + temperature: 0.0, + fallbackModel: AIModels.GEMINI_2_5_PRO, + }, +} as const; + +const SHARED_IMPLEMENTATION_CONFIG = { + reasoning_effort: 'low' as const, + max_tokens: 48000, + temperature: 0.2, + fallbackModel: AIModels.GEMINI_2_5_PRO, +}; + +//====================================================================================== +// ATTENTION! Platform config requires specific API keys and Cloudflare AI Gateway setup. +//====================================================================================== +/* +These are the configs used at build.cloudflare.dev +You may need to provide API keys for these models in your environment or use +Cloudflare AI Gateway unified billing for seamless model access without managing multiple keys. +*/ +const PLATFORM_AGENT_CONFIG: AgentConfig = { + ...COMMON_AGENT_CONFIGS, blueprint: { name: AIModels.OPENAI_5_MINI, reasoning_effort: 'medium', @@ -49,17 +80,11 @@ for seamless model access without managing multiple keys. 
}, firstPhaseImplementation: { name: AIModels.GEMINI_2_5_PRO, - reasoning_effort: 'low', - max_tokens: 48000, - temperature: 0.2, - fallbackModel: AIModels.GEMINI_2_5_PRO, + ...SHARED_IMPLEMENTATION_CONFIG, }, phaseImplementation: { name: AIModels.GEMINI_2_5_PRO, - reasoning_effort: 'low', - max_tokens: 48000, - temperature: 0.2, - fallbackModel: AIModels.GEMINI_2_5_PRO, + ...SHARED_IMPLEMENTATION_CONFIG, }, conversationalResponse: { name: AIModels.GROK_4_FAST, @@ -82,41 +107,14 @@ for seamless model access without managing multiple keys. temperature: 1, fallbackModel: AIModels.GROK_CODE_FAST_1, }, - // Not used right now - screenshotAnalysis: { - name: AIModels.DISABLED, - reasoning_effort: 'medium', - max_tokens: 8000, - temperature: 1, - fallbackModel: AIModels.GEMINI_2_5_FLASH, - }, - realtimeCodeFixer: { - name: AIModels.DISABLED, - reasoning_effort: 'low', - max_tokens: 32000, - temperature: 1, - fallbackModel: AIModels.GEMINI_2_5_FLASH, - }, - // Not used right now - fastCodeFixer: { - name: AIModels.DISABLED, - reasoning_effort: undefined, - max_tokens: 64000, - temperature: 0.0, - fallbackModel: AIModels.GEMINI_2_5_PRO, - }, -} : +}; + //====================================================================================== -// ATTENTION! This is the most likely config being used in your deployment +// Default Gemini-only config (most likely used in your deployment) //====================================================================================== /* These are the default out-of-the box gemini-only models used when PLATFORM_MODEL_PROVIDERS is not set */ -{ - templateSelection: { - name: AIModels.GEMINI_2_5_FLASH_LITE, - max_tokens: 2000, - fallbackModel: AIModels.GEMINI_2_5_FLASH, - temperature: 0.6, - }, +const DEFAULT_AGENT_CONFIG: AgentConfig = { + ...COMMON_AGENT_CONFIGS, blueprint: { name: AIModels.GEMINI_2_5_PRO, reasoning_effort: 'medium', @@ -126,31 +124,19 @@ for seamless model access without managing multiple keys. 
}, projectSetup: { name: AIModels.GEMINI_2_5_PRO, - reasoning_effort: 'low', - max_tokens: 10000, - temperature: 0.2, - fallbackModel: AIModels.GEMINI_2_5_PRO, + ...SHARED_IMPLEMENTATION_CONFIG, }, phaseGeneration: { name: AIModels.GEMINI_2_5_PRO, - reasoning_effort: 'low', - max_tokens: 32000, - temperature: 0.2, - fallbackModel: AIModels.GEMINI_2_5_FLASH, + ...SHARED_IMPLEMENTATION_CONFIG, }, firstPhaseImplementation: { name: AIModels.GEMINI_2_5_PRO, - reasoning_effort: 'low', - max_tokens: 64000, - temperature: 0.2, - fallbackModel: AIModels.GEMINI_2_5_PRO, + ...SHARED_IMPLEMENTATION_CONFIG, }, phaseImplementation: { name: AIModels.GEMINI_2_5_PRO, - reasoning_effort: 'low', - max_tokens: 64000, - temperature: 0.2, - fallbackModel: AIModels.GEMINI_2_5_PRO, + ...SHARED_IMPLEMENTATION_CONFIG, }, conversationalResponse: { name: AIModels.GEMINI_2_5_FLASH, @@ -173,31 +159,12 @@ for seamless model access without managing multiple keys. temperature: 0, fallbackModel: AIModels.GEMINI_2_5_FLASH, }, - // Not used right now - screenshotAnalysis: { - name: AIModels.DISABLED, - reasoning_effort: 'medium', - max_tokens: 8000, - temperature: 1, - fallbackModel: AIModels.GEMINI_2_5_FLASH, - }, - realtimeCodeFixer: { - name: AIModels.DISABLED, - reasoning_effort: 'low', - max_tokens: 32000, - temperature: 1, - fallbackModel: AIModels.GEMINI_2_5_FLASH, - }, - // Not used right now - fastCodeFixer: { - name: AIModels.DISABLED, - reasoning_effort: undefined, - max_tokens: 64000, - temperature: 0.0, - fallbackModel: AIModels.GEMINI_2_5_PRO, - }, }; +export const AGENT_CONFIG: AgentConfig = env.PLATFORM_MODEL_PROVIDERS + ? PLATFORM_AGENT_CONFIG + : DEFAULT_AGENT_CONFIG; + export const AGENT_CONSTRAINTS: Map = new Map([ ['fastCodeFixer', {