diff --git a/worker/agents/constants.ts b/worker/agents/constants.ts index 907f3a84..3dc6e686 100644 --- a/worker/agents/constants.ts +++ b/worker/agents/constants.ts @@ -113,7 +113,7 @@ export const MAX_TOOL_CALLING_DEPTH_DEFAULT = 7; export const getMaxToolCallingDepth = (agentActionKey: AgentActionKey | 'testModelConfig') => { switch (agentActionKey) { case 'deepDebugger': - return 100; + return 50; default: return MAX_TOOL_CALLING_DEPTH_DEFAULT; } diff --git a/worker/agents/inferutils/config.ts b/worker/agents/inferutils/config.ts index bd8b21a2..2e0a97f6 100644 --- a/worker/agents/inferutils/config.ts +++ b/worker/agents/inferutils/config.ts @@ -7,18 +7,60 @@ import { LiteModels, RegularModels, } from "./config.types"; +import { env } from 'cloudflare:workers'; -export const AGENT_CONFIG: AgentConfig = { +// Common configs - these are good defaults +const COMMON_AGENT_CONFIGS = { templateSelection: { name: AIModels.GEMINI_2_5_FLASH_LITE, max_tokens: 2000, fallbackModel: AIModels.GEMINI_2_5_FLASH, temperature: 0.6, }, + screenshotAnalysis: { + name: AIModels.DISABLED, + reasoning_effort: 'medium' as const, + max_tokens: 8000, + temperature: 1, + fallbackModel: AIModels.GEMINI_2_5_FLASH, + }, + realtimeCodeFixer: { + name: AIModels.DISABLED, + reasoning_effort: 'low' as const, + max_tokens: 32000, + temperature: 1, + fallbackModel: AIModels.GEMINI_2_5_FLASH, + }, + fastCodeFixer: { + name: AIModels.DISABLED, + reasoning_effort: undefined, + max_tokens: 64000, + temperature: 0.0, + fallbackModel: AIModels.GEMINI_2_5_PRO, + }, +} as const; + +const SHARED_IMPLEMENTATION_CONFIG = { + reasoning_effort: 'low' as const, + max_tokens: 48000, + temperature: 0.2, + fallbackModel: AIModels.GEMINI_2_5_PRO, +}; + +//====================================================================================== +// ATTENTION! Platform config requires specific API keys and Cloudflare AI Gateway setup. 
+//====================================================================================== +/* +These are the configs used at build.cloudflare.dev +You may need to provide API keys for these models in your environment or use +Cloudflare AI Gateway unified billing for seamless model access without managing multiple keys. +*/ +const PLATFORM_AGENT_CONFIG: AgentConfig = { + ...COMMON_AGENT_CONFIGS, blueprint: { - name: AIModels.GEMINI_3_PRO_PREVIEW, + name: AIModels.OPENAI_5_MINI, reasoning_effort: 'medium', - max_tokens: 64000, + max_tokens: 32000, fallbackModel: AIModels.GEMINI_2_5_FLASH, temperature: 1.0, }, @@ -37,18 +79,12 @@ export const AGENT_CONFIG: AgentConfig = { fallbackModel: AIModels.GEMINI_2_5_FLASH, }, firstPhaseImplementation: { - name: AIModels.GEMINI_3_PRO_PREVIEW, - reasoning_effort: 'low', - max_tokens: 48000, - temperature: 1, - fallbackModel: AIModels.GEMINI_2_5_PRO, + name: AIModels.GEMINI_2_5_PRO, + ...SHARED_IMPLEMENTATION_CONFIG, }, phaseImplementation: { - name: AIModels.GEMINI_3_PRO_PREVIEW, - reasoning_effort: 'low', - max_tokens: 48000, - temperature: 0.2, - fallbackModel: AIModels.GEMINI_2_5_PRO, + name: AIModels.GEMINI_2_5_PRO, + ...SHARED_IMPLEMENTATION_CONFIG, }, conversationalResponse: { name: AIModels.GROK_4_FAST, @@ -71,31 +107,65 @@ export const AGENT_CONFIG: AgentConfig = { temperature: 1, fallbackModel: AIModels.GROK_CODE_FAST_1, }, - // Not used right now - screenshotAnalysis: { +}; + +//====================================================================================== +// Default Gemini-only config (most likely used in your deployment) +//====================================================================================== +/* These are the default out-of-the-box Gemini-only models used when PLATFORM_MODEL_PROVIDERS is not set */ +const DEFAULT_AGENT_CONFIG: AgentConfig = { + ...COMMON_AGENT_CONFIGS, blueprint: { name: AIModels.GEMINI_2_5_PRO, reasoning_effort: 'medium', + max_tokens: 64000, + fallbackModel:
AIModels.GEMINI_2_5_FLASH, + temperature: 0.7, + }, + projectSetup: { + name: AIModels.GEMINI_2_5_PRO, + ...SHARED_IMPLEMENTATION_CONFIG, + }, + phaseGeneration: { + name: AIModels.GEMINI_2_5_PRO, + ...SHARED_IMPLEMENTATION_CONFIG, + }, + firstPhaseImplementation: { + name: AIModels.GEMINI_2_5_PRO, + ...SHARED_IMPLEMENTATION_CONFIG, + }, + phaseImplementation: { + name: AIModels.GEMINI_2_5_PRO, + ...SHARED_IMPLEMENTATION_CONFIG, + }, + conversationalResponse: { + name: AIModels.GEMINI_2_5_FLASH, + reasoning_effort: 'low', + max_tokens: 4000, + temperature: 0, + fallbackModel: AIModels.GEMINI_2_5_PRO, + }, + deepDebugger: { + name: AIModels.GEMINI_2_5_PRO, + reasoning_effort: 'high', max_tokens: 8000, - temperature: 1, + temperature: 0.5, fallbackModel: AIModels.GEMINI_2_5_FLASH, }, - realtimeCodeFixer: { - name: AIModels.DISABLED, + fileRegeneration: { + name: AIModels.GEMINI_2_5_PRO, reasoning_effort: 'low', max_tokens: 32000, - temperature: 1, + temperature: 0, fallbackModel: AIModels.GEMINI_2_5_FLASH, }, - // Not used right now - fastCodeFixer: { - name: AIModels.DISABLED, - reasoning_effort: undefined, - max_tokens: 64000, - temperature: 0.0, - fallbackModel: AIModels.GEMINI_2_5_PRO, - }, }; +export const AGENT_CONFIG: AgentConfig = env.PLATFORM_MODEL_PROVIDERS + ? 
PLATFORM_AGENT_CONFIG + : DEFAULT_AGENT_CONFIG; + + export const AGENT_CONSTRAINTS: Map = new Map([ ['fastCodeFixer', { allowedModels: new Set([AIModels.DISABLED]), diff --git a/worker/agents/inferutils/config.types.ts b/worker/agents/inferutils/config.types.ts index 63cd2a05..8e94fbee 100644 --- a/worker/agents/inferutils/config.types.ts +++ b/worker/agents/inferutils/config.types.ts @@ -252,28 +252,28 @@ const MODELS_MASTER = { nonReasoning: true, } }, - // GROK_4_1_FAST: { - // id: 'grok/grok-4.1-fast', - // config: { - // name: 'Grok 4.1 Fast', - // size: ModelSize.LITE, - // provider: 'grok', - // creditCost: 0.8, // $0.20 - // contextSize: 2_000_000, // 2M Context - // nonReasoning: true, - // } - // }, - // GROQ_GPT_120_OSS: { - // id: 'groq/gpt-oss-120b', - // config: { - // name: 'GROQ GPT 120B OSS', - // size: ModelSize.LITE, - // provider: 'groq', - // creditCost: 0.4, // $0.25 - // contextSize: 131072, // 128K Context - // nonReasoning: true, - // } - // }, + + // --- Vertex Models --- + VERTEX_GPT_OSS_120: { + id: 'google-vertex-ai/openai/gpt-oss-120b', + config: { + name: 'Google Vertex GPT OSS 120B', + size: ModelSize.LITE, + provider: 'google-vertex-ai', + creditCost: 0.36, // $0.09 + contextSize: 131072, // 128K Context + } + }, + VERTEX_KIMI_THINKING: { + id: 'google-vertex-ai/moonshotai/kimi-k2-thinking', + config: { + name: 'Google Vertex Kimi K2 Thinking', + size: ModelSize.LITE, + provider: 'google-vertex-ai', + creditCost: 2, // $0.50 + contextSize: 262144, // 256K Context + } + }, } as const; /** diff --git a/worker/agents/operations/UserConversationProcessor.ts b/worker/agents/operations/UserConversationProcessor.ts index 43ebaafe..f04e5572 100644 --- a/worker/agents/operations/UserConversationProcessor.ts +++ b/worker/agents/operations/UserConversationProcessor.ts @@ -25,7 +25,7 @@ const CHUNK_SIZE = 64; // Compactification thresholds const COMPACTIFICATION_CONFIG = { MAX_TURNS: 40, // Trigger after 40 conversation turns -
MAX_ESTIMATED_TOKENS: 100000, + MAX_ESTIMATED_TOKENS: 50000, PRESERVE_RECENT_MESSAGES: 10, // Always keep last 10 messages uncompacted CHARS_PER_TOKEN: 4, // Rough estimation: 1 token ≈ 4 characters } as const; diff --git a/worker/agents/planning/blueprint.ts b/worker/agents/planning/blueprint.ts index 5f3148f9..f347cd7d 100644 --- a/worker/agents/planning/blueprint.ts +++ b/worker/agents/planning/blueprint.ts @@ -19,22 +19,23 @@ const SYSTEM_PROMPT = ` - You are tasked with creating a detailed yet concise, information-dense blueprint (PRD) for a web application project for our client: designing and outlining the frontend UI/UX and core functionality of the application with exceptional focus on visual appeal and user experience. + You are tasked with creating a detailed yet concise, information-dense blueprint (PRD) for a web application project for our client: designing and outlining the frontend UI/UX (user interface, user experience) and core functionality of the application with exceptional focus on visual appeal, user experience, product quality, completion and polish. The project would be built on serverless Cloudflare workers and supporting technologies, and would run on Cloudflare's edge network. The project would be seeded with a starting template. - Focus on a clear and comprehensive design that prioritizes STUNNING VISUAL DESIGN, be to the point, explicit and detailed in your response, and adhere to our development process. + Focus on a clear and comprehensive design that prioritizes STUNNING VISUAL DESIGN, polish and depth, be to the point, explicit and detailed in your response, and adhere to our development process. Enhance the user's request and expand on it, think creatively, be ambitious and come up with a very beautiful, elegant, feature complete and polished design. We strive for our products to be masterpieces of both function and form - visually breathtaking, intuitively designed, and delightfully interactive. 
- **REMEMBER: This is not a toy or educational project. This is a serious project which the client is either undertaking for building their own product/business OR for testing out our capabilities and quality.** + **REMEMBER: This is not a toy or demo project. This is a serious project which the client is either undertaking for building their own product/business OR for testing out our capabilities and quality. We do not just expect an MVP, We expect a production-ready, polished, and exceptional solution** Design the product described by the client and come up with a really nice and professional name for the product. Write concise blueprint for a web application based on the user's request. Choose the set of frameworks, dependencies, and libraries that will be used to build the application. - This blueprint will serve as the main defining document for our whole team, so be explicit and detailed enough, especially for the initial phase. + This blueprint will serve as the main defining and guiding document for our whole team, so be explicit and detailed enough, especially for the initial phase. Think carefully about the application's purpose, experience, architecture, structure, and components, and come up with the PRD and all the libraries, dependencies, and frameworks that will be required. **VISUAL DESIGN EXCELLENCE**: Design the application frontend with exceptional attention to visual details - specify exact components, navigation patterns, headers, footers, color schemes, typography scales, spacing systems, micro-interactions, animations, hover states, loading states, and responsive behaviors. **USER EXPERIENCE FOCUS**: Plan intuitive user flows, clear information hierarchy, accessible design patterns, and delightful interactions that make users want to use the application. Build upon the provided template. Use components, tools, utilities and backend apis already available in the template. 
+ Think and **BREAKDOWN** The project into multiple incremental phases that build upon each other to create a complete, polished product following our . @@ -77,6 +78,7 @@ const SYSTEM_PROMPT = ` • **TEMPLATE ENHANCEMENT:** Build upon the while significantly elevating its visual appeal. Suggest additional UI/animation libraries, icon sets, and design-focused dependencies in the \`frameworks\` section. - Enhance existing project patterns with beautiful visual treatments - Add sophisticated styling and interaction libraries as needed + - Be aware of template design/layout short-comings and take it into account during your planning and in pitfalls. ## Important use case specific instructions: {{usecaseSpecificInstructions}} @@ -101,6 +103,7 @@ const SYSTEM_PROMPT = ` + • **Ultra think:** Do thorough thinking internally first before writing the blueprint. Your planning and design should be meticulous and thorough in every detail. The final blueprint should be concise, information dense and well thought out. • **Completeness is Crucial:** The AI coder relies *solely* on this blueprint. Leave no ambiguity. • **Precision in UI/Layout:** Define visual structure explicitly. Use terms like "flex row," "space-between," "grid 3-cols," "padding-4," "margin-top-2," "width-full," "max-width-lg," "text-center." Specify responsive behavior. • **Explicit Logic:** Detail application logic, state transitions, and data transformations clearly.