code · pull · Aug 7, 2025 · Aug 7, 2025 · Aug 7, 2025
diff --git a/package-lock.json b/package-lock.json
diff --git a/package.json b/package.json
@@ -49,6 +49,7 @@
     "test:postgres": " node --env-file=variables/test.env ./node_modules/mocha/bin/mocha -r esbuild-register -r \"./src/test/testSetup.ts\" \"src/modules/postgres/*.test.ts\" --timeout 10000",
     "test:mongo": "    node --env-file=variables/test.env ./node_modules/mocha/bin/mocha -r esbuild-register -r \"./src/test/testSetup.ts\" \"src/modules/mongo/*.test.ts\" --timeout 10000",
     "test:db": "       node --env-file=variables/test.env ./node_modules/mocha/bin/mocha -r esbuild-register -r \"./src/test/testSetup.ts\" \"src/modules/{firestore,mongo,postgres}/*.test.ts\" --timeout 10000",
+    "test:single": "   node --env-file=variables/test.env ./node_modules/mocha/bin/mocha -r esbuild-register -r \"./src/test/testSetup.ts\" --timeout 10000 --exit",
     "test:ci:firestore": "firebase emulators:exec --only firestore \"npm run test:firestore\"",
     "test:ci:postgres": " npm run test:postgres",
     "test:ci:mongo": "    npm run test:mongo",
@@ -99,7 +100,7 @@
     "@grpc/grpc-js": "^1.12.6",
     "@microsoft/tiktokenizer": "^1.0.8",
     "@mistralai/mistralai": "^1.7.1",
-    "@modelcontextprotocol/sdk": "^1.13.2",
+    "@modelcontextprotocol/sdk": "^1.17.1",
     "@mozilla/readability": "^0.6.0",
     "@octokit/request": "^5.1.0",
     "@openrouter/ai-sdk-provider": "^0.4.5",

diff --git a/src/agent/agentContext.test.ts b/src/agent/agentContext.test.ts
@@ -6,7 +6,7 @@ import { deserializeContext, serializeContext } from '#agent/agentSerialization'
 import type { RunAgentConfig } from '#agent/autonomous/autonomousAgentRunner';
 import { appContext } from '#app/applicationContext';
 import { LlmTools } from '#functions/llmTools';
-import { openaiGPT41 } from '#llm/services/openai';
+import { openaiGPT5 } from '#llm/services/openai';
 import type { AgentContext } from '#shared/agent/agent.model';
 import { functionRegistry } from '../functionRegistry';
 
@@ -19,10 +19,10 @@ describe('agentContext', () => {
 	describe('serialisation', () => {
 		it('should be be identical after serialisation and deserialization', async () => {
 			const llms = {
-				easy: openaiGPT41(),
-				medium: openaiGPT41(),
-				hard: openaiGPT41(),
-				xhard: openaiGPT41(),
+				easy: openaiGPT5(),
+				medium: openaiGPT5(),
+				hard: openaiGPT5(),
+				xhard: openaiGPT5(),
 			};
 			// We want to check that the FileSystem gets re-added by the resetFileSystemFunction function
 			const functions = new LlmFunctionsImpl(LlmTools); // FileSystemRead

diff --git a/src/agent/autonomous/agentCompletion.ts b/src/agent/autonomous/agentCompletion.ts
@@ -4,15 +4,14 @@ import type { FunctionCallResult } from '#shared/llm/llm.model';
 import { envVar } from '#utils/env-var';
 
 /**
- * Runs the completionHandler on an agent
- * @param agent
+ * Executes the completion handler for a given agent. If the handler throws, the error is logged and not rethrown.
+ * @param agent - The agent context containing the completion handler to be invoked.
  */
 export async function runAgentCompleteHandler(agent: AgentContext): Promise<void> {
 	try {
 		await agent.completedHandler?.notifyCompleted(agent);
 	} catch (e) {
-		logger.warn(e, `Completion handler error for agent ${agent.agentId}`);
-		throw e;
+		logger.error(e, `Completion handler error for agent ${agent.agentId}`);
 	}
 }
 

diff --git a/src/agent/autonomous/autonomousAgentRunner.ts b/src/agent/autonomous/autonomousAgentRunner.ts
@@ -383,10 +383,6 @@ async function checkRepoHomeAndWorkingDirectory(agent: AgentContext) {
 		agent.typedAiRepoDir = currentRepoDir;
 	}
 	const workingDir = fss.getWorkingDirectory();
-	logger.info({ workingDir }, 'Verifying working directory exists');
 	const workDirExists = await fss.directoryExists(workingDir);
-	if (!workDirExists) {
-		throw new Error(`Working directory ${workingDir} does not exist or is not a directory.`);
-	}
-	logger.info({ workingDir }, 'Working directory verified.');
+	if (!workDirExists) throw new Error(`Working directory ${workingDir} does not exist or is not a directory.`);
 }
diff --git a/src/cli/cli.ts b/src/cli/cli.ts
@@ -3,10 +3,10 @@ import path, { join } from 'node:path';
 import { systemDir } from '#app/appDirs';
 import { FastMediumLLM } from '#llm/multi-agent/fastMedium';
 import { MAD_Balanced, MAD_Fast, MAD_SOTA } from '#llm/multi-agent/reasoning-debate';
-import { Claude4_Opus_Vertex } from '#llm/services/anthropic-vertex';
+import { Claude4_1_Opus_Vertex } from '#llm/services/anthropic-vertex';
 import { cerebrasQwen3_235b_Thinking, cerebrasQwen3_Coder } from '#llm/services/cerebras';
 import { defaultLLMs } from '#llm/services/defaultLlms';
-import { openAIo3 } from '#llm/services/openai';
+import { openaiGPT5, openaiGPT5mini, openaiGPT5nano } from '#llm/services/openai';
 import { perplexityDeepResearchLLM, perplexityLLM, perplexityReasoningProLLM } from '#llm/services/perplexity-llm';
 import { xai_Grok4 } from '#llm/services/xai';
 import { logger } from '#o11y/logger';
@@ -21,11 +21,13 @@ export const LLM_CLI_ALIAS: Record<string, () => LLM> = {
 	f: cerebrasQwen3_235b_Thinking,
 	cc: cerebrasQwen3_Coder,
 	x: xai_Grok4,
-	o3: openAIo3,
+	g5: openaiGPT5,
+	g5m: openaiGPT5mini,
+	g5n: openaiGPT5nano,
 	madb: MAD_Balanced,
 	mads: MAD_SOTA,
 	madf: MAD_Fast,
-	opus: Claude4_Opus_Vertex,
+	opus: Claude4_1_Opus_Vertex,
 	pp1: perplexityLLM,
 	pp2: perplexityReasoningProLLM,
 	pp3: perplexityDeepResearchLLM,

diff --git a/src/cli/gen.ts b/src/cli/gen.ts
@@ -1,7 +1,7 @@
 import '#fastify/trace-init/trace-init'; // leave an empty line next so this doesn't get sorted from the first line
 
 import { writeFileSync } from 'node:fs';
-import { initInMemoryApplicationContext } from '#app/applicationContext';
+import { initApplicationContext, initInMemoryApplicationContext } from '#app/applicationContext';
 import { ReasonerDebateLLM } from '#llm/multi-agent/reasoning-debate';
 import { defaultLLMs } from '#llm/services/defaultLlms';
 import { countTokens } from '#llm/tokens';
@@ -13,11 +13,13 @@ import { parsePromptWithImages } from './promptParser';
 // npm run gen
 
 async function main() {
-	await initInMemoryApplicationContext();
-
 	const { initialPrompt: rawPrompt, llmId, flags } = parseProcessArgs();
 	const { textPrompt, userContent } = parsePromptWithImages(rawPrompt);
 
+	// -s flag: save to the database; without it the in-memory application context is used
+	if (flags.s) await initApplicationContext();
+	else await initInMemoryApplicationContext();
+
 	let llm: LLM = defaultLLMs().medium;
 	if (llmId) {
 		if (!LLM_CLI_ALIAS[llmId]) {

diff --git a/src/functions/scm/git.ts b/src/functions/scm/git.ts
@@ -74,7 +74,7 @@ export class Git implements VersionControlSystem {
 
 		// The fix is to execute a specific commit command that targets only the added files.
 		const commitResult = await execCommand(`git commit -m ${arg(commitMessage)} -- ${filesToAdd}`);
-		// Pre-commit hooks make call lint/commit commands with
+		// Pre-commit hooks may call lint/commit commands whose output includes characters for colours etc
 		commitResult.stdout = formatAnsiWithMarkdownLinks(commitResult.stdout);
 		failOnError(`Failed to commit changes for files: ${files.join(', ')}`, commitResult);
 	}

diff --git a/src/llm/multi-agent/blackberry.ts b/src/llm/multi-agent/blackberry.ts
@@ -1,7 +1,7 @@
 import { BaseLLM } from '#llm/base-llm';
 import { Claude4_Sonnet_Vertex } from '#llm/services/anthropic-vertex';
 import { fireworksLlama3_405B } from '#llm/services/fireworks';
-import { openaiGPT41 } from '#llm/services/openai';
+import { openaiGPT5 } from '#llm/services/openai';
 import { logger } from '#o11y/logger';
 import type { GenerateTextOptions, LLM } from '#shared/llm/llm.model';
 
@@ -65,7 +65,7 @@ const MIND_OVER_DATA_SYS_PROMPT = `When addressing a problem, employ "Comparativ
 `;
 
 export class Blackberry extends BaseLLM {
-	llms: LLM[] = [Claude4_Sonnet_Vertex(), openaiGPT41(), Claude4_Sonnet_Vertex()];
+	llms: LLM[] = [Claude4_Sonnet_Vertex(), openaiGPT5(), Claude4_Sonnet_Vertex()];
 	mediator: LLM = Claude4_Sonnet_Vertex();
 
 	constructor() {

diff --git a/src/llm/multi-agent/reasoning-debate.ts b/src/llm/multi-agent/reasoning-debate.ts
@@ -2,9 +2,9 @@ import { BaseLLM } from '#llm/base-llm';
 import { getLLM } from '#llm/llmFactory';
 import { FastMediumLLM } from '#llm/multi-agent/fastMedium';
 import { anthropicClaude4_Sonnet } from '#llm/services/anthropic';
-import { Claude4_Opus_Vertex, Claude4_Sonnet_Vertex } from '#llm/services/anthropic-vertex';
+import { Claude4_1_Opus_Vertex, Claude4_Sonnet_Vertex } from '#llm/services/anthropic-vertex';
 import { deepinfraDeepSeekR1 } from '#llm/services/deepinfra';
-import { openAIo3 } from '#llm/services/openai';
+import { openaiGPT5 } from '#llm/services/openai';
 import { vertexGemini_2_5_Pro } from '#llm/services/vertexai';
 import { xai_Grok4 } from '#llm/services/xai';
 import { logger } from '#o11y/logger';
@@ -83,7 +83,7 @@ export function MAD_Balanced(): LLM {
 	return new ReasonerDebateLLM(
 		'Balanced',
 		vertexGemini_2_5_Pro,
-		[vertexGemini_2_5_Pro, xai_Grok4, openAIo3],
+		[vertexGemini_2_5_Pro, xai_Grok4, openaiGPT5],
 		'MAD:Balanced multi-agent debate (Gemini 2.5 Pro, Grok 4, o3)',
 	);
 }
@@ -92,7 +92,7 @@ export function MAD_Balanced4(): LLM {
 	return new ReasonerDebateLLM(
 		'Balanced4',
 		vertexGemini_2_5_Pro,
-		[vertexGemini_2_5_Pro, xai_Grok4, openAIo3, Claude4_Sonnet_Vertex],
+		[vertexGemini_2_5_Pro, xai_Grok4, openaiGPT5, Claude4_Sonnet_Vertex],
 		'MAD:Balanced multi-agent debate (Gemini 2.5 Pro, Grok 4, o3, Sonnet 4)',
 	);
 }
@@ -116,7 +116,7 @@ export function MAD_Anthropic(): LLM {
 }
 
 export function MAD_OpenAI(): LLM {
-	return new ReasonerDebateLLM('OpenAI', openAIo3, [openAIo3, openAIo3, openAIo3], 'MAD:OpenAI multi-agent debate (o3 x3)');
+	return new ReasonerDebateLLM('OpenAI', openaiGPT5, [openaiGPT5, openaiGPT5, openaiGPT5], 'MAD:OpenAI multi-agent debate (GPT5 x3)');
 }
 
 export function MAD_Grok(): LLM {
@@ -126,9 +126,9 @@ export function MAD_Grok(): LLM {
 export function MAD_SOTA(): LLM {
 	return new ReasonerDebateLLM(
 		'SOTA',
-		xai_Grok4,
-		[openAIo3, Claude4_Opus_Vertex, vertexGemini_2_5_Pro, xai_Grok4],
-		'MAD:SOTA multi-agent debate (Opus 4, o3, Gemini 2.5 Pro, Grok 4)',
+		openaiGPT5,
+		[openaiGPT5, Claude4_1_Opus_Vertex, vertexGemini_2_5_Pro, xai_Grok4],
+		'MAD:SOTA multi-agent debate (Opus 4, GPT5, Gemini 2.5 Pro, Grok 4)',
 	);
 }
 

diff --git a/src/llm/services/anthropic-vertex.ts b/src/llm/services/anthropic-vertex.ts
@@ -14,15 +14,18 @@ export function anthropicVertexLLMRegistry(): Record<string, () => LLM> {
 	return {
 		[`${ANTHROPIC_VERTEX_SERVICE}:claude-3-5-haiku`]: Claude3_5_Haiku_Vertex,
 		[`${ANTHROPIC_VERTEX_SERVICE}:claude-sonnet-4`]: Claude4_Sonnet_Vertex,
-		[`${ANTHROPIC_VERTEX_SERVICE}:claude-opus-4`]: Claude4_Opus_Vertex,
+		[`${ANTHROPIC_VERTEX_SERVICE}:claude-opus-4-1@20250805`]: Claude4_1_Opus_Vertex,
 	};
 }
 
 // Supported image types image/jpeg', 'image/png', 'image/gif' or 'image/webp'
-export function Claude4_Opus_Vertex(): LLM {
-	return new AnthropicVertexLLM('Claude 4 Opus (Vertex)', 'claude-opus-4', 200_000, anthropicCostFunction(15, 75));
+
+// https://cloud.google.com/vertex-ai/generative-ai/docs/partner-models/claude/opus-4-1
+export function Claude4_1_Opus_Vertex(): LLM {
+	return new AnthropicVertexLLM('Claude 4.1 Opus (Vertex)', 'claude-opus-4-1@20250805', 200_000, anthropicCostFunction(15, 75), ['claude-opus-4']);
 }
 
+// https://cloud.google.com/vertex-ai/generative-ai/docs/partner-models/claude/sonnet-4
 export function Claude4_Sonnet_Vertex(): LLM {
 	return new AnthropicVertexLLM('Claude 4 Sonnet (Vertex)', 'claude-sonnet-4', 200_000, anthropicCostFunction(3, 15));
 }
@@ -55,7 +58,7 @@ export function ClaudeVertexLLMs(): AgentLLMs {
 		easy: Claude3_5_Haiku_Vertex(),
 		medium: Claude4_Sonnet_Vertex(),
 		hard: Claude4_Sonnet_Vertex(),
-		xhard: Claude4_Opus_Vertex(),
+		xhard: Claude4_1_Opus_Vertex(),
 	};
 }
 
@@ -73,8 +76,8 @@ let gcloudProjectIndex = 0;
  * Vertex AI models - Gemini
  */
 class AnthropicVertexLLM extends AiLLM<GoogleVertexAnthropicProvider> {
-	constructor(displayName: string, model: string, maxInputToken: number, calculateCosts: LlmCostFunction) {
-		super(displayName, ANTHROPIC_VERTEX_SERVICE, model, maxInputToken, calculateCosts);
+	constructor(displayName: string, model: string, maxInputToken: number, calculateCosts: LlmCostFunction, oldIds?: string[]) {
+		super(displayName, ANTHROPIC_VERTEX_SERVICE, model, maxInputToken, calculateCosts, oldIds);
 	}
 
 	protected apiKey(): string {

diff --git a/src/llm/services/anthropic.ts b/src/llm/services/anthropic.ts
@@ -12,26 +12,18 @@ export function anthropicLLMRegistry(): Record<string, () => LLM> {
 	return {
 		[`${ANTHROPIC_SERVICE}:claude-3-5-haiku`]: Claude3_5_Haiku,
 		[`${ANTHROPIC_SERVICE}:claude-sonnet-4-0`]: anthropicClaude4_Sonnet,
-		[`${ANTHROPIC_SERVICE}:claude-opus-4-0`]: anthropicClaude4_Opus,
+		[`${ANTHROPIC_SERVICE}:claude-opus-4-1-20250805`]: anthropicClaude4_1_Opus,
 	};
 }
 
-export function anthropicClaude4_Opus(): LLM {
-	return new Anthropic('Claude 4 Opus (Anthropic)', 'claude-opus-4-0', anthropicCostFunction(15, 75));
+export function anthropicClaude4_1_Opus(): LLM {
+	return new Anthropic('Claude 4.1 Opus (Anthropic)', 'claude-opus-4-1-20250805', anthropicCostFunction(15, 75), ['claude-opus-4-0']);
 }
 
 export function anthropicClaude4_Sonnet(): LLM {
 	return new Anthropic('Claude 4 Sonnet (Anthropic)', 'claude-sonnet-4-0', anthropicCostFunction(3, 15));
 }
 
-// export function Claude3_5_Sonnet() {
-// 	return new Anthropic('Claude 3.5 Sonnet', 'claude-3-5-sonnet-20241022', 3, 15);
-// }
-
-// export function Claude3_7_Sonnet() {
-// 	return new Anthropic('Claude 3.7 Sonnet', 'claude-3-7-sonnet-latest', 3, 15);
-// }
-
 export function Claude3_5_Haiku(): LLM {
 	return new Anthropic('Claude 3.5 Haiku', 'claude-3-5-haiku-20241022', anthropicCostFunction(1, 5));
 }
@@ -54,7 +46,7 @@ function anthropicCostFunction(inputMil: number, outputMil: number): LlmCostFunc
 
 export function ClaudeLLMs(): AgentLLMs {
 	const sonnet4 = anthropicClaude4_Sonnet();
-	const opus = anthropicClaude4_Opus();
+	const opus = anthropicClaude4_1_Opus();
 	return {
 		easy: Claude3_5_Haiku(),
 		medium: sonnet4,
@@ -64,8 +56,8 @@ export function ClaudeLLMs(): AgentLLMs {
 }
 
 export class Anthropic extends AiLLM<AnthropicProvider> {
-	constructor(displayName: string, model: string, calculateCosts: LlmCostFunction) {
-		super(displayName, ANTHROPIC_SERVICE, model, 200_000, calculateCosts);
+	constructor(displayName: string, model: string, calculateCosts: LlmCostFunction, oldIds?: string[]) {
+		super(displayName, ANTHROPIC_SERVICE, model, 200_000, calculateCosts, oldIds);
 	}
 
 	protected apiKey(): string {

diff --git a/src/llm/services/cerebras.ts b/src/llm/services/cerebras.ts
@@ -65,7 +65,7 @@ export class CerebrasLLM extends AiLLM<OpenAIProvider> {
 		if (this.getModel().includes('qwen-3')) {
 			return wrapLanguageModel({
 				model: aiModel,
-				middleware: extractReasoningMiddleware({ tagName: 'think' }),
+				middleware: extractReasoningMiddleware({ tagName: 'think', startWithReasoning: true }),
 			});
 		}
 		return aiModel;

diff --git a/src/llm/services/defaultLlms.ts b/src/llm/services/defaultLlms.ts
@@ -11,7 +11,7 @@ import { cerebrasQwen3_235b_Thinking } from './cerebras';
 import { Gemini_2_5_Flash, Gemini_2_5_Pro } from './gemini';
 import { groqLlama4_Scout } from './groq';
 import { Ollama_LLMs } from './ollama';
-import { openAIo3, openaiGPT41, openaiGPT41mini } from './openai';
+import { openaiGPT5, openaiGPT5mini } from './openai';
 import { xai_Grok4 } from './xai';
 
 let _summaryLLM: LLM;
@@ -31,15 +31,15 @@ export function defaultLLMs(): AgentLLMs {
 	// 	return _defaultLLMs;
 	// }
 
-	const easyLLMs = [new FastEasyLLM(), vertexGemini_2_5_Flash(), Gemini_2_5_Flash(), groqLlama4_Scout(), openaiGPT41mini(), Claude3_5_Haiku()];
+	const easyLLMs = [new FastEasyLLM(), vertexGemini_2_5_Flash(), Gemini_2_5_Flash(), groqLlama4_Scout(), openaiGPT5mini(), Claude3_5_Haiku()];
 	const easy: LLM | undefined = easyLLMs.find((llm) => llm.isConfigured());
 	if (!easy) throw new Error('No default easy LLM configured');
 
-	const mediumLLMs = [new FastMediumLLM(), vertexGemini_2_5_Flash(), Gemini_2_5_Flash(), cerebrasQwen3_235b_Thinking(), openaiGPT41(), Claude3_5_Haiku()];
+	const mediumLLMs = [new FastMediumLLM(), vertexGemini_2_5_Flash(), Gemini_2_5_Flash(), cerebrasQwen3_235b_Thinking(), openaiGPT5(), Claude3_5_Haiku()];
 	const medium: LLM | undefined = mediumLLMs.find((llm) => llm.isConfigured());
 	if (!medium) throw new Error('No default medium LLM configured');
 
-	const hardLLMs = [vertexGemini_2_5_Pro(), Gemini_2_5_Pro(), xai_Grok4(), openAIo3(), anthropicClaude4_Sonnet()];
+	const hardLLMs = [vertexGemini_2_5_Pro(), Gemini_2_5_Pro(), xai_Grok4(), openaiGPT5(), anthropicClaude4_Sonnet()];
 	const hard: LLM | undefined = hardLLMs.find((llm) => llm.isConfigured());
 	if (!hard) throw new Error('No default hard LLM configured');
 

diff --git a/src/llm/services/llm.int.ts b/src/llm/services/llm.int.ts
@@ -6,7 +6,7 @@ import { deepSeekV3 } from '#llm/services/deepseek';
 import { fireworksLlama3_70B } from '#llm/services/fireworks';
 import { nebiusDeepSeekR1 } from '#llm/services/nebius';
 import { Ollama_Phi3 } from '#llm/services/ollama';
-import { openaiGPT41mini } from '#llm/services/openai';
+import { openaiGPT5mini } from '#llm/services/openai';
 import { perplexityLLM } from '#llm/services/perplexity-llm';
 import { sambanovaDeepseekR1, sambanovaLlama3_3_70b, sambanovaLlama3_3_70b_R1_Distill } from '#llm/services/sambanova';
 import { togetherDeepSeekR1_0528_tput } from '#llm/services/together';
@@ -197,7 +197,7 @@ describe('LLMs', () => {
 	});
 
 	describe('OpenAI', () => {
-		const llm = openaiGPT41mini();
+		const llm = openaiGPT5mini();
 
 		it('should generateText', async () => {
 			const response = await llm.generateText(SKY_PROMPT, { temperature: 0, id: 'test' });