Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 5 additions & 4 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 2 additions & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@
"test:postgres": " node --env-file=variables/test.env ./node_modules/mocha/bin/mocha -r esbuild-register -r \"./src/test/testSetup.ts\" \"src/modules/postgres/*.test.ts\" --timeout 10000",
"test:mongo": " node --env-file=variables/test.env ./node_modules/mocha/bin/mocha -r esbuild-register -r \"./src/test/testSetup.ts\" \"src/modules/mongo/*.test.ts\" --timeout 10000",
"test:db": " node --env-file=variables/test.env ./node_modules/mocha/bin/mocha -r esbuild-register -r \"./src/test/testSetup.ts\" \"src/modules/{firestore,mongo,postgres}/*.test.ts\" --timeout 10000",
"test:single": " node --env-file=variables/test.env ./node_modules/mocha/bin/mocha -r esbuild-register -r \"./src/test/testSetup.ts\" --timeout 10000 --exit",
"test:ci:firestore": "firebase emulators:exec --only firestore \"npm run test:firestore\"",
"test:ci:postgres": " npm run test:postgres",
"test:ci:mongo": " npm run test:mongo",
Expand Down Expand Up @@ -99,7 +100,7 @@
"@grpc/grpc-js": "^1.12.6",
"@microsoft/tiktokenizer": "^1.0.8",
"@mistralai/mistralai": "^1.7.1",
"@modelcontextprotocol/sdk": "^1.13.2",
"@modelcontextprotocol/sdk": "^1.17.1",
"@mozilla/readability": "^0.6.0",
"@octokit/request": "^5.1.0",
"@openrouter/ai-sdk-provider": "^0.4.5",
Expand Down
10 changes: 5 additions & 5 deletions src/agent/agentContext.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ import { deserializeContext, serializeContext } from '#agent/agentSerialization'
import type { RunAgentConfig } from '#agent/autonomous/autonomousAgentRunner';
import { appContext } from '#app/applicationContext';
import { LlmTools } from '#functions/llmTools';
import { openaiGPT41 } from '#llm/services/openai';
import { openaiGPT5 } from '#llm/services/openai';
import type { AgentContext } from '#shared/agent/agent.model';
import { functionRegistry } from '../functionRegistry';

Expand All @@ -19,10 +19,10 @@ describe('agentContext', () => {
describe('serialisation', () => {
it('should be be identical after serialisation and deserialization', async () => {
const llms = {
easy: openaiGPT41(),
medium: openaiGPT41(),
hard: openaiGPT41(),
xhard: openaiGPT41(),
easy: openaiGPT5(),
medium: openaiGPT5(),
hard: openaiGPT5(),
xhard: openaiGPT5(),
};
// We want to check that the FileSystem gets re-added by the resetFileSystemFunction function
const functions = new LlmFunctionsImpl(LlmTools); // FileSystemRead
Expand Down
7 changes: 3 additions & 4 deletions src/agent/autonomous/agentCompletion.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,14 @@ import type { FunctionCallResult } from '#shared/llm/llm.model';
import { envVar } from '#utils/env-var';

/**
* Runs the completionHandler on an agent
* @param agent
* Executes the completion handler for a given agent, if one is set. If the handler throws, the error is logged and not rethrown.
* @param agent - The agent context containing the completion handler to be invoked.
*/
export async function runAgentCompleteHandler(agent: AgentContext): Promise<void> {
try {
await agent.completedHandler?.notifyCompleted(agent);
} catch (e) {
logger.warn(e, `Completion handler error for agent ${agent.agentId}`);
throw e;
logger.error(e, `Completion handler error for agent ${agent.agentId}`);
}
}

Expand Down
6 changes: 1 addition & 5 deletions src/agent/autonomous/autonomousAgentRunner.ts
Original file line number Diff line number Diff line change
Expand Up @@ -383,10 +383,6 @@ async function checkRepoHomeAndWorkingDirectory(agent: AgentContext) {
agent.typedAiRepoDir = currentRepoDir;
}
const workingDir = fss.getWorkingDirectory();
logger.info({ workingDir }, 'Verifying working directory exists');
const workDirExists = await fss.directoryExists(workingDir);
if (!workDirExists) {
throw new Error(`Working directory ${workingDir} does not exist or is not a directory.`);
}
logger.info({ workingDir }, 'Working directory verified.');
if (!workDirExists) throw new Error(`Working directory ${workingDir} does not exist or is not a directory.`);
}
10 changes: 6 additions & 4 deletions src/cli/cli.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,10 @@ import path, { join } from 'node:path';
import { systemDir } from '#app/appDirs';
import { FastMediumLLM } from '#llm/multi-agent/fastMedium';
import { MAD_Balanced, MAD_Fast, MAD_SOTA } from '#llm/multi-agent/reasoning-debate';
import { Claude4_Opus_Vertex } from '#llm/services/anthropic-vertex';
import { Claude4_1_Opus_Vertex } from '#llm/services/anthropic-vertex';
import { cerebrasQwen3_235b_Thinking, cerebrasQwen3_Coder } from '#llm/services/cerebras';
import { defaultLLMs } from '#llm/services/defaultLlms';
import { openAIo3 } from '#llm/services/openai';
import { openaiGPT5, openaiGPT5mini, openaiGPT5nano } from '#llm/services/openai';
import { perplexityDeepResearchLLM, perplexityLLM, perplexityReasoningProLLM } from '#llm/services/perplexity-llm';
import { xai_Grok4 } from '#llm/services/xai';
import { logger } from '#o11y/logger';
Expand All @@ -21,11 +21,13 @@ export const LLM_CLI_ALIAS: Record<string, () => LLM> = {
f: cerebrasQwen3_235b_Thinking,
cc: cerebrasQwen3_Coder,
x: xai_Grok4,
o3: openAIo3,
g5: openaiGPT5,
g5m: openaiGPT5mini,
g5n: openaiGPT5nano,
madb: MAD_Balanced,
mads: MAD_SOTA,
madf: MAD_Fast,
opus: Claude4_Opus_Vertex,
opus: Claude4_1_Opus_Vertex,
pp1: perplexityLLM,
pp2: perplexityReasoningProLLM,
pp3: perplexityDeepResearchLLM,
Expand Down
8 changes: 5 additions & 3 deletions src/cli/gen.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import '#fastify/trace-init/trace-init'; // leave an empty line next so this doesn't get sorted from the first line

import { writeFileSync } from 'node:fs';
import { initInMemoryApplicationContext } from '#app/applicationContext';
import { initApplicationContext, initInMemoryApplicationContext } from '#app/applicationContext';
import { ReasonerDebateLLM } from '#llm/multi-agent/reasoning-debate';
import { defaultLLMs } from '#llm/services/defaultLlms';
import { countTokens } from '#llm/tokens';
Expand All @@ -13,11 +13,13 @@ import { parsePromptWithImages } from './promptParser';
// npm run gen

async function main() {
await initInMemoryApplicationContext();

const { initialPrompt: rawPrompt, llmId, flags } = parseProcessArgs();
const { textPrompt, userContent } = parsePromptWithImages(rawPrompt);

// -s save to database
if (flags.s) await initApplicationContext();
else await initInMemoryApplicationContext();

let llm: LLM = defaultLLMs().medium;
if (llmId) {
if (!LLM_CLI_ALIAS[llmId]) {
Expand Down
2 changes: 1 addition & 1 deletion src/functions/scm/git.ts
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ export class Git implements VersionControlSystem {

// The fix is to execute a specific commit command that targets only the added files.
const commitResult = await execCommand(`git commit -m ${arg(commitMessage)} -- ${filesToAdd}`);
// Pre-commit hooks make call lint/commit commands with
// Pre-commit hooks may call lint/commit commands whose output contains ANSI escape characters (for colours etc.), so format stdout before checking the result
commitResult.stdout = formatAnsiWithMarkdownLinks(commitResult.stdout);
failOnError(`Failed to commit changes for files: ${files.join(', ')}`, commitResult);
}
Expand Down
4 changes: 2 additions & 2 deletions src/llm/multi-agent/blackberry.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import { BaseLLM } from '#llm/base-llm';
import { Claude4_Sonnet_Vertex } from '#llm/services/anthropic-vertex';
import { fireworksLlama3_405B } from '#llm/services/fireworks';
import { openaiGPT41 } from '#llm/services/openai';
import { openaiGPT5 } from '#llm/services/openai';
import { logger } from '#o11y/logger';
import type { GenerateTextOptions, LLM } from '#shared/llm/llm.model';

Expand Down Expand Up @@ -65,7 +65,7 @@ const MIND_OVER_DATA_SYS_PROMPT = `When addressing a problem, employ "Comparativ
`;

export class Blackberry extends BaseLLM {
llms: LLM[] = [Claude4_Sonnet_Vertex(), openaiGPT41(), Claude4_Sonnet_Vertex()];
llms: LLM[] = [Claude4_Sonnet_Vertex(), openaiGPT5(), Claude4_Sonnet_Vertex()];
mediator: LLM = Claude4_Sonnet_Vertex();

constructor() {
Expand Down
16 changes: 8 additions & 8 deletions src/llm/multi-agent/reasoning-debate.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@ import { BaseLLM } from '#llm/base-llm';
import { getLLM } from '#llm/llmFactory';
import { FastMediumLLM } from '#llm/multi-agent/fastMedium';
import { anthropicClaude4_Sonnet } from '#llm/services/anthropic';
import { Claude4_Opus_Vertex, Claude4_Sonnet_Vertex } from '#llm/services/anthropic-vertex';
import { Claude4_1_Opus_Vertex, Claude4_Sonnet_Vertex } from '#llm/services/anthropic-vertex';
import { deepinfraDeepSeekR1 } from '#llm/services/deepinfra';
import { openAIo3 } from '#llm/services/openai';
import { openaiGPT5 } from '#llm/services/openai';
import { vertexGemini_2_5_Pro } from '#llm/services/vertexai';
import { xai_Grok4 } from '#llm/services/xai';
import { logger } from '#o11y/logger';
Expand Down Expand Up @@ -83,7 +83,7 @@ export function MAD_Balanced(): LLM {
return new ReasonerDebateLLM(
'Balanced',
vertexGemini_2_5_Pro,
[vertexGemini_2_5_Pro, xai_Grok4, openAIo3],
[vertexGemini_2_5_Pro, xai_Grok4, openaiGPT5],
'MAD:Balanced multi-agent debate (Gemini 2.5 Pro, Grok 4, o3)',
);
}
Expand All @@ -92,7 +92,7 @@ export function MAD_Balanced4(): LLM {
return new ReasonerDebateLLM(
'Balanced4',
vertexGemini_2_5_Pro,
[vertexGemini_2_5_Pro, xai_Grok4, openAIo3, Claude4_Sonnet_Vertex],
[vertexGemini_2_5_Pro, xai_Grok4, openaiGPT5, Claude4_Sonnet_Vertex],
'MAD:Balanced multi-agent debate (Gemini 2.5 Pro, Grok 4, o3, Sonnet 4)',
);
}
Expand All @@ -116,7 +116,7 @@ export function MAD_Anthropic(): LLM {
}

export function MAD_OpenAI(): LLM {
return new ReasonerDebateLLM('OpenAI', openAIo3, [openAIo3, openAIo3, openAIo3], 'MAD:OpenAI multi-agent debate (o3 x3)');
return new ReasonerDebateLLM('OpenAI', openaiGPT5, [openaiGPT5, openaiGPT5, openaiGPT5], 'MAD:OpenAI multi-agent debate (GPT5 x3)');
}

export function MAD_Grok(): LLM {
Expand All @@ -126,9 +126,9 @@ export function MAD_Grok(): LLM {
export function MAD_SOTA(): LLM {
return new ReasonerDebateLLM(
'SOTA',
xai_Grok4,
[openAIo3, Claude4_Opus_Vertex, vertexGemini_2_5_Pro, xai_Grok4],
'MAD:SOTA multi-agent debate (Opus 4, o3, Gemini 2.5 Pro, Grok 4)',
openaiGPT5,
[openaiGPT5, Claude4_1_Opus_Vertex, vertexGemini_2_5_Pro, xai_Grok4],
'MAD:SOTA multi-agent debate (Opus 4, GPT5, Gemini 2.5 Pro, Grok 4)',
);
}

Expand Down
15 changes: 9 additions & 6 deletions src/llm/services/anthropic-vertex.ts
Original file line number Diff line number Diff line change
Expand Up @@ -14,15 +14,18 @@ export function anthropicVertexLLMRegistry(): Record<string, () => LLM> {
return {
[`${ANTHROPIC_VERTEX_SERVICE}:claude-3-5-haiku`]: Claude3_5_Haiku_Vertex,
[`${ANTHROPIC_VERTEX_SERVICE}:claude-sonnet-4`]: Claude4_Sonnet_Vertex,
[`${ANTHROPIC_VERTEX_SERVICE}:claude-opus-4`]: Claude4_Opus_Vertex,
[`${ANTHROPIC_VERTEX_SERVICE}:claude-opus-4-1@20250805`]: Claude4_1_Opus_Vertex,
};
}

// Supported image types: 'image/jpeg', 'image/png', 'image/gif' or 'image/webp'
export function Claude4_Opus_Vertex(): LLM {
return new AnthropicVertexLLM('Claude 4 Opus (Vertex)', 'claude-opus-4', 200_000, anthropicCostFunction(15, 75));

// https://cloud.google.com/vertex-ai/generative-ai/docs/partner-models/claude/opus-4-1
export function Claude4_1_Opus_Vertex(): LLM {
return new AnthropicVertexLLM('Claude 4.1 Opus (Vertex)', 'claude-opus-4-1@20250805', 200_000, anthropicCostFunction(15, 75), ['claude-opus-4']);
}

// https://cloud.google.com/vertex-ai/generative-ai/docs/partner-models/claude/sonnet-4
/**
 * Factory for the Claude 4 Sonnet model on the Anthropic Vertex service.
 *
 * Uses model id 'claude-sonnet-4', a 200,000 max-input-token limit, and a
 * cost function built from `anthropicCostFunction(3, 15)`.
 * @returns a new LLM instance on every call
 */
export function Claude4_Sonnet_Vertex(): LLM {
  const costs = anthropicCostFunction(3, 15);
  const maxInputTokens = 200_000;
  return new AnthropicVertexLLM('Claude 4 Sonnet (Vertex)', 'claude-sonnet-4', maxInputTokens, costs);
}
Expand Down Expand Up @@ -55,7 +58,7 @@ export function ClaudeVertexLLMs(): AgentLLMs {
easy: Claude3_5_Haiku_Vertex(),
medium: Claude4_Sonnet_Vertex(),
hard: Claude4_Sonnet_Vertex(),
xhard: Claude4_Opus_Vertex(),
xhard: Claude4_1_Opus_Vertex(),
};
}

Expand All @@ -73,8 +76,8 @@ let gcloudProjectIndex = 0;
* Vertex AI models - Anthropic Claude
*/
class AnthropicVertexLLM extends AiLLM<GoogleVertexAnthropicProvider> {
constructor(displayName: string, model: string, maxInputToken: number, calculateCosts: LlmCostFunction) {
super(displayName, ANTHROPIC_VERTEX_SERVICE, model, maxInputToken, calculateCosts);
constructor(displayName: string, model: string, maxInputToken: number, calculateCosts: LlmCostFunction, oldIds?: string[]) {
super(displayName, ANTHROPIC_VERTEX_SERVICE, model, maxInputToken, calculateCosts, oldIds);
}

protected apiKey(): string {
Expand Down
20 changes: 6 additions & 14 deletions src/llm/services/anthropic.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,26 +12,18 @@ export function anthropicLLMRegistry(): Record<string, () => LLM> {
return {
[`${ANTHROPIC_SERVICE}:claude-3-5-haiku`]: Claude3_5_Haiku,
[`${ANTHROPIC_SERVICE}:claude-sonnet-4-0`]: anthropicClaude4_Sonnet,
[`${ANTHROPIC_SERVICE}:claude-opus-4-0`]: anthropicClaude4_Opus,
[`${ANTHROPIC_SERVICE}:claude-opus-4-1-20250805`]: anthropicClaude4_1_Opus,
};
}

export function anthropicClaude4_Opus(): LLM {
return new Anthropic('Claude 4 Opus (Anthropic)', 'claude-opus-4-0', anthropicCostFunction(15, 75));
export function anthropicClaude4_1_Opus(): LLM {
return new Anthropic('Claude 4.1 Opus (Anthropic)', 'claude-opus-4-1-20250805', anthropicCostFunction(15, 75), ['claude-opus-4-0']);
}

/**
 * Factory for the Claude 4 Sonnet model on the Anthropic service.
 *
 * Uses model id 'claude-sonnet-4-0' and a cost function built from
 * `anthropicCostFunction(3, 15)` (inputMil = 3, outputMil = 15).
 * @returns a new LLM instance on every call
 */
export function anthropicClaude4_Sonnet(): LLM {
  const costs = anthropicCostFunction(3, 15);
  return new Anthropic('Claude 4 Sonnet (Anthropic)', 'claude-sonnet-4-0', costs);
}

// export function Claude3_5_Sonnet() {
// return new Anthropic('Claude 3.5 Sonnet', 'claude-3-5-sonnet-20241022', 3, 15);
// }

// export function Claude3_7_Sonnet() {
// return new Anthropic('Claude 3.7 Sonnet', 'claude-3-7-sonnet-latest', 3, 15);
// }

/**
 * Factory for the Claude 3.5 Haiku model on the Anthropic service.
 *
 * Uses model id 'claude-3-5-haiku-20241022' and a cost function built from
 * `anthropicCostFunction(1, 5)` (inputMil = 1, outputMil = 5).
 * @returns a new LLM instance on every call
 */
export function Claude3_5_Haiku(): LLM {
  const costs = anthropicCostFunction(1, 5);
  return new Anthropic('Claude 3.5 Haiku', 'claude-3-5-haiku-20241022', costs);
}
Expand All @@ -54,7 +46,7 @@ function anthropicCostFunction(inputMil: number, outputMil: number): LlmCostFunc

export function ClaudeLLMs(): AgentLLMs {
const sonnet4 = anthropicClaude4_Sonnet();
const opus = anthropicClaude4_Opus();
const opus = anthropicClaude4_1_Opus();
return {
easy: Claude3_5_Haiku(),
medium: sonnet4,
Expand All @@ -64,8 +56,8 @@ export function ClaudeLLMs(): AgentLLMs {
}

export class Anthropic extends AiLLM<AnthropicProvider> {
constructor(displayName: string, model: string, calculateCosts: LlmCostFunction) {
super(displayName, ANTHROPIC_SERVICE, model, 200_000, calculateCosts);
constructor(displayName: string, model: string, calculateCosts: LlmCostFunction, oldIds?: string[]) {
super(displayName, ANTHROPIC_SERVICE, model, 200_000, calculateCosts, oldIds);
}

protected apiKey(): string {
Expand Down
2 changes: 1 addition & 1 deletion src/llm/services/cerebras.ts
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ export class CerebrasLLM extends AiLLM<OpenAIProvider> {
if (this.getModel().includes('qwen-3')) {
return wrapLanguageModel({
model: aiModel,
middleware: extractReasoningMiddleware({ tagName: 'think' }),
middleware: extractReasoningMiddleware({ tagName: 'think', startWithReasoning: true }),
});
}
return aiModel;
Expand Down
8 changes: 4 additions & 4 deletions src/llm/services/defaultLlms.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ import { cerebrasQwen3_235b_Thinking } from './cerebras';
import { Gemini_2_5_Flash, Gemini_2_5_Pro } from './gemini';
import { groqLlama4_Scout } from './groq';
import { Ollama_LLMs } from './ollama';
import { openAIo3, openaiGPT41, openaiGPT41mini } from './openai';
import { openaiGPT5, openaiGPT5mini } from './openai';
import { xai_Grok4 } from './xai';

let _summaryLLM: LLM;
Expand All @@ -31,15 +31,15 @@ export function defaultLLMs(): AgentLLMs {
// return _defaultLLMs;
// }

const easyLLMs = [new FastEasyLLM(), vertexGemini_2_5_Flash(), Gemini_2_5_Flash(), groqLlama4_Scout(), openaiGPT41mini(), Claude3_5_Haiku()];
const easyLLMs = [new FastEasyLLM(), vertexGemini_2_5_Flash(), Gemini_2_5_Flash(), groqLlama4_Scout(), openaiGPT5mini(), Claude3_5_Haiku()];
const easy: LLM | undefined = easyLLMs.find((llm) => llm.isConfigured());
if (!easy) throw new Error('No default easy LLM configured');

const mediumLLMs = [new FastMediumLLM(), vertexGemini_2_5_Flash(), Gemini_2_5_Flash(), cerebrasQwen3_235b_Thinking(), openaiGPT41(), Claude3_5_Haiku()];
const mediumLLMs = [new FastMediumLLM(), vertexGemini_2_5_Flash(), Gemini_2_5_Flash(), cerebrasQwen3_235b_Thinking(), openaiGPT5(), Claude3_5_Haiku()];
const medium: LLM | undefined = mediumLLMs.find((llm) => llm.isConfigured());
if (!medium) throw new Error('No default medium LLM configured');

const hardLLMs = [vertexGemini_2_5_Pro(), Gemini_2_5_Pro(), xai_Grok4(), openAIo3(), anthropicClaude4_Sonnet()];
const hardLLMs = [vertexGemini_2_5_Pro(), Gemini_2_5_Pro(), xai_Grok4(), openaiGPT5(), anthropicClaude4_Sonnet()];
const hard: LLM | undefined = hardLLMs.find((llm) => llm.isConfigured());
if (!hard) throw new Error('No default hard LLM configured');

Expand Down
4 changes: 2 additions & 2 deletions src/llm/services/llm.int.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ import { deepSeekV3 } from '#llm/services/deepseek';
import { fireworksLlama3_70B } from '#llm/services/fireworks';
import { nebiusDeepSeekR1 } from '#llm/services/nebius';
import { Ollama_Phi3 } from '#llm/services/ollama';
import { openaiGPT41mini } from '#llm/services/openai';
import { openaiGPT5mini } from '#llm/services/openai';
import { perplexityLLM } from '#llm/services/perplexity-llm';
import { sambanovaDeepseekR1, sambanovaLlama3_3_70b, sambanovaLlama3_3_70b_R1_Distill } from '#llm/services/sambanova';
import { togetherDeepSeekR1_0528_tput } from '#llm/services/together';
Expand Down Expand Up @@ -197,7 +197,7 @@ describe('LLMs', () => {
});

describe('OpenAI', () => {
const llm = openaiGPT41mini();
const llm = openaiGPT5mini();

it('should generateText', async () => {
const response = await llm.generateText(SKY_PROMPT, { temperature: 0, id: 'test' });
Expand Down
Loading