Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
51 changes: 41 additions & 10 deletions scripts/agent-evals/src/runner/gemini-cli-runner.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,12 @@
} from "./tool-matcher.js";
import fs from "fs";
import { throwFailure } from "./logging.js";
import { getAgentEvalsRoot } from "./paths.js";
import { getAgentEvalsRoot, RunDirectories } from "./paths.js";
import { execSync } from "node:child_process";
import { ToolMockName } from "../mock/tool-mocks.js";

const READY_PROMPT = "Type your message";
const INSTALL_ID = "238efa5b-efb2-44bd-9dce-9b081532681c";

interface ParsedTelemetryLog {
attributes?: {
Expand Down Expand Up @@ -44,15 +45,16 @@

constructor(
private readonly testName: string,
testDir: string,
runDir: string,
dirs: RunDirectories,
toolMocks: ToolMockName[],
) {
// Create a settings file to point the CLI to a local telemetry log
this.telemetryPath = path.join(testDir, "telemetry.log");
this.telemetryPath = path.join(dirs.testDir, "telemetry.log");
const mockPath = path.resolve(path.join(getAgentEvalsRoot(), "lib/mock/mock-tools-main.js"));
const firebasePath = execSync("which firebase").toString().trim();
const settings = {

// Write workspace Gemini Settings
this.writeGeminiSettings(dirs.runDir, {
general: {
disableAutoUpdate: true,
},
Expand All @@ -71,15 +73,29 @@
},
},
},
};
const geminiDir = path.join(runDir, ".gemini");
mkdirSync(geminiDir, { recursive: true });
writeFileSync(path.join(geminiDir, "settings.json"), JSON.stringify(settings, null, 2));
});

// Write user Gemini Settings
this.writeGeminiSettings(dirs.userDir, {
security: {
auth: {
selectedType: "gemini-api-key",
},
},
hasSeenIdeIntegrationNudge: true,
});

this.writeGeminiInstallId(dirs.userDir);

this.cli = new InteractiveCLI("gemini", ["--yolo"], {
cwd: runDir,
cwd: dirs.runDir,
readyPrompt: READY_PROMPT,
showOutput: true,
env: {
// Overwrite $HOME so that we can support GCLI features that only apply
// on a per-user basis, like memories and extensions
HOME: dirs.userDir,
},
});
}

Expand All @@ -101,6 +117,21 @@
await this.cli.kill();
}

/**
 * Serializes `settings` as pretty-printed JSON (2-space indent) and writes it
 * to `<dir>/.gemini/settings.json`, creating the `.gemini` directory first if
 * it does not exist yet.
 *
 * @param dir directory that should contain the `.gemini` folder
 * @param settings JSON-serializable settings value to persist
 */
writeGeminiSettings(dir: string, settings: unknown): void {
  const geminiDir = path.join(dir, ".gemini");
  mkdirSync(geminiDir, { recursive: true });
  writeFileSync(path.join(geminiDir, "settings.json"), JSON.stringify(settings, null, 2));
}

/**
 * Writes a constant, real install ID so that we don't bump Gemini metrics
 * with fake users. The ID is stored in `<userDir>/.gemini/installation_id`.
 *
 * @param userDir directory that contains (or should contain) the `.gemini` folder
 */
writeGeminiInstallId(userDir: string): void {
  const geminiDir = path.join(userDir, ".gemini");
  // Create the directory here instead of relying on writeGeminiSettings having
  // been called for the same directory beforehand.
  mkdirSync(geminiDir, { recursive: true });
  writeFileSync(path.join(geminiDir, "installation_id"), INSTALL_ID);
}

/**
* Reads the agent's telemetry file and looks for the given event. Throws if
* the event is not found
Expand All @@ -121,7 +152,7 @@
const matchingTool = toolLogs.find((log) => log.name === toolName);
if (!matchingTool) {
messages.push(
`Did not find expected tool call: "${toolName}" in the telemetry log. Found [${foundToolNames}]`,

Check warning on line 155 in scripts/agent-evals/src/runner/gemini-cli-runner.ts

View workflow job for this annotation

GitHub Actions / lint (20)

Invalid type "string[]" of template literal expression
);
allSucceeded = false;
} else {
Expand All @@ -145,7 +176,7 @@
}

// Implementation for this is borrowed from the Gemini CLI's test-helper
private async waitForTelemetryReady() {

Check warning on line 179 in scripts/agent-evals/src/runner/gemini-cli-runner.ts

View workflow job for this annotation

GitHub Actions / lint (20)

Missing return type on function
// Wait for telemetry file to exist and have content
await poll(() => {
if (!fs.existsSync(this.telemetryPath)) return false;
Expand Down Expand Up @@ -207,8 +238,8 @@

for (const jsonStr of jsonObjects) {
try {
const logData = JSON.parse(jsonStr);

Check warning on line 241 in scripts/agent-evals/src/runner/gemini-cli-runner.ts

View workflow job for this annotation

GitHub Actions / lint (20)

Unsafe assignment of an `any` value
logs.push(logData);

Check warning on line 242 in scripts/agent-evals/src/runner/gemini-cli-runner.ts

View workflow job for this annotation

GitHub Actions / lint (20)

Unsafe argument of type `any` assigned to a parameter of type `ParsedTelemetryLog`
} catch (e) {
// Skip objects that aren't valid JSON
}
Expand Down
19 changes: 12 additions & 7 deletions scripts/agent-evals/src/runner/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,18 +3,18 @@
import { mkdirSync } from "node:fs";
import { AgentTestRunner } from "./agent-test-runner.js";
import { GeminiCliRunner } from "./gemini-cli-runner.js";
import { buildFirebaseCli, clearUserMcpServers } from "./setup.js";
import { buildFirebaseCli } from "./setup.js";
import { addCleanup } from "../helpers/cleanup.js";
import { TemplateName, copyTemplate, buildTemplates } from "../template/index.js";
import { ToolMockName } from "../mock/tool-mocks.js";
import { RunDirectories } from "./paths.js";

export * from "./agent-test-runner.js";

const dateName = new Date().toISOString().replace("T", "_").replace(/:/g, "-").replace(".", "-");

/**
 * Prepares the evaluation environment. Runs, strictly in sequence: the
 * Firebase CLI build, the clearing of user-level MCP servers, and the
 * template build. Rejects if any of those steps rejects.
 */
export async function setupEnvironment(): Promise<void> {
  await buildFirebaseCli();
  await clearUserMcpServers();
  await buildTemplates();
}

Expand All @@ -27,7 +27,7 @@
toolMocks?: ToolMockName[];
}

export async function startAgentTest(

Check warning on line 30 in scripts/agent-evals/src/runner/index.ts

View workflow job for this annotation

GitHub Actions / lint (20)

Missing JSDoc comment
mocha: Mocha.Context,
options?: AgentTestOptions,
): Promise<AgentTestRunner> {
Expand All @@ -35,13 +35,13 @@
throw new Error("startAgentTest must be called inside of an `it` block of a Mocha test.");
}
const testName = mocha.test.fullTitle();
const { testDir, runDir } = createRunDirectory(testName);
const dirs = createRunDirectory(testName);

if (options?.templateName) {
copyTemplate(options.templateName, runDir);
copyTemplate(options.templateName, dirs.runDir);
}

const run = new GeminiCliRunner(testName, testDir, runDir, options?.toolMocks || []);
const run = new GeminiCliRunner(testName, dirs, options?.toolMocks || []);
await run.waitForReadyPrompt();

addCleanup(async () => {
Expand All @@ -51,12 +51,17 @@
return run;
}

/**
 * Creates the on-disk directory layout for a single test run and returns the
 * absolute paths.
 *
 * The test directory lives under `output/<dateName>/` (resolved against the
 * current working directory) and is named after the lower-cased test name,
 * with every character outside `[a-z0-9]` replaced by `-`, followed by 8
 * random bytes in hex so that repeated runs never collide.
 *
 * @param testName full title of the test the directories are created for
 * @returns `testDir` (root for the run), `runDir` (`<testDir>/repo`) and
 *   `userDir` (`<testDir>/user`); `runDir` and `userDir` exist on return
 */
function createRunDirectory(testName: string): RunDirectories {
  const sanitizedName = testName.toLowerCase().replace(/[^a-z0-9]/g, "-");
  const testDir = path.resolve(
    path.join("output", dateName, `${sanitizedName}-${randomBytes(8).toString("hex")}`),
  );

  const runDir = path.join(testDir, "repo");
  mkdirSync(runDir, { recursive: true });

  const userDir = path.join(testDir, "user");
  mkdirSync(userDir, { recursive: true });

  return { testDir, runDir, userDir };
}
2 changes: 2 additions & 0 deletions scripts/agent-evals/src/runner/paths.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
import path from "path";
import { fileURLToPath } from "url";

export type RunDirectories = { testDir: string; runDir: string; userDir: string };

/**
 * Returns the absolute path of the directory two levels above the directory
 * that contains this module file.
 */
export function getAgentEvalsRoot(): string {
  // import.meta.url is a file:// URL; convert it to a filesystem path first.
  const thisFileDir = path.dirname(fileURLToPath(import.meta.url));
  return path.resolve(path.join(thisFileDir, "..", ".."));
}
Expand Down
14 changes: 0 additions & 14 deletions scripts/agent-evals/src/runner/setup.ts
Original file line number Diff line number Diff line change
Expand Up @@ -13,17 +13,3 @@ export async function buildFirebaseCli() {
console.log(`Building Firebase CLI at ${firebaseCliRoot}`);
await execPromise("./scripts/clean-install.sh", { cwd: firebaseCliRoot });
}

/**
 * Best-effort cleanup of Firebase registrations in the Gemini CLI.
 *
 * Runs `gemini extensions uninstall firebase` and then
 * `gemini mcp remove firebase`, one after the other. A failure of either
 * command is deliberately ignored, so this function does not reject because
 * of them.
 */
export async function clearUserMcpServers(): Promise<void> {
  console.log(`Clearing existing MCP servers...`);
  const commands = ["gemini extensions uninstall firebase", "gemini mcp remove firebase"];
  for (const command of commands) {
    try {
      await execPromise(command);
    } catch {
      /* This can fail if there's nothing installed, so ignore that */
    }
  }
}
Loading