3 changes: 1 addition & 2 deletions examples/environments/angular/config.js
@@ -1,6 +1,6 @@
 // @ts-check
 
-import { getBuiltInRatings } from 'web-codegen-scorer';
+import {getBuiltInRatings} from 'web-codegen-scorer';
 
 /** @type {import("web-codegen-scorer").EnvironmentConfig} */
 export default {
@@ -11,5 +11,4 @@ export default {
   generationSystemPrompt: './system-instructions.md',
   executablePrompts: ['../../prompts/**/*.md'],
   packageManager: 'npm',
-  buildCommand: '',
 };
10 changes: 5 additions & 5 deletions examples/environments/remote_env/config.js
@@ -1,18 +1,18 @@
 // @ts-check
 
 /**
- * @import {RemoteEnvironmentConfig} from 'web-codegen-scorer';
+ * @import {EnvironmentConfig} from 'web-codegen-scorer';
  */
 
-import { getBuiltInRatings } from 'web-codegen-scorer';
-import { FakeRemoteGateway } from './fake-gateway';
+import {getBuiltInRatings} from 'web-codegen-scorer';
+import {FakeRemoteExecutor} from './fake-executor';
 
-/** @type {RemoteEnvironmentConfig} */
+/** @type {EnvironmentConfig} */
 export default {
   displayName: 'Remote Env (example)',
   clientSideFramework: 'angular',
   ratings: getBuiltInRatings(),
   generationSystemPrompt: './system-instructions.md',
   executablePrompts: ['../../prompts/**/*.md'],
-  gateway: new FakeRemoteGateway(),
+  executor: new FakeRemoteExecutor(),
 };
examples/environments/remote_env/fake-executor.ts (renamed from fake-gateway.ts)
@@ -2,17 +2,16 @@ import {
   BuildResult,
   BuildResultStatus,
   EvalID,
-  Gateway,
+  Executor,
   LlmContextFile,
+  LlmGenerateFilesRequest,
   LlmResponse,
   LlmResponseFile,
-  RemoteEnvironment,
   RootPromptDefinition,
 } from '../../../runner';
-import { LlmGenerateFilesContext } from '../../../runner/codegen/llm-runner';
-import { ProgressLogger } from '../../../runner/progress/progress-logger';
+import {ProgressLogger} from '../../../runner/progress/progress-logger';
 
-export class FakeRemoteGateway implements Gateway<RemoteEnvironment> {
+export class FakeRemoteExecutor implements Executor {
   ids = 0;
 
   async initializeEval() {
@@ -26,46 +25,45 @@ export class FakeRemoteGateway implements Gateway<RemoteEnvironment> {
   async performFakeLlmRequest(): Promise<LlmResponse> {
     return {
       success: true,
-      outputFiles: [{ code: 'Works!', filePath: 'main.ts' }],
+      outputFiles: [{code: 'Works!', filePath: 'main.ts'}],
       reasoning: '',
       errors: [],
-      usage: { inputTokens: 0, totalTokens: 0, outputTokens: 0 },
+      usage: {inputTokens: 0, totalTokens: 0, outputTokens: 0},
     };
   }
 
   generateInitialFiles(
     id: EvalID,
-    requestCtx: LlmGenerateFilesContext,
+    requestCtx: LlmGenerateFilesRequest,
     model: string,
     contextFiles: LlmContextFile[],
-    abortSignal: AbortSignal
+    abortSignal: AbortSignal,
   ): Promise<LlmResponse> {
     // Generate the initial files of the eval app.
     // This generation can happen on a remote service with access to private models.
     return this.performFakeLlmRequest();
   }
 
-  repairBuild(
+  generateRepairFiles(
     id: EvalID,
-    requestCtx: LlmGenerateFilesContext,
+    requestCtx: LlmGenerateFilesRequest,
     model: string,
     errorMessage: string,
     appFiles: LlmResponseFile[],
     contextFiles: LlmContextFile[],
-    abortSignal: AbortSignal
+    abortSignal: AbortSignal,
   ): Promise<LlmResponse> {
     // Repair the given eval app.
     // This generation can happen on a remote service with access to private models.
     return this.performFakeLlmRequest();
   }
 
-  async serveBuild<T>(
+  async serveWebApplication<T>(
     id: EvalID,
-    env: RemoteEnvironment,
     appDirectoryPath: string,
     rootPromptDef: RootPromptDefinition,
     progress: ProgressLogger,
-    logicWhileServing: (serveUrl: string) => Promise<T>
+    logicWhileServing: (serveUrl: string) => Promise<T>,
   ): Promise<T> {
     // Start serving of the app.
     // Invoke the logic while the server is running.
@@ -74,12 +72,10 @@ export class FakeRemoteGateway implements Gateway<RemoteEnvironment> {
     return result;
   }
 
-  async tryBuild(
+  async performBuild(
     id: EvalID,
-    env: RemoteEnvironment,
     appDirectoryPath: string,
     rootPromptDef: RootPromptDefinition,
-    progress: ProgressLogger
   ): Promise<BuildResult> {
     // Here, building can happen in the remote service.
     // Eval ID is useful here for storing the build on a server, for re-using later when serving.
@@ -89,7 +85,7 @@ export class FakeRemoteGateway implements Gateway<RemoteEnvironment> {
     };
   }
 
-  shouldRetryFailedBuilds() {
+  async shouldRepairFailedBuilds() {
    // Some environments have a builtin retry loop as part of initial generation.
    // In those cases, you may want to skip retrying.
    return true;
@@ -98,4 +94,18 @@ export class FakeRemoteGateway implements Gateway<RemoteEnvironment> {
   async finalizeEval() {
     // Do your cleanup.
   }
+
+  async isSupportedModel() {
+    return {supported: true};
+  }
+
+  async getExecutorInfo() {
+    return {
+      id: 'fake-executor',
+      displayName: 'Fake Executor',
+      mcpServersLaunched: 0,
+    };
+  }
+
+  async destroy() {}
 }
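
Note: taken together, the methods above make up the full lifecycle of the renamed Executor interface. The sketch below shows roughly how a harness could drive it end to end, using only the method names and parameter lists visible in this diff; the runEval wrapper itself, the literal path and model id, and the BuildResultStatus member name are illustrative assumptions, not part of this PR.

// lifecycle-sketch.ts (hypothetical driver, not part of this PR)
import {
  BuildResultStatus,
  EvalID,
  Executor,
  LlmGenerateFilesRequest,
  RootPromptDefinition,
} from '../../../runner';
import {ProgressLogger} from '../../../runner/progress/progress-logger';

async function runEval(
  executor: Executor,
  id: EvalID,
  requestCtx: LlmGenerateFilesRequest,
  rootPromptDef: RootPromptDefinition,
  progress: ProgressLogger,
): Promise<void> {
  await executor.initializeEval();

  // Capability checks exposed by the new interface.
  const {supported} = await executor.isSupportedModel();
  const info = await executor.getExecutorInfo();
  if (!supported) {
    throw new Error(`Model not supported by ${info.displayName}`);
  }

  const abort = new AbortController();
  const appDir = '/tmp/eval-app'; // assumed working directory

  // 1. Initial generation (may run on a remote service with private models).
  const response = await executor.generateInitialFiles(
    id,
    requestCtx,
    'some-model', // hypothetical model id
    [], // contextFiles
    abort.signal,
  );

  // 2. Build, with one repair round if the environment asks for it.
  let build = await executor.performBuild(id, appDir, rootPromptDef);
  const failed = build.status !== BuildResultStatus.Success; // assumed enum member name
  if (failed && (await executor.shouldRepairFailedBuilds())) {
    await executor.generateRepairFiles(
      id,
      requestCtx,
      'some-model',
      'build error output', // errorMessage from the failed build
      response.outputFiles,
      [], // contextFiles
      abort.signal,
    );
    build = await executor.performBuild(id, appDir, rootPromptDef);
  }

  // 3. Serve the built app and run checks against the live URL.
  await executor.serveWebApplication(id, appDir, rootPromptDef, progress, async serveUrl => {
    // e.g. run browser-based ratings against serveUrl
  });

  // 4. Cleanup hooks.
  await executor.finalizeEval();
  await executor.destroy();
}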
24 changes: 13 additions & 11 deletions runner/codegen/base-cli-agent-runner.ts
@@ -3,10 +3,10 @@ import {join, relative} from 'path';
 import {existsSync} from 'fs';
 import assert from 'assert';
 import {
-  LlmConstrainedOutputGenerateResponse,
-  LlmGenerateFilesRequestOptions,
-  LlmGenerateFilesResponse,
-  LlmGenerateTextResponse,
+  LocalLlmConstrainedOutputGenerateResponse,
+  LocalLlmGenerateFilesRequestOptions,
+  LocalLlmGenerateFilesResponse,
+  LocalLlmGenerateTextResponse,
 } from './llm-runner.js';
 import {DirectorySnapshot} from './directory-snapshot.js';
 import {LlmResponseFile} from '../shared-interfaces.js';
@@ -17,8 +17,8 @@ export abstract class BaseCliAgentRunner {
   abstract readonly displayName: string;
   protected abstract readonly binaryName: string;
   protected abstract readonly ignoredFilePatterns: string[];
-  protected abstract getCommandLineFlags(options: LlmGenerateFilesRequestOptions): string[];
-  protected abstract writeAgentFiles(options: LlmGenerateFilesRequestOptions): Promise<void>;
+  protected abstract getCommandLineFlags(options: LocalLlmGenerateFilesRequestOptions): string[];
+  protected abstract writeAgentFiles(options: LocalLlmGenerateFilesRequestOptions): Promise<void>;
   protected inactivityTimeoutMins = 2;
   protected totalRequestTimeoutMins = 10;
 
@@ -27,7 +27,9 @@ export abstract class BaseCliAgentRunner {
   private binaryPath: string | null = null;
   private commonIgnoredPatterns = ['**/node_modules/**', '**/dist/**', '**/.angular/**'];
 
-  async generateFiles(options: LlmGenerateFilesRequestOptions): Promise<LlmGenerateFilesResponse> {
+  async generateFiles(
+    options: LocalLlmGenerateFilesRequestOptions,
+  ): Promise<LocalLlmGenerateFilesResponse> {
     const {context} = options;
 
     // TODO: Consider removing these assertions when we have better types.
@@ -64,12 +66,12 @@ export abstract class BaseCliAgentRunner {
     return {files, reasoning, toolLogs: []};
   }
 
-  generateText(): Promise<LlmGenerateTextResponse> {
+  generateText(): Promise<LocalLlmGenerateTextResponse> {
     // Technically we can make this work, but we don't need it at the time of writing.
     throw new UserFacingError(`Generating text with ${this.displayName} is not supported.`);
   }
 
-  generateConstrained(): Promise<LlmConstrainedOutputGenerateResponse<any>> {
+  generateConstrained(): Promise<LocalLlmConstrainedOutputGenerateResponse<any>> {
     // We can't support this, because there's no straightforward
     // way to tell the agent to follow a schema.
     throw new UserFacingError(`Constrained output with ${this.displayName} is not supported.`);
@@ -117,7 +119,7 @@ export abstract class BaseCliAgentRunner {
   }
 
   /** Gets the common system instructions for all agents. */
-  protected getCommonInstructions(options: LlmGenerateFilesRequestOptions) {
+  protected getCommonInstructions(options: LocalLlmGenerateFilesRequestOptions) {
     return [
       `# Important Rules`,
       `The following instructions dictate how you should behave. It is CRITICAL that you follow them AS CLOSELY AS POSSIBLE:`,
@@ -170,7 +172,7 @@ export abstract class BaseCliAgentRunner {
     return binaryPath;
   }
 
-  private runAgentProcess(options: LlmGenerateFilesRequestOptions): Promise<string> {
+  private runAgentProcess(options: LocalLlmGenerateFilesRequestOptions): Promise<string> {
     return new Promise<string>(resolve => {
       let stdoutBuffer = '';
       let stdErrBuffer = '';
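
Note: to see the renamed Local* option types in context, a bare-bones subclass might look like the sketch below. The echo-agent binary and its flag names are invented for illustration; only the abstract members visible in the hunks above are filled in, and a real runner would also implement the rest of the LlmRunner surface, as the concrete runners in the following files do.

// echo-agent-runner.ts (hypothetical, not part of this PR)
import {join} from 'path';
import {writeFile} from 'fs/promises';
import {BaseCliAgentRunner} from './base-cli-agent-runner.js';
import {LocalLlmGenerateFilesRequestOptions} from './llm-runner.js';

export class EchoAgentRunner extends BaseCliAgentRunner {
  readonly displayName = 'Echo Agent';
  protected readonly binaryName = 'echo-agent';
  protected readonly ignoredFilePatterns = ['**/.echo/**'];

  protected getCommandLineFlags(options: LocalLlmGenerateFilesRequestOptions): string[] {
    // Flag names are invented; compare with the Claude/Codex/Gemini runners below.
    return ['--prompt', options.context.executablePrompt];
  }

  protected async writeAgentFiles(options: LocalLlmGenerateFilesRequestOptions): Promise<void> {
    // Persist the shared system instructions where the agent expects them.
    const instructions = this.getCommonInstructions(options).join('\n\n');
    await writeFile(join(options.context.directory, 'AGENT.md'), instructions);
  }
}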
12 changes: 8 additions & 4 deletions runner/codegen/claude-code-runner.ts
@@ -1,4 +1,8 @@
-import {LlmGenerateFilesContext, LlmGenerateFilesRequestOptions, LlmRunner} from './llm-runner.js';
+import {
+  LocalLlmGenerateFilesContext,
+  LocalLlmGenerateFilesRequestOptions,
+  LlmRunner,
+} from './llm-runner.js';
 import {join} from 'path';
 import {mkdirSync} from 'fs';
 import {writeFile} from 'fs/promises';
@@ -25,7 +29,7 @@ export class ClaudeCodeRunner extends BaseCliAgentRunner implements LlmRunner {
     return Object.keys(MODEL_MAPPING);
   }
 
-  protected getCommandLineFlags(options: LlmGenerateFilesRequestOptions): string[] {
+  protected getCommandLineFlags(options: LocalLlmGenerateFilesRequestOptions): string[] {
     return [
       '--print',
       '--model',
@@ -39,7 +43,7 @@ export class ClaudeCodeRunner extends BaseCliAgentRunner implements LlmRunner {
     ];
   }
 
-  protected async writeAgentFiles(options: LlmGenerateFilesRequestOptions): Promise<void> {
+  protected async writeAgentFiles(options: LocalLlmGenerateFilesRequestOptions): Promise<void> {
     const {context} = options;
     const instructionFilePath = join(context.directory, 'CLAUDE.md');
     const settingsDir = join(context.directory, '.claude');
@@ -52,7 +56,7 @@ export class ClaudeCodeRunner extends BaseCliAgentRunner implements LlmRunner {
     ]);
   }
 
-  private getSettingsJsonFile(context: LlmGenerateFilesContext): string {
+  private getSettingsJsonFile(context: LocalLlmGenerateFilesContext): string {
     const ignoredPatterns = super.getCommonIgnorePatterns();
     const deniedPermissions: string[] = [
       // Block some commands like `git` and `npm install` since they aren't relevant for the evals.
6 changes: 3 additions & 3 deletions runner/codegen/codex-runner.ts
@@ -1,4 +1,4 @@
-import {LlmGenerateFilesRequestOptions, LlmRunner} from './llm-runner.js';
+import {LocalLlmGenerateFilesRequestOptions, LlmRunner} from './llm-runner.js';
 import {join} from 'path';
 import {mkdirSync} from 'fs';
 import {writeFile} from 'fs/promises';
@@ -22,7 +22,7 @@ export class CodexRunner extends BaseCliAgentRunner implements LlmRunner {
     return Object.keys(MODEL_MAPPING);
   }
 
-  protected getCommandLineFlags(options: LlmGenerateFilesRequestOptions): string[] {
+  protected getCommandLineFlags(options: LocalLlmGenerateFilesRequestOptions): string[] {
     return [
       'exec',
       '--model',
@@ -34,7 +34,7 @@ export class CodexRunner extends BaseCliAgentRunner implements LlmRunner {
     ];
   }
 
-  protected async writeAgentFiles(options: LlmGenerateFilesRequestOptions): Promise<void> {
+  protected async writeAgentFiles(options: LocalLlmGenerateFilesRequestOptions): Promise<void> {
     const {context} = options;
     const instructionFilePath = join(context.directory, 'AGENTS.md');
     const settingsDir = join(context.directory, '.codex');
6 changes: 3 additions & 3 deletions runner/codegen/gemini-cli-runner.ts
@@ -1,4 +1,4 @@
-import {LlmGenerateFilesRequestOptions, LlmRunner} from './llm-runner.js';
+import {LocalLlmGenerateFilesRequestOptions, LlmRunner} from './llm-runner.js';
 import {join} from 'path';
 import {mkdirSync} from 'fs';
 import {writeFile} from 'fs/promises';
@@ -18,7 +18,7 @@ export class GeminiCliRunner extends BaseCliAgentRunner implements LlmRunner {
     return SUPPORTED_MODELS;
   }
 
-  protected getCommandLineFlags(options: LlmGenerateFilesRequestOptions): string[] {
+  protected getCommandLineFlags(options: LocalLlmGenerateFilesRequestOptions): string[] {
     return [
       '--prompt',
       options.context.executablePrompt,
@@ -30,7 +30,7 @@ export class GeminiCliRunner extends BaseCliAgentRunner implements LlmRunner {
     ];
   }
 
-  protected async writeAgentFiles(options: LlmGenerateFilesRequestOptions): Promise<void> {
+  protected async writeAgentFiles(options: LocalLlmGenerateFilesRequestOptions): Promise<void> {
     const {context} = options;
     const ignoreFilePath = join(context.directory, '.geminiignore');
     const instructionFilePath = join(context.directory, 'GEMINI.md');
30 changes: 17 additions & 13 deletions runner/codegen/genkit/genkit-runner.ts
@@ -4,13 +4,13 @@ import {GenkitPlugin, GenkitPluginV2} from 'genkit/plugin';
 import {z} from 'zod';
 import {
   McpServerOptions,
-  LlmConstrainedOutputGenerateRequestOptions,
-  LlmConstrainedOutputGenerateResponse,
+  LocalLlmConstrainedOutputGenerateRequestOptions,
+  LocalLlmConstrainedOutputGenerateResponse,
   LlmRunner,
-  LlmGenerateFilesResponse,
-  LlmGenerateTextResponse,
-  LlmGenerateTextRequestOptions,
-  LlmGenerateFilesRequestOptions,
+  LocalLlmGenerateFilesResponse,
+  LocalLlmGenerateTextResponse,
+  LocalLlmGenerateTextRequestOptions,
+  LocalLlmGenerateFilesRequestOptions,
 } from '../llm-runner.js';
 import {setTimeout} from 'node:timers/promises';
 import {callWithTimeout} from '../../utils/timeout.js';
@@ -34,8 +34,8 @@ export class GenkitRunner implements LlmRunner {
   private toolLogs: ToolLogEntry[] = [];
 
   async generateConstrained<T extends z.ZodTypeAny = z.ZodTypeAny>(
-    options: LlmConstrainedOutputGenerateRequestOptions<T>,
-  ): Promise<LlmConstrainedOutputGenerateResponse<T>> {
+    options: LocalLlmConstrainedOutputGenerateRequestOptions<T>,
+  ): Promise<LocalLlmConstrainedOutputGenerateResponse<T>> {
     const {provider, model} = this.resolveModel(options.model);
     const result = await this._genkitRequest(provider, model, options);
 
@@ -46,8 +46,10 @@ export class GenkitRunner implements LlmRunner {
     };
   }
 
-  async generateFiles(options: LlmGenerateFilesRequestOptions): Promise<LlmGenerateFilesResponse> {
-    const requestOptions: LlmConstrainedOutputGenerateRequestOptions = {
+  async generateFiles(
+    options: LocalLlmGenerateFilesRequestOptions,
+  ): Promise<LocalLlmGenerateFilesResponse> {
+    const requestOptions: LocalLlmConstrainedOutputGenerateRequestOptions = {
       ...options,
       prompt: options.context.combinedPrompt,
       schema: z.object({
@@ -80,7 +82,9 @@ export class GenkitRunner implements LlmRunner {
     return this.toolLogs.splice(0);
   }
 
-  async generateText(options: LlmGenerateTextRequestOptions): Promise<LlmGenerateTextResponse> {
+  async generateText(
+    options: LocalLlmGenerateTextRequestOptions,
+  ): Promise<LocalLlmGenerateTextResponse> {
     const {provider, model} = this.resolveModel(options.model);
     const result = await this._genkitRequest(provider, model, options);
 
@@ -103,14 +107,14 @@ export class GenkitRunner implements LlmRunner {
   private async _genkitRequest(
     provider: GenkitModelProvider,
     model: ModelReference<any>,
-    options: LlmGenerateTextRequestOptions | LlmConstrainedOutputGenerateRequestOptions,
+    options: LocalLlmGenerateTextRequestOptions | LocalLlmConstrainedOutputGenerateRequestOptions,
   ) {
     return await rateLimitLLMRequest(
       provider,
       model,
       {messages: options.messages || [], prompt: options.prompt},
       () => {
-        const schema = (options as Partial<LlmConstrainedOutputGenerateRequestOptions>).schema;
+        const schema = (options as Partial<LocalLlmConstrainedOutputGenerateRequestOptions>).schema;
         const performRequest = async () => {
           let tools: ToolAction[] | undefined;
           let resources: DynamicResourceAction[] | undefined;
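
Note: the generateFiles change above keeps the existing pattern of routing file generation through constrained output with a zod schema. The schema's fields are elided in this hunk; a plausible shape, inferred from the LlmResponseFile objects ({filePath, code}) appearing elsewhere in this PR, would be:

// files-schema-sketch.ts (assumed shape; the real field names are not shown in this diff)
import {z} from 'zod';

const outputFilesSchema = z.object({
  files: z.array(
    z.object({
      filePath: z.string().describe('Project-relative path of the generated file'),
      code: z.string().describe('Full contents of the generated file'),
    }),
  ),
});

// The inferred TypeScript type of a constrained response payload.
type OutputFiles = z.infer<typeof outputFilesSchema>;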