diff --git a/genkit-tools/cli/src/commands/eval-flow.ts b/genkit-tools/cli/src/commands/eval-flow.ts index cbfb3fb2d7..2cab5288a5 100644 --- a/genkit-tools/cli/src/commands/eval-flow.ts +++ b/genkit-tools/cli/src/commands/eval-flow.ts @@ -180,7 +180,7 @@ export const evalFlow = new Command('eval:flow') const evalRun = { key: { - actionId: flowName, + actionRef: `/flow/${flowName}`, evalRunId, createdAt: new Date().toISOString(), }, diff --git a/genkit-tools/common/src/eval/localFileEvalStore.ts b/genkit-tools/common/src/eval/localFileEvalStore.ts index 9d17066314..e9a1c44ea4 100644 --- a/genkit-tools/common/src/eval/localFileEvalStore.ts +++ b/genkit-tools/common/src/eval/localFileEvalStore.ts @@ -61,10 +61,7 @@ export class LocalFileEvalStore implements EvalStore { } async save(evalRun: EvalRun): Promise { - const fileName = this.generateFileName( - evalRun.key.evalRunId, - evalRun.key.actionId - ); + const fileName = this.generateFileName(evalRun.key.evalRunId); logger.info( `Saving EvalRun ${evalRun.key.evalRunId} to ` + @@ -85,13 +82,10 @@ export class LocalFileEvalStore implements EvalStore { ); } - async load( - evalRunId: string, - actionId?: string - ): Promise { + async load(evalRunId: string): Promise { const filePath = path.resolve( this.storeRoot, - this.generateFileName(evalRunId, actionId) + this.generateFileName(evalRunId) ); if (!fs.existsSync(filePath)) { return undefined; @@ -117,8 +111,8 @@ export class LocalFileEvalStore implements EvalStore { logger.debug(`Found keys: ${JSON.stringify(keys)}`); - if (query?.filter?.actionId) { - keys = keys.filter((key) => key.actionId === query?.filter?.actionId); + if (query?.filter?.actionRef) { + keys = keys.filter((key) => key.actionRef === query?.filter?.actionRef); logger.debug(`Filtered keys: ${JSON.stringify(keys)}`); } @@ -127,12 +121,8 @@ export class LocalFileEvalStore implements EvalStore { }; } - private generateFileName(evalRunId: string, actionId?: string): string { - if (!actionId) { - return `${evalRunId}.json`; - } - - return `${actionId?.replace('/', '_')}-${evalRunId}.json`; + private generateFileName(evalRunId: string): string { + return `${evalRunId}.json`; } private getIndexFilePath(): string { diff --git a/genkit-tools/common/src/server/router.ts b/genkit-tools/common/src/server/router.ts index 5f1859db8f..bcec08c82a 100644 --- a/genkit-tools/common/src/server/router.ts +++ b/genkit-tools/common/src/server/router.ts @@ -190,9 +190,8 @@ export const TOOLS_SERVER_ROUTER = (runner: Runner) => .output(evals.EvalRunSchema) .query(async ({ input }) => { const parts = input.name.split('/'); - const evalRunId = parts[3]; - const actionId = parts[1] !== '-' ? parts[1] : undefined; - const evalRun = await getEvalStore().load(evalRunId, actionId); + const evalRunId = parts[1]; + const evalRun = await getEvalStore().load(evalRunId); if (!evalRun) { throw new TRPCError({ code: 'NOT_FOUND', diff --git a/genkit-tools/common/src/types/apis.ts b/genkit-tools/common/src/types/apis.ts index 6ed3e4961f..b5d19509c7 100644 --- a/genkit-tools/common/src/types/apis.ts +++ b/genkit-tools/common/src/types/apis.ts @@ -113,7 +113,7 @@ export type PageView = z.infer; export const ListEvalKeysRequestSchema = z.object({ filter: z .object({ - actionId: z.string().optional(), + actionRef: z.string().optional(), }) .optional(), }); @@ -127,8 +127,7 @@ export const ListEvalKeysResponseSchema = z.object({ export type ListEvalKeysResponse = z.infer; export const GetEvalRunRequestSchema = z.object({ - // Eval run name in the form actions/{action}/evalRun/{evalRun} - // where `action` can be blank e.g. actions/-/evalRun/{evalRun} + // Eval run name in the form evalRuns/{evalRunId} name: z.string(), }); export type GetEvalRunRequest = z.infer; diff --git a/genkit-tools/common/src/types/eval.ts b/genkit-tools/common/src/types/eval.ts index 75556d048a..13b5255807 100644 --- a/genkit-tools/common/src/types/eval.ts +++ b/genkit-tools/common/src/types/eval.ts @@ -88,7 +88,7 @@ export type EvalResult = z.infer; * A unique identifier for an Evaluation Run. */ export const EvalRunKeySchema = z.object({ - actionId: z.string().optional(), + actionRef: z.string().optional(), evalRunId: z.string(), createdAt: z.string(), }); @@ -125,9 +125,8 @@ export interface EvalStore { /** * Load a single EvalRun from storage * @param evalRunId the ID of the EvalRun - * @param actionId (optional) the ID of the action used to generate output. */ - load(evalRunId: string, actionId?: string): Promise; + load(evalRunId: string): Promise; /** * List the keys of all EvalRuns from storage diff --git a/genkit-tools/common/tests/eval/exporter_test.ts b/genkit-tools/common/tests/eval/exporter_test.ts index 8955661f11..fc81efbff9 100644 --- a/genkit-tools/common/tests/eval/exporter_test.ts +++ b/genkit-tools/common/tests/eval/exporter_test.ts @@ -80,7 +80,7 @@ const EVAL_RESULTS: EvalResult[] = [ ]; const EVAL_RUN_KEY: EvalRunKey = { - actionId: 'flow/myAwesomeFlow', + actionRef: 'flow/myAwesomeFlow', evalRunId: 'abc1234', createdAt: new Date().toISOString(), }; diff --git a/genkit-tools/common/tests/eval/localFileEvalStore_test.ts b/genkit-tools/common/tests/eval/localFileEvalStore_test.ts index 6885c84c38..72fb8758eb 100644 --- a/genkit-tools/common/tests/eval/localFileEvalStore_test.ts +++ b/genkit-tools/common/tests/eval/localFileEvalStore_test.ts @@ -86,7 +86,7 @@ const METRICS_METADATA = { const EVAL_RUN_WITH_ACTION = EvalRunSchema.parse({ key: { - actionId: 'flow/tellMeAJoke', + actionRef: 'flow/tellMeAJoke', evalRunId: 'abc1234', createdAt: new Date().toISOString(), }, @@ -125,7 +125,7 @@ describe('localFileEvalStore', () => { await evalStore.save(EVAL_RUN_WITH_ACTION); expect(fs.promises.writeFile).toHaveBeenCalledWith( - `/tmp/.genkit/store-root/evals/flow_tellMeAJoke-abc1234.json`, + `/tmp/.genkit/store-root/evals/abc1234.json`, JSON.stringify(EVAL_RUN_WITH_ACTION) ); expect(fs.promises.appendFile).toHaveBeenCalledWith( @@ -155,8 +155,7 @@ describe('localFileEvalStore', () => { Promise.resolve(JSON.stringify(EVAL_RUN_WITH_ACTION) as any) ); const fetchedEvalRun = await evalStore.load( - EVAL_RUN_WITH_ACTION.key.evalRunId, - EVAL_RUN_WITH_ACTION.key.actionId + EVAL_RUN_WITH_ACTION.key.evalRunId ); expect(fetchedEvalRun).toMatchObject(EVAL_RUN_WITH_ACTION); }); @@ -167,8 +166,7 @@ describe('localFileEvalStore', () => { Promise.resolve(JSON.stringify(EVAL_RUN_WITHOUT_ACTION) as any) ); const fetchedEvalRun = await evalStore.load( - EVAL_RUN_WITHOUT_ACTION.key.evalRunId, - EVAL_RUN_WITHOUT_ACTION.key.actionId + EVAL_RUN_WITHOUT_ACTION.key.evalRunId ); expect(fetchedEvalRun).toMatchObject(EVAL_RUN_WITHOUT_ACTION); }); @@ -177,8 +175,7 @@ describe('localFileEvalStore', () => { fs.existsSync = jest.fn(() => false); const fetchedEvalRun = await evalStore.load( - EVAL_RUN_WITH_ACTION.key.evalRunId, - EVAL_RUN_WITH_ACTION.key.actionId + EVAL_RUN_WITH_ACTION.key.evalRunId ); expect(fetchedEvalRun).toBeUndefined(); }); @@ -208,7 +205,7 @@ describe('localFileEvalStore', () => { ); const fetchedEvalKeys = await evalStore.list({ - filter: { actionId: EVAL_RUN_WITH_ACTION.key.actionId }, + filter: { actionRef: EVAL_RUN_WITH_ACTION.key.actionRef }, }); const expectedKeys = { evalRunKeys: [EVAL_RUN_WITH_ACTION.key] }; diff --git a/js/testapps/cat-eval/src/index.ts b/js/testapps/cat-eval/src/index.ts index 5ac4f9ccdb..b3a6784aa8 100644 --- a/js/testapps/cat-eval/src/index.ts +++ b/js/testapps/cat-eval/src/index.ts @@ -19,7 +19,7 @@ import { devLocalVectorstore } from '@genkit-ai/dev-local-vectorstore'; import { dotprompt } from '@genkit-ai/dotprompt'; import { genkitEval, GenkitMetric } from '@genkit-ai/evaluator'; import { firebase } from '@genkit-ai/firebase'; -import { geminiPro, googleAI } from '@genkit-ai/googleai'; +import { gemini15Pro, googleAI } from '@genkit-ai/googleai'; import { textEmbeddingGecko, vertexAI } from '@genkit-ai/vertexai'; // Turn off safety checks for evaluation so that the LLM as an evaluator can @@ -51,7 +51,7 @@ configureGenkit({ firebase(), googleAI(), genkitEval({ - judge: geminiPro, + judge: gemini15Pro, judgeConfig: PERMISSIVE_SAFETY_SETTINGS, metrics: [GenkitMetric.MALICIOUSNESS], embedder: textEmbeddingGecko,