firebase · ssbushi · Aug 20, 2024 · Aug 6, 2024 · Aug 6, 2024 · pavelgj
diff --git a/genkit-tools/cli/src/commands/eval-flow.ts b/genkit-tools/cli/src/commands/eval-flow.ts
@@ -180,7 +180,7 @@ export const evalFlow = new Command('eval:flow')
 
         const evalRun = {
           key: {
-            actionId: flowName,
+            actionRef: `/flow/${flowName}`,
             evalRunId,
             createdAt: new Date().toISOString(),
           },

diff --git a/genkit-tools/common/src/eval/localFileEvalStore.ts b/genkit-tools/common/src/eval/localFileEvalStore.ts
@@ -61,10 +61,7 @@ export class LocalFileEvalStore implements EvalStore {
   }
 
   async save(evalRun: EvalRun): Promise<void> {
-    const fileName = this.generateFileName(
-      evalRun.key.evalRunId,
-      evalRun.key.actionId
-    );
+    const fileName = this.generateFileName(evalRun.key.evalRunId);
 
     logger.info(
       `Saving EvalRun ${evalRun.key.evalRunId} to ` +
@@ -85,13 +82,10 @@ export class LocalFileEvalStore implements EvalStore {
     );
   }
 
-  async load(
-    evalRunId: string,
-    actionId?: string
-  ): Promise<EvalRun | undefined> {
+  async load(evalRunId: string): Promise<EvalRun | undefined> {
     const filePath = path.resolve(
       this.storeRoot,
-      this.generateFileName(evalRunId, actionId)
+      this.generateFileName(evalRunId)
     );
     if (!fs.existsSync(filePath)) {
       return undefined;
@@ -117,8 +111,8 @@ export class LocalFileEvalStore implements EvalStore {
 
     logger.debug(`Found keys: ${JSON.stringify(keys)}`);
 
-    if (query?.filter?.actionId) {
-      keys = keys.filter((key) => key.actionId === query?.filter?.actionId);
+    if (query?.filter?.actionRef) {
+      keys = keys.filter((key) => key.actionRef === query?.filter?.actionRef);
       logger.debug(`Filtered keys: ${JSON.stringify(keys)}`);
     }
 
@@ -127,12 +121,8 @@ export class LocalFileEvalStore implements EvalStore {
     };
   }
 
-  private generateFileName(evalRunId: string, actionId?: string): string {
-    if (!actionId) {
-      return `${evalRunId}.json`;
-    }
-
-    return `${actionId?.replace('/', '_')}-${evalRunId}.json`;
+  private generateFileName(evalRunId: string): string {
+    return `${evalRunId}.json`;
   }
 
   private getIndexFilePath(): string {

diff --git a/genkit-tools/common/src/server/router.ts b/genkit-tools/common/src/server/router.ts
@@ -190,9 +190,8 @@ export const TOOLS_SERVER_ROUTER = (runner: Runner) =>
       .output(evals.EvalRunSchema)
       .query(async ({ input }) => {
         const parts = input.name.split('/');
-        const evalRunId = parts[3];
-        const actionId = parts[1] !== '-' ? parts[1] : undefined;
-        const evalRun = await getEvalStore().load(evalRunId, actionId);
+        const evalRunId = parts[1];
+        const evalRun = await getEvalStore().load(evalRunId);
         if (!evalRun) {
           throw new TRPCError({
             code: 'NOT_FOUND',

diff --git a/genkit-tools/common/src/types/apis.ts b/genkit-tools/common/src/types/apis.ts
@@ -113,7 +113,7 @@ export type PageView = z.infer<typeof PageViewSchema>;
 export const ListEvalKeysRequestSchema = z.object({
   filter: z
     .object({
-      actionId: z.string().optional(),
+      actionRef: z.string().optional(),
     })
     .optional(),
 });
@@ -127,8 +127,7 @@ export const ListEvalKeysResponseSchema = z.object({
 export type ListEvalKeysResponse = z.infer<typeof ListEvalKeysResponseSchema>;
 
 export const GetEvalRunRequestSchema = z.object({
-  // Eval run name in the form actions/{action}/evalRun/{evalRun}
-  // where `action` can be blank e.g. actions/-/evalRun/{evalRun}
+  // Eval run name in the form evalRuns/{evalRunId}
   name: z.string(),
 });
 export type GetEvalRunRequest = z.infer<typeof GetEvalRunRequestSchema>;
diff --git a/genkit-tools/common/src/types/eval.ts b/genkit-tools/common/src/types/eval.ts
@@ -88,7 +88,7 @@ export type EvalResult = z.infer<typeof EvalResultSchema>;
  * A unique identifier for an Evaluation Run.
  */
 export const EvalRunKeySchema = z.object({
-  actionId: z.string().optional(),
+  actionRef: z.string().optional(),
   evalRunId: z.string(),
   createdAt: z.string(),
 });
@@ -125,9 +125,8 @@ export interface EvalStore {
   /**
    * Load a single EvalRun from storage
    * @param evalRunId the ID of the EvalRun
-   * @param actionId (optional) the ID of the action used to generate output.
    */
-  load(evalRunId: string, actionId?: string): Promise<EvalRun | undefined>;
+  load(evalRunId: string): Promise<EvalRun | undefined>;
 
   /**
    * List the keys of all EvalRuns from storage

diff --git a/genkit-tools/common/tests/eval/exporter_test.ts b/genkit-tools/common/tests/eval/exporter_test.ts
@@ -80,7 +80,7 @@ const EVAL_RESULTS: EvalResult[] = [
 ];
 
 const EVAL_RUN_KEY: EvalRunKey = {
-  actionId: 'flow/myAwesomeFlow',
+  actionRef: 'flow/myAwesomeFlow',
   evalRunId: 'abc1234',
   createdAt: new Date().toISOString(),
 };

diff --git a/genkit-tools/common/tests/eval/localFileEvalStore_test.ts b/genkit-tools/common/tests/eval/localFileEvalStore_test.ts
@@ -86,7 +86,7 @@ const METRICS_METADATA = {
 
 const EVAL_RUN_WITH_ACTION = EvalRunSchema.parse({
   key: {
-    actionId: 'flow/tellMeAJoke',
+    actionRef: 'flow/tellMeAJoke',
     evalRunId: 'abc1234',
     createdAt: new Date().toISOString(),
   },
@@ -125,7 +125,7 @@ describe('localFileEvalStore', () => {
       await evalStore.save(EVAL_RUN_WITH_ACTION);
 
       expect(fs.promises.writeFile).toHaveBeenCalledWith(
-        `/tmp/.genkit/store-root/evals/flow_tellMeAJoke-abc1234.json`,
+        `/tmp/.genkit/store-root/evals/abc1234.json`,
         JSON.stringify(EVAL_RUN_WITH_ACTION)
       );
       expect(fs.promises.appendFile).toHaveBeenCalledWith(
@@ -155,8 +155,7 @@ describe('localFileEvalStore', () => {
         Promise.resolve(JSON.stringify(EVAL_RUN_WITH_ACTION) as any)
       );
       const fetchedEvalRun = await evalStore.load(
-        EVAL_RUN_WITH_ACTION.key.evalRunId,
-        EVAL_RUN_WITH_ACTION.key.actionId
+        EVAL_RUN_WITH_ACTION.key.evalRunId
       );
       expect(fetchedEvalRun).toMatchObject(EVAL_RUN_WITH_ACTION);
     });
@@ -167,8 +166,7 @@ describe('localFileEvalStore', () => {
         Promise.resolve(JSON.stringify(EVAL_RUN_WITHOUT_ACTION) as any)
       );
       const fetchedEvalRun = await evalStore.load(
-        EVAL_RUN_WITHOUT_ACTION.key.evalRunId,
-        EVAL_RUN_WITHOUT_ACTION.key.actionId
+        EVAL_RUN_WITHOUT_ACTION.key.evalRunId
       );
       expect(fetchedEvalRun).toMatchObject(EVAL_RUN_WITHOUT_ACTION);
     });
@@ -177,8 +175,7 @@ describe('localFileEvalStore', () => {
       fs.existsSync = jest.fn(() => false);
 
       const fetchedEvalRun = await evalStore.load(
-        EVAL_RUN_WITH_ACTION.key.evalRunId,
-        EVAL_RUN_WITH_ACTION.key.actionId
+        EVAL_RUN_WITH_ACTION.key.evalRunId
       );
       expect(fetchedEvalRun).toBeUndefined();
     });
@@ -208,7 +205,7 @@ describe('localFileEvalStore', () => {
       );
 
       const fetchedEvalKeys = await evalStore.list({
-        filter: { actionId: EVAL_RUN_WITH_ACTION.key.actionId },
+        filter: { actionRef: EVAL_RUN_WITH_ACTION.key.actionRef },
       });
 
       const expectedKeys = { evalRunKeys: [EVAL_RUN_WITH_ACTION.key] };

diff --git a/js/testapps/cat-eval/src/index.ts b/js/testapps/cat-eval/src/index.ts
@@ -19,7 +19,7 @@ import { devLocalVectorstore } from '@genkit-ai/dev-local-vectorstore';
 import { dotprompt } from '@genkit-ai/dotprompt';
 import { genkitEval, GenkitMetric } from '@genkit-ai/evaluator';
 import { firebase } from '@genkit-ai/firebase';
-import { geminiPro, googleAI } from '@genkit-ai/googleai';
+import { gemini15Pro, googleAI } from '@genkit-ai/googleai';
 import { textEmbeddingGecko, vertexAI } from '@genkit-ai/vertexai';
 
 // Turn off safety checks for evaluation so that the LLM as an evaluator can
@@ -51,7 +51,7 @@ configureGenkit({
     firebase(),
     googleAI(),
     genkitEval({
-      judge: geminiPro,
+      judge: gemini15Pro,
       judgeConfig: PERMISSIVE_SAFETY_SETTINGS,
       metrics: [GenkitMetric.MALICIOUSNESS],
       embedder: textEmbeddingGecko,