Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion genkit-tools/cli/src/commands/eval-flow.ts
Original file line number Diff line number Diff line change
Expand Up @@ -180,7 +180,7 @@ export const evalFlow = new Command('eval:flow')

const evalRun = {
key: {
actionId: flowName,
actionRef: `/flow/${flowName}`,
evalRunId,
createdAt: new Date().toISOString(),
},
Expand Down
24 changes: 7 additions & 17 deletions genkit-tools/common/src/eval/localFileEvalStore.ts
Original file line number Diff line number Diff line change
Expand Up @@ -61,10 +61,7 @@ export class LocalFileEvalStore implements EvalStore {
}

async save(evalRun: EvalRun): Promise<void> {
const fileName = this.generateFileName(
evalRun.key.evalRunId,
evalRun.key.actionId
);
const fileName = this.generateFileName(evalRun.key.evalRunId);

logger.info(
`Saving EvalRun ${evalRun.key.evalRunId} to ` +
Expand All @@ -85,13 +82,10 @@ export class LocalFileEvalStore implements EvalStore {
);
}

async load(
evalRunId: string,
actionId?: string
): Promise<EvalRun | undefined> {
async load(evalRunId: string): Promise<EvalRun | undefined> {
const filePath = path.resolve(
this.storeRoot,
this.generateFileName(evalRunId, actionId)
this.generateFileName(evalRunId)
);
if (!fs.existsSync(filePath)) {
return undefined;
Expand All @@ -117,8 +111,8 @@ export class LocalFileEvalStore implements EvalStore {

logger.debug(`Found keys: ${JSON.stringify(keys)}`);

if (query?.filter?.actionId) {
keys = keys.filter((key) => key.actionId === query?.filter?.actionId);
if (query?.filter?.actionRef) {
keys = keys.filter((key) => key.actionRef === query?.filter?.actionRef);
logger.debug(`Filtered keys: ${JSON.stringify(keys)}`);
}

Expand All @@ -127,12 +121,8 @@ export class LocalFileEvalStore implements EvalStore {
};
}

private generateFileName(evalRunId: string, actionId?: string): string {
if (!actionId) {
return `${evalRunId}.json`;
}

return `${actionId?.replace('/', '_')}-${evalRunId}.json`;
private generateFileName(evalRunId: string): string {
return `${evalRunId}.json`;
}

private getIndexFilePath(): string {
Expand Down
5 changes: 2 additions & 3 deletions genkit-tools/common/src/server/router.ts
Original file line number Diff line number Diff line change
Expand Up @@ -190,9 +190,8 @@ export const TOOLS_SERVER_ROUTER = (runner: Runner) =>
.output(evals.EvalRunSchema)
.query(async ({ input }) => {
const parts = input.name.split('/');
const evalRunId = parts[3];
const actionId = parts[1] !== '-' ? parts[1] : undefined;
const evalRun = await getEvalStore().load(evalRunId, actionId);
const evalRunId = parts[1];
const evalRun = await getEvalStore().load(evalRunId);
if (!evalRun) {
throw new TRPCError({
code: 'NOT_FOUND',
Expand Down
5 changes: 2 additions & 3 deletions genkit-tools/common/src/types/apis.ts
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,7 @@ export type PageView = z.infer<typeof PageViewSchema>;
export const ListEvalKeysRequestSchema = z.object({
filter: z
.object({
actionId: z.string().optional(),
actionRef: z.string().optional(),
})
.optional(),
});
Expand All @@ -127,8 +127,7 @@ export const ListEvalKeysResponseSchema = z.object({
export type ListEvalKeysResponse = z.infer<typeof ListEvalKeysResponseSchema>;

export const GetEvalRunRequestSchema = z.object({
// Eval run name in the form actions/{action}/evalRun/{evalRun}
// where `action` can be blank e.g. actions/-/evalRun/{evalRun}
// Eval run name in the form evalRuns/{evalRunId}
name: z.string(),
});
export type GetEvalRunRequest = z.infer<typeof GetEvalRunRequestSchema>;
5 changes: 2 additions & 3 deletions genkit-tools/common/src/types/eval.ts
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ export type EvalResult = z.infer<typeof EvalResultSchema>;
* A unique identifier for an Evaluation Run.
*/
export const EvalRunKeySchema = z.object({
actionId: z.string().optional(),
actionRef: z.string().optional(),
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There are existing saved eval runs, and this change will break those, right? Can you add `actionRef` without removing `actionId`?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, it will break them.

Yes, that's a good idea. That should be possible.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Given that `main` is only for critical fixes, I think we can merge this newer version into `next` without complications.

evalRunId: z.string(),
createdAt: z.string(),
});
Expand Down Expand Up @@ -125,9 +125,8 @@ export interface EvalStore {
/**
* Load a single EvalRun from storage
* @param evalRunId the ID of the EvalRun
* @param actionId (optional) the ID of the action used to generate output.
*/
load(evalRunId: string, actionId?: string): Promise<EvalRun | undefined>;
load(evalRunId: string): Promise<EvalRun | undefined>;

/**
* List the keys of all EvalRuns from storage
Expand Down
2 changes: 1 addition & 1 deletion genkit-tools/common/tests/eval/exporter_test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ const EVAL_RESULTS: EvalResult[] = [
];

const EVAL_RUN_KEY: EvalRunKey = {
actionId: 'flow/myAwesomeFlow',
actionRef: 'flow/myAwesomeFlow',
evalRunId: 'abc1234',
createdAt: new Date().toISOString(),
};
Expand Down
15 changes: 6 additions & 9 deletions genkit-tools/common/tests/eval/localFileEvalStore_test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ const METRICS_METADATA = {

const EVAL_RUN_WITH_ACTION = EvalRunSchema.parse({
key: {
actionId: 'flow/tellMeAJoke',
actionRef: 'flow/tellMeAJoke',
evalRunId: 'abc1234',
createdAt: new Date().toISOString(),
},
Expand Down Expand Up @@ -125,7 +125,7 @@ describe('localFileEvalStore', () => {
await evalStore.save(EVAL_RUN_WITH_ACTION);

expect(fs.promises.writeFile).toHaveBeenCalledWith(
`/tmp/.genkit/store-root/evals/flow_tellMeAJoke-abc1234.json`,
`/tmp/.genkit/store-root/evals/abc1234.json`,
JSON.stringify(EVAL_RUN_WITH_ACTION)
);
expect(fs.promises.appendFile).toHaveBeenCalledWith(
Expand Down Expand Up @@ -155,8 +155,7 @@ describe('localFileEvalStore', () => {
Promise.resolve(JSON.stringify(EVAL_RUN_WITH_ACTION) as any)
);
const fetchedEvalRun = await evalStore.load(
EVAL_RUN_WITH_ACTION.key.evalRunId,
EVAL_RUN_WITH_ACTION.key.actionId
EVAL_RUN_WITH_ACTION.key.evalRunId
);
expect(fetchedEvalRun).toMatchObject(EVAL_RUN_WITH_ACTION);
});
Expand All @@ -167,8 +166,7 @@ describe('localFileEvalStore', () => {
Promise.resolve(JSON.stringify(EVAL_RUN_WITHOUT_ACTION) as any)
);
const fetchedEvalRun = await evalStore.load(
EVAL_RUN_WITHOUT_ACTION.key.evalRunId,
EVAL_RUN_WITHOUT_ACTION.key.actionId
EVAL_RUN_WITHOUT_ACTION.key.evalRunId
);
expect(fetchedEvalRun).toMatchObject(EVAL_RUN_WITHOUT_ACTION);
});
Expand All @@ -177,8 +175,7 @@ describe('localFileEvalStore', () => {
fs.existsSync = jest.fn(() => false);

const fetchedEvalRun = await evalStore.load(
EVAL_RUN_WITH_ACTION.key.evalRunId,
EVAL_RUN_WITH_ACTION.key.actionId
EVAL_RUN_WITH_ACTION.key.evalRunId
);
expect(fetchedEvalRun).toBeUndefined();
});
Expand Down Expand Up @@ -208,7 +205,7 @@ describe('localFileEvalStore', () => {
);

const fetchedEvalKeys = await evalStore.list({
filter: { actionId: EVAL_RUN_WITH_ACTION.key.actionId },
filter: { actionRef: EVAL_RUN_WITH_ACTION.key.actionRef },
});

const expectedKeys = { evalRunKeys: [EVAL_RUN_WITH_ACTION.key] };
Expand Down
4 changes: 2 additions & 2 deletions js/testapps/cat-eval/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ import { devLocalVectorstore } from '@genkit-ai/dev-local-vectorstore';
import { dotprompt } from '@genkit-ai/dotprompt';
import { genkitEval, GenkitMetric } from '@genkit-ai/evaluator';
import { firebase } from '@genkit-ai/firebase';
import { geminiPro, googleAI } from '@genkit-ai/googleai';
import { gemini15Pro, googleAI } from '@genkit-ai/googleai';
import { textEmbeddingGecko, vertexAI } from '@genkit-ai/vertexai';

// Turn off safety checks for evaluation so that the LLM as an evaluator can
Expand Down Expand Up @@ -51,7 +51,7 @@ configureGenkit({
firebase(),
googleAI(),
genkitEval({
judge: geminiPro,
judge: gemini15Pro,
judgeConfig: PERMISSIVE_SAFETY_SETTINGS,
metrics: [GenkitMetric.MALICIOUSNESS],
embedder: textEmbeddingGecko,
Expand Down