Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 6 additions & 3 deletions genkit-tools/cli/src/commands/eval-extract-data.ts
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,12 @@
*/

import { EnvTypes, EvalInput, TraceData } from '@genkit-ai/tools-common';
import { getEvalExtractors, logger } from '@genkit-ai/tools-common/utils';
import {
generateTestCaseId,
getEvalExtractors,
logger,
} from '@genkit-ai/tools-common/utils';
import { Command } from 'commander';
import { randomUUID } from 'crypto';
import { writeFile } from 'fs/promises';
import { runInRunnerThenStop } from '../utils/runner-utils';

Expand Down Expand Up @@ -73,7 +76,7 @@ export const evalExtractData = new Command('eval:extractData')
.filter((t): t is TraceData => !!t)
.map((trace) => {
return {
testCaseId: randomUUID(),
testCaseId: generateTestCaseId(),
input: extractors.input(trace),
output: extractors.output(trace),
context: JSON.parse(extractors.context(trace)) as string[],
Expand Down
82 changes: 52 additions & 30 deletions genkit-tools/cli/src/commands/eval-flow.ts
Original file line number Diff line number Diff line change
Expand Up @@ -16,14 +16,13 @@

import {
Action,
EvalFlowInput,
EvalFlowInputSchema,
EvalInferenceInput,
EvalInferenceInputSchema,
} from '@genkit-ai/tools-common';
import {
EvalExporter,
getAllEvaluatorActions,
getDatasetStore,
getEvalStore,
getExporterForString,
getMatchingEvaluatorActions,
runEvaluation,
Expand All @@ -44,6 +43,11 @@ interface EvalFlowRunCliOptions {
}

const EVAL_FLOW_SCHEMA = '{samples: Array<{input: any; reference?: any;}>}';
/**
 * Identifies where the evaluation inputs for a flow run come from.
 * The value is chosen by `getSourceType` and drives how `readInputs`
 * loads the samples.
 */
enum SourceType {
// Inline JSON string supplied via the `data` argument; parsed with JSON.parse.
DATA = 'data',
// `input` ends with `.json`; the file is read from disk and JSON-parsed.
FILE = 'file',
// Any other non-empty `input`; treated as a dataset ID and fetched from the dataset store.
DATASET = 'dataset',
}

/** Command to run a flow and evaluate the output */
export const evalFlow = new Command('eval:flow')
Expand Down Expand Up @@ -105,8 +109,18 @@ export const evalFlow = new Command('eval:flow')
}
}

const sourceType = getSourceType(data, options.input);
let targetDatasetMetadata;
if (sourceType === SourceType.DATASET) {
const datasetStore = await getDatasetStore();
const datasetMetadatas = await datasetStore.listDatasets();
targetDatasetMetadata = datasetMetadatas.find(
(d) => d.datasetId === options.input
);
}

const actionRef = `/flow/${flowName}`;
const evalFlowInput = await readInputs(data, options.input);
const evalFlowInput = await readInputs(sourceType, data, options.input);
const evalDataset = await runInference({
runner,
actionRef,
Expand All @@ -118,16 +132,14 @@ export const evalFlow = new Command('eval:flow')
runner,
evaluatorActions,
evalDataset,
actionRef: `/flow/${flowName}`,
datasetId:
options.input && !options.input.endsWith('.json')
? options.input
: undefined,
augments: {
actionRef: `/flow/${flowName}`,
datasetId:
sourceType === SourceType.DATASET ? options.input : undefined,
datasetVersion: targetDatasetMetadata?.version,
},
});

const evalStore = getEvalStore();
await evalStore.save(evalRun);

if (options.output) {
const exportFn: EvalExporter = getExporterForString(
options.outputFormat
Expand All @@ -147,33 +159,43 @@ export const evalFlow = new Command('eval:flow')
* Only one of these parameters is expected to be provided.
**/
async function readInputs(
data?: string,
sourceType: SourceType,
dataField?: string,
input?: string
): Promise<EvalFlowInput> {
): Promise<EvalInferenceInput> {
let parsedData;
if (input) {
if (data) {
logger.warn('Both [data] and input provided, ignoring [data]...');
}
const isFile = input.endsWith('.json');
if (isFile) {
parsedData = JSON.parse(await readFile(input, 'utf8'));
} else {
switch (sourceType) {
case SourceType.DATA:
parsedData = JSON.parse(dataField!);
break;
case SourceType.FILE:
parsedData = JSON.parse(await readFile(input!, 'utf8'));
break;
case SourceType.DATASET:
const datasetStore = await getDatasetStore();
parsedData = await datasetStore.getDataset(input);
}
} else if (data) {
parsedData = JSON.parse(data);
}
if (Array.isArray(parsedData)) {
return parsedData as any[];
const data = await datasetStore.getDataset(input!);
// Format to match EvalInferenceInputSchema
parsedData = { samples: data };
break;
}

try {
return EvalFlowInputSchema.parse(parsedData);
return EvalInferenceInputSchema.parse(parsedData);
} catch (e) {
throw new Error(
`Error parsing the input. Please provide an array of inputs for the flow or a ${EVAL_FLOW_SCHEMA} object. Error: ${e}`
);
}
}

/**
 * Decides which kind of source the evaluation inputs should be read from.
 *
 * A non-empty `input` always wins: it is classified as a file when it ends
 * with `.json` and as a dataset otherwise. If `data` was supplied as well,
 * a warning is logged and `data` is ignored. When only `data` is non-empty,
 * the inline-data source is selected. Empty strings count as "not provided".
 *
 * @param data - Inline JSON payload, if any.
 * @param input - File path or dataset identifier, if any.
 * @returns The source type matching the provided arguments.
 * @throws Error when neither `data` nor `input` is provided.
 */
function getSourceType(data?: string, input?: string): SourceType {
  if (!input) {
    // No input reference: fall back to inline data, or fail if that is missing too.
    if (!data) {
      throw new Error('Must provide either data or input');
    }
    return SourceType.DATA;
  }

  if (data) {
    logger.warn('Both [data] and input provided, ignoring [data]...');
  }

  const pointsToJsonFile = input.endsWith('.json');
  return pointsToJsonFile ? SourceType.FILE : SourceType.DATASET;
}
13 changes: 6 additions & 7 deletions genkit-tools/cli/src/commands/eval-run.ts
Original file line number Diff line number Diff line change
Expand Up @@ -18,14 +18,16 @@ import { Action, EvalInput } from '@genkit-ai/tools-common';
import {
EvalExporter,
getAllEvaluatorActions,
getEvalStore,
getExporterForString,
getMatchingEvaluatorActions,
runEvaluation,
} from '@genkit-ai/tools-common/eval';
import { confirmLlmUse, logger } from '@genkit-ai/tools-common/utils';
import {
confirmLlmUse,
generateTestCaseId,
logger,
} from '@genkit-ai/tools-common/utils';
import { Command } from 'commander';
import { randomUUID } from 'crypto';
import { readFile } from 'fs/promises';
import { runInRunnerThenStop } from '../utils/runner-utils';

Expand Down Expand Up @@ -91,7 +93,7 @@ export const evalRun = new Command('eval:run')
(await readFile(dataset)).toString('utf-8')
).map((testCase: any) => ({
...testCase,
testCaseId: testCase.testCaseId || randomUUID(),
testCaseId: testCase.testCaseId || generateTestCaseId(),
traceIds: testCase.traceIds || [],
}));
const evalRun = await runEvaluation({
Expand All @@ -100,9 +102,6 @@ export const evalRun = new Command('eval:run')
evalDataset,
});

const evalStore = getEvalStore();
await evalStore.save(evalRun);

if (options.output) {
const exportFn: EvalExporter = getExporterForString(
options.outputFormat
Expand Down
Loading
Loading