firebase · ssbushi · Jul 9, 2024 · Jul 8, 2024 · Jul 8, 2024 · Jul 8, 2024
diff --git a/genkit-tools/cli/src/commands/eval-flow.ts b/genkit-tools/cli/src/commands/eval-flow.ts
@@ -52,6 +52,12 @@ interface EvalFlowRunOptions {
   outputFormat: string;
 }
 
+interface FlowRunState { // One flow run's state plus its error status.
+  state: FlowState; // The FlowState returned for this run.
+  hasErrored: boolean; // True when state.operation.result?.error is truthy.
+  error?: string; // Copy of state.operation.result?.error, if any.
+}
+
 const EVAL_FLOW_SCHEMA = '{samples: Array<{input: any; reference?: any;}>}';
 
 /** Command to run a flow and evaluate the output */
@@ -136,19 +142,21 @@ export const evalFlow = new Command('eval:flow')
 
         const states = await runFlows(runner, flowName, parsedData);
 
-        const errors = states
-          .filter((s) => s.operation.result?.error)
-          .map((s) => s.operation.result?.error);
-        if (errors.length > 0) {
-          logger.error('Some flows failed with the following errors');
-          logger.error(errors);
-          return;
+        const runStates: FlowRunState[] = states.map((s) => {
+          return {
+            state: s,
+            hasErrored: !!s.operation.result?.error,
+            error: s.operation.result?.error,
+          } as FlowRunState;
+        });
+        if (runStates.some((s) => s.hasErrored)) {
+          logger.error('Some flows failed with errors');
         }
 
         const evalDataset = await fetchDataSet(
           runner,
           flowName,
-          states,
+          runStates,
           parsedData
         );
         const evalRunId = randomUUID();
@@ -159,7 +167,7 @@ export const evalFlow = new Command('eval:flow')
           const response = await runner.runAction({
             key: name,
             input: {
-              dataset: evalDataset,
+              dataset: evalDataset.filter((row) => !row.error),
               evalRunId,
               auth: options.auth ? JSON.parse(options.auth) : undefined,
             },
@@ -251,7 +259,7 @@ async function runFlows(
 async function fetchDataSet(
   runner: Runner,
   flowName: string,
-  states: FlowState[],
+  states: FlowRunState[],
   parsedData: EvalFlowInput
 ): Promise<EvalInput[]> {
   let references: any[] | undefined = undefined;
@@ -268,7 +276,7 @@ async function fetchDataSet(
   const extractors = await getEvalExtractors(flowName);
   return await Promise.all(
     states.map(async (s, i) => {
-      const traceIds = s.executions.flatMap((e) => e.traceIds);
+      const traceIds = s.state.executions.flatMap((e) => e.traceIds);
       if (traceIds.length > 1) {
         logger.warn('The flow is split across multiple traces');
       }
@@ -288,8 +296,23 @@ async function fetchDataSet(
       let inputs: string[] = [];
       let outputs: string[] = [];
       let contexts: string[] = [];
+
+      // First extract inputs for all traces
       traces.forEach((trace) => {
         inputs.push(extractors.input(trace));
+      });
+
+      if (s.hasErrored) {
+        return {
+          testCaseId: randomUUID(),
+          input: inputs[0],
+          error: s.error,
+          reference: references?.at(i),
+          traceIds,
+        };
+      }
+
+      traces.forEach((trace) => {
         outputs.push(extractors.output(trace));
         contexts.push(extractors.context(trace));
       });

diff --git a/genkit-tools/common/jest.config.ts b/genkit-tools/common/jest.config.ts
@@ -0,0 +1,46 @@
+/**
+ * Copyright 2024 Google LLC
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * For a detailed explanation regarding each configuration property, visit:
+ * https://jestjs.io/docs/configuration
+ */
+
+import type { Config } from 'jest';
+
+const config: Config = {
+  // Automatically clear mock calls, instances, contexts and results before every test
+  clearMocks: true,
+
+  // A preset that is used as a base for Jest's configuration
+  preset: 'ts-jest',
+
+  // The glob patterns Jest uses to detect test files: any `*_test.ts` file under a `tests/` directory
+  testMatch: ['**/tests/**/*_test.ts'],
+
+  // Regexp pattern strings matched against all test paths; matching tests are skipped
+  testPathIgnorePatterns: ['/node_modules/'],
+
+  // A map from regular expressions to transformers: `.js` and `.ts` files are run through ts-jest
+  transform: {
+    '^.+\\.[jt]s$': 'ts-jest',
+  },
+
+  // Regexp pattern strings matched against all source file paths; matching files skip transformation
+  transformIgnorePatterns: ['/node_modules/'],
+};
+
+export default config;
diff --git a/genkit-tools/common/package.json b/genkit-tools/common/package.json
@@ -5,6 +5,7 @@
     "compile": "tsc -b ./tsconfig.cjs.json ./tsconfig.esm.json ./tsconfig.types.json",
     "build:clean": "rm -rf ./lib",
     "build": "npm-run-all build:clean compile",
+    "test": "jest --verbose",
     "build:watch": "tsc -b ./tsconfig.cjs.json ./tsconfig.esm.json ./tsconfig.types.json --watch"
   },
   "dependencies": {
@@ -40,6 +41,9 @@
     "@types/express": "^4.17.21",
     "@types/inquirer": "^8.1.3",
     "@types/jest": "^29.5.12",
+    "@jest/globals": "^29.7.0",
+    "jest": "^29.7.0",
+    "ts-jest": "^29.1.2",
     "@types/js-yaml": "^4.0.9",
     "@types/node": "^20.11.19",
     "@types/uuid": "^9.0.8",

diff --git a/genkit-tools/common/src/types/eval.ts b/genkit-tools/common/src/types/eval.ts
@@ -57,6 +57,7 @@ export const EvalInputSchema = z.object({
   testCaseId: z.string(),
   input: z.any(),
   output: z.any(),
+  error: z.string().optional(),
   context: z.array(z.string()).optional(),
   reference: z.any().optional(),
   traceIds: z.array(z.string()),

diff --git a/genkit-tools/common/src/utils/eval.ts b/genkit-tools/common/src/utils/eval.ts
@@ -80,16 +80,21 @@ export async function confirmLlmUse(
   return answers.confirm;
 }
 
-function getRootSpan(trace: TraceData): SpanData | undefined {
+function getRootSpan(
+  trace: TraceData,
+  shouldSucceed: boolean = true
+): SpanData | undefined {
   return Object.values(trace.spans).find(
     (s) =>
       s.attributes['genkit:type'] === 'flow' &&
-      s.attributes['genkit:metadata:flow:state'] === 'done'
+      // The 'done' state is only required when shouldSucceed is true.
+      (!shouldSucceed ||
+        s.attributes['genkit:metadata:flow:state'] === 'done')
   );
 }
 
 const DEFAULT_INPUT_EXTRACTOR: EvalExtractorFn = (trace: TraceData) => {
-  const rootSpan = getRootSpan(trace);
+  const rootSpan = getRootSpan(trace, /* shouldSucceed= */ false);
   return (rootSpan?.attributes['genkit:input'] as string) || JSON_EMPTY_STRING;
 };
 const DEFAULT_OUTPUT_EXTRACTOR: EvalExtractorFn = (trace: TraceData) => {

diff --git a/genkit-tools/common/tests/utils/eval_test.ts b/genkit-tools/common/tests/utils/eval_test.ts
@@ -201,4 +201,28 @@ describe('eval utils', () => {
       JSON.stringify(['Hello', 'World'])
     );
   });
+
+  it('runs default extractors when trace fails', async () => {
+    // Stub the tools-config lookup so it resolves to null for this test.
+    jest.spyOn(configModule, 'findToolsConfig').mockResolvedValue(null);
+    const trace = new MockTrace('My input', 'My output', 'error')
+      .addSpan({
+        stepName: 'retrieverStep',
+        spanType: 'action',
+        retrieverConfig: {
+          query: 'What are cats?',
+          text: CONTEXT_TEXTS,
+        },
+      })
+      .getTrace();
+
+    const extractors = await getEvalExtractors('multiSteps');
+
+    expect(Object.keys(extractors).sort()).toEqual(
+      ['input', 'output', 'context'].sort()
+    );
+    expect(extractors.input(trace)).toEqual(JSON.stringify('My input'));
+    expect(extractors.output(trace)).toEqual(JSON.stringify(''));
+    expect(extractors.context(trace)).toEqual(JSON.stringify(CONTEXT_TEXTS));
+  });
 });
diff --git a/genkit-tools/common/tests/utils/trace.ts b/genkit-tools/common/tests/utils/trace.ts
@@ -223,12 +223,17 @@ export class MockTrace {
     return this;
   }
 
-  constructor(traceInput?: any, traceOutput?: any) {
+  constructor(
+    traceInput?: any,
+    traceOutput?: any,
+    baseFlowState: 'done' | 'error' = 'done'
+  ) {
     const flowInput = traceInput ?? 'Douglas Adams';
     const flowOutput = traceOutput ?? 42;
     let baseFlowSpan = { ...this.BASE_FLOW_SPAN };
     baseFlowSpan.attributes['genkit:input'] = JSON.stringify(flowInput);
     baseFlowSpan.attributes['genkit:output'] = JSON.stringify(flowOutput);
+    baseFlowSpan.attributes['genkit:metadata:flow:state'] = baseFlowState;
 
     let wrapperActionSpan = { ...this.WRAPPER_ACTION_SPAN };
     wrapperActionSpan.attributes['genkit:input'] = JSON.stringify({

diff --git a/genkit-tools/pnpm-lock.yaml b/genkit-tools/pnpm-lock.yaml
diff --git a/js/pnpm-lock.yaml b/js/pnpm-lock.yaml
diff --git a/js/testapps/cat-eval/package.json b/js/testapps/cat-eval/package.json
@@ -16,16 +16,17 @@
   "dependencies": {
     "@genkit-ai/ai": "workspace:*",
     "@genkit-ai/core": "workspace:*",
-    "@genkit-ai/dotprompt": "workspace:*",
-    "@genkit-ai/flow": "workspace:*",
     "@genkit-ai/dev-local-vectorstore": "workspace:*",
+    "@genkit-ai/dotprompt": "workspace:*",
+    "@genkit-ai/evaluator": "workspace:*",
     "@genkit-ai/firebase": "workspace:*",
+    "@genkit-ai/flow": "workspace:*",
     "@genkit-ai/googleai": "workspace:*",
-    "genkitx-pinecone": "workspace:*",
-    "@genkit-ai/evaluator": "workspace:*",
     "@genkit-ai/vertexai": "workspace:*",
+    "genkitx-pinecone": "workspace:*",
     "llm-chunk": "^0.0.1",
     "pdfjs-dist": "^4.0.379",
+    "pdfjs-dist-legacy": "^1.0.1",
     "zod": "^3.22.4"
   },
   "devDependencies": {

diff --git a/js/testapps/cat-eval/src/pdf_rag.ts b/js/testapps/cat-eval/src/pdf_rag.ts
@@ -24,6 +24,7 @@ import { defineFlow, run } from '@genkit-ai/flow';
 import { geminiPro } from '@genkit-ai/googleai';
 import { chunk } from 'llm-chunk';
 import path from 'path';
+import { getDocument } from 'pdfjs-dist-legacy';
 import * as z from 'zod';
 
 export const pdfChatRetriever = devLocalRetrieverRef('pdfQA');
@@ -67,26 +68,6 @@ export const pdfQA = defineFlow(
     const llmResponse = await generate({
       model: geminiPro,
       prompt: augmentedPrompt,
-      config: {
-        safetySettings: [
-          {
-            category: 'HARM_CATEGORY_HATE_SPEECH',
-            threshold: 'BLOCK_NONE',
-          },
-          {
-            category: 'HARM_CATEGORY_DANGEROUS_CONTENT',
-            threshold: 'BLOCK_NONE',
-          },
-          {
-            category: 'HARM_CATEGORY_HARASSMENT',
-            threshold: 'BLOCK_NONE',
-          },
-          {
-            category: 'HARM_CATEGORY_SEXUALLY_EXPLICIT',
-            threshold: 'BLOCK_NONE',
-          },
-        ],
-      },
     });
     return llmResponse.text();
   }
@@ -127,8 +108,7 @@ export const indexPdf = defineFlow(
 );
 
 async function extractText(filePath: string): Promise<string> {
-  const pdfjsLib = await import('pdfjs-dist');
-  let doc = await pdfjsLib.getDocument(filePath).promise;
+  let doc = await getDocument(filePath).promise;
 
   let pdfTxt = '';
   const numPages = doc.numPages;

diff --git a/package.json b/package.json
@@ -19,7 +19,9 @@
     "dist:zip": "cd dist && zip genkit-dist.zip *.tgz",
     "test:all": "npm-run-all test:js test:genkit-tools",
     "test:js": "cd js && pnpm i && pnpm test:all",
-    "test:genkit-tools": "cd genkit-tools/cli && pnpm i && pnpm test",
+    "test:genkit-tools": "pnpm test:genkit-tools-cli && pnpm test:genkit-tools-common",
+    "test:genkit-tools-cli": "cd genkit-tools/cli && pnpm i && pnpm test",
+    "test:genkit-tools-common": "cd genkit-tools/common && pnpm i && pnpm test",
     "test:e2e": "pnpm build && pnpm pack:all && cd tests && pnpm test"
   },
   "pre-commit": [