From d9d16bf897377832b3bde77c249b56b35772a89f Mon Sep 17 00:00:00 2001 From: Samuel Bushi Date: Wed, 22 Jan 2025 21:36:05 +0000 Subject: [PATCH 1/5] new api for data validation --- genkit-tools/common/package.json | 2 + genkit-tools/common/src/eval/index.ts | 1 + genkit-tools/common/src/eval/validate.ts | 110 +++++++++++++++++++++++ genkit-tools/common/src/server/router.ts | 18 +++- genkit-tools/common/src/types/apis.ts | 22 +++++ genkit-tools/pnpm-lock.yaml | 50 ++++++++++- 6 files changed, 199 insertions(+), 4 deletions(-) create mode 100644 genkit-tools/common/src/eval/validate.ts diff --git a/genkit-tools/common/package.json b/genkit-tools/common/package.json index 6f7724e404..a40378e06b 100644 --- a/genkit-tools/common/package.json +++ b/genkit-tools/common/package.json @@ -26,6 +26,8 @@ "json-2-csv": "^5.5.1", "json-schema": "^0.4.0", "terminate": "^2.6.1", + "ajv": "^8.12.0", + "ajv-formats": "^3.0.1", "tsx": "^4.19.2", "uuid": "^9.0.1", "winston": "^3.11.0", diff --git a/genkit-tools/common/src/eval/index.ts b/genkit-tools/common/src/eval/index.ts index d2805447a4..64a5db61f4 100644 --- a/genkit-tools/common/src/eval/index.ts +++ b/genkit-tools/common/src/eval/index.ts @@ -21,6 +21,7 @@ export { InferenceDataset, InferenceDatasetSchema } from '../types/eval'; export * from './evaluate'; export * from './exporter'; export * from './parser'; +export * from './validate'; export function getEvalStore(): EvalStore { // TODO: This should provide EvalStore, based on tools config. diff --git a/genkit-tools/common/src/eval/validate.ts b/genkit-tools/common/src/eval/validate.ts new file mode 100644 index 0000000000..db3dac1231 --- /dev/null +++ b/genkit-tools/common/src/eval/validate.ts @@ -0,0 +1,110 @@ +/** + * Copyright 2025 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import Ajv, { ErrorObject, JSONSchemaType } from 'ajv'; +import addFormats from 'ajv-formats'; +import { getDatasetStore } from '.'; +import { RuntimeManager } from '../manager'; +import { + Action, + InferenceDatasetSchema, + ValidateDataRequest, + ValidateDataResponse, +} from '../types'; + +// Setup for AJV +type JSONSchema = JSONSchemaType | any; +const ajv = new Ajv(); +addFormats(ajv); + +/** + * Validate given data against a target action. Intended to be used via the + * reflection API. + */ +export async function validateSchema( + manager: RuntimeManager, + request: ValidateDataRequest +): Promise { + const { dataSource, actionRef } = request; + const { datasetId, data } = dataSource; + if (!datasetId && !data) { + throw new Error(`Either 'data' or 'datasetId' must be provided`); + } + const targetAction = await getAction(manager, actionRef); + const targetSchema = targetAction?.inputSchema; + if (!targetAction) { + throw new Error(`Could not find matching action for ${actionRef}`); + } + if (!targetSchema) { + return { valid: true }; + } + + let errorsMap: Record = {}; + + if (datasetId) { + const datasetStore = await getDatasetStore(); + const dataset = await datasetStore.getDataset(datasetId); + if (dataset.length === 0) { + return { valid: true }; + } + dataset.forEach((sample, index) => { + const response = validate(targetSchema, sample.input); + if (!response.valid) { + errorsMap[sample.testCaseId] = response.errors; + } + }); + + return Object.keys(errorsMap).length === 0 + ? { valid: true } + : { valid: false, datasetErrors: errorsMap }; + } else { + const dataset = InferenceDatasetSchema.parse(data); + dataset.forEach((sample, index) => { + const response = validate(targetSchema, sample.input); + if (!response.valid) { + errorsMap[index] = response.errors; + } + }); + return Object.keys(errorsMap).length === 0 + ? { valid: true } + : { valid: false, dataErrors: errorsMap }; + } +} + +function validate( + jsonSchema: JSONSchema, + data: unknown +): { valid: boolean; errors?: Record } { + const validator = ajv.compile(jsonSchema); + const valid = validator(data) as boolean; + const errors = validator.errors?.map((e) => e); + return { valid, errors: errors?.map(toErrorDetail) }; +} + +function toErrorDetail(error: ErrorObject) { + return { + path: error.instancePath.substring(1).replace(/\//g, '.') || '(root)', + message: error.message!, + }; +} + +async function getAction( + manager: RuntimeManager, + actionRef: string +): Promise { + const actions = await manager.listActions(); + return actions[actionRef]; +} diff --git a/genkit-tools/common/src/server/router.ts b/genkit-tools/common/src/server/router.ts index dda9590119..b2f4a084f2 100644 --- a/genkit-tools/common/src/server/router.ts +++ b/genkit-tools/common/src/server/router.ts @@ -15,7 +15,12 @@ */ import { initTRPC, TRPCError } from '@trpc/server'; import { z } from 'zod'; -import { getDatasetStore, getEvalStore, runNewEvaluation } from '../eval'; +import { + getDatasetStore, + getEvalStore, + runNewEvaluation, + validateSchema, +} from '../eval'; import { RuntimeManager } from '../manager/manager'; import { GenkitToolsError, RuntimeInfo } from '../manager/types'; import { Action } from '../types/action'; @@ -239,6 +244,17 @@ export const TOOLS_SERVER_ROUTER = (manager: RuntimeManager) => return response; }), + /** Start new evaluation run */ + validateSchema: loggedProcedure + .input(apis.ValidateDataRequestSchema) + .output(apis.ValidateDataResponseSchema) + .mutation(async ({ input }) => { + console.log(input); + const response = await validateSchema(manager, input); + console.log(response); + return response; + }), + /** Send a screen view analytics event */ sendPageView: t.procedure .input(apis.PageViewSchema) diff --git a/genkit-tools/common/src/types/apis.ts b/genkit-tools/common/src/types/apis.ts index f9a34cc256..1d8cb1d4e6 100644 --- a/genkit-tools/common/src/types/apis.ts +++ b/genkit-tools/common/src/types/apis.ts @@ -150,3 +150,25 @@ export const RunNewEvaluationRequestSchema = z.object({ export type RunNewEvaluationRequest = z.infer< typeof RunNewEvaluationRequestSchema >; + +export const ValidateDataRequestSchema = z.object({ + dataSource: z.object({ + datasetId: z.string().optional(), + data: InferenceDatasetSchema.optional(), + }), + actionRef: z.string(), +}); +export type ValidateDataRequest = z.infer; + +export const ValidateDataResponseSchema = z.object({ + valid: z.boolean(), + datasetErrors: z + .record(z.string(), z.any()) + .describe('Errors mapping when validating dataset') + .optional(), + dataErrors: z + .record(z.string(), z.any()) + .describe('Errors mapping when validating raw data') + .optional(), +}); +export type ValidateDataResponse = z.infer; diff --git a/genkit-tools/pnpm-lock.yaml b/genkit-tools/pnpm-lock.yaml index de5c1e102b..a12afd419a 100644 --- a/genkit-tools/pnpm-lock.yaml +++ b/genkit-tools/pnpm-lock.yaml @@ -83,7 +83,7 @@ importers: version: 29.7.0(@types/node@20.12.7)(ts-node@10.9.2(@types/node@20.12.7)(typescript@5.4.5)) ts-jest: specifier: ^29.1.2 - version: 29.1.2(@babel/core@7.24.5)(@jest/types@29.6.3)(babel-jest@29.7.0(@babel/core@7.24.5))(jest@29.7.0(@types/node@20.12.7))(typescript@5.4.5) + version: 29.1.2(@babel/core@7.24.5)(@jest/types@29.6.3)(babel-jest@29.7.0(@babel/core@7.24.5))(jest@29.7.0(@types/node@20.12.7)(ts-node@10.9.2(@types/node@20.12.7)(typescript@5.4.5)))(typescript@5.4.5) typescript: specifier: ^5.3.3 version: 5.4.5 @@ -99,6 +99,12 @@ importers: adm-zip: specifier: ^0.5.12 version: 0.5.12 + ajv: + specifier: ^8.12.0 + version: 8.17.1 + ajv-formats: + specifier: ^3.0.1 + version: 3.0.1(ajv@8.17.1) axios: specifier: ^1.7.7 version: 1.7.7 @@ -210,7 +216,7 @@ importers: version: 6.0.1 ts-jest: specifier: ^29.1.2 - version: 29.1.2(@babel/core@7.24.5)(@jest/types@29.6.3)(babel-jest@29.7.0(@babel/core@7.24.5))(jest@29.7.0(@types/node@20.12.7))(typescript@5.4.5) + version: 29.1.2(@babel/core@7.24.5)(@jest/types@29.6.3)(babel-jest@29.7.0(@babel/core@7.24.5))(jest@29.7.0(@types/node@20.12.7)(ts-node@10.9.2(@types/node@20.12.7)(typescript@5.4.5)))(typescript@5.4.5) ts-node: specifier: ^10.9.2 version: 10.9.2(@types/node@20.12.7)(typescript@5.4.5) @@ -1088,6 +1094,17 @@ packages: resolution: {integrity: sha512-H0TSyFNDMomMNJQBn8wFV5YC/2eJ+VXECwOadZJT554xP6cODZHPX3H9QMQECxvrgiSOP1pHjy1sMWQVYJOUOA==} engines: {node: '>= 14'} + ajv-formats@3.0.1: + resolution: {integrity: sha512-8iUql50EUR+uUcdRQ3HDqa6EVyo3docL8g5WJ3FNcWmu62IbkGUue/pEyLBW8VGKKucTPgqeks4fIU1DA4yowQ==} + peerDependencies: + ajv: ^8.0.0 + peerDependenciesMeta: + ajv: + optional: true + + ajv@8.17.1: + resolution: {integrity: sha512-B/gBuNg5SiMTrPkC+A2+cW0RszwxYmn6VYxB/inlBStS5nx6xHIt/ehKRhIMhqusl7a8LjQoZnjCs5vhwxOQ1g==} + ansi-escapes@4.3.2: resolution: {integrity: sha512-gKXj5ALrKWQLsYG9jlTRmR/xKluxHV+Z9QEwNIgCfM1/uwPMCuzVVnh5mwTd+OuBZcwSIMbqssNWRm1lE51QaQ==} engines: {node: '>=8'} @@ -1634,6 +1651,9 @@ packages: fast-json-stable-stringify@2.1.0: resolution: {integrity: sha512-lhd/wF+Lk98HZoTCtlVraHtfh5XYijIjalXck7saUtuanSDyLMxnHhSXEDJqHxD7msR8D0uCmqlkwjCV8xvwHw==} + fast-uri@3.0.6: + resolution: {integrity: sha512-Atfo14OibSv5wAp4VWNsFYE1AchQRTv9cBGWET4pZWHzYshFSS9NQI6I57rdKn9croWVMbYFbLhJ+yJvmZIIHw==} + fb-watchman@2.0.2: resolution: {integrity: sha512-p5161BqbuCaSnB8jIbzQHOlpgsPmK5rJVDfDKO91Axs5NC1uu3HRQm6wt9cd9/+GtQQIO53JdGXXoyDpTAsgYA==} @@ -2221,6 +2241,9 @@ packages: json-parse-even-better-errors@2.3.1: resolution: {integrity: sha512-xyFwyhro/JEof6Ghe2iz2NcXoj2sloNsWr/XsERDK/oiPCfaNhl5ONfp+jQdAZRQQ0IJWNzH9zIZF7li91kh2w==} + json-schema-traverse@1.0.0: + resolution: {integrity: sha512-NM8/P9n3XjXhIZn1lLhkFaACTOURQXjWhV4BA/RnOv8xvgqtqpAX9IO4mRQxSx1Rlo4tqzeqb0sOlruaOy3dug==} + json-schema@0.4.0: resolution: {integrity: sha512-es94M3nTIfsEPisRafak+HDLfHXnKBhV3vU5eqPcS3flIWqcxJWgXHXiey3YrpaNsanY5ei1VoYEbOzijuq9BA==} @@ -2646,6 +2669,10 @@ packages: resolution: {integrity: sha512-fGxEI7+wsG9xrvdjsrlmL22OMTTiHRwAMroiEeMgq8gzoLC/PQr7RsRDSTLUg/bZAZtF+TVIkHc6/4RIKrui+Q==} engines: {node: '>=0.10.0'} + require-from-string@2.0.2: + resolution: {integrity: sha512-Xf0nWe6RseziFMu+Ap9biiUbmplq6S9/p+7w7YXP/JBHhrUDDUhwa+vANyubuqfZWTveU//DYVGsDG7RKL/vEw==} + engines: {node: '>=0.10.0'} + require-in-the-middle@7.4.0: resolution: {integrity: sha512-X34iHADNbNDfr6OTStIAHWSAvvKQRYgLO6duASaVf7J2VA3lvmNYboAHOuLC2huav1IwgZJtyEcJCKVzFxOSMQ==} engines: {node: '>=8.6.0'} @@ -4094,6 +4121,17 @@ snapshots: transitivePeerDependencies: - supports-color + ajv-formats@3.0.1(ajv@8.17.1): + optionalDependencies: + ajv: 8.17.1 + + ajv@8.17.1: + dependencies: + fast-deep-equal: 3.1.3 + fast-uri: 3.0.6 + json-schema-traverse: 1.0.0 + require-from-string: 2.0.2 + ansi-escapes@4.3.2: dependencies: type-fest: 0.21.3 @@ -4773,6 +4811,8 @@ snapshots: fast-json-stable-stringify@2.1.0: {} + fast-uri@3.0.6: {} + fb-watchman@2.0.2: dependencies: bser: 2.1.1 @@ -5601,6 +5641,8 @@ snapshots: json-parse-even-better-errors@2.3.1: {} + json-schema-traverse@1.0.0: {} + json-schema@0.4.0: {} json5@2.2.3: {} @@ -6004,6 +6046,8 @@ snapshots: require-directory@2.1.1: {} + require-from-string@2.0.2: {} + require-in-the-middle@7.4.0: dependencies: debug: 4.3.7 @@ -6319,7 +6363,7 @@ snapshots: triple-beam@1.4.1: {} - ts-jest@29.1.2(@babel/core@7.24.5)(@jest/types@29.6.3)(babel-jest@29.7.0(@babel/core@7.24.5))(jest@29.7.0(@types/node@20.12.7))(typescript@5.4.5): + ts-jest@29.1.2(@babel/core@7.24.5)(@jest/types@29.6.3)(babel-jest@29.7.0(@babel/core@7.24.5))(jest@29.7.0(@types/node@20.12.7)(ts-node@10.9.2(@types/node@20.12.7)(typescript@5.4.5)))(typescript@5.4.5): dependencies: bs-logger: 0.2.6 fast-json-stable-stringify: 2.1.0 From b4cf32dd75b46b4757e92dfb0f6becdc6192b36e Mon Sep 17 00:00:00 2001 From: Samuel Bushi Date: Mon, 27 Jan 2025 15:56:10 +0000 Subject: [PATCH 2/5] feedback --- genkit-tools/common/src/eval/validate.ts | 6 +++--- genkit-tools/common/src/server/router.ts | 6 ++---- genkit-tools/common/src/types/apis.ts | 10 ++++------ 3 files changed, 9 insertions(+), 13 deletions(-) diff --git a/genkit-tools/common/src/eval/validate.ts b/genkit-tools/common/src/eval/validate.ts index db3dac1231..4d89f24595 100644 --- a/genkit-tools/common/src/eval/validate.ts +++ b/genkit-tools/common/src/eval/validate.ts @@ -52,7 +52,7 @@ export async function validateSchema( return { valid: true }; } - let errorsMap: Record = {}; + const errorsMap: Record = {}; if (datasetId) { const datasetStore = await getDatasetStore(); @@ -69,7 +69,7 @@ export async function validateSchema( return Object.keys(errorsMap).length === 0 ? { valid: true } - : { valid: false, datasetErrors: errorsMap }; + : { valid: false, errors: errorsMap }; } else { const dataset = InferenceDatasetSchema.parse(data); dataset.forEach((sample, index) => { @@ -80,7 +80,7 @@ export async function validateSchema( }); return Object.keys(errorsMap).length === 0 ? { valid: true } - : { valid: false, dataErrors: errorsMap }; + : { valid: false, errors: errorsMap }; } } diff --git a/genkit-tools/common/src/server/router.ts b/genkit-tools/common/src/server/router.ts index b2f4a084f2..5bff7fe24a 100644 --- a/genkit-tools/common/src/server/router.ts +++ b/genkit-tools/common/src/server/router.ts @@ -244,14 +244,12 @@ export const TOOLS_SERVER_ROUTER = (manager: RuntimeManager) => return response; }), - /** Start new evaluation run */ - validateSchema: loggedProcedure + /** Validate given data against a target action schema */ + validateDatasetSchema: loggedProcedure .input(apis.ValidateDataRequestSchema) .output(apis.ValidateDataResponseSchema) .mutation(async ({ input }) => { - console.log(input); const response = await validateSchema(manager, input); - console.log(response); return response; }), diff --git a/genkit-tools/common/src/types/apis.ts b/genkit-tools/common/src/types/apis.ts index 1d8cb1d4e6..9ca7c1ca7f 100644 --- a/genkit-tools/common/src/types/apis.ts +++ b/genkit-tools/common/src/types/apis.ts @@ -162,13 +162,11 @@ export type ValidateDataRequest = z.infer; export const ValidateDataResponseSchema = z.object({ valid: z.boolean(), - datasetErrors: z + errors: z .record(z.string(), z.any()) - .describe('Errors mapping when validating dataset') - .optional(), - dataErrors: z - .record(z.string(), z.any()) - .describe('Errors mapping when validating raw data') + .describe( + 'Errors mapping, if any. The key is testCaseId if source is a dataset, otherewise it is the index number (stringified)' + ) .optional(), }); export type ValidateDataResponse = z.infer; From 5085835b3d376f11f1b295ff7722d8fa74b204e5 Mon Sep 17 00:00:00 2001 From: Samuel Bushi Date: Mon, 27 Jan 2025 16:01:42 +0000 Subject: [PATCH 3/5] int to string --- genkit-tools/common/src/eval/validate.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/genkit-tools/common/src/eval/validate.ts b/genkit-tools/common/src/eval/validate.ts index 4d89f24595..e1077be634 100644 --- a/genkit-tools/common/src/eval/validate.ts +++ b/genkit-tools/common/src/eval/validate.ts @@ -75,7 +75,7 @@ export async function validateSchema( dataset.forEach((sample, index) => { const response = validate(targetSchema, sample.input); if (!response.valid) { - errorsMap[index] = response.errors; + errorsMap[index.toString()] = response.errors; } }); return Object.keys(errorsMap).length === 0 From 431713ce10fa8db133b6f15335877b1a170d2f2f Mon Sep 17 00:00:00 2001 From: Samuel Bushi Date: Mon, 27 Jan 2025 21:28:03 +0000 Subject: [PATCH 4/5] stronger types --- genkit-tools/common/src/eval/validate.ts | 7 ++++--- genkit-tools/common/src/types/apis.ts | 8 +++++++- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/genkit-tools/common/src/eval/validate.ts b/genkit-tools/common/src/eval/validate.ts index e1077be634..40a7049d8b 100644 --- a/genkit-tools/common/src/eval/validate.ts +++ b/genkit-tools/common/src/eval/validate.ts @@ -20,6 +20,7 @@ import { getDatasetStore } from '.'; import { RuntimeManager } from '../manager'; import { Action, + ErrorDetail, InferenceDatasetSchema, ValidateDataRequest, ValidateDataResponse, @@ -52,7 +53,7 @@ export async function validateSchema( return { valid: true }; } - const errorsMap: Record = {}; + const errorsMap: Record = {}; if (datasetId) { const datasetStore = await getDatasetStore(); @@ -87,14 +88,14 @@ export async function validateSchema( function validate( jsonSchema: JSONSchema, data: unknown -): { valid: boolean; errors?: Record } { +): { valid: boolean; errors?: ErrorDetail[] } { const validator = ajv.compile(jsonSchema); const valid = validator(data) as boolean; const errors = validator.errors?.map((e) => e); return { valid, errors: errors?.map(toErrorDetail) }; } -function toErrorDetail(error: ErrorObject) { +function toErrorDetail(error: ErrorObject): ErrorDetail { return { path: error.instancePath.substring(1).replace(/\//g, '.') || '(root)', message: error.message!, diff --git a/genkit-tools/common/src/types/apis.ts b/genkit-tools/common/src/types/apis.ts index 9ca7c1ca7f..a67c5f2662 100644 --- a/genkit-tools/common/src/types/apis.ts +++ b/genkit-tools/common/src/types/apis.ts @@ -160,10 +160,16 @@ export const ValidateDataRequestSchema = z.object({ }); export type ValidateDataRequest = z.infer; +export const ErrorDetailSchema = z.object({ + path: z.string(), + message: z.string(), +}); +export type ErrorDetail = z.infer; + export const ValidateDataResponseSchema = z.object({ valid: z.boolean(), errors: z - .record(z.string(), z.any()) + .record(z.string(), z.array(ErrorDetailSchema).optional()) .describe( 'Errors mapping, if any. The key is testCaseId if source is a dataset, otherewise it is the index number (stringified)' ) From 2e4a9ea6031724e3e08550169eb0107bb7c05a3a Mon Sep 17 00:00:00 2001 From: Samuel Bushi Date: Mon, 27 Jan 2025 21:31:28 +0000 Subject: [PATCH 5/5] better typing --- genkit-tools/common/src/eval/validate.ts | 6 +++--- genkit-tools/common/src/types/apis.ts | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/genkit-tools/common/src/eval/validate.ts b/genkit-tools/common/src/eval/validate.ts index 40a7049d8b..2cf0f0bf3d 100644 --- a/genkit-tools/common/src/eval/validate.ts +++ b/genkit-tools/common/src/eval/validate.ts @@ -53,7 +53,7 @@ export async function validateSchema( return { valid: true }; } - const errorsMap: Record = {}; + const errorsMap: Record = {}; if (datasetId) { const datasetStore = await getDatasetStore(); @@ -64,7 +64,7 @@ export async function validateSchema( dataset.forEach((sample, index) => { const response = validate(targetSchema, sample.input); if (!response.valid) { - errorsMap[sample.testCaseId] = response.errors; + errorsMap[sample.testCaseId] = response.errors ?? []; } }); @@ -76,7 +76,7 @@ export async function validateSchema( dataset.forEach((sample, index) => { const response = validate(targetSchema, sample.input); if (!response.valid) { - errorsMap[index.toString()] = response.errors; + errorsMap[index.toString()] = response.errors ?? []; } }); return Object.keys(errorsMap).length === 0 diff --git a/genkit-tools/common/src/types/apis.ts b/genkit-tools/common/src/types/apis.ts index a67c5f2662..5ae8045496 100644 --- a/genkit-tools/common/src/types/apis.ts +++ b/genkit-tools/common/src/types/apis.ts @@ -169,7 +169,7 @@ export type ErrorDetail = z.infer; export const ValidateDataResponseSchema = z.object({ valid: z.boolean(), errors: z - .record(z.string(), z.array(ErrorDetailSchema).optional()) + .record(z.string(), z.array(ErrorDetailSchema)) .describe( 'Errors mapping, if any. The key is testCaseId if source is a dataset, otherewise it is the index number (stringified)' )