From 6ea95677f90371f88085e71923c4e173c8924ec7 Mon Sep 17 00:00:00 2001 From: Michael Bleigh Date: Fri, 25 Oct 2024 13:13:49 -0700 Subject: [PATCH 1/4] Formats WIP --- js/ai/package.json | 2 +- js/ai/src/extract.ts | 109 +++++++++++++++- js/ai/src/formats/array.ts | 48 +++++++ js/ai/src/formats/enum.ts | 24 ++++ js/ai/src/formats/index.ts | 35 +++++ js/ai/src/formats/json.ts | 29 +++++ js/ai/src/formats/jsonl.ts | 66 ++++++++++ js/ai/src/formats/text.ts | 14 ++ js/ai/src/formats/types.d.ts | 13 ++ js/ai/src/generate.ts | 16 +++ js/ai/src/index.ts | 4 +- js/ai/tests/extract_test.ts | 206 ++++++++++++++++++++++++++++++ js/ai/tests/formats/array_test.ts | 144 +++++++++++++++++++++ js/ai/tests/formats/enum_test.ts | 80 ++++++++++++ js/ai/tests/formats/json_test.ts | 163 +++++++++++++++++++++++ js/ai/tests/formats/jsonl_test.ts | 188 +++++++++++++++++++++++++++ js/ai/tests/formats/text_test.ts | 59 +++++++++ js/core/src/registry.ts | 22 ++++ 18 files changed, 1216 insertions(+), 6 deletions(-) create mode 100644 js/ai/src/formats/array.ts create mode 100644 js/ai/src/formats/enum.ts create mode 100644 js/ai/src/formats/index.ts create mode 100644 js/ai/src/formats/json.ts create mode 100644 js/ai/src/formats/jsonl.ts create mode 100644 js/ai/src/formats/text.ts create mode 100644 js/ai/src/formats/types.d.ts create mode 100644 js/ai/tests/extract_test.ts create mode 100644 js/ai/tests/formats/array_test.ts create mode 100644 js/ai/tests/formats/enum_test.ts create mode 100644 js/ai/tests/formats/json_test.ts create mode 100644 js/ai/tests/formats/jsonl_test.ts create mode 100644 js/ai/tests/formats/text_test.ts diff --git a/js/ai/package.json b/js/ai/package.json index d75ba30a1a..2880d6cc31 100644 --- a/js/ai/package.json +++ b/js/ai/package.json @@ -15,7 +15,7 @@ "build:clean": "rimraf ./lib", "build": "npm-run-all build:clean check compile", "build:watch": "tsup-node --watch", - "test": "node --import tsx --test ./tests/**/*_test.ts", + "test": "node --import tsx 
--test ./tests/**/*_test.ts ./tests/*_test.ts", "test:single": "node --import tsx --test" }, "repository": { diff --git a/js/ai/src/extract.ts b/js/ai/src/extract.ts index 402587ed28..c5d0147110 100644 --- a/js/ai/src/extract.ts +++ b/js/ai/src/extract.ts @@ -40,10 +40,31 @@ export function extractJson( let closingChar: '}' | ']' | undefined; let startPos: number | undefined; let nestingCount = 0; + let inString = false; + let escapeNext = false; for (let i = 0; i < text.length; i++) { const char = text[i].replace(/\u00A0/g, ' '); + if (escapeNext) { + escapeNext = false; + continue; + } + + if (char === '\\') { + escapeNext = true; + continue; + } + + if (char === '"') { + inString = !inString; + continue; + } + + if (inString) { + continue; + } + if (!openingChar && (char === '{' || char === '[')) { // Look for opening character openingChar = char; @@ -67,18 +88,100 @@ export function extractJson( // If an incomplete JSON structure is detected try { // Parse the incomplete JSON structure using partial-json for lenient parsing - // Note: partial-json automatically handles adding the closing character return parsePartialJson(text.substring(startPos)); } catch { // If parsing fails, throw an error if (throwOnBadJson) { throw new Error(`Invalid JSON extracted from model output: ${text}`); } - return null; // Return null if no JSON structure is found } + return null; } } if (throwOnBadJson) { throw new Error(`Invalid JSON extracted from model output: ${text}`); } - return null; // Return null if no JSON structure is found + return null; +} + +interface ExtractItemsResult { + items: unknown[]; + cursor: number; +} + +/** + * Extracts complete objects from the first array found in the text. + * Processes text from the cursor position and returns both complete items + * and the new cursor position. 
+ */ +export function extractItems( + text: string, + cursor: number = 0 +): ExtractItemsResult { + const items: unknown[] = []; + let currentCursor = cursor; + + // Find the first array start if we haven't already processed any text + if (cursor === 0) { + const arrayStart = text.indexOf('['); + if (arrayStart === -1) { + return { items: [], cursor: text.length }; + } + currentCursor = arrayStart + 1; + } + + let objectStart = -1; + let braceCount = 0; + let inString = false; + let escapeNext = false; + + // Process the text from the cursor position + for (let i = currentCursor; i < text.length; i++) { + const char = text[i]; + + if (escapeNext) { + escapeNext = false; + continue; + } + + if (char === '\\') { + escapeNext = true; + continue; + } + + if (char === '"') { + inString = !inString; + continue; + } + + if (inString) { + continue; + } + + if (char === '{') { + if (braceCount === 0) { + objectStart = i; + } + braceCount++; + } else if (char === '}') { + braceCount--; + if (braceCount === 0 && objectStart !== -1) { + try { + const obj = JSON5.parse(text.substring(objectStart, i + 1)); + items.push(obj); + currentCursor = i + 1; + objectStart = -1; + } catch { + // If parsing fails, continue + } + } + } else if (char === ']' && braceCount === 0) { + // End of array + break; + } + } + + return { + items, + cursor: currentCursor, + }; } diff --git a/js/ai/src/formats/array.ts b/js/ai/src/formats/array.ts new file mode 100644 index 0000000000..0dc7bd433b --- /dev/null +++ b/js/ai/src/formats/array.ts @@ -0,0 +1,48 @@ +import { GenkitError } from '@genkit-ai/core'; +import { extractItems } from '../extract'; +import type { Formatter } from './types'; + +export const arrayParser: Formatter = (request) => { + if (request.output?.schema && request.output?.schema.type !== 'array') { + throw new GenkitError({ + status: 'INVALID_ARGUMENT', + message: `Must supply an 'array' schema type when using the 'items' parser format.`, + }); + } + + let instructions: boolean | 
string = false; + if (request.output?.schema) { + instructions = `Output should be a JSON array conforming to the following schema: + + \`\`\` + ${JSON.stringify(request.output!.schema!)} + \`\`\` + `; + } + + let cursor: number = 0; + + return { + parseChunk: (chunk, emit) => { + const { items, cursor: newCursor } = extractItems( + chunk.accumulatedText, + cursor + ); + + // Emit any complete items + for (const item of items) { + emit(item); + } + + // Update cursor position + cursor = newCursor; + }, + + parseResponse: (response) => { + const { items } = extractItems(response.text, 0); + return items; + }, + + instructions, + }; +}; diff --git a/js/ai/src/formats/enum.ts b/js/ai/src/formats/enum.ts new file mode 100644 index 0000000000..6a6da7a1b0 --- /dev/null +++ b/js/ai/src/formats/enum.ts @@ -0,0 +1,24 @@ +import { GenkitError } from '@genkit-ai/core'; +import type { Formatter } from './types'; + +export const enumParser: Formatter = (request) => { + const schemaType = request.output?.schema?.type; + if (schemaType && schemaType !== 'string' && schemaType !== 'enum') { + throw new GenkitError({ + status: 'INVALID_ARGUMENT', + message: `Must supply a 'string' or 'enum' schema type when using the enum parser format.`, + }); + } + + let instructions: boolean | string = false; + if (request.output?.schema?.enum) { + instructions = `Output should be ONLY one of the following enum values. 
Do not output any additional information or add quotes.\n\n${request.output?.schema?.enum.map((v) => v.toString()).join('\n')}`; + } + + return { + parseResponse: (response) => { + return response.text.trim(); + }, + instructions, + }; +}; diff --git a/js/ai/src/formats/index.ts b/js/ai/src/formats/index.ts new file mode 100644 index 0000000000..4cb91af831 --- /dev/null +++ b/js/ai/src/formats/index.ts @@ -0,0 +1,35 @@ +import { Registry } from '@genkit-ai/core/registry'; +import { arrayParser } from './array'; +import { enumParser } from './enum'; +import { jsonParser } from './json'; +import { jsonlParser } from './jsonl'; +import { textParser } from './text'; +import { Formatter } from './types'; + +export const DEFAULT_FORMATS = { + json: jsonParser, + array: arrayParser, + text: textParser, + enum: enumParser, + jsonl: jsonlParser, +}; + +export function defineFormat( + registry: Registry, + name: string, + formatter: Formatter +) { + registry.registerValue('format', name, formatter); +} + +export type FormatArgument = string | Formatter; + +export async function resolveFormat( + registry: Registry, + arg: FormatArgument +): Promise { + if (typeof arg === 'string') { + return registry.lookupValue('format', arg); + } + return arg; +} diff --git a/js/ai/src/formats/json.ts b/js/ai/src/formats/json.ts new file mode 100644 index 0000000000..2f9e7e405b --- /dev/null +++ b/js/ai/src/formats/json.ts @@ -0,0 +1,29 @@ +import { extractJson } from '../extract'; +import type { Formatter } from './types'; + +export const jsonParser: Formatter = (request) => { + let accumulatedText: string = ''; + let instructions: boolean | string = false; + + if (request.output?.schema) { + instructions = `Output should be in JSON format and conform to the following schema: + +\`\`\` +${JSON.stringify(request.output!.schema!)} +\`\`\` +`; + } + + return { + parseChunk: (chunk, emit) => { + accumulatedText = chunk.accumulatedText; + emit(extractJson(accumulatedText)); + }, + + 
parseResponse: (response) => { + return extractJson(response.text); + }, + + instructions, + }; +}; diff --git a/js/ai/src/formats/jsonl.ts b/js/ai/src/formats/jsonl.ts new file mode 100644 index 0000000000..5a6dcd2918 --- /dev/null +++ b/js/ai/src/formats/jsonl.ts @@ -0,0 +1,66 @@ +import { GenkitError } from '@genkit-ai/core'; +import JSON5 from 'json5'; +import { extractJson } from '../extract'; +import type { Formatter } from './types'; + +function objectLines(text: string): string[] { + return text + .split('\n') + .map((line) => line.trim()) + .filter((line) => line.startsWith('{')); +} + +export const jsonlParser: Formatter = (request) => { + if ( + request.output?.schema && + (request.output?.schema.type !== 'array' || + request.output?.schema.items?.type !== 'object') + ) { + throw new GenkitError({ + status: 'INVALID_ARGUMENT', + message: `Must supply an 'array' schema type containing 'object' items when using the 'jsonl' parser format.`, + }); + } + + let instructions: boolean | string = false; + if (request.output?.schema?.items) { + instructions = `Output should be JSONL format, a sequence of JSON objects (one per line). 
Each line should conform to the following schema: + +\`\`\` +${JSON.stringify(request.output.schema.items)} +\`\`\` + `; + } + + let cursor = 0; + + return { + parseChunk: (chunk, emit) => { + const jsonLines = objectLines(chunk.accumulatedText); + + for (let i = cursor; i < jsonLines.length; i++) { + try { + const result = JSON5.parse(jsonLines[i]); + if (result) { + emit(result); + } + } catch (e) { + cursor = i; + return; + } + } + + cursor = jsonLines.length; + }, + + parseResponse: (response) => { + const items = objectLines(response.text) + .map((l) => extractJson(l)) + .filter((l) => !!l); + + return items; + }, + + instructions, + }; +}; diff --git a/js/ai/src/formats/text.ts b/js/ai/src/formats/text.ts new file mode 100644 index 0000000000..2363cfa9ef --- /dev/null +++ b/js/ai/src/formats/text.ts @@ -0,0 +1,14 @@ +import { GenerateResponse, GenerateResponseChunk } from '../generate'; +import type { Formatter } from './types'; + +export const textParser: Formatter = (request) => { + return { + parseChunk: (chunk: GenerateResponseChunk, emit: (chunk: any) => void) => { + emit(chunk.text); + }, + + parseResponse: (response: GenerateResponse) => { + return response.text; + }, + }; +}; diff --git a/js/ai/src/formats/types.d.ts b/js/ai/src/formats/types.d.ts new file mode 100644 index 0000000000..68cc8d4f37 --- /dev/null +++ b/js/ai/src/formats/types.d.ts @@ -0,0 +1,13 @@ +import { GenerateResponse, GenerateResponseChunk } from '../generate'; +import { GenerateRequest } from '../model'; + +export interface Formatter { + (req: GenerateRequest): { + parseChunk?: ( + chunk: GenerateResponseChunk, + emit: (chunk: any) => void + ) => void; + parseResponse(response: GenerateResponse): any; + instructions?: boolean | string; + }; +} diff --git a/js/ai/src/generate.ts b/js/ai/src/generate.ts index 604fcfbf1a..8908790040 100755 --- a/js/ai/src/generate.ts +++ b/js/ai/src/generate.ts @@ -317,6 +317,22 @@ export class GenerateResponseChunk return this.content.map((part) => 
part.text || '').join(''); } + /** + * Concatenates all `text` parts of all chunks from the response thus far. + * @returns A string of all concatenated chunk text content. + */ + get accumulatedText(): string { + if (!this.accumulatedChunks) + throw new GenkitError({ + status: 'FAILED_PRECONDITION', + message: 'Cannot compose accumulated text without accumulated chunks.', + }); + + return this.accumulatedChunks + ?.map((c) => c.content.map((p) => p.text || '').join('')) + .join(''); + } + /** * Returns the first media part detected in the chunk. Useful for extracting * (for example) an image from a generation expected to create one. diff --git a/js/ai/src/index.ts b/js/ai/src/index.ts index e629db64e7..2879a78fa4 100644 --- a/js/ai/src/index.ts +++ b/js/ai/src/index.ts @@ -36,12 +36,12 @@ export { type EvaluatorReference, } from './evaluator.js'; export { + generate, GenerateResponse, + generateStream, GenerationBlockedError, GenerationResponseError, Message, - generate, - generateStream, normalizePart, tagAsPreamble, toGenerateRequest, diff --git a/js/ai/tests/extract_test.ts b/js/ai/tests/extract_test.ts new file mode 100644 index 0000000000..98d3018a80 --- /dev/null +++ b/js/ai/tests/extract_test.ts @@ -0,0 +1,206 @@ +/** + * Copyright 2024 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import assert from 'node:assert'; +import { describe, it } from 'node:test'; +import { extractItems, extractJson, parsePartialJson } from '../src/extract'; + +describe('extract', () => { + describe('extractItems', () => { + interface TestStep { + chunk: string; + want: unknown[]; + } + + interface TestCase { + name: string; + steps: TestStep[]; + } + + const testCases: TestCase[] = [ + { + name: 'handles simple array in chunks', + steps: [ + { chunk: '[', want: [] }, + { chunk: '{"a": 1},', want: [{ a: 1 }] }, + { chunk: '{"b": 2}', want: [{ b: 2 }] }, + { chunk: ']', want: [] }, + ], + }, + { + name: 'handles nested objects', + steps: [ + { chunk: '[{"outer": {', want: [] }, + { + chunk: '"inner": "value"}},', + want: [{ outer: { inner: 'value' } }], + }, + { chunk: '{"next": true}]', want: [{ next: true }] }, + ], + }, + { + name: 'handles escaped characters', + steps: [ + { chunk: '[{"text": "line1\\n', want: [] }, + { chunk: 'line2"},', want: [{ text: 'line1\nline2' }] }, + { chunk: '{"text": "tab\\there"}]', want: [{ text: 'tab\there' }] }, + ], + }, + { + name: 'ignores content before first array', + steps: [ + { chunk: 'Here is an array:\n```json\n\n[', want: [] }, + { chunk: '{"a": 1},', want: [{ a: 1 }] }, + { chunk: '{"b": 2}]\n```\nDid you like my array?', want: [{ b: 2 }] }, + ], + }, + { + name: 'handles whitespace', + steps: [ + { chunk: '[\n ', want: [] }, + { chunk: '{"a": 1},\n ', want: [{ a: 1 }] }, + { chunk: '{"b": 2}\n]', want: [{ b: 2 }] }, + ], + }, + ]; + + for (const tc of testCases) { + it(tc.name, () => { + let text = ''; + let cursor = 0; + + for (const step of tc.steps) { + text += step.chunk; + const result = extractItems(text, cursor); + assert.deepStrictEqual(result.items, step.want); + cursor = result.cursor; + } + }); + } + }); + + describe('extractJson', () => { + interface TestCase { + name: string; + input: { + text: string; + throwOnBadJson?: boolean; + }; + expected?: unknown; + throws?: boolean; + } + + const 
testCases: TestCase[] = [ + { + name: 'extracts simple object', + input: { + text: 'prefix{"a":1}suffix', + }, + expected: { a: 1 }, + }, + { + name: 'extracts simple array', + input: { + text: 'prefix[1,2,3]suffix', + }, + expected: [1, 2, 3], + }, + { + name: 'handles nested structures', + input: { + text: 'text{"a":{"b":[1,2]}}more', + }, + expected: { a: { b: [1, 2] } }, + }, + { + name: 'handles strings with braces', + input: { + text: '{"text": "not {a} json"}', + }, + expected: { text: 'not {a} json' }, + }, + { + name: 'returns null for invalid JSON without throw', + input: { + text: 'not json at all', + }, + expected: null, + }, + { + name: 'throws for invalid JSON with throw flag', + input: { + text: 'not json at all', + throwOnBadJson: true, + }, + throws: true, + }, + ]; + + for (const tc of testCases) { + it(tc.name, () => { + if (tc.throws) { + assert.throws(() => { + extractJson(tc.input.text, true); + }); + } else { + const result = extractJson( + tc.input.text, + (tc.input.throwOnBadJson || false) as any + ); + assert.deepStrictEqual(result, tc.expected); + } + }); + } + }); + + describe('parsePartialJson', () => { + interface TestCase { + name: string; + input: string; + expected: unknown; + } + + const testCases: TestCase[] = [ + { + name: 'parses complete object', + input: '{"a":1,"b":2}', + expected: { a: 1, b: 2 }, + }, + { + name: 'parses partial object', + input: '{"a":1,"b":', + expected: { a: 1 }, + }, + { + name: 'parses partial array', + input: '[1,2,3,', + expected: [1, 2, 3], + }, + { + name: 'parses nested partial structures', + input: '{"a":{"b":1,"c":]}}', + expected: { a: { b: 1 } }, + }, + ]; + + for (const tc of testCases) { + it(tc.name, () => { + const result = parsePartialJson(tc.input); + assert.deepStrictEqual(result, tc.expected); + }); + } + }); +}); diff --git a/js/ai/tests/formats/array_test.ts b/js/ai/tests/formats/array_test.ts new file mode 100644 index 0000000000..448d15cb3b --- /dev/null +++ 
b/js/ai/tests/formats/array_test.ts @@ -0,0 +1,144 @@ +import assert from 'node:assert'; +import { describe, it } from 'node:test'; +import { arrayParser } from '../../src/formats/array.js'; +import { GenerateResponse, GenerateResponseChunk } from '../../src/generate.js'; +import { GenerateResponseChunkData } from '../../src/model.js'; + +describe('arrayFormat', () => { + const streamingTests = [ + { + desc: 'emits complete array items as they arrive', + chunks: [ + { + text: '[{"id": 1,', + want: [], + }, + { + text: '"name": "first"}', + want: [{ id: 1, name: 'first' }], + }, + { + text: ', {"id": 2, "name": "second"}]', + want: [{ id: 2, name: 'second' }], + }, + ], + }, + { + desc: 'handles single item arrays', + chunks: [ + { + text: '[{"id": 1, "name": "single"}]', + want: [{ id: 1, name: 'single' }], + }, + ], + }, + { + desc: 'handles preamble with code fence', + chunks: [ + { + text: 'Here is the array you requested:\n\n```json\n[', + want: [], + }, + { + text: '{"id": 1, "name": "item"}]\n```', + want: [{ id: 1, name: 'item' }], + }, + ], + }, + ]; + + for (const st of streamingTests) { + it(st.desc, () => { + const parser = arrayParser({ messages: [] }); + const chunks: GenerateResponseChunkData[] = []; + let lastEmitted: any[] = []; + for (const chunk of st.chunks) { + const newChunk: GenerateResponseChunkData = { + content: [{ text: chunk.text }], + }; + chunks.push(newChunk); + + lastEmitted = []; + const emit = (item: any) => { + lastEmitted.push(item); + }; + parser.parseChunk!(new GenerateResponseChunk(newChunk, chunks), emit); + + assert.deepStrictEqual(lastEmitted, chunk.want); + } + }); + } + + const responseTests = [ + { + desc: 'parses complete array response', + response: new GenerateResponse({ + message: { + role: 'model', + content: [{ text: '[{"id": 1, "name": "test"}]' }], + }, + }), + want: [{ id: 1, name: 'test' }], + }, + { + desc: 'parses empty array', + response: new GenerateResponse({ + message: { + role: 'model', + content: [{ 
text: '[]' }], + }, + }), + want: [], + }, + { + desc: 'parses array with preamble and code fence', + response: new GenerateResponse({ + message: { + role: 'model', + content: [ + { text: 'Here is the array:\n\n```json\n[{"id": 1}]\n```' }, + ], + }, + }), + want: [{ id: 1 }], + }, + ]; + + for (const rt of responseTests) { + it(rt.desc, () => { + const parser = arrayParser({ messages: [] }); + assert.deepStrictEqual(parser.parseResponse(rt.response), rt.want); + }); + } + + const errorTests = [ + { + desc: 'throws error for non-array schema type', + request: { + messages: [], + output: { + schema: { type: 'string' }, + }, + }, + wantError: /Must supply an 'array' schema type/, + }, + { + desc: 'throws error for object schema type', + request: { + messages: [], + output: { + schema: { type: 'object' }, + }, + }, + wantError: /Must supply an 'array' schema type/, + }, + ]; + + for (const et of errorTests) { + it(et.desc, () => { + assert.throws(() => { + arrayParser(et.request); + }, et.wantError); + }); + } +}); diff --git a/js/ai/tests/formats/enum_test.ts b/js/ai/tests/formats/enum_test.ts new file mode 100644 index 0000000000..153b2f030e --- /dev/null +++ b/js/ai/tests/formats/enum_test.ts @@ -0,0 +1,80 @@ +import { GenkitError } from '@genkit-ai/core'; +import assert from 'node:assert'; +import { describe, it } from 'node:test'; +import { enumParser } from '../../src/formats/enum.js'; +import { GenerateResponse } from '../../src/generate.js'; + +describe('enumFormat', () => { + const responseTests = [ + { + desc: 'parses simple string response', + response: new GenerateResponse({ + message: { + role: 'model', + content: [{ text: 'value1' }], + }, + }), + want: 'value1', + }, + { + desc: 'trims whitespace from response', + response: new GenerateResponse({ + message: { + role: 'model', + content: [{ text: ' value2 \n' }], + }, + }), + want: 'value2', + }, + ]; + + for (const rt of responseTests) { + it(rt.desc, () => { + const parser = enumParser({ + messages: 
[], + output: { schema: { type: 'string' } }, + }); + assert.strictEqual(parser.parseResponse(rt.response), rt.want); + }); + } + + it('throws error for invalid schema type', () => { + assert.throws( + () => { + enumParser({ messages: [], output: { schema: { type: 'number' } } }); + }, + (err: GenkitError) => { + return ( + err.status === 'INVALID_ARGUMENT' && + err.message.includes( + `Must supply a 'string' or 'enum' schema type when using the enum parser format.` + ) + ); + } + ); + }); + + it('includes enum values in instructions when provided', () => { + const enumValues = ['option1', 'option2', 'option3']; + const parser = enumParser({ + messages: [], + output: { schema: { type: 'enum', enum: enumValues } }, + }); + + assert.match( + parser.instructions as string, + /Output should be ONLY one of the following enum values/ + ); + for (const value of enumValues) { + assert.match(parser.instructions as string, new RegExp(value)); + } + }); + + it('has no instructions when no enum values provided', () => { + const parser = enumParser({ + messages: [], + output: { schema: { type: 'string' } }, + }); + assert.strictEqual(parser.instructions, false); + }); +}); diff --git a/js/ai/tests/formats/json_test.ts b/js/ai/tests/formats/json_test.ts new file mode 100644 index 0000000000..8ac707ffbe --- /dev/null +++ b/js/ai/tests/formats/json_test.ts @@ -0,0 +1,163 @@ +import assert from 'node:assert'; +import { describe, it } from 'node:test'; +import { jsonParser } from '../../src/formats/json.js'; +import { GenerateResponse, GenerateResponseChunk } from '../../src/generate.js'; +import { GenerateResponseChunkData } from '../../src/model.js'; + +describe('jsonFormat', () => { + const streamingTests = [ + { + desc: 'emits partial object as it streams', + chunks: [ + { + text: '{"name": "test', + want: { name: 'test' }, + }, + { + text: '", "value": 42}', + want: { name: 'test', value: 42 }, + }, + ], + }, + { + desc: 'handles nested objects', + chunks: [ + { + text: 
'{"outer": {"inner": ', + want: { outer: {} }, + }, + { + text: '"value"}}', + want: { outer: { inner: 'value' } }, + }, + ], + }, + { + desc: 'handles preamble with code fence', + chunks: [ + { + text: 'Here is the JSON:\n\n```json\n{"key": ', + want: {}, + }, + { + text: '"value"}\n```', + want: { key: 'value' }, + }, + ], + }, + { + desc: 'handles arrays', + chunks: [ + { + text: '[{"id": 1}, {"id"', + want: [{ id: 1 }, {}], + }, + { + text: ': 2}]', + want: [{ id: 1 }, { id: 2 }], + }, + ], + }, + ]; + + for (const st of streamingTests) { + it(st.desc, () => { + const parser = jsonParser({ messages: [] }); + const chunks: GenerateResponseChunkData[] = []; + let lastEmitted: any; + for (const chunk of st.chunks) { + const newChunk: GenerateResponseChunkData = { + content: [{ text: chunk.text }], + }; + chunks.push(newChunk); + + lastEmitted = undefined; + const emit = (value: any) => { + lastEmitted = value; + }; + parser.parseChunk!(new GenerateResponseChunk(newChunk, chunks), emit); + + assert.deepStrictEqual(lastEmitted, chunk.want); + } + }); + } + + const responseTests = [ + { + desc: 'parses complete object response', + response: new GenerateResponse({ + message: { + role: 'model', + content: [{ text: '{"name": "test", "value": 42}' }], + }, + }), + want: { name: 'test', value: 42 }, + }, + { + desc: 'parses array response', + response: new GenerateResponse({ + message: { + role: 'model', + content: [{ text: '[1, 2, 3]' }], + }, + }), + want: [1, 2, 3], + }, + { + desc: 'parses nested structures', + response: new GenerateResponse({ + message: { + role: 'model', + content: [{ text: '{"outer": {"inner": [1, 2]}}' }], + }, + }), + want: { outer: { inner: [1, 2] } }, + }, + { + desc: 'parses with preamble and code fence', + response: new GenerateResponse({ + message: { + role: 'model', + content: [ + { text: 'Here is the JSON:\n\n```json\n{"key": "value"}\n```' }, + ], + }, + }), + want: { key: 'value' }, + }, + ]; + + for (const rt of responseTests) { + 
it(rt.desc, () => { + const parser = jsonParser({ messages: [] }); + assert.deepStrictEqual(parser.parseResponse(rt.response), rt.want); + }); + } + + it('includes schema in instructions when provided', () => { + const schema = { + type: 'object', + properties: { + name: { type: 'string' }, + }, + }; + const parser = jsonParser({ + messages: [], + output: { schema }, + }); + + assert.match( + parser.instructions as string, + /Output should be in JSON format/ + ); + assert.match( + parser.instructions as string, + new RegExp(JSON.stringify(schema)) + ); + }); + + it('has no instructions when no schema provided', () => { + const parser = jsonParser({ messages: [] }); + assert.strictEqual(parser.instructions, false); + }); +}); diff --git a/js/ai/tests/formats/jsonl_test.ts b/js/ai/tests/formats/jsonl_test.ts new file mode 100644 index 0000000000..a6237bd385 --- /dev/null +++ b/js/ai/tests/formats/jsonl_test.ts @@ -0,0 +1,188 @@ +import assert from 'node:assert'; +import { describe, it } from 'node:test'; +import { jsonlParser } from '../../src/formats/jsonl.js'; +import { GenerateResponse, GenerateResponseChunk } from '../../src/generate.js'; +import { GenerateResponseChunkData } from '../../src/model.js'; + +describe('jsonlFormat', () => { + const streamingTests = [ + { + desc: 'emits complete objects line by line', + chunks: [ + { + text: '{"id": 1}\n{"id"', + want: [{ id: 1 }], + }, + { + text: ': 2}\n{"id": 3}', + want: [{ id: 2 }, { id: 3 }], + }, + ], + }, + { + desc: 'handles preamble with code fence', + chunks: [ + { + text: 'Here are the items:\n\n```jsonl\n{"id": 1', + want: [], + }, + { + text: '}\n{"id": 2}\n```', + want: [{ id: 1 }, { id: 2 }], + }, + ], + }, + { + desc: 'ignores non-object lines', + chunks: [ + { + text: 'Starting output:\n{"id": 1}\nsome text\n{"id": 2}', + want: [{ id: 1 }, { id: 2 }], + }, + ], + }, + { + desc: 'handles objects with nested structures', + chunks: [ + { + text: '{"user": {"name": "test"}}\n{"data": ', + want: [{ user: 
{ name: 'test' } }], + }, + { + text: '{"values": [1,2]}}', + want: [{ data: { values: [1, 2] } }], + }, + ], + }, + ]; + + for (const st of streamingTests) { + it(st.desc, () => { + const parser = jsonlParser({ messages: [] }); + const chunks: GenerateResponseChunkData[] = []; + let lastEmitted: any[] = []; + for (const chunk of st.chunks) { + const newChunk: GenerateResponseChunkData = { + content: [{ text: chunk.text }], + }; + chunks.push(newChunk); + + lastEmitted = []; + const emit = (item: any) => { + lastEmitted.push(item); + }; + parser.parseChunk!(new GenerateResponseChunk(newChunk, chunks), emit); + + assert.deepStrictEqual(lastEmitted, chunk.want); + } + }); + } + + const responseTests = [ + { + desc: 'parses multiple objects', + response: new GenerateResponse({ + message: { + role: 'model', + content: [{ text: '{"id": 1}\n{"id": 2}\n{"id": 3}' }], + }, + }), + want: [{ id: 1 }, { id: 2 }, { id: 3 }], + }, + { + desc: 'handles empty lines and non-object lines', + response: new GenerateResponse({ + message: { + role: 'model', + content: [{ text: '\n{"id": 1}\nsome text\n{"id": 2}\n' }], + }, + }), + want: [{ id: 1 }, { id: 2 }], + }, + { + desc: 'parses with preamble and code fence', + response: new GenerateResponse({ + message: { + role: 'model', + content: [ + { + text: 'Here are the items:\n\n```jsonl\n{"id": 1}\n{"id": 2}\n```', + }, + ], + }, + }), + want: [{ id: 1 }, { id: 2 }], + }, + ]; + + for (const rt of responseTests) { + it(rt.desc, () => { + const parser = jsonlParser({ messages: [] }); + assert.deepStrictEqual(parser.parseResponse(rt.response), rt.want); + }); + } + + const errorTests = [ + { + desc: 'throws error for non-array schema type', + request: { + messages: [], + output: { + schema: { type: 'string' }, + }, + }, + wantError: /Must supply an 'array' schema type/, + }, + { + desc: 'throws error for array schema with non-object items', + request: { + messages: [], + output: { + schema: { + type: 'array', + items: { type: 'string' }, 
+ }, + }, + }, + wantError: /Must supply an 'array' schema type containing 'object' items/, + }, + ]; + + for (const et of errorTests) { + it(et.desc, () => { + assert.throws(() => { + jsonlParser(et.request); + }, et.wantError); + }); + } + + it('includes schema in instructions when provided', () => { + const schema = { + type: 'array', + items: { + type: 'object', + properties: { + id: { type: 'number' }, + }, + }, + }; + const parser = jsonlParser({ + messages: [], + output: { schema }, + }); + + assert.match( + parser.instructions as string, + /Output should be JSONL format/ + ); + assert.match( + parser.instructions as string, + new RegExp(JSON.stringify(schema.items)) + ); + }); + + it('has no instructions when no schema provided', () => { + const parser = jsonlParser({ messages: [] }); + assert.strictEqual(parser.instructions, false); + }); +}); diff --git a/js/ai/tests/formats/text_test.ts b/js/ai/tests/formats/text_test.ts new file mode 100644 index 0000000000..3249fbae6a --- /dev/null +++ b/js/ai/tests/formats/text_test.ts @@ -0,0 +1,59 @@ +import assert from 'node:assert'; +import { describe, it } from 'node:test'; +import { textParser } from '../../src/formats/text.js'; +import { GenerateResponse, GenerateResponseChunk } from '../../src/generate.js'; +import { GenerateResponseChunkData } from '../../src/model.js'; + +describe('textFormat', () => { + const streamingTests = [ + { + desc: 'emits each chunk as it comes', + chunks: [ + { text: 'this is', want: ['this is'] }, + { text: ' a two-chunk response', want: [' a two-chunk response'] }, + ], + }, + ]; + + for (const st of streamingTests) { + it(st.desc, () => { + const parser = textParser({ messages: [] }); + const chunks: GenerateResponseChunkData[] = []; + let lastEmitted: string[] = []; + for (const chunk of st.chunks) { + const newChunk: GenerateResponseChunkData = { + content: [{ text: chunk.text }], + }; + chunks.push(newChunk); + + lastEmitted = []; + const emit = (chunk: string) => { + 
lastEmitted.push(chunk); + }; + parser.parseChunk!(new GenerateResponseChunk(newChunk, chunks), emit); + + assert.deepStrictEqual(lastEmitted, chunk.want); + } + }); + } + + const responseTests = [ + { + desc: 'it returns the concatenated text', + response: new GenerateResponse({ + message: { + role: 'model', + content: [{ text: 'chunk one.' }, { text: 'chunk two.' }], + }, + }), + want: 'chunk one.chunk two.', + }, + ]; + + for (const rt of responseTests) { + it(rt.desc, () => { + const parser = textParser({ messages: [] }); + assert.deepStrictEqual(parser.parseResponse(rt.response), rt.want); + }); + } +}); diff --git a/js/core/src/registry.ts b/js/core/src/registry.ts index f7cd0f5323..94cb213729 100644 --- a/js/core/src/registry.ts +++ b/js/core/src/registry.ts @@ -63,6 +63,7 @@ export class Registry { private actionsById: Record> = {}; private pluginsByName: Record = {}; private schemasByName: Record = {}; + private valueByTypeAndName: Record> = {}; private allPluginsInitialized = false; constructor(public parent?: Registry) {} @@ -195,6 +196,33 @@ export class Registry { this.schemasByName[name] = data; } + registerValue(type: string, name: string, value: any) { + if (!this.valueByTypeAndName[type]) { + this.valueByTypeAndName[type] = {}; + } + this.valueByTypeAndName[type][name] = value; + } + + /** + * Looks up a value stored via registerValue under the given type and key. + * If none is present and parsePluginName(key) yields a plugin name, that + * plugin is initialized before looking again. A missing or falsy local + * value defers to the parent registry, if any. + */ + async lookupValue<T = unknown>( + type: string, + key: string + ): Promise<T | undefined> { + const pluginName = parsePluginName(key); + if (!this.valueByTypeAndName[type]?.[key] && pluginName) { + await this.initializePlugin(pluginName); + } + return ( + (this.valueByTypeAndName[type]?.[key] as T) || + this.parent?.lookupValue(type, key) + ); + } + /** * Looks up a schema. * @param name The name of the schema to lookup. 
From dff3404a9d978ca4906c0999c399e34a55bd50d8 Mon Sep 17 00:00:00 2001 From: Michael Bleigh Date: Fri, 25 Oct 2024 13:41:05 -0700 Subject: [PATCH 2/4] format --- js/ai/src/formats/array.ts | 16 ++++++++++++++++ js/ai/src/formats/enum.ts | 16 ++++++++++++++++ js/ai/src/formats/index.ts | 22 +++++++++++++++++++++- js/ai/src/formats/json.ts | 16 ++++++++++++++++ js/ai/src/formats/jsonl.ts | 16 ++++++++++++++++ js/ai/src/formats/text.ts | 16 ++++++++++++++++ js/ai/src/formats/types.d.ts | 16 ++++++++++++++++ js/ai/src/generate.ts | 10 ++++------ js/ai/src/index.ts | 4 ++-- js/ai/tests/formats/array_test.ts | 16 ++++++++++++++++ js/ai/tests/formats/enum_test.ts | 16 ++++++++++++++++ js/ai/tests/formats/json_test.ts | 16 ++++++++++++++++ js/ai/tests/formats/jsonl_test.ts | 16 ++++++++++++++++ js/ai/tests/formats/text_test.ts | 16 ++++++++++++++++ 14 files changed, 203 insertions(+), 9 deletions(-) diff --git a/js/ai/src/formats/array.ts b/js/ai/src/formats/array.ts index 0dc7bd433b..ec32a5be74 100644 --- a/js/ai/src/formats/array.ts +++ b/js/ai/src/formats/array.ts @@ -1,3 +1,19 @@ +/** + * Copyright 2024 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + import { GenkitError } from '@genkit-ai/core'; import { extractItems } from '../extract'; import type { Formatter } from './types'; diff --git a/js/ai/src/formats/enum.ts b/js/ai/src/formats/enum.ts index 6a6da7a1b0..dd4744c28f 100644 --- a/js/ai/src/formats/enum.ts +++ b/js/ai/src/formats/enum.ts @@ -1,3 +1,19 @@ +/** + * Copyright 2024 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + import { GenkitError } from '@genkit-ai/core'; import type { Formatter } from './types'; diff --git a/js/ai/src/formats/index.ts b/js/ai/src/formats/index.ts index 4cb91af831..da409bcb4f 100644 --- a/js/ai/src/formats/index.ts +++ b/js/ai/src/formats/index.ts @@ -1,3 +1,19 @@ +/** + * Copyright 2024 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + import { Registry } from '@genkit-ai/core/registry'; import { arrayParser } from './array'; import { enumParser } from './enum'; @@ -20,9 +36,13 @@ export function defineFormat( formatter: Formatter ) { registry.registerValue('format', name, formatter); + return formatter; } -export type FormatArgument = string | Formatter; +export type FormatArgument = + | keyof typeof DEFAULT_FORMATS + | Formatter + | Omit; export async function resolveFormat( registry: Registry, diff --git a/js/ai/src/formats/json.ts b/js/ai/src/formats/json.ts index 2f9e7e405b..fe0d0bdad7 100644 --- a/js/ai/src/formats/json.ts +++ b/js/ai/src/formats/json.ts @@ -1,3 +1,19 @@ +/** + * Copyright 2024 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + import { extractJson } from '../extract'; import type { Formatter } from './types'; diff --git a/js/ai/src/formats/jsonl.ts b/js/ai/src/formats/jsonl.ts index 5a6dcd2918..a186faa7f9 100644 --- a/js/ai/src/formats/jsonl.ts +++ b/js/ai/src/formats/jsonl.ts @@ -1,3 +1,19 @@ +/** + * Copyright 2024 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + import { GenkitError } from '@genkit-ai/core'; import JSON5 from 'json5'; import { extractJson } from '../extract'; diff --git a/js/ai/src/formats/text.ts b/js/ai/src/formats/text.ts index 2363cfa9ef..f968693893 100644 --- a/js/ai/src/formats/text.ts +++ b/js/ai/src/formats/text.ts @@ -1,3 +1,19 @@ +/** + * Copyright 2024 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + import { GenerateResponse, GenerateResponseChunk } from '../generate'; import type { Formatter } from './types'; diff --git a/js/ai/src/formats/types.d.ts b/js/ai/src/formats/types.d.ts index 68cc8d4f37..3f5736bc40 100644 --- a/js/ai/src/formats/types.d.ts +++ b/js/ai/src/formats/types.d.ts @@ -1,3 +1,19 @@ +/** + * Copyright 2024 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + import { GenerateResponse, GenerateResponseChunk } from '../generate'; import { GenerateRequest } from '../model'; diff --git a/js/ai/src/generate.ts b/js/ai/src/generate.ts index 8908790040..6a3fd33b7c 100755 --- a/js/ai/src/generate.ts +++ b/js/ai/src/generate.ts @@ -25,6 +25,7 @@ import { Registry } from '@genkit-ai/core/registry'; import { parseSchema, toJsonSchema } from '@genkit-ai/core/schema'; import { DocumentData } from './document.js'; import { extractJson } from './extract.js'; +import { FormatArgument } from './formats/index.js'; import { generateHelper, GenerateUtilParamSchema } from './generateAction.js'; import { GenerateRequest, @@ -415,11 +416,7 @@ export async function toGenerateRequest( docs: options.docs, tools: tools?.map((tool) => toToolDefinition(tool)) || [], output: { - format: - options.output?.format || - (options.output?.schema || options.output?.jsonSchema - ? 'json' - : 'text'), + format: options.output?.format, schema: toJsonSchema({ schema: options.output?.schema, jsonSchema: options.output?.jsonSchema, @@ -450,7 +447,8 @@ export interface GenerateOptions< config?: z.infer; /** Configuration for the desired output of the request. Defaults to the model's default output if unspecified. 
*/ output?: { - format?: 'text' | 'json' | 'media'; + format?: FormatArgument; + constrained?: boolean; schema?: O; jsonSchema?: any; }; diff --git a/js/ai/src/index.ts b/js/ai/src/index.ts index 2879a78fa4..e629db64e7 100644 --- a/js/ai/src/index.ts +++ b/js/ai/src/index.ts @@ -36,12 +36,12 @@ export { type EvaluatorReference, } from './evaluator.js'; export { - generate, GenerateResponse, - generateStream, GenerationBlockedError, GenerationResponseError, Message, + generate, + generateStream, normalizePart, tagAsPreamble, toGenerateRequest, diff --git a/js/ai/tests/formats/array_test.ts b/js/ai/tests/formats/array_test.ts index 448d15cb3b..bb8a8f8a58 100644 --- a/js/ai/tests/formats/array_test.ts +++ b/js/ai/tests/formats/array_test.ts @@ -1,3 +1,19 @@ +/** + * Copyright 2024 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + import assert from 'node:assert'; import { describe, it } from 'node:test'; import { arrayParser } from '../../src/formats/array.js'; diff --git a/js/ai/tests/formats/enum_test.ts b/js/ai/tests/formats/enum_test.ts index 153b2f030e..338921821a 100644 --- a/js/ai/tests/formats/enum_test.ts +++ b/js/ai/tests/formats/enum_test.ts @@ -1,3 +1,19 @@ +/** + * Copyright 2024 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + import { GenkitError } from '@genkit-ai/core'; import assert from 'node:assert'; import { describe, it } from 'node:test'; diff --git a/js/ai/tests/formats/json_test.ts b/js/ai/tests/formats/json_test.ts index 8ac707ffbe..f5824fa418 100644 --- a/js/ai/tests/formats/json_test.ts +++ b/js/ai/tests/formats/json_test.ts @@ -1,3 +1,19 @@ +/** + * Copyright 2024 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + import assert from 'node:assert'; import { describe, it } from 'node:test'; import { jsonParser } from '../../src/formats/json.js'; diff --git a/js/ai/tests/formats/jsonl_test.ts b/js/ai/tests/formats/jsonl_test.ts index a6237bd385..a08119be41 100644 --- a/js/ai/tests/formats/jsonl_test.ts +++ b/js/ai/tests/formats/jsonl_test.ts @@ -1,3 +1,19 @@ +/** + * Copyright 2024 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + import assert from 'node:assert'; import { describe, it } from 'node:test'; import { jsonlParser } from '../../src/formats/jsonl.js'; diff --git a/js/ai/tests/formats/text_test.ts b/js/ai/tests/formats/text_test.ts index 3249fbae6a..0e1f268042 100644 --- a/js/ai/tests/formats/text_test.ts +++ b/js/ai/tests/formats/text_test.ts @@ -1,3 +1,19 @@ +/** + * Copyright 2024 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + import assert from 'node:assert'; import { describe, it } from 'node:test'; import { textParser } from '../../src/formats/text.js'; From 7944b6db6f83ea0e74942276565cb9ad5ea72739 Mon Sep 17 00:00:00 2001 From: Michael Bleigh Date: Mon, 28 Oct 2024 11:07:10 -0700 Subject: [PATCH 3/4] fix build --- js/ai/src/formats/index.ts | 2 +- js/ai/src/generate.ts | 4 +--- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/js/ai/src/formats/index.ts b/js/ai/src/formats/index.ts index da409bcb4f..567e38efd3 100644 --- a/js/ai/src/formats/index.ts +++ b/js/ai/src/formats/index.ts @@ -51,5 +51,5 @@ export async function resolveFormat( if (typeof arg === 'string') { return registry.lookupValue('format', arg); } - return arg; + return arg as Formatter; } diff --git a/js/ai/src/generate.ts b/js/ai/src/generate.ts index 6a3fd33b7c..0b586d3beb 100755 --- a/js/ai/src/generate.ts +++ b/js/ai/src/generate.ts @@ -25,7 +25,6 @@ import { Registry } from '@genkit-ai/core/registry'; import { parseSchema, toJsonSchema } from '@genkit-ai/core/schema'; import { DocumentData } from './document.js'; import { extractJson } from './extract.js'; -import { FormatArgument } from './formats/index.js'; import { generateHelper, GenerateUtilParamSchema } from './generateAction.js'; import { GenerateRequest, @@ -447,8 +446,7 @@ export interface GenerateOptions< config?: z.infer; /** Configuration for the desired output of the request. Defaults to the model's default output if unspecified. 
*/ output?: { - format?: FormatArgument; - constrained?: boolean; + format?: 'json' | 'text' | 'media'; schema?: O; jsonSchema?: any; }; From e38c1018ac1f5e662cc1ca4a9cfd8e73209c22b0 Mon Sep 17 00:00:00 2001 From: Michael Bleigh Date: Mon, 28 Oct 2024 11:14:56 -0700 Subject: [PATCH 4/4] fix --- js/ai/src/generate.ts | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/js/ai/src/generate.ts b/js/ai/src/generate.ts index 0b586d3beb..f3644e1e8b 100755 --- a/js/ai/src/generate.ts +++ b/js/ai/src/generate.ts @@ -415,7 +415,11 @@ export async function toGenerateRequest( docs: options.docs, tools: tools?.map((tool) => toToolDefinition(tool)) || [], output: { - format: options.output?.format, + format: + options.output?.format || + (options.output?.schema || options.output?.jsonSchema + ? 'json' + : 'text'), schema: toJsonSchema({ schema: options.output?.schema, jsonSchema: options.output?.jsonSchema,