Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
67 changes: 34 additions & 33 deletions js/ai/src/formats/array.ts
Original file line number Diff line number Diff line change
Expand Up @@ -18,47 +18,48 @@ import { GenkitError } from '@genkit-ai/core';
import { extractItems } from '../extract';
import type { Formatter } from './types';

export const arrayParser: Formatter = (request) => {
if (request.output?.schema && request.output?.schema.type !== 'array') {
throw new GenkitError({
status: 'INVALID_ARGUMENT',
message: `Must supply an 'array' schema type when using the 'items' parser format.`,
});
}
export const arrayFormatter: Formatter<unknown[], unknown[]> = {
name: 'array',
config: {
contentType: 'application/json',
constrained: true,
},
handler: (request) => {
if (request.output?.schema && request.output?.schema.type !== 'array') {
throw new GenkitError({
status: 'INVALID_ARGUMENT',
message: `Must supply an 'array' schema type when using the 'items' parser format.`,
});
}

let instructions: boolean | string = false;
if (request.output?.schema) {
instructions = `Output should be a JSON array conforming to the following schema:
let instructions: string | undefined;
if (request.output?.schema) {
instructions = `Output should be a JSON array conforming to the following schema:

\`\`\`
${JSON.stringify(request.output!.schema!)}
\`\`\`
`;
}
}

let cursor: number = 0;
return {
parseChunk: (chunk) => {
// first, determine the cursor position from the previous chunks
const cursor = chunk.previousChunks?.length
? extractItems(chunk.previousText).cursor
: 0;
// then, extract the items starting at that cursor
const { items } = extractItems(chunk.accumulatedText, cursor);

return {
parseChunk: (chunk, emit) => {
const { items, cursor: newCursor } = extractItems(
chunk.accumulatedText,
cursor
);
return items;
},

// Emit any complete items
for (const item of items) {
emit(item);
}
parseResponse: (response) => {
const { items } = extractItems(response.text, 0);
return items;
},

// Update cursor position
cursor = newCursor;
},

parseResponse: (response) => {
const { items } = extractItems(response.text, 0);
return items;
},

instructions,
};
instructions,
};
},
};
43 changes: 25 additions & 18 deletions js/ai/src/formats/enum.ts
Original file line number Diff line number Diff line change
Expand Up @@ -17,24 +17,31 @@
import { GenkitError } from '@genkit-ai/core';
import type { Formatter } from './types';

export const enumParser: Formatter = (request) => {
const schemaType = request.output?.schema?.type;
if (schemaType && schemaType !== 'string' && schemaType !== 'enum') {
throw new GenkitError({
status: 'INVALID_ARGUMENT',
message: `Must supply a 'string' or 'enum' schema type when using the enum parser format.`,
});
}
export const enumFormatter: Formatter<string, string> = {
name: 'enum',
config: {
contentType: 'text/plain',
constrained: true,
},
handler: (request) => {
const schemaType = request.output?.schema?.type;
if (schemaType && schemaType !== 'string' && schemaType !== 'enum') {
throw new GenkitError({
status: 'INVALID_ARGUMENT',
message: `Must supply a 'string' or 'enum' schema type when using the enum parser format.`,
});
}

let instructions: boolean | string = false;
if (request.output?.schema?.enum) {
instructions = `Output should be ONLY one of the following enum values. Do not output any additional information or add quotes.\n\n${request.output?.schema?.enum.map((v) => v.toString()).join('\n')}`;
}
let instructions: string | undefined;
if (request.output?.schema?.enum) {
instructions = `Output should be ONLY one of the following enum values. Do not output any additional information or add quotes.\n\n${request.output?.schema?.enum.map((v) => v.toString()).join('\n')}`;
}

return {
parseResponse: (response) => {
return response.text.trim();
},
instructions,
};
return {
parseResponse: (response) => {
return response.text.trim();
},
instructions,
};
},
};
45 changes: 30 additions & 15 deletions js/ai/src/formats/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -15,26 +15,20 @@
*/

import { Registry } from '@genkit-ai/core/registry';
import { arrayParser } from './array';
import { enumParser } from './enum';
import { jsonParser } from './json';
import { jsonlParser } from './jsonl';
import { textParser } from './text';
import { arrayFormatter } from './array';
import { enumFormatter } from './enum';
import { jsonFormatter } from './json';
import { jsonlFormatter } from './jsonl';
import { textFormatter } from './text';
import { Formatter } from './types';

/**
 * Built-in output formats keyed by format name. Each value is the parser
 * imported from the sibling module of the same name.
 */
export const DEFAULT_FORMATS = {
json: jsonParser,
array: arrayParser,
text: textParser,
enum: enumParser,
jsonl: jsonlParser,
};

export function defineFormat(
registry: Registry,
name: string,
formatter: Formatter
options: { name: string } & Formatter['config'],
handler: Formatter['handler']
) {
const { name, ...config } = options;
const formatter = { config, handler };
registry.registerValue('format', name, formatter);
return formatter;
}
Expand All @@ -53,3 +47,24 @@ export async function resolveFormat(
}
return arg as Formatter;
}

/**
 * Built-in formatters. `initializeFormats` iterates this list in order and
 * registers each entry on a registry.
 */
export const DEFAULT_FORMATS: Formatter<any, any>[] = [
jsonFormatter,
arrayFormatter,
textFormatter,
enumFormatter,
jsonlFormatter,
];

/**
 * Registers every built-in format listed in DEFAULT_FORMATS on `registry`.
 *
 * For each formatter, its `name` is merged with its `config` to form the
 * options object passed to `defineFormat`; the `handler` is passed through
 * unchanged.
 */
export function initializeFormats(registry: Registry) {
  for (const { name, config, handler } of DEFAULT_FORMATS) {
    const options = { name, ...config };
    defineFormat(registry, options, handler);
  }
}
37 changes: 21 additions & 16 deletions js/ai/src/formats/json.ts
Original file line number Diff line number Diff line change
Expand Up @@ -17,29 +17,34 @@
import { extractJson } from '../extract';
import type { Formatter } from './types';

export const jsonParser: Formatter = (request) => {
let accumulatedText: string = '';
let instructions: boolean | string = false;
export const jsonFormatter: Formatter<unknown, unknown> = {
name: 'json',
config: {
contentType: 'application/json',
constrained: true,
},
handler: (request) => {
let instructions: string | undefined;

if (request.output?.schema) {
instructions = `Output should be in JSON format and conform to the following schema:
if (request.output?.schema) {
instructions = `Output should be in JSON format and conform to the following schema:

\`\`\`
${JSON.stringify(request.output!.schema!)}
\`\`\`
`;
}
}

return {
parseChunk: (chunk, emit) => {
accumulatedText = chunk.accumulatedText;
emit(extractJson(accumulatedText));
},
return {
parseChunk: (chunk) => {
return extractJson(chunk.accumulatedText);
},

parseResponse: (response) => {
return extractJson(response.text);
},
parseResponse: (response) => {
return extractJson(response.text);
},

instructions,
};
instructions,
};
},
};
94 changes: 56 additions & 38 deletions js/ai/src/formats/jsonl.ts
Original file line number Diff line number Diff line change
Expand Up @@ -26,57 +26,75 @@ function objectLines(text: string): string[] {
.filter((line) => line.startsWith('{'));
}

export const jsonlParser: Formatter = (request) => {
if (
request.output?.schema &&
(request.output?.schema.type !== 'array' ||
request.output?.schema.items?.type !== 'object')
) {
throw new GenkitError({
status: 'INVALID_ARGUMENT',
message: `Must supply an 'array' schema type containing 'object' items when using the 'jsonl' parser format.`,
});
}
export const jsonlFormatter: Formatter<unknown[], unknown[]> = {
name: 'jsonl',
config: {
contentType: 'application/jsonl',
},
handler: (request) => {
if (
request.output?.schema &&
(request.output?.schema.type !== 'array' ||
request.output?.schema.items?.type !== 'object')
) {
throw new GenkitError({
status: 'INVALID_ARGUMENT',
message: `Must supply an 'array' schema type containing 'object' items when using the 'jsonl' parser format.`,
});
}

let instructions: boolean | string = false;
if (request.output?.schema?.items) {
instructions = `Output should be JSONL format, a sequence of JSON objects (one per line). Each line should conform to the following schema:
let instructions: string | undefined;
if (request.output?.schema?.items) {
instructions = `Output should be JSONL format, a sequence of JSON objects (one per line). Each line should conform to the following schema:

\`\`\`
${JSON.stringify(request.output.schema.items)}
\`\`\`
`;
}
}

let cursor = 0;
return {
parseChunk: (chunk) => {
const results: unknown[] = [];

return {
parseChunk: (chunk, emit) => {
const jsonLines = objectLines(chunk.accumulatedText);
const text = chunk.accumulatedText;

for (let i = cursor; i < jsonLines.length; i++) {
try {
const result = JSON5.parse(jsonLines[i]);
if (result) {
emit(result);
let startIndex = 0;
if (chunk.previousChunks?.length) {
const lastNewline = chunk.previousText.lastIndexOf('\n');
if (lastNewline !== -1) {
startIndex = lastNewline + 1;
}
} catch (e) {
cursor = i;
return;
}
}

cursor = jsonLines.length;
},
const lines = text.slice(startIndex).split('\n');

parseResponse: (response) => {
const items = objectLines(response.text)
.map((l) => extractJson(l))
.filter((l) => !!l);
for (const line of lines) {
const trimmed = line.trim();
if (trimmed.startsWith('{')) {
try {
const result = JSON5.parse(trimmed);
if (result) {
results.push(result);
}
} catch (e) {
break;
}
}
}

return results;
},

parseResponse: (response) => {
const items = objectLines(response.text)
.map((l) => extractJson(l))
.filter((l) => !!l);

return items;
},
return items;
},

instructions,
};
instructions,
};
},
};
25 changes: 15 additions & 10 deletions js/ai/src/formats/text.ts
Original file line number Diff line number Diff line change
Expand Up @@ -14,17 +14,22 @@
* limitations under the License.
*/

import { GenerateResponse, GenerateResponseChunk } from '../generate';
import type { Formatter } from './types';

export const textParser: Formatter = (request) => {
return {
parseChunk: (chunk: GenerateResponseChunk, emit: (chunk: any) => void) => {
emit(chunk.text);
},
export const textFormatter: Formatter<string, string> = {
name: 'text',
config: {
contentType: 'text/plain',
},
handler: () => {
return {
parseChunk: (chunk) => {
return chunk.text;
},

parseResponse: (response: GenerateResponse) => {
return response.text;
},
};
parseResponse: (response) => {
return response.text;
},
};
},
};
Loading
Loading