Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion js/ai/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
"build:clean": "rimraf ./lib",
"build": "npm-run-all build:clean check compile",
"build:watch": "tsup-node --watch",
"test": "node --import tsx --test ./tests/**/*_test.ts",
"test": "node --import tsx --test ./tests/**/*_test.ts ./tests/*_test.ts",
"test:single": "node --import tsx --test"
},
"repository": {
Expand Down
109 changes: 106 additions & 3 deletions js/ai/src/extract.ts
Original file line number Diff line number Diff line change
Expand Up @@ -40,10 +40,31 @@ export function extractJson<T = unknown>(
let closingChar: '}' | ']' | undefined;
let startPos: number | undefined;
let nestingCount = 0;
let inString = false;
let escapeNext = false;

for (let i = 0; i < text.length; i++) {
const char = text[i].replace(/\u00A0/g, ' ');

if (escapeNext) {
escapeNext = false;
continue;
}

if (char === '\\') {
escapeNext = true;
continue;
}

if (char === '"') {
inString = !inString;
continue;
}

if (inString) {
continue;
}

if (!openingChar && (char === '{' || char === '[')) {
// Look for opening character
openingChar = char;
Expand All @@ -67,18 +88,100 @@ export function extractJson<T = unknown>(
// If an incomplete JSON structure is detected
try {
// Parse the incomplete JSON structure using partial-json for lenient parsing
// Note: partial-json automatically handles adding the closing character
return parsePartialJson<T>(text.substring(startPos));
} catch {
// If parsing fails, throw an error
if (throwOnBadJson) {
throw new Error(`Invalid JSON extracted from model output: ${text}`);
}
return null; // Return null if no JSON structure is found }
return null;
}
}
if (throwOnBadJson) {
throw new Error(`Invalid JSON extracted from model output: ${text}`);
}
return null; // Return null if no JSON structure is found
return null;
}

/** Result of one `extractItems` pass over a piece of text. */
interface ExtractItemsResult {
/** Objects that were fully present in the scanned text and parsed successfully. */
items: unknown[];
/** Position in the text to pass back as `cursor` on the next `extractItems` call. */
cursor: number;
}

/**
 * Extracts complete objects from the first array found in the text.
 * Processes text from the cursor position and returns both complete items
 * and the new cursor position.
 *
 * A `cursor` of 0 means "nothing processed yet": the function first looks for
 * the opening `[` and starts scanning right after it. If no `[` exists, no
 * items are returned and the cursor is set to the end of the text. Any other
 * cursor value is used as-is as the scan start.
 *
 * Only top-level `{...}` objects are extracted. Braces inside double-quoted
 * strings are ignored, and scanning stops at the `]` that closes the array.
 * Objects that fail to parse are skipped without advancing the cursor.
 *
 * @param text The full text accumulated so far.
 * @param cursor Position returned by a previous call, or 0 for the first call.
 * @returns The newly completed items and the position just past the last one.
 */
export function extractItems(
  text: string,
  cursor: number = 0
): ExtractItemsResult {
  const items: unknown[] = [];
  let currentCursor = cursor;

  // Find the first array start if we haven't already processed any text
  if (cursor === 0) {
    const arrayStart = text.indexOf('[');
    if (arrayStart === -1) {
      return { items: [], cursor: text.length };
    }
    currentCursor = arrayStart + 1;
  }

  let objectStart = -1;
  let braceCount = 0;
  let inString = false;
  let escapeNext = false;

  // Process the text from the cursor position
  for (let i = currentCursor; i < text.length; i++) {
    const char = text[i];

    if (escapeNext) {
      // The previous character was a backslash, so this one is escaped.
      escapeNext = false;
      continue;
    }

    if (char === '\\') {
      escapeNext = true;
      continue;
    }

    if (char === '"') {
      inString = !inString;
      continue;
    }

    if (inString) {
      continue;
    }

    if (char === '{') {
      if (braceCount === 0) {
        objectStart = i;
      }
      braceCount++;
    } else if (char === '}') {
      if (braceCount === 0) {
        // Stray closing brace with no open object. Ignore it: letting the
        // depth go negative would stop every later `{` from being recognized
        // as the start of a top-level item.
        continue;
      }
      braceCount--;
      if (braceCount === 0 && objectStart !== -1) {
        try {
          const obj = JSON5.parse(text.substring(objectStart, i + 1));
          items.push(obj);
          currentCursor = i + 1;
          objectStart = -1;
        } catch {
          // Best effort: an unparseable object is skipped and scanning
          // continues with the next one.
        }
      }
    } else if (char === ']' && braceCount === 0) {
      // End of array
      break;
    }
  }

  return {
    items,
    cursor: currentCursor,
  };
}
64 changes: 64 additions & 0 deletions js/ai/src/formats/array.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
/**
* Copyright 2024 Google LLC
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

import { GenkitError } from '@genkit-ai/core';
import { extractItems } from '../extract';
import type { Formatter } from './types';

/**
 * Formatter for responses that are a JSON array of objects.
 *
 * While streaming, each call to `parseChunk` emits the objects that have
 * become complete since the previous call; the scan position is kept in a
 * closure variable, so it is private to the object returned by one call of
 * this formatter. `parseResponse` re-scans the full response text from the
 * start and returns every complete object.
 *
 * When an output schema is supplied, `instructions` asks for a JSON array
 * conforming to that schema; otherwise `instructions` is `false`.
 *
 * @throws GenkitError with status `INVALID_ARGUMENT` when an output schema is
 *   supplied whose `type` is not `'array'`.
 */
export const arrayParser: Formatter = (request) => {
  const schema = request.output?.schema;
  if (schema && schema.type !== 'array') {
    throw new GenkitError({
      status: 'INVALID_ARGUMENT',
      // This formatter is registered under the name 'array', so name it that way.
      message: `Must supply an 'array' schema type when using the 'array' parser format.`,
    });
  }

  let instructions: boolean | string = false;
  if (schema) {
    instructions = `Output should be a JSON array conforming to the following schema:

\`\`\`
${JSON.stringify(schema)}
\`\`\`
`;
  }

  // Position in the accumulated text up to which items were already emitted.
  let cursor = 0;

  return {
    parseChunk: (chunk, emit) => {
      const { items, cursor: newCursor } = extractItems(
        chunk.accumulatedText,
        cursor
      );

      // Emit any complete items
      for (const item of items) {
        emit(item);
      }

      // Update cursor position
      cursor = newCursor;
    },

    parseResponse: (response) => {
      const { items } = extractItems(response.text, 0);
      return items;
    },

    instructions,
  };
};
40 changes: 40 additions & 0 deletions js/ai/src/formats/enum.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
/**
* Copyright 2024 Google LLC
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

import { GenkitError } from '@genkit-ai/core';
import type { Formatter } from './types';

/**
 * Formatter for responses that should be exactly one enum value.
 *
 * When the output schema lists `enum` values, `instructions` tells the model
 * to answer with only one of them, one value per line; otherwise
 * `instructions` is `false`.
 *
 * `parseResponse` returns the response text with surrounding whitespace
 * removed. The instructions ask the model not to add quotes, but if the
 * answer is still wrapped in one pair of matching single or double quotes,
 * that pair is stripped so the result can match the enum value.
 *
 * @throws GenkitError with status `INVALID_ARGUMENT` when a schema type is
 *   given that is neither `'string'` nor `'enum'`.
 */
export const enumParser: Formatter = (request) => {
  const schemaType = request.output?.schema?.type;
  if (schemaType && schemaType !== 'string' && schemaType !== 'enum') {
    throw new GenkitError({
      status: 'INVALID_ARGUMENT',
      message: `Must supply a 'string' or 'enum' schema type when using the enum parser format.`,
    });
  }

  let instructions: boolean | string = false;
  if (request.output?.schema?.enum) {
    instructions = `Output should be ONLY one of the following enum values. Do not output any additional information or add quotes.\n\n${request.output?.schema?.enum.map((v) => v.toString()).join('\n')}`;
  }

  return {
    parseResponse: (response) => {
      const trimmed = response.text.trim();
      const first = trimmed[0];
      const isQuoted =
        trimmed.length >= 2 &&
        (first === '"' || first === "'") &&
        trimmed[trimmed.length - 1] === first;
      // Drop a single pair of wrapping quotes; anything else is returned as-is.
      return isQuoted ? trimmed.slice(1, -1).trim() : trimmed;
    },
    instructions,
  };
};
55 changes: 55 additions & 0 deletions js/ai/src/formats/index.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
/**
* Copyright 2024 Google LLC
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

import { Registry } from '@genkit-ai/core/registry';
import { arrayParser } from './array';
import { enumParser } from './enum';
import { jsonParser } from './json';
import { jsonlParser } from './jsonl';
import { textParser } from './text';
import { Formatter } from './types';

/**
 * Formatter implementations keyed by format name. The key set also
 * defines the literal names accepted by the `FormatArgument` type.
 */
export const DEFAULT_FORMATS = {
json: jsonParser,
array: arrayParser,
text: textParser,
enum: enumParser,
jsonl: jsonlParser,
};

/**
 * Registers a formatter in the registry under the `'format'` value type.
 *
 * @param registry Registry that receives the formatter.
 * @param name Name under which the formatter is stored.
 * @param formatter The formatter to register.
 * @returns The same formatter that was passed in.
 */
export function defineFormat(registry: Registry, name: string, formatter: Formatter) {
  registry.registerValue('format', name, formatter);

  return formatter;
}

/**
 * Ways a caller can specify a format: one of the `DEFAULT_FORMATS` keys,
 * a `Formatter` itself, or another string (which `resolveFormat` looks up
 * by name in the registry).
 *
 * NOTE(review): the `Omit<string, ...>` member presumably exists to keep
 * editor completion for the built-in names while still accepting arbitrary
 * strings — confirm intent.
 */
export type FormatArgument =
| keyof typeof DEFAULT_FORMATS
| Formatter
| Omit<string, keyof typeof DEFAULT_FORMATS>;

/**
 * Turns a format argument into a formatter.
 *
 * A string is treated as a format name and looked up in the registry under
 * the `'format'` value type; the lookup result is returned as-is, so it may
 * be `undefined`. Any non-string argument is returned unchanged as the
 * formatter.
 *
 * @param registry Registry used for name lookups.
 * @param arg Format name or formatter.
 */
export async function resolveFormat(
  registry: Registry,
  arg: FormatArgument
): Promise<Formatter | undefined> {
  // Anything that is not a name is already the formatter itself.
  if (typeof arg !== 'string') {
    return arg as Formatter;
  }

  return registry.lookupValue<Formatter>('format', arg);
}
45 changes: 45 additions & 0 deletions js/ai/src/formats/json.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
/**
* Copyright 2024 Google LLC
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

import { extractJson } from '../extract';
import type { Formatter } from './types';

/**
 * Formatter for JSON responses.
 *
 * `parseChunk` emits whatever `extractJson` returns for the text accumulated
 * so far, and `parseResponse` returns what `extractJson` returns for the full
 * response text. When an output schema is supplied, `instructions` asks for
 * JSON conforming to that schema; otherwise `instructions` is `false`.
 */
export const jsonParser: Formatter = (request) => {
  const schema = request.output?.schema;

  const instructions: boolean | string = schema
    ? `Output should be in JSON format and conform to the following schema:

\`\`\`
${JSON.stringify(schema)}
\`\`\`
`
    : false;

  return {
    parseChunk: (chunk, emit) => {
      const parsedSoFar = extractJson(chunk.accumulatedText);
      emit(parsedSoFar);
    },

    parseResponse: (response) => extractJson(response.text),

    instructions,
  };
};
Loading
Loading