diff --git a/packages/tasks/src/scripts/inference-codegen.ts b/packages/tasks/src/scripts/inference-codegen.ts index c66d87b25..ac72ff9f7 100644 --- a/packages/tasks/src/scripts/inference-codegen.ts +++ b/packages/tasks/src/scripts/inference-codegen.ts @@ -74,7 +74,6 @@ async function generateTypescript(inputData: InputData): Promise): Promise { const source = ts.createSourceFile( path.basename(path2generated), @@ -149,9 +148,12 @@ async function main() { .filter((entry) => entry.name !== "placeholder") .map(async (entry) => ({ task: entry.name, dirPath: path.join(entry.path, entry.name) })) ); - const allSpecFiles = allTasks - .flatMap(({ dirPath }) => [path.join(dirPath, "spec", "input.json"), path.join(dirPath, "spec", "output.json")]) - .filter((filepath) => pathExists(filepath)); + const allSpecFiles = [ + path.join(tasksDir, "schema-utils.json"), + ...allTasks + .flatMap(({ dirPath }) => [path.join(dirPath, "spec", "input.json"), path.join(dirPath, "spec", "output.json")]) + .filter((filepath) => pathExists(filepath)), + ]; for (const { task, dirPath } of allTasks) { const taskSpecDir = path.join(dirPath, "spec"); diff --git a/packages/tasks/src/tasks/automatic-speech-recognition/inference.ts b/packages/tasks/src/tasks/automatic-speech-recognition/inference.ts index d83c45af5..244b44b69 100644 --- a/packages/tasks/src/tasks/automatic-speech-recognition/inference.ts +++ b/packages/tasks/src/tasks/automatic-speech-recognition/inference.ts @@ -14,9 +14,35 @@ export interface AutomaticSpeechRecognitionInput { /** * Additional inference parameters */ - parameters?: { - [key: string]: unknown; - }; + parameters?: AutomaticSpeechRecognitionParameters; + [property: string]: unknown; +} +/** + * Additional inference parameters + * + * Additional inference parameters for Automatic Speech Recognition + */ +export interface AutomaticSpeechRecognitionParameters { + /** + * Parametrization of the text generation process + */ + generate?: GenerationParameters; + /** + * Whether to output corresponding timestamps with the generated text + */ + returnTimestamps?: boolean; + [property: string]: unknown; +} +/** + * Parametrization of the text generation process + * + * Ad-hoc parametrization of the text generation process + */ +export interface GenerationParameters { + /** + * I can be the papa you'd be the mama + */ + temperature?: number; [property: string]: unknown; } export type AutomaticSpeechRecognitionOutput = AutomaticSpeechRecognitionOutputElement[]; diff --git a/packages/tasks/src/tasks/automatic-speech-recognition/spec/input.json b/packages/tasks/src/tasks/automatic-speech-recognition/spec/input.json index f44075d56..93621151e 100644 --- a/packages/tasks/src/tasks/automatic-speech-recognition/spec/input.json +++ b/packages/tasks/src/tasks/automatic-speech-recognition/spec/input.json @@ -18,7 +18,16 @@ "title": "AutomaticSpeechRecognitionParameters", "description": "Additional inference parameters for Automatic Speech Recognition", "type": "object", - "properties": {} + "properties": { + "returnTimestamps": { + "type": "boolean", + "description": "Whether to output corresponding timestamps with the generated text" + }, + "generate": { + "description": "Parametrization of the text generation process", + "$ref": "/inference/schemas/schema-utils.json#/definitions/GenerationParameters" + } + } } }, "required": ["data"] diff --git a/packages/tasks/src/tasks/image-to-text/inference.ts b/packages/tasks/src/tasks/image-to-text/inference.ts index cba745139..c87a51ce3 100644 --- a/packages/tasks/src/tasks/image-to-text/inference.ts +++ b/packages/tasks/src/tasks/image-to-text/inference.ts @@ -23,12 +23,28 @@ export interface ImageToTextInput { * Additional inference parameters for Image To Text */ export interface ImageToTextParameters { + /** + * Parametrization of the text generation process + */ + generate?: GenerationParameters; /** * The amount of maximum tokens to generate. */ maxNewTokens?: number; [property: string]: unknown; } +/** + * Parametrization of the text generation process + * + * Ad-hoc parametrization of the text generation process + */ +export interface GenerationParameters { + /** + * I can be the papa you'd be the mama + */ + temperature?: number; + [property: string]: unknown; +} export type ImageToTextOutput = ImageToTextOutputElement[]; /** * Outputs of inference for the Image To Text task diff --git a/packages/tasks/src/tasks/image-to-text/spec/input.json b/packages/tasks/src/tasks/image-to-text/spec/input.json index f06eb59f0..b074372fc 100644 --- a/packages/tasks/src/tasks/image-to-text/spec/input.json +++ b/packages/tasks/src/tasks/image-to-text/spec/input.json @@ -22,6 +22,10 @@ "maxNewTokens": { "type": "integer", "description": "The amount of maximum tokens to generate." + }, + "generate": { + "description": "Parametrization of the text generation process", + "$ref": "/inference/schemas/schema-utils.json#/definitions/GenerationParameters" } } } diff --git a/packages/tasks/src/tasks/schema-utils.json b/packages/tasks/src/tasks/schema-utils.json new file mode 100644 index 000000000..60c833f60 --- /dev/null +++ b/packages/tasks/src/tasks/schema-utils.json @@ -0,0 +1,18 @@ +{ + "$id": "/inference/schemas/schema-utils.json", + "$schema": "http://json-schema.org/draft-06/schema#", + "description": "Common type definitions shared by several tasks", + "definitions": { + "GenerationParameters": { + "title": "GenerationParameters", + "description": "Ad-hoc parametrization of the text generation process", + "type": "object", + "properties": { + "temperature": { + "type": "number", + "description": "I can be the papa you'd be the mama" + } + } + } + } +} diff --git a/packages/tasks/src/tasks/text-to-audio/inference.ts b/packages/tasks/src/tasks/text-to-audio/inference.ts index d6a05e017..41796240a 100644 --- a/packages/tasks/src/tasks/text-to-audio/inference.ts +++ b/packages/tasks/src/tasks/text-to-audio/inference.ts @@ -14,9 +14,31 @@ export interface TextToAudioInput { /** * Additional inference parameters */ - parameters?: { - [key: string]: unknown; - }; + parameters?: TextToAudioParameters; + [property: string]: unknown; +} +/** + * Additional inference parameters + * + * Additional inference parameters for Text To Audio + */ +export interface TextToAudioParameters { + /** + * Parametrization of the text generation process + */ + generate?: GenerationParameters; + [property: string]: unknown; +} +/** + * Parametrization of the text generation process + * + * Ad-hoc parametrization of the text generation process + */ +export interface GenerationParameters { + /** + * I can be the papa you'd be the mama + */ + temperature?: number; [property: string]: unknown; } export type TextToAudioOutput = TextToAudioOutputElement[]; diff --git a/packages/tasks/src/tasks/text-to-audio/spec/input.json b/packages/tasks/src/tasks/text-to-audio/spec/input.json index 176060962..d049fb02e 100644 --- a/packages/tasks/src/tasks/text-to-audio/spec/input.json +++ b/packages/tasks/src/tasks/text-to-audio/spec/input.json @@ -19,7 +19,12 @@ "title": "TextToAudioParameters", "description": "Additional inference parameters for Text To Audio", "type": "object", - "properties": {} + "properties": { + "generate": { + "description": "Parametrization of the text generation process", + "$ref": "/inference/schemas/schema-utils.json#/definitions/GenerationParameters" + } + } } }, "required": ["data"] diff --git a/packages/tasks/src/tasks/text-to-speech/inference.ts b/packages/tasks/src/tasks/text-to-speech/inference.ts index f119bc62f..a89974072 100644 --- a/packages/tasks/src/tasks/text-to-speech/inference.ts +++ b/packages/tasks/src/tasks/text-to-speech/inference.ts @@ -17,7 +17,33 @@ export interface TextToSpeechInput { /** * Additional inference parameters */ - parameters?: { [key: string]: unknown }; + parameters?: TextToAudioParameters; + [property: string]: unknown; +} + +/** + * Additional inference parameters + * + * Additional inference parameters for Text To Audio + */ +export interface TextToAudioParameters { + /** + * Parametrization of the text generation process + */ + generate?: GenerationParameters; + [property: string]: unknown; +} + +/** + * Parametrization of the text generation process + * + * Ad-hoc parametrization of the text generation process + */ +export interface GenerationParameters { + /** + * I can be the papa you'd be the mama + */ + temperature?: number; [property: string]: unknown; }