Factorize generate parameters

huggingface · Jan 26, 2024 · 49a8151
1 parent bf48f5e
commit 49a8151
Show file tree

Hide file tree

Showing 9 changed files with 141 additions and 13 deletions.
diff --git a/packages/tasks/src/scripts/inference-codegen.ts b/packages/tasks/src/scripts/inference-codegen.ts
@@ -74,7 +74,6 @@ async function generateTypescript(inputData: InputData): Promise<SerializedRende
  * And writes that to the `inference.ts` file
  *
  */
-
 async function postProcessOutput(path2generated: string, outputSpec: Record<string, unknown>): Promise<void> {
 	const source = ts.createSourceFile(
 		path.basename(path2generated),
@@ -149,9 +148,12 @@ async function main() {
 			.filter((entry) => entry.name !== "placeholder")
 			.map(async (entry) => ({ task: entry.name, dirPath: path.join(entry.path, entry.name) }))
 	);
-	const allSpecFiles = allTasks
-		.flatMap(({ dirPath }) => [path.join(dirPath, "spec", "input.json"), path.join(dirPath, "spec", "output.json")])
-		.filter((filepath) => pathExists(filepath));
+	const allSpecFiles = [
+		path.join(tasksDir, "schema-utils.json"),
+		...allTasks
+			.flatMap(({ dirPath }) => [path.join(dirPath, "spec", "input.json"), path.join(dirPath, "spec", "output.json")])
+			.filter((filepath) => pathExists(filepath)),
+	];
 
 	for (const { task, dirPath } of allTasks) {
 		const taskSpecDir = path.join(dirPath, "spec");

diff --git a/packages/tasks/src/tasks/automatic-speech-recognition/inference.ts b/packages/tasks/src/tasks/automatic-speech-recognition/inference.ts
@@ -14,9 +14,35 @@ export interface AutomaticSpeechRecognitionInput {
 	/**
 	 * Additional inference parameters
 	 */
-	parameters?: {
-		[key: string]: unknown;
-	};
+	parameters?: AutomaticSpeechRecognitionParameters;
+	[property: string]: unknown;
+}
+/**
+ * Additional inference parameters
+ *
+ * Additional inference parameters for Automatic Speech Recognition
+ */
+export interface AutomaticSpeechRecognitionParameters {
+	/**
+	 * Parametrization of the text generation process
+	 */
+	generate?: GenerationParameters;
+	/**
+	 * Whether to output corresponding timestamps with the generated text
+	 */
+	returnTimestamps?: boolean;
+	[property: string]: unknown;
+}
+/**
+ * Parametrization of the text generation process
+ *
+ * Ad-hoc parametrization of the text generation process
+ */
+export interface GenerationParameters {
+	/**
+	 * The value used to modulate the next token probabilities.
+	 */
+	temperature?: number;
 	[property: string]: unknown;
 }
 export type AutomaticSpeechRecognitionOutput = AutomaticSpeechRecognitionOutputElement[];

diff --git a/packages/tasks/src/tasks/automatic-speech-recognition/spec/input.json b/packages/tasks/src/tasks/automatic-speech-recognition/spec/input.json
@@ -18,7 +18,16 @@
 			"title": "AutomaticSpeechRecognitionParameters",
 			"description": "Additional inference parameters for Automatic Speech Recognition",
 			"type": "object",
-			"properties": {}
+			"properties": {
+				"returnTimestamps": {
+					"type": "boolean",
+					"description": "Whether to output corresponding timestamps with the generated text"
+				},
+				"generate": {
+					"description": "Parametrization of the text generation process",
+					"$ref": "/inference/schemas/schema-utils.json#/definitions/GenerationParameters"
+				}
+			}
 		}
 	},
 	"required": ["data"]

diff --git a/packages/tasks/src/tasks/image-to-text/inference.ts b/packages/tasks/src/tasks/image-to-text/inference.ts
@@ -23,12 +23,28 @@ export interface ImageToTextInput {
  * Additional inference parameters for Image To Text
  */
 export interface ImageToTextParameters {
+	/**
+	 * Parametrization of the text generation process
+	 */
+	generate?: GenerationParameters;
 	/**
 	 * The amount of maximum tokens to generate.
 	 */
 	maxNewTokens?: number;
 	[property: string]: unknown;
 }
+/**
+ * Parametrization of the text generation process
+ *
+ * Ad-hoc parametrization of the text generation process
+ */
+export interface GenerationParameters {
+	/**
+	 * The value used to modulate the next token probabilities.
+	 */
+	temperature?: number;
+	[property: string]: unknown;
+}
 export type ImageToTextOutput = ImageToTextOutputElement[];
 /**
  * Outputs of inference for the Image To Text task

diff --git a/packages/tasks/src/tasks/image-to-text/spec/input.json b/packages/tasks/src/tasks/image-to-text/spec/input.json
@@ -22,6 +22,10 @@
 				"maxNewTokens": {
 					"type": "integer",
 					"description": "The amount of maximum tokens to generate."
+				},
+				"generate": {
+					"description": "Parametrization of the text generation process",
+					"$ref": "/inference/schemas/schema-utils.json#/definitions/GenerationParameters"
 				}
 			}
 		}

diff --git a/packages/tasks/src/tasks/schema-utils.json b/packages/tasks/src/tasks/schema-utils.json
@@ -0,0 +1,18 @@
+{
+	"$id": "/inference/schemas/schema-utils.json",
+	"$schema": "http://json-schema.org/draft-06/schema#",
+	"description": "Common type definitions shared by several tasks",
+	"definitions": {
+		"GenerationParameters": {
+			"title": "GenerationParameters",
+			"description": "Ad-hoc parametrization of the text generation process",
+			"type": "object",
+			"properties": {
+				"temperature": {
+					"type": "number",
+					"description": "The value used to modulate the next token probabilities."
+				}
+			}
+		}
+	}
+}
diff --git a/packages/tasks/src/tasks/text-to-audio/inference.ts b/packages/tasks/src/tasks/text-to-audio/inference.ts
@@ -14,9 +14,31 @@ export interface TextToAudioInput {
 	/**
 	 * Additional inference parameters
 	 */
-	parameters?: {
-		[key: string]: unknown;
-	};
+	parameters?: TextToAudioParameters;
+	[property: string]: unknown;
+}
+/**
+ * Additional inference parameters
+ *
+ * Additional inference parameters for Text To Audio
+ */
+export interface TextToAudioParameters {
+	/**
+	 * Parametrization of the text generation process
+	 */
+	generate?: GenerationParameters;
+	[property: string]: unknown;
+}
+/**
+ * Parametrization of the text generation process
+ *
+ * Ad-hoc parametrization of the text generation process
+ */
+export interface GenerationParameters {
+	/**
+	 * The value used to modulate the next token probabilities.
+	 */
+	temperature?: number;
 	[property: string]: unknown;
 }
 export type TextToAudioOutput = TextToAudioOutputElement[];

diff --git a/packages/tasks/src/tasks/text-to-audio/spec/input.json b/packages/tasks/src/tasks/text-to-audio/spec/input.json
@@ -19,7 +19,12 @@
 			"title": "TextToAudioParameters",
 			"description": "Additional inference parameters for Text To Audio",
 			"type": "object",
-			"properties": {}
+			"properties": {
+				"generate": {
+					"description": "Parametrization of the text generation process",
+					"$ref": "/inference/schemas/schema-utils.json#/definitions/GenerationParameters"
+				}
+			}
 		}
 	},
 	"required": ["data"]

diff --git a/packages/tasks/src/tasks/text-to-speech/inference.ts b/packages/tasks/src/tasks/text-to-speech/inference.ts
@@ -17,7 +17,33 @@ export interface TextToSpeechInput {
 	/**
 	 * Additional inference parameters
 	 */
-	parameters?: { [key: string]: unknown };
+	parameters?: TextToAudioParameters;
+	[property: string]: unknown;
+}
+
+/**
+ * Additional inference parameters
+ *
+ * Additional inference parameters for Text To Audio
+ */
+export interface TextToAudioParameters {
+	/**
+	 * Parametrization of the text generation process
+	 */
+	generate?: GenerationParameters;
+	[property: string]: unknown;
+}
+
+/**
+ * Parametrization of the text generation process
+ *
+ * Ad-hoc parametrization of the text generation process
+ */
+export interface GenerationParameters {
+	/**
+	 * The value used to modulate the next token probabilities.
+	 */
+	temperature?: number;
 	[property: string]: unknown;
 }