From f6c1650583c7f5691e5aa2b45ccbef5e1159150f Mon Sep 17 00:00:00 2001 From: Matt Johnson-Pint Date: Mon, 24 Jun 2024 09:48:07 -0700 Subject: [PATCH 1/4] Fix encoding type --- src/models/openai/embeddings.ts | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/src/models/openai/embeddings.ts b/src/models/openai/embeddings.ts index 1e9e7cd..e73ccdb 100644 --- a/src/models/openai/embeddings.ts +++ b/src/models/openai/embeddings.ts @@ -38,10 +38,10 @@ class EmbeddingsInput { model!: string; - @omitif("this.encodingFormat.type == 'float'") - encodingFormat: EncodingFormat = EncodingFormat.Float; - + @alias("encoding_format") + @omitif("this.encodingFormat == 'float'") + encodingFormat: string = EncodingFormat.Float; @omitif("this.dimensions == -1") dimensions: i32 = -1; // TODO: make this an `i32 | null` when supported @@ -66,13 +66,12 @@ class EmbeddingsOutput { } -@json -export class EncodingFormat { - type: string = "float"; - - static Float: EncodingFormat = { type: "float" }; - static Base64: EncodingFormat = { type: "base64" }; +// eslint-disable-next-line @typescript-eslint/no-namespace +export namespace EncodingFormat { + export const Float = "float"; + export const Base64 = "base64"; } +export type EncodingFormat = string; @json From ab494e96ba4f4683bff75873a293128b2cafe481 Mon Sep 17 00:00:00 2001 From: Matt Johnson-Pint Date: Mon, 24 Jun 2024 09:50:36 -0700 Subject: [PATCH 2/4] Use an f32[] for output embeddings --- src/models/openai/embeddings.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/models/openai/embeddings.ts b/src/models/openai/embeddings.ts index e73ccdb..058e0ca 100644 --- a/src/models/openai/embeddings.ts +++ b/src/models/openai/embeddings.ts @@ -78,7 +78,7 @@ export type EncodingFormat = string; class Embedding { object!: string; index!: i32; - embedding!: f64[]; + embedding!: f32[]; // TODO: support `f32[] | string` based on input encoding format } From 
3559420a6c6a0dbb2712ec1b06424f2fbc06bfae Mon Sep 17 00:00:00 2001 From: Matt Johnson-Pint Date: Mon, 24 Jun 2024 09:50:46 -0700 Subject: [PATCH 3/4] Add JS docs --- src/models/openai/embeddings.ts | 123 ++++++++++++++++++++++++++++---- 1 file changed, 110 insertions(+), 13 deletions(-) diff --git a/src/models/openai/embeddings.ts b/src/models/openai/embeddings.ts index 058e0ca..7cfc45f 100644 --- a/src/models/openai/embeddings.ts +++ b/src/models/openai/embeddings.ts @@ -1,8 +1,25 @@ import { Model } from "../.."; -// Reference: https://platform.openai.com/docs/api-reference/embeddings - +/** + * Provides input and output types that conform to the OpenAI Embeddings API. + * + * Reference: https://platform.openai.com/docs/api-reference/embeddings + */ export class EmbeddingsModel extends Model { + /** + * Creates an input object for the OpenAI Embeddings API. + * + * @param content The input content to vectorize. Can be any of: + * - A string representing the text to vectorize. + * - An array of strings representing multiple texts to vectorize. + * - An array of integers representing pre-tokenized text to vectorize. + * - An array of arrays of integers representing multiple pre-tokenized texts to vectorize. + * + * @returns An input object that can be passed to the `invoke` method. + * + * @remarks + * The input content must not exceed the maximum token limit of the model. + */ createInput(content: T): EmbeddingsInput { const model = this.info.fullName; @@ -32,63 +49,143 @@ export class EmbeddingsModel extends Model { } } - +/** + * The input object for the OpenAI Embeddings API. + */ @json class EmbeddingsInput { + /** + * The name of the model to use for the embeddings. + * Must be the exact string expected by the model provider. + * For example, "text-embedding-3-small". + * + * @remarks + * This field is automatically set by the `createInput` method when creating this object. + * It does not need to be set manually. 
+ */ model!: string; - - + /** + * The encoding format for the output embeddings. + * + * @default EncodingFormat.Float + * + * @remarks + * Currently only `EncodingFormat.Float` is supported. + */ @alias("encoding_format") @omitif("this.encodingFormat == 'float'") encodingFormat: string = EncodingFormat.Float; + + /** + * The maximum number of dimensions for the output embeddings. + * If not specified, the model's default number of dimensions will be used. + */ @omitif("this.dimensions == -1") dimensions: i32 = -1; // TODO: make this an `i32 | null` when supported - + /** + * The user ID to associate with the request. + * If not specified, the request will be anonymous. + * See https://platform.openai.com/docs/guides/safety-best-practices/end-user-ids + */ @omitnull() user: string | null = null; } - +/** + * The input object for the OpenAI Embeddings API. + */ @json class TypedEmbeddingsInput extends EmbeddingsInput { + /** + * The input content to vectorize. + */ input!: T; } - +/** + * The output object for the OpenAI Embeddings API. + */ @json class EmbeddingsOutput { + /** + * The name of the output object type returned by the API. + * Always `"list"`. + */ object!: string; + + /** + * The name of the model used to generate the embeddings. + * In most cases, this will match the requested `model` field in the input. + */ model!: string; + + /** + * The usage statistics for the request. + */ usage!: Usage; + + /** + * The output vector embeddings data. + */ data!: Embedding[]; } - +/** + * The encoding format for the output embeddings. + */ // eslint-disable-next-line @typescript-eslint/no-namespace export namespace EncodingFormat { + /** + * The output embeddings are encoded as an array of floating-point numbers. + */ export const Float = "float"; + + /** + * The output embeddings are encoded as a base64-encoded string, + * containing a binary representation of an array of floating-point numbers. 
+ * + * @remarks + * This format is currently not supported through this interface. + */ export const Base64 = "base64"; } export type EncodingFormat = string; - +/** + * The output vector embeddings data. + */ @json class Embedding { + /** + * The name of the output object type returned by the API. + * Always `"embedding"`. + */ object!: string; + + /** + * The index of the input text that corresponds to this embedding. + * Used when requesting embeddings for multiple texts. + */ index!: i32; embedding!: f32[]; // TODO: support `f32[] | string` based on input encoding format } - +/** + * The usage statistics for the request. + */ @json class Usage { - + /** + * The number of prompt tokens used in the request. + */ @alias("prompt_tokens") promptTokens!: i32; - + /** + * The total number of tokens used in the request. + */ @alias("total_tokens") totalTokens!: i32; } From 9c47f282b01ce66eb30c9e5f0de11518b6774459 Mon Sep 17 00:00:00 2001 From: Matt Johnson-Pint Date: Mon, 24 Jun 2024 09:52:50 -0700 Subject: [PATCH 4/4] Update CHANGELOG.md --- CHANGELOG.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 522314b..1bd2eb7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,7 +2,8 @@ ## UNRELEASED -- Update OpenAI Embeddings model to support all allowed types of input +- Update OpenAI Embeddings model to support all allowed types of input [#6](https://github.com/hypermodeAI/models-as/pull/6) +- More updates for OpenAI Embeddings model [#7](https://github.com/hypermodeAI/models-as/pull/7) ## 2024-06-28 - Version 0.1.6