Skip to content
This repository was archived by the owner on Oct 8, 2024. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,8 @@

## UNRELEASED

- Update OpenAI Embeddings model to support all allowed types of input
- Update OpenAI Embeddings model to support all allowed types of input [#6](https://github.com/hypermodeAI/models-as/pull/6)
- More updates for OpenAI Embeddings model [#7](https://github.com/hypermodeAI/models-as/pull/7)

## 2024-06-28 - Version 0.1.6

Expand Down
142 changes: 119 additions & 23 deletions src/models/openai/embeddings.ts
Original file line number Diff line number Diff line change
@@ -1,8 +1,25 @@
import { Model } from "../..";

// Reference: https://platform.openai.com/docs/api-reference/embeddings

/**
* Provides input and output types that conform to the OpenAI Embeddings API.
*
* Reference: https://platform.openai.com/docs/api-reference/embeddings
*/
export class EmbeddingsModel extends Model<EmbeddingsInput, EmbeddingsOutput> {
/**
* Creates an input object for the OpenAI Embeddings API.
*
* @param content The input content to vectorize. Can be any of:
* - A string representing the text to vectorize.
* - An array of strings representing multiple texts to vectorize.
* - An array of integers representing pre-tokenized text to vectorize.
* - An array of arrays of integers representing multiple pre-tokenized texts to vectorize.
*
* @returns An input object that can be passed to the `invoke` method.
*
* @remarks
* The input content must not exceed the maximum token limit of the model.
*/
createInput<T>(content: T): EmbeddingsInput {
const model = this.info.fullName;

Expand Down Expand Up @@ -32,64 +49,143 @@ export class EmbeddingsModel extends Model<EmbeddingsInput, EmbeddingsOutput> {
}
}


/**
* The input object for the OpenAI Embeddings API.
*/
@json
class EmbeddingsInput {
/**
* The name of the model to use for the embeddings.
* Must be the exact string expected by the model provider.
* For example, "text-embedding-3-small".
*
* @remarks
* This field is automatically set by the `createInput` method when creating this object.
* It does not need to be set manually.
*/
model!: string;


@omitif("this.encodingFormat.type == 'float'")
encodingFormat: EncodingFormat = EncodingFormat.Float;


/**
* The encoding format for the output embeddings.
*
* @default EncodingFormat.Float
*
* @remarks
* Currently only `EncodingFormat.Float` is supported.
*/
@alias("encoding_format")
@omitif("this.encodingFormat == 'float'")
encodingFormat: string = EncodingFormat.Float;

/**
* The maximum number of dimensions for the output embeddings.
* If not specified, the model's default number of dimensions will be used.
*/
@omitif("this.dimensions == -1")
dimensions: i32 = -1; // TODO: make this an `i32 | null` when supported


/**
* The user ID to associate with the request.
* If not specified, the request will be anonymous.
* See https://platform.openai.com/docs/guides/safety-best-practices/end-user-ids
*/
@omitnull()
user: string | null = null;
}


/**
* The input object for the OpenAI Embeddings API, extended with the
* strongly-typed `input` content to vectorize.
*
* Inherits every request field declared on `EmbeddingsInput` and adds `input`.
*
* @typeParam T The type of the `input` field.
*/
@json
class TypedEmbeddingsInput<T> extends EmbeddingsInput {
/**
* The input content to vectorize.
*/
input!: T;
}


/**
* The output object for the OpenAI Embeddings API.
*/
@json
class EmbeddingsOutput {
/**
* The name of the output object type returned by the API.
* Always `"list"`.
*/
object!: string;

/**
* The name of the model used to generate the embeddings.
* In most cases, this will match the requested `model` field in the input.
*/
model!: string;

/**
* The usage statistics for the request.
*/
usage!: Usage;

/**
* The output vector embeddings data.
* Each entry carries an `index` field identifying the input it corresponds to.
*/
data!: Embedding[];
}


@json
export class EncodingFormat {
type: string = "float";

static Float: EncodingFormat = { type: "float" };
static Base64: EncodingFormat = { type: "base64" };
/**
* The encoding format for the output embeddings.
*/
// eslint-disable-next-line @typescript-eslint/no-namespace
export namespace EncodingFormat {
/**
* The output embeddings are encoded as an array of floating-point numbers.
*/
export const Float = "float";

/**
* The output embeddings are encoded as a base64-encoded string,
* containing a binary representation of an array of floating-point numbers.
*
* @remarks
* This format is currently not supported through this interface.
*/
export const Base64 = "base64";
}
export type EncodingFormat = string;


/**
* The output vector embeddings data.
*/
@json
class Embedding {
/**
* The name of the output object type returned by the API.
* Always `"embedding"`.
*/
object!: string;

/**
* The index of the input text that corresponds to this embedding.
* Used when requesting embeddings for multiple texts.
*/
index!: i32;
embedding!: f64[];
embedding!: f32[]; // TODO: support `f32[] | string` based on input encoding format
}


/**
* The usage statistics for the request.
*/
@json
class Usage {

/**
* The number of prompt tokens used in the request.
* Aliased to the `prompt_tokens` key in the JSON payload.
*/
@alias("prompt_tokens")
promptTokens!: i32;


/**
* The total number of tokens used in the request.
* Aliased to the `total_tokens` key in the JSON payload.
*/
@alias("total_tokens")
totalTokens!: i32;
}