From 4c73a5dbd9569ff8f1b7a0274a86043869215a17 Mon Sep 17 00:00:00 2001
From: David Miguel
Date: Fri, 26 Jan 2024 22:27:56 +0100
Subject: [PATCH] feat!: Update OpenAIEmbeddings' default model to text-embedding-3-small

https://openai.com/blog/new-embedding-models-and-api-updates#new-embedding-models-with-lower-pricing
---
 packages/langchain/lib/src/documents/embeddings/cache.dart | 2 +-
 .../langchain_openai/lib/src/chat_models/chat_openai.dart | 2 +-
 packages/langchain_openai/lib/src/embeddings/openai.dart | 4 ++--
 packages/langchain_openai/lib/src/llms/openai.dart | 2 +-
 4 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/packages/langchain/lib/src/documents/embeddings/cache.dart b/packages/langchain/lib/src/documents/embeddings/cache.dart
index c0894559..594312f4 100644
--- a/packages/langchain/lib/src/documents/embeddings/cache.dart
+++ b/packages/langchain/lib/src/documents/embeddings/cache.dart
@@ -54,7 +54,7 @@ class CacheBackedEmbeddings implements Embeddings {
   /// final cacheBackedEmbeddings = CacheBackedEmbeddings.fromByteStore(
   /// underlyingEmbeddings: OpenAIEmbeddings(apiKey: openaiApiKey),
   /// documentEmbeddingsStore: InMemoryStore(),
-  /// namespace: 'text-embedding-ada-002',
+  /// namespace: 'text-embedding-3-small',
   /// );
   factory CacheBackedEmbeddings.fromByteStore({
     required final Embeddings underlyingEmbeddings,
diff --git a/packages/langchain_openai/lib/src/chat_models/chat_openai.dart b/packages/langchain_openai/lib/src/chat_models/chat_openai.dart
index 958823a2..fa1f52d6 100644
--- a/packages/langchain_openai/lib/src/chat_models/chat_openai.dart
+++ b/packages/langchain_openai/lib/src/chat_models/chat_openai.dart
@@ -220,7 +220,7 @@ class ChatOpenAI extends BaseChatModel {
   /// it using this field.
   ///
   /// Supported encodings:
-  /// - `cl100k_base` (used by gpt-4, gpt-3.5-turbo, text-embedding-ada-002).
+  /// - `cl100k_base` (used by gpt-4, gpt-3.5-turbo, text-embedding-3-small).
   ///
   /// For an exhaustive list check:
   /// https://github.com/mvitlov/tiktoken/blob/master/lib/tiktoken.dart
diff --git a/packages/langchain_openai/lib/src/embeddings/openai.dart b/packages/langchain_openai/lib/src/embeddings/openai.dart
index 0821731d..d926f33d 100644
--- a/packages/langchain_openai/lib/src/embeddings/openai.dart
+++ b/packages/langchain_openai/lib/src/embeddings/openai.dart
@@ -130,7 +130,7 @@ class OpenAIEmbeddings implements Embeddings {
     final Map? headers,
     final Map? queryParams,
     final http.Client? client,
-    this.model = 'text-embedding-ada-002',
+    this.model = 'text-embedding-3-small',
     this.dimensions,
     this.batchSize = 512,
     this.user,
@@ -163,7 +163,7 @@ class OpenAIEmbeddings implements Embeddings {
 
   /// The maximum number of documents to embed in a single request.
   /// This is limited by max input tokens for the model
-  /// (e.g. 8191 tokens for text-embedding-ada-002).
+  /// (e.g. 8191 tokens for text-embedding-3-small).
   int batchSize;
 
   /// A unique identifier representing your end-user, which can help OpenAI to
diff --git a/packages/langchain_openai/lib/src/llms/openai.dart b/packages/langchain_openai/lib/src/llms/openai.dart
index 0a4d27fe..a3e1c0c9 100644
--- a/packages/langchain_openai/lib/src/llms/openai.dart
+++ b/packages/langchain_openai/lib/src/llms/openai.dart
@@ -214,7 +214,7 @@ class OpenAI extends BaseLLM {
   /// it using this field.
   ///
   /// Supported encodings:
-  /// - `cl100k_base` (used by gpt-4, gpt-3.5-turbo, text-embedding-ada-002).
+  /// - `cl100k_base` (used by gpt-4, gpt-3.5-turbo, text-embedding-3-small).
   ///
   /// For an exhaustive list check:
   /// https://github.com/mvitlov/tiktoken/blob/master/lib/tiktoken.dart