diff --git a/packages/langchain_ollama/lib/src/chat_models/chat_ollama.dart b/packages/langchain_ollama/lib/src/chat_models/chat_ollama.dart
index 566c910d..12602184 100644
--- a/packages/langchain_ollama/lib/src/chat_models/chat_ollama.dart
+++ b/packages/langchain_ollama/lib/src/chat_models/chat_ollama.dart
@@ -245,7 +245,6 @@ class ChatOllama extends BaseChatModel {
         numa: options?.numa ?? defaultOptions.numa,
         numCtx: options?.numCtx ?? defaultOptions.numCtx,
         numBatch: options?.numBatch ?? defaultOptions.numBatch,
-        numGqa: options?.numGqa ?? defaultOptions.numGqa,
         numGpu: options?.numGpu ?? defaultOptions.numGpu,
         mainGpu: options?.mainGpu ?? defaultOptions.mainGpu,
         lowVram: options?.lowVram ?? defaultOptions.lowVram,
@@ -254,11 +253,6 @@ class ChatOllama extends BaseChatModel {
         vocabOnly: options?.vocabOnly ?? defaultOptions.vocabOnly,
         useMmap: options?.useMmap ?? defaultOptions.useMmap,
         useMlock: options?.useMlock ?? defaultOptions.useMlock,
-        embeddingOnly: options?.embeddingOnly ?? defaultOptions.embeddingOnly,
-        ropeFrequencyBase:
-            options?.ropeFrequencyBase ?? defaultOptions.ropeFrequencyBase,
-        ropeFrequencyScale:
-            options?.ropeFrequencyScale ?? defaultOptions.ropeFrequencyScale,
         numThread: options?.numThread ?? defaultOptions.numThread,
       ),
     );
diff --git a/packages/langchain_ollama/lib/src/chat_models/types.dart b/packages/langchain_ollama/lib/src/chat_models/types.dart
index 16a0a98c..1f05b575 100644
--- a/packages/langchain_ollama/lib/src/chat_models/types.dart
+++ b/packages/langchain_ollama/lib/src/chat_models/types.dart
@@ -31,7 +31,6 @@ class ChatOllamaOptions extends ChatModelOptions {
     this.numa,
     this.numCtx,
     this.numBatch,
-    this.numGqa,
     this.numGpu,
     this.mainGpu,
     this.lowVram,
@@ -40,9 +39,6 @@ class ChatOllamaOptions extends ChatModelOptions {
     this.vocabOnly,
     this.useMmap,
     this.useMlock,
-    this.embeddingOnly,
-    this.ropeFrequencyBase,
-    this.ropeFrequencyScale,
     this.numThread,
     super.concurrencyLimit,
   });
@@ -163,10 +159,6 @@ class ChatOllamaOptions extends ChatModelOptions {
   /// (Default: 1)
   final int? numBatch;
 
-  /// The number of GQA groups in the transformer layer. Required for some
-  /// models, for example it is 8 for `llama2:70b`.
-  final int? numGqa;
-
   /// The number of layers to send to the GPU(s). On macOS it defaults to 1 to
   /// enable metal support, 0 to disable.
   final int? numGpu;
@@ -199,18 +191,6 @@ class ChatOllamaOptions extends ChatModelOptions {
   /// (Default: false)
   final bool? useMlock;
 
-  /// Enable embedding only.
-  /// (Default: false)
-  final bool? embeddingOnly;
-
-  /// The base of the rope frequency scale.
-  /// (Default: 1.0)
-  final double? ropeFrequencyBase;
-
-  /// The scale of the rope frequency.
-  /// (Default: 1.0)
-  final double? ropeFrequencyScale;
-
   /// Sets the number of threads to use during computation. By default, Ollama
   /// will detect this for optimal performance. It is recommended to set this
   /// value to the number of physical CPU cores your system has (as opposed to
@@ -279,7 +259,6 @@ class ChatOllamaOptions extends ChatModelOptions {
       numa: numa ?? this.numa,
       numCtx: numCtx ?? this.numCtx,
       numBatch: numBatch ?? this.numBatch,
-      numGqa: numGqa ?? this.numGqa,
       numGpu: numGpu ?? this.numGpu,
       mainGpu: mainGpu ?? this.mainGpu,
       lowVram: lowVram ?? this.lowVram,
@@ -288,9 +267,6 @@ class ChatOllamaOptions extends ChatModelOptions {
       vocabOnly: vocabOnly ?? this.vocabOnly,
       useMmap: useMmap ?? this.useMmap,
       useMlock: useMlock ?? this.useMlock,
-      embeddingOnly: embeddingOnly ?? this.embeddingOnly,
-      ropeFrequencyBase: ropeFrequencyBase ?? this.ropeFrequencyBase,
-      ropeFrequencyScale: ropeFrequencyScale ?? this.ropeFrequencyScale,
       numThread: numThread ?? this.numThread,
     );
   }
diff --git a/packages/langchain_ollama/lib/src/llms/ollama.dart b/packages/langchain_ollama/lib/src/llms/ollama.dart
index 3fa5967f..44a499f9 100644
--- a/packages/langchain_ollama/lib/src/llms/ollama.dart
+++ b/packages/langchain_ollama/lib/src/llms/ollama.dart
@@ -243,7 +243,6 @@ class Ollama extends BaseLLM {
         numa: options?.numa ?? defaultOptions.numa,
         numCtx: options?.numCtx ?? defaultOptions.numCtx,
         numBatch: options?.numBatch ?? defaultOptions.numBatch,
-        numGqa: options?.numGqa ?? defaultOptions.numGqa,
         numGpu: options?.numGpu ?? defaultOptions.numGpu,
         mainGpu: options?.mainGpu ?? defaultOptions.mainGpu,
         lowVram: options?.lowVram ?? defaultOptions.lowVram,
@@ -252,11 +251,6 @@ class Ollama extends BaseLLM {
         vocabOnly: options?.vocabOnly ?? defaultOptions.vocabOnly,
         useMmap: options?.useMmap ?? defaultOptions.useMmap,
         useMlock: options?.useMlock ?? defaultOptions.useMlock,
-        embeddingOnly: options?.embeddingOnly ?? defaultOptions.embeddingOnly,
-        ropeFrequencyBase:
-            options?.ropeFrequencyBase ?? defaultOptions.ropeFrequencyBase,
-        ropeFrequencyScale:
-            options?.ropeFrequencyScale ?? defaultOptions.ropeFrequencyScale,
         numThread: options?.numThread ?? defaultOptions.numThread,
       ),
     );
diff --git a/packages/langchain_ollama/lib/src/llms/types.dart b/packages/langchain_ollama/lib/src/llms/types.dart
index 7b97a69a..7c6cadd5 100644
--- a/packages/langchain_ollama/lib/src/llms/types.dart
+++ b/packages/langchain_ollama/lib/src/llms/types.dart
@@ -33,7 +33,6 @@ class OllamaOptions extends LLMOptions {
     this.numa,
     this.numCtx,
     this.numBatch,
-    this.numGqa,
     this.numGpu,
     this.mainGpu,
     this.lowVram,
@@ -42,9 +41,6 @@ class OllamaOptions extends LLMOptions {
     this.vocabOnly,
     this.useMmap,
     this.useMlock,
-    this.embeddingOnly,
-    this.ropeFrequencyBase,
-    this.ropeFrequencyScale,
     this.numThread,
     super.concurrencyLimit,
   });
@@ -185,10 +181,6 @@ class OllamaOptions extends LLMOptions {
   /// (Default: 1)
   final int? numBatch;
 
-  /// The number of GQA groups in the transformer layer. Required for some
-  /// models, for example it is 8 for `llama2:70b`.
-  final int? numGqa;
-
   /// The number of layers to send to the GPU(s). On macOS it defaults to 1 to
   /// enable metal support, 0 to disable.
   final int? numGpu;
@@ -221,18 +213,6 @@ class OllamaOptions extends LLMOptions {
   /// (Default: false)
   final bool? useMlock;
 
-  /// Enable embedding only.
-  /// (Default: false)
-  final bool? embeddingOnly;
-
-  /// The base of the rope frequency scale.
-  /// (Default: 1.0)
-  final double? ropeFrequencyBase;
-
-  /// The scale of the rope frequency.
-  /// (Default: 1.0)
-  final double? ropeFrequencyScale;
-
   /// Sets the number of threads to use during computation. By default, Ollama
   /// will detect this for optimal performance. It is recommended to set this
   /// value to the number of physical CPU cores your system has (as opposed to
@@ -309,7 +289,6 @@ class OllamaOptions extends LLMOptions {
       numa: numa ?? this.numa,
       numCtx: numCtx ?? this.numCtx,
       numBatch: numBatch ?? this.numBatch,
-      numGqa: numGqa ?? this.numGqa,
       numGpu: numGpu ?? this.numGpu,
       mainGpu: mainGpu ?? this.mainGpu,
       lowVram: lowVram ?? this.lowVram,
@@ -318,9 +297,6 @@ class OllamaOptions extends LLMOptions {
       vocabOnly: vocabOnly ?? this.vocabOnly,
       useMmap: useMmap ?? this.useMmap,
       useMlock: useMlock ?? this.useMlock,
-      embeddingOnly: embeddingOnly ?? this.embeddingOnly,
-      ropeFrequencyBase: ropeFrequencyBase ?? this.ropeFrequencyBase,
-      ropeFrequencyScale: ropeFrequencyScale ?? this.ropeFrequencyScale,
       numThread: numThread ?? this.numThread,
     );
   }
diff --git a/packages/langchain_ollama/test/chat_models/chat_ollama_test.dart b/packages/langchain_ollama/test/chat_models/chat_ollama_test.dart
index 79d8a77a..d47f6a06 100644
--- a/packages/langchain_ollama/test/chat_models/chat_ollama_test.dart
+++ b/packages/langchain_ollama/test/chat_models/chat_ollama_test.dart
@@ -11,7 +11,7 @@ import 'package:test/test.dart';
 void main() {
   group('ChatOllama tests', skip: Platform.environment.containsKey('CI'), () {
     late ChatOllama chatModel;
-    const defaultModel = 'llama2:latest';
+    const defaultModel = 'llama3:latest';
     const visionModel = 'llava:latest';
 
     setUp(() async {
@@ -50,7 +50,6 @@ void main() {
         numa: true,
         numCtx: 15,
         numBatch: 16,
-        numGqa: 17,
         numGpu: 0,
         mainGpu: 18,
         lowVram: true,
@@ -59,9 +58,6 @@ void main() {
         vocabOnly: true,
         useMmap: true,
         useMlock: true,
-        embeddingOnly: true,
-        ropeFrequencyBase: 19.0,
-        ropeFrequencyScale: 20.0,
         numThread: 21,
       );
 
@@ -87,7 +83,6 @@ void main() {
       expect(options.numa, true);
       expect(options.numCtx, 15);
       expect(options.numBatch, 16);
-      expect(options.numGqa, 17);
       expect(options.numGpu, 0);
       expect(options.mainGpu, 18);
       expect(options.lowVram, true);
@@ -96,9 +91,6 @@ void main() {
       expect(options.vocabOnly, true);
       expect(options.useMmap, true);
       expect(options.useMlock, true);
-      expect(options.embeddingOnly, true);
-      expect(options.ropeFrequencyBase, 19.0);
-      expect(options.ropeFrequencyScale, 20.0);
       expect(options.numThread, 21);
     });
 
diff --git a/packages/langchain_ollama/test/llms/ollama_test.dart b/packages/langchain_ollama/test/llms/ollama_test.dart
index 2a56edc0..3ed251f8 100644
--- a/packages/langchain_ollama/test/llms/ollama_test.dart
+++ b/packages/langchain_ollama/test/llms/ollama_test.dart
@@ -52,7 +52,6 @@ void main() {
         numa: true,
         numCtx: 15,
         numBatch: 16,
-        numGqa: 17,
         numGpu: 0,
         mainGpu: 18,
         lowVram: true,
@@ -61,9 +60,6 @@ void main() {
         vocabOnly: true,
         useMmap: true,
         useMlock: true,
-        embeddingOnly: true,
-        ropeFrequencyBase: 19.0,
-        ropeFrequencyScale: 20.0,
         numThread: 21,
       );
 
@@ -96,7 +92,6 @@ void main() {
       expect(options.numa, true);
       expect(options.numCtx, 15);
       expect(options.numBatch, 16);
-      expect(options.numGqa, 17);
       expect(options.numGpu, 0);
       expect(options.mainGpu, 18);
       expect(options.lowVram, true);
@@ -105,9 +100,6 @@ void main() {
       expect(options.vocabOnly, true);
       expect(options.useMmap, true);
       expect(options.useMlock, true);
-      expect(options.embeddingOnly, true);
-      expect(options.ropeFrequencyBase, 19.0);
-      expect(options.ropeFrequencyScale, 20.0);
       expect(options.numThread, 21);
     });
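
Note (reviewer sketch, not part of the patch): callers that still pass numGqa, embeddingOnly, ropeFrequencyBase, or ropeFrequencyScale will no longer compile against this API. A minimal Dart sketch of the surviving options surface and the copyWith merge behavior shown above; the model name and numeric values are illustrative placeholders, not values taken from the patch:

import 'package:langchain_ollama/langchain_ollama.dart';

void main() {
  // Build default options using only fields that still exist after this patch.
  const options = ChatOllamaOptions(
    model: 'llama3:latest', // matches the new test default
    numCtx: 2048,           // placeholder context size
    numThread: 4,           // placeholder thread count
  );

  // copyWith merges per-call overrides onto the defaults, exactly as in the
  // updated copyWith implementation in chat_models/types.dart above.
  final tuned = options.copyWith(numGpu: 1);
  print(tuned.numGpu); // 1
}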