refactor: Remove deprecated Ollama options (#414)

davidmigloz · May 11, 2024 · 861a2b7
1 parent cc5b1b0
commit 861a2b7
Show file tree

Hide file tree

Showing 6 changed files with 1 addition and 77 deletions.
diff --git a/packages/langchain_ollama/lib/src/chat_models/chat_ollama.dart b/packages/langchain_ollama/lib/src/chat_models/chat_ollama.dart
@@ -245,7 +245,6 @@ class ChatOllama extends BaseChatModel<ChatOllamaOptions> {
         numa: options?.numa ?? defaultOptions.numa,
         numCtx: options?.numCtx ?? defaultOptions.numCtx,
         numBatch: options?.numBatch ?? defaultOptions.numBatch,
-        numGqa: options?.numGqa ?? defaultOptions.numGqa,
         numGpu: options?.numGpu ?? defaultOptions.numGpu,
         mainGpu: options?.mainGpu ?? defaultOptions.mainGpu,
         lowVram: options?.lowVram ?? defaultOptions.lowVram,
@@ -254,11 +253,6 @@ class ChatOllama extends BaseChatModel<ChatOllamaOptions> {
         vocabOnly: options?.vocabOnly ?? defaultOptions.vocabOnly,
         useMmap: options?.useMmap ?? defaultOptions.useMmap,
         useMlock: options?.useMlock ?? defaultOptions.useMlock,
-        embeddingOnly: options?.embeddingOnly ?? defaultOptions.embeddingOnly,
-        ropeFrequencyBase:
-            options?.ropeFrequencyBase ?? defaultOptions.ropeFrequencyBase,
-        ropeFrequencyScale:
-            options?.ropeFrequencyScale ?? defaultOptions.ropeFrequencyScale,
         numThread: options?.numThread ?? defaultOptions.numThread,
       ),
     );

diff --git a/packages/langchain_ollama/lib/src/chat_models/types.dart b/packages/langchain_ollama/lib/src/chat_models/types.dart
@@ -31,7 +31,6 @@ class ChatOllamaOptions extends ChatModelOptions {
     this.numa,
     this.numCtx,
     this.numBatch,
-    this.numGqa,
     this.numGpu,
     this.mainGpu,
     this.lowVram,
@@ -40,9 +39,6 @@ class ChatOllamaOptions extends ChatModelOptions {
     this.vocabOnly,
     this.useMmap,
     this.useMlock,
-    this.embeddingOnly,
-    this.ropeFrequencyBase,
-    this.ropeFrequencyScale,
     this.numThread,
     super.concurrencyLimit,
   });
@@ -163,10 +159,6 @@ class ChatOllamaOptions extends ChatModelOptions {
   /// (Default: 1)
   final int? numBatch;
 
-  /// The number of GQA groups in the transformer layer. Required for some
-  /// models, for example it is 8 for `llama2:70b`.
-  final int? numGqa;
-
   /// The number of layers to send to the GPU(s). On macOS it defaults to 1 to
   /// enable metal support, 0 to disable.
   final int? numGpu;
@@ -199,18 +191,6 @@ class ChatOllamaOptions extends ChatModelOptions {
   /// (Default: false)
   final bool? useMlock;
 
-  /// Enable embedding only.
-  /// (Default: false)
-  final bool? embeddingOnly;
-
-  /// The base of the rope frequency scale.
-  /// (Default: 1.0)
-  final double? ropeFrequencyBase;
-
-  /// The scale of the rope frequency.
-  /// (Default: 1.0)
-  final double? ropeFrequencyScale;
-
   /// Sets the number of threads to use during computation. By default, Ollama
   /// will detect this for optimal performance. It is recommended to set this
   /// value to the number of physical CPU cores your system has (as opposed to
@@ -279,7 +259,6 @@ class ChatOllamaOptions extends ChatModelOptions {
       numa: numa ?? this.numa,
       numCtx: numCtx ?? this.numCtx,
       numBatch: numBatch ?? this.numBatch,
-      numGqa: numGqa ?? this.numGqa,
       numGpu: numGpu ?? this.numGpu,
       mainGpu: mainGpu ?? this.mainGpu,
       lowVram: lowVram ?? this.lowVram,
@@ -288,9 +267,6 @@ class ChatOllamaOptions extends ChatModelOptions {
       vocabOnly: vocabOnly ?? this.vocabOnly,
       useMmap: useMmap ?? this.useMmap,
       useMlock: useMlock ?? this.useMlock,
-      embeddingOnly: embeddingOnly ?? this.embeddingOnly,
-      ropeFrequencyBase: ropeFrequencyBase ?? this.ropeFrequencyBase,
-      ropeFrequencyScale: ropeFrequencyScale ?? this.ropeFrequencyScale,
       numThread: numThread ?? this.numThread,
     );
   }

diff --git a/packages/langchain_ollama/lib/src/llms/ollama.dart b/packages/langchain_ollama/lib/src/llms/ollama.dart
@@ -243,7 +243,6 @@ class Ollama extends BaseLLM<OllamaOptions> {
         numa: options?.numa ?? defaultOptions.numa,
         numCtx: options?.numCtx ?? defaultOptions.numCtx,
         numBatch: options?.numBatch ?? defaultOptions.numBatch,
-        numGqa: options?.numGqa ?? defaultOptions.numGqa,
         numGpu: options?.numGpu ?? defaultOptions.numGpu,
         mainGpu: options?.mainGpu ?? defaultOptions.mainGpu,
         lowVram: options?.lowVram ?? defaultOptions.lowVram,
@@ -252,11 +251,6 @@ class Ollama extends BaseLLM<OllamaOptions> {
         vocabOnly: options?.vocabOnly ?? defaultOptions.vocabOnly,
         useMmap: options?.useMmap ?? defaultOptions.useMmap,
         useMlock: options?.useMlock ?? defaultOptions.useMlock,
-        embeddingOnly: options?.embeddingOnly ?? defaultOptions.embeddingOnly,
-        ropeFrequencyBase:
-            options?.ropeFrequencyBase ?? defaultOptions.ropeFrequencyBase,
-        ropeFrequencyScale:
-            options?.ropeFrequencyScale ?? defaultOptions.ropeFrequencyScale,
         numThread: options?.numThread ?? defaultOptions.numThread,
       ),
     );

diff --git a/packages/langchain_ollama/lib/src/llms/types.dart b/packages/langchain_ollama/lib/src/llms/types.dart
@@ -33,7 +33,6 @@ class OllamaOptions extends LLMOptions {
     this.numa,
     this.numCtx,
     this.numBatch,
-    this.numGqa,
     this.numGpu,
     this.mainGpu,
     this.lowVram,
@@ -42,9 +41,6 @@ class OllamaOptions extends LLMOptions {
     this.vocabOnly,
     this.useMmap,
     this.useMlock,
-    this.embeddingOnly,
-    this.ropeFrequencyBase,
-    this.ropeFrequencyScale,
     this.numThread,
     super.concurrencyLimit,
   });
@@ -185,10 +181,6 @@ class OllamaOptions extends LLMOptions {
   /// (Default: 1)
   final int? numBatch;
 
-  /// The number of GQA groups in the transformer layer. Required for some
-  /// models, for example it is 8 for `llama2:70b`.
-  final int? numGqa;
-
   /// The number of layers to send to the GPU(s). On macOS it defaults to 1 to
   /// enable metal support, 0 to disable.
   final int? numGpu;
@@ -221,18 +213,6 @@ class OllamaOptions extends LLMOptions {
   /// (Default: false)
   final bool? useMlock;
 
-  /// Enable embedding only.
-  /// (Default: false)
-  final bool? embeddingOnly;
-
-  /// The base of the rope frequency scale.
-  /// (Default: 1.0)
-  final double? ropeFrequencyBase;
-
-  /// The scale of the rope frequency.
-  /// (Default: 1.0)
-  final double? ropeFrequencyScale;
-
   /// Sets the number of threads to use during computation. By default, Ollama
   /// will detect this for optimal performance. It is recommended to set this
   /// value to the number of physical CPU cores your system has (as opposed to
@@ -309,7 +289,6 @@ class OllamaOptions extends LLMOptions {
       numa: numa ?? this.numa,
       numCtx: numCtx ?? this.numCtx,
       numBatch: numBatch ?? this.numBatch,
-      numGqa: numGqa ?? this.numGqa,
       numGpu: numGpu ?? this.numGpu,
       mainGpu: mainGpu ?? this.mainGpu,
       lowVram: lowVram ?? this.lowVram,
@@ -318,9 +297,6 @@ class OllamaOptions extends LLMOptions {
       vocabOnly: vocabOnly ?? this.vocabOnly,
       useMmap: useMmap ?? this.useMmap,
       useMlock: useMlock ?? this.useMlock,
-      embeddingOnly: embeddingOnly ?? this.embeddingOnly,
-      ropeFrequencyBase: ropeFrequencyBase ?? this.ropeFrequencyBase,
-      ropeFrequencyScale: ropeFrequencyScale ?? this.ropeFrequencyScale,
       numThread: numThread ?? this.numThread,
     );
   }

diff --git a/packages/langchain_ollama/test/chat_models/chat_ollama_test.dart b/packages/langchain_ollama/test/chat_models/chat_ollama_test.dart
@@ -11,7 +11,7 @@ import 'package:test/test.dart';
 void main() {
   group('ChatOllama tests', skip: Platform.environment.containsKey('CI'), () {
     late ChatOllama chatModel;
-    const defaultModel = 'llama2:latest';
+    const defaultModel = 'llama3:latest';
     const visionModel = 'llava:latest';
 
     setUp(() async {
@@ -50,7 +50,6 @@ void main() {
         numa: true,
         numCtx: 15,
         numBatch: 16,
-        numGqa: 17,
         numGpu: 0,
         mainGpu: 18,
         lowVram: true,
@@ -59,9 +58,6 @@ void main() {
         vocabOnly: true,
         useMmap: true,
         useMlock: true,
-        embeddingOnly: true,
-        ropeFrequencyBase: 19.0,
-        ropeFrequencyScale: 20.0,
         numThread: 21,
       );
 
@@ -87,7 +83,6 @@ void main() {
       expect(options.numa, true);
       expect(options.numCtx, 15);
       expect(options.numBatch, 16);
-      expect(options.numGqa, 17);
       expect(options.numGpu, 0);
       expect(options.mainGpu, 18);
       expect(options.lowVram, true);
@@ -96,9 +91,6 @@ void main() {
       expect(options.vocabOnly, true);
       expect(options.useMmap, true);
       expect(options.useMlock, true);
-      expect(options.embeddingOnly, true);
-      expect(options.ropeFrequencyBase, 19.0);
-      expect(options.ropeFrequencyScale, 20.0);
       expect(options.numThread, 21);
     });
 

diff --git a/packages/langchain_ollama/test/llms/ollama_test.dart b/packages/langchain_ollama/test/llms/ollama_test.dart
@@ -52,7 +52,6 @@ void main() {
         numa: true,
         numCtx: 15,
         numBatch: 16,
-        numGqa: 17,
         numGpu: 0,
         mainGpu: 18,
         lowVram: true,
@@ -61,9 +60,6 @@ void main() {
         vocabOnly: true,
         useMmap: true,
         useMlock: true,
-        embeddingOnly: true,
-        ropeFrequencyBase: 19.0,
-        ropeFrequencyScale: 20.0,
         numThread: 21,
       );
 
@@ -96,7 +92,6 @@ void main() {
       expect(options.numa, true);
       expect(options.numCtx, 15);
       expect(options.numBatch, 16);
-      expect(options.numGqa, 17);
       expect(options.numGpu, 0);
       expect(options.mainGpu, 18);
       expect(options.lowVram, true);
@@ -105,9 +100,6 @@ void main() {
       expect(options.vocabOnly, true);
       expect(options.useMmap, true);
       expect(options.useMlock, true);
-      expect(options.embeddingOnly, true);
-      expect(options.ropeFrequencyBase, 19.0);
-      expect(options.ropeFrequencyScale, 20.0);
       expect(options.numThread, 21);
     });