
Commit ab90e9b

🤖 refactor: move Ollama model pull to test-side for better parallelism
Previously, the setup-ollama action pulled models sequentially during setup. Now tests pull models idempotently in a beforeAll hook, enabling:

- Better parallelism across test jobs
- Idempotent model pulls (multiple tests can check/pull safely)
- A shared model cache across parallel test runners
- Deduplication by Ollama when multiple pulls happen simultaneously

Changes:

- Remove the model input and pull logic from the setup-ollama action
- Add an ensureOllamaModel() helper that checks whether the model exists and pulls it if needed
- Call ensureOllamaModel() in the beforeAll hook before tests run
- Bump the beforeAll timeout to 150s to accommodate a potential model pull
- Simplify the cache key to 'ollama-models-v2' (model-agnostic)

_Generated with `cmux`_
1 parent f82f5a7 commit ab90e9b

3 files changed (+85 −32 lines)

.github/actions/setup-ollama/action.yml

Lines changed: 10 additions & 19 deletions
@@ -1,10 +1,5 @@
 name: Setup Ollama
-description: Install Ollama and pull required models with caching
-inputs:
-  model:
-    description: 'Ollama model to pull'
-    required: false
-    default: 'gpt-oss:20b'
+description: Install Ollama binary and restore model cache (tests pull models idempotently)
 
 runs:
   using: composite
@@ -21,7 +16,7 @@ runs:
     uses: actions/cache@v4
     with:
       path: ~/.ollama
-      key: ${{ runner.os }}-ollama-${{ inputs.model }}-v2
+      key: ${{ runner.os }}-ollama-models-v2
 
   - name: Install Ollama binary (cache miss)
     if: steps.cache-ollama-binary.outputs.cache-hit != 'true'
@@ -57,18 +52,14 @@ runs:
     shell: bash
     run: |
       ollama --version
+      echo "Ollama binary ready - tests will pull models idempotently"
 
-  - name: Pull model (cache miss)
-    if: steps.cache-ollama-models.outputs.cache-hit != 'true'
+  - name: Verify cache status
     shell: bash
     run: |
-      echo "Cache miss - pulling model ${{ inputs.model }}..."
-      ollama pull ${{ inputs.model }}
-      echo "Model pulled successfully"
-
-  - name: Verify cache (cache hit)
-    if: steps.cache-ollama-models.outputs.cache-hit == 'true'
-    shell: bash
-    run: |
-      echo "Cache hit - models restored from cache"
-      ls -lh "$HOME/.ollama" || echo "Warning: .ollama directory not found"
+      if [[ "${{ steps.cache-ollama-models.outputs.cache-hit }}" == "true" ]]; then
+        echo "Model cache restored - available for tests"
+        ls -lh "$HOME/.ollama" || echo "Warning: .ollama directory not found"
+      else
+        echo "Model cache miss - tests will pull models on first run"
+      fi

.github/workflows/ci.yml

Lines changed: 3 additions & 5 deletions
@@ -101,16 +101,14 @@ jobs:
 
       - name: Setup Ollama
         uses: ./.github/actions/setup-ollama
-        with:
-          model: gpt-oss:20b
 
-      # Ollama server already started by setup-ollama action
-      # Just verify it's ready
+      # Ollama server started by setup-ollama action
+      # Tests will pull models idempotently
       - name: Verify Ollama server
        run: |
           echo "Verifying Ollama server..."
           timeout 5 sh -c 'until curl -sf http://localhost:11434/api/tags > /dev/null 2>&1; do sleep 0.2; done'
-          echo "Ollama ready"
+          echo "Ollama ready - integration tests will pull models on demand"
 
       - name: Build worker files
         run: make build-main
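
If a suite ever needs the same readiness guarantee outside CI, the shell poll above translates directly to the test side. A minimal TypeScript sketch, assuming the default localhost:11434 endpoint used in this commit; the waitForOllama helper itself is illustrative and not part of the change:

// Hypothetical test-side equivalent of the CI readiness poll - not part of
// this commit. Retries GET /api/tags until the server responds or we time out.
async function waitForOllama(timeoutMs = 5000): Promise<void> {
  const deadline = Date.now() + timeoutMs;
  while (Date.now() < deadline) {
    try {
      const res = await fetch("http://localhost:11434/api/tags");
      if (res.ok) return; // Server is up and answering.
    } catch {
      // Server not accepting connections yet - keep polling.
    }
    await new Promise((r) => setTimeout(r, 200)); // Mirrors the shell loop's 0.2s sleep.
  }
  throw new Error("Timed out waiting for Ollama server");
}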

tests/ipcMain/ollama.test.ts

Lines changed: 72 additions & 8 deletions
@@ -5,25 +5,89 @@ import {
   assertStreamSuccess,
   extractTextFromEvents,
 } from "./helpers";
+import { spawn } from "child_process";
 
 // Skip all tests if TEST_INTEGRATION is not set
 const describeIntegration = shouldRunIntegrationTests() ? describe : describe.skip;
 
 // Ollama doesn't require API keys - it's a local service
-// Tests require Ollama to be running with the gpt-oss:20b model installed
+// Tests require Ollama to be running and will pull models idempotently
+
+const OLLAMA_MODEL = "gpt-oss:20b";
+
+/**
+ * Ensure Ollama model is available (idempotent).
+ * Checks if model exists, pulls it if not.
+ * Multiple tests can call this in parallel - Ollama handles deduplication.
+ */
+async function ensureOllamaModel(model: string): Promise<void> {
+  return new Promise((resolve, reject) => {
+    // Check if model exists: ollama list | grep <model>
+    const checkProcess = spawn("ollama", ["list"]);
+    let stdout = "";
+    let stderr = "";
+
+    checkProcess.stdout.on("data", (data) => {
+      stdout += data.toString();
+    });
+
+    checkProcess.stderr.on("data", (data) => {
+      stderr += data.toString();
+    });
+
+    checkProcess.on("close", (code) => {
+      if (code !== 0) {
+        return reject(new Error(`Failed to check Ollama models: ${stderr}`));
+      }
+
+      // Check if model is in the list
+      const modelLines = stdout.split("\n");
+      const modelExists = modelLines.some((line) => line.includes(model));
+
+      if (modelExists) {
+        console.log(`✓ Ollama model ${model} already available`);
+        return resolve();
+      }
+
+      // Model doesn't exist, pull it
+      console.log(`Pulling Ollama model ${model}...`);
+      const pullProcess = spawn("ollama", ["pull", model], {
+        stdio: ["ignore", "inherit", "inherit"],
+      });
+
+      const timeout = setTimeout(() => {
+        pullProcess.kill();
+        reject(new Error(`Timeout pulling Ollama model ${model}`));
+      }, 120000); // 2 minute timeout for model pull
+
+      pullProcess.on("close", (pullCode) => {
+        clearTimeout(timeout);
+        if (pullCode !== 0) {
+          reject(new Error(`Failed to pull Ollama model ${model}`));
+        } else {
+          console.log(`✓ Ollama model ${model} pulled successfully`);
+          resolve();
+        }
+      });
+    });
+  });
+}
 
 describeIntegration("IpcMain Ollama integration tests", () => {
   // Enable retries in CI for potential network flakiness with Ollama
   if (process.env.CI && typeof jest !== "undefined" && jest.retryTimes) {
     jest.retryTimes(3, { logErrorsBeforeRetry: true });
   }
 
-  // Load tokenizer modules once before all tests (takes ~14s)
-  // This ensures accurate token counts for API calls without timing out individual tests
+  // Load tokenizer modules and ensure model is available before all tests
   beforeAll(async () => {
+    // Load tokenizers (takes ~14s)
     const { loadTokenizerModules } = await import("../../src/utils/main/tokenizer");
     await loadTokenizerModules();
-  }, 30000); // 30s timeout for tokenizer loading
+
+    // Ensure Ollama model is available (idempotent - fast if cached)
+    await ensureOllamaModel(OLLAMA_MODEL);
+  }, 150000); // 150s timeout for tokenizer loading + potential model pull
 
   test("should successfully send message to Ollama and receive response", async () => {
     // Setup test environment
@@ -35,7 +99,7 @@ describeIntegration("IpcMain Ollama integration tests", () => {
       workspaceId,
       "Say 'hello' and nothing else",
       "ollama",
-      "gpt-oss:20b"
+      OLLAMA_MODEL
     );
 
     // Verify the IPC call succeeded
@@ -69,7 +133,7 @@ describeIntegration("IpcMain Ollama integration tests", () => {
       workspaceId,
       "What is the current date and time? Use the bash tool to find out.",
       "ollama",
-      "gpt-oss:20b"
+      OLLAMA_MODEL
     );
 
     expect(result.success).toBe(true);
@@ -108,7 +172,7 @@ describeIntegration("IpcMain Ollama integration tests", () => {
       workspaceId,
       "Read the README.md file and tell me what the first heading says.",
       "ollama",
-      "gpt-oss:20b"
+      OLLAMA_MODEL
     );
 
     expect(result.success).toBe(true);
@@ -146,7 +210,7 @@ describeIntegration("IpcMain Ollama integration tests", () => {
       workspaceId,
       "This should fail",
       "ollama",
-      "gpt-oss:20b",
+      OLLAMA_MODEL,
       {
         providerOptions: {
           ollama: {},
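
For reference, the same idempotent check-then-pull could be done over Ollama's HTTP API instead of spawning the CLI. A minimal sketch, assuming Ollama's documented /api/tags and /api/pull endpoints on the default port (only /api/tags appears in this commit; the helper name and response handling below are illustrative):

// Hypothetical HTTP variant of ensureOllamaModel() - not part of this commit.
// Assumes Ollama's /api/tags (list local models) and /api/pull endpoints.
async function ensureOllamaModelHttp(model: string): Promise<void> {
  const base = "http://localhost:11434";

  // List locally available models (same endpoint the CI readiness check polls).
  const tagsRes = await fetch(`${base}/api/tags`);
  if (!tagsRes.ok) {
    throw new Error(`Failed to list Ollama models: ${tagsRes.status}`);
  }
  const { models } = (await tagsRes.json()) as { models: Array<{ name: string }> };
  if (models.some((m) => m.name === model || m.name.startsWith(`${model}:`))) {
    return; // Model already present - nothing to do.
  }

  // Pull the model; stream: false blocks until the pull completes.
  const pullRes = await fetch(`${base}/api/pull`, {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify({ name: model, stream: false }),
  });
  if (!pullRes.ok) {
    throw new Error(`Failed to pull Ollama model ${model}: ${pullRes.status}`);
  }
}

As with the CLI version, concurrent callers are safe: Ollama deduplicates overlapping pulls of the same model, so parallel test files can each run this in their beforeAll without coordination.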
