algorithmicsuperintelligence · codelion · Aug 25, 2025 · Aug 25, 2025 · Aug 25, 2025 · Aug 25, 2025
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
@@ -7,7 +7,7 @@ on:
     branches: [ main ]
 
 jobs:
-  test:
+  unit-tests:
     runs-on: ubuntu-latest
     strategy:
       matrix:
@@ -34,53 +34,95 @@ jobs:
         python -m pip install --upgrade pip
         pip install -r requirements.txt
         pip install -r tests/requirements.txt
+        pip install -e .
 
-    - name: Run unit tests
+    - name: Run unit tests (no server required)
       run: |
-        # Run quick CI tests
-        python tests/test_ci_quick.py
+        # Set up local inference environment
+        export OPTILLM_API_KEY=optillm
 
-        # Run plugin tests with pytest if available
+        # Run tests that don't need server - fast feedback!
+        python tests/test_ci_quick.py
         python -m pytest tests/test_plugins.py -v --tb=short || python tests/test_plugins.py
-
-        # Run approach tests
         python tests/test_approaches.py
+        python tests/test_reasoning_simple.py
+        python tests/test_batching.py
+      env:
+        OPTILLM_API_KEY: optillm
+        HF_TOKEN: ${{ secrets.HF_TOKEN }}
 
-  integration-test:
+  integration-tests:
     runs-on: ubuntu-latest
-    needs: test
-    if: github.event_name == 'pull_request' && github.event.pull_request.head.repo.full_name == github.repository
-    # Only run integration tests on PRs from the same repository (not forks)
-    # This ensures secrets are available
+    needs: unit-tests  # Only run if unit tests pass
+    strategy:
+      matrix:
+        python-version: ['3.12']
 
     steps:
     - uses: actions/checkout@v4
 
-    - name: Set up Python
+    - name: Set up Python ${{ matrix.python-version }}
       uses: actions/setup-python@v4
       with:
-        python-version: '3.12'
+        python-version: ${{ matrix.python-version }}
+
+    - name: Cache pip packages
+      uses: actions/cache@v3
+      with:
+        path: ~/.cache/pip
+        key: ${{ runner.os }}-pip-${{ hashFiles('**/requirements.txt') }}
+        restore-keys: |
+          ${{ runner.os }}-pip-
 
     - name: Install dependencies
       run: |
         python -m pip install --upgrade pip
         pip install -r requirements.txt
+        pip install -r tests/requirements.txt
+        pip install -e .
 
-    - name: Run integration test with OpenAI
-      if: env.OPENAI_API_KEY != ''
+    - name: Start optillm server
       run: |
-        # Start OptILLM server
-        python optillm.py &
-        SERVER_PID=$!
+        echo "Starting optillm server for integration tests..."
+        OPTILLM_API_KEY=optillm python optillm.py --model google/gemma-3-270m-it --port 8000 &
+        echo $! > server.pid
 
-        # Wait for server
-        sleep 5
+        # Wait for server to be ready
+        echo "Waiting for server to start..."
+        sleep 15
 
-        # Run simple integration test
-        python tests/test.py --approaches none --single-test "Simple Math Problem" --base-url http://localhost:8000/v1 --model gpt-4o-mini || true
-
-        # Stop server
-        kill $SERVER_PID || true
+        # Test server health
+        curl -s http://localhost:8000/health || echo "Server health check failed"
+      env:
+        OPTILLM_API_KEY: optillm
+        HF_TOKEN: ${{ secrets.HF_TOKEN }}
+
+    - name: Run integration tests (server required)
+      run: |
+        # Run tests that need the server
+        echo "Running tests that require optillm server..."
+        OPTILLM_API_KEY=optillm python tests/test_reasoning_tokens.py
+        OPTILLM_API_KEY=optillm python tests/test_reasoning_integration.py
+        OPTILLM_API_KEY=optillm python tests/test_json_plugin.py
+        OPTILLM_API_KEY=optillm python tests/test_n_parameter.py
+        OPTILLM_API_KEY=optillm python -m pytest tests/test_api_compatibility.py -v --tb=short || echo "API compatibility tests require pytest"
+        OPTILLM_API_KEY=optillm python tests/test.py --approaches none --single-test "Simple Math Problem" || echo "Main test completed"
+        echo "All integration tests completed successfully!"
+        exit 0
       env:
-        OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
-      continue-on-error: true
+        OPTILLM_API_KEY: optillm
+        HF_TOKEN: ${{ secrets.HF_TOKEN }}
+
+    - name: Stop optillm server
+      if: always()
+      run: |
+        echo "Stopping optillm server..."
+        if [ -f server.pid ]; then
+          kill $(cat server.pid) 2>/dev/null || true
+          rm -f server.pid
+        fi
+        # Kill any remaining python processes running optillm
+        pkill -f "python.*optillm" 2>/dev/null || true
+        sleep 2
+        echo "Server shutdown completed"
+        exit 0
diff --git a/CLAUDE.md b/CLAUDE.md
@@ -4,7 +4,7 @@ This file provides guidance to Claude Code (claude.ai/code) when working with co
 
 ## Project Overview
 
-OptILLM is an OpenAI API compatible optimizing inference proxy that implements state-of-the-art techniques to improve accuracy and performance of LLMs. It focuses on reasoning improvements for coding, logical, and mathematical queries through inference-time compute optimization.
+OptiLLM is an OpenAI API compatible optimizing inference proxy that implements state-of-the-art techniques to improve accuracy and performance of LLMs. It focuses on reasoning improvements for coding, logical, and mathematical queries through inference-time compute optimization.
 
 ## Core Architecture
 
@@ -95,7 +95,7 @@ python scripts/eval_arena_hard_auto_rtc.py
 # FRAMES benchmark
 python scripts/eval_frames_benchmark.py
 
-# OptILLM benchmark generation/evaluation
+# OptiLLM benchmark generation/evaluation
 python scripts/gen_optillmbench.py
 python scripts/eval_optillmbench.py
 ```
@@ -120,6 +120,7 @@ python scripts/eval_optillmbench.py
 - MCP: `~/.optillm/mcp_config.json` for Model Context Protocol servers
 - SPL: Built-in system prompt learning for solving strategies
 - Memory: Automatic unbounded context via chunking and retrieval
+- GenSelect: Quality-based selection from multiple generated candidates
 
 ## Key Concepts