diff --git a/.github/workflows/publish-docker-manifest.yml b/.github/workflows/publish-docker-manifest.yml
index 89b778ec..e83eefa8 100644
--- a/.github/workflows/publish-docker-manifest.yml
+++ b/.github/workflows/publish-docker-manifest.yml
@@ -1,11 +1,13 @@
 name: Docker Multi-arch Manifests
 on:
   workflow_run:
-    workflows: 
+    workflows:
       - "Docker Proxy AMD64"
       - "Docker Proxy ARM64"
       - "Docker Full AMD64"
       - "Docker Full ARM64"
+      - "Docker Offline AMD64"
+      - "Docker Offline ARM64"
     types: [completed]
 
 jobs:
@@ -23,9 +25,11 @@ jobs:
           script: |
             const workflows = [
               "Docker Proxy AMD64",
-              "Docker Proxy ARM64", 
+              "Docker Proxy ARM64",
               "Docker Full AMD64",
-              "Docker Full ARM64"
+              "Docker Full ARM64",
+              "Docker Offline AMD64",
+              "Docker Offline ARM64"
             ];
 
             const runId = context.payload.workflow_run.id;
@@ -107,17 +111,35 @@ jobs:
       - name: Create full multi-arch manifest
         run: |
           VERSION="${{ needs.check-builds.outputs.version }}"
-          
+
           # Create versioned full manifest
           docker manifest create ghcr.io/${{ github.repository }}:${VERSION} \
             ghcr.io/${{ github.repository }}:${VERSION}-amd64 \
             ghcr.io/${{ github.repository }}:${VERSION}-arm64
-          
+
           docker manifest push ghcr.io/${{ github.repository }}:${VERSION}
-          
+
           # Create latest full manifest
           docker manifest create ghcr.io/${{ github.repository }}:latest \
             ghcr.io/${{ github.repository }}:latest-amd64 \
             ghcr.io/${{ github.repository }}:latest-arm64
-          
-          docker manifest push ghcr.io/${{ github.repository }}:latest
\ No newline at end of file
+
+          docker manifest push ghcr.io/${{ github.repository }}:latest
+
+      - name: Create offline multi-arch manifest
+        run: |
+          VERSION="${{ needs.check-builds.outputs.version }}"
+
+          # Create versioned offline manifest
+          docker manifest create ghcr.io/${{ github.repository }}:${VERSION}-offline \
+            ghcr.io/${{ github.repository }}:${VERSION}-offline-amd64 \
+            ghcr.io/${{ github.repository }}:${VERSION}-offline-arm64
+
+          docker manifest push ghcr.io/${{ github.repository }}:${VERSION}-offline
+
+          # Create latest offline manifest
+          docker manifest create ghcr.io/${{ github.repository }}:latest-offline \
+            ghcr.io/${{ github.repository }}:latest-offline-amd64 \
+            ghcr.io/${{ github.repository }}:latest-offline-arm64
+
+          docker manifest push ghcr.io/${{ github.repository }}:latest-offline
\ No newline at end of file
diff --git a/.github/workflows/publish-docker-offline-amd64.yml b/.github/workflows/publish-docker-offline-amd64.yml
new file mode 100644
index 00000000..e8f078f7
--- /dev/null
+++ b/.github/workflows/publish-docker-offline-amd64.yml
@@ -0,0 +1,52 @@
+name: Docker Offline AMD64
+on:
+  release:
+    types: [created]
+
+jobs:
+  build:
+    name: Build offline Docker image for AMD64
+    runs-on: ubuntu-latest
+    permissions:
+      contents: read
+      packages: write
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+
+      - name: Log in to GitHub Container Registry
+        uses: docker/login-action@v3
+        with:
+          registry: ghcr.io
+          username: ${{ github.actor }}
+          password: ${{ secrets.GITHUB_TOKEN }}
+
+      - name: Extract version from tag
+        id: version
+        run: |
+          VERSION=${GITHUB_REF#refs/tags/}
+          if [ -z "$VERSION" ] || [ "$VERSION" = "$GITHUB_REF" ]; then
+            VERSION="latest"
+          fi
+          echo "VERSION=$VERSION" >> $GITHUB_OUTPUT
+
+      - name: Build and push offline AMD64 image
+        uses: docker/build-push-action@v5
+        with:
+          context: .
+          file: Dockerfile.offline
+          push: true
+          platforms: linux/amd64
+          provenance: false
+          tags: |
+            ghcr.io/${{ github.repository }}:${{ steps.version.outputs.VERSION }}-offline-amd64
+            ghcr.io/${{ github.repository }}:latest-offline-amd64
+          labels: |
+            org.opencontainers.image.source=https://github.com/${{ github.repository }}
+            org.opencontainers.image.description=OptiLLM offline image with pre-downloaded models for fully offline operation (AMD64)
+            org.opencontainers.image.licenses=Apache-2.0
+            org.opencontainers.image.version=${{ steps.version.outputs.VERSION }}
+          cache-from: type=gha,scope=offline-amd64
+          cache-to: type=gha,scope=offline-amd64,mode=max
\ No newline at end of file
diff --git a/.github/workflows/publish-docker-offline-arm64.yml b/.github/workflows/publish-docker-offline-arm64.yml
new file mode 100644
index 00000000..70068662
--- /dev/null
+++ b/.github/workflows/publish-docker-offline-arm64.yml
@@ -0,0 +1,55 @@
+name: Docker Offline ARM64
+on:
+  release:
+    types: [created]
+
+jobs:
+  build:
+    name: Build offline Docker image for ARM64
+    runs-on: ubuntu-latest
+    permissions:
+      contents: read
+      packages: write
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Set up QEMU
+        uses: docker/setup-qemu-action@v3
+
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+
+      - name: Log in to GitHub Container Registry
+        uses: docker/login-action@v3
+        with:
+          registry: ghcr.io
+          username: ${{ github.actor }}
+          password: ${{ secrets.GITHUB_TOKEN }}
+
+      - name: Extract version from tag
+        id: version
+        run: |
+          VERSION=${GITHUB_REF#refs/tags/}
+          if [ -z "$VERSION" ] || [ "$VERSION" = "$GITHUB_REF" ]; then
+            VERSION="latest"
+          fi
+          echo "VERSION=$VERSION" >> $GITHUB_OUTPUT
+
+      - name: Build and push offline ARM64 image
+        uses: docker/build-push-action@v5
+        with:
+          context: .
+          file: Dockerfile.offline
+          push: true
+          platforms: linux/arm64
+          provenance: false
+          tags: |
+            ghcr.io/${{ github.repository }}:${{ steps.version.outputs.VERSION }}-offline-arm64
+            ghcr.io/${{ github.repository }}:latest-offline-arm64
+          labels: |
+            org.opencontainers.image.source=https://github.com/${{ github.repository }}
+            org.opencontainers.image.description=OptiLLM offline image with pre-downloaded models for fully offline operation (ARM64)
+            org.opencontainers.image.licenses=Apache-2.0
+            org.opencontainers.image.version=${{ steps.version.outputs.VERSION }}
+          cache-from: type=gha,scope=offline-arm64
+          cache-to: type=gha,scope=offline-arm64,mode=max
\ No newline at end of file
diff --git a/Dockerfile.offline b/Dockerfile.offline
new file mode 100644
index 00000000..e93ed24c
--- /dev/null
+++ b/Dockerfile.offline
@@ -0,0 +1,67 @@
+# Build stage
+FROM python:3.12-slim-bookworm AS builder
+
+# Define build argument with default value
+ARG PORT=8000
+# Make it available as env variable at runtime
+ENV OPTILLM_PORT=$PORT
+
+# Set working directory
+WORKDIR /app
+
+# Install system dependencies
+RUN apt-get update && \
+    apt-get install -y --no-install-recommends \
+    build-essential \
+    python3-dev \
+    gcc \
+    g++ \
+    && apt-get clean \
+    && rm -rf /var/lib/apt/lists/*
+
+# Copy only the requirements file first to leverage Docker cache
+COPY requirements.txt .
+
+# Install Python dependencies
+RUN pip install --no-cache-dir -r requirements.txt
+
+# Download spaCy model for offline use
+RUN python -m spacy download en_core_web_lg
+
+# Final stage
+FROM python:3.12-slim-bookworm
+
+# Add labels for the final image
+LABEL org.opencontainers.image.source="https://github.com/codelion/optillm"
+LABEL org.opencontainers.image.description="OptiLLM offline image with pre-downloaded models for fully offline operation"
+LABEL org.opencontainers.image.licenses="Apache-2.0"
+
+# Install curl for the healthcheck
+RUN apt-get update && \
+    apt-get install -y --no-install-recommends \
+    curl \
+    && apt-get clean \
+    && rm -rf /var/lib/apt/lists/*
+
+# Set working directory
+WORKDIR /app
+
+# Copy installed dependencies from builder stage
+COPY --from=builder /usr/local/lib/python3.12/site-packages /usr/local/lib/python3.12/site-packages
+COPY --from=builder /usr/local/bin /usr/local/bin
+
+# Copy application code
+COPY . .
+
+# Create a non-root user and switch to it
+RUN useradd -m appuser
+USER appuser
+
+# Set environment variables
+ENV PYTHONUNBUFFERED=1
+
+ARG PORT=8000
+EXPOSE ${PORT}
+
+# Run the application
+ENTRYPOINT ["python", "optillm.py"]
\ No newline at end of file
diff --git a/optillm/__init__.py b/optillm/__init__.py
index ef07b022..f22e9889 100644
--- a/optillm/__init__.py
+++ b/optillm/__init__.py
@@ -1,5 +1,5 @@
 # Version information
-__version__ = "0.3.0"
+__version__ = "0.3.1"
 
 # Import from server module
 from .server import (
diff --git a/optillm/plugins/privacy_plugin.py b/optillm/plugins/privacy_plugin.py
index f7a10bcf..5ba2228a 100644
--- a/optillm/plugins/privacy_plugin.py
+++ b/optillm/plugins/privacy_plugin.py
@@ -105,6 +105,9 @@ def get_analyzer_engine() -> AnalyzerEngine:
     global _analyzer_engine
     if _analyzer_engine is None:
         _analyzer_engine = AnalyzerEngine()
+        # Pre-warm the analyzer to load all recognizers once during initialization
+        # This prevents recognizers from being reloaded on each analyze() call
+        _analyzer_engine.analyze(text="warm up", language="en")
     return _analyzer_engine
 
 def get_anonymizer_engine() -> AnonymizerEngine:
diff --git a/pyproject.toml b/pyproject.toml
index 77f9895f..02dd0824 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "optillm"
-version = "0.3.0"
+version = "0.3.1"
 description = "An optimizing inference proxy for LLMs."
 readme = "README.md"
 license = "Apache-2.0"
diff --git a/tests/test_privacy_plugin_performance.py b/tests/test_privacy_plugin_performance.py
index 6e6fdfbf..5b543f8d 100644
--- a/tests/test_privacy_plugin_performance.py
+++ b/tests/test_privacy_plugin_performance.py
@@ -181,6 +181,64 @@ def test_singleton_instances_are_reused():
         print(f"❌ Singleton test failed: {e}")
         raise
 
+def test_recognizers_not_reloaded():
+    """
+    Test that recognizers are not fetched/reloaded on each analyze() call.
+    This prevents the performance regression where "Fetching all recognizers for language en"
+    appears in logs on every request.
+    """
+    print("\nTesting that recognizers are not reloaded on each call...")
+
+    # Reset module state
+    if 'optillm.plugins.privacy_plugin' in sys.modules:
+        del sys.modules['optillm.plugins.privacy_plugin']
+
+    try:
+        # Mock at the presidio level to track registry calls
+        with patch('presidio_analyzer.AnalyzerEngine') as MockAnalyzerEngine, \
+             patch('spacy.util.is_package', return_value=True):
+
+            # Create a mock analyzer instance
+            mock_analyzer_instance = MagicMock()
+            mock_registry = MagicMock()
+
+            # Track calls to get_recognizers
+            mock_registry.get_recognizers = MagicMock(return_value=[])
+            mock_analyzer_instance.registry = mock_registry
+            mock_analyzer_instance.analyze = MagicMock(return_value=[])
+
+            MockAnalyzerEngine.return_value = mock_analyzer_instance
+
+            # Import module with mocks
+            import optillm.plugins.privacy_plugin as privacy_plugin
+
+            # First call to get_analyzer_engine - should create and warm up
+            analyzer1 = privacy_plugin.get_analyzer_engine()
+            initial_analyze_calls = mock_analyzer_instance.analyze.call_count
+
+            print(f"Warm-up analyze calls: {initial_analyze_calls}")
+            assert initial_analyze_calls == 1, f"Expected 1 warm-up analyze call, got {initial_analyze_calls}"
+
+            # Second call - should return cached instance without additional analyze
+            analyzer2 = privacy_plugin.get_analyzer_engine()
+            second_analyze_calls = mock_analyzer_instance.analyze.call_count
+
+            print(f"Total analyze calls after second get_analyzer_engine: {second_analyze_calls}")
+            assert second_analyze_calls == 1, f"Analyzer should not call analyze() again on cached retrieval, got {second_analyze_calls} calls"
+
+            # Verify it's the same instance
+            assert analyzer1 is analyzer2, "Should return the same cached analyzer instance"
+
+            print("✅ Recognizer reload test PASSED - Recognizers are pre-warmed and not reloaded!")
+            return True
+
+    except ImportError as e:
+        print(f"⚠️ Skipping recognizer reload test - dependencies not installed: {e}")
+        return True
+    except Exception as e:
+        print(f"❌ Recognizer reload test failed: {e}")
+        raise
+
 if __name__ == "__main__":
     print("=" * 60)
     print("Privacy Plugin Performance & Caching Tests")
@@ -200,6 +258,12 @@ def test_singleton_instances_are_reused():
         all_passed = False
         print(f"❌ Singleton instance test failed: {e}")
 
+    try:
+        test_recognizers_not_reloaded()
+    except Exception as e:
+        all_passed = False
+        print(f"❌ Recognizer reload test failed: {e}")
+
     try:
         test_privacy_plugin_performance()
     except Exception as e: