38 changes: 30 additions & 8 deletions .github/workflows/publish-docker-manifest.yml
@@ -1,11 +1,13 @@
name: Docker Multi-arch Manifests
on:
workflow_run:
workflows:
workflows:
- "Docker Proxy AMD64"
- "Docker Proxy ARM64"
- "Docker Full AMD64"
- "Docker Full ARM64"
- "Docker Offline AMD64"
- "Docker Offline ARM64"
types: [completed]

jobs:
@@ -23,9 +25,11 @@ jobs:
script: |
const workflows = [
"Docker Proxy AMD64",
"Docker Proxy ARM64",
"Docker Proxy ARM64",
"Docker Full AMD64",
"Docker Full ARM64"
"Docker Full ARM64",
"Docker Offline AMD64",
"Docker Offline ARM64"
];

const runId = context.payload.workflow_run.id;
@@ -107,17 +111,35 @@ jobs:
- name: Create full multi-arch manifest
run: |
VERSION="${{ needs.check-builds.outputs.version }}"

# Create versioned full manifest
docker manifest create ghcr.io/${{ github.repository }}:${VERSION} \
ghcr.io/${{ github.repository }}:${VERSION}-amd64 \
ghcr.io/${{ github.repository }}:${VERSION}-arm64

docker manifest push ghcr.io/${{ github.repository }}:${VERSION}

# Create latest full manifest
docker manifest create ghcr.io/${{ github.repository }}:latest \
ghcr.io/${{ github.repository }}:latest-amd64 \
ghcr.io/${{ github.repository }}:latest-arm64

docker manifest push ghcr.io/${{ github.repository }}:latest

docker manifest push ghcr.io/${{ github.repository }}:latest

- name: Create offline multi-arch manifest
run: |
VERSION="${{ needs.check-builds.outputs.version }}"

# Create versioned offline manifest
docker manifest create ghcr.io/${{ github.repository }}:${VERSION}-offline \
ghcr.io/${{ github.repository }}:${VERSION}-offline-amd64 \
ghcr.io/${{ github.repository }}:${VERSION}-offline-arm64

docker manifest push ghcr.io/${{ github.repository }}:${VERSION}-offline

# Create latest offline manifest
docker manifest create ghcr.io/${{ github.repository }}:latest-offline \
ghcr.io/${{ github.repository }}:latest-offline-amd64 \
ghcr.io/${{ github.repository }}:latest-offline-arm64

docker manifest push ghcr.io/${{ github.repository }}:latest-offline
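Note on verifying the result: once these manifest steps have run, the combined tags can be inspected and pulled like any other multi-arch image. A minimal sketch, with ghcr.io/OWNER/REPO standing in as a placeholder for the real ${{ github.repository }} value and latest-offline as the example tag:

    # List the entries in the manifest; each should report its platform (amd64, arm64)
    docker manifest inspect ghcr.io/OWNER/REPO:latest-offline

    # Pulling the combined tag lets Docker select the matching architecture automatically
    docker pull ghcr.io/OWNER/REPO:latest-offline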
52 changes: 52 additions & 0 deletions .github/workflows/publish-docker-offline-amd64.yml
@@ -0,0 +1,52 @@
name: Docker Offline AMD64
on:
release:
types: [created]

jobs:
build:
name: Build offline Docker image for AMD64
runs-on: ubuntu-latest
permissions:
contents: read
packages: write
steps:
- uses: actions/checkout@v4

- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3

- name: Log in to GitHub Container Registry
uses: docker/login-action@v3
with:
registry: ghcr.io
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}

- name: Extract version from tag
id: version
run: |
VERSION=${GITHUB_REF#refs/tags/}
if [ -z "$VERSION" ] || [ "$VERSION" = "$GITHUB_REF" ]; then
VERSION="latest"
fi
echo "VERSION=$VERSION" >> $GITHUB_OUTPUT

- name: Build and push offline AMD64 image
uses: docker/build-push-action@v5
with:
context: .
file: Dockerfile.offline
push: true
platforms: linux/amd64
provenance: false
tags: |
ghcr.io/${{ github.repository }}:${{ steps.version.outputs.VERSION }}-offline-amd64
ghcr.io/${{ github.repository }}:latest-offline-amd64
labels: |
org.opencontainers.image.source=https://github.com/${{ github.repository }}
org.opencontainers.image.description=OptiLLM offline image with pre-downloaded models for fully offline operation (AMD64)
org.opencontainers.image.licenses=Apache-2.0
org.opencontainers.image.version=${{ steps.version.outputs.VERSION }}
cache-from: type=gha,scope=offline-amd64
cache-to: type=gha,scope=offline-amd64,mode=max
55 changes: 55 additions & 0 deletions .github/workflows/publish-docker-offline-arm64.yml
@@ -0,0 +1,55 @@
name: Docker Offline ARM64
on:
release:
types: [created]

jobs:
build:
name: Build offline Docker image for ARM64
runs-on: ubuntu-latest
permissions:
contents: read
packages: write
steps:
- uses: actions/checkout@v4

- name: Set up QEMU
uses: docker/setup-qemu-action@v3

- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3

- name: Log in to GitHub Container Registry
uses: docker/login-action@v3
with:
registry: ghcr.io
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}

- name: Extract version from tag
id: version
run: |
VERSION=${GITHUB_REF#refs/tags/}
if [ -z "$VERSION" ] || [ "$VERSION" = "$GITHUB_REF" ]; then
VERSION="latest"
fi
echo "VERSION=$VERSION" >> $GITHUB_OUTPUT

- name: Build and push offline ARM64 image
uses: docker/build-push-action@v5
with:
context: .
file: Dockerfile.offline
push: true
platforms: linux/arm64
provenance: false
tags: |
ghcr.io/${{ github.repository }}:${{ steps.version.outputs.VERSION }}-offline-arm64
ghcr.io/${{ github.repository }}:latest-offline-arm64
labels: |
org.opencontainers.image.source=https://github.com/${{ github.repository }}
org.opencontainers.image.description=OptiLLM offline image with pre-downloaded models for fully offline operation (ARM64)
org.opencontainers.image.licenses=Apache-2.0
org.opencontainers.image.version=${{ steps.version.outputs.VERSION }}
cache-from: type=gha,scope=offline-arm64
cache-to: type=gha,scope=offline-arm64,mode=max
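Both offline workflows derive the image tag from the release ref with the same shell logic. A minimal sketch of that logic run outside Actions (GITHUB_REF is set by hand here; in a real run the runner provides it, and the v0.3.1 tag is only an example value):

    # Simulate the ref a release event would supply
    GITHUB_REF="refs/tags/v0.3.1"

    # Strip the refs/tags/ prefix; if nothing was stripped, the ref was not a tag
    VERSION=${GITHUB_REF#refs/tags/}
    if [ -z "$VERSION" ] || [ "$VERSION" = "$GITHUB_REF" ]; then
      VERSION="latest"
    fi
    echo "$VERSION"   # prints v0.3.1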
67 changes: 67 additions & 0 deletions Dockerfile.offline
@@ -0,0 +1,67 @@
# Build stage
FROM python:3.12-slim-bookworm AS builder

# Define build argument with default value
ARG PORT=8000
# Make it available as env variable at runtime
ENV OPTILLM_PORT=$PORT

# Set working directory
WORKDIR /app

# Install system dependencies
RUN apt-get update && \
apt-get install -y --no-install-recommends \
build-essential \
python3-dev \
gcc \
g++ \
&& apt-get clean \
&& rm -rf /var/lib/apt/lists/*

# Copy only the requirements file first to leverage Docker cache
COPY requirements.txt .

# Install Python dependencies
RUN pip install --no-cache-dir -r requirements.txt

# Download spaCy model for offline use
RUN python -m spacy download en_core_web_lg

# Final stage
FROM python:3.12-slim-bookworm

# Add labels for the final image
LABEL org.opencontainers.image.source="https://github.com/codelion/optillm"
LABEL org.opencontainers.image.description="OptiLLM offline image with pre-downloaded models for fully offline operation"
LABEL org.opencontainers.image.licenses="Apache-2.0"

# Install curl for the healthcheck
RUN apt-get update && \
apt-get install -y --no-install-recommends \
curl \
&& apt-get clean \
&& rm -rf /var/lib/apt/lists/*

# Set working directory
WORKDIR /app

# Copy installed dependencies from builder stage
COPY --from=builder /usr/local/lib/python3.12/site-packages /usr/local/lib/python3.12/site-packages
COPY --from=builder /usr/local/bin /usr/local/bin

# Copy application code
COPY . .

# Create a non-root user and switch to it
RUN useradd -m appuser
USER appuser

# Set environment variables
ENV PYTHONUNBUFFERED=1

# Re-declare the build argument in this stage (ARG scope does not carry across stages)
# so it is available for EXPOSE and as a runtime env variable
ARG PORT=8000
ENV OPTILLM_PORT=$PORT

# Use the ARG in EXPOSE
EXPOSE ${PORT}

# Run the application
ENTRYPOINT ["python", "optillm.py"]
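As a rough local usage sketch for the new Dockerfile (the optillm-offline tag and the host port are placeholders, not names used by the workflows):

    # Build the offline image; the spaCy model is downloaded at build time, not at runtime
    docker build -f Dockerfile.offline -t optillm-offline .

    # Run it, mapping the default port from the PORT build argument
    docker run --rm -p 8000:8000 optillm-offline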
2 changes: 1 addition & 1 deletion optillm/__init__.py
@@ -1,5 +1,5 @@
# Version information
__version__ = "0.3.0"
__version__ = "0.3.1"

# Import from server module
from .server import (
3 changes: 3 additions & 0 deletions optillm/plugins/privacy_plugin.py
@@ -105,6 +105,9 @@ def get_analyzer_engine() -> AnalyzerEngine:
global _analyzer_engine
if _analyzer_engine is None:
_analyzer_engine = AnalyzerEngine()
# Pre-warm the analyzer to load all recognizers once during initialization
# This prevents recognizers from being reloaded on each analyze() call
_analyzer_engine.analyze(text="warm up", language="en")
return _analyzer_engine

def get_anonymizer_engine() -> AnonymizerEngine:
2 changes: 1 addition & 1 deletion pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"

[project]
name = "optillm"
version = "0.3.0"
version = "0.3.1"
description = "An optimizing inference proxy for LLMs."
readme = "README.md"
license = "Apache-2.0"
64 changes: 64 additions & 0 deletions tests/test_privacy_plugin_performance.py
@@ -181,6 +181,64 @@ def test_singleton_instances_are_reused():
print(f"❌ Singleton test failed: {e}")
raise

def test_recognizers_not_reloaded():
"""
Test that recognizers are not fetched/reloaded on each analyze() call.
This prevents the performance regression where "Fetching all recognizers for language en"
appears in logs on every request.
"""
print("\nTesting that recognizers are not reloaded on each call...")

# Reset module state
if 'optillm.plugins.privacy_plugin' in sys.modules:
del sys.modules['optillm.plugins.privacy_plugin']

try:
# Mock at the presidio level to track registry calls
with patch('presidio_analyzer.AnalyzerEngine') as MockAnalyzerEngine, \
patch('spacy.util.is_package', return_value=True):

# Create a mock analyzer instance
mock_analyzer_instance = MagicMock()
mock_registry = MagicMock()

# Track calls to get_recognizers
mock_registry.get_recognizers = MagicMock(return_value=[])
mock_analyzer_instance.registry = mock_registry
mock_analyzer_instance.analyze = MagicMock(return_value=[])

MockAnalyzerEngine.return_value = mock_analyzer_instance

# Import module with mocks
import optillm.plugins.privacy_plugin as privacy_plugin

# First call to get_analyzer_engine - should create and warm up
analyzer1 = privacy_plugin.get_analyzer_engine()
initial_analyze_calls = mock_analyzer_instance.analyze.call_count

print(f"Warm-up analyze calls: {initial_analyze_calls}")
assert initial_analyze_calls == 1, f"Expected 1 warm-up analyze call, got {initial_analyze_calls}"

# Second call - should return cached instance without additional analyze
analyzer2 = privacy_plugin.get_analyzer_engine()
second_analyze_calls = mock_analyzer_instance.analyze.call_count

print(f"Total analyze calls after second get_analyzer_engine: {second_analyze_calls}")
assert second_analyze_calls == 1, f"Analyzer should not call analyze() again on cached retrieval, got {second_analyze_calls} calls"

# Verify it's the same instance
assert analyzer1 is analyzer2, "Should return the same cached analyzer instance"

print("✅ Recognizer reload test PASSED - Recognizers are pre-warmed and not reloaded!")
return True

except ImportError as e:
print(f"⚠️ Skipping recognizer reload test - dependencies not installed: {e}")
return True
except Exception as e:
print(f"❌ Recognizer reload test failed: {e}")
raise

if __name__ == "__main__":
print("=" * 60)
print("Privacy Plugin Performance & Caching Tests")
@@ -200,6 +258,12 @@ def test_singleton_instances_are_reused():
all_passed = False
print(f"❌ Singleton instance test failed: {e}")

try:
test_recognizers_not_reloaded()
except Exception as e:
all_passed = False
print(f"❌ Recognizer reload test failed: {e}")

try:
test_privacy_plugin_performance()
except Exception as e:
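The new test keeps the file's existing script-style layout, so it can be run directly as well as collected by pytest; a quick standalone check from the repository root might look like:

    # Run the privacy plugin performance/caching tests as a script
    python tests/test_privacy_plugin_performance.py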