diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml
index 7d748f4d..ae03a5a5 100644
--- a/.github/workflows/publish.yml
+++ b/.github/workflows/publish.yml
@@ -50,6 +50,32 @@ jobs:
           registry: ghcr.io
           username: ${{ github.actor }}
           password: ${{ secrets.GITHUB_TOKEN }}
+
+      # Extract metadata for the proxy_only image
+      - name: Extract metadata for proxy_only Docker
+        id: meta-proxy
+        uses: docker/metadata-action@v5
+        with:
+          images: ghcr.io/${{ github.repository }}
+          flavor: |
+            suffix=-slim
+          tags: |
+            type=semver,pattern={{version}}
+            type=semver,pattern={{major}}.{{minor}}
+            latest
+
+      # Build and push the proxy_only image
+      - name: Build and push proxy_only Docker image
+        uses: docker/build-push-action@v5
+        with:
+          context: .
+          file: Dockerfile.proxy_only
+          push: true
+          platforms: linux/amd64,linux/arm64
+          tags: ${{ steps.meta-proxy.outputs.tags }}
+          labels: ${{ steps.meta-proxy.outputs.labels }}
+          cache-from: type=gha,scope=proxy
+          cache-to: type=gha,scope=proxy,mode=max

       - name: Extract metadata for Docker
         id: meta
diff --git a/Dockerfile.proxy_only b/Dockerfile.proxy_only
new file mode 100644
index 00000000..bc4cc90b
--- /dev/null
+++ b/Dockerfile.proxy_only
@@ -0,0 +1,55 @@
+# Build stage
+FROM python:3.12-slim AS builder
+
+# Set working directory
+WORKDIR /app
+
+# Install system dependencies
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    build-essential \
+    python3-dev \
+    gcc \
+    g++ \
+    && rm -rf /var/lib/apt/lists/*
+
+# Copy only the requirements file first to leverage Docker cache
+COPY requirements_proxy_only.txt .
+
+# Install Python dependencies
+RUN pip install --no-cache-dir -r requirements_proxy_only.txt
+
+# Final stage
+FROM python:3.12-slim
+
+# Define build argument here: ARG is scoped per stage, and EXPOSE below needs it
+ARG PORT=8000
+# Make it available as an env variable at runtime
+ENV OPTILLM_PORT=$PORT
+
+# Install curl for the healthcheck
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    curl && \
+    apt-get clean && rm -rf /var/lib/apt/lists/*
+
+# Set working directory
+WORKDIR /app
+
+# Copy installed dependencies from builder stage
+COPY --from=builder /usr/local/lib/python3.12/site-packages /usr/local/lib/python3.12/site-packages
+COPY --from=builder /usr/local/bin /usr/local/bin
+
+# Copy application code
+COPY . .
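
Note on the `ENV OPTILLM_PORT=$PORT` line above: it only has an effect because the proxy resolves its settings from the environment. The sketch below is illustrative only (resolve_port is a hypothetical helper, not optillm's code); it mirrors the env-backed defaults visible in the args_env table in the optillm.py diff further down, where --port falls back to OPTILLM_PORT and then to 8000.

import os

# Hypothetical helper (not optillm's actual code): resolve a setting the way
# an env-backed argparse default does -- explicit value, then env var, then default.
def resolve_port(cli_value=None):
    if cli_value is not None:
        return int(cli_value)
    # Inside the container, `ENV OPTILLM_PORT=$PORT` makes this lookup succeed.
    return int(os.environ.get("OPTILLM_PORT", 8000))

print(resolve_port())      # 8000 unless OPTILLM_PORT is set, e.g. by the image
print(resolve_port(9090))  # an explicit value takes precedence
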
diff --git a/optillm.py b/optillm.py
index b5046262..cdd52587 100644
--- a/optillm.py
+++ b/optillm.py
@@ -158,7 +158,7 @@ def load_plugins():
     package_plugin_dir = os.path.join(os.path.dirname(optillm.__file__), 'plugins')

     # Get local project plugins directory
-    current_dir = os.getcwd()
+    current_dir = os.getcwd() if server_config.get("plugins_dir", "") == "" else server_config["plugins_dir"]
     local_plugin_dir = os.path.join(current_dir, 'optillm', 'plugins')

     plugin_dirs = []
@@ -664,7 +664,8 @@ def parse_args():
         ("--return-full-response", "OPTILLM_RETURN_FULL_RESPONSE", bool, False, "Return the full response including the CoT with <thinking> tags"),
         ("--port", "OPTILLM_PORT", int, 8000, "Specify the port to run the proxy"),
         ("--log", "OPTILLM_LOG", str, "info", "Specify the logging level", list(logging_levels.keys())),
-        ("--launch-gui", "OPTILLM_LAUNCH_GUI", bool, False, "Launch a Gradio chat interface")
+        ("--launch-gui", "OPTILLM_LAUNCH_GUI", bool, False, "Launch a Gradio chat interface"),
+        ("--plugins-dir", "OPTILLM_PLUGINS_DIR", str, "", "Path to the plugins directory"),
     ]

     for arg, env, type_, default, help_text, *extra in args_env:
@@ -704,11 +705,11 @@ def main():
     global server_config
     # Call this function at the start of main()
     args = parse_args()
-    load_plugins()
-
     # Update server_config with all argument values
     server_config.update(vars(args))

+    load_plugins()
+
     port = server_config['port']

     # Set logging level from user request
diff --git a/optillm/plugins/coc_plugin.py b/optillm/plugins/coc_plugin.py
index 62ef5ce6..34d18776 100644
--- a/optillm/plugins/coc_plugin.py
+++ b/optillm/plugins/coc_plugin.py
@@ -104,6 +104,7 @@ def sanitize_code(code: str) -> str:
             safe_lines.append(line)

     safe_code = '\n'.join(safe_lines)
+    safe_code = safe_code.replace('\n', '\n    ')

     # Add safety wrapper
     wrapper = f"""
@@ -111,7 +112,7 @@ def sanitize_code(code: str) -> str:
 def safe_execute():
     import numpy as np  # Always allow numpy
-    {safe_code.replace('\n', '\n    ')}
+    {safe_code}

     return answer if 'answer' in locals() else None

 result = safe_execute()
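
The coc_plugin.py change is more than cosmetic: before Python 3.12 (PEP 701), a backslash cannot appear inside an f-string expression, so the old `{safe_code.replace('\n', '\n    ')}` fails to parse with "f-string expression part cannot include a backslash". Hoisting the replacement out of the template keeps the plugin importable on 3.11 and earlier. A minimal standalone reproduction of the pattern (a sketch, not the plugin's full sanitizer):

# On Python <= 3.11 this is a SyntaxError, which is why the diff above
# hoists the .replace() call out of the f-string:
#     wrapper = f"{code.replace('\n', '\n    ')}"
code = "a = 1\nanswer = a + 1"
indented = code.replace("\n", "\n    ")  # indent continuation lines by 4 spaces

wrapper = f"""
def safe_execute():
    {indented}
    return answer if 'answer' in locals() else None
"""
print(wrapper)
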
diff --git a/requirements_proxy_only.txt b/requirements_proxy_only.txt
new file mode 100644
index 00000000..84e02764
--- /dev/null
+++ b/requirements_proxy_only.txt
@@ -0,0 +1,19 @@
+numpy
+networkx
+openai
+z3-solver
+aiohttp
+flask
+azure.identity
+scikit-learn
+litellm
+requests
+beautifulsoup4
+lxml
+presidio_analyzer
+presidio_anonymizer
+nbformat
+nbconvert
+ipython
+ipykernel
+gradio
\ No newline at end of file
diff --git a/scripts/eval_aime_benchmark.py b/scripts/eval_aime_benchmark.py
index c834a2fc..5fc72576 100644
--- a/scripts/eval_aime_benchmark.py
+++ b/scripts/eval_aime_benchmark.py
@@ -15,7 +15,7 @@
 logger = logging.getLogger(__name__)

 # Initialize OpenAI client
-client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"), base_url="http://localhost:8000/v1")
+client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"), base_url="https://ot7nh9nqf4l7b43s.us-east-1.aws.endpoints.huggingface.cloud/v1/")


 SYSTEM_PROMPT = '''You are solving AIME (American Invitational Mathematics Examination) problems.
@@ -241,18 +241,21 @@ def analyze_results(results: List[Dict], n: int):
     print("---")

 def main(model: str, n_attempts: int):
-    """Main evaluation function."""
+    """Main evaluation function that handles gaps in processed indexes."""
     os.makedirs("results", exist_ok=True)

-    # Include n_attempts in filename to keep separate results for different n values
     results_file = f"evaluation_results_{model.replace('/', '_')}_pass_at_{n_attempts}.json"

     dataset = load_2024_dataset()
     existing_results = load_existing_results(results_file)
-    last_processed_index = get_last_processed_index(existing_results)

-    for idx, item in enumerate(tqdm(dataset, desc="Evaluating problems")):
-        if idx <= last_processed_index:
+    # Create a set of already processed problem ids for efficient lookup
+    processed_indexes = {result['index'] for result in existing_results}
+
+    for item in tqdm(dataset, desc="Evaluating problems"):
+        problem_id = int(item['id'])
+        # Skip if this problem has already been processed
+        if problem_id in processed_indexes:
             continue

         problem_text = item['problem']
@@ -263,7 +266,7 @@ def main(model: str, n_attempts: int):
         is_correct, first_correct = evaluate_pass_at_n(attempts, correct_answer)

         result = {
-            "index": idx,
+            "index": problem_id,
             "problem": problem_text,
             "attempts": attempts,
             "correct_answer": correct_answer,
@@ -275,6 +278,7 @@
     final_results = load_existing_results(results_file)
     analyze_results(final_results, n_attempts)

+
 if __name__ == "__main__":
     parser = argparse.ArgumentParser(description="Evaluate LLM performance on AIME 2024 problems")
     parser.add_argument("--model", type=str, required=True, help="OpenAI model to use (e.g., gpt-4, gpt-3.5-turbo)")
diff --git a/scripts/eval_arena_hard_auto_rtc.py b/scripts/eval_arena_hard_auto_rtc.py
index 890c8019..76ab4835 100644
--- a/scripts/eval_arena_hard_auto_rtc.py
+++ b/scripts/eval_arena_hard_auto_rtc.py
@@ -17,7 +17,8 @@
 logger = logging.getLogger(__name__)

 # Initialize OpenAI client (only used for chat completions now)
-client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))
+client = OpenAI(base_url="http://localhost:8000/v1", api_key=os.environ.get("OPENAI_API_KEY"))
+# client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))

 @dataclass
 class RTCConfig:
@@ -58,8 +59,7 @@ def get_llm_response(messages: List[Dict], model: str) -> Optional[str]:
         response = client.chat.completions.create(
             model=model,
             messages=messages,
-            temperature=0.7,
-            max_tokens=1000
+            max_tokens=4096
         )
         return response.choices[0].message.content.strip()
     except Exception as e:
diff --git a/scripts/requirements.txt b/scripts/requirements.txt
index 8a130edf..dd662e3e 100644
--- a/scripts/requirements.txt
+++ b/scripts/requirements.txt
@@ -1,4 +1,3 @@
 datasets
 accelerate
 huggingface_hub
-git+https://github.com/huggingface/transformers.git
\ No newline at end of file
diff --git a/setup.py b/setup.py
index 25068c22..610493c8 100644
--- a/setup.py
+++ b/setup.py
@@ -2,7 +2,7 @@

 setup(
     name="optillm",
-    version="0.0.24",
+    version="0.0.25",
     packages=find_packages(),
     py_modules=['optillm'],
     package_data={
@@ -33,7 +33,7 @@
         "ipykernel",
         "peft",
         "bitsandbytes",
-        "gradio",
+        "gradio"
     ],
     entry_points={
         'console_scripts': [
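
For context on the scripts/eval_aime_benchmark.py change above: keying completed work off the dataset's own problem id, instead of the loop position, lets an interrupted evaluation resume correctly even when the results file has gaps. A self-contained sketch of that resume pattern (the records below are made up for illustration):

# Made-up records standing in for a partially completed results file.
existing_results = [{"index": 1, "answer": 204}, {"index": 3, "answer": 113}]
dataset = [{"id": "1"}, {"id": "2"}, {"id": "3"}, {"id": "4"}]

# A set gives O(1) membership tests, so the skip check is cheap per item.
processed = {r["index"] for r in existing_results}

for item in dataset:
    problem_id = int(item["id"])
    if problem_id in processed:
        continue  # already evaluated in a previous run
    print(f"evaluating problem {problem_id}")  # the real script calls the model here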