From c7d00161b3e6af233cc26c29e11f4c9b7c560735 Mon Sep 17 00:00:00 2001 From: Asankhaya Sharma Date: Sun, 19 Jan 2025 09:38:33 +0800 Subject: [PATCH 01/11] Update eval_arena_hard_auto_rtc.py - update max tokens --- scripts/eval_arena_hard_auto_rtc.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/scripts/eval_arena_hard_auto_rtc.py b/scripts/eval_arena_hard_auto_rtc.py index 890c8019..76ab4835 100644 --- a/scripts/eval_arena_hard_auto_rtc.py +++ b/scripts/eval_arena_hard_auto_rtc.py @@ -17,7 +17,8 @@ logger = logging.getLogger(__name__) # Initialize OpenAI client (only used for chat completions now) -client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY")) +client = OpenAI(base_url="http://localhost:8000/v1", api_key=os.environ.get("OPENAI_API_KEY")) +# client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY")) @dataclass class RTCConfig: @@ -58,8 +59,7 @@ def get_llm_response(messages: List[Dict], model: str) -> Optional[str]: response = client.chat.completions.create( model=model, messages=messages, - temperature=0.7, - max_tokens=1000 + max_tokens=4096 ) return response.choices[0].message.content.strip() except Exception as e: From ac66567e355b838574d2f083eab35feefd0a95be Mon Sep 17 00:00:00 2001 From: Asankhaya Sharma Date: Sun, 19 Jan 2025 09:39:45 +0800 Subject: [PATCH 02/11] Update coc_plugin.py --- optillm/plugins/coc_plugin.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/optillm/plugins/coc_plugin.py b/optillm/plugins/coc_plugin.py index 62ef5ce6..b8f421ec 100644 --- a/optillm/plugins/coc_plugin.py +++ b/optillm/plugins/coc_plugin.py @@ -108,12 +108,10 @@ def sanitize_code(code: str) -> str: # Add safety wrapper wrapper = f""" {imports} - def safe_execute(): import numpy as np # Always allow numpy - {safe_code.replace('\n', '\n ')} + {safe_code.replace('\\n', '\\n ')} return answer if 'answer' in locals() else None - result = safe_execute() answer = result """ From 576c7d530b962778782b51ebf364cb239fe29e0d Mon Sep 17 00:00:00 2001 From: Asankhaya Sharma Date: Sun, 19 Jan 2025 09:47:29 +0800 Subject: [PATCH 03/11] Update coc_plugin.py fix bug --- optillm/plugins/coc_plugin.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/optillm/plugins/coc_plugin.py b/optillm/plugins/coc_plugin.py index b8f421ec..34d18776 100644 --- a/optillm/plugins/coc_plugin.py +++ b/optillm/plugins/coc_plugin.py @@ -104,14 +104,17 @@ def sanitize_code(code: str) -> str: safe_lines.append(line) safe_code = '\n'.join(safe_lines) + safe_code = safe_code.replace('\n', '\n ') # Add safety wrapper wrapper = f""" {imports} + def safe_execute(): import numpy as np # Always allow numpy - {safe_code.replace('\\n', '\\n ')} + {safe_code} return answer if 'answer' in locals() else None + result = safe_execute() answer = result """ From 917141f48163b802b782bcd86df8f1729ccc8665 Mon Sep 17 00:00:00 2001 From: Asankhaya Sharma Date: Sun, 19 Jan 2025 10:03:48 +0800 Subject: [PATCH 04/11] fix dependenices on GPU for local inference add protobuf --- requirements.txt | 3 ++- setup.py | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/requirements.txt b/requirements.txt index af328f48..3eb1bdde 100644 --- a/requirements.txt +++ b/requirements.txt @@ -21,4 +21,5 @@ ipython ipykernel peft bitsandbytes -gradio \ No newline at end of file +gradio +protobuf==3.20.3 \ No newline at end of file diff --git a/setup.py b/setup.py index 25068c22..a19c71a3 100644 --- a/setup.py +++ b/setup.py @@ -2,7 +2,7 @@ setup( name="optillm", - version="0.0.24", + 
version="0.0.25", packages=find_packages(), py_modules=['optillm'], package_data={ @@ -34,6 +34,7 @@ "peft", "bitsandbytes", "gradio", + "protobuf" ], entry_points={ 'console_scripts': [ From 8e0adfcb839a361fc30359a4aa63532722ad4656 Mon Sep 17 00:00:00 2001 From: Asankhaya Sharma Date: Mon, 20 Jan 2025 09:33:40 +0800 Subject: [PATCH 05/11] update dependencies --- requirements.txt | 3 +-- scripts/requirements.txt | 1 - setup.py | 3 +-- 3 files changed, 2 insertions(+), 5 deletions(-) diff --git a/requirements.txt b/requirements.txt index 3eb1bdde..af328f48 100644 --- a/requirements.txt +++ b/requirements.txt @@ -21,5 +21,4 @@ ipython ipykernel peft bitsandbytes -gradio -protobuf==3.20.3 \ No newline at end of file +gradio \ No newline at end of file diff --git a/scripts/requirements.txt b/scripts/requirements.txt index 8a130edf..dd662e3e 100644 --- a/scripts/requirements.txt +++ b/scripts/requirements.txt @@ -1,4 +1,3 @@ datasets accelerate huggingface_hub -git+https://github.com/huggingface/transformers.git \ No newline at end of file diff --git a/setup.py b/setup.py index a19c71a3..610493c8 100644 --- a/setup.py +++ b/setup.py @@ -33,8 +33,7 @@ "ipykernel", "peft", "bitsandbytes", - "gradio", - "protobuf" + "gradio" ], entry_points={ 'console_scripts': [ From c0f0893c5b502c935181284f0f2b2e46395fae74 Mon Sep 17 00:00:00 2001 From: Asankhaya Sharma Date: Mon, 20 Jan 2025 10:23:02 +0800 Subject: [PATCH 06/11] Update inference.py fix inference on amd gpu --- optillm/inference.py | 87 ++++++++++++++++++++++++++++++++++++++------ 1 file changed, 76 insertions(+), 11 deletions(-) diff --git a/optillm/inference.py b/optillm/inference.py index 3c8a4a4e..3c9901de 100644 --- a/optillm/inference.py +++ b/optillm/inference.py @@ -402,42 +402,107 @@ def __init__(self): self.device_stats = {device: {'memory_used': 0, 'active_models': 0} for device in self.available_devices} def _detect_devices(self) -> List[str]: + """Detect available compute devices including AMD GPUs via ROCm""" devices = ['cpu'] + + # Check for CUDA (NVIDIA) GPUs if torch.cuda.is_available(): - devices.extend([f'cuda:{i}' for i in range(torch.cuda.device_count())]) + backend = torch.cuda.get_device_properties(0).platform + if backend == 'ROCm': + # AMD GPUs via ROCm + devices.extend([f'cuda:{i}' for i in range(torch.cuda.device_count())]) + logging.info("Detected AMD GPU(s) using ROCm backend") + else: + # NVIDIA GPUs + devices.extend([f'cuda:{i}' for i in range(torch.cuda.device_count())]) + logging.info("Detected NVIDIA GPU(s)") + + # Check for Apple M-series GPU if torch.backends.mps.is_available(): devices.append('mps') + logging.info("Detected Apple M-series GPU") + return devices def get_optimal_device(self, model_size: int = 0) -> str: + """Select the optimal device considering AMD GPU support""" if not self.available_devices: return 'cpu' - # Prefer CUDA devices if available + # Get CUDA devices (both NVIDIA and AMD via ROCm) cuda_devices = [d for d in self.available_devices if 'cuda' in d] + if cuda_devices: - # Find CUDA device with most free memory + # Find device with most free memory max_free_memory = 0 optimal_device = cuda_devices[0] - for device in cuda_devices: - idx = int(device.split(':')[1]) - free_memory = torch.cuda.get_device_properties(idx).total_memory - torch.cuda.memory_allocated(idx) - if free_memory > max_free_memory: - max_free_memory = free_memory - optimal_device = device - - return optimal_device + try: + for device in cuda_devices: + idx = int(device.split(':')[1]) + # Get memory info safely handling 
both NVIDIA and AMD + try: + total_memory = torch.cuda.get_device_properties(idx).total_memory + used_memory = torch.cuda.memory_allocated(idx) + free_memory = total_memory - used_memory + except Exception as e: + logging.warning(f"Error getting memory info for device {device}: {e}") + continue + + if free_memory > max_free_memory: + max_free_memory = free_memory + optimal_device = device + + logging.info(f"Selected optimal CUDA device: {optimal_device} with {max_free_memory/1e9:.2f}GB free memory") + return optimal_device + + except Exception as e: + logging.error(f"Error selecting optimal CUDA device: {e}") + # Fall back to first CUDA device if memory query fails + return cuda_devices[0] # Fall back to MPS if available if 'mps' in self.available_devices: return 'mps' + # Final fallback to CPU + logging.info("No GPU detected, using CPU") return 'cpu' def track_device_usage(self, device: str, memory_delta: int): + """Track memory usage for the device""" if device in self.device_stats: self.device_stats[device]['memory_used'] += memory_delta + + def get_device_info(self, device: str) -> Dict[str, Any]: + """Get detailed information about a device""" + info = { + 'type': 'cpu', + 'memory_total': None, + 'memory_used': None, + 'memory_free': None + } + + if 'cuda' in device: + try: + idx = int(device.split(':')[1]) + props = torch.cuda.get_device_properties(idx) + info.update({ + 'type': 'gpu', + 'name': props.name, + 'backend': 'ROCm' if hasattr(props, 'platform') and props.platform == 'ROCm' else 'CUDA', + 'compute_capability': f"{props.major}.{props.minor}", + 'memory_total': props.total_memory, + 'memory_used': torch.cuda.memory_allocated(idx), + 'memory_free': props.total_memory - torch.cuda.memory_allocated(idx) + }) + except Exception as e: + logging.warning(f"Error getting device info for {device}: {e}") + + elif device == 'mps': + info['type'] = 'mps' + + return info class ModelManager: def __init__(self, cache_manager: CacheManager, device_manager: DeviceManager): From 75a5d21e0b31ada37c48d9b04b205fd4dc6e5cd2 Mon Sep 17 00:00:00 2001 From: Asankhaya Sharma Date: Mon, 20 Jan 2025 12:10:25 +0800 Subject: [PATCH 07/11] Revert "Update inference.py" This reverts commit c0f0893c5b502c935181284f0f2b2e46395fae74. 
--- optillm/inference.py | 87 ++++++-------------------------------------- 1 file changed, 11 insertions(+), 76 deletions(-) diff --git a/optillm/inference.py b/optillm/inference.py index 3c9901de..3c8a4a4e 100644 --- a/optillm/inference.py +++ b/optillm/inference.py @@ -402,107 +402,42 @@ def __init__(self): self.device_stats = {device: {'memory_used': 0, 'active_models': 0} for device in self.available_devices} def _detect_devices(self) -> List[str]: - """Detect available compute devices including AMD GPUs via ROCm""" devices = ['cpu'] - - # Check for CUDA (NVIDIA) GPUs if torch.cuda.is_available(): - backend = torch.cuda.get_device_properties(0).platform - if backend == 'ROCm': - # AMD GPUs via ROCm - devices.extend([f'cuda:{i}' for i in range(torch.cuda.device_count())]) - logging.info("Detected AMD GPU(s) using ROCm backend") - else: - # NVIDIA GPUs - devices.extend([f'cuda:{i}' for i in range(torch.cuda.device_count())]) - logging.info("Detected NVIDIA GPU(s)") - - # Check for Apple M-series GPU + devices.extend([f'cuda:{i}' for i in range(torch.cuda.device_count())]) if torch.backends.mps.is_available(): devices.append('mps') - logging.info("Detected Apple M-series GPU") - return devices def get_optimal_device(self, model_size: int = 0) -> str: - """Select the optimal device considering AMD GPU support""" if not self.available_devices: return 'cpu' - # Get CUDA devices (both NVIDIA and AMD via ROCm) + # Prefer CUDA devices if available cuda_devices = [d for d in self.available_devices if 'cuda' in d] - if cuda_devices: - # Find device with most free memory + # Find CUDA device with most free memory max_free_memory = 0 optimal_device = cuda_devices[0] - try: - for device in cuda_devices: - idx = int(device.split(':')[1]) - # Get memory info safely handling both NVIDIA and AMD - try: - total_memory = torch.cuda.get_device_properties(idx).total_memory - used_memory = torch.cuda.memory_allocated(idx) - free_memory = total_memory - used_memory - except Exception as e: - logging.warning(f"Error getting memory info for device {device}: {e}") - continue - - if free_memory > max_free_memory: - max_free_memory = free_memory - optimal_device = device - - logging.info(f"Selected optimal CUDA device: {optimal_device} with {max_free_memory/1e9:.2f}GB free memory") - return optimal_device - - except Exception as e: - logging.error(f"Error selecting optimal CUDA device: {e}") - # Fall back to first CUDA device if memory query fails - return cuda_devices[0] + for device in cuda_devices: + idx = int(device.split(':')[1]) + free_memory = torch.cuda.get_device_properties(idx).total_memory - torch.cuda.memory_allocated(idx) + if free_memory > max_free_memory: + max_free_memory = free_memory + optimal_device = device + + return optimal_device # Fall back to MPS if available if 'mps' in self.available_devices: return 'mps' - # Final fallback to CPU - logging.info("No GPU detected, using CPU") return 'cpu' def track_device_usage(self, device: str, memory_delta: int): - """Track memory usage for the device""" if device in self.device_stats: self.device_stats[device]['memory_used'] += memory_delta - - def get_device_info(self, device: str) -> Dict[str, Any]: - """Get detailed information about a device""" - info = { - 'type': 'cpu', - 'memory_total': None, - 'memory_used': None, - 'memory_free': None - } - - if 'cuda' in device: - try: - idx = int(device.split(':')[1]) - props = torch.cuda.get_device_properties(idx) - info.update({ - 'type': 'gpu', - 'name': props.name, - 'backend': 'ROCm' if hasattr(props, 
'platform') and props.platform == 'ROCm' else 'CUDA', - 'compute_capability': f"{props.major}.{props.minor}", - 'memory_total': props.total_memory, - 'memory_used': torch.cuda.memory_allocated(idx), - 'memory_free': props.total_memory - torch.cuda.memory_allocated(idx) - }) - except Exception as e: - logging.warning(f"Error getting device info for {device}: {e}") - - elif device == 'mps': - info['type'] = 'mps' - - return info class ModelManager: def __init__(self, cache_manager: CacheManager, device_manager: DeviceManager): From e45cb7f0f76f2457b6e9821eb11386f3a2727ebd Mon Sep 17 00:00:00 2001 From: Asankhaya Sharma Date: Mon, 20 Jan 2025 20:46:59 +0800 Subject: [PATCH 08/11] Update publish.yml --- .github/workflows/publish.yml | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index 7d748f4d..3772a2af 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -71,3 +71,29 @@ jobs: labels: ${{ steps.meta.outputs.labels }} cache-from: type=gha cache-to: type=gha,mode=max + + # Extract metadata for slim image + - name: Extract metadata for proxy_only Docker + id: meta-proxy + uses: docker/metadata-action@v5 + with: + images: ghcr.io/${{ github.repository }} + flavor: | + suffix=-slim + tags: | + type=semver,pattern={{version}} + type=semver,pattern={{major}}.{{minor}} + latest + + # Build and push slim image + - name: Build and push proxy_only Docker image + uses: docker/build-push-action@v5 + with: + context: . + file: Dockerfile.proxy_only + push: true + platforms: linux/amd64,linux/arm64 + tags: ${{ steps.meta-proxy.outputs.tags }} + labels: ${{ steps.meta-proxy.outputs.labels }} + cache-from: type=gha,scope=proxy + cache-to: type=gha,scope=proxy,mode=max From d6c11518c2f29c18b58c0daba928e3630a4fc64f Mon Sep 17 00:00:00 2001 From: Asankhaya Sharma Date: Mon, 20 Jan 2025 20:47:20 +0800 Subject: [PATCH 09/11] add plugindir flag and env var --- Dockerfile.proxy_only | 55 ++++++++++++++++++++++++++++++++++ optillm.py | 9 +++--- requirements_proxy_only.txt | 19 ++++++++++++ scripts/eval_aime_benchmark.py | 21 ++++++++----- 4 files changed, 92 insertions(+), 12 deletions(-) create mode 100644 Dockerfile.proxy_only create mode 100644 requirements_proxy_only.txt diff --git a/Dockerfile.proxy_only b/Dockerfile.proxy_only new file mode 100644 index 00000000..bc4cc90b --- /dev/null +++ b/Dockerfile.proxy_only @@ -0,0 +1,55 @@ +# Build stage +FROM python:3.12-slim AS builder + +# Define build argument with default value +ARG PORT=8000 +# Make it available as env variable at runtime +ENV OPTILLM_PORT=$PORT + +# Set working directory +WORKDIR /app + +# Install system dependencies +RUN apt-get update && apt-get install -y --no-install-recommends \ + build-essential \ + python3-dev \ + gcc \ + g++ \ + && rm -rf /var/lib/apt/lists/* + +# Copy only the requirements file first to leverage Docker cache +COPY requirements_proxy_only.txt . 
+ +# Install Python dependencies +RUN pip install --no-cache-dir -r requirements_proxy_only.txt + +# Final stage +FROM python:3.12-slim + +# Install curl for the healthcheck +RUN apt-get update && apt-get install -y --no-install-recommends \ + curl && \ + apt-get clean && rm -rf /var/lib/apt/lists/* + +# Set working directory +WORKDIR /app + +# Copy installed dependencies from builder stage +COPY --from=builder /usr/local/lib/python3.12/site-packages /usr/local/lib/python3.12/site-packages +COPY --from=builder /usr/local/bin /usr/local/bin + +# Copy application code +COPY . . + +# Create a non-root user and switch to it +RUN useradd -m appuser +USER appuser + +# Set environment variables +ENV PYTHONUNBUFFERED=1 + +# Use the ARG in EXPOSE +EXPOSE ${PORT} + +# Run the application +ENTRYPOINT ["python", "optillm.py"] diff --git a/optillm.py b/optillm.py index b5046262..cdd52587 100644 --- a/optillm.py +++ b/optillm.py @@ -158,7 +158,7 @@ def load_plugins(): package_plugin_dir = os.path.join(os.path.dirname(optillm.__file__), 'plugins') # Get local project plugins directory - current_dir = os.getcwd() + current_dir = os.getcwd() if server_config.get("plugins_dir", "") == "" else server_config["plugins_dir"] local_plugin_dir = os.path.join(current_dir, 'optillm', 'plugins') plugin_dirs = [] @@ -664,7 +664,8 @@ def parse_args(): ("--return-full-response", "OPTILLM_RETURN_FULL_RESPONSE", bool, False, "Return the full response including the CoT with tags"), ("--port", "OPTILLM_PORT", int, 8000, "Specify the port to run the proxy"), ("--log", "OPTILLM_LOG", str, "info", "Specify the logging level", list(logging_levels.keys())), - ("--launch-gui", "OPTILLM_LAUNCH_GUI", bool, False, "Launch a Gradio chat interface") + ("--launch-gui", "OPTILLM_LAUNCH_GUI", bool, False, "Launch a Gradio chat interface"), + ("--plugins-dir", "OPTILLM_PLUGINS_DIR", str, "", "Path to the plugins directory"), ] for arg, env, type_, default, help_text, *extra in args_env: @@ -704,11 +705,11 @@ def main(): global server_config # Call this function at the start of main() args = parse_args() - load_plugins() - # Update server_config with all argument values server_config.update(vars(args)) + load_plugins() + port = server_config['port'] # Set logging level from user request diff --git a/requirements_proxy_only.txt b/requirements_proxy_only.txt new file mode 100644 index 00000000..84e02764 --- /dev/null +++ b/requirements_proxy_only.txt @@ -0,0 +1,19 @@ +numpy +networkx +openai +z3-solver +aiohttp +flask +azure.identity +scikit-learn +litellm +requests +beautifulsoup4 +lxml +presidio_analyzer +presidio_anonymizer +nbformat +nbconvert +ipython +ipykernel +gradio \ No newline at end of file diff --git a/scripts/eval_aime_benchmark.py b/scripts/eval_aime_benchmark.py index c834a2fc..2e51ff0f 100644 --- a/scripts/eval_aime_benchmark.py +++ b/scripts/eval_aime_benchmark.py @@ -15,7 +15,7 @@ logger = logging.getLogger(__name__) # Initialize OpenAI client -client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"), base_url="http://localhost:8000/v1") +client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"), base_url="https://ot7nh9nqf4l7b43s.us-east-1.aws.endpoints.huggingface.cloud/v1/") SYSTEM_PROMPT = '''You are solving AIME (American Invitational Mathematics Examination) problems. 
@@ -104,10 +104,11 @@ def get_llm_response(problem: str, model: str) -> Union[str, List[Dict]]: try: response = client.with_options(timeout=1000.0).chat.completions.create( model=model, + temperature=0.2, messages=[ {"role": "user", "content": SYSTEM_PROMPT + problem} ], - max_tokens=8192, + max_tokens=40000, ) # If there's more than one choice, format as attempts @@ -241,18 +242,21 @@ def analyze_results(results: List[Dict], n: int): print("---") def main(model: str, n_attempts: int): - """Main evaluation function.""" + """Main evaluation function that handles gaps in processed indexes.""" os.makedirs("results", exist_ok=True) - # Include n_attempts in filename to keep separate results for different n values results_file = f"evaluation_results_{model.replace('/', '_')}_pass_at_{n_attempts}.json" dataset = load_2024_dataset() existing_results = load_existing_results(results_file) - last_processed_index = get_last_processed_index(existing_results) - for idx, item in enumerate(tqdm(dataset, desc="Evaluating problems")): - if idx <= last_processed_index: + # Create a set of already processed indexes for efficient lookup + processed_indexes = {result['index'] for result in existing_results} + + for _, item in enumerate(tqdm(dataset, desc="Evaluating problems")): + id = int(item['id']) + # Skip if this index has already been processed + if id in processed_indexes: continue problem_text = item['problem'] @@ -263,7 +267,7 @@ def main(model: str, n_attempts: int): is_correct, first_correct = evaluate_pass_at_n(attempts, correct_answer) result = { - "index": idx, + "index": id, "problem": problem_text, "attempts": attempts, "correct_answer": correct_answer, @@ -275,6 +279,7 @@ def main(model: str, n_attempts: int): final_results = load_existing_results(results_file) analyze_results(final_results, n_attempts) + if __name__ == "__main__": parser = argparse.ArgumentParser(description="Evaluate LLM performance on AIME 2024 problems") parser.add_argument("--model", type=str, required=True, help="OpenAI model to use (e.g., gpt-4, gpt-3.5-turbo)") From e14521b657e1153908372f272660215ade3e23b0 Mon Sep 17 00:00:00 2001 From: Asankhaya Sharma Date: Mon, 20 Jan 2025 22:10:16 +0800 Subject: [PATCH 10/11] Update publish.yml --- .github/workflows/publish.yml | 42 +++++++++++++++++------------------ 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index 3772a2af..ae03a5a5 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -50,50 +50,50 @@ jobs: registry: ghcr.io username: ${{ github.actor }} password: ${{ secrets.GITHUB_TOKEN }} - - - name: Extract metadata for Docker - id: meta + + # Extract metadata for proxy_only image + - name: Extract metadata for proxy_only Docker + id: meta-proxy uses: docker/metadata-action@v5 with: images: ghcr.io/${{ github.repository }} + flavor: | + suffix=-slim tags: | type=semver,pattern={{version}} type=semver,pattern={{major}}.{{minor}} latest - - name: Build and push Docker image + # Build and push proxy image + - name: Build and push proxy_only Docker image uses: docker/build-push-action@v5 with: context: . 
+ file: Dockerfile.proxy_only push: true platforms: linux/amd64,linux/arm64 - tags: ${{ steps.meta.outputs.tags }} - labels: ${{ steps.meta.outputs.labels }} - cache-from: type=gha - cache-to: type=gha,mode=max - - # Extract metadata for slim image - - name: Extract metadata for proxy_only Docker - id: meta-proxy + tags: ${{ steps.meta-proxy.outputs.tags }} + labels: ${{ steps.meta-proxy.outputs.labels }} + cache-from: type=gha,scope=proxy + cache-to: type=gha,scope=proxy,mode=max + + - name: Extract metadata for Docker + id: meta uses: docker/metadata-action@v5 with: images: ghcr.io/${{ github.repository }} - flavor: | - suffix=-slim tags: | type=semver,pattern={{version}} type=semver,pattern={{major}}.{{minor}} latest - # Build and push slim image - - name: Build and push proxy_only Docker image + - name: Build and push Docker image uses: docker/build-push-action@v5 with: context: . - file: Dockerfile.proxy_only push: true platforms: linux/amd64,linux/arm64 - tags: ${{ steps.meta-proxy.outputs.tags }} - labels: ${{ steps.meta-proxy.outputs.labels }} - cache-from: type=gha,scope=proxy - cache-to: type=gha,scope=proxy,mode=max + tags: ${{ steps.meta.outputs.tags }} + labels: ${{ steps.meta.outputs.labels }} + cache-from: type=gha + cache-to: type=gha,mode=max From 910d3b3d4fa4ad4b7a2d17a4b0e90749e21a1dc1 Mon Sep 17 00:00:00 2001 From: Asankhaya Sharma Date: Mon, 20 Jan 2025 22:34:36 +0800 Subject: [PATCH 11/11] Update eval_aime_benchmark.py --- scripts/eval_aime_benchmark.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/scripts/eval_aime_benchmark.py b/scripts/eval_aime_benchmark.py index 2e51ff0f..5fc72576 100644 --- a/scripts/eval_aime_benchmark.py +++ b/scripts/eval_aime_benchmark.py @@ -104,11 +104,10 @@ def get_llm_response(problem: str, model: str) -> Union[str, List[Dict]]: try: response = client.with_options(timeout=1000.0).chat.completions.create( model=model, - temperature=0.2, messages=[ {"role": "user", "content": SYSTEM_PROMPT + problem} ], - max_tokens=40000, + max_tokens=8192, ) # If there's more than one choice, format as attempts
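
Note on PATCH 09: the new --plugins-dir flag (and the matching OPTILLM_PLUGINS_DIR environment variable) does not point directly at the folder holding the plugin files; load_plugins() joins the configured directory with optillm/plugins. A minimal sketch of the resolution logic as patched — the example path is hypothetical, and the default of the current working directory is assumed unchanged:

    import os

    # Mirrors the lookup added to load_plugins() in PATCH 09; the path below is only an example.
    server_config = {"plugins_dir": os.environ.get("OPTILLM_PLUGINS_DIR", "/opt/my_plugins")}

    current_dir = os.getcwd() if server_config.get("plugins_dir", "") == "" else server_config["plugins_dir"]
    local_plugin_dir = os.path.join(current_dir, 'optillm', 'plugins')

    # Custom plugins are therefore expected under <plugins-dir>/optillm/plugins/*.py
    print(local_plugin_dir)  # e.g. /opt/my_plugins/optillm/plugins

PATCH 09 also moves the load_plugins() call so that it runs after server_config.update(vars(args)); that reordering is what makes the flag and the environment variable visible at the time the plugin directory is resolved.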