diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml
index 7d748f4d..ae03a5a5 100644
--- a/.github/workflows/publish.yml
+++ b/.github/workflows/publish.yml
@@ -50,6 +50,32 @@ jobs:
           registry: ghcr.io
           username: ${{ github.actor }}
           password: ${{ secrets.GITHUB_TOKEN }}
+
+      # Extract metadata for the proxy_only image
+      - name: Extract metadata for proxy_only Docker
+        id: meta-proxy
+        uses: docker/metadata-action@v5
+        with:
+          images: ghcr.io/${{ github.repository }}
+          flavor: |
+            suffix=-slim
+          tags: |
+            type=semver,pattern={{version}}
+            type=semver,pattern={{major}}.{{minor}}
+            latest
+
+      # Build and push the proxy_only image
+      - name: Build and push proxy_only Docker image
+        uses: docker/build-push-action@v5
+        with:
+          context: .
+          file: Dockerfile.proxy_only
+          push: true
+          platforms: linux/amd64,linux/arm64
+          tags: ${{ steps.meta-proxy.outputs.tags }}
+          labels: ${{ steps.meta-proxy.outputs.labels }}
+          cache-from: type=gha,scope=proxy
+          cache-to: type=gha,scope=proxy,mode=max

       - name: Extract metadata for Docker
         id: meta
diff --git a/Dockerfile.proxy_only b/Dockerfile.proxy_only
new file mode 100644
index 00000000..bc4cc90b
--- /dev/null
+++ b/Dockerfile.proxy_only
@@ -0,0 +1,55 @@
+# Build stage
+FROM python:3.12-slim AS builder
+
+# Set working directory
+WORKDIR /app
+
+# Install system dependencies
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    build-essential \
+    python3-dev \
+    gcc \
+    g++ \
+    && rm -rf /var/lib/apt/lists/*
+
+# Copy only the requirements file first to leverage Docker cache
+COPY requirements_proxy_only.txt .
+
+# Install Python dependencies
+RUN pip install --no-cache-dir -r requirements_proxy_only.txt
+
+# Final stage
+FROM python:3.12-slim
+
+# Define build argument here: ARG is scoped per stage, and EXPOSE below needs it
+ARG PORT=8000
+# Make it available as an env variable at runtime
+ENV OPTILLM_PORT=$PORT
+
+# Install curl for the healthcheck
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    curl && \
+    apt-get clean && rm -rf /var/lib/apt/lists/*
+
+# Set working directory
+WORKDIR /app
+
+# Copy installed dependencies from builder stage
+COPY --from=builder /usr/local/lib/python3.12/site-packages /usr/local/lib/python3.12/site-packages
+COPY --from=builder /usr/local/bin /usr/local/bin
+
+# Copy application code
+COPY . .
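
Note on the `ENV OPTILLM_PORT=$PORT` line above: it only has an effect because the proxy resolves its settings from the environment. The sketch below is illustrative only (resolve_port is a hypothetical helper, not optillm's code); it mirrors the env-backed defaults visible in the args_env table in the optillm.py diff further down, where --port falls back to OPTILLM_PORT and then to 8000.

import os

# Hypothetical helper (not optillm's actual code): resolve a setting the way
# an env-backed argparse default does -- explicit value, then env var, then default.
def resolve_port(cli_value=None):
    if cli_value is not None:
        return int(cli_value)
    # Inside the container, `ENV OPTILLM_PORT=$PORT` makes this lookup succeed.
    return int(os.environ.get("OPTILLM_PORT", 8000))

print(resolve_port())      # 8000 unless OPTILLM_PORT is set, e.g. by the image
print(resolve_port(9090))  # an explicit value takes precedence
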
diff --git a/optillm.py b/optillm.py
index b5046262..cdd52587 100644
--- a/optillm.py
+++ b/optillm.py
@@ -158,7 +158,7 @@ def load_plugins():
     package_plugin_dir = os.path.join(os.path.dirname(optillm.__file__), 'plugins')

     # Get local project plugins directory
-    current_dir = os.getcwd()
+    current_dir = os.getcwd() if server_config.get("plugins_dir", "") == "" else server_config["plugins_dir"]
     local_plugin_dir = os.path.join(current_dir, 'optillm', 'plugins')

     plugin_dirs = []
@@ -664,7 +664,8 @@ def parse_args():
         ("--return-full-response", "OPTILLM_RETURN_FULL_RESPONSE", bool, False, "Return the full response including the CoT with <thinking> tags"),
         ("--port", "OPTILLM_PORT", int, 8000, "Specify the port to run the proxy"),
         ("--log", "OPTILLM_LOG", str, "info", "Specify the logging level", list(logging_levels.keys())),
-        ("--launch-gui", "OPTILLM_LAUNCH_GUI", bool, False, "Launch a Gradio chat interface")
+        ("--launch-gui", "OPTILLM_LAUNCH_GUI", bool, False, "Launch a Gradio chat interface"),
+        ("--plugins-dir", "OPTILLM_PLUGINS_DIR", str, "", "Path to the plugins directory"),
     ]

     for arg, env, type_, default, help_text, *extra in args_env:
@@ -704,11 +705,11 @@ def main():
     global server_config
     # Call this function at the start of main()
     args = parse_args()
-    load_plugins()
-
     # Update server_config with all argument values
     server_config.update(vars(args))

+    load_plugins()
+
     port = server_config['port']

     # Set logging level from user request
diff --git a/optillm/plugins/coc_plugin.py b/optillm/plugins/coc_plugin.py
index 62ef5ce6..34d18776 100644
--- a/optillm/plugins/coc_plugin.py
+++ b/optillm/plugins/coc_plugin.py
@@ -104,6 +104,7 @@ def sanitize_code(code: str) -> str:
             safe_lines.append(line)

     safe_code = '\n'.join(safe_lines)
+    safe_code = safe_code.replace('\n', '\n    ')

     # Add safety wrapper
     wrapper = f"""
@@ -111,7 +112,7 @@ def sanitize_code(code: str) -> str:
 def safe_execute():
     import numpy as np  # Always allow numpy
-    {safe_code.replace('\n', '\n    ')}
+    {safe_code}

     return answer if 'answer' in locals() else None

 result = safe_execute()
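
The coc_plugin.py change is more than cosmetic: before Python 3.12 (PEP 701), a backslash cannot appear inside an f-string expression, so the old `{safe_code.replace('\n', '\n    ')}` fails to parse with "f-string expression part cannot include a backslash". Hoisting the replacement out of the template keeps the plugin importable on 3.11 and earlier. A minimal standalone reproduction of the pattern (a sketch, not the plugin's full sanitizer):

# On Python <= 3.11 this is a SyntaxError, which is why the diff above
# hoists the .replace() call out of the f-string:
#     wrapper = f"{code.replace('\n', '\n    ')}"
code = "a = 1\nanswer = a + 1"
indented = code.replace("\n", "\n    ")  # indent continuation lines by 4 spaces

wrapper = f"""
def safe_execute():
    {indented}
    return answer if 'answer' in locals() else None
"""
print(wrapper)
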
diff --git a/requirements_proxy_only.txt b/requirements_proxy_only.txt
new file mode 100644
index 00000000..84e02764
--- /dev/null
+++ b/requirements_proxy_only.txt
@@ -0,0 +1,19 @@
+numpy
+networkx
+openai
+z3-solver
+aiohttp
+flask
+azure.identity
+scikit-learn
+litellm
+requests
+beautifulsoup4
+lxml
+presidio_analyzer
+presidio_anonymizer
+nbformat
+nbconvert
+ipython
+ipykernel
+gradio
\ No newline at end of file
diff --git a/scripts/eval_aime_benchmark.py b/scripts/eval_aime_benchmark.py
index c834a2fc..5fc72576 100644
--- a/scripts/eval_aime_benchmark.py
+++ b/scripts/eval_aime_benchmark.py
@@ -15,7 +15,7 @@
 logger = logging.getLogger(__name__)

 # Initialize OpenAI client
-client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"), base_url="http://localhost:8000/v1")
+client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"), base_url="https://ot7nh9nqf4l7b43s.us-east-1.aws.endpoints.huggingface.cloud/v1/")


 SYSTEM_PROMPT = '''You are solving AIME (American Invitational Mathematics Examination) problems.
@@ -241,18 +241,21 @@ def analyze_results(results: List[Dict], n: int):
     print("---")

 def main(model: str, n_attempts: int):
-    """Main evaluation function."""
+    """Main evaluation function that handles gaps in processed indexes."""
     os.makedirs("results", exist_ok=True)

-    # Include n_attempts in filename to keep separate results for different n values
     results_file = f"evaluation_results_{model.replace('/', '_')}_pass_at_{n_attempts}.json"

     dataset = load_2024_dataset()
     existing_results = load_existing_results(results_file)
-    last_processed_index = get_last_processed_index(existing_results)

-    for idx, item in enumerate(tqdm(dataset, desc="Evaluating problems")):
-        if idx <= last_processed_index:
+    # Create a set of already processed problem ids for efficient lookup
+    processed_indexes = {result['index'] for result in existing_results}
+
+    for item in tqdm(dataset, desc="Evaluating problems"):
+        problem_id = int(item['id'])
+        # Skip if this problem has already been processed
+        if problem_id in processed_indexes:
             continue

         problem_text = item['problem']
@@ -263,7 +266,7 @@ def main(model: str, n_attempts: int):
         is_correct, first_correct = evaluate_pass_at_n(attempts, correct_answer)

         result = {
-            "index": idx,
+            "index": problem_id,
             "problem": problem_text,
             "attempts": attempts,
             "correct_answer": correct_answer,
@@ -275,6 +278,7 @@
     final_results = load_existing_results(results_file)
     analyze_results(final_results, n_attempts)

+
 if __name__ == "__main__":
     parser = argparse.ArgumentParser(description="Evaluate LLM performance on AIME 2024 problems")
     parser.add_argument("--model", type=str, required=True, help="OpenAI model to use (e.g., gpt-4, gpt-3.5-turbo)")
diff --git a/scripts/eval_arena_hard_auto_rtc.py b/scripts/eval_arena_hard_auto_rtc.py
index 890c8019..76ab4835 100644
--- a/scripts/eval_arena_hard_auto_rtc.py
+++ b/scripts/eval_arena_hard_auto_rtc.py
@@ -17,7 +17,8 @@
 logger = logging.getLogger(__name__)

 # Initialize OpenAI client (only used for chat completions now)
-client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))
+client = OpenAI(base_url="http://localhost:8000/v1", api_key=os.environ.get("OPENAI_API_KEY"))
+# client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))

 @dataclass
 class RTCConfig:
@@ -58,8 +59,7 @@ def get_llm_response(messages: List[Dict], model: str) -> Optional[str]:
         response = client.chat.completions.create(
             model=model,
             messages=messages,
-            temperature=0.7,
-            max_tokens=1000
+            max_tokens=4096
         )
         return response.choices[0].message.content.strip()
     except Exception as e:
diff --git a/scripts/requirements.txt b/scripts/requirements.txt
index 8a130edf..dd662e3e 100644
--- a/scripts/requirements.txt
+++ b/scripts/requirements.txt
@@ -1,4 +1,3 @@
 datasets
 accelerate
 huggingface_hub
-git+https://github.com/huggingface/transformers.git
\ No newline at end of file
diff --git a/setup.py b/setup.py
index 25068c22..610493c8 100644
--- a/setup.py
+++ b/setup.py
@@ -2,7 +2,7 @@

 setup(
     name="optillm",
-    version="0.0.24",
+    version="0.0.25",
     packages=find_packages(),
     py_modules=['optillm'],
     package_data={
@@ -33,7 +33,7 @@
         "ipykernel",
         "peft",
         "bitsandbytes",
-        "gradio",
+        "gradio"
     ],
     entry_points={
         'console_scripts': [
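
For context on the scripts/eval_aime_benchmark.py change above: keying completed work off the dataset's own problem id, instead of the loop position, lets an interrupted evaluation resume correctly even when the results file has gaps. A self-contained sketch of that resume pattern (the records below are made up for illustration):

# Made-up records standing in for a partially completed results file.
existing_results = [{"index": 1, "answer": 204}, {"index": 3, "answer": 113}]
dataset = [{"id": "1"}, {"id": "2"}, {"id": "3"}, {"id": "4"}]

# A set gives O(1) membership tests, so the skip check is cheap per item.
processed = {r["index"] for r in existing_results}

for item in dataset:
    problem_id = int(item["id"])
    if problem_id in processed:
        continue  # already evaluated in a previous run
    print(f"evaluating problem {problem_id}")  # the real script calls the model here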