From abc810176b203c9d8f7f301f0a807440ca9358be Mon Sep 17 00:00:00 2001
From: Asankhaya Sharma
Date: Mon, 8 Sep 2025 16:57:10 +0800
Subject: [PATCH] Update README.md

---
 optillm/plugins/proxy/README.md | 90 +++++++++++++++++++++++++++++----
 1 file changed, 79 insertions(+), 11 deletions(-)

diff --git a/optillm/plugins/proxy/README.md b/optillm/plugins/proxy/README.md
index 395e3e47..71424db4 100644
--- a/optillm/plugins/proxy/README.md
+++ b/optillm/plugins/proxy/README.md
@@ -11,9 +11,19 @@ A sophisticated load balancing and failover plugin for OptiLLM that distributes
 - 📊 **Performance Tracking**: Monitor latency and errors per provider
 - 🗺️ **Model Mapping**: Map model names to provider-specific deployments
 
+## Installation
+
+```bash
+# Install OptiLLM via pip
+pip install optillm
+
+# Verify installation
+optillm --version
+```
+
 ## Quick Start
 
-### 1. Basic Setup
+### 1. Create Configuration
 
 Create `~/.optillm/proxy_config.yaml`:
 
@@ -23,6 +33,8 @@ providers:
     base_url: https://api.openai.com/v1
     api_key: ${OPENAI_API_KEY}
     weight: 2
+    model_map:
+      gpt-4: gpt-4-turbo-preview  # Optional: map model names
 
   - name: backup
     base_url: https://api.openai.com/v1
@@ -30,20 +42,60 @@ providers:
     weight: 1
 
 routing:
-  strategy: weighted
+  strategy: weighted  # Options: weighted, round_robin, failover
+```
+
+### 2. Start OptiLLM Server
+
+```bash
+# Option A: Use proxy as default for ALL requests (recommended)
+optillm --approach proxy
+
+# Option B: Start server normally (requires model prefix or extra_body)
+optillm
+
+# With custom port
+optillm --approach proxy --port 8000
 ```
 
-### 2. Usage Examples
+### 3. Usage Examples
+
+#### When using `--approach proxy` (Recommended)
+```bash
+# No need for "proxy-" prefix! The proxy handles all requests automatically
+curl -X POST http://localhost:8000/v1/chat/completions \
+  -H "Content-Type: application/json" \
+  -d '{
+    "model": "gpt-4",
+    "messages": [{"role": "user", "content": "Hello"}]
+  }'
+
+# The proxy will:
+# 1. Route to one of your configured providers
+# 2. Apply model mapping if configured
+# 3. Handle failover automatically
+```
 
-#### Standalone Proxy
+#### Without `--approach proxy` flag
 ```bash
-# Route requests through proxy
+# Method 1: Use model prefix
 curl -X POST http://localhost:8000/v1/chat/completions \
   -H "Content-Type: application/json" \
   -d '{
     "model": "proxy-gpt-4",
     "messages": [{"role": "user", "content": "Hello"}]
   }'
+
+# Method 2: Use extra_body
+curl -X POST http://localhost:8000/v1/chat/completions \
+  -H "Content-Type: application/json" \
+  -d '{
+    "model": "gpt-4",
+    "messages": [{"role": "user", "content": "Hello"}],
+    "extra_body": {
+      "optillm_approach": "proxy"
+    }
+  }'
 ```
 
 #### Proxy with Approach/Plugin
@@ -151,7 +203,7 @@
 
 ### Model-Specific Routing
 
-Different providers may use different model names:
+When using `--approach proxy`, the proxy automatically maps model names to provider-specific deployments:
 
 ```yaml
 providers:
@@ -159,12 +211,21 @@ providers:
   - name: azure
    base_url: ${AZURE_ENDPOINT}
     api_key: ${AZURE_KEY}
     model_map:
-      # Request -> Provider mapping
+      # Request model -> Provider deployment name
       gpt-4: gpt-4-deployment-001
       gpt-4-turbo: gpt-4-turbo-latest
       gpt-3.5-turbo: gpt-35-turbo-deployment
+
+  - name: openai
+    base_url: https://api.openai.com/v1
+    api_key: ${OPENAI_API_KEY}
+    # No model_map needed - uses model names as-is
 ```
 
+With this configuration and `optillm --approach proxy`:
+- Request for "gpt-4" → Azure uses "gpt-4-deployment-001", OpenAI uses "gpt-4"
+- Request for "gpt-3.5-turbo" → Azure uses "gpt-35-turbo-deployment", OpenAI uses "gpt-3.5-turbo"
+
 ### Failover Configuration
 
 Set up primary and backup providers:
@@ -294,16 +355,22 @@
 from openai import OpenAI
 
 client = OpenAI(
     base_url="http://localhost:8000/v1",
-    api_key="dummy"
+    api_key="dummy"  # Can be any string when using proxy
+)
+
+# If server started with --approach proxy:
+response = client.chat.completions.create(
+    model="gpt-4",  # No "proxy-" prefix needed!
+    messages=[{"role": "user", "content": "Hello"}]
 )
 
-# Proxy wrapping MOA approach
+# Or explicitly use proxy with another approach:
 response = client.chat.completions.create(
     model="gpt-4",
     messages=[{"role": "user", "content": "Hello"}],
     extra_body={
         "optillm_approach": "proxy",
-        "proxy_wrap": "moa"
+        "proxy_wrap": "moa"  # Proxy will route MOA's requests
     }
 )
 ```
@@ -312,9 +379,10 @@
 ```python
 from langchain.llms import OpenAI
 
+# If server started with --approach proxy:
 llm = OpenAI(
     openai_api_base="http://localhost:8000/v1",
-    model_name="proxy-gpt-4"
+    model_name="gpt-4"  # Proxy handles routing automatically
 )
 
 response = llm("What is the meaning of life?")
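
For intuition, the three routing strategies exposed by the `strategy` option in the patch above (`weighted`, `round_robin`, `failover`) can be sketched in a few lines of Python. This is a minimal illustration under stated assumptions, not the plugin's actual implementation; the provider records and function names here are hypothetical.

```python
import itertools
import random

# Hypothetical provider records mirroring the proxy_config.yaml entries above.
PROVIDERS = [
    {"name": "primary", "weight": 2},
    {"name": "backup", "weight": 1},
]

def pick_weighted(providers):
    """weighted: pick a provider with probability proportional to its
    weight; with weights 2 and 1, 'primary' serves ~2/3 of requests."""
    return random.choices(providers, weights=[p["weight"] for p in providers])[0]

_cycle = itertools.cycle(PROVIDERS)

def pick_round_robin():
    """round_robin: rotate through providers in order, one per request."""
    return next(_cycle)

def call_with_failover(providers, send_request):
    """failover: try providers in listed order, moving to the next on error."""
    last_error = None
    for provider in providers:
        try:
            return send_request(provider)
        except Exception as error:  # a real implementation would narrow this
            last_error = error
    raise RuntimeError("all providers failed") from last_error
```

Note the design difference the sketch makes visible: `weighted` and `round_robin` choose where to send a request, while `failover` governs what happens after a request fails, which is why the plugin can apply it on top of either selection strategy.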