From da956b264c2592f946ca582803dac05880f60abe Mon Sep 17 00:00:00 2001
From: Asankhaya Sharma
Date: Tue, 4 Nov 2025 17:43:20 +0800
Subject: [PATCH 1/3] Update proxy_plugin.py

---
 optillm/plugins/proxy_plugin.py | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/optillm/plugins/proxy_plugin.py b/optillm/plugins/proxy_plugin.py
index 9e8a6c6a..3c977709 100644
--- a/optillm/plugins/proxy_plugin.py
+++ b/optillm/plugins/proxy_plugin.py
@@ -204,12 +204,17 @@ def run(system_prompt: str, initial_query: str, client, model: str,
         if not supports_system_messages:
             logger.info(f"Using fallback message formatting for {model} (no system message support)")
 
+        # Strip stream parameter to force complete response
+        # server.py will handle converting to SSE streaming format if needed
+        api_config = dict(request_config or {})
+        api_config.pop('stream', None)
+
         response = proxy_client.chat.completions.create(
             model=model,
             messages=messages,
-            **(request_config or {})
+            **api_config
         )
-        
+
         # Return full response dict to preserve all usage information
         response_dict = response.model_dump() if hasattr(response, 'model_dump') else response
         return response_dict, 0

From 32e19a90e2d7f5de200089f679be63f52c8dd23a Mon Sep 17 00:00:00 2001
From: Asankhaya Sharma
Date: Tue, 4 Nov 2025 17:45:45 +0800
Subject: [PATCH 2/3] Update proxy_plugin.py

---
 optillm/plugins/proxy_plugin.py | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/optillm/plugins/proxy_plugin.py b/optillm/plugins/proxy_plugin.py
index 3c977709..edcdc474 100644
--- a/optillm/plugins/proxy_plugin.py
+++ b/optillm/plugins/proxy_plugin.py
@@ -120,12 +120,17 @@ def run(system_prompt: str, initial_query: str, client, model: str,
     if not config.get('providers'):
         logger.warning("No providers configured, falling back to original client")
 
+        # Strip stream parameter to force complete response
+        api_config = dict(request_config or {})
+        api_config.pop('stream', None)
+
         response = client.chat.completions.create(
             model=model,
             messages=[
                 {"role": "system", "content": system_prompt},
                 {"role": "user", "content": initial_query}
-            ]
+            ],
+            **api_config
         )
         # Return full response dict to preserve all usage information
         response_dict = response.model_dump() if hasattr(response, 'model_dump') else response
@@ -223,12 +228,17 @@ def run(system_prompt: str, initial_query: str, client, model: str,
         logger.error(f"Proxy plugin error: {e}", exc_info=True)
         # Fallback to original client
         logger.info("Falling back to original client")
+        # Strip stream parameter to force complete response
+        api_config = dict(request_config or {})
+        api_config.pop('stream', None)
+
         response = client.chat.completions.create(
             model=model,
             messages=[
                 {"role": "system", "content": system_prompt},
                 {"role": "user", "content": initial_query}
-            ]
+            ],
+            **api_config
         )
         # Return full response dict to preserve all usage information
         response_dict = response.model_dump() if hasattr(response, 'model_dump') else response

From af30a0380560e1945cc75bd4d9e0f371dc3d3bcb Mon Sep 17 00:00:00 2001
From: Asankhaya Sharma
Date: Tue, 4 Nov 2025 17:57:44 +0800
Subject: [PATCH 3/3] Bump version to 0.3.5

Update __version__ in __init__.py and project version in pyproject.toml
to 0.3.5 for new release.
---
 optillm/__init__.py | 2 +-
 pyproject.toml      | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/optillm/__init__.py b/optillm/__init__.py
index ca7b60ec..50b9850d 100644
--- a/optillm/__init__.py
+++ b/optillm/__init__.py
@@ -1,5 +1,5 @@
 # Version information
-__version__ = "0.3.4"
+__version__ = "0.3.5"
 
 # Import from server module
 from .server import (
diff --git a/pyproject.toml b/pyproject.toml
index e8e3fd33..5a7fd95b 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "optillm"
-version = "0.3.4"
+version = "0.3.5"
 description = "An optimizing inference proxy for LLMs."
 readme = "README.md"
 license = "Apache-2.0"
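
Note: a minimal, self-contained sketch of the stream-stripping pattern that
patches 1 and 2 introduce. The helper name strip_stream and the sample
configs below are illustrative only, not part of the patch; the real code
inlines this logic before each chat.completions.create() call so the
upstream API returns a complete response, which server.py can then convert
back to SSE streaming if the caller asked for stream=True.

    def strip_stream(request_config):
        # Copy first so the caller's dict is never mutated, then drop
        # 'stream' to force a complete (non-streaming) upstream response.
        api_config = dict(request_config or {})
        api_config.pop('stream', None)
        return api_config

    # The `or {}` also covers request_config=None, which the plugin allows.
    assert strip_stream({"stream": True, "temperature": 0.2}) == {"temperature": 0.2}
    assert strip_stream(None) == {}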