From 2d29cfb361e127065242a005a3d936baec42ebec Mon Sep 17 00:00:00 2001 From: Asankhaya Sharma Date: Mon, 17 Nov 2025 16:14:34 +0800 Subject: [PATCH 1/3] Update server.py --- optillm/server.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/optillm/server.py b/optillm/server.py index 17b1579..e5975df 100644 --- a/optillm/server.py +++ b/optillm/server.py @@ -714,9 +714,12 @@ def proxy(): request_config.update({ "stream": stream, "n": n, - "response_format": response_format, # Add response_format to config }) + # Only add response_format if it's not None + if response_format is not None: + request_config['response_format'] = response_format + # Add token limits to request_config with proper priority if max_completion_tokens is not None: request_config['max_completion_tokens'] = max_completion_tokens From 690d234e7b015e8d825d0f9c7d45dc4d06914095 Mon Sep 17 00:00:00 2001 From: Asankhaya Sharma Date: Mon, 17 Nov 2025 16:15:20 +0800 Subject: [PATCH 2/3] Update pyproject.toml --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 6a909e6..7e8ded1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "optillm" -version = "0.3.6" +version = "0.3.7" description = "An optimizing inference proxy for LLMs." readme = "README.md" license = "Apache-2.0" From 27d6d0a5e3f7c85d4b61285045aec751298f3bc7 Mon Sep 17 00:00:00 2001 From: Asankhaya Sharma Date: Mon, 17 Nov 2025 16:15:23 +0800 Subject: [PATCH 3/3] Update __init__.py --- optillm/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/optillm/__init__.py b/optillm/__init__.py index 2ba7e5b..c21e426 100644 --- a/optillm/__init__.py +++ b/optillm/__init__.py @@ -1,5 +1,5 @@ # Version information -__version__ = "0.3.6" +__version__ = "0.3.7" # Import from server module from .server import (