diff --git a/optillm/__init__.py b/optillm/__init__.py
index 2ba7e5b..c21e426 100644
--- a/optillm/__init__.py
+++ b/optillm/__init__.py
@@ -1,5 +1,5 @@
 # Version information
-__version__ = "0.3.6"
+__version__ = "0.3.7"
 
 # Import from server module
 from .server import (
diff --git a/optillm/server.py b/optillm/server.py
index 17b1579..e5975df 100644
--- a/optillm/server.py
+++ b/optillm/server.py
@@ -714,9 +714,12 @@ def proxy():
     request_config.update({
         "stream": stream,
         "n": n,
-        "response_format": response_format,  # Add response_format to config
     })
 
+    # Only add response_format if it's not None
+    if response_format is not None:
+        request_config['response_format'] = response_format
+
     # Add token limits to request_config with proper priority
     if max_completion_tokens is not None:
         request_config['max_completion_tokens'] = max_completion_tokens
diff --git a/pyproject.toml b/pyproject.toml
index 6a909e6..7e8ded1 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "optillm"
-version = "0.3.6"
+version = "0.3.7"
 description = "An optimizing inference proxy for LLMs."
 readme = "README.md"
 license = "Apache-2.0"