From 55abcf2c87f1a0b495ebc4a7cd5a91faa54fc444 Mon Sep 17 00:00:00 2001 From: Masataro Asai Date: Wed, 1 Oct 2025 11:36:23 -0400 Subject: [PATCH 1/2] refactor: renamed 'format' variable to '_format' in internal methods so that mypy detects it --- mellea/backends/huggingface.py | 26 ++++++++++++++------------ mellea/backends/litellm.py | 24 +++++++++++++----------- mellea/backends/ollama.py | 19 ++++++++++++------- mellea/backends/openai.py | 30 ++++++++++++++++-------------- mellea/backends/watsonx.py | 20 +++++++++++--------- 5 files changed, 66 insertions(+), 53 deletions(-) diff --git a/mellea/backends/huggingface.py b/mellea/backends/huggingface.py index f09b4a04..6da6eece 100644 --- a/mellea/backends/huggingface.py +++ b/mellea/backends/huggingface.py @@ -67,6 +67,8 @@ """ TransformersTorchConfig = tuple[PreTrainedTokenizer, PreTrainedModel, torch.device] +format: int = 1 # typing this variable in order to shadow the global format function and ensure mypy checks for errors + @dataclasses.dataclass class HFAloraCacheInfo: @@ -209,11 +211,11 @@ def generate_from_context( reroute_to_alora = True if reroute_to_alora: mot = self._generate_from_context_alora( - action, ctx, format=format, model_options=model_opts + action, ctx, _format=format, model_options=model_opts ) return mot, ctx.add(mot) mot = self._generate_from_context_standard( - action, ctx, format=format, model_options=model_opts, tool_calls=tool_calls + action, ctx, _format=format, model_options=model_opts, tool_calls=tool_calls ) return mot, ctx.add(action).add(mot) @@ -222,7 +224,7 @@ def _generate_from_context_alora( action: Component | CBlock, ctx: Context, *, - format: type[BaseModelSubclass] | None = None, + _format: type[BaseModelSubclass] | None = None, model_options: dict[str, Any], ) -> ModelOutputThunk: match action: @@ -245,7 +247,7 @@ def _generate_from_context_alora( assert alora_for_this_request is not None assert type(user_message) is str assert type(assistant_message) is str - assert format is None, "Structured outputs are not supported by ALoRAs." + assert _format is None, "Structured outputs are not supported by ALoRAs." alora_output = alora_for_this_request.generate_using_strings( input=user_message, @@ -269,7 +271,7 @@ def _generate_from_context_standard( action: Component | CBlock, ctx: Context, *, - format: type[BaseModelSubclass] | None = None, + _format: type[BaseModelSubclass] | None = None, model_options: dict[str, Any], tool_calls: bool = False, ) -> ModelOutputThunk: @@ -310,7 +312,7 @@ def _generate_from_context_standard( # Append tool call information if applicable. tools: dict[str, Callable] = dict() if tool_calls: - if format: + if _format: FancyLogger.get_logger().warning( f"Tool calling typically uses constrained generation, but you have specified a `format` in your generate call. NB: tool calling is superseded by format; we will NOT call tools for your request: {action}" ) @@ -338,10 +340,10 @@ def _generate_from_context_standard( ).to(self._device) # type: ignore format_kwargs = {} - if format: + if _format: # outlines.generate.json always parses the resulting json into a python dict. 
# We however want to keep it as a json string for later storing it in ModelOutputThunk - schema: dict[str, Any] = format.model_json_schema() + schema: dict[str, Any] = _format.model_json_schema() schema_json: str = json.dumps(schema) regex_str: str = outlines_core.fsm.json_schema.build_regex_from_schema( # type: ignore schema_json @@ -406,7 +408,7 @@ def _generate_from_context_standard( self.post_processing, conversation=ctx_as_conversation, input_ids=input_ids, - format=format, + _format=_format, tool_calls=tool_calls, tools=tools, seed=seed, @@ -463,7 +465,7 @@ async def post_processing( self, mot: ModelOutputThunk, conversation: list[dict], - format: type[BaseModelSubclass] | None, + _format: type[BaseModelSubclass] | None, tool_calls: bool, tools: dict[str, Callable], seed, @@ -494,7 +496,7 @@ async def post_processing( self.cache_put(mot.value, cache_info) # Only scan for tools if we are not doing structured output and tool calls were provided to the model. - if format is None and tool_calls: + if _format is None and tool_calls: mot.tool_calls = self._extract_model_tool_requests(tools, mot.value) assert mot._action is not None, ( @@ -514,7 +516,7 @@ async def post_processing( generate_log.date = datetime.datetime.now() generate_log.model_output = mot.value generate_log.extra = { - "format": format, + "format": _format, "tools_available": tools, "tools_called": mot.tool_calls, "seed": seed, diff --git a/mellea/backends/litellm.py b/mellea/backends/litellm.py index 6219e081..6691ca15 100644 --- a/mellea/backends/litellm.py +++ b/mellea/backends/litellm.py @@ -40,6 +40,8 @@ from mellea.stdlib.chat import Message from mellea.stdlib.requirement import ALoraRequirement +format: int = 1 # typing this variable in order to shadow the global format function and ensure mypy checks for errors + class LiteLLMBackend(FormatterBackend): """A generic LiteLLM compatible backend.""" @@ -121,7 +123,7 @@ def generate_from_context( mot = self._generate_from_chat_context_standard( action, ctx, - format=format, + _format=format, model_options=model_options, tool_calls=tool_calls, ) @@ -213,7 +215,7 @@ def _generate_from_chat_context_standard( action: Component | CBlock, ctx: Context, *, - format: type[BaseModelSubclass] + _format: type[BaseModelSubclass] | None = None, # Type[BaseModelSubclass] is a class object of a subclass of BaseModel model_options: dict | None = None, tool_calls: bool = False, @@ -247,12 +249,12 @@ def _generate_from_chat_context_standard( [OpenAIBackend.message_to_openai_message(m) for m in messages] ) - if format is not None: + if _format is not None: response_format = { "type": "json_schema", "json_schema": { - "name": format.__name__, - "schema": format.model_json_schema(), + "name": _format.__name__, + "schema": _format.model_json_schema(), "strict": True, }, } @@ -265,7 +267,7 @@ def _generate_from_chat_context_standard( thinking = "medium" # Append tool call information if applicable. 
- tools = self._extract_tools(action, format, model_opts, tool_calls, ctx) + tools = self._extract_tools(action, _format, model_opts, tool_calls, ctx) formatted_tools = convert_tools_to_json(tools) if len(tools) > 0 else None model_specific_options = self._make_backend_specific_and_remove(model_opts) @@ -295,7 +297,7 @@ def _generate_from_chat_context_standard( conversation=conversation, tools=tools, thinking=thinking, - format=format, + _format=_format, ) try: @@ -373,7 +375,7 @@ async def post_processing( conversation: list[dict], tools: dict[str, Callable], thinking, - format, + _format, ): """Called when generation is done.""" # Reconstruct the chat_response from chunks if streamed. @@ -418,7 +420,7 @@ async def post_processing( generate_log.date = datetime.datetime.now() generate_log.model_output = mot._meta["litellm_chat_response"] generate_log.extra = { - "format": format, + "format": _format, "tools_available": tools, "tools_called": mot.tool_calls, "seed": thinking, @@ -429,11 +431,11 @@ async def post_processing( @staticmethod def _extract_tools( - action, format, model_opts, tool_calls, ctx + action, _format, model_opts, tool_calls, ctx ) -> dict[str, Callable]: tools: dict[str, Callable] = dict() if tool_calls: - if format: + if _format: FancyLogger.get_logger().warning( f"Tool calling typically uses constrained generation, but you have specified a `format` in your generate call. NB: tool calling is superseded by format; we will NOT call tools for your request: {action}" ) diff --git a/mellea/backends/ollama.py b/mellea/backends/ollama.py index e4e9ad5b..f4d95215 100644 --- a/mellea/backends/ollama.py +++ b/mellea/backends/ollama.py @@ -32,6 +32,8 @@ from mellea.stdlib.chat import Message from mellea.stdlib.requirement import ALoraRequirement +format: int = 1 # typing this variable in order to shadow the global format function and ensure mypy checks for errors + class OllamaModelBackend(FormatterBackend): """A model that uses the Ollama Python SDK for local inference.""" @@ -245,7 +247,7 @@ def generate_from_context( mot = self.generate_from_chat_context( action, ctx, - format=format, + _format=format, model_options=model_options, tool_calls=tool_calls, ) @@ -257,7 +259,7 @@ def generate_from_chat_context( action: Component | CBlock, ctx: Context, *, - format: type[BaseModelSubclass] | None = None, + _format: type[BaseModelSubclass] | None = None, model_options: dict | None = None, tool_calls: bool = False, ) -> ModelOutputThunk: @@ -305,7 +307,7 @@ def generate_from_chat_context( # Append tool call information if applicable. tools: dict[str, Callable] = dict() if tool_calls: - if format: + if _format: FancyLogger.get_logger().warning( f"Tool calling typically uses constrained generation, but you have specified a `format` in your generate call. NB: tool calling is superseded by format; we will NOT call tools for your request: {action}" ) @@ -331,7 +333,7 @@ def generate_from_chat_context( think=model_opts.get(ModelOption.THINKING, None), stream=model_opts.get(ModelOption.STREAM, False), options=self._make_backend_specific_and_remove(model_opts), - format=format.model_json_schema() if format is not None else None, + format=_format.model_json_schema() if _format is not None else None, ) # type: ignore output = ModelOutputThunk(None) @@ -343,7 +345,10 @@ def generate_from_chat_context( # each processing step. 
output._process = functools.partial(self.processing, tools=tools) output._post_process = functools.partial( - self.post_processing, conversation=conversation, tools=tools, format=format + self.post_processing, + conversation=conversation, + tools=tools, + _format=_format, ) try: @@ -506,7 +511,7 @@ async def post_processing( mot: ModelOutputThunk, conversation: list[dict], tools: dict[str, Callable], - format, + _format, ): """Called when generation is done.""" assert mot._action is not None, ( @@ -525,7 +530,7 @@ async def post_processing( generate_log.date = datetime.datetime.now() generate_log.model_output = mot._meta["chat_response"] generate_log.extra = { - "format": format, + "format": _format, "thinking": mot._model_options.get(ModelOption.THINKING, None), "tools_available": tools, "tools_called": mot.tool_calls, diff --git a/mellea/backends/openai.py b/mellea/backends/openai.py index dfea5d1a..b25e4cf1 100644 --- a/mellea/backends/openai.py +++ b/mellea/backends/openai.py @@ -51,6 +51,8 @@ openai_ollama_batching_error = "json: cannot unmarshal array into Go struct field CompletionRequest.prompt of type string" +format: int = 1 # typing this variable in order to shadow the global format function and ensure mypy checks for errors + class _ServerType(Enum): LOCALHOST = 1 @@ -282,7 +284,7 @@ def generate_from_context( mot = self.generate_from_chat_context( action, ctx, - format=format, + _format=format, model_options=model_options, tool_calls=tool_calls, ) @@ -293,7 +295,7 @@ def generate_from_chat_context( action: Component | CBlock, ctx: Context, *, - format: type[BaseModelSubclass] + _format: type[BaseModelSubclass] | None = None, # Type[BaseModelSubclass] is a class object of a subclass of BaseModel model_options: dict | None = None, tool_calls: bool = False, @@ -311,13 +313,13 @@ def generate_from_chat_context( reroute_to_alora = True if reroute_to_alora: return self._generate_from_chat_context_alora( - action, ctx, format=format, model_options=model_options + action, ctx, _format=_format, model_options=model_options ) return self._generate_from_chat_context_standard( action, ctx, - format=format, + _format=_format, model_options=model_options, tool_calls=tool_calls, ) @@ -327,7 +329,7 @@ def _generate_from_chat_context_alora( action: Component | CBlock, ctx: Context, *, - format: type[BaseModelSubclass] + _format: type[BaseModelSubclass] | None = None, # Type[BaseModelSubclass] is a class object of a subclass of BaseModel model_options: dict | None = None, ) -> ModelOutputThunk: @@ -352,7 +354,7 @@ def _generate_from_chat_context_alora( assert alora_for_this_request is not None assert type(user_message) is str assert type(assistant_message) is str - assert format is None, "Structured outputs are not supported by ALoRAs." + assert _format is None, "Structured outputs are not supported by ALoRAs." 
model_opts = self._simplify_and_merge(model_options, is_chat_context=True) @@ -413,7 +415,7 @@ def _generate_from_chat_context_standard( action: Component | CBlock, ctx: Context, *, - format: type[BaseModelSubclass] + _format: type[BaseModelSubclass] | None = None, # Type[BaseModelSubclass] is a class object of a subclass of BaseModel model_options: dict | None = None, tool_calls: bool = False, @@ -442,12 +444,12 @@ def _generate_from_chat_context_standard( conversation.append({"role": "system", "content": system_prompt}) conversation.extend([self.message_to_openai_message(m) for m in messages]) - if format is not None: + if _format is not None: response_format = { "type": "json_schema", "json_schema": { - "name": format.__name__, - "schema": format.model_json_schema(), + "name": _format.__name__, + "schema": _format.model_json_schema(), "strict": True, }, } @@ -457,7 +459,7 @@ def _generate_from_chat_context_standard( # Append tool call information if applicable. tools: dict[str, Callable] = dict() if tool_calls: - if format: + if _format: FancyLogger.get_logger().warning( f"Tool calling typically uses constrained generation, but you have specified a `format` in your generate call. NB: tool calling is superseded by format; we will NOT call tools for your request: {action}" ) @@ -506,7 +508,7 @@ def _generate_from_chat_context_standard( conversation=conversation, thinking=thinking, seed=model_opts.get(ModelOption.SEED, None), - format=format, + _format=_format, ) try: @@ -575,7 +577,7 @@ async def post_processing( conversation: list[dict], thinking, seed, - format, + _format, ): """Called when generation is done.""" # Reconstruct the chat_response from chunks if streamed. @@ -613,7 +615,7 @@ async def post_processing( generate_log.date = datetime.datetime.now() generate_log.model_output = mot._meta["oai_chat_response"] generate_log.extra = { - "format": format, + "format": _format, "thinking": thinking, "tools_available": tools, "tools_called": mot.tool_calls, diff --git a/mellea/backends/watsonx.py b/mellea/backends/watsonx.py index 31d33a4a..81f51c18 100644 --- a/mellea/backends/watsonx.py +++ b/mellea/backends/watsonx.py @@ -40,6 +40,8 @@ from mellea.stdlib.chat import Message from mellea.stdlib.requirement import ALoraRequirement # type: ignore +format: int = 1 # typing this variable in order to shadow the global format function and ensure mypy checks for errors + class WatsonxAIBackend(FormatterBackend): """A generic backend class for watsonx SDK.""" @@ -236,7 +238,7 @@ def generate_from_context( mot = self.generate_from_chat_context( action, ctx, - format=format, + _format=format, model_options=model_options, tool_calls=tool_calls, ) @@ -247,7 +249,7 @@ def generate_from_chat_context( action: Component | CBlock, ctx: Context, *, - format: type[BaseModelSubclass] + _format: type[BaseModelSubclass] | None = None, # Type[BaseModelSubclass] is a class object of a subclass of BaseModel model_options: dict | None = None, tool_calls: bool = False, @@ -278,12 +280,12 @@ def generate_from_chat_context( conversation.append({"role": "system", "content": system_prompt}) conversation.extend([{"role": m.role, "content": m.content} for m in messages]) - if format is not None: + if _format is not None: model_opts["response_format"] = { "type": "json_schema", "json_schema": { - "name": format.__name__, - "schema": format.model_json_schema(), + "name": _format.__name__, + "schema": _format.model_json_schema(), "strict": True, }, } @@ -293,7 +295,7 @@ def generate_from_chat_context( # Append tool call 
information if applicable. tools: dict[str, Callable] = {} if tool_calls: - if format: + if _format: FancyLogger.get_logger().warning( f"tool calling is superseded by format; will not call tools for request: {action}" ) @@ -349,7 +351,7 @@ def generate_from_chat_context( conversation=conversation, tools=tools, seed=model_opts.get(ModelOption.SEED, None), - format=format, + _format=_format, ) try: @@ -417,7 +419,7 @@ async def post_processing( conversation: list[dict], tools: dict[str, Callable], seed, - format, + _format, ): """Called when generation is done.""" # Reconstruct the chat_response from chunks if streamed. @@ -455,7 +457,7 @@ async def post_processing( generate_log.date = datetime.datetime.now() generate_log.model_output = mot._meta["oai_chat_response"] generate_log.extra = { - "format": format, + "format": _format, "tools_available": tools, "tools_called": mot.tool_calls, "seed": seed, From db1e514fd476ad6a7fa5cb4907703b60fd9d2f0f Mon Sep 17 00:00:00 2001 From: Masataro Asai Date: Sat, 11 Oct 2025 21:07:18 -0400 Subject: [PATCH 2/2] fix: use format = None --- mellea/backends/huggingface.py | 2 +- mellea/backends/litellm.py | 2 +- mellea/backends/ollama.py | 2 +- mellea/backends/openai.py | 2 +- mellea/backends/watsonx.py | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/mellea/backends/huggingface.py b/mellea/backends/huggingface.py index 6da6eece..c38c988d 100644 --- a/mellea/backends/huggingface.py +++ b/mellea/backends/huggingface.py @@ -67,7 +67,7 @@ """ TransformersTorchConfig = tuple[PreTrainedTokenizer, PreTrainedModel, torch.device] -format: int = 1 # typing this variable in order to shadow the global format function and ensure mypy checks for errors +format: None = None # typing this variable in order to shadow the global format function and ensure mypy checks for errors @dataclasses.dataclass diff --git a/mellea/backends/litellm.py b/mellea/backends/litellm.py index 6691ca15..21575681 100644 --- a/mellea/backends/litellm.py +++ b/mellea/backends/litellm.py @@ -40,7 +40,7 @@ from mellea.stdlib.chat import Message from mellea.stdlib.requirement import ALoraRequirement -format: int = 1 # typing this variable in order to shadow the global format function and ensure mypy checks for errors +format: None = None # typing this variable in order to shadow the global format function and ensure mypy checks for errors class LiteLLMBackend(FormatterBackend): diff --git a/mellea/backends/ollama.py b/mellea/backends/ollama.py index f4d95215..a72bb8e0 100644 --- a/mellea/backends/ollama.py +++ b/mellea/backends/ollama.py @@ -32,7 +32,7 @@ from mellea.stdlib.chat import Message from mellea.stdlib.requirement import ALoraRequirement -format: int = 1 # typing this variable in order to shadow the global format function and ensure mypy checks for errors +format: None = None # typing this variable in order to shadow the global format function and ensure mypy checks for errors class OllamaModelBackend(FormatterBackend): diff --git a/mellea/backends/openai.py b/mellea/backends/openai.py index b25e4cf1..e3d71d95 100644 --- a/mellea/backends/openai.py +++ b/mellea/backends/openai.py @@ -51,7 +51,7 @@ openai_ollama_batching_error = "json: cannot unmarshal array into Go struct field CompletionRequest.prompt of type string" -format: int = 1 # typing this variable in order to shadow the global format function and ensure mypy checks for errors +format: None = None # typing this variable in order to shadow the global format function and ensure mypy checks for errors class 
_ServerType(Enum): diff --git a/mellea/backends/watsonx.py b/mellea/backends/watsonx.py index 81f51c18..9d95e3f2 100644 --- a/mellea/backends/watsonx.py +++ b/mellea/backends/watsonx.py @@ -40,7 +40,7 @@ from mellea.stdlib.chat import Message from mellea.stdlib.requirement import ALoraRequirement # type: ignore -format: int = 1 # typing this variable in order to shadow the global format function and ensure mypy checks for errors +format: None = None # typing this variable in order to shadow the global format function and ensure mypy checks for errors class WatsonxAIBackend(FormatterBackend):
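
Note on the technique (editor's addition, not part of the patch): the rename and the module-level shadow work together. Once the internal keyword is '_format', any leftover bare 'format' in these modules resolves to a module attribute typed None rather than to the builtin format() function, so mypy flags attribute access on it, and any reference that still reaches runtime evaluates to a falsy None (the same default the renamed parameters use) instead of an always-truthy builtin. The second commit's switch from 'format: int = 1' to 'format: None = None' presumably keeps that runtime fallback harmless. Below is a minimal sketch of the effect; the names StructuredAnswer and _generate_json_* are hypothetical stand-ins, not mellea code.

# sketch_format_shadow.py -- illustrative only; all names here are stand-ins.
from pydantic import BaseModel


class StructuredAnswer(BaseModel):
    answer: str


# The shadow added by this patch: inside this module, a bare `format` now
# resolves to a None-typed module attribute instead of builtins.format.
format: None = None


def _generate_json_buggy(prompt: str, *, _format: type[BaseModel] | None = None) -> dict:
    # Pre-rename leftover: `format` hits the module-level shadow, so mypy reports
    #   error: "None" has no attribute "model_json_schema"  [attr-defined]
    # and at runtime this raises AttributeError instead of silently using the builtin.
    return format.model_json_schema()


def _generate_json_fixed(prompt: str, *, _format: type[BaseModel] | None = None) -> dict:
    # Correct post-rename code: only the renamed keyword parameter is used.
    return _format.model_json_schema() if _format is not None else {}


if __name__ == "__main__":
    print(_generate_json_fixed("give me JSON", _format=StructuredAnswer))

Running mypy over the sketch reports the attr-defined error on _generate_json_buggy, which is exactly the class of leftover reference the rename is intended to surface.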