From 55abcf2c87f1a0b495ebc4a7cd5a91faa54fc444 Mon Sep 17 00:00:00 2001 From: Masataro Asai Date: Wed, 1 Oct 2025 11:36:23 -0400 Subject: [PATCH 1/2] refactor: renamed 'format' variable to '_format' in internal methods so that mypy detects it --- mellea/backends/huggingface.py | 26 ++++++++++++++------------ mellea/backends/litellm.py | 24 +++++++++++++----------- mellea/backends/ollama.py | 19 ++++++++++++------- mellea/backends/openai.py | 30 ++++++++++++++++-------------- mellea/backends/watsonx.py | 20 +++++++++++--------- 5 files changed, 66 insertions(+), 53 deletions(-) diff --git a/mellea/backends/huggingface.py b/mellea/backends/huggingface.py index f09b4a04..6da6eece 100644 --- a/mellea/backends/huggingface.py +++ b/mellea/backends/huggingface.py @@ -67,6 +67,8 @@ """ TransformersTorchConfig = tuple[PreTrainedTokenizer, PreTrainedModel, torch.device] +format: int = 1 # typing this variable in order to shadow the global format function and ensure mypy checks for errors + @dataclasses.dataclass class HFAloraCacheInfo: @@ -209,11 +211,11 @@ def generate_from_context( reroute_to_alora = True if reroute_to_alora: mot = self._generate_from_context_alora( - action, ctx, format=format, model_options=model_opts + action, ctx, _format=format, model_options=model_opts ) return mot, ctx.add(mot) mot = self._generate_from_context_standard( - action, ctx, format=format, model_options=model_opts, tool_calls=tool_calls + action, ctx, _format=format, model_options=model_opts, tool_calls=tool_calls ) return mot, ctx.add(action).add(mot) @@ -222,7 +224,7 @@ def _generate_from_context_alora( action: Component | CBlock, ctx: Context, *, - format: type[BaseModelSubclass] | None = None, + _format: type[BaseModelSubclass] | None = None, model_options: dict[str, Any], ) -> ModelOutputThunk: match action: @@ -245,7 +247,7 @@ def _generate_from_context_alora( assert alora_for_this_request is not None assert type(user_message) is str assert type(assistant_message) is str - assert format is None, "Structured outputs are not supported by ALoRAs." + assert _format is None, "Structured outputs are not supported by ALoRAs." alora_output = alora_for_this_request.generate_using_strings( input=user_message, @@ -269,7 +271,7 @@ def _generate_from_context_standard( action: Component | CBlock, ctx: Context, *, - format: type[BaseModelSubclass] | None = None, + _format: type[BaseModelSubclass] | None = None, model_options: dict[str, Any], tool_calls: bool = False, ) -> ModelOutputThunk: @@ -310,7 +312,7 @@ def _generate_from_context_standard( # Append tool call information if applicable. tools: dict[str, Callable] = dict() if tool_calls: - if format: + if _format: FancyLogger.get_logger().warning( f"Tool calling typically uses constrained generation, but you have specified a `format` in your generate call. NB: tool calling is superseded by format; we will NOT call tools for your request: {action}" ) @@ -338,10 +340,10 @@ def _generate_from_context_standard( ).to(self._device) # type: ignore format_kwargs = {} - if format: + if _format: # outlines.generate.json always parses the resulting json into a python dict. 
# We however want to keep it as a json string for later storing it in ModelOutputThunk - schema: dict[str, Any] = format.model_json_schema() + schema: dict[str, Any] = _format.model_json_schema() schema_json: str = json.dumps(schema) regex_str: str = outlines_core.fsm.json_schema.build_regex_from_schema( # type: ignore schema_json @@ -406,7 +408,7 @@ def _generate_from_context_standard( self.post_processing, conversation=ctx_as_conversation, input_ids=input_ids, - format=format, + _format=_format, tool_calls=tool_calls, tools=tools, seed=seed, @@ -463,7 +465,7 @@ async def post_processing( self, mot: ModelOutputThunk, conversation: list[dict], - format: type[BaseModelSubclass] | None, + _format: type[BaseModelSubclass] | None, tool_calls: bool, tools: dict[str, Callable], seed, @@ -494,7 +496,7 @@ async def post_processing( self.cache_put(mot.value, cache_info) # Only scan for tools if we are not doing structured output and tool calls were provided to the model. - if format is None and tool_calls: + if _format is None and tool_calls: mot.tool_calls = self._extract_model_tool_requests(tools, mot.value) assert mot._action is not None, ( @@ -514,7 +516,7 @@ async def post_processing( generate_log.date = datetime.datetime.now() generate_log.model_output = mot.value generate_log.extra = { - "format": format, + "format": _format, "tools_available": tools, "tools_called": mot.tool_calls, "seed": seed, diff --git a/mellea/backends/litellm.py b/mellea/backends/litellm.py index 6219e081..6691ca15 100644 --- a/mellea/backends/litellm.py +++ b/mellea/backends/litellm.py @@ -40,6 +40,8 @@ from mellea.stdlib.chat import Message from mellea.stdlib.requirement import ALoraRequirement +format: int = 1 # typing this variable in order to shadow the global format function and ensure mypy checks for errors + class LiteLLMBackend(FormatterBackend): """A generic LiteLLM compatible backend.""" @@ -121,7 +123,7 @@ def generate_from_context( mot = self._generate_from_chat_context_standard( action, ctx, - format=format, + _format=format, model_options=model_options, tool_calls=tool_calls, ) @@ -213,7 +215,7 @@ def _generate_from_chat_context_standard( action: Component | CBlock, ctx: Context, *, - format: type[BaseModelSubclass] + _format: type[BaseModelSubclass] | None = None, # Type[BaseModelSubclass] is a class object of a subclass of BaseModel model_options: dict | None = None, tool_calls: bool = False, @@ -247,12 +249,12 @@ def _generate_from_chat_context_standard( [OpenAIBackend.message_to_openai_message(m) for m in messages] ) - if format is not None: + if _format is not None: response_format = { "type": "json_schema", "json_schema": { - "name": format.__name__, - "schema": format.model_json_schema(), + "name": _format.__name__, + "schema": _format.model_json_schema(), "strict": True, }, } @@ -265,7 +267,7 @@ def _generate_from_chat_context_standard( thinking = "medium" # Append tool call information if applicable. 
- tools = self._extract_tools(action, format, model_opts, tool_calls, ctx) + tools = self._extract_tools(action, _format, model_opts, tool_calls, ctx) formatted_tools = convert_tools_to_json(tools) if len(tools) > 0 else None model_specific_options = self._make_backend_specific_and_remove(model_opts) @@ -295,7 +297,7 @@ def _generate_from_chat_context_standard( conversation=conversation, tools=tools, thinking=thinking, - format=format, + _format=_format, ) try: @@ -373,7 +375,7 @@ async def post_processing( conversation: list[dict], tools: dict[str, Callable], thinking, - format, + _format, ): """Called when generation is done.""" # Reconstruct the chat_response from chunks if streamed. @@ -418,7 +420,7 @@ async def post_processing( generate_log.date = datetime.datetime.now() generate_log.model_output = mot._meta["litellm_chat_response"] generate_log.extra = { - "format": format, + "format": _format, "tools_available": tools, "tools_called": mot.tool_calls, "seed": thinking, @@ -429,11 +431,11 @@ async def post_processing( @staticmethod def _extract_tools( - action, format, model_opts, tool_calls, ctx + action, _format, model_opts, tool_calls, ctx ) -> dict[str, Callable]: tools: dict[str, Callable] = dict() if tool_calls: - if format: + if _format: FancyLogger.get_logger().warning( f"Tool calling typically uses constrained generation, but you have specified a `format` in your generate call. NB: tool calling is superseded by format; we will NOT call tools for your request: {action}" ) diff --git a/mellea/backends/ollama.py b/mellea/backends/ollama.py index e4e9ad5b..f4d95215 100644 --- a/mellea/backends/ollama.py +++ b/mellea/backends/ollama.py @@ -32,6 +32,8 @@ from mellea.stdlib.chat import Message from mellea.stdlib.requirement import ALoraRequirement +format: int = 1 # typing this variable in order to shadow the global format function and ensure mypy checks for errors + class OllamaModelBackend(FormatterBackend): """A model that uses the Ollama Python SDK for local inference.""" @@ -245,7 +247,7 @@ def generate_from_context( mot = self.generate_from_chat_context( action, ctx, - format=format, + _format=format, model_options=model_options, tool_calls=tool_calls, ) @@ -257,7 +259,7 @@ def generate_from_chat_context( action: Component | CBlock, ctx: Context, *, - format: type[BaseModelSubclass] | None = None, + _format: type[BaseModelSubclass] | None = None, model_options: dict | None = None, tool_calls: bool = False, ) -> ModelOutputThunk: @@ -305,7 +307,7 @@ def generate_from_chat_context( # Append tool call information if applicable. tools: dict[str, Callable] = dict() if tool_calls: - if format: + if _format: FancyLogger.get_logger().warning( f"Tool calling typically uses constrained generation, but you have specified a `format` in your generate call. NB: tool calling is superseded by format; we will NOT call tools for your request: {action}" ) @@ -331,7 +333,7 @@ def generate_from_chat_context( think=model_opts.get(ModelOption.THINKING, None), stream=model_opts.get(ModelOption.STREAM, False), options=self._make_backend_specific_and_remove(model_opts), - format=format.model_json_schema() if format is not None else None, + format=_format.model_json_schema() if _format is not None else None, ) # type: ignore output = ModelOutputThunk(None) @@ -343,7 +345,10 @@ def generate_from_chat_context( # each processing step. 
output._process = functools.partial(self.processing, tools=tools) output._post_process = functools.partial( - self.post_processing, conversation=conversation, tools=tools, format=format + self.post_processing, + conversation=conversation, + tools=tools, + _format=_format, ) try: @@ -506,7 +511,7 @@ async def post_processing( mot: ModelOutputThunk, conversation: list[dict], tools: dict[str, Callable], - format, + _format, ): """Called when generation is done.""" assert mot._action is not None, ( @@ -525,7 +530,7 @@ async def post_processing( generate_log.date = datetime.datetime.now() generate_log.model_output = mot._meta["chat_response"] generate_log.extra = { - "format": format, + "format": _format, "thinking": mot._model_options.get(ModelOption.THINKING, None), "tools_available": tools, "tools_called": mot.tool_calls, diff --git a/mellea/backends/openai.py b/mellea/backends/openai.py index dfea5d1a..b25e4cf1 100644 --- a/mellea/backends/openai.py +++ b/mellea/backends/openai.py @@ -51,6 +51,8 @@ openai_ollama_batching_error = "json: cannot unmarshal array into Go struct field CompletionRequest.prompt of type string" +format: int = 1 # typing this variable in order to shadow the global format function and ensure mypy checks for errors + class _ServerType(Enum): LOCALHOST = 1 @@ -282,7 +284,7 @@ def generate_from_context( mot = self.generate_from_chat_context( action, ctx, - format=format, + _format=format, model_options=model_options, tool_calls=tool_calls, ) @@ -293,7 +295,7 @@ def generate_from_chat_context( action: Component | CBlock, ctx: Context, *, - format: type[BaseModelSubclass] + _format: type[BaseModelSubclass] | None = None, # Type[BaseModelSubclass] is a class object of a subclass of BaseModel model_options: dict | None = None, tool_calls: bool = False, @@ -311,13 +313,13 @@ def generate_from_chat_context( reroute_to_alora = True if reroute_to_alora: return self._generate_from_chat_context_alora( - action, ctx, format=format, model_options=model_options + action, ctx, _format=_format, model_options=model_options ) return self._generate_from_chat_context_standard( action, ctx, - format=format, + _format=_format, model_options=model_options, tool_calls=tool_calls, ) @@ -327,7 +329,7 @@ def _generate_from_chat_context_alora( action: Component | CBlock, ctx: Context, *, - format: type[BaseModelSubclass] + _format: type[BaseModelSubclass] | None = None, # Type[BaseModelSubclass] is a class object of a subclass of BaseModel model_options: dict | None = None, ) -> ModelOutputThunk: @@ -352,7 +354,7 @@ def _generate_from_chat_context_alora( assert alora_for_this_request is not None assert type(user_message) is str assert type(assistant_message) is str - assert format is None, "Structured outputs are not supported by ALoRAs." + assert _format is None, "Structured outputs are not supported by ALoRAs." 
model_opts = self._simplify_and_merge(model_options, is_chat_context=True) @@ -413,7 +415,7 @@ def _generate_from_chat_context_standard( action: Component | CBlock, ctx: Context, *, - format: type[BaseModelSubclass] + _format: type[BaseModelSubclass] | None = None, # Type[BaseModelSubclass] is a class object of a subclass of BaseModel model_options: dict | None = None, tool_calls: bool = False, @@ -442,12 +444,12 @@ def _generate_from_chat_context_standard( conversation.append({"role": "system", "content": system_prompt}) conversation.extend([self.message_to_openai_message(m) for m in messages]) - if format is not None: + if _format is not None: response_format = { "type": "json_schema", "json_schema": { - "name": format.__name__, - "schema": format.model_json_schema(), + "name": _format.__name__, + "schema": _format.model_json_schema(), "strict": True, }, } @@ -457,7 +459,7 @@ def _generate_from_chat_context_standard( # Append tool call information if applicable. tools: dict[str, Callable] = dict() if tool_calls: - if format: + if _format: FancyLogger.get_logger().warning( f"Tool calling typically uses constrained generation, but you have specified a `format` in your generate call. NB: tool calling is superseded by format; we will NOT call tools for your request: {action}" ) @@ -506,7 +508,7 @@ def _generate_from_chat_context_standard( conversation=conversation, thinking=thinking, seed=model_opts.get(ModelOption.SEED, None), - format=format, + _format=_format, ) try: @@ -575,7 +577,7 @@ async def post_processing( conversation: list[dict], thinking, seed, - format, + _format, ): """Called when generation is done.""" # Reconstruct the chat_response from chunks if streamed. @@ -613,7 +615,7 @@ async def post_processing( generate_log.date = datetime.datetime.now() generate_log.model_output = mot._meta["oai_chat_response"] generate_log.extra = { - "format": format, + "format": _format, "thinking": thinking, "tools_available": tools, "tools_called": mot.tool_calls, diff --git a/mellea/backends/watsonx.py b/mellea/backends/watsonx.py index 31d33a4a..81f51c18 100644 --- a/mellea/backends/watsonx.py +++ b/mellea/backends/watsonx.py @@ -40,6 +40,8 @@ from mellea.stdlib.chat import Message from mellea.stdlib.requirement import ALoraRequirement # type: ignore +format: int = 1 # typing this variable in order to shadow the global format function and ensure mypy checks for errors + class WatsonxAIBackend(FormatterBackend): """A generic backend class for watsonx SDK.""" @@ -236,7 +238,7 @@ def generate_from_context( mot = self.generate_from_chat_context( action, ctx, - format=format, + _format=format, model_options=model_options, tool_calls=tool_calls, ) @@ -247,7 +249,7 @@ def generate_from_chat_context( action: Component | CBlock, ctx: Context, *, - format: type[BaseModelSubclass] + _format: type[BaseModelSubclass] | None = None, # Type[BaseModelSubclass] is a class object of a subclass of BaseModel model_options: dict | None = None, tool_calls: bool = False, @@ -278,12 +280,12 @@ def generate_from_chat_context( conversation.append({"role": "system", "content": system_prompt}) conversation.extend([{"role": m.role, "content": m.content} for m in messages]) - if format is not None: + if _format is not None: model_opts["response_format"] = { "type": "json_schema", "json_schema": { - "name": format.__name__, - "schema": format.model_json_schema(), + "name": _format.__name__, + "schema": _format.model_json_schema(), "strict": True, }, } @@ -293,7 +295,7 @@ def generate_from_chat_context( # Append tool call 
information if applicable. tools: dict[str, Callable] = {} if tool_calls: - if format: + if _format: FancyLogger.get_logger().warning( f"tool calling is superseded by format; will not call tools for request: {action}" ) @@ -349,7 +351,7 @@ def generate_from_chat_context( conversation=conversation, tools=tools, seed=model_opts.get(ModelOption.SEED, None), - format=format, + _format=_format, ) try: @@ -417,7 +419,7 @@ async def post_processing( conversation: list[dict], tools: dict[str, Callable], seed, - format, + _format, ): """Called when generation is done.""" # Reconstruct the chat_response from chunks if streamed. @@ -455,7 +457,7 @@ async def post_processing( generate_log.date = datetime.datetime.now() generate_log.model_output = mot._meta["oai_chat_response"] generate_log.extra = { - "format": format, + "format": _format, "tools_available": tools, "tools_called": mot.tool_calls, "seed": seed, From db1e514fd476ad6a7fa5cb4907703b60fd9d2f0f Mon Sep 17 00:00:00 2001 From: Masataro Asai Date: Sat, 11 Oct 2025 21:07:18 -0400 Subject: [PATCH 2/2] fix: use format = None --- mellea/backends/huggingface.py | 2 +- mellea/backends/litellm.py | 2 +- mellea/backends/ollama.py | 2 +- mellea/backends/openai.py | 2 +- mellea/backends/watsonx.py | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/mellea/backends/huggingface.py b/mellea/backends/huggingface.py index 6da6eece..c38c988d 100644 --- a/mellea/backends/huggingface.py +++ b/mellea/backends/huggingface.py @@ -67,7 +67,7 @@ """ TransformersTorchConfig = tuple[PreTrainedTokenizer, PreTrainedModel, torch.device] -format: int = 1 # typing this variable in order to shadow the global format function and ensure mypy checks for errors +format: None = None # typing this variable in order to shadow the global format function and ensure mypy checks for errors @dataclasses.dataclass diff --git a/mellea/backends/litellm.py b/mellea/backends/litellm.py index 6691ca15..21575681 100644 --- a/mellea/backends/litellm.py +++ b/mellea/backends/litellm.py @@ -40,7 +40,7 @@ from mellea.stdlib.chat import Message from mellea.stdlib.requirement import ALoraRequirement -format: int = 1 # typing this variable in order to shadow the global format function and ensure mypy checks for errors +format: None = None # typing this variable in order to shadow the global format function and ensure mypy checks for errors class LiteLLMBackend(FormatterBackend): diff --git a/mellea/backends/ollama.py b/mellea/backends/ollama.py index f4d95215..a72bb8e0 100644 --- a/mellea/backends/ollama.py +++ b/mellea/backends/ollama.py @@ -32,7 +32,7 @@ from mellea.stdlib.chat import Message from mellea.stdlib.requirement import ALoraRequirement -format: int = 1 # typing this variable in order to shadow the global format function and ensure mypy checks for errors +format: None = None # typing this variable in order to shadow the global format function and ensure mypy checks for errors class OllamaModelBackend(FormatterBackend): diff --git a/mellea/backends/openai.py b/mellea/backends/openai.py index b25e4cf1..e3d71d95 100644 --- a/mellea/backends/openai.py +++ b/mellea/backends/openai.py @@ -51,7 +51,7 @@ openai_ollama_batching_error = "json: cannot unmarshal array into Go struct field CompletionRequest.prompt of type string" -format: int = 1 # typing this variable in order to shadow the global format function and ensure mypy checks for errors +format: None = None # typing this variable in order to shadow the global format function and ensure mypy checks for errors class 
_ServerType(Enum): diff --git a/mellea/backends/watsonx.py b/mellea/backends/watsonx.py index 81f51c18..9d95e3f2 100644 --- a/mellea/backends/watsonx.py +++ b/mellea/backends/watsonx.py @@ -40,7 +40,7 @@ from mellea.stdlib.chat import Message from mellea.stdlib.requirement import ALoraRequirement # type: ignore -format: int = 1 # typing this variable in order to shadow the global format function and ensure mypy checks for errors +format: None = None # typing this variable in order to shadow the global format function and ensure mypy checks for errors class WatsonxAIBackend(FormatterBackend):
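
Note on the technique (editor's addition, not part of the patch): the rename and the module-level shadow work together. Once the internal keyword is '_format', any leftover bare 'format' in these modules resolves to a module attribute typed None rather than to the builtin format() function, so mypy flags attribute access on it, and any reference that still reaches runtime evaluates to a falsy None (the same default the renamed parameters use) instead of an always-truthy builtin. The second commit's switch from 'format: int = 1' to 'format: None = None' presumably keeps that runtime fallback harmless. Below is a minimal sketch of the effect; the names StructuredAnswer and _generate_json_* are hypothetical stand-ins, not mellea code.

# sketch_format_shadow.py -- illustrative only; all names here are stand-ins.
from pydantic import BaseModel


class StructuredAnswer(BaseModel):
    answer: str


# The shadow added by this patch: inside this module, a bare `format` now
# resolves to a None-typed module attribute instead of builtins.format.
format: None = None


def _generate_json_buggy(prompt: str, *, _format: type[BaseModel] | None = None) -> dict:
    # Pre-rename leftover: `format` hits the module-level shadow, so mypy reports
    #   error: "None" has no attribute "model_json_schema"  [attr-defined]
    # and at runtime this raises AttributeError instead of silently using the builtin.
    return format.model_json_schema()


def _generate_json_fixed(prompt: str, *, _format: type[BaseModel] | None = None) -> dict:
    # Correct post-rename code: only the renamed keyword parameter is used.
    return _format.model_json_schema() if _format is not None else {}


if __name__ == "__main__":
    print(_generate_json_fixed("give me JSON", _format=StructuredAnswer))

Running mypy over the sketch reports the attr-defined error on _generate_json_buggy, which is exactly the class of leftover reference the rename is intended to surface.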