Commit b15fb6a

fix: add supports_reasoning to model_meta, disabling <thinking> prompting for such models
1 parent a89ca15

File tree: 5 files changed (+44 -19 lines)

gptme/cli.py

Lines changed: 2 additions & 0 deletions

@@ -181,6 +181,7 @@ def main(
 
     config = get_config()
 
+    model = model or config.get_env("MODEL")
     selected_tool_format: ToolFormat = (
         tool_format or config.get_env("TOOL_FORMAT") or "markdown"  # type: ignore
     )
@@ -194,6 +195,7 @@ def main(
             prompt_system,
             interactive=interactive,
             tool_format=selected_tool_format,
+            model=model,
         )
     ]
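
The resolution order is simple precedence: an explicit --model flag wins, otherwise the MODEL value from config/env is used, and the winner is now also threaded into the system-prompt builder so the prompt can adapt to the selected model. A tiny sketch of that precedence (resolve_model is a hypothetical helper, not gptme API; the model names are just examples):

def resolve_model(cli_model: str | None, env_model: str | None) -> str | None:
    # CLI flag takes priority; fall back to MODEL from config/env.
    return cli_model or env_model

assert resolve_model(None, "anthropic/claude-3-5-sonnet-20241022") == "anthropic/claude-3-5-sonnet-20241022"
assert resolve_model("openai/gpt-4o", "anthropic/claude-3-5-sonnet-20241022") == "openai/gpt-4o"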

gptme/llm/__init__.py

Lines changed: 1 addition & 1 deletion

@@ -130,7 +130,7 @@ def print_clear(length: int = 0):
             if not output:  # first character
                 first_token_time = time.time()
                 print_clear()
-                rprint(f"{PROMPT_ASSISTANT}: ", end="")
+                rprint(f"{PROMPT_ASSISTANT}: \n", end="")
 
             # Check for thinking tags before printing a newline
             if char == "\n" or not output:
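
This pairs with the llm_anthropic.py change below: <think> blocks are now yielded without a leading newline, so the assistant label itself ends with one to keep the tag on its own line. A compressed sketch of the first-token behaviour (assuming rprint is rich's print; PROMPT_ASSISTANT here is a placeholder for gptme's actual label, and the driver loop is simplified):

from rich import print as rprint

PROMPT_ASSISTANT = "Assistant"  # placeholder; gptme defines its own

def print_stream(chunks: list[str]) -> None:
    output = ""
    for char in "".join(chunks):
        if not output:  # first character: print the header once
            # trailing "\n" so a leading "<think>" tag starts on a fresh line
            rprint(f"{PROMPT_ASSISTANT}: \n", end="")
        output += char
        print(char, end="")
    print()

print_stream(["<think>\n", "plan the fix\n", "</think>\n", "done"])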

gptme/llm/llm_anthropic.py

Lines changed: 18 additions & 6 deletions

@@ -118,17 +118,23 @@ def chat(messages: list[Message], model: str, tools: list[ToolSpec] | None) -> str:
 
     model_meta = get_model(f"anthropic/{model}")
     use_thinking = _should_use_thinking(model, tools)
+    thinking_budget = 16000
+    max_tokens = (model_meta.max_output or 4096) + (
+        thinking_budget if use_thinking else 0
+    )
 
     response = _anthropic.messages.create(
         model=model,
         messages=messages_dicts,
         system=system_messages,
         temperature=TEMPERATURE if not use_thinking else 1,
         top_p=TOP_P if not use_thinking else NOT_GIVEN,
-        max_tokens=model_meta.max_output or 4096,
+        max_tokens=max_tokens,
         tools=tools_dict if tools_dict else NOT_GIVEN,
         thinking=(
-            {"type": "enabled", "budget_tokens": 16000} if use_thinking else NOT_GIVEN
+            {"type": "enabled", "budget_tokens": thinking_budget}
+            if use_thinking
+            else NOT_GIVEN
         ),
     )
     content = response.content
@@ -162,17 +168,23 @@ def stream(
 
     model_meta = get_model(f"anthropic/{model}")
     use_thinking = _should_use_thinking(model, tools)
+    thinking_budget = 16000
+    max_tokens = (model_meta.max_output or 4096) + (
+        thinking_budget if use_thinking else 0
+    )
 
     with _anthropic.messages.stream(
         model=model,
         messages=messages_dicts,
         system=system_messages,
         temperature=TEMPERATURE if not use_thinking else 1,
         top_p=TOP_P if not use_thinking else NOT_GIVEN,
-        max_tokens=model_meta.max_output or 4096,
+        max_tokens=max_tokens,
         tools=tools_dict if tools_dict else NOT_GIVEN,
         thinking=(
-            {"type": "enabled", "budget_tokens": 16000} if use_thinking else NOT_GIVEN
+            {"type": "enabled", "budget_tokens": thinking_budget}
+            if use_thinking
+            else NOT_GIVEN
         ),
     ) as stream:
         for chunk in stream:
@@ -184,9 +196,9 @@ def stream(
                 tool_use = block
                 yield f"\n@{tool_use.name}({tool_use.id}): "
             elif isinstance(block, anthropic.types.ThinkingBlock):
-                yield "\n<think>\n"
+                yield "<think>\n"
             elif isinstance(block, anthropic.types.RedactedThinkingBlock):
-                yield "\n<think redacted>\n"
+                yield "<think redacted>\n"
             elif isinstance(block, anthropic.types.TextBlock):
                 if block.text:
                     logger.warning("unexpected text block: %s", block.text)
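
The budget arithmetic matters because Anthropic counts extended-thinking tokens against max_tokens (and rejects requests where max_tokens is not larger than budget_tokens), so the budget is added on top of the model's normal output cap to avoid starving the visible answer; the API likewise requires temperature=1 and no top_p while thinking is enabled, which the existing conditionals handle. A condensed sketch of the parameter logic (TEMPERATURE and TOP_P values here are placeholders for gptme's constants):

from anthropic import NOT_GIVEN

TEMPERATURE, TOP_P = 0.0, 0.95  # placeholder values

def request_params(max_output: int | None, use_thinking: bool, thinking_budget: int = 16000) -> dict:
    # Thinking tokens count toward max_tokens, so add the budget on top
    # of the normal output cap when extended thinking is enabled.
    return {
        "max_tokens": (max_output or 4096) + (thinking_budget if use_thinking else 0),
        "temperature": 1 if use_thinking else TEMPERATURE,
        "top_p": NOT_GIVEN if use_thinking else TOP_P,
        "thinking": (
            {"type": "enabled", "budget_tokens": thinking_budget}
            if use_thinking
            else NOT_GIVEN
        ),
    }

assert request_params(8192, use_thinking=True)["max_tokens"] == 24192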

gptme/llm/models.py

Lines changed: 6 additions & 2 deletions

@@ -50,6 +50,7 @@ class ModelMeta:
     max_output: int | None = None
     supports_streaming: bool = True
     supports_vision: bool = False
+    supports_reasoning: bool = False  # models which support reasoning do not need prompting to use <thinking> tags
 
     # price in USD per 1M tokens
     # if price is not set, it is assumed to be 0
@@ -66,13 +67,15 @@ def full(self) -> str:
 class _ModelDictMeta(TypedDict):
     context: int
     max_output: NotRequired[int]
-    supports_streaming: NotRequired[bool]
-    supports_vision: NotRequired[bool]
 
     # price in USD per 1M tokens
     price_input: NotRequired[float]
     price_output: NotRequired[float]
 
+    supports_streaming: NotRequired[bool]
+    supports_vision: NotRequired[bool]
+    supports_reasoning: NotRequired[bool]
+
     knowledge_cutoff: NotRequired[datetime]
 
 
@@ -92,6 +95,7 @@ class _ModelDictMeta(TypedDict):
         "price_input": 3,
         "price_output": 15,
         "supports_vision": True,
+        "supports_reasoning": True,
         "knowledge_cutoff": datetime(2024, 10, 1),
     },
     "claude-3-5-sonnet-20241022": {

gptme/prompts.py

Lines changed: 17 additions & 10 deletions

@@ -15,6 +15,7 @@
 from .__version__ import __version__
 from .config import get_config, get_project_config
 from .dirs import get_project_git_dir
+from .llm.models import get_model
 from .message import Message
 from .tools import ToolFormat
 from .util import document_prompt_function
@@ -28,13 +29,14 @@ def get_prompt(
     prompt: PromptType | str = "full",
     interactive: bool = True,
     tool_format: ToolFormat = "markdown",
+    model: str | None = None,
 ) -> Message:
     """
     Get the initial system prompt.
     """
     msgs: Iterable
     if prompt == "full":
-        msgs = prompt_full(interactive, tool_format)
+        msgs = prompt_full(interactive, tool_format, model)
     elif prompt == "short":
         msgs = prompt_short(interactive, tool_format)
     else:
@@ -56,10 +58,10 @@ def _join_messages(msgs: list[Message]) -> Message:
 
 
 def prompt_full(
-    interactive: bool, tool_format: ToolFormat
+    interactive: bool, tool_format: ToolFormat, model: str | None
 ) -> Generator[Message, None, None]:
     """Full prompt to start the conversation."""
-    yield from prompt_gptme(interactive)
+    yield from prompt_gptme(interactive, model)
     yield from prompt_tools(tool_format=tool_format)
     if interactive:
         yield from prompt_user()
@@ -79,7 +81,9 @@ def prompt_short(
     yield from prompt_project()
 
 
-def prompt_gptme(interactive: bool) -> Generator[Message, None, None]:
+def prompt_gptme(
+    interactive: bool, model: str | None = None
+) -> Generator[Message, None, None]:
     """
     Base system prompt for gptme.
 
@@ -90,20 +94,23 @@ def prompt_gptme(interactive: bool) -> Generator[Message, None, None]:
     - Not mention tools which may not be loaded (browser, vision)
     - Mention the ability to self-correct and ask clarifying questions
     """
+    model_meta = get_model(model)
+
+    # use <thinking> tags as a fallback if the model doesn't natively support reasoning
+    use_thinking_tags = not model_meta.supports_reasoning
 
     default_base_prompt = f"""
-You are gptme v{__version__}, a general-purpose AI assistant powered by LLMs.
+You are gptme v{__version__}, a general-purpose AI assistant powered by LLMs. {('Currently using model: ' + model_meta.full) if model_meta else ''}
 You are designed to help users with programming tasks, such as writing code, debugging, and learning new concepts.
 You can run code, execute terminal commands, and access the filesystem on the local machine.
 You will help the user with writing code, either from scratch or in existing projects.
-You will think step by step when solving a problem, in `<thinking>` tags.
+{'You will think step by step when solving a problem, in `<thinking>` tags.' if use_thinking_tags else ''}
 Break down complex tasks into smaller, manageable steps.
 
-You have the ability to self-correct.
-If you receive feedback that your output or actions were incorrect, you should:
+You have the ability to self-correct. {'''If you receive feedback that your output or actions were incorrect, you should:
 - acknowledge the mistake
 - analyze what went wrong in `<thinking>` tags
-- provide a corrected response
+- provide a corrected response''' if use_thinking_tags else ''}
 
 You should learn about the context needed to provide the best help,
 such as exploring the current working directory and reading the code using terminal tools.
@@ -125,7 +132,7 @@ def prompt_gptme(interactive: bool) -> Generator[Message, None, None]:
 
 Maintain a professional and efficient communication style. Be concise but thorough in your explanations.
 
-Use `<thinking>` tags to think before you answer.
+{'Use `<thinking>` tags to think before you answer.' if use_thinking_tags else ''}
 """.strip()
 
     interactive_prompt = """
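
The net effect is that every <thinking> instruction in the base prompt is gated on use_thinking_tags, so native-reasoning models (which emit their own thinking blocks) are not also prompted to imitate the tags in plain text. A standalone sketch of the gating pattern (version and model name are made up for illustration):

def base_prompt(version: str, model_name: str | None, supports_reasoning: bool) -> str:
    # Fall back to <thinking>-tag prompting only for non-reasoning models.
    use_thinking_tags = not supports_reasoning
    return f"""
You are gptme v{version}, a general-purpose AI assistant powered by LLMs. {('Currently using model: ' + model_name) if model_name else ''}
{'You will think step by step when solving a problem, in `<thinking>` tags.' if use_thinking_tags else ''}
""".strip()

# A reasoning model gets a prompt with no <thinking> instructions at all:
assert "<thinking>" not in base_prompt("0.27.0", "anthropic/claude-3-7-sonnet-20250219", True)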
