Commit b15fb6a

fix: add supports_reasoning to model_meta, disabling <thinking> prompting for such models
1 parent a89ca15

File tree: 5 files changed (+44 -19 lines)

gptme/cli.py

Lines changed: 2 additions & 0 deletions

@@ -181,6 +181,7 @@ def main(
 
     config = get_config()
 
+    model = model or config.get_env("MODEL")
     selected_tool_format: ToolFormat = (
         tool_format or config.get_env("TOOL_FORMAT") or "markdown"  # type: ignore
     )
@@ -194,6 +195,7 @@ def main(
             prompt_system,
             interactive=interactive,
             tool_format=selected_tool_format,
+            model=model,
         )
     ]
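
The resolution order is simple precedence: an explicit --model flag wins, otherwise the MODEL value from config/env is used, and the winner is now also threaded into the system-prompt builder so the prompt can adapt to the selected model. A tiny sketch of that precedence (resolve_model is a hypothetical helper, not gptme API; the model names are just examples):

def resolve_model(cli_model: str | None, env_model: str | None) -> str | None:
    # CLI flag takes priority; fall back to MODEL from config/env.
    return cli_model or env_model

assert resolve_model(None, "anthropic/claude-3-5-sonnet-20241022") == "anthropic/claude-3-5-sonnet-20241022"
assert resolve_model("openai/gpt-4o", "anthropic/claude-3-5-sonnet-20241022") == "openai/gpt-4o"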

gptme/llm/__init__.py

Lines changed: 1 addition & 1 deletion

@@ -130,7 +130,7 @@ def print_clear(length: int = 0):
             if not output:  # first character
                 first_token_time = time.time()
                 print_clear()
-                rprint(f"{PROMPT_ASSISTANT}: ", end="")
+                rprint(f"{PROMPT_ASSISTANT}: \n", end="")
 
             # Check for thinking tags before printing a newline
             if char == "\n" or not output:
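
This pairs with the llm_anthropic.py change below: <think> blocks are now yielded without a leading newline, so the assistant label itself ends with one to keep the tag on its own line. A compressed sketch of the first-token behaviour (assuming rprint is rich's print; PROMPT_ASSISTANT here is a placeholder for gptme's actual label, and the driver loop is simplified):

from rich import print as rprint

PROMPT_ASSISTANT = "Assistant"  # placeholder; gptme defines its own

def print_stream(chunks: list[str]) -> None:
    output = ""
    for char in "".join(chunks):
        if not output:  # first character: print the header once
            # trailing "\n" so a leading "<think>" tag starts on a fresh line
            rprint(f"{PROMPT_ASSISTANT}: \n", end="")
        output += char
        print(char, end="")
    print()

print_stream(["<think>\n", "plan the fix\n", "</think>\n", "done"])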

gptme/llm/llm_anthropic.py

Lines changed: 18 additions & 6 deletions

@@ -118,17 +118,23 @@ def chat(messages: list[Message], model: str, tools: list[ToolSpec] | None) -> str:
 
     model_meta = get_model(f"anthropic/{model}")
     use_thinking = _should_use_thinking(model, tools)
+    thinking_budget = 16000
+    max_tokens = (model_meta.max_output or 4096) + (
+        thinking_budget if use_thinking else 0
+    )
 
     response = _anthropic.messages.create(
         model=model,
         messages=messages_dicts,
         system=system_messages,
         temperature=TEMPERATURE if not use_thinking else 1,
         top_p=TOP_P if not use_thinking else NOT_GIVEN,
-        max_tokens=model_meta.max_output or 4096,
+        max_tokens=max_tokens,
         tools=tools_dict if tools_dict else NOT_GIVEN,
         thinking=(
-            {"type": "enabled", "budget_tokens": 16000} if use_thinking else NOT_GIVEN
+            {"type": "enabled", "budget_tokens": thinking_budget}
+            if use_thinking
+            else NOT_GIVEN
         ),
     )
     content = response.content
@@ -162,17 +168,23 @@ def stream(
 
     model_meta = get_model(f"anthropic/{model}")
     use_thinking = _should_use_thinking(model, tools)
+    thinking_budget = 16000
+    max_tokens = (model_meta.max_output or 4096) + (
+        thinking_budget if use_thinking else 0
+    )
 
     with _anthropic.messages.stream(
         model=model,
         messages=messages_dicts,
         system=system_messages,
         temperature=TEMPERATURE if not use_thinking else 1,
         top_p=TOP_P if not use_thinking else NOT_GIVEN,
-        max_tokens=model_meta.max_output or 4096,
+        max_tokens=max_tokens,
         tools=tools_dict if tools_dict else NOT_GIVEN,
         thinking=(
-            {"type": "enabled", "budget_tokens": 16000} if use_thinking else NOT_GIVEN
+            {"type": "enabled", "budget_tokens": thinking_budget}
+            if use_thinking
+            else NOT_GIVEN
         ),
     ) as stream:
         for chunk in stream:
@@ -184,9 +196,9 @@ def stream(
                 tool_use = block
                 yield f"\n@{tool_use.name}({tool_use.id}): "
             elif isinstance(block, anthropic.types.ThinkingBlock):
-                yield "\n<think>\n"
+                yield "<think>\n"
             elif isinstance(block, anthropic.types.RedactedThinkingBlock):
-                yield "\n<think redacted>\n"
+                yield "<think redacted>\n"
             elif isinstance(block, anthropic.types.TextBlock):
                 if block.text:
                     logger.warning("unexpected text block: %s", block.text)
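
The budget arithmetic matters because Anthropic counts extended-thinking tokens against max_tokens (and rejects requests where max_tokens is not larger than budget_tokens), so the budget is added on top of the model's normal output cap to avoid starving the visible answer; the API likewise requires temperature=1 and no top_p while thinking is enabled, which the existing conditionals handle. A condensed sketch of the parameter logic (TEMPERATURE and TOP_P values here are placeholders for gptme's constants):

from anthropic import NOT_GIVEN

TEMPERATURE, TOP_P = 0.0, 0.95  # placeholder values

def request_params(max_output: int | None, use_thinking: bool, thinking_budget: int = 16000) -> dict:
    # Thinking tokens count toward max_tokens, so add the budget on top
    # of the normal output cap when extended thinking is enabled.
    return {
        "max_tokens": (max_output or 4096) + (thinking_budget if use_thinking else 0),
        "temperature": 1 if use_thinking else TEMPERATURE,
        "top_p": NOT_GIVEN if use_thinking else TOP_P,
        "thinking": (
            {"type": "enabled", "budget_tokens": thinking_budget}
            if use_thinking
            else NOT_GIVEN
        ),
    }

assert request_params(8192, use_thinking=True)["max_tokens"] == 24192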

gptme/llm/models.py

Lines changed: 6 additions & 2 deletions

@@ -50,6 +50,7 @@ class ModelMeta:
     max_output: int | None = None
     supports_streaming: bool = True
     supports_vision: bool = False
+    supports_reasoning: bool = False  # models which support reasoning do not need prompting to use <thinking> tags
 
     # price in USD per 1M tokens
     # if price is not set, it is assumed to be 0
@@ -66,13 +67,15 @@ def full(self) -> str:
 class _ModelDictMeta(TypedDict):
     context: int
     max_output: NotRequired[int]
-    supports_streaming: NotRequired[bool]
-    supports_vision: NotRequired[bool]
 
     # price in USD per 1M tokens
     price_input: NotRequired[float]
     price_output: NotRequired[float]
 
+    supports_streaming: NotRequired[bool]
+    supports_vision: NotRequired[bool]
+    supports_reasoning: NotRequired[bool]
+
     knowledge_cutoff: NotRequired[datetime]
 
 
@@ -92,6 +95,7 @@ class _ModelDictMeta(TypedDict):
         "price_input": 3,
         "price_output": 15,
         "supports_vision": True,
+        "supports_reasoning": True,
         "knowledge_cutoff": datetime(2024, 10, 1),
     },
     "claude-3-5-sonnet-20241022": {

gptme/prompts.py

Lines changed: 17 additions & 10 deletions

@@ -15,6 +15,7 @@
 from .__version__ import __version__
 from .config import get_config, get_project_config
 from .dirs import get_project_git_dir
+from .llm.models import get_model
 from .message import Message
 from .tools import ToolFormat
 from .util import document_prompt_function
@@ -28,13 +29,14 @@ def get_prompt(
     prompt: PromptType | str = "full",
     interactive: bool = True,
     tool_format: ToolFormat = "markdown",
+    model: str | None = None,
 ) -> Message:
     """
     Get the initial system prompt.
     """
     msgs: Iterable
     if prompt == "full":
-        msgs = prompt_full(interactive, tool_format)
+        msgs = prompt_full(interactive, tool_format, model)
     elif prompt == "short":
         msgs = prompt_short(interactive, tool_format)
     else:
@@ -56,10 +58,10 @@ def _join_messages(msgs: list[Message]) -> Message:
 
 
 def prompt_full(
-    interactive: bool, tool_format: ToolFormat
+    interactive: bool, tool_format: ToolFormat, model: str | None
 ) -> Generator[Message, None, None]:
     """Full prompt to start the conversation."""
-    yield from prompt_gptme(interactive)
+    yield from prompt_gptme(interactive, model)
     yield from prompt_tools(tool_format=tool_format)
     if interactive:
         yield from prompt_user()
@@ -79,7 +81,9 @@ def prompt_short(
     yield from prompt_project()
 
 
-def prompt_gptme(interactive: bool) -> Generator[Message, None, None]:
+def prompt_gptme(
+    interactive: bool, model: str | None = None
+) -> Generator[Message, None, None]:
     """
     Base system prompt for gptme.
 
@@ -90,20 +94,23 @@ def prompt_gptme(interactive: bool) -> Generator[Message, None, None]:
     - Not mention tools which may not be loaded (browser, vision)
     - Mention the ability to self-correct and ask clarifying questions
     """
+    model_meta = get_model(model)
+
+    # use <thinking> tags as a fallback if the model doesn't natively support reasoning
+    use_thinking_tags = not model_meta.supports_reasoning
 
     default_base_prompt = f"""
-You are gptme v{__version__}, a general-purpose AI assistant powered by LLMs.
+You are gptme v{__version__}, a general-purpose AI assistant powered by LLMs. {('Currently using model: ' + model_meta.full) if model_meta else ''}
 You are designed to help users with programming tasks, such as writing code, debugging, and learning new concepts.
 You can run code, execute terminal commands, and access the filesystem on the local machine.
 You will help the user with writing code, either from scratch or in existing projects.
-You will think step by step when solving a problem, in `<thinking>` tags.
+{'You will think step by step when solving a problem, in `<thinking>` tags.' if use_thinking_tags else ''}
 Break down complex tasks into smaller, manageable steps.
 
-You have the ability to self-correct.
-If you receive feedback that your output or actions were incorrect, you should:
+You have the ability to self-correct. {'''If you receive feedback that your output or actions were incorrect, you should:
 - acknowledge the mistake
 - analyze what went wrong in `<thinking>` tags
-- provide a corrected response
+- provide a corrected response''' if use_thinking_tags else ''}
 
 You should learn about the context needed to provide the best help,
 such as exploring the current working directory and reading the code using terminal tools.
@@ -125,7 +132,7 @@ def prompt_gptme(interactive: bool) -> Generator[Message, None, None]:
 
 Maintain a professional and efficient communication style. Be concise but thorough in your explanations.
 
-Use `<thinking>` tags to think before you answer.
+{'Use `<thinking>` tags to think before you answer.' if use_thinking_tags else ''}
 """.strip()
 
     interactive_prompt = """
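
The net effect is that every <thinking> instruction in the base prompt is gated on use_thinking_tags, so native-reasoning models (which emit their own thinking blocks) are not also prompted to imitate the tags in plain text. A standalone sketch of the gating pattern (version and model name are made up for illustration):

def base_prompt(version: str, model_name: str | None, supports_reasoning: bool) -> str:
    # Fall back to <thinking>-tag prompting only for non-reasoning models.
    use_thinking_tags = not supports_reasoning
    return f"""
You are gptme v{version}, a general-purpose AI assistant powered by LLMs. {('Currently using model: ' + model_name) if model_name else ''}
{'You will think step by step when solving a problem, in `<thinking>` tags.' if use_thinking_tags else ''}
""".strip()

# A reasoning model gets a prompt with no <thinking> instructions at all:
assert "<thinking>" not in base_prompt("0.27.0", "anthropic/claude-3-7-sonnet-20250219", True)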
