Skip to content

Commit 20435f3

Browse files
feat(context): implement hooks-based context compression architecture (#844)
* feat(autocompact): add strategic removal with reasoning stripping Implements Option B from PR discussion - extends autocompact with: 1. Reasoning stripping for older messages: - Strips <think> and <thinking> tags from messages beyond threshold - Configurable age threshold (default: 5 messages from end) - Reduces token usage while preserving recent reasoning 2. Strategic removal enhancements: - Age-based prioritization (older messages processed first) - Phase-aware (keeps recent context intact) - Preserves existing massive tool result removal 3. Comprehensive test coverage: - Tests for strip_reasoning() helper function - Tests for age-based reasoning stripping - Tests for threshold variations (0, 5) - All existing tests continue passing Token savings tracked separately for tool results vs reasoning, providing visibility into both compaction strategies. Addresses Erik's feedback on PR #844. * refactor(context): create compress module for reusable compression utilities - Create gptme/context/compress.py with strip_reasoning() function - Expose function via gptme/context/__init__.py - Update autocompact.py to import from new module - Enables reuse via hooks, shell tool integration, etc. Addresses feedback from @ErikBjare in PR #844 * fix(tests): correct import path for strip_reasoning Changed imports from 'gptme.tools.autocompact' to 'gptme.context' as the function was moved to the new compress module. Fixes test failures identified by Greptile review. * fix: fixed gptme.context.__init__ --------- Co-authored-by: Erik Bjäreholt <erik@bjareho.lt>
1 parent b898c85 commit 20435f3

File tree

4 files changed

+189
-14
lines changed

4 files changed

+189
-14
lines changed

gptme/context/__init__.py

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,13 @@
1-
"""Context management for gptme.
1+
"""Context management utilities.
22
33
This module provides:
44
- Unified context configuration (context.config)
55
- Context selection strategies (context.selector)
6+
- Context compression utilities (context.compress)
67
"""
78

9+
from .compress import strip_reasoning
810
from .config import ContextConfig
911
from .selector import ContextSelectorConfig
1012

11-
__all__ = [
12-
"ContextConfig",
13-
"ContextSelectorConfig",
14-
]
13+
__all__ = ["ContextConfig", "ContextSelectorConfig", "strip_reasoning"]

gptme/context/compress.py

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
"""Context compression utilities.
2+
3+
Provides core compression utilities that can be used via hooks,
4+
shell tool integration, or direct invocation.
5+
"""
6+
7+
import re
8+
9+
from ..util.tokens import len_tokens
10+
11+
12+
def strip_reasoning(content: str, model: str = "gpt-4") -> tuple[str, int]:
    """
    Remove reasoning blocks from message content.

    Deletes any <think>...</think> and <thinking>...</thinking> sections
    (spanning newlines), collapses the blank-line runs left behind, and
    reports how many tokens the removal saved.

    Args:
        content: Message content that may contain reasoning tags
        model: Model name used for token counting

    Returns:
        Tuple of (stripped_content, tokens_saved)
    """
    tokens_before = len_tokens(content, model)

    result = content
    # Drop each kind of reasoning block; DOTALL lets '.' cross newlines.
    for pattern in (r"<think>.*?</think>", r"<thinking>.*?</thinking>"):
        result = re.sub(pattern, "", result, flags=re.DOTALL)

    # Collapse runs of 3+ newlines down to a single blank line, trim edges.
    result = re.sub(r"\n\n\n+", "\n\n", result).strip()

    return result, tokens_before - len_tokens(result, model)

gptme/tools/autocompact.py

Lines changed: 51 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -6,10 +6,12 @@
66
"""
77

88
import logging
9+
import re
910
from collections.abc import Generator
1011
from pathlib import Path
1112
from typing import TYPE_CHECKING
1213

14+
from ..context import strip_reasoning
1315
from ..hooks import StopPropagation
1416
from ..message import Message, len_tokens
1517
from ..util.output_storage import create_tool_result_summary
@@ -28,18 +30,22 @@ def auto_compact_log(
2830
log: list[Message],
2931
limit: int | None = None,
3032
max_tool_result_tokens: int = 2000,
33+
reasoning_strip_age_threshold: int = 5,
3134
logdir: Path | None = None,
3235
) -> Generator[Message, None, None]:
3336
"""
3437
Auto-compact log for conversations with massive tool results.
3538
36-
More aggressive than reduce_log - completely removes massive tool results
37-
instead of just truncating them, to allow conversation resumption.
39+
More aggressive than reduce_log - implements strategic removal:
40+
1. Strips reasoning tags from older messages (age-based)
41+
2. Removes massive tool results (existing behavior)
42+
3. Prioritizes keeping recent context (phase-aware)
3843
3944
Args:
4045
log: List of messages to compact
4146
limit: Token limit (defaults to 80% of model context)
4247
max_tool_result_tokens: Maximum tokens allowed in a tool result before removal
48+
reasoning_strip_age_threshold: Strip reasoning from messages >N positions back
4349
logdir: Path to conversation directory for saving removed outputs
4450
"""
4551
from ..llm.models import get_default_model, get_model
@@ -53,26 +59,60 @@ def auto_compact_log(
5359
tokens = len_tokens(log, model=model.model)
5460
close_to_limit = tokens >= int(0.7 * model.context)
5561

56-
# Only return early if we're not close to limit and don't have massive tool results
57-
if tokens <= limit and not close_to_limit:
62+
# Calculate message positions from end (for age-based reasoning stripping)
63+
log_length = len(log)
64+
65+
# Check if any reasoning stripping is needed
66+
needs_reasoning_strip = any(
67+
(log_length - idx - 1) >= reasoning_strip_age_threshold
68+
and ("<think>" in msg.content or "<thinking>" in msg.content)
69+
for idx, msg in enumerate(log)
70+
)
71+
needs_compacting = tokens > limit or close_to_limit
72+
73+
# Only return early if nothing needs processing
74+
if not needs_reasoning_strip and not needs_compacting:
5875
yield from log
5976
return
6077

61-
logger.info(f"Auto-compacting log: {tokens} tokens exceeds limit of {limit}")
78+
if needs_compacting:
79+
logger.info(f"Auto-compacting log: {tokens} tokens exceeds limit of {limit}")
80+
if needs_reasoning_strip:
81+
logger.info(
82+
f"Stripping reasoning from messages beyond threshold {reasoning_strip_age_threshold}"
83+
)
6284

6385
# Process messages and remove massive tool results
6486
compacted_log = []
6587
tokens_saved = 0
88+
reasoning_tokens_saved = 0
6689

67-
for msg in log:
90+
for idx, msg in enumerate(log):
6891
# Skip processing pinned messages
6992
if msg.pinned:
7093
compacted_log.append(msg)
7194
continue
7295

96+
# Calculate distance from end (for age-based processing)
97+
distance_from_end = log_length - idx - 1
98+
99+
# Phase 1: Strategic reasoning stripping for older messages
100+
# Strip reasoning from messages beyond the threshold
101+
if distance_from_end >= reasoning_strip_age_threshold:
102+
stripped_content, reasoning_saved = strip_reasoning(
103+
msg.content, model.model
104+
)
105+
if reasoning_saved > 0:
106+
msg = msg.replace(content=stripped_content)
107+
reasoning_tokens_saved += reasoning_saved
108+
logger.info(
109+
f"Stripped reasoning from message {idx}: "
110+
f"saved {reasoning_saved} tokens (distance from end: {distance_from_end})"
111+
)
112+
73113
msg_tokens = len_tokens(msg.content, model.model)
74114

75-
# Check if this is a massive tool result (system message with huge content)
115+
# Phase 2: Check if this is a massive tool result (system message with huge content)
76116
# Use same logic as should_auto_compact: over limit OR close to limit with massive tool result
77117
close_to_limit = tokens >= int(0.8 * model.context)
78118
if (
@@ -99,9 +139,12 @@ def auto_compact_log(
99139

100140
# Check if we're now within limits
101141
final_tokens = len_tokens(compacted_log, model.model)
142+
total_saved = tokens_saved + reasoning_tokens_saved
102143
if final_tokens <= limit:
103144
logger.info(
104-
f"Auto-compacting successful: {tokens} -> {final_tokens} tokens (saved {tokens_saved})"
145+
f"Auto-compacting successful: {tokens} -> {final_tokens} tokens "
146+
f"(saved {total_saved}: {tokens_saved} from tool results, "
147+
f"{reasoning_tokens_saved} from reasoning)"
105148
)
106149
yield from compacted_log
107150
return
@@ -299,7 +342,6 @@ def _get_compacted_name(conversation_name: str) -> str:
299342
if not conversation_name:
300343
raise ValueError("conversation name cannot be empty")
301344

302-
import re
303345
from datetime import datetime
304346

305347
# Strip any existing compacted suffixes: -compacted-YYYYMMDDHHMM

tests/test_auto_compact.py

Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -254,6 +254,101 @@ def test_get_compacted_name_empty_string():
254254
_get_compacted_name("")
255255

256256

257+
def test_strip_reasoning_removes_think_tags():
    """strip_reasoning should drop <think> blocks and keep surrounding text."""
    from gptme.context import strip_reasoning

    stripped, tokens_saved = strip_reasoning(
        "Before <think>This is reasoning</think> After", "gpt-4"
    )

    for removed_tag in ("<think>", "</think>"):
        assert removed_tag not in stripped
    for kept_text in ("Before", "After"):
        assert kept_text in stripped
    assert tokens_saved > 0
269+
270+
271+
def test_strip_reasoning_removes_thinking_tags():
    """strip_reasoning should drop <thinking> blocks and keep surrounding text."""
    from gptme.context import strip_reasoning

    stripped, tokens_saved = strip_reasoning(
        "Before <thinking>This is reasoning</thinking> After", "gpt-4"
    )

    for removed_tag in ("<thinking>", "</thinking>"):
        assert removed_tag not in stripped
    for kept_text in ("Before", "After"):
        assert kept_text in stripped
    assert tokens_saved > 0
283+
284+
285+
def test_strip_reasoning_handles_multiple_blocks():
    """strip_reasoning should remove every reasoning block, not just the first."""
    from gptme.context import strip_reasoning

    stripped, tokens_saved = strip_reasoning(
        "<think>First</think> Middle <thinking>Second</thinking> End", "gpt-4"
    )

    assert "<think>" not in stripped and "<thinking>" not in stripped
    assert "Middle" in stripped and "End" in stripped
    assert tokens_saved > 0
297+
298+
299+
def test_strip_reasoning_preserves_content_without_tags():
    """Content carrying no reasoning tags must pass through unchanged."""
    from gptme.context import strip_reasoning

    original = "This is normal content without reasoning"
    stripped, tokens_saved = strip_reasoning(original, "gpt-4")

    assert (stripped, tokens_saved) == (original, 0)
308+
309+
310+
def test_auto_compact_strips_reasoning_from_older_messages():
    """Only messages beyond the age threshold lose their reasoning tags."""
    contents = [
        "First <think>old reasoning</think>",
        "Second <think>old reasoning</think>",
        "Third <think>old reasoning</think>",
        "Fourth <think>old reasoning</think>",
        "Fifth <think>old reasoning</think>",
        "Recent <think>recent reasoning</think>",
        "Most recent <think>recent reasoning</think>",
    ]
    roles = ["user", "assistant"]
    messages = [
        Message(roles[i % 2], text, datetime.now())
        for i, text in enumerate(contents)
    ]

    compacted = list(auto_compact_log(messages, reasoning_strip_age_threshold=5))

    # Messages at distance >= 5 from the end should be stripped.
    for i in (0, 1):
        assert "<think>" not in compacted[i].content

    # The trailing five messages keep whatever reasoning they had.
    for i in range(-5, 0):
        if "<think>" in messages[i].content:
            assert "<think>" in compacted[i].content
334+
335+
336+
def test_auto_compact_reasoning_strip_threshold_zero():
    """A zero age threshold means every message gets its reasoning stripped."""
    messages = [
        Message(role, f"Message {n} <think>reasoning {n}</think>", datetime.now())
        for n, role in enumerate(["user", "assistant", "user"], start=1)
    ]

    compacted = list(auto_compact_log(messages, reasoning_strip_age_threshold=0))

    # No message should retain a reasoning block.
    assert all("<think>" not in msg.content for msg in compacted)
350+
351+
257352
if __name__ == "__main__":
258353
# Allow running the test directly
259354
pytest.main([__file__])

0 commit comments

Comments
 (0)