Skip to content

Commit fae65ca

Browse files
authored
fix: better prompt caching & less debug logging (#323)
1 parent 283e8b0 commit fae65ca

File tree

13 files changed

+42
-29
lines changed

13 files changed

+42
-29
lines changed

gptme/cli.py

Lines changed: 1 addition & 2 deletions
Original file line number · Diff line number · Diff line change
@@ -188,11 +188,10 @@ def main(
188188
config = get_config()
189189

190190
tool_format = tool_format or config.get_env("TOOL_FORMAT") or "markdown"
191-
192191
set_tool_format(tool_format)
193192

194193
# early init tools to generate system prompt
195-
init_tools(tool_allowlist)
194+
init_tools(frozenset(tool_allowlist) if tool_allowlist else None)
196195

197196
# get initial system prompt
198197
initial_msgs = [

gptme/init.py

Lines changed: 4 additions & 4 deletions
Original file line number · Diff line number · Diff line change
@@ -30,7 +30,6 @@ def init(model: str | None, interactive: bool, tool_allowlist: list[str] | None)
3030
_init_done = True
3131

3232
# init
33-
logger.debug("Started")
3433
load_dotenv()
3534

3635
# fixes issues with transformers parallelism
@@ -72,20 +71,21 @@ def init(model: str | None, interactive: bool, tool_allowlist: list[str] | None)
7271
# for some reason it bugs out shell tests in CI
7372
register_tabcomplete()
7473

75-
init_tools(tool_allowlist)
74+
init_tools(frozenset(tool_allowlist) if tool_allowlist else None)
7675

7776

7877
def init_logging(verbose):
79-
# log init
80-
handler = RichHandler()
78+
handler = RichHandler() # show_time=False
8179
logging.basicConfig(
8280
level=logging.DEBUG if verbose else logging.INFO,
8381
format="%(message)s",
8482
datefmt="[%X]",
8583
handlers=[handler],
8684
)
85+
8786
# anthropic spams debug logs for every request
8887
logging.getLogger("anthropic").setLevel(logging.INFO)
88+
logging.getLogger("openai").setLevel(logging.INFO)
8989
# set httpx logging to WARNING
9090
logging.getLogger("httpx").setLevel(logging.WARNING)
9191
logging.getLogger("httpcore").setLevel(logging.WARNING)

gptme/llm/__init__.py

Lines changed: 13 additions & 0 deletions
Original file line number · Diff line number · Diff line change
@@ -1,6 +1,7 @@
11
import logging
22
import shutil
33
import sys
4+
import time
45
from collections.abc import Iterator
56
from functools import lru_cache
67
from typing import cast
@@ -95,11 +96,14 @@ def print_clear():
9596
print(" " * shutil.get_terminal_size().columns, end="\r")
9697

9798
output = ""
99+
start_time = time.time()
100+
first_token_time = None
98101
try:
99102
for char in (
100103
char for chunk in _stream(messages, model, tools) for char in chunk
101104
):
102105
if not output: # first character
106+
first_token_time = time.time()
103107
print_clear()
104108
print(f"{PROMPT_ASSISTANT}: ", end="")
105109
print(char, end="")
@@ -126,6 +130,15 @@ def print_clear():
126130
return Message("assistant", output + "... ^C Interrupted")
127131
finally:
128132
print_clear()
133+
if first_token_time:
134+
end_time = time.time()
135+
logger.debug(
136+
f"Generation interrupted after {end_time - start_time:.1f}s "
137+
f"(ttft: {first_token_time - start_time:.2f}s, "
138+
f"gen: {end_time - first_token_time:.2f}s, "
139+
f"tok/s: {len_tokens(output)/(end_time - first_token_time):.1f})"
140+
)
141+
129142
return Message("assistant", output)
130143

131144

gptme/llm/llm_anthropic.py

Lines changed: 3 additions & 0 deletions
Original file line number · Diff line number · Diff line change
@@ -349,6 +349,9 @@ def _prepare_messages_for_api(
349349

350350
messages_dicts_new.append({"role": msg["role"], "content": content_parts})
351351

352+
# set for the first system message (static between sessions)
353+
system_messages[0]["cache_control"] = {"type": "ephemeral"}
354+
352355
# set cache points at the two last user messages, as suggested in Anthropic docs:
353356
# > The conversation history (previous messages) is included in the messages array.
354357
# > The final turn is marked with cache-control, for continuing in followups.

gptme/logmanager.py

Lines changed: 3 additions & 3 deletions
Original file line number · Diff line number · Diff line change
@@ -103,7 +103,7 @@ def __init__(
103103
# Try to acquire an exclusive lock
104104
try:
105105
fcntl.flock(self._lock_fd, fcntl.LOCK_EX | fcntl.LOCK_NB)
106-
logger.debug(f"Acquired lock on {self.logdir}")
106+
# logger.debug(f"Acquired lock on {self.logdir}")
107107
except BlockingIOError:
108108
self._lock_fd.close()
109109
self._lock_fd = None
@@ -132,7 +132,7 @@ def __del__(self):
132132
try:
133133
fcntl.flock(self._lock_fd, fcntl.LOCK_UN)
134134
self._lock_fd.close()
135-
logger.debug(f"Released lock on {self.logdir}")
135+
# logger.debug(f"Released lock on {self.logdir}")
136136
except Exception as e:
137137
logger.warning(f"Error releasing lock: {e}")
138138

@@ -256,7 +256,7 @@ def load(
256256

257257
if not Path(logfile).exists():
258258
if create:
259-
logger.debug(f"Creating new logfile {logfile}")
259+
# logger.debug(f"Creating new logfile {logfile}")
260260
Path(logfile).parent.mkdir(parents=True, exist_ok=True)
261261
Log([]).write_jsonl(logfile)
262262
else:

gptme/prompts.py

Lines changed: 3 additions & 3 deletions
Original file line number · Diff line number · Diff line change
@@ -8,8 +8,8 @@
88
import glob
99
import logging
1010
import platform
11-
import subprocess
1211
from collections.abc import Generator, Iterable
12+
from datetime import datetime
1313
from pathlib import Path
1414
from typing import Literal
1515

@@ -242,8 +242,8 @@ def prompt_systeminfo() -> Generator[Message, None, None]:
242242

243243
def prompt_timeinfo() -> Generator[Message, None, None]:
244244
"""Generate the current time prompt."""
245-
# TODO: this should be updated when time changes significantly (such as when resuming a session)
246-
prompt = f"## Current Time\n\n**UTC:** {subprocess.run(['date', '-u'], capture_output=True, text=True).stdout.strip()}"
245+
# we only set the date in order for prompt caching and such to work
246+
prompt = f"## Current Date\n\n**UTC:** {datetime.utcnow().strftime('%Y-%m-%d')}"
247247
yield Message("system", prompt)
248248

249249

gptme/tools/__init__.py

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,8 @@
7272
]
7373

7474

75-
def init_tools(allowlist=None) -> None:
75+
@lru_cache
76+
def init_tools(allowlist: frozenset[str] | None = None) -> None:
7677
"""Runs initialization logic for tools."""
7778
# init python tool last
7879
tools = list(
@@ -93,22 +94,20 @@ def init_tools(allowlist=None) -> None:
9394
if tool.name in tools_default_disabled:
9495
if not allowlist or tool.name not in allowlist:
9596
continue
96-
load_tool(tool)
97+
_load_tool(tool)
9798

9899
for tool_name in allowlist or []:
99100
if not has_tool(tool_name):
100101
raise ValueError(f"Tool '{tool_name}' not found")
101102

102103

103-
def load_tool(tool: ToolSpec) -> None:
104+
def _load_tool(tool: ToolSpec) -> None:
104105
"""Loads a tool."""
105-
# FIXME: when are tools first initialized?
106106
if tool in loaded_tools:
107107
logger.warning(f"Tool '{tool.name}' already loaded")
108108
return
109109

110-
if tool.init:
111-
tool.init()
110+
# tool init happens in init_tools to check that spec is available
112111
if tool.functions:
113112
for func in tool.functions:
114113
register_function(func)

gptme/tools/python.py

Lines changed: 2 additions & 2 deletions
Original file line number · Diff line number · Diff line change
@@ -120,7 +120,7 @@ def execute_python(
120120

121121

122122
@functools.lru_cache
123-
def get_installed_python_libraries() -> set[str]:
123+
def get_installed_python_libraries() -> list[str]:
124124
"""Check if a select list of Python libraries are installed."""
125125
candidates = [
126126
"numpy",
@@ -137,7 +137,7 @@ def get_installed_python_libraries() -> set[str]:
137137
if importlib.util.find_spec(candidate):
138138
installed.add(candidate)
139139

140-
return installed
140+
return list(sorted(installed))
141141

142142

143143
def get_functions():

gptme/tools/shell.py

Lines changed: 1 addition & 1 deletion
Original file line number · Diff line number · Diff line change
@@ -44,7 +44,7 @@
4444

4545

4646
shell_programs_str = "\n".join(
47-
f"- {prog}" for prog in get_installed_programs(candidates)
47+
f"- {prog}" for prog in sorted(get_installed_programs(candidates))
4848
)
4949
is_macos = sys.platform == "darwin"
5050

gptme/util/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line number · Diff line number · Diff line change
@@ -37,7 +37,7 @@ def get_tokenizer(model: str):
3737
except KeyError:
3838
global _warned_models
3939
if model not in _warned_models:
40-
logger.warning(
40+
logger.info(
4141
f"No tokenizer for '{model}'. Using tiktoken cl100k_base. Use results only as estimates."
4242
)
4343
_warned_models |= {model}

0 commit comments

Comments (0)