Skip to content

Commit 9c53aa0

Browse files
committed
fix: fix conversation list order in picker, lazily load conversation metadata, add get_user_conversations(), add ?limit=<int> to /api/conversations and use it in webui
1 parent e1b881a commit 9c53aa0

File tree

9 files changed

+185
-63
lines changed

9 files changed

+185
-63
lines changed

gptme/cli.py

Lines changed: 29 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
import urllib.parse
1010
from collections.abc import Generator
1111
from datetime import datetime
12+
from itertools import islice
1213
from pathlib import Path
1314
from typing import Literal
1415

@@ -28,7 +29,7 @@
2829
from .dirs import get_logs_dir
2930
from .init import init, init_logging
3031
from .llm import reply
31-
from .logmanager import LogManager, _conversations
32+
from .logmanager import Conversation, LogManager, get_user_conversations
3233
from .message import Message
3334
from .models import get_model
3435
from .prompts import get_prompt
@@ -407,16 +408,24 @@ def get_name(name: str) -> Path:
407408
return logpath
408409

409410

410-
def get_logfile(name: str | Literal["random", "resume"], interactive=True) -> Path:
411+
def get_logfile(
412+
name: str | Literal["random", "resume"], interactive=True, limit=20
413+
) -> Path:
411414
# let user select between starting a new conversation and loading a previous one
412415
# using the library
413416
title = "New conversation or load previous? "
414417
NEW_CONV = "New conversation"
415-
prev_conv_files = list(reversed(_conversations()))
418+
LOAD_MORE = "Load more"
419+
gen_convs = get_user_conversations()
420+
convs: list[Conversation] = []
421+
try:
422+
convs.append(next(gen_convs))
423+
except StopIteration:
424+
pass
416425

417426
if name == "resume":
418-
if prev_conv_files:
419-
return prev_conv_files[0].parent / "conversation.jsonl"
427+
if convs:
428+
return Path(convs[0].path)
420429
else:
421430
raise ValueError("No previous conversations to resume")
422431

@@ -426,24 +435,32 @@ def get_logfile(name: str | Literal["random", "resume"], interactive=True) -> Pa
426435
# return "-test-" in name or name.startswith("test-")
427436
# prev_conv_files = [f for f in prev_conv_files if not is_test(f.parent.name)]
428437

429-
NEWLINE = "\n"
438+
# load more conversations
439+
convs.extend(islice(gen_convs, limit - 1))
440+
430441
prev_convs = [
431-
f"{f.parent.name:30s} \t{epoch_to_age(f.stat().st_mtime)} \t{len(f.read_text().split(NEWLINE)):5d} msgs"
432-
for f in prev_conv_files
442+
f"{conv.name:30s} \t{epoch_to_age(conv.modified)} \t{conv.messages:5d} msgs"
443+
for conv in convs
433444
]
434445

435446
# don't run pick in tests/non-interactive mode, or if the user specifies a name
436447
if interactive and name in ["random"]:
437-
options = [
438-
NEW_CONV,
439-
] + prev_convs
448+
options = (
449+
[
450+
NEW_CONV,
451+
]
452+
+ prev_convs
453+
+ [LOAD_MORE]
454+
)
440455

441456
index: int
442457
_, index = pick(options, title) # type: ignore
443458
if index == 0:
444459
logdir = get_name(name)
460+
elif index == len(options) - 1:
461+
return get_logfile(name, interactive, limit + 100)
445462
else:
446-
logdir = get_logs_dir() / prev_conv_files[index - 1].parent
463+
logdir = get_logs_dir() / convs[index - 1].name
447464
else:
448465
logdir = get_name(name)
449466

gptme/logmanager.py

Lines changed: 50 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,9 @@
44
import textwrap
55
from collections.abc import Generator
66
from copy import copy
7+
from dataclasses import dataclass
78
from datetime import datetime
8-
from itertools import zip_longest
9+
from itertools import islice, zip_longest
910
from pathlib import Path
1011
from tempfile import TemporaryDirectory
1112
from typing import Any, Literal, TypeAlias
@@ -288,40 +289,68 @@ def to_dict(self, branches=False) -> dict:
288289
return d
289290

290291

291-
def _conversations() -> list[Path]:
292+
def _conversation_files() -> list[Path]:
292293
# NOTE: only returns the main conversation, not branches (to avoid duplicates)
293-
# returns the most recent first
294+
# returns the conversation files sorted by modified time (newest first)
294295
logsdir = get_logs_dir()
295296
return list(
296297
sorted(logsdir.glob("*/conversation.jsonl"), key=lambda f: -f.stat().st_mtime)
297298
)
298299

299300

300-
def get_conversations() -> Generator[dict, None, None]:
301-
for conv_fn in _conversations():
302-
msgs = []
303-
msgs = _read_jsonl(conv_fn)
304-
modified = conv_fn.stat().st_mtime
305-
first_timestamp = msgs[0].timestamp.timestamp() if msgs else modified
306-
yield {
307-
"name": f"{conv_fn.parent.name}",
308-
"path": str(conv_fn),
309-
"created": first_timestamp,
310-
"modified": modified,
311-
"messages": len(msgs),
312-
"branches": 1 + len(list(conv_fn.parent.glob("branches/*.jsonl"))),
313-
}
301+
@dataclass
302+
class Conversation:
303+
name: str
304+
path: str
305+
created: float
306+
modified: float
307+
messages: int
308+
branches: int
314309

315310

316-
def _read_jsonl(path: PathLike) -> list[Message]:
317-
msgs = []
311+
def get_conversations() -> Generator[Conversation, None, None]:
312+
"""Returns all conversations, sorted by modified time (newest first)."""
313+
for conv_fn in _conversation_files():
314+
msgs = _read_jsonl(conv_fn, limit=1)
315+
# TODO: can we avoid reading the entire file? it may not even be needed, due to user-conversation filtering
316+
len_msgs = conv_fn.read_text().count("}\n{")
317+
assert len(msgs) <= 1
318+
modified = conv_fn.stat().st_mtime
319+
first_timestamp = msgs[0].timestamp.timestamp() if msgs else modified
320+
yield Conversation(
321+
name=f"{conv_fn.parent.name}",
322+
path=str(conv_fn),
323+
created=first_timestamp,
324+
modified=modified,
325+
messages=len_msgs,
326+
branches=1 + len(list(conv_fn.parent.glob("branches/*.jsonl"))),
327+
)
328+
329+
330+
def get_user_conversations() -> Generator[Conversation, None, None]:
331+
"""Returns all user conversations, excluding ones used for testing, evals, etc."""
332+
for conv in get_conversations():
333+
if any(conv.name.startswith(prefix) for prefix in ["tmp", "test-"]) or any(
334+
substr in conv.name for substr in ["gptme-evals-"]
335+
):
336+
continue
337+
yield conv
338+
339+
340+
def _gen_read_jsonl(path: PathLike) -> Generator[Message, None, None]:
318341
with open(path) as file:
319342
for line in file.readlines():
320343
json_data = json.loads(line)
321344
if "timestamp" in json_data:
322345
json_data["timestamp"] = datetime.fromisoformat(json_data["timestamp"])
323-
msgs.append(Message(**json_data))
324-
return msgs
346+
yield Message(**json_data)
347+
348+
349+
def _read_jsonl(path: PathLike, limit=None) -> list[Message]:
350+
gen = _gen_read_jsonl(path)
351+
if limit:
352+
gen = islice(gen, limit) # type: ignore
353+
return list(gen)
325354

326355

327356
def _write_jsonl(path: PathLike, msgs: list[Message]) -> None:

gptme/message.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,11 @@
2424

2525
logger = logging.getLogger(__name__)
2626

27+
# max tokens allowed in a single system message
28+
# if you hit this limit, you and/or I f-ed up, and should make the message shorter
29+
# maybe we should make it possible to store long outputs in files, and link/summarize it/preview it in the message
30+
max_system_len = 20000
31+
2732

2833
@dataclass(frozen=True, eq=False)
2934
class Message:
@@ -51,6 +56,9 @@ class Message:
5156

5257
def __post_init__(self):
5358
assert isinstance(self.timestamp, datetime)
59+
if self.role == "system":
60+
if (length := len_tokens(self)) >= max_system_len:
61+
logger.warning(f"System message too long: {length} tokens")
5462

5563
def __repr__(self):
5664
content = textwrap.shorten(self.content, 20, placeholder="...")

gptme/server/__init__.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
Server for gptme.
33
"""
44

5-
from .api import create_app, main
5+
from .api import create_app
6+
from .cli import main
67

78
__all__ = ["main", "create_app"]

gptme/server/api.py

Lines changed: 5 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -10,14 +10,15 @@
1010
from contextlib import redirect_stdout
1111
from datetime import datetime
1212
from importlib import resources
13+
from itertools import islice
1314

1415
import flask
15-
from flask import current_app
16+
from flask import current_app, request
1617

1718
from ..commands import execute_cmd
1819
from ..dirs import get_logs_dir
1920
from ..llm import reply
20-
from ..logmanager import LogManager, get_conversations
21+
from ..logmanager import LogManager, get_user_conversations
2122
from ..message import Message
2223
from ..models import get_model
2324
from ..tools import execute_msg
@@ -32,7 +33,8 @@ def api_root():
3233

3334
@api.route("/api/conversations")
3435
def api_conversations():
35-
conversations = list(get_conversations())
36+
limit = int(request.args.get("limit", 100))
37+
conversations = list(islice(get_user_conversations(), limit))
3638
return flask.jsonify(conversations)
3739

3840

@@ -149,9 +151,3 @@ def create_app() -> flask.Flask:
149151
app = flask.Flask(__name__, static_folder=static_path)
150152
app.register_blueprint(api)
151153
return app
152-
153-
154-
def main() -> None:
155-
"""Run the Flask app."""
156-
app = create_app()
157-
app.run(debug=True)

gptme/server/cli.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3,18 +3,20 @@
33
import click
44

55
from ..init import init, init_logging
6+
from .api import create_app
67

78
logger = logging.getLogger(__name__)
89

910

1011
@click.command("gptme-server")
11-
@click.option("-v", "--verbose", is_flag=True, help="Verbose output.")
12+
@click.option("--debug", is_flag=True, help="Debug mode")
13+
@click.option("-v", "--verbose", is_flag=True, help="Verbose output")
1214
@click.option(
1315
"--model",
1416
default=None,
1517
help="Model to use by default, can be overridden in each request.",
1618
)
17-
def main(verbose: bool, model: str | None): # pragma: no cover
19+
def main(debug: bool, verbose: bool, model: str | None): # pragma: no cover
1820
"""
1921
Starts a server and web UI for gptme.
2022
@@ -34,7 +36,5 @@ def main(verbose: bool, model: str | None): # pragma: no cover
3436
exit(1)
3537
click.echo("Initialization complete, starting server")
3638

37-
# noreorder
38-
from gptme.server.api import main as server_main # fmt: skip
39-
40-
server_main()
39+
app = create_app()
40+
app.run(debug=debug)

gptme/tools/chats.py

Lines changed: 17 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -6,10 +6,14 @@
66
import logging
77
from pathlib import Path
88
from textwrap import indent
9+
from typing import TYPE_CHECKING
910

1011
from ..message import Message
1112
from .base import ToolSpec
1213

14+
if TYPE_CHECKING:
15+
from ..logmanager import LogManager
16+
1317
logger = logging.getLogger(__name__)
1418

1519

@@ -33,7 +37,9 @@ def _get_matching_messages(log_manager, query: str, system=False) -> list[Messag
3337
]
3438

3539

36-
def _summarize_conversation(log_manager, include_summary: bool) -> list[str]:
40+
def _summarize_conversation(
41+
log_manager: "LogManager", include_summary: bool
42+
) -> list[str]:
3743
"""Summarize a conversation."""
3844
# noreorder
3945
from ..llm import summarize as llm_summarize # fmt: skip
@@ -80,11 +86,10 @@ def list_chats(max_results: int = 5, include_summary: bool = False) -> None:
8086

8187
print(f"Recent conversations (showing up to {max_results}):")
8288
for i, conv in enumerate(conversations, 1):
83-
print(f"\n{i}. {conv['name']}")
84-
if "created_at" in conv:
85-
print(f" Created: {conv['created_at']}")
89+
print(f"\n{i}. {conv.name}")
90+
print(f" Created: {conv.created}")
8691

87-
log_path = Path(conv["path"])
92+
log_path = Path(conv.path)
8893
log_manager = LogManager.load(log_path)
8994

9095
summary_lines = _summarize_conversation(log_manager, include_summary)
@@ -101,19 +106,19 @@ def search_chats(query: str, max_results: int = 5, system=False) -> None:
101106
system (bool): Whether to include system messages in the search.
102107
"""
103108
# noreorder
104-
from ..logmanager import LogManager, get_conversations # fmt: skip
109+
from ..logmanager import LogManager, get_user_conversations # fmt: skip
105110

106-
results = []
107-
for conv in get_conversations():
108-
log_path = Path(conv["path"])
111+
results: list[dict] = []
112+
for conv in get_user_conversations():
113+
log_path = Path(conv.path)
109114
log_manager = LogManager.load(log_path)
110115

111116
matching_messages = _get_matching_messages(log_manager, query, system)
112117

113118
if matching_messages:
114119
results.append(
115120
{
116-
"conversation": conv["name"],
121+
"conversation": conv.name,
117122
"log_manager": log_manager,
118123
"matching_messages": matching_messages,
119124
}
@@ -165,8 +170,8 @@ def read_chat(conversation: str, max_results: int = 5, incl_system=False) -> Non
165170
conversations = list(get_conversations())
166171

167172
for conv in conversations:
168-
if conv["name"] == conversation:
169-
log_path = Path(conv["path"])
173+
if conv.name == conversation:
174+
log_path = Path(conv.path)
170175
logmanager = LogManager.load(log_path)
171176
print(f"Reading conversation: {conversation}")
172177
i = 0

0 commit comments

Comments
 (0)