|
4 | 4 | import textwrap
|
5 | 5 | from collections.abc import Generator
|
6 | 6 | from copy import copy
|
| 7 | +from dataclasses import dataclass |
7 | 8 | from datetime import datetime
|
8 |
| -from itertools import zip_longest |
| 9 | +from itertools import islice, zip_longest |
9 | 10 | from pathlib import Path
|
10 | 11 | from tempfile import TemporaryDirectory
|
11 | 12 | from typing import Any, Literal, TypeAlias
|
@@ -288,40 +289,68 @@ def to_dict(self, branches=False) -> dict:
|
288 | 289 | return d
|
289 | 290 |
|
290 | 291 |
|
291 |
| -def _conversations() -> list[Path]: |
def _conversation_files() -> list[Path]:
    """Return the main conversation log files, most recently modified first."""
    # NOTE: only the main conversation file of each log directory is matched,
    #       not its branches (to avoid duplicates).
    candidates = get_logs_dir().glob("*/conversation.jsonl")
    # Descending mtime puts the newest conversation at index 0.
    return sorted(candidates, key=lambda f: f.stat().st_mtime, reverse=True)
|
298 | 299 |
|
299 | 300 |
|
300 |
| -def get_conversations() -> Generator[dict, None, None]: |
301 |
| - for conv_fn in _conversations(): |
302 |
| - msgs = [] |
303 |
| - msgs = _read_jsonl(conv_fn) |
304 |
| - modified = conv_fn.stat().st_mtime |
305 |
| - first_timestamp = msgs[0].timestamp.timestamp() if msgs else modified |
306 |
| - yield { |
307 |
| - "name": f"{conv_fn.parent.name}", |
308 |
| - "path": str(conv_fn), |
309 |
| - "created": first_timestamp, |
310 |
| - "modified": modified, |
311 |
| - "messages": len(msgs), |
312 |
| - "branches": 1 + len(list(conv_fn.parent.glob("branches/*.jsonl"))), |
313 |
| - } |
@dataclass
class Conversation:
    """Summary metadata describing one stored conversation log.

    Attributes:
        name: Name of the directory that holds the conversation file.
        path: Path to the conversation file, as a string.
        created: Creation time as a POSIX timestamp.
        modified: Last-modified time of the conversation file (POSIX timestamp).
        messages: Number of messages in the conversation.
        branches: Number of branches, counting the main conversation as one.
    """

    name: str
    path: str
    created: float
    modified: float
    messages: int
    branches: int
314 | 309 |
|
315 | 310 |
|
316 |
| -def _read_jsonl(path: PathLike) -> list[Message]: |
317 |
| - msgs = [] |
def get_conversations() -> Generator[Conversation, None, None]:
    """Yield metadata for every stored conversation.

    Conversations are produced in the order given by ``_conversation_files()``.
    No filtering happens here; use ``get_user_conversations()`` to skip
    conversations created by tests, evals, etc.

    Yields:
        One ``Conversation`` per main conversation file.
    """
    for conv_fn in _conversation_files():
        # Only the first message is parsed; it supplies the creation timestamp.
        first_msgs = _read_jsonl(conv_fn, limit=1)

        # The log holds one JSON document per line, so the message count is the
        # number of non-blank lines. Counting "}\n{" separators (as before)
        # under-reported every non-empty conversation by one. Streaming the
        # lines also avoids holding the whole file in memory.
        # TODO: can we avoid scanning the entire file? It may not even be
        #       needed for conversations that get filtered out later.
        with conv_fn.open() as file:
            len_msgs = sum(1 for line in file if line.strip())

        modified = conv_fn.stat().st_mtime
        # An empty log has no first message; fall back to the file's mtime.
        created = first_msgs[0].timestamp.timestamp() if first_msgs else modified

        yield Conversation(
            name=f"{conv_fn.parent.name}",
            path=str(conv_fn),
            created=created,
            modified=modified,
            messages=len_msgs,
            # The main conversation counts as one branch.
            branches=1 + len(list(conv_fn.parent.glob("branches/*.jsonl"))),
        )
| 328 | + |
| 329 | + |
def get_user_conversations() -> Generator[Conversation, None, None]:
    """Yield conversations from ``get_conversations()``, skipping those whose
    name marks them as temporary, test or eval runs."""
    skipped_prefixes = ("tmp", "test-")
    skipped_substrings = ("gptme-evals-",)
    for conv in get_conversations():
        name = conv.name
        # str.startswith accepts a tuple and matches any of its entries.
        if name.startswith(skipped_prefixes):
            continue
        if any(marker in name for marker in skipped_substrings):
            continue
        yield conv
| 338 | + |
| 339 | + |
| 340 | +def _gen_read_jsonl(path: PathLike) -> Generator[Message, None, None]: |
318 | 341 | with open(path) as file:
|
319 | 342 | for line in file.readlines():
|
320 | 343 | json_data = json.loads(line)
|
321 | 344 | if "timestamp" in json_data:
|
322 | 345 | json_data["timestamp"] = datetime.fromisoformat(json_data["timestamp"])
|
323 |
| - msgs.append(Message(**json_data)) |
324 |
| - return msgs |
| 346 | + yield Message(**json_data) |
| 347 | + |
| 348 | + |
def _read_jsonl(path: PathLike, limit: int | None = None) -> list[Message]:
    """Read messages from a JSONL file into a list.

    Args:
        path: Location of the JSONL file to read.
        limit: Maximum number of messages to return, taken from the start of
            the file. ``None`` (the default) returns every message; ``0``
            returns an empty list.

    Returns:
        The parsed messages, in file order.
    """
    gen = _gen_read_jsonl(path)
    try:
        # Compare against None explicitly: a plain truthiness check would
        # treat limit=0 as "no limit" and return the whole file.
        if limit is None:
            return list(gen)
        return list(islice(gen, limit))
    finally:
        # When the limit stops iteration early the generator is left
        # suspended; closing it releases its resources right away.
        gen.close()
325 | 354 |
|
326 | 355 |
|
327 | 356 | def _write_jsonl(path: PathLike, msgs: list[Message]) -> None:
|
|
0 commit comments