Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGES.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
- Query: Permitted loading context file from local filesystem
per `ABOUT_CONTEXT_URL`
- Query: Introduced caching for context payloads on HTTP remote URLs
- llms-txt: Introduced caching for expanding outline into Markdown file

## v0.0.3 - 2025-05-10
- Outline: Refactored the source of truth for the documentation outline
Expand Down
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,7 @@ dependencies = [
"colorlog<7",
"hishel<0.2",
"llms-txt==0.0.4",
"platformdirs<5",
"pueblo==0.0.11",
"requests<3",
]
Expand Down
7 changes: 6 additions & 1 deletion src/cratedb_about/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,12 @@
type=str,
required=False,
default=None,
help="URL to the outline file. By default, the built-in outline is used.",
metavar="URL|FILE",
help="Outline source. Provide either an HTTP(S) URL or a local file path. "
"If omitted, the built-in outline is used.",
callback=lambda _, __, v: v
if not v or v.startswith(("http://", "https://"))
else Path(v).expanduser().resolve(),
)


Expand Down
22 changes: 15 additions & 7 deletions src/cratedb_about/outline/model.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
import typing as t
from io import StringIO
from unittest import mock

from attr import Factory
from attrs import define

from cratedb_about.util import DictTools, Dumpable, Metadata
from cratedb_about.util import DictTools, Dumpable, Metadata, get_cache_client


@define
Expand Down Expand Up @@ -89,12 +90,19 @@ def to_llms_txt(self, optional: bool = False) -> str:
The string representation of the context in llms.txt format.
"""

# Import module lazily to relax dependency surface.
from llms_txt import create_ctx

markdown = self.to_markdown()
ctx = create_ctx(markdown, optional=optional, n_workers=None)
return str(ctx)
# Patch `llms_txt` package to use caching via Hishel.
# https://hishel.com/
http_client = get_cache_client()
with http_client as client:
# Patch the client object.
with mock.patch("llms_txt.core.httpx", client):
# Import module lazily to relax dependency surface.
from llms_txt import create_ctx

# Expand links and output in Markdown format.
markdown = self.to_markdown()
ctx = create_ctx(markdown, optional=optional, n_workers=None)
return str(ctx)

def get_item_titles(self, section_name: t.Optional[str] = None) -> t.List[str]:
"""
Expand Down
10 changes: 4 additions & 6 deletions src/cratedb_about/query/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,8 @@
import typing as t
from pathlib import Path

import hishel
from cratedb_about.settings import settings
from cratedb_about.util import get_cache_client

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -54,13 +55,10 @@ class KnowledgeContextLoader:
instructions = "You are a helpful and concise assistant."

# Configure default cache lifetime to one hour.
default_cache_ttl: int = 3600
default_cache_ttl: int = settings.http_cache_ttl

def __init__(self):
# Configure Hishel, an httpx client with caching.
controller = hishel.Controller(allow_stale=True)
storage = hishel.SQLiteStorage(ttl=self.cache_ttl)
self.http_client = hishel.CacheClient(controller=controller, storage=storage, timeout=10.0)
self.http_client = get_cache_client(ttl=self.cache_ttl)

@property
def url(self) -> str:
Expand Down
21 changes: 21 additions & 0 deletions src/cratedb_about/settings.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
from pathlib import Path

import platformdirs


class Settings:
    """
    Bundle of application-wide configuration values.
    """

    # Timeout handed to the HTTP client (presumably seconds -- confirm).
    http_timeout: float = 10.0

    # Lifetime of HTTP cache entries, in seconds (3600 = one hour).
    http_cache_ttl: int = 3600

    @property
    def http_cache_path(self) -> Path:
        """
        Return the location of the SQLite file backing the HTTP cache.

        The per-user cache directory for the `cratedb-about` application is
        created on access when it does not exist yet.
        """
        cache_dir = platformdirs.user_cache_path(appname="cratedb-about")
        cache_dir.mkdir(parents=True, exist_ok=True)
        return cache_dir.joinpath(".hishel.sqlite")


# Module-level instance shared by importers of this module.
settings = Settings()
32 changes: 32 additions & 0 deletions src/cratedb_about/util.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,18 @@
import logging
import sqlite3
import typing as t
from collections import OrderedDict

import attr
import hishel
from attrs import define
from cattrs.preconf.json import make_converter as make_json_converter
from cattrs.preconf.pyyaml import make_converter as make_yaml_converter

from cratedb_about.settings import settings

# Use a module-scoped logger instead of the root logger, so that records carry
# this module's name and can be filtered or configured individually.
logger = logging.getLogger(__name__)


@define
class Metadata:
Expand Down Expand Up @@ -48,3 +55,28 @@ def from_json(cls, json_str: str):
def from_yaml(cls, yaml_str: str):
converter = make_yaml_converter(dict_factory=OrderedDict)
return converter.loads(yaml_str, cls)


def get_cache_client(ttl: t.Optional[t.Union[int, float]] = settings.http_cache_ttl):
    """
    Return the configured cache client.
    https://hishel.com/

    The client is a Hishel `CacheClient` (an httpx client with caching) whose
    responses are stored in the SQLite database at `settings.http_cache_path`.

    :param ttl: Lifetime of cache entries, forwarded to `hishel.SQLiteStorage`.
        Defaults to `settings.http_cache_ttl`.
    :return: A `hishel.CacheClient` using `settings.http_timeout` as timeout.
    :raises Exception: When the cache cannot be set up. The raised exception
        carries a descriptive message and is chained to the original error.
        It has the same type as the original error when that type can be
        constructed from a single message, otherwise it is a `RuntimeError`.
    """
    cache_path = settings.http_cache_path
    logger.info(f"Configuring cache. ttl={ttl}, path={cache_path}")
    connection = None
    try:
        # The connection is created with `check_same_thread=False`, so it is
        # not restricted to the thread that opened it.
        connection = sqlite3.connect(cache_path, check_same_thread=False)
        controller = hishel.Controller(allow_stale=True)
        storage = hishel.SQLiteStorage(connection=connection, ttl=ttl)
        return hishel.CacheClient(
            controller=controller, storage=storage, timeout=settings.http_timeout
        )
    except Exception as e:
        # Nobody receives the client on failure, so release the database
        # handle here instead of leaking it.
        if connection is not None:
            connection.close()
        msg = (
            f"Failed to configure Hishel cache with SQLite. "
            f"ttl={ttl}, path={cache_path}. Reason: {e}"
        )
        logger.exception(msg)
        # Preserve the original exception type where possible. Some exception
        # classes require more than one constructor argument; building them
        # from a single message would raise a misleading `TypeError`.
        try:
            wrapped = type(e)(msg)
        except Exception:
            wrapped = RuntimeError(msg)
        raise wrapped from e
21 changes: 21 additions & 0 deletions tests/test_util.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
import hishel
import pytest

from cratedb_about.util import get_cache_client


def test_get_cache_client_valid():
    """
    The factory returns a Hishel cache client when called with defaults.
    """
    assert isinstance(get_cache_client(), hishel.CacheClient)


def test_get_cache_client_failure(mocker, caplog):
    """
    A failing client constructor surfaces as an annotated, logged exception.
    """

    def _fail(*_args, **_kwargs):
        raise Exception("Test error")

    # Make constructing the cache client blow up.
    mocker.patch.object(hishel.CacheClient, "__init__", _fail)

    with pytest.raises(Exception) as excinfo:
        get_cache_client()

    # The re-raised error includes both the original reason and the context.
    for pattern in ("Test error", "Failed to configure Hishel cache with SQLite"):
        assert excinfo.match(pattern)

    # The failure has also been written to the log.
    assert "Failed to configure Hishel cache with SQLite" in caplog.text