Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 16 additions & 0 deletions .github/dependabot.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# To get started with Dependabot version updates, you'll need to specify which
# package ecosystems to update and where the package manifests are located.
# Please see the documentation for all configuration options:
# https://docs.github.com/github/administering-a-repository/configuration-options-for-dependency-updates

version: 2
updates:
- package-ecosystem: "pip"
directory: "/"
schedule:
interval: "daily"

- package-ecosystem: "github-actions"
directory: "/"
schedule:
interval: "monthly"
65 changes: 65 additions & 0 deletions .github/workflows/tests.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
name: "Tests"

on:
push:
branches: [ main ]
pull_request:
workflow_dispatch:

concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: true

jobs:

test:
name: "
Python ${{ matrix.python-version }}
"
runs-on: ${{ matrix.os }}
strategy:
fail-fast: false
matrix:
os: ['ubuntu-latest']
python-version: [
'3.9',
'3.13',
]

env:
OS: ${{ matrix.os }}
PYTHON: ${{ matrix.python-version }}
UV_SYSTEM_PYTHON: true

steps:

- name: Acquire sources
uses: actions/checkout@v4

- name: Install `sponge`
run: sudo apt-get --yes install moreutils

- name: Set up Python
uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}

- name: Set up uv
uses: astral-sh/setup-uv@v5
with:
cache-dependency-glob: |
pyproject.toml
cache-suffix: ${{ matrix.python-version }}
enable-cache: true
version: "latest"

- name: Set up project
run: |
uv pip install --editable='.[develop,test]'

- name: Run linter and software tests
run: |
poe check
poe build
cratedb-about --version
cratedb-about list-questions
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,5 @@
.venv*
*.egg-info
*.lock
bdist.*
__pycache__
2 changes: 2 additions & 0 deletions CHANGES.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,5 @@
- Established project layout
- Added source files (`cratedb-overview.md`), generator program wrapper
(`uv run poe build`), and build artifacts (`llms-ctx.txt` and `llms-ctx-full.txt`)
- Added CLI program `cratedb-about` with subcommands `ask` and `list-questions`
for ad hoc conversations about CrateDB
17 changes: 12 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,13 +19,20 @@ to relevant resources in the spirit of a curated knowledge backbone.

## Usage

To rebuild the `llms.txt` files, acquire the sources of the repository,
and invoke the build command.
Install the `cratedb-about` package.
```shell
uv tool install --upgrade 'cratedb-about @ git+https://github.com/crate/about'
```

Ask questions about CrateDB.
```shell
export OPENAI_API_KEY=<YOUR_OPENAI_API_KEY>
cratedb-about ask "CrateDB does not seem to provide an AUTOINCREMENT feature?"
```

If you are running out of questions, get inspired by the built-in library of example questions.
```shell
git clone https://github.com/crate/about cratedb-about
cd cratedb-about
uv run poe build
cratedb-about list-questions
```


Expand Down
10 changes: 10 additions & 0 deletions docs/backlog.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# Backlog

## Iteration +1
- JSON/YAML/Markdown output

## Iteration +2
- Unlock Discourse: https://community.cratedb.com/raw/1015
- Unlock HTML resources: https://www.urltoany.com/url-to-markdown.
=> Find the best standalone program.
- Unlock GitHub projects: https://github.com/mattduck/gh2md
22 changes: 22 additions & 0 deletions docs/sandbox.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# Sandbox

Acquire the source code repository.
```shell
git clone https://github.com/crate/about cratedb-about
cd cratedb-about
```

Rebuild all the `llms.txt` files.
```shell
uv run poe build
```

Ask questions about CrateDB.
```shell
uvx --with-editable=. cratedb-about ask "CrateDB does not seem to provide an AUTOINCREMENT feature?"
```

If you are running out of questions, get inspired by the built-in library of example questions.
```shell
uvx --with-editable=. cratedb-about list-questions
```
10 changes: 9 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -69,9 +69,12 @@ dynamic = [
"version",
]
dependencies = [
"claudette",
"click<9",
"llms-txt==0.0.4",
"openai",
"poethepoet<1",
"requests<3",
]
optional-dependencies.develop = [
"mypy<1.16",
Expand All @@ -83,6 +86,8 @@ urls.Changelog = "https://github.com/crate/about/blob/main/CHANGES.md"
urls.Issues = "https://github.com/crate/about/issues"
urls.Repository = "https://github.com/crate/about"

scripts.cratedb-about = "cratedb_about.cli:cli"

[tool.ruff]
line-length = 100

Expand Down Expand Up @@ -118,7 +123,10 @@ lint.select = [
]

[tool.mypy]
exclude = [ ]
mypy_path = "src"
packages = [
"cratedb_about",
]
check_untyped_defs = true
ignore_missing_imports = true
implicit_optional = true
Expand Down
47 changes: 47 additions & 0 deletions src/cratedb_about/cli.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
import typing as t

import click

from cratedb_about.core import CrateDBConversation
from cratedb_about.model import Example


@click.group()
@click.version_option()
@click.pass_context
def cli(ctx: click.Context) -> None:
    """Top-level command group for the `cratedb-about` program."""
    # No shared setup needed yet; subcommands are self-contained.
    pass


@cli.command()
@click.argument("question", type=str, required=False)
@click.option("--backend", type=click.Choice(["openai", "claude"]), default="openai")
def ask(question: t.Optional[str], backend: t.Literal["claude", "openai"]) -> None:
    """
    Ask questions about CrateDB.

    QUESTION is optional; when omitted, a default example question is used.

    Requires:
    - OpenAI backend: Set OPENAI_API_KEY environment variable
    - Claude backend: Set ANTHROPIC_API_KEY environment variable
    """
    # The conversation helper primes the LLM with the bundled CrateDB
    # knowledge context before relaying the user's question.
    wizard = CrateDBConversation(
        backend=backend,
        use_knowledge=True,
    )
    if not question:
        # Use the AUTOINCREMENT question or fall back to the first question if not found
        default_question = next(
            (q for q in Example.questions if "AUTOINCREMENT" in q),
            Example.questions[0] if Example.questions else "What is CrateDB?",
        )
        question = default_question
    click.echo(f"Question: {question}\nAnswer:\n")
    click.echo(wizard.ask(question))


@cli.command()
def list_questions():
    """
    Print the bundled example questions about CrateDB, one per line.
    """
    catalog = Example.questions
    click.echo("\n".join(catalog))
148 changes: 148 additions & 0 deletions src/cratedb_about/core.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,148 @@
# Derived from: https://llmstxt.org/domains.html
import dataclasses
import os
import sys
import typing as t

from cratedb_about.model import Settings

# Import backends conditionally to avoid errors if dependencies are missing
CLAUDE_AVAILABLE = False
OPENAI_AVAILABLE = False

try:
from claudette import Chat, contents, models

CLAUDE_AVAILABLE = True
except ImportError:
pass

try:
from openai import OpenAI
from openai.types.responses import ResponseInputTextParam
from openai.types.responses.response_input_param import Message
from openai.types.shared_params import Reasoning

OPENAI_AVAILABLE = True
except ImportError:
pass


@dataclasses.dataclass
class CrateDBConversation:
    """
    Manage conversations about CrateDB.

    Dispatches a question to either the OpenAI or the Claude backend and,
    when `use_knowledge` is enabled, primes the model with the CrateDB
    knowledge context from `Settings.get_prompt()` first.

    Requires:
    - OPENAI_API_KEY environment variable when using "openai" backend
    - ANTHROPIC_API_KEY environment variable when using "claude" backend
    """

    # Which LLM provider to use for answering questions.
    backend: t.Literal["claude", "openai"] = "openai"
    # Whether to send the CrateDB knowledge context ahead of the question.
    use_knowledge: bool = True

    def __post_init__(self) -> None:
        """Validate configuration.

        Raises:
            ImportError: If the selected backend's client package is not installed.
            ValueError: If the selected backend's API key environment variable is unset.
        """
        if self.backend == "openai" and not OPENAI_AVAILABLE:
            raise ImportError("The 'openai' package is required when using the OpenAI backend")
        if self.backend == "claude" and not CLAUDE_AVAILABLE:
            raise ImportError("The 'claudette' package is required when using the Claude backend")
        if self.backend == "openai" and not os.environ.get("OPENAI_API_KEY"):
            raise ValueError(
                "OPENAI_API_KEY environment variable is required when using 'openai' backend"
            )
        if self.backend == "claude" and not os.environ.get("ANTHROPIC_API_KEY"):
            raise ValueError(
                "ANTHROPIC_API_KEY environment variable is required when using 'claude' backend"
            )

    def ask(self, question: str) -> str:
        """
        Ask a question about CrateDB using the configured LLM backend.

        Args:
            question: The question to ask about CrateDB

        Returns:
            str: The response from the LLM

        Raises:
            NotImplementedError: If an unsupported backend is specified
            ValueError: If required environment variables are missing
            RuntimeError: If there's an error communicating with the LLM API
        """
        if self.backend == "openai":
            return self.ask_gpt(question)
        if self.backend == "claude":
            return self.ask_claude(question)
        raise NotImplementedError("Please select an available LLM backend")

    def ask_claude(self, question: str) -> str:
        """
        Ask the question via Anthropic's Claude, using the `claudette` client.

        A knowledge-context failure is reported as a warning only, so the
        question is still asked without the extra context (best-effort).

        Raises:
            RuntimeError: If the Claude API call itself fails.
        """
        # FIXME: API does not provide lookup by name.
        # NOTE(review): positional index into `claudette.models` — assumes
        # index 1 is Sonnet 3.5; fragile if the upstream list order changes.
        model = models[1]  # Sonnet 3.5
        chat = Chat(model, sp=Settings.instructions)
        if self.use_knowledge:
            try:
                chat(Settings.get_prompt())
            except Exception as e:
                print(f"Warning: Failed to load knowledge context: {e}", file=sys.stderr)  # noqa: T201
        try:
            result = chat(question)
            return contents(result)
        except Exception as e:
            raise RuntimeError(f"Claude API error: {e}") from e

    def ask_gpt(self, question: str) -> str:
        """
        Ask the machine, enriched with CrateDB context, catalyzed through OpenAI's GPT.

        Models like o3 and o4-mini are reasoning models.
        https://platform.openai.com/docs/guides/reasoning

        The OpenAI API provides different kinds of roles for messages. Let's use the
        `developer` role to relay information on top of the user's question.

        - https://community.openai.com/t/the-system-role-how-it-influences-the-chat-behavior/87353
        - https://community.openai.com/t/understanding-role-management-in-openais-api-two-methods-compared/253289
        - https://community.openai.com/t/how-is-developer-message-better-than-system-prompt/1062784
        """

        client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))

        # Build the Responses API input: optional knowledge context first
        # (as a `developer` message), then the user's question.
        input_messages: t.List[Message] = []
        if self.use_knowledge:
            try:
                prompt = Settings.get_prompt()
                if prompt:
                    input_messages.append(
                        Message(
                            content=[ResponseInputTextParam(text=prompt, type="input_text")],
                            role="developer",
                            status="completed",
                            type="message",
                        )
                    )
            except Exception as e:
                # Best-effort: warn and continue without the knowledge context.
                print(f"Warning: Failed to load knowledge context: {e}", file=sys.stderr)  # noqa: T201
        # Always add the user question
        input_messages.append(
            Message(
                content=[ResponseInputTextParam(text=question, type="input_text")],
                role="user",
                status="completed",
                type="message",
            )
        )

        response = client.responses.create(
            # model="gpt-4o",  # noqa: ERA001
            model="o4-mini",
            reasoning=Reasoning(
                effort="medium",
                # Your organization must be verified to generate reasoning summaries
                # summary="detailed",  # noqa: ERA001
            ),
            instructions=Settings.instructions,
            input=input_messages,  # type: ignore[arg-type]
        )
        return response.output_text
Loading