From 4696b770f5826613cdf8164d64fefb70c852bda0 Mon Sep 17 00:00:00 2001 From: Andreas Motl Date: Wed, 16 Apr 2025 21:02:05 +0200 Subject: [PATCH 1/2] Add CLI program `cratedb-about` ... with subcommands `ask` and `list-questions` for ad hoc conversations about CrateDB. --- .github/dependabot.yml | 16 ++++++++ .github/workflows/tests.yml | 64 ++++++++++++++++++++++++++++++ .gitignore | 2 + CHANGES.md | 2 + README.md | 17 +++++--- docs/backlog.md | 10 +++++ docs/sandbox.md | 22 +++++++++++ pyproject.toml | 10 ++++- src/cratedb_about/cli.py | 42 ++++++++++++++++++++ src/cratedb_about/core.py | 79 +++++++++++++++++++++++++++++++++++++ src/cratedb_about/model.py | 44 +++++++++++++++++++++ 11 files changed, 302 insertions(+), 6 deletions(-) create mode 100644 .github/dependabot.yml create mode 100644 .github/workflows/tests.yml create mode 100644 docs/backlog.md create mode 100644 docs/sandbox.md create mode 100644 src/cratedb_about/cli.py create mode 100644 src/cratedb_about/core.py create mode 100644 src/cratedb_about/model.py diff --git a/.github/dependabot.yml b/.github/dependabot.yml new file mode 100644 index 0000000..ac401ef --- /dev/null +++ b/.github/dependabot.yml @@ -0,0 +1,16 @@ +# To get started with Dependabot version updates, you'll need to specify which +# package ecosystems to update and where the package manifests are located. 
+# Please see the documentation for all configuration options: +# https://docs.github.com/github/administering-a-repository/configuration-options-for-dependency-updates + +version: 2 +updates: + - package-ecosystem: "pip" + directory: "/" + schedule: + interval: "daily" + + - package-ecosystem: "github-actions" + directory: "/" + schedule: + interval: "monthly" diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml new file mode 100644 index 0000000..d729b76 --- /dev/null +++ b/.github/workflows/tests.yml @@ -0,0 +1,64 @@ +name: "Tests" + +on: + push: + branches: [ main ] + pull_request: + workflow_dispatch: + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +jobs: + + test: + name: " + Python ${{ matrix.python-version }} + " + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + os: ['ubuntu-latest'] + python-version: [ + '3.9', + '3.13', + ] + + env: + OS: ${{ matrix.os }} + PYTHON: ${{ matrix.python-version }} + UV_SYSTEM_PYTHON: true + + steps: + + - name: Acquire sources + uses: actions/checkout@v4 + + - name: Install `sponge` + run: sudo apt-get --yes install moreutils + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + + - name: Set up uv + uses: astral-sh/setup-uv@v5 + with: + cache-dependency-glob: | + pyproject.toml + cache-suffix: ${{ matrix.python-version }} + enable-cache: true + version: "latest" + + - name: Set up project + run: | + uv pip install --editable='.[develop,test]' + + - name: Run linter and software tests + run: | + poe check + poe build + cratedb-about list-questions diff --git a/.gitignore b/.gitignore index 28a49b5..937df2d 100644 --- a/.gitignore +++ b/.gitignore @@ -2,3 +2,5 @@ .venv* *.egg-info *.lock +bdist.* +__pycache__ diff --git a/CHANGES.md b/CHANGES.md index 1f312bc..408d460 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -4,3 +4,5 @@ - Established project layout - Added source files 
(`cratedb-overview.md`), generator program wrapper (`uv run poe build`), and build artifacts (`llms-ctx.txt` and `llms-ctx-full.txt`) +- Added CLI program `cratedb-about` with subcommands `ask` and `list-questions` + for ad hoc conversations about CrateDB diff --git a/README.md b/README.md index 51be41b..766f528 100644 --- a/README.md +++ b/README.md @@ -19,13 +19,20 @@ to relevant resources in the spirit of a curated knowledge backbone. ## Usage -To rebuild the `llms.txt` files, acquire the sources of the repository, -and invoke the build command. +Install `cratedb-about` package. +```shell +uv tool install --upgrade 'cratedb-about @ git+https://github.com/crate/about' +``` + +Ask questions about CrateDB. +```shell +export OPENAI_API_KEY= +cratedb-about ask "CrateDB does not seem to provide an AUTOINCREMENT feature?" +``` +If you are running out of questions, get inspired by the standard library. ```shell -git clone https://github.com/crate/about cratedb-about -cd cratedb-about -uv run poe build +cratedb-about list-questions ``` diff --git a/docs/backlog.md b/docs/backlog.md new file mode 100644 index 0000000..ab82719 --- /dev/null +++ b/docs/backlog.md @@ -0,0 +1,10 @@ +# Backlog + +## Iteration +1 +- JSON/YAML/Markdown output + +## Iteration +2 +- Unlock Discourse: https://community.cratedb.com/raw/1015 +- Unlock HTML resources: https://www.urltoany.com/url-to-markdown. + => Find the best standalone program. +- Unlock GitHub projects: https://github.com/mattduck/gh2md diff --git a/docs/sandbox.md b/docs/sandbox.md new file mode 100644 index 0000000..8b32e98 --- /dev/null +++ b/docs/sandbox.md @@ -0,0 +1,22 @@ +# Sandbox + +Acquire the source code repository. +```shell +git clone https://github.com/crate/about cratedb-about +cd cratedb-about +``` + +Rebuild all the `llms.txt` files. +```shell +uv run poe build +``` + +Ask questions about CrateDB. +```shell +uvx --with-editable=. cratedb-about ask "CrateDB does not seem to provide an AUTOINCREMENT feature?" 
+``` + +If you are running out of questions, get inspired by the standard library. +```shell +uvx --with-editable=. cratedb-about list-questions +``` diff --git a/pyproject.toml b/pyproject.toml index 4f1569a..5b1b24f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -69,9 +69,12 @@ dynamic = [ "version", ] dependencies = [ + "claudette", "click<9", "llms-txt==0.0.4", + "openai", "poethepoet<1", + "requests<3", ] optional-dependencies.develop = [ "mypy<1.16", @@ -83,6 +86,8 @@ urls.Changelog = "https://github.com/crate/about/blob/main/CHANGES.md" urls.Issues = "https://github.com/crate/about/issues" urls.Repository = "https://github.com/crate/about" +scripts.cratedb-about = "cratedb_about.cli:cli" + [tool.ruff] line-length = 100 @@ -118,7 +123,10 @@ lint.select = [ ] [tool.mypy] -exclude = [ ] +mypy_path = "src" +packages = [ + "cratedb_about", +] check_untyped_defs = true ignore_missing_imports = true implicit_optional = true diff --git a/src/cratedb_about/cli.py b/src/cratedb_about/cli.py new file mode 100644 index 0000000..b286478 --- /dev/null +++ b/src/cratedb_about/cli.py @@ -0,0 +1,42 @@ +import sys +import typing as t + +import click + +from cratedb_about.core import CrateDBConversation +from cratedb_about.model import Example + + +@click.group() +@click.version_option() +@click.pass_context +def cli(ctx: click.Context) -> None: + pass + + +@cli.command() +@click.argument("question", type=str, required=False) +@click.option("--backend", type=str, default="openai") +def ask(question: str, backend: t.Literal["claude", "openai"]): + """ + Ask questions about CrateDB. + """ + wizard = CrateDBConversation( + backend=backend, + use_knowledge=True, + ) + if not question: + question = Example.questions[4] + + sys.stdout.write(f"Question: {question}\nAnswer:\n") + sys.stdout.write(wizard.ask(question)) + sys.stdout.write("\n") + + +@cli.command() +def list_questions(): + """ + List a few example questions about CrateDB. 
+ """ + sys.stdout.write("\n".join(Example.questions)) + sys.stdout.write("\n") diff --git a/src/cratedb_about/core.py b/src/cratedb_about/core.py new file mode 100644 index 0000000..a88bdf9 --- /dev/null +++ b/src/cratedb_about/core.py @@ -0,0 +1,79 @@ +# Derived from: https://llmstxt.org/domains.html +import dataclasses +import os +import typing as t + +from cratedb_about.model import Settings + + +@dataclasses.dataclass +class CrateDBConversation: + """ + Manage conversations about CrateDB. + """ + + backend: t.Literal["claude", "openai"] = "openai" + use_knowledge: bool = True + + def ask(self, question: str) -> str: + if self.backend == "openai": + return self.ask_gpt(question) + if self.backend == "claude": + return self.ask_claude(question) + raise NotImplementedError("Please select an available LLM backend") + + def ask_claude(self, question: str) -> str: + from claudette import Chat, contents, models + + model = models[1] # Sonnet 3.5 + chat = Chat(model, sp=Settings.instructions) + chat(Settings.get_prompt()) + result = chat(question) + return contents(result) + + def ask_gpt(self, question: str) -> str: + """ + Ask the machine, enriched with CrateDB context, catalyzed through OpenAI's GPT. + + Models like o3 and o4-mini are reasoning models. + https://platform.openai.com/docs/guides/reasoning + + The OpenAI API provides different kinds of roles for messages. Let's use the + `developer` role to relay information on top of the user's question. 
+ + - https://community.openai.com/t/the-system-role-how-it-influences-the-chat-behavior/87353 + - https://community.openai.com/t/understanding-role-management-in-openais-api-two-methods-compared/253289 + - https://community.openai.com/t/how-is-developer-message-better-than-system-prompt/1062784 + """ + from openai import OpenAI + from openai.types.responses import ResponseInputTextParam + from openai.types.shared_params import Reasoning + + client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY")) + from openai.types.responses.response_input_param import Message + + response = client.responses.create( + # model="gpt-4o", # noqa: ERA001 + model="o4-mini", + reasoning=Reasoning( + effort="medium", + # Your organization must be verified to generate reasoning summaries + # summary="detailed", # noqa: ERA001 + ), + instructions=Settings.instructions, + input=[ + Message( + content=[ResponseInputTextParam(text=Settings.get_prompt(), type="input_text")], + role="developer", + status="completed", + type="message", + ), + Message( + content=[ResponseInputTextParam(text=question, type="input_text")], + role="user", + status="completed", + type="message", + ), + ], + ) + return response.output_text diff --git a/src/cratedb_about/model.py b/src/cratedb_about/model.py new file mode 100644 index 0000000..b889c41 --- /dev/null +++ b/src/cratedb_about/model.py @@ -0,0 +1,44 @@ +import sys + +import requests + + +class Example: + """ + A few example questions to ask about CrateDB. 
+ """ + + questions = [ + "What are the benefits of CrateDB?", + "Tell me about why CrateDB is different.", + "Tell me about CrateDB Cloud.", + "How to use sequences with CrateDB?", + "CrateDB does not seem to provide an AUTOINCREMENT feature?", + "How do I apply sharding properly?", + "How much data can CrateDB store?", + "Please tell me how CrateDB stores data.", + "Does CrateDB support SQLAlchemy and pandas?", + ] + + +class Settings: + """ + Configure the language model to support conversations about CrateDB. + """ + + llms_txt_url = "https://raw.githubusercontent.com/crate/about/6876fedee57f59b693f37996e04f53c6446f2ad6/build/llm/llms-ctx.txt" + instructions = "You are a helpful and concise assistant." + llms_txt = None + prompt = None + + @classmethod + def get_prompt(cls): + if cls.llms_txt is None: + try: + cls.llms_txt = requests.get(cls.llms_txt_url, timeout=10).text + cls.prompt = ( + cls.llms_txt + "\n\nThe above is necessary context for the conversation." + ) + except requests.RequestException as e: + print(f"Error fetching context: {e}", file=sys.stderr) # noqa: T201 + return cls.prompt From f9794d8c206626cf037fb3a1ca3d955c9395ad3d Mon Sep 17 00:00:00 2001 From: Andreas Motl Date: Thu, 17 Apr 2025 00:59:50 +0200 Subject: [PATCH 2/2] cratedb-about: Improve input parameter and error handling per CodeRabbit --- .github/workflows/tests.yml | 1 + src/cratedb_about/cli.py | 23 +++++--- src/cratedb_about/core.py | 115 ++++++++++++++++++++++++++++-------- src/cratedb_about/model.py | 15 ++++- 4 files changed, 121 insertions(+), 33 deletions(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index d729b76..29edb96 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -61,4 +61,5 @@ jobs: run: | poe check poe build + cratedb-about --version cratedb-about list-questions diff --git a/src/cratedb_about/cli.py b/src/cratedb_about/cli.py index b286478..8c87e34 100644 --- a/src/cratedb_about/cli.py +++ 
b/src/cratedb_about/cli.py @@ -1,4 +1,3 @@ -import sys import typing as t import click @@ -16,21 +15,28 @@ def cli(ctx: click.Context) -> None: @cli.command() @click.argument("question", type=str, required=False) -@click.option("--backend", type=str, default="openai") +@click.option("--backend", type=click.Choice(["openai", "claude"]), default="openai") def ask(question: str, backend: t.Literal["claude", "openai"]): """ Ask questions about CrateDB. + + Requires: + - OpenAI backend: Set OPENAI_API_KEY environment variable + - Claude backend: Set ANTHROPIC_API_KEY environment variable """ wizard = CrateDBConversation( backend=backend, use_knowledge=True, ) if not question: - question = Example.questions[4] - - sys.stdout.write(f"Question: {question}\nAnswer:\n") - sys.stdout.write(wizard.ask(question)) - sys.stdout.write("\n") + # Use the AUTOINCREMENT question or fall back to the first question if not found + default_question = next( + (q for q in Example.questions if "AUTOINCREMENT" in q), + Example.questions[0] if Example.questions else "What is CrateDB?", + ) + question = default_question + click.echo(f"Question: {question}\nAnswer:\n") + click.echo(wizard.ask(question)) @cli.command() @@ -38,5 +44,4 @@ def list_questions(): """ List a few example questions about CrateDB. 
""" - sys.stdout.write("\n".join(Example.questions)) - sys.stdout.write("\n") + click.echo("\n".join(Example.questions)) diff --git a/src/cratedb_about/core.py b/src/cratedb_about/core.py index a88bdf9..6f82984 100644 --- a/src/cratedb_about/core.py +++ b/src/cratedb_about/core.py @@ -1,21 +1,76 @@ # Derived from: https://llmstxt.org/domains.html import dataclasses import os +import sys import typing as t from cratedb_about.model import Settings +# Import backends conditionally to avoid errors if dependencies are missing +CLAUDE_AVAILABLE = False +OPENAI_AVAILABLE = False + +try: + from claudette import Chat, contents, models + + CLAUDE_AVAILABLE = True +except ImportError: + pass + +try: + from openai import OpenAI + from openai.types.responses import ResponseInputTextParam + from openai.types.responses.response_input_param import Message + from openai.types.shared_params import Reasoning + + OPENAI_AVAILABLE = True +except ImportError: + pass + @dataclasses.dataclass class CrateDBConversation: """ Manage conversations about CrateDB. 
+ + Requires: + - OPENAI_API_KEY environment variable when using "openai" backend + - ANTHROPIC_API_KEY environment variable when using "claude" backend """ backend: t.Literal["claude", "openai"] = "openai" use_knowledge: bool = True + def __post_init__(self): + """Validate configuration.""" + if self.backend == "openai" and not OPENAI_AVAILABLE: + raise ImportError("The 'openai' package is required when using the OpenAI backend") + if self.backend == "claude" and not CLAUDE_AVAILABLE: + raise ImportError("The 'claudette' package is required when using the Claude backend") + if self.backend == "openai" and not os.environ.get("OPENAI_API_KEY"): + raise ValueError( + "OPENAI_API_KEY environment variable is required when using 'openai' backend" + ) + if self.backend == "claude" and not os.environ.get("ANTHROPIC_API_KEY"): + raise ValueError( + "ANTHROPIC_API_KEY environment variable is required when using 'claude' backend" + ) + def ask(self, question: str) -> str: + """ + Ask a question about CrateDB using the configured LLM backend. + + Args: + question: The question to ask about CrateDB + + Returns: + str: The response from the LLM + + Raises: + NotImplementedError: If an unsupported backend is specified + ValueError: If required environment variables are missing + RuntimeError: If there's an error communicating with the LLM API + """ if self.backend == "openai": return self.ask_gpt(question) if self.backend == "claude": @@ -23,13 +78,19 @@ def ask(self, question: str) -> str: raise NotImplementedError("Please select an available LLM backend") def ask_claude(self, question: str) -> str: - from claudette import Chat, contents, models - + # FIXME: API does not provide lookup by name. 
model = models[1] # Sonnet 3.5 chat = Chat(model, sp=Settings.instructions) - chat(Settings.get_prompt()) - result = chat(question) - return contents(result) + if self.use_knowledge: + try: + chat(Settings.get_prompt()) + except Exception as e: + print(f"Warning: Failed to load knowledge context: {e}", file=sys.stderr) # noqa: T201 + try: + result = chat(question) + return contents(result) + except Exception as e: + raise RuntimeError(f"Claude API error: {e}") from e def ask_gpt(self, question: str) -> str: """ @@ -45,12 +106,33 @@ def ask_gpt(self, question: str) -> str: - https://community.openai.com/t/understanding-role-management-in-openais-api-two-methods-compared/253289 - https://community.openai.com/t/how-is-developer-message-better-than-system-prompt/1062784 """ - from openai import OpenAI - from openai.types.responses import ResponseInputTextParam - from openai.types.shared_params import Reasoning client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY")) - from openai.types.responses.response_input_param import Message + + input_messages: t.List[Message] = [] + if self.use_knowledge: + try: + prompt = Settings.get_prompt() + if prompt: + input_messages.append( + Message( + content=[ResponseInputTextParam(text=prompt, type="input_text")], + role="developer", + status="completed", + type="message", + ) + ) + except Exception as e: + print(f"Warning: Failed to load knowledge context: {e}", file=sys.stderr) # noqa: T201 + # Always add the user question + input_messages.append( + Message( + content=[ResponseInputTextParam(text=question, type="input_text")], + role="user", + status="completed", + type="message", + ) + ) response = client.responses.create( # model="gpt-4o", # noqa: ERA001 @@ -61,19 +143,6 @@ def ask_gpt(self, question: str) -> str: # summary="detailed", # noqa: ERA001 ), instructions=Settings.instructions, - input=[ - Message( - content=[ResponseInputTextParam(text=Settings.get_prompt(), type="input_text")], - role="developer", - 
status="completed", - type="message", - ), - Message( - content=[ResponseInputTextParam(text=question, type="input_text")], - role="user", - status="completed", - type="message", - ), - ], + input=input_messages, # type: ignore[arg-type] ) return response.output_text diff --git a/src/cratedb_about/model.py b/src/cratedb_about/model.py index b889c41..22153b4 100644 --- a/src/cratedb_about/model.py +++ b/src/cratedb_about/model.py @@ -1,3 +1,4 @@ +import os import sys import requests @@ -26,7 +27,15 @@ class Settings: Configure the language model to support conversations about CrateDB. """ - llms_txt_url = "https://raw.githubusercontent.com/crate/about/6876fedee57f59b693f37996e04f53c6446f2ad6/build/llm/llms-ctx.txt" + default_context = ( + "CrateDB is a distributed SQL database that makes it simple to " + "store and analyze massive amounts of data in real-time." + ) + + llms_txt_url = os.getenv( + "CRATEDB_CONTEXT_URL", + "https://raw.githubusercontent.com/crate/about/6876fedee57f59b693f37996e04f53c6446f2ad6/build/llm/llms-ctx.txt", + ) instructions = "You are a helpful and concise assistant." llms_txt = None prompt = None @@ -41,4 +50,8 @@ def get_prompt(cls): if cls.llms_txt is None: try: cls.llms_txt = requests.get(cls.llms_txt_url, timeout=10).text cls.prompt = ( cls.llms_txt + "\n\nThe above is necessary context for the conversation." ) except requests.RequestException as e: print(f"Error fetching context: {e}", file=sys.stderr) # noqa: T201 + # Provide minimal fallback context. + cls.llms_txt = cls.default_context + cls.prompt = cls.llms_txt + "\n\nThe above is minimal context for the conversation." + return cls.prompt