From 4696b770f5826613cdf8164d64fefb70c852bda0 Mon Sep 17 00:00:00 2001 From: Andreas Motl Date: Wed, 16 Apr 2025 21:02:05 +0200 Subject: [PATCH 1/2] Add CLI program `cratedb-about` ... with subcommands `ask` and `list-questions` for ad hoc conversations about CrateDB. --- .github/dependabot.yml | 16 ++++++++ .github/workflows/tests.yml | 64 ++++++++++++++++++++++++++++++ .gitignore | 2 + CHANGES.md | 2 + README.md | 17 +++++--- docs/backlog.md | 10 +++++ docs/sandbox.md | 22 +++++++++++ pyproject.toml | 10 ++++- src/cratedb_about/cli.py | 42 ++++++++++++++++++++ src/cratedb_about/core.py | 79 +++++++++++++++++++++++++++++++++++++ src/cratedb_about/model.py | 44 +++++++++++++++++++++ 11 files changed, 302 insertions(+), 6 deletions(-) create mode 100644 .github/dependabot.yml create mode 100644 .github/workflows/tests.yml create mode 100644 docs/backlog.md create mode 100644 docs/sandbox.md create mode 100644 src/cratedb_about/cli.py create mode 100644 src/cratedb_about/core.py create mode 100644 src/cratedb_about/model.py diff --git a/.github/dependabot.yml b/.github/dependabot.yml new file mode 100644 index 0000000..ac401ef --- /dev/null +++ b/.github/dependabot.yml @@ -0,0 +1,16 @@ +# To get started with Dependabot version updates, you'll need to specify which +# package ecosystems to update and where the package manifests are located. 
+# Please see the documentation for all configuration options: +# https://docs.github.com/github/administering-a-repository/configuration-options-for-dependency-updates + +version: 2 +updates: + - package-ecosystem: "pip" + directory: "/" + schedule: + interval: "daily" + + - package-ecosystem: "github-actions" + directory: "/" + schedule: + interval: "monthly" diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml new file mode 100644 index 0000000..d729b76 --- /dev/null +++ b/.github/workflows/tests.yml @@ -0,0 +1,64 @@ +name: "Tests" + +on: + push: + branches: [ main ] + pull_request: + workflow_dispatch: + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +jobs: + + test: + name: " + Python ${{ matrix.python-version }} + " + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + os: ['ubuntu-latest'] + python-version: [ + '3.9', + '3.13', + ] + + env: + OS: ${{ matrix.os }} + PYTHON: ${{ matrix.python-version }} + UV_SYSTEM_PYTHON: true + + steps: + + - name: Acquire sources + uses: actions/checkout@v4 + + - name: Install `sponge` + run: sudo apt-get --yes install moreutils + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + + - name: Set up uv + uses: astral-sh/setup-uv@v5 + with: + cache-dependency-glob: | + pyproject.toml + cache-suffix: ${{ matrix.python-version }} + enable-cache: true + version: "latest" + + - name: Set up project + run: | + uv pip install --editable='.[develop,test]' + + - name: Run linter and software tests + run: | + poe check + poe build + cratedb-about list-questions diff --git a/.gitignore b/.gitignore index 28a49b5..937df2d 100644 --- a/.gitignore +++ b/.gitignore @@ -2,3 +2,5 @@ .venv* *.egg-info *.lock +bdist.* +__pycache__ diff --git a/CHANGES.md b/CHANGES.md index 1f312bc..408d460 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -4,3 +4,5 @@ - Established project layout - Added source files 
(`cratedb-overview.md`), generator program wrapper (`uv run poe build`), and build artifacts (`llms-ctx.txt` and `llms-ctx-full.txt`) +- Added CLI program `cratedb-about` with subcommands `ask` and `list-questions` + for ad hoc conversations about CrateDB diff --git a/README.md b/README.md index 51be41b..766f528 100644 --- a/README.md +++ b/README.md @@ -19,13 +19,20 @@ to relevant resources in the spirit of a curated knowledge backbone. ## Usage -To rebuild the `llms.txt` files, acquire the sources of the repository, -and invoke the build command. +Install `cratedb-about` package. +```shell +uv tool install --upgrade 'cratedb-about @ git+https://github.com/crate/about' +``` + +Ask questions about CrateDB. +```shell +export OPENAI_API_KEY= +cratedb-about ask "CrateDB does not seem to provide an AUTOINCREMENT feature?" +``` +If you are running out of questions, get inspired by the standard library. ```shell -git clone https://github.com/crate/about cratedb-about -cd cratedb-about -uv run poe build +cratedb-about list-questions ``` diff --git a/docs/backlog.md b/docs/backlog.md new file mode 100644 index 0000000..ab82719 --- /dev/null +++ b/docs/backlog.md @@ -0,0 +1,10 @@ +# Backlog + +## Iteration +1 +- JSON/YAML/Markdown output + +## Iteration +2 +- Unlock Discourse: https://community.cratedb.com/raw/1015 +- Unlock HTML resources: https://www.urltoany.com/url-to-markdown. + => Find the best standalone program. +- Unlock GitHub projects: https://github.com/mattduck/gh2md diff --git a/docs/sandbox.md b/docs/sandbox.md new file mode 100644 index 0000000..8b32e98 --- /dev/null +++ b/docs/sandbox.md @@ -0,0 +1,22 @@ +# Sandbox + +Acquire the source code repository. +```shell +git clone https://github.com/crate/about cratedb-about +cd cratedb-about +``` + +Rebuild all the `llms.txt` files. +```shell +uv run poe build +``` + +Ask questions about CrateDB. +```shell +uvx --with-editable=. cratedb-about ask "CrateDB does not seem to provide an AUTOINCREMENT feature?" 
+``` + +If you are running out of questions, get inspired by the standard library. +```shell +uvx --with-editable=. cratedb-about list-questions +``` diff --git a/pyproject.toml b/pyproject.toml index 4f1569a..5b1b24f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -69,9 +69,12 @@ dynamic = [ "version", ] dependencies = [ + "claudette", "click<9", "llms-txt==0.0.4", + "openai", "poethepoet<1", + "requests<3", ] optional-dependencies.develop = [ "mypy<1.16", @@ -83,6 +86,8 @@ urls.Changelog = "https://github.com/crate/about/blob/main/CHANGES.md" urls.Issues = "https://github.com/crate/about/issues" urls.Repository = "https://github.com/crate/about" +scripts.cratedb-about = "cratedb_about.cli:cli" + [tool.ruff] line-length = 100 @@ -118,7 +123,10 @@ lint.select = [ ] [tool.mypy] -exclude = [ ] +mypy_path = "src" +packages = [ + "cratedb_about", +] check_untyped_defs = true ignore_missing_imports = true implicit_optional = true diff --git a/src/cratedb_about/cli.py b/src/cratedb_about/cli.py new file mode 100644 index 0000000..b286478 --- /dev/null +++ b/src/cratedb_about/cli.py @@ -0,0 +1,42 @@ +import sys +import typing as t + +import click + +from cratedb_about.core import CrateDBConversation +from cratedb_about.model import Example + + +@click.group() +@click.version_option() +@click.pass_context +def cli(ctx: click.Context) -> None: + pass + + +@cli.command() +@click.argument("question", type=str, required=False) +@click.option("--backend", type=str, default="openai") +def ask(question: str, backend: t.Literal["claude", "openai"]): + """ + Ask questions about CrateDB. + """ + wizard = CrateDBConversation( + backend=backend, + use_knowledge=True, + ) + if not question: + question = Example.questions[4] + + sys.stdout.write(f"Question: {question}\nAnswer:\n") + sys.stdout.write(wizard.ask(question)) + sys.stdout.write("\n") + + +@cli.command() +def list_questions(): + """ + List a few example questions about CrateDB. 
+ """ + sys.stdout.write("\n".join(Example.questions)) + sys.stdout.write("\n") diff --git a/src/cratedb_about/core.py b/src/cratedb_about/core.py new file mode 100644 index 0000000..a88bdf9 --- /dev/null +++ b/src/cratedb_about/core.py @@ -0,0 +1,79 @@ +# Derived from: https://llmstxt.org/domains.html +import dataclasses +import os +import typing as t + +from cratedb_about.model import Settings + + +@dataclasses.dataclass +class CrateDBConversation: + """ + Manage conversations about CrateDB. + """ + + backend: t.Literal["claude", "openai"] = "openai" + use_knowledge: bool = True + + def ask(self, question: str) -> str: + if self.backend == "openai": + return self.ask_gpt(question) + if self.backend == "claude": + return self.ask_claude(question) + raise NotImplementedError("Please select an available LLM backend") + + def ask_claude(self, question: str) -> str: + from claudette import Chat, contents, models + + model = models[1] # Sonnet 3.5 + chat = Chat(model, sp=Settings.instructions) + chat(Settings.get_prompt()) + result = chat(question) + return contents(result) + + def ask_gpt(self, question: str) -> str: + """ + Ask the machine, enriched with CrateDB context, catalyzed through OpenAI's GPT. + + Models like o3 and o4-mini are reasoning models. + https://platform.openai.com/docs/guides/reasoning + + The OpenAI API provides different kinds of roles for messages. Let's use the + `developer` role to relay information on top of the user's question. 
+ + - https://community.openai.com/t/the-system-role-how-it-influences-the-chat-behavior/87353 + - https://community.openai.com/t/understanding-role-management-in-openais-api-two-methods-compared/253289 + - https://community.openai.com/t/how-is-developer-message-better-than-system-prompt/1062784 + """ + from openai import OpenAI + from openai.types.responses import ResponseInputTextParam + from openai.types.shared_params import Reasoning + + client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY")) + from openai.types.responses.response_input_param import Message + + response = client.responses.create( + # model="gpt-4o", # noqa: ERA001 + model="o4-mini", + reasoning=Reasoning( + effort="medium", + # Your organization must be verified to generate reasoning summaries + # summary="detailed", # noqa: ERA001 + ), + instructions=Settings.instructions, + input=[ + Message( + content=[ResponseInputTextParam(text=Settings.get_prompt(), type="input_text")], + role="developer", + status="completed", + type="message", + ), + Message( + content=[ResponseInputTextParam(text=question, type="input_text")], + role="user", + status="completed", + type="message", + ), + ], + ) + return response.output_text diff --git a/src/cratedb_about/model.py b/src/cratedb_about/model.py new file mode 100644 index 0000000..b889c41 --- /dev/null +++ b/src/cratedb_about/model.py @@ -0,0 +1,44 @@ +import sys + +import requests + + +class Example: + """ + A few example questions to ask about CrateDB. 
+ """ + + questions = [ + "What are the benefits of CrateDB?", + "Tell me about why CrateDB is different.", + "Tell me about CrateDB Cloud.", + "How to use sequences with CrateDB?", + "CrateDB does not seem to provide an AUTOINCREMENT feature?", + "How do I apply sharding properly?", + "How much data can CrateDB store?", + "Please tell me how CrateDB stores data.", + "Does CrateDB support SQLAlchemy and pandas?", + ] + + +class Settings: + """ + Configure the language model to support conversations about CrateDB. + """ + + llms_txt_url = "https://raw.githubusercontent.com/crate/about/6876fedee57f59b693f37996e04f53c6446f2ad6/build/llm/llms-ctx.txt" + instructions = "You are a helpful and concise assistant." + llms_txt = None + prompt = None + + @classmethod + def get_prompt(cls): + if cls.llms_txt is None: + try: + cls.llms_txt = requests.get(cls.llms_txt_url, timeout=10).text + cls.prompt = ( + cls.llms_txt + "\n\nThe above is necessary context for the conversation." + ) + except requests.RequestException as e: + print(f"Error fetching context: {e}", file=sys.stderr) # noqa: T201 + return cls.prompt From f9794d8c206626cf037fb3a1ca3d955c9395ad3d Mon Sep 17 00:00:00 2001 From: Andreas Motl Date: Thu, 17 Apr 2025 00:59:50 +0200 Subject: [PATCH 2/2] cratedb-about: Improve input parameter and error handling per CodeRabbit --- .github/workflows/tests.yml | 1 + src/cratedb_about/cli.py | 23 +++++--- src/cratedb_about/core.py | 115 ++++++++++++++++++++++++++++-------- src/cratedb_about/model.py | 15 ++++- 4 files changed, 121 insertions(+), 33 deletions(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index d729b76..29edb96 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -61,4 +61,5 @@ jobs: run: | poe check poe build + cratedb-about --version cratedb-about list-questions diff --git a/src/cratedb_about/cli.py b/src/cratedb_about/cli.py index b286478..8c87e34 100644 --- a/src/cratedb_about/cli.py +++ 
b/src/cratedb_about/cli.py @@ -1,4 +1,3 @@ -import sys import typing as t import click @@ -16,21 +15,28 @@ def cli(ctx: click.Context) -> None: @cli.command() @click.argument("question", type=str, required=False) -@click.option("--backend", type=str, default="openai") +@click.option("--backend", type=click.Choice(["openai", "claude"]), default="openai") def ask(question: str, backend: t.Literal["claude", "openai"]): """ Ask questions about CrateDB. + + Requires: + - OpenAI backend: Set OPENAI_API_KEY environment variable + - Claude backend: Set ANTHROPIC_API_KEY environment variable """ wizard = CrateDBConversation( backend=backend, use_knowledge=True, ) if not question: - question = Example.questions[4] - - sys.stdout.write(f"Question: {question}\nAnswer:\n") - sys.stdout.write(wizard.ask(question)) - sys.stdout.write("\n") + # Use the AUTOINCREMENT question or fall back to the first question if not found + default_question = next( + (q for q in Example.questions if "AUTOINCREMENT" in q), + Example.questions[0] if Example.questions else "What is CrateDB?", + ) + question = default_question + click.echo(f"Question: {question}\nAnswer:\n") + click.echo(wizard.ask(question)) @cli.command() @@ -38,5 +44,4 @@ def list_questions(): """ List a few example questions about CrateDB. 
""" - sys.stdout.write("\n".join(Example.questions)) - sys.stdout.write("\n") + click.echo("\n".join(Example.questions)) diff --git a/src/cratedb_about/core.py b/src/cratedb_about/core.py index a88bdf9..6f82984 100644 --- a/src/cratedb_about/core.py +++ b/src/cratedb_about/core.py @@ -1,21 +1,76 @@ # Derived from: https://llmstxt.org/domains.html import dataclasses import os +import sys import typing as t from cratedb_about.model import Settings +# Import backends conditionally to avoid errors if dependencies are missing +CLAUDE_AVAILABLE = False +OPENAI_AVAILABLE = False + +try: + from claudette import Chat, contents, models + + CLAUDE_AVAILABLE = True +except ImportError: + pass + +try: + from openai import OpenAI + from openai.types.responses import ResponseInputTextParam + from openai.types.responses.response_input_param import Message + from openai.types.shared_params import Reasoning + + OPENAI_AVAILABLE = True +except ImportError: + pass + @dataclasses.dataclass class CrateDBConversation: """ Manage conversations about CrateDB. 
+ + Requires: + - OPENAI_API_KEY environment variable when using "openai" backend + - ANTHROPIC_API_KEY environment variable when using "claude" backend """ backend: t.Literal["claude", "openai"] = "openai" use_knowledge: bool = True + def __post_init__(self): + """Validate configuration.""" + if self.backend == "openai" and not OPENAI_AVAILABLE: + raise ImportError("The 'openai' package is required when using the OpenAI backend") + if self.backend == "claude" and not CLAUDE_AVAILABLE: + raise ImportError("The 'claudette' package is required when using the Claude backend") + if self.backend == "openai" and not os.environ.get("OPENAI_API_KEY"): + raise ValueError( + "OPENAI_API_KEY environment variable is required when using 'openai' backend" + ) + if self.backend == "claude" and not os.environ.get("ANTHROPIC_API_KEY"): + raise ValueError( + "ANTHROPIC_API_KEY environment variable is required when using 'claude' backend" + ) + def ask(self, question: str) -> str: + """ + Ask a question about CrateDB using the configured LLM backend. + + Args: + question: The question to ask about CrateDB + + Returns: + str: The response from the LLM + + Raises: + NotImplementedError: If an unsupported backend is specified + ValueError: If required environment variables are missing + RuntimeError: If there's an error communicating with the LLM API + """ if self.backend == "openai": return self.ask_gpt(question) if self.backend == "claude": @@ -23,13 +78,19 @@ def ask(self, question: str) -> str: raise NotImplementedError("Please select an available LLM backend") def ask_claude(self, question: str) -> str: - from claudette import Chat, contents, models - + # FIXME: API does not provide lookup by name. 
model = models[1] # Sonnet 3.5 chat = Chat(model, sp=Settings.instructions) - chat(Settings.get_prompt()) - result = chat(question) - return contents(result) + if self.use_knowledge: + try: + chat(Settings.get_prompt()) + except Exception as e: + print(f"Warning: Failed to load knowledge context: {e}", file=sys.stderr) # noqa: T201 + try: + result = chat(question) + return contents(result) + except Exception as e: + raise RuntimeError(f"Claude API error: {e}") from e def ask_gpt(self, question: str) -> str: """ @@ -45,12 +106,33 @@ def ask_gpt(self, question: str) -> str: - https://community.openai.com/t/understanding-role-management-in-openais-api-two-methods-compared/253289 - https://community.openai.com/t/how-is-developer-message-better-than-system-prompt/1062784 """ - from openai import OpenAI - from openai.types.responses import ResponseInputTextParam - from openai.types.shared_params import Reasoning client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY")) - from openai.types.responses.response_input_param import Message + + input_messages: t.List[Message] = [] + if self.use_knowledge: + try: + prompt = Settings.get_prompt() + if prompt: + input_messages.append( + Message( + content=[ResponseInputTextParam(text=prompt, type="input_text")], + role="developer", + status="completed", + type="message", + ) + ) + except Exception as e: + print(f"Warning: Failed to load knowledge context: {e}", file=sys.stderr) # noqa: T201 + # Always add the user question + input_messages.append( + Message( + content=[ResponseInputTextParam(text=question, type="input_text")], + role="user", + status="completed", + type="message", + ) + ) response = client.responses.create( # model="gpt-4o", # noqa: ERA001 @@ -61,19 +143,6 @@ def ask_gpt(self, question: str) -> str: # summary="detailed", # noqa: ERA001 ), instructions=Settings.instructions, - input=[ - Message( - content=[ResponseInputTextParam(text=Settings.get_prompt(), type="input_text")], - role="developer", - 
status="completed", - type="message", - ), - Message( - content=[ResponseInputTextParam(text=question, type="input_text")], - role="user", - status="completed", - type="message", - ), - ], + input=input_messages, # type: ignore[arg-type] ) return response.output_text diff --git a/src/cratedb_about/model.py b/src/cratedb_about/model.py index b889c41..22153b4 100644 --- a/src/cratedb_about/model.py +++ b/src/cratedb_about/model.py @@ -1,3 +1,4 @@ +import os import sys import requests @@ -26,7 +27,15 @@ class Settings: Configure the language model to support conversations about CrateDB. """ - llms_txt_url = "https://raw.githubusercontent.com/crate/about/6876fedee57f59b693f37996e04f53c6446f2ad6/build/llm/llms-ctx.txt" + default_context = ( + "CrateDB is a distributed SQL database that makes it simple to " + "store and analyze massive amounts of data in real-time." + ) + + llms_txt_url = os.getenv( + "CRATEDB_CONTEXT_URL", + "https://raw.githubusercontent.com/crate/about/6876fedee57f59b693f37996e04f53c6446f2ad6/build/llm/llms-ctx.txt", + ) instructions = "You are a helpful and concise assistant." llms_txt = None prompt = None @@ -41,4 +50,8 @@ def get_prompt(cls): if cls.llms_txt is None: try: cls.llms_txt = requests.get(cls.llms_txt_url, timeout=10).text cls.prompt = ( cls.llms_txt + "\n\nThe above is necessary context for the conversation." ) except requests.RequestException as e: print(f"Error fetching context: {e}", file=sys.stderr) # noqa: T201 + # Provide minimal fallback context. + cls.llms_txt = cls.default_context + cls.prompt = cls.llms_txt + "\n\nThe above is minimal context for the conversation." + return cls.prompt