Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 16 additions & 0 deletions .github/dependabot.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# To get started with Dependabot version updates, you'll need to specify which
# package ecosystems to update and where the package manifests are located.
# Please see the documentation for all configuration options:
# https://docs.github.com/github/administering-a-repository/configuration-options-for-dependency-updates

version: 2
updates:
- package-ecosystem: "pip"
directory: "/"
schedule:
interval: "daily"

- package-ecosystem: "github-actions"
directory: "/"
schedule:
interval: "monthly"
65 changes: 65 additions & 0 deletions .github/workflows/tests.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
name: "Tests"

on:
push:
branches: [ main ]
pull_request:
workflow_dispatch:

concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: true

jobs:

test:
name: "
Python ${{ matrix.python-version }}
"
runs-on: ${{ matrix.os }}
strategy:
fail-fast: false
matrix:
os: ['ubuntu-latest']
python-version: [
'3.9',
'3.13',
]

env:
OS: ${{ matrix.os }}
PYTHON: ${{ matrix.python-version }}
UV_SYSTEM_PYTHON: true

steps:

- name: Acquire sources
uses: actions/checkout@v4

- name: Install `sponge`
run: sudo apt-get --yes install moreutils

- name: Set up Python
uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}

- name: Set up uv
uses: astral-sh/setup-uv@v5
with:
cache-dependency-glob: |
pyproject.toml
cache-suffix: ${{ matrix.python-version }}
enable-cache: true
version: "latest"

- name: Set up project
run: |
uv pip install --editable='.[develop,test]'

- name: Run linter and software tests
run: |
poe check
poe build
cratedb-about --version
cratedb-about list-questions
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,5 @@
.venv*
*.egg-info
*.lock
bdist.*
__pycache__
2 changes: 2 additions & 0 deletions CHANGES.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,5 @@
- Established project layout
- Added source files (`cratedb-overview.md`), generator program wrapper
(`uv run poe build`), and build artifacts (`llms-ctx.txt` and `llms-ctx-full.txt`)
- Added CLI program `cratedb-about` with subcommands `ask` and `list-questions`
for ad hoc conversations about CrateDB
17 changes: 12 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,13 +19,20 @@ to relevant resources in the spirit of a curated knowledge backbone.

## Usage

To rebuild the `llms.txt` files, acquire the sources of the repository,
and invoke the build command.
Install the `cratedb-about` package.
```shell
uv tool install --upgrade 'cratedb-about @ git+https://github.com/crate/about'
```

Ask questions about CrateDB.
```shell
export OPENAI_API_KEY=<YOUR_OPENAI_API_KEY>
cratedb-about ask "CrateDB does not seem to provide an AUTOINCREMENT feature?"
```

If you are running out of questions, get inspired by the built-in library of example questions.
```shell
git clone https://github.com/crate/about cratedb-about
cd cratedb-about
uv run poe build
cratedb-about list-questions
```


Expand Down
10 changes: 10 additions & 0 deletions docs/backlog.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# Backlog

## Iteration +1
- JSON/YAML/Markdown output

## Iteration +2
- Unlock Discourse: https://community.cratedb.com/raw/1015
- Unlock HTML resources: https://www.urltoany.com/url-to-markdown.
=> Find the best standalone program.
- Unlock GitHub projects: https://github.com/mattduck/gh2md
22 changes: 22 additions & 0 deletions docs/sandbox.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# Sandbox

Acquire the source code repository.
```shell
git clone https://github.com/crate/about cratedb-about
cd cratedb-about
```

Rebuild all the `llms.txt` files.
```shell
uv run poe build
```

Ask questions about CrateDB.
```shell
uvx --with-editable=. cratedb-about ask "CrateDB does not seem to provide an AUTOINCREMENT feature?"
```

If you are running out of questions, get inspired by the built-in library of example questions.
```shell
uvx --with-editable=. cratedb-about list-questions
```
10 changes: 9 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -69,9 +69,12 @@ dynamic = [
"version",
]
dependencies = [
"claudette",
"click<9",
"llms-txt==0.0.4",
"openai",
"poethepoet<1",
"requests<3",
]
optional-dependencies.develop = [
"mypy<1.16",
Expand All @@ -83,6 +86,8 @@ urls.Changelog = "https://github.com/crate/about/blob/main/CHANGES.md"
urls.Issues = "https://github.com/crate/about/issues"
urls.Repository = "https://github.com/crate/about"

scripts.cratedb-about = "cratedb_about.cli:cli"

[tool.ruff]
line-length = 100

Expand Down Expand Up @@ -118,7 +123,10 @@ lint.select = [
]

[tool.mypy]
exclude = [ ]
mypy_path = "src"
packages = [
"cratedb_about",
]
check_untyped_defs = true
ignore_missing_imports = true
implicit_optional = true
Expand Down
47 changes: 47 additions & 0 deletions src/cratedb_about/cli.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
import typing as t

import click

from cratedb_about.core import CrateDBConversation
from cratedb_about.model import Example


@click.group()
@click.version_option()
@click.pass_context
def cli(ctx: click.Context) -> None:
    """Top-level command group for the `cratedb-about` program."""
    # No shared setup needed yet; subcommands are self-contained.
    pass


@cli.command()
@click.argument("question", type=str, required=False)
@click.option("--backend", type=click.Choice(["openai", "claude"]), default="openai")
def ask(question: t.Optional[str], backend: t.Literal["claude", "openai"]) -> None:
    """
    Ask questions about CrateDB.

    QUESTION is optional; when omitted, a default example question is used.

    Requires:
    - OpenAI backend: Set OPENAI_API_KEY environment variable
    - Claude backend: Set ANTHROPIC_API_KEY environment variable
    """
    # The conversation helper primes the LLM with the bundled CrateDB
    # knowledge context before relaying the user's question.
    wizard = CrateDBConversation(
        backend=backend,
        use_knowledge=True,
    )
    if not question:
        # Use the AUTOINCREMENT question or fall back to the first question if not found
        default_question = next(
            (q for q in Example.questions if "AUTOINCREMENT" in q),
            Example.questions[0] if Example.questions else "What is CrateDB?",
        )
        question = default_question
    click.echo(f"Question: {question}\nAnswer:\n")
    click.echo(wizard.ask(question))


@cli.command()
def list_questions():
    """
    Print the bundled example questions about CrateDB, one per line.
    """
    catalog = Example.questions
    click.echo("\n".join(catalog))
148 changes: 148 additions & 0 deletions src/cratedb_about/core.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,148 @@
# Derived from: https://llmstxt.org/domains.html
import dataclasses
import os
import sys
import typing as t

from cratedb_about.model import Settings

# Import backends conditionally to avoid errors if dependencies are missing
CLAUDE_AVAILABLE = False
OPENAI_AVAILABLE = False

try:
from claudette import Chat, contents, models

CLAUDE_AVAILABLE = True
except ImportError:
pass

try:
from openai import OpenAI
from openai.types.responses import ResponseInputTextParam
from openai.types.responses.response_input_param import Message
from openai.types.shared_params import Reasoning

OPENAI_AVAILABLE = True
except ImportError:
pass


@dataclasses.dataclass
class CrateDBConversation:
    """
    Manage conversations about CrateDB.

    Dispatches a question to either the OpenAI or the Claude backend and,
    when `use_knowledge` is enabled, primes the model with the CrateDB
    knowledge context from `Settings.get_prompt()` first.

    Requires:
    - OPENAI_API_KEY environment variable when using "openai" backend
    - ANTHROPIC_API_KEY environment variable when using "claude" backend
    """

    # Which LLM provider to use for answering questions.
    backend: t.Literal["claude", "openai"] = "openai"
    # Whether to send the CrateDB knowledge context ahead of the question.
    use_knowledge: bool = True

    def __post_init__(self) -> None:
        """Validate configuration.

        Raises:
            ImportError: If the selected backend's client package is not installed.
            ValueError: If the selected backend's API key environment variable is unset.
        """
        if self.backend == "openai" and not OPENAI_AVAILABLE:
            raise ImportError("The 'openai' package is required when using the OpenAI backend")
        if self.backend == "claude" and not CLAUDE_AVAILABLE:
            raise ImportError("The 'claudette' package is required when using the Claude backend")
        if self.backend == "openai" and not os.environ.get("OPENAI_API_KEY"):
            raise ValueError(
                "OPENAI_API_KEY environment variable is required when using 'openai' backend"
            )
        if self.backend == "claude" and not os.environ.get("ANTHROPIC_API_KEY"):
            raise ValueError(
                "ANTHROPIC_API_KEY environment variable is required when using 'claude' backend"
            )

    def ask(self, question: str) -> str:
        """
        Ask a question about CrateDB using the configured LLM backend.

        Args:
            question: The question to ask about CrateDB

        Returns:
            str: The response from the LLM

        Raises:
            NotImplementedError: If an unsupported backend is specified
            ValueError: If required environment variables are missing
            RuntimeError: If there's an error communicating with the LLM API
        """
        if self.backend == "openai":
            return self.ask_gpt(question)
        if self.backend == "claude":
            return self.ask_claude(question)
        raise NotImplementedError("Please select an available LLM backend")

    def ask_claude(self, question: str) -> str:
        """
        Ask the question via Anthropic's Claude, using the `claudette` client.

        A knowledge-context failure is reported as a warning only, so the
        question is still asked without the extra context (best-effort).

        Raises:
            RuntimeError: If the Claude API call itself fails.
        """
        # FIXME: API does not provide lookup by name.
        # NOTE(review): positional index into `claudette.models` — assumes
        # index 1 is Sonnet 3.5; fragile if the upstream list order changes.
        model = models[1]  # Sonnet 3.5
        chat = Chat(model, sp=Settings.instructions)
        if self.use_knowledge:
            try:
                chat(Settings.get_prompt())
            except Exception as e:
                print(f"Warning: Failed to load knowledge context: {e}", file=sys.stderr)  # noqa: T201
        try:
            result = chat(question)
            return contents(result)
        except Exception as e:
            raise RuntimeError(f"Claude API error: {e}") from e

    def ask_gpt(self, question: str) -> str:
        """
        Ask the machine, enriched with CrateDB context, catalyzed through OpenAI's GPT.

        Models like o3 and o4-mini are reasoning models.
        https://platform.openai.com/docs/guides/reasoning

        The OpenAI API provides different kinds of roles for messages. Let's use the
        `developer` role to relay information on top of the user's question.

        - https://community.openai.com/t/the-system-role-how-it-influences-the-chat-behavior/87353
        - https://community.openai.com/t/understanding-role-management-in-openais-api-two-methods-compared/253289
        - https://community.openai.com/t/how-is-developer-message-better-than-system-prompt/1062784
        """

        client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))

        # Build the Responses API input: optional knowledge context first
        # (as a `developer` message), then the user's question.
        input_messages: t.List[Message] = []
        if self.use_knowledge:
            try:
                prompt = Settings.get_prompt()
                if prompt:
                    input_messages.append(
                        Message(
                            content=[ResponseInputTextParam(text=prompt, type="input_text")],
                            role="developer",
                            status="completed",
                            type="message",
                        )
                    )
            except Exception as e:
                # Best-effort: warn and continue without the knowledge context.
                print(f"Warning: Failed to load knowledge context: {e}", file=sys.stderr)  # noqa: T201
        # Always add the user question
        input_messages.append(
            Message(
                content=[ResponseInputTextParam(text=question, type="input_text")],
                role="user",
                status="completed",
                type="message",
            )
        )

        response = client.responses.create(
            # model="gpt-4o",  # noqa: ERA001
            model="o4-mini",
            reasoning=Reasoning(
                effort="medium",
                # Your organization must be verified to generate reasoning summaries
                # summary="detailed",  # noqa: ERA001
            ),
            instructions=Settings.instructions,
            input=input_messages,  # type: ignore[arg-type]
        )
        return response.output_text
Loading