From e1851c2983004d256c50f54f60f7b97e014e56b2 Mon Sep 17 00:00:00 2001 From: mountain Date: Sun, 12 Apr 2026 09:17:33 +0800 Subject: [PATCH 1/2] Skip lint when knowledge base has no indexed documents When the hash registry is empty (no documents added yet), lint would still run structural checks (all zero), call the LLM for knowledge lint (wasting an API call), write an empty report file, and log the operation. Now it exits early with a clear message instead. --- openkb/cli.py | 10 ++++++ tests/test_lint_cli.py | 74 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 84 insertions(+) create mode 100644 tests/test_lint_cli.py diff --git a/openkb/cli.py b/openkb/cli.py index 028e546e..6fd75388 100644 --- a/openkb/cli.py +++ b/openkb/cli.py @@ -545,6 +545,16 @@ def lint(ctx, fix): _setup_llm_key(kb_dir) model: str = config.get("model", DEFAULT_CONFIG["model"]) + # Skip lint entirely when the KB has no indexed documents + hashes_file = openkb_dir / "hashes.json" + if hashes_file.exists(): + hashes = json.loads(hashes_file.read_text(encoding="utf-8")) + else: + hashes = {} + if not hashes: + click.echo("Nothing to lint — no documents indexed yet. Run `openkb add` first.") + return + # Structural lint click.echo("Running structural lint...") structural_report = run_structural_lint(kb_dir) diff --git a/tests/test_lint_cli.py b/tests/test_lint_cli.py new file mode 100644 index 00000000..440afb54 --- /dev/null +++ b/tests/test_lint_cli.py @@ -0,0 +1,74 @@ +"""Tests for the openkb lint CLI command.""" +from __future__ import annotations + +import json +from pathlib import Path +from unittest.mock import patch + +from click.testing import CliRunner + +from openkb.cli import cli + + +def _setup_kb(tmp_path: Path) -> Path: + """Create a minimal KB structure and return kb_dir.""" + kb_dir = tmp_path + (kb_dir / "raw").mkdir() + (kb_dir / "wiki" / "sources" / "images").mkdir(parents=True) + (kb_dir / "wiki" / "summaries").mkdir(parents=True) + (kb_dir / "wiki" / "concepts").mkdir(parents=True) + (kb_dir / "wiki" / "reports").mkdir(parents=True) + openkb_dir = kb_dir / ".openkb" + openkb_dir.mkdir() + (openkb_dir / "config.yaml").write_text("model: gpt-4o-mini\n") + (openkb_dir / "hashes.json").write_text(json.dumps({})) + (kb_dir / "wiki" / "index.md").write_text( + "# Knowledge Base Index\n\n## Documents\n\n## Concepts\n" + ) + return kb_dir + + +class TestLintCommand: + def test_lint_empty_kb_skips(self, tmp_path): + """Lint on an empty KB (no indexed docs) should exit early.""" + kb_dir = _setup_kb(tmp_path) + runner = CliRunner() + with patch("openkb.cli._find_kb_dir", return_value=kb_dir): + result = runner.invoke(cli, ["lint"]) + assert result.exit_code == 0 + assert "Nothing to lint" in result.output + assert "no documents indexed" in result.output + # No report should be written + reports = list((kb_dir / "wiki" / "reports").glob("*.md")) + assert reports == [] + + def test_lint_no_hashes_file_skips(self, tmp_path): + """Lint should also skip when hashes.json doesn't exist.""" + kb_dir = _setup_kb(tmp_path) + (kb_dir / ".openkb" / "hashes.json").unlink() + runner = CliRunner() + with patch("openkb.cli._find_kb_dir", return_value=kb_dir): + result = runner.invoke(cli, ["lint"]) + assert result.exit_code == 0 + assert "Nothing to lint" in result.output + + def test_lint_no_kb(self, tmp_path): + runner = CliRunner() + with runner.isolated_filesystem(temp_dir=tmp_path), \ + patch("openkb.cli._find_kb_dir", return_value=None): + result = runner.invoke(cli, ["lint"]) + assert "No knowledge base found" in result.output + + def test_lint_runs_when_docs_exist(self, tmp_path): + """Lint should proceed when there are indexed documents.""" + kb_dir = _setup_kb(tmp_path) + hashes = {"abc": {"name": "paper.pdf", "type": "pdf"}} + (kb_dir / ".openkb" / "hashes.json").write_text(json.dumps(hashes)) + runner = CliRunner() + with patch("openkb.cli._find_kb_dir", return_value=kb_dir), \ + patch("openkb.cli._setup_llm_key"), \ + patch("openkb.agent.linter.run_knowledge_lint", return_value="No issues."): + result = runner.invoke(cli, ["lint"]) + assert "Running structural lint" in result.output + assert "Running knowledge lint" in result.output + assert "Report written to" in result.output From 5ac7b43047a15876e421bd53ffebed2fabdb930c Mon Sep 17 00:00:00 2001 From: mountain Date: Sun, 12 Apr 2026 09:24:28 +0800 Subject: [PATCH 2/2] Move empty-KB check before _setup_llm_key and add exit_code assertion - Hoist the hashes check above _setup_llm_key/config/model so that an empty KB exits immediately without triggering API key warnings - Add missing assert result.exit_code == 0 in test_lint_runs_when_docs_exist --- openkb/cli.py | 7 ++++--- tests/test_lint_cli.py | 1 + 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/openkb/cli.py b/openkb/cli.py index 6fd75388..d91789f8 100644 --- a/openkb/cli.py +++ b/openkb/cli.py @@ -541,9 +541,6 @@ def lint(ctx, fix): from openkb.agent.linter import run_knowledge_lint openkb_dir = kb_dir / ".openkb" - config = load_config(openkb_dir / "config.yaml") - _setup_llm_key(kb_dir) - model: str = config.get("model", DEFAULT_CONFIG["model"]) # Skip lint entirely when the KB has no indexed documents hashes_file = openkb_dir / "hashes.json" @@ -555,6 +552,10 @@ def lint(ctx, fix): click.echo("Nothing to lint — no documents indexed yet. Run `openkb add` first.") return + config = load_config(openkb_dir / "config.yaml") + _setup_llm_key(kb_dir) + model: str = config.get("model", DEFAULT_CONFIG["model"]) + # Structural lint click.echo("Running structural lint...") structural_report = run_structural_lint(kb_dir) diff --git a/tests/test_lint_cli.py b/tests/test_lint_cli.py index 440afb54..bc207f08 100644 --- a/tests/test_lint_cli.py +++ b/tests/test_lint_cli.py @@ -69,6 +69,7 @@ def test_lint_runs_when_docs_exist(self, tmp_path): patch("openkb.cli._setup_llm_key"), \ patch("openkb.agent.linter.run_knowledge_lint", return_value="No issues."): result = runner.invoke(cli, ["lint"]) + assert result.exit_code == 0 assert "Running structural lint" in result.output assert "Running knowledge lint" in result.output assert "Report written to" in result.output