Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
168 changes: 167 additions & 1 deletion lib/rules/structural_drift.ex
Original file line number Diff line number Diff line change
Expand Up @@ -653,7 +653,9 @@ defmodule Hypatia.Rules.StructuralDrift do
sd010_tracked_node_modules(repo_path) ++
sd011_missing_gitignore(repo_path) ++
sd013_path_specific_gitignore(repo_path) ++
sd014_safedom_example_dialect(repo_path)
sd014_safedom_example_dialect(repo_path) ++
sd022_stale_path_after_rename(repo_path) ++
sd023_state_a2ml_divergence(repo_path)

needs_intensive = Enum.any?(findings, & &1[:trigger_intensive])
needs_alert = Enum.any?(findings, & &1[:alert_user])
Expand Down Expand Up @@ -803,4 +805,168 @@ defmodule Hypatia.Rules.StructuralDrift do
true -> branch_block_items(rest, false, acc)
end
end

# ─── SD022: Stale path references after directory rename ───────────────
#
# When a source directory is renamed (e.g. `src/ephapax/` → `src/paint_core/`
# in a single commit), trailing-edge documentation references frequently
# outlive the rename. This rule scans docs for `src/<dir>/` references and
# flags any whose `<dir>` is NOT a real directory in the current tree.
#
# Discovered on JoshuaJewell/paint-type 2026-06-02: PR #48 renamed
# src/ephapax → src/paint_core in the Cargo workspace; 25 docs (49
# occurrences) still pointed at the old path. Caught by manual sweep
# in PR #49. This rule prevents the next recurrence.
#
# Exemption: CHANGELOG.md (historical references are documentation, not
# drift) and anything under `third_party/` (vendored).

@doc """
SD022: Detect documentation that references a `src/<dir>/` path
whose `<dir>` is not a real directory in the current tree.

Scans git-tracked files with a doc-like extension for `src/<dir>/`
occurrences and returns one finding map per (file, missing `<dir>`) pair.
Returns `[]` when `<repo_path>/src` cannot be listed or has no
subdirectories, since there is then nothing to compare against.

Exempt from the scan, at any depth in the repository:

  * files named `CHANGELOG.md` (historical references are intentional)
  * files below a `third_party/` directory (vendored content)

Severity: medium (doc-only; doesn't break the build, but misleads readers).
Action: sed sweep `s|src/<stale-dir>|src/<new-dir>|g` once the rename
target is identified (typically via `git log --diff-filter=R`).
Triggers: intensive scan (where one rename-drift hits, others follow).
"""
def sd022_stale_path_after_rename(repo_path) do
  src_root = Path.join(repo_path, "src")

  # Names of the directories that really exist directly under src/.
  real_subdirs =
    case File.ls(src_root) do
      {:ok, entries} ->
        entries
        |> Enum.filter(&File.dir?(Path.join(src_root, &1)))
        |> MapSet.new()

      {:error, _} ->
        MapSet.new()
    end

  if MapSet.size(real_subdirs) == 0 do
    []
  else
    repo_path
    |> find_files_by_ext([
      ".md",
      ".adoc",
      ".txt",
      ".a2ml",
      ".contractile",
      ".toml",
      ".twasm"
    ])
    |> Enum.reject(&sd022_exempt?/1)
    |> Enum.flat_map(&sd022_scan_file(repo_path, &1, real_subdirs))
  end
end

# True when `rel` (a repo-relative path) is excluded from SD022: any
# CHANGELOG.md, or any file with a `third_party` directory among its parents.
# The previous root-only checks (`rel == "CHANGELOG.md"` and the
# `third_party/` prefix) are special cases of these two tests.
defp sd022_exempt?(rel) do
  parent_dirs = rel |> Path.dirname() |> Path.split()

  Path.basename(rel) == "CHANGELOG.md" or "third_party" in parent_dirs
end

# Builds the SD022 findings for one file: one map per distinct `src/<dir>/`
# reference whose `<dir>` is not in `real_subdirs`. Unreadable files yield [].
defp sd022_scan_file(repo_path, rel, real_subdirs) do
  case File.read(Path.join(repo_path, rel)) do
    {:ok, content} ->
      # The trailing `/` is required so that `src/main.rs`-style file
      # references are not mistaken for directories.
      # NOTE(review): `\b` also matches right after a `/`, so nested paths
      # such as `crates/x/src/<dir>/` are compared against the top-level
      # src/ listing too — confirm whether that is intended.
      ~r{\bsrc/([A-Za-z0-9_][A-Za-z0-9_-]*)/}
      |> Regex.scan(content)
      |> Enum.map(fn [_, dir] -> dir end)
      |> Enum.uniq()
      |> Enum.reject(&MapSet.member?(real_subdirs, &1))
      |> Enum.map(fn stale_dir ->
        %{
          rule: "SD022",
          file: rel,
          severity: :medium,
          reason:
            "doc references `src/#{stale_dir}/` but no such directory exists in the tree (likely surviving a directory rename)",
          action: :rename_sweep,
          stale_dir: stale_dir,
          trigger_intensive: true
        }
      end)

    _ ->
      []
  end
end

# ─── SD023: STATE.a2ml divergence (top-level vs 6a2/) ──────────────────
#
# The estate v2 convention puts STATE at `.machine_readable/6a2/STATE.a2ml`.
# Some repos retain a legacy top-level `.machine_readable/STATE.a2ml`. When
# both exist, they MUST agree on the `last-updated` field — otherwise one
# is stale and consumers (Hypatia, agents reading 6a2) see the wrong reality.
#
# Discovered on JoshuaJewell/paint-type 2026-06-02: top-level STATE.a2ml
# was 2026-06-01 with 22% completion while 6a2/STATE.a2ml was 2026-05-11
# with 10% completion. Caught by manual sweep; PR #49 unified them.

@doc """
SD023: Detect divergence between `.machine_readable/STATE.a2ml` and
`.machine_readable/6a2/STATE.a2ml` when both exist.

Returns a single-element list holding the finding when both files are
readable, both carry a `last-updated` value, and the two values differ.
Returns `[]` in every other case (a file missing or unreadable, a
`last-updated` field absent from either file, or the values equal).

Severity: medium (one is stale; consumers may read either).
Action: pick the freshest as truth, mirror to the other, document
in CHANGELOG which is canonical going forward.
"""
def sd023_state_a2ml_divergence(repo_path) do
  legacy_path = Path.join([repo_path, ".machine_readable", "STATE.a2ml"])
  nested_path = Path.join([repo_path, ".machine_readable", "6a2", "STATE.a2ml"])

  with true <- File.exists?(legacy_path),
       true <- File.exists?(nested_path),
       {:ok, legacy_body} <- File.read(legacy_path),
       {:ok, nested_body} <- File.read(nested_path) do
    # The values are compared as raw strings, exactly as written in the files.
    case {extract_last_updated(legacy_body), extract_last_updated(nested_body)} do
      {nil, _} ->
        []

      {_, nil} ->
        []

      {same, same} ->
        []

      {legacy_date, nested_date} ->
        [
          %{
            rule: "SD023",
            file: ".machine_readable/STATE.a2ml + .machine_readable/6a2/STATE.a2ml",
            severity: :medium,
            reason:
              "STATE.a2ml divergence: top-level last-updated=#{legacy_date}, 6a2/ last-updated=#{nested_date}. One is stale; consumers may read either.",
            action: :unify_state,
            top_last_updated: legacy_date,
            six_last_updated: nested_date,
            trigger_intensive: false
          }
        ]
    end
  else
    _ -> []
  end
end

# Returns the quoted value of the first `last-updated` / `last_updated`
# field found in `content`, or nil when there is none.
#
# The key must be followed by `=` or whitespace before the opening quote,
# which covers both the TOML form (`last-updated = "2026-06-02"`) and the
# Scheme form (`(last-updated "2026-06-02")`).
defp extract_last_updated(content) do
  pattern = ~r/last[-_]updated\s*[=\s]\s*"([^"]+)"/

  case Regex.run(pattern, content, capture: :all_but_first) do
    [value] -> value
    _ -> nil
  end
end

# Lists the git-tracked files under `repo_path` whose extension (as given by
# `Path.extname/1`, e.g. ".md") is one of `exts`. Paths are returned exactly
# as `git ls-files` reports them. Returns [] when git exits non-zero, for
# example when `repo_path` is not inside a git work tree.
defp find_files_by_ext(repo_path, exts) do
  # `-z` makes git print NUL-terminated, verbatim paths. Without it, paths
  # containing non-ASCII or special bytes are C-quoted (`"caf\303\251.md"`)
  # under the default core.quotePath setting; the trailing quote then
  # defeats the extension check and the name is not a usable path anyway.
  # stderr is captured so that git's error text is not printed on failure.
  case System.cmd("git", ["-C", repo_path, "ls-files", "-z"], stderr_to_stdout: true) do
    {output, 0} ->
      output
      |> String.split(<<0>>, trim: true)
      |> Enum.filter(fn rel -> Path.extname(rel) in exts end)

    _ ->
      []
  end
end
end
116 changes: 116 additions & 0 deletions test/structural_drift_test.exs
Original file line number Diff line number Diff line change
Expand Up @@ -294,4 +294,120 @@ defmodule Hypatia.Rules.StructuralDriftTest do
assert Map.has_key?(result, :dispatch)
end
end

describe "sd022_stale_path_after_rename/1" do
  # The rule lists candidate files through `git ls-files`, so each test that
  # expects files to be scanned first turns the temp dir into a git repo and
  # stages its fixtures.

  test "flags docs referencing src/<dir>/ where dir no longer exists", %{repo: repo} do
    # Real layout: only src/paint_core/ exists
    File.mkdir_p!(Path.join([repo, "src", "paint_core"]))
    # Doc still references old src/ephapax/
    File.write!(Path.join(repo, "EXPLAINME.adoc"), "See src/ephapax/lib.rs for the tile API.")
    System.cmd("git", ["init"], cd: repo)
    System.cmd("git", ["add", "."], cd: repo)

    System.cmd("git", ["commit", "-m", "init", "--no-gpg-sign"],
      cd: repo,
      env: [
        {"GIT_AUTHOR_NAME", "T"},
        {"GIT_AUTHOR_EMAIL", "t@t"},
        {"GIT_COMMITTER_NAME", "T"},
        {"GIT_COMMITTER_EMAIL", "t@t"}
      ]
    )

    findings = StructuralDrift.sd022_stale_path_after_rename(repo)
    assert Enum.any?(findings, &(&1.rule == "SD022"))
    assert Enum.any?(findings, &(&1.stale_dir == "ephapax"))
    assert Enum.all?(findings, &(&1.severity == :medium))
    assert Enum.all?(findings, & &1.trigger_intensive)
  end

  test "ignores CHANGELOG.md (historical references are intentional)", %{repo: repo} do
    File.mkdir_p!(Path.join([repo, "src", "paint_core"]))

    # The stale reference must end in `/`: the rule only matches `src/<dir>/`,
    # so a fixture without the slash would pass even with the CHANGELOG
    # exemption removed.
    File.write!(Path.join(repo, "CHANGELOG.md"), "Renamed src/ephapax/ to src/paint_core/.")
    System.cmd("git", ["init"], cd: repo)
    System.cmd("git", ["add", "."], cd: repo)

    System.cmd("git", ["commit", "-m", "init", "--no-gpg-sign"],
      cd: repo,
      env: [
        {"GIT_AUTHOR_NAME", "T"},
        {"GIT_AUTHOR_EMAIL", "t@t"},
        {"GIT_COMMITTER_NAME", "T"},
        {"GIT_COMMITTER_EMAIL", "t@t"}
      ]
    )

    findings = StructuralDrift.sd022_stale_path_after_rename(repo)
    assert findings == []
  end

  test "ignores third_party/ subtree (vendored)", %{repo: repo} do
    File.mkdir_p!(Path.join([repo, "src", "paint_core"]))
    File.mkdir_p!(Path.join([repo, "third_party", "x"]))
    File.write!(Path.join([repo, "third_party", "x", "README.md"]), "uses src/ephapax/foo")
    System.cmd("git", ["init"], cd: repo)
    System.cmd("git", ["add", "."], cd: repo)

    System.cmd("git", ["commit", "-m", "init", "--no-gpg-sign"],
      cd: repo,
      env: [
        {"GIT_AUTHOR_NAME", "T"},
        {"GIT_AUTHOR_EMAIL", "t@t"},
        {"GIT_COMMITTER_NAME", "T"},
        {"GIT_COMMITTER_EMAIL", "t@t"}
      ]
    )

    findings = StructuralDrift.sd022_stale_path_after_rename(repo)
    assert findings == []
  end

  test "returns empty when src/ has no subdirs", %{repo: repo} do
    # No src/ directory is created at all, so the rule has no real
    # subdirectories to compare against and must bail out early.
    File.write!(Path.join(repo, "README.md"), "test")
    findings = StructuralDrift.sd022_stale_path_after_rename(repo)
    assert findings == []
  end
end

describe "sd023_state_a2ml_divergence/1" do
  # Each test writes the legacy top-level STATE.a2ml and/or the 6a2/ copy
  # into the temp repo, then checks what the rule reports.

  test "flags divergent last-updated between top-level and 6a2/", %{repo: repo} do
    machine_dir = Path.join(repo, ".machine_readable")
    File.mkdir_p!(Path.join(machine_dir, "6a2"))

    File.write!(
      Path.join(machine_dir, "STATE.a2ml"),
      "[metadata]\nlast-updated = \"2026-06-02\"\n"
    )

    File.write!(
      Path.join([machine_dir, "6a2", "STATE.a2ml"]),
      "[metadata]\nlast-updated = \"2026-05-11\"\n"
    )

    assert [finding] = StructuralDrift.sd023_state_a2ml_divergence(repo)
    assert finding.rule == "SD023"
    assert finding.top_last_updated == "2026-06-02"
    assert finding.six_last_updated == "2026-05-11"
  end

  test "no finding when dates match", %{repo: repo} do
    machine_dir = Path.join(repo, ".machine_readable")
    same_body = "last-updated = \"2026-06-02\""
    File.mkdir_p!(Path.join(machine_dir, "6a2"))
    File.write!(Path.join(machine_dir, "STATE.a2ml"), same_body)
    File.write!(Path.join([machine_dir, "6a2", "STATE.a2ml"]), same_body)

    assert StructuralDrift.sd023_state_a2ml_divergence(repo) == []
  end

  test "no finding when only one of the two files exists", %{repo: repo} do
    # Only the top-level file is written; the 6a2/ directory is never created.
    machine_dir = Path.join(repo, ".machine_readable")
    File.mkdir_p!(machine_dir)
    File.write!(Path.join(machine_dir, "STATE.a2ml"), "last-updated = \"2026-06-02\"")

    assert StructuralDrift.sd023_state_a2ml_divergence(repo) == []
  end

  test "matches Scheme-style (last-updated \"...\") variant", %{repo: repo} do
    machine_dir = Path.join(repo, ".machine_readable")
    File.mkdir_p!(Path.join(machine_dir, "6a2"))

    # Top-level uses the Scheme form, 6a2/ the TOML form; the dates differ,
    # so a finding proves that both forms were parsed.
    File.write!(
      Path.join(machine_dir, "STATE.a2ml"),
      "(state (metadata (last-updated \"2026-06-02\")))"
    )

    File.write!(
      Path.join([machine_dir, "6a2", "STATE.a2ml"]),
      "last-updated = \"2026-05-11\""
    )

    assert [_single] = StructuralDrift.sd023_state_a2ml_divergence(repo)
  end
end
end
Loading