From db6d05943a9b8625acbec86f2ec7073efa95103d Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Tue, 12 May 2026 04:44:36 +0000 Subject: [PATCH] [SPARK-56832][INFRA] Surface fatal javadoc errors in unidoc log summary and CI annotations After the noise filters from #55605, the Documentation generation CI log is about 4K lines. The two-line per-file fatal diagnostics (`error: reference not found`) are still buried in the middle of the log and the GitHub Actions check panel only shows "Process completed with exit code 1", which leaves reviewers grepping through the raw log to find the actual problem. This change is purely additive -- it drops no existing log lines. After the unidoc pipe closes, `build_api_docs.rb` prints a trailing `Fatal javadoc errors (N):` block listing each captured diagnostic, then emits a `::error file=<path>,line=<line>::<message>` GitHub Actions workflow command per diagnostic so they appear as inline annotations on the PR check panel. Diagnostics are captured strictly within the Standard Doclet phase bracketed by `Building tree for all the packages and classes...` and `Building index for all classes...`, which is where doclint emits the build-failing diagnostics that count toward javadoc's exit code. Source-loading "error:" chatter outside that window is excluded. The captured count is cross-checked against javadoc's own `N errors` summary line. If they diverge -- e.g. because a future JDK changes the Standard Doclet phase wording -- a `::warning::` workflow command is emitted so the drift is surfaced without silently masking real failures. 
Co-authored-by: Isaac --- docs/_plugins/build_api_docs.rb | 88 ++++++++++++++++++++++++++++++++- 1 file changed, 87 insertions(+), 1 deletion(-) diff --git a/docs/_plugins/build_api_docs.rb b/docs/_plugins/build_api_docs.rb index 1ef80bfaf09a4..429cef5aa026c 100644 --- a/docs/_plugins/build_api_docs.rb +++ b/docs/_plugins/build_api_docs.rb @@ -132,7 +132,7 @@ def build_spark_scala_and_java_docs_if_necessary command = "build/sbt -Pkinesis-asl unidoc" puts "Running '#{command}'..." - # Two filter passes on the unidoc output: + # Two filter passes on the unidoc output, plus an additive fatal-error summary: # # 1. Genjavadoc-stub diagnostic blocks (~28 `[error]` lines on stubs under # `target/java/`, plus 3-5 continuation lines each). Inert because @@ -146,6 +146,18 @@ def build_spark_scala_and_java_docs_if_necessary # per-file `error: reference not found` diagnostics) but carry no signal # of their own. Suppressing them brings the visible log from ~17K to ~5K # lines on a typical run while leaving every diagnostic untouched. + # + # 3. Fatal-error summary (additive, drops no log lines). The filtered log is + # still ~4K lines and most `error:` text in it is non-fatal source-loading + # chatter, so the build-failing diagnostics are hard to spot. After the + # pipe closes, we print a `Fatal javadoc errors (N): ...` block and emit + # `::error file=,line=::` GitHub Actions annotations so they surface in the + # PR check panel. Captured strictly within the Standard Doclet phase + # bracketed by `Building tree for all the packages and classes...` and + # `Building index for all classes...`, which is where doclint diagnostics + # are emitted -- this matches what javadoc counts toward exit code 1. + # Self-checked against javadoc's own `N errors` summary line; a mismatch + # emits a `::warning::` so future phase-marker drift is visible. 
ansi = /\e\[[0-9;]*[A-Za-z]/ stub_header = %r{ \[(?:error|warn)\]\s+ @@ -167,10 +179,51 @@ def build_spark_scala_and_java_docs_if_necessary |Generating\s+\S+\.html ) }x + + # Doclint phase tracking for the trailing summary. Standard Doclet bookends the + # phase that produces build-failing diagnostics with these marker lines; any + # `error:` outside this window is source-loading noise that does not contribute + # to javadoc's exit code. The summary below captures only the fatal ones and + # re-emits them as GitHub Actions annotations so they surface in the PR check + # panel instead of being buried in a 4K-line log. + doclint_start = %r{\bBuilding\s+tree\s+for\s+all\s+the\s+packages\s+and\s+classes\b} + doclint_end = %r{\bBuilding\s+index\s+for\s+all\s+classes\b} + doclint_diag = %r{\A\[warn\]\s+(?\S+):(?\d+)(?::\d+)?:\s+error:\s+(?.+?)\s*\z} + doclint_cont = %r{\A\[warn\]\s(?!\S+:\d+(?::\d+)?:\s+error:)(?.*?)\s*\z} + doclint_summary = %r{\A\[warn\]\s+(?[\d,]+)\s+errors?\s*\z} + in_stub = false + in_doclint = false + fatal_diagnostics = [] + pending_context_lines = 0 # snippet + caret lines that follow each diag header + reported_error_count = nil + IO.popen("#{command} 2>&1", 'r') do |pipe| pipe.each_line do |line| plain = line.gsub(ansi, '') + + if plain =~ doclint_start + in_doclint = true + elsif in_doclint && plain =~ doclint_end + in_doclint = false + pending_context_lines = 0 + end + + if in_doclint && (m = plain.match(doclint_diag)) + fatal_diagnostics << { + path: m[:path], line: m[:lineno], msg: m[:msg], context: [] + } + pending_context_lines = 2 + elsif in_doclint && pending_context_lines > 0 && + (m = plain.match(doclint_cont)) && !fatal_diagnostics.empty? + fatal_diagnostics.last[:context] << m[:content] + pending_context_lines -= 1 + end + + if reported_error_count.nil? 
&& (m = plain.match(doclint_summary)) + reported_error_count = m[:count].delete(',').to_i + end + if plain =~ verbose_line in_stub = false # suppress -verbose progress line @@ -185,6 +238,39 @@ def build_spark_scala_and_java_docs_if_necessary end end end + + unless fatal_diagnostics.empty? + bar = "=" * 72 + puts "" + puts bar + puts "Fatal javadoc errors (#{fatal_diagnostics.size}):" + puts bar + fatal_diagnostics.each_with_index do |d, i| + puts " #{i + 1}. #{d[:path]}:#{d[:line]}: #{d[:msg]}" + d[:context].each { |c| puts " #{c}" } + end + puts bar + puts "" + + # GitHub Actions inline annotations. `%`, `\r`, `\n` require URL-style + # escaping per the workflow command spec; newlines render as multiple + # lines inside the annotation, so the source snippet and caret display + # under the error message in the PR check panel. + project_root = SPARK_PROJECT_ROOT + '/' + fatal_diagnostics.each do |d| + rel = d[:path].start_with?(project_root) ? d[:path][project_root.length..] : d[:path] + full = ([d[:msg]] + d[:context]).join("\n") + enc = full.gsub(/[%\r\n]/, '%' => '%25', "\r" => '%0D', "\n" => '%0A') + puts "::error file=#{rel},line=#{d[:line]},title=javadoc::#{enc}" + end + end + + if reported_error_count && reported_error_count != fatal_diagnostics.size + puts "::warning::Javadoc reported #{reported_error_count} errors but " \ + "build_api_docs.rb captured #{fatal_diagnostics.size}. The doclint " \ + "phase markers may have shifted; please update build_api_docs.rb." + end + raise("Unidoc generation failed") unless $?.success? end