From c5833352917f1eda6fe40b428578fd3fe620d9a5 Mon Sep 17 00:00:00 2001 From: Gerhard Schlager Date: Tue, 12 May 2026 16:18:10 +0200 Subject: [PATCH] FIX: Move ] label-escape into MarkdownEscaper so image-in-link works MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The previous fix (#40) escaped ] in UrlTag/EmailTag with an unconditional `text.gsub("]") { "\\]" }`. That regressed nested markdown that legitimately produces unescaped ] — for example an image inside a link: renders the image as `![](src)`, and the tag-level gsub turned the label into `[![\](src)](href)` — broken. The escape rule is contextual to the text content, not to how it gets wrapped. Move the decision to where escaping already happens: - MarkdownEscaper#escape grows an `in_link_label:` kwarg. When set and the result contains ], it post-processes the result to escape every ]. The non-label hot path is unchanged (one boolean check). - Renderer#render_text detects an Url/Email ancestor via has_parent? and passes the flag through. Only plain Text nodes flow through this — nested tag output (ImageTag, UploadTag, etc.) doesn't touch render_text, so its structural ] is preserved. - UrlTag and EmailTag go back to a plain "[#{text}](#{href})" wrapper. Performance: benchmarked the hot path (non-label) against main at 1.79–1.84M i/s for plain text and ~245k i/s for text with specials — no measurable regression. The label path's no-`]` fast-return preserves the input string instance (locked in by a spec). --- .../renderers/discourse/markdown_escaper.rb | 17 ++++++-- .../renderers/discourse/renderer.rb | 10 ++++- .../renderers/discourse/tags/email_tag.rb | 4 +- .../renderers/discourse/tags/url_tag.rb | 4 +- .../discourse/markdown_escaper/links_spec.rb | 41 +++++++++++++++++++ .../renderers/discourse/renderer_spec.rb | 22 +++++++++- .../renderers/discourse/tags/url_tag_spec.rb | 8 ++++ 7 files changed, 95 insertions(+), 11 deletions(-) diff --git a/lib/markbridge/renderers/discourse/markdown_escaper.rb b/lib/markbridge/renderers/discourse/markdown_escaper.rb index 49f3b9c..34b4596 100644 --- a/lib/markbridge/renderers/discourse/markdown_escaper.rb +++ b/lib/markbridge/renderers/discourse/markdown_escaper.rb @@ -116,17 +116,28 @@ def initialize(escape_hard_line_breaks: false) # Autolinks (, ) are intentionally preserved. # # @param text [String, nil] the text to escape + # @param in_link_label [Boolean] when true, also escape `]` so the text + # can be spliced into a Markdown link label `[text](url)` without + # terminating it early. The default leaves `]` alone because a bare + # `]` in prose is harmless (the matching `[` is already escaped). # @return [String] the escaped text, or empty string if input is nil # @note Multi-line HTML tags and blocks are handled by escaping the opening < - def escape(text) + def escape(text, in_link_label: false) return "" if text.nil? # Neutralize hard line breaks (trailing 2+ spaces before newline) text = text.gsub(/ +\n/, "\n") if @escape_hard_line_breaks && text.include?(" \n") - return text unless MAYBE_SPECIAL.match?(text) || MAYBE_INDENTED_CODE.match?(text) + result = + if MAYBE_SPECIAL.match?(text) || MAYBE_INDENTED_CODE.match?(text) + escape_text(text) + else + text + end + + return result unless in_link_label && result.include?("]") - escape_text(text) + result.gsub("]") { "\\]" } end private diff --git a/lib/markbridge/renderers/discourse/renderer.rb b/lib/markbridge/renderers/discourse/renderer.rb index d04bbdd..2260f3a 100644 --- a/lib/markbridge/renderers/discourse/renderer.rb +++ b/lib/markbridge/renderers/discourse/renderer.rb @@ -97,9 +97,17 @@ def render_text(node, context) elsif context.html_mode? @html_escaper.escape(node.text) else - @escaper.escape(node.text) + @escaper.escape(node.text, in_link_label: in_link_label?(context)) end end + + # `]` is structural inside a Markdown link label, so any plain text + # rendered under an Url/Email ancestor must escape it. Tags that emit + # their own bracketed markup (ImageTag, UploadTag, etc.) skip this + # path entirely, so their structural brackets are preserved. + def in_link_label?(context) + context.has_parent?(AST::Url) || context.has_parent?(AST::Email) + end end end end diff --git a/lib/markbridge/renderers/discourse/tags/email_tag.rb b/lib/markbridge/renderers/discourse/tags/email_tag.rb index bc0c840..793f9d6 100644 --- a/lib/markbridge/renderers/discourse/tags/email_tag.rb +++ b/lib/markbridge/renderers/discourse/tags/email_tag.rb @@ -16,9 +16,7 @@ def render(element, interface) if interface.html_mode? %(#{text}) else - # MarkdownEscaper leaves ] alone in prose, but it's structural - # inside a link label — escape it here to prevent early termination. - "[#{text.gsub("]") { "\\]" }}](mailto:#{address})" + "[#{text}](mailto:#{address})" end end end diff --git a/lib/markbridge/renderers/discourse/tags/url_tag.rb b/lib/markbridge/renderers/discourse/tags/url_tag.rb index 91d67df..3f2ec80 100644 --- a/lib/markbridge/renderers/discourse/tags/url_tag.rb +++ b/lib/markbridge/renderers/discourse/tags/url_tag.rb @@ -16,9 +16,7 @@ def render(element, interface) if interface.html_mode? %(#{text}) else - # MarkdownEscaper leaves ] alone in prose, but it's structural - # inside a link label — escape it here to prevent early termination. - "[#{text.gsub("]") { "\\]" }}](#{href})" + "[#{text}](#{href})" end end end diff --git a/spec/unit/markbridge/renderers/discourse/markdown_escaper/links_spec.rb b/spec/unit/markbridge/renderers/discourse/markdown_escaper/links_spec.rb index 21fba43..61fc3cd 100644 --- a/spec/unit/markbridge/renderers/discourse/markdown_escaper/links_spec.rb +++ b/spec/unit/markbridge/renderers/discourse/markdown_escaper/links_spec.rb @@ -130,6 +130,47 @@ end end + describe "#escape with in_link_label: true" do + # Wrapped in a `#escape` describe so mutant's subject-by-method-name + # test selection picks these up. Without the wrapper, the example + # description is e.g. "MarkdownEscaper in_link_label: …" which + # doesn't include "#escape" and mutant skips it. + + it "escapes ] so it does not close an enclosing link label" do + result = escaper.escape("[ABC-123] foo", in_link_label: true) + expect(result).to eq("\\[ABC-123\\] foo") + end + + it "escapes every ] (not just the first)" do + result = escaper.escape("[A] and [B]", in_link_label: true) + expect(result).to eq("\\[A\\] and \\[B\\]") + end + + it "escapes a bare ] in otherwise unremarkable text" do + result = escaper.escape("foo ]bar", in_link_label: true) + expect(result).to eq("foo \\]bar") + end + + it "leaves text with no ] untouched" do + result = escaper.escape("plain text", in_link_label: true) + expect(result).to eq("plain text") + end + + it "does not escape ] when the flag is false (default)" do + result = escaper.escape("[ABC-123] foo") + expect(result).to eq("\\[ABC-123] foo") + end + + it "returns the input string instance unchanged when no ] is present" do + # Locks in the no-allocation fast path: when in_link_label is set but + # the text contains no `]`, escape should return the same object + # rather than allocate a gsub copy. + text = "plain text with no closing bracket" + result = escaper.escape(text, in_link_label: true) + expect(result).to equal(text) + end + end + describe "link reference definitions" do context "when [label]: URL pattern at line start (MUST escape)" do it "escapes link reference definition" do diff --git a/spec/unit/markbridge/renderers/discourse/renderer_spec.rb b/spec/unit/markbridge/renderers/discourse/renderer_spec.rb index 3fb5d93..153c896 100644 --- a/spec/unit/markbridge/renderers/discourse/renderer_spec.rb +++ b/spec/unit/markbridge/renderers/discourse/renderer_spec.rb @@ -23,7 +23,7 @@ result = described_class.new(escaper:).render(Markbridge::AST::Text.new("hi")) expect(result).to eq("ESCAPED") - expect(escaper).to have_received(:escape).with("hi") + expect(escaper).to have_received(:escape).with("hi", in_link_label: false) end it "falls back to TagLibrary.default when no tag_library is provided" do @@ -125,6 +125,26 @@ expect(renderer.render(bold)).to include('a\*b') end + it "escapes ] in Text content when an ancestor is Url (link-label context)" do + url = Markbridge::AST::Url.new(href: "https://example.com") + url << Markbridge::AST::Text.new("[A]") + + expect(renderer.render(url)).to eq("[\\[A\\]](https://example.com)") + end + + it "escapes ] in Text content when an ancestor is Email (link-label context)" do + email = Markbridge::AST::Email.new(address: "user@example.com") + email << Markbridge::AST::Text.new("[A]") + + expect(renderer.render(email)).to eq("[\\[A\\]](mailto:user@example.com)") + end + + it "does not escape ] in Text content when no link ancestor is present" do + text = Markbridge::AST::Text.new("plain ] text") + + expect(renderer.render(text)).to eq("plain ] text") + end + context "in html_mode" do it "HTML-escapes text" do text = Markbridge::AST::Text.new("a < b") diff --git a/spec/unit/markbridge/renderers/discourse/tags/url_tag_spec.rb b/spec/unit/markbridge/renderers/discourse/tags/url_tag_spec.rb index 0938cbf..adf9c2b 100644 --- a/spec/unit/markbridge/renderers/discourse/tags/url_tag_spec.rb +++ b/spec/unit/markbridge/renderers/discourse/tags/url_tag_spec.rb @@ -108,6 +108,14 @@ expect(result).to eq("[\\[A\\] and \\[B\\]](https://example.com)") end + it "preserves structural ] in child markdown (e.g. an image's empty alt)" do + element = Markbridge::AST::Url.new(href: "https://example.com/page") + element << Markbridge::AST::Image.new(src: "https://example.com/logo.png") + + result = tag.render(element, interface) + expect(result).to eq("[![](https://example.com/logo.png)](https://example.com/page)") + end + let(:element_class) { Markbridge::AST::Url } let(:element_factory) { Markbridge::AST::Url.new(href: "https://example.com") } it_behaves_like "a tag that propagates parent context"