diff --git a/lib/markbridge/ast.rb b/lib/markbridge/ast.rb
index a0a3cb1..67198fc 100644
--- a/lib/markbridge/ast.rb
+++ b/lib/markbridge/ast.rb
@@ -18,6 +18,7 @@
 require_relative "ast/line_break"
 require_relative "ast/list"
 require_relative "ast/list_item"
+require_relative "ast/table"
 require_relative "ast/paragraph"
 require_relative "ast/quote"
 require_relative "ast/size"
diff --git a/lib/markbridge/ast/table.rb b/lib/markbridge/ast/table.rb
new file mode 100644
index 0000000..9255acf
--- /dev/null
+++ b/lib/markbridge/ast/table.rb
@@ -0,0 +1,67 @@
+# frozen_string_literal: true
+
+module Markbridge
+  module AST
+    # Represents a table element containing rows.
+    #
+    # @example
+    #   table = AST::Table.new
+    #   table << AST::TableRow.new
+    class Table < Element
+      # Add a child node to the table.
+      # Whitespace-only Text nodes are ignored.
+      #
+      # @param child [Node] the node to add
+      # @return [Table] self for chaining
+      def <<(child)
+        return self if child.is_a?(Text) && child.text.strip.empty?
+
+        super
+      end
+    end
+
+    # Represents a table row containing cells.
+    #
+    # @example
+    #   row = AST::TableRow.new
+    #   row << AST::TableCell.new
+    class TableRow < Element
+      # Add a child node to the row.
+      # Whitespace-only Text nodes are ignored.
+      #
+      # @param child [Node] the node to add
+      # @return [TableRow] self for chaining
+      def <<(child)
+        return self if child.is_a?(Text) && child.text.strip.empty?
+
+        super
+      end
+    end
+
+    # Represents a table cell (td or th).
+    #
+    # @example Data cell
+    #   cell = AST::TableCell.new
+    #   cell << AST::Text.new("data")
+    #
+    # @example Header cell
+    #   cell = AST::TableCell.new(header: true)
+    #   cell << AST::Text.new("header")
+    class TableCell < Element
+      # Create a new table cell.
+      #
+      # @param header [Boolean] whether this is a header cell (th)
+      def initialize(header: false)
+        super()
+        @header = header
+      end
+
+      # Check if this is a header cell.
+      #
+      # @return [Boolean] true if this is a header cell
+      def header?
+        @header
+      end
+    end
+  end
+end
diff --git a/lib/markbridge/parsers/bbcode.rb b/lib/markbridge/parsers/bbcode.rb
index 7216687..9f6ea9e 100644
--- a/lib/markbridge/parsers/bbcode.rb
+++ b/lib/markbridge/parsers/bbcode.rb
@@ -35,6 +35,9 @@
 require_relative "bbcode/handlers/simple_handler"
 require_relative "bbcode/handlers/size_handler"
 require_relative "bbcode/handlers/spoiler_handler"
+require_relative "bbcode/handlers/table_handler"
+require_relative "bbcode/handlers/table_row_handler"
+require_relative "bbcode/handlers/table_cell_handler"
 require_relative "bbcode/handlers/url_handler"
 
 # Parser components
diff --git a/lib/markbridge/parsers/bbcode/handler_registry.rb b/lib/markbridge/parsers/bbcode/handler_registry.rb
index 56e0c25..5d89c7e 100644
--- a/lib/markbridge/parsers/bbcode/handler_registry.rb
+++ b/lib/markbridge/parsers/bbcode/handler_registry.rb
@@ -132,6 +132,11 @@ def self.default(closing_strategy: nil)
           registry.register(%w[list ul ol ulist olist], Handlers::ListHandler.new)
           registry.register(%w[* li .], Handlers::ListItemHandler.new)
 
+          # Table handlers
+          registry.register("table", Handlers::TableHandler.new)
+          registry.register("tr", Handlers::TableRowHandler.new)
+          registry.register(%w[td th], Handlers::TableCellHandler.new)
+
           # Set the closing strategy
           registry.closing_strategy = closing_strategy || default_closing_strategy(registry)
 
diff --git a/lib/markbridge/parsers/bbcode/handlers/table_cell_handler.rb b/lib/markbridge/parsers/bbcode/handlers/table_cell_handler.rb
new file mode 100644
index 0000000..9fe45ed
--- /dev/null
+++ b/lib/markbridge/parsers/bbcode/handlers/table_cell_handler.rb
@@ -0,0 +1,26 @@
+# frozen_string_literal: true
+
+module Markbridge
+  module Parsers
+    module BBCode
+      module Handlers
+        # Handler for table cell tags (td, th)
+        class TableCellHandler < BaseHandler
+          def initialize
+            @element_class = AST::TableCell
+          end
+
+          def on_open(token:, context:, registry:, tokens: nil)
+            # Auto-close previous cell if still open
+            context.pop if context.current.is_a?(AST::TableCell)
+
+            element = AST::TableCell.new(header: token.tag == "th")
+            context.push(element, token:)
+          end
+
+          attr_reader :element_class
+        end
+      end
+    end
+  end
+end
diff --git a/lib/markbridge/parsers/bbcode/handlers/table_handler.rb b/lib/markbridge/parsers/bbcode/handlers/table_handler.rb
new file mode 100644
index 0000000..e29a35f
--- /dev/null
+++ b/lib/markbridge/parsers/bbcode/handlers/table_handler.rb
@@ -0,0 +1,32 @@
+# frozen_string_literal: true
+
+module Markbridge
+  module Parsers
+    module BBCode
+      module Handlers
+        # Handler for table tags
+        class TableHandler < BaseHandler
+          def initialize
+            @element_class = AST::Table
+          end
+
+          def on_open(token:, context:, registry:, tokens: nil)
+            element = AST::Table.new
+            context.push(element, token:)
+          end
+
+          def on_close(token:, context:, registry:, tokens: nil)
+            # Auto-close open cell before closing row
+            context.pop if context.current.is_a?(AST::TableCell)
+            # Auto-close open row before closing table
+            context.pop if context.current.is_a?(AST::TableRow)
+
+            super
+          end
+
+          attr_reader :element_class
+        end
+      end
+    end
+  end
+end
diff --git a/lib/markbridge/parsers/bbcode/handlers/table_row_handler.rb b/lib/markbridge/parsers/bbcode/handlers/table_row_handler.rb
new file mode 100644
index 0000000..9643baa
--- /dev/null
+++ b/lib/markbridge/parsers/bbcode/handlers/table_row_handler.rb
@@ -0,0 +1,35 @@
+# frozen_string_literal: true
+
+module Markbridge
+  module Parsers
+    module BBCode
+      module Handlers
+        # Handler for table row tags (tr)
+        class TableRowHandler < BaseHandler
+          def initialize
+            @element_class = AST::TableRow
+          end
+
+          def on_open(token:, context:, registry:, tokens: nil)
+            # Auto-close open cell before starting new row
+            context.pop if context.current.is_a?(AST::TableCell)
+            # Auto-close previous row if still open
+            context.pop if context.current.is_a?(AST::TableRow)
+
+            element = AST::TableRow.new
+            context.push(element, token:)
+          end
+
+          def on_close(token:, context:, registry:, tokens: nil)
+            # Auto-close open cell before closing row
+            context.pop if context.current.is_a?(AST::TableCell)
+
+            super
+          end
+
+          attr_reader :element_class
+        end
+      end
+    end
+  end
+end
diff --git a/lib/markbridge/parsers/html.rb b/lib/markbridge/parsers/html.rb
index ee1a08b..5f26766 100644
--- a/lib/markbridge/parsers/html.rb
+++ b/lib/markbridge/parsers/html.rb
@@ -17,6 +17,9 @@
 require_relative "html/handlers/list_item_handler"
 require_relative "html/handlers/quote_handler"
 require_relative "html/handlers/paragraph_handler"
+require_relative "html/handlers/table_handler"
+require_relative "html/handlers/table_row_handler"
+require_relative "html/handlers/table_cell_handler"
 
 # Parser components
 require_relative "html/handler_registry"
diff --git a/lib/markbridge/parsers/html/handler_registry.rb b/lib/markbridge/parsers/html/handler_registry.rb
index 42d2bac..0256906 100644
--- a/lib/markbridge/parsers/html/handler_registry.rb
+++ b/lib/markbridge/parsers/html/handler_registry.rb
@@ -67,6 +67,11 @@ def self.default
           registry.register(%w[ul ol], Handlers::ListHandler.new)
           registry.register("li", Handlers::ListItemHandler.new)
 
+          # Table handlers (thead/tbody/tfoot are transparent - unregistered tags pass through)
+          registry.register("table", Handlers::TableHandler.new)
+          registry.register("tr", Handlers::TableRowHandler.new)
+          registry.register(%w[td th], Handlers::TableCellHandler.new)
+
           # Paragraph handler (transparent - doesn't create AST node)
           registry.register("p", Handlers::ParagraphHandler.new)
 
diff --git a/lib/markbridge/parsers/html/handlers/table_cell_handler.rb b/lib/markbridge/parsers/html/handlers/table_cell_handler.rb
new file mode 100644
index 0000000..7d5495c
--- /dev/null
+++ b/lib/markbridge/parsers/html/handlers/table_cell_handler.rb
@@ -0,0 +1,24 @@
+# frozen_string_literal: true
+
+module Markbridge
+  module Parsers
+    module HTML
+      module Handlers
+        # Handler for table cell tags (<td>, <th>)
+        class TableCellHandler < BaseHandler
+          def initialize
+            @element_class = AST::TableCell
+          end
+
+          def process(element:, parent:)
+            ast_element = AST::TableCell.new(header: element.name.downcase == "th")
+            parent << ast_element
+            ast_element
+          end
+
+          attr_reader :element_class
+        end
+      end
+    end
+  end
+end
diff --git a/lib/markbridge/parsers/html/handlers/table_handler.rb b/lib/markbridge/parsers/html/handlers/table_handler.rb
new file mode 100644
index 0000000..d61122a
--- /dev/null
+++ b/lib/markbridge/parsers/html/handlers/table_handler.rb
@@ -0,0 +1,24 @@
+# frozen_string_literal: true
+
+module Markbridge
+  module Parsers
+    module HTML
+      module Handlers
+        # Handler for table tags (<table>)
+        class TableHandler < BaseHandler
+          def initialize
+            @element_class = AST::Table
+          end
+
+          def process(element:, parent:)
+            ast_element = AST::Table.new
+            parent << ast_element
+            ast_element
+          end
+
+          attr_reader :element_class
+        end
+      end
+    end
+  end
+end
diff --git a/lib/markbridge/parsers/html/handlers/table_row_handler.rb b/lib/markbridge/parsers/html/handlers/table_row_handler.rb
new file mode 100644
index 0000000..032c3bd
--- /dev/null
+++ b/lib/markbridge/parsers/html/handlers/table_row_handler.rb
@@ -0,0 +1,24 @@
+# frozen_string_literal: true
+
+module Markbridge
+  module Parsers
+    module HTML
+      module Handlers
+        # Handler for table row tags (<tr>)
+        class TableRowHandler < BaseHandler
+          def initialize
+            @element_class = AST::TableRow
+          end
+
+          def process(element:, parent:)
+            ast_element = AST::TableRow.new
+            parent << ast_element
+            ast_element
+          end
+
+          attr_reader :element_class
+        end
+      end
+    end
+  end
+end
diff --git a/lib/markbridge/parsers/media_wiki/parser.rb b/lib/markbridge/parsers/media_wiki/parser.rb
index 7db9b4c..e423554 100644
--- a/lib/markbridge/parsers/media_wiki/parser.rb
+++ b/lib/markbridge/parsers/media_wiki/parser.rb
@@ -14,6 +14,7 @@ module MediaWiki
       # - Internal links ([[target]] / [[target|display]])
       # - External links ([url text])
       # - Preformatted text
(lines starting with a space) + # - Tables ({| ... |}) # - HTML tags: , ,
, 
, , , , , ,
       #
       # @example Basic usage
@@ -67,6 +68,9 @@ def process_lines(lines)
             elsif horizontal_rule_line?(line)
               close_open_lists
               @document << AST::HorizontalRule.new
+            elsif table_start_line?(line)
+              close_open_lists
+              i = process_table(lines, i)
             elsif list_line?(line)
               process_list_item(line)
             elsif preformatted_line?(line)
@@ -134,6 +138,97 @@ def blank_line?(line)
           line.strip.empty?
         end
 
+        # Check if a line starts a table ({|).
+        #
+        # @param line [String]
+        # @return [Boolean]
+        def table_start_line?(line)
+          line.match?(/\A\s*\{\|/)
+        end
+
+        # Process a table block from {| to |}.
+        # Consumes lines until the closing |} is found.
+        #
+        # @param lines [Array<String>]
+        # @param start_index [Integer]
+        # @return [Integer] the last index consumed
+        def process_table(lines, start_index)
+          table = AST::Table.new
+          current_row = nil
+          i = start_index + 1 # Skip the {| line
+
+          while i < lines.length
+            stripped = lines[i].strip
+
+            if stripped.start_with?("|}")
+              break
+            elsif stripped.start_with?("|-")
+              # Row separator - next cells will go in a new row
+              current_row = nil
+            elsif stripped.start_with?("!")
+              # Header cells
+              current_row = ensure_table_row(table, current_row)
+              parse_table_cells(stripped[1..], header: true, row: current_row)
+            elsif stripped.start_with?("|")
+              # Data cells
+              current_row = ensure_table_row(table, current_row)
+              parse_table_cells(stripped[1..], header: false, row: current_row)
+            end
+
+            i += 1
+          end
+
+          @document << table
+          i
+        end
+
+        # Ensure a row exists for the table, creating one if needed.
+        #
+        # @param table [AST::Table]
+        # @param current_row [AST::TableRow, nil]
+        # @return [AST::TableRow]
+        def ensure_table_row(table, current_row)
+          return current_row if current_row
+
+          row = AST::TableRow.new
+          table << row
+          row
+        end
+
+        # Parse cell content from a line and add cells to the row.
+        # Data cells are separated by ||; header cells by !! or || (MediaWiki accepts
+        # both on a header line).
+        #
+        # @param content [String] the line content after the leading ! or |
+        # @param header [Boolean] whether these are header cells
+        # @param row [AST::TableRow]
+        def parse_table_cells(content, header:, row:)
+          separator = header ? /!!|\|\|/ : "||"
+          # Limit -1 keeps trailing empty cells; a bare "|" or "!" line still yields
+          # one (empty) cell so every row keeps the same column count.
+          cells = content.split(separator, -1)
+          cells = [""] if cells.empty?
+
+          cells.each do |raw_cell|
+            cell = AST::TableCell.new(header:)
+            @inline_parser.parse(table_cell_content(raw_cell).strip, parent: cell)
+            row << cell
+          end
+        end
+
+        # Return the content part of a raw cell, dropping a leading "attributes |" section.
+        # When the text before the first | contains [[, the | belongs to a link
+        # ([[target|display]]) and the whole cell is content.
+        #
+        # @param raw_cell [String]
+        # @return [String]
+        def table_cell_content(raw_cell)
+          attributes, content = raw_cell.split("|", 2)
+          return raw_cell if content.nil? || attributes.include?("[[")
+
+          content
+        end
+
         # Process a heading line and add it to the document.
         #
         # @param line [String]
diff --git a/lib/markbridge/parsers/text_formatter.rb b/lib/markbridge/parsers/text_formatter.rb
index c1687e7..07cb28f 100644
--- a/lib/markbridge/parsers/text_formatter.rb
+++ b/lib/markbridge/parsers/text_formatter.rb
@@ -18,6 +18,7 @@
 require_relative "text_formatter/handlers/list_handler"
 require_relative "text_formatter/handlers/quote_handler"
 require_relative "text_formatter/handlers/url_handler"
+require_relative "text_formatter/handlers/table_cell_handler"
 
 # Parser components
 require_relative "text_formatter/handler_registry"
diff --git a/lib/markbridge/parsers/text_formatter/handler_registry.rb b/lib/markbridge/parsers/text_formatter/handler_registry.rb
index 7db7da5..e514cee 100644
--- a/lib/markbridge/parsers/text_formatter/handler_registry.rb
+++ b/lib/markbridge/parsers/text_formatter/handler_registry.rb
@@ -122,6 +122,12 @@ def register_defaults
           # Paragraphs
           register("P", Handlers::SimpleHandler.new(AST::Paragraph))
 
+          # Table elements
+          register("TABLE", Handlers::SimpleHandler.new(AST::Table))
+          register("TR", Handlers::SimpleHandler.new(AST::TableRow))
+          register("TD", Handlers::TableCellHandler.new)
+          register("TH", Handlers::TableCellHandler.new)
+
           self
         end
       end
diff --git a/lib/markbridge/parsers/text_formatter/handlers/table_cell_handler.rb b/lib/markbridge/parsers/text_formatter/handlers/table_cell_handler.rb
new file mode 100644
index 0000000..2280327
--- /dev/null
+++ 
b/lib/markbridge/parsers/text_formatter/handlers/table_cell_handler.rb
@@ -0,0 +1,24 @@
+# frozen_string_literal: true
+
+module Markbridge
+  module Parsers
+    module TextFormatter
+      module Handlers
+        # Handler for table cell elements (TD, TH)
+        class TableCellHandler < BaseHandler
+          def initialize
+            @element_class = AST::TableCell
+          end
+
+          def process(element:, parent:)
+            node = AST::TableCell.new(header: element.name.upcase == "TH")
+            parent << node
+            node
+          end
+
+          attr_reader :element_class
+        end
+      end
+    end
+  end
+end
diff --git a/lib/markbridge/renderers/discourse.rb b/lib/markbridge/renderers/discourse.rb
index 450cc30..510c376 100644
--- a/lib/markbridge/renderers/discourse.rb
+++ b/lib/markbridge/renderers/discourse.rb
@@ -30,6 +30,9 @@
 require_relative "discourse/tags/strikethrough_tag"
 require_relative "discourse/tags/subscript_tag"
 require_relative "discourse/tags/superscript_tag"
+require_relative "discourse/tags/table_tag"
+require_relative "discourse/tags/table_row_tag"
+require_relative "discourse/tags/table_cell_tag"
 require_relative "discourse/tags/underline_tag"
 require_relative "discourse/tags/url_tag"
 
diff --git a/lib/markbridge/renderers/discourse/tags/table_cell_tag.rb b/lib/markbridge/renderers/discourse/tags/table_cell_tag.rb
new file mode 100644
index 0000000..8467417
--- /dev/null
+++ b/lib/markbridge/renderers/discourse/tags/table_cell_tag.rb
@@ -0,0 +1,18 @@
+# frozen_string_literal: true
+
+module Markbridge
+  module Renderers
+    module Discourse
+      module Tags
+        # Tag for rendering table cells (passthrough - renders children only)
+        # The TableTag handles cells directly; this is a safety net for standalone rendering.
+        class TableCellTag < Tag
+          def render(element, interface)
+            child_context = interface.with_parent(element)
+            interface.render_children(element, context: child_context)
+          end
+        end
+      end
+    end
+  end
+end
diff --git a/lib/markbridge/renderers/discourse/tags/table_row_tag.rb b/lib/markbridge/renderers/discourse/tags/table_row_tag.rb
new file mode 100644
index 0000000..fd55a66
--- /dev/null
+++ b/lib/markbridge/renderers/discourse/tags/table_row_tag.rb
@@ -0,0 +1,18 @@
+# frozen_string_literal: true
+
+module Markbridge
+  module Renderers
+    module Discourse
+      module Tags
+        # Tag for rendering table rows (passthrough - renders children only)
+        # The TableTag handles rows directly; this is a safety net for standalone rendering.
+        class TableRowTag < Tag
+          def render(element, interface)
+            child_context = interface.with_parent(element)
+            interface.render_children(element, context: child_context)
+          end
+        end
+      end
+    end
+  end
+end
diff --git a/lib/markbridge/renderers/discourse/tags/table_tag.rb b/lib/markbridge/renderers/discourse/tags/table_tag.rb
new file mode 100644
index 0000000..0265517
--- /dev/null
+++ b/lib/markbridge/renderers/discourse/tags/table_tag.rb
@@ -0,0 +1,124 @@
+# frozen_string_literal: true
+
+module Markbridge
+  module Renderers
+    module Discourse
+      module Tags
+        # Tag for rendering tables as Markdown pipe tables with HTML fallback
+        class TableTag < Tag
+          def render(element, interface)
+            child_context = interface.with_parent(element)
+            rows_data = extract_rows(element, interface, child_context)
+
+            return "" if rows_data.empty?
+
+            if markdown_compatible?(rows_data, interface)
+              render_markdown(rows_data)
+            else
+              render_html(rows_data)
+            end
+          end
+
+          private
+
+          # Extract rendered cell data from each row
+          # @return [Array<Hash>] array of {cells: [{content:, header:}], ...}
+          def extract_rows(element, interface, child_context)
+            element.children.filter_map do |child|
+              next unless child.is_a?(AST::TableRow)
+
+              cells =
+                child.children.filter_map do |cell|
+                  next unless cell.is_a?(AST::TableCell)
+
+                  cell_context = child_context.with_parent(child)
+                  content = interface.render_children(cell, context: cell_context).strip
+                  { content:, header: cell.header? }
+                end
+
+              { cells: } unless cells.empty?
+            end
+          end
+
+          # Check if the table can be rendered as Markdown
+          def markdown_compatible?(rows_data, interface)
+            return false if rows_data.empty?
+            return false if interface.has_parent?(AST::Table)
+
+            cell_count = rows_data.first[:cells].length
+            rows_data.all? do |row|
+              row[:cells].length == cell_count &&
+                row[:cells].none? { |c| c[:content].include?("\n") }
+            end
+          end
+
+          # Render as Markdown pipe table
+          def render_markdown(rows_data)
+            header_idx = rows_data.index { |r| r[:cells].all? { |c| c[:header] } }
+            header_row = header_idx ? rows_data[header_idx] : rows_data.first
+            data_rows =
+              (
+                if header_idx
+                  rows_data[0...header_idx] + rows_data[(header_idx + 1)..]
+                else
+                  rows_data[1..]
+                end
+              )
+
+            col_count = header_row[:cells].length
+            lines = []
+            lines << format_row(header_row[:cells])
+            lines << "| #{(["---"] * col_count).join(" | ")} |"
+            data_rows.each { |row| lines << format_row(row[:cells]) }
+
+            "\n\n#{lines.join("\n")}\n\n"
+          end
+
+          # Format a single row as a Markdown pipe row
+          def format_row(cells)
+            # Pipe characters in cell content are already escaped by the markdown escaper
+            "| #{cells.map { |c| c[:content] }.join(" | ")} |"
+          end
+
+          # Render as HTML table
+          def render_html(rows_data)
+            has_header = rows_data.any? { |r| r[:cells].any? { |c| c[:header] } }
+            lines = ["<table>"]
+
+            if has_header
+              header_rows, body_rows = rows_data.partition { |r| r[:cells].all? { |c| c[:header] } }
+
+              unless header_rows.empty?
+                lines << "<thead>"
+                header_rows.each { |row| lines << html_row(row, force_header: true) }
+                lines << "</thead>"
+              end
+
+              unless body_rows.empty?
+                lines << "<tbody>"
+                body_rows.each { |row| lines << html_row(row) }
+                lines << "</tbody>"
+              end
+            else
+              rows_data.each { |row| lines << html_row(row) }
+            end
+
+            lines << "</table>"
+            "\n\n#{lines.join("\n")}\n\n"
+          end
+
+          # Render a single HTML table row
+          def html_row(row, force_header: false)
+            cells_html =
+              row[:cells].map do |cell|
+                tag = (cell[:header] || force_header) ? "th" : "td"
+                "<#{tag}>#{cell[:content]}</#{tag}>"
+              end
+
+            "<tr>#{cells_html.join}</tr>"
+          end
+        end
+      end
+    end
+  end
+end
diff --git a/playground/ast_presenter.rb b/playground/ast_presenter.rb
index 06c69a7..10ab293 100644
--- a/playground/ast_presenter.rb
+++ b/playground/ast_presenter.rb
@@ -29,6 +29,9 @@ class ASTPresenter
     "Strikethrough" => "formatting",
     "Subscript" => "formatting",
     "Superscript" => "formatting",
+    "Table" => "block",
+    "TableCell" => "block",
+    "TableRow" => "block",
     "Text" => "text",
     "Underline" => "formatting",
     "Upload" => "media",
@@ -61,6 +64,9 @@ class ASTPresenter
     "Strikethrough" => "strikethrough",
     "Subscript" => "subscript",
     "Superscript" => "superscript",
+    "Table" => "table",
+    "TableCell" => "squareAsterisk",
+    "TableRow" => "rows3",
     "Text" => "textCursor",
     "Underline" => "underline",
     "Upload" => "upload",
diff --git a/spec/system/bbcode_to_markdown_spec.rb b/spec/system/bbcode_to_markdown_spec.rb
index 1bf6f88..6b0e25a 100644
--- a/spec/system/bbcode_to_markdown_spec.rb
+++ b/spec/system/bbcode_to_markdown_spec.rb
@@ -508,4 +508,39 @@
       expect(result).to eq(expected)
     end
   end
+
+  describe "tables" do
+    it "renders a simple table with headers as Markdown" do
+      bbcode = "[table][tr][th]Name[/th][th]Age[/th][/tr][tr][td]Alice[/td][td]30[/td][/tr][/table]"
+
+      result = Markbridge.bbcode_to_markdown(bbcode)
+
+      expect(result).to eq("| Name | Age |\n| --- | --- |\n| Alice | 30 |")
+    end
+
+    it "renders a table without headers using first row as header" do
+      bbcode = "[table][tr][td]A[/td][td]B[/td][/tr][tr][td]1[/td][td]2[/td][/tr][/table]"
+
+      result = Markbridge.bbcode_to_markdown(bbcode)
+
+      expect(result).to eq("| A | B |\n| --- | --- |\n| 1 | 2 |")
+    end
+
+    it "renders formatted content inside table cells" do
+      bbcode =
"[table][tr][th]Name[/th][/tr][tr][td][b]Alice[/b][/td][/tr][/table]" + + result = Markbridge.bbcode_to_markdown(bbcode) + + expect(result).to include("| **Alice** |") + end + + it "falls back to HTML for uneven rows" do + bbcode = "[table][tr][td]A[/td][td]B[/td][/tr][tr][td]1[/td][/tr][/table]" + + result = Markbridge.bbcode_to_markdown(bbcode) + + expect(result).to include("") + expect(result).to include("") + end + end end diff --git a/spec/system/html_to_markdown_spec.rb b/spec/system/html_to_markdown_spec.rb index 70c1c26..a80d99c 100644 --- a/spec/system/html_to_markdown_spec.rb +++ b/spec/system/html_to_markdown_spec.rb @@ -305,4 +305,31 @@ expect(result).to eq(expected) end end + + describe "tables" do + it "renders a simple HTML table as Markdown" do + html = "
A
NameAge
Alice30
" + + result = Markbridge.html_to_markdown(html) + + expect(result).to eq("| Name | Age |\n| --- | --- |\n| Alice | 30 |") + end + + it "handles thead and tbody" do + html = + "
AB
12
" + + result = Markbridge.html_to_markdown(html) + + expect(result).to eq("| A | B |\n| --- | --- |\n| 1 | 2 |") + end + + it "falls back to HTML for uneven rows" do + html = "
AB
1
" + + result = Markbridge.html_to_markdown(html) + + expect(result).to include("") + end + end end diff --git a/spec/system/mediawiki_to_markdown_spec.rb b/spec/system/mediawiki_to_markdown_spec.rb index bd471b4..89ba272 100644 --- a/spec/system/mediawiki_to_markdown_spec.rb +++ b/spec/system/mediawiki_to_markdown_spec.rb @@ -194,4 +194,51 @@ expect(result).to eq("Just plain text") end end + + describe "tables" do + it "renders a simple table as Markdown" do + wiki = <<~WIKI.chomp + {| + ! Name !! Age + |- + | Alice || 30 + |} + WIKI + + result = Markbridge.mediawiki_to_markdown(wiki) + + expect(result).to eq("| Name | Age |\n| --- | --- |\n| Alice | 30 |") + end + + it "handles header and data rows" do + wiki = <<~WIKI.chomp + {| + |- + ! A + ! B + |- + | 1 + | 2 + |} + WIKI + + result = Markbridge.mediawiki_to_markdown(wiki) + + expect(result).to eq("| A | B |\n| --- | --- |\n| 1 | 2 |") + end + + it "handles inline formatting in cells" do + wiki = <<~WIKI.chomp + {| + ! Name + |- + | '''Alice''' + |} + WIKI + + result = Markbridge.mediawiki_to_markdown(wiki) + + expect(result).to include("| **Alice** |") + end + end end diff --git a/spec/system/text_formatter_xml_to_markdown_spec.rb b/spec/system/text_formatter_xml_to_markdown_spec.rb index aa74131..c446bc8 100644 --- a/spec/system/text_formatter_xml_to_markdown_spec.rb +++ b/spec/system/text_formatter_xml_to_markdown_spec.rb @@ -123,4 +123,23 @@ expect(result).to eq('big text') end end + + describe "tables" do + it "renders a table as Markdown" do + xml = + "
NameAge
Alice30
" + + result = Markbridge.text_formatter_xml_to_markdown(xml) + + expect(result).to eq("| Name | Age |\n| --- | --- |\n| Alice | 30 |") + end + + it "falls back to HTML for uneven rows" do + xml = "
AB
1
" + + result = Markbridge.text_formatter_xml_to_markdown(xml) + + expect(result).to include("") + end + end end diff --git a/spec/unit/markbridge/ast/table_spec.rb b/spec/unit/markbridge/ast/table_spec.rb new file mode 100644 index 0000000..4d885bb --- /dev/null +++ b/spec/unit/markbridge/ast/table_spec.rb @@ -0,0 +1,79 @@ +# frozen_string_literal: true + +RSpec.describe Markbridge::AST::Table do + it "is an Element" do + expect(described_class.new).to be_a(Markbridge::AST::Element) + end + + it "can have TableRow children" do + table = described_class.new + row = Markbridge::AST::TableRow.new + table << row + + expect(table.children).to eq([row]) + end + + it "ignores whitespace-only Text children" do + table = described_class.new + table << Markbridge::AST::Text.new(" \n ") + + expect(table.children).to be_empty + end + + it "preserves non-whitespace Text children" do + table = described_class.new + text = Markbridge::AST::Text.new("content") + table << text + + expect(table.children).to eq([text]) + end +end + +RSpec.describe Markbridge::AST::TableRow do + it "is an Element" do + expect(described_class.new).to be_a(Markbridge::AST::Element) + end + + it "can have TableCell children" do + row = described_class.new + cell = Markbridge::AST::TableCell.new + row << cell + + expect(row.children).to eq([cell]) + end + + it "ignores whitespace-only Text children" do + row = described_class.new + row << Markbridge::AST::Text.new(" \n ") + + expect(row.children).to be_empty + end +end + +RSpec.describe Markbridge::AST::TableCell do + it "is an Element" do + expect(described_class.new).to be_a(Markbridge::AST::Element) + end + + describe "#header?" 
do + it "returns false by default" do + expect(described_class.new.header?).to be false + end + + it "returns true when created as header" do + expect(described_class.new(header: true).header?).to be true + end + + it "returns false when created as non-header" do + expect(described_class.new(header: false).header?).to be false + end + end + + it "can have Text children" do + cell = described_class.new + text = Markbridge::AST::Text.new("content") + cell << text + + expect(cell.children).to eq([text]) + end +end diff --git a/spec/unit/markbridge/parsers/bbcode/handlers/table_cell_handler_spec.rb b/spec/unit/markbridge/parsers/bbcode/handlers/table_cell_handler_spec.rb new file mode 100644 index 0000000..fd6b4bc --- /dev/null +++ b/spec/unit/markbridge/parsers/bbcode/handlers/table_cell_handler_spec.rb @@ -0,0 +1,71 @@ +# frozen_string_literal: true + +RSpec.describe Markbridge::Parsers::BBCode::Handlers::TableCellHandler do + let(:handler) { described_class.new } + let(:document) { Markbridge::AST::Document.new } + let(:context) { Markbridge::Parsers::BBCode::ParserState.new(document) } + let(:registry) do + reg = Markbridge::Parsers::BBCode::HandlerRegistry.new + reconciler = Markbridge::Parsers::BBCode::ClosingStrategies::TagReconciler.new(registry: reg) + closing_strategy = Markbridge::Parsers::BBCode::ClosingStrategies::Reordering.new(reconciler) + reg.instance_variable_set(:@closing_strategy, closing_strategy) + reg.register(%w[td th], handler) + reg + end + + describe "#on_open" do + before do + table = Markbridge::AST::Table.new + context.push(table) + row = Markbridge::AST::TableRow.new + context.push(row) + end + + it "creates a non-header cell for td tag" do + token = + Markbridge::Parsers::BBCode::TagStartToken.new(tag: "td", attrs: {}, pos: 0, source: "[td]") + + handler.on_open(token:, context:, registry:) + + expect(context.current).to be_a(Markbridge::AST::TableCell) + expect(context.current.header?).to be false + end + + it "creates a header cell for 
th tag" do + token = + Markbridge::Parsers::BBCode::TagStartToken.new(tag: "th", attrs: {}, pos: 0, source: "[th]") + + handler.on_open(token:, context:, registry:) + + expect(context.current).to be_a(Markbridge::AST::TableCell) + expect(context.current.header?).to be true + end + + it "auto-closes previous TableCell" do + token1 = + Markbridge::Parsers::BBCode::TagStartToken.new(tag: "td", attrs: {}, pos: 0, source: "[td]") + token2 = + Markbridge::Parsers::BBCode::TagStartToken.new( + tag: "td", + attrs: { + }, + pos: 10, + source: "[td]", + ) + + handler.on_open(token: token1, context:, registry:) + first_cell = context.current + + handler.on_open(token: token2, context:, registry:) + + expect(context.current).to be_a(Markbridge::AST::TableCell) + expect(context.current).not_to eq(first_cell) + end + end + + describe "#element_class" do + it "returns AST::TableCell" do + expect(handler.element_class).to eq(Markbridge::AST::TableCell) + end + end +end diff --git a/spec/unit/markbridge/parsers/bbcode/handlers/table_handler_spec.rb b/spec/unit/markbridge/parsers/bbcode/handlers/table_handler_spec.rb new file mode 100644 index 0000000..fb14888 --- /dev/null +++ b/spec/unit/markbridge/parsers/bbcode/handlers/table_handler_spec.rb @@ -0,0 +1,82 @@ +# frozen_string_literal: true + +RSpec.describe Markbridge::Parsers::BBCode::Handlers::TableHandler do + let(:handler) { described_class.new } + let(:document) { Markbridge::AST::Document.new } + let(:context) { Markbridge::Parsers::BBCode::ParserState.new(document) } + let(:registry) do + reg = Markbridge::Parsers::BBCode::HandlerRegistry.new + reconciler = Markbridge::Parsers::BBCode::ClosingStrategies::TagReconciler.new(registry: reg) + closing_strategy = Markbridge::Parsers::BBCode::ClosingStrategies::Reordering.new(reconciler) + reg.instance_variable_set(:@closing_strategy, closing_strategy) + reg.register("table", handler) + reg + end + + describe "#on_open" do + it "pushes a Table onto the context" do + token = + 
Markbridge::Parsers::BBCode::TagStartToken.new( + tag: "table", + attrs: { + }, + pos: 0, + source: "[table]", + ) + + handler.on_open(token:, context:, registry:) + + expect(context.current).to be_a(Markbridge::AST::Table) + end + end + + describe "#on_close" do + it "pops the table from context" do + table = Markbridge::AST::Table.new + context.push(table) + + close_token = + Markbridge::Parsers::BBCode::TagEndToken.new(tag: "table", pos: 10, source: "[/table]") + + handler.on_close(token: close_token, context:, registry:) + + expect(context.current).to eq(document) + end + + it "auto-closes open TableRow before closing table" do + table = Markbridge::AST::Table.new + context.push(table) + row = Markbridge::AST::TableRow.new + context.push(row) + + close_token = + Markbridge::Parsers::BBCode::TagEndToken.new(tag: "table", pos: 10, source: "[/table]") + + handler.on_close(token: close_token, context:, registry:) + + expect(context.current).to eq(document) + end + + it "auto-closes open TableCell and TableRow before closing table" do + table = Markbridge::AST::Table.new + context.push(table) + row = Markbridge::AST::TableRow.new + context.push(row) + cell = Markbridge::AST::TableCell.new + context.push(cell) + + close_token = + Markbridge::Parsers::BBCode::TagEndToken.new(tag: "table", pos: 10, source: "[/table]") + + handler.on_close(token: close_token, context:, registry:) + + expect(context.current).to eq(document) + end + end + + describe "#element_class" do + it "returns AST::Table" do + expect(handler.element_class).to eq(Markbridge::AST::Table) + end + end +end diff --git a/spec/unit/markbridge/parsers/bbcode/handlers/table_row_handler_spec.rb b/spec/unit/markbridge/parsers/bbcode/handlers/table_row_handler_spec.rb new file mode 100644 index 0000000..470ae6d --- /dev/null +++ b/spec/unit/markbridge/parsers/bbcode/handlers/table_row_handler_spec.rb @@ -0,0 +1,79 @@ +# frozen_string_literal: true + +RSpec.describe 
Markbridge::Parsers::BBCode::Handlers::TableRowHandler do + let(:handler) { described_class.new } + let(:document) { Markbridge::AST::Document.new } + let(:context) { Markbridge::Parsers::BBCode::ParserState.new(document) } + let(:registry) do + reg = Markbridge::Parsers::BBCode::HandlerRegistry.new + reconciler = Markbridge::Parsers::BBCode::ClosingStrategies::TagReconciler.new(registry: reg) + closing_strategy = Markbridge::Parsers::BBCode::ClosingStrategies::Reordering.new(reconciler) + reg.instance_variable_set(:@closing_strategy, closing_strategy) + reg.register("tr", handler) + reg + end + let(:open_token) do + Markbridge::Parsers::BBCode::TagStartToken.new(tag: "tr", attrs: {}, pos: 0, source: "[tr]") + end + + describe "#on_open" do + it "pushes a TableRow onto the context" do + table = Markbridge::AST::Table.new + context.push(table) + + handler.on_open(token: open_token, context:, registry:) + + expect(context.current).to be_a(Markbridge::AST::TableRow) + end + + it "auto-closes previous TableRow" do + table = Markbridge::AST::Table.new + context.push(table) + old_row = Markbridge::AST::TableRow.new + context.push(old_row) + + handler.on_open(token: open_token, context:, registry:) + + expect(context.current).to be_a(Markbridge::AST::TableRow) + expect(context.current).not_to eq(old_row) + end + + it "auto-closes open TableCell before closing previous row" do + table = Markbridge::AST::Table.new + context.push(table) + old_row = Markbridge::AST::TableRow.new + context.push(old_row) + cell = Markbridge::AST::TableCell.new + context.push(cell) + + handler.on_open(token: open_token, context:, registry:) + + expect(context.current).to be_a(Markbridge::AST::TableRow) + expect(context.current).not_to eq(old_row) + end + end + + describe "#on_close" do + it "auto-closes open TableCell before closing row" do + table = Markbridge::AST::Table.new + context.push(table) + row = Markbridge::AST::TableRow.new + context.push(row) + cell = Markbridge::AST::TableCell.new 
+ context.push(cell) + + close_token = + Markbridge::Parsers::BBCode::TagEndToken.new(tag: "tr", pos: 10, source: "[/tr]") + + handler.on_close(token: close_token, context:, registry:) + + expect(context.current).to eq(table) + end + end + + describe "#element_class" do + it "returns AST::TableRow" do + expect(handler.element_class).to eq(Markbridge::AST::TableRow) + end + end +end diff --git a/spec/unit/markbridge/parsers/html/handlers/table_cell_handler_spec.rb b/spec/unit/markbridge/parsers/html/handlers/table_cell_handler_spec.rb new file mode 100644 index 0000000..a47665f --- /dev/null +++ b/spec/unit/markbridge/parsers/html/handlers/table_cell_handler_spec.rb @@ -0,0 +1,41 @@ +# frozen_string_literal: true + +RSpec.describe Markbridge::Parsers::HTML::Handlers::TableCellHandler do + let(:parent) { Markbridge::AST::TableRow.new } + let(:handler) { described_class.new } + + describe "#process" do + it "creates a non-header cell for ").root + + result = handler.process(element:, parent:) + + expect(parent.children[0]).to be_a(Markbridge::AST::TableCell) + expect(parent.children[0].header?).to be false + expect(result).to eq(parent.children[0]) + end + + it "creates a header cell for TH" do + element = Nokogiri.XML("").root + + result = handler.process(element:, parent:) + + expect(parent.children[0].header?).to be true + expect(result).to eq(parent.children[0]) + end + end + + describe "#element_class" do + it "returns AST::TableCell" do + expect(handler.element_class).to eq(Markbridge::AST::TableCell) + end + end +end diff --git a/spec/unit/markbridge/renderers/discourse/tags/table_tag_spec.rb b/spec/unit/markbridge/renderers/discourse/tags/table_tag_spec.rb new file mode 100644 index 0000000..e86723b --- /dev/null +++ b/spec/unit/markbridge/renderers/discourse/tags/table_tag_spec.rb @@ -0,0 +1,177 @@ +# frozen_string_literal: true + +RSpec.describe Markbridge::Renderers::Discourse::Tags::TableTag do + let(:tag) { described_class.new } + let(:renderer) { 
Markbridge::Renderers::Discourse::Renderer.new } + let(:context) { Markbridge::Renderers::Discourse::RenderContext.new } + let(:interface) { Markbridge::Renderers::Discourse::RenderingInterface.new(renderer, context) } + + def build_table(rows) + table = Markbridge::AST::Table.new + rows.each do |row_data| + row = Markbridge::AST::TableRow.new + row_data.each do |cell_data| + header = cell_data.is_a?(Hash) ? cell_data[:header] : false + text = cell_data.is_a?(Hash) ? cell_data[:text] : cell_data + cell = Markbridge::AST::TableCell.new(header:) + cell << Markbridge::AST::Text.new(text) + row << cell + end + table << row + end + table + end + + describe "Markdown rendering" do + it "renders a simple table with headers" do + table = build_table([[{ text: "A", header: true }, { text: "B", header: true }], %w[1 2]]) + + result = tag.render(table, interface) + + expect(result).to eq("\n\n| A | B |\n| --- | --- |\n| 1 | 2 |\n\n") + end + + it "treats first row as header when no explicit headers" do + table = build_table([%w[A B], %w[1 2]]) + + result = tag.render(table, interface) + + expect(result).to eq("\n\n| A | B |\n| --- | --- |\n| 1 | 2 |\n\n") + end + + it "renders multiple data rows" do + table = + build_table( + [ + [{ text: "Name", header: true }, { text: "Age", header: true }], + %w[Alice 30], + %w[Bob 25], + ], + ) + + result = tag.render(table, interface) + + expect(result).to include("| Name | Age |") + expect(result).to include("| --- | --- |") + expect(result).to include("| Alice | 30 |") + expect(result).to include("| Bob | 25 |") + end + + it "handles pipe characters in cell content (escaped by markdown escaper)" do + table = build_table([[{ text: "A", header: true }, { text: "B", header: true }], %w[x|y z]]) + + result = tag.render(table, interface) + + # The markdown escaper converts | to \| in text content + expect(result).to include('x\|y') + expect(result).to include("| z |") + end + + it "handles empty cells" do + table = + build_table([[{ text: 
"A", header: true }, { text: "B", header: true }], ["", "data"]]) + + result = tag.render(table, interface) + + expect(result).to include("| | data |") + end + + it "renders formatted content in cells" do + table = Markbridge::AST::Table.new + header_row = Markbridge::AST::TableRow.new + h1 = Markbridge::AST::TableCell.new(header: true) + h1 << Markbridge::AST::Text.new("Name") + header_row << h1 + table << header_row + + data_row = Markbridge::AST::TableRow.new + d1 = Markbridge::AST::TableCell.new + bold = Markbridge::AST::Bold.new + bold << Markbridge::AST::Text.new("Alice") + d1 << bold + data_row << d1 + table << data_row + + result = tag.render(table, interface) + + expect(result).to include("| **Alice** |") + end + end + + describe "HTML fallback" do + it "falls back to HTML when rows have different cell counts" do + table = build_table([[{ text: "A", header: true }, { text: "B", header: true }], ["1"]]) + + result = tag.render(table, interface) + + expect(result).to include("
" do + node = instance_double(Nokogiri::XML::Element, name: "td") + + result = handler.process(element: node, parent:) + + expect(parent.children[0]).to be_a(Markbridge::AST::TableCell) + expect(parent.children[0].header?).to be false + expect(result).to eq(parent.children[0]) + end + + it "creates a header cell for " do + node = instance_double(Nokogiri::XML::Element, name: "th") + + result = handler.process(element: node, parent:) + + expect(parent.children[0].header?).to be true + expect(result).to eq(parent.children[0]) + end + + it "handles case-insensitive tag names" do + node = instance_double(Nokogiri::XML::Element, name: "TH") + + handler.process(element: node, parent:) + + expect(parent.children[0].header?).to be true + end + end + + describe "#element_class" do + it "returns AST::TableCell" do + expect(handler.element_class).to eq(Markbridge::AST::TableCell) + end + end +end diff --git a/spec/unit/markbridge/parsers/html/handlers/table_handler_spec.rb b/spec/unit/markbridge/parsers/html/handlers/table_handler_spec.rb new file mode 100644 index 0000000..47ebfbc --- /dev/null +++ b/spec/unit/markbridge/parsers/html/handlers/table_handler_spec.rb @@ -0,0 +1,24 @@ +# frozen_string_literal: true + +RSpec.describe Markbridge::Parsers::HTML::Handlers::TableHandler do + let(:parent) { Markbridge::AST::Document.new } + let(:handler) { described_class.new } + + describe "#process" do + it "creates a Table element" do + node = instance_double(Nokogiri::XML::Element, name: "table") + + result = handler.process(element: node, parent:) + + expect(parent.children.size).to eq(1) + expect(parent.children[0]).to be_a(Markbridge::AST::Table) + expect(result).to eq(parent.children[0]) + end + end + + describe "#element_class" do + it "returns AST::Table" do + expect(handler.element_class).to eq(Markbridge::AST::Table) + end + end +end diff --git a/spec/unit/markbridge/parsers/html/handlers/table_row_handler_spec.rb 
b/spec/unit/markbridge/parsers/html/handlers/table_row_handler_spec.rb new file mode 100644 index 0000000..4e9cb13 --- /dev/null +++ b/spec/unit/markbridge/parsers/html/handlers/table_row_handler_spec.rb @@ -0,0 +1,24 @@ +# frozen_string_literal: true + +RSpec.describe Markbridge::Parsers::HTML::Handlers::TableRowHandler do + let(:parent) { Markbridge::AST::Table.new } + let(:handler) { described_class.new } + + describe "#process" do + it "creates a TableRow element" do + node = instance_double(Nokogiri::XML::Element, name: "tr") + + result = handler.process(element: node, parent:) + + expect(parent.children.size).to eq(1) + expect(parent.children[0]).to be_a(Markbridge::AST::TableRow) + expect(result).to eq(parent.children[0]) + end + end + + describe "#element_class" do + it "returns AST::TableRow" do + expect(handler.element_class).to eq(Markbridge::AST::TableRow) + end + end +end diff --git a/spec/unit/markbridge/parsers/text_formatter/handlers/table_cell_handler_spec.rb b/spec/unit/markbridge/parsers/text_formatter/handlers/table_cell_handler_spec.rb new file mode 100644 index 0000000..2a475eb --- /dev/null +++ b/spec/unit/markbridge/parsers/text_formatter/handlers/table_cell_handler_spec.rb @@ -0,0 +1,35 @@ +# frozen_string_literal: true + +require "nokogiri" + +RSpec.describe Markbridge::Parsers::TextFormatter::Handlers::TableCellHandler do + let(:parent) { Markbridge::AST::TableRow.new } + let(:handler) { described_class.new } + + describe "#process" do + it "creates a non-header cell for TD" do + element = Nokogiri.XML("dataheader
") + expect(result).to include("") + expect(result).to include("") + expect(result).to include("
A1
") + end + + it "falls back to HTML when cell content has newlines" do + table = Markbridge::AST::Table.new + row = Markbridge::AST::TableRow.new + cell = Markbridge::AST::TableCell.new + cell << Markbridge::AST::Text.new("line1\nline2") + row << cell + table << row + + result = tag.render(table, interface) + + expect(result).to include("") + expect(result).to include("") + end + + it "falls back to HTML for nested tables" do + outer_table = Markbridge::AST::Table.new + parent_context = Markbridge::Renderers::Discourse::RenderContext.new([outer_table]) + nested_interface = + Markbridge::Renderers::Discourse::RenderingInterface.new(renderer, parent_context) + + table = build_table([%w[A B]]) + + result = tag.render(table, nested_interface) + + expect(result).to include("
line1\nline2
") + end + + it "uses thead/tbody when header rows exist" do + table = build_table([[{ text: "H1", header: true }, { text: "H2", header: true }], %w[a b]]) + + # Force HTML fallback by making rows uneven + extra_row = Markbridge::AST::TableRow.new + cell = Markbridge::AST::TableCell.new + cell << Markbridge::AST::Text.new("only one") + extra_row << cell + table << extra_row + + result = tag.render(table, interface) + + expect(result).to include("") + expect(result).to include("") + expect(result).to include("") + expect(result).to include("") + end + end + + describe "edge cases" do + it "returns empty string for table with no rows" do + table = Markbridge::AST::Table.new + + result = tag.render(table, interface) + + expect(result).to eq("") + end + + it "handles single-cell table" do + table = build_table([["only"]]) + + result = tag.render(table, interface) + + expect(result).to include("| only |") + expect(result).to include("| --- |") + end + end +end