Added line/column number support for raw HTML and link tokens

jackdewinter · Sep 5, 2020 · f518085 · f518085
1 parent c6cc992
commit f518085
Show file tree

Hide file tree

Showing 23 changed files with 518 additions and 323 deletions.
diff --git a/publish/coverage.json b/publish/coverage.json
@@ -1 +1 @@
-{"projectName": "pymarkdown", "reportSource": "pytest", "branchLevel": {"totalMeasured": 1652, "totalCovered": 1648}, "lineLevel": {"totalMeasured": 5535, "totalCovered": 5531}}
+{"projectName": "pymarkdown", "reportSource": "pytest", "branchLevel": {"totalMeasured": 1658, "totalCovered": 1654}, "lineLevel": {"totalMeasured": 5556, "totalCovered": 5552}}
diff --git a/publish/test-results.json b/publish/test-results.json
diff --git a/pymarkdown/html_helper.py b/pymarkdown/html_helper.py
@@ -543,7 +543,7 @@ def __process_raw_special(
         return valid_raw_html, parse_index
 
     @staticmethod
-    def parse_raw_html(only_between_angles, remaining_line):
+    def parse_raw_html(only_between_angles, remaining_line, line_number, column_number):
         """
         Given an open HTML tag character (<), try the various possibilities for
         types of tag, and determine if any of them parse validly.
@@ -588,7 +588,10 @@ def parse_raw_html(only_between_angles, remaining_line):
             valid_raw_html = HtmlHelper.__parse_raw_declaration(only_between_angles)
 
         if valid_raw_html:
-            return RawHtmlMarkdownToken(valid_raw_html), remaining_line_parse_index
+            return (
+                RawHtmlMarkdownToken(valid_raw_html, line_number, column_number),
+                remaining_line_parse_index,
+            )
         return None, -1
 
     @staticmethod

diff --git a/pymarkdown/inline_helper.py b/pymarkdown/inline_helper.py
@@ -13,6 +13,7 @@
     EmailAutolinkMarkdownToken,
     HardBreakMarkdownToken,
     InlineCodeSpanMarkdownToken,
+    MarkdownToken,
     UriAutolinkMarkdownToken,
 )
 from pymarkdown.parser_helper import ParserHelper
@@ -779,9 +780,18 @@ def handle_angle_brackets(inline_request):
             if not new_token:
                 new_token = InlineHelper.__parse_valid_email_autolink(between_brackets)
             if not new_token:
-                LOGGER.debug(">>between_brackets>>%s", str(between_brackets))
+
+                new_column_number = inline_request.column_number
+                LOGGER.debug(">>between_brackets>>%s<<", str(between_brackets))
+                LOGGER.debug(">>new_column_number>>%s", str(new_column_number))
+                new_column_number += len(inline_request.remaining_line)
+                LOGGER.debug(">>new_column_number>>%s", str(new_column_number))
+
                 new_token, after_index = HtmlHelper.parse_raw_html(
-                    between_brackets, remaining_line
+                    between_brackets,
+                    remaining_line,
+                    inline_request.line_number,
+                    new_column_number,
                 )
                 LOGGER.debug(">>new_token>>%s", str(new_token))
                 if after_index != -1:
@@ -795,8 +805,29 @@ def handle_angle_brackets(inline_request):
         else:
             inline_response.new_string = InlineHelper.angle_bracket_start
             inline_response.new_index = inline_request.next_index + 1
+
         inline_response.delta_line_number = 0
-        inline_response.delta_column_number = (
-            inline_response.new_index - inline_request.next_index
-        )
+        if (
+            new_token
+            and new_token.token_name == MarkdownToken.token_inline_raw_html
+            and "\n" in new_token.raw_tag
+        ):
+            split_raw_tag = new_token.raw_tag.split("\n")
+            LOGGER.debug(
+                ">>split_raw_tag>>%s<<", ParserHelper.make_value_visible(split_raw_tag)
+            )
+            inline_response.delta_line_number += len(split_raw_tag) - 1
+            length_of_last_elements = len(split_raw_tag[-1])
+            LOGGER.debug(
+                ">>xx>>%s<<", ParserHelper.make_value_visible(length_of_last_elements)
+            )
+            inline_response.delta_column_number = -(length_of_last_elements + 2)
+            LOGGER.debug(
+                ">>delta_column_number>>%s<<",
+                ParserHelper.make_value_visible(inline_response.delta_column_number),
+            )
+        else:
+            inline_response.delta_column_number = (
+                inline_response.new_index - inline_request.next_index
+            )
         return inline_response
diff --git a/pymarkdown/inline_processor.py b/pymarkdown/inline_processor.py
@@ -581,6 +581,7 @@ def __process_inline_text_block(
             whitespace_to_add = None
 
             was_new_line = False
+            was_column_number_reset = False
 
             LOGGER.debug(
                 "__process_inline_text_block>>%s>>%s", str(start_index), str(next_index)
@@ -603,6 +604,10 @@ def __process_inline_text_block(
             )
             if source_text[next_index] in InlineProcessor.__inline_character_handlers:
                 LOGGER.debug("handler(before)>>%s<<", source_text[next_index])
+                LOGGER.debug(
+                    "current_string_unresolved>>%s<<", str(current_string_unresolved)
+                )
+                LOGGER.debug("remaining_line>>%s<<", str(remaining_line))
                 LOGGER.debug("column_number>>%s<<", str(column_number))
                 proc_fn = InlineProcessor.__inline_character_handlers[
                     source_text[next_index]
@@ -614,7 +619,11 @@ def __process_inline_text_block(
                 )
 
                 line_number += inline_response.delta_line_number
-                column_number += inline_response.delta_column_number
+                if inline_response.delta_column_number < 0:
+                    column_number = -(inline_response.delta_column_number)
+                    was_column_number_reset = True
+                else:
+                    column_number += inline_response.delta_column_number
                 LOGGER.debug(
                     "handler(after)>>%s,%s<<", str(line_number), str(column_number)
                 )
@@ -766,7 +775,7 @@ def __process_inline_text_block(
                     LOGGER.debug("fold_space(after)>>%s<<", str(fold_space))
                     column_number += len(fold_space[0])
 
-            else:
+            elif not was_column_number_reset:
                 column_number += len(remaining_line)
             LOGGER.debug(
                 "l/c(after)>>%s,%s<<",

diff --git a/pymarkdown/link_helper.py b/pymarkdown/link_helper.py
@@ -973,6 +973,8 @@ def __create_link_token(
         )
         LOGGER.debug(">>text_from_blocks_raw>>%s>>", text_from_blocks_raw)
 
+        line_number = inline_blocks[ind].line_number
+        column_number = inline_blocks[ind].column_number
         if start_text == LinkHelper.__link_start_sequence:
             inline_blocks[ind] = LinkStartMarkdownToken(
                 inline_link,
@@ -987,6 +989,8 @@ def __create_link_token(
                 before_link_whitespace,
                 before_title_whitespace,
                 after_title_whitespace,
+                line_number,
+                column_number,
             )
             token_to_append = EndMarkdownToken(
                 MarkdownToken.token_inline_link, "", "", None, False

diff --git a/pymarkdown/markdown_token.py b/pymarkdown/markdown_token.py
@@ -1141,6 +1141,8 @@ def __init__(
         before_link_whitespace,
         before_title_whitespace,
         after_title_whitespace,
+        line_number,
+        column_number,
     ):
         self.link_uri = link_uri
         self.link_title = link_title
@@ -1181,6 +1183,8 @@ def __init__(
             + before_title_whitespace
             + ":"
             + after_title_whitespace,
+            line_number=line_number,
+            column_number=column_number,
         )
 
     # pylint: enable=too-many-arguments
@@ -1282,13 +1286,15 @@ class RawHtmlMarkdownToken(MarkdownToken):
     Class to provide for an encapsulation of the inline raw html element.
     """
 
-    def __init__(self, raw_tag):
+    def __init__(self, raw_tag, line_number, column_number):
         self.raw_tag = raw_tag
         MarkdownToken.__init__(
             self,
             MarkdownToken.token_inline_raw_html,
             MarkdownTokenClass.INLINE_BLOCK,
             raw_tag,
+            line_number=line_number,
+            column_number=column_number,
         )
 
 

diff --git a/readme.md b/readme.md
@@ -21,6 +21,11 @@
 - why does hard break not have \n?
 - HTML and Fenced Blocks and better handling of capturing newlines to avoid counting token height
 - verifying vs validating?
+- 634a in bq and in list
+- raw html, 500
+- links, 518b
+  - links with LF in link_label
+  - each inline token surrounded by text
 
 ## Bugs - Character Entities
 
@@ -35,8 +40,6 @@
 
 - backslashes - 600
 - atx heading text, 183
-- raw html, 500
-- links, 518b
 - images, with newline in title?
 - lrds spanning lines
 - code span

diff --git a/test/test_markdown_backslash_escapes.py b/test/test_markdown_backslash_escapes.py
@@ -334,7 +334,7 @@ def test_backslash_escapes_318():
     source_markdown = """[foo](/bar\\* "ti\\*tle")"""
     expected_tokens = [
         "[para(1,1):]",
-        '[link:inline:/bar*:ti*tle:/bar\\*:ti\\*tle::foo:False:":: :]',
+        '[link(1,1):inline:/bar*:ti*tle:/bar\\*:ti\\*tle::foo:False:":: :]',
         "[text(1,2):foo:]",
         "[end-link:::False]",
         "[end-para:::True]",
@@ -366,7 +366,7 @@ def test_backslash_escapes_319():
 """
     expected_tokens = [
         "[para(1,1):]",
-        "[link:shortcut:/bar*:ti*tle::::foo:::::]",
+        "[link(1,1):shortcut:/bar*:ti*tle::::foo:::::]",
         "[text(1,2):foo:]",
         "[end-link:::False]",
         "[end-para:::True]",

diff --git a/test/test_markdown_code_spans.py b/test/test_markdown_code_spans.py
@@ -505,7 +505,7 @@ def test_code_spans_354():
     source_markdown = """<a href="`">`"""
     expected_tokens = [
         "[para(1,1):]",
-        '[raw-html:a href="`"]',
+        '[raw-html(1,1):a href="`"]',
         "[text(1,13):`:]",
         "[end-para:::True]",
     ]

diff --git a/test/test_markdown_emphasis_rule_10.py b/test/test_markdown_emphasis_rule_10.py
@@ -27,7 +27,7 @@ def test_emphasis_431():
         "[para(1,1):]",
         "[emphasis(1,1):2:*]",
         "[text(1,3):foo :]",
-        "[link:inline:/url:::::bar:False::::]",
+        "[link(1,7):inline:/url:::::bar:False::::]",
         "[text(1,8):bar:]",
         "[end-link:::False]",
         "[end-emphasis(1,18)::2:*:False]",
@@ -389,7 +389,7 @@ def test_emphasis_442():
         "[para(1,1):]",
         "[emphasis(1,1):2:*]",
         "[text(1,3):foo :]",
-        "[link:inline:/url:::::*bar*:False::::]",
+        "[link(1,7):inline:/url:::::*bar*:False::::]",
         "[emphasis(1,8):1:*]",
         "[text(1,9):bar:]",
         "[end-emphasis(1,12)::1:*:False]",

diff --git a/test/test_markdown_emphasis_rule_14_to_17.py b/test/test_markdown_emphasis_rule_14_to_17.py
@@ -220,7 +220,7 @@ def test_emphasis_482():
     expected_tokens = [
         "[para(1,1):]",
         "[text(1,1):*:]",
-        "[link:inline:/url:::::bar*:False::::]",
+        "[link(1,2):inline:/url:::::bar*:False::::]",
         "[text(1,3):bar:]",
         "[text(1,6):*:]",
         "[end-link:::False]",
@@ -252,7 +252,7 @@ def test_emphasis_483():
         "[para(1,1):]",
         "[text(1,1):_:]",
         "[text(1,2):foo :]",
-        "[link:inline:/url:::::bar_:False::::]",
+        "[link(1,6):inline:/url:::::bar_:False::::]",
         "[text(1,7):bar:]",
         "[text(1,10):_:]",
         "[end-link:::False]",
@@ -283,7 +283,7 @@ def test_emphasis_484():
     expected_tokens = [
         "[para(1,1):]",
         "[text(1,1):*:]",
-        '[raw-html:img src="foo" title="*"/]',
+        '[raw-html(1,2):img src="foo" title="*"/]',
         "[end-para:::True]",
     ]
     expected_gfm = """<p>*<img src="foo" title="*"/></p>"""
@@ -311,7 +311,7 @@ def test_emphasis_485():
     expected_tokens = [
         "[para(1,1):]",
         "[text(1,1):**:]",
-        '[raw-html:a href="**"]',
+        '[raw-html(1,3):a href="**"]',
         "[end-para:::True]",
     ]
     expected_gfm = """<p>**<a href="**"></p>"""
@@ -339,7 +339,7 @@ def test_emphasis_486():
     expected_tokens = [
         "[para(1,1):]",
         "[text(1,1):__:]",
-        '[raw-html:a href="__"]',
+        '[raw-html(1,3):a href="__"]',
         "[end-para:::True]",
     ]
     expected_gfm = """<p>__<a href="__"></p>"""

diff --git a/test/test_markdown_emphasis_rule_9.py b/test/test_markdown_emphasis_rule_9.py
@@ -27,7 +27,7 @@ def test_emphasis_413():
         "[para(1,1):]",
         "[emphasis(1,1):1:*]",
         "[text(1,2):foo :]",
-        "[link:inline:/url:::::bar:False::::]",
+        "[link(1,6):inline:/url:::::bar:False::::]",
         "[text(1,7):bar:]",
         "[end-link:::False]",
         "[end-emphasis(1,17)::1:*:False]",
@@ -521,7 +521,7 @@ def test_emphasis_428():
         "[para(1,1):]",
         "[emphasis(1,1):1:*]",
         "[text(1,2):foo :]",
-        "[link:inline:/url:::::*bar*:False::::]",
+        "[link(1,6):inline:/url:::::*bar*:False::::]",
         "[emphasis(1,7):1:*]",
         "[text(1,8):bar:]",
         "[end-emphasis(1,11)::1:*:False]",

diff --git a/test/test_markdown_entity_and_numeric_character_references.py b/test/test_markdown_entity_and_numeric_character_references.py
@@ -254,7 +254,7 @@ def test_character_references_328():
     source_markdown = '[foo](/f&ouml;&ouml; "f&ouml;&ouml;")'
     expected_tokens = [
         "[para(1,1):]",
-        '[link:inline:/f%C3%B6%C3%B6:föö:/f&ouml;&ouml;:f&ouml;&ouml;::foo:False:":: :]',
+        '[link(1,1):inline:/f%C3%B6%C3%B6:föö:/f&ouml;&ouml;:f&ouml;&ouml;::foo:False:":: :]',
         "[text(1,2):foo:]",
         "[end-link:::False]",
         "[end-para:::True]",
@@ -283,7 +283,7 @@ def test_character_references_328a():
     source_markdown = '[f&ouml;&ouml;](/f&ouml;&ouml; "f&ouml;&ouml;")'
     expected_tokens = [
         "[para(1,1):]",
-        '[link:inline:/f%C3%B6%C3%B6:föö:/f&ouml;&ouml;:f&ouml;&ouml;::f&ouml;&ouml;:False:":: :]',
+        '[link(1,1):inline:/f%C3%B6%C3%B6:föö:/f&ouml;&ouml;:f&ouml;&ouml;::f&ouml;&ouml;:False:":: :]',
         "[text(1,2):f\a&ouml;\aö\a\a&ouml;\aö\a:]",
         "[end-link:::False]",
         "[end-para:::True]",
@@ -314,7 +314,7 @@ def test_character_references_329():
 [foo]: /f&ouml;&ouml; "f&ouml;&ouml;\""""
     expected_tokens = [
         "[para(1,1):]",
-        "[link:shortcut:/f%C3%B6%C3%B6:föö::::foo:::::]",
+        "[link(1,1):shortcut:/f%C3%B6%C3%B6:föö::::foo:::::]",
         "[text(1,2):foo:]",
         "[end-link:::False]",
         "[end-para:::True]",

diff --git a/test/test_markdown_extra.py b/test/test_markdown_extra.py
@@ -67,7 +67,7 @@ def test_extra_003():
     source_markdown = "[link](!\"#$%&'\\(\\)*+,-./0123456789:;<=>?@A-Z[\\\\]^_`a-z{|}~)"
     expected_tokens = [
         "[para(1,1):]",
-        "[link:inline:!%22#$%25&amp;'()*+,-./0123456789:;%3C=%3E?@A-Z%5B%5C%5D%5E_%60a-z%7B%7C%7D~::!\"#$%&'\\(\\)*+,-./0123456789:;<=>?@A-Z[\\\\]^_`a-z{|}~:::link:False::::]",
+        "[link(1,1):inline:!%22#$%25&amp;'()*+,-./0123456789:;%3C=%3E?@A-Z%5B%5C%5D%5E_%60a-z%7B%7C%7D~::!\"#$%&'\\(\\)*+,-./0123456789:;<=>?@A-Z[\\\\]^_`a-z{|}~:::link:False::::]",
         "[text(1,2):link:]",
         "[end-link:::False]",
         "[end-para:::True]",
@@ -102,7 +102,7 @@ def test_extra_004():
     )
     expected_tokens = [
         "[para(1,1):]",
-        "[link:inline:!%22#$%12&amp;'()*+,-./0123456789:;%3C=%3E?@A-Z%5B%5C%5D%5E_%60a-z%7B%7C%7D~::!\"#$%12&'\\(\\)*+,-./0123456789:;<=>?@A-Z[\\\\]^_`a-z{|}~:::link:False::::]",
+        "[link(1,1):inline:!%22#$%12&amp;'()*+,-./0123456789:;%3C=%3E?@A-Z%5B%5C%5D%5E_%60a-z%7B%7C%7D~::!\"#$%12&'\\(\\)*+,-./0123456789:;<=>?@A-Z[\\\\]^_`a-z{|}~:::link:False::::]",
         "[text(1,2):link:]",
         "[end-link:::False]",
         "[end-para:::True]",
@@ -132,7 +132,7 @@ def test_extra_005():
     source_markdown = "[link](http://google.com/search%)"
     expected_tokens = [
         "[para(1,1):]",
-        "[link:inline:http://google.com/search%25::http://google.com/search%:::link:False::::]",
+        "[link(1,1):inline:http://google.com/search%25::http://google.com/search%:::link:False::::]",
         "[text(1,2):link:]",
         "[end-link:::False]",
         "[end-para:::True]",

diff --git a/test/test_markdown_hard_line_breaks.py b/test/test_markdown_hard_line_breaks.py
@@ -318,7 +318,7 @@ def test_hard_line_breaks_663():
     )
     expected_tokens = [
         "[para(1,1):\n]",
-        '[raw-html:a href="foo  \nbar"]',
+        '[raw-html(1,1):a href="foo  \nbar"]',
         "[end-para:::True]",
     ]
     expected_gfm = """<p><a href="foo\a\a
@@ -349,7 +349,7 @@ def test_hard_line_breaks_664():
 bar">"""
     expected_tokens = [
         "[para(1,1):\n]",
-        '[raw-html:a href="foo\\\nbar"]',
+        '[raw-html(1,1):a href="foo\\\nbar"]',
         "[end-para:::True]",
     ]
     expected_gfm = """<p><a href="foo\\