Skip to content

Commit

Permalink
Added line/column number support for raw HTML and link tokens
Browse files Browse the repository at this point in the history
  • Loading branch information
jackdewinter committed Sep 5, 2020
1 parent c6cc992 commit f518085
Show file tree
Hide file tree
Showing 23 changed files with 518 additions and 323 deletions.
2 changes: 1 addition & 1 deletion publish/coverage.json
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"projectName": "pymarkdown", "reportSource": "pytest", "branchLevel": {"totalMeasured": 1652, "totalCovered": 1648}, "lineLevel": {"totalMeasured": 5535, "totalCovered": 5531}}
{"projectName": "pymarkdown", "reportSource": "pytest", "branchLevel": {"totalMeasured": 1658, "totalCovered": 1654}, "lineLevel": {"totalMeasured": 5556, "totalCovered": 5552}}
2 changes: 1 addition & 1 deletion publish/test-results.json

Large diffs are not rendered by default.

7 changes: 5 additions & 2 deletions pymarkdown/html_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -543,7 +543,7 @@ def __process_raw_special(
return valid_raw_html, parse_index

@staticmethod
def parse_raw_html(only_between_angles, remaining_line):
def parse_raw_html(only_between_angles, remaining_line, line_number, column_number):
"""
Given an open HTML tag character (<), try the various possibilities for
types of tag, and determine if any of them parse validly.
Expand Down Expand Up @@ -588,7 +588,10 @@ def parse_raw_html(only_between_angles, remaining_line):
valid_raw_html = HtmlHelper.__parse_raw_declaration(only_between_angles)

if valid_raw_html:
return RawHtmlMarkdownToken(valid_raw_html), remaining_line_parse_index
return (
RawHtmlMarkdownToken(valid_raw_html, line_number, column_number),
remaining_line_parse_index,
)
return None, -1

@staticmethod
Expand Down
41 changes: 36 additions & 5 deletions pymarkdown/inline_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
EmailAutolinkMarkdownToken,
HardBreakMarkdownToken,
InlineCodeSpanMarkdownToken,
MarkdownToken,
UriAutolinkMarkdownToken,
)
from pymarkdown.parser_helper import ParserHelper
Expand Down Expand Up @@ -779,9 +780,18 @@ def handle_angle_brackets(inline_request):
if not new_token:
new_token = InlineHelper.__parse_valid_email_autolink(between_brackets)
if not new_token:
LOGGER.debug(">>between_brackets>>%s", str(between_brackets))

new_column_number = inline_request.column_number
LOGGER.debug(">>between_brackets>>%s<<", str(between_brackets))
LOGGER.debug(">>new_column_number>>%s", str(new_column_number))
new_column_number += len(inline_request.remaining_line)
LOGGER.debug(">>new_column_number>>%s", str(new_column_number))

new_token, after_index = HtmlHelper.parse_raw_html(
between_brackets, remaining_line
between_brackets,
remaining_line,
inline_request.line_number,
new_column_number,
)
LOGGER.debug(">>new_token>>%s", str(new_token))
if after_index != -1:
Expand All @@ -795,8 +805,29 @@ def handle_angle_brackets(inline_request):
else:
inline_response.new_string = InlineHelper.angle_bracket_start
inline_response.new_index = inline_request.next_index + 1

inline_response.delta_line_number = 0
inline_response.delta_column_number = (
inline_response.new_index - inline_request.next_index
)
if (
new_token
and new_token.token_name == MarkdownToken.token_inline_raw_html
and "\n" in new_token.raw_tag
):
split_raw_tag = new_token.raw_tag.split("\n")
LOGGER.debug(
">>split_raw_tag>>%s<<", ParserHelper.make_value_visible(split_raw_tag)
)
inline_response.delta_line_number += len(split_raw_tag) - 1
length_of_last_elements = len(split_raw_tag[-1])
LOGGER.debug(
">>xx>>%s<<", ParserHelper.make_value_visible(length_of_last_elements)
)
inline_response.delta_column_number = -(length_of_last_elements + 2)
LOGGER.debug(
">>delta_column_number>>%s<<",
ParserHelper.make_value_visible(inline_response.delta_column_number),
)
else:
inline_response.delta_column_number = (
inline_response.new_index - inline_request.next_index
)
return inline_response
13 changes: 11 additions & 2 deletions pymarkdown/inline_processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -581,6 +581,7 @@ def __process_inline_text_block(
whitespace_to_add = None

was_new_line = False
was_column_number_reset = False

LOGGER.debug(
"__process_inline_text_block>>%s>>%s", str(start_index), str(next_index)
Expand All @@ -603,6 +604,10 @@ def __process_inline_text_block(
)
if source_text[next_index] in InlineProcessor.__inline_character_handlers:
LOGGER.debug("handler(before)>>%s<<", source_text[next_index])
LOGGER.debug(
"current_string_unresolved>>%s<<", str(current_string_unresolved)
)
LOGGER.debug("remaining_line>>%s<<", str(remaining_line))
LOGGER.debug("column_number>>%s<<", str(column_number))
proc_fn = InlineProcessor.__inline_character_handlers[
source_text[next_index]
Expand All @@ -614,7 +619,11 @@ def __process_inline_text_block(
)

line_number += inline_response.delta_line_number
column_number += inline_response.delta_column_number
if inline_response.delta_column_number < 0:
column_number = -(inline_response.delta_column_number)
was_column_number_reset = True
else:
column_number += inline_response.delta_column_number
LOGGER.debug(
"handler(after)>>%s,%s<<", str(line_number), str(column_number)
)
Expand Down Expand Up @@ -766,7 +775,7 @@ def __process_inline_text_block(
LOGGER.debug("fold_space(after)>>%s<<", str(fold_space))
column_number += len(fold_space[0])

else:
elif not was_column_number_reset:
column_number += len(remaining_line)
LOGGER.debug(
"l/c(after)>>%s,%s<<",
Expand Down
4 changes: 4 additions & 0 deletions pymarkdown/link_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -973,6 +973,8 @@ def __create_link_token(
)
LOGGER.debug(">>text_from_blocks_raw>>%s>>", text_from_blocks_raw)

line_number = inline_blocks[ind].line_number
column_number = inline_blocks[ind].column_number
if start_text == LinkHelper.__link_start_sequence:
inline_blocks[ind] = LinkStartMarkdownToken(
inline_link,
Expand All @@ -987,6 +989,8 @@ def __create_link_token(
before_link_whitespace,
before_title_whitespace,
after_title_whitespace,
line_number,
column_number,
)
token_to_append = EndMarkdownToken(
MarkdownToken.token_inline_link, "", "", None, False
Expand Down
8 changes: 7 additions & 1 deletion pymarkdown/markdown_token.py
Original file line number Diff line number Diff line change
Expand Up @@ -1141,6 +1141,8 @@ def __init__(
before_link_whitespace,
before_title_whitespace,
after_title_whitespace,
line_number,
column_number,
):
self.link_uri = link_uri
self.link_title = link_title
Expand Down Expand Up @@ -1181,6 +1183,8 @@ def __init__(
+ before_title_whitespace
+ ":"
+ after_title_whitespace,
line_number=line_number,
column_number=column_number,
)

# pylint: enable=too-many-arguments
Expand Down Expand Up @@ -1282,13 +1286,15 @@ class RawHtmlMarkdownToken(MarkdownToken):
Class to provide for an encapsulation of the inline raw html element.
"""

def __init__(self, raw_tag):
def __init__(self, raw_tag, line_number, column_number):
self.raw_tag = raw_tag
MarkdownToken.__init__(
self,
MarkdownToken.token_inline_raw_html,
MarkdownTokenClass.INLINE_BLOCK,
raw_tag,
line_number=line_number,
column_number=column_number,
)


Expand Down
7 changes: 5 additions & 2 deletions readme.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,11 @@
- why does hard break not have \n?
- HTML and Fenced Blocks and better handling of capturing newlines to avoid counting token height
- verifying vs validating?
- 634a in bq and in list
- raw html, 500
- links, 518b
- links with LF in link_label
- each inline token surrounded by text

## Bugs - Character Entities

Expand All @@ -35,8 +40,6 @@

- backslashes - 600
- atx heading text, 183
- raw html, 500
- links, 518b
- images, with newline in title?
- lrds spanning lines
- code span
Expand Down
4 changes: 2 additions & 2 deletions test/test_markdown_backslash_escapes.py
Original file line number Diff line number Diff line change
Expand Up @@ -334,7 +334,7 @@ def test_backslash_escapes_318():
source_markdown = """[foo](/bar\\* "ti\\*tle")"""
expected_tokens = [
"[para(1,1):]",
'[link:inline:/bar*:ti*tle:/bar\\*:ti\\*tle::foo:False:":: :]',
'[link(1,1):inline:/bar*:ti*tle:/bar\\*:ti\\*tle::foo:False:":: :]',
"[text(1,2):foo:]",
"[end-link:::False]",
"[end-para:::True]",
Expand Down Expand Up @@ -366,7 +366,7 @@ def test_backslash_escapes_319():
"""
expected_tokens = [
"[para(1,1):]",
"[link:shortcut:/bar*:ti*tle::::foo:::::]",
"[link(1,1):shortcut:/bar*:ti*tle::::foo:::::]",
"[text(1,2):foo:]",
"[end-link:::False]",
"[end-para:::True]",
Expand Down
2 changes: 1 addition & 1 deletion test/test_markdown_code_spans.py
Original file line number Diff line number Diff line change
Expand Up @@ -505,7 +505,7 @@ def test_code_spans_354():
source_markdown = """<a href="`">`"""
expected_tokens = [
"[para(1,1):]",
'[raw-html:a href="`"]',
'[raw-html(1,1):a href="`"]',
"[text(1,13):`:]",
"[end-para:::True]",
]
Expand Down
4 changes: 2 additions & 2 deletions test/test_markdown_emphasis_rule_10.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ def test_emphasis_431():
"[para(1,1):]",
"[emphasis(1,1):2:*]",
"[text(1,3):foo :]",
"[link:inline:/url:::::bar:False::::]",
"[link(1,7):inline:/url:::::bar:False::::]",
"[text(1,8):bar:]",
"[end-link:::False]",
"[end-emphasis(1,18)::2:*:False]",
Expand Down Expand Up @@ -389,7 +389,7 @@ def test_emphasis_442():
"[para(1,1):]",
"[emphasis(1,1):2:*]",
"[text(1,3):foo :]",
"[link:inline:/url:::::*bar*:False::::]",
"[link(1,7):inline:/url:::::*bar*:False::::]",
"[emphasis(1,8):1:*]",
"[text(1,9):bar:]",
"[end-emphasis(1,12)::1:*:False]",
Expand Down
10 changes: 5 additions & 5 deletions test/test_markdown_emphasis_rule_14_to_17.py
Original file line number Diff line number Diff line change
Expand Up @@ -220,7 +220,7 @@ def test_emphasis_482():
expected_tokens = [
"[para(1,1):]",
"[text(1,1):*:]",
"[link:inline:/url:::::bar*:False::::]",
"[link(1,2):inline:/url:::::bar*:False::::]",
"[text(1,3):bar:]",
"[text(1,6):*:]",
"[end-link:::False]",
Expand Down Expand Up @@ -252,7 +252,7 @@ def test_emphasis_483():
"[para(1,1):]",
"[text(1,1):_:]",
"[text(1,2):foo :]",
"[link:inline:/url:::::bar_:False::::]",
"[link(1,6):inline:/url:::::bar_:False::::]",
"[text(1,7):bar:]",
"[text(1,10):_:]",
"[end-link:::False]",
Expand Down Expand Up @@ -283,7 +283,7 @@ def test_emphasis_484():
expected_tokens = [
"[para(1,1):]",
"[text(1,1):*:]",
'[raw-html:img src="foo" title="*"/]',
'[raw-html(1,2):img src="foo" title="*"/]',
"[end-para:::True]",
]
expected_gfm = """<p>*<img src="foo" title="*"/></p>"""
Expand Down Expand Up @@ -311,7 +311,7 @@ def test_emphasis_485():
expected_tokens = [
"[para(1,1):]",
"[text(1,1):**:]",
'[raw-html:a href="**"]',
'[raw-html(1,3):a href="**"]',
"[end-para:::True]",
]
expected_gfm = """<p>**<a href="**"></p>"""
Expand Down Expand Up @@ -339,7 +339,7 @@ def test_emphasis_486():
expected_tokens = [
"[para(1,1):]",
"[text(1,1):__:]",
'[raw-html:a href="__"]',
'[raw-html(1,3):a href="__"]',
"[end-para:::True]",
]
expected_gfm = """<p>__<a href="__"></p>"""
Expand Down
4 changes: 2 additions & 2 deletions test/test_markdown_emphasis_rule_9.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ def test_emphasis_413():
"[para(1,1):]",
"[emphasis(1,1):1:*]",
"[text(1,2):foo :]",
"[link:inline:/url:::::bar:False::::]",
"[link(1,6):inline:/url:::::bar:False::::]",
"[text(1,7):bar:]",
"[end-link:::False]",
"[end-emphasis(1,17)::1:*:False]",
Expand Down Expand Up @@ -521,7 +521,7 @@ def test_emphasis_428():
"[para(1,1):]",
"[emphasis(1,1):1:*]",
"[text(1,2):foo :]",
"[link:inline:/url:::::*bar*:False::::]",
"[link(1,6):inline:/url:::::*bar*:False::::]",
"[emphasis(1,7):1:*]",
"[text(1,8):bar:]",
"[end-emphasis(1,11)::1:*:False]",
Expand Down
6 changes: 3 additions & 3 deletions test/test_markdown_entity_and_numeric_character_references.py
Original file line number Diff line number Diff line change
Expand Up @@ -254,7 +254,7 @@ def test_character_references_328():
source_markdown = '[foo](/f&ouml;&ouml; "f&ouml;&ouml;")'
expected_tokens = [
"[para(1,1):]",
'[link:inline:/f%C3%B6%C3%B6:föö:/f&ouml;&ouml;:f&ouml;&ouml;::foo:False:":: :]',
'[link(1,1):inline:/f%C3%B6%C3%B6:föö:/f&ouml;&ouml;:f&ouml;&ouml;::foo:False:":: :]',
"[text(1,2):foo:]",
"[end-link:::False]",
"[end-para:::True]",
Expand Down Expand Up @@ -283,7 +283,7 @@ def test_character_references_328a():
source_markdown = '[f&ouml;&ouml;](/f&ouml;&ouml; "f&ouml;&ouml;")'
expected_tokens = [
"[para(1,1):]",
'[link:inline:/f%C3%B6%C3%B6:föö:/f&ouml;&ouml;:f&ouml;&ouml;::f&ouml;&ouml;:False:":: :]',
'[link(1,1):inline:/f%C3%B6%C3%B6:föö:/f&ouml;&ouml;:f&ouml;&ouml;::f&ouml;&ouml;:False:":: :]',
"[text(1,2):f\a&ouml;\aö\a\a&ouml;\aö\a:]",
"[end-link:::False]",
"[end-para:::True]",
Expand Down Expand Up @@ -314,7 +314,7 @@ def test_character_references_329():
[foo]: /f&ouml;&ouml; "f&ouml;&ouml;\""""
expected_tokens = [
"[para(1,1):]",
"[link:shortcut:/f%C3%B6%C3%B6:föö::::foo:::::]",
"[link(1,1):shortcut:/f%C3%B6%C3%B6:föö::::foo:::::]",
"[text(1,2):foo:]",
"[end-link:::False]",
"[end-para:::True]",
Expand Down
6 changes: 3 additions & 3 deletions test/test_markdown_extra.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ def test_extra_003():
source_markdown = "[link](!\"#$%&'\\(\\)*+,-./0123456789:;<=>?@A-Z[\\\\]^_`a-z{|}~)"
expected_tokens = [
"[para(1,1):]",
"[link:inline:!%22#$%25&amp;'()*+,-./0123456789:;%3C=%3E?@A-Z%5B%5C%5D%5E_%60a-z%7B%7C%7D~::!\"#$%&'\\(\\)*+,-./0123456789:;<=>?@A-Z[\\\\]^_`a-z{|}~:::link:False::::]",
"[link(1,1):inline:!%22#$%25&amp;'()*+,-./0123456789:;%3C=%3E?@A-Z%5B%5C%5D%5E_%60a-z%7B%7C%7D~::!\"#$%&'\\(\\)*+,-./0123456789:;<=>?@A-Z[\\\\]^_`a-z{|}~:::link:False::::]",
"[text(1,2):link:]",
"[end-link:::False]",
"[end-para:::True]",
Expand Down Expand Up @@ -102,7 +102,7 @@ def test_extra_004():
)
expected_tokens = [
"[para(1,1):]",
"[link:inline:!%22#$%12&amp;'()*+,-./0123456789:;%3C=%3E?@A-Z%5B%5C%5D%5E_%60a-z%7B%7C%7D~::!\"#$%12&'\\(\\)*+,-./0123456789:;<=>?@A-Z[\\\\]^_`a-z{|}~:::link:False::::]",
"[link(1,1):inline:!%22#$%12&amp;'()*+,-./0123456789:;%3C=%3E?@A-Z%5B%5C%5D%5E_%60a-z%7B%7C%7D~::!\"#$%12&'\\(\\)*+,-./0123456789:;<=>?@A-Z[\\\\]^_`a-z{|}~:::link:False::::]",
"[text(1,2):link:]",
"[end-link:::False]",
"[end-para:::True]",
Expand Down Expand Up @@ -132,7 +132,7 @@ def test_extra_005():
source_markdown = "[link](http://google.com/search%)"
expected_tokens = [
"[para(1,1):]",
"[link:inline:http://google.com/search%25::http://google.com/search%:::link:False::::]",
"[link(1,1):inline:http://google.com/search%25::http://google.com/search%:::link:False::::]",
"[text(1,2):link:]",
"[end-link:::False]",
"[end-para:::True]",
Expand Down
4 changes: 2 additions & 2 deletions test/test_markdown_hard_line_breaks.py
Original file line number Diff line number Diff line change
Expand Up @@ -318,7 +318,7 @@ def test_hard_line_breaks_663():
)
expected_tokens = [
"[para(1,1):\n]",
'[raw-html:a href="foo \nbar"]',
'[raw-html(1,1):a href="foo \nbar"]',
"[end-para:::True]",
]
expected_gfm = """<p><a href="foo\a\a
Expand Down Expand Up @@ -349,7 +349,7 @@ def test_hard_line_breaks_664():
bar">"""
expected_tokens = [
"[para(1,1):\n]",
'[raw-html:a href="foo\\\nbar"]',
'[raw-html(1,1):a href="foo\\\nbar"]',
"[end-para:::True]",
]
expected_gfm = """<p><a href="foo\\
Expand Down
Loading

0 comments on commit f518085

Please sign in to comment.