From 5126e0290c8fbc167b2be3410a97e3ff19cfdd98 Mon Sep 17 00:00:00 2001 From: Taneli Hukkinen Date: Sat, 26 Dec 2020 22:16:03 +0200 Subject: [PATCH 1/6] Catch up with markdown-it 11.0.1 --- markdown_it/main.py | 2 +- markdown_it/port.yaml | 4 +- markdown_it/rules_block/blockquote.py | 22 +++----- markdown_it/rules_block/table.py | 2 + markdown_it/rules_inline/balance_pairs.py | 2 +- tests/test_port/fixtures/commonmark_extras.md | 50 +++++++++++++++++++ 6 files changed, 62 insertions(+), 20 deletions(-) diff --git a/markdown_it/main.py b/markdown_it/main.py index 85254751..43389909 100644 --- a/markdown_it/main.py +++ b/markdown_it/main.py @@ -215,7 +215,7 @@ def parse(self, src: str, env: Optional[AttrDict] = None) -> List[Token]: :param src: source string :param env: environment sandbox - Parse input string and returns list of block tokens (special token type + Parse input string and return list of block tokens (special token type "inline" will contain list of inline tokens). `env` is used to pass data between "distributed" rules and return additional diff --git a/markdown_it/port.yaml b/markdown_it/port.yaml index 84ec2f87..57338aea 100644 --- a/markdown_it/port.yaml +++ b/markdown_it/port.yaml @@ -1,6 +1,6 @@ - package: markdown-it/markdown-it - commit: f798bea9623277bbf89b9621cf7fb283c693fcab - date: Mar 12, 2020 + commit: 331ae117e09115366db12b517ca526b1b7fee1e8 + date: Sep 14, 2020 notes: - Rename variables that use python built-in names, e.g. - `max` -> `maximum` diff --git a/markdown_it/rules_block/blockquote.py b/markdown_it/rules_block/blockquote.py index 98fcdc1d..c3e2d5c2 100644 --- a/markdown_it/rules_block/blockquote.py +++ b/markdown_it/rules_block/blockquote.py @@ -32,12 +32,8 @@ def blockquote(state: StateBlock, startLine: int, endLine: int, silent: bool): if silent: return True - # skip spaces after ">" and re-calculate offset - initial = offset = ( - state.sCount[startLine] - + pos - - (state.bMarks[startLine] + state.tShift[startLine]) - ) + # set offset past spaces and ">" + initial = offset = state.sCount[startLine] + 1 try: second_char_code: Optional[int] = state.srcCharCode[pos] @@ -109,7 +105,6 @@ def blockquote(state: StateBlock, startLine: int, endLine: int, silent: bool): oldParentType = state.parentType state.parentType = "blockquote" - wasOutdented = False # Search the end of the block # @@ -142,8 +137,7 @@ def blockquote(state: StateBlock, startLine: int, endLine: int, silent: bool): # > current blockquote # 2. checking this line # ``` - if state.sCount[nextLine] < state.blkIndent: - wasOutdented = True + isOutdented = state.sCount[nextLine] < state.blkIndent pos = state.bMarks[nextLine] + state.tShift[nextLine] max = state.eMarks[nextLine] @@ -152,17 +146,13 @@ def blockquote(state: StateBlock, startLine: int, endLine: int, silent: bool): # Case 1: line is not inside the blockquote, and this line is empty. break - evaluatesTrue = state.srcCharCode[pos] == 0x3E and not wasOutdented # /* > */ + evaluatesTrue = state.srcCharCode[pos] == 0x3E and not isOutdented # /* > */ pos += 1 if evaluatesTrue: # This line is inside the blockquote. - # skip spaces after ">" and re-calculate offset - initial = offset = ( - state.sCount[nextLine] - + pos - - (state.bMarks[nextLine] + state.tShift[nextLine]) - ) + # set offset past spaces and ">" + initial = offset = state.sCount[nextLine] + 1 # skip one optional space after '>' if state.srcCharCode[pos] == 0x20: # /* space */ diff --git a/markdown_it/rules_block/table.py b/markdown_it/rules_block/table.py index 2152df71..0a8251bb 100644 --- a/markdown_it/rules_block/table.py +++ b/markdown_it/rules_block/table.py @@ -183,10 +183,12 @@ def table(state: StateBlock, startLine: int, endLine: int, silent: bool): token = state.push("tr_open", "tr", 1) for i in range(columnCount): token = state.push("td_open", "td", 1) + token.map = [nextLine, nextLine + 1] if aligns[i]: token.attrs = [["style", "text-align:" + aligns[i]]] token = state.push("inline", "", 0) + token.map = [nextLine, nextLine + 1] try: token.content = columns[i].strip() if columns[i] else "" except IndexError: diff --git a/markdown_it/rules_inline/balance_pairs.py b/markdown_it/rules_inline/balance_pairs.py index deff41e6..4198492b 100644 --- a/markdown_it/rules_inline/balance_pairs.py +++ b/markdown_it/rules_inline/balance_pairs.py @@ -42,7 +42,7 @@ def processDelimiters(state: StateInline, delimiters, *args): if newMinOpenerIdx == -1: newMinOpenerIdx = openerIdx - if opener.open and opener.end < 0 and opener.level == closer.level: + if opener.open and opener.end < 0: isOddMatch = False diff --git a/tests/test_port/fixtures/commonmark_extras.md b/tests/test_port/fixtures/commonmark_extras.md index 7bdd0b59..c7a48012 100644 --- a/tests/test_port/fixtures/commonmark_extras.md +++ b/tests/test_port/fixtures/commonmark_extras.md @@ -522,3 +522,53 @@ Coverage, entities with code > 10FFFF. Made this way for compatibility with comm

&#x1100000;

. + +Issue #696. Blockquotes should remember their level. +. +>>> foo +bar +>>> baz +. +
+
+
+

foo +bar +baz

+
+
+
+. + +Issue #696. Blockquotes should stop when outdented from a list. +. +1. >>> foo + bar +baz + >>> foo + >>> bar + >>> baz +. +
    +
  1. +
    +
    +
    +

    foo +bar +baz +foo

    +
    +
    +
    +
  2. +
+
+
+
+

bar +baz

+
+
+
+. From dadfd382a5d7594d587275df5974e0aa8e274d61 Mon Sep 17 00:00:00 2001 From: Taneli Hukkinen Date: Sun, 27 Dec 2020 03:32:52 +0200 Subject: [PATCH 2/6] Catch up with markdown-it v12.0.0 --- markdown_it/port.yaml | 4 +- markdown_it/renderer.py | 12 +- markdown_it/rules_block/table.py | 99 ++++---- tests/test_port/fixtures/normalize.md | 8 + tests/test_port/fixtures/tables.md | 335 ++++++++++++++++++-------- tests/test_port/test_fixtures.py | 3 + tests/test_port/test_misc.py | 14 ++ 7 files changed, 323 insertions(+), 152 deletions(-) create mode 100644 tests/test_port/test_misc.py diff --git a/markdown_it/port.yaml b/markdown_it/port.yaml index 57338aea..c7a4bfd8 100644 --- a/markdown_it/port.yaml +++ b/markdown_it/port.yaml @@ -1,6 +1,6 @@ - package: markdown-it/markdown-it - commit: 331ae117e09115366db12b517ca526b1b7fee1e8 - date: Sep 14, 2020 + commit: 1731de407e56595d1e206c79482061d1c6f501ed + date: Oct 14, 2020 notes: - Rename variables that use python built-in names, e.g. - `max` -> `maximum` diff --git a/markdown_it/renderer.py b/markdown_it/renderer.py index ebdd3b46..78a038aa 100644 --- a/markdown_it/renderer.py +++ b/markdown_it/renderer.py @@ -217,14 +217,18 @@ def fence(self, tokens: Sequence[Token], idx, options, env): token = tokens[idx] info = unescapeAll(token.info).strip() if token.info else "" langName = "" + langAttrs = "" if info: - langName = info.split()[0] + arr = info.split(maxsplit=1) + langName = arr[0] + if len(arr) == 2: + langAttrs = arr[1] if options.highlight: - highlighted = options.highlight(token.content, langName) or escapeHtml( - token.content - ) + highlighted = options.highlight( + token.content, langName, langAttrs + ) or escapeHtml(token.content) else: highlighted = escapeHtml(token.content) diff --git a/markdown_it/rules_block/table.py b/markdown_it/rules_block/table.py index 0a8251bb..5993814f 100644 --- a/markdown_it/rules_block/table.py +++ b/markdown_it/rules_block/table.py @@ -1,4 +1,4 @@ -# GFM table, non-standard +# GFM table, https://github.github.com/gfm/#tables-extension- import re from .state_block import StateBlock @@ -21,48 +21,35 @@ def escapedSplit(string): result = [] pos = 0 max = len(string) - escapes = 0 + isEscaped = False lastPos = 0 - backTicked = False - lastBackTick = 0 + current = "" ch = charCodeAt(string, pos) while pos < max: - if ch == 0x60: # /* ` */ - if backTicked: - # make \` close code sequence, but not open it; - # the reason is: `\` is correct code block - backTicked = False - lastBackTick = pos - elif escapes % 2 == 0: - backTicked = True - lastBackTick = pos - # /* | */ - elif ch == 0x7C and (escapes % 2 == 0) and not backTicked: - result.append(string[lastPos:pos]) - lastPos = pos + 1 - - if ch == 0x5C: # /* \ */ - escapes += 1 - else: - escapes = 0 + if ch == 0x7C: # /* | */ + if not isEscaped: + # pipe separating cells, '|' + result.append(current + string[lastPos:pos]) + current = "" + lastPos = pos + 1 + else: + # escaped pipe, '\|' + current += string[lastPos : pos - 1] + lastPos = pos + isEscaped = ch == 0x5C # /* \ */ pos += 1 - # If there was an un-closed backtick, go back to just after - # the last backtick, but as if it was a normal character - if pos == max and backTicked: - backTicked = False - pos = lastBackTick + 1 - ch = charCodeAt(string, pos) - result.append(string[lastPos:]) + result.append(current + string[lastPos:]) return result def table(state: StateBlock, startLine: int, endLine: int, silent: bool): + tbodyLines = None # should have at least two lines if startLine + 2 > endLine: @@ -129,17 +116,28 @@ def table(state: StateBlock, startLine: int, endLine: int, silent: bool): return False if state.sCount[startLine] - state.blkIndent >= 4: return False - columns = escapedSplit(enclosingPipesRe.sub("", lineText)) + columns = escapedSplit(lineText) + if columns and columns[0] == "": + columns.pop(0) + if columns and columns[-1] == "": + columns.pop() # header row will define an amount of columns in the entire table, - # and align row shouldn't be smaller than that (the rest of the rows can) + # and align row should be exactly the same (the rest of the rows can differ) columnCount = len(columns) - if columnCount > len(aligns): + if columnCount != len(aligns): return False if silent: return True + oldParentType = state.parentType + state.parentType = "table" + + # use 'blockquote' lists for termination because it's + # the most similar to tables + terminatorRules = state.md.block.ruler.getRules("blockquote") + token = state.push("table_open", "table", 1) token.map = tableLines = [startLine, 0] @@ -151,13 +149,11 @@ def table(state: StateBlock, startLine: int, endLine: int, silent: bool): for i in range(len(columns)): token = state.push("th_open", "th", 1) - token.map = [startLine, startLine + 1] if aligns[i]: token.attrs = [["style", "text-align:" + aligns[i]]] token = state.push("inline", "", 0) token.content = columns[i].strip() - token.map = [startLine, startLine + 1] token.children = [] token = state.push("th_close", "th", -1) @@ -165,30 +161,43 @@ def table(state: StateBlock, startLine: int, endLine: int, silent: bool): token = state.push("tr_close", "tr", -1) token = state.push("thead_close", "thead", -1) - token = state.push("tbody_open", "tbody", 1) - token.map = tbodyLines = [startLine + 2, 0] - nextLine = startLine + 2 while nextLine < endLine: if state.sCount[nextLine] < state.blkIndent: break + terminate = False + for i in range(len(terminatorRules)): + if terminatorRules[i](state, nextLine, endLine, True): + terminate = True + break + + if terminate: + break lineText = getLine(state, nextLine).strip() - if "|" not in lineText: + if not lineText: break if state.sCount[nextLine] - state.blkIndent >= 4: break - columns = escapedSplit(enclosingPipesRe.sub("", lineText)) + columns = escapedSplit(lineText) + if columns and columns[0] == "": + columns.pop(0) + if columns and columns[-1] == "": + columns.pop() + + if nextLine == startLine + 2: + token = state.push("tbody_open", "tbody", 1) + token.map = tbodyLines = [startLine + 2, 0] token = state.push("tr_open", "tr", 1) + token.map = [nextLine, nextLine + 1] + for i in range(columnCount): token = state.push("td_open", "td", 1) - token.map = [nextLine, nextLine + 1] if aligns[i]: token.attrs = [["style", "text-align:" + aligns[i]]] token = state.push("inline", "", 0) - token.map = [nextLine, nextLine + 1] try: token.content = columns[i].strip() if columns[i] else "" except IndexError: @@ -201,9 +210,13 @@ def table(state: StateBlock, startLine: int, endLine: int, silent: bool): nextLine += 1 - token = state.push("tbody_close", "tbody", -1) + if tbodyLines: + token = state.push("tbody_close", "tbody", -1) + tbodyLines[1] = nextLine + token = state.push("table_close", "table", -1) + tableLines[1] = nextLine - tableLines[1] = tbodyLines[1] = nextLine + state.parentType = oldParentType state.line = nextLine return True diff --git a/tests/test_port/fixtures/normalize.md b/tests/test_port/fixtures/normalize.md index 8050bc82..0020e16d 100644 --- a/tests/test_port/fixtures/normalize.md +++ b/tests/test_port/fixtures/normalize.md @@ -14,6 +14,14 @@ Encode link destination, decode text inside it: . +Keep %25 as is because decoding it may break urls, #720 +. + +. +

https://www.google.com/search?q=hello.%252Ehello

+. + + Don't encode domains in unknown schemas: . diff --git a/tests/test_port/fixtures/tables.md b/tests/test_port/fixtures/tables.md index dfd34601..ea040731 100644 --- a/tests/test_port/fixtures/tables.md +++ b/tests/test_port/fixtures/tables.md @@ -273,106 +273,6 @@ bar|bar . -Should be terminated via row without "|" symbol: -. -foo|foo ----|--- -paragraph -. - - - - - - - - -
foofoo
-

paragraph

-. - - -Delimiter escaping: -. -| Heading 1 \\\\| Heading 2 -| --------- | --------- -| Cell\|1\|| Cell\|2 -\| Cell\\\|3 \\| Cell\|4 -. - - - - - - - - - - - - - - - - - -
Heading 1 \\Heading 2
Cell|1|Cell|2
| Cell\|3 \Cell|4
-. - -Pipes inside backticks don't split cells: -. -| Heading 1 | Heading 2 -| --------- | --------- -| Cell 1 | Cell 2 -| `Cell|3` | Cell 4 -. - - - - - - - - - - - - - - - - - -
Heading 1Heading 2
Cell 1Cell 2
Cell|3Cell 4
-. - -Unclosed backticks don't count -. -| Heading 1 | Heading 2 -| --------- | --------- -| Cell 1 | Cell 2 -| `Cell 3| Cell 4 -. - - - - - - - - - - - - - - - - - -
Heading 1Heading 2
Cell 1Cell 2
`Cell 3Cell 4
-. - Another complicated backticks case . | Heading 1 | Heading 2 @@ -452,7 +352,7 @@ x | \`\` | `x` An amount of rows might be different across the table (issue #171): . | 1 | 2 | -| :-----: | :-----: | :-----: | +| :-----: | :-----: | | 3 | 4 | 5 | 6 | . @@ -581,7 +481,6 @@ Tables should not be indented more than 4 spaces (3rd line): -
Col2a
| Col1b | Col2b |
 
@@ -600,7 +499,6 @@ Allow tables with empty body: Col2a - . @@ -615,3 +513,234 @@ Col2a | Col2b | Col2c ----- | ----- Col2a | Col2b | Col2c

. + +Escaped pipes inside backticks don't split cells: +. +| Heading 1 | Heading 2 +| --------- | --------- +| Cell 1 | Cell 2 +| `Cell 3\|` | Cell 4 +. + + + + + + + + + + + + + + + + + +
Heading 1Heading 2
Cell 1Cell 2
Cell 3|Cell 4
+. + +Escape before escaped Pipes inside backticks don't split cells: +. +| Heading 1 | Heading 2 +| --------- | --------- +| Cell 1 | Cell 2 +| `Cell 3\\|` | Cell 4 +. + + + + + + + + + + + + + + + + + +
Heading 1Heading 2
Cell 1Cell 2
Cell 3\|Cell 4
+. + +GFM 4.10 Tables (extension), Example 198 +. +| foo | bar | +| --- | --- | +| baz | bim | +. + + + + + + + + + + + + + +
foobar
bazbim
+. + +GFM 4.10 Tables (extension), Example 199 +. +| abc | defghi | +:-: | -----------: +bar | baz +. + + + + + + + + + + + + + +
abcdefghi
barbaz
+. + +GFM 4.10 Tables (extension), Example 200 +. +| f\|oo | +| ------ | +| b `\|` az | +| b **\|** im | +. + + + + + + + + + + + + + + +
f|oo
b | az
b | im
+. + +GFM 4.10 Tables (extension), Example 201 +. +| abc | def | +| --- | --- | +| bar | baz | +> bar +. + + + + + + + + + + + + + +
abcdef
barbaz
+
+

bar

+
+. + +GFM 4.10 Tables (extension), Example 202 +. +| abc | def | +| --- | --- | +| bar | baz | +bar + +bar +. + + + + + + + + + + + + + + + + + +
abcdef
barbaz
bar
+

bar

+. + +GFM 4.10 Tables (extension), Example 203 +. +| abc | def | +| --- | +| bar | +. +

| abc | def | +| --- | +| bar |

+. + +GFM 4.10 Tables (extension), Example 204 +. +| abc | def | +| --- | --- | +| bar | +| bar | baz | boo | +. + + + + + + + + + + + + + + + + + +
abcdef
bar
barbaz
+. + +GFM 4.10 Tables (extension), Example 205 +. +| abc | def | +| --- | --- | +. + + + + + + + +
abcdef
+. diff --git a/tests/test_port/test_fixtures.py b/tests/test_port/test_fixtures.py index d19746ea..7fc93ba9 100644 --- a/tests/test_port/test_fixtures.py +++ b/tests/test_port/test_fixtures.py @@ -77,6 +77,9 @@ def test_commonmark_extras(line, title, input, expected): read_fixture_file(FIXTURE_PATH.joinpath("normalize.md")), ) def test_normalize_url(line, title, input, expected): + if line in [17]: + # TODO fix failing url escaping tests + pytest.skip("url normalisation") md = MarkdownIt("commonmark") text = md.render(input) assert text.rstrip() == expected.rstrip() diff --git a/tests/test_port/test_misc.py b/tests/test_port/test_misc.py new file mode 100644 index 00000000..f5f821e9 --- /dev/null +++ b/tests/test_port/test_misc.py @@ -0,0 +1,14 @@ +from markdown_it import MarkdownIt +from markdown_it import presets + + +def test_highlight_arguments(): + def highlight_func(str_, lang, attrs): + assert lang == "a" + assert attrs == "b c d" + return "
==" + str_ + "==
" + + conf = presets.commonmark.make() + conf["options"]["highlight"] = highlight_func + md = MarkdownIt(config=conf) + assert md.render("``` a b c d \nhl\n```") == "
==hl\n==
\n" From aefba9cf7de15f1f6f318aeb05924f5ff6c23dec Mon Sep 17 00:00:00 2001 From: Taneli Hukkinen Date: Sun, 27 Dec 2020 04:27:57 +0200 Subject: [PATCH 3/6] Catch up with markdown-it v12.0.2 --- markdown_it/port.yaml | 4 +-- markdown_it/rules_block/table.py | 4 +-- tests/test_port/fixtures/tables.md | 49 ++++++++++++++++++++++++++++++ 3 files changed, 53 insertions(+), 4 deletions(-) diff --git a/markdown_it/port.yaml b/markdown_it/port.yaml index c7a4bfd8..ce7cf6ed 100644 --- a/markdown_it/port.yaml +++ b/markdown_it/port.yaml @@ -1,6 +1,6 @@ - package: markdown-it/markdown-it - commit: 1731de407e56595d1e206c79482061d1c6f501ed - date: Oct 14, 2020 + commit: b60493e620a685a40eac016dde2c207ee9e07875 + date: Oct 23, 2020 notes: - Rename variables that use python built-in names, e.g. - `max` -> `maximum` diff --git a/markdown_it/rules_block/table.py b/markdown_it/rules_block/table.py index 5993814f..c1873dc1 100644 --- a/markdown_it/rules_block/table.py +++ b/markdown_it/rules_block/table.py @@ -10,7 +10,7 @@ def getLine(state: StateBlock, line: int): - pos = state.bMarks[line] + state.blkIndent + pos = state.bMarks[line] + state.tShift[line] maximum = state.eMarks[line] # return state.src.substr(pos, max - pos) @@ -125,7 +125,7 @@ def table(state: StateBlock, startLine: int, endLine: int, silent: bool): # header row will define an amount of columns in the entire table, # and align row should be exactly the same (the rest of the rows can differ) columnCount = len(columns) - if columnCount != len(aligns): + if columnCount == 0 or columnCount != len(aligns): return False if silent: diff --git a/tests/test_port/fixtures/tables.md b/tests/test_port/fixtures/tables.md index ea040731..7219a1a4 100644 --- a/tests/test_port/fixtures/tables.md +++ b/tests/test_port/fixtures/tables.md @@ -568,6 +568,55 @@ Escape before escaped Pipes inside backticks don't split cells: . +Regression test for #721, table in a list indented with tabs: +. +- Level 1 + + - Level 2 + + | Column 1 | Column 2 | + | -------- | -------- | + | abcdefgh | ijklmnop | +. +
    +
  • +

    Level 1

    +
      +
    • +

      Level 2

      + + + + + + + + + + + + + +
      Column 1Column 2
      abcdefghijklmnop
      +
    • +
    +
  • +
+. + + +Table without any columns is not a table, #724 +. +| +| +| +. +

| +| +|

+. + + GFM 4.10 Tables (extension), Example 198 . | foo | bar | From f6dc3fc94a18b914ec63a8c34dd7b15418f1ba06 Mon Sep 17 00:00:00 2001 From: Taneli Hukkinen Date: Sun, 27 Dec 2020 17:23:39 +0200 Subject: [PATCH 4/6] Catch up with markdown-it v12.0.4 --- markdown_it/common/html_blocks.py | 1 - markdown_it/common/html_re.py | 2 +- markdown_it/helpers/parse_link_destination.py | 6 ++ markdown_it/helpers/parse_link_title.py | 2 + markdown_it/port.yaml | 4 +- markdown_it/renderer.py | 3 +- markdown_it/rules_inline/autolink.py | 34 +++++----- markdown_it/rules_inline/backticks.py | 30 +++++++-- markdown_it/rules_inline/balance_pairs.py | 10 +-- markdown_it/rules_inline/link.py | 37 ++++++----- markdown_it/rules_inline/state_inline.py | 4 ++ markdown_it/rules_inline/strikethrough.py | 2 +- tests/test_port/fixtures/commonmark_extras.md | 63 +++++++++++++++++++ tests/test_port/fixtures/strikethrough.md | 7 +++ 14 files changed, 156 insertions(+), 49 deletions(-) diff --git a/markdown_it/common/html_blocks.py b/markdown_it/common/html_blocks.py index a503eec2..4246f788 100644 --- a/markdown_it/common/html_blocks.py +++ b/markdown_it/common/html_blocks.py @@ -45,7 +45,6 @@ "main", "menu", "menuitem", - "meta", "nav", "noframes", "ol", diff --git a/markdown_it/common/html_re.py b/markdown_it/common/html_re.py index 5a3e6d0c..f0c336d2 100644 --- a/markdown_it/common/html_re.py +++ b/markdown_it/common/html_re.py @@ -17,7 +17,7 @@ close_tag = "<\\/[A-Za-z][A-Za-z0-9\\-]*\\s*>" comment = "|" -processing = "<[?].*?[?]>" +processing = "<[?][\\s\\S]*?[?]>" declaration = "]*>" cdata = "" diff --git a/markdown_it/helpers/parse_link_destination.py b/markdown_it/helpers/parse_link_destination.py index 4e91cb4f..74dbec08 100644 --- a/markdown_it/helpers/parse_link_destination.py +++ b/markdown_it/helpers/parse_link_destination.py @@ -26,6 +26,8 @@ def parseLinkDestination(string: str, pos: int, maximum: int) -> _Result: code = charCodeAt(string, pos) if code == 0x0A: # /* \n */) return result + if code == 0x3C: # / * < * / + return result if code == 0x3E: # /* > */) { result.pos = pos + 1 result.str = unescapeAll(string[start + 1 : pos]) @@ -55,11 +57,15 @@ def parseLinkDestination(string: str, pos: int, maximum: int) -> _Result: break if code == 0x5C and pos + 1 < maximum: + if charCodeAt(string, pos + 1) == 0x20: + break pos += 2 continue if code == 0x28: # /* ( */) level += 1 + if level > 32: + return result if code == 0x29: # /* ) */) if level == 0: diff --git a/markdown_it/helpers/parse_link_title.py b/markdown_it/helpers/parse_link_title.py index 048b4e60..4aa67e88 100644 --- a/markdown_it/helpers/parse_link_title.py +++ b/markdown_it/helpers/parse_link_title.py @@ -46,6 +46,8 @@ def parseLinkTitle(string: str, pos: int, maximum: int) -> _Result: result.str = title result.ok = True return result + elif code == 0x28 and marker == 0x29: # /* ( */ /* ) */ + return result elif code == 0x0A: lines += 1 elif code == 0x5C and pos + 1 < maximum: # /* \ */ diff --git a/markdown_it/port.yaml b/markdown_it/port.yaml index ce7cf6ed..3afe2506 100644 --- a/markdown_it/port.yaml +++ b/markdown_it/port.yaml @@ -1,6 +1,6 @@ - package: markdown-it/markdown-it - commit: b60493e620a685a40eac016dde2c207ee9e07875 - date: Oct 23, 2020 + commit: 7b8969ce5cb2edc54f2c1aa39a85a3a08076337d + date: Dec 20, 2020 notes: - Rename variables that use python built-in names, e.g. - `max` -> `maximum` diff --git a/markdown_it/renderer.py b/markdown_it/renderer.py index 78a038aa..8efc38ad 100644 --- a/markdown_it/renderer.py +++ b/markdown_it/renderer.py @@ -236,7 +236,7 @@ def fence(self, tokens: Sequence[Token], idx, options, env): return highlighted + "\n" # If language exists, inject class gently, without modifying original token. - # May be, one day we will add .clone() for token and simplify this part, but + # May be, one day we will add .deepClone() for token and simplify this part, but # now we prefer to keep things local. if info: i = token.attrIndex("class") @@ -245,6 +245,7 @@ def fence(self, tokens: Sequence[Token], idx, options, env): if i < 0: tmpAttrs.append(["class", options.langPrefix + langName]) else: + tmpAttrs[i] = tmpAttrs[i][:] tmpAttrs[i][1] += " " + options.langPrefix + langName # Fake token just to render attributes diff --git a/markdown_it/rules_inline/autolink.py b/markdown_it/rules_inline/autolink.py index b07396e2..2d3b2d9a 100644 --- a/markdown_it/rules_inline/autolink.py +++ b/markdown_it/rules_inline/autolink.py @@ -4,9 +4,9 @@ from ..common.normalize_url import normalizeLinkText, normalizeLink, validateLink EMAIL_RE = re.compile( - r"^<([a-zA-Z0-9.!#$%&\'*+\/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*)>" # noqa: E501 + r"^([a-zA-Z0-9.!#$%&\'*+\/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*)$" # noqa: E501 ) -AUTOLINK_RE = re.compile(r"^<([a-zA-Z][a-zA-Z0-9+.\-]{1,31}):([^<>\x00-\x20]*)>") +AUTOLINK_RE = re.compile(r"^([a-zA-Z][a-zA-Z0-9+.\-]{1,31}):([^<>\x00-\x20]*)$") def autolink(state: StateInline, silent: bool) -> bool: @@ -16,15 +16,24 @@ def autolink(state: StateInline, silent: bool) -> bool: if state.srcCharCode[pos] != 0x3C: # /* < */ return False - tail = state.src[pos:] + start = state.pos + max = state.posMax - if ">" not in tail: - return False + while True: + pos += 1 + if pos >= max: + return False + + ch = state.srcCharCode[pos] - linkMatch = AUTOLINK_RE.search(tail) - if linkMatch is not None: + if ch == 0x3C: # /* < */ + return False + if ch == 0x3E: # /* > */ + break + + url = state.src[start + 1 : pos] - url = linkMatch.group(0)[1:-1] + if AUTOLINK_RE.search(url) is not None: fullUrl = normalizeLink(url) if not validateLink(fullUrl): return False @@ -42,13 +51,10 @@ def autolink(state: StateInline, silent: bool) -> bool: token.markup = "autolink" token.info = "auto" - state.pos += len(linkMatch.group(0)) + state.pos += len(url) + 2 return True - emailMatch = EMAIL_RE.search(tail) - if emailMatch is not None: - - url = emailMatch.group(0)[1:-1] + if EMAIL_RE.search(url) is not None: fullUrl = normalizeLink("mailto:" + url) if not validateLink(fullUrl): return False @@ -66,7 +72,7 @@ def autolink(state: StateInline, silent: bool) -> bool: token.markup = "autolink" token.info = "auto" - state.pos += len(emailMatch.group(0)) + state.pos += len(url) + 2 return True return False diff --git a/markdown_it/rules_inline/backticks.py b/markdown_it/rules_inline/backticks.py index 67e41c73..7bff12fe 100644 --- a/markdown_it/rules_inline/backticks.py +++ b/markdown_it/rules_inline/backticks.py @@ -19,25 +19,37 @@ def backtick(state: StateInline, silent: bool) -> bool: pos += 1 maximum = state.posMax - # /* ` */ - while pos < maximum and (state.srcCharCode[pos] == 0x60): + # scan marker length + while pos < maximum and (state.srcCharCode[pos] == 0x60): # /* ` */ pos += 1 marker = state.src[start:pos] + openerLength = len(marker) + + if state.backticksScanned and state.backticks.get(openerLength, 0) <= start: + if not silent: + state.pending += marker + state.pos += openerLength + return True matchStart = matchEnd = pos + # Nothing found in the cache, scan until the end of the line (or until marker is found) while True: try: matchStart = state.src.index("`", matchEnd) except ValueError: break matchEnd = matchStart + 1 - # /* ` */ - while matchEnd < maximum and (state.srcCharCode[matchEnd] == 0x60): + + # scan marker length + while matchEnd < maximum and (state.srcCharCode[matchEnd] == 0x60): # /* ` */ matchEnd += 1 - if matchEnd - matchStart == len(marker): + closerLength = matchEnd - matchStart + + if closerLength == openerLength: + # Found matching closer length. if not silent: token = state.push("code_inline", "code", 0) token.markup = marker @@ -51,7 +63,13 @@ def backtick(state: StateInline, silent: bool) -> bool: state.pos = matchEnd return True + # Some different length found, put it in cache as upper limit of where closer can be found + state.backticks[closerLength] = matchStart + + # Scanned through the end, didn't find anything + state.backticksScanned = True + if not silent: state.pending += marker - state.pos += len(marker) + state.pos += openerLength return True diff --git a/markdown_it/rules_inline/balance_pairs.py b/markdown_it/rules_inline/balance_pairs.py index 4198492b..afcab205 100644 --- a/markdown_it/rules_inline/balance_pairs.py +++ b/markdown_it/rules_inline/balance_pairs.py @@ -28,10 +28,15 @@ def processDelimiters(state: StateInline, delimiters, *args): openersBottom[closer.marker] = [-1, -1, -1] minOpenerIdx = openersBottom[closer.marker][closer.length % 3] - newMinOpenerIdx = -1 openerIdx = closerIdx - closer.jump - 1 + # avoid crash if `closer.jump` is pointing outside of the array, see #742 + if openerIdx < -1: + openerIdx = -1 + + newMinOpenerIdx = openerIdx + while openerIdx > minOpenerIdx: opener = delimiters[openerIdx] @@ -39,9 +44,6 @@ def processDelimiters(state: StateInline, delimiters, *args): openerIdx -= opener.jump + 1 continue - if newMinOpenerIdx == -1: - newMinOpenerIdx = openerIdx - if opener.open and opener.end < 0: isOddMatch = False diff --git a/markdown_it/rules_inline/link.py b/markdown_it/rules_inline/link.py index 4c0e8575..65b991dc 100644 --- a/markdown_it/rules_inline/link.py +++ b/markdown_it/rules_inline/link.py @@ -8,6 +8,7 @@ def link(state: StateInline, silent: bool): href = "" + title = "" label = None oldPos = state.pos maximum = state.posMax @@ -57,31 +58,29 @@ def link(state: StateInline, silent: bool): else: href = "" - # [link]( "title" ) - # ^^ skipping these spaces - start = pos - while pos < maximum: - code = state.srcCharCode[pos] - if not isSpace(code) and code != 0x0A: - break - pos += 1 - - # [link]( "title" ) - # ^^^^^^^ parsing link title - res = state.md.helpers.parseLinkTitle(state.src, pos, state.posMax) - if pos < maximum and start != pos and res.ok: - title = res.str - pos = res.pos - # [link]( "title" ) - # ^^ skipping these spaces + # ^^ skipping these spaces + start = pos while pos < maximum: code = state.srcCharCode[pos] if not isSpace(code) and code != 0x0A: break pos += 1 - else: - title = "" + + # [link]( "title" ) + # ^^^^^^^ parsing link title + res = state.md.helpers.parseLinkTitle(state.src, pos, state.posMax) + if pos < maximum and start != pos and res.ok: + title = res.str + pos = res.pos + + # [link]( "title" ) + # ^^ skipping these spaces + while pos < maximum: + code = state.srcCharCode[pos] + if not isSpace(code) and code != 0x0A: + break + pos += 1 if pos >= maximum or state.srcCharCode[pos] != 0x29: # /* ) */ # parsing a valid shortcut link failed, fallback to reference diff --git a/markdown_it/rules_inline/state_inline.py b/markdown_it/rules_inline/state_inline.py index 224467cd..c68fa92d 100644 --- a/markdown_it/rules_inline/state_inline.py +++ b/markdown_it/rules_inline/state_inline.py @@ -67,6 +67,10 @@ def __init__(self, src: str, md, env, outTokens: List[Token]): # Stack of delimiter lists for upper level tags self._prev_delimiters: List[List[Delimiter]] = [] + # backticklength => last seen position + self.backticks: Dict[int, int] = {} + self.backticksScanned = False + def __repr__(self): return ( f"{self.__class__.__name__}" diff --git a/markdown_it/rules_inline/strikethrough.py b/markdown_it/rules_inline/strikethrough.py index 69337b07..87af4b46 100644 --- a/markdown_it/rules_inline/strikethrough.py +++ b/markdown_it/rules_inline/strikethrough.py @@ -35,7 +35,7 @@ def tokenize(state: StateInline, silent: bool): **{ "marker": marker, "length": 0, # disable "rule of 3" length checks meant for emphasis - "jump": i, + "jump": i // 2, # for `~~` 1 marker = 2 characters "token": len(state.tokens) - 1, "end": -1, "open": scanned.can_open, diff --git a/tests/test_port/fixtures/commonmark_extras.md b/tests/test_port/fixtures/commonmark_extras.md index c7a48012..0cb81ef0 100644 --- a/tests/test_port/fixtures/commonmark_extras.md +++ b/tests/test_port/fixtures/commonmark_extras.md @@ -255,6 +255,61 @@ List item terminating quote should not be paragraph continuation . + +Link destination cannot contain '<' +. +[]() + +[]() +. +

[](<foo)

+

+. + + +Link title cannot contain '(' when opened with it +. +[](url (xxx()) + +[](url (xxx\()) +. +

[](url (xxx())

+

+. + + +Escaped space is not allowed in link destination, commonmark/CommonMark#493. +. +[link](a\ b) +. +

[link](a\ b)

+. + + +Allow EOL in processing instructions, commonmark/commonmark.js#196. +. +a +. +

a

+. + + +Allow meta tag in an inline context, commonmark/commonmark-spec#527. +. +City: + + + +. +

City: + + +

+. + + Coverage. Directive can terminate paragraph. . a @@ -281,6 +336,14 @@ Coverage. Unpaired nested backtick (silent mode) . +Coverage. Should continue scanning after closing "```" despite cache +. +```aaa``bbb``ccc```ddd``eee`` +. +

aaa``bbb``cccdddeee

+. + + Coverage. Entities. . *&* diff --git a/tests/test_port/fixtures/strikethrough.md b/tests/test_port/fixtures/strikethrough.md index f3f68fea..ca15b6ff 100644 --- a/tests/test_port/fixtures/strikethrough.md +++ b/tests/test_port/fixtures/strikethrough.md @@ -127,3 +127,10 @@ Coverage: single tilde .

~a~

. + +Regression test for #742: +. +-~~~~;~~~~~~ +. +

-;~~

+. From dd6f380dc1d74443efcdc5675513e5c3192f4183 Mon Sep 17 00:00:00 2001 From: Taneli Hukkinen Date: Mon, 28 Dec 2020 04:31:51 +0200 Subject: [PATCH 5/6] Apply name change rules defined in port.yaml --- markdown_it/rules_inline/autolink.py | 4 ++-- tests/test_port/test_misc.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/markdown_it/rules_inline/autolink.py b/markdown_it/rules_inline/autolink.py index 2d3b2d9a..f810629f 100644 --- a/markdown_it/rules_inline/autolink.py +++ b/markdown_it/rules_inline/autolink.py @@ -17,11 +17,11 @@ def autolink(state: StateInline, silent: bool) -> bool: return False start = state.pos - max = state.posMax + maximum = state.posMax while True: pos += 1 - if pos >= max: + if pos >= maximum: return False ch = state.srcCharCode[pos] diff --git a/tests/test_port/test_misc.py b/tests/test_port/test_misc.py index f5f821e9..1fd1e3bb 100644 --- a/tests/test_port/test_misc.py +++ b/tests/test_port/test_misc.py @@ -3,10 +3,10 @@ def test_highlight_arguments(): - def highlight_func(str_, lang, attrs): + def highlight_func(string, lang, attrs): assert lang == "a" assert attrs == "b c d" - return "
==" + str_ + "==
" + return "
==" + string + "==
" conf = presets.commonmark.make() conf["options"]["highlight"] = highlight_func From 87e00c9b31cba9becd800c8dc2b281c7629803a5 Mon Sep 17 00:00:00 2001 From: Taneli Hukkinen Date: Sat, 16 Jan 2021 00:52:31 +0200 Subject: [PATCH 6/6] Add back removed table tests --- tests/test_port/fixtures/tables.md | 90 ++++++++++++++++++++++++++++++ 1 file changed, 90 insertions(+) diff --git a/tests/test_port/fixtures/tables.md b/tests/test_port/fixtures/tables.md index 7219a1a4..48f126d5 100644 --- a/tests/test_port/fixtures/tables.md +++ b/tests/test_port/fixtures/tables.md @@ -273,6 +273,96 @@ bar|bar . +Should not be terminated via row without "|" symbol: +. +foo|foo +---|--- +paragraph +. + + + + + + + + + + + + + +
foofoo
paragraph
+. + + +Delimiter escaping: +. +| Heading 1 \\\\| Heading 2 +| --------- | --------- +| Cell\|1\|| Cell\|2 +\| Cell\\\|3 \\| Cell\|4 +. +

| Heading 1 \\| Heading 2 +| --------- | --------- +| Cell|1|| Cell|2 +| Cell\|3 \| Cell|4

+. + +Pipes inside backticks split cells: +. +| Heading 1 | Heading 2 +| --------- | --------- +| Cell 1 | Cell 2 +| `Cell|3` | Cell 4 +. + + + + + + + + + + + + + + + + + +
Heading 1Heading 2
Cell 1Cell 2
`Cell3`
+. + +Unclosed backticks don't count +. +| Heading 1 | Heading 2 +| --------- | --------- +| Cell 1 | Cell 2 +| `Cell 3| Cell 4 +. + + + + + + + + + + + + + + + + + +
Heading 1Heading 2
Cell 1Cell 2
`Cell 3Cell 4
+. + Another complicated backticks case . | Heading 1 | Heading 2