From 5126e0290c8fbc167b2be3410a97e3ff19cfdd98 Mon Sep 17 00:00:00 2001
From: Taneli Hukkinen
Date: Sat, 26 Dec 2020 22:16:03 +0200
Subject: [PATCH 1/6] Catch up with markdown-it 11.0.1
---
markdown_it/main.py | 2 +-
markdown_it/port.yaml | 4 +-
markdown_it/rules_block/blockquote.py | 22 +++-----
markdown_it/rules_block/table.py | 2 +
markdown_it/rules_inline/balance_pairs.py | 2 +-
tests/test_port/fixtures/commonmark_extras.md | 50 +++++++++++++++++++
6 files changed, 62 insertions(+), 20 deletions(-)
diff --git a/markdown_it/main.py b/markdown_it/main.py
index 85254751..43389909 100644
--- a/markdown_it/main.py
+++ b/markdown_it/main.py
@@ -215,7 +215,7 @@ def parse(self, src: str, env: Optional[AttrDict] = None) -> List[Token]:
:param src: source string
:param env: environment sandbox
- Parse input string and returns list of block tokens (special token type
+ Parse input string and return list of block tokens (special token type
"inline" will contain list of inline tokens).
`env` is used to pass data between "distributed" rules and return additional
diff --git a/markdown_it/port.yaml b/markdown_it/port.yaml
index 84ec2f87..57338aea 100644
--- a/markdown_it/port.yaml
+++ b/markdown_it/port.yaml
@@ -1,6 +1,6 @@
- package: markdown-it/markdown-it
- commit: f798bea9623277bbf89b9621cf7fb283c693fcab
- date: Mar 12, 2020
+ commit: 331ae117e09115366db12b517ca526b1b7fee1e8
+ date: Sep 14, 2020
notes:
- Rename variables that use python built-in names, e.g.
- `max` -> `maximum`
diff --git a/markdown_it/rules_block/blockquote.py b/markdown_it/rules_block/blockquote.py
index 98fcdc1d..c3e2d5c2 100644
--- a/markdown_it/rules_block/blockquote.py
+++ b/markdown_it/rules_block/blockquote.py
@@ -32,12 +32,8 @@ def blockquote(state: StateBlock, startLine: int, endLine: int, silent: bool):
if silent:
return True
- # skip spaces after ">" and re-calculate offset
- initial = offset = (
- state.sCount[startLine]
- + pos
- - (state.bMarks[startLine] + state.tShift[startLine])
- )
+ # set offset past spaces and ">"
+ initial = offset = state.sCount[startLine] + 1
try:
second_char_code: Optional[int] = state.srcCharCode[pos]
@@ -109,7 +105,6 @@ def blockquote(state: StateBlock, startLine: int, endLine: int, silent: bool):
oldParentType = state.parentType
state.parentType = "blockquote"
- wasOutdented = False
# Search the end of the block
#
@@ -142,8 +137,7 @@ def blockquote(state: StateBlock, startLine: int, endLine: int, silent: bool):
# > current blockquote
# 2. checking this line
# ```
- if state.sCount[nextLine] < state.blkIndent:
- wasOutdented = True
+ isOutdented = state.sCount[nextLine] < state.blkIndent
pos = state.bMarks[nextLine] + state.tShift[nextLine]
max = state.eMarks[nextLine]
@@ -152,17 +146,13 @@ def blockquote(state: StateBlock, startLine: int, endLine: int, silent: bool):
# Case 1: line is not inside the blockquote, and this line is empty.
break
- evaluatesTrue = state.srcCharCode[pos] == 0x3E and not wasOutdented # /* > */
+ evaluatesTrue = state.srcCharCode[pos] == 0x3E and not isOutdented # /* > */
pos += 1
if evaluatesTrue:
# This line is inside the blockquote.
- # skip spaces after ">" and re-calculate offset
- initial = offset = (
- state.sCount[nextLine]
- + pos
- - (state.bMarks[nextLine] + state.tShift[nextLine])
- )
+ # set offset past spaces and ">"
+ initial = offset = state.sCount[nextLine] + 1
# skip one optional space after '>'
if state.srcCharCode[pos] == 0x20: # /* space */
diff --git a/markdown_it/rules_block/table.py b/markdown_it/rules_block/table.py
index 2152df71..0a8251bb 100644
--- a/markdown_it/rules_block/table.py
+++ b/markdown_it/rules_block/table.py
@@ -183,10 +183,12 @@ def table(state: StateBlock, startLine: int, endLine: int, silent: bool):
token = state.push("tr_open", "tr", 1)
for i in range(columnCount):
token = state.push("td_open", "td", 1)
+ token.map = [nextLine, nextLine + 1]
if aligns[i]:
token.attrs = [["style", "text-align:" + aligns[i]]]
token = state.push("inline", "", 0)
+ token.map = [nextLine, nextLine + 1]
try:
token.content = columns[i].strip() if columns[i] else ""
except IndexError:
diff --git a/markdown_it/rules_inline/balance_pairs.py b/markdown_it/rules_inline/balance_pairs.py
index deff41e6..4198492b 100644
--- a/markdown_it/rules_inline/balance_pairs.py
+++ b/markdown_it/rules_inline/balance_pairs.py
@@ -42,7 +42,7 @@ def processDelimiters(state: StateInline, delimiters, *args):
if newMinOpenerIdx == -1:
newMinOpenerIdx = openerIdx
- if opener.open and opener.end < 0 and opener.level == closer.level:
+ if opener.open and opener.end < 0:
isOddMatch = False
diff --git a/tests/test_port/fixtures/commonmark_extras.md b/tests/test_port/fixtures/commonmark_extras.md
index 7bdd0b59..c7a48012 100644
--- a/tests/test_port/fixtures/commonmark_extras.md
+++ b/tests/test_port/fixtures/commonmark_extras.md
@@ -522,3 +522,53 @@ Coverage, entities with code > 10FFFF. Made this way for compatibility with comm
�
�
.
+
+Issue #696. Blockquotes should remember their level.
+.
+>>> foo
+bar
+>>> baz
+.
+
+
+
+foo
+bar
+baz
+
+
+
+.
+
+Issue #696. Blockquotes should stop when outdented from a list.
+.
+1. >>> foo
+ bar
+baz
+ >>> foo
+ >>> bar
+ >>> baz
+.
+
+-
+
+
+
+foo
+bar
+baz
+foo
+
+
+
+
+
+
+
+
+bar
+baz
+
+
+
+.
From dadfd382a5d7594d587275df5974e0aa8e274d61 Mon Sep 17 00:00:00 2001
From: Taneli Hukkinen
Date: Sun, 27 Dec 2020 03:32:52 +0200
Subject: [PATCH 2/6] Catch up with markdown-it v12.0.0
---
markdown_it/port.yaml | 4 +-
markdown_it/renderer.py | 12 +-
markdown_it/rules_block/table.py | 99 ++++----
tests/test_port/fixtures/normalize.md | 8 +
tests/test_port/fixtures/tables.md | 335 ++++++++++++++++++--------
tests/test_port/test_fixtures.py | 3 +
tests/test_port/test_misc.py | 14 ++
7 files changed, 323 insertions(+), 152 deletions(-)
create mode 100644 tests/test_port/test_misc.py
diff --git a/markdown_it/port.yaml b/markdown_it/port.yaml
index 57338aea..c7a4bfd8 100644
--- a/markdown_it/port.yaml
+++ b/markdown_it/port.yaml
@@ -1,6 +1,6 @@
- package: markdown-it/markdown-it
- commit: 331ae117e09115366db12b517ca526b1b7fee1e8
- date: Sep 14, 2020
+ commit: 1731de407e56595d1e206c79482061d1c6f501ed
+ date: Oct 14, 2020
notes:
- Rename variables that use python built-in names, e.g.
- `max` -> `maximum`
diff --git a/markdown_it/renderer.py b/markdown_it/renderer.py
index ebdd3b46..78a038aa 100644
--- a/markdown_it/renderer.py
+++ b/markdown_it/renderer.py
@@ -217,14 +217,18 @@ def fence(self, tokens: Sequence[Token], idx, options, env):
token = tokens[idx]
info = unescapeAll(token.info).strip() if token.info else ""
langName = ""
+ langAttrs = ""
if info:
- langName = info.split()[0]
+ arr = info.split(maxsplit=1)
+ langName = arr[0]
+ if len(arr) == 2:
+ langAttrs = arr[1]
if options.highlight:
- highlighted = options.highlight(token.content, langName) or escapeHtml(
- token.content
- )
+ highlighted = options.highlight(
+ token.content, langName, langAttrs
+ ) or escapeHtml(token.content)
else:
highlighted = escapeHtml(token.content)
diff --git a/markdown_it/rules_block/table.py b/markdown_it/rules_block/table.py
index 0a8251bb..5993814f 100644
--- a/markdown_it/rules_block/table.py
+++ b/markdown_it/rules_block/table.py
@@ -1,4 +1,4 @@
-# GFM table, non-standard
+# GFM table, https://github.github.com/gfm/#tables-extension-
import re
from .state_block import StateBlock
@@ -21,48 +21,35 @@ def escapedSplit(string):
result = []
pos = 0
max = len(string)
- escapes = 0
+ isEscaped = False
lastPos = 0
- backTicked = False
- lastBackTick = 0
+ current = ""
ch = charCodeAt(string, pos)
while pos < max:
- if ch == 0x60: # /* ` */
- if backTicked:
- # make \` close code sequence, but not open it;
- # the reason is: `\` is correct code block
- backTicked = False
- lastBackTick = pos
- elif escapes % 2 == 0:
- backTicked = True
- lastBackTick = pos
- # /* | */
- elif ch == 0x7C and (escapes % 2 == 0) and not backTicked:
- result.append(string[lastPos:pos])
- lastPos = pos + 1
-
- if ch == 0x5C: # /* \ */
- escapes += 1
- else:
- escapes = 0
+ if ch == 0x7C: # /* | */
+ if not isEscaped:
+ # pipe separating cells, '|'
+ result.append(current + string[lastPos:pos])
+ current = ""
+ lastPos = pos + 1
+ else:
+ # escaped pipe, '\|'
+ current += string[lastPos : pos - 1]
+ lastPos = pos
+ isEscaped = ch == 0x5C # /* \ */
pos += 1
- # If there was an un-closed backtick, go back to just after
- # the last backtick, but as if it was a normal character
- if pos == max and backTicked:
- backTicked = False
- pos = lastBackTick + 1
-
ch = charCodeAt(string, pos)
- result.append(string[lastPos:])
+ result.append(current + string[lastPos:])
return result
def table(state: StateBlock, startLine: int, endLine: int, silent: bool):
+ tbodyLines = None
# should have at least two lines
if startLine + 2 > endLine:
@@ -129,17 +116,28 @@ def table(state: StateBlock, startLine: int, endLine: int, silent: bool):
return False
if state.sCount[startLine] - state.blkIndent >= 4:
return False
- columns = escapedSplit(enclosingPipesRe.sub("", lineText))
+ columns = escapedSplit(lineText)
+ if columns and columns[0] == "":
+ columns.pop(0)
+ if columns and columns[-1] == "":
+ columns.pop()
# header row will define an amount of columns in the entire table,
- # and align row shouldn't be smaller than that (the rest of the rows can)
+ # and align row should be exactly the same (the rest of the rows can differ)
columnCount = len(columns)
- if columnCount > len(aligns):
+ if columnCount != len(aligns):
return False
if silent:
return True
+ oldParentType = state.parentType
+ state.parentType = "table"
+
+ # use the 'blockquote' terminator rule chain because it is
+ # the most similar to tables
+ terminatorRules = state.md.block.ruler.getRules("blockquote")
+
token = state.push("table_open", "table", 1)
token.map = tableLines = [startLine, 0]
@@ -151,13 +149,11 @@ def table(state: StateBlock, startLine: int, endLine: int, silent: bool):
for i in range(len(columns)):
token = state.push("th_open", "th", 1)
- token.map = [startLine, startLine + 1]
if aligns[i]:
token.attrs = [["style", "text-align:" + aligns[i]]]
token = state.push("inline", "", 0)
token.content = columns[i].strip()
- token.map = [startLine, startLine + 1]
token.children = []
token = state.push("th_close", "th", -1)
@@ -165,30 +161,43 @@ def table(state: StateBlock, startLine: int, endLine: int, silent: bool):
token = state.push("tr_close", "tr", -1)
token = state.push("thead_close", "thead", -1)
- token = state.push("tbody_open", "tbody", 1)
- token.map = tbodyLines = [startLine + 2, 0]
-
nextLine = startLine + 2
while nextLine < endLine:
if state.sCount[nextLine] < state.blkIndent:
break
+ terminate = False
+ for i in range(len(terminatorRules)):
+ if terminatorRules[i](state, nextLine, endLine, True):
+ terminate = True
+ break
+
+ if terminate:
+ break
lineText = getLine(state, nextLine).strip()
- if "|" not in lineText:
+ if not lineText:
break
if state.sCount[nextLine] - state.blkIndent >= 4:
break
- columns = escapedSplit(enclosingPipesRe.sub("", lineText))
+ columns = escapedSplit(lineText)
+ if columns and columns[0] == "":
+ columns.pop(0)
+ if columns and columns[-1] == "":
+ columns.pop()
+
+ if nextLine == startLine + 2:
+ token = state.push("tbody_open", "tbody", 1)
+ token.map = tbodyLines = [startLine + 2, 0]
token = state.push("tr_open", "tr", 1)
+ token.map = [nextLine, nextLine + 1]
+
for i in range(columnCount):
token = state.push("td_open", "td", 1)
- token.map = [nextLine, nextLine + 1]
if aligns[i]:
token.attrs = [["style", "text-align:" + aligns[i]]]
token = state.push("inline", "", 0)
- token.map = [nextLine, nextLine + 1]
try:
token.content = columns[i].strip() if columns[i] else ""
except IndexError:
@@ -201,9 +210,13 @@ def table(state: StateBlock, startLine: int, endLine: int, silent: bool):
nextLine += 1
- token = state.push("tbody_close", "tbody", -1)
+ if tbodyLines:
+ token = state.push("tbody_close", "tbody", -1)
+ tbodyLines[1] = nextLine
+
token = state.push("table_close", "table", -1)
+ tableLines[1] = nextLine
- tableLines[1] = tbodyLines[1] = nextLine
+ state.parentType = oldParentType
state.line = nextLine
return True
diff --git a/tests/test_port/fixtures/normalize.md b/tests/test_port/fixtures/normalize.md
index 8050bc82..0020e16d 100644
--- a/tests/test_port/fixtures/normalize.md
+++ b/tests/test_port/fixtures/normalize.md
@@ -14,6 +14,14 @@ Encode link destination, decode text inside it:
.
+Keep %25 as is because decoding it may break urls, #720
+.
+
+.
+https://www.google.com/search?q=hello.%252Ehello
+.
+
+
Don't encode domains in unknown schemas:
.
diff --git a/tests/test_port/fixtures/tables.md b/tests/test_port/fixtures/tables.md
index dfd34601..ea040731 100644
--- a/tests/test_port/fixtures/tables.md
+++ b/tests/test_port/fixtures/tables.md
@@ -273,106 +273,6 @@ bar|bar
.
-Should be terminated via row without "|" symbol:
-.
-foo|foo
----|---
-paragraph
-.
-
-paragraph
-.
-
-
-Delimiter escaping:
-.
-| Heading 1 \\\\| Heading 2
-| --------- | ---------
-| Cell\|1\|| Cell\|2
-\| Cell\\\|3 \\| Cell\|4
-.
-
-
-
-| Heading 1 \\ |
-Heading 2 |
-
-
-
-
-| Cell|1| |
-Cell|2 |
-
-
-| | Cell\|3 \ |
-Cell|4 |
-
-
-
-.
-
-Pipes inside backticks don't split cells:
-.
-| Heading 1 | Heading 2
-| --------- | ---------
-| Cell 1 | Cell 2
-| `Cell|3` | Cell 4
-.
-
-
-
-| Heading 1 |
-Heading 2 |
-
-
-
-
-| Cell 1 |
-Cell 2 |
-
-
-Cell|3 |
-Cell 4 |
-
-
-
-.
-
-Unclosed backticks don't count
-.
-| Heading 1 | Heading 2
-| --------- | ---------
-| Cell 1 | Cell 2
-| `Cell 3| Cell 4
-.
-
-
-
-| Heading 1 |
-Heading 2 |
-
-
-
-
-| Cell 1 |
-Cell 2 |
-
-
-| `Cell 3 |
-Cell 4 |
-
-
-
-.
-
Another complicated backticks case
.
| Heading 1 | Heading 2
@@ -452,7 +352,7 @@ x | \`\` | `x`
An amount of rows might be different across the table (issue #171):
.
| 1 | 2 |
-| :-----: | :-----: | :-----: |
+| :-----: | :-----: |
| 3 | 4 | 5 | 6 |
.
@@ -581,7 +481,6 @@ Tables should not be indented more than 4 spaces (3rd line):
| Col2a |
-
| Col1b | Col2b |
@@ -600,7 +499,6 @@ Allow tables with empty body:
Col2a |
-
.
@@ -615,3 +513,234 @@ Col2a | Col2b | Col2c
----- | -----
Col2a | Col2b | Col2c
.
+
+Escaped pipes inside backticks don't split cells:
+.
+| Heading 1 | Heading 2
+| --------- | ---------
+| Cell 1 | Cell 2
+| `Cell 3\|` | Cell 4
+.
+
+
+
+| Heading 1 |
+Heading 2 |
+
+
+
+
+| Cell 1 |
+Cell 2 |
+
+
+Cell 3| |
+Cell 4 |
+
+
+
+.
+
+Escape before escaped Pipes inside backticks don't split cells:
+.
+| Heading 1 | Heading 2
+| --------- | ---------
+| Cell 1 | Cell 2
+| `Cell 3\\|` | Cell 4
+.
+
+
+
+| Heading 1 |
+Heading 2 |
+
+
+
+
+| Cell 1 |
+Cell 2 |
+
+
+Cell 3\| |
+Cell 4 |
+
+
+
+.
+
+GFM 4.10 Tables (extension), Example 198
+.
+| foo | bar |
+| --- | --- |
+| baz | bim |
+.
+
+
+
+| foo |
+bar |
+
+
+
+
+| baz |
+bim |
+
+
+
+.
+
+GFM 4.10 Tables (extension), Example 199
+.
+| abc | defghi |
+:-: | -----------:
+bar | baz
+.
+
+
+
+| abc |
+defghi |
+
+
+
+
+| bar |
+baz |
+
+
+
+.
+
+GFM 4.10 Tables (extension), Example 200
+.
+| f\|oo |
+| ------ |
+| b `\|` az |
+| b **\|** im |
+.
+
+
+
+| f|oo |
+
+
+
+
+b | az |
+
+
+| b | im |
+
+
+
+.
+
+GFM 4.10 Tables (extension), Example 201
+.
+| abc | def |
+| --- | --- |
+| bar | baz |
+> bar
+.
+
+
+
+| abc |
+def |
+
+
+
+
+| bar |
+baz |
+
+
+
+
+bar
+
+.
+
+GFM 4.10 Tables (extension), Example 202
+.
+| abc | def |
+| --- | --- |
+| bar | baz |
+bar
+
+bar
+.
+
+
+
+| abc |
+def |
+
+
+
+
+| bar |
+baz |
+
+
+| bar |
+ |
+
+
+
+bar
+.
+
+GFM 4.10 Tables (extension), Example 203
+.
+| abc | def |
+| --- |
+| bar |
+.
+| abc | def |
+| --- |
+| bar |
+.
+
+GFM 4.10 Tables (extension), Example 204
+.
+| abc | def |
+| --- | --- |
+| bar |
+| bar | baz | boo |
+.
+
+
+
+| abc |
+def |
+
+
+
+
+| bar |
+ |
+
+
+| bar |
+baz |
+
+
+
+.
+
+GFM 4.10 Tables (extension), Example 205
+.
+| abc | def |
+| --- | --- |
+.
+
+.
diff --git a/tests/test_port/test_fixtures.py b/tests/test_port/test_fixtures.py
index d19746ea..7fc93ba9 100644
--- a/tests/test_port/test_fixtures.py
+++ b/tests/test_port/test_fixtures.py
@@ -77,6 +77,9 @@ def test_commonmark_extras(line, title, input, expected):
read_fixture_file(FIXTURE_PATH.joinpath("normalize.md")),
)
def test_normalize_url(line, title, input, expected):
+ if line in [17]:
+ # TODO fix failing url escaping tests
+ pytest.skip("url normalisation")
md = MarkdownIt("commonmark")
text = md.render(input)
assert text.rstrip() == expected.rstrip()
diff --git a/tests/test_port/test_misc.py b/tests/test_port/test_misc.py
new file mode 100644
index 00000000..f5f821e9
--- /dev/null
+++ b/tests/test_port/test_misc.py
@@ -0,0 +1,14 @@
+from markdown_it import MarkdownIt
+from markdown_it import presets
+
+
+def test_highlight_arguments():
+ def highlight_func(str_, lang, attrs):
+ assert lang == "a"
+ assert attrs == "b c d"
+ return "<pre><code>==" + str_ + "==</code></pre>"
+
+ conf = presets.commonmark.make()
+ conf["options"]["highlight"] = highlight_func
+ md = MarkdownIt(config=conf)
+ assert md.render("``` a b c d \nhl\n```") == "<pre><code>==hl\n==</code></pre>\n"
From aefba9cf7de15f1f6f318aeb05924f5ff6c23dec Mon Sep 17 00:00:00 2001
From: Taneli Hukkinen
Date: Sun, 27 Dec 2020 04:27:57 +0200
Subject: [PATCH 3/6] Catch up with markdown-it v12.0.2
---
markdown_it/port.yaml | 4 +--
markdown_it/rules_block/table.py | 4 +--
tests/test_port/fixtures/tables.md | 49 ++++++++++++++++++++++++++++++
3 files changed, 53 insertions(+), 4 deletions(-)
diff --git a/markdown_it/port.yaml b/markdown_it/port.yaml
index c7a4bfd8..ce7cf6ed 100644
--- a/markdown_it/port.yaml
+++ b/markdown_it/port.yaml
@@ -1,6 +1,6 @@
- package: markdown-it/markdown-it
- commit: 1731de407e56595d1e206c79482061d1c6f501ed
- date: Oct 14, 2020
+ commit: b60493e620a685a40eac016dde2c207ee9e07875
+ date: Oct 23, 2020
notes:
- Rename variables that use python built-in names, e.g.
- `max` -> `maximum`
diff --git a/markdown_it/rules_block/table.py b/markdown_it/rules_block/table.py
index 5993814f..c1873dc1 100644
--- a/markdown_it/rules_block/table.py
+++ b/markdown_it/rules_block/table.py
@@ -10,7 +10,7 @@
def getLine(state: StateBlock, line: int):
- pos = state.bMarks[line] + state.blkIndent
+ pos = state.bMarks[line] + state.tShift[line]
maximum = state.eMarks[line]
# return state.src.substr(pos, max - pos)
@@ -125,7 +125,7 @@ def table(state: StateBlock, startLine: int, endLine: int, silent: bool):
# header row will define an amount of columns in the entire table,
# and align row should be exactly the same (the rest of the rows can differ)
columnCount = len(columns)
- if columnCount != len(aligns):
+ if columnCount == 0 or columnCount != len(aligns):
return False
if silent:
diff --git a/tests/test_port/fixtures/tables.md b/tests/test_port/fixtures/tables.md
index ea040731..7219a1a4 100644
--- a/tests/test_port/fixtures/tables.md
+++ b/tests/test_port/fixtures/tables.md
@@ -568,6 +568,55 @@ Escape before escaped Pipes inside backticks don't split cells:
.
+Regression test for #721, table in a list indented with tabs:
+.
+- Level 1
+
+ - Level 2
+
+ | Column 1 | Column 2 |
+ | -------- | -------- |
+ | abcdefgh | ijklmnop |
+.
+
+-
+
Level 1
+
+-
+
Level 2
+
+
+
+| Column 1 |
+Column 2 |
+
+
+
+
+| abcdefgh |
+ijklmnop |
+
+
+
+
+
+
+
+.
+
+
+Table without any columns is not a table, #724
+.
+|
+|
+|
+.
+|
+|
+|
+.
+
+
GFM 4.10 Tables (extension), Example 198
.
| foo | bar |
From f6dc3fc94a18b914ec63a8c34dd7b15418f1ba06 Mon Sep 17 00:00:00 2001
From: Taneli Hukkinen
Date: Sun, 27 Dec 2020 17:23:39 +0200
Subject: [PATCH 4/6] Catch up with markdown-it v12.0.4
---
markdown_it/common/html_blocks.py | 1 -
markdown_it/common/html_re.py | 2 +-
markdown_it/helpers/parse_link_destination.py | 6 ++
markdown_it/helpers/parse_link_title.py | 2 +
markdown_it/port.yaml | 4 +-
markdown_it/renderer.py | 3 +-
markdown_it/rules_inline/autolink.py | 34 +++++-----
markdown_it/rules_inline/backticks.py | 30 +++++++--
markdown_it/rules_inline/balance_pairs.py | 10 +--
markdown_it/rules_inline/link.py | 37 ++++++-----
markdown_it/rules_inline/state_inline.py | 4 ++
markdown_it/rules_inline/strikethrough.py | 2 +-
tests/test_port/fixtures/commonmark_extras.md | 63 +++++++++++++++++++
tests/test_port/fixtures/strikethrough.md | 7 +++
14 files changed, 156 insertions(+), 49 deletions(-)
diff --git a/markdown_it/common/html_blocks.py b/markdown_it/common/html_blocks.py
index a503eec2..4246f788 100644
--- a/markdown_it/common/html_blocks.py
+++ b/markdown_it/common/html_blocks.py
@@ -45,7 +45,6 @@
"main",
"menu",
"menuitem",
- "meta",
"nav",
"noframes",
"ol",
diff --git a/markdown_it/common/html_re.py b/markdown_it/common/html_re.py
index 5a3e6d0c..f0c336d2 100644
--- a/markdown_it/common/html_re.py
+++ b/markdown_it/common/html_re.py
@@ -17,7 +17,7 @@
close_tag = "<\\/[A-Za-z][A-Za-z0-9\\-]*\\s*>"
comment = "|"
-processing = "<[?].*?[?]>"
+processing = "<[?][\\s\\S]*?[?]>"
declaration = "]*>"
cdata = ""
diff --git a/markdown_it/helpers/parse_link_destination.py b/markdown_it/helpers/parse_link_destination.py
index 4e91cb4f..74dbec08 100644
--- a/markdown_it/helpers/parse_link_destination.py
+++ b/markdown_it/helpers/parse_link_destination.py
@@ -26,6 +26,8 @@ def parseLinkDestination(string: str, pos: int, maximum: int) -> _Result:
code = charCodeAt(string, pos)
if code == 0x0A: # /* \n */)
return result
+ if code == 0x3C: # /* < */
+ return result
if code == 0x3E: # /* > */) {
result.pos = pos + 1
result.str = unescapeAll(string[start + 1 : pos])
@@ -55,11 +57,15 @@ def parseLinkDestination(string: str, pos: int, maximum: int) -> _Result:
break
if code == 0x5C and pos + 1 < maximum:
+ if charCodeAt(string, pos + 1) == 0x20:
+ break
pos += 2
continue
if code == 0x28: # /* ( */)
level += 1
+ if level > 32:
+ return result
if code == 0x29: # /* ) */)
if level == 0:
diff --git a/markdown_it/helpers/parse_link_title.py b/markdown_it/helpers/parse_link_title.py
index 048b4e60..4aa67e88 100644
--- a/markdown_it/helpers/parse_link_title.py
+++ b/markdown_it/helpers/parse_link_title.py
@@ -46,6 +46,8 @@ def parseLinkTitle(string: str, pos: int, maximum: int) -> _Result:
result.str = title
result.ok = True
return result
+ elif code == 0x28 and marker == 0x29: # /* ( */ /* ) */
+ return result
elif code == 0x0A:
lines += 1
elif code == 0x5C and pos + 1 < maximum: # /* \ */
diff --git a/markdown_it/port.yaml b/markdown_it/port.yaml
index ce7cf6ed..3afe2506 100644
--- a/markdown_it/port.yaml
+++ b/markdown_it/port.yaml
@@ -1,6 +1,6 @@
- package: markdown-it/markdown-it
- commit: b60493e620a685a40eac016dde2c207ee9e07875
- date: Oct 23, 2020
+ commit: 7b8969ce5cb2edc54f2c1aa39a85a3a08076337d
+ date: Dec 20, 2020
notes:
- Rename variables that use python built-in names, e.g.
- `max` -> `maximum`
diff --git a/markdown_it/renderer.py b/markdown_it/renderer.py
index 78a038aa..8efc38ad 100644
--- a/markdown_it/renderer.py
+++ b/markdown_it/renderer.py
@@ -236,7 +236,7 @@ def fence(self, tokens: Sequence[Token], idx, options, env):
return highlighted + "\n"
# If language exists, inject class gently, without modifying original token.
- # May be, one day we will add .clone() for token and simplify this part, but
+ # Maybe one day we will add .deepClone() for token and simplify this part, but
# now we prefer to keep things local.
if info:
i = token.attrIndex("class")
@@ -245,6 +245,7 @@ def fence(self, tokens: Sequence[Token], idx, options, env):
if i < 0:
tmpAttrs.append(["class", options.langPrefix + langName])
else:
+ tmpAttrs[i] = tmpAttrs[i][:]
tmpAttrs[i][1] += " " + options.langPrefix + langName
# Fake token just to render attributes
diff --git a/markdown_it/rules_inline/autolink.py b/markdown_it/rules_inline/autolink.py
index b07396e2..2d3b2d9a 100644
--- a/markdown_it/rules_inline/autolink.py
+++ b/markdown_it/rules_inline/autolink.py
@@ -4,9 +4,9 @@
from ..common.normalize_url import normalizeLinkText, normalizeLink, validateLink
EMAIL_RE = re.compile(
- r"^<([a-zA-Z0-9.!#$%&\'*+\/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*)>" # noqa: E501
+ r"^([a-zA-Z0-9.!#$%&\'*+\/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*)$" # noqa: E501
)
-AUTOLINK_RE = re.compile(r"^<([a-zA-Z][a-zA-Z0-9+.\-]{1,31}):([^<>\x00-\x20]*)>")
+AUTOLINK_RE = re.compile(r"^([a-zA-Z][a-zA-Z0-9+.\-]{1,31}):([^<>\x00-\x20]*)$")
def autolink(state: StateInline, silent: bool) -> bool:
@@ -16,15 +16,24 @@ def autolink(state: StateInline, silent: bool) -> bool:
if state.srcCharCode[pos] != 0x3C: # /* < */
return False
- tail = state.src[pos:]
+ start = state.pos
+ max = state.posMax
- if ">" not in tail:
- return False
+ while True:
+ pos += 1
+ if pos >= max:
+ return False
+
+ ch = state.srcCharCode[pos]
- linkMatch = AUTOLINK_RE.search(tail)
- if linkMatch is not None:
+ if ch == 0x3C: # /* < */
+ return False
+ if ch == 0x3E: # /* > */
+ break
+
+ url = state.src[start + 1 : pos]
- url = linkMatch.group(0)[1:-1]
+ if AUTOLINK_RE.search(url) is not None:
fullUrl = normalizeLink(url)
if not validateLink(fullUrl):
return False
@@ -42,13 +51,10 @@ def autolink(state: StateInline, silent: bool) -> bool:
token.markup = "autolink"
token.info = "auto"
- state.pos += len(linkMatch.group(0))
+ state.pos += len(url) + 2
return True
- emailMatch = EMAIL_RE.search(tail)
- if emailMatch is not None:
-
- url = emailMatch.group(0)[1:-1]
+ if EMAIL_RE.search(url) is not None:
fullUrl = normalizeLink("mailto:" + url)
if not validateLink(fullUrl):
return False
@@ -66,7 +72,7 @@ def autolink(state: StateInline, silent: bool) -> bool:
token.markup = "autolink"
token.info = "auto"
- state.pos += len(emailMatch.group(0))
+ state.pos += len(url) + 2
return True
return False
diff --git a/markdown_it/rules_inline/backticks.py b/markdown_it/rules_inline/backticks.py
index 67e41c73..7bff12fe 100644
--- a/markdown_it/rules_inline/backticks.py
+++ b/markdown_it/rules_inline/backticks.py
@@ -19,25 +19,37 @@ def backtick(state: StateInline, silent: bool) -> bool:
pos += 1
maximum = state.posMax
- # /* ` */
- while pos < maximum and (state.srcCharCode[pos] == 0x60):
+ # scan marker length
+ while pos < maximum and (state.srcCharCode[pos] == 0x60): # /* ` */
pos += 1
marker = state.src[start:pos]
+ openerLength = len(marker)
+
+ if state.backticksScanned and state.backticks.get(openerLength, 0) <= start:
+ if not silent:
+ state.pending += marker
+ state.pos += openerLength
+ return True
matchStart = matchEnd = pos
+ # Nothing found in the cache, scan until the end of the line (or until marker is found)
while True:
try:
matchStart = state.src.index("`", matchEnd)
except ValueError:
break
matchEnd = matchStart + 1
- # /* ` */
- while matchEnd < maximum and (state.srcCharCode[matchEnd] == 0x60):
+
+ # scan marker length
+ while matchEnd < maximum and (state.srcCharCode[matchEnd] == 0x60): # /* ` */
matchEnd += 1
- if matchEnd - matchStart == len(marker):
+ closerLength = matchEnd - matchStart
+
+ if closerLength == openerLength:
+ # Found matching closer length.
if not silent:
token = state.push("code_inline", "code", 0)
token.markup = marker
@@ -51,7 +63,13 @@ def backtick(state: StateInline, silent: bool) -> bool:
state.pos = matchEnd
return True
+ # Some different length found, put it in cache as upper limit of where closer can be found
+ state.backticks[closerLength] = matchStart
+
+ # Scanned through the end, didn't find anything
+ state.backticksScanned = True
+
if not silent:
state.pending += marker
- state.pos += len(marker)
+ state.pos += openerLength
return True
diff --git a/markdown_it/rules_inline/balance_pairs.py b/markdown_it/rules_inline/balance_pairs.py
index 4198492b..afcab205 100644
--- a/markdown_it/rules_inline/balance_pairs.py
+++ b/markdown_it/rules_inline/balance_pairs.py
@@ -28,10 +28,15 @@ def processDelimiters(state: StateInline, delimiters, *args):
openersBottom[closer.marker] = [-1, -1, -1]
minOpenerIdx = openersBottom[closer.marker][closer.length % 3]
- newMinOpenerIdx = -1
openerIdx = closerIdx - closer.jump - 1
+ # avoid crash if `closer.jump` is pointing outside of the array, see #742
+ if openerIdx < -1:
+ openerIdx = -1
+
+ newMinOpenerIdx = openerIdx
+
while openerIdx > minOpenerIdx:
opener = delimiters[openerIdx]
@@ -39,9 +44,6 @@ def processDelimiters(state: StateInline, delimiters, *args):
openerIdx -= opener.jump + 1
continue
- if newMinOpenerIdx == -1:
- newMinOpenerIdx = openerIdx
-
if opener.open and opener.end < 0:
isOddMatch = False
diff --git a/markdown_it/rules_inline/link.py b/markdown_it/rules_inline/link.py
index 4c0e8575..65b991dc 100644
--- a/markdown_it/rules_inline/link.py
+++ b/markdown_it/rules_inline/link.py
@@ -8,6 +8,7 @@
def link(state: StateInline, silent: bool):
href = ""
+ title = ""
label = None
oldPos = state.pos
maximum = state.posMax
@@ -57,31 +58,29 @@ def link(state: StateInline, silent: bool):
else:
href = ""
- # [link]( "title" )
- # ^^ skipping these spaces
- start = pos
- while pos < maximum:
- code = state.srcCharCode[pos]
- if not isSpace(code) and code != 0x0A:
- break
- pos += 1
-
- # [link]( "title" )
- # ^^^^^^^ parsing link title
- res = state.md.helpers.parseLinkTitle(state.src, pos, state.posMax)
- if pos < maximum and start != pos and res.ok:
- title = res.str
- pos = res.pos
-
# [link]( "title" )
- # ^^ skipping these spaces
+ # ^^ skipping these spaces
+ start = pos
while pos < maximum:
code = state.srcCharCode[pos]
if not isSpace(code) and code != 0x0A:
break
pos += 1
- else:
- title = ""
+
+ # [link]( "title" )
+ # ^^^^^^^ parsing link title
+ res = state.md.helpers.parseLinkTitle(state.src, pos, state.posMax)
+ if pos < maximum and start != pos and res.ok:
+ title = res.str
+ pos = res.pos
+
+ # [link]( "title" )
+ # ^^ skipping these spaces
+ while pos < maximum:
+ code = state.srcCharCode[pos]
+ if not isSpace(code) and code != 0x0A:
+ break
+ pos += 1
if pos >= maximum or state.srcCharCode[pos] != 0x29: # /* ) */
# parsing a valid shortcut link failed, fallback to reference
diff --git a/markdown_it/rules_inline/state_inline.py b/markdown_it/rules_inline/state_inline.py
index 224467cd..c68fa92d 100644
--- a/markdown_it/rules_inline/state_inline.py
+++ b/markdown_it/rules_inline/state_inline.py
@@ -67,6 +67,10 @@ def __init__(self, src: str, md, env, outTokens: List[Token]):
# Stack of delimiter lists for upper level tags
self._prev_delimiters: List[List[Delimiter]] = []
+ # backticklength => last seen position
+ self.backticks: Dict[int, int] = {}
+ self.backticksScanned = False
+
def __repr__(self):
return (
f"{self.__class__.__name__}"
diff --git a/markdown_it/rules_inline/strikethrough.py b/markdown_it/rules_inline/strikethrough.py
index 69337b07..87af4b46 100644
--- a/markdown_it/rules_inline/strikethrough.py
+++ b/markdown_it/rules_inline/strikethrough.py
@@ -35,7 +35,7 @@ def tokenize(state: StateInline, silent: bool):
**{
"marker": marker,
"length": 0, # disable "rule of 3" length checks meant for emphasis
- "jump": i,
+ "jump": i // 2, # for `~~` 1 marker = 2 characters
"token": len(state.tokens) - 1,
"end": -1,
"open": scanned.can_open,
diff --git a/tests/test_port/fixtures/commonmark_extras.md b/tests/test_port/fixtures/commonmark_extras.md
index c7a48012..0cb81ef0 100644
--- a/tests/test_port/fixtures/commonmark_extras.md
+++ b/tests/test_port/fixtures/commonmark_extras.md
@@ -255,6 +255,61 @@ List item terminating quote should not be paragraph continuation
.
+
+Link destination cannot contain '<'
+.
+[]()
+
+[]()
+.
+[](<foo)
+
+.
+
+
+Link title cannot contain '(' when opened with it
+.
+[](url (xxx())
+
+[](url (xxx\())
+.
+[](url (xxx())
+
+.
+
+
+Escaped space is not allowed in link destination, commonmark/CommonMark#493.
+.
+[link](a\ b)
+.
+[link](a\ b)
+.
+
+
+Allow EOL in processing instructions, commonmark/commonmark.js#196.
+.
+a
+?>
+.
+a
+?>
+.
+
+
+Allow meta tag in an inline context, commonmark/commonmark-spec#527.
+.
+City:
+
+
+
+.
+City:
+
+
+
+.
+
+
Coverage. Directive can terminate paragraph.
.
a
@@ -281,6 +336,14 @@ Coverage. Unpaired nested backtick (silent mode)
.
+Coverage. Should continue scanning after closing "```" despite cache
+.
+```aaa``bbb``ccc```ddd``eee``
+.
+aaa``bbb``cccdddeee
+.
+
+
Coverage. Entities.
.
*&*
diff --git a/tests/test_port/fixtures/strikethrough.md b/tests/test_port/fixtures/strikethrough.md
index f3f68fea..ca15b6ff 100644
--- a/tests/test_port/fixtures/strikethrough.md
+++ b/tests/test_port/fixtures/strikethrough.md
@@ -127,3 +127,10 @@ Coverage: single tilde
.
~a~
.
+
+Regression test for #742:
+.
+-~~~~;~~~~~~
+.
+-;~~
+.
From dd6f380dc1d74443efcdc5675513e5c3192f4183 Mon Sep 17 00:00:00 2001
From: Taneli Hukkinen
Date: Mon, 28 Dec 2020 04:31:51 +0200
Subject: [PATCH 5/6] Apply name change rules defined in port.yaml
---
markdown_it/rules_inline/autolink.py | 4 ++--
tests/test_port/test_misc.py | 4 ++--
2 files changed, 4 insertions(+), 4 deletions(-)
diff --git a/markdown_it/rules_inline/autolink.py b/markdown_it/rules_inline/autolink.py
index 2d3b2d9a..f810629f 100644
--- a/markdown_it/rules_inline/autolink.py
+++ b/markdown_it/rules_inline/autolink.py
@@ -17,11 +17,11 @@ def autolink(state: StateInline, silent: bool) -> bool:
return False
start = state.pos
- max = state.posMax
+ maximum = state.posMax
while True:
pos += 1
- if pos >= max:
+ if pos >= maximum:
return False
ch = state.srcCharCode[pos]
diff --git a/tests/test_port/test_misc.py b/tests/test_port/test_misc.py
index f5f821e9..1fd1e3bb 100644
--- a/tests/test_port/test_misc.py
+++ b/tests/test_port/test_misc.py
@@ -3,10 +3,10 @@
def test_highlight_arguments():
- def highlight_func(str_, lang, attrs):
+ def highlight_func(string, lang, attrs):
assert lang == "a"
assert attrs == "b c d"
- return "<pre><code>==" + str_ + "==</code></pre>"
+ return "<pre><code>==" + string + "==</code></pre>"
conf = presets.commonmark.make()
conf["options"]["highlight"] = highlight_func
From 87e00c9b31cba9becd800c8dc2b281c7629803a5 Mon Sep 17 00:00:00 2001
From: Taneli Hukkinen
Date: Sat, 16 Jan 2021 00:52:31 +0200
Subject: [PATCH 6/6] Add back removed table tests
---
tests/test_port/fixtures/tables.md | 90 ++++++++++++++++++++++++++++++
1 file changed, 90 insertions(+)
diff --git a/tests/test_port/fixtures/tables.md b/tests/test_port/fixtures/tables.md
index 7219a1a4..48f126d5 100644
--- a/tests/test_port/fixtures/tables.md
+++ b/tests/test_port/fixtures/tables.md
@@ -273,6 +273,96 @@ bar|bar
.
+Should not be terminated via row without "|" symbol:
+.
+foo|foo
+---|---
+paragraph
+.
+
+
+
+| foo |
+foo |
+
+
+
+
+| paragraph |
+ |
+
+
+
+.
+
+
+Delimiter escaping:
+.
+| Heading 1 \\\\| Heading 2
+| --------- | ---------
+| Cell\|1\|| Cell\|2
+\| Cell\\\|3 \\| Cell\|4
+.
+| Heading 1 \\| Heading 2
+| --------- | ---------
+| Cell|1|| Cell|2
+| Cell\|3 \| Cell|4
+.
+
+Pipes inside backticks split cells:
+.
+| Heading 1 | Heading 2
+| --------- | ---------
+| Cell 1 | Cell 2
+| `Cell|3` | Cell 4
+.
+
+
+
+| Heading 1 |
+Heading 2 |
+
+
+
+
+| Cell 1 |
+Cell 2 |
+
+
+| `Cell |
+3` |
+
+
+
+.
+
+Unclosed backticks don't count
+.
+| Heading 1 | Heading 2
+| --------- | ---------
+| Cell 1 | Cell 2
+| `Cell 3| Cell 4
+.
+
+
+
+| Heading 1 |
+Heading 2 |
+
+
+
+
+| Cell 1 |
+Cell 2 |
+
+
+| `Cell 3 |
+Cell 4 |
+
+
+
+.
+
Another complicated backticks case
.
| Heading 1 | Heading 2