Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

👌 Improve nested emphasis parsing #273

Merged
merged 2 commits into from
Jun 2, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions markdown_it/port.yaml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
- package: markdown-it/markdown-it
version: 12.2.0
commit: 6e2de08a0b03d3d0dcc524b89710ce05f83a0283
date: Aug 2, 2021
version: 12.3.0
commit: 2e31d3430187d2eee1ba120c954783eebb93b4e8
date: Dec 9, 2021
notes:
- Rename variables that use python built-in names, e.g.
- `max` -> `maximum`
Expand Down
43 changes: 32 additions & 11 deletions markdown_it/rules_inline/balance_pairs.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,33 @@

def processDelimiters(state: StateInline, delimiters: list[Delimiter]) -> None:
"""For each opening emphasis-like marker find a matching closing one."""
if not delimiters:
return

openersBottom = {}
maximum = len(delimiters)

# headerIdx is the first delimiter of the current (where closer is) delimiter run
headerIdx = 0
lastTokenIdx = -2 # needs any value lower than -1
jumps: list[int] = []
closerIdx = 0
while closerIdx < maximum:
closer = delimiters[closerIdx]

jumps.append(0)

# markers belong to same delimiter run if:
# - they have adjacent tokens
# - AND markers are the same
#
if (
delimiters[headerIdx].marker != closer.marker
or lastTokenIdx != closer.token - 1
):
headerIdx = closerIdx
lastTokenIdx = closer.token

# Length is only used for emphasis-specific "rule of 3",
# if it's not defined (in strikethrough or 3rd party plugins),
# we can default it to 0 to disable those checks.
Expand All @@ -34,20 +54,15 @@ def processDelimiters(state: StateInline, delimiters: list[Delimiter]) -> None:
(3 if closer.open else 0) + (closer.length % 3)
]

openerIdx = closerIdx - closer.jump - 1

# avoid crash if `closer.jump` is pointing outside of the array,
# e.g. for strikethrough
if openerIdx < -1:
openerIdx = -1
openerIdx = headerIdx - jumps[headerIdx] - 1

newMinOpenerIdx = openerIdx

while openerIdx > minOpenerIdx:
opener = delimiters[openerIdx]

if opener.marker != closer.marker:
openerIdx -= opener.jump + 1
openerIdx -= jumps[openerIdx] + 1
continue

if opener.open and opener.end < 0:
Expand All @@ -73,19 +88,25 @@ def processDelimiters(state: StateInline, delimiters: list[Delimiter]) -> None:
# sure algorithm has linear complexity (see *_*_*_*_*_... case).
#
if openerIdx > 0 and not delimiters[openerIdx - 1].open:
lastJump = delimiters[openerIdx - 1].jump + 1
lastJump = jumps[openerIdx - 1] + 1
else:
lastJump = 0

closer.jump = closerIdx - openerIdx + lastJump
jumps[closerIdx] = closerIdx - openerIdx + lastJump
jumps[openerIdx] = lastJump

closer.open = False
opener.end = closerIdx
opener.jump = lastJump
opener.close = False
newMinOpenerIdx = -1

# treat next token as start of run,
# it optimizes skips in **<...>**a**<...>** pathological case
lastTokenIdx = -2

break

openerIdx -= opener.jump + 1
openerIdx -= jumps[openerIdx] + 1

if newMinOpenerIdx != -1:
# If match for this delimiter run failed, we want to set lower bound for
Expand Down
7 changes: 4 additions & 3 deletions markdown_it/rules_inline/emphasis.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,14 +18,13 @@ def tokenize(state: StateInline, silent: bool) -> bool:

scanned = state.scanDelims(state.pos, marker == "*")

for i in range(scanned.length):
for _ in range(scanned.length):
token = state.push("text", "", 0)
token.content = marker
state.delimiters.append(
Delimiter(
marker=ord(marker),
length=scanned.length,
jump=i,
token=len(state.tokens) - 1,
end=-1,
open=scanned.can_open,
Expand Down Expand Up @@ -63,9 +62,11 @@ def _postProcess(state: StateInline, delimiters: list[Delimiter]) -> None:
isStrong = (
i > 0
and delimiters[i - 1].end == startDelim.end + 1
# check that first two markers match and adjacent
and delimiters[i - 1].marker == startDelim.marker
and delimiters[i - 1].token == startDelim.token - 1
# check that last two markers are adjacent (we can safely assume they match)
and delimiters[startDelim.end + 1].token == endDelim.token + 1
and delimiters[i - 1].marker == startDelim.marker
)

ch = chr(startDelim.marker)
Expand Down
8 changes: 0 additions & 8 deletions markdown_it/rules_inline/state_inline.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,14 +22,6 @@ class Delimiter:
# Total length of these series of delimiters.
length: int

# An amount of characters before this one that's equivalent to
# current one. In plain English: if this delimiter does not open
# an emphasis, neither do previous `jump` characters.
#
# Used to skip sequences like "*****" in one step, for 1st asterisk
# value will be 0, for 2nd it's 1 and so on.
jump: int

# A position of the token this delimiter corresponds to.
token: int

Expand Down
1 change: 0 additions & 1 deletion markdown_it/rules_inline/strikethrough.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,6 @@ def tokenize(state: StateInline, silent: bool) -> bool:
Delimiter(
marker=ord(ch),
length=0, # disable "rule of 3" length checks meant for emphasis
jump=i // 2, # for `~~` 1 marker = 2 characters
token=len(state.tokens) - 1,
end=-1,
open=scanned.can_open,
Expand Down