Skip to content

Commit

Permalink
Fix for em/strong corner cases (#1853)
Browse files Browse the repository at this point in the history
Fixes #1775
  • Loading branch information
facelessuser committed Nov 8, 2022
1 parent 459a2f3 commit 18ba91e
Show file tree
Hide file tree
Showing 7 changed files with 60 additions and 29 deletions.
4 changes: 4 additions & 0 deletions docs/src/markdown/about/changelog.md
@@ -1,5 +1,9 @@
# Changelog

## 9.7.1

- **FIX**: BetterEm: Fix case where `**` nested between `*` would be handled in an unexpected way.

## 9.7

- **NEW**: Tabbed: Add new syntax to allow forcing a specific tab to be selected by default.
Expand Down
19 changes: 14 additions & 5 deletions docs/src/markdown/extensions/betterem.md
Expand Up @@ -60,6 +60,19 @@ BetterEm allows for a more natural nested token feel.
***I'm bold and italic!** I am just italic.*
```

BetterEm will try to prioritize the more sane option when nesting bold (`**`) between italic (`*`).

!!! example "Prioritize Best Example"

=== "Output"
*I'm italic. **I'm bold and italic.** I'm also just italic.*

=== "Markdown"

```
*I'm italic. **I'm bold and italic.** I'm also just italic.*
```

BetterEm will ensure smart mode doesn't terminate in scenarios where there are a large number of consecutive tokens
inside.

Expand All @@ -68,12 +81,8 @@ inside.
=== "Output"
___A lot of underscores____________is okay___

___A lot of underscores____________is okay___

=== "Markdown"
```
___A lot of underscores____________is okay___

___A lot of underscores____________is okay___
```

Expand Down Expand Up @@ -113,7 +122,7 @@ BetterEm will allow non-smart emphasis to contain "floating" like tokens.

*All will not* be italic*

*All will not ** be italic*
*All will ** be italic*

**All will * be bold**

Expand Down
2 changes: 1 addition & 1 deletion pymdownx/__meta__.py
Expand Up @@ -185,5 +185,5 @@ def parse_version(ver, pre=False):
return Version(major, minor, micro, release, pre, post, dev)


__version_info__ = Version(9, 7, 0, "final")
__version_info__ = Version(9, 7, 1, "final")
__version__ = __version_info__._get_canonical()
21 changes: 17 additions & 4 deletions pymdownx/betterem.py
Expand Up @@ -53,6 +53,12 @@
STAR_STRONG = r'(\*{2})(?!\s)%s(?<!\s)\1' % STAR_CONTENT2
# __strong__
UNDER_STRONG = r'(_{2})(?!\s)%s(?<!\s)\1' % UNDER_CONTENT2

# Prioritize *value* when **value** is nested within
STAR_EM2 = r'(?<!\*)(\*)(?![\*\s])(.+?)(?<![\*\s])(\*)(?!\*)'
# Prioritize _value_ when __value__ is nested within
UNDER_EM2 = r'(?<!_)(_)(?![_\s])(.+?)(?<![_\s])(_)(?!_)'

# *emphasis*
STAR_EM = r'(\*)(?!\s)%s(?<!\s)\1' % STAR_CONTENT
# _emphasis_
Expand All @@ -71,6 +77,8 @@
SMART_UNDER_STRONG = r'(?<!\w)(_{2})(?![\s_])%s(?<!\s)\1(?!\w)' % SMART_UNDER_CONTENT
# SMART _em_
SMART_UNDER_EM = r'(?<!\w)(_)(?![\s_])%s(?<!\s)\1(?!\w)' % SMART_UNDER_CONTENT
# Prioritize _value_ when __value__ is nested within
SMART_UNDER_EM2 = r'(?<![\w_])(_)(?![_\s])(.+?)(?<![_\s])(_)(?![_\w])'

# Smart rules for when "smart asterisk" is enabled
# SMART: ***strong,em***
Expand All @@ -89,6 +97,8 @@
SMART_STAR_STRONG = r'(?:(?<=_)|(?<![\w\*]))(\*{2})(?![\s\*])%s(?<!\s)\1(?:(?=_)|(?![\w\*]))' % SMART_STAR_CONTENT
# SMART *em*
SMART_STAR_EM = r'(?:(?<=_)|(?<![\w\*]))(\*)(?![\s\*])%s(?<!\s)\1(?:(?=_)|(?![\w\*]))' % SMART_STAR_CONTENT
# Prioritize *value* when **value** is nested within
SMART_STAR_EM2 = r'(?<![\w\*])(\*)(?![\*\s])(.+?)(?<![\*\s])(\*)(?![\*\w])'


class AsteriskProcessor(util.PatternSequenceProcessor):
Expand All @@ -100,6 +110,7 @@ class AsteriskProcessor(util.PatternSequenceProcessor):
util.PatSeqItem(re.compile(STAR_STRONG_EM2, re.DOTALL | re.UNICODE), 'double', 'strong,em'),
util.PatSeqItem(re.compile(STAR_STRONG_EM3, re.DOTALL | re.UNICODE), 'double2', 'strong,em'),
util.PatSeqItem(re.compile(STAR_STRONG, re.DOTALL | re.UNICODE), 'single', 'strong'),
util.PatSeqItem(re.compile(STAR_EM2, re.DOTALL | re.UNICODE), 'single', 'em', True),
util.PatSeqItem(re.compile(STAR_EM, re.DOTALL | re.UNICODE), 'single', 'em')
]

Expand All @@ -112,6 +123,7 @@ class SmartAsteriskProcessor(util.PatternSequenceProcessor):
util.PatSeqItem(re.compile(SMART_STAR_EM_STRONG, re.DOTALL | re.UNICODE), 'double', 'em,strong'),
util.PatSeqItem(re.compile(SMART_STAR_STRONG_EM2, re.DOTALL | re.UNICODE), 'double', 'strong,em'),
util.PatSeqItem(re.compile(SMART_STAR_STRONG, re.DOTALL | re.UNICODE), 'single', 'strong'),
util.PatSeqItem(re.compile(SMART_STAR_EM2, re.DOTALL | re.UNICODE), 'single', 'em', True),
util.PatSeqItem(re.compile(SMART_STAR_EM, re.DOTALL | re.UNICODE), 'single', 'em')
]

Expand All @@ -125,6 +137,7 @@ class UnderscoreProcessor(util.PatternSequenceProcessor):
util.PatSeqItem(re.compile(UNDER_STRONG_EM2, re.DOTALL | re.UNICODE), 'double', 'strong,em'),
util.PatSeqItem(re.compile(UNDER_STRONG_EM3, re.DOTALL | re.UNICODE), 'double2', 'strong,em'),
util.PatSeqItem(re.compile(UNDER_STRONG, re.DOTALL | re.UNICODE), 'single', 'strong'),
util.PatSeqItem(re.compile(UNDER_EM2, re.DOTALL | re.UNICODE), 'single', 'em', True),
util.PatSeqItem(re.compile(UNDER_EM, re.DOTALL | re.UNICODE), 'single', 'em')
]

Expand All @@ -137,6 +150,7 @@ class SmartUnderscoreProcessor(util.PatternSequenceProcessor):
util.PatSeqItem(re.compile(SMART_UNDER_EM_STRONG, re.DOTALL | re.UNICODE), 'double', 'em,strong'),
util.PatSeqItem(re.compile(SMART_UNDER_STRONG_EM2, re.DOTALL | re.UNICODE), 'double', 'strong,em'),
util.PatSeqItem(re.compile(SMART_UNDER_STRONG, re.DOTALL | re.UNICODE), 'single', 'strong'),
util.PatSeqItem(re.compile(SMART_UNDER_EM2, re.DOTALL | re.UNICODE), 'single', 'em', True),
util.PatSeqItem(re.compile(SMART_UNDER_EM, re.DOTALL | re.UNICODE), 'single', 'em')
]

Expand Down Expand Up @@ -170,10 +184,9 @@ def make_better(self, md):

config = self.getConfigs()
enabled = config["smart_enable"]
if enabled:
enable_all = enabled == "all"
enable_under = enabled == "underscore" or enable_all
enable_star = enabled == "asterisk" or enable_all
enable_all = enabled == "all"
enable_under = enabled == "underscore" or enable_all
enable_star = enabled == "asterisk" or enable_all

# If we don't have to move an existing extension, use the same priority,
# but if we do have to, move it closely to the relative needed position.
Expand Down
39 changes: 22 additions & 17 deletions pymdownx/util.py
Expand Up @@ -159,50 +159,55 @@ def parse_url(url):
return (scheme, netloc, path, params, query, fragment, is_url, is_absolute)


class PatSeqItem(namedtuple('PatSeqItem', ['pattern', 'builder', 'tags'])):
class PatSeqItem(namedtuple('PatSeqItem', ['pattern', 'builder', 'tags', 'full_recursion'])):
"""Pattern sequence item."""

def __new__(cls, pattern, builder, tags, full_recursion=False):
"""Create object."""

return super(PatSeqItem, cls).__new__(cls, pattern, builder, tags, full_recursion)


class PatternSequenceProcessor(InlineProcessor):
"""Processor for handling complex nested patterns such as strong and em matches."""

PATTERNS = []

def build_single(self, m, tag, idx):
def build_single(self, m, tag, full_recursion, idx):
"""Return single tag."""
el1 = etree.Element(tag)
text = m.group(2)
self.parse_sub_patterns(text, el1, None, idx)
self.parse_sub_patterns(text, el1, None, full_recursion, idx)
return el1

def build_double(self, m, tags, idx):
def build_double(self, m, tags, full_recursion, idx):
"""Return double tag."""

tag1, tag2 = tags.split(",")
el1 = etree.Element(tag1)
el2 = etree.Element(tag2)
text = m.group(2)
self.parse_sub_patterns(text, el2, None, idx)
self.parse_sub_patterns(text, el2, None, full_recursion, idx)
el1.append(el2)
if len(m.groups()) == 3:
text = m.group(3)
self.parse_sub_patterns(text, el1, el2, idx)
self.parse_sub_patterns(text, el1, el2, full_recursion, idx)
return el1

def build_double2(self, m, tags, idx):
def build_double2(self, m, tags, full_recursion, idx):
"""Return double tags (variant 2): `<strong>text <em>text</em></strong>`."""

tag1, tag2 = tags.split(",")
el1 = etree.Element(tag1)
el2 = etree.Element(tag2)
text = m.group(2)
self.parse_sub_patterns(text, el1, None, idx)
self.parse_sub_patterns(text, el1, None, full_recursion, idx)
text = m.group(3)
el1.append(el2)
self.parse_sub_patterns(text, el2, None, idx)
self.parse_sub_patterns(text, el2, None, full_recursion, idx)
return el1

def parse_sub_patterns(self, data, parent, last, idx):
def parse_sub_patterns(self, data, parent, last, full_recursion, idx):
"""
Parses sub patterns.
Expand Down Expand Up @@ -231,7 +236,7 @@ def parse_sub_patterns(self, data, parent, last, idx):
# See if we can match an emphasis/strong pattern
for index, item in enumerate(self.PATTERNS):
# Only evaluate patterns that are after what was used on the parent
if index <= idx:
if not full_recursion and index <= idx:
continue
m = item.pattern.match(data, pos)
if m:
Expand All @@ -245,7 +250,7 @@ def parse_sub_patterns(self, data, parent, last, idx):
last.tail = text
else:
parent.text = text
el = self.build_element(m, item.builder, item.tags, index)
el = self.build_element(m, item.builder, item.tags, item.full_recursion, index)
parent.append(el)
last = el
# Move our position past the matched hunk
Expand All @@ -266,15 +271,15 @@ def parse_sub_patterns(self, data, parent, last, idx):
else:
parent.text = text

def build_element(self, m, builder, tags, index):
def build_element(self, m, builder, tags, full_recursion, index):
"""Element builder."""

if builder == 'double2':
return self.build_double2(m, tags, index)
return self.build_double2(m, tags, full_recursion, index)
elif builder == 'double':
return self.build_double(m, tags, index)
return self.build_double(m, tags, full_recursion, index)
else:
return self.build_single(m, tags, index)
return self.build_single(m, tags, full_recursion, index)

def handleMatch(self, m, data):
"""Parse patterns."""
Expand All @@ -288,7 +293,7 @@ def handleMatch(self, m, data):
if m1:
start = m1.start(0)
end = m1.end(0)
el = self.build_element(m1, item.builder, item.tags, index)
el = self.build_element(m1, item.builder, item.tags, item.full_recursion, index)
break
return el, start, end

Expand Down
2 changes: 1 addition & 1 deletion tests/extensions/betterem/betterem (normal).html
Expand Up @@ -11,7 +11,7 @@
<p>Test: <em>All will * be italic</em></p>
<p>Test: <em>All will *be italic</em></p>
<p>Test: <em>All will not</em> be italic*</p>
<p>Test: <em>All will not *</em> be italic*</p>
<p>Test: <em>All will not ** be italic</em></p>
<p>Test: <strong>All will * be bold</strong></p>
<p>Test: <em>All will *be italic</em>*</p>
<p>Test: <strong>All will not</strong>* be bold**</p>
Expand Down
2 changes: 1 addition & 1 deletion tests/extensions/betterem/betterem (reverse).html
Expand Up @@ -11,7 +11,7 @@
<p>Test: <em>All will _ be italic</em></p>
<p>Test: <em>All will _be italic</em></p>
<p>Test: <em>All will not</em> be italic_</p>
<p>Test: <em>All will not _</em> be italic_</p>
<p>Test: <em>All will not __ be italic</em></p>
<p>Test: <strong>All will _ be bold</strong></p>
<p>Test: <em>All will _be italic</em>_</p>
<p>Test: <strong>All will not</strong>_ be bold__</p>
Expand Down

0 comments on commit 18ba91e

Please sign in to comment.