Skip to content

Commit

Permalink
always escape \ when needed
Browse files Browse the repository at this point in the history
  • Loading branch information
brondsem committed Nov 5, 2012
1 parent a207970 commit 510c0e0
Show file tree
Hide file tree
Showing 5 changed files with 20 additions and 5 deletions.
9 changes: 8 additions & 1 deletion html2text.py
Expand Up @@ -744,7 +744,7 @@ def optwrap(self, text):
ordered_list_matcher = re.compile(r'\d+\.\s')
unordered_list_matcher = re.compile(r'[-\*\+]\s')
md_chars_matcher = re.compile(r"([\\\[\]\(\)])")
md_chars_matcher_all = re.compile(r"([\\`\*_{}\[\]\(\)#!])")
md_chars_matcher_all = re.compile(r"([`\*_{}\[\]\(\)#!])")
md_dot_matcher = re.compile(r"""
^ # start of line
(\s*\d+) # optional whitespace and a number
Expand All @@ -764,6 +764,12 @@ def optwrap(self, text):
(?=\s|\-) # followed by whitespace (bullet list, or spaced out hr)
# or another dash (header or hr)
""", flags=re.MULTILINE | re.VERBOSE)
# Characters that, when they directly follow a backslash, make that backslash
# need escaping (see the lookahead in md_backslash_matcher below).
# NOTE(review): this looks like the backslash-escape list from the original
# Markdown syntax documentation -- confirm before extending it.
slash_chars = r'\`*_{}[]()#+-.!'
# Matches a single literal backslash only when the next character is one of
# slash_chars.  The lookahead does not consume that character, so a run of
# backslashes is matched one backslash at a time.  slash_chars is passed
# through re.escape() so its members are taken literally inside the
# character class.  escape_md_section() substitutes each match with a
# doubled backslash.
md_backslash_matcher = re.compile(r'''
(\\) # match one slash
(?=[%s]) # followed by a char that requires escaping
''' % re.escape(slash_chars),
flags=re.VERBOSE)

def skipwrap(para):
# If the text begins with four spaces or one tab, it's a code block; don't wrap
Expand Down Expand Up @@ -807,6 +813,7 @@ def escape_md(text):

def escape_md_section(text, snob=False):
"""Escapes markdown-sensitive characters across whole document sections."""
text = md_backslash_matcher.sub(r"\\\1", text)
if snob:
text = md_chars_matcher_all.sub(r"\\\1", text)
text = md_dot_matcher.sub(r"\1\\\2", text)
Expand Down
4 changes: 4 additions & 0 deletions test/normal.html
Expand Up @@ -132,5 +132,9 @@ <h1>
<br>
- - -
</p>

<p>
c:\tmp, \\server\path, \_/, foo\bar, #\#, \\#
</p>
</body>
</html>
2 changes: 2 additions & 0 deletions test/normal.md
Expand Up @@ -50,3 +50,5 @@ not a hr
\---
\- - -

c:\tmp, \\\server\path, \\_/, foo\bar, #\\#, \\\\#

7 changes: 4 additions & 3 deletions test/normal_escape_snob.html
Expand Up @@ -28,9 +28,6 @@ <h1>
<li>
<span>apple</span>
</li>
<li>
<span>yam\\sweet potato</span>
</li>
</ul>
<li>
<span>final</span>
Expand Down Expand Up @@ -136,5 +133,9 @@ <h1>
<br>
- - -
</p>

<p>
c:\tmp, \\server\path, \_/, foo\bar, #\#, \\#
</p>
</body>
</html>
3 changes: 2 additions & 1 deletion test/normal_escape_snob.md
Expand Up @@ -6,7 +6,6 @@ first issue
* _**bold italic**_
* orange
* apple
* yam\\\\sweet potato
* final

text to separate lists
Expand Down Expand Up @@ -52,3 +51,5 @@ not a hr
\---
\- - -

c:\tmp, \\\server\path, \\\_/, foo\bar, \#\\\#, \\\\\#

0 comments on commit 510c0e0

Please sign in to comment.