Skip to content
This repository has been archived by the owner on Sep 17, 2018. It is now read-only.

Commit

Permalink
Add a post-processing step for <tt> and <code> tags
Browse files Browse the repository at this point in the history
These tags normally get converted to backticks (`), which doesn't work
when there are HTML tags inside. In that case, we need to avoid using
the markdown version and output HTML tags directly.

This solution is modelled after the <pre> hack from
9d4c019.
  • Loading branch information
fmarier committed Jul 29, 2012
1 parent 35ddc5a commit bb9e07e
Show file tree
Hide file tree
Showing 2 changed files with 74 additions and 9 deletions.
24 changes: 24 additions & 0 deletions blogger2ikiwiki.py
Expand Up @@ -123,6 +123,29 @@ def post_process_pre(text):
return "\n".join(out)


# Matches one <tt>/<code> span whose START and END placeholder comments
# (written by html2text's handle_tag) sit on the same line.  "." does not
# match a newline, so a match can never run across a line break, and the
# back-reference guarantees a TT start is only paired with a TT end (same
# for CODE).
_TT_CODE_SPAN = re.compile(
    r'<!-- START (TT|CODE) -->(.*?)<!-- END \1 (WITH|WITHOUT) TAGS -->')


def post_process_tt_code(text):
    """Replace the TT/CODE placeholder comments with their final markup.

    A span closed by an ``END ... WITHOUT TAGS`` marker becomes a Markdown
    code span delimited by backticks.  A span closed by an
    ``END ... WITH TAGS`` marker contains inline HTML, which does not work
    inside backticks, so it is emitted as a literal ``<tt>``/``<code>``
    element instead.

    Every span on a line is converted independently, so a line may mix
    TT and CODE spans, with or without tags.  A marker that has no partner
    on the same line is left untouched; the line is never dropped.

    text -- the converted document, as a single string.
    Returns the document with all complete spans rewritten.
    """
    def render(match):
        kind, body, tags = match.groups()
        if tags == 'WITHOUT':
            return '`' + body + '`'
        tag = kind.lower()
        return '<' + tag + '>' + body + '</' + tag + '>'

    return _TT_CODE_SPAN.sub(render, text)


# Raw strings keep the backslashes intact for the regex engine without
# relying on Python passing unknown string escapes through unchanged.

# A Markdown image with empty alt text wrapped in a link:
# [![](image-url)](link-url); group 1 is the link URL.
image_regexp = re.compile(r'\[!\[\]\([^)]+\)\]\(([^)]+)\)')
# An opening parenthesis followed by a blogspot.com URL that goes through
# an "s1600-h/" path component; group 1 is everything before "s1600-h/".
# The dots are escaped so that only literal dots in the host name match.
htmlimage_regexp = re.compile(r'(\(http://([^.]+\.){2}blogspot\.com/[^()]+/)s1600-h/')
# The last path component of a .jpg/.png path; group 1 is the file name.
filename_regexp = re.compile(r'.*/([^/]+\.(jpg|png))')
Expand Down Expand Up @@ -162,6 +185,7 @@ def post_process_images(text, image_directory):

def post_process(text, post_filename, is_comment):
text = post_process_pre(text)
text = post_process_tt_code(text)

if not is_comment:
image_directory = post_filename.split('.mdwn')[0]
Expand Down
59 changes: 50 additions & 9 deletions html2text.py
Expand Up @@ -212,6 +212,10 @@ def __init__(self, out=None, baseurl=''):
self.table = 0
self.startpre = 0
self.tags_in_pre = 0
self.in_tt = False
self.tags_in_tt = False
self.in_code = False
self.tags_in_code = False
self.code = False
self.br_toggle = ''
self.lastWasNL = 0
Expand Down Expand Up @@ -428,23 +432,37 @@ def handle_tag(self, tag, attrs, start):
self.p()

if tag in ['em', 'i', 'u'] and not self.ignore_emphasis:
if not self.pre:
self.o("_")
else:
self.tags_in_pre = 1
if self.pre or self.in_code or self.in_tt:
if self.pre:
self.tags_in_pre = 1
elif self.in_code:
self.tags_in_code = 1
elif self.in_tt:
self.tags_in_tt = 1

if start:
self.o("<i>")
else:
self.o("</i>")
if tag in ['strong', 'b'] and not self.ignore_emphasis:
if not self.pre:
self.o("**")
else:
self.tags_in_pre = 1
self.o("_")

if tag in ['strong', 'b'] and not self.ignore_emphasis:
if self.pre or self.in_code or self.in_tt:
if self.pre:
self.tags_in_pre = 1
elif self.in_code:
self.tags_in_code = 1
elif self.in_tt:
self.tags_in_tt = 1

if start:
self.o("<b>")
else:
self.o("</b>")
else:
self.o("**")

if tag in ['del', 'strike', 's']:
if start:
self.o("<"+tag+">")
Expand All @@ -456,7 +474,30 @@ def handle_tag(self, tag, attrs, start):
# handle some font attributes, but leave headers clean
self.handle_emphasis(start, tag_style, parent_style)

if tag in ["code", "tt"] and not self.pre: self.o('`') #TODO: `` `this` ``
#TODO: `` `this` ``
if tag == 'tt' and not self.pre:
if start:
self.in_tt = True
self.o('<!-- START TT -->')
else:
if self.tags_in_tt:
self.o('<!-- END TT WITH TAGS -->')
else:
self.o('<!-- END TT WITHOUT TAGS -->')
self.in_tt = False
self.tags_in_tt = False
if tag == 'code' and not self.pre:
if start:
self.in_code = True
self.o('<!-- START CODE -->')
else:
if self.tags_in_code:
self.o('<!-- END CODE WITH TAGS -->')
else:
self.o('<!-- END CODE WITHOUT TAGS -->')
self.in_code = False
self.tags_in_code = False

if tag == "abbr":
if start:
self.abbr_title = None
Expand Down

0 comments on commit bb9e07e

Please sign in to comment.