Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse files

starting Python v3 support (like http://python3porting.com/noconv.html strategy)
  • Loading branch information...
commit 5f60d4906d95b700952c074a555245058d16d742 1 parent 00833ad
@jedie authored
Showing with 635 additions and 566 deletions.
  1. +2 −0  README.creole
  2. +18 −16 creole/__init__.py
  3. +50 −49 creole/creole2html/emitter.py
  4. +67 −66 creole/creole2html/parser.py
  5. +9 −9 creole/creole2html/rules.py
  6. +7 −5 creole/creole2html/str2dict.py
  7. +1 −1  creole/exceptions.py
  8. +19 −19 creole/html2creole/emitter.py
  9. +24 −24 creole/html2rest/emitter.py
  10. +17 −17 creole/html2textile/emitter.py
  11. +2 −2 creole/html_parser/config.py
  12. +40 −39 creole/html_parser/parser.py
  13. +31 −19 creole/html_tools/deentity.py
  14. +24 −24 creole/html_tools/strip_html.py
  15. +10 −10 creole/html_tools/text_tools.py
  16. +23 −0 creole/py3compat.py
  17. +21 −17 creole/rest2html/clean_writer.py
  18. +2 −2 creole/setup_utils.py
  19. +22 −21 creole/shared/base_emitter.py
  20. +22 −21 creole/shared/document_tree.py
  21. +1 −1  creole/shared/example_macros.py
  22. +6 −3 creole/shared/html_parser.py
  23. +1 −1  creole/shared/markup_table.py
  24. +7 −7 creole/shared/unknown_tags.py
  25. +10 −9 demo.py
  26. +2 −2 setup.py
  27. +18 −18 test2.py
  28. +11 −0 tests/__init__.py
  29. +13 −13 tests/run_all_tests.py
  30. +6 −6 tests/test_TODOs.py
  31. +38 −35 tests/test_creole2html.py
  32. +13 −13 tests/test_cross_compare_all.py
  33. +4 −4 tests/test_cross_compare_creole.py
  34. +5 −5 tests/test_cross_compare_rest.py
  35. +1 −1  tests/test_cross_compare_textile.py
  36. +4 −4 tests/test_html2creole.py
  37. +5 −5 tests/test_html2rest.py
  38. +4 −4 tests/test_html2textile.py
  39. +9 −9 tests/test_macros.py
  40. +5 −5 tests/test_rest2html.py
  41. +19 −19 tests/test_utils.py
  42. +34 −33 tests/utils/base_unittest.py
  43. +8 −8 tests/utils/utils.py
View
2  README.creole
@@ -96,6 +96,8 @@ How to do this, read: https://code.google.com/p/python-creole/wiki/UseInSetup
= history
+* v0.9pre
+** Add Python v3 support (like http://python3porting.com/noconv.html strategy)
* v0.8.5
** Bugfix in html2creole: ignore links without href
* v0.8.4
View
34 creole/__init__.py
@@ -18,22 +18,22 @@
:license: GNU GPL v3 or above, see LICENSE for more details.
"""
-from __future__ import division, absolute_import
+from __future__ import division, absolute_import, print_function, unicode_literals
-__version__ = (0, 8, 5)
+__version__ = (0, 9, 0, "pre")
__api__ = (1, 0) # Creole 1.0 spec - http://wikicreole.org/
import os
import sys
-
from creole.creole2html.emitter import HtmlEmitter
from creole.creole2html.parser import BlockRules, CreoleParser
from creole.html2creole.emitter import CreoleEmitter
from creole.html2rest.emitter import ReStructuredTextEmitter
from creole.html2textile.emitter import TextileEmitter
from creole.html_parser.parser import HtmlParser
+from creole.py3compat import TEXT_TYPE
# TODO: Add git date to __version__
@@ -47,10 +47,10 @@ def creole2html(markup_string, debug=False, parser_kwargs={}, emitter_kwargs={})
"""
convert creole markup into html code
- >>> creole2html(u'This is **creole //markup//**!')
- u'<p>This is <strong>creole <i>markup</i></strong>!</p>'
+ >>> creole2html('This is **creole //markup//**!')
+ '<p>This is <strong>creole <i>markup</i></strong>!</p>'
"""
- assert isinstance(markup_string, unicode), "given markup_string must be unicode!"
+ assert isinstance(markup_string, TEXT_TYPE), "given markup_string must be unicode!"
# Create document tree from creole markup
document = CreoleParser(markup_string, **parser_kwargs).parse()
@@ -58,13 +58,13 @@ def creole2html(markup_string, debug=False, parser_kwargs={}, emitter_kwargs={})
document.debug()
# Build html code from document tree
- #print "creole2html HtmlEmitter kwargs:", emitter_kwargs
+ #print("creole2html HtmlEmitter kwargs:", emitter_kwargs)
return HtmlEmitter(document, **emitter_kwargs).emit()
def parse_html(html_string, debug=False, **parser_kwargs):
""" create the document tree from html code """
- assert isinstance(html_string, unicode), "given html_string must be unicode!"
+ assert isinstance(html_string, TEXT_TYPE), "given html_string must be unicode!"
h2c = HtmlParser(debug, **parser_kwargs)
document_tree = h2c.feed(html_string)
@@ -77,8 +77,8 @@ def html2creole(html_string, debug=False, parser_kwargs={}, emitter_kwargs={}):
"""
convert html code into creole markup
- >>> html2creole(u'<p>This is <strong>creole <i>markup</i></strong>!</p>')
- u'This is **creole //markup//**!'
+ >>> html2creole('<p>This is <strong>creole <i>markup</i></strong>!</p>')
+ 'This is **creole //markup//**!'
"""
document_tree = parse_html(html_string, debug, **parser_kwargs)
@@ -91,8 +91,8 @@ def html2textile(html_string, debug=False, parser_kwargs={}, emitter_kwargs={}):
"""
convert html code into textile markup
- >>> html2textile(u'<p>This is <strong>textile <i>markup</i></strong>!</p>')
- u'This is *textile __markup__*!'
+ >>> html2textile('<p>This is <strong>textile <i>markup</i></strong>!</p>')
+ 'This is *textile __markup__*!'
"""
document_tree = parse_html(html_string, debug, **parser_kwargs)
@@ -105,8 +105,8 @@ def html2rest(html_string, debug=False, parser_kwargs={}, emitter_kwargs={}):
"""
convert html code into textile markup
- >>> html2rest(u'<p>This is <strong>ReStructuredText</strong> <em>markup</em>!</p>')
- u'This is **ReStructuredText** *markup*!'
+ >>> html2rest('<p>This is <strong>ReStructuredText</strong> <em>markup</em>!</p>')
+ 'This is **ReStructuredText** *markup*!'
"""
document_tree = parse_html(html_string, debug, **parser_kwargs)
@@ -117,6 +117,8 @@ def html2rest(html_string, debug=False, parser_kwargs={}, emitter_kwargs={}):
if __name__ == '__main__':
- print "runing local doctest..."
+ print("runing local doctest...")
import doctest
- print doctest.testmod()#verbose=True)
+ print(
+ doctest.testmod()#verbose=True
+ )
View
99 creole/creole2html/emitter.py
@@ -8,15 +8,16 @@
:license: GNU GPL v3 or above, see LICENSE for more details.
"""
-from __future__ import division, absolute_import
+
+from __future__ import division, absolute_import, print_function, unicode_literals
from xml.sax.saxutils import escape
import sys
import traceback
-
from creole.creole2html.parser import CreoleParser
from creole.creole2html.str2dict import str2dict
+from creole.py3compat import TEXT_TYPE
class HtmlEmitter:
@@ -52,25 +53,25 @@ def text_emit(self, node):
return self.html_escape(node.content)
def separator_emit(self, node):
- return u'<hr />\n\n'
+ return '<hr />\n\n'
def paragraph_emit(self, node):
- return u'<p>%s</p>\n' % self.emit_children(node)
+ return '<p>%s</p>\n' % self.emit_children(node)
def _list_emit(self, node, list_type):
if node.parent.kind in ("document",):
# The first list item
- formatter = u''
+ formatter = ''
else:
- formatter = u'\n'
+ formatter = '\n'
if list_type == "li":
formatter += (
- u'%(i)s<%(t)s>%(c)s</%(t)s>'
+ '%(i)s<%(t)s>%(c)s</%(t)s>'
)
else:
formatter += (
- u'%(i)s<%(t)s>%(c)s\n'
+ '%(i)s<%(t)s>%(c)s\n'
'%(i)s</%(t)s>'
)
return formatter % {
@@ -80,30 +81,30 @@ def _list_emit(self, node, list_type):
}
def bullet_list_emit(self, node):
- return self._list_emit(node, list_type=u"ul")
+ return self._list_emit(node, list_type="ul")
def number_list_emit(self, node):
- return self._list_emit(node, list_type=u"ol")
+ return self._list_emit(node, list_type="ol")
def list_item_emit(self, node):
- return self._list_emit(node, list_type=u"li")
+ return self._list_emit(node, list_type="li")
def table_emit(self, node):
- return u'<table>\n%s</table>\n' % self.emit_children(node)
+ return '<table>\n%s</table>\n' % self.emit_children(node)
def table_row_emit(self, node):
- return u'<tr>\n%s</tr>\n' % self.emit_children(node)
+ return '<tr>\n%s</tr>\n' % self.emit_children(node)
def table_cell_emit(self, node):
- return u'\t<td>%s</td>\n' % self.emit_children(node)
+ return '\t<td>%s</td>\n' % self.emit_children(node)
def table_head_emit(self, node):
- return u'\t<th>%s</th>\n' % self.emit_children(node)
+ return '\t<th>%s</th>\n' % self.emit_children(node)
#--------------------------------------------------------------------------
def _typeface(self, node, tag):
- return u'<%(tag)s>%(data)s</%(tag)s>' % {
+ return '<%(tag)s>%(data)s</%(tag)s>' % {
"tag": tag,
"data": self.emit_children(node),
}
@@ -120,7 +121,7 @@ def superscript_emit(self, node):
def subscript_emit(self, node):
return self._typeface(node, tag="sub")
def underline_emit(self, node):
- return self._typeface(node, tag="u")
+ return self._typeface(node, tag="u")
def small_emit(self, node):
return self._typeface(node, tag="small")
def delete_emit(self, node):
@@ -129,11 +130,11 @@ def delete_emit(self, node):
#--------------------------------------------------------------------------
def header_emit(self, node):
- return u'<h%d>%s</h%d>\n' % (
+ return '<h%d>%s</h%d>\n' % (
node.level, self.html_escape(node.content), node.level)
def preformatted_emit(self, node):
- return u'<pre>%s</pre>' % self.html_escape(node.content)
+ return '<pre>%s</pre>' % self.html_escape(node.content)
def link_emit(self, node):
target = node.content
@@ -142,18 +143,18 @@ def link_emit(self, node):
else:
inside = self.html_escape(target)
- return u'<a href="%s">%s</a>' % (
+ return '<a href="%s">%s</a>' % (
self.attr_escape(target), inside)
def image_emit(self, node):
target = node.content
text = self.attr_escape(self.get_text(node))
- return u'<img src="%s" title="%s" alt="%s" />' % (
+ return '<img src="%s" title="%s" alt="%s" />' % (
self.attr_escape(target), text, text)
def macro_emit(self, node):
- #print node.debug()
+ #print(node.debug())
macro_name = node.macro_name
text = node.content
macro = None
@@ -161,10 +162,10 @@ def macro_emit(self, node):
args = node.macro_args
try:
macro_kwargs = str2dict(args)
- except ValueError, e:
+ except ValueError as e:
exc_info = sys.exc_info()
return self.error(
- u"Wrong macro arguments: %r for macro '%s' (maybe wrong macro tag syntax?)" % (
+ "Wrong macro arguments: %r for macro '%s' (maybe wrong macro tag syntax?)" % (
args, macro_name
),
exc_info
@@ -180,24 +181,24 @@ def macro_emit(self, node):
if isinstance(self.macros, dict):
try:
macro = self.macros[macro_name]
- except KeyError, e:
+ except KeyError as e:
exc_info = sys.exc_info()
else:
try:
macro = getattr(self.macros, macro_name)
- except AttributeError, e:
+ except AttributeError as e:
exc_info = sys.exc_info()
if macro == None:
return self.error(
- u"Macro '%s' doesn't exist" % macro_name,
+ "Macro '%s' doesn't exist" % macro_name,
exc_info
)
try:
result = macro(**macro_kwargs)
- except TypeError, err:
- msg = u"Macro '%s' error: %s" % (macro_name, err)
+ except TypeError as err:
+ msg = "Macro '%s' error: %s" % (macro_name, err)
exc_info = sys.exc_info()
if self.verbose > 1:
# Inject more information about the macro in traceback
@@ -206,21 +207,21 @@ def macro_emit(self, node):
filename = inspect.getfile(macro)
try:
sourceline = inspect.getsourcelines(macro)[0][0].strip()
- except IOError, err:
+ except IOError as err:
evalue = etype("%s (error getting sourceline: %s from %s)" % (evalue, err, filename))
else:
evalue = etype("%s (sourceline: %r from %s)" % (evalue, sourceline, filename))
exc_info = etype, evalue, etb
return self.error(msg, exc_info)
- except Exception, err:
+ except Exception as err:
return self.error(
- u"Macro '%s' error: %s" % (macro_name, err),
+ "Macro '%s' error: %s" % (macro_name, err),
exc_info=sys.exc_info()
)
- if not isinstance(result, unicode):
- msg = u"Macro '%s' doesn't return a unicode string!" % macro_name
+ if not isinstance(result, TEXT_TYPE):
+ msg = "Macro '%s' doesn't return a unicode string!" % macro_name
if self.verbose > 1:
msg += " - returns: %r, type %r" % (result, type(result))
return self.error(msg)
@@ -234,22 +235,22 @@ def macro_emit(self, node):
def break_emit(self, node):
if node.parent.kind == "list_item":
- return u"<br />\n" + "\t" * node.parent.level
+ return "<br />\n" + "\t" * node.parent.level
elif node.parent.kind in ("table_head", "table_cell"):
- return u"<br />\n\t\t"
+ return "<br />\n\t\t"
else:
- return u"<br />\n"
+ return "<br />\n"
def line_emit(self, node):
- return u"\n"
+ return "\n"
def pre_block_emit(self, node):
""" pre block, with newline at the end """
- return u"<pre>%s</pre>\n" % self.html_escape(node.content)
+ return "<pre>%s</pre>\n" % self.html_escape(node.content)
def pre_inline_emit(self, node):
""" pre without newline at the end """
- return u"<tt>%s</tt>" % self.html_escape(node.content)
+ return "<tt>%s</tt>" % self.html_escape(node.content)
def default_emit(self, node):
"""Fallback function for emitting unknown nodes."""
@@ -257,11 +258,11 @@ def default_emit(self, node):
def emit_children(self, node):
"""Emit all the children of a node."""
- return u''.join([self.emit_node(child) for child in node.children])
+ return ''.join([self.emit_node(child) for child in node.children])
def emit_node(self, node):
"""Emit a single node."""
- #print "%s_emit: %r" % (node.kind, node.content)
+ #print("%s_emit: %r" % (node.kind, node.content))
emit = getattr(self, '%s_emit' % node.kind, self.default_emit)
return emit(node)
@@ -279,23 +280,23 @@ def error(self, text, exc_info=None):
self.stderr.write(exception)
if self.verbose > 0:
- return u"[Error: %s]\n" % text
+ return "[Error: %s]\n" % text
else:
# No error output
- return u""
+ return ""
if __name__ == "__main__":
- txt = u"""this is **bold** ok?
+ txt = """this is **bold** ok?
for example ** this sentence"""
- print "-" * 80
+ print("-" * 80)
# from creole_alt.creole import CreoleParser
p = CreoleParser(txt)
document = p.parse()
p.debug()
html = HtmlEmitter(document).emit()
- print html
- print "-" * 79
- print html.replace(" ", ".").replace("\n", "\\n\n")
+ print(html)
+ print("-" * 79)
+ print(html.replace(" ", ".").replace("\n", "\\n\n"))
View
133 creole/creole2html/parser.py
@@ -22,12 +22,13 @@
:license: GNU GPL v3 or above, see LICENSE for more details.
"""
-from __future__ import division, absolute_import
+from __future__ import division, absolute_import, print_function, unicode_literals
import re
from creole.creole2html.rules import BlockRules, INLINE_FLAGS, INLINE_RULES, \
SpecialRules, InlineRules
+from creole.py3compat import TEXT_TYPE
from creole.shared.document_tree import DocNode, DebugList
@@ -59,7 +60,7 @@ class CreoleParser:
def __init__(self, raw, block_rules=BlockRules, blog_line_breaks=True):
- assert isinstance(raw, unicode)
+ assert isinstance(raw, TEXT_TYPE)
self.raw = raw
# setup block element rules:
@@ -109,7 +110,7 @@ def _upto_block(self):
# same method needs several names, because of group names in regexps.
def _text_repl(self, groups):
-# print "_text_repl()", self.cur.kind
+# print("_text_repl()", self.cur.kind)
# self.debug_groups(groups)
if self.cur.kind in ('table', 'table_row', 'bullet_list', 'number_list'):
@@ -118,7 +119,7 @@ def _text_repl(self, groups):
if self.cur.kind in ('document', 'section', 'blockquote'):
self.cur = DocNode('paragraph', self.cur)
- text = groups.get('text', u"")
+ text = groups.get('text', "")
if groups.get('space'):
# use wiki style line breaks and separate a new line with one space
@@ -128,7 +129,7 @@ def _text_repl(self, groups):
if groups.get('break') and self.cur.kind in ('paragraph',
'emphasis', 'strong', 'pre_inline'):
- self.last_text_break = DocNode('break', self.cur, u"")
+ self.last_text_break = DocNode('break', self.cur, "")
self.text = None
_break_repl = _text_repl
@@ -138,7 +139,7 @@ def _url_repl(self, groups):
"""Handle raw urls in text."""
if not groups.get('escaped_url'):
# this url is NOT escaped
- target = groups.get('url_target', u"")
+ target = groups.get('url_target', "")
node = DocNode('link', self.cur)
node.content = target
DocNode('text', node, node.content)
@@ -146,7 +147,7 @@ def _url_repl(self, groups):
else:
# this url is escaped, we render it as text
if self.text is None:
- self.text = DocNode('text', self.cur, u"")
+ self.text = DocNode('text', self.cur, "")
self.text.content += groups.get('url_target')
_url_target_repl = _url_repl
_url_proto_repl = _url_repl
@@ -154,8 +155,8 @@ def _url_repl(self, groups):
def _link_repl(self, groups):
"""Handle all kinds of links."""
- target = groups.get('link_target', u"")
- text = (groups.get('link_text', u"") or u"").strip()
+ target = groups.get('link_target', "")
+ text = (groups.get('link_text', "") or "").strip()
parent = self.cur
self.cur = DocNode('link', self.cur)
self.cur.content = target
@@ -177,13 +178,13 @@ def _add_macro(self, groups, macro_type, name_key, args_key, text_key=None):
assert macro_type in ("macro_inline", "macro_block")
if text_key:
- macro_text = groups.get(text_key, u"").strip()
+ macro_text = groups.get(text_key, "").strip()
else:
macro_text = None
node = DocNode(macro_type, self.cur, macro_text)
node.macro_name = groups[name_key]
- node.macro_args = groups.get(args_key, u"").strip()
+ node.macro_args = groups.get(args_key, "").strip()
self.text = None
@@ -199,10 +200,10 @@ def _macro_block_repl(self, groups):
self.cur = self.root
self._add_macro(
groups,
- macro_type=u"macro_block",
- name_key=u"macro_block_start",
- args_key=u"macro_block_args",
- text_key=u"macro_block_text",
+ macro_type="macro_block",
+ name_key="macro_block_start",
+ args_key="macro_block_args",
+ text_key="macro_block_text",
)
_macro_block_start_repl = _macro_block_repl
_macro_block_args_repl = _macro_block_repl
@@ -214,9 +215,9 @@ def _macro_tag_repl(self, groups):
"""
self._add_macro(
groups,
- macro_type=u"macro_inline",
- name_key=u"macro_tag_name",
- args_key=u"macro_tag_args",
+ macro_type="macro_inline",
+ name_key="macro_tag_name",
+ args_key="macro_tag_args",
text_key=None,
)
_macro_tag_name_repl = _macro_tag_repl
@@ -229,10 +230,10 @@ def _macro_inline_repl(self, groups):
"""
self._add_macro(
groups,
- macro_type=u"macro_inline",
- name_key=u"macro_inline_start",
- args_key=u"macro_inline_args",
- text_key=u"macro_inline_text",
+ macro_type="macro_inline",
+ name_key="macro_inline_start",
+ args_key="macro_inline_args",
+ text_key="macro_inline_text",
)
_macro_inline_start_repl = _macro_inline_repl
_macro_inline_args_repl = _macro_inline_repl
@@ -242,8 +243,8 @@ def _macro_inline_repl(self, groups):
def _image_repl(self, groups):
"""Handles images and attachments included in the page."""
- target = groups.get('image_target', u"").strip()
- text = (groups.get('image_text', u"") or u"").strip()
+ target = groups.get('image_target', "").strip()
+ text = (groups.get('image_text', "") or "").strip()
node = DocNode("image", self.cur, target)
DocNode('text', node, text or node.content)
self.text = None
@@ -256,8 +257,8 @@ def _separator_repl(self, groups):
def _item_repl(self, groups):
""" List item """
- bullet = groups.get('item_head', u"")
- text = groups.get('item_text', u"")
+ bullet = groups.get('item_head', "")
+ text = groups.get('item_text', "")
if bullet[-1] == '#':
kind = 'number_list'
else:
@@ -315,7 +316,7 @@ def _table_repl(self, groups):
else:
text = m.group('head').strip('= ')
self.cur = DocNode('table_head', tr)
- self.text = DocNode('text', self.cur, u"")
+ self.text = DocNode('text', self.cur, "")
self.parse_inline(text)
self.cur = tb
@@ -324,7 +325,7 @@ def _table_repl(self, groups):
def _pre_block_repl(self, groups):
self._upto_block()
kind = groups.get('pre_block_kind', None)
- text = groups.get('pre_block_text', u"")
+ text = groups.get('pre_block_text', "")
def remove_tilde(m):
return m.group('indent') + m.group('rest')
text = self.pre_escape_re.sub(remove_tilde, text)
@@ -338,10 +339,10 @@ def remove_tilde(m):
def _line_repl(self, groups):
""" Transfer newline from the original markup into the html code """
self._upto_block()
- DocNode('line', self.cur, u"")
+ DocNode('line', self.cur, "")
def _pre_inline_repl(self, groups):
- text = groups.get('pre_inline_text', u"")
+ text = groups.get('pre_inline_text', "")
DocNode('pre_inline', self.cur, text)
self.text = None
_pre_inline_text_repl = _pre_inline_repl
@@ -401,13 +402,13 @@ def _linebreak_repl(self, groups):
def _escape_repl(self, groups):
if self.text is None:
- self.text = DocNode('text', self.cur, u"")
- self.text.content += groups.get('escaped_char', u"")
+ self.text = DocNode('text', self.cur, "")
+ self.text.content += groups.get('escaped_char', "")
def _char_repl(self, groups):
if self.text is None:
- self.text = DocNode('text', self.cur, u"")
- self.text.content += groups.get('char', u"")
+ self.text = DocNode('text', self.cur, "")
+ self.text.content += groups.get('char', "")
#--------------------------------------------------------------------------
@@ -415,14 +416,14 @@ def _replace(self, match):
"""Invoke appropriate _*_repl method. Called for every matched group."""
# def debug(groups):
-# from pprint import pformat
+# from pprint import pformat
# data = dict([
-# group for group in groups.iteritems() if group[1] is not None
+# group for group in groups.items() if group[1] is not None
# ])
-# print "%s\n" % pformat(data)
+# print("%s\n" % pformat(data))
groups = match.groupdict()
- for name, text in groups.iteritems():
+ for name, text in groups.items():
if text is not None:
#if name != "char": debug(groups)
replace_method = getattr(self, '_%s_repl' % name)
@@ -450,29 +451,29 @@ def debug(self, start_node=None):
"""
Display the current document tree
"""
- print "_" * 80
+ print("_" * 80)
if start_node == None:
start_node = self.root
- print " document tree:"
+ print(" document tree:")
else:
- print " tree from %s:" % start_node
+ print(" tree from %s:" % start_node)
- print "=" * 80
+ print("=" * 80)
def emit(node, ident=0):
for child in node.children:
- print u"%s%s: %r" % (u" " * ident, child.kind, child.content)
+ print("%s%s: %r" % (" " * ident, child.kind, child.content))
emit(child, ident + 4)
emit(start_node)
- print "*" * 80
+ print("*" * 80)
def debug_groups(self, groups):
- print "_" * 80
- print " debug groups:"
- for name, text in groups.iteritems():
+ print("_" * 80)
+ print(" debug groups:")
+ for name, text in groups.items():
if text is not None:
- print "%15s: %r" % (name, text)
- print "-" * 80
+ print("%15s: %r" % (name, text))
+ print("-" * 80)
@@ -480,11 +481,11 @@ def debug_groups(self, groups):
if __name__ == "__main__":
import doctest
- print doctest.testmod()
+ print(doctest.testmod())
- print "-" * 80
+ print("-" * 80)
- txt = u"""one **line** and //jo//
+ txt = """one **line** and //jo//
second line
**third**
@@ -492,8 +493,8 @@ def debug_groups(self, groups):
block 2b
block 2c"""
- print txt
- print "-" * 80
+ print(txt)
+ print("-" * 80)
blog_line_breaks = False
@@ -505,19 +506,19 @@ def debug_groups(self, groups):
def display_match(match):
groups = match.groupdict()
- for name, text in groups.iteritems():
+ for name, text in groups.items():
if name != "char" and text != None:
- print "%20s: %r" % (name, text)
+ print("%20s: %r" % (name, text))
- parser = CreoleParser(u"", blog_line_breaks=blog_line_breaks)
+ parser = CreoleParser("", blog_line_breaks=blog_line_breaks)
- print "_" * 80
- print "merged block rules test:"
+ print("_" * 80)
+ print("merged block rules test:")
re.sub(parser.block_re, display_match, txt)
- print "_" * 80
- print "merged inline rules test:"
+ print("_" * 80)
+ print("merged inline rules test:")
re.sub(parser.inline_re, display_match, txt)
@@ -526,14 +527,14 @@ def test_single(rules, flags, txt):
rexp = re.compile(rule, flags)
rexp.sub(display_match, txt)
- print "_" * 80
- print "single block rules match test:"
+ print("_" * 80)
+ print("single block rules match test:")
block_rules = BlockRules()
test_single(block_rules.rules, block_rules.re_flags, txt)
- print "_" * 80
- print "single inline rules match test:"
+ print("_" * 80)
+ print("single inline rules match test:")
test_single(INLINE_RULES, INLINE_FLAGS, txt)
- print "---END---"
+ print("---END---")
View
18 creole/creole2html/rules.py
@@ -9,7 +9,7 @@
:license: GNU GPL v3 or above, see LICENSE for more details.
"""
-from __future__ import division, absolute_import
+from __future__ import division, absolute_import, print_function, unicode_literals
import re
@@ -229,22 +229,22 @@ def _verify_rules(rules, flags):
rule_list = []
for rule in rules:
try:
-# print rule
+# print(rule)
re.compile(rule, flags)
# Try to merge the rules. e.g. Check if group named double used.
rule_list.append(rule)
re.compile('|'.join(rule_list), flags)
- except Exception, err:
- print " *** Error with rule:"
- print rule
- print " -" * 39
+ except Exception as err:
+ print(" *** Error with rule:")
+ print(rule)
+ print(" -" * 39)
raise
- print "Rule test ok."
+ print("Rule test ok.")
if __name__ == "__main__":
import doctest
- print doctest.testmod()
+ print(doctest.testmod())
- print "-" * 80
+ print("-" * 80)
View
12 creole/creole2html/str2dict.py
@@ -11,10 +11,12 @@
:license: GNU GPL v3 or above, see LICENSE for more details.
"""
-from __future__ import division, absolute_import
+from __future__ import division, absolute_import, print_function, unicode_literals
import shlex
+from creole.py3compat import TEXT_TYPE, PY3
+
# For str2dict()
KEYWORD_MAP = {
@@ -30,16 +32,16 @@ def str2dict(raw_content, encoding="utf-8"):
>>> str2dict('key1="value1" key2="value2"')
{'key2': 'value2', 'key1': 'value1'}
- >>> str2dict(u'A="B" C=1 D=1.1 E=True F=False G=None')
+ >>> str2dict('A="B" C=1 D=1.1 E=True F=False G=None')
{'A': 'B', 'C': 1, 'E': True, 'D': '1.1', 'G': None, 'F': False}
>>> str2dict('''key1="'1'" key2='"2"' key3="""'3'""" ''')
{'key3': 3, 'key2': 2, 'key1': 1}
- >>> str2dict(u'unicode=True')
+ >>> str2dict('unicode=True')
{'unicode': True}
"""
- if isinstance(raw_content, unicode):
+ if not PY3 and isinstance(raw_content, TEXT_TYPE):
# shlex.split doesn't work with unicode?!?
raw_content = raw_content.encode(encoding)
@@ -66,4 +68,4 @@ def str2dict(raw_content, encoding="utf-8"):
if __name__ == "__main__":
import doctest
- print doctest.testmod()
+ print(doctest.testmod())
View
2  creole/exceptions.py
@@ -9,7 +9,7 @@
:license: GNU GPL v3 or above, see LICENSE for more details.
"""
-from __future__ import division, absolute_import
+from __future__ import division, absolute_import, print_function, unicode_literals
class DocutilsImportError(ImportError):
pass
View
38 creole/html2creole/emitter.py
@@ -10,7 +10,7 @@
:license: GNU GPL v3 or above, see LICENSE for more details.
"""
-from __future__ import division, absolute_import
+from __future__ import division, absolute_import, print_function, unicode_literals
import posixpath
from creole.shared.base_emitter import BaseEmitter
@@ -36,13 +36,13 @@ def emit(self):
def blockdata_pre_emit(self, node):
""" pre block -> with newline at the end """
- return u"{{{%s}}}\n" % self.deentity.replace_all(node.content)
+ return "{{{%s}}}\n" % self.deentity.replace_all(node.content)
def inlinedata_pre_emit(self, node):
""" a pre inline block -> no newline at the end """
- return u"{{{%s}}}" % self.deentity.replace_all(node.content)
+ return "{{{%s}}}" % self.deentity.replace_all(node.content)
def blockdata_pass_emit(self, node):
- return u"%s\n\n" % node.content
+ return "%s\n\n" % node.content
return node.content
#--------------------------------------------------------------------------
@@ -55,12 +55,12 @@ def p_emit(self, node):
def br_emit(self, node):
if self._inner_list != "":
- return u"\\\\"
+ return "\\\\"
else:
- return u"\n"
+ return "\n"
def headline_emit(self, node):
- return u"%s %s\n\n" % (u"=" * node.level, self.emit_children(node))
+ return "%s %s\n\n" % ("=" * node.level, self.emit_children(node))
#--------------------------------------------------------------------------
@@ -90,7 +90,7 @@ def del_emit(self, node):
#--------------------------------------------------------------------------
def hr_emit(self, node):
- return u"----\n\n"
+ return "----\n\n"
def a_emit(self, node):
link_text = self.emit_children(node)
@@ -100,15 +100,15 @@ def a_emit(self, node):
# e.g.: <a name="anchor-one">foo</a>
return link_text
if link_text == url:
- return u"[[%s]]" % url
+ return "[[%s]]" % url
else:
- return u"[[%s|%s]]" % (url, link_text)
+ return "[[%s|%s]]" % (url, link_text)
def img_emit(self, node):
src = node.attrs["src"]
if src.split(':')[0] == 'data':
- return u""
+ return ""
title = node.attrs.get("title", "")
alt = node.attrs.get("alt", "")
@@ -120,7 +120,7 @@ def img_emit(self, node):
if text == "": # Use filename as picture text
text = posixpath.basename(src)
- return u"{{%s|%s}}" % (src, text)
+ return "{{%s|%s}}" % (src, text)
#--------------------------------------------------------------------------
@@ -145,12 +145,12 @@ def span_emit(self, node):
if __name__ == '__main__':
import doctest
- print doctest.testmod()
+ print(doctest.testmod())
# import sys;sys.exit()
from creole.html_parser.parser import HtmlParser
- data = u"""
+ data = """
<ul>
<li><p>item 1</p>
<ul>
@@ -167,7 +167,7 @@ def span_emit(self, node):
"""
-# print data.strip()
+# print(data.strip())
h2c = HtmlParser(
debug=True
)
@@ -178,7 +178,7 @@ def span_emit(self, node):
debug=True
)
content = e.emit()
- print "*" * 79
- print content
- print "*" * 79
- print content.replace(" ", ".").replace("\n", "\\n\n")
+ print("*" * 79)
+ print(content)
+ print("*" * 79)
+ print(content.replace(" ", ".").replace("\n", "\\n\n"))
View
48 creole/html2rest/emitter.py
@@ -13,7 +13,7 @@
:license: GNU GPL v3 or above, see LICENSE for more details.
"""
-from __future__ import division, absolute_import
+from __future__ import division, absolute_import, print_function, unicode_literals
import posixpath
from creole.html_parser.config import BLOCK_TAGS
@@ -60,17 +60,17 @@ def blockdata_pre_emit(self, node):
def inlinedata_pre_emit(self, node):
""" a pre inline block -> no newline at the end """
- return u"<pre>%s</pre>" % self.deentity.replace_all(node.content)
+ return "<pre>%s</pre>" % self.deentity.replace_all(node.content)
def blockdata_pass_emit(self, node):
- return u"%s\n\n" % node.content
+ return "%s\n\n" % node.content
return node.content
#--------------------------------------------------------------------------
def emit_children(self, node):
"""Emit all the children of a node."""
- return u"".join(self.emit_children_list(node))
+ return "".join(self.emit_children_list(node))
def emit(self):
"""Emit the document represented by self.root DOM tree."""
@@ -81,19 +81,19 @@ def document_emit(self, node):
result = self.emit_children(node)
if self._substitution_data:
# add rest at the end
- result += u"%s\n\n" % self._get_block_data()
+ result += "%s\n\n" % self._get_block_data()
return result
def emit_node(self, node):
- result = u""
+ result = ""
if self._substitution_data and node.parent == self.root:
- result += u"%s\n\n" % self._get_block_data()
+ result += "%s\n\n" % self._get_block_data()
result += super(ReStructuredTextEmitter, self).emit_node(node)
return result
def p_emit(self, node):
- return u"%s\n\n" % self.emit_children(node)
+ return "%s\n\n" % self.emit_children(node)
HEADLINE_DATA = {
1:("=", True),
@@ -163,7 +163,7 @@ def small_emit(self, node):
#--------------------------------------------------------------------------
def hr_emit(self, node):
- return u"----\n\n"
+ return "----\n\n"
def _should_do_substitution(self, node):
node = node.parent
@@ -182,18 +182,18 @@ def a_emit(self, node):
if self._should_do_substitution(node):
# make a hyperlink reference
self._substitution_data.append(
- u".. _%s: %s" % (link_text, url)
+ ".. _%s: %s" % (link_text, url)
)
return "`%s`_" % link_text
# create a inline hyperlink
- return u"`%s <%s>`_" % (link_text, url)
+ return "`%s <%s>`_" % (link_text, url)
def img_emit(self, node):
src = node.attrs["src"]
if src.split(':')[0] == 'data':
- return u""
+ return ""
title = node.attrs.get("title", "")
alt = node.attrs.get("alt", "")
@@ -206,21 +206,21 @@ def img_emit(self, node):
substitution_text = posixpath.basename(src)
self._substitution_data.append(
- u".. |%s| image:: %s" % (substitution_text, src)
+ ".. |%s| image:: %s" % (substitution_text, src)
)
- return u"|%s|" % substitution_text
+ return "|%s|" % substitution_text
#--------------------------------------------------------------------------
def code_emit(self, node):
- return u"``%s``" % self._emit_content(node)
+ return "``%s``" % self._emit_content(node)
#--------------------------------------------------------------------------
def li_emit(self, node):
content = self.emit_children(node).strip("\n")
- result = u"\n%s%s %s\n" % (
+ result = "\n%s%s %s\n" % (
" " * (node.level - 1), self._list_markup, content
)
return result
@@ -253,17 +253,17 @@ def table_emit(self, node):
)
self.emit_children(node)
content = self._table.get_rest_table()
- return u"%s\n\n" % content
+ return "%s\n\n" % content
if __name__ == '__main__':
import doctest
- print doctest.testmod()
+ print(doctest.testmod())
# import sys;sys.exit()
from creole.html_parser.parser import HtmlParser
- data = u"""<p>A nested bullet lists:</p>
+ data = """<p>A nested bullet lists:</p>
<ul>
<li><p>item 1</p>
<ul>
@@ -288,7 +288,7 @@ def table_emit(self, node):
<p>5 <img alt="Image without files ext?" src="/path1/path2/image" /> five</p>
"""
- print data
+ print(data)
h2c = HtmlParser(
# debug=True
)
@@ -299,8 +299,8 @@ def table_emit(self, node):
debug=True
)
content = e.emit()
- print "*" * 79
- print content
- print "*" * 79
- print content.replace(" ", ".").replace("\n", "\\n\n")
+ print("*" * 79)
+ print(content)
+ print("*" * 79)
+ print(content.replace(" ", ".").replace("\n", "\\n\n"))
View
34 creole/html2textile/emitter.py
@@ -10,7 +10,7 @@
:license: GNU GPL v3 or above, see LICENSE for more details.
"""
-from __future__ import division, absolute_import
+from __future__ import division, absolute_import, print_function, unicode_literals
import posixpath
from creole.shared.base_emitter import BaseEmitter
@@ -37,23 +37,23 @@ def emit(self):
def blockdata_pre_emit(self, node):
""" pre block -> with newline at the end """
- return u"<pre>%s</pre>\n" % self.deentity.replace_all(node.content)
+ return "<pre>%s</pre>\n" % self.deentity.replace_all(node.content)
def inlinedata_pre_emit(self, node):
""" a pre inline block -> no newline at the end """
- return u"<pre>%s</pre>" % self.deentity.replace_all(node.content)
+ return "<pre>%s</pre>" % self.deentity.replace_all(node.content)
def blockdata_pass_emit(self, node):
- return u"%s\n\n" % node.content
+ return "%s\n\n" % node.content
return node.content
#--------------------------------------------------------------------------
def p_emit(self, node):
- return u"%s\n\n" % self.emit_children(node)
+ return "%s\n\n" % self.emit_children(node)
def headline_emit(self, node):
- return u"h%i. %s\n\n" % (node.level, self.emit_children(node))
+ return "h%i. %s\n\n" % (node.level, self.emit_children(node))
#--------------------------------------------------------------------------
@@ -91,18 +91,18 @@ def code_emit(self, node):
#--------------------------------------------------------------------------
def hr_emit(self, node):
- return u"----\n\n"
+ return "----\n\n"
def a_emit(self, node):
link_text = self.emit_children(node)
url = node.attrs["href"]
- return u'"%s":%s' % (link_text, url)
+ return '"%s":%s' % (link_text, url)
def img_emit(self, node):
src = node.attrs["src"]
if src.split(':')[0] == 'data':
- return u""
+ return ""
title = node.attrs.get("title", "")
alt = node.attrs.get("alt", "")
@@ -114,7 +114,7 @@ def img_emit(self, node):
if text == "": # Use filename as picture text
text = posixpath.basename(src)
- return u"!%s(%s)!" % (src, text)
+ return "!%s(%s)!" % (src, text)
#--------------------------------------------------------------------------
@@ -133,12 +133,12 @@ def ol_emit(self, node):
if __name__ == '__main__':
import doctest
- print doctest.testmod()
+ print(doctest.testmod())
# import sys;sys.exit()
from creole.html_parser.parser import HtmlParser
- data = u"""
+ data = """
<h1>Textile</h1>
<table>
<tr>
@@ -152,7 +152,7 @@ def ol_emit(self, node):
</table>
"""
-# print data.strip()
+# print(data.strip())
h2c = HtmlParser(
debug=True
)
@@ -163,7 +163,7 @@ def ol_emit(self, node):
debug=True
)
content = e.emit()
- print "*" * 79
- print content
- print "*" * 79
- print content.replace(" ", ".").replace("\n", "\\n\n")
+ print("*" * 79)
+ print(content)
+ print("*" * 79)
+ print(content.replace(" ", ".").replace("\n", "\\n\n"))
View
4 creole/html_parser/config.py
@@ -11,13 +11,13 @@
:license: GNU GPL v3 or above, see LICENSE for more details.
"""
-from __future__ import division, absolute_import
+from __future__ import division, absolute_import, print_function, unicode_literals
BLOCK_TAGS = (
"address", "blockquote", "center", "dir", "div", "dl", "fieldset",
"form",
"h1", "h2", "h3", "h4", "h5", "h6",
- "hr", "ins", "isindex", "menu", "noframes", "noscript",
+ "hr", "ins", "isindex", "menu", "noframes", "noscript",
"ul", "ol", "li", "table", "th", "tr", "td",
"p", "pre",
"br"
View
79 creole/html_parser/parser.py
@@ -10,14 +10,15 @@
:license: GNU GPL v3 or above, see LICENSE for more details.
"""
-from __future__ import division, absolute_import
+from __future__ import division, absolute_import, print_function, unicode_literals
import re
+import sys
import warnings
-from HTMLParser import HTMLParser
from creole.html_parser.config import BLOCK_TAGS, IGNORE_TAGS
from creole.html_tools.strip_html import strip_html
+from creole.py3compat import TEXT_TYPE, BINARY_TYPE
from creole.shared.document_tree import DocNode, DebugList
from creole.shared.html_parser import HTMLParser2
@@ -51,32 +52,32 @@ class HtmlParser(HTMLParser2):
parse html code and create a document tree.
>>> p = HtmlParser()
- >>> p.feed(u"<p>html <strong>code</strong></p>")
+ >>> p.feed("<p>html <strong>code</strong></p>")
<DocNode document: None>
>>> p.debug()
________________________________________________________________________________
document tree:
================================================================================
p
- data: u'html '
+ data: 'html '
strong
- data: u'code'
+ data: 'code'
********************************************************************************
>>> p = HtmlParser()
- >>> p.feed(u"<p>html1 <script>var foo='<em>BAR</em>';</script> html2</p>")
+ >>> p.feed("<p>html1 <script>var foo='<em>BAR</em>';</script> html2</p>")
<DocNode document: None>
>>> p.debug()
________________________________________________________________________________
document tree:
================================================================================
p
- data: u'html1 '
+ data: 'html1 '
script
- data: u"var foo='<em>BAR"
- data: u'</em>'
- data: u"';"
- data: u' html2'
+ data: "var foo='<em>BAR"
+ data: '</em>'
+ data: "';"
+ data: ' html2'
********************************************************************************
"""
# placeholder html tag for pre cutout areas:
@@ -104,11 +105,11 @@ def __init__(self, debug=False):
def _pre_cut(self, data, type, placeholder):
if self.debugging:
- print "append blockdata: %r" % data
- assert isinstance(data, unicode), "blockdata is not unicode"
+ print("append blockdata: %r" % data)
+ assert isinstance(data, TEXT_TYPE), "blockdata is not unicode"
self.blockdata.append(data)
id = len(self.blockdata) - 1
- return u'<%s type="%s" id="%s" />' % (placeholder, type, id)
+ return '<%s type="%s" id="%s" />' % (placeholder, type, id)
def _pre_pre_inline_cut(self, groups):
return self._pre_cut(groups["pre_inline"], "pre", self._inline_placeholder)
@@ -124,17 +125,17 @@ def _pre_pass_block_cut(self, groups):
def _pre_cut_out(self, match):
groups = match.groupdict()
- for name, text in groups.iteritems():
+ for name, text in groups.items():
if text is not None:
if self.debugging:
- print "%15s: %r (%r)" % (name, text, match.group(0))
+ print("%15s: %r (%r)" % (name, text, match.group(0)))
method = getattr(self, '_pre_%s_cut' % name)
return method(groups)
# data = match.group("data")
def feed(self, raw_data):
- assert isinstance(raw_data, unicode), "feed data must be unicode!"
+ assert isinstance(raw_data, TEXT_TYPE), "feed data must be unicode!"
data = raw_data.strip()
# cut out <pre> and <tt> areas block tag areas
@@ -145,17 +146,17 @@ def feed(self, raw_data):
data = strip_html(data)
if self.debugging:
- print "_" * 79
- print "raw data:"
- print repr(raw_data)
- print " -" * 40
- print "cleaned data:"
- print data
- print "-" * 79
-# print clean_data.replace(">", ">\n")
-# print "-"*79
-
- HTMLParser.feed(self, data)
+ print("_" * 79)
+ print("raw data:")
+ print(repr(raw_data))
+ print(" -" * 40)
+ print("cleaned data:")
+ print(data)
+ print("-" * 79)
+# print(clean_data.replace(">", ">\n"))
+# print("-"*79)
+
+ HTMLParser2.feed(self, data)
return self.root
@@ -209,7 +210,7 @@ def handle_starttag(self, tag, attrs):
def handle_data(self, data):
self.debug_msg("data", "%r" % data)
- if isinstance(data, str):
+ if isinstance(data, BINARY_TYPE):
data = unicode(data)
DocNode("data", self.cur, content=data)
@@ -260,24 +261,24 @@ def handle_endtag(self, tag):
def debug_msg(self, method, txt):
if not self.debugging:
return
- print "%-8s %8s: %s" % (self.getpos(), method, txt)
+ print("%-8s %8s: %s" % (self.getpos(), method, txt))
def debug(self, start_node=None):
"""
Display the current document tree
"""
- print "_" * 80
+ print("_" * 80)
if start_node == None:
start_node = self.root
- print " document tree:"
+ print(" document tree:")
else:
- print " tree from %s:" % start_node
+ print(" tree from %s:" % start_node)
- print "=" * 80
+ print("=" * 80)
def emit(node, ident=0):
for child in node.children:
- txt = u"%s%s" % (u" " * ident, child.kind)
+ txt = "%s%s" % (" " * ident, child.kind)
if child.content:
txt += ": %r" % child.content
@@ -288,18 +289,18 @@ def emit(node, ident=0):
if child.level != None:
txt += " - level: %r" % child.level
- print txt
+ print(txt)
emit(child, ident + 4)
emit(start_node)
- print "*" * 80
+ print("*" * 80)
if __name__ == '__main__':
import doctest
- print doctest.testmod()
+ print(doctest.testmod())
# p = HtmlParser(debug=True)
-# p.feed(u"""\
+# p.feed("""\
#<p><span>in span</span><br />
#<code>in code</code></p>
#""")
View
50 creole/html_tools/deentity.py
@@ -1,7 +1,6 @@
#!/usr/bin/env python
# coding: utf-8
-
"""
python-creole utils
~~~~~~~~~~~~~~~~~~~
@@ -11,10 +10,15 @@
:license: GNU GPL v3 or above, see LICENSE for more details.
"""
-from __future__ import division, absolute_import
+from __future__ import division, absolute_import, print_function, unicode_literals
import re
-import htmlentitydefs
+try:
+ import htmlentitydefs as entities
+except ImportError:
+ from html import entities # python 3
+
+from creole.py3compat import PY3
entities_rules = '|'.join([
@@ -22,7 +26,7 @@
r"(&\#x(?P<hex>[a-fA-F0-9]+);)",
r"(&(?P<named>[a-zA-Z]+);)",
])
-#print entities_rules
+#print(entities_rules)
entities_regex = re.compile(
entities_rules, re.VERBOSE | re.UNICODE | re.MULTILINE
)
@@ -33,44 +37,52 @@ class Deentity(object):
replace html entity
>>> d = Deentity()
- >>> d.replace_all(u"-=[&nbsp;&gt;&#62;&#x3E;nice&lt;&#60;&#x3C;&nbsp;]=-")
- u'-=[ >>>nice<<< ]=-'
+ >>> d.replace_all("-=[&nbsp;&gt;&#62;&#x3E;nice&lt;&#60;&#x3C;&nbsp;]=-")
+ '-=[ >>>nice<<< ]=-'
- >>> d.replace_all(u"-=[M&uuml;hlheim]=-") # uuml - latin small letter u with diaeresis
- u'-=[M\\xfchlheim]=-'
+ >>> d.replace_all("-=[M&uuml;hlheim]=-") # uuml - latin small letter u with diaeresis
+ '-=[M\\xfchlheim]=-'
>>> d.replace_number("126")
- u'~'
+ '~'
>>> d.replace_hex("7E")
- u'~'
+ '~'
>>> d.replace_named("amp")
- u'&'
+ '&'
"""
def replace_number(self, text):
""" unicode number entity """
unicode_no = int(text)
- return unichr(unicode_no)
+ if PY3:
+ return chr(unicode_no)
+ else:
+ return unichr(unicode_no)
def replace_hex(self, text):
""" hex entity """
unicode_no = int(text, 16)
- return unichr(unicode_no)
+ if PY3:
+ return chr(unicode_no)
+ else:
+ return unichr(unicode_no)
def replace_named(self, text):
""" named entity """
if text == "nbsp":
# Non breaking spaces is not in htmlentitydefs
- return u" "
+ return " "
else:
- codepoint = htmlentitydefs.name2codepoint[text]
- character = unichr(codepoint)
- return character
+ codepoint = entities.name2codepoint[text]
+ if PY3:
+ return chr(codepoint)
+ else:
+ return unichr(codepoint)
def replace_all(self, content):
""" replace all html entities from the given text. """
def replace_entity(match):
groups = match.groupdict()
- for name, text in groups.iteritems():
+ for name, text in groups.items():
if text is not None:
replace_method = getattr(self, 'replace_%s' % name)
return replace_method(text)
@@ -83,4 +95,4 @@ def replace_entity(match):
if __name__ == '__main__':
import doctest
- print doctest.testmod()
+ print(doctest.testmod())
View
48 creole/html_tools/strip_html.py
@@ -11,7 +11,7 @@
:license: GNU GPL v3 or above, see LICENSE for more details.
"""
-from __future__ import division, absolute_import
+from __future__ import division, absolute_import, print_function, unicode_literals
import re
@@ -38,42 +38,42 @@ def strip_html(html_code):
"""
Delete whitespace from html code. Doesn't recognize preformatted blocks!
- >>> strip_html(u' <p> one \\n two </p>')
- u'<p>one two</p>'
+ >>> strip_html(' <p> one \\n two </p>')
+ '<p>one two</p>'
- >>> strip_html(u'<p><strong><i>bold italics</i></strong></p>')
- u'<p><strong><i>bold italics</i></strong></p>'
+ >>> strip_html('<p><strong><i>bold italics</i></strong></p>')
+ '<p><strong><i>bold italics</i></strong></p>'
- >>> strip_html(u'<li> Force <br /> \\n linebreak </li>')
- u'<li>Force<br />linebreak</li>'
+ >>> strip_html('<li> Force <br /> \\n linebreak </li>')
+ '<li>Force<br />linebreak</li>'
- >>> strip_html(u'one <i>two \\n <strong> \\n three \\n </strong></i>')
- u'one <i>two <strong>three</strong> </i>'
+ >>> strip_html('one <i>two \\n <strong> \\n three \\n </strong></i>')
+ 'one <i>two <strong>three</strong> </i>'
- >>> strip_html(u'<p>a <unknown tag /> foobar </p>')
- u'<p>a <unknown tag /> foobar</p>'
+ >>> strip_html('<p>a <unknown tag /> foobar </p>')
+ '<p>a <unknown tag /> foobar</p>'
- >>> strip_html(u'<p>a <pre> preformated area </pre> foo </p>')
- u'<p>a<pre>preformated area</pre>foo</p>'
+ >>> strip_html('<p>a <pre> preformated area </pre> foo </p>')
+ '<p>a<pre>preformated area</pre>foo</p>'
- >>> strip_html(u'<p>a <img src="/image.jpg" /> image.</p>')
- u'<p>a <img src="/image.jpg" /> image.</p>'
+ >>> strip_html('<p>a <img src="/image.jpg" /> image.</p>')
+ '<p>a <img src="/image.jpg" /> image.</p>'
"""
def strip_tag(match):
block = match.group(0)
- end_tag = match.group("end") in ("/", u"/")
- startend_tag = match.group("startend") in ("/", u"/")
+ end_tag = match.group("end") in ("/", "/")
+ startend_tag = match.group("startend") in ("/", "/")
tag = match.group("tag")
-# print "_"*40
-# print match.groupdict()
-# print "block.......: %r" % block
-# print "end_tag.....:", end_tag
-# print "startend_tag:", startend_tag
-# print "tag.........: %r" % tag
+# print("_"*40)
+# print(match.groupdict())
+# print("block.......: %r" % block)
+# print("end_tag.....:", end_tag)
+# print("startend_tag:", startend_tag)
+# print("tag.........: %r" % tag)
if tag in BLOCK_TAGS:
return block.strip()
@@ -110,4 +110,4 @@ def strip_tag(match):
if __name__ == '__main__':
import doctest
- print doctest.testmod()
+ print(doctest.testmod())
View
20 creole/html_tools/text_tools.py
@@ -11,7 +11,7 @@
:license: GNU GPL v3 or above, see LICENSE for more details.
"""
-from __future__ import division, absolute_import
+from __future__ import division, absolute_import, print_function, unicode_literals
import re
@@ -21,17 +21,17 @@ def clean_whitespace(txt):
"""
Special whitespaces cleanup
- >>> clean_whitespace(u"\\n\\nfoo bar\\n\\n")
- u'foo bar\\n'
+ >>> clean_whitespace("\\n\\nfoo bar\\n\\n")
+ 'foo bar\\n'
- >>> clean_whitespace(u" foo bar \\n \\n")
- u' foo bar\\n'
+ >>> clean_whitespace(" foo bar \\n \\n")
+ ' foo bar\\n'
- >>> clean_whitespace(u" \\n \\n foo bar ")
- u' foo bar '
+ >>> clean_whitespace(" \\n \\n foo bar ")
+ ' foo bar '
- >>> clean_whitespace(u"foo bar")
- u'foo bar'
+ >>> clean_whitespace("foo bar")
+ 'foo bar'
"""
def cleanup(match):
start, txt, end = match.groups()
@@ -53,4 +53,4 @@ def cleanup(match):
if __name__ == '__main__':
import doctest
- print doctest.testmod()
+ print(doctest.testmod())
View
23 creole/py3compat.py
@@ -0,0 +1,23 @@
+# coding: utf-8
+
+"""
+ Helper to support Python v2 and v3
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+ See also:
+ http://python3porting.com
+ https://bitbucket.org/gutworth/six/src/tip/six.py
+ http://packages.python.org/six/
+"""
+
+import sys
+
+# True if we are running on Python 3.
+PY3 = sys.version_info[0] == 3
+
+if PY3:
+ TEXT_TYPE = str
+ BINARY_TYPE = bytes
+else:
+ TEXT_TYPE = unicode
+ BINARY_TYPE = str
View
38 creole/rest2html/clean_writer.py
@@ -16,7 +16,7 @@
:license: GNU GPL v3 or above, see LICENSE for more details.
"""
-from __future__ import division, absolute_import
+from __future__ import division, absolute_import, print_function, unicode_literals
#import warnings
import sys
@@ -34,9 +34,13 @@
" Please install: http://pypi.python.org/pypi/docutils"
) % evalue
evalue = etype(msg)
-# evalue.docutils = False #
- raise DocutilsImportError, evalue, etb
-# raise etype, evalue, etb
+
+ # Doesn't work with Python 3:
+ # http://www.python-forum.de/viewtopic.php?f=1&t=27507
+ # raise DocutilsImportError, evalue, etb
+
+ raise DocutilsImportError(msg)
+
DEBUG = False
@@ -75,7 +79,7 @@ def starttag(self, node, tagname, suffix='\n', empty=0, **attributes):
if tagname in IGNORE_TAGS:
if DEBUG:
- print "ignore tag %r" % tagname
+ print("ignore tag %r" % tagname)
return ""
parts = [tagname]
@@ -98,9 +102,9 @@ def starttag(self, node, tagname, suffix='\n', empty=0, **attributes):
self.attval(unicode(value))))
if DEBUG:
- print "Tag %r - ids: %r - attributes: %r - parts: %r" % (
+ print("Tag %r - ids: %r - attributes: %r - parts: %r" % (
tagname, getattr(node, "ids", "-"), attributes, parts
- )
+ ))
if empty:
infix = ' /'
@@ -108,7 +112,7 @@ def starttag(self, node, tagname, suffix='\n', empty=0, **attributes):
infix = ''
html = '<%s%s>%s' % (' '.join(parts), infix, suffix)
if DEBUG:
- print "startag html: %r" % html
+ print("startag html: %r" % html)
return html
def visit_section(self, node):
@@ -166,11 +170,11 @@ def rest2html(content):
"""
Convert reStructuredText markup to clean html code: No extra div, class or ids.
- >>> rest2html(u"- bullet list")
- u'<ul>\\n<li>bullet list</li>\\n</ul>\\n'
+ >>> rest2html("- bullet list")
+ '<ul>\\n<li>bullet list</li>\\n</ul>\\n'
- >>> rest2html(u"A ReSt link to `PyLucid CMS <http://www.pylucid.org>`_ :)")
- u'<p>A ReSt link to <a href="http://www.pylucid.org">PyLucid CMS</a> :)</p>\\n'
+ >>> rest2html("A ReSt link to `PyLucid CMS <http://www.pylucid.org>`_ :)")