Skip to content

Commit

Permalink
Merge remote branch 'drd/callable-unknown' into callable-unknown
Browse files Browse the repository at this point in the history
  • Loading branch information
jedie committed May 25, 2011
2 parents 2762a5a + 4c2af35 commit 7566b9b
Show file tree
Hide file tree
Showing 4 changed files with 122 additions and 80 deletions.
4 changes: 3 additions & 1 deletion creole/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
:license: GNU GPL v3 or above, see LICENSE for more details.
"""

__version__ = (0, 4, 0)#, "pre")
__version__ = (0, 4, 1, "pre")
__api__ = (1, 0) # Creole 1.0 spec - http://wikicreole.org/


Expand All @@ -27,6 +27,8 @@
from creole_parser import BlockRules, Parser
from creole2html import HtmlEmitter
from html2creole import Html2CreoleParser, Html2CreoleEmitter
from html2creole import raise_unknown_node, use_html_macro, \
escape_unknown_nodes, transparent_unknown_nodes

try:
from django.utils.version import get_svn_revision
Expand Down
139 changes: 69 additions & 70 deletions creole/html2creole.py
Original file line number Diff line number Diff line change
Expand Up @@ -552,92 +552,82 @@ def replace_entity(match):
return entities_regex.sub(replace_entity, content)



#------------------------------------------------------------------------------

RAISE_UNKNOWN_NODES = 1
HTML_MACRO_UNKNOWN_NODES = 2
ESCAPE_UNKNOWN_NODES = 3

class Html2CreoleEmitter(object):
def raise_unknown_node(emitter, node):
    """
    Unknown-tag handler that refuses to convert the node.

    :param emitter: the emitter instance that invoked this handler (unused).
    :param node: the unknown node; only its ``kind`` attribute is read.
    :raises NotImplementedError: always, naming the kind of the node.
    """
    message = "Node from type '%s' is not implemented!" % node.kind
    raise NotImplementedError(message)

def __init__(self, document_tree, unknown_emit=ESCAPE_UNKNOWN_NODES,
debug=False):
self.root = document_tree
def use_html_macro(emitter, node):
"""
Use the <<html>> macro to mask unknown tags.
"""
attrs = node.get_attrs_as_string()
if attrs:
attrs = " " + attrs

if unknown_emit == RAISE_UNKNOWN_NODES:
self.unknown_emit = self.raise_unknown_node
elif unknown_emit == HTML_MACRO_UNKNOWN_NODES:
self.unknown_emit = self.use_html_macro
elif unknown_emit == ESCAPE_UNKNOWN_NODES:
self.unknown_emit = self.escape_unknown_nodes
else:
raise AssertionError("wrong keyword argument 'unknown_emit'!")
tag_data = {
"tag": node.kind,
"attrs": attrs,
}

self.last = None
self.debugging = debug
content = emitter.emit_children(node)
if not content:
# single tag
return u"<<html>><%(tag)s%(attrs)s /><</html>>" % tag_data

self.deentity = Deentity() # for replacing html entities
self.__inner_list = ""
self.__mask_linebreak = False
start_tag = u"<<html>><%(tag)s%(attrs)s><</html>>" % tag_data
end_tag = u"<<html>></%(tag)s><</html>>" % tag_data

#--------------------------------------------------------------------------
return start_tag + content + end_tag

def raise_unknown_node(self, node):
"""
Raise NotImplementedError on unknown tags.
"""
raise NotImplementedError(
"Node from type '%s' is not implemented!" % node.kind
)
def escape_unknown_nodes(emitter, node):
"""
All unknown tags should be escaped.
"""
attrs = node.get_attrs_as_string()
if attrs:
attrs = " " + attrs

def use_html_macro(self, node):
"""
Use the <<html>> macro to mask unknown tags.
"""
#node.debug()
attrs = node.get_attrs_as_string()
if attrs:
attrs = " " + attrs
tag_data = {
"tag": node.kind,
"attrs": attrs,
}

tag_data = {
"tag": node.kind,
"attrs": attrs,
}
content = emitter.emit_children(node)
if not content:
# single tag
return escape(u"<%(tag)s%(attrs)s />" % tag_data)

content = self.emit_children(node)
if not content:
# single tag
return u"<<html>><%(tag)s%(attrs)s /><</html>>" % tag_data
start_tag = escape(u"<%(tag)s%(attrs)s>" % tag_data)
end_tag = escape(u"</%(tag)s>" % tag_data)

start_tag = u"<<html>><%(tag)s%(attrs)s><</html>>" % tag_data
end_tag = u"<<html>></%(tag)s><</html>>" % tag_data
return start_tag + content + end_tag

return start_tag + content + end_tag
def transparent_unknown_nodes(emitter, node):
    """
    Unknown-tag handler that emits no markup for the tag itself.

    Delegates to ``emitter._emit_content(node)`` and returns whatever that
    call produces, so only the node's emitted content ends up in the output.

    :param emitter: the emitter instance that invoked this handler.
    :param node: the unknown node whose content should be emitted.
    """
    content = emitter._emit_content(node)
    return content

def escape_unknown_nodes(self, node):
"""
All unknown tags should be escaped.
"""
#node.debug()
attrs = node.get_attrs_as_string()
if attrs:
attrs = " " + attrs

tag_data = {
"tag": node.kind,
"attrs": attrs,
}

content = self.emit_children(node)
if not content:
# single tag
return escape(u"<%(tag)s%(attrs)s />" % tag_data)
class Html2CreoleEmitter(object):

def __init__(self, document_tree, unknown_emit=raise_unknown_node,
debug=False):
self.root = document_tree

self._unknown_emit = unknown_emit

start_tag = escape(u"<%(tag)s%(attrs)s>" % tag_data)
end_tag = escape(u"</%(tag)s>" % tag_data)
self.last = None
self.debugging = debug

return start_tag + content + end_tag
self.deentity = Deentity() # for replacing html entities
self.__inner_list = ""
self.__mask_linebreak = False

#--------------------------------------------------------------------------

Expand Down Expand Up @@ -701,7 +691,7 @@ def br_emit(self, node):
return u"\n"

def headline_emit(self, node):
return u"%s %s\n" % (u"=" * node.level, self.emit_children(node))
return u"%s %s\n\n" % (u"=" * node.level, self.emit_children(node))

#--------------------------------------------------------------------------

Expand Down Expand Up @@ -747,6 +737,9 @@ def a_emit(self, node):
def img_emit(self, node):
src = node.attrs["src"]

if src.split(':')[0] == 'data':
return u""

title = node.attrs.get("title", "")
alt = node.attrs.get("alt", "")
if len(alt) > len(title): # Use the longest one
Expand Down Expand Up @@ -835,6 +828,8 @@ def td_emit(self, node):
def _emit_content(self, node):
content = self.emit_children(node)
content = self._escape_linebreaks(content)
if node.kind in BLOCK_TAGS:
content = u"%s\n\n" % content
return content

def div_emit(self, node):
Expand Down Expand Up @@ -865,9 +860,13 @@ def emit_node(self, node):
self.debug_msg("emit_node", "%s: %r" % (node.kind, node.content))

method_name = "%s_emit" % node.kind
emit_method = getattr(self, method_name, self.unknown_emit)
emit_method = getattr(self, method_name, None)

content = emit_method(node)
if emit_method:
content = emit_method(node)
else:
content = self._unknown_emit(self, node)

if not isinstance(content, unicode):
raise AssertionError(
"Method '%s' returns no unicode (returns: %r)" % (
Expand Down
55 changes: 48 additions & 7 deletions tests/test_html2creole.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,8 @@
from tests.utils.base_unittest import BaseCreoleTest

from creole import html2creole
from creole.html2creole import RAISE_UNKNOWN_NODES, HTML_MACRO_UNKNOWN_NODES, \
ESCAPE_UNKNOWN_NODES
from creole.html2creole import raise_unknown_node, use_html_macro, \
escape_unknown_nodes, transparent_unknown_nodes


class TestHtml2Creole(unittest.TestCase):
Expand Down Expand Up @@ -58,18 +58,18 @@ def test_not_used(self):

def test_raise_unknown_node(self):
"""
Test creole.html2creole.RAISE_UNKNOWN_NODES mode:
Test creole.html2creole.raise_unknown_node callable:
Raise NotImplementedError on unknown tags.
"""
self.assertRaises(NotImplementedError,
html2creole,
html_string=u"<unknwon>",
unknown_emit=RAISE_UNKNOWN_NODES
unknown_emit=raise_unknown_node
)

def test_escape_unknown_nodes(self):
"""
Test creole.html2creole.ESCAPE_UNKNOWN_NODES mode:
Test creole.html2creole.escape_unknown_nodes callable:
All unknown tags should be escaped.
"""
self.assertCreole(r"""
Expand All @@ -83,9 +83,36 @@ def test_escape_unknown_nodes(self):
<p>555<unknown />666</p>
""",
unknown_emit=ESCAPE_UNKNOWN_NODES
unknown_emit=escape_unknown_nodes
)

def test_transparent_unknown_nodes(self):
"""
Test creole.html2creole.transparent_unknown_nodes callable:
All unknown tags should be "transparent" and show only
their child nodes' content.
"""
self.assertCreole(r"""
//baz//, **quux**
""", """
<form class="foo" id="bar"><label><em>baz</em></label>, <strong>quux</strong></form>
""", unknown_emit = transparent_unknown_nodes)

def test_transparent_unknown_nodes_block_elements(self):
"""
Test that block elements insert linefeeds into the stream.
"""
self.assertCreole(r"""
//baz//,
**quux**
spam, ham, and eggs
""", """
<div><em>baz</em>,</div> <fieldset><strong>quux</strong></fieldset>
<span>spam, </span><label>ham, </label>and eggs
""", unknown_emit = transparent_unknown_nodes)

def test_entities(self):
"""
Test html entities.
Expand Down Expand Up @@ -194,7 +221,10 @@ def test_image(self):
<img src="/path1/path2/foobar3.jpg" /><br />
<img src="/foobar4.jpg" alt="It's foobar 4" /><br />
<img src="/foobar5.jpg" title="It's foobar 5" /><br />
<img src="/foobar6.jpg" alt="short name" title="a long picture title" /></p>
<img src="/foobar6.jpg" alt="short name" title="a long picture title" /><br />
<img src="data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAUA
AAAFCAYAAACNbyblAAAAHElEQVQI12P4//8/w38GIAXDIBKE0DHxgljNBAAO
9TXL0Y4OHwAAAABJRU5ErkJggg==" alt="data uri should be disallowed" /></p>
""")

def test_non_closed_br(self):
Expand Down Expand Up @@ -249,6 +279,17 @@ def test_newline_before_headline(self):
<h1>one</h1>
""")#, debug=True)

def test_newlines_after_headlines(self):
self.assertCreole(r"""
= Headline news
[[http://google.com|The googlezor]] is a big bad mother.
""", """
<h1>Headline news</h1>
<p><a href="http://google.com">The googlezor</a> is a big bad mother.</p>
""")

def test_cross_lines(self):
""" TODO: bold/italics cross lines
see: http://code.google.com/p/python-creole/issues/detail?id=13
Expand Down
4 changes: 2 additions & 2 deletions tests/utils/base_unittest.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
from utils import MarkupTest

from creole import creole2html, html2creole
from creole.html2creole import HTML_MACRO_UNKNOWN_NODES
from creole.html2creole import use_html_macro


class BaseCreoleTest(MarkupTest):
Expand Down Expand Up @@ -107,6 +107,6 @@ def assertCreole(self, source_string, should_string, debug=False):
self.assert_Creole2html(source_string, should_string, debug)
self.assert_html2Creole(
source_string, should_string, debug,
unknown_emit=HTML_MACRO_UNKNOWN_NODES
unknown_emit=use_html_macro
)

1 comment on commit 7566b9b

@jedie
Copy link
Owner Author

@jedie jedie commented on 7566b9b May 25, 2011

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I have created a how-to page about these callables here: https://code.google.com/p/python-creole/wiki/UnknownHtmlTags

Please sign in to comment.