Skip to content
Find file
Fetching contributors…
Cannot retrieve contributors at this time
executable file 759 lines (648 sloc) 24.4 KB
"""
BB Code parser by Jonas 'Ojii' Obrist (c) 2009
USAGE:
Parsing:
parsed, errors = bbcode.parse(content, strict=True)
This might raise a bbcode.ParserError if strict is True (default). Otherwise, on a
ParserError, the content is returned unparsed and errors contains the reason.
Validation:
errors = bbcode.validate(content)
Returns errors caused by parsing the code or an empty sequence.
Extending:
Subclassing bbcode.TagNode and passing the class to bbcode.register adds new BB Code Tags.
Each node must have an opening and closing pattern (open_pattern, close_pattern)
and push, pushed, pull and close methods. For further information read the doc
strings of the TagNode class.
"""
import re
import cgi
try:
from django.utils.translation import ugettext as _
except ImportError:
_ = lambda x: x
# Set to True by autodiscover() after it has scanned the installed apps, so
# that later autodiscover() calls return immediately.
AUTODISCOVERED = False
# Two line feeds separated only by whitespace, i.e. a paragraph break.
# Raw string: '\s' inside a normal string literal is an invalid escape sequence.
LINEFEED_PATTERN = re.compile(r'\n\s*\n', re.MULTILINE)


def convert_linefeeds(content):
    """
    Convert line feeds in content to HTML line breaks.

    A run of line feeds separated only by whitespace collapses into exactly
    two '<br />' tags; every remaining single line feed becomes one '<br />'.
    Returns the converted string.
    """
    content = LINEFEED_PATTERN.sub('<br /><br />', content)
    return content.replace('\n', '<br />')
class UnmatchablePseudoPattern(object):
    """
    Stand-in for a compiled regular expression that never matches anything.

    It offers the subset of the pattern interface implemented below, so it
    can be used wherever a tag pattern is expected but no match is wanted.
    """

    def match(self, content):
        """Never matches; always returns False."""
        return False

    def search(self, content):
        """Never finds anything; always returns False."""
        return False

    def finditer(self, content):
        """Return an iterator that yields no matches."""
        return iter(())

    def sub(self, replacement, content):
        """Nothing can be replaced, so content is returned unchanged."""
        return content
class patterns:
    """
    Namespace for the regular expression templates used to build tag patterns.

    This is a class for namespacing reasons only. Templates containing '%s'
    are presumably completed with the tag name before compiling -- confirm
    against the tag definitions that use them.
    """
    # '[tag]' -- opening tag without any argument.
    no_argument = r'\[%s\]'
    # '[tag/]' -- self closing tag, optional whitespace before the slash.
    self_closing_tag = r'\[%s\s*/\]'
    # '[tag]' or '[tag=value]'; the value may be wrapped in double quotes and
    # is captured in the named group 'argument'.
    single_argument = r'\[%s(?:\]|="?(?P<argument>[^\]"]+)"?\])'
    # Optional ' key=value' fragment. Contributes three groups per use:
    # the whole fragment, the key and the value.
    argument = r'( (\w+)=([^\] ]+))?'
    # '[/tag]' -- closing tag.
    closing = r'\[/%s\]'
    # Pattern-like object that never matches.
    unmatchable = UnmatchablePseudoPattern()
def get_tag_name(klass):
    """
    Return the tag name of a tag class.

    An explicit 'tagname' attribute wins; without one the lower-cased class
    name is used.
    """
    if hasattr(klass, 'tagname'):
        return klass.tagname
    return klass.__name__.lower()
class NeedsSubclassingError(Exception):
    """Raised by base class methods that a subclass has to override."""
class ParserError(Exception):
    """Raised when content cannot be turned into a valid BBCode parse tree."""
class SoftException(object):
    """
    A recorded, non-fatal parser problem: a line number plus a message.

    Converting it to a string yields an HTML snippet with the line number
    and the message wrapped in 'bbcode-error' spans.
    """

    def __init__(self, lineno, message):
        self.lineno = lineno
        self.message = message

    def __str__(self):
        return (
            '<span class="bbcode-error lineno">Line %s:</span> '
            '<span class="bbcode-error message">%s</span>'
        ) % (self.lineno, self.message)

    # Same rendering when a unicode representation is requested.
    __unicode__ = __str__
class SoftExceptionManager(object):
    """
    Collector for 'soft exceptions'.

    Soft exceptions do not interrupt the flow of the code. They are stored
    in a list, together with the line number that was current when they
    were raised, and can be handed to the user later.
    """

    def __init__(self):
        self.exceptions = []
        self.line_number = 1

    def set_line_number(self, number):
        """
        Remember number as the line that following soft exceptions belong to.
        """
        self.line_number = number

    def soft_raise(self, exception):
        """
        Record an exception message without interrupting execution.

        The message is passed through the translation function '_' (the
        identity function when django is not importable) and stored together
        with the current line number.
        """
        recorded = SoftException(self.line_number, _(exception))
        self.exceptions.append(recorded)

    def pull(self):
        """
        Return every exception recorded since initialization or the last
        pull, and reset the list.
        """
        collected, self.exceptions = self.exceptions, []
        return collected
# Module-wide soft exception manager. Library.get_parse_tree() updates its
# line number; parse() and Library.validate() drain it with pull().
sem = SoftExceptionManager()
# Shortcut so callers can record a soft exception with soft_raise(message).
soft_raise = sem.soft_raise
class VariableScope(dict):
    """
    Dictionary of variables that can be substituted into text.

    A variable named 'foo' is referenced in text as '$foo$'.
    """

    def add(self, name, value):
        """Store a variable; both name and value are converted with str()."""
        dict.__setitem__(self, str(name), str(value))

    def resolve(self, context):
        """
        Return context with surrounding double quotes stripped and every
        '$name$' placeholder replaced by the value of the variable 'name'.
        """
        context = context.strip('"')
        # dict.items exists on Python 2 and 3 (dict.iteritems is 2-only).
        for var, value in dict.items(self):
            context = context.replace('$%s$' % var, value)
        return context

    def lazy_resolve(self, context):
        """
        Return a proxy that resolves context only when it is actually used.

        Variables added to the scope after this call are therefore still
        taken into account. The proxy supports int(), str() and forwards any
        other attribute access to the resolved string.
        """
        # Deliberately no explicit base class: on Python 2 this keeps the
        # old-style class behaviour of routing special method lookups
        # through __getattr__.
        class Lazy:
            def __init__(self, resolver, context):
                self.resolver = resolver
                self.context = context

            def _resolve(self):
                # Resolve again on every access and keep the latest result.
                self.context = self.resolver(self.context)
                return self.context

            def __int__(self):
                return int(self._resolve())

            def __str__(self):
                # Explicit so that str() and '%s' formatting also work when
                # special methods bypass __getattr__ (new-style classes).
                return str(self._resolve())

            def __getattr__(self, attr):
                # Only reached for attributes not found on the proxy itself.
                return getattr(self._resolve(), attr)

        return Lazy(self.resolve, context)
class Node(object):
    """
    This is the baseclass for all objects in a BBCode Parse Tree.

    To understand Nodes it is important to understand the Tree.
    Each Parse Tree has one, and only one, head node. This node has child
    nodes and those children have child nodes themselves. This continues
    until there are no more child nodes. In a standard Parse Tree the last
    leaves of a branch are instances of TextNode, however since empty
    TextNodes are not kept in the Tree, they might also be missing.

    When the Parse Tree is generated the nodes get 'pushed', 'appended',
    'pulled' and 'closed'. Only TextNodes can be appended to a node's
    nodelist. When a new child node is found it is 'pushed' and becomes the
    current node. When a node cannot be closed correctly it is 'pulled',
    which means its unparsed contents are added to its parent. Usually this
    causes a ParserError, which means the Tree is not parseable. When a node
    is finished parsing it is 'closed', which normally returns the parent.
    """
    name = 'node'
    is_text_node = False

    def __init__(self, parent, match, fullcontent, context=None):
        """
        Normal nodes take their parent node as first argument, the regular
        expression match as second argument and the full content as third
        argument. context is optional and only used with django.
        """
        self.start = match.start()
        self.fullcontent = fullcontent
        self.raw_content = ''
        self.parent = parent
        self.match = match
        self.nodes = []
        self.context = context  # for django only
        # The variable scope is shared with the parent (same object, not a
        # copy), so variables are visible throughout the whole tree.
        self.variables = parent.variables

    def soft_raise(self, errmsg):
        """
        Record errmsg as a soft exception and return the raw content of the
        node, so a failing parse() can return the call's result directly.
        """
        soft_raise(errmsg)
        return self.raw_content

    def append(self, text):
        """
        Adds a text node to the node
        """
        self.nodes.append(TextNode(self, text))

    def push(self, nodeklass, match, fullcontent):
        """
        Adds a nested tag node and returns whatever that node's pushed()
        returns, which is the node that becomes the current node.
        """
        node = nodeklass(self, match, fullcontent, self.context)
        self.nodes.append(node)
        return node.pushed()

    def pushed(self):
        """
        Normal Nodes return themselves when being pushed. Self closing nodes
        can overwrite this method to handle this in another fashion.
        """
        return self

    def pull(self, end):
        """
        Add the unparsed text of this node, from its start up to end, to the
        parent as a text node and return the parent.
        """
        unparsed = self.fullcontent[self.start:end]
        # The text node lives in the parent's node list, so the parent (and
        # not this node) is its parent.
        self.parent.nodes.append(TextNode(self.parent, unparsed))
        return self.parent

    def close(self, end):
        """
        Remember where the node ends, store its raw content and return the
        parent.
        """
        self.end = end
        self.raw_content = self.fullcontent[self.start:end]
        return self.parent

    def parse(self):
        """
        Parses the node. This is also responsible to parse child nodes.
        Should return a string and fail silently.

        Raises NeedsSubclassingError here; subclasses must override it.
        """
        raise NeedsSubclassingError
class HeadNode(Node):
    """
    The head node of the BBCode parse tree.

    It has no parent, cannot be pulled or closed, and owns the variable
    scope that its descendants share.
    """
    name = 'head'

    def __init__(self, raw_content, context=None):
        """
        raw_content is the complete unparsed content; context is optional
        and stored for the child nodes.
        """
        self.raw_content = raw_content
        self.nodes = []
        self.context = context
        self.variables = VariableScope()

    def pull(self, end):
        """Always raises ParserError: the head node has no parent."""
        # Call form of raise works on Python 2 and Python 3 alike.
        raise ParserError("Cannot pull from headnode, invalid BBCode Tree")

    def close(self, end):
        """Always raises ParserError: the head node is never closed."""
        raise ParserError("Cannot close headnode, invalid BBCode Tree")

    def parse(self):
        """Return the concatenated parse results of all child nodes."""
        return ''.join(node.parse() for node in self.nodes)
class TextNode(Node):
    """
    Leaf node holding plain text. It cannot have children, so all tree
    building operations raise TypeError.
    """
    is_text_node = True

    def __init__(self, parent, text):
        self.text = text
        self.variables = parent.variables
        self.parent = parent
        self.raw_content = text
        self.nodes = []

    def append(self, text):
        # Call form of raise works on Python 2 and Python 3 alike.
        raise TypeError("TextNode does not support appending")

    def push(self, node, match=None, fullcontent=None):
        # Accepts the same arguments as Node.push so that callers always get
        # the explanatory message instead of an argument count error.
        raise TypeError("TextNode does not support pushing")

    def pull(self, end):
        raise TypeError("TextNode does not support pulling")

    def close(self, end):
        raise TypeError("TextNode does not support closing")

    def __repr__(self):
        return '<TextNode instance "%s">' % self.text

    def parse(self):
        """
        Return the text with variables resolved and then cgi-escaped.
        """
        return cgi.escape(self.variables.resolve(self.text))

    def __str__(self):
        return 'TextNode: %r' % self.text
class TagNode(Node):
    """
    Base class for BBCode tags. Subclasses provide 'open_pattern' and
    'close_pattern'; the placeholders here raise NeedsSubclassingError.
    """

    @staticmethod
    def open_pattern():
        raise NeedsSubclassingError

    @staticmethod
    def close_pattern():
        raise NeedsSubclassingError

    def parse_inner(self):
        """
        Parse every child node and return the results joined into one string.
        """
        pieces = [child.parse() for child in self.nodes]
        return ''.join(pieces)

    def __str__(self):
        return self.__class__.__name__
class ReplaceTagNode(TagNode):
    """
    TagNode with a ready-made parse method for plain bbcode to html
    replacements: [tag] becomes <tag> and [/tag] becomes </tag>. Such tags
    take no arguments and parse all of their inner content.

    The html tag is taken from the 'tagname' attribute. When a class does
    not define one, the lower-cased class name is used instead.
    """

    def __init__(self, parent, match, content, context):
        """
        Fill in a missing tag name, then initialise like any other TagNode.
        """
        try:
            self.tagname
        except AttributeError:
            self.tagname = self.__class__.__name__.lower()
        TagNode.__init__(self, parent, match, content, context)

    def parse(self):
        """Wrap the parsed inner content in an html tag named tagname."""
        tag = self.tagname
        return '<%s>%s</%s>' % (tag, self.parse_inner(), tag)

    def __str__(self):
        return 'ReplaceTagNode: %s' % self.__class__.__name__
class ArgumentTagNode(TagNode):
    """
    TagNode accepting a single, optional argument. The open pattern has to
    define a named group called 'argument'.
    """

    def __init__(self, parent, match, content, context):
        """
        Store the argument, stripped of double quotes, as a lazily resolved
        value in self.argument. A missing argument becomes the empty string.
        """
        TagNode.__init__(self, parent, match, content, context)
        raw = match.group('argument')
        if raw:
            raw = raw.strip('"')
        else:
            raw = ''
        self.argument = self.variables.lazy_resolve(raw)

    def __str__(self):
        return '%s (%s)' % (self.__class__.__name__, self.argument)
class _MultiArgs(dict):
"""
Dictionary-like class which allows items to be accessed via attributes.
"""
def __getattr__(self, attr):
return dict.__getitem__(self, attr)
class MultiArgumentTagNode(TagNode):
    """
    TagNode which takes multiple (or no) arguments. Must have an attribute
    _arguments which holds key, value pairs of the arguments and their
    defaults. Open pattern should use bbcode.patterns.argument as argument
    matching expression.
    """
    _arguments = []

    def __init__(self, parent, match, content, context):
        """
        Build self.arguments from the defaults in _arguments, overridden by
        the arguments found in match. Values are lazily resolved.
        """
        TagNode.__init__(self, parent, match, content, context)
        groups = match.groups()
        kwargs = dict(self._arguments)
        # Each use of patterns.argument contributes a (whole, key, value)
        # triple of groups. Walk the unfiltered groups so that an omitted
        # optional argument (all three groups None) cannot shift the
        # positions of the arguments that follow it.
        for value_index in range(2, len(groups), 3):
            key = groups[value_index - 1]
            value = groups[value_index]
            if key and value:
                kwargs[key] = self.variables.lazy_resolve(value)
        self.arguments = _MultiArgs(kwargs)

    def __str__(self):
        # dict.items exists on Python 2 and 3 (iteritems is 2-only).
        args = ['%s: %s' % (key, value)
                for key, value in self.arguments.items()]
        return '%s (%s)' % (self.__class__.__name__, ', '.join(args))
class SelfClosingTagNode(TagNode):
    """
    Tag that closes itself and therefore never gets a closing tag.
    """
    close_pattern = patterns.unmatchable

    def __init__(self, parent, match, content, context):
        """
        Set up the node like a regular tag node; the raw content is known
        right away because it is exactly the text covered by match.
        """
        TagNode.__init__(self, parent, match, content, context)
        self.raw_content = content[match.start():match.end()]

    def pushed(self):
        """
        Hand back the parent instead of the node itself, so the parser never
        descends into a self closing node and it never gets child nodes.
        """
        return self.parent

    def __str__(self):
        return 'SelfClosingTag: %s' % self.__class__.__name__
class AutoDict(dict):
    """
    Dictionary that creates and stores a default value for missing keys when
    they are looked up with the subscript syntax.

    default_thing is called to produce the value when it is callable;
    otherwise it is used as the value itself.
    """

    def __init__(self, default_thing=set, *args, **kwargs):
        self.__default_thing = default_thing
        dict.__init__(self, *args, **kwargs)

    def __getitem__(self, item):
        try:
            return dict.__getitem__(self, item)
        except KeyError:
            default = self.__default_thing
            fresh = default() if callable(default) else default
            dict.__setitem__(self, item, fresh)
            return fresh
class Library(object):
    """
    The core of the BBCode parser. Keeps track of all bbcode tags and text
    parsers. Also handles building BBCode Parse Trees and the automated help
    generation.
    """
    name_pat1 = re.compile('([a-z0-9])([A-Z])')
    name_pat2 = re.compile('(.)([A-Z][a-z]+)')

    # String base type usable on both Python 2 and Python 3.
    try:
        _string_types = basestring
    except NameError:
        _string_types = str

    def __init__(self):
        # tag name -> help entry ({'docs', 'name', 'class'})
        self.names = AutoDict(None)
        # class name -> tag class
        self.raw_names = {}
        # namespace -> set of tag classes
        self.tags = AutoDict(set)
        # tag class -> help entry
        self.klasses = AutoDict(None)

    def convert(self, name):
        """
        Convert a class name to something a bit more readable, by inserting
        spaces at the CamelCase word boundaries.
        """
        return self.name_pat1.sub(r'\1 \2', self.name_pat2.sub(r'\1 \2', name))

    def dsparse(self, docs):
        """
        Parse docstrings. Returns the parsed content; errors are dropped.
        """
        return parse(docs, strict=False, auto_discover=True)[0]

    def get_default_namespaces(self, klass):
        """
        Return the namespaces a tag class belongs to by default: the last
        part of its module path, the part two levels above it (when the path
        is that deep) and the lower-cased class name.
        """
        bits = klass.__module__.split('.')
        namespaces = [bits[-1]]
        # Shallow module paths have no third-from-last part to use.
        if len(bits) >= 3:
            namespaces.append(bits[-3])
        namespaces.append(klass.__name__.lower())
        return tuple(namespaces)

    def register(self, klass):
        """
        Register a BBCode Tag Node
        """
        # Add the class to their namespaces.
        declared = list(getattr(klass, 'namespaces', []))
        for ns in declared:
            self.tags[ns].add(klass)
        if not getattr(klass, 'not_in_all', False):
            self.tags['__all__'].add(klass)
        defaults = self.get_default_namespaces(klass)
        for default in defaults:
            self.tags[default].add(klass)
        # Assign a new list (defaults first) instead of inserting into the
        # existing one, which may be inherited from and shared with a parent
        # class.
        klass.namespaces = list(defaults) + declared
        # Register documentation
        docstrings = klass.__doc__
        tagname = get_tag_name(klass)
        if docstrings:
            if hasattr(klass, 'verbose_name'):
                verbose_name = klass.verbose_name
            else:
                verbose_name = self.convert(klass.__name__)
            self.names[tagname] = {'docs': docstrings.strip(),
                                   'name': verbose_name,
                                   'class': klass}
        # dict.get: do not create an empty help entry for undocumented tags.
        self.klasses[klass] = self.names.get(tagname)
        self.raw_names[klass.__name__] = klass

    def _resolve_tag_class(self, klass):
        """
        Return the tag class for a TagNode subclass, a registered class name
        or a registered tag name. Returns None if it cannot be resolved.
        """
        if isinstance(klass, type):
            return klass if issubclass(klass, TagNode) else None
        if isinstance(klass, self._string_types):
            if klass in self.raw_names:
                return self.raw_names[klass]
            entry = self.names.get(klass)
            if entry:
                return entry['class']
        return None

    def add_namespace(self, klass, *namespaces):
        """
        Add a tag to a namespace or several namespaces. klass may be a tag
        class, a class name or a tag name; unknown tags are ignored.
        """
        target = self._resolve_tag_class(klass)
        if target is None:
            return
        for namespace in namespaces:
            self.tags[namespace].add(target)

    def remove_namespace(self, klass, *namespaces):
        """
        Remove a tag from a namespace or several namespaces. klass may be a
        tag class, a class name or a tag name; unknown tags are ignored.
        """
        target = self._resolve_tag_class(klass)
        if target is None:
            return
        for namespace in namespaces:
            # dict.get: do not create namespaces that do not exist yet.
            members = self.tags.get(namespace)
            if members:
                members.discard(target)

    def set_not_in_all(self, klass, flag=True):
        """
        Set 'not_in_all' for a tag, by removing it from (flag is true) or
        adding it to (flag is false) the '__all__' namespace.
        """
        if flag:
            self.remove_namespace(klass, '__all__')
        else:
            self.add_namespace(klass, '__all__')

    def get_help(self, *tags):
        """
        Get help for the given tags (tag classes or tag names) or, without
        arguments, for all tags of the default namespaces.

        Returns a list of dictionaries with the keys 'name', 'docstring'
        (the parsed documentation) and 'obj' (the tag class). Tags without
        documentation are skipped.
        """
        if not tags:
            tags = self.get_tags()
        help_objects = []
        for tag in tags:
            # issubclass() only accepts classes, tag names are strings.
            if isinstance(tag, type) and issubclass(tag, Node):
                obj = self.klasses.get(tag)
            else:
                obj = self.names.get(tag)
            if obj is None:
                continue
            help_objects.append({
                'name': obj['name'],
                'docstring': parse(obj['docs'], strict=False,
                                   auto_discover=True)[0],
                'obj': obj['class'],
            })
        return help_objects

    def get_tags(self, namespaces=None):
        """
        Get the set of tag classes for the namespaces. A namespace prefixed
        with 'no-' excludes the tags of that namespace. Unknown namespaces
        are ignored.
        """
        if namespaces is None:
            namespaces = get_default_namespaces()
        exclude = []
        include = []
        # Split the 'namespaces' into exclude and include namespaces
        for ns in namespaces:
            if ns.startswith('no-'):
                _ns = ns[3:]
                if _ns in self.tags:
                    exclude.append(_ns)
            elif ns in self.tags:
                include.append(ns)
        # Include first
        if not include or '__all__' in include:
            tags = set(self.tags['__all__'])
        else:
            tags = set()
            # 'base' pulls in everything from '__all__' as well.
            if 'base' in include:
                tags = set(self.tags['__all__'])
            for ns in include:
                tags |= self.tags[ns]
        # Then exclude
        for ns in exclude:
            tags -= self.tags[ns]
        return tags

    def get_taglist(self, content, namespaces=None):
        """
        Get the tag-match list of a content for given namespaces.

        Returns a list of (start, match, tag class, is_opener) tuples sorted
        by start position.
        """
        if namespaces is None:
            namespaces = get_default_namespaces()
        taglist = []
        for tagklass in self.get_tags(namespaces):
            # Patterns may be given as pattern objects or as callables
            # returning one.
            for pattern, opener in ((tagklass.open_pattern, True),
                                    (tagklass.close_pattern, False)):
                if callable(pattern):
                    pattern = pattern()
                for match in pattern.finditer(content):
                    taglist.append((match.start(), match, tagklass, opener))
        # Sort by position only: comparing whole tuples would fall back to
        # comparing match objects whenever two matches start at the same
        # position.
        return sorted(taglist, key=lambda entry: entry[0])

    def get_parse_tree(self, content, namespaces=None, context=None):
        """
        Prepare content for parsing.

        Returns a HeadNode instance. Raises ParserError when a closing tag
        has no matching opening tag.
        """
        if namespaces is None:
            namespaces = get_default_namespaces()
        taglist = self.get_taglist(content, namespaces)
        # Get headnode
        headnode = HeadNode(content, context)
        lastpos = 0
        currentnode = headnode
        # Loop over tag matches
        for _pos, match, tagklass, opener in taglist:
            start, end = match.span()
            # Prevent tags matching within other tags (eg AutoDetectURL)
            if start < lastpos:
                continue
            # Append text between last tag and this one
            text = content[lastpos:start]
            if text:
                currentnode.append(text)
            # Set new position
            lastpos = end
            # Get line number for soft exceptions
            lineno = content.count('\n', 0, start) + 1
            sem.set_line_number(lineno)
            # if opener, push new node
            if opener:
                currentnode = currentnode.push(tagklass, match, content)
                continue
            # else close the tag:
            # pull all unclosed child tags of the current node
            while tagklass != currentnode.__class__:
                try:
                    currentnode = currentnode.pull(end)
                except ParserError:
                    sem.soft_raise("BBCode could not be parsed. There are probably unclosed or uneven tags!")
                    raise ParserError(
                        "Failed to find matching opening tag for closing tag '%s' in line %s."
                        % (get_tag_name(tagklass), lineno))
            # close the node
            currentnode = currentnode.close(end)
        # Text after the last tag always goes to the head node
        text = content[lastpos:]
        if text:
            headnode.append(text)
        # Return the head node
        return headnode

    def get_visual_parse_tree(self, content, namespaces=None, indent=4):
        """
        Return a text rendering of the parse tree of content, one node per
        line, nested nodes indented by indent spaces per level. Returns
        '-Parse Error' if the content cannot be parsed.
        """
        if namespaces is None:
            namespaces = get_default_namespaces()

        def recurse(nodes, level, indent):
            prefix = ' ' * (level * indent)
            lines = []
            for node in nodes:
                lines.append('%s-%s' % (prefix, str(node)))
                lines += recurse(node.nodes, level + 1, indent)
            return lines

        try:
            head = self.get_parse_tree(content, namespaces)
        except ParserError:
            return '-Parse Error'
        visuals = ['-HeadNode']
        visuals += recurse(head.nodes, 1, indent)
        return '\n'.join(visuals)

    def validate(self, content, namespaces=None, auto_discover=False):
        """
        Validates a given content and returns the errors or an empty
        sequence.
        """
        if namespaces is None:
            namespaces = get_default_namespaces()
        if auto_discover:
            autodiscover()
        try:
            headnode = self.get_parse_tree(content, namespaces)
        except ParserError:
            return sem.pull()
        # Parsing is only done to collect the soft exceptions it raises.
        headnode.parse()
        return sem.pull()
# Module-level default library, plus shortcuts to its most used methods so
# they can be called directly on the module.
lib = Library()
register = lib.register
validate = lib.validate
get_help = lib.get_help
get_visual = lib.get_visual_parse_tree
def get_default_namespaces():
    """
    Return the namespaces to use when a caller does not specify any.

    This is the BBCODE_DEFAULT_NAMESPACES django setting when it is defined,
    and ['__all__'] otherwise, including when django is not installed.
    """
    try:
        from django.conf import settings
    except ImportError:
        # The module is usable without django, fall back to the default.
        return ['__all__']
    if hasattr(settings, 'BBCODE_DEFAULT_NAMESPACES'):
        return settings.BBCODE_DEFAULT_NAMESPACES
    return ['__all__']
def parse(content, namespaces=None, strict=True, auto_discover=False,
          context=None):
    """
    Parse a content with the BBCodes.

    Returns a tuple of the parsed content and the soft exceptions collected
    on the way. If the parse tree cannot be built, a ParserError is raised
    when strict is true; otherwise the content is returned with only its
    line feeds converted.
    """
    if auto_discover:
        autodiscover()
    if namespaces is None:
        namespaces = get_default_namespaces()
    # Fix windows linefeeds
    content = content.replace('\r', '')
    # Get head node
    try:
        head = lib.get_parse_tree(content, namespaces, context)
    except ParserError:
        if strict:
            raise
        return convert_linefeeds(content), sem.pull()
    # Parse BB Codes, then replace linefeeds
    return convert_linefeeds(head.parse()), sem.pull()
def autodiscover():
    """
    Automatically register all bbcode tags. This searches the 'bbtags'
    modules of all INSTALLED_APPS if available.

    Runs only once; later calls return immediately.
    """
    global AUTODISCOVERED
    if AUTODISCOVERED:
        return
    import imp
    import os
    from django.conf import settings
    for app in settings.INSTALLED_APPS:
        try:
            module = __import__(app, {}, {}, [app.split('.')[-1]])
            app_path = module.__path__
        except AttributeError:
            # Not a package, so it cannot contain a 'bbtags' module.
            continue
        try:
            found = imp.find_module('bbtags', app_path)
        except ImportError:
            continue
        handle, bbtags_path = found[0], found[1]
        if handle is not None:
            # find_module returns an open file for plain modules and leaves
            # closing it to the caller.
            handle.close()
        if not os.path.isdir(bbtags_path):
            # 'bbtags' is a single module, there is no directory to list.
            __import__("%s.bbtags" % app)
            continue
        for f in os.listdir(bbtags_path):
            mod_name, ext = os.path.splitext(f)
            if ext != '.py':
                continue
            if mod_name == '__init__':
                # Import the package itself; importing 'bbtags.__init__'
                # would execute the package's __init__.py a second time
                # under a different module name.
                __import__("%s.bbtags" % app)
            else:
                __import__("%s.bbtags.%s" % (app, mod_name))
    AUTODISCOVERED = True
Jump to Line
Something went wrong with that request. Please try again.