Skip to content

Commit

Permalink
Merge pull request #2194 from getnikola/shortcode-parser
Browse files Browse the repository at this point in the history
Use a custom shortcode parser
  • Loading branch information
ralsina committed Dec 24, 2015
2 parents 19cd8dd + 4bbdd98 commit 5bc23e3
Show file tree
Hide file tree
Showing 2 changed files with 91 additions and 35 deletions.
114 changes: 82 additions & 32 deletions nikola/shortcodes.py
Original file line number Original file line Diff line number Diff line change
Expand Up @@ -26,11 +26,6 @@


"""Support for Hugo-style shortcodes.""" """Support for Hugo-style shortcodes."""


try:
from html.parser import HTMLParser
except ImportError:
from HTMLParser import HTMLParser

from .utils import LOGGER from .utils import LOGGER




Expand Down Expand Up @@ -84,15 +79,14 @@ def _find_shortcodes(data):
""" """
# FIXME: this is really space-intolerant # FIXME: this is really space-intolerant


parser = SCParser()
pos = 0 pos = 0
while True: while True:
start = data.find('{{%', pos) start = data.find('{{%', pos)
if start == -1: if start == -1:
break break
# Get the whole shortcode tag # Get the whole shortcode tag
end = data.find('%}}', start + 1) end = data.find('%}}', start + 1)
name, args = parser.parse_sc('<{}>'.format(data[start + 3:end].strip())) name, args = parse_sc(data[start + 3:end].strip())
# Check if this start has a matching close # Check if this start has a matching close
close_tag = '{{% /{} %}}'.format(name) close_tag = '{{% /{} %}}'.format(name)
close = data.find(close_tag, end + 3) close = data.find(close_tag, end + 3)
Expand All @@ -106,28 +100,84 @@ def _find_shortcodes(data):
yield [name, args, start, end] yield [name, args, start, end]




class SCParser(HTMLParser): def parse_sc(data):
"""Parser for shortcode arguments.""" """Parse shortcode arguments into a tuple."""

elements = data.split(' ', 1)
# Because shortcode attributes are HTML-like, we are abusing the HTML parser. name = elements[0]
# TODO replace with self-contained parser if len(elements) == 1:
# FIXME should be able to take quoted positional arguments! # No arguments

return name, ([], {})
def parse_sc(self, data): args = []
"""Parse shortcode arguments into a tuple.""" kwargs = {}
self.name = None
self.attrs = {} # "Simple" argument parser.
self.feed(data) # flag can be one of:
args = [] # 0 name
kwargs = {} # 1 value +value
for a, b in self.attrs: # 2 name inside quotes +quotes
if b is None: # 3 value inside quotes
args.append(a) # 4 [unsupported] +backslash
else: # 5 value inside backslash
kwargs[a] = b # 6 [unsupported]
return self.name, (args, kwargs) # 7 value inside quotes and backslash

flag = 0
def handle_starttag(self, tag, attrs): cname = ''
"""Set start tag information on parser object.""" cvalue = ''
self.name = tag qc = ''
self.attrs = attrs for char in elements[1]:
if flag & 0b100 and flag & 1:
# Backslash in value: escape next character, no matter what
cvalue += char
flag -= 0b100
elif flag & 0b100:
# Backslash in name: escape next character, no matter what
cname += char
flag -= 0b100
elif char == '=' and flag == 0:
# Equals sign inside unquoted name: switch to value
flag = 1
elif char == ' ' and flag == 0:
# Space inside unquoted name: save as positional argument
args.append(cname)
cname = cvalue = qc = ''
elif char == ' ' and flag == 1:
# Space inside unquoted value: save as keyword argument
kwargs[cname] = cvalue
flag = 0
cname = cvalue = qc = ''
elif char == ' ' and flag == 2:
# Space inside quoted name: save to name
cname += char
elif char == ' ' and flag == 3:
# Space inside quoted value: save to value
cvalue += char
elif char == '\\':
# Backslash: next character will be escaped
flag += 4
elif char == '"' or char == "'":
# Quote handler
qc = char
if not flag & 2:
flag += 2
elif flag & 2 and qc == char:
flag -= 2
elif flag == 2:
# Unbalanced quotes, reproduce as is
cname += char
elif flag == 3:
# Unbalanced quotes, reproduce as is
cvalue += char
elif flag & 1:
# Fallback: add anything else to value
cvalue += char
else:
# Fallback: add anything else to name
cname += char

# Handle last argument
if cvalue:
kwargs[cname] = cvalue
else:
args.append(cname)

return name, (args, kwargs)
12 changes: 9 additions & 3 deletions tests/test_shortcodes.py
Original file line number Original file line Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import pytest import pytest
from nikola import shortcodes from nikola import shortcodes
from .base import FakeSite from .base import FakeSite
import sys


def noargs(site, data=''): def noargs(site, data=''):
return "noargs {0} success!".format(data) return "noargs {0} success!".format(data)
Expand All @@ -15,6 +16,10 @@ def arg(*args, **kwargs):
# don’t clutter the kwargs dict # don’t clutter the kwargs dict
_ = kwargs.pop('site') _ = kwargs.pop('site')
data = kwargs.pop('data') data = kwargs.pop('data')
# TODO hack for Python 2.7 -- remove when possible
if sys.version_info[0] == 2:
args = tuple(i.encode('utf-8') for i in args)
kwargs = {k.encode('utf-8'): v.encode('utf-8') for k, v in kwargs.items()}
return "arg {0}/{1}/{2}".format(args, sorted(kwargs.items()), data) return "arg {0}/{1}/{2}".format(args, sorted(kwargs.items()), data)




Expand All @@ -32,15 +37,16 @@ def test_noargs(fakesite):
def test_arg_pos(fakesite): def test_arg_pos(fakesite):
assert shortcodes.apply_shortcodes('test({{% arg 1 %}})', fakesite.shortcode_registry) == "test(arg ('1',)/[]/)" assert shortcodes.apply_shortcodes('test({{% arg 1 %}})', fakesite.shortcode_registry) == "test(arg ('1',)/[]/)"
assert shortcodes.apply_shortcodes('test({{% arg 1 2aa %}})', fakesite.shortcode_registry) == "test(arg ('1', '2aa')/[]/)" assert shortcodes.apply_shortcodes('test({{% arg 1 2aa %}})', fakesite.shortcode_registry) == "test(arg ('1', '2aa')/[]/)"
# TODO: currently unsupported! assert shortcodes.apply_shortcodes('test({{% arg "hello world" %}})', fakesite.shortcode_registry) == "test(arg ('hello world',)/[]/)"
# assert shortcodes.apply_shortcodes('test({{% arg "hello world" %}})', fakesite.shortcode_registry) == "test(arg ('hello world',)/[]/)" assert shortcodes.apply_shortcodes('test({{% arg back\ slash arg2 %}})', fakesite.shortcode_registry) == "test(arg ('back slash', 'arg2')/[]/)"


def test_arg_keyword(fakesite): def test_arg_keyword(fakesite):
assert shortcodes.apply_shortcodes('test({{% arg 1a=2b %}})', fakesite.shortcode_registry) == "test(arg ()/[('1a', '2b')]/)" assert shortcodes.apply_shortcodes('test({{% arg 1a=2b %}})', fakesite.shortcode_registry) == "test(arg ()/[('1a', '2b')]/)"
assert shortcodes.apply_shortcodes('test({{% arg 1a="2b 3c" 4d=5f %}})', fakesite.shortcode_registry) == "test(arg ()/[('1a', '2b 3c'), ('4d', '5f')]/)" assert shortcodes.apply_shortcodes('test({{% arg 1a="2b 3c" 4d=5f %}})', fakesite.shortcode_registry) == "test(arg ()/[('1a', '2b 3c'), ('4d', '5f')]/)"
assert shortcodes.apply_shortcodes('test({{% arg 1a="2b 3c" 4d=5f back=slash\ slash %}})', fakesite.shortcode_registry) == "test(arg ()/[('1a', '2b 3c'), ('4d', '5f'), ('back', 'slash slash')]/)"


def test_data(fakesite): def test_data(fakesite):
assert shortcodes.apply_shortcodes('test({{% arg 123 %}}Hello!{{% /arg %}})', fakesite.shortcode_registry) == "test(arg ('123',)/[]/Hello!)" assert shortcodes.apply_shortcodes('test({{% arg 123 %}}Hello!{{% /arg %}})', fakesite.shortcode_registry) == "test(arg ('123',)/[]/Hello!)"
assert shortcodes.apply_shortcodes('test({{% arg 123 456 foo=bar %}}Hello world!{{% /arg %}})', fakesite.shortcode_registry) == "test(arg ('123', '456')/[('foo', 'bar')]/Hello world!)" assert shortcodes.apply_shortcodes('test({{% arg 123 456 foo=bar %}}Hello world!{{% /arg %}})', fakesite.shortcode_registry) == "test(arg ('123', '456')/[('foo', 'bar')]/Hello world!)"
assert shortcodes.apply_shortcodes('test({{% arg 123 456 foo=bar baz="quotes rock." %}}Hello test suite!{{% /arg %}})', fakesite.shortcode_registry) == "test(arg ('123', '456')/[('baz', 'quotes rock.'), ('foo', 'bar')]/Hello test suite!)" assert shortcodes.apply_shortcodes('test({{% arg 123 456 foo=bar baz="quotes rock." %}}Hello test suite!{{% /arg %}})', fakesite.shortcode_registry) == "test(arg ('123', '456')/[('baz', 'quotes rock.'), ('foo', 'bar')]/Hello test suite!)"
# assert shortcodes.apply_shortcodes('test({{% arg "123 foo" foobar foo=bar baz="quotes rock." %}}Hello test suite!!{{% /arg %}})', fakesite.shortcode_registry) == "test(arg ('123 foo', 'foobar')/[('baz', 'quotes rock.'), ('foo', 'bar')]/Hello test suite!!)" assert shortcodes.apply_shortcodes('test({{% arg "123 foo" foobar foo=bar baz="quotes rock." %}}Hello test suite!!{{% /arg %}})', fakesite.shortcode_registry) == "test(arg ('123 foo', 'foobar')/[('baz', 'quotes rock.'), ('foo', 'bar')]/Hello test suite!!)"

0 comments on commit 5bc23e3

Please sign in to comment.