Skip to content
Permalink
Browse files

Merge pull request #2737 from getnikola/shortcodes2

Use shortcodes in a more robust manner
  • Loading branch information
ralsina committed May 10, 2017
2 parents 2fb5f39 + 62c5645 commit 77ddb6f513d55b4d97b423024be9333581e9b425
Showing with 121 additions and 25 deletions.
  1. +2 −0 CHANGES.txt
  2. +17 −1 nikola/nikola.py
  3. +5 −2 nikola/plugins/compile/rest/__init__.py
  4. +70 −22 nikola/shortcodes.py
  5. +1 −0 requirements-extras.txt
  6. +4 −0 tests/base.py
  7. +22 −0 tests/test_shortcodes.py
@@ -30,6 +30,8 @@ Features
Bugfixes
--------

* More robust shortcodes: no need to escape URLs in reST, and they work
better with LaTeX, etc.
* No longer creates empty subarchive pages, and no longer creates broken
archive navigation links on day level (Issue #2734)
* Fixes post scanner plugin order (Issue #2720)
@@ -58,6 +58,7 @@
from yapsy.PluginManager import PluginManager
from blinker import signal


from .post import Post # NOQA
from .state import Persistor
from . import DEBUG, utils, shortcodes
@@ -1688,12 +1689,27 @@ def register_shortcode(self, name, f):
self.shortcode_registry[name] = f

# XXX in v8, get rid of with_dependencies
def apply_shortcodes(self, data, filename=None, lang=None, with_dependencies=False, extra_context=None):
    """Apply shortcodes from the registry on data.

    :param data: text that may contain shortcodes
    :param filename: forwarded to shortcodes.apply_shortcodes
    :param lang: language to use; when None, the current LocaleBorg
        language is used
    :param with_dependencies: forwarded to shortcodes.apply_shortcodes
    :param extra_context: forwarded to shortcodes.apply_shortcodes; a fresh
        dict is created when None
    :return: whatever shortcodes.apply_shortcodes returns for these arguments
    """
    # None is used as the sentinel instead of a ``{}`` default, because a
    # dict default would be one object shared by every call.
    if extra_context is None:
        extra_context = {}
    if lang is None:
        lang = utils.LocaleBorg().current_lang
    return shortcodes.apply_shortcodes(
        data, self.shortcode_registry, self, filename, lang=lang,
        with_dependencies=with_dependencies, extra_context=extra_context)

def apply_shortcodes_uuid(self, data, _shortcodes, filename=None, lang=None, with_dependencies=False, extra_context=None):
    """Replace shortcode placeholders in data with the processed shortcodes.

    :param data: text in which shortcodes were previously replaced by
        placeholder ids
    :param _shortcodes: dict mapping each placeholder id to the raw shortcode
        text it stands for
    :param filename: forwarded to shortcodes.apply_shortcodes
    :param lang: language to use; when None, the current LocaleBorg
        language is used
    :param with_dependencies: forwarded to shortcodes.apply_shortcodes
    :param extra_context: forwarded to shortcodes.apply_shortcodes; a fresh
        dict is created when None
    :return: tuple ``(data, deps)`` where deps is the list of dependencies
        collected from all processed shortcodes
    """
    if lang is None:
        lang = utils.LocaleBorg().current_lang
    if extra_context is None:
        extra_context = {}
    deps = []
    for sc_id, sc_text in _shortcodes.items():
        result = shortcodes.apply_shortcodes(
            sc_text, self.shortcode_registry, self, filename, lang=lang,
            with_dependencies=with_dependencies, extra_context=extra_context)
        # NOTE(review): apply_shortcodes presumably returns (text, deps) only
        # when with_dependencies is true and the bare text otherwise. Accept
        # both shapes so the default with_dependencies=False does not fail
        # while unpacking.
        if isinstance(result, tuple):
            replacement, sc_deps = result
        else:
            replacement, sc_deps = result, []
        data = data.replace(sc_id, replacement)
        if sc_deps:
            deps.extend(sc_deps)
    return data, deps

def _get_rss_copyright(self, lang, rss_plain):
if rss_plain:
return (
@@ -81,14 +81,17 @@ def compile_string(self, data, source_path=None, is_two_file=True, post=None, la
'language_code': LEGAL_VALUES['DOCUTILS_LOCALES'].get(LocaleBorg().current_lang, 'en')
}

from nikola import shortcodes as sc
new_data, shortcodes = sc.extract_shortcodes(data)
output, error_level, deps = rst2html(
data, settings_overrides=settings_overrides, logger=self.logger, source_path=source_path, l_add_ln=add_ln, transforms=self.site.rst_transforms,
new_data, settings_overrides=settings_overrides, logger=self.logger, source_path=source_path, l_add_ln=add_ln, transforms=self.site.rst_transforms,
no_title_transform=self.site.config.get('NO_DOCUTILS_TITLE_TRANSFORM', False))
if not isinstance(output, unicode_str):
# To prevent some weird bugs here or there.
# Original issue: empty files. `output` became a bytestring.
output = output.decode('utf-8')
output, shortcode_deps = self.site.apply_shortcodes(output, filename=source_path, with_dependencies=True, extra_context=dict(post=post))

output, shortcode_deps = self.site.apply_shortcodes_uuid(output, shortcodes, filename=source_path, with_dependencies=True, extra_context=dict(post=post))
return output, error_level, deps, shortcode_deps

# TODO remove in v8
@@ -27,14 +27,11 @@
"""Support for Hugo-style shortcodes."""

from __future__ import unicode_literals
from .utils import LOGGER
import sys

import uuid

# Constants
_TEXT = 1
_SHORTCODE_START = 2
_SHORTCODE_END = 3
from .utils import LOGGER
import sys


class ParsingError(Exception):
@@ -83,11 +80,10 @@ def _skip_whitespace(data, pos, must_be_nontrivial=False):

def _skip_nonwhitespace(data, pos):
"""Return first position not before pos which contains a non-whitespace character."""
while pos < len(data):
if data[pos].isspace():
break
pos += 1
return pos
for i, x in enumerate(data[pos:]):
if x.isspace():
return pos + i
return len(data)


def _parse_quoted_string(data, start):
@@ -209,14 +205,66 @@ def _parse_shortcode_args(data, start, shortcode_name, start_pos):
raise ParsingError("Shortcode '{0}' starting at {1} is not terminated correctly with '%}}}}'!".format(shortcode_name, _format_position(data, start_pos)))


def _new_sc_id():
return str('SHORTCODE{0}REPLACEMENT'.format(str(uuid.uuid4()).replace('-', '')))


def _find_shortcode_end(tokens, start):
    """Return the index of the first end token closing the shortcode at start.

    The search uses the same rule as apply_shortcodes: the first
    SHORTCODE_END token with the same name wins (no nesting depth is
    tracked). If the shortcode is never closed, start itself is returned.
    """
    name = tokens[start][3]
    for j in range(start + 1, len(tokens)):
        if tokens[j][0] == 'SHORTCODE_END' and tokens[j][3] == name:
            return j
    return start


def extract_shortcodes(data):
    """Return data with replaced shortcodes, shortcodes.

    The first returned value is the original data, with each shortcode
    (from its opening tag up to its matching closing tag, if there is one)
    replaced by a unique placeholder id.

    The second returned value is a dictionary of shortcodes, where the keys
    are the placeholder ids and the values are the shortcodes themselves,
    ready to process.

    Raises ParsingError (a subclass of Exception) when a closing tag appears
    without a matching opening tag before it.
    """
    shortcodes = {}
    tokens = _split_shortcodes(data)
    text = []
    pos = 0
    # Walk the token list by index: this needs no tail slicing and an empty
    # token list simply yields ('', {}).
    while pos < len(tokens):
        token = tokens[pos]
        kind = token[0]
        if kind == 'SHORTCODE_START':
            sc_id = _new_sc_id()
            text.append(sc_id)
            # When the shortcode does not close, end == pos and only the
            # opening tag itself is stored.
            end = _find_shortcode_end(tokens, pos)
            shortcodes[sc_id] = ''.join(t[1] for t in tokens[pos:end + 1])
            pos = end + 1
        elif kind == 'SHORTCODE_END':  # This is malformed
            raise ParsingError('Closing unopened shortcode {0}'.format(token[3]))
        else:
            if kind == 'TEXT':
                text.append(token[1])
            pos += 1
    return ''.join(text), shortcodes


def _split_shortcodes(data):
"""Given input data, splits it into a sequence of texts, shortcode starts and shortcode ends.
Returns a list of tuples of the following forms:
1. (_TEXT, text)
2. (_SHORTCODE_START, text, start, name, args)
3. (_SHORTCODE_END, text, start, name)
1. ("TEXT", text)
2. ("SHORTCODE_START", text, start, name, args)
3. ("SHORTCODE_END", text, start, name)
Here, text is the raw text represented by the token; start is the starting position in data
of the token; name is the name of the shortcode; and args is a tuple (args, kw) as returned
@@ -228,9 +276,9 @@ def _split_shortcodes(data):
# Search for shortcode start
start = data.find('{{%', pos)
if start < 0:
result.append((_TEXT, data[pos:]))
result.append(("TEXT", data[pos:]))
break
result.append((_TEXT, data[pos:start]))
result.append(("TEXT", data[pos:start]))
# Extract name
name_start = _skip_whitespace(data, start + 3)
name_end = _skip_nonwhitespace(data, name_start)
@@ -246,13 +294,13 @@ def _split_shortcodes(data):
# Must be followed by '%}}'
if pos > len(data) or data[end_start:pos] != '%}}':
raise ParsingError("Syntax error: '{{{{% /{0}' must be followed by ' %}}}}' ({1})!".format(name, _format_position(data, end_start)))
result.append((_SHORTCODE_END, data[start:pos], start, name))
result.append(("SHORTCODE_END", data[start:pos], start, name))
elif name == '%}}':
raise ParsingError("Syntax error: '{{{{%' must be followed by shortcode name ({0})!".format(_format_position(data, start)))
else:
# This is an opening shortcode
pos, args = _parse_shortcode_args(data, name_end, shortcode_name=name, start_pos=start)
result.append((_SHORTCODE_START, data[start:pos], start, name, args))
result.append(("SHORTCODE_START", data[start:pos], start, name, args))
return result


@@ -284,17 +332,17 @@ def apply_shortcodes(data, registry, site=None, filename=None, raise_exceptions=
pos = 0
while pos < len(sc_data):
current = sc_data[pos]
if current[0] == _TEXT:
if current[0] == "TEXT":
result.append(current[1])
pos += 1
elif current[0] == _SHORTCODE_END:
elif current[0] == "SHORTCODE_END":
raise ParsingError("Found shortcode ending '{{{{% /{0} %}}}}' which isn't closing a started shortcode ({1})!".format(current[3], _format_position(data, current[2])))
elif current[0] == _SHORTCODE_START:
elif current[0] == "SHORTCODE_START":
name = current[3]
# Check if we can find corresponding ending
found = None
for p in range(pos + 1, len(sc_data)):
if sc_data[p][0] == _SHORTCODE_END and sc_data[p][3] == name:
if sc_data[p][0] == "SHORTCODE_END" and sc_data[p][3] == name:
found = p
break
if found:
@@ -13,3 +13,4 @@ ipykernel>=4.0.0
ghp-import2>=1.0.0
ws4py==0.4.2
watchdog==0.8.3
AppMetrics==0.5.0
@@ -230,3 +230,7 @@ def register_shortcode(self, name, f):
def apply_shortcodes(self, data, *a, **kw):
    """Apply shortcodes from the registry on data.

    Extra positional arguments are accepted but not used; keyword arguments
    are forwarded to nikola.shortcodes.apply_shortcodes.
    """
    return nikola.shortcodes.apply_shortcodes(data, self.shortcode_registry, **kw)

def apply_shortcodes_uuid(self, data, shortcodes, *a, **kw):
    """Apply shortcodes from the registry on data.

    Keyword arguments are forwarded to nikola.shortcodes.apply_shortcodes.

    NOTE(review): the ``shortcodes`` mapping and any extra positional
    arguments are not used here, so placeholder ids present in data are not
    substituted. Confirm this is enough for the tests relying on this fake.
    """
    return nikola.shortcodes.apply_shortcodes(data, self.shortcode_registry, **kw)
@@ -4,6 +4,8 @@
u"""Test shortcodes."""

from __future__ import unicode_literals
import itertools

import pytest
from nikola import shortcodes
from .base import FakeSite, BaseTestCase
@@ -74,3 +76,23 @@ def test_errors(self):
self.assertRaisesRegexp(shortcodes.ParsingError, "^Found shortcode ending '{{% / %}}' which isn't closing a started shortcode", shortcodes.apply_shortcodes, '{{% / %}}', self.fakesite.shortcode_registry, raise_exceptions=True)
self.assertRaisesRegexp(shortcodes.ParsingError, "^Syntax error: '{{% /' must be followed by ' %}}'", shortcodes.apply_shortcodes, '{{% / a %}}', self.fakesite.shortcode_registry, raise_exceptions=True)
self.assertRaisesRegexp(shortcodes.ParsingError, "^Shortcode '<==' starting at .* is not terminated correctly with '%}}'!", shortcodes.apply_shortcodes, '==> {{% <==', self.fakesite.shortcode_registry, raise_exceptions=True)


@pytest.mark.parametrize("input, expected", [
    ('{{% foo %}}', (u'SC1', {u'SC1': u'{{% foo %}}'})),
    ('{{% foo %}} bar {{% /foo %}}', (u'SC1', {u'SC1': u'{{% foo %}} bar {{% /foo %}}'})),
    ('AAA{{% foo %}} bar {{% /foo %}}BBB', (u'AAASC1BBB', {u'SC1': u'{{% foo %}} bar {{% /foo %}}'})),
    ('AAA{{% foo %}} {{% bar %}} {{% /foo %}}BBB', (u'AAASC1BBB', {u'SC1': u'{{% foo %}} {{% bar %}} {{% /foo %}}'})),
    ('AAA{{% foo %}} {{% /bar %}} {{% /foo %}}BBB', (u'AAASC1BBB', {u'SC1': u'{{% foo %}} {{% /bar %}} {{% /foo %}}'})),
    ('AAA{{% foo %}} {{% bar %}} quux {{% /bar %}} {{% /foo %}}BBB', (u'AAASC1BBB', {u'SC1': u'{{% foo %}} {{% bar %}} quux {{% /bar %}} {{% /foo %}}'})),
    ('AAA{{% foo %}} BBB {{% bar %}} quux {{% /bar %}} CCC', (u'AAASC1 BBB SC2 CCC', {u'SC1': u'{{% foo %}}', u'SC2': u'{{% bar %}} quux {{% /bar %}}'})),
])
def test_extract_shortcodes(input, expected, monkeypatch):
    """Check the text and shortcode mapping returned by extract_shortcodes."""
    # Replace the random placeholder ids with a predictable SC1, SC2, ...
    # sequence so the expected values can be spelled out. The builtin next()
    # works on both Python 2 and 3, so no version check is needed.
    sc_ids = iter('SC%d' % n for n in range(1, 100))
    monkeypatch.setattr(shortcodes, '_new_sc_id', lambda: next(sc_ids))
    extracted = shortcodes.extract_shortcodes(input)
    assert extracted == expected

0 comments on commit 77ddb6f

Please sign in to comment.
You can’t perform that action at this time.