Skip to content
Browse files
Initial (broken) implementation of pelican post reading
Signed-off-by: Chris Warrick <>
  • Loading branch information
Kwpolska committed Aug 23, 2015
1 parent c69770f commit 332c8c79f69bfa8b9dc60e16814714954e690baa
Showing 3 changed files with 79 additions and 16 deletions.
@@ -242,6 +242,7 @@ class PageCompiler(BasePlugin):
friendly_name = ''
demote_headers = False
supports_onefile = True
metadata_can_be_overridden = False
default_metadata = {
'title': '',
'slug': '',
@@ -299,7 +300,7 @@ def get_compiler_extensions(self):
return plugins

class CompilerExtension(BasePlugin):
class CompilerExtension(BasePlugin):

"""An extension for a Nikola compiler.
@@ -41,13 +41,20 @@
from nikola.utils import unicode_str, get_logger, makedirs, write_metadata, STDERR_HANDLER

DOCINFO_PELICAN_NIKOLA_MAPPING = {
'modified': 'updated',
'authors': 'author',
'summary': 'description'
}

class CompileRest(PageCompiler):

"""Compile reStructuredText into HTML."""

name = "rest"
friendly_name = "reStructuredText"
demote_headers = True
metadata_can_be_overridden = True
logger = None

def _read_extra_deps(self, post):
@@ -63,7 +70,7 @@ def register_extra_dependencies(self, post):
"""Add dependency to post object to check .dep file."""
post.add_dependency(lambda: self._read_extra_deps(post), 'fragment')

def compile_html_string(self, data, source_path=None, is_two_file=True):
def compile_html_string(self, data, source_path=None, is_two_file=True, return_publisher=False):
"""Compile reST into HTML strings."""
# If errors occur, this will be added to the line number reported by
# docutils so the line number matches the actual line number (off by
@@ -74,7 +81,10 @@ def compile_html_string(self, data, source_path=None, is_two_file=True):
add_ln = len(m_data.splitlines()) + 1

default_template_path = os.path.join(os.path.dirname(__file__), 'template.txt')
output, error_level, deps = rst2html(
# TODO cache publisher in post object (which requires a ton of
# refactoring, and might even need v8 and breaking tons of APIs) for
# speed -- right now, we are publishing each post twice
publisher = rst2html(
data, settings_overrides={
'initial_header_level': 1,
'record_dependencies': True,
@@ -83,12 +93,16 @@ def compile_html_string(self, data, source_path=None, is_two_file=True):
'syntax_highlight': 'short',
'math_output': 'mathjax',
'template': default_template_path,
}, logger=self.logger, source_path=source_path, l_add_ln=add_ln, transforms=self.site.rst_transforms)
if not isinstance(output, unicode_str):
# To prevent some weird bugs here or there.
# Original issue: empty files. `output` became a bytestring.
output = output.decode('utf-8')
return output, error_level, deps
}, logger=self.logger, source_path=source_path, l_add_ln=add_ln, transforms=self.site.rst_transforms, return_publisher=True)
if return_publisher:
return publisher
output, error_level, deps = rst_document_tuple(publisher)
if not isinstance(output, unicode_str):
# To prevent some weird bugs here or there.
# Original issue: empty files. `output` became a bytestring.
output = output.decode('utf-8')
return output, error_level, deps

def compile_html(self, source, dest, is_two_file=True):
"""Compile source file into HTML and save as dest."""
@@ -112,6 +126,41 @@ def compile_html(self, source, dest, is_two_file=True):
return False

def read_metadata(self, post, file_metadata_regexp=None, unslugify_titles=False, lang=None):
    """Read the metadata from a post, and return a metadata dict.

    The post source is passed through ``compile_html_string`` with
    ``return_publisher=True``; the title and the docinfo fields are then
    read from the resulting document tree.

    :param post: post whose translated source file is read.
    :param file_metadata_regexp: accepted for interface compatibility; not used here.
    :param unslugify_titles: accepted for interface compatibility; not used here.
    :param lang: language passed to ``post.translated_source_path``.
    :return: dict mapping (lower-cased, Nikola-mapped) field names to text values.
    """
    # Local import so the block does not rely on a module-level ``import io``
    # that is outside the visible part of the file.
    import io

    metadata = {}
    source = post.translated_source_path(lang)
    with io.open(source, 'r', encoding='utf-8') as in_file:
        data = in_file.read()

    # This is a bit of a cheat. The method is now abused to create a
    # publisher and not the full document tuple.
    publisher = self.compile_html_string(data, source, post.is_two_file, return_publisher=True)
    document = publisher.document

    # Get title.
    title_id = document.first_child_matching_class(docutils.nodes.title)
    if title_id is not None:
        metadata['title'] = document.children[title_id].astext()

    # Get any other metadata that is part of the reST standard docinfo
    # (which is a special field list)
    docinfo_id = document.first_child_matching_class(docutils.nodes.docinfo)
    if docinfo_id is not None:
        docinfo = document.children[docinfo_id]
        for field in docinfo.children:
            fieldname = field.tagname
            if fieldname == 'authors':
                # Join multiple authors into one comma-separated string.
                field.child_text_separator = ', '
                fieldvalue = field.astext()
            elif fieldname == 'field':
                # Generic field: first child holds the name, second the body.
                fieldname = field.children[0].astext()
                fieldvalue = field.children[1].astext()
            else:
                # Any other docinfo element: the tag name is the field name.
                fieldvalue = field.astext()
            fieldname = fieldname.lower()
            # Translate Pelican-style names to their Nikola equivalents.
            fieldname = DOCINFO_PELICAN_NIKOLA_MAPPING.get(fieldname, fieldname)
            metadata[fieldname] = fieldvalue
    return metadata

def create_post(self, path, **kw):
"""Create a new post."""
content = kw.pop('content', None)
@@ -237,11 +286,8 @@ def rst2html(source, source_path=None, source_class=docutils.io.StringInput,
parser=None, parser_name='restructuredtext', writer=None,
writer_name='html', settings=None, settings_spec=None,
settings_overrides=None, config_section=None,
enable_exit_status=None, logger=None, l_add_ln=0, transforms=None):
"""Set up & run a ``Publisher``, and return a dictionary of document parts.
Dictionary keys are the names of parts, and values are Unicode strings;
encoding is up to the client. For programmatic use with string I/O.
enable_exit_status=None, logger=None, l_add_ln=0, transforms=None, return_publisher=False):
"""Set up & run a ``Publisher``, and return the publisher or the document.
For encoded string input, be sure to set the 'input_encoding' setting to
the desired encoding. Set it to 'unicode' for unencoded Unicode string
@@ -275,4 +321,15 @@ def rst2html(source, source_path=None, source_class=docutils.io.StringInput,
pub.set_destination(None, destination_path)

if return_publisher:
return pub
return rst_document_tuple(pub)

def rst_document_tuple(pub):
    """Return the document tuple (output, error level, dependencies) for a publisher.

    Previously, the only output of rst2html.

    :param pub: publisher exposing ``writer.parts``, ``document.reporter``
        and ``settings``.
    :return: 3-tuple of the ``docinfo`` and ``fragment`` parts concatenated,
        ``document.reporter.max_level`` and ``settings.record_dependencies``.
    """
    parts = pub.writer.parts
    output = parts['docinfo'] + parts['fragment']
    return output, pub.document.reporter.max_level, pub.settings.record_dependencies
@@ -486,7 +486,7 @@ def wrap_encrypt(path, password):
if self.meta('password'):
# TODO: get rid of this feature one day (v8?; warning added in v7.3.0.)
LOGGER.warn("The post {0} is using the `password` attribute, which may stop working in the future.".format(self.source_path))
@@ -989,8 +989,13 @@ def get_meta(post, file_metadata_regexp=None, unslugify_titles=False, lang=None)
if getattr(post, 'compiler', None):
compiler_meta = post.compiler.read_metadata(post, file_metadata_regexp, unslugify_titles, lang)
if compiler_meta:
compiler_meta_override = post.compiler.metadata_can_be_overridden
compiler_meta_override = True

if not post.is_two_file and not compiler_meta:
if not post.is_two_file and compiler_meta_override:
# Meta file has precedence over file, which can contain garbage.
# Moreover, we should not to talk to the file if we have compiler meta.
meta.update(get_metadata_from_file(post.source_path, config, lang))

0 comments on commit 332c8c7

Please sign in to comment.