Skip to content
Permalink
Browse files

Pelican style Markdown metadata (Issue #1923) (#2805)

* Pelican style Markdown metadata (Issue #1923)

* docs

* lowercase keys for Pelican-style metadata

* reordered code so translated_source_path works earlier

* Use translated_source_path to get metadata out of ipynb files

* lint

* minor [ci skip]

* Get metadata from reSt docinfo and title. Don't get title from regexps

* remove tests for deleted code

* Flag to enable/disable docinfo metadata

* Remove docinfo from output

* don't assume

* Missing docstring.

* pedantry

* make post-list work

* separate HIDE_REST_DOCINFO option

* oops

* Lint

* Lint

* damned

* damned

* damned

* damned

* ignore more

* minor doc fix

* Add default

* describe how to enable markdown metadata in changelog
  • Loading branch information
ralsina committed Jun 2, 2017
1 parent 2f1199a commit f1dbc2162e9f05a9fce6d15abbbc168a4447baa0
@@ -1,3 +1,6 @@
# Demo for baguetteBox.js
/bower_components/baguettebox.js/demo/*

*.db
*~
*.diff
@@ -15,6 +18,7 @@ output/
# All of .idea should be ignored
.idea/
.vscode/
node_modules

# Created by https://www.gitignore.io/api/pydev,python,pycharm,vim

@@ -195,6 +199,4 @@ Session.vim
.netrwhist
*~

# Demo for baguetteBox.js
bower_components/baguettebox.js/demo
# End of https://www.gitignore.io/api/pydev,python,pycharm,vim
@@ -4,6 +4,11 @@ New in master
Features
--------

* Support for reStructuredText docinfo metadata with the
USE_REST_DOCINFO_METADATA option, defaulting to False (Issue #1923)
* New HIDE_REST_DOCINFO option, defaulting to False.
* Support for Markdown Metadata for Pelican compatibility by adding
``'markdown.extensions.meta'`` to ``MARKDOWN_EXTENSIONS`` (Issue #1923)
* Support for YAML and TOML metadata (Issue #2801)

Bugfixes
@@ -312,6 +312,30 @@ TOML metadata should be wrapped by a "+++" separator and in that case, the usual
date = "2012-09-15 19:52:05 UTC"
+++

Markdown metadata only works in Markdown files, and requires the ``markdown.extensions.meta`` extension
(see `MARKDOWN_EXTENSIONS <#markdown>`__). The exact format is described in
the `markdown metadata extension docs <https://pythonhosted.org/Markdown/extensions/meta_data.html>`__:

.. code::

title: How to make money
slug: how-to-make-money
date: 2012-09-15 19:52:05 UTC

Nikola can extract metadata from reStructuredText docinfo fields and the document itself, too:

.. code::

How to make money
=================

:slug: how-to-make-money
:date: 2012-09-15 19:52:05 UTC

To do this, you need ``USE_REST_DOCINFO_METADATA = True`` in your ``conf.py``,
and Nikola will hide the docinfo fields in the output if you set
``HIDE_REST_DOCINFO = True``.

Basic
`````

@@ -1135,6 +1135,12 @@ MARKDOWN_EXTENSIONS = ['markdown.extensions.fenced_code', 'markdown.extensions.c
# (Note the '.*\/' in the beginning -- matches source paths relative to conf.py)
# FILE_METADATA_REGEXP = None

# If enabled, extract metadata from docinfo fields in reST documents
# USE_REST_DOCINFO_METADATA = False

# If enabled, hide docinfo fields in reST document output
# HIDE_REST_DOCINFO = False

# If you hate "Filenames with Capital Letters and Spaces.md", you should
# set this to true.
UNSLUGIFY_TITLES = True
@@ -517,6 +517,7 @@ def __init__(self, **config):
'GZIP_EXTENSIONS': ('.txt', '.htm', '.html', '.css', '.js', '.json', '.xml'),
'HIDDEN_AUTHORS': [],
'HIDDEN_TAGS': [],
'HIDE_REST_DOCINFO': False,
'HIDDEN_CATEGORIES': [],
'HYPHENATE': False,
'IMAGE_FOLDERS': {'images': ''},
@@ -604,6 +605,7 @@ def __init__(self, **config):
'USE_BUNDLES': True,
'USE_CDN': False,
'USE_CDN_WARNING': True,
'USE_REST_DOCINFO_METADATA': False,
'USE_FILENAME_AS_TITLE': True,
'USE_KATEX': False,
'USE_OPEN_GRAPH': True,
@@ -44,7 +44,7 @@

from nikola import shortcodes as sc
from nikola.plugin_categories import PageCompiler
from nikola.utils import makedirs, req_missing, get_logger, STDERR_HANDLER
from nikola.utils import makedirs, req_missing, get_logger, STDERR_HANDLER, LocaleBorg


class CompileIPynb(PageCompiler):
@@ -113,7 +113,9 @@ def read_metadata(self, post, file_metadata_regexp=None, unslugify_titles=False,
will be assumed to be in the 'nikola' subfield.
"""
self._req_missing_ipynb()
source = post.source_path
if lang is None:
lang = LocaleBorg().current_lang
source = post.translated_source_path(lang)
with io.open(source, "r", encoding="utf8") as in_file:
nb_json = nbformat.read(in_file, current_nbformat)
# Metadata might not exist in two-file posts or in hand-crafted
@@ -42,7 +42,7 @@

from nikola import shortcodes as sc
from nikola.plugin_categories import PageCompiler
from nikola.utils import makedirs, req_missing, write_metadata
from nikola.utils import makedirs, req_missing, write_metadata, LocaleBorg


class ThreadLocalMarkdown(threading.local):
@@ -58,8 +58,14 @@ def __init__(self, extensions):
def convert(self, data):
"""Convert data to HTML and reset internal state."""
result = self.markdown.convert(data)
try:
meta = {}
for k in self.markdown.Meta: # This reads everything as lists
meta[k.lower()] = ','.join(self.markdown.Meta[k])
except Exception:
meta = {}
self.markdown.reset()
return result
return result, meta


class CompileMarkdown(PageCompiler):
@@ -85,6 +91,7 @@ def set_site(self, site):
extensions.extend(site_extensions)
if Markdown is not None:
self.converter = ThreadLocalMarkdown(extensions)
self.support_metadata = 'markdown.extensions.meta' in extensions

def compile_string(self, data, source_path=None, is_two_file=True, post=None, lang=None):
"""Compile Markdown into HTML strings."""
@@ -93,7 +100,7 @@ def compile_string(self, data, source_path=None, is_two_file=True, post=None, la
if not is_two_file:
_, data = self.split_metadata(data)
new_data, shortcodes = sc.extract_shortcodes(data)
output = self.converter.convert(new_data)
output, _ = self.converter.convert(new_data)
output, shortcode_deps = self.site.apply_shortcodes_uuid(output, shortcodes, filename=source_path, with_dependencies=True, extra_context=dict(post=post))
return output, shortcode_deps

@@ -134,3 +141,16 @@ def create_post(self, path, **kw):
fd.write(write_metadata(metadata))
fd.write('-->\n\n')
fd.write(content)

def read_metadata(self, post, file_metadata_regexp=None, unslugify_titles=False, lang=None):
    """Return the metadata dict extracted from a post's Markdown source.

    An empty dict is returned when ``self.support_metadata`` is false.
    Otherwise the translated source for ``lang`` is read and run through
    ``self.converter``; the metadata half of its result is returned.

    ``file_metadata_regexp`` and ``unslugify_titles`` are accepted but not
    used by this method. When ``lang`` is None, the current language
    reported by ``LocaleBorg`` is used.
    """
    if self.support_metadata:
        if Markdown is None:
            req_missing(['markdown'], 'build this site (compile Markdown)')
        if lang is None:
            lang = LocaleBorg().current_lang
        path = post.translated_source_path(lang)
        with io.open(path, 'r', encoding='utf-8') as source_file:
            # The rendered HTML is discarded; only the metadata is wanted.
            _html, metadata = self.converter.convert(source_file.read())
        return metadata
    return {}
@@ -32,6 +32,7 @@

import docutils.core
import docutils.nodes
import docutils.transforms
import docutils.utils
import docutils.io
import docutils.readers.standalone
@@ -59,6 +60,37 @@ class CompileRest(PageCompiler):
demote_headers = True
logger = None

def read_metadata(self, post, file_metadata_regexp=None, unslugify_titles=False, lang=None):
    """Read the metadata from a post, and return a metadata dict.

    Returns an empty dict unless the ``USE_REST_DOCINFO_METADATA`` site
    option is truthy. Otherwise the translated source for ``lang`` is parsed
    with ``rst2html`` and metadata is collected from the resulting document:

    * ``title`` -- the document's ``title`` attribute, when present;
    * one entry per docinfo child, keyed by the lowercased field name
      (custom fields) or tag name (standard fields). ``authors`` maps to a
      list with one string per child; every other value is a string.

    ``file_metadata_regexp`` and ``unslugify_titles`` are accepted but not
    used by this method. When ``lang`` is None, the current language
    reported by ``LocaleBorg`` is used.
    """
    if not self.site.config.get('USE_REST_DOCINFO_METADATA'):
        return {}
    if lang is None:
        lang = LocaleBorg().current_lang
    source_path = post.translated_source_path(lang)

    with io.open(source_path, 'r', encoding='utf-8') as inf:
        data = inf.read()
    # The file is closed before parsing; only the document tree is needed.
    _, _, _, document = rst2html(data, logger=self.logger, source_path=source_path, transforms=self.site.rst_transforms, no_title_transform=False)

    meta = {}
    if 'title' in document:
        meta['title'] = document['title']
    for docinfo in document.traverse(docutils.nodes.docinfo):
        for element in docinfo.children:
            if element.tagname == 'field':  # custom fields (e.g. summary)
                name_elem, body_elem = element.children
                name = name_elem.astext()
                value = body_elem.astext()
            elif element.tagname == 'authors':  # author list
                name = element.tagname
                # Use a distinct name: reusing ``element`` here would rebind
                # the loop variable on Python 2, where comprehension
                # variables leak into the enclosing scope.
                value = [author.astext() for author in element.children]
            else:  # standard fields (e.g. address)
                name = element.tagname
                value = element.astext()
            meta[name.lower()] = value
    return meta

def compile_string(self, data, source_path=None, is_two_file=True, post=None, lang=None):
"""Compile reST into HTML strings."""
# If errors occur, this will be added to the line number reported by
@@ -83,7 +115,9 @@ def compile_string(self, data, source_path=None, is_two_file=True, post=None, la

from nikola import shortcodes as sc
new_data, shortcodes = sc.extract_shortcodes(data)
output, error_level, deps = rst2html(
if self.site.config.get('HIDE_REST_DOCINFO', False):
self.site.rst_transforms.append(RemoveDocinfo)
output, error_level, deps, _ = rst2html(
new_data, settings_overrides=settings_overrides, logger=self.logger, source_path=source_path, l_add_ln=add_ln, transforms=self.site.rst_transforms,
no_title_transform=self.site.config.get('NO_DOCUTILS_TITLE_TRANSFORM', False))
if not isinstance(output, unicode_str):
@@ -299,7 +333,7 @@ def rst2html(source, source_path=None, source_class=docutils.io.StringInput,
pub.set_destination(None, destination_path)
pub.publish(enable_exit_status=enable_exit_status)

return pub.writer.parts['docinfo'] + pub.writer.parts['fragment'], pub.document.reporter.max_level, pub.settings.record_dependencies
return pub.writer.parts['docinfo'] + pub.writer.parts['fragment'], pub.document.reporter.max_level, pub.settings.record_dependencies, pub.document


# Alignment helpers for extensions
@@ -308,3 +342,14 @@ def rst2html(source, source_path=None, source_class=docutils.io.StringInput,

def _align_choice(argument):
    """Validate ``argument`` against the base alignment options plus "none" and ""."""
    allowed_values = _align_options_base + ("none", "")
    return docutils.parsers.rst.directives.choice(argument, allowed_values)


class RemoveDocinfo(docutils.transforms.Transform):
    """Docutils transform that strips docinfo nodes from the document."""

    default_priority = 870

    def apply(self):
        """Detach every docinfo node found in the document from its parent."""
        docinfo_nodes = self.document.traverse(docutils.nodes.docinfo)
        for docinfo in docinfo_nodes:
            docinfo.parent.remove(docinfo)
@@ -153,15 +153,17 @@ def __init__(
self._dependency_uptodate_page = defaultdict(list)
self._depfile = defaultdict(list)

# Load internationalized metadata
for lang in self.translations:
if os.path.isfile(get_translation_candidate(self.config, self.source_path, lang)):
self.translated_to.add(lang)

default_metadata, self.newstylemeta = get_meta(self, self.config['FILE_METADATA_REGEXP'], self.config['UNSLUGIFY_TITLES'])

self.meta = Functionary(lambda: None, self.default_lang)
self.meta[self.default_lang] = default_metadata

# Load internationalized metadata
for lang in self.translations:
if os.path.isfile(get_translation_candidate(self.config, self.source_path, lang)):
self.translated_to.add(lang)
if lang != self.default_lang:
meta = defaultdict(lambda: '')
meta.update(default_metadata)
@@ -1003,24 +1005,6 @@ def get_metadata_from_file(source_path, config=None, lang=None):
string.punctuation)))


def _get_title_from_contents(meta_data):
    """Extract a title from the file contents, as a last resort.

    ``meta_data`` is the sequence of source lines. Lines are scanned in
    order and the first recognised title wins:

    * a line matching ``re_rst_title`` that is not the first line: the
      previous line, stripped, is the title;
    * a first line matching ``re_rst_title`` whose third line also matches:
      the second line, stripped, is the title;
    * a line matching ``re_md_title``: the first match is the title.

    (``re_rst_title`` / ``re_md_title`` are module-level patterns --
    presumably a reST title adornment and a Markdown ``#`` heading; confirm
    against their definitions.)

    Returns the title, or None when nothing matched.
    """
    title = None
    for i, line in enumerate(meta_data):
        if re_rst_title.findall(line):
            if i > 0:
                title = meta_data[i - 1].strip()
                break
            # First line is an adornment: only an overline/underline pair
            # counts. Check the length first so that a short document does
            # not raise IndexError on the third-line lookup.
            if len(meta_data) > 2 and re_rst_title.findall(meta_data[2]):
                title = meta_data[1].strip()
                break
        md_titles = re_md_title.findall(line)
        if md_titles:
            title = md_titles[0]
            break
    return title


def _get_metadata_from_file(meta_data):
"""Extract metadata from a post's source file."""
meta = {}
@@ -1063,12 +1047,6 @@ def _get_metadata_from_file(meta_data):
if match[0]:
meta[match[0]] = match[1]

# If we have no title, try to get it from document
if 'title' not in meta:
t = _get_title_from_contents(meta_data)
if t is not None:
meta['title'] = t

return meta


@@ -42,32 +42,6 @@ def test_getting_metadata_from_content(self):
self.assertFalse('description' in meta)
self.assertTrue(nsm)

def test_get_title_from_rest(self):
    # The source has no ".. title:" field; the underlined heading is
    # expected to be reported as the title.
    source_lines = [
        ".. slug: write-tests-now\n",
        ".. date: 2012/09/15 19:52:05\n",
        ".. tags:\n",
        ".. link:\n",
        ".. description:\n\n",
        "Post Title\n",
        "----------\n",
    ]
    fake_open = mock.mock_open(read_data="".join(source_lines))

    fake_post = dummy()
    fake_post.source_path = 'file_with_metadata'
    fake_post.metadata_path = 'file_with_metadata.meta'

    with mock.patch('nikola.post.io.open', fake_open, create=True):
        meta, new_style_meta = get_meta(fake_post)

    expected_values = (
        ('title', 'Post Title'),
        ('slug', 'write-tests-now'),
        ('date', '2012/09/15 19:52:05'),
    )
    for key, value in expected_values:
        self.assertEqual(value, meta[key])
    # Fields that are present but empty must not show up in the result.
    for empty_field in ('tags', 'link', 'description'):
        self.assertFalse(empty_field in meta)
    self.assertTrue(new_style_meta)

def test_get_title_from_fname(self):
file_metadata = ".. slug: write-tests-now\n"\
".. date: 2012/09/15 19:52:05\n"\
@@ -322,13 +296,9 @@ def test_get_metadata_from_file():
from nikola.post import _get_metadata_from_file
g = _get_metadata_from_file
assert list(g([]).values()) == []
assert str(g(["======", "FooBar", "======"])["title"]) == 'FooBar'
assert str(g(["FooBar", "======"])["title"]) == 'FooBar'
assert str(g(["#FooBar"])["title"]) == 'FooBar'
assert str(g([".. title: FooBar"])["title"]) == 'FooBar'
assert 'title' not in g(["", "", ".. title: FooBar"])
assert 'title' in g(["", ".. title: FooBar"])
assert 'title' in g([".. foo: bar", "", "FooBar", "------"])


def test_get_asset_path():

0 comments on commit f1dbc21

Please sign in to comment.
You can’t perform that action at this time.