Skip to content

Commit

Permalink
Remove re for finding YAML frontmatter. Fixes #46
Browse files Browse the repository at this point in the history
Because there is a constraint of only one frontmatter section,
a string split on the YAML document marker will be much more
reliable than a complicated regular expression (and probably faster
to boot).
  • Loading branch information
mblayman committed Oct 7, 2015
1 parent ed68cbf commit 5fde2de
Show file tree
Hide file tree
Showing 2 changed files with 72 additions and 25 deletions.
48 changes: 30 additions & 18 deletions handroll/composers/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,16 +2,17 @@

import io
import os
import re
try:
from html import escape
except ImportError:
from cgi import escape

import yaml
from yaml.scanner import ScannerError

from handroll import logger, signals
from handroll.composers import Composer
from handroll.exceptions import AbortError
from handroll.i18n import _


Expand All @@ -22,14 +23,7 @@ class GenericHTMLComposer(Composer):
lines will be passed to a template method for further processing.
"""
output_extension = '.html'

# A pattern to get source content from a file with YAML front matter.
yaml_scanner = re.compile(r""".*? # YAML header
---
.*? # front matter
---\n
(?P<markup>.*)""",
re.DOTALL | re.VERBOSE)
# The marker is matched against text already held in memory. Text-mode reads
# translate platform line endings to '\n', so a marker built from os.linesep
# ('\r\n' on Windows) would never be found there.
# NOTE(review): assumes source files are read in text mode with the default
# newline handling -- confirm against the open() call.
document_marker = '---\n'

def compose(self, catalog, source_file, out_dir):
"""Compose an HTML document by generating HTML from the source
Expand Down Expand Up @@ -74,16 +68,9 @@ def _get_data(self, source_file):
source = f.read()

if self._has_frontmatter(first):
documents = yaml.load_all(source)
data = next(documents)
if 'title' in data:
data['title'] = escape(data['title'])
data, source = self._split_content_with_frontmatter(
first, source, source_file)
signals.frontmatter_loaded.send(source_file, frontmatter=data)

# Don't pass all file content to the composer. Find the markup.
match = re.search(self.yaml_scanner, source)
if match:
source = match.group('markup')
else:
# This is a plain file so pull title from the first line.
data['title'] = escape(first)
Expand All @@ -95,6 +82,31 @@ def _has_frontmatter(self, first_line):
front matter from YAML documents."""
return first_line.startswith(('%YAML', '---'))

def _split_content_with_frontmatter(self, first, source, source_file):
    """Separate frontmatter from source material.

    ``source`` is split on ``self.document_marker``. The piece just before
    the last expected marker is parsed as YAML and the piece after it is
    returned as the remaining source.

    :param first: The first line of the file, used to detect a ``%YAML``
        directive.
    :param source: The text to split (presumably the file content that
        follows ``first`` -- confirm against the caller).
    :param source_file: Path of the file; used only in error messages.
    :returns: A ``(data, source)`` tuple holding the frontmatter mapping
        (with any ``title`` HTML-escaped) and the content that follows
        the frontmatter.
    :raises AbortError: If a document marker is missing, the YAML is
        invalid, or the frontmatter is not a mapping.
    """
    # With a directive present, there must be two document markers.
    max_splits = 2 if first.startswith('%YAML') else 1
    content = source.split(self.document_marker, max_splits)

    # split() returns one more piece than the number of markers it found.
    # With a directive and no marker at all there is no frontmatter piece
    # to index, which previously escaped as a bare IndexError.
    if len(content) < max_splits:
        raise AbortError(_('A YAML marker was missing in {source}').format(
            source=source_file))

    try:
        # NOTE(review): yaml.load can construct arbitrary Python objects
        # from tagged input; consider yaml.safe_load if frontmatter can
        # come from untrusted files.
        data = yaml.load(content[max_splits - 1])
    except yaml.YAMLError as ex:
        # YAMLError is the base class of ScannerError, so parser and
        # constructor failures abort cleanly as well.
        raise AbortError(_(
            'There is invalid YAML in the frontmatter: {details}').format(
                details=str(ex)))

    # The closing marker is absent when nothing follows the frontmatter.
    if len(content) <= max_splits:
        raise AbortError(_('A YAML marker was missing in {source}').format(
            source=source_file))
    source = content[max_splits]

    if data is None:
        # Empty frontmatter parses to None; treat it as no data.
        data = {}
    elif not isinstance(data, dict):
        raise AbortError(_(
            'The frontmatter in {source} is not a YAML mapping').format(
                source=source_file))

    if 'title' in data:
        data['title'] = escape(data['title'])

    return data, source

def _needs_update(self, template, source_file, output_file):
"""Check if the output file needs to be updated by looking at the
modified times of the template, source file, and output file."""
Expand Down
49 changes: 42 additions & 7 deletions handroll/tests/test_composers.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@

import inspect
import os
import re
import stat
import tempfile

Expand Down Expand Up @@ -232,6 +231,48 @@ def test_gets_frontmatter(self):
self.assertEqual('A Fake Title', data['title'])
self.assertEqual('The Content', source)

def test_gets_frontmatter_no_directive(self):
    """Frontmatter opened by a bare ``---`` marker (no ``%YAML``) is read."""
    source = inspect.cleandoc("""---
        title: A Fake Title
        ---
        The Content
        """)
    with tempfile.NamedTemporaryFile(delete=False) as f:
        f.write(source.encode('utf-8'))
    # delete=False keeps the file on disk after the with block so it can
    # be reopened by name; remove it once the test finishes.
    self.addCleanup(os.remove, f.name)
    composer = self._make_one()
    data, source = composer._get_data(f.name)
    self.assertEqual('A Fake Title', data['title'])
    self.assertEqual('The Content', source)

def test_malformed_document_with_frontmatter(self):
    """A directive followed by only one document marker aborts."""
    source = inspect.cleandoc("""%YAML 1.1
        ---
        title: A Fake Title
        """)
    with tempfile.NamedTemporaryFile(delete=False) as f:
        f.write(source.encode('utf-8'))
    # delete=False keeps the file on disk after the with block so it can
    # be reopened by name; remove it once the test finishes.
    self.addCleanup(os.remove, f.name)
    composer = self._make_one()
    with self.assertRaises(AbortError):
        composer._get_data(f.name)

def test_malformed_yaml(self):
    """Content running straight on from the frontmatter aborts."""
    source = inspect.cleandoc("""%YAML 1.1
        ---
        title: A Fake Title
        The Content
        """)
    with tempfile.NamedTemporaryFile(delete=False) as f:
        f.write(source.encode('utf-8'))
    # delete=False keeps the file on disk after the with block so it can
    # be reopened by name; remove it once the test finishes.
    self.addCleanup(os.remove, f.name)
    composer = self._make_one()
    with self.assertRaises(AbortError):
        composer._get_data(f.name)

@mock.patch('handroll.composers.generic.signals')
def test_fires_frontmatter_loaded(self, signals):
source = inspect.cleandoc("""%YAML 1.1
Expand All @@ -252,12 +293,6 @@ def test_looks_like_frontmatter(self):
self.assertTrue(composer._has_frontmatter('%YAML 1.1'))
self.assertTrue(composer._has_frontmatter('---'))

def test_frontmatter_pattern_captures_markup(self):
    """The scanner's ``markup`` group holds the text after the markers."""
    document = '---\n---\nabc'
    scanner = self._make_one().yaml_scanner
    found = re.search(scanner, document)
    self.assertEqual('abc', found.group('markup'))

def test_needs_update(self):
site = tempfile.mkdtemp()
output_file = os.path.join(site, 'output.md')
Expand Down

0 comments on commit 5fde2de

Please sign in to comment.