Navigation Menu

Skip to content

Commit

Permalink
Improve reST handling of specialized docs.
Browse files Browse the repository at this point in the history
Notably Sphinx documents were not handled very well before, and rather noisily. So add our own implementation of `rst2html` that tries to preserve content without emulating specialized formatting, while minimizing noise. Patch by Daniele Varrazzo.
  • Loading branch information
theory committed Jan 30, 2012
1 parent 748791d commit a83c0ac
Show file tree
Hide file tree
Showing 7 changed files with 180 additions and 14 deletions.
6 changes: 4 additions & 2 deletions Build.PL
Expand Up @@ -2,7 +2,7 @@ use strict;
use warnings;
use Module::Build;

Module::Build->new(
my $build = Module::Build->new(
module_name => 'Text::Markup',
license => 'perl',
create_makefile_pl => 'traditional',
Expand Down Expand Up @@ -34,4 +34,6 @@ Module::Build->new(
repository => 'http://github.com/theory/text-markup/tree/',
}
},
)->create_build_script;
);
$build->add_build_element('py');
$build->create_build_script;
7 changes: 6 additions & 1 deletion Changes
@@ -1,7 +1,12 @@
Revision history for Perl extension Text-Markup.

0.16
- Rest support now running the `rst2html` script with `--quiet` and
- Rest support now uses its own implementation of `rst2html` so that it
can render specialized reST documents, such as Sphinx files, in a more
forgiving way: it preserves the content of unknown directives while not
emulating specialized rendering. Written by Daniele Varrazzo.

running the `rst2html` script with `--quiet` and
`--strip-elements-with-class=system-message` to eliminate error output
for unknown directives, such as those often included for Sphinx.

Expand Down
21 changes: 13 additions & 8 deletions lib/Text/Markup/Rest.pm
Expand Up @@ -3,6 +3,7 @@ package Text::Markup::Rest;
use 5.8.1;
use strict;
use File::Spec;
use File::Basename ();
use constant WIN32 => $^O eq 'MSWin32';

our $VERSION = '0.16';
Expand All @@ -18,7 +19,14 @@ foreach my $p (File::Spec->path) {
}
}
}
unless ($rst2html) {

if ($rst2html) {
# We have it, but use our custom version instead.
$rst2html = File::Spec->catfile(
File::Basename::dirname(__FILE__),
'rst2html_lenient.py'
);
} else {
use Carp;
Carp::croak(
'Cannot find rst2html.py in path ' . join ':', File::Spec->path
Expand All @@ -33,7 +41,6 @@ my @OPTIONS = qw(
--cloak-email-address
--no-generator
--quiet
--strip-elements-with-class=system-message
);

sub parser {
Expand Down Expand Up @@ -93,12 +100,10 @@ Text::Markup::Rest - reStructuredText parser for Text::Markup
This is the
L<reStructuredText|http://docutils.sourceforge.net/docs/user/rst/quickref.html>
parser for L<Text::Markup>. It uses the reference docutils implementation of
the parser, invoking C<rst2html> (or C<rst2html.py>) to do the job, so it
depends on the C<docutils> Python package (which can be found as
C<python-docutils> in many Linux distributions, or installed using the command
C<easy_install docutils>). It recognizes files with the following extensions
as reST:
parser for L<Text::Markup>. It depends on the C<docutils> Python package
(which can be found as C<python-docutils> in many Linux distributions, or
installed using the command C<easy_install docutils>). It recognizes files
with the following extensions as reST:
=over
Expand Down
140 changes: 140 additions & 0 deletions lib/Text/Markup/rst2html_lenient.py
@@ -0,0 +1,140 @@
#!/usr/bin/env python
"""
Parse a reST file into HTML in a very forgiving way.
The script is meant to render specialized reST documents, such as Sphinx
files, preserving the content, while not emulating the original rendering.
The script is currently tested against docutils 0.7. Other versions may break
it as it deals with the parser at a relatively low level.
"""

from docutils import nodes, utils
from docutils.core import publish_cmdline, default_description
from docutils.parsers.rst import Directive, directives, roles


# A generic directive to deal with any unknown directive we may find.

class any_directive(nodes.General, nodes.FixedTextElement):
    """Document node wrapping the preserved output of an unknown directive."""

class AnyDirective(Directive):
    """Catch-all directive that keeps its body instead of discarding it.

    ``run`` returns a single ``any_directive`` node containing, in order:
    the directive name (strong), each leading argument (emphasis) and the
    directive content as a literal block.
    """
    optional_arguments = 100  # should suffice
    has_content = True

    def run(self):
        """Build and return the one-element node list for this directive."""
        # The directive name always comes first.
        parts = [nodes.strong(self.name, u"%s: " % self.name)]

        # Keep the arguments, drop the options: stop at the first token
        # that looks like ``:option:``; it and everything after it is
        # discarded.
        for arg in self.arguments:
            looks_like_option = arg.startswith(':') and arg.endswith(':')
            if looks_like_option:
                break
            parts.append(nodes.emphasis(arg, u"%s " % arg))

        # The content lines are emitted verbatim in a literal block.
        body = u'\n'.join(self.content)
        parts.append(nodes.literal_block(body, body))

        return [any_directive(self.block_text, '', *parts, dir_name=self.name)]


# A generic role to deal with any unknown role we may find.

class any_role(nodes.Inline, nodes.TextElement):
    """Inline document node holding the text of an unknown role."""

class AnyRole:
    """A role to be rendered as a generic element with a specific class.

    An instance is a callable with the docutils role-function signature.
    It remembers the role name it was created for and stores it on every
    node it produces as the ``role_name`` attribute.
    """
    def __init__(self, role_name):
        self.role_name = role_name

    def __call__(self, role, rawtext, text, lineno, inliner,
                 options={}, content=[]):
        """Return ``([any_role node], [])`` for the interpreted text.

        The ``options`` mapping is never modified: the defaults in the
        signature are shared between all calls (and all AnyRole
        instances), so the method works on a private copy before
        normalizing classes and adding ``role_name``.
        """
        # Copy first: mutating the shared default dict (or the caller's
        # dict) would leak state from one call into the next.
        options = dict(options)
        roles.set_classes(options)
        options['role_name'] = self.role_name
        node = any_role(rawtext, utils.unescape(text), **options)
        return [node], []


# Patch the parser so that when an unknown directive is found, a generic one
# is generated on the fly.

from docutils.parsers.rst.states import Body

def catchall_directive(self, match, **option_presets):
    """Directive handler that never fails on an unknown directive name.

    The directive name is taken from the first group of ``match``. If no
    directive class is registered under that name, ``AnyDirective`` is
    registered for it and the lookup is repeated. Lookup messages are
    appended to ``self.parent`` and the directive is then run through
    ``self.run_directive``.
    """
    name = match.group(1)

    def lookup():
        # Same lookup is needed before and after the fallback registration.
        return directives.directive(name, self.memo.language, self.document)

    handler, notes = lookup()

    # in case it's missing, register a generic directive
    if not handler:
        directives.register_directive(name, AnyDirective)
        handler, notes = lookup()
        assert handler, "can't find just defined directive"

    self.parent += notes
    return self.run_directive(handler, match, name, option_presets)

# Patch the constructs dispatch table: replace the entry whose handler is the
# stock ``Body.directive`` with ``catchall_directive``, keeping its pattern.
#
# The table stores plain functions. On Python 2 ``Body.directive`` is an
# unbound method whose underlying function is ``im_func``; on Python 3 it is
# already the plain function, so fall back to the attribute itself.
_directive_func = getattr(Body.directive, 'im_func', Body.directive)

for i, (f, p) in enumerate(Body.explicit.constructs):
    if f is _directive_func:
        Body.explicit.constructs[i] = (catchall_directive, p)
        break
else:
    # Raised explicitly (rather than ``assert False``) so the failure is not
    # silently skipped when Python runs with assertions disabled (-O).
    raise AssertionError("can't find directive dispatch entry")

# Also replace the method itself for callers that go through the attribute.
Body.directive = catchall_directive


# Patch the parser so that when an unknown interpreted text role is found,
# a generic one is generated on the fly.

from docutils.parsers.rst.states import Inliner

def catchall_interpreted(self, rawsource, text, role, lineno):
    """Interpreted-text handler that never fails on an unknown role name.

    If no role function is registered for ``role``, an ``AnyRole`` instance
    is registered as the canonical role under that name and the lookup is
    repeated. Returns the nodes produced by the role function together with
    the lookup messages followed by the role function's own messages.
    """
    def lookup():
        # Same lookup is needed before and after the fallback registration.
        return roles.role(role, self.language, lineno, self.reporter)

    role_fn, lookup_msgs = lookup()

    # in case it's missing, register a generic role
    if not role_fn:
        roles.register_canonical_role(role, AnyRole(role))
        role_fn, lookup_msgs = lookup()
        assert role_fn, "can't find just defined role"

    result_nodes, role_msgs = role_fn(role, rawsource, text, lineno, self)
    return result_nodes, lookup_msgs + role_msgs

# Install the replacement on the inliner class.
Inliner.interpreted = catchall_interpreted


# Create a writer to deal with the generic element we may have created.

# Description string handed to publish_cmdline() at the end of the script:
# a summary of this lenient front end followed by docutils' default text.
description = ('Generates (X)HTML documents from standalone reStructuredText '
'sources. Be extremely forgiving against unknown elements. '
+ default_description)

from docutils.writers.html4css1 import Writer, HTMLTranslator

class MyTranslator(HTMLTranslator):
    """HTML translator that also renders the generic nodes defined here.

    ``any_directive`` nodes become ``<div class="directive-NAME">`` and
    ``any_role`` nodes become ``<span class="role-NAME">``; when the node
    carries no name the class is plain ``directive`` / ``role``.
    """

    def visit_any_directive(self, node):
        name = node.get('dir_name')
        if name:
            css_class = 'directive-%s' % name
        else:
            css_class = 'directive'
        self.body.append(self.starttag(node, 'div', CLASS=css_class))

    def depart_any_directive(self, node):
        self.body.append('\n</div>\n')

    def visit_any_role(self, node):
        name = node.get('role_name')
        if name:
            css_class = 'role-%s' % name
        else:
            css_class = 'role'
        # Empty suffix: no newline is emitted after the opening inline tag.
        self.body.append(self.starttag(node, 'span', '', CLASS=css_class))

    def depart_any_role(self, node):
        self.body.append('</span>')


# Use the html4css1 writer, but have it translate with MyTranslator so the
# any_directive / any_role nodes have visit/depart handlers.
writer = Writer()
writer.translator_class = MyTranslator

# Script entry point: run the docutils command-line publisher with the
# customized writer and description.
publish_cmdline(writer=writer, description=description)
2 changes: 1 addition & 1 deletion t/base.t
Expand Up @@ -25,7 +25,7 @@ opendir my $dh, $dir or die "Cannot open diretory $dir: $!\n";
my @core_parsers;
while (my $f = readdir $dh) {
next if $f eq '.' || $f eq '..' || $f eq 'None.pm';
$f =~ s{[.]pm$}{};
$f =~ s{[.]pm$}{} or next;
push @core_parsers => lc $f;
}

Expand Down
8 changes: 8 additions & 0 deletions t/html/rest.html
Expand Up @@ -13,6 +13,14 @@ <h2 class="subtitle" id="and-i-am-its-subtitle">And I am its subtitle</h2>

<p><a class="reference external" href="http://docutils.sourceforge.net/">Rest</a> allows <em>inline</em> <strong>style</strong> and other stuff,
but I guess this <tt class="docutils literal">code</tt> is enough.</p>
<div class="directive-unknown">
<strong>unknown: </strong><em>arg1 </em><em>arg2 </em><pre class="literal-block">
We also support unknown directive, so that no content is lost if reST
specialization is parsed.
</pre>

</div>
<p>Of course we may also find <span class="role-unknown">new roles</span>.</p>
</div>
</body>
</html>
10 changes: 8 additions & 2 deletions t/markups/rest.txt
Expand Up @@ -4,10 +4,16 @@ I am a reST document
And I am its subtitle
=====================

.. toctree::

Rest__ allows *inline* **style** and other stuff,
but I guess this ``code`` is enough.

.. __: http://docutils.sourceforge.net/

.. unknown:: arg1 arg2
:option: foo

We also support unknown directive, so that no content is lost if reST
specialization is parsed.

Of course we may also find :unknown:`new roles`.

0 comments on commit a83c0ac

Please sign in to comment.