/
rst.py
199 lines (160 loc) · 6.48 KB
/
rst.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
try:
import docutils
from docutils.core import publish_programmatically, Publisher
from docutils.transforms.parts import Contents
import docutils.io as io
from docutils.utils import new_document
have_docutils = True
except ImportError:
have_docutils = False
from blazeutils.datastructures import BlankObject
# Baseline docutils settings for this module: rst2pub() layers the caller's
# settings_overrides on top of these, and create_toc() uses them as-is.
# Setting reference: http://docutils.sourceforge.net/docs/user/config.html
default_rst_opts = {
    # -- output decoration ---------------------------------------------------
    # NOTE(review): the docutils config reference lists these two as the
    # `generator` / `source_link` settings (``--no-generator`` and
    # ``--no-source-link`` being the command-line spellings) -- confirm that
    # the `no_*` keys are actually consumed.
    'no_generator': True,
    'no_source_link': True,

    # -- parsing / rendering -------------------------------------------------
    'tab_width': 4,
    'stylesheet_path': None,

    # -- error handling ------------------------------------------------------
    # per the docutils config reference, 1 is the "info" level, i.e. even
    # informational system messages halt processing
    'halt_level': 1,
    'doctitle_xform': False,

    # -- untrusted input -----------------------------------------------------
    # per the docutils config reference, these switch off the "raw" directive
    # and the directives that pull in external files
    'raw_enabled': False,
    'traceback': True,
    'file_insertion_enabled': False,
}
def rst2pub(source, source_path=None, source_class=None,
            destination_path=None,
            reader=None, reader_name='standalone',
            parser=None, parser_name='restructuredtext',
            writer=None, writer_name='pseudoxml',
            settings=None, settings_spec=None,
            settings_overrides=None, config_section=None,
            enable_exit_status=None):
    """
    Publish a reStructuredText source and return the publisher object.

    This wraps `docutils.core.publish_programmatically`: the output is
    written to a string and discarded, and the publisher is handed back so
    the caller can reach the document and the writer.  The values in
    `default_rst_opts` are used as settings overrides; anything passed in
    `settings_overrides` takes precedence over them.

    Parameters: see `docutils.core` functions for explanation.  When
    `source_class` is not given, `docutils.io.StringInput` is used.

    Raises: ImportError if docutils could not be imported.

    Example:

        pub = rst2pub(rst_string)
        print(docinfo2dict(pub.document))
    """
    if not have_docutils:
        raise ImportError(
            'docutils library is required to use reStructuredText conversion'
        )

    # start from the module defaults, then let the caller's values win
    merged_overrides = dict(default_rst_opts)
    merged_overrides.update(settings_overrides or {})

    if not source_class:
        source_class = io.StringInput

    # the call returns an (output, publisher) pair; only the publisher is
    # of interest here
    _, pub = publish_programmatically(
        source_class=source_class,
        source=source,
        source_path=source_path,
        destination_class=io.StringOutput,
        destination=None,
        destination_path=destination_path,
        reader=reader,
        reader_name=reader_name,
        parser=parser,
        parser_name=parser_name,
        writer=writer,
        writer_name=writer_name,
        settings=settings,
        settings_spec=settings_spec,
        settings_overrides=merged_overrides,
        config_section=config_section,
        enable_exit_status=enable_exit_status,
    )
    return pub
def docinfo2dict(doctree):
    """
    Collect the fields of the doctree's first docinfo element into a dict.

    An empty dict is returned when the doctree has no docinfo element.

    Keys are built as follows:

    * an ``authors`` element is stored under ``'authors'`` as a list with
      the text of each of its children
    * any other text element (e.g. ``author``) is stored under the name of
      its node class
    * everything else is unpacked as a (name, body) pair and stored under
      the name's text

    Note: a field may occur more than once in the docinfo.  Because the
    result is a dictionary, the value of the last occurrence wins.

    Example:

        pub = rst2pub(rst_string)
        print(docinfo2dict(pub.document))
    """
    metadata = {}

    # only the first docinfo element is considered
    docinfo = next(iter(doctree.findall(docutils.nodes.docinfo)), None)
    if docinfo is None:
        return metadata

    for field in docinfo:
        # this classification was copied from Sphinx; the reasoning behind
        # it is not documented here
        if isinstance(field, docutils.nodes.authors):
            metadata['authors'] = [person.astext() for person in field]
            continue
        if isinstance(field, docutils.nodes.TextElement):  # e.g. author
            metadata[field.__class__.__name__] = field.astext()
            continue
        label, value = field
        metadata[label.astext()] = value.astext()

    return metadata


# deprecate eventually
doctree2dict = docinfo2dict  # noqa: E305
def create_toc(doctree, depth=9223372036854775807, writer_name='html',
               exclude_first_section=True, href_prefix=None,
               id_prefix='toc-ref-'):
    """
    Create a Table of Contents (TOC) from the given doctree

    Returns: (docutils.core.Publisher instance, output string)

    `doctree`: the document to build the TOC for.  It is not modified; all
        work happens on a copy.
    `depth`: handed to the docutils Contents transform as its ``depth``
        detail.
    `writer_name`: represents a reST writer name and determines the type of
        output returned.
    `exclude_first_section`: when true and the document contains a section,
        the TOC is built from the children of the first section found
        instead of from the document root.
    `href_prefix`: when given, every TOC link is rewritten from a bare
        fragment to ``<href_prefix>#<fragment>`` (see `prefix_refids`).
    `id_prefix`: assigned to the ``auto_id_prefix`` setting of the working
        copy so generated ids can be kept from clashing with others.

    Example:

        pub = blazeutils.rst.rst2pub(toc_rst)
        pub, html_output = blazeutils.rst.create_toc(pub.document)

        # a full HTML document (probably not what you want most of the time)
        print(html_output)

        # just the TOC
        print(pub.writer.parts['body'])
    """
    import copy

    # copy the doctree since Contents alters the document it works on
    doctree = doctree.deepcopy()

    # docutils' document.copy() passes the existing settings object on to the
    # new document rather than duplicating it, so give the working copy its
    # own settings before customising them; otherwise the prefix change
    # below would leak into the caller's document.
    doctree.settings = copy.copy(doctree.settings)

    # we want to be able to customize ids to avoid clashes if needed
    doctree.settings.auto_id_prefix = id_prefix

    details = {
        'depth': depth,
    }

    # Assuming the document has one primary heading and then sub-sections, we
    # want to be able to give just the sub-sections
    startnode = None
    if exclude_first_section:
        startnode = next(iter(doctree.findall(docutils.nodes.section)), None)

    # use the Contents transform to build the TOC node structure from the
    # document
    c = Contents(doctree)
    # this startnode isn't really used as the start node, it's only used to
    # pull settings from
    c.startnode = BlankObject(details=details)
    # since this toc is detached from the rest of the document, we don't want
    # backlinks
    c.backlinks = 'none'
    # build the nodes
    toc_root = startnode if startnode is not None else doctree
    toc_nodes = c.build_contents(toc_root)

    # create a new document with the new nodes
    toc_doc = new_document(None)
    toc_doc += toc_nodes

    # fix fragments that reference the same page
    if href_prefix:
        prefix_refids(toc_doc, href_prefix)

    # setup a publisher and publish from the TOC document
    reader = docutils.readers.doctree.Reader(parser_name='null')
    pub = Publisher(
        reader, None, None,
        source=io.DocTreeInput(toc_doc),
        destination_class=io.StringOutput
    )
    pub.set_writer(writer_name)

    final_settings_overrides = default_rst_opts.copy()
    pub.process_programmatic_settings(
        None, final_settings_overrides, None)

    output = pub.publish()
    return pub, output
def rst2html(rst_src, **kwargs):
    """
    Render a reStructuredText string and return the HTML body fragment.

    The source is published with the ``html`` writer through `rst2pub` and
    only the writer's ``body`` part is returned, as a unicode string.

    `kwargs` are passed on as docutils settings overrides; see
    `default_rst_opts` and
    http://docutils.sourceforge.net/docs/user/config.html
    """
    publisher = rst2pub(rst_src, writer_name='html', settings_overrides=kwargs)
    html_parts = publisher.writer.parts
    return html_parts['body']
def prefix_refids(document, href_prefix):
    """
    Turn same-page fragment links in `document` into prefixed URIs.

    The href comes out as just a fragment, but it's possible that the HTML
    will be used on a page where relative links don't resolve to the current
    page.  In that case, an `href_prefix` can be sent in: every reference
    node carrying a ``refid`` gets a ``refuri`` of
    ``<href_prefix>#<refid>`` instead, and its ``refid`` is removed.

    The only downfall to doing it this way is that the writer automatically
    sets an "external" class on a reference with 'refuri' instead of 'refid'.

    Reference nodes without a ``refid`` are left untouched.  The document is
    modified in place; nothing is returned.
    """
    for node in document.findall(docutils.nodes.reference):
        refid = node.get('refid')
        if refid is None:
            # nothing to prefix, e.g. a reference that already has a refuri
            continue
        node['refuri'] = '{0}#{1}'.format(href_prefix, refid)
        del node['refid']