/
wysiwyg.py
243 lines (185 loc) · 8.38 KB
/
wysiwyg.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
"""Markdown extension to render content similar to the source.
This will render Markdown such that the spacing and alignment in the source
text and in the rendered content look roughly the same. It's meant to help
ensure that what's typed is very close to what's viewed when rendered.
"""
from __future__ import absolute_import, unicode_literals
import re
from collections import OrderedDict
import markdown
from django.utils import six
from django.utils.six.moves import range
from markdown.blockprocessors import BlockProcessor, OListProcessor
from markdown.postprocessors import RawHtmlPostprocessor
from markdown.treeprocessors import Treeprocessor
from markdown.util import etree
class SmartEmptyBlockProcessor(BlockProcessor):
    """Block processor that keeps blank blocks around as empty paragraphs.

    Python-Markdown normally trims away surplus empty blocks, on the basis
    that the amount of whitespace between tags makes no difference once
    rendered to HTML. We need that whitespace preserved so the rendered
    result lines up with the input text, so empty lines are turned into
    empty ``<p>`` elements, which safely stick around.

    This is invoked before EmptyBlockProcessor.
    """

    def test(self, parent, block):
        """Return whether this processor should handle the given block.

        Args:
            parent (xml.etree.ElementTree.Element):
                The parent element. This is not consulted.

            block (unicode):
                The block of text to check.

        Returns:
            bool:
            ``True`` if the block is empty or begins with a newline.
        """
        if not block:
            return True

        return block.startswith('\n')

    def run(self, parent, blocks):
        """Consume leading blank content from the first block.

        One empty ``<p>`` is appended to the parent for every leading
        newline in the first block. If the block is (or becomes) empty,
        two further paragraphs are appended and the block is consumed.
        Otherwise, whatever text remains is pushed back onto the front of
        the list for other processors to handle.

        Args:
            parent (xml.etree.ElementTree.Element):
                The element that receives the empty paragraphs.

            blocks (list of unicode):
                The remaining blocks. The first one is popped, and may be
                re-inserted with its leading newlines removed.
        """
        pending = blocks.pop(0)

        # test() has to pass before run() is called, so this loop body is
        # entered at least once.
        while self.test(parent, pending):
            etree.SubElement(parent, 'p')

            if not pending:
                # Nothing is left of the block. Add the closing paragraph
                # and stop, without putting anything back.
                etree.SubElement(parent, 'p')
                return

            # Drop the leading newline we just accounted for and re-check.
            pending = pending[1:]

        # The loop only falls through to here with a non-empty block that
        # has no leading newlines. Hand it back for later processing.
        blocks.insert(0, pending)
class PreserveStartOListBlockProcessor(OListProcessor):
    """Ordered list processor that mirrors start= into a CSS counter.

    Our stylesheets turn off the native ``<li>`` counter and draw their own
    using ``:before`` and CSS counters, so that list numbers and item
    contents line up between the source text and the rendered text. A side
    effect of those tricks is that the ``start=`` attribute on the ``<ol>``
    ends up being ignored.

    To compensate, this extends the standard OListProcessor to also set an
    inline style that resets the counter to match the intended start value.

    This replaces OListProcessor.
    """

    # Force <ol> tags to have a start= attribute.
    LAZY_OL = False

    def run(self, parent, blocks):
        """Build the list, then apply the counter style when needed.

        Args:
            parent (xml.etree.ElementTree.Element):
                The parent element the list is added to.

            blocks (list of unicode):
                The remaining blocks to process.
        """
        # The base BlockProcessor class does not inherit from object, so
        # super() is not an option here.
        OListProcessor.run(self, parent, blocks)

        ol = self.lastChild(parent)

        # This should always be an <ol>, but bail out quietly if it isn't,
        # or if there's no start= to work from.
        if ol.tag != 'ol' or 'start' not in ol.attrib:
            return

        try:
            first_number = int(ol.attrib['start'])
        except ValueError:
            # Treat an unparsable value like a list starting at 1.
            first_number = 1

        if first_number <= 1:
            return

        # Set a style= attribute that resets the counter to one below the
        # start value, forcing the text to render with that start value.
        ol.attrib['style'] = 'counter-reset: li %s' % (first_number - 1)
class TrimTrailingEmptyParagraphs(Treeprocessor):
    """Tree processor that strips empty paragraphs off the end of the tree.

    SmartEmptyBlockProcessor may leave empty paragraphs at the very end of
    the document. They can only be identified once every block has been
    processed and the tree is complete, so this runs as a tree processor,
    right before the tree is prettified.
    """

    def run(self, root):
        """Remove any trailing empty ``<p>`` elements from the root.

        A paragraph counts as empty when it has no text and no child
        elements. The root is modified in place.

        Args:
            root (xml.etree.ElementTree.Element):
                The root of the tree.
        """
        total = len(root)
        keep = total

        # Walk backwards from the last child, moving the cut-off point
        # down for as long as we keep finding empty paragraphs.
        while keep > 0:
            candidate = root[keep - 1]

            if (candidate.tag == 'p' and
                not candidate.text and
                len(candidate) == 0):
                keep -= 1
            else:
                break

        if keep != total:
            # Clear away everything from the cut-off point onward.
            root[keep:] = []
class TrimmedRawHtmlPostprocessor(RawHtmlPostprocessor):
    """Restores stashed raw HTML without introducing extra newlines.

    Python-Markdown's RawHtmlPostprocessor had a nasty habit of adding an
    extra newline after swapping a placeholder for its stored raw HTML,
    which showed up as extra newlines in our output.

    This version performs all of the replacements in a single pass over
    the text and makes sure the substituted HTML never ends in a newline.
    That covers both the newline the original function would add and any
    trailing newlines in stashed HTML generated by other extensions,
    keeping spacing consistent and predictable.
    """

    def run(self, text):
        """Replace raw HTML placeholders in the rendered text.

        Args:
            text (unicode):
                The text to process.

        Returns:
            unicode:
            The processed text. This is ``text`` itself if nothing has
            been stashed.
        """
        stash = self.markdown.htmlStash
        total = stash.html_counter

        if total == 0:
            return text

        # Maps each string to search for to the HTML that replaces it.
        replacements = OrderedDict()

        for index in range(total):
            raw_html = stash.rawHtmlBlocks[index]
            token = stash.get_placeholder(index)

            # Help control whitespace by getting rid of any trailing
            # newlines we may find.
            raw_html = raw_html.rstrip('\n')

            if self.isblocklevel(raw_html):
                # For block-level HTML, also match the placeholder together
                # with a <p> wrapped around it, so the wrapper is replaced
                # along with it.
                replacements['<p>%s</p>' % token] = raw_html

            replacements[token] = raw_html

        pattern = re.compile(
            '|'.join(re.escape(key) for key in replacements))

        def _restore(match):
            """Return the stashed HTML for the matched placeholder."""
            return replacements[match.group(0)]

        return pattern.sub(_restore, text)
class WysiwygFormattingExtension(markdown.Extension):
    """Provides a series of WYSIWYG formatting rules for Markdown rendering.

    We have a lot of specific rendering concerns that Python-Markdown doesn't
    really address, or generally need to care about. We try very hard to match
    up newlines around various code blocks, and we have special ways we do
    ordered lists. The resulting rendered output is meant to look identical
    to the input text, as much as possible.

    This extension registers a series of processors that ensure that the
    HTML output is in the format required for our rendering.

    This is meant to be used with the following Markdown configuration
    and extensions:

    .. code-block:: python

        {
            'lazy_ol': False,
            'extensions': [
                'sane_lists', 'nl2br', 'djblets.markdown.extensions.wysiwyg',
            ],
        }
    """

    def extendMarkdown(self, md, md_globals=None):
        """Extend the list of Markdown processors.

        Each processor in this file will be registered in the order
        necessary for the smarter formatting.

        Args:
            md (markdown.Markdown):
                The Markdown instance to register the processors on.

            md_globals (dict, optional):
                Unused. This is accepted so that the method works both when
                it is called with a second argument and when it is called
                with only the Markdown instance.
        """
        # Swap out the stock ordered list processor for the one that adds
        # the counter-reset style.
        md.parser.blockprocessors['olist'] = \
            PreserveStartOListBlockProcessor(md.parser)

        # Turn empty blocks into paragraphs before the 'empty' processor
        # gets a chance to handle them.
        md.parser.blockprocessors.add(
            'smart-empty',
            SmartEmptyBlockProcessor(md.parser),
            '<empty')

        # Trim trailing empty paragraphs right before the tree is
        # prettified.
        md.treeprocessors.add(
            'trim_empty_p',
            TrimTrailingEmptyParagraphs(md),
            '<prettify')

        # Swap out the stock raw HTML postprocessor for the trimmed one.
        md.postprocessors['raw_html'] = TrimmedRawHtmlPostprocessor(md)
def makeExtension(*args, **kwargs):
    """Build the WYSIWYG formatting extension.

    Args:
        *args (tuple):
            Positional arguments to pass to the extension's constructor.

        **kwargs (dict):
            Keyword arguments to pass to the extension's constructor.

    Returns:
        WysiwygFormattingExtension:
        The newly-created extension instance.
    """
    extension = WysiwygFormattingExtension(*args, **kwargs)

    return extension