forked from localwiki/localwiki
/
plugins.py
223 lines (177 loc) · 6.92 KB
/
plugins.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
"""
Conversion of HTML into template with dynamic parts.
We want to allow some dynamic content that gets inserted as the HTML is
rendered. This is done by converting certain HTML tags into template tags.
For example, to mark links to nonexistent pages with a different style, this:
<a href="My Page">My Page</a>
gets converted to this:
{% link "My Page" %}My Page{% endlink %}
and rendered as appropriate by the LinkNode class.
The function html_to_template_text parses the HTML and gives each registered
handler a chance to do something with an element, such as replace it with a
template tag.
"""
from lxml import etree
from lxml.html import fragments_fromstring
from xml.sax.saxutils import escape
from HTMLParser import HTMLParser
from urllib import unquote_plus
from urlparse import urlparse
from django.template import Node
from django.core.urlresolvers import reverse
from pages.models import Page, name_to_url, url_to_name, PageFile
from pages.models import slugify
from ckeditor.models import parse_style
def sanitize_intermediate(html):
    """
    Sanitizes template tags and escapes entities.

    Curly braces are rewritten as numeric character references so that
    user-supplied HTML can never form a ``{{ ... }}`` or ``{% ... %}``
    template construct.  Every ampersand (including the ones just
    introduced for the braces) is then hidden behind the ``{amp}``
    placeholder so the HTML parser leaves entities untouched;
    sanitize_final turns the placeholder back into ``&``.

    The order matters: braces are replaced first, so any ``{amp}`` found
    in the result was produced here and cannot come from the input.
    """
    return (html.replace('{', '&#123;')
                .replace('}', '&#125;')
                .replace('&', '{amp}'))  # escape all entities
# Shared parser instance, used only for its entity-unescaping helper.
_unescape_util = HTMLParser()


def desanitize(fragment):
    """
    Recovers the original text of a sanitized fragment.

    The ``{amp}`` placeholders are first restored to ampersands, then the
    resulting HTML entities are decoded.
    """
    restored = sanitize_final(fragment)
    decoded = _unescape_util.unescape(restored)
    return decoded
def sanitize_final(html):
    """
    Puts back the ampersands hidden behind ``{amp}`` placeholders.
    """
    # unescape entities: every placeholder becomes a plain '&' again
    pieces = html.split('{amp}')
    return '&'.join(pieces)
def escape_quotes(s):
    """
    Backslash-escapes every double quote in ``s`` so the result can be
    placed inside a double-quoted template tag argument.
    """
    quote = '"'
    escaped_quote = '\\' + quote
    return s.replace(quote, escaped_quote)
def insert_text_before(text, elem):
    """
    Appends ``text`` to whatever text directly precedes ``elem``.

    That is the tail of the previous sibling when there is one, and the
    parent's leading text otherwise.
    """
    sibling = elem.getprevious()
    if sibling is None:
        # first child: the preceding text lives on the parent
        parent = elem.getparent()
        parent.text = (parent.text or '') + text
        return
    sibling.tail = (sibling.tail or '') + text
def handle_link(elem, context=None):
    """
    Replaces an ``<a href="...">`` element with a ``{% link %}`` block.

    The element's text and children are kept in place between
    ``{% link "<href>" %}`` and ``{% endlink %}``; the element itself is
    removed from the tree.

    Returns None (leaving the element untouched) when it has no ``href``,
    and False once the element has been replaced.
    """
    if 'href' not in elem.attrib:
        return

    target = escape_quotes(desanitize(elem.attrib['href']))
    opening = ('{%% link "%s" %%}' % target) + (elem.text or '')
    # the tail is copied here because it is not kept when elem is removed
    closing = '{% endlink %}' + (elem.tail or '')

    insert_text_before(opening, elem)
    # hoist the children out so they end up between the two tags
    for child in elem:
        elem.addprevious(child)
    insert_text_before(closing, elem)

    elem.getparent().remove(elem)
    return False
# Prefix that marks a link or image source as a file attached to the page.
_files_url = '_files/'


def file_url_to_name(url):
    """
    Converts a ``_files/...`` URL into the file name it refers to.

    The ``_files/`` marker is removed and the UTF-8 encoded remainder is
    URL-unquoted (``+`` is decoded as a space).
    """
    without_marker = url.replace(_files_url, '')
    encoded = without_marker.encode('utf-8')
    return unquote_plus(encoded)
def handle_image(elem, context=None):
    """
    Points an ``<img>`` of an attached file at the stored file and wraps
    it in a link to the file's info page.

    Only images whose ``src`` starts with ``_files/`` are handled, and
    only when ``context`` carries a ``page`` that has a matching PageFile.
    When the inline style gives both a width and a height, the image is
    wrapped in a ``{% thumbnail %}`` block of that size; otherwise its
    ``src`` becomes the URL of the stored file.

    Returns None when the image is left untouched and False once it has
    been handled.
    """
    style = parse_style(elem.attrib.get('style', ''))
    # only resized images (explicit width and height) get a thumbnail
    do_thumbnail = 'width' in style and 'height' in style

    src = desanitize(elem.attrib.get('src', ''))
    if not src.startswith(_files_url):
        return
    if not context or 'page' not in context:
        return
    page = context['page']

    try:
        page_file = PageFile.objects.get(slug__exact=page.slug,
                                         name__exact=file_url_to_name(src))
    except PageFile.DoesNotExist:
        return

    if not do_thumbnail:
        elem.attrib['src'] = page_file.file.url
    else:
        width = int(style['width'].replace('px', ''))
        height = int(style['height'].replace('px', ''))
        opening = '{%% thumbnail "%s" "%dx%d" as im %%}' % (
            escape_quotes(page_file.file.name), width, height)
        closing = '{% endthumbnail %}'
        elem.attrib['src'] = '{{ im.url }}'
        insert_text_before(opening, elem)
        elem.tail = closing + (elem.tail or '')

    # Wrap the image in a link to the file's info page.
    # NOTE(review): lxml appears to move an element's tail along with it,
    # so the text following the image presumably ends up inside the new
    # <a> as well -- confirm that this is intended.
    link = etree.Element('a')
    link.attrib['href'] = reverse('pages:file-info',
                                  args=[page.pretty_slug, page_file.name])
    elem.addprevious(link)
    elem.getparent().remove(elem)
    link.append(elem)
    return False
# Template libraries loaded at the top of every generated template.
tag_imports = [
    '{% load pages_tags %}',
    '{% load thumbnail %}',
]

# Maps an HTML tag name to the handlers tried, in order, for each element
# with that tag.  Handlers are called as handler(elem, context); one that
# returns False stops the remaining handlers for that element.
tag_handlers = {
    "a": [handle_link],
    "img": [handle_image],
}
def html_to_template_text(unsafe_html, context=None):
    """
    Converts an HTML string into Django template text.

    The HTML is sanitized, parsed into fragments and walked element by
    element; each element whose tag has registered handlers is passed to
    them in turn (together with ``context``) until one returns False.
    The result is the tag imports followed by the serialized fragments.
    """
    # TODO: factor out parsing/serializing
    fragments = fragments_fromstring(sanitize_intermediate(unsafe_html))

    # put top level elements in container; a leading plain string
    # becomes the container's text
    container = etree.Element('div')
    if fragments and not hasattr(fragments[0], 'tag'):
        container.text = fragments.pop(0)
    container.extend(fragments)

    # walk over all elements, visiting each one after its children
    for _event, node in etree.iterwalk(container, events=('end',)):
        if node.tag not in tag_handlers:
            continue
        for handler in tag_handlers[node.tag]:
            if handler(node, context) is False:
                break

    serialized = [etree.tostring(child, encoding='UTF-8')
                  for child in container]
    leading_text = escape(container.text or '')
    template_text = ''.join(tag_imports + [leading_text] + serialized)
    return sanitize_final(template_text)
class LinkNode(Node):
    """
    Template node that renders a ``{% link "href" %}...{% endlink %}``
    block as an ``<a>`` element.

    Relative links are resolved against the wiki: ``_files/...`` links
    point at the file's info page, anything else at the page of that
    name.  Links whose target does not exist get the ``missing_link``
    CSS class.
    """
    def __init__(self, href, nodelist):
        self.href = href
        self.nodelist = nodelist

    def render(self, context):
        """
        Returns the ``<a>`` markup for this link.

        ``context['page']`` is the page being rendered.  Rendering is
        best-effort: any error results in an empty string instead of
        breaking the whole page.
        """
        try:
            cls = ''
            url = self.href
            page = context['page']
            if self.is_relative_link(url):
                if url.startswith(_files_url):
                    filename = file_url_to_name(url)
                    url = reverse('pages:file-info',
                                  args=[page.pretty_slug, filename])
                    try:
                        page_file = PageFile.objects.get(
                            slug__exact=page.slug, name__exact=filename)
                        cls = ' class="file_%s"' % page_file.rough_type
                    except PageFile.DoesNotExist:
                        cls = ' class="missing_link"'
                else:
                    try:
                        target = Page.objects.get(slug__exact=slugify(url))
                        url = reverse('pages:show',
                                      args=[target.pretty_slug])
                    except Page.DoesNotExist:
                        cls = ' class="missing_link"'
                        # Convert to proper URL: My%20page -> My_page
                        url = name_to_url(url_to_name(url))
                        url = reverse('pages:show', args=[url])
            return '<a href="%s"%s>%s</a>' % (url, cls,
                                              self.nodelist.render(context))
        except Exception:
            # Fail silently on rendering errors, but let KeyboardInterrupt,
            # SystemExit and GeneratorExit propagate (a bare ``except:``
            # would swallow those too).
            return ''

    def is_relative_link(self, url):
        """
        Tells whether ``url`` has no scheme, no network location and no
        fragment, i.e. whether it should be resolved within the wiki.
        """
        url_parts = urlparse(url)
        return (not url_parts.scheme and not url_parts.netloc
                and not url_parts.fragment)