Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Fetching contributors…

Cannot retrieve contributors at this time

257 lines (235 sloc) 11.546 kb
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
from datetime import datetime
import os
from unittest import TestCase
import warnings
from django.utils import html, safestring
from django.utils._os import upath
from django.utils.deprecation import RemovedInDjango18Warning
from django.utils.encoding import force_text
class TestUtilsHtml(TestCase):
def check_output(self, function, value, output=None):
"""
Check that function(value) equals output. If output is None,
check that function(value) equals value.
"""
if output is None:
output = value
self.assertEqual(function(value), output)
def test_escape(self):
f = html.escape
items = (
('&', '&'),
('<', '&lt;'),
('>', '&gt;'),
('"', '&quot;'),
("'", '&#39;'),
)
# Substitution patterns for testing the above items.
patterns = ("%s", "asdf%sfdsa", "%s1", "1%sb")
for value, output in items:
for pattern in patterns:
self.check_output(f, pattern % value, pattern % output)
# Check repeated values.
self.check_output(f, value * 2, output * 2)
# Verify it doesn't double replace &.
self.check_output(f, '<&', '&lt;&amp;')
def test_format_html(self):
self.assertEqual(
html.format_html("{0} {1} {third} {fourth}",
"< Dangerous >",
html.mark_safe("<b>safe</b>"),
third="< dangerous again",
fourth=html.mark_safe("<i>safe again</i>")
),
"&lt; Dangerous &gt; <b>safe</b> &lt; dangerous again <i>safe again</i>"
)
def test_linebreaks(self):
f = html.linebreaks
items = (
("para1\n\npara2\r\rpara3", "<p>para1</p>\n\n<p>para2</p>\n\n<p>para3</p>"),
("para1\nsub1\rsub2\n\npara2", "<p>para1<br />sub1<br />sub2</p>\n\n<p>para2</p>"),
("para1\r\n\r\npara2\rsub1\r\rpara4", "<p>para1</p>\n\n<p>para2<br />sub1</p>\n\n<p>para4</p>"),
("para1\tmore\n\npara2", "<p>para1\tmore</p>\n\n<p>para2</p>"),
)
for value, output in items:
self.check_output(f, value, output)
def test_strip_tags(self):
f = html.strip_tags
items = (
('<p>See: &#39;&eacute; is an apostrophe followed by e acute</p>',
'See: &#39;&eacute; is an apostrophe followed by e acute'),
('<adf>a', 'a'),
('</adf>a', 'a'),
('<asdf><asdf>e', 'e'),
('hi, <f x', 'hi, <f x'),
('234<235, right?', '234<235, right?'),
('a4<a5 right?', 'a4<a5 right?'),
('b7>b2!', 'b7>b2!'),
('</fe', '</fe'),
('<x>b<y>', 'b'),
('a<p onclick="alert(\'<test>\')">b</p>c', 'abc'),
('a<p a >b</p>c', 'abc'),
('d<a:b c:d>e</p>f', 'def'),
('<strong>foo</strong><a href="http://example.com">bar</a>', 'foobar'),
)
for value, output in items:
self.check_output(f, value, output)
# Test with more lengthy content (also catching performance regressions)
for filename in ('strip_tags1.html', 'strip_tags2.txt'):
path = os.path.join(os.path.dirname(upath(__file__)), 'files', filename)
with open(path, 'r') as fp:
content = force_text(fp.read())
start = datetime.now()
stripped = html.strip_tags(content)
elapsed = datetime.now() - start
self.assertEqual(elapsed.seconds, 0)
self.assertIn("Please try again.", stripped)
self.assertNotIn('<', stripped)
def test_strip_spaces_between_tags(self):
f = html.strip_spaces_between_tags
# Strings that should come out untouched.
items = (' <adf>', '<adf> ', ' </adf> ', ' <f> x</f>')
for value in items:
self.check_output(f, value)
# Strings that have spaces to strip.
items = (
('<d> </d>', '<d></d>'),
('<p>hello </p>\n<p> world</p>', '<p>hello </p><p> world</p>'),
('\n<p>\t</p>\n<p> </p>\n', '\n<p></p><p></p>\n'),
)
for value, output in items:
self.check_output(f, value, output)
def test_strip_entities(self):
f = html.strip_entities
# Strings that should come out untouched.
values = ("&", "&a", "&a", "a&#a")
for value in values:
self.check_output(f, value)
# Valid entities that should be stripped from the patterns.
entities = ("&#1;", "&#12;", "&a;", "&fdasdfasdfasdf;")
patterns = (
("asdf %(entity)s ", "asdf "),
("%(entity)s%(entity)s", ""),
("&%(entity)s%(entity)s", "&"),
("%(entity)s3", "3"),
)
for entity in entities:
for in_pattern, output in patterns:
self.check_output(f, in_pattern % {'entity': entity}, output)
def test_fix_ampersands(self):
with warnings.catch_warnings():
warnings.simplefilter("ignore", RemovedInDjango18Warning)
f = html.fix_ampersands
# Strings without ampersands or with ampersands already encoded.
values = ("a&#1;", "b", "&a;", "&amp; &x; ", "asdf")
patterns = (
("%s", "%s"),
("&%s", "&amp;%s"),
("&%s&", "&amp;%s&amp;"),
)
for value in values:
for in_pattern, out_pattern in patterns:
self.check_output(f, in_pattern % value, out_pattern % value)
# Strings with ampersands that need encoding.
items = (
("&#;", "&amp;#;"),
("&#875 ;", "&amp;#875 ;"),
("&#4abc;", "&amp;#4abc;"),
)
for value, output in items:
self.check_output(f, value, output)
def test_escapejs(self):
f = html.escapejs
items = (
('"double quotes" and \'single quotes\'', '\\u0022double quotes\\u0022 and \\u0027single quotes\\u0027'),
(r'\ : backslashes, too', '\\u005C : backslashes, too'),
('and lots of whitespace: \r\n\t\v\f\b', 'and lots of whitespace: \\u000D\\u000A\\u0009\\u000B\\u000C\\u0008'),
(r'<script>and this</script>', '\\u003Cscript\\u003Eand this\\u003C/script\\u003E'),
('paragraph separator:\u2029and line separator:\u2028', 'paragraph separator:\\u2029and line separator:\\u2028'),
)
for value, output in items:
self.check_output(f, value, output)
def test_clean_html(self):
f = html.clean_html
items = (
('<p>I <i>believe</i> in <b>semantic markup</b>!</p>', '<p>I <em>believe</em> in <strong>semantic markup</strong>!</p>'),
('I escape & I don\'t <a href="#" target="_blank">target</a>', 'I escape &amp; I don\'t <a href="#" >target</a>'),
('<p>I kill whitespace</p><br clear="all"><p>&nbsp;</p>', '<p>I kill whitespace</p>'),
# also a regression test for #7267: this used to raise an UnicodeDecodeError
('<p>* foo</p><p>* bar</p>', '<ul>\n<li> foo</li><li> bar</li>\n</ul>'),
)
with warnings.catch_warnings():
warnings.simplefilter("ignore", RemovedInDjango18Warning)
for value, output in items:
self.check_output(f, value, output)
def test_remove_tags(self):
f = html.remove_tags
items = (
("<b><i>Yes</i></b>", "b i", "Yes"),
("<a>x</a> <p><b>y</b></p>", "a b", "x <p>y</p>"),
)
for value, tags, output in items:
self.assertEqual(f(value, tags), output)
def test_smart_urlquote(self):
quote = html.smart_urlquote
# Ensure that IDNs are properly quoted
self.assertEqual(quote('http://öäü.com/'), 'http://xn--4ca9at.com/')
self.assertEqual(quote('http://öäü.com/öäü/'), 'http://xn--4ca9at.com/%C3%B6%C3%A4%C3%BC/')
# Ensure that everything unsafe is quoted, !*'();:@&=+$,/?#[]~ is considered safe as per RFC
self.assertEqual(quote('http://example.com/path/öäü/'), 'http://example.com/path/%C3%B6%C3%A4%C3%BC/')
self.assertEqual(quote('http://example.com/%C3%B6/ä/'), 'http://example.com/%25C3%25B6/%C3%A4/')
self.assertEqual(quote('http://example.com/?x=1&y=2'), 'http://example.com/?x=1&y=2')
self.assertEqual(quote('http://example.com/?x=ä&ä=2'), 'http://example.com/?x=%C3%A4&%C3%A4=2')
# Ensure empty query key does not get lost
self.assertRegexpMatches(quote('http://example.com/?empty'), 'http:\/\/example\.com\/\?empty=?')
# Ensure that embedded quoted url does not get unquoted
self.assertEqual(
quote('http://example.com/?q=http%3A%2F%2Fexample.com%2F%3Fx%3D1%26q%3Ddjango'),
'http://example.com/?q=http%3A%2F%2Fexample.com%2F%3Fx%3D1%26q%3Ddjango'
)
# Ensure that " gets quoted
self.assertEqual(
quote('http://example.com/pa"th?k"ey=va"lue&ke"y=value"&k"ey=val"ue#frag"ment'),
'http://example.com/pa%22th?k%22ey=va%22lue&ke%22y=value%22&k%22ey=val%22ue#frag%22ment'
)
# Ensure wrong url returns None
self.assertEqual(quote('http://exa[mple.com/'), None) # Invalid IPv6 url
self.assertEqual(quote('http://../'), None) # Invalid domain part
def test_urlize(self):
self.assertEqual(
html.urlize('Hello http://google.com/?sdf=443&s="sdf" hello'),
'Hello <a href="http://google.com/?sdf=443&s=%22sdf">http://google.com/?sdf=443&s="sdf</a>" hello'
)
self.assertEqual(
html.urlize('Email me@example.com'),
'Email <a href="mailto:me@example.com">me@example.com</a>'
)
self.assertEqual(
html.urlize('Double urls http://example.com/?x=ä&ä=4 http://example.com/?x=ä&ä=2 test'),
'Double urls <a href="http://example.com/?x=%C3%A4&%C3%A4=4">http://example.com/?x=ä&ä=4</a> '
'<a href="http://example.com/?x=%C3%A4&%C3%A4=2">http://example.com/?x=ä&ä=2</a> test'
)
self.assertEqual(
html.urlize('Invalid url http://ex[ls.com/ does not get a tag'),
'Invalid url http://ex[ls.com/ does not get a tag'
)
self.assertEqual(
html.urlize('text <> and url http://example.com/<tag>?<q>=<v> gets escaped', autoescape=True),
'text &lt;&gt; and url <a href="http://example.com/%3Ctag%3E?%3Cq%3E=%3Cv%3E">'
'http://example.com/&lt;tag&gt;?&lt;q&gt;=&lt;v&gt;</a> gets escaped'
)
self.assertEqual(
html.urlize('No follow http://example.com', nofollow=True),
'No follow <a href="http://example.com" rel="nofollow">http://example.com</a>'
)
self.assertEqual(
html.urlize('Truncate http://example.com', trim_url_limit=5),
'Truncate <a href="http://example.com">ht...</a>'
)
def test_conditional_escape(self):
s = '<h1>interop</h1>'
self.assertEqual(html.conditional_escape(s),
'&lt;h1&gt;interop&lt;/h1&gt;')
self.assertEqual(html.conditional_escape(safestring.mark_safe(s)), s)
Jump to Line
Something went wrong with that request. Please try again.