Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 12 additions & 1 deletion html5lib/filters/alphabeticalattributes.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,24 @@
from ordereddict import OrderedDict


def _attr_key(attr):
"""Return an appropriate key for an attribute for sorting

Attributes have a namespace that can be either ``None`` or a string. We
can't compare the two because they're different types, so we convert
``None`` to an empty string first.

"""
return (attr[0][0] or ''), attr[0][1]


class Filter(base.Filter):
    """Filter that puts the attributes of every tag token in sorted order.

    ``StartTag`` and ``EmptyTag`` tokens get their ``data`` mapping replaced
    by an ``OrderedDict`` sorted by ``_attr_key``; every other token is
    passed through untouched.
    """

    def __iter__(self):
        for token in base.Filter.__iter__(self):
            if token["type"] in ("StartTag", "EmptyTag"):
                # Sort with _attr_key instead of the raw (namespace, name)
                # key: a raw key would compare None against str namespaces,
                # which raises TypeError on Python 3.
                token["data"] = OrderedDict(
                    sorted(token["data"].items(), key=_attr_key)
                )
            yield token
81 changes: 81 additions & 0 deletions html5lib/tests/test_alphabeticalattributes.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
from __future__ import absolute_import, division, unicode_literals

try:
from collections import OrderedDict
except ImportError:
from ordereddict import OrderedDict

import pytest

import html5lib
from html5lib.filters.alphabeticalattributes import Filter
from html5lib.serializer import HTMLSerializer


@pytest.mark.parametrize('msg, attrs, expected_attrs', [
    (
        'no attrs',
        {},
        {}
    ),
    (
        'one attr',
        {(None, 'alt'): 'image'},
        OrderedDict([((None, 'alt'), 'image')])
    ),
    (
        'multiple attrs',
        {
            (None, 'src'): 'foo',
            (None, 'alt'): 'image',
            (None, 'style'): 'border: 1px solid black;'
        },
        OrderedDict([
            ((None, 'alt'), 'image'),
            ((None, 'src'), 'foo'),
            ((None, 'style'), 'border: 1px solid black;')
        ])
    ),
])
def test_alphabetizing(msg, attrs, expected_attrs):
    """Check that the filter emits a tag's attributes in sorted order.

    ``msg`` only labels the parametrized case; it is not used in the body.
    """
    tokens = [{'type': 'StartTag', 'name': 'img', 'data': attrs}]
    filtered = list(Filter(tokens))

    sorted_attrs = filtered[0]['data']
    # Compare ordered item lists rather than the mappings themselves:
    # equality between an OrderedDict and a plain dict ignores order, which
    # would let an ordering regression slip through.
    assert list(sorted_attrs.items()) == list(expected_attrs.items())


def test_with_different_namespaces():
    """Check sorting when ``None`` and string namespaces are mixed.

    The un-namespaced attribute is expected before the namespaced one.
    """
    tokens = [{
        'type': 'StartTag',
        'name': 'pattern',
        'data': {
            (None, 'id'): 'patt1',
            ('http://www.w3.org/1999/xlink', 'href'): '#patt2'
        }
    }]
    filtered = list(Filter(tokens))

    sorted_attrs = filtered[0]['data']
    # Compare item lists so the check stays order-sensitive whatever
    # mapping type the filter returns.
    assert list(sorted_attrs.items()) == [
        ((None, 'id'), 'patt1'),
        (('http://www.w3.org/1999/xlink', 'href'), '#patt2')
    ]


def test_with_serializer():
    """Verify filter works in the context of everything else

    Parses an SVG fragment whose attributes are written out of order and
    renders it through ``HTMLSerializer`` with ``alphabetical_attributes``
    switched on.
    """
    parser = html5lib.HTMLParser()
    dom = parser.parseFragment('<svg><pattern xlink:href="#patt2" id="patt1"></svg>')
    walker = html5lib.getTreeWalker('etree')
    ser = HTMLSerializer(
        alphabetical_attributes=True,
        quote_attr_values='always'
    )

    # FIXME(willkg): The "xlink" namespace gets dropped by the serializer. When
    # that gets fixed, we can fix this expected result.
    rendered = ser.render(walker(dom))
    assert (
        rendered ==
        '<svg><pattern id="patt1" href="#patt2"></pattern></svg>'
    )