
Fix attribute order passed to the tree builder to be document order
Somehow I managed to screw this up so it became reverse document order!
gsnedders committed Jul 15, 2016
1 parent a3b8252 commit e0dc25f
Showing 3 changed files with 39 additions and 5 deletions.
5 changes: 3 additions & 2 deletions CHANGES.rst
@@ -4,9 +4,10 @@ Change Log
 0.999999999/1.0b10
 ~~~~~~~~~~~~~~~~~~

-Released on XXX
+Released on July 15, 2016

-* XXX
+* Fix attribute order going to the tree builder to be document order
+  instead of reverse document order(!).


 0.99999999/1.0b9
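
For context, a minimal sketch of what the changelog entry above means in practice (not part of the commit; the token and attribute names are made up for illustration): the parser's normalizeToken step now hands attributes to the tree builder in the order they appeared in the markup rather than reversed.

    from html5lib import HTMLParser
    from html5lib.constants import tokenTypes

    # A made-up start tag whose attributes are listed in document order.
    token = {'name': 'p',
             'selfClosing': False,
             'selfClosingAcknowledged': False,
             'type': tokenTypes["StartTag"],
             'data': [('id', 'x'), ('class', 'y'), ('title', 'z')]}

    out = HTMLParser().normalizeToken(token)
    # Prints ['id', 'class', 'title'] with this fix; previously the keys
    # came out in reverse document order.
    print(list(out["data"].keys()))

The new test_maintain_attribute_order test further down exercises exactly this path.
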
6 changes: 5 additions & 1 deletion html5lib/html5parser.py
@@ -265,7 +265,11 @@ def normalizeToken(self, token):
         """ HTML5 specific normalizations to the token stream """

         if token["type"] == tokenTypes["StartTag"]:
-            token["data"] = OrderedDict(token['data'][::-1])
+            raw = token["data"]
+            token["data"] = OrderedDict(raw)
+            if len(raw) > len(token["data"]):
+                # we had some duplicated attribute, fix so first wins
+                token["data"].update(raw[::-1])

         return token

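
The replacement above leans on how OrderedDict behaves when built from a list of pairs with duplicate keys: a repeated key keeps its first position but takes its last value, so the length check detects that a duplicate was dropped, and re-applying the raw pairs in reverse restores the first value without disturbing the order. A standalone sketch of just that behaviour (the sample pairs are illustrative, not taken from the commit):

    from collections import OrderedDict

    raw = [('a', '1'), ('b', '2'), ('a', '3')]

    d = OrderedDict(raw)
    # Duplicate keys collapse to their first position, but the last value wins.
    assert list(d.items()) == [('a', '3'), ('b', '2')]

    # len(raw) > len(d) is the signal that a duplicate was dropped; updating
    # with the reversed pairs makes the first occurrence's value win while
    # keeping the keys in document order.
    d.update(raw[::-1])
    assert list(d.items()) == [('a', '1'), ('b', '2')]
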
33 changes: 31 additions & 2 deletions html5lib/tests/test_parser2.py
@@ -1,12 +1,12 @@
 from __future__ import absolute_import, division, unicode_literals

-from six import PY2, text_type
+from six import PY2, text_type, unichr

 import io

 from . import support # noqa

-from html5lib.constants import namespaces
+from html5lib.constants import namespaces, tokenTypes
 from html5lib import parse, parseFragment, HTMLParser


@@ -53,13 +53,42 @@ def test_unicode_file():
     assert parse(io.StringIO("a")) is not None


+def test_maintain_attribute_order():
+    # This is here because we impl it in parser and not tokenizer
+    p = HTMLParser()
+    # generate loads to maximize the chance a hash-based mutation will occur
+    attrs = [(unichr(x), i) for i, x in enumerate(range(ord('a'), ord('z')))]
+    token = {'name': 'html',
+             'selfClosing': False,
+             'selfClosingAcknowledged': False,
+             'type': tokenTypes["StartTag"],
+             'data': attrs}
+    out = p.normalizeToken(token)
+    attr_order = list(out["data"].keys())
+    assert attr_order == [x for x, i in attrs]
+
+
 def test_duplicate_attribute():
     # This is here because we impl it in parser and not tokenizer
     doc = parse('<p class=a class=b>')
     el = doc[1][0]
     assert el.get("class") == "a"


+def test_maintain_duplicate_attribute_order():
+    # This is here because we impl it in parser and not tokenizer
+    p = HTMLParser()
+    attrs = [(unichr(x), i) for i, x in enumerate(range(ord('a'), ord('z')))]
+    token = {'name': 'html',
+             'selfClosing': False,
+             'selfClosingAcknowledged': False,
+             'type': tokenTypes["StartTag"],
+             'data': attrs + [('a', len(attrs))]}
+    out = p.normalizeToken(token)
+    attr_order = list(out["data"].keys())
+    assert attr_order == [x for x, i in attrs]
+
+
 def test_debug_log():
     parser = HTMLParser(debug=True)
     parser.parse("<!doctype html><title>a</title><p>b<script>c</script>d</p>e")
