Skip to content

Commit 0dd2e81

Browse files
committed
Fixed a bug in Message.toFormMarkup() related to encoding UTF-8 encoded form values.
The .toFormMarkup() method, which generates a <form> HTML structure, had a bug when the form field values contained UTF-8 encoded strings with characters outside the 7-bit ASCII range. If the lxml implementation of the ElementTree API was in use, these values would result in a ValueError being raised (ValueError: All strings must be XML compatible: Unicode or ASCII, no NULL bytes or control characters). If the stdlib implementation of ElementTree was used, these characters were silently replaced by their XML character reference equivalents (&#XXX;). This patch generates the form using Unicode values for everything and then serializes the form to a UTF-8 encoded string, ensuring that the final form is what is expected and consistent regardless of the ElementTree API implementation.
1 parent 12efd78 commit 0dd2e81

File tree

4 files changed

+62
-15
lines changed

4 files changed

+62
-15
lines changed

openid/message.py

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -298,7 +298,7 @@ def toArgs(self):
298298
return kvargs
299299

300300
def toFormMarkup(self, action_url, form_tag_attrs=None,
301-
submit_text="Continue"):
301+
submit_text=u"Continue"):
302302
"""Generate HTML form markup that contains the values in this
303303
message, to be HTTP POSTed as x-www-form-urlencoded UTF-8.
304304
@@ -324,28 +324,28 @@ def toFormMarkup(self, action_url, form_tag_attrs=None,
324324

325325
assert action_url is not None
326326

327-
form = ElementTree.Element('form')
327+
form = ElementTree.Element(u'form')
328328

329329
if form_tag_attrs:
330330
for name, attr in form_tag_attrs.iteritems():
331331
form.attrib[name] = attr
332332

333-
form.attrib['action'] = action_url
334-
form.attrib['method'] = 'post'
335-
form.attrib['accept-charset'] = 'UTF-8'
336-
form.attrib['enctype'] = 'application/x-www-form-urlencoded'
333+
form.attrib[u'action'] = oidutil.toUnicode(action_url)
334+
form.attrib[u'method'] = u'post'
335+
form.attrib[u'accept-charset'] = u'UTF-8'
336+
form.attrib[u'enctype'] = u'application/x-www-form-urlencoded'
337337

338338
for name, value in self.toPostArgs().iteritems():
339-
attrs = {'type': 'hidden',
340-
'name': name,
341-
'value': value}
342-
form.append(ElementTree.Element('input', attrs))
339+
attrs = {u'type': u'hidden',
340+
u'name': oidutil.toUnicode(name),
341+
u'value': oidutil.toUnicode(value)}
342+
form.append(ElementTree.Element(u'input', attrs))
343343

344-
submit = ElementTree.Element(
345-
'input', {'type':'submit', 'value':submit_text})
344+
submit = ElementTree.Element(u'input',
345+
{u'type':'submit', u'value':oidutil.toUnicode(submit_text)})
346346
form.append(submit)
347347

348-
return ElementTree.tostring(form)
348+
return ElementTree.tostring(form, encoding='utf-8')
349349

350350
def toURL(self, base_url):
351351
"""Generate a GET URL with the parameters in this message

openid/oidutil.py

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
interesting.
66
"""
77

8-
__all__ = ['log', 'appendArgs', 'toBase64', 'fromBase64', 'autoSubmitHTML']
8+
__all__ = ['log', 'appendArgs', 'toBase64', 'fromBase64', 'autoSubmitHTML', 'toUnicode']
99

1010
import binascii
1111
import sys
@@ -21,6 +21,18 @@
2121
'elementtree.ElementTree',
2222
]
2323

24+
def toUnicode(value):
25+
"""Returns the given argument as a unicode object.
26+
27+
@param value: A UTF-8 encoded string or a unicode (coercible) object
28+
@type value: str or unicode
29+
30+
@returns: Unicode object representing the input value.
31+
"""
32+
if isinstance(value, str):
33+
return value.decode('utf-8')
34+
return unicode(value)
35+
2436
def autoSubmitHTML(form, title='OpenID transaction in progress'):
2537
return """
2638
<html>

openid/test/oidutil.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
# -*- coding: utf-8 -*-
12
import unittest
23
import codecs
34
import string
@@ -53,7 +54,17 @@ def runTest(self):
5354
def shortDescription(self):
5455
return self.desc
5556

57+
class TestUnicodeConversion(unittest.TestCase):
5658

59+
def test_toUnicode(self):
60+
# Unicode objects pass through
61+
self.failUnless(isinstance(oidutil.toUnicode(u'fööbär'), unicode))
62+
self.assertEquals(oidutil.toUnicode(u'fööbär'), u'fööbär')
63+
# UTF-8 encoded strings are decoded
64+
self.failUnless(isinstance(oidutil.toUnicode('fööbär'), unicode))
65+
self.assertEquals(oidutil.toUnicode('fööbär'), u'fööbär')
66+
# Other encodings raise exceptions
67+
self.assertRaises(UnicodeDecodeError, lambda: oidutil.toUnicode(u'fööbär'.encode('latin-1')))
5768

5869
class TestSymbol(unittest.TestCase):
5970
def testCopyHash(self):
@@ -154,6 +165,7 @@ def buildAppendTests():
154165
def pyUnitTests():
155166
some = buildAppendTests()
156167
some.addTest(unittest.defaultTestLoader.loadTestsFromTestCase(TestSymbol))
168+
some.addTest(unittest.defaultTestLoader.loadTestsFromTestCase(TestUnicodeConversion))
157169
return some
158170

159171
def test_appendArgs():

openid/test/test_message.py

Lines changed: 24 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
# -*- coding: utf-8 -*-
12
from openid import message
23
from openid import oidutil
34
from openid.extensions import sreg
@@ -445,7 +446,6 @@ def test_toURL(self):
445446
def test_isOpenID1(self):
446447
self.failUnless(self.msg.isOpenID1())
447448

448-
449449
class OpenID2MessageTest(unittest.TestCase):
450450
def setUp(self):
451451
self.msg = message.Message.fromPostArgs({'openid.mode':'error',
@@ -846,6 +846,29 @@ def test_toFormMarkup(self):
846846
self._checkForm(html, m, self.action_url,
847847
self.form_tag_attrs, self.submit_text)
848848

849+
def test_toFormMarkup_bug_with_utf8_values(self):
850+
postargs = {
851+
'openid.ns': message.OPENID2_NS,
852+
'openid.mode': 'checkid_setup',
853+
'openid.identity': 'http://bogus.example.invalid:port/',
854+
'openid.assoc_handle': 'FLUB',
855+
'openid.return_to': 'Neverland',
856+
'ünicöde_key' : 'ünicöde_välüe',
857+
}
858+
m = message.Message.fromPostArgs(postargs)
859+
# Calling m.toFormMarkup with lxml used for ElementTree will throw
860+
# a ValueError.
861+
html = m.toFormMarkup(self.action_url, self.form_tag_attrs,
862+
self.submit_text)
863+
# Using the (c)ElementTree from stdlib will cause the UTF-8
864+
# encoded strings to be converted to XML character references,
865+
# "ünicöde_key" becomes "&#195;&#188;nic&#195;&#182;de_key" and
866+
# "ünicöde_välüe" becomes "&#195;&#188;nic&#195;&#182;de_v&#195;&#164;l&#195;&#188;e"
867+
self.failIf('&#195;&#188;nic&#195;&#182;de_key' in html,
868+
'UTF-8 bytes should not convert to XML character references')
869+
self.failIf('&#195;&#188;nic&#195;&#182;de_v&#195;&#164;l&#195;&#188;e' in html,
870+
'UTF-8 bytes should not convert to XML character references')
871+
849872
def test_overrideMethod(self):
850873
"""Be sure that caller cannot change form method to GET."""
851874
m = message.Message.fromPostArgs(self.postargs)

0 commit comments

Comments
 (0)