Skip to content

Commit

Permalink
fix(util): Don't double-encode percent-encoded URIs
Browse files Browse the repository at this point in the history
Detect a URI that is already encoded, and don't re-encode in that
case.

Fixes #687
  • Loading branch information
Kurt Griffiths committed Aug 25, 2016
1 parent 5493bf8 commit 5587872
Show file tree
Hide file tree
Showing 2 changed files with 55 additions and 3 deletions.
29 changes: 26 additions & 3 deletions falcon/util/uri.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,17 +60,40 @@ def _create_char_encoder(allowed_chars):
def _create_str_encoder(is_value):

allowed_chars = _UNRESERVED if is_value else _ALL_ALLOWED
allowed_chars_plus_percent = allowed_chars + '%'
encode_char = _create_char_encoder(allowed_chars)

def encoder(uri):
# PERF(kgriffs): Very fast way to check, learned from urlib.quote
if not uri.rstrip(allowed_chars):
return uri

if not uri.rstrip(allowed_chars_plus_percent):
# NOTE(kgriffs): There's a good chance the string has already
# been escaped. Do one more check to increase our certainty.
tokens = uri.split('%')
for token in tokens[1:]:
hex_octet = token[:2]

if not len(hex_octet) == 2:
break

if not (hex_octet[0] in _HEX_DIGITS and
hex_octet[1] in _HEX_DIGITS):
break
else:
# NOTE(kgriffs): All percent-encoded sequences were
# valid, so assume that the string has already been
# encoded.
return uri

# NOTE(kgriffs): At this point we know there is at least
# one unallowed percent character. We are going to assume
# that everything should be encoded. If the string is
# partially encoded, the caller will need to normalize it
# before passing it in here.

# Convert to a byte array if it is not one already
#
# NOTE(kgriffs): Code coverage disabled since in Py3K the uri
# is always a text type, so we get a failure for that tox env.
if isinstance(uri, six.text_type):
uri = uri.encode('utf-8')

Expand Down
29 changes: 29 additions & 0 deletions tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -177,6 +177,35 @@ def test_uri_encode(self):
'?limit=3&e%C3%A7ho=true')
self.assertEqual(uri.encode(url), expected)

def test_uri_encode_double(self):
url = 'http://example.com/v1/fiz bit/messages'
expected = 'http://example.com/v1/fiz%20bit/messages'
self.assertEqual(uri.encode(uri.encode(url)), expected)

url = u'http://example.com/v1/fizbit/messages?limit=3&e\u00e7ho=true'
expected = ('http://example.com/v1/fizbit/messages'
'?limit=3&e%C3%A7ho=true')
self.assertEqual(uri.encode(uri.encode(url)), expected)

url = 'http://example.com/v1/fiz%bit/mess%ages/%'
expected = 'http://example.com/v1/fiz%25bit/mess%25ages/%25'
self.assertEqual(uri.encode(uri.encode(url)), expected)

url = 'http://example.com/%%'
expected = 'http://example.com/%25%25'
self.assertEqual(uri.encode(uri.encode(url)), expected)

# NOTE(kgriffs): Specific example cited in GH issue
url = 'http://something?redirect_uri=http%3A%2F%2Fsite'
self.assertEqual(uri.encode(url), url)

hex_digits = 'abcdefABCDEF0123456789'
for c1 in hex_digits:
for c2 in hex_digits:
url = 'http://example.com/%' + c1 + c2
encoded = uri.encode(uri.encode(url))
self.assertEqual(encoded, url)

def test_uri_encode_value(self):
self.assertEqual(uri.encode_value('abcd'), 'abcd')
self.assertEqual(uri.encode_value(u'abcd'), u'abcd')
Expand Down

0 comments on commit 5587872

Please sign in to comment.