Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(util): Don't double-encode percent-encoded URIs #869

Merged
merged 1 commit into from
Aug 27, 2016
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 26 additions & 3 deletions falcon/util/uri.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,17 +60,40 @@ def _create_char_encoder(allowed_chars):
def _create_str_encoder(is_value):

allowed_chars = _UNRESERVED if is_value else _ALL_ALLOWED
allowed_chars_plus_percent = allowed_chars + '%'
encode_char = _create_char_encoder(allowed_chars)

def encoder(uri):
# PERF(kgriffs): Very fast way to check, learned from urllib.quote
if not uri.rstrip(allowed_chars):
return uri

if not uri.rstrip(allowed_chars_plus_percent):
# NOTE(kgriffs): There's a good chance the string has already
# been escaped. Do one more check to increase our certainty.
tokens = uri.split('%')
for token in tokens[1:]:
hex_octet = token[:2]

if not len(hex_octet) == 2:
break

if not (hex_octet[0] in _HEX_DIGITS and
hex_octet[1] in _HEX_DIGITS):
break
else:
# NOTE(kgriffs): All percent-encoded sequences were
# valid, so assume that the string has already been
# encoded.
return uri

# NOTE(kgriffs): At this point we know there is at least
# one unallowed percent character. We are going to assume
# that everything should be encoded. If the string is
# partially encoded, the caller will need to normalize it
# before passing it in here.

# Convert to a byte array if it is not one already
#
# NOTE(kgriffs): Code coverage disabled since in Py3K the uri
# is always a text type, so we get a failure for that tox env.
if isinstance(uri, six.text_type):
uri = uri.encode('utf-8')

Expand Down
29 changes: 29 additions & 0 deletions tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -177,6 +177,35 @@ def test_uri_encode(self):
'?limit=3&e%C3%A7ho=true')
self.assertEqual(uri.encode(url), expected)

def test_uri_encode_double(self):
    """Check that encoding a URI twice gives the expected single encoding."""
    # Each pair is (raw input, expected result after two encode passes).
    # Stray percent characters that do not start a valid escape are
    # expected to come out as %25.
    double_encode_cases = (
        (
            'http://example.com/v1/fiz bit/messages',
            'http://example.com/v1/fiz%20bit/messages',
        ),
        (
            u'http://example.com/v1/fizbit/messages?limit=3&e\u00e7ho=true',
            'http://example.com/v1/fizbit/messages'
            '?limit=3&e%C3%A7ho=true',
        ),
        (
            'http://example.com/v1/fiz%bit/mess%ages/%',
            'http://example.com/v1/fiz%25bit/mess%25ages/%25',
        ),
        (
            'http://example.com/%%',
            'http://example.com/%25%25',
        ),
    )

    for raw, expected in double_encode_cases:
        encoded_twice = uri.encode(uri.encode(raw))
        self.assertEqual(encoded_twice, expected)

    # NOTE(kgriffs): This is the specific example cited in the GH issue;
    # a single pass over an already-escaped value must return it as-is.
    already_escaped = 'http://something?redirect_uri=http%3A%2F%2Fsite'
    self.assertEqual(uri.encode(already_escaped), already_escaped)

    # Every two-digit hex escape, in either letter case, must survive
    # two encode passes untouched.
    hex_digits = 'abcdefABCDEF0123456789'
    for first in hex_digits:
        for second in hex_digits:
            escaped_url = 'http://example.com/%' + first + second
            result = uri.encode(uri.encode(escaped_url))
            self.assertEqual(result, escaped_url)

def test_uri_encode_value(self):
self.assertEqual(uri.encode_value('abcd'), 'abcd')
self.assertEqual(uri.encode_value(u'abcd'), u'abcd')
Expand Down