New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fixed #19508 -- Used uri_to_iri. #2932

Closed
wants to merge 1 commit into
base: master
from
Jump to file or symbol
Failed to load files and symbols.
+208 −42
Diff settings

Always

Just for now

@@ -206,7 +206,6 @@ def get_path_info(environ):
"""
path_info = get_bytes_from_wsgi(environ, 'PATH_INFO', '/')
# It'd be better to implement URI-to-IRI decoding, see #19508.
return path_info.decode(UTF_8)
@@ -236,7 +235,6 @@ def get_script_name(environ):
else:
script_name = get_bytes_from_wsgi(environ, 'SCRIPT_NAME', '')
# It'd be better to implement URI-to-IRI decoding, see #19508.

This comment has been minimized.

@loic

loic Jul 19, 2014

Member

You removed this comment but it doesn't look like you do anything to SCRIPT_NAME.

@loic

loic Jul 19, 2014

Member

You removed this comment but it doesn't look like you do anything to SCRIPT_NAME.

This comment has been minimized.

@coder9042

coder9042 Jul 19, 2014

Contributor

Well the issue occurred in SCRIPT_NAME when it is extracted from PATH_INFO so since that is now fixed so we don't require this here as well.

@coder9042

coder9042 Jul 19, 2014

Contributor

Well the issue occurred in SCRIPT_NAME when it is extracted from PATH_INFO so since that is now fixed so we don't require this here as well.

return script_name.decode(UTF_8)
@@ -15,9 +15,11 @@
from wsgiref.util import FileWrapper # NOQA: for backwards compatibility
from django.core.exceptions import ImproperlyConfigured
from django.core.handlers.wsgi import ISO_8859_1
from django.core.management.color import color_style
from django.core.wsgi import get_wsgi_application
from django.utils import six
from django.utils.encoding import uri_to_iri
from django.utils.module_loading import import_string
from django.utils.six.moves import socketserver
@@ -107,6 +109,17 @@ def log_message(self, format, *args):
sys.stderr.write(msg)
def get_environ(self):
    """
    Build the WSGI environ and override PATH_INFO with the URI-to-IRI
    converted request path.

    The query string is not part of PATH_INFO, so only the portion of
    ``self.path`` before the first '?' is converted.
    """
    env = super(WSGIRequestHandler, self).get_environ()
    # Keep only the path component; anything after the first '?' is the
    # query string and isn't needed here.
    path = self.path.split('?', 1)[0]
    # uri_to_iri() returns bytes.
    path = uri_to_iri(path)
    # WSGI requires latin-1 decoded strings.
    env['PATH_INFO'] = path if six.PY2 else path.decode(ISO_8859_1)
    return env
def run(addr, port, wsgi_handler, ipv6=False, threading=False):
server_address = (addr, port)
View
@@ -12,15 +12,15 @@
from django.conf import settings
from django.core import urlresolvers
from django.core.handlers.base import BaseHandler
from django.core.handlers.wsgi import WSGIRequest
from django.core.handlers.wsgi import WSGIRequest, ISO_8859_1
from django.core.signals import (request_started, request_finished,
got_request_exception)
from django.db import close_old_connections
from django.http import SimpleCookie, HttpRequest, QueryDict
from django.template import TemplateDoesNotExist
from django.test import signals
from django.utils.functional import curry, SimpleLazyObject
from django.utils.encoding import force_bytes, force_str
from django.utils.encoding import force_bytes, force_str, uri_to_iri
from django.utils.http import urlencode
from django.utils.itercompat import is_iterable
from django.utils import six
@@ -270,11 +270,9 @@ def _get_path(self, parsed):
# If there are parameters, add them
if parsed[3]:
path += str(";") + force_str(parsed[3])
path = unquote(path)
# WSGI requires latin-1 encoded strings. See get_path_info().
if six.PY3:
path = path.encode('utf-8').decode('iso-8859-1')
return path
path = uri_to_iri(path)
# WSGI requires latin-1 decoded strings.
return path if six.PY2 else path.decode(ISO_8859_1)
def get(self, path, data=None, secure=False, **extra):
"Construct a GET request."
View
@@ -1,13 +1,17 @@
# -*- encoding: utf-8 -*-
from __future__ import unicode_literals
import codecs
import datetime
from decimal import Decimal
import locale
import re
from django.utils.functional import Promise
from django.utils import six
from django.utils.six.moves.urllib.parse import quote
from django.utils.six.moves.urllib.parse import quote, unquote
if six.PY3:
from urllib.parse import unquote_to_bytes
class DjangoUnicodeDecodeError(UnicodeDecodeError):
@@ -185,7 +189,9 @@ def iri_to_uri(iri):
assuming input is either UTF-8 or unicode already, we can simplify things a
little from the full method.
Returns an ASCII string containing the encoded result.
Takes an IRI in UTF-8 bytes (e.g. '/I \xe2\x99\xa5 Django/') or unicode
(e.g. '/I ♥ Django/') and returns ASCII bytes containing the encoded result
(e.g. '/I%20%E2%99%A5%20Django/').
"""
# The list of safe characters here is constructed from the "reserved" and
# "unreserved" characters specified in sections 2.2 and 2.3 of RFC 3986:
@@ -204,6 +210,46 @@ def iri_to_uri(iri):
return quote(force_bytes(iri), safe=b"/#%[]=:;$&()+,!?*@'~")
def uri_to_iri(uri):

This comment has been minimized.

@loic

loic Jul 19, 2014

Member

I would have been interested in something closer to werkzeug's. That takes encoding as a parameter, etc.

Edit: TL;DR skip to https://github.com/django/django/pull/2932/files#r15440287

@loic

loic Jul 19, 2014

Member

I would have been interested in something closer to werkzeug's. That takes encoding as a parameter, etc.

Edit: TL;DR skip to https://github.com/django/django/pull/2932/files#r15440287

This comment has been minimized.

@coder9042

coder9042 Jul 19, 2014

Contributor

I don't see any need for it. Can you tell me why you want that parameter.

@coder9042

coder9042 Jul 19, 2014

Contributor

I don't see any need for it. Can you tell me why you want that parameter.

This comment has been minimized.

@coder9042

coder9042 Jul 19, 2014

Contributor

Even werkzeug's iri_to_uri takes several parameters, but ours does not. Here too I didn't feel the need, so I didn't add any. I will look once again and see whether there is any need.

@coder9042

coder9042 Jul 19, 2014

Contributor

Even werkzeug's iri_to_uri takes several parameters, but ours does not. Here too I didn't feel the need, so I didn't add any. I will look once again and see whether there is any need.

This comment has been minimized.

@loic

loic Jul 19, 2014

Member

The latin1 encoding mess is a WSGI thing, Django wasn't always a WSGI framework, and who knows what will be the next best thing in the future. When these things were contained in the WSGIHandler that made sense, but if we extract a reusable function it shouldn't be tied to such specifics.

@loic

loic Jul 19, 2014

Member

The latin1 encoding mess is a WSGI thing, Django wasn't always a WSGI framework, and who knows what will be the next best thing in the future. When these things were contained in the WSGIHandler that made sense, but if we extract a reusable function it shouldn't be tied to such specifics.

This comment has been minimized.

@coder9042

coder9042 Jul 19, 2014

Contributor

Ok I understand that but what has that got to do with including extra parameters.
Regarding encoding it in latin-1 on PY3: I did it because wsgi.py has the functions get_bytes_from_wsgi and get_str_from_wsgi, which decode as latin-1, and that is not limited to PATH_INFO but also applies to other environ variables, so this keeps things the same everywhere.

@coder9042

coder9042 Jul 19, 2014

Contributor

Ok I understand that but what has that got to do with including extra parameters.
Regarding encoding it in latin-1 on PY3: I did it because wsgi.py has the functions get_bytes_from_wsgi and get_str_from_wsgi, which decode as latin-1, and that is not limited to PATH_INFO but also applies to other environ variables, so this keeps things the same everywhere.

This comment has been minimized.

@loic

loic Jul 27, 2014

Member

This comment thread is rather outdated as it survived many iterations of the patch, anything before this point can be ignored.

One piece of design I'm indecisive about: should we return unicode or UTF-8? Both more or less make sense:

  • Our internal usage of the function ultimately wants UTF-8 on PY2 and UTF-8 decoded as LATIN1 on PY3, so returning UTF-8 is a little more convenient.
  • Although uri_to_iri() accepts both UTF-8 and unicode input, I suspect it's better practice for the application to deal with unicode, so in that case it would be better to have iri_to_uri(uri_to_iri(unicode_string)) == unicode_string. (i.e. return unicode).
@loic

loic Jul 27, 2014

Member

This comment thread is rather outdated as it survived many iterations of the patch, anything before this point can be ignored.

One piece of design I'm indecisive about: should we return unicode or UTF-8? Both more or less make sense:

  • Our internal usage of the function ultimately wants UTF-8 on PY2 and UTF-8 decoded as LATIN1 on PY3, so returning UTF-8 is a little more convenient.
  • Although uri_to_iri() accepts both UTF-8 and unicode input, I suspect it's better practice for the application to deal with unicode, so in that case it would be better to have iri_to_uri(uri_to_iri(unicode_string)) == unicode_string. (i.e. return unicode).

This comment has been minimized.

@coder9042

coder9042 Jul 28, 2014

Contributor

I would go with first option...

@aaugustin @timgraham
What is your opinion on this?

@coder9042

coder9042 Jul 28, 2014

Contributor

I would go with first option...

@aaugustin @timgraham
What is your opinion on this?

This comment has been minimized.

@timgraham

timgraham Jul 28, 2014

Member

Couldn't give you an opinion without spending a lot of time figuring out the purpose of this patch.

@timgraham

timgraham Jul 28, 2014

Member

Couldn't give you an opinion without spending a lot of time figuring out the purpose of this patch.

"""
Converts a Uniform Resource Identifier(URI) into an Internationalized
Resource Identifier(IRI).
This is the algorithm from section 3.2 of RFC 3987.
Takes an URI in ASCII bytes (e.g. '/I%20%E2%99%A5%20Django/') and returns
UTF-8 bytes containing the encoded result (e.g. '/I \xe2\x99\xa5 Django/').
"""
if uri is None:
return uri
uri = force_bytes(uri)
iri = unquote(uri) if six.PY2 else unquote_to_bytes(uri)
return repercent_broken_unicode(iri)
def repercent_broken_unicode(path):
    """
    As per section 3.2 of RFC 3987, step three of converting a URI into an IRI,
    re-percent-encode any octet produced that is not part of a strictly legal
    UTF-8 octet sequence.

    Takes a byte string and returns a byte string in which every octet that
    prevents UTF-8 decoding has been replaced by its uppercase ``%XX`` escape;
    valid UTF-8 sequences are left untouched.

    The work is done in a loop rather than by recursing once per invalid
    octet, so input containing a large number of invalid octets cannot hit the
    interpreter's recursion limit. Only the not-yet-checked tail is re-scanned
    after each replacement.
    """
    pieces = []
    while True:
        try:
            path.decode('utf-8')
        except UnicodeDecodeError as e:
            # Invalid utf-8 should remain URL-encoded.
            # Refs. #19508
            # Everything before e.start decoded cleanly, so it is kept as is.
            pieces.append(path[:e.start])
            # Slice (rather than index) so that ord() receives a length-1
            # byte string on both Python 2 and Python 3.
            octet = ord(path[e.start:e.start + 1])
            pieces.append(('%%%02X' % octet).encode('ascii'))
            # Escape a single octet at a time and resume right after it.
            path = path[e.start + 1:]
        else:
            pieces.append(path)
            return b"".join(pieces)
def filepath_to_uri(path):
"""Convert a file system path to a URI portion that is suitable for
inclusion in a URL.
View
@@ -173,11 +173,11 @@ URL from an IRI_ -- very loosely speaking, a URI_ that can contain Unicode
characters. Quoting and converting an IRI to URI can be a little tricky, so
Django provides some assistance.
* The function ``django.utils.encoding.iri_to_uri()`` implements the
conversion from IRI to URI as required by the specification (:rfc:`3987`).
* The function :func:`django.utils.encoding.iri_to_uri()` implements the
conversion from IRI to URI as required by the specification (:rfc:`3987#section-3.1`).
* The functions ``django.utils.http.urlquote()`` and
``django.utils.http.urlquote_plus()`` are versions of Python's standard
* The functions :func:`django.utils.http.urlquote()` and
:func:`django.utils.http.urlquote_plus()` are versions of Python's standard
``urllib.quote()`` and ``urllib.quote_plus()`` that work with non-ASCII
characters. (The data is converted to UTF-8 prior to encoding.)
@@ -213,12 +213,33 @@ you can construct your IRI without worrying about whether it contains
non-ASCII characters and then, right at the end, call ``iri_to_uri()`` on the
result.
The ``iri_to_uri()`` function is also idempotent, which means the following is
always true::
Similarly, Django provides :func:`django.utils.encoding.uri_to_iri()` which
implements the conversion from URI to IRI as per :rfc:`3987#section-3.2`.
Some percent-encodings are necessary to distinguish percent-encoded and
unencoded uses of reserved characters. Also some percent-encodings cannot be
interpreted as sequences of UTF-8 octets.
It decodes all percent-encodings to the corresponding octets and then
re-percent-encodes those that are not part of a valid UTF-8 sequence.
An example to demonstrate::
>>> uri_to_iri('/%E2%99%A5%E2%99%A5/?utf8=%E2%9C%93').decode('utf-8')
'/♥♥/?utf8=✓'
>>> uri_to_iri('%A9helloworld').decode('utf-8')
'%A9helloworld'
In the first example, the proper utf-8 characters and reserved characters got
unquoted. In the second, the percent-encoding remains unchanged because it
lies outside the valid utf-8 range.
Both ``iri_to_uri()`` and ``uri_to_iri()`` functions are idempotent, which means the
following is always true::
iri_to_uri(iri_to_uri(some_string)) == iri_to_uri(some_string)
uri_to_iri(uri_to_iri(some_string)) == uri_to_iri(some_string)
So you can safely call it multiple times on the same IRI without risking
So you can safely call it multiple times on the same URI/IRI without risking
double-quoting problems.
.. _URI: http://www.ietf.org/rfc/rfc2396.txt
View
@@ -271,7 +271,20 @@ The functions defined in this module share the following properties:
since we are assuming input is either UTF-8 or unicode already, we can
simplify things a little from the full method.
Returns an ASCII string containing the encoded result.
Takes an IRI in UTF-8 bytes and returns ASCII bytes containing the encoded
result.
.. function:: uri_to_iri(uri)

This comment has been minimized.

@timgraham

timgraham Jul 28, 2014

Member

missing .. versionadded:: 1.8

@timgraham

timgraham Jul 28, 2014

Member

missing .. versionadded:: 1.8

This comment has been minimized.

@coder9042

coder9042 Jul 28, 2014

Contributor

added..

@coder9042

coder9042 Jul 28, 2014

Contributor

added..

.. versionadded:: 1.8
Converts a Uniform Resource Identifier into an Internationalized Resource
Identifier.
This is the algorithm from section 3.2 of :rfc:`3987#section-3.2`.
Takes a URI in ASCII bytes and returns UTF-8 bytes containing the encoded
result.
.. function:: filepath_to_uri(path)
View
@@ -244,6 +244,9 @@ Requests and Responses
This brings this class into line with the documentation and with
``WSGIRequest``.
* ``WSGIRequestHandler`` now follows :rfc:`3987#section-3.2` when converting
the request URI to an IRI, using ``uri_to_iri()``.

This comment has been minimized.

@loic

loic Jul 27, 2014

Member

If we mention uri_to_iri() here we need to document it. Dunno if we should make it a public API? If we do then the docs for iri_to_uri() can be used as guidance.

cc @timgraham, @aaugustin.

@loic

loic Jul 27, 2014

Member

If we mention uri_to_iri() here we need to document it. Dunno if we should make it a public API? If we do then the docs for iri_to_uri() can be used as guidance.

cc @timgraham, @aaugustin.

This comment has been minimized.

@coder9042

coder9042 Jul 27, 2014

Contributor

As far as I know iri_to_uri() is not documented particularly. What it does is mentioned in places.

@coder9042

coder9042 Jul 27, 2014

Contributor

As far as I know iri_to_uri() is not documented particularly. What it does is mentioned in places.

This comment has been minimized.

@loic

loic Jul 27, 2014

Member

It's formally documented in ref/utils.txt and discussed extensively in ref/unicode.txt.

@loic

loic Jul 27, 2014

Member

It's formally documented in ref/utils.txt and discussed extensively in ref/unicode.txt.

This comment has been minimized.

@coder9042

coder9042 Jul 28, 2014

Contributor

I have updated ref/utils.txt.
Regarding ref/unicode.txt, I wasn't sure whether we are to keep the docs in a similar way or probably we could improve upon the presentation.

@coder9042

coder9042 Jul 28, 2014

Contributor

I have updated ref/utils.txt.
Regarding ref/unicode.txt, I wasn't sure whether we are to keep the docs in a similar way or probably we could improve upon the presentation.

This comment has been minimized.

@timgraham

timgraham Jul 28, 2014

Member

As a user, I have no idea what this means. What benefit will I see? What new functionality does it enable? Is it more of a bug fix that probably doesn't need to be mentioned at all?

@timgraham

timgraham Jul 28, 2014

Member

As a user, I have no idea what this means. What benefit will I see? What new functionality does it enable? Is it more of a bug fix that probably doesn't need to be mentioned at all?

This comment has been minimized.

@coder9042

coder9042 Jul 28, 2014

Contributor

@timgraham
Well its basically a bug fix by implementing a new thing....

@coder9042

coder9042 Jul 28, 2014

Contributor

@timgraham
Well its basically a bug fix by implementing a new thing....

Tests
^^^^^
View
@@ -7,7 +7,7 @@
from django.db import close_old_connections, connection
from django.test import RequestFactory, TestCase, TransactionTestCase
from django.test import override_settings
from django.utils.encoding import force_str
from django.utils.encoding import force_str, uri_to_iri, iri_to_uri
from django.utils import six
@@ -129,3 +129,28 @@ class HandlerSuspiciousOpsTest(TestCase):
def test_suspiciousop_in_view_returns_400(self):
response = self.client.get('/suspicious/')
self.assertEqual(response.status_code, 400)
@override_settings(ROOT_URLCONF='handlers.urls')
class HandlerNotFoundTest(TestCase):
def test_invalid_urls(self):
    """
    Requests for paths containing broken UTF-8 escapes return a 404 whose
    body shows the broken octets still percent-encoded, with any valid
    sequences decoded.
    """
    # (requested path, text expected in the 404 page)
    scenarios = (
        ('~%A9helloworld', '~%A9helloworld'),
        ('d%aao%aaw%aan%aal%aao%aaa%aad%aa/', 'd%AAo%AAw%AAn%AAl%AAo%AAa%AAd%AA'),
        ('/%E2%99%E2%99%A5/', '%E2%99\u2665'),
        ('/%E2%98%8E%E2%A9%E2%99%A5/', '\u260e%E2%A9\u2665'),
    )
    for requested, shown in scenarios:
        response = self.client.get(requested)
        self.assertEqual(response.status_code, 404)
        self.assertContains(response, shown, status_code=404)

This comment has been minimized.

@timgraham

timgraham Jul 28, 2014

Member

extra newline

@timgraham

timgraham Jul 28, 2014

Member

extra newline

This comment has been minimized.

@coder9042

coder9042 Jul 28, 2014

Contributor

removed

@coder9042

coder9042 Jul 28, 2014

Contributor

removed

def test_environ_path_info_type(self):
    """
    PATH_INFO in the environ built by RequestFactory is a text string, even
    when the requested path mixes valid and broken UTF-8 escapes.
    """
    environ = RequestFactory().get('/%E2%A8%87%87%A5%E2%A8%A0').environ
    # assertIsInstance reports the actual type on failure, unlike
    # assertTrue(isinstance(...)).
    self.assertIsInstance(environ['PATH_INFO'], six.text_type)

This comment has been minimized.

@berkerpeksag

berkerpeksag Oct 11, 2014

Contributor

Nitpick: Since Django no longer supports Python 2.6 and this is a new feature in Django 1.8, you can use Python 2.7's new assertIsInstance method here: https://docs.python.org/2.7/library/unittest.html#unittest.TestCase.assertIsInstance

@berkerpeksag

berkerpeksag Oct 11, 2014

Contributor

Nitpick: Since Django no longer supports Python 2.6 and this is a new feature in Django 1.8, you can use Python 2.7's new assertIsInstance method here: https://docs.python.org/2.7/library/unittest.html#unittest.TestCase.assertIsInstance

View
@@ -2,7 +2,7 @@
from __future__ import unicode_literals
from django.test import TestCase
from django.utils.encoding import iri_to_uri, force_text
from django.utils.encoding import force_text
from django.utils.functional import lazy
from django.utils.http import (cookie_date, http_date,
urlquote, urlquote_plus, urlunquote, urlunquote_plus)
@@ -89,17 +89,3 @@ def test_cookie_date(self):
def test_http_date(self):
t = 1167616461.0
self.assertEqual(http_date(t), 'Mon, 01 Jan 2007 01:54:21 GMT')
def test_iri_to_uri(self):
self.assertEqual(iri_to_uri('red%09ros\xe9#red'),
'red%09ros%C3%A9#red')
self.assertEqual(iri_to_uri('/blog/for/J\xfcrgen M\xfcnster/'),
'/blog/for/J%C3%BCrgen%20M%C3%BCnster/')
self.assertEqual(iri_to_uri('locations/%s' % urlquote_plus('Paris & Orl\xe9ans')),
'locations/Paris+%26+Orl%C3%A9ans')
def test_iri_to_uri_idempotent(self):
self.assertEqual(iri_to_uri(iri_to_uri('red%09ros\xe9#red')),
'red%09ros%C3%A9#red')
@@ -6,7 +6,8 @@
from django.utils import six
from django.utils.encoding import (force_bytes, force_text, filepath_to_uri,
python_2_unicode_compatible)
iri_to_uri, uri_to_iri, python_2_unicode_compatible)
from django.utils.http import urlquote_plus
class TestEncodingUtils(unittest.TestCase):
@@ -39,15 +40,77 @@ def test_force_bytes_strings_only(self):
today = datetime.date.today()
self.assertEqual(force_bytes(today, strings_only=True), today)
def test_filepath_to_uri(self):
self.assertEqual(filepath_to_uri('upload\\чубака.mp4'),
'upload/%D1%87%D1%83%D0%B1%D0%B0%D0%BA%D0%B0.mp4')
self.assertEqual(filepath_to_uri('upload\\чубака.mp4'.encode('utf-8')),
'upload/%D1%87%D1%83%D0%B1%D0%B0%D0%BA%D0%B0.mp4')
@unittest.skipIf(six.PY3, "tests a class not defining __str__ under Python 2")
def test_decorated_class_without_str(self):
with self.assertRaises(ValueError):
@python_2_unicode_compatible
class NoStr(object):
pass
class TestRFC3987IEncodingUtils(unittest.TestCase):
    """Tests for filepath_to_uri(), iri_to_uri() and uri_to_iri()."""

    def test_filepath_to_uri(self):
        """Text and UTF-8 bytes paths are converted to the same URI."""
        expected = 'upload/%D1%87%D1%83%D0%B1%D0%B0%D0%BA%D0%B0.mp4'
        text_path = 'upload\\чубака.mp4'
        self.assertEqual(filepath_to_uri(text_path), expected)
        self.assertEqual(filepath_to_uri(text_path.encode('utf-8')), expected)

    def test_iri_to_uri(self):
        """iri_to_uri() escapes non-ASCII text and is idempotent."""
        expectations = (
            # Valid UTF-8 sequences are encoded.
            ('red%09rosé#red', 'red%09ros%C3%A9#red'),
            ('/blog/for/Jürgen Münster/', '/blog/for/J%C3%BCrgen%20M%C3%BCnster/'),
            ('locations/%s' % urlquote_plus('Paris & Orléans'), 'locations/Paris+%26+Orl%C3%A9ans'),
            # Reserved chars remain unescaped.
            ('%&', '%&'),
            ('red&♥ros%#red', 'red&%E2%99%A5ros%#red'),
        )
        for source, expected in expectations:
            converted = iri_to_uri(source)
            self.assertEqual(converted, expected)
            # Test idempotency.
            self.assertEqual(iri_to_uri(converted), expected)

    def test_uri_to_iri(self):
        """uri_to_iri() decodes valid escapes only and is idempotent."""
        expectations = (
            # Valid UTF-8 sequences are decoded.
            ('/%E2%99%A5%E2%99%A5/', '/♥♥/'),
            ('/%E2%99%A5%E2%99%A5/?utf8=%E2%9C%93', '/♥♥/?utf8=✓'),
            # Broken UTF-8 sequences remain escaped.
            ('/%AAd%AAj%AAa%AAn%AAg%AAo%AA/', '/%AAd%AAj%AAa%AAn%AAg%AAo%AA/'),
            ('/%E2%99%A5%E2%E2%99%A5/', '/♥%E2♥/'),
            ('/%E2%99%A5%E2%99%E2%99%A5/', '/♥%E2%99♥/'),
            ('/%E2%E2%99%A5%E2%99%A5%99/', '/%E2♥♥%99/'),
            ('/%E2%99%A5%E2%99%A5/?utf8=%9C%93%E2%9C%93%9C%93', '/♥♥/?utf8=%9C%93✓%9C%93'),
        )
        for source, text in expectations:
            # uri_to_iri() returns UTF-8 bytes, so compare against bytes.
            expected = text.encode('utf-8')
            converted = uri_to_iri(source)
            self.assertEqual(converted, expected)
            # Test idempotency.
            self.assertEqual(uri_to_iri(converted), expected)

    def test_complementarity(self):
        """iri_to_uri() and uri_to_iri() undo each other."""
        pairs = (
            ('/blog/for/J%C3%BCrgen%20M%C3%BCnster/', '/blog/for/J\xfcrgen M\xfcnster/'),
            ('%&', '%&'),
            ('red&%E2%99%A5ros%#red', 'red&♥ros%#red'),
            ('/%E2%99%A5%E2%99%A5/', '/♥♥/'),
            ('/%E2%99%A5%E2%99%A5/?utf8=%E2%9C%93', '/♥♥/?utf8=✓'),
            ('/%AAd%AAj%AAa%AAn%AAg%AAo%AA/', '/%AAd%AAj%AAa%AAn%AAg%AAo%AA/'),
            ('/%E2%99%A5%E2%E2%99%A5/', '/♥%E2♥/'),
            ('/%E2%99%A5%E2%99%E2%99%A5/', '/♥%E2%99♥/'),
            ('/%E2%E2%99%A5%E2%99%A5%99/', '/%E2♥♥%99/'),
            ('/%E2%99%A5%E2%99%A5/?utf8=%9C%93%E2%9C%93%9C%93', '/♥♥/?utf8=%9C%93✓%9C%93'),
        )
        for escaped, text in pairs:
            unescaped = text.encode('utf-8')
            self.assertEqual(iri_to_uri(uri_to_iri(escaped)), escaped)
            self.assertEqual(uri_to_iri(iri_to_uri(unescaped)), unescaped)
ProTip! Use n and p to navigate between commits in a pull request.