Skip to content

Commit

Permalink
Backport urlparse.urlsplit from Python 2.7 to Python 2.6.
Browse files Browse the repository at this point in the history
urlsplit doesn't reject invalid IPv6 addresses in Python 2.6.
  • Loading branch information
berkerpeksag committed May 10, 2015
1 parent 945d2ec commit 95b5f78
Show file tree
Hide file tree
Showing 2 changed files with 66 additions and 1 deletion.
65 changes: 65 additions & 0 deletions gunicorn/_compat.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

from gunicorn import six

PY26 = (sys.version_info[:2] == (2, 6))
PY33 = (sys.version_info >= (3, 3))


Expand Down Expand Up @@ -200,3 +201,67 @@ def wrap_error(func, *args, **kw):
if exc.args:
_wrap_error(exc, _MAP_ERRNO, exc.args[0])
raise

if PY26:
    from urlparse import (
        MAX_CACHE_SIZE, _splitnetloc, SplitResult, scheme_chars,
    )

    # Private result cache for the backported urlsplit() below.  It is kept
    # separate from urlparse._parse_cache on purpose: the stdlib urlsplit()
    # of Python 2.6 stores its own (unvalidated) results in that dict, so
    # sharing it would let a previously cached stdlib result short-circuit
    # the IPv6 check here, and would leak results of this function into the
    # stdlib one.
    _urlsplit_cache = {}

    def _split_after_scheme(url, scheme, allow_fragments):
        """Split the part of a URL that follows the scheme.

        ``url`` is the text after ``<scheme>:`` (or the whole input when no
        scheme was recognised).  Returns a ``SplitResult`` and raises
        ``ValueError`` when the netloc contains an unbalanced ``[`` or ``]``.
        """
        netloc = query = fragment = ''
        if url[:2] == '//':
            netloc, url = _splitnetloc(url, 2)
            # A bracket without its partner cannot be an IPv6 literal.
            if (('[' in netloc and ']' not in netloc) or
                    (']' in netloc and '[' not in netloc)):
                raise ValueError("Invalid IPv6 URL")
        if allow_fragments and '#' in url:
            url, fragment = url.split('#', 1)
        if '?' in url:
            url, query = url.split('?', 1)
        return SplitResult(scheme, netloc, url, query, fragment)

    def urlsplit(url, scheme='', allow_fragments=True):
        """Parse a URL into 5 components:
        <scheme>://<netloc>/<path>?<query>#<fragment>
        Return a 5-tuple: (scheme, netloc, path, query, fragment).
        Note that we don't break the components up in smaller bits
        (e.g. netloc is a single string) and we don't expand % escapes.

        Backport of the Python 2.7 implementation for Python 2.6.  Raises
        ValueError("Invalid IPv6 URL") when the netloc has an unmatched
        square bracket.  Successful results are memoised in a cache private
        to this module; the cache is emptied once it holds MAX_CACHE_SIZE
        entries.
        """
        allow_fragments = bool(allow_fragments)
        # The argument types are part of the key so that equal str/unicode
        # inputs do not share a cached result.
        key = url, scheme, allow_fragments, type(url), type(scheme)
        cached = _urlsplit_cache.get(key, None)
        if cached:
            return cached
        if len(_urlsplit_cache) >= MAX_CACHE_SIZE:  # avoid runaway growth
            _urlsplit_cache.clear()
        i = url.find(':')
        if i > 0:
            if url[:i] == 'http':  # optimize the common case
                scheme = url[:i].lower()
                url = url[i+1:]
            else:
                for c in url[:i]:
                    if c not in scheme_chars:
                        break
                else:
                    # make sure "url" is not actually a port number (in
                    # which case "scheme" is really part of the path)
                    rest = url[i+1:]
                    if not rest or any(c not in '0123456789' for c in rest):
                        # not a port number
                        scheme, url = url[:i].lower(), rest
        v = _split_after_scheme(url, scheme, allow_fragments)
        _urlsplit_cache[key] = v
        return v

else:
    from gunicorn.six.moves.urllib.parse import urlsplit
2 changes: 1 addition & 1 deletion gunicorn/http/message.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
LimitRequestLine, LimitRequestHeaders)
from gunicorn.http.errors import InvalidProxyLine, ForbiddenProxyRequest
from gunicorn.six import BytesIO
from gunicorn.six.moves.urllib.parse import urlsplit
from gunicorn._compat import urlsplit

MAX_REQUEST_LINE = 8190
MAX_HEADERS = 32768
Expand Down

0 comments on commit 95b5f78

Please sign in to comment.