Skip to content

Commit

Permalink
Merge pull request #23 from dave-shawley/rfc-8288-groundwork
Browse files Browse the repository at this point in the history
RFC-8288 groundwork
  • Loading branch information
dave-shawley committed Jan 14, 2020
2 parents 7346d16 + cf807eb commit ec5292b
Show file tree
Hide file tree
Showing 12 changed files with 148 additions and 134 deletions.
2 changes: 2 additions & 0 deletions MANIFEST.in
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
include CONTRIBUTING.rst
include ietfparse/py.typed
include LICENSE
include *requirements.txt
include setupext.py
include tox.ini
graft docs
graft tests

recursive-include ietfparse *.pyi
global-exclude __pycache__
global-exclude *.pyc
global-exclude *.swp
2 changes: 2 additions & 0 deletions docs/changelog.rst
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@ Changelog
---------------
- Switched from travis-ci to circle-ci.
- Add type stubs.
- Allow "bad whitespace" (BWS) around the ``=`` in link header parameter
lists, as permitted by the ``link-param`` grammar in :rfc:`8288#section-3`.

`1.5.1`_ (04-Mar-2018)
----------------------
Expand Down
37 changes: 25 additions & 12 deletions ietfparse/algorithms.py
Original file line number Diff line number Diff line change
Expand Up @@ -231,25 +231,27 @@ def rewrite_url(input_url, **kwargs):
pass through case that is almost always present.
"""
scheme, netloc, path, query, fragment = parse.urlsplit(input_url)
result = parse.urlparse(input_url)

if 'scheme' in kwargs:
scheme = kwargs['scheme']
else:
scheme = result.scheme

ident, host_n_port = parse.splituser(netloc)

user, password = parse.splitpasswd(ident) if ident else (None, None)
user = None
if 'user' in kwargs:
user = kwargs['user']
elif user is not None:
user = parse.unquote_to_bytes(user).decode('utf-8')
elif result.username is not None:
user = parse.unquote_to_bytes(result.username).decode('utf-8')

password = None
if 'password' in kwargs:
password = kwargs['password']
elif password is not None:
password = parse.unquote_to_bytes(password).decode('utf-8')
elif result.password is not None:
password = parse.unquote_to_bytes(result.password).decode('utf-8')

ident = _create_url_identifier(user, password)

host, port = parse.splitnport(host_n_port, defport=None)
if 'host' in kwargs:
host = kwargs['host']
if host is not None:
Expand All @@ -259,13 +261,17 @@ def rewrite_url(input_url, **kwargs):
encode_with_idna=kwargs.get('encode_with_idna', None),
scheme=scheme,
)
else:
host = result.hostname

if 'port' in kwargs:
port = kwargs['port']
if port is not None:
port = int(kwargs['port'])
if port < 0:
raise ValueError('port is required to be non-negative')
raise ValueError('port is requried to be non-negative')
else:
port = result.port

if host is None or host == '':
host_n_port = None
Expand All @@ -280,6 +286,8 @@ def rewrite_url(input_url, **kwargs):
path = '/'
else:
path = parse.quote(path.encode('utf-8'), safe=PATH_SAFE_CHARS)
else:
path = result.path

netloc = '{0}@{1}'.format(ident, host_n_port) if ident else host_n_port

Expand All @@ -305,12 +313,16 @@ def rewrite_url(input_url, **kwargs):
query = parse.urlencode(params)
else:
query = new_query
else:
query = result.query

if 'fragment' in kwargs:
fragment = kwargs['fragment']
if fragment is not None:
fragment = parse.quote(fragment.encode('utf-8'),
safe=FRAGMENT_SAFE_CHARS)
else:
fragment = result.fragment

# The following is necessary to get around some interesting special
# case code in urllib.parse._coerce_args in Python 3.4. Setting
Expand All @@ -319,7 +331,8 @@ def rewrite_url(input_url, **kwargs):
if scheme is None:
scheme = ''

return parse.urlunsplit((scheme, netloc, path, query, fragment))
return parse.urlunparse(
(scheme, netloc, path, result.params, query, fragment))


def remove_url_auth(url):
Expand Down Expand Up @@ -354,7 +367,7 @@ def remove_url_auth(url):
'http://example.com'
"""
parts = parse.urlsplit(url)
parts = parse.urlparse(url)
return RemoveUrlAuthResult(auth=(parts.username or None, parts.password),
url=rewrite_url(url, user=None, password=None))

Expand Down
28 changes: 10 additions & 18 deletions ietfparse/compat/parse.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,38 +9,30 @@

__all__ = (
'quote',
'splitnport',
'splitpasswd',
'splituser',
'unquote',
'unquote_to_bytes',
'urlencode',
'urlsplit',
'urlunsplit',
'urlparse',
'urlunparse',
)

try:
from urllib.parse import (
quote,
splitnport,
splitpasswd,
splituser,
unquote,
unquote_to_bytes,
urlencode,
urlsplit,
urlunsplit,
urlparse,
urlunparse,
)
except ImportError: # pragma: no cover, coverage with tox
from urllib import (
quote,
splitnport,
splitpasswd,
splituser,
unquote,
unquote as _unquote,
urlencode as _urlencode,
)
from urlparse import urlsplit, urlunsplit
from urlparse import (
urlparse,
urlunparse,
)

# unquote_to_bytes is extremely useful when you need to cleanly
# unquote a percent-encoded UTF-8 sequence into a unicode string
Expand All @@ -51,7 +43,7 @@
# The return value of this function is the percent decoded raw
# byte string - NOT A UNICODE STRING
def unquote_to_bytes(s):
return unquote(s).encode('raw_unicode_escape')
return _unquote(s).encode('raw_unicode_escape')

# urlencode did not encode its parameters in Python 2.x so we
# need to implement that ourselves for compatibility.
Expand Down
30 changes: 13 additions & 17 deletions ietfparse/compat/parse.pyi
Original file line number Diff line number Diff line change
@@ -1,23 +1,19 @@
from typing import Optional, Sequence, Tuple


def quote(a: bytes, safe: bytes) -> str:
...


def splitnport(host: str, defport: Optional[int] = -1) -> Tuple[str, int]:
...


def splitpasswd(a: str) -> Tuple[str, str]:
...


def splituser(a: str) -> Tuple[str, str]:
...
class _ParseResult:
fragment: Optional[str]
hostname: Optional[str]
password: Optional[str]
scheme: Optional[str]
username: Optional[str]
port: Optional[int]
path: Optional[str]
query: str
params: str


def unquote(a: str) -> str:
def quote(a: bytes, safe: bytes) -> str:
...


Expand All @@ -29,9 +25,9 @@ def urlencode(pairs: Sequence[Tuple[int, int]]) -> str:
...


def urlsplit(url: str) -> Tuple[str, str, str, str, str]:
def urlparse(url: str) -> _ParseResult:
...


def urlunsplit(parts: Tuple[str, str, str, str, str]) -> str:
def urlunparse(parsed: Tuple[str, str, str, str, str, str]) -> str:
...
10 changes: 8 additions & 2 deletions ietfparse/headers.py
Original file line number Diff line number Diff line change
Expand Up @@ -311,7 +311,8 @@ def parse_links(buf):

for target, param_list in parse_links(sanitized):
parser = _helpers.ParameterParser(strict=strict)
for name, value in _parse_parameter_list(param_list):
for name, value in _parse_parameter_list(
param_list, strip_interior_whitespace=True):
parser.add_value(name, value)

links.append(
Expand All @@ -337,7 +338,8 @@ def parse_list(value):
def _parse_parameter_list(parameter_list,
normalized_parameter_values=_DEF_PARAM_VALUE,
normalize_parameter_names=False,
normalize_parameter_values=True):
normalize_parameter_values=True,
strip_interior_whitespace=False):
"""
Parse a named parameter list in the "common" format.
Expand All @@ -348,6 +350,8 @@ def _parse_parameter_list(parameter_list,
as *truthy*, then parameter values are case-folded to lower case
:keyword bool normalized_parameter_values: alternate way to spell
``normalize_parameter_values`` -- this one is deprecated
:keyword bool strip_interior_whitespace: if *truthy*, strip the whitespace
surrounding the ``=`` that separates each parameter name from its value
:return: a sequence containing the name to value pairs
The parsed values are normalized according to the keyword parameters
Expand All @@ -367,6 +371,8 @@ def _parse_parameter_list(parameter_list,
param = param.strip()
if param:
name, value = param.split('=')
if strip_interior_whitespace:
name, value = name.strip(), value.strip()
if normalize_parameter_names:
name = name.lower()
if normalize_parameter_values:
Expand Down
6 changes: 6 additions & 0 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,12 @@ universal = 1
[build_sphinx]
all-files = 1

[coverage:report]
show_missing = 1

[coverage:run]
branch = 1

[nosetests]
nocapture = 1
verbosity = 2
Expand Down
1 change: 1 addition & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ def read_requirements_file(name):
description='Parse formats defined in IETF RFCs.',
long_description=long_description,
packages=setuptools.find_packages(exclude=['tests', 'tests.*']),
include_package_data=True,
zip_safe=True,
platforms='any',
install_requires=install_requirements,
Expand Down
Loading

0 comments on commit ec5292b

Please sign in to comment.