Merge pull request #23 from dave-shawley/rfc-8288-groundwork

RFC-8288 groundwork
dave-shawley · Jan 14, 2020 · ec5292b · ec5292b
2 parents 7346d16 + cf807eb
commit ec5292b
Show file tree

Hide file tree

Showing 12 changed files with 148 additions and 134 deletions.
diff --git a/MANIFEST.in b/MANIFEST.in
@@ -1,11 +1,13 @@
 include CONTRIBUTING.rst
+include ietfparse/py.typed
 include LICENSE
 include *requirements.txt
 include setupext.py
 include tox.ini
 graft docs
 graft tests
 
+recursive-include ietfparse *.pyi
 global-exclude __pycache__
 global-exclude *.pyc
 global-exclude *.swp
diff --git a/docs/changelog.rst b/docs/changelog.rst
@@ -7,6 +7,8 @@ Changelog
 ---------------
 - Switched from travis-ci to circle-ci.
 - Add type stubs.
+- Allow "bad whitespace" around ``=`` in link header parameter lists as
+  indicated in :rfc:`8288#section-3`.
 
 `1.5.1`_ (04-Mar-2018)
 ----------------------

diff --git a/ietfparse/algorithms.py b/ietfparse/algorithms.py
@@ -231,25 +231,27 @@ def rewrite_url(input_url, **kwargs):
     pass through case that is almost always present.
 
     """
-    scheme, netloc, path, query, fragment = parse.urlsplit(input_url)
+    result = parse.urlparse(input_url)
 
     if 'scheme' in kwargs:
         scheme = kwargs['scheme']
+    else:
+        scheme = result.scheme
 
-    ident, host_n_port = parse.splituser(netloc)
-
-    user, password = parse.splitpasswd(ident) if ident else (None, None)
+    user = None
     if 'user' in kwargs:
         user = kwargs['user']
-    elif user is not None:
-        user = parse.unquote_to_bytes(user).decode('utf-8')
+    elif result.username is not None:
+        user = parse.unquote_to_bytes(result.username).decode('utf-8')
+
+    password = None
     if 'password' in kwargs:
         password = kwargs['password']
-    elif password is not None:
-        password = parse.unquote_to_bytes(password).decode('utf-8')
+    elif result.password is not None:
+        password = parse.unquote_to_bytes(result.password).decode('utf-8')
+
     ident = _create_url_identifier(user, password)
 
-    host, port = parse.splitnport(host_n_port, defport=None)
     if 'host' in kwargs:
         host = kwargs['host']
         if host is not None:
@@ -259,13 +261,17 @@ def rewrite_url(input_url, **kwargs):
                 encode_with_idna=kwargs.get('encode_with_idna', None),
                 scheme=scheme,
             )
+    else:
+        host = result.hostname
 
     if 'port' in kwargs:
         port = kwargs['port']
         if port is not None:
             port = int(kwargs['port'])
             if port < 0:
-                raise ValueError('port is required to be non-negative')
+                raise ValueError('port is required to be non-negative')
+    else:
+        port = result.port
 
     if host is None or host == '':
         host_n_port = None
@@ -280,6 +286,8 @@ def rewrite_url(input_url, **kwargs):
             path = '/'
         else:
             path = parse.quote(path.encode('utf-8'), safe=PATH_SAFE_CHARS)
+    else:
+        path = result.path
 
     netloc = '{0}@{1}'.format(ident, host_n_port) if ident else host_n_port
 
@@ -305,12 +313,16 @@ def rewrite_url(input_url, **kwargs):
                 query = parse.urlencode(params)
             else:
                 query = new_query
+    else:
+        query = result.query
 
     if 'fragment' in kwargs:
         fragment = kwargs['fragment']
         if fragment is not None:
             fragment = parse.quote(fragment.encode('utf-8'),
                                    safe=FRAGMENT_SAFE_CHARS)
+    else:
+        fragment = result.fragment
 
     # The following is necessary to get around some interesting special
     # case code in urllib.parse._coerce_args in Python 3.4.  Setting
@@ -319,7 +331,8 @@ def rewrite_url(input_url, **kwargs):
     if scheme is None:
         scheme = ''
 
-    return parse.urlunsplit((scheme, netloc, path, query, fragment))
+    return parse.urlunparse(
+        (scheme, netloc, path, result.params, query, fragment))
 
 
 def remove_url_auth(url):
@@ -354,7 +367,7 @@ def remove_url_auth(url):
     'http://example.com'
 
     """
-    parts = parse.urlsplit(url)
+    parts = parse.urlparse(url)
     return RemoveUrlAuthResult(auth=(parts.username or None, parts.password),
                                url=rewrite_url(url, user=None, password=None))
 

diff --git a/ietfparse/compat/parse.py b/ietfparse/compat/parse.py
@@ -9,38 +9,30 @@
 
 __all__ = (
     'quote',
-    'splitnport',
-    'splitpasswd',
-    'splituser',
-    'unquote',
     'unquote_to_bytes',
     'urlencode',
-    'urlsplit',
-    'urlunsplit',
+    'urlparse',
+    'urlunparse',
 )
 
 try:
     from urllib.parse import (
         quote,
-        splitnport,
-        splitpasswd,
-        splituser,
-        unquote,
         unquote_to_bytes,
         urlencode,
-        urlsplit,
-        urlunsplit,
+        urlparse,
+        urlunparse,
     )
 except ImportError:  # pragma: no cover, coverage with tox
     from urllib import (
         quote,
-        splitnport,
-        splitpasswd,
-        splituser,
-        unquote,
+        unquote as _unquote,
         urlencode as _urlencode,
     )
-    from urlparse import urlsplit, urlunsplit
+    from urlparse import (
+        urlparse,
+        urlunparse,
+    )
 
     # unquote_to_bytes is extremely useful when you need to cleanly
     # unquote a percent-encoded UTF-8 sequence into a unicode string
@@ -51,7 +43,7 @@
     # The return value of this function is the percent decoded raw
     # byte string - NOT A UNICODE STRING
     def unquote_to_bytes(s):
-        return unquote(s).encode('raw_unicode_escape')
+        return _unquote(s).encode('raw_unicode_escape')
 
     # urlencode did not encode its parameters in Python 2.x so we
     # need to implement that ourselves for compatibility.

diff --git a/ietfparse/compat/parse.pyi b/ietfparse/compat/parse.pyi
@@ -1,23 +1,19 @@
 from typing import Optional, Sequence, Tuple
 
 
-def quote(a: bytes, safe: bytes) -> str:
-    ...
-
-
-def splitnport(host: str, defport: Optional[int] = -1) -> Tuple[str, int]:
-    ...
-
-
-def splitpasswd(a: str) -> Tuple[str, str]:
-    ...
-
-
-def splituser(a: str) -> Tuple[str, str]:
-    ...
+class _ParseResult:
+    fragment: Optional[str]
+    hostname: Optional[str]
+    password: Optional[str]
+    scheme: Optional[str]
+    username: Optional[str]
+    port: Optional[int]
+    path: Optional[str]
+    query: str
+    params: str
 
 
-def unquote(a: str) -> str:
+def quote(a: bytes, safe: bytes) -> str:
     ...
 
 
@@ -29,9 +25,9 @@ def urlencode(pairs: Sequence[Tuple[int, int]]) -> str:
     ...
 
 
-def urlsplit(url: str) -> Tuple[str, str, str, str, str]:
+def urlparse(url: str) -> _ParseResult:
     ...
 
 
-def urlunsplit(parts: Tuple[str, str, str, str, str]) -> str:
+def urlunparse(parsed: Tuple[str, str, str, str, str, str]) -> str:
     ...
diff --git a/ietfparse/headers.py b/ietfparse/headers.py
@@ -311,7 +311,8 @@ def parse_links(buf):
 
     for target, param_list in parse_links(sanitized):
         parser = _helpers.ParameterParser(strict=strict)
-        for name, value in _parse_parameter_list(param_list):
+        for name, value in _parse_parameter_list(
+                param_list, strip_interior_whitespace=True):
             parser.add_value(name, value)
 
         links.append(
@@ -337,7 +338,8 @@ def parse_list(value):
 def _parse_parameter_list(parameter_list,
                           normalized_parameter_values=_DEF_PARAM_VALUE,
                           normalize_parameter_names=False,
-                          normalize_parameter_values=True):
+                          normalize_parameter_values=True,
+                          strip_interior_whitespace=False):
     """
     Parse a named parameter list in the "common" format.
 
@@ -348,6 +350,8 @@ def _parse_parameter_list(parameter_list,
         as *truthy*, then parameter values are case-folded to lower case
     :keyword bool normalized_parameter_values: alternate way to spell
         ``normalize_parameter_values`` -- this one is deprecated
+    :keyword bool strip_interior_whitespace: strip whitespace around the
+        ``=`` that separates each parameter name from its value
     :return: a sequence containing the name to value pairs
 
     The parsed values are normalized according to the keyword parameters
@@ -367,6 +371,8 @@ def _parse_parameter_list(parameter_list,
         param = param.strip()
         if param:
             name, value = param.split('=')
+            if strip_interior_whitespace:
+                name, value = name.strip(), value.strip()
             if normalize_parameter_names:
                 name = name.lower()
             if normalize_parameter_values:

diff --git a/setup.cfg b/setup.cfg
@@ -4,6 +4,12 @@ universal = 1
 [build_sphinx]
 all-files = 1
 
+[coverage:report]
+show_missing = 1
+
+[coverage:run]
+branch = 1
+
 [nosetests]
 nocapture = 1
 verbosity = 2

diff --git a/setup.py b/setup.py
@@ -44,6 +44,7 @@ def read_requirements_file(name):
     description='Parse formats defined in IETF RFCs.',
     long_description=long_description,
     packages=setuptools.find_packages(exclude=['tests', 'tests.*']),
+    include_package_data=True,
     zip_safe=True,
     platforms='any',
     install_requires=install_requirements,