Merge pull request #26 from dave-shawley/handle-invalid-accept

Handle invalid Accept headers by skipping invalid content
dave-shawley · Nov 4, 2020 · 5f630bb · 5f630bb
2 parents cc577bf + 77353c2
commit 5f630bb
Show file tree

Hide file tree

Showing 7 changed files with 80 additions and 19 deletions.
diff --git a/docs/changelog.rst b/docs/changelog.rst
@@ -3,6 +3,20 @@ Changelog
 
 .. py:currentmodule:: ietfparse
 
+`Next Release`_
+---------------
+.. rubric:: Behavioural Change
+
+:func:`headers.parse_accept` used to fail with a :exc:`ValueError` when
+it encountered an invalid content type value in the header.  Now it skips
+the invalid value.  If you want the previous behaviour, then pass ``strict=True``.
+
+- Advertise support for Python 3.7-3.9, remove 3.4 & 3.5
+- Clarify that :func:`headers.parse_content_type` raises :exc:`ValueError`
+  when it encounters an invalid content type header
+- Skip unparseable content types in :func:`headers.parse_accept` unless
+  the new ``strict`` parameter is truthy
+
 `1.6.1`_ (26-Jan-2020)
 ----------------------
 - Fixed project URL metadata.

diff --git a/ietfparse/headers.py b/ietfparse/headers.py
@@ -31,14 +31,20 @@
 _DEF_PARAM_VALUE = object()
 
 
-def parse_accept(header_value):
+def parse_accept(header_value, strict=False):
     """Parse an HTTP accept-like header.
 
     :param str header_value: the header value to parse
+    :param bool strict: if :data:`True`, then invalid content type
+        values within `header_value` will raise :exc:`ValueError`;
+        otherwise, they are ignored
     :return: a :class:`list` of :class:`.ContentType` instances
         in decreasing quality order.  Each instance is augmented
         with the associated quality as a ``float`` property
         named ``quality``.
+    :raise: :exc:`ValueError` if `strict` is *truthy* and at least
+        one value in `header_value` could not be parsed by
+        :func:`.parse_content_type`
 
     ``Accept`` is a class of headers that contain a list of values
     and an associated preference value.  The ever present `Accept`_
@@ -55,9 +61,14 @@ def parse_accept(header_value):
 
     """
     next_explicit_q = decimal.ExtendedContext.next_plus(decimal.Decimal('5.0'))
-    headers = [
-        parse_content_type(header) for header in parse_list(header_value)
-    ]
+    headers = []
+    for header in parse_list(header_value):
+        try:
+            headers.append(parse_content_type(header))
+        except ValueError:
+            if strict:
+                raise
+
     for header in headers:
         q = header.parameters.pop('q', None)
         if q is None:
@@ -208,10 +219,17 @@ def parse_content_type(content_type, normalize_parameter_values=True):
         setting this to ``False`` will enable strict RFC2045 compliance
         in which content parameter values are case preserving.
     :return: a :class:`~ietfparse.datastructures.ContentType` instance
+    :raises: :exc:`ValueError` if the content type cannot be reasonably
+        parsed (e.g., ``Content-Type: *``)
 
     """
     parts = _remove_comments(content_type).split(';')
-    content_type, content_subtype = parts.pop(0).split('/')
+    type_spec = parts.pop(0)
+    try:
+        content_type, content_subtype = type_spec.split('/')
+    except ValueError:
+        raise ValueError('Failed to parse ' + type_spec)
+
     if '+' in content_subtype:
         content_subtype, content_suffix = content_subtype.split('+')
     else:

diff --git a/ietfparse/headers.pyi b/ietfparse/headers.pyi
@@ -3,7 +3,7 @@ from typing import Collection, Dict, List, Optional, Sequence, Union
 from ietfparse import datastructures
 
 
-def parse_accept(header_value: str) -> List[datastructures.ContentType]:
+def parse_accept(header_value: str, strict: bool = False) -> List[datastructures.ContentType]:
     ...
 
 

diff --git a/setup.cfg b/setup.cfg
@@ -21,9 +21,10 @@ classifiers =
     Programming Language :: Python :: 2
     Programming Language :: Python :: 2.7
     Programming Language :: Python :: 3
-    Programming Language :: Python :: 3.4
-    Programming Language :: Python :: 3.5
     Programming Language :: Python :: 3.6
+    Programming Language :: Python :: 3.7
+    Programming Language :: Python :: 3.8
+    Programming Language :: Python :: 3.9
     Development Status :: 5 - Production/Stable
     Environment :: Web Environment
     Topic :: Internet :: WWW/HTTP
@@ -37,16 +38,16 @@ packages = find:
 
 [options.extras_require]
 dev =
-    coverage==5.0.3
-    flake8==3.7.9
+    coverage==5.3
+    flake8==3.8.4
     mock>1.0,<2; python_version<"3"
-    mypy==0.761
-    sphinx==2.3.1
+    mypy==0.790
+    sphinx==3.3.0
     sphinxcontrib-httpdomain==1.7.0
-    tox==3.14.2
+    tox==3.20.1
     yapf==0.29.0
 test =
-    coverage==5.0.3
+    coverage==5.3
     mock>1.0,<2; python_version<"3"
 
 [options.packages.find]
@@ -66,6 +67,7 @@ show_missing = 1
 
 [coverage:run]
 branch = 1
+source = ietfparse
 
 [flake8]
 exclude = build,dist,doc,env

diff --git a/tests/test_headers_content_type.py b/tests/test_headers_content_type.py
@@ -45,6 +45,12 @@ def test_that_message_type_parameter_is_parsed(self):
         self.assertEqual(self.parsed.parameters['msgtype'], 'Request')
 
 
+class ParsingBrokenContentTypes(unittest.TestCase):
+    def test_that_missing_subtype_raises_value_error(self):
+        with self.assertRaises(ValueError):
+            headers.parse_content_type('*')
+
+
 class Rfc7231ExampleTests(unittest.TestCase):
     """Test cases from RFC7231, Section 3.1.1.1"""
     def setUp(self):

diff --git a/tests/test_headers_parse_accept.py b/tests/test_headers_parse_accept.py
@@ -63,6 +63,26 @@ def test_that_extension_tokens_with_spaces_are_parsed(self):
                                            {'x-foo': ' something else'})
             ])
 
+    def test_that_invalid_parts_are_skipped(self):
+        parsed = headers.parse_accept('text/html, image/gif, image/jpeg, '
+                                      '*; q=.2, */*; q=.2')
+        self.assertEqual(len(parsed), 4)
+        self.assertEqual(parsed[0], datastructures.ContentType('text', 'html'))
+        self.assertEqual(parsed[1],
+                         datastructures.ContentType('image', 'jpeg'))
+        self.assertEqual(parsed[2], datastructures.ContentType('image', 'gif'))
+        self.assertEqual(parsed[3], datastructures.ContentType('*', '*'))
+
+    def test_that_invalid_parts_raise_error_when_strict_is_enabled(self):
+        with self.assertRaises(ValueError):
+            headers.parse_accept(
+                'text/html, image/gif, image/jpeg, *; q=.2, */*; q=.2',
+                strict=True)
+
+    def test_the_invalid_header_returns_empty_list(self):
+        parsed = headers.parse_accept('*')
+        self.assertEqual(len(parsed), 0)
+
 
 class ParseAcceptCharsetHeaderTests(unittest.TestCase):
 

diff --git a/tox.ini b/tox.ini
@@ -1,16 +1,17 @@
 [tox]
-envlist = py27,py35,py36,py37,py38,lint,coverage
+envlist = py27,py36,py37,py38,py39,lint,coverage
 toxworkdir = {toxinidir}/build/tox
 
 [testenv]
-commands = python -m unittest discover
+commands =
+	python -m unittest discover tests
 extras = test
 
 [testenv:lint]
 deps =
-	flake8==3.7.9
+	flake8==3.8.4
 	mypy==0.720; python_version<"3"
-	mypy==0.761; python_version>"3"
+	mypy==0.790; python_version>"3"
 	yapf==0.29.0
 commands =
 	flake8 --output-file=build/pep8.txt
@@ -19,5 +20,5 @@ commands =
 
 [testenv:coverage]
 commands =
-	coverage run -m unittest discover
+	coverage run -m unittest discover tests
 	coverage report --fail-under=100 --show-missing