Skip to content

Commit

Permalink
Merge pull request #26 from dave-shawley/handle-invalid-accept
Browse files Browse the repository at this point in the history
Handle invalid Accept headers by skipping invalid content
  • Loading branch information
dave-shawley committed Nov 4, 2020
2 parents cc577bf + 77353c2 commit 5f630bb
Show file tree
Hide file tree
Showing 7 changed files with 80 additions and 19 deletions.
14 changes: 14 additions & 0 deletions docs/changelog.rst
Expand Up @@ -3,6 +3,20 @@ Changelog

.. py:currentmodule:: ietfparse
`Next Release`_
---------------
.. rubric:: Behavioural Change

:func:`headers.parse_accept` used to fail with a :exc:`ValueError` when
it encountered an invalid content type value in the header. Now it skips
the invalid value. If you want the previous behaviour, then pass ``strict=True``.

- Advertise support for Python 3.7-3.9, remove 3.4 & 3.5
- Clarify that :func:`headers.parse_content_type` raises :exc:`ValueError`
when it encounters an invalid content type header
- Skip unparseable content types in :func:`headers.parse_accept` unless
the new ``strict`` parameter is truthy

`1.6.1`_ (26-Jan-2020)
----------------------
- Fixed project URL metadata.
Expand Down
28 changes: 23 additions & 5 deletions ietfparse/headers.py
Expand Up @@ -31,14 +31,20 @@
_DEF_PARAM_VALUE = object()


def parse_accept(header_value):
def parse_accept(header_value, strict=False):
"""Parse an HTTP accept-like header.
:param str header_value: the header value to parse
:param bool strict: if :data:`True`, then invalid content type
values within `header_value` will raise :exc:`ValueError`;
otherwise, they are ignored
:return: a :class:`list` of :class:`.ContentType` instances
in decreasing quality order. Each instance is augmented
with the associated quality as a ``float`` property
named ``quality``.
:raise: :exc:`ValueError` if `strict` is *truthy* and at least
one value in `header_value` could not be parsed by
:func:`.parse_content_type`
``Accept`` is a class of headers that contain a list of values
and an associated preference value. The ever present `Accept`_
Expand All @@ -55,9 +61,14 @@ def parse_accept(header_value):
"""
next_explicit_q = decimal.ExtendedContext.next_plus(decimal.Decimal('5.0'))
headers = [
parse_content_type(header) for header in parse_list(header_value)
]
headers = []
for header in parse_list(header_value):
try:
headers.append(parse_content_type(header))
except ValueError:
if strict:
raise

for header in headers:
q = header.parameters.pop('q', None)
if q is None:
Expand Down Expand Up @@ -208,10 +219,17 @@ def parse_content_type(content_type, normalize_parameter_values=True):
setting this to ``False`` will enable strict RFC2045 compliance
in which content parameter values are case preserving.
:return: a :class:`~ietfparse.datastructures.ContentType` instance
:raises: :exc:`ValueError` if the content type cannot be reasonably
parsed (e.g., ``Content-Type: *``)
"""
parts = _remove_comments(content_type).split(';')
content_type, content_subtype = parts.pop(0).split('/')
type_spec = parts.pop(0)
try:
content_type, content_subtype = type_spec.split('/')
except ValueError:
raise ValueError('Failed to parse ' + type_spec)

if '+' in content_subtype:
content_subtype, content_suffix = content_subtype.split('+')
else:
Expand Down
2 changes: 1 addition & 1 deletion ietfparse/headers.pyi
Expand Up @@ -3,7 +3,7 @@ from typing import Collection, Dict, List, Optional, Sequence, Union
from ietfparse import datastructures


def parse_accept(header_value: str) -> List[datastructures.ContentType]:
def parse_accept(header_value: str, strict: bool = False) -> List[datastructures.ContentType]:
...


Expand Down
18 changes: 10 additions & 8 deletions setup.cfg
Expand Up @@ -21,9 +21,10 @@ classifiers =
Programming Language :: Python :: 2
Programming Language :: Python :: 2.7
Programming Language :: Python :: 3
Programming Language :: Python :: 3.4
Programming Language :: Python :: 3.5
Programming Language :: Python :: 3.6
Programming Language :: Python :: 3.7
Programming Language :: Python :: 3.8
Programming Language :: Python :: 3.9
Development Status :: 5 - Production/Stable
Environment :: Web Environment
Topic :: Internet :: WWW/HTTP
Expand All @@ -37,16 +38,16 @@ packages = find:

[options.extras_require]
dev =
coverage==5.0.3
flake8==3.7.9
coverage==5.3
flake8==3.8.4
mock>1.0,<2; python_version<"3"
mypy==0.761
sphinx==2.3.1
mypy==0.790
sphinx==3.3.0
sphinxcontrib-httpdomain==1.7.0
tox==3.14.2
tox==3.20.1
yapf==0.29.0
test =
coverage==5.0.3
coverage==5.3
mock>1.0,<2; python_version<"3"

[options.packages.find]
Expand All @@ -66,6 +67,7 @@ show_missing = 1

[coverage:run]
branch = 1
source = ietfparse

[flake8]
exclude = build,dist,doc,env
Expand Down
6 changes: 6 additions & 0 deletions tests/test_headers_content_type.py
Expand Up @@ -45,6 +45,12 @@ def test_that_message_type_parameter_is_parsed(self):
self.assertEqual(self.parsed.parameters['msgtype'], 'Request')


class ParsingBrokenContentTypes(unittest.TestCase):
def test_that_missing_subtype_raises_value_error(self):
with self.assertRaises(ValueError):
headers.parse_content_type('*')


class Rfc7231ExampleTests(unittest.TestCase):
"""Test cases from RFC7231, Section 3.1.1.1"""
def setUp(self):
Expand Down
20 changes: 20 additions & 0 deletions tests/test_headers_parse_accept.py
Expand Up @@ -63,6 +63,26 @@ def test_that_extension_tokens_with_spaces_are_parsed(self):
{'x-foo': ' something else'})
])

def test_that_invalid_parts_are_skipped(self):
parsed = headers.parse_accept('text/html, image/gif, image/jpeg, '
'*; q=.2, */*; q=.2')
self.assertEqual(len(parsed), 4)
self.assertEqual(parsed[0], datastructures.ContentType('text', 'html'))
self.assertEqual(parsed[1],
datastructures.ContentType('image', 'jpeg'))
self.assertEqual(parsed[2], datastructures.ContentType('image', 'gif'))
self.assertEqual(parsed[3], datastructures.ContentType('*', '*'))

def test_that_invalid_parts_raise_error_when_strict_is_enabled(self):
with self.assertRaises(ValueError):
headers.parse_accept(
'text/html, image/gif, image/jpeg, *; q=.2, */*; q=.2',
strict=True)

def test_the_invalid_header_returns_empty_list(self):
parsed = headers.parse_accept('*')
self.assertEqual(len(parsed), 0)


class ParseAcceptCharsetHeaderTests(unittest.TestCase):

Expand Down
11 changes: 6 additions & 5 deletions tox.ini
@@ -1,16 +1,17 @@
[tox]
envlist = py27,py35,py36,py37,py38,lint,coverage
envlist = py27,py36,py37,py38,py39,lint,coverage
toxworkdir = {toxinidir}/build/tox

[testenv]
commands = python -m unittest discover
commands =
python -m unittest discover tests
extras = test

[testenv:lint]
deps =
flake8==3.7.9
flake8==3.8.4
mypy==0.720; python_version<"3"
mypy==0.761; python_version>"3"
mypy==0.790; python_version>"3"
yapf==0.29.0
commands =
flake8 --output-file=build/pep8.txt
Expand All @@ -19,5 +20,5 @@ commands =

[testenv:coverage]
commands =
coverage run -m unittest discover
coverage run -m unittest discover tests
coverage report --fail-under=100 --show-missing

0 comments on commit 5f630bb

Please sign in to comment.