From 487966cac0469551ddd6e4e2ce6f6f8e9b360fe1 Mon Sep 17 00:00:00 2001 From: Jon Betts Date: Wed, 29 Jan 2020 18:17:47 +0000 Subject: [PATCH] Fix query string bugs relating to repeated elements * Also separate out testing of the component * Remove duplicated testing in other places * Use the same code more times * Rather than inventing a new way at the end of get_content_type() --- tests/unit/via/views/test__query_params.py | 59 ++++++++++++++++ tests/unit/via/views/test_route_by_content.py | 54 +-------------- via/views/_query_params.py | 68 +++++++++---------- via/views/route_by_content.py | 20 +++--- via/views/view_pdf.py | 14 ++-- 5 files changed, 113 insertions(+), 102 deletions(-) create mode 100644 tests/unit/via/views/test__query_params.py diff --git a/tests/unit/via/views/test__query_params.py b/tests/unit/via/views/test__query_params.py new file mode 100644 index 00000000..d2b86224 --- /dev/null +++ b/tests/unit/via/views/test__query_params.py @@ -0,0 +1,59 @@ +import pytest +from webob.multidict import MultiDict, NestedMultiDict + +from via.views._query_params import QueryParams + + +class TestQueryParams: + ORIGINAL_URL = "http://example.com" + + @pytest.mark.parametrize( + "params", + [ + {}, + {QueryParams.CONFIG_FROM_FRAME: "test_1"}, + { + QueryParams.CONFIG_FROM_FRAME: "test_1", + QueryParams.OPEN_SIDEBAR: "test_2", + }, + ], + ) + def test_build_url_strips_via_params(self, params): + params.update({"p1": "v1", "p2": "v2"}) + + url = QueryParams.build_url(self.ORIGINAL_URL, params, strip_via_params=True) + + assert url == self.ORIGINAL_URL + "?p1=v1&p2=v2" + + def test_upper_case_params_are_different(self): + param = QueryParams.OPEN_SIDEBAR.upper() + url = QueryParams.build_url( + self.ORIGINAL_URL, {param: "test_1"}, strip_via_params=True + ) + + assert url == f"http://example.com?{param}=test_1" + + def test_build_url_can_leave_via_params(self): + url = QueryParams.build_url( + self.ORIGINAL_URL, + {"a": "test_1", QueryParams.OPEN_SIDEBAR: "test_2"}, + strip_via_params=False, + ) + + assert url == self.ORIGINAL_URL + "?a=test_1&via.open_sidebar=test_2" + + def test_build_url_handles_multi_dicts(self): + url = QueryParams.build_url( + self.ORIGINAL_URL, + NestedMultiDict( + MultiDict({"a": "a1", QueryParams.OPEN_SIDEBAR: "v1"}), + MultiDict({"a": "a2", QueryParams.OPEN_SIDEBAR: "v2"}), + ), + strip_via_params=True, + ) + + assert url == self.ORIGINAL_URL + "?a=a1&a=a2" + + @pytest.mark.parametrize("url", ["", "jddf://example.com", "http://"]) + def test_build_url_handles_malformed_urls(self, url): + QueryParams.build_url(url, {}, strip_via_params=True) diff --git a/tests/unit/via/views/test_route_by_content.py b/tests/unit/via/views/test_route_by_content.py index ae4ab1c3..08f82165 100644 --- a/tests/unit/via/views/test_route_by_content.py +++ b/tests/unit/via/views/test_route_by_content.py @@ -13,57 +13,11 @@ from tests.unit.conftest import assert_cache_control from via.exceptions import BadURL, UnhandledException, UpstreamServiceError +from via.views._query_params import QueryParams from via.views.route_by_content import route_by_content class TestRouteByContent: - @pytest.mark.parametrize( - "requested_path,expected_location,content_type", - [ - # If the requested pdf URL has no query string then it should just - # redirect to the requested URL, with no query string (but with the - ( - "/https://example.com/foo", - "http://localhost/pdf/https://example.com/foo", - "application/pdf", - ), - # If the requested pdf URL has a query string then the query string - # should be preserved in the URL that it redirects to. - ( - "/https://example.com/foo?bar=baz", - "http://localhost/pdf/https://example.com/foo?bar=baz", - "application/pdf", - ), - # If the requested html URL has a query string then the query string - # should be preserved in the URL that it redirects to. - ( - "/https://example.com/foo?bar=baz", - "http://via.hypothes.is/https://example.com/foo?bar=baz", - "text/html", - ), - # If the requested html URL has a client query params then the query params - # should be preserved in the URL that it redirects to. - ( - "/https://example.com/foo?via.open_sidebar=1", - "http://via.hypothes.is/https://example.com/foo?via.open_sidebar=1", - "text/html", - ), - ], - ) - def test_redirect_location( - self, make_pyramid_request, requested_path, expected_location, content_type - ): - - request = make_pyramid_request( - request_url=requested_path, - thirdparty_url="https://example.com/foo", - content_type=content_type, - ) - - redirect = route_by_content(request) - - assert redirect.location == expected_location - @pytest.mark.parametrize( "content_type,redirect_url", [ @@ -85,10 +39,8 @@ def test_redirects_based_on_content_type_header( assert result.location == redirect_url - @pytest.mark.parametrize( - "query_param", ["via.request_config_from_frame", "via.open_sidebar"] - ) - def test_does_not_pass_via_query_params_to_thirdparty_server( + @pytest.mark.parametrize("query_param", QueryParams.ALL_PARAMS) + def test_does_not_pass_via_query_params_to_third_parties( self, make_pyramid_request, query_param ): request = make_pyramid_request( diff --git a/via/views/_query_params.py b/via/views/_query_params.py index fa099e70..0cf8c9e5 100644 --- a/via/views/_query_params.py +++ b/via/views/_query_params.py @@ -1,37 +1,35 @@ """Methods and data relating to query parameters.""" -from urllib.parse import urlencode - -# Client configuration query parameters. -OPEN_SIDEBAR = "via.open_sidebar" -CONFIG_FROM_FRAME = "via.request_config_from_frame" - - -def strip_client_query_params(base_url, query_params): - """Return ``base_url`` with non-Via params from ``query_params`` appended. - - Return ``base_url`` with all the non-Via query params from ``query_params`` - appended to it as a query string. Any params in ``query_params`` that are - meant for Via (the ``"via.*`` query params) will be ignored and *not* - appended to the returned URL. - - :param base_url: the protocol, domain and path, for example: https://thirdparty.url/foo.pdf - :type base_url: str - - :param query_params: the query params to be added to base_url - :type query_params: dict - - :return: ``base_url`` with the non-Via query params appended - :rtype: str - """ - client_params = [OPEN_SIDEBAR, CONFIG_FROM_FRAME] - filtered_params = urlencode( - { - param: value - for param, value in query_params.items() - if param not in client_params - } - ) - if filtered_params: - return f"{base_url}?{filtered_params}" - return base_url +from urllib.parse import urlencode, urlparse + +from webob.multidict import MultiDict + + +class QueryParams: + """Client configuration query parameters and related functions.""" + + # pylint: disable=too-few-public-methods + + OPEN_SIDEBAR = "via.open_sidebar" + CONFIG_FROM_FRAME = "via.request_config_from_frame" + ALL_PARAMS = {OPEN_SIDEBAR, CONFIG_FROM_FRAME} + + @classmethod + def build_url(cls, url, query, strip_via_params): + """Add the query to the url, optionally removing via related params. + + :param url: URL to base the new URL on + :param query: Dict of query parameters + :param strip_via_params: Enable removal of via params + :return: A new URL with the relevant query params added + """ + + # Coerce any immutable NestedMultiDict's into mutable a MultiDict + if strip_via_params: + query = MultiDict(query) + + via_keys = [key for key in query.keys() if key in cls.ALL_PARAMS] + for key in via_keys: + query.pop(key, None) + + return urlparse(url)._replace(query=urlencode(query)).geturl() diff --git a/via/views/route_by_content.py b/via/views/route_by_content.py index 296e5718..32ab2aeb 100644 --- a/via/views/route_by_content.py +++ b/via/views/route_by_content.py @@ -12,24 +12,24 @@ UnhandledException, UpstreamServiceError, ) -from via.views._query_params import strip_client_query_params +from via.views._query_params import QueryParams @view.view_config(route_name="route_by_content") def route_by_content(request): """Routes the request according to the Content-Type header.""" - url = strip_client_query_params(request.matchdict["url"], request.params) + path_url = request.matchdict["url"] - mime_type, status_code = _get_url_details(url) + mime_type, status_code = _get_url_details( + url=QueryParams.build_url(path_url, request.params, strip_via_params=True) + ) # Can PDF mime types get extra info on the end like "encoding=?" if mime_type in ("application/x-pdf", "application/pdf"): # Unless we have some very baroque error messages they shouldn't # really be returning PDFs return exc.HTTPFound( - request.route_url( - "view_pdf", pdf_url=request.matchdict["url"], _query=request.params, - ), + request.route_url("view_pdf", pdf_url=path_url, _query=request.params,), headers=_caching_headers(max_age=300), ) @@ -49,9 +49,13 @@ def route_by_content(request): headers = {"Cache-Control": "no-cache"} via_url = request.registry.settings["legacy_via_url"] - url = request.path_qs.lstrip("/") - return exc.HTTPFound(f"{via_url}/{url}", headers=headers) + return exc.HTTPFound( + QueryParams.build_url( + f"{via_url}/{path_url}", request.params, strip_via_params=False + ), + headers=headers, + ) def _get_url_details(url): diff --git a/via/views/view_pdf.py b/via/views/view_pdf.py index 8677c725..28d26f26 100644 --- a/via/views/view_pdf.py +++ b/via/views/view_pdf.py @@ -3,11 +3,7 @@ from pyramid import view from pyramid.settings import asbool -from via.views._query_params import ( - CONFIG_FROM_FRAME, - OPEN_SIDEBAR, - strip_client_query_params, -) +from via.views._query_params import QueryParams @view.view_config( @@ -20,12 +16,14 @@ def view_pdf(request): """HTML page with client and the PDF embedded.""" nginx_server = request.registry.settings["nginx_server"] - pdf_url = strip_client_query_params(request.matchdict["pdf_url"], request.params) + pdf_url = QueryParams.build_url( + request.matchdict["pdf_url"], request.params, strip_via_params=True + ) return { "pdf_url": f"{nginx_server}/proxy/static/{pdf_url}", "client_embed_url": request.registry.settings["client_embed_url"], - "h_open_sidebar": asbool(request.params.get(OPEN_SIDEBAR, False)), - "h_request_config": request.params.get(CONFIG_FROM_FRAME, None), + "h_open_sidebar": asbool(request.params.get(QueryParams.OPEN_SIDEBAR, False)), + "h_request_config": request.params.get(QueryParams.CONFIG_FROM_FRAME, None), "static_url": request.static_url, }