Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix query string bugs relating to repeated elements #79

Merged
merged 1 commit into from Jan 31, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
59 changes: 59 additions & 0 deletions tests/unit/via/views/test__query_params.py
@@ -0,0 +1,59 @@
import pytest
from webob.multidict import MultiDict, NestedMultiDict

from via.views._query_params import QueryParams


class TestQueryParams:
ORIGINAL_URL = "http://example.com"

@pytest.mark.parametrize(
"params",
[
{},
{QueryParams.CONFIG_FROM_FRAME: "test_1"},
{
QueryParams.CONFIG_FROM_FRAME: "test_1",
QueryParams.OPEN_SIDEBAR: "test_2",
},
],
)
def test_build_url_strips_via_params(self, params):
params.update({"p1": "v1", "p2": "v2"})

url = QueryParams.build_url(self.ORIGINAL_URL, params, strip_via_params=True)

assert url == self.ORIGINAL_URL + "?p1=v1&p2=v2"

def test_upper_case_params_are_different(self):
param = QueryParams.OPEN_SIDEBAR.upper()
url = QueryParams.build_url(
self.ORIGINAL_URL, {param: "test_1"}, strip_via_params=True
)

assert url == f"http://example.com?{param}=test_1"

def test_build_url_can_leave_via_params(self):
url = QueryParams.build_url(
self.ORIGINAL_URL,
{"a": "test_1", QueryParams.OPEN_SIDEBAR: "test_2"},
strip_via_params=False,
)

assert url == self.ORIGINAL_URL + "?a=test_1&via.open_sidebar=test_2"

def test_build_url_handles_multi_dicts(self):
url = QueryParams.build_url(
self.ORIGINAL_URL,
NestedMultiDict(
MultiDict({"a": "a1", QueryParams.OPEN_SIDEBAR: "v1"}),
MultiDict({"a": "a2", QueryParams.OPEN_SIDEBAR: "v2"}),
),
strip_via_params=True,
)

assert url == self.ORIGINAL_URL + "?a=a1&a=a2"

@pytest.mark.parametrize("url", ["", "jddf://example.com", "http://"])
def test_build_url_handles_malformed_urls(self, url):
QueryParams.build_url(url, {}, strip_via_params=True)
54 changes: 3 additions & 51 deletions tests/unit/via/views/test_route_by_content.py
Expand Up @@ -13,57 +13,11 @@

from tests.unit.conftest import assert_cache_control
from via.exceptions import BadURL, UnhandledException, UpstreamServiceError
from via.views._query_params import QueryParams
from via.views.route_by_content import route_by_content


class TestRouteByContent:
@pytest.mark.parametrize(
"requested_path,expected_location,content_type",
[
# If the requested pdf URL has no query string then it should just
# redirect to the requested URL, with no query string (but with the
(
"/https://example.com/foo",
"http://localhost/pdf/https://example.com/foo",
"application/pdf",
),
# If the requested pdf URL has a query string then the query string
# should be preserved in the URL that it redirects to.
(
"/https://example.com/foo?bar=baz",
"http://localhost/pdf/https://example.com/foo?bar=baz",
"application/pdf",
),
# If the requested html URL has a query string then the query string
# should be preserved in the URL that it redirects to.
(
"/https://example.com/foo?bar=baz",
"http://via.hypothes.is/https://example.com/foo?bar=baz",
"text/html",
),
# If the requested html URL has a client query params then the query params
# should be preserved in the URL that it redirects to.
(
"/https://example.com/foo?via.open_sidebar=1",
"http://via.hypothes.is/https://example.com/foo?via.open_sidebar=1",
"text/html",
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

All of these tests amount to the same thing, and are replicated lower down. We always pass all params onto the final request. It only changes when we call the original URL to check the resource content type.

),
],
)
def test_redirect_location(
self, make_pyramid_request, requested_path, expected_location, content_type
):

request = make_pyramid_request(
request_url=requested_path,
thirdparty_url="https://example.com/foo",
content_type=content_type,
)

redirect = route_by_content(request)

assert redirect.location == expected_location

@pytest.mark.parametrize(
"content_type,redirect_url",
[
Expand All @@ -85,10 +39,8 @@ def test_redirects_based_on_content_type_header(

assert result.location == redirect_url

@pytest.mark.parametrize(
"query_param", ["via.request_config_from_frame", "via.open_sidebar"]
)
def test_does_not_pass_via_query_params_to_thirdparty_server(
@pytest.mark.parametrize("query_param", QueryParams.ALL_PARAMS)
def test_does_not_pass_via_query_params_to_third_parties(
self, make_pyramid_request, query_param
):
request = make_pyramid_request(
Expand Down
68 changes: 33 additions & 35 deletions via/views/_query_params.py
@@ -1,37 +1,35 @@
"""Methods and data relating to query parameters."""

from urllib.parse import urlencode

# Client configuration query parameters.
OPEN_SIDEBAR = "via.open_sidebar"
CONFIG_FROM_FRAME = "via.request_config_from_frame"


def strip_client_query_params(base_url, query_params):
"""Return ``base_url`` with non-Via params from ``query_params`` appended.

Return ``base_url`` with all the non-Via query params from ``query_params``
appended to it as a query string. Any params in ``query_params`` that are
meant for Via (the ``"via.*`` query params) will be ignored and *not*
appended to the returned URL.

:param base_url: the protocol, domain and path, for example: https://thirdparty.url/foo.pdf
:type base_url: str

:param query_params: the query params to be added to base_url
:type query_params: dict

:return: ``base_url`` with the non-Via query params appended
:rtype: str
"""
client_params = [OPEN_SIDEBAR, CONFIG_FROM_FRAME]
filtered_params = urlencode(
{
param: value
for param, value in query_params.items()
if param not in client_params
}
)
if filtered_params:
return f"{base_url}?{filtered_params}"
return base_url
from urllib.parse import urlencode, urlparse

from webob.multidict import MultiDict


class QueryParams:
"""Client configuration query parameters and related functions."""

# pylint: disable=too-few-public-methods
jon-betts marked this conversation as resolved.
Show resolved Hide resolved

OPEN_SIDEBAR = "via.open_sidebar"
CONFIG_FROM_FRAME = "via.request_config_from_frame"
ALL_PARAMS = {OPEN_SIDEBAR, CONFIG_FROM_FRAME}

@classmethod
def build_url(cls, url, query, strip_via_params):
"""Add the query to the url, optionally removing via related params.

:param url: URL to base the new URL on
:param query: Dict of query parameters
:param strip_via_params: Enable removal of via params
:return: A new URL with the relevant query params added
"""

# Coerce any immutable NestedMultiDict's into mutable a MultiDict
if strip_via_params:
query = MultiDict(query)

via_keys = [key for key in query.keys() if key in cls.ALL_PARAMS]
for key in via_keys:
query.pop(key, None)

return urlparse(url)._replace(query=urlencode(query)).geturl()
20 changes: 12 additions & 8 deletions via/views/route_by_content.py
Expand Up @@ -12,24 +12,24 @@
UnhandledException,
UpstreamServiceError,
)
from via.views._query_params import strip_client_query_params
from via.views._query_params import QueryParams


@view.view_config(route_name="route_by_content")
def route_by_content(request):
"""Routes the request according to the Content-Type header."""
url = strip_client_query_params(request.matchdict["url"], request.params)
jon-betts marked this conversation as resolved.
Show resolved Hide resolved
path_url = request.matchdict["url"]

mime_type, status_code = _get_url_details(url)
mime_type, status_code = _get_url_details(
url=QueryParams.build_url(path_url, request.params, strip_via_params=True)
)

# Can PDF mime types get extra info on the end like "encoding=?"
if mime_type in ("application/x-pdf", "application/pdf"):
# Unless we have some very baroque error messages they shouldn't
# really be returning PDFs
return exc.HTTPFound(
request.route_url(
"view_pdf", pdf_url=request.matchdict["url"], _query=request.params,
),
request.route_url("view_pdf", pdf_url=path_url, _query=request.params,),
headers=_caching_headers(max_age=300),
)

Expand All @@ -49,9 +49,13 @@ def route_by_content(request):
headers = {"Cache-Control": "no-cache"}

via_url = request.registry.settings["legacy_via_url"]
url = request.path_qs.lstrip("/")

return exc.HTTPFound(f"{via_url}/{url}", headers=headers)
return exc.HTTPFound(
QueryParams.build_url(
f"{via_url}/{path_url}", request.params, strip_via_params=False
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
f"{via_url}/{path_url}", request.params, strip_via_params=False
f"{via_url}/{path_url}", request.params

It defaults to False

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, I think I might remove the default to force you to say. This was to make crystal clear the intent.

),
headers=headers,
)


def _get_url_details(url):
Expand Down
14 changes: 6 additions & 8 deletions via/views/view_pdf.py
Expand Up @@ -3,11 +3,7 @@
from pyramid import view
from pyramid.settings import asbool

from via.views._query_params import (
CONFIG_FROM_FRAME,
OPEN_SIDEBAR,
strip_client_query_params,
)
from via.views._query_params import QueryParams


@view.view_config(
Expand All @@ -20,12 +16,14 @@
def view_pdf(request):
"""HTML page with client and the PDF embedded."""
nginx_server = request.registry.settings["nginx_server"]
pdf_url = strip_client_query_params(request.matchdict["pdf_url"], request.params)
pdf_url = QueryParams.build_url(
request.matchdict["pdf_url"], request.params, strip_via_params=True
)

return {
"pdf_url": f"{nginx_server}/proxy/static/{pdf_url}",
"client_embed_url": request.registry.settings["client_embed_url"],
"h_open_sidebar": asbool(request.params.get(OPEN_SIDEBAR, False)),
"h_request_config": request.params.get(CONFIG_FROM_FRAME, None),
"h_open_sidebar": asbool(request.params.get(QueryParams.OPEN_SIDEBAR, False)),
"h_request_config": request.params.get(QueryParams.CONFIG_FROM_FRAME, None),
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This bit of processing could be moved to QueryParams, but then:

  • This code wouldn't have anything to do and would get sad
  • The code wouldn't get any shorter as we'd have to unpack whatever we were sent

"static_url": request.static_url,
}