From 6896931b926d81b2debe8f907495dd742d705c6c Mon Sep 17 00:00:00 2001 From: Carsten Ehbrecht Date: Mon, 9 Aug 2021 18:43:38 +0200 Subject: [PATCH] Squashed commit of the following: added test for xml_util use xml_util use werkzeug Response use markupsafe --- pywps/app/WPSRequest.py | 6 ++-- pywps/exceptions.py | 2 +- pywps/inout/inputs.py | 2 +- pywps/inout/outputs.py | 2 +- pywps/tests.py | 11 +++--- pywps/validator/complexvalidator.py | 16 +++------ pywps/xml_util.py | 16 +++++++++ requirements.txt | 1 + tests/test_capabilities.py | 4 +-- tests/test_complexdata_io.py | 4 +-- tests/test_exceptions.py | 3 +- tests/test_execute.py | 10 +++--- tests/test_formats.py | 4 +-- tests/test_inout.py | 2 -- tests/test_ows.py | 2 -- tests/test_wpsrequest.py | 1 - tests/test_xml_util.py | 52 +++++++++++++++++++++++++++++ 17 files changed, 95 insertions(+), 43 deletions(-) create mode 100644 pywps/xml_util.py create mode 100644 tests/test_xml_util.py diff --git a/pywps/app/WPSRequest.py b/pywps/app/WPSRequest.py index f6324ba3d..d505095be 100644 --- a/pywps/app/WPSRequest.py +++ b/pywps/app/WPSRequest.py @@ -5,7 +5,7 @@ import logging import lxml -import lxml.etree +from pywps import xml_util as etree from werkzeug.exceptions import MethodNotAllowed from pywps import get_ElementMakerForVersion import base64 @@ -85,7 +85,7 @@ def _post_request(self): ' Maximum request size allowed: {} megabytes'.format(maxsize / 1024 / 1024)) try: - doc = lxml.etree.fromstring(self.http_request.get_data()) + doc = etree.fromstring(self.http_request.get_data()) except Exception as e: raise NoApplicableCode(e.msg) @@ -571,7 +571,7 @@ def _get_dataelement_value(value_el): """ if isinstance(value_el, lxml.etree._Element): - return lxml.etree.tostring(value_el, encoding=str) + return etree.tostring(value_el, encoding=str) else: return value_el diff --git a/pywps/exceptions.py b/pywps/exceptions.py index e730d73df..c1a265356 100644 --- a/pywps/exceptions.py +++ b/pywps/exceptions.py @@ -14,7 +14,7 @@ from werkzeug.wrappers import Response from werkzeug.exceptions import HTTPException -from werkzeug.utils import escape +from markupsafe import escape import logging diff --git a/pywps/inout/inputs.py b/pywps/inout/inputs.py index 180fbf204..e6e6c9ca8 100644 --- a/pywps/inout/inputs.py +++ b/pywps/inout/inputs.py @@ -4,7 +4,7 @@ ################################################################## import re -import lxml.etree as etree +from pywps import xml_util as etree from pywps.app.Common import Metadata from pywps.exceptions import InvalidParameterValue diff --git a/pywps/inout/outputs.py b/pywps/inout/outputs.py index 91c431ac2..1e1caf15f 100644 --- a/pywps/inout/outputs.py +++ b/pywps/inout/outputs.py @@ -6,7 +6,7 @@ WPS Output classes """ -import lxml.etree as etree +from pywps import xml_util as etree import os import re from pywps.app.Common import Metadata diff --git a/pywps/tests.py b/pywps/tests.py index db9d5c8ad..11db57ced 100644 --- a/pywps/tests.py +++ b/pywps/tests.py @@ -5,10 +5,11 @@ import tempfile from pathlib import Path -import lxml.etree +import lxml +from pywps import xml_util as etree import requests from werkzeug.test import Client -from werkzeug.wrappers import BaseResponse +from werkzeug.wrappers import Response from pywps import __version__ from pywps import Process from pywps.inout import LiteralInput, LiteralOutput, ComplexInput, ComplexOutput, BoundingBoxInput, BoundingBoxOutput @@ -93,17 +94,17 @@ class WpsClient(Client): def post_xml(self, *args, **kwargs): doc = kwargs.pop('doc') - data = lxml.etree.tostring(doc, pretty_print=True) + data = etree.tostring(doc, pretty_print=True) kwargs['data'] = data return self.post(*args, **kwargs) -class WpsTestResponse(BaseResponse): +class WpsTestResponse(Response): def __init__(self, *args): super(WpsTestResponse, self).__init__(*args) if re.match(r'text/xml(;\s*charset=.*)?', self.headers.get('Content-Type')): - self.xml = lxml.etree.fromstring(self.get_data()) + self.xml = etree.fromstring(self.get_data()) def xpath(self, path): version = self.xml.attrib["version"] diff --git a/pywps/validator/complexvalidator.py b/pywps/validator/complexvalidator.py index b079c8340..b170194d1 100644 --- a/pywps/validator/complexvalidator.py +++ b/pywps/validator/complexvalidator.py @@ -11,6 +11,8 @@ from pywps.validator.mode import MODE from pywps.inout.formats import FORMATS +from lxml.etree import XMLSchema +from pywps import xml_util as etree from urllib.request import urlopen import mimetypes import os @@ -61,13 +63,10 @@ def validategml(data_input, mode): passed = False if mode >= MODE.VERYSTRICT: - - from lxml import etree - try: schema_url = data_input.data_format.schema gmlschema_doc = etree.parse(urlopen(schema_url)) - gmlschema = etree.XMLSchema(gmlschema_doc) + gmlschema = XMLSchema(gmlschema_doc) passed = gmlschema.validate(etree.parse(data_input.stream)) except Exception as e: LOGGER.warning(e) @@ -118,13 +117,10 @@ def validategpx(data_input, mode): passed = False if mode >= MODE.VERYSTRICT: - - from lxml import etree - try: schema_url = data_input.data_format.schema gpxschema_doc = etree.parse(urlopen(schema_url)) - gpxschema = etree.XMLSchema(gpxschema_doc) + gpxschema = XMLSchema(gpxschema_doc) passed = gpxschema.validate(etree.parse(data_input.stream)) except Exception as e: LOGGER.warning(e) @@ -164,15 +160,13 @@ def validatexml(data_input, mode): passed = data_input.data_format.mime_type in {mtype, FORMATS.GML.mime_type} if mode >= MODE.STRICT: - from lxml import etree - # TODO: Raise the actual validation exception to make it easier to spot the error. # xml = etree.parse(data_input.file) # schema.assertValid(xml) try: fn = os.path.join(_get_schemas_home(), data_input.data_format.schema) schema_doc = etree.parse(fn) - schema = etree.XMLSchema(schema_doc) + schema = XMLSchema(schema_doc) passed = schema.validate(etree.parse(data_input.file)) except Exception as e: LOGGER.warning(e) diff --git a/pywps/xml_util.py b/pywps/xml_util.py new file mode 100644 index 000000000..6b0615b19 --- /dev/null +++ b/pywps/xml_util.py @@ -0,0 +1,16 @@ +from lxml import etree as _etree + + +PARSER = _etree.XMLParser( + resolve_entities=False, +) + +tostring = _etree.tostring + + +def fromstring(text): + return _etree.fromstring(text, parser=PARSER) + + +def parse(source): + return _etree.parse(source, parser=PARSER) diff --git a/requirements.txt b/requirements.txt index 252d70a68..a57778fd1 100644 --- a/requirements.txt +++ b/requirements.txt @@ -6,4 +6,5 @@ python-dateutil requests SQLAlchemy werkzeug +MarkupSafe humanize diff --git a/tests/test_capabilities.py b/tests/test_capabilities.py index 98dc8bfe8..8e3e3851b 100644 --- a/tests/test_capabilities.py +++ b/tests/test_capabilities.py @@ -4,13 +4,11 @@ ################################################################## import unittest -import lxml -import lxml.etree from pywps import configuration from pywps.app import Process, Service from pywps.app.Common import Metadata from pywps import get_ElementMakerForVersion -from pywps.tests import assert_pywps_version, client_for, assert_wps_version +from pywps.tests import client_for, assert_wps_version WPS, OWS = get_ElementMakerForVersion("1.0.0") diff --git a/tests/test_complexdata_io.py b/tests/test_complexdata_io.py index 9398bc633..84ab9a46b 100644 --- a/tests/test_complexdata_io.py +++ b/tests/test_complexdata_io.py @@ -2,12 +2,12 @@ import unittest import os -from pywps import get_ElementMakerForVersion, E +from pywps import get_ElementMakerForVersion from pywps.app.basic import get_xpath_ns from pywps import Service, Process, ComplexInput, ComplexOutput, FORMATS from pywps.tests import client_for, assert_response_success from owslib.wps import WPSExecution, ComplexDataInput -from lxml import etree +from pywps import xml_util as etree VERSION = "1.0.0" WPS, OWS = get_ElementMakerForVersion(VERSION) diff --git a/tests/test_exceptions.py b/tests/test_exceptions.py index c9133f141..d3d639ed1 100644 --- a/tests/test_exceptions.py +++ b/tests/test_exceptions.py @@ -4,10 +4,9 @@ ################################################################## import unittest -from pywps import Process, Service, get_ElementMakerForVersion +from pywps import Service, get_ElementMakerForVersion from pywps.app.basic import get_xpath_ns from pywps.tests import assert_pywps_version, client_for -import lxml.etree import re diff --git a/tests/test_execute.py b/tests/test_execute.py index 24a36d9ee..f340f1434 100644 --- a/tests/test_execute.py +++ b/tests/test_execute.py @@ -5,7 +5,7 @@ import unittest import pytest -import lxml.etree +from pywps import xml_util as etree import json import tempfile import os.path @@ -13,16 +13,14 @@ BoundingBoxOutput, BoundingBoxInput, Format, ComplexInput, ComplexOutput, FORMATS from pywps.validator.base import emptyvalidator from pywps.validator.complexvalidator import validategml -from pywps.validator.mode import MODE from pywps.exceptions import InvalidParameterValue -from pywps import get_inputs_from_xml, get_output_from_xml +from pywps import get_inputs_from_xml from pywps import E, get_ElementMakerForVersion from pywps.app.basic import get_xpath_ns from pywps.tests import client_for, assert_response_success from pywps import configuration from io import StringIO -from owslib.ows import BoundingBox try: import netCDF4 @@ -223,7 +221,7 @@ def get_output(doc): output[identifier_el.text] = data_el[0].text else: # XML children ch = list(data_el[0])[0] - output[identifier_el.text] = lxml.etree.tostring(ch) + output[identifier_el.text] = etree.tostring(ch) return output @@ -600,7 +598,7 @@ def test_complex_input(self): WPS.ComplexData(the_data, mimeType='text/foobar'))))) rv = get_inputs_from_xml(request_doc) self.assertEqual(rv['name'][0]['mimeType'], 'text/foobar') - rv_doc = lxml.etree.parse(StringIO(rv['name'][0]['data'])).getroot() + rv_doc = etree.parse(StringIO(rv['name'][0]['data'])).getroot() self.assertEqual(rv_doc.tag, 'TheData') self.assertEqual(rv_doc.text, 'hello world') diff --git a/tests/test_formats.py b/tests/test_formats.py index c6d097a08..2dfff8ff5 100644 --- a/tests/test_formats.py +++ b/tests/test_formats.py @@ -8,9 +8,8 @@ import unittest from pywps.inout.formats import Format, get_format, FORMATS -from lxml import etree from pywps.app.basic import get_xpath_ns -from pywps.validator.base import emptyvalidator + xpath_ns = get_xpath_ns("1.0.0") @@ -121,4 +120,3 @@ def load_tests(loader=None, tests=None, pattern=None): loader.loadTestsFromTestCase(FormatsTest) ] return unittest.TestSuite(suite_list) - diff --git a/tests/test_inout.py b/tests/test_inout.py index cee9f9735..8d2048b70 100644 --- a/tests/test_inout.py +++ b/tests/test_inout.py @@ -11,7 +11,6 @@ import requests import os import tempfile -import datetime import unittest import json from pywps import inout @@ -35,7 +34,6 @@ from pywps.tests import service_ok from pywps.translations import get_translation -from lxml import etree DATA_DIR = os.path.join(os.path.abspath(os.path.dirname(__file__)), 'data') diff --git a/tests/test_ows.py b/tests/test_ows.py index 504af8170..73cf2cdae 100644 --- a/tests/test_ows.py +++ b/tests/test_ows.py @@ -9,8 +9,6 @@ import os import tempfile import unittest -import lxml.etree -import sys from pywps import Service, Process, ComplexInput, ComplexOutput, Format, FORMATS, get_format from pywps.dependencies import ogr from pywps.exceptions import NoApplicableCode diff --git a/tests/test_wpsrequest.py b/tests/test_wpsrequest.py index f45a6a993..82246e3b9 100644 --- a/tests/test_wpsrequest.py +++ b/tests/test_wpsrequest.py @@ -4,7 +4,6 @@ ################################################################## import unittest -import lxml.etree from pywps.app import WPSRequest import tempfile import datetime diff --git a/tests/test_xml_util.py b/tests/test_xml_util.py new file mode 100644 index 000000000..e9325afc3 --- /dev/null +++ b/tests/test_xml_util.py @@ -0,0 +1,52 @@ +from pywps import xml_util as etree + +from io import StringIO + + +XML_EXECUTE = """ + + +]> + + test_process + + + name + + &xxe; + + + + + + + output + + + + +""" + + +def test_etree_fromstring(): + xml = etree.tostring(etree.fromstring(XML_EXECUTE)) + # don't replace entities + # https://lxml.de/parsing.html + assert b"&xxe;" in xml + + +def test_etree_parse(): + xml = etree.tostring(etree.parse(StringIO(XML_EXECUTE))) + # don't replace entities + # https://lxml.de/parsing.html + assert b"&xxe;" in xml