Skip to content

Commit

Permalink
Merge pull request #616 from cehbrecht/fix-lxml-parser
Browse files Browse the repository at this point in the history
Fix lxml default parser
  • Loading branch information
cehbrecht committed Aug 10, 2021
2 parents 7112197 + 6896931 commit 7d6b26a
Show file tree
Hide file tree
Showing 17 changed files with 95 additions and 43 deletions.
6 changes: 3 additions & 3 deletions pywps/app/WPSRequest.py
Expand Up @@ -5,7 +5,7 @@

import logging
import lxml
import lxml.etree
from pywps import xml_util as etree
from werkzeug.exceptions import MethodNotAllowed
from pywps import get_ElementMakerForVersion
import base64
Expand Down Expand Up @@ -85,7 +85,7 @@ def _post_request(self):
' Maximum request size allowed: {} megabytes'.format(maxsize / 1024 / 1024))

try:
doc = lxml.etree.fromstring(self.http_request.get_data())
doc = etree.fromstring(self.http_request.get_data())
except Exception as e:
raise NoApplicableCode(e.msg)

Expand Down Expand Up @@ -571,7 +571,7 @@ def _get_dataelement_value(value_el):
"""

if isinstance(value_el, lxml.etree._Element):
return lxml.etree.tostring(value_el, encoding=str)
return etree.tostring(value_el, encoding=str)
else:
return value_el

Expand Down
2 changes: 1 addition & 1 deletion pywps/exceptions.py
Expand Up @@ -14,7 +14,7 @@

from werkzeug.wrappers import Response
from werkzeug.exceptions import HTTPException
from werkzeug.utils import escape
from markupsafe import escape

import logging

Expand Down
2 changes: 1 addition & 1 deletion pywps/inout/inputs.py
Expand Up @@ -4,7 +4,7 @@
##################################################################

import re
import lxml.etree as etree
from pywps import xml_util as etree

from pywps.app.Common import Metadata
from pywps.exceptions import InvalidParameterValue
Expand Down
2 changes: 1 addition & 1 deletion pywps/inout/outputs.py
Expand Up @@ -6,7 +6,7 @@
WPS Output classes
"""

import lxml.etree as etree
from pywps import xml_util as etree
import os
import re
from pywps.app.Common import Metadata
Expand Down
11 changes: 6 additions & 5 deletions pywps/tests.py
Expand Up @@ -5,10 +5,11 @@
import tempfile
from pathlib import Path

import lxml.etree
import lxml
from pywps import xml_util as etree
import requests
from werkzeug.test import Client
from werkzeug.wrappers import BaseResponse
from werkzeug.wrappers import Response
from pywps import __version__
from pywps import Process
from pywps.inout import LiteralInput, LiteralOutput, ComplexInput, ComplexOutput, BoundingBoxInput, BoundingBoxOutput
Expand Down Expand Up @@ -93,17 +94,17 @@ class WpsClient(Client):

def post_xml(self, *args, **kwargs):
doc = kwargs.pop('doc')
data = lxml.etree.tostring(doc, pretty_print=True)
data = etree.tostring(doc, pretty_print=True)
kwargs['data'] = data
return self.post(*args, **kwargs)


class WpsTestResponse(BaseResponse):
class WpsTestResponse(Response):

def __init__(self, *args):
super(WpsTestResponse, self).__init__(*args)
if re.match(r'text/xml(;\s*charset=.*)?', self.headers.get('Content-Type')):
self.xml = lxml.etree.fromstring(self.get_data())
self.xml = etree.fromstring(self.get_data())

def xpath(self, path):
version = self.xml.attrib["version"]
Expand Down
16 changes: 5 additions & 11 deletions pywps/validator/complexvalidator.py
Expand Up @@ -11,6 +11,8 @@

from pywps.validator.mode import MODE
from pywps.inout.formats import FORMATS
from lxml.etree import XMLSchema
from pywps import xml_util as etree
from urllib.request import urlopen
import mimetypes
import os
Expand Down Expand Up @@ -61,13 +63,10 @@ def validategml(data_input, mode):
passed = False

if mode >= MODE.VERYSTRICT:

from lxml import etree

try:
schema_url = data_input.data_format.schema
gmlschema_doc = etree.parse(urlopen(schema_url))
gmlschema = etree.XMLSchema(gmlschema_doc)
gmlschema = XMLSchema(gmlschema_doc)
passed = gmlschema.validate(etree.parse(data_input.stream))
except Exception as e:
LOGGER.warning(e)
Expand Down Expand Up @@ -118,13 +117,10 @@ def validategpx(data_input, mode):
passed = False

if mode >= MODE.VERYSTRICT:

from lxml import etree

try:
schema_url = data_input.data_format.schema
gpxschema_doc = etree.parse(urlopen(schema_url))
gpxschema = etree.XMLSchema(gpxschema_doc)
gpxschema = XMLSchema(gpxschema_doc)
passed = gpxschema.validate(etree.parse(data_input.stream))
except Exception as e:
LOGGER.warning(e)
Expand Down Expand Up @@ -164,15 +160,13 @@ def validatexml(data_input, mode):
passed = data_input.data_format.mime_type in {mtype, FORMATS.GML.mime_type}

if mode >= MODE.STRICT:
from lxml import etree

# TODO: Raise the actual validation exception to make it easier to spot the error.
# xml = etree.parse(data_input.file)
# schema.assertValid(xml)
try:
fn = os.path.join(_get_schemas_home(), data_input.data_format.schema)
schema_doc = etree.parse(fn)
schema = etree.XMLSchema(schema_doc)
schema = XMLSchema(schema_doc)
passed = schema.validate(etree.parse(data_input.file))
except Exception as e:
LOGGER.warning(e)
Expand Down
16 changes: 16 additions & 0 deletions pywps/xml_util.py
@@ -0,0 +1,16 @@
from lxml import etree as _etree


# Parser handed to lxml by the fromstring() and parse() wrappers below.
# Entity resolution is switched off so that entity references found in a
# document are left in place instead of being replaced by their content.
PARSER = _etree.XMLParser(resolve_entities=False)

# Serialisation needs no special handling, so lxml's function is re-exported
# unchanged; this lets the module be imported under the name ``etree``.
tostring = _etree.tostring


def fromstring(text):
    """Parse an XML document held in memory using the module ``PARSER``.

    :param text: the XML document, passed straight to
                 ``lxml.etree.fromstring``.
    :return: the result of ``lxml.etree.fromstring`` for ``text``.
    """
    root = _etree.fromstring(text, parser=PARSER)
    return root


def parse(source):
    """Parse an XML source using the module ``PARSER``.

    :param source: the source to read, passed straight to
                   ``lxml.etree.parse``.
    :return: the result of ``lxml.etree.parse`` for ``source``.
    """
    tree = _etree.parse(source, parser=PARSER)
    return tree
1 change: 1 addition & 0 deletions requirements.txt
Expand Up @@ -6,4 +6,5 @@ python-dateutil
requests
SQLAlchemy
werkzeug
MarkupSafe
humanize
4 changes: 1 addition & 3 deletions tests/test_capabilities.py
Expand Up @@ -4,13 +4,11 @@
##################################################################

import unittest
import lxml
import lxml.etree
from pywps import configuration
from pywps.app import Process, Service
from pywps.app.Common import Metadata
from pywps import get_ElementMakerForVersion
from pywps.tests import assert_pywps_version, client_for, assert_wps_version
from pywps.tests import client_for, assert_wps_version

WPS, OWS = get_ElementMakerForVersion("1.0.0")

Expand Down
4 changes: 2 additions & 2 deletions tests/test_complexdata_io.py
Expand Up @@ -2,12 +2,12 @@

import unittest
import os
from pywps import get_ElementMakerForVersion, E
from pywps import get_ElementMakerForVersion
from pywps.app.basic import get_xpath_ns
from pywps import Service, Process, ComplexInput, ComplexOutput, FORMATS
from pywps.tests import client_for, assert_response_success
from owslib.wps import WPSExecution, ComplexDataInput
from lxml import etree
from pywps import xml_util as etree

VERSION = "1.0.0"
WPS, OWS = get_ElementMakerForVersion(VERSION)
Expand Down
3 changes: 1 addition & 2 deletions tests/test_exceptions.py
Expand Up @@ -4,10 +4,9 @@
##################################################################

import unittest
from pywps import Process, Service, get_ElementMakerForVersion
from pywps import Service, get_ElementMakerForVersion
from pywps.app.basic import get_xpath_ns
from pywps.tests import assert_pywps_version, client_for
import lxml.etree

import re

Expand Down
10 changes: 4 additions & 6 deletions tests/test_execute.py
Expand Up @@ -5,24 +5,22 @@

import unittest
import pytest
import lxml.etree
from pywps import xml_util as etree
import json
import tempfile
import os.path
from pywps import Service, Process, LiteralOutput, LiteralInput,\
BoundingBoxOutput, BoundingBoxInput, Format, ComplexInput, ComplexOutput, FORMATS
from pywps.validator.base import emptyvalidator
from pywps.validator.complexvalidator import validategml
from pywps.validator.mode import MODE
from pywps.exceptions import InvalidParameterValue
from pywps import get_inputs_from_xml, get_output_from_xml
from pywps import get_inputs_from_xml
from pywps import E, get_ElementMakerForVersion
from pywps.app.basic import get_xpath_ns
from pywps.tests import client_for, assert_response_success
from pywps import configuration

from io import StringIO
from owslib.ows import BoundingBox

try:
import netCDF4
Expand Down Expand Up @@ -223,7 +221,7 @@ def get_output(doc):
output[identifier_el.text] = data_el[0].text
else: # XML children
ch = list(data_el[0])[0]
output[identifier_el.text] = lxml.etree.tostring(ch)
output[identifier_el.text] = etree.tostring(ch)

return output

Expand Down Expand Up @@ -600,7 +598,7 @@ def test_complex_input(self):
WPS.ComplexData(the_data, mimeType='text/foobar')))))
rv = get_inputs_from_xml(request_doc)
self.assertEqual(rv['name'][0]['mimeType'], 'text/foobar')
rv_doc = lxml.etree.parse(StringIO(rv['name'][0]['data'])).getroot()
rv_doc = etree.parse(StringIO(rv['name'][0]['data'])).getroot()
self.assertEqual(rv_doc.tag, 'TheData')
self.assertEqual(rv_doc.text, 'hello world')

Expand Down
4 changes: 1 addition & 3 deletions tests/test_formats.py
Expand Up @@ -8,9 +8,8 @@
import unittest

from pywps.inout.formats import Format, get_format, FORMATS
from lxml import etree
from pywps.app.basic import get_xpath_ns
from pywps.validator.base import emptyvalidator


xpath_ns = get_xpath_ns("1.0.0")

Expand Down Expand Up @@ -121,4 +120,3 @@ def load_tests(loader=None, tests=None, pattern=None):
loader.loadTestsFromTestCase(FormatsTest)
]
return unittest.TestSuite(suite_list)

2 changes: 0 additions & 2 deletions tests/test_inout.py
Expand Up @@ -11,7 +11,6 @@
import requests
import os
import tempfile
import datetime
import unittest
import json
from pywps import inout
Expand All @@ -35,7 +34,6 @@
from pywps.tests import service_ok
from pywps.translations import get_translation

from lxml import etree

DATA_DIR = os.path.join(os.path.abspath(os.path.dirname(__file__)), 'data')

Expand Down
2 changes: 0 additions & 2 deletions tests/test_ows.py
Expand Up @@ -9,8 +9,6 @@
import os
import tempfile
import unittest
import lxml.etree
import sys
from pywps import Service, Process, ComplexInput, ComplexOutput, Format, FORMATS, get_format
from pywps.dependencies import ogr
from pywps.exceptions import NoApplicableCode
Expand Down
1 change: 0 additions & 1 deletion tests/test_wpsrequest.py
Expand Up @@ -4,7 +4,6 @@
##################################################################

import unittest
import lxml.etree
from pywps.app import WPSRequest
import tempfile
import datetime
Expand Down
52 changes: 52 additions & 0 deletions tests/test_xml_util.py
@@ -0,0 +1,52 @@
from pywps import xml_util as etree

from io import StringIO


# WPS 1.0.0 Execute request used as an XXE fixture: the DOCTYPE declares an
# external entity ``xxe`` pointing at a local file, and the LiteralData input
# references it as ``&xxe;``. The tests below check that the reference is
# still present after parsing and serialising the document.
XML_EXECUTE = """
<!DOCTYPE foo [
<!ELEMENT foo ANY >
<!ENTITY xxe SYSTEM "file:///PATH/TO/input.txt">
]>
<wps:Execute
service="WPS"
version="1.0.0"
xmlns:wps="http://www.opengis.net/wps/1.0.0"
xmlns:ows="http://www.opengis.net/ows/1.1"
xmlns:xlink="http://www.w3.org/1999/xlink"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://schemas.opengis.net/wps/1.0.0/wpsExecute_request.xsd">
<ows:Identifier>test_process</ows:Identifier>
<wps:DataInputs>
<wps:Input>
<ows:Identifier>name</ows:Identifier>
<wps:Data>
<wps:LiteralData>&xxe;</wps:LiteralData>
</wps:Data>
</wps:Input>
</wps:DataInputs>
<wps:ResponseForm>
<wps:ResponseDocument
storeExecuteResponse="true"
status="true">
<wps:Output asReference="false">
<ows:Identifier>output</ows:Identifier>
</wps:Output>
</wps:ResponseDocument>
</wps:ResponseForm>
</wps:Execute>
"""


def test_etree_fromstring():
    """The ``&xxe;`` reference must survive fromstring() followed by tostring()."""
    # Entities must not be replaced, see https://lxml.de/parsing.html
    root = etree.fromstring(XML_EXECUTE)
    serialized = etree.tostring(root)
    unresolved = b"<wps:LiteralData>&xxe;</wps:LiteralData>"
    assert unresolved in serialized


def test_etree_parse():
    """The ``&xxe;`` reference must survive parse() followed by tostring()."""
    # Entities must not be replaced, see https://lxml.de/parsing.html
    tree = etree.parse(StringIO(XML_EXECUTE))
    serialized = etree.tostring(tree)
    unresolved = b"<wps:LiteralData>&xxe;</wps:LiteralData>"
    assert unresolved in serialized

0 comments on commit 7d6b26a

Please sign in to comment.