From fcbb21e158f9bf249989536d2b4f6223a0ed6ec3 Mon Sep 17 00:00:00 2001
From: Vitaly Glibin
Date: Thu, 14 Jan 2016 13:24:02 +0300
Subject: [PATCH] Add fallback to native etree implementation

---
Review notes (not part of the commit message):
- tostring(): with the native backend, unwrap ElementTree objects to their
  root, because xml.etree.ElementTree.tostring() only takes an Element
  while lxml.etree.tostring() also takes a tree.
- util_test.py: feed the bytes fixture through io.BytesIO and decode it
  before comparing; io.StringIO rejects bytes on Python 3.
- The author address and the XML markup of the test fixture were already
  missing from the copy under review and are left as found. Blank context
  lines and indentation were restored by inference from the hunk counts
  and need checking against the tree. Blob ids in the "index" lines
  predate this revision.

 .travis.yml               |  1 +
 setup.py                  |  2 +-
 tortik/page/__init__.py   |  5 ++---
 tortik/util/parse.py      |  7 +++----
 tortik/util/xml_etree.py  | 48 ++++++++++++++++++++++++++++++++++++++++++++++++
 tortik_tests/util_test.py | 36 ++++++++++++++++++++++++++++++++++++
 6 files changed, 91 insertions(+), 8 deletions(-)
 create mode 100644 tortik/util/xml_etree.py

diff --git a/.travis.yml b/.travis.yml
index 635ef95..150116a 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -19,6 +19,7 @@ install:
   - if [[ $TRAVIS_PYTHON_VERSION != 'pypy'* && $DEPS == true ]]; then travis_retry pip install pycurl; fi
   - if [[ $TRAVIS_PYTHON_VERSION == '2.7' && $DEPS == true ]]; then travis_retry travis_retry pip install sphinx==1.2.2 sphinx_rtd_theme; fi
   - if [[ $TRAVIS_PYTHON_VERSION == '3.5' && $DEPS == true ]]; then travis_retry travis_retry pip install sphinx==1.2.2 sphinx_rtd_theme; fi
+  - if [[ $DEPS == true ]]; then travis_retry pip install lxml; fi
   - travis_retry python setup.py install
   - travis_retry pip install pep8 coverage==3.7.1 codecov coveralls pycurl pycares
 
diff --git a/setup.py b/setup.py
index 004cddf..ba89832 100644
--- a/setup.py
+++ b/setup.py
@@ -43,7 +43,7 @@ def run(self):
         with open(os.path.join(build_dir, 'version.py'), 'w') as version_file:
             version_file.write('version = "{0}"\n'.format(version))
 
-install_requires = ['tornado', 'lxml', 'jinja2', 'six']
+install_requires = ['tornado', 'jinja2', 'six']
 
 setup(
     name="tortik",
diff --git a/tortik/page/__init__.py b/tortik/page/__init__.py
index db1f3cb..e845531 100644
--- a/tortik/page/__init__.py
+++ b/tortik/page/__init__.py
@@ -16,18 +16,17 @@
 except ImportError:
     import urllib.parse as urlparse  # py3
 
-import lxml.etree as etree
 import tornado.web
 import tornado.httpclient
 from tornado.options import options, define
 from tornado.escape import to_unicode
 import tornado.gen
-from tornado.util import unicode_type
 from jinja2 import Environment, PackageLoader
 import six
 
 from tortik.util import decorate_all, make_list, real_ip, make_qs
 from tortik.util.dumper import dump
+from tortik.util.xml_etree import tostring
 from tortik.logger import PageLogger
 from tortik.util.async import AsyncGroup
 from tortik.util.parse import parse_xml, parse_json
@@ -162,7 +161,7 @@ def finish_with_debug(self):
             size=sys.getsizeof,
             get_params=lambda x: urlparse.parse_qs(x, keep_blank_values=True),
             pretty_json=lambda x: json.dumps(x, sort_keys=True, indent=4, ensure_ascii=False),
-            pretty_xml=lambda x: etree.tostring(x, pretty_print=True, encoding=unicode_type),
+            pretty_xml=lambda x: to_unicode(tostring(x, pretty_print=True, encoding='UTF-8')),
             to_unicode=to_unicode,
             dumper=dump,
             format_exception=lambda x: "".join(traceback.format_exception(*x))
diff --git a/tortik/util/parse.py b/tortik/util/parse.py
index 2ebe243..e9c148f 100644
--- a/tortik/util/parse.py
+++ b/tortik/util/parse.py
@@ -1,8 +1,8 @@
 # -*- coding: utf-8 -*-
-from lxml import etree
 from tornado.escape import json_decode
 
 from tortik.util import HTTPError
+from tortik.util.xml_etree import parse, ParseError
 
 try:
     import httplib  # py2
@@ -14,9 +14,8 @@ def parse_xml(response):
     if response.code == 599 or response.buffer is None:
         raise HTTPError(httplib.SERVICE_UNAVAILABLE, 'Response timeout or no body buffer')
     try:
-        parser = etree.XMLParser(remove_blank_text=True)
-        return etree.parse(response.buffer, parser=parser)
-    except etree.ParseError:
+        return parse(response.buffer)
+    except ParseError:
         raise HTTPError(httplib.SERVICE_UNAVAILABLE, 'Unable to parse xml')
 
 
diff --git a/tortik/util/xml_etree.py b/tortik/util/xml_etree.py
new file mode 100644
index 00000000..796c705
--- /dev/null
+++ b/tortik/util/xml_etree.py
@@ -0,0 +1,48 @@
+# -*- coding: utf-8 -*-
+"""Thin facade over lxml.etree with a fallback to xml.etree.ElementTree."""
+from tortik.logger import tortik_log
+
+native_etree = False
+
+try:
+    from lxml import etree
+except ImportError:
+    tortik_log.info('lxml not installed. Using native etree implementation')
+    native_etree = True
+    import xml.etree.ElementTree as etree
+
+
+def parse(source, parser=None):
+    """Parse ``source`` with the active backend and return the tree.
+
+    When no ``parser`` is given and lxml is in use, a parser created with
+    ``remove_blank_text=True`` is used; the native backend falls back to
+    its default parser.
+    """
+    if parser is None and not native_etree:
+        parser = etree.XMLParser(remove_blank_text=True)
+
+    return etree.parse(source, parser)
+
+
+def tostring(element, **kwargs):
+    """Serialize ``element`` (an element or a whole tree) with the backend.
+
+    lxml receives ``kwargs`` untouched. The native backend only receives
+    ``encoding`` and ``method``; every other keyword (``pretty_print``,
+    ``xml_declaration``, ...) is dropped.
+    """
+    if not native_etree:
+        return etree.tostring(element, **kwargs)
+
+    # lxml serializes trees as well as elements, the standard library only
+    # elements, so hand it the root of whatever parse() returned.
+    if hasattr(element, 'getroot'):
+        element = element.getroot()
+
+    pass_args = dict((name, kwargs[name])
+                     for name in ('encoding', 'method') if name in kwargs)
+    return etree.tostring(element, **pass_args)
+
+
+ParseError = etree.ParseError
diff --git a/tortik_tests/util_test.py b/tortik_tests/util_test.py
index 0a053e4..62416ad 100644
--- a/tortik_tests/util_test.py
+++ b/tortik_tests/util_test.py
@@ -1,9 +1,13 @@
 # _*_ coding: utf-8 _*_
 
 from collections import OrderedDict
+from io import BytesIO
 import unittest
 
+from tornado.escape import to_unicode
+
 from tortik.util import make_qs, update_url
+from tortik.util.xml_etree import parse, tostring
 
 
 class BaseTest(unittest.TestCase):
@@ -96,3 +100,35 @@ def test_both(self):
                              'http://google.com?a=1')
         self.assertUrlsEqual(update_url('http://google.com?a=2&b=3&c=4', update_args={'a': 1}, remove_args=['b']),
                              'http://google.com?a=1&c=4')
+
+
+class TestParse(BaseTest):
+    def test_parse_xml(self):
+        mock_xml = b'''
+
+
+ 1
+ 2008
+ 141100
+
+
+
+
+ 4
+ 2011
+ 59900
+
+
+
+ 68
+ 2011
+ 13600
+
+
+
+'''
+        tree = parse(BytesIO(mock_xml))
+        self.assertEqual(tree.getroot().tag, 'data')
+        convert = tostring(tree.getroot(), pretty_print=True, xml_declaration=True, encoding='UTF-8')
+        self.assertEqual(to_unicode(convert).replace('\n', '').replace(' ', '').replace('\'', '"').strip(),
+                         to_unicode(mock_xml).replace('\n', '').replace(' ', '').strip())