Skip to content

Commit

Permalink
Add fallback to native etree implementation
Browse files Browse the repository at this point in the history
  • Loading branch information
glibin committed Jan 14, 2016
1 parent f452940 commit 8109996
Show file tree
Hide file tree
Showing 6 changed files with 80 additions and 8 deletions.
1 change: 1 addition & 0 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ install:
- if [[ $TRAVIS_PYTHON_VERSION != 'pypy'* && $DEPS == true ]]; then travis_retry pip install pycurl; fi
- if [[ $TRAVIS_PYTHON_VERSION == '2.7' && $DEPS == true ]]; then travis_retry travis_retry pip install sphinx==1.2.2 sphinx_rtd_theme; fi
- if [[ $TRAVIS_PYTHON_VERSION == '3.5' && $DEPS == true ]]; then travis_retry travis_retry pip install sphinx==1.2.2 sphinx_rtd_theme; fi
- if [[ $DEPS == true ]]; then travis_retry pip install lxml; fi
- travis_retry python setup.py install
- travis_retry pip install pep8 coverage==3.7.1 codecov coveralls pycurl pycares

Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ def run(self):
with open(os.path.join(build_dir, 'version.py'), 'w') as version_file:
version_file.write('version = "{0}"\n'.format(version))

install_requires = ['tornado', 'lxml', 'jinja2', 'six']
install_requires = ['tornado', 'jinja2', 'six']

setup(
name="tortik",
Expand Down
5 changes: 2 additions & 3 deletions tortik/page/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,18 +16,17 @@
except ImportError:
import urllib.parse as urlparse # py3

import lxml.etree as etree
import tornado.web
import tornado.httpclient
from tornado.options import options, define
from tornado.escape import to_unicode
import tornado.gen
from tornado.util import unicode_type
from jinja2 import Environment, PackageLoader
import six

from tortik.util import decorate_all, make_list, real_ip, make_qs
from tortik.util.dumper import dump
from tortik.util.xml_etree import tostring
from tortik.logger import PageLogger
from tortik.util.async import AsyncGroup
from tortik.util.parse import parse_xml, parse_json
Expand Down Expand Up @@ -162,7 +161,7 @@ def finish_with_debug(self):
size=sys.getsizeof,
get_params=lambda x: urlparse.parse_qs(x, keep_blank_values=True),
pretty_json=lambda x: json.dumps(x, sort_keys=True, indent=4, ensure_ascii=False),
pretty_xml=lambda x: etree.tostring(x, pretty_print=True, encoding=unicode_type),
pretty_xml=lambda x: to_unicode(tostring(x, pretty_print=True, encoding='UTF-8')),
to_unicode=to_unicode,
dumper=dump,
format_exception=lambda x: "".join(traceback.format_exception(*x))
Expand Down
7 changes: 3 additions & 4 deletions tortik/util/parse.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
# -*- coding: utf-8 -*-
from lxml import etree
from tornado.escape import json_decode

from tortik.util import HTTPError
from tortik.util.xml_etree import parse, ParseError

try:
import httplib # py2
Expand All @@ -14,9 +14,8 @@ def parse_xml(response):
if response.code == 599 or response.buffer is None:
raise HTTPError(httplib.SERVICE_UNAVAILABLE, 'Response timeout or no body buffer')
try:
parser = etree.XMLParser(remove_blank_text=True)
return etree.parse(response.buffer, parser=parser)
except etree.ParseError:
return parse(response.buffer)
except ParseError:
raise HTTPError(httplib.SERVICE_UNAVAILABLE, 'Unable to parse xml')


Expand Down
34 changes: 34 additions & 0 deletions tortik/util/xml_etree.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# -*- coding: utf-8 -*-
from tortik.logger import tortik_log

# Select the XML backend once at import time: prefer lxml, otherwise fall back
# to the standard-library ElementTree. ``native_etree`` records which one won
# so the wrappers below can adapt their arguments.
try:
    from lxml import etree
except ImportError:
    tortik_log.info('lxml not installed. Using native etree implementation')
    import xml.etree.ElementTree as etree
    native_etree = True
else:
    native_etree = False


def parse(source, parser=None):
    """Parse ``source`` with whichever etree backend was imported.

    :param source: passed straight through to ``etree.parse``.
    :param parser: optional parser object. When it is omitted and lxml is the
        active backend, an ``XMLParser(remove_blank_text=True)`` is created;
        with the standard-library backend ``None`` is forwarded as is.
    :return: the result of ``etree.parse(source, parser)``.
    """
    needs_lxml_default = native_etree is False and parser is None
    if needs_lxml_default:
        # Only lxml is given this option here; the stdlib branch never builds a parser.
        parser = etree.XMLParser(remove_blank_text=True)
    return etree.parse(source, parser)


def tostring(element, **kwargs):
    """Serialize ``element`` with whichever etree backend was imported.

    With lxml every keyword argument is forwarded unchanged. With the
    standard-library backend only ``encoding`` and ``method`` are forwarded;
    any other keyword (``pretty_print`` for example) is silently dropped.

    :param element: element handed to ``etree.tostring``.
    :param kwargs: serialization options, filtered as described above.
    :return: the result of ``etree.tostring``.
    """
    if native_etree is not True:
        return etree.tostring(element, **kwargs)

    # Keywords this wrapper lets through to the stdlib serializer.
    allowed = ('encoding', 'method')
    filtered = {key: kwargs[key] for key in allowed if key in kwargs}
    return etree.tostring(element, **filtered)


# Re-export the active backend's ParseError so callers can catch parse
# failures without knowing which etree implementation was imported.
ParseError = etree.ParseError
39 changes: 39 additions & 0 deletions tortik_tests/util_test.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,16 @@
# -*- coding: utf-8 -*-

try:
from cStringIO import StringIO # python 2
except ImportError:
from io import StringIO # python 3

from collections import OrderedDict
import unittest
from tornado.escape import to_unicode

from tortik.util import make_qs, update_url
from tortik.util.xml_etree import parse, tostring


class BaseTest(unittest.TestCase):
Expand Down Expand Up @@ -96,3 +103,35 @@ def test_both(self):
'http://google.com?a=1')
self.assertUrlsEqual(update_url('http://google.com?a=2&b=3&c=4', update_args={'a': 1}, remove_args=['b']),
'http://google.com?a=1&c=4')


# Round-trip test for the tortik.util.xml_etree wrappers: parse a fixture
# document, check its root tag, serialize it back and compare with the input.
class TestParse(BaseTest):
def test_parse_xml(self):
# Fixture document kept as bytes; it is decoded with to_unicode() where text is needed.
mock_xml = b'''<?xml version="1.0" encoding="UTF-8"?>
<data>
<country name="Liechtenstein">
<rank>1</rank>
<year>2008</year>
<gdppc>141100</gdppc>
<neighbor name="Austria"/>
<neighbor name="Switzerland"/>
</country>
<country name="Singapore">
<rank>4</rank>
<year>2011</year>
<gdppc>59900</gdppc>
<neighbor name="Malaysia"/>
</country>
<country name="Panama">
<rank>68</rank>
<year>2011</year>
<gdppc>13600</gdppc>
<neighbor name="Costa Rica"/>
<neighbor name="Colombia"/>
</country>
</data>'''
# parse() is given a StringIO wrapping the decoded fixture.
tree = parse(StringIO(to_unicode(mock_xml)))
self.assertEqual(tree.getroot().tag, 'data')
convert = tostring(tree.getroot(), pretty_print=True, xml_declaration=True, encoding='UTF-8')
# Both sides are compared with newlines and spaces removed, so layout differences
# do not matter. Single quotes in the serialized output are normalized to double
# quotes - presumably because a backend may quote the XML declaration differently
# (TODO confirm).
self.assertEqual(to_unicode(convert).replace('\n', '').replace(' ', '').replace('\'', '"').strip(),
to_unicode(mock_xml).replace('\n', '').replace(' ', '').strip())

0 comments on commit 8109996

Please sign in to comment.