From 576082bd6205da8bbf6d40e4eb3f6b6f0e60fcc0 Mon Sep 17 00:00:00 2001 From: Ross Jones Date: Fri, 25 May 2012 11:08:23 +0100 Subject: [PATCH] 2414 Remove LXML as a dependency from core Removes LXML from the requirements by replacing the html stripping when generating a markdown extract of the description. Also removes LXML from the initialisation code that checks the solr schema version --- ckan/lib/helpers.py | 8 +++----- ckan/lib/search/__init__.py | 7 +++---- doc/install-from-source.rst | 5 +---- requires/lucid_present.txt | 3 +-- 4 files changed, 8 insertions(+), 15 deletions(-) diff --git a/ckan/lib/helpers.py b/ckan/lib/helpers.py index 538e152b331..d91403424d5 100644 --- a/ckan/lib/helpers.py +++ b/ckan/lib/helpers.py @@ -25,7 +25,6 @@ from routes import redirect_to as _redirect_to from routes import url_for as _routes_default_url_for from alphabet_paginate import AlphaPage -from lxml.html import fromstring import i18n import ckan.exceptions from pylons import request @@ -467,8 +466,7 @@ def group_name_to_title(name): def markdown_extract(text, extract_length=190): if (text is None) or (text.strip() == ''): return '' - html = fromstring(markdown(text)) - plain = html.xpath("string()") + plain = re.sub(r'<.*?>', '', markdown(text)) return unicode(truncate(plain, length=extract_length, indicator='...', whole_word=True)) def icon_url(name): @@ -607,7 +605,7 @@ def parse_rfc_2822_date(date_str, assume_utc=True): RFC 2822 is the date format used in HTTP headers. It should contain timezone information, but that cannot be relied upon. - + If date_str doesn't contain timezone information, then the 'assume_utc' flag determines whether we assume this string is local (with respect to the server running this code), or UTC. In practice, what this means is that if @@ -616,7 +614,7 @@ def parse_rfc_2822_date(date_str, assume_utc=True): If timezone information is available in date_str, then the returned datetime is 'aware', ie - it has an associated tz_info object. - + Returns None if the string cannot be parsed as a valid datetime. """ time_tuple = email.utils.parsedate_tz(date_str) diff --git a/ckan/lib/search/__init__.py b/ckan/lib/search/__init__.py index 0731d0e54b9..abc78b798fd 100644 --- a/ckan/lib/search/__init__.py +++ b/ckan/lib/search/__init__.py @@ -261,13 +261,12 @@ def check_solr_schema_version(schema_file=None): url = 'file://%s' % schema_file res = urllib2.urlopen(url) - from lxml import etree - tree = etree.fromstring(res.read()) + import xml.dom.minidom + tree = xml.dom.minidom.parseString(res.read()) - version = tree.xpath('//schema/@version') + version = tree.documentElement.getAttribute('version') if not len(version): raise SearchError('Could not extract version info from the SOLR schema, using file: \n%s' % url) - version = version[0] if not version in SUPPORTED_SCHEMA_VERSIONS: raise SearchError('SOLR schema version not supported: %s. Supported versions are [%s]' diff --git a/doc/install-from-source.rst b/doc/install-from-source.rst index c6dc209c2ca..bb324c32d25 100644 --- a/doc/install-from-source.rst +++ b/doc/install-from-source.rst @@ -125,7 +125,7 @@ WebOb has to be installed explicitly afterwards because by installing pylons wit Now to install the remaining dependencies in requires/lucid_present.txt and you are using Ubuntu Lucid 10.04 you can install the system versions:: - sudo apt-get install python-pybabel python-psycopg2 python-lxml + sudo apt-get install python-pybabel python-psycopg2 sudo apt-get install python-pylons python-repoze.who sudo apt-get install python-repoze.who-plugins python-tempita python-zope.interface @@ -135,9 +135,6 @@ Alternatively, if you are not using Ubuntu Lucid 10.04 you'll need to install th pip install --ignore-installed -r pyenv/src/ckan/requires/lucid_present.txt -This will take a **long** time. Particularly the install of the ``lxml`` -package. - At this point you will need to deactivate and then re-activate your virtual environment to ensure that all the scripts point to the correct locations: diff --git a/requires/lucid_present.txt b/requires/lucid_present.txt index aabdb0cedad..a4cbfb03f12 100644 --- a/requires/lucid_present.txt +++ b/requires/lucid_present.txt @@ -2,13 +2,12 @@ # apt-get if you are on that platform. If you are using a different platform # you can install these dependencies via pip instead. # -# sudo apt-get install python-pybabel python-psycopg2 python-lxml +# sudo apt-get install python-pybabel python-psycopg2 # sudo apt-get install python-pylons python-repoze.who # sudo apt-get install python-repoze.who-plugins python-tempita python-zope.interface babel==0.9.4 psycopg2==2.0.13 -lxml==2.2.4 # Specifying particular version of WebOb because later version has incompatibility # with pylons 0.9.7 (change to imports of Multidict) webob==1.0.8