Permalink
Browse files

Catch lxml's ParserError and re-raise it as decruft's Unparseable

  • Loading branch information...
1 parent 50beb95 commit 8668b4c13c4475107d0d2d272200bbbb6dc04565 John Watson committed Jan 24, 2012
Showing with 2 additions and 2 deletions.
  1. +2 −2 decruft/decruft.py
View
4 decruft/decruft.py
@@ -2,7 +2,7 @@
from page_parser import parse, get_title, get_body, Unparseable
import logging
import re
-from lxml.etree import tostring, tounicode
+from lxml.etree import tostring, tounicode, ParserError
from lxml.html.clean import Cleaner
logger = logging.getLogger('decruft')
@@ -98,7 +98,7 @@ def summary(self):
continue # try again
else:
return cleaned_article
- except StandardError, e:
+ except (StandardError, ParserError), e:
#logger.exception('error getting summary: ' + str(traceback.format_exception(*sys.exc_info())))
logger.exception('error getting summary: ' )
raise Unparseable(str(e))

0 comments on commit 8668b4c

Please sign in to comment.