Permalink
Browse files

Alternative way of parsing HTML5: use html5lib's parse() with the lxml treebuilder instead of lxml.html.html5parser

  • Loading branch information...
1 parent aa8e231 commit 6ed2d4a0b29a58b81894231608b5c69624a4ba18 @ggaughan committed Feb 13, 2013
Showing with 4 additions and 2 deletions.
  1. +4 −2 modules/pipexpathfetchpage.py
@@ -54,8 +54,10 @@ def pipe_xpathfetchpage(context, _INPUT, conf, **kwargs):
if html5:
- from lxml.html import html5parser
- root = html5parser.fromstring(content)
+ #from lxml.html import html5parser
+ #root = html5parser.fromstring(content)
+ from html5lib import parse
+ root = parse(content, treebuilder='lxml', namespaceHTMLElements=False)
else:
from lxml import etree
root = etree.HTML(content)

0 comments on commit 6ed2d4a

Please sign in to comment.