Skip to content

Commit

Permalink
WIP
Browse files Browse the repository at this point in the history
  • Loading branch information
Arthur Axel 'fREW' Schmidt committed Mar 11, 2016
1 parent a460205 commit 9fc4eb3
Show file tree
Hide file tree
Showing 2 changed files with 10 additions and 7 deletions.
9 changes: 5 additions & 4 deletions feedpipe.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
# https://pypi.python.org/pypi/defusedxml/#defusedxml-sax
import xml.sax
from xml.sax.saxutils import escape
import HTMLParser


class FeedPipe(object):
Expand Down Expand Up @@ -137,8 +138,9 @@ def __init__(self, content=''):
.replace('’', '')
try:
try:
xml.sax.parseString(("<div>{}</div>".format(nco)),
xml.sax.ContentHandler())
xml.sax.parseString(
("<div>{}</div>".format(HTMLParser.HTMLParser().unescape(nco))),
xml.sax.ContentHandler())
except UnicodeEncodeError:
# who the hell cares?
pass
Expand All @@ -147,8 +149,7 @@ def __init__(self, content=''):
xmlns='http://www.w3.org/1999/xhtml')
self.div.content = content
self.attrs["type"] = 'xhtml'
except xml.sax.SAXParseException as e:
print(nco)
except xml.sax.SAXParseException:
self.attrs["type"] = 'html'
self.content = xe.XMLText(escape(content))

Expand Down
8 changes: 5 additions & 3 deletions tests/test_basic.py
Original file line number Diff line number Diff line change
Expand Up @@ -161,9 +161,11 @@ def test_grep(self):
# )
self.assertEqual(fp.count(), 5)

# def test_html_vs_xhtml(self):
# fp = FeedPipe().cat(['./tests/lwn.xml'])
# print([str(x.content) for x in fp.entries])
def test_html_vs_xhtml(self):
    """Print the content of each LWN fixture entry typed as 'html'.

    Loads ``./tests/lwn.xml`` through ``FeedPipe().cat`` and walks the
    resulting entries, printing the content of every entry whose
    ``attrs['type']`` equals ``'html'``.

    NOTE(review): this test makes no assertions; the printed output is
    only useful for manual inspection.
    """
    feed = FeedPipe().cat(['./tests/lwn.xml'])
    # Lazily select only the contents typed as 'html', preserving the
    # order of feed.entries.
    html_contents = (
        entry.content
        for entry in feed.entries
        if entry.content.attrs['type'] == 'html'
    )
    for content in html_contents:
        print(content)

if __name__ == '__main__':
unittest.main()

0 comments on commit 9fc4eb3

Please sign in to comment.