Skip to content
Browse files

Proper handling of non-ascii titles

  • Loading branch information...
1 parent e6f06fb commit d5d1dc49e010032156d0d597d05524af6bd9afb7 @amit committed Oct 4, 2011
Showing with 13 additions and 3 deletions.
  1. +1 −1 planet/reconstitute.py
  2. +2 −2 planet/spider.py
  3. +10 −0 tests/data/reconstitute/title_nonascii.xml
View
2 planet/reconstitute.py
@@ -75,7 +75,7 @@ def id(xentry, entry):
entry_id = entry.link
elif entry.has_key("title") and entry.title:
entry_id = (entry.title_detail.base + "/" +
- md5(entry.title).hexdigest())
+ md5(entry.title.encode('utf-8')).hexdigest())
elif entry.has_key("summary") and entry.summary:
entry_id = (entry.summary_detail.base + "/" +
md5(entry.summary).hexdigest())
View
4 planet/spider.py
@@ -226,7 +226,7 @@ def writeCache(feed_uri, feed_info, data):
# apply any filters
xdoc = reconstitute.reconstitute(data, entry)
- output = xdoc.toxml().encode('utf-8')
+ output = xdoc.toxml("utf-8")
xdoc.unlink()
for filter in config.filters(feed_uri):
output = shell.run(filter, output, mode="filter")
@@ -287,7 +287,7 @@ def writeCache(feed_uri, feed_info, data):
xdoc=minidom.parseString('''<feed xmlns:planet="%s"
xmlns="http://www.w3.org/2005/Atom"/>\n''' % planet.xmlns)
reconstitute.source(xdoc.documentElement,data.feed,data.bozo,data.version)
- write(xdoc.toxml().encode('utf-8'), filename(sources, feed_uri))
+ write(xdoc.toxml("utf-8"), filename(sources, feed_uri))
xdoc.unlink()
def httpThread(thread_index, input_queue, output_queue, log):
View
10 tests/data/reconstitute/title_nonascii.xml
@@ -0,0 +1,10 @@
+<!--
+Description: title value containing non-ASCII characters
+Expect: title_detail.language == 'en-us'
+-->
+
+<feed xmlns="http://www.w3.org/2005/Atom">
+ <entry>
+ <title xml:lang="en-us">foo æøå. French: êèé </title>
+ </entry>
+</feed>

0 comments on commit d5d1dc4

Please sign in to comment.
Something went wrong with that request. Please try again.