Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse files

Finish out parsing of the Atom xml for posts

  • Loading branch information...
commit e1e29cf2d9ae6ee5224690d371d70bcd104d380a 1 parent b982fac
@claytron authored
Showing with 38 additions and 12 deletions.
  1. +1 −0  setup.py
  2. +37 −12 transmogrify/blogger/source.py
View
1  setup.py
@@ -35,6 +35,7 @@ def read(*rnames):
install_requires=[
'setuptools',
'lxml',
+ 'python-dateutil',
'collective.transmogrifier',
],
zip_safe=False,
View
49 transmogrify/blogger/source.py
@@ -1,4 +1,5 @@
from lxml import objectify
+from dateutil.parser import parse
from zope.annotation.interfaces import IAnnotations
from zope.interface import implements
from zope.interface import classProvides
@@ -10,10 +11,16 @@
SETTINGS_SCHEMA = "http://schemas.google.com/blogger/2008/kind#settings"
POST_SCHEMA = "http://schemas.google.com/blogger/2008/kind#post"
COMMENT_SCHEMA = "http://schemas.google.com/blogger/2008/kind#comment"
+ATOM_URL = "http://www.w3.org/2005/Atom"
+ATOM_NAMESPACE = "{%s}" % ATOM_URL
BLOGGER_NAMESPACES = {
- 'f': "http://www.w3.org/2005/Atom",
+ 'a': ATOM_URL,
'thr': 'http://purl.org/syndication/thread/1.0',
+ 'app': 'http://purl.org/atom/app#',
}
+# XXX: leaving off the "%z" for now as DateTime fields in
+# Archetypes don't seem to accept it.
+RFC822_FMT = "%a, %d %h %Y %T"
class BloggerSource(object):
@@ -39,7 +46,7 @@ def __init__(self, transmogrifier, name, options, previous):
def init_settings(self):
settings = self.xml_root.xpath(
- "f:entry/f:category[contains(@term, '%s')]/.." % SETTINGS_SCHEMA,
+ "a:entry/a:category[contains(@term, '%s')]/.." % SETTINGS_SCHEMA,
namespaces=BLOGGER_NAMESPACES)
for setting in settings:
setting_key = setting.id.text.split(".")[-1]
@@ -57,18 +64,36 @@ def __iter__(self):
yield item
# process the blog posts
posts = self.xml_root.xpath(
- "f:entry/f:category[contains(@term, '%s')]/.." % POST_SCHEMA,
+ "a:entry/a:category[contains(@term, '%s')]/.." % POST_SCHEMA,
namespaces=BLOGGER_NAMESPACES)
for post in posts:
- # XXX: dates aren't working
- item = dict(
- title=post.title.text,
- text=post.content.text,
- effectiveDate=post.published.text,
- modification_date=post.updated.text,
- )
- # XXX: do I need this?
- item['_transmogrify.blogger.post_id'] = post.id.text
+ item = {}
+ item['_transmogrify.blogger.id'] = post.id.text
+ item['_transmogrify.blogger.title'] = post.title.text
+ item['_transmogrify.blogger.content'] = post.content.text
+ item['_transmogrify.blogger.author.name'] = post.author.name.text
+ item['_transmogrify.blogger.author.uri'] = post.author.uri.text
+ item['_transmogrify.blogger.author.email'] = post.author.email.text
+ published = parse(post.published.text)
+ item['_transmogrify.blogger.published'] = published
+ published_rfc822 = published.strftime(RFC822_FMT)
+ item['_transmogrify.blogger.published.rfc822'] = published_rfc822
+ updated = parse(post.updated.text)
+ item['_transmogrify.blogger.updated'] = updated
+ updated_rfc822 = updated.strftime(RFC822_FMT)
+ item['_transmogrify.blogger.updated.rfc822'] = updated_rfc822
+ alt_link = post.xpath(
+ "a:link[@rel='alternate']/@href",
+ namespaces=BLOGGER_NAMESPACES)
+ alt_link = alt_link and alt_link[0] or ""
+ item['_transmogrify.blogger.link'] = alt_link
+ post_state = "published"
+ draft = post.xpath(
+ "app:control/app:draft",
+ namespaces=BLOGGER_NAMESPACES)
+ if draft and draft[0] == "yes":
+ post_state = "draft"
+ item['_transmogrify.blogger.state'] = post_state
yield item
# process the post comments
comments = self.xml_root.xpath(
Please sign in to comment.
Something went wrong with that request. Please try again.