Permalink
Browse files

Added a limit on the maximum number of times a background task will be retried.

Version 0.3 of the PSHB spec allows us to have an infinitely long lease by default, so I've removed the lease_seconds argument from the subscription process. I also updated the pollinghub to the version that supports PSHB 0.3.
We now handle subscriptions for non-existent URLs by raising an exception in the ContentParser. The background task now detects this exception, logs it, and ends the task rather than propagating the error and then retrying.
We now deal with feeds whose entries have a None value for their parsed updated element (for example, because the date could not be parsed) by using the current time.
  • Loading branch information...
1 parent 71c5ddf commit 67efff78bc215fe36ee0890362230ba76722e6f7 @adewale committed Apr 11, 2010
Showing with 107 additions and 14 deletions.
  1. +25 −6 streamer.py
  2. +6 −1 streamer_functional_tests.py
  3. +15 −7 streamer_tests.py
  4. +61 −0 test_data/no_updated_element_feed
View
31 streamer.py
@@ -20,13 +20,18 @@
ALWAYS_USE_DEFAULT_HUB = False
# This is a hub I've set up that does polling
DEFAULT_HUB = "http://pollinghub.appspot.com/"
-# Use a cron job to re-subscribe to all feeds
-LEASE_SECONDS = "86400" * 60 #90 days
OPEN_ACCESS = False
+MAX_TASK_RETRIES = 10
from google.appengine.api.labs import taskqueue
class BackGroundTaskHandler(webapp.RequestHandler):
def post(self):
+ logging.info("Request body %s" % self.request.body)
+ retryCount = self.request.headers.get('X-AppEngine-TaskRetryCount')
+ taskName = self.request.headers.get('X-AppEngine-TaskName')
+ if retryCount and int(retryCount) > MAX_TASK_RETRIES:
+ logging.warning("Abandoning this task: %s after %s retries" % (taskName, retryCount))
+ return
functionName = self.request.get('function')
logging.info("Background task being executed. Function is: <%s>" % (functionName))
if functionName == 'handleNewSubscription':
@@ -89,7 +94,6 @@ def subscribe(self):
"hub.mode" : "subscribe",
"hub.topic" : self.url,
"hub.verify" : "async", # We don't want subscriptions to block until verification happens
- "hub.lease_seconds" : LEASE_SECONDS,
"hub.verify_token" : SECRET_TOKEN, #TODO Must generate a token based on some secret value
}
payload = urllib.urlencode(parameters)
@@ -168,7 +172,11 @@ def handleDeleteSubscription(url):
def handleNewSubscription(url, nickname):
logging.info("Subscription added: %s by %s" % (url, nickname))
- parser = ContentParser(None, DEFAULT_HUB, ALWAYS_USE_DEFAULT_HUB, urlToFetch = url)
+ try:
+ parser = ContentParser(None, DEFAULT_HUB, ALWAYS_USE_DEFAULT_HUB, urlToFetch = url)
+ except UrlNotFoundError:
+ logging.warn("Url added by: %s not found: %s" % (nickname, url))
+ return
hub = parser.extractHub()
sourceUrl = parser.extractSourceUrl()
author = parser.extractFeedAuthor()
@@ -260,10 +268,21 @@ def post(self):
self.response.set_status(200)
self.response.out.write("Good entries")
+class UrlNotFoundError(Exception):
+ def __init__(self, url):
+ self.url = url
+
+ def __str__(self):
+ return self.url
+
class ContentParser(object):
def __init__(self, content, defaultHub = DEFAULT_HUB, alwaysUseDefaultHub = ALWAYS_USE_DEFAULT_HUB, urlToFetch = ""):
if urlToFetch:
- content = urlfetch.fetch(urlToFetch).content
+ response = urlfetch.fetch(urlToFetch)
+ logging.info("Status was: [%s]" % response.status_code)
+ if response.status_code == 404:
+ raise UrlNotFoundError(urlToFetch)
+ content = response.content
self.data = feedparser.parse(content)
self.defaultHub = defaultHub
self.alwaysUseDefaultHub = alwaysUseDefaultHub
@@ -278,7 +297,7 @@ def logErrors(self):
logging.error('Bad feed data. %s: %r', self.data.bozo_exception.__class__.__name__, self.data.bozo_exception)
def __createDateTime(self, entry):
- if hasattr(entry, 'updated_parsed'):
+ if hasattr(entry, 'updated_parsed') and entry.updated_parsed:
return datetime.datetime(*(entry.updated_parsed[0:6]))
else:
return datetime.datetime.utcnow()
View
7 streamer_functional_tests.py
@@ -47,6 +47,11 @@ def testAddingNewSubscriptionsUsingTaskQueueIsIdempotent(self):
response = self.post('/bgtasks', data=data, expect_errors=True)
self.assertEqual(streamer.Subscription.all().count(), 1)
+ def testAddingNoneExistentFeedsDoesNotRaiseAnException(self):
+ data = {'function':'handleNewSubscription', 'url':'http://www.oshineye.com/404FromStreamer', 'nickname':'ade'}
+ response = self.post('/bgtasks', data=data, expect_errors=True)
+ self.assertEquals('200 OK', response.status)
+
def testEnqueuesTaskForNewSubscription(self):
data = {'url':'http://blog.oshineye.com/feeds/posts/default'}
self.assertTasksInQueue(0)
@@ -99,4 +104,4 @@ class ContentParserFunctionalTest(unittest.TestCase):
def testCanExtractPostsFromRemoteSite(self):
parser = streamer.ContentParser(None, urlToFetch = "http://blog.oshineye.com/feeds/posts/default")
posts = parser.extractPosts();
- self.assertTrue(len(posts) > 2 )
+ self.assertTrue(len(posts) > 2)
View
22 streamer_tests.py
@@ -70,7 +70,14 @@ class ContentParserTest(unittest.TestCase):
CANONICAL_RSS_FEED = open("test_data/canonical_rss_feed").read()
VALID_ATOM_FEED = open("test_data/valid_atom_feed").read()
NO_AUTHOR_RSS_FEED = open("test_data/no_author_rss_feed").read()
-
+ NO_UPDATED_ELEMENT_FEED = open("test_data/no_updated_element_feed").read()
+
+ def testCanExtractCorrectNumberOfPostsFromFeedWithMissingUpdatedElement(self):
+ parser = ContentParser(self.NO_UPDATED_ELEMENT_FEED)
+ posts = parser.extractPosts()
+ self.assertTrue(parser.dataValid())
+ self.assertEquals(1, len(posts))
+
def testCanIdentifyPostsWithGoodData(self):
parser = ContentParser(self.SAMPLE_FEED)
posts = parser.extractPosts()
@@ -137,7 +144,7 @@ def testCanExtractAuthorNameViaDublinCoreCreatorFromRssFeed(self):
def testCanExtractHubFromFeed(self):
parser = ContentParser(self.BLOGGER_FEED)
- hub = parser.extractHub();
+ hub = parser.extractHub()
self.assertEquals("http://pubsubhubbub.appspot.com/", hub)
def testCanOverrideHubForFeed(self):
@@ -148,14 +155,13 @@ def testCanOverrideHubForFeed(self):
parser.alwaysUseDefaultHub = True
self.assertEquals(fakeDefaultHub, parser.extractHub())
- def testCanExtractHubFromFeedburnerFeed(self):
- parser = ContentParser(self.FEEDBURNER_FEED)
- hub = parser.extractHub();
- self.assertEquals("http://pubsubhubbub.appspot.com", hub)
+ def testCanExtractHubFromFeedburnerFeeds(self):
+ self.assertEquals("http://pubsubhubbub.appspot.com", ContentParser(self.FEEDBURNER_FEED).extractHub())
+ self.assertEquals("http://pubsubhubbub.appspot.com/", ContentParser(self.NO_UPDATED_ELEMENT_FEED).extractHub())
def testCanExtractsDefaultHubForHubLessFeeds(self):
parser = ContentParser(self.HUBLESS_FEED)
- hub = parser.extractHub();
+ hub = parser.extractHub()
self.assertEquals(DEFAULT_HUB, hub)
def testCanExtractFeedUrls(self):
@@ -165,6 +171,7 @@ def testCanExtractFeedUrls(self):
self.assertEquals("http://feeds.feedburner.com/PlanetTw", ContentParser(self.FEEDBURNER_FEED).extractFeedUrl())
self.assertEquals("http://news.ycombinator.com/rss", ContentParser(self.RSS_FEED).extractFeedUrl())
self.assertEquals("http://www.scripting.com/rss", ContentParser(self.CANONICAL_RSS_FEED).extractFeedUrl())
+ self.assertEquals("http://feeds.feedburner.com/ChrisParsons", ContentParser(self.NO_UPDATED_ELEMENT_FEED).extractFeedUrl())
def testCanExtractSourceUrls(self):
self.assertEquals("http://pubsubhubbub-loadtest.appspot.com/foo", ContentParser(self.SAMPLE_FEED).extractSourceUrl())
@@ -173,3 +180,4 @@ def testCanExtractSourceUrls(self):
self.assertEquals("http://blogs.thoughtworks.com/", ContentParser(self.FEEDBURNER_FEED).extractSourceUrl())
self.assertEquals("http://news.ycombinator.com/", ContentParser(self.RSS_FEED).extractSourceUrl())
self.assertEquals("http://www.scripting.com/", ContentParser(self.CANONICAL_RSS_FEED).extractSourceUrl())
+ self.assertEquals("http://chrismdp.github.com/", ContentParser(self.NO_UPDATED_ELEMENT_FEED).extractSourceUrl())
View
61 test_data/no_updated_element_feed
@@ -0,0 +1,61 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<?xml-stylesheet type="text/xsl" media="screen" href="/~d/styles/atom10full.xsl"?><?xml-stylesheet type="text/css" media="screen" href="http://feeds.feedburner.com/~d/styles/itemcontent.css"?><feed xmlns="http://www.w3.org/2005/Atom" xmlns:feedburner="http://rssnamespace.org/feedburner/ext/1.0">
+
+ <title>Chris Parsons</title>
+
+ <link href="http://chrismdp.github.com/" />
+ <updated>2010-03-30T02:13:18-07:00</updated>
+ <id>http://chrismdp.github.com/</id>
+ <author>
+ <name>Chris Parsons</name>
+ <email>chrismdp@gmail.com</email>
+ </author>
+
+
+ <atom10:link xmlns:atom10="http://www.w3.org/2005/Atom" rel="self" type="application/atom+xml" href="http://feeds.feedburner.com/ChrisParsons" /><feedburner:info uri="chrisparsons" /><atom10:link xmlns:atom10="http://www.w3.org/2005/Atom" rel="hub" href="http://pubsubhubbub.appspot.com/" /><entry>
+ <title>Announcing edash</title>
+
+ <category term="information radiator" />
+
+ <link href="http://feedproxy.google.com/~r/ChrisParsons/~3/r5jZqCRf1Qk/announcing-edash" />
+ <updated>Liquid error: undefined method `xmlschema' for "2010-03-29 22:05:00 +0100":String</updated>
+ <id>http://chrismdp.github.com/2010/03/announcing-edash</id>
+ <content type="html">&lt;p&gt;I&amp;#8217;ve been hinting at the dashboard application I&amp;#8217;ve been hacking on recently and after showing it off to a few people at the &lt;a href='http://scottishrubyconference.com'&gt;Scottish Ruby Conference&lt;/a&gt; it&amp;#8217;s about time I released it open source.&lt;/p&gt;
+
+&lt;h2 id='introducing_edash'&gt;Introducing edash&lt;/h2&gt;
+
+&lt;p&gt;&lt;img src='/files/edash-1.png' alt='edash screenshot' /&gt;&lt;/p&gt;
+
+&lt;p&gt;This is the version currently running on a screen at &lt;a href='http://edendevelopment.co.uk'&gt;Eden&lt;/a&gt;.&lt;/p&gt;
+
+&lt;p&gt;&lt;em&gt;IMPORTANT NOTE: This application only works on &lt;a href='http://google.com/chrome'&gt;Chrome&lt;/a&gt;.&lt;/em&gt; There is enough browser specific hackery to render it unusable in other browsers currently. Patches to fix this are most welcome.&lt;/p&gt;
+
+&lt;p&gt;That said, I&amp;#8217;ve put together a short screencast showing it off, along with how to get it running:&lt;/p&gt;
+&lt;object height='344' width='550'&gt;&lt;param name='allowfullscreen' value='true' /&gt;&lt;param name='allowscriptaccess' value='always' /&gt;&lt;param name='movie' value='http://vimeo.com/moogaloop.swf?clip_id=10535751&amp;amp;server=vimeo.com&amp;amp;show_title=1&amp;amp;show_byline=1&amp;amp;show_portrait=0&amp;amp;color=c9ff23&amp;amp;fullscreen=1' /&gt;&lt;embed src='http://vimeo.com/moogaloop.swf?clip_id=10535751&amp;amp;server=vimeo.com&amp;amp;show_title=1&amp;amp;show_byline=1&amp;amp;show_portrait=0&amp;amp;color=c9ff23&amp;amp;fullscreen=1' allowfullscreen='true' type='application/x-shockwave-flash' allowscriptaccess='always' height='344' width='550' /&gt;&lt;/object&gt;&lt;p&gt;&lt;a href='http://vimeo.com/10535751'&gt;edash demo and usage instructions&lt;/a&gt; from &lt;a href='http://vimeo.com/user2596622'&gt;Chris Parsons&lt;/a&gt; on &lt;a href='http://vimeo.com'&gt;Vimeo&lt;/a&gt;.&lt;/p&gt;
+&lt;h2 id='getting_it_running'&gt;Getting it running&lt;/h2&gt;
+
+&lt;p&gt;Here&amp;#8217;s a minimal set of steps to get it running:&lt;/p&gt;
+&lt;div class='highlight'&gt;&lt;pre&gt;&lt;code class='bash'&gt;gem install sinatra haml sass json pstore md5 eventmachine em-http-request
+git clone git://github.com/edendevelopment/edash.git
+&lt;span class='nb'&gt;cd &lt;/span&gt;edash
+git submodule update --init
+&lt;span class='c'&gt;# runs the websocket server, make sure port 8080 is readable from where you are. Use nohup to run as a daemon.&lt;/span&gt;
+scripts/server &amp;amp;
+&lt;span class='c'&gt;# Run rackup in place, or use your favourite rack-compatible server&lt;/span&gt;
+rackup &amp;amp;
+&lt;span class='c'&gt;# post a message to the server. Add a form of this to your build hooks.&lt;/span&gt;
+curl -d &lt;span class='s2'&gt;&amp;quot;project=&amp;lt;project&amp;gt;&amp;quot;&lt;/span&gt; -d &lt;span class='s2'&gt;&amp;quot;status=&amp;lt;pass|fail|building&amp;gt;&amp;quot;&lt;/span&gt; &lt;span class='o'&gt;[&lt;/span&gt;-d &lt;span class='s2'&gt;&amp;quot;author=Name &amp;lt;email&amp;gt;&amp;quot;&lt;/span&gt;&lt;span class='o'&gt;]&lt;/span&gt; -- http://localhost:9292/build
+&lt;/code&gt;&lt;/pre&gt;
+&lt;/div&gt;
+&lt;p&gt;Check out the screencast for a walkthrough.&lt;/p&gt;
+
+&lt;h2 id='under_the_hood'&gt;Under the hood&lt;/h2&gt;
+
+&lt;p&gt;The code is &lt;a href='http://github.com/edendevelopment/edash'&gt;on github&lt;/a&gt;.&lt;/p&gt;
+
+&lt;p&gt;Check it out and let me know if you find it useful. I&amp;#8217;m trying to keep it pretty thin and build server agnostic: it should work with a number of build servers out of the box just by configuring (hacking) your server to fire off HTTP posts as shown in the screencast.&lt;/p&gt;
+
+&lt;p&gt;I&amp;#8217;d welcome patches and fixes: it should be under fairly active development in the next few weeks. The plan is to add a generic statistic tracking module that will allow us to keep track of MetricFu stats, and you to keep track of almost anything&amp;#8230; watch this blog for updates.&lt;/p&gt;&lt;img src="http://feeds.feedburner.com/~r/ChrisParsons/~4/r5jZqCRf1Qk" height="1" width="1"/&gt;</content>
+ <feedburner:origLink>http://chrismdp.github.com/2010/03/announcing-edash</feedburner:origLink></entry>
+
+</feed>

0 comments on commit 67efff7

Please sign in to comment.