Skip to content
This repository

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse code

Added a limit on the maximum number of times a background task will be retried.

Version 0.3 of the PSHB (PubSubHubbub) spec allows us to have an infinitely long lease by default, so I've removed the lease_seconds argument from the subscription process. I also updated the pollinghub to the version that supports PSHB 0.3.
We now handle subscriptions for non-existent URLs by raising an exception in the ContentParser. The background task detects this exception, logs it, and ends the task rather than propagating the error and retrying.
We now deal with feed entries that have a None value for their updated element by falling back to the current time.
  • Loading branch information...
commit 67efff78bc215fe36ee0890362230ba76722e6f7 1 parent 71c5ddf
Ade Oshineye authored
31 streamer.py
@@ -20,13 +20,18 @@
20 20 ALWAYS_USE_DEFAULT_HUB = False
21 21 # This is a hub I've set up that does polling
22 22 DEFAULT_HUB = "http://pollinghub.appspot.com/"
23   -# Use a cron job to re-subscribe to all feeds
24   -LEASE_SECONDS = "86400" * 60 #90 days
25 23 OPEN_ACCESS = False
  24 +MAX_TASK_RETRIES = 10
26 25
27 26 from google.appengine.api.labs import taskqueue
28 27 class BackGroundTaskHandler(webapp.RequestHandler):
29 28 def post(self):
  29 + logging.info("Request body %s" % self.request.body)
  30 + retryCount = self.request.headers.get('X-AppEngine-TaskRetryCount')
  31 + taskName = self.request.headers.get('X-AppEngine-TaskName')
  32 + if retryCount and int(retryCount) > MAX_TASK_RETRIES:
  33 + logging.warning("Abandoning this task: %s after %s retries" % (taskName, retryCount))
  34 + return
30 35 functionName = self.request.get('function')
31 36 logging.info("Background task being executed. Function is: <%s>" % (functionName))
32 37 if functionName == 'handleNewSubscription':
@@ -89,7 +94,6 @@ def subscribe(self):
89 94 "hub.mode" : "subscribe",
90 95 "hub.topic" : self.url,
91 96 "hub.verify" : "async", # We don't want subscriptions to block until verification happens
92   - "hub.lease_seconds" : LEASE_SECONDS,
93 97 "hub.verify_token" : SECRET_TOKEN, #TODO Must generate a token based on some secret value
94 98 }
95 99 payload = urllib.urlencode(parameters)
@@ -168,7 +172,11 @@ def handleDeleteSubscription(url):
168 172 def handleNewSubscription(url, nickname):
169 173 logging.info("Subscription added: %s by %s" % (url, nickname))
170 174
171   - parser = ContentParser(None, DEFAULT_HUB, ALWAYS_USE_DEFAULT_HUB, urlToFetch = url)
  175 + try:
  176 + parser = ContentParser(None, DEFAULT_HUB, ALWAYS_USE_DEFAULT_HUB, urlToFetch = url)
  177 + except UrlNotFoundError:
  178 + logging.warn("Url added by: %s not found: %s" % (nickname, url))
  179 + return
172 180 hub = parser.extractHub()
173 181 sourceUrl = parser.extractSourceUrl()
174 182 author = parser.extractFeedAuthor()
@@ -260,10 +268,21 @@ def post(self):
260 268 self.response.set_status(200)
261 269 self.response.out.write("Good entries")
262 270
  271 +class UrlNotFoundError(Exception):
  272 + def __init__(self, url):
  273 + self.url = url
  274 +
  275 + def __str__(self):
  276 + return self.url
  277 +
263 278 class ContentParser(object):
264 279 def __init__(self, content, defaultHub = DEFAULT_HUB, alwaysUseDefaultHub = ALWAYS_USE_DEFAULT_HUB, urlToFetch = ""):
265 280 if urlToFetch:
266   - content = urlfetch.fetch(urlToFetch).content
  281 + response = urlfetch.fetch(urlToFetch)
  282 + logging.info("Status was: [%s]" % response.status_code)
  283 + if response.status_code == 404:
  284 + raise UrlNotFoundError(urlToFetch)
  285 + content = response.content
267 286 self.data = feedparser.parse(content)
268 287 self.defaultHub = defaultHub
269 288 self.alwaysUseDefaultHub = alwaysUseDefaultHub
@@ -278,7 +297,7 @@ def logErrors(self):
278 297 logging.error('Bad feed data. %s: %r', self.data.bozo_exception.__class__.__name__, self.data.bozo_exception)
279 298
280 299 def __createDateTime(self, entry):
281   - if hasattr(entry, 'updated_parsed'):
  300 + if hasattr(entry, 'updated_parsed') and entry.updated_parsed:
282 301 return datetime.datetime(*(entry.updated_parsed[0:6]))
283 302 else:
284 303 return datetime.datetime.utcnow()
7 streamer_functional_tests.py
@@ -47,6 +47,11 @@ def testAddingNewSubscriptionsUsingTaskQueueIsIdempotent(self):
47 47 response = self.post('/bgtasks', data=data, expect_errors=True)
48 48 self.assertEqual(streamer.Subscription.all().count(), 1)
49 49
  50 + def testAddingNoneExistentFeedsDoesNotRaiseAnException(self):
  51 + data = {'function':'handleNewSubscription', 'url':'http://www.oshineye.com/404FromStreamer', 'nickname':'ade'}
  52 + response = self.post('/bgtasks', data=data, expect_errors=True)
  53 + self.assertEquals('200 OK', response.status)
  54 +
50 55 def testEnqueuesTaskForNewSubscription(self):
51 56 data = {'url':'http://blog.oshineye.com/feeds/posts/default'}
52 57 self.assertTasksInQueue(0)
@@ -99,4 +104,4 @@ class ContentParserFunctionalTest(unittest.TestCase):
99 104 def testCanExtractPostsFromRemoteSite(self):
100 105 parser = streamer.ContentParser(None, urlToFetch = "http://blog.oshineye.com/feeds/posts/default")
101 106 posts = parser.extractPosts();
102   - self.assertTrue(len(posts) > 2 )
  107 + self.assertTrue(len(posts) > 2)
22 streamer_tests.py
@@ -70,7 +70,14 @@ class ContentParserTest(unittest.TestCase):
70 70 CANONICAL_RSS_FEED = open("test_data/canonical_rss_feed").read()
71 71 VALID_ATOM_FEED = open("test_data/valid_atom_feed").read()
72 72 NO_AUTHOR_RSS_FEED = open("test_data/no_author_rss_feed").read()
73   -
  73 + NO_UPDATED_ELEMENT_FEED = open("test_data/no_updated_element_feed").read()
  74 +
  75 + def testCanExtractCorrectNumberOfPostsFromFeedWithMissingUpdatedElement(self):
  76 + parser = ContentParser(self.NO_UPDATED_ELEMENT_FEED)
  77 + posts = parser.extractPosts()
  78 + self.assertTrue(parser.dataValid())
  79 + self.assertEquals(1, len(posts))
  80 +
74 81 def testCanIdentifyPostsWithGoodData(self):
75 82 parser = ContentParser(self.SAMPLE_FEED)
76 83 posts = parser.extractPosts()
@@ -137,7 +144,7 @@ def testCanExtractAuthorNameViaDublinCoreCreatorFromRssFeed(self):
137 144
138 145 def testCanExtractHubFromFeed(self):
139 146 parser = ContentParser(self.BLOGGER_FEED)
140   - hub = parser.extractHub();
  147 + hub = parser.extractHub()
141 148 self.assertEquals("http://pubsubhubbub.appspot.com/", hub)
142 149
143 150 def testCanOverrideHubForFeed(self):
@@ -148,14 +155,13 @@ def testCanOverrideHubForFeed(self):
148 155 parser.alwaysUseDefaultHub = True
149 156 self.assertEquals(fakeDefaultHub, parser.extractHub())
150 157
151   - def testCanExtractHubFromFeedburnerFeed(self):
152   - parser = ContentParser(self.FEEDBURNER_FEED)
153   - hub = parser.extractHub();
154   - self.assertEquals("http://pubsubhubbub.appspot.com", hub)
  158 + def testCanExtractHubFromFeedburnerFeeds(self):
  159 + self.assertEquals("http://pubsubhubbub.appspot.com", ContentParser(self.FEEDBURNER_FEED).extractHub())
  160 + self.assertEquals("http://pubsubhubbub.appspot.com/", ContentParser(self.NO_UPDATED_ELEMENT_FEED).extractHub())
155 161
156 162 def testCanExtractsDefaultHubForHubLessFeeds(self):
157 163 parser = ContentParser(self.HUBLESS_FEED)
158   - hub = parser.extractHub();
  164 + hub = parser.extractHub()
159 165 self.assertEquals(DEFAULT_HUB, hub)
160 166
161 167 def testCanExtractFeedUrls(self):
@@ -165,6 +171,7 @@ def testCanExtractFeedUrls(self):
165 171 self.assertEquals("http://feeds.feedburner.com/PlanetTw", ContentParser(self.FEEDBURNER_FEED).extractFeedUrl())
166 172 self.assertEquals("http://news.ycombinator.com/rss", ContentParser(self.RSS_FEED).extractFeedUrl())
167 173 self.assertEquals("http://www.scripting.com/rss", ContentParser(self.CANONICAL_RSS_FEED).extractFeedUrl())
  174 + self.assertEquals("http://feeds.feedburner.com/ChrisParsons", ContentParser(self.NO_UPDATED_ELEMENT_FEED).extractFeedUrl())
168 175
169 176 def testCanExtractSourceUrls(self):
170 177 self.assertEquals("http://pubsubhubbub-loadtest.appspot.com/foo", ContentParser(self.SAMPLE_FEED).extractSourceUrl())
@@ -173,3 +180,4 @@ def testCanExtractSourceUrls(self):
173 180 self.assertEquals("http://blogs.thoughtworks.com/", ContentParser(self.FEEDBURNER_FEED).extractSourceUrl())
174 181 self.assertEquals("http://news.ycombinator.com/", ContentParser(self.RSS_FEED).extractSourceUrl())
175 182 self.assertEquals("http://www.scripting.com/", ContentParser(self.CANONICAL_RSS_FEED).extractSourceUrl())
  183 + self.assertEquals("http://chrismdp.github.com/", ContentParser(self.NO_UPDATED_ELEMENT_FEED).extractSourceUrl())
61 test_data/no_updated_element_feed
... ... @@ -0,0 +1,61 @@
  1 +<?xml version="1.0" encoding="UTF-8"?>
  2 +<?xml-stylesheet type="text/xsl" media="screen" href="/~d/styles/atom10full.xsl"?><?xml-stylesheet type="text/css" media="screen" href="http://feeds.feedburner.com/~d/styles/itemcontent.css"?><feed xmlns="http://www.w3.org/2005/Atom" xmlns:feedburner="http://rssnamespace.org/feedburner/ext/1.0">
  3 +
  4 + <title>Chris Parsons</title>
  5 +
  6 + <link href="http://chrismdp.github.com/" />
  7 + <updated>2010-03-30T02:13:18-07:00</updated>
  8 + <id>http://chrismdp.github.com/</id>
  9 + <author>
  10 + <name>Chris Parsons</name>
  11 + <email>chrismdp@gmail.com</email>
  12 + </author>
  13 +
  14 +
  15 + <atom10:link xmlns:atom10="http://www.w3.org/2005/Atom" rel="self" type="application/atom+xml" href="http://feeds.feedburner.com/ChrisParsons" /><feedburner:info uri="chrisparsons" /><atom10:link xmlns:atom10="http://www.w3.org/2005/Atom" rel="hub" href="http://pubsubhubbub.appspot.com/" /><entry>
  16 + <title>Announcing edash</title>
  17 +
  18 + <category term="information radiator" />
  19 +
  20 + <link href="http://feedproxy.google.com/~r/ChrisParsons/~3/r5jZqCRf1Qk/announcing-edash" />
  21 + <updated>Liquid error: undefined method `xmlschema' for "2010-03-29 22:05:00 +0100":String</updated>
  22 + <id>http://chrismdp.github.com/2010/03/announcing-edash</id>
  23 + <content type="html">&lt;p&gt;I&amp;#8217;ve been hinting at the dashboard application I&amp;#8217;ve been hacking on recently and after showing it off to a few people at the &lt;a href='http://scottishrubyconference.com'&gt;Scottish Ruby Conference&lt;/a&gt; it&amp;#8217;s about time I released it open source.&lt;/p&gt;
  24 +
  25 +&lt;h2 id='introducing_edash'&gt;Introducing edash&lt;/h2&gt;
  26 +
  27 +&lt;p&gt;&lt;img src='/files/edash-1.png' alt='edash screenshot' /&gt;&lt;/p&gt;
  28 +
  29 +&lt;p&gt;This is the version currently running on a screen at &lt;a href='http://edendevelopment.co.uk'&gt;Eden&lt;/a&gt;.&lt;/p&gt;
  30 +
  31 +&lt;p&gt;&lt;em&gt;IMPORTANT NOTE: This application only works on &lt;a href='http://google.com/chrome'&gt;Chrome&lt;/a&gt;.&lt;/em&gt; There is enough browser specific hackery to render it unusable in other browsers currently. Patches to fix this are most welcome.&lt;/p&gt;
  32 +
  33 +&lt;p&gt;That said, I&amp;#8217;ve put together a short screencast showing it off, along with how to get it running:&lt;/p&gt;
  34 +&lt;object height='344' width='550'&gt;&lt;param name='allowfullscreen' value='true' /&gt;&lt;param name='allowscriptaccess' value='always' /&gt;&lt;param name='movie' value='http://vimeo.com/moogaloop.swf?clip_id=10535751&amp;amp;server=vimeo.com&amp;amp;show_title=1&amp;amp;show_byline=1&amp;amp;show_portrait=0&amp;amp;color=c9ff23&amp;amp;fullscreen=1' /&gt;&lt;embed src='http://vimeo.com/moogaloop.swf?clip_id=10535751&amp;amp;server=vimeo.com&amp;amp;show_title=1&amp;amp;show_byline=1&amp;amp;show_portrait=0&amp;amp;color=c9ff23&amp;amp;fullscreen=1' allowfullscreen='true' type='application/x-shockwave-flash' allowscriptaccess='always' height='344' width='550' /&gt;&lt;/object&gt;&lt;p&gt;&lt;a href='http://vimeo.com/10535751'&gt;edash demo and usage instructions&lt;/a&gt; from &lt;a href='http://vimeo.com/user2596622'&gt;Chris Parsons&lt;/a&gt; on &lt;a href='http://vimeo.com'&gt;Vimeo&lt;/a&gt;.&lt;/p&gt;
  35 +&lt;h2 id='getting_it_running'&gt;Getting it running&lt;/h2&gt;
  36 +
  37 +&lt;p&gt;Here&amp;#8217;s a minimal set of steps to get it running:&lt;/p&gt;
  38 +&lt;div class='highlight'&gt;&lt;pre&gt;&lt;code class='bash'&gt;gem install sinatra haml sass json pstore md5 eventmachine em-http-request
  39 +git clone git://github.com/edendevelopment/edash.git
  40 +&lt;span class='nb'&gt;cd &lt;/span&gt;edash
  41 +git submodule update --init
  42 +&lt;span class='c'&gt;# runs the websocket server, make sure port 8080 is readable from where you are. Use nohup to run as a daemon.&lt;/span&gt;
  43 +scripts/server &amp;amp;
  44 +&lt;span class='c'&gt;# Run rackup in place, or use your favourite rack-compatible server&lt;/span&gt;
  45 +rackup &amp;amp;
  46 +&lt;span class='c'&gt;# post a message to the server. Add a form of this to your build hooks.&lt;/span&gt;
  47 +curl -d &lt;span class='s2'&gt;&amp;quot;project=&amp;lt;project&amp;gt;&amp;quot;&lt;/span&gt; -d &lt;span class='s2'&gt;&amp;quot;status=&amp;lt;pass|fail|building&amp;gt;&amp;quot;&lt;/span&gt; &lt;span class='o'&gt;[&lt;/span&gt;-d &lt;span class='s2'&gt;&amp;quot;author=Name &amp;lt;email&amp;gt;&amp;quot;&lt;/span&gt;&lt;span class='o'&gt;]&lt;/span&gt; -- http://localhost:9292/build
  48 +&lt;/code&gt;&lt;/pre&gt;
  49 +&lt;/div&gt;
  50 +&lt;p&gt;Check out the screencast for a walkthrough.&lt;/p&gt;
  51 +
  52 +&lt;h2 id='under_the_hood'&gt;Under the hood&lt;/h2&gt;
  53 +
  54 +&lt;p&gt;The code is &lt;a href='http://github.com/edendevelopment/edash'&gt;on github&lt;/a&gt;.&lt;/p&gt;
  55 +
  56 +&lt;p&gt;Check it out and let me know if you find it useful. I&amp;#8217;m trying to keep it pretty thin and build server agnostic: it should work with a number of build servers out of the box just by configuring (hacking) your server to fire off HTTP posts as shown in the screencast.&lt;/p&gt;
  57 +
  58 +&lt;p&gt;I&amp;#8217;d welcome patches and fixes: it should be under fairly active development in the next few weeks. The plan is to add a generic statistic tracking module that will allow us to keep track of MetricFu stats, and you to keep track of almost anything&amp;#8230; watch this blog for updates.&lt;/p&gt;&lt;img src="http://feeds.feedburner.com/~r/ChrisParsons/~4/r5jZqCRf1Qk" height="1" width="1"/&gt;</content>
  59 + <feedburner:origLink>http://chrismdp.github.com/2010/03/announcing-edash</feedburner:origLink></entry>
  60 +
  61 +</feed>

0 comments on commit 67efff7

Please sign in to comment.
Something went wrong with that request. Please try again.