Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.
Download ZIP
Browse files

Switched to using PSHB (via Superfeedr) for aggregator updates.

  • Loading branch information...
commit e486a97137b82cb7d6801e2a657889e3d0152eb5 1 parent 7249676
@jacobian jacobian authored
View
7 deploy-requirements.txt
@@ -5,7 +5,7 @@ Django >= 1.2, < 1.3
django-haystack == 1.1.0
django-registration == 0.7
docutils >= 0.6, < 0.7
-FeedParser >= 4.1, < 5.0
+FeedParser >= 5.0, <= 5.1
Jinja2 >= 2.4, < 2.5
psycopg2 >= 2.2, < 2.3
python-memcached >= 1.45, < 2.0
@@ -18,4 +18,7 @@ http://bitbucket.org/ubernostrum/django-contact-form/get/tip.bz2
# xapian-haystack has a bug in 1.1.5beta fixed in trunk. So this
# can go back to a packaged release as soon as there's a newer release.
--e git://github.com/notanumber/xapian-haystack.git@dc11c14542c0137831e5#egg=xapian-haystack
+-e git://github.com/notanumber/xapian-haystack.git@dc11c14542c0137831e5#egg=xapian-haystack
+
+# django_push has a number of bugs in 0.3 fixed on trunk.
+-e git://github.com/brutasse/django-push.git@7ebbb23a0acc74750763#egg=django_push
View
0  django_website/aggregator/management/__init__.py
No changes.
View
0  django_website/aggregator/management/commands/__init__.py
No changes.
View
19 django_website/aggregator/management/commands/mark_defunct_feeds.py
@@ -1,19 +0,0 @@
-import socket
-import urllib2
-import httplib
-from django.core.management.base import BaseCommand
-from django_website.aggregator.models import Feed
-
-class Command(BaseCommand):
- """
- Mark people with 404'ing feeds as defunct.
- """
- def handle(self, *args, **kwargs):
- for f in Feed.objects.filter(is_defunct=False):
- try:
- urllib2.urlopen(f.feed_url, timeout=15)
- except (urllib2.HTTPError, urllib2.URLError, httplib.HTTPException,
- socket.timeout), e:
- print "%s on '%s'; marking defunct" % (e, f)
- f.is_defunct = True
- f.save()
View
146 django_website/aggregator/management/commands/update_feeds.py
@@ -1,146 +0,0 @@
-import logging
-import datetime
-import feedparser
-import optparse
-import os
-import socket
-import sys
-import time
-import threading
-import Queue
-from django.core.management.base import BaseCommand
-from django_website.aggregator.models import Feed, FeedItem
-
-class Command(BaseCommand):
- """
- Update feeds for Django community page. Requires Mark Pilgrim's excellent
- Universal Feed Parser (http://feedparser.org)
- """
- LOCKFILE = "/tmp/update_feeds.lock"
-
- option_list = BaseCommand.option_list + (
- optparse.make_option('-t', '--threads',
- metavar='NUM',
- type='int',
- default=4,
- help='Number of updater threads (default: 4).'
- ),
- )
-
- def handle(self, *args, **kwargs):
- log = self.setup_logging()
- log.debug('Starting run.')
-
- try:
- lockfile = os.open(self.LOCKFILE, os.O_CREAT | os.O_EXCL)
- except OSError:
- print >> sys.stderr, "Lockfile exists (%s). Aborting." % self.LOCKFILE
- sys.exit(1)
-
- try:
- verbose = int(kwargs['verbosity']) > 0
- except (KeyError, TypeError, ValueError):
- verbose = True
-
- try:
- self.update_feeds(verbose=verbose, num_threads=kwargs['threads'])
- except:
- log.exception('Uncaught exception updating feeds.')
- finally:
- log.debug('Cleaning up.')
- os.close(lockfile)
- os.unlink(self.LOCKFILE)
-
- log.debug('Ending run.')
-
- def update_feeds(self, verbose=False, num_threads=4):
- feed_queue = Queue.Queue()
- for feed in Feed.objects.filter(is_defunct=False):
- feed_queue.put(feed)
-
- threadpool = []
- for i in range(num_threads):
- threadpool.append(FeedUpdateWorker(q=feed_queue, verbose=verbose))
-
- [t.start() for t in threadpool]
- [t.join() for t in threadpool]
-
- def setup_logging(self):
- log = logging.getLogger('django_website.update_feeds')
- log.setLevel(logging.DEBUG)
- handler = logging.FileHandler('/var/log/update_feeds.log')
- handler.setLevel(logging.DEBUG)
- formatter = logging.Formatter('%(asctime)s [%(levelname)s] (%(thread)d) %(message)s')
- handler.setFormatter(formatter)
- log.addHandler(handler)
- return log
-
-class FeedUpdateWorker(threading.Thread):
-
- def __init__(self, q, verbose, **kwargs):
- super(FeedUpdateWorker, self).__init__(**kwargs)
- self.daemon = True
- self.verbose = verbose
- self.q = q
- self.log = logging.getLogger('django_website.update_feeds')
-
- def run(self):
- while 1:
- try:
- feed = self.q.get_nowait()
- except Queue.Empty:
- return
- self.update_feed(feed)
- self.q.task_done()
-
- def update_feed(self, feed):
- self.log.debug('Starting update: %s.' % feed)
- if self.verbose:
- print feed
-
- try:
- socket.setdefaulttimeout(15)
- parsed_feed = feedparser.parse(feed.feed_url)
- except Exception:
- self.log.exception('Error updating %s.' % feed)
- return
-
- for entry in parsed_feed.entries:
- # Parse out the entry, handling all the fun stuff that feeds can do.
- title = entry.title
- guid = entry.get("id", entry.link)
- link = entry.link
-
- if not guid:
- guid = link
-
- if hasattr(entry, "summary"):
- content = entry.summary
- elif hasattr(entry, "content"):
- content = entry.content[0].value
- elif hasattr(entry, "description"):
- content = entry.description
- else:
- content = u""
-
- try:
- if entry.has_key('modified_parsed'):
- date_modified = datetime.datetime.fromtimestamp(time.mktime(entry.modified_parsed))
- elif parsed_feed.feed.has_key('modified_parsed'):
- date_modified = datetime.datetime.fromtimestamp(time.mktime(parsed_feed.feed.modified_parsed))
- elif parsed_feed.has_key('modified'):
- date_modified = datetime.datetime.fromtimestamp(time.mktime(parsed_feed.modified))
- else:
- date_modified = datetime.datetime.now()
- except TypeError:
- date_modified = datetime.datetime.now()
-
- FeedItem.objects.create_or_update_by_guid(guid,
- feed = feed,
- title = title,
- link = link,
- summary = content,
- date_modified = date_modified
- )
-
- self.log.debug('Done with %s.' % feed)
View
44 django_website/aggregator/models.py
@@ -1,5 +1,9 @@
+import datetime
from django.db import models
from django.contrib.auth.models import User
+from django.conf import settings
+from django_push.subscriber import signals as push_signals
+from django_push.subscriber.models import Subscription
class FeedType(models.Model):
name = models.CharField(max_length=250)
@@ -22,6 +26,10 @@ class Feed(models.Model):
def __unicode__(self):
return self.title
+
+ def save(self, **kwargs):
+ super(Feed, self).save(**kwargs)
+ Subscription.objects.subscribe(self.feed_url, settings.PUSH_HUB)
class FeedItemManager(models.Manager):
def create_or_update_by_guid(self, guid, **kwargs):
@@ -74,3 +82,39 @@ def __unicode__(self):
def get_absolute_url(self):
return self.link
+
+def feed_updated(sender, notification, **kwargs):
+ try:
+ feed = Feed.objects.get(feed_url=sender.topic)
+ except Feed.DoesNotExist:
+ return
+
+ for entry in notification.entries:
+ title = entry.title
+ guid = entry.get("id", entry.link)
+ link = entry.link or guid
+
+ if hasattr(entry, "summary"):
+ content = entry.summary
+ elif hasattr(entry, "content"):
+ content = entry.content[0].value
+ elif hasattr(entry, "description"):
+ content = entry.description
+ else:
+ content = u""
+
+ if entry.has_key('updated_parsed'):
+ date_modified = datetime.datetime(*entry.updated_parsed[:6])
+ else:
+ date_modified = datetime.datetime.now()
+
+ FeedItem.objects.create_or_update_by_guid(guid,
+ feed = feed,
+ title = title,
+ link = link,
+ summary = content,
+ date_modified = date_modified
+ )
+
+push_signals.updated.connect(feed_updated)
+
View
9 django_website/aggregator/utils.py
@@ -0,0 +1,9 @@
+from django.conf import settings
+
+def push_credentials(hub_url):
+ """
+ Callback for django_push to get a hub's credentials.
+
+ We always use superfeedr so this is easy.
+ """
+ return tuple(settings.SECRETS['superfeedr_creds'])
View
9 django_website/settings/www.py
@@ -12,7 +12,8 @@
PRODUCTION = ('DJANGOPROJECT_DEBUG' not in os.environ) and ("djangoproject" in platform.node())
# It's a secret to everybody
-SECRET_KEY = str(json.load(open(BASE.ancestor(2).child('secrets.json')))['secret_key'])
+SECRETS = json.load(open(BASE.ancestor(2).child('secrets.json')))
+SECRET_KEY = str(SECRETS['secret_key'])
ADMINS = (('Adrian Holovaty','holovaty@gmail.com'),('Jacob Kaplan-Moss', 'jacob@jacobian.org'))
MANAGERS = (('Jacob Kaplan-Moss','jacob@jacobian.org'),)
@@ -60,6 +61,7 @@
'django.contrib.redirects',
'django.contrib.sessions',
'django.contrib.sitemaps',
+ 'django_push.subscriber',
'django_website.blog',
'django_website.aggregator',
'django_website.docs',
@@ -111,6 +113,11 @@
# XXX What's this for?
DJANGO_SVN_ROOT = "http://code.djangoproject.com/svn/django/"
+# PubSubHubbub settings
+PUSH_HUB = 'https://superfeedr.com/hubbub'
+PUSH_CREDENTIALS = 'django_website.aggregator.utils.push_credentials'
+PUSH_SSL_CALLBACK = PRODUCTION
+
# If django-debug-toolbar is installed enable it.
if not PRODUCTION:
try:
View
5 django_website/urls/www.py
@@ -44,6 +44,9 @@
# just use a flatpage for it.
url(r'^foundation/donate/thanks/$', 'django_website.views.donate_thanks'),
+ # django-push
+ url(r'^subscriber/', include('django_push.subscriber.urls')),
+
url(r'^sitemap\.xml$', cache_page(sitemap_views.sitemap, 60 * 60 * 6), {'sitemaps': sitemaps}),
url(r'^weblog/', include('django_website.blog.urls')),
url(r'^freenode\.9xJY7YIUWtwn\.html$', 'django.views.generic.simple.direct_to_template', {'template': 'freenode_tmp.html'}),
@@ -59,7 +62,7 @@
)
urlpatterns += patterns('',
- # flatpages need to be last b/c they match anything
+# flatpages need to be last b/c they match anything
(r'', include('django.contrib.flatpages.urls')),
)

3 comments on commit e486a97

@superfeedr

Nice commit :) Happy to chat about the PubSubHubbub/Superfeedr integration!

@jacobian
Collaborator

Seems to be working pretty well, so thanks!

@superfeedr

Awesome! Please get in touch with any questions, thoughts, or feedback :)
PS: I hope it wasn't creepy; I used our 'track' feature on the 'superfeedr' keyword to get notifications of any mention of superfeedr on feeds we track :)

Please sign in to comment.
Something went wrong with that request. Please try again.