Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse files

Fixed the aggregator's handling of feeds that are themselves aggregators.
  • Loading branch information...
commit 3c9d2dda65466c294cac80ed508b48e6d21f770e 1 parent 8d22c32
@jacobian jacobian authored
View
17 django_website/aggregator/management/commands/update_feeds.py
@@ -59,6 +59,7 @@ class FeedUpdateWorker(threading.Thread):
def __init__(self, q, verbose, **kwargs):
super(FeedUpdateWorker, self).__init__(**kwargs)
+ self.daemon = True
self.verbose = verbose
self.q = q
@@ -77,13 +78,14 @@ def update_feed(self, feed):
parsed_feed = feedparser.parse(feed.feed_url)
for entry in parsed_feed.entries:
+ # Parse out the entry, handling all the fun stuff that feeds can do.
title = entry.title.encode(parsed_feed.encoding, "xmlcharrefreplace")
guid = entry.get("id", entry.link).encode(parsed_feed.encoding, "xmlcharrefreplace")
link = entry.link.encode(parsed_feed.encoding, "xmlcharrefreplace")
if not guid:
guid = link
-
+
if hasattr(entry, "summary"):
content = entry.summary
elif hasattr(entry, "content"):
@@ -105,8 +107,11 @@ def update_feed(self, feed):
date_modified = datetime.datetime.now()
except TypeError:
date_modified = datetime.datetime.now()
-
- try:
- feed.feeditem_set.get(guid=guid)
- except FeedItem.DoesNotExist:
- feed.feeditem_set.create(title=title, link=link, summary=content, guid=guid, date_modified=date_modified)
+
+ FeedItem.objects.create_or_update_by_guid(guid,
+ feed = feed,
+ title = title,
+ link = link,
+ summary = content,
+ date_modified = date_modified
+ )
View
31 django_website/aggregator/models.py
@@ -24,6 +24,35 @@ class Meta:
def __unicode__(self):
return self.title
+class FeedItemManager(models.Manager):
+ def create_or_update_by_guid(self, guid, **kwargs):
+ """
+ Look up a FeedItem by GUID, updating it if it exists, and creating
+ it if it doesn't.
+
+ We don't limit it by feed because an item could be in another feed if
+ some feeds are themselves aggregators. That's also why we don't update
+ the feed field if the feed item already exists.
+
+ The keyword arguments are passed to create() (together with the guid)
+ when a new item is made; for an existing item they are set as
+ attributes, except for 'feed', and the item is then saved.
+
+ Returns the item only. Unlike get_or_create(), no "created" flag is
+ returned.
+ """
+ try:
+ item = self.get(guid=guid)
+
+ except self.model.DoesNotExist:
+ # No item with this GUID yet: create a new one from the given values.
+ kwargs['guid'] = guid
+ item = self.create(**kwargs)
+
+ else:
+ # Update the existing item, dropping 'feed' so that it stays
+ # attached to the feed it was first stored under.
+ kwargs.pop('feed', None)
+ for k,v in kwargs.items():
+ setattr(item, k, v)
+ item.save()
+
+ return item
+
class FeedItem(models.Model):
feed = models.ForeignKey(Feed)
title = models.CharField(max_length=500)
@@ -32,6 +61,8 @@ class FeedItem(models.Model):
date_modified = models.DateTimeField()
guid = models.CharField(max_length=500, unique=True, db_index=True)
+ objects = FeedItemManager()
+
class Meta:
db_table = 'aggregator_feeditems'
ordering = ("-date_modified",)
Please sign in to comment.
Something went wrong with that request. Please try again.