Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.
Download ZIP
Browse files

Fixed the aggregator's handling of feeds that are themselves aggregators.
  • Loading branch information...
commit 3c9d2dda65466c294cac80ed508b48e6d21f770e 1 parent 8d22c32
@jacobian jacobian authored
View
17 django_website/aggregator/management/commands/update_feeds.py
@@ -59,6 +59,7 @@ class FeedUpdateWorker(threading.Thread):
def __init__(self, q, verbose, **kwargs):
super(FeedUpdateWorker, self).__init__(**kwargs)
+ self.daemon = True
self.verbose = verbose
self.q = q
@@ -77,13 +78,14 @@ def update_feed(self, feed):
parsed_feed = feedparser.parse(feed.feed_url)
for entry in parsed_feed.entries:
+ # Parse out the entry, handling all the fun stuff that feeds can do.
title = entry.title.encode(parsed_feed.encoding, "xmlcharrefreplace")
guid = entry.get("id", entry.link).encode(parsed_feed.encoding, "xmlcharrefreplace")
link = entry.link.encode(parsed_feed.encoding, "xmlcharrefreplace")
if not guid:
guid = link
-
+
if hasattr(entry, "summary"):
content = entry.summary
elif hasattr(entry, "content"):
@@ -105,8 +107,11 @@ def update_feed(self, feed):
date_modified = datetime.datetime.now()
except TypeError:
date_modified = datetime.datetime.now()
-
- try:
- feed.feeditem_set.get(guid=guid)
- except FeedItem.DoesNotExist:
- feed.feeditem_set.create(title=title, link=link, summary=content, guid=guid, date_modified=date_modified)
+
+ FeedItem.objects.create_or_update_by_guid(guid,
+ feed = feed,
+ title = title,
+ link = link,
+ summary = content,
+ date_modified = date_modified
+ )
View
31 django_website/aggregator/models.py
@@ -24,6 +24,35 @@ class Meta:
def __unicode__(self):
return self.title
+class FeedItemManager(models.Manager):
+ def create_or_update_by_guid(self, guid, **kwargs):
+ """
+ Look up a FeedItem by GUID, updating it if it exists, and creating
+ it if it doesn't.
+
+ We don't limit it by feed because an item could be in another feed if
+ some feeds are themselves aggregators. That's also why we don't update
+ the feed field if the feed item already exists.
+
+ The remaining keyword arguments are the field values to set on the
+ item.
+
+ Returns the created or updated item. Unlike get_or_create(), no
+ "created" flag is returned.
+ """
+ try:
+ item = self.get(guid=guid)
+
+ except self.model.DoesNotExist:
+ # Create a new item
+ kwargs['guid'] = guid
+ item = self.create(**kwargs)
+
+ else:
+ # Update an existing one, dropping 'feed' so the item stays attached
+ # to the feed it was first created under.
+ kwargs.pop('feed', None)
+ for k,v in kwargs.items():
+ setattr(item, k, v)
+ item.save()
+
+ return item
+
class FeedItem(models.Model):
feed = models.ForeignKey(Feed)
title = models.CharField(max_length=500)
@@ -32,6 +61,8 @@ class FeedItem(models.Model):
date_modified = models.DateTimeField()
guid = models.CharField(max_length=500, unique=True, db_index=True)
+ objects = FeedItemManager()
+
class Meta:
db_table = 'aggregator_feeditems'
ordering = ("-date_modified",)
Please sign in to comment.
Something went wrong with that request. Please try again.