Commit

Fixed the aggregator's handling of feeds that are themselves aggregators.
jacobian committed Jan 27, 2011
1 parent 8d22c32 commit 3c9d2dd
Showing 2 changed files with 42 additions and 6 deletions.
django_website/aggregator/management/commands/update_feeds.py (17 changes: 11 additions & 6 deletions)

@@ -59,6 +59,7 @@ class FeedUpdateWorker(threading.Thread):

     def __init__(self, q, verbose, **kwargs):
         super(FeedUpdateWorker, self).__init__(**kwargs)
+        self.daemon = True
         self.verbose = verbose
         self.q = q

@@ -77,13 +78,14 @@ def update_feed(self, feed):

         parsed_feed = feedparser.parse(feed.feed_url)
         for entry in parsed_feed.entries:
+            # Parse out the entry, handling all the fun stuff that feeds can do.
             title = entry.title.encode(parsed_feed.encoding, "xmlcharrefreplace")
             guid = entry.get("id", entry.link).encode(parsed_feed.encoding, "xmlcharrefreplace")
             link = entry.link.encode(parsed_feed.encoding, "xmlcharrefreplace")

             if not guid:
                 guid = link

             if hasattr(entry, "summary"):
                 content = entry.summary
             elif hasattr(entry, "content"):
@@ -105,8 +107,11 @@ def update_feed(self, feed):
                 date_modified = datetime.datetime.now()
             except TypeError:
                 date_modified = datetime.datetime.now()

-            try:
-                feed.feeditem_set.get(guid=guid)
-            except FeedItem.DoesNotExist:
-                feed.feeditem_set.create(title=title, link=link, summary=content, guid=guid, date_modified=date_modified)
+            FeedItem.objects.create_or_update_by_guid(guid,
+                feed = feed,
+                title = title,
+                link = link,
+                summary = content,
+                date_modified = date_modified
+            )
django_website/aggregator/models.py (31 changes: 31 additions & 0 deletions)

@@ -24,6 +24,35 @@ class Meta:
     def __unicode__(self):
         return self.title

+class FeedItemManager(models.Manager):
+    def create_or_update_by_guid(self, guid, **kwargs):
+        """
+        Look up a FeedItem by GUID, updating it if it exists, and creating
+        it if it doesn't.
+
+        We don't limit it by feed because an item could be in another feed if
+        some feeds are themselves aggregators. That's also why we don't update
+        the feed field if the feed item already exists.
+
+        Returns the item.
+        """
+        try:
+            item = self.get(guid=guid)
+
+        except self.model.DoesNotExist:
+            # Create a new item
+            kwargs['guid'] = guid
+            item = self.create(**kwargs)
+
+        else:
+            # Update an existing one.
+            kwargs.pop('feed', None)
+            for k, v in kwargs.items():
+                setattr(item, k, v)
+            item.save()
+
+        return item
+
 class FeedItem(models.Model):
     feed = models.ForeignKey(Feed)
     title = models.CharField(max_length=500)
@@ -32,6 +61,8 @@ class FeedItem(models.Model):
     date_modified = models.DateTimeField()
     guid = models.CharField(max_length=500, unique=True, db_index=True)

+    objects = FeedItemManager()
+
     class Meta:
         db_table = 'aggregator_feeditems'
         ordering = ("-date_modified",)
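Taken together, the two changes shift entry de-duplication from a per-feed lookup to a global one. Under the old code, an entry already stored for one feed would not be found by feed.feeditem_set.get(guid=guid) when a second, aggregating feed republished it, and the follow-up create() would then run into the unique guid column; the new manager method looks items up by GUID alone and never reassigns the feed of an existing item. Below is a minimal usage sketch of FeedItemManager.create_or_update_by_guid, not part of the commit: the import path follows the file paths above, and the two Feed objects and all field values are purely illustrative.

import datetime

from django_website.aggregator.models import Feed, FeedItem

# Hypothetical feeds: `weblog` is an ordinary source feed, `planet` is an
# aggregator feed that republishes weblog's entries under the same GUIDs.
weblog = Feed.objects.get(pk=1)
planet = Feed.objects.get(pk=2)

# First sighting of an entry: no FeedItem with this GUID exists yet, so the
# manager creates one and attaches it to `weblog`.
item = FeedItem.objects.create_or_update_by_guid(
    "http://weblog.example.com/post/42/",
    feed=weblog,
    title="Example post",
    link="http://weblog.example.com/post/42/",
    summary="First version of the summary.",
    date_modified=datetime.datetime.now(),
)

# The same GUID later arrives via the aggregator feed: the existing item is
# updated in place (title, link, summary, date_modified), but the `feed`
# kwarg is popped, so the item stays attached to `weblog`.
item = FeedItem.objects.create_or_update_by_guid(
    "http://weblog.example.com/post/42/",
    feed=planet,
    title="Example post",
    link="http://planet.example.com/items/42/",
    summary="Summary as republished by the aggregator.",
    date_modified=datetime.datetime.now(),
)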
