Skip to content

Commit

Permalink
removed the default to request gzip or deflate since it was causing s…
Browse files Browse the repository at this point in the history
…egfaults on some feeds. removed ITunesRSS as a default parser since it broke normalization.
  • Loading branch information
pauldix committed Apr 17, 2009
1 parent d719f08 commit 6219578
Show file tree
Hide file tree
Showing 5 changed files with 2,234 additions and 19 deletions.
9 changes: 8 additions & 1 deletion README.textile
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,12 @@ Feedzirra::Feed.add_common_feed_entry_element("wfw:commentRss", :as => :comment_
# AtomEntry classes. Now you can access those in an atom feed:
Feedzirra::Feed.parse(some_atom_xml).entries.first.comment_rss_ # => wfw:commentRss is now parsed!


# You can also define your own parsers and add them to the ones Feedzirra knows about. Here's an example that adds
# ITunesRSS parsing. It's included in the library, but not part of Feedzirra by default because some of the field names
# differ from other classes, thus breaking normalization.
Feedzirra::Feed.add_feed_class(ITunesRSS) # now all feeds will be checked to see if they match ITunesRSS before others

# You can also access http basic auth feeds. Unfortunately, you can't get to these inside of a bulk get of a bunch of feeds.
# You'll have to do it on its own like so:
Feedzirra::Feed.fetch_and_parse(some_url, :http_authentication => ["myusername", "mypassword"])
Expand Down Expand Up @@ -151,7 +157,8 @@ This thing needs to hammer on many different feeds in the wild. I'm sure there w
Here are some more specific TODOs.
* Fix the iTunes parser so things are normalized again
* Fix the Zlib deflate error
* Fork taf2-curb and require that in feedzirra
* Fix this error: http://github.com/inbox/70508
* Convert to use Typhoeus instead of taf2-curb
* Make the entries parse all link fields
* Make a feedzirra-rails gem to integrate feedzirra seamlessly with Rails and ActiveRecord.
* Create a super sweet DSL for defining new parsers.
Expand Down
44 changes: 26 additions & 18 deletions lib/feedzirra/feed.rb
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ def self.add_feed_class(klass)
# === Returns
# A array of class names.
# Returns the list of parser classes, in priority order, that Feedzirra
# tries when deciding how to parse a feed.
#
# ITunesRSS is intentionally NOT in the defaults: its field names differ
# from the other parsers and break normalization. Opt in explicitly with
# Feedzirra::Feed.add_feed_class(ITunesRSS) if you need it.
#
# === Returns
# An array of parser classes.
def self.feed_classes
  @feed_classes ||= [RSS, AtomFeedBurner, Atom]
end

# Makes all entry types look for the passed in element to parse. This is actually just a call to
Expand All @@ -58,7 +58,7 @@ def self.feed_classes
# Registers +element_tag+ as a parsed element on every feed class currently
# returned by feed_classes, by calling the SAXMachine +element+ macro on each.
#
# === Parameters
# [element_tag<String>] the XML element name to parse (e.g. "wfw:commentRss")
# [options<Hash>] options forwarded to the +element+ macro (e.g. :as => :comment_rss)
def self.add_common_feed_entry_element(element_tag, options = {})
  # need to think of a better way to do this. will break for people who want this behavior
  # across their added classes (classes registered after this call are not affected)
  feed_classes.each do |klass|
    klass.send(:element, element_tag, options)
  end
end
Expand Down Expand Up @@ -100,7 +100,7 @@ def self.fetch_raw(urls, options = {})
curl.headers["User-Agent"] = (options[:user_agent] || USER_AGENT)
curl.headers["If-Modified-Since"] = options[:if_modified_since].httpdate if options.has_key?(:if_modified_since)
curl.headers["If-None-Match"] = options[:if_none_match] if options.has_key?(:if_none_match)
curl.headers["Accept-encoding"] = 'gzip, deflate'
# curl.headers["Accept-encoding"] = 'gzip, deflate'
curl.follow_location = true
curl.userpwd = options[:http_authentication].join(':') if options.has_key?(:http_authentication)

Expand Down Expand Up @@ -216,7 +216,7 @@ def self.add_url_to_multi(multi, url, url_queue, responses, options)
curl.headers["User-Agent"] = (options[:user_agent] || USER_AGENT)
curl.headers["If-Modified-Since"] = options[:if_modified_since].httpdate if options.has_key?(:if_modified_since)
curl.headers["If-None-Match"] = options[:if_none_match] if options.has_key?(:if_none_match)
curl.headers["Accept-encoding"] = 'gzip, deflate'
# curl.headers["Accept-encoding"] = 'gzip, deflate'
curl.follow_location = true
curl.userpwd = options[:http_authentication].join(':') if options.has_key?(:http_authentication)

Expand All @@ -226,12 +226,16 @@ def self.add_url_to_multi(multi, url, url_queue, responses, options)
klass = determine_feed_parser_for_xml(xml)

if klass
feed = klass.parse(xml)
feed.feed_url = c.last_effective_url
feed.etag = etag_from_header(c.header_str)
feed.last_modified = last_modified_from_header(c.header_str)
responses[url] = feed
options[:on_success].call(url, feed) if options.has_key?(:on_success)
begin
feed = klass.parse(xml)
feed.feed_url = c.last_effective_url
feed.etag = etag_from_header(c.header_str)
feed.last_modified = last_modified_from_header(c.header_str)
responses[url] = feed
options[:on_success].call(url, feed) if options.has_key?(:on_success)
rescue Exception => e
options[:on_failure].call(url, c.response_code, c.header_str, c.body_str) if options.has_key?(:on_failure)
end
else
# puts "Error determining parser for #{url} - #{c.last_effective_url}"
# raise NoParserAvailable.new("no valid parser for content.") (this would unfortunately fail the whole 'multi', so it's not really useable)
Expand Down Expand Up @@ -271,14 +275,18 @@ def self.add_feed_to_multi(multi, feed, feed_queue, responses, options)
curl.follow_location = true

curl.on_success do |c|
add_feed_to_multi(multi, feed_queue.shift, feed_queue, responses, options) unless feed_queue.empty?
updated_feed = Feed.parse(c.body_str)
updated_feed.feed_url = c.last_effective_url
updated_feed.etag = etag_from_header(c.header_str)
updated_feed.last_modified = last_modified_from_header(c.header_str)
feed.update_from_feed(updated_feed)
responses[feed.feed_url] = feed
options[:on_success].call(feed) if options.has_key?(:on_success)
begin
add_feed_to_multi(multi, feed_queue.shift, feed_queue, responses, options) unless feed_queue.empty?
updated_feed = Feed.parse(c.body_str)
updated_feed.feed_url = c.last_effective_url
updated_feed.etag = etag_from_header(c.header_str)
updated_feed.last_modified = last_modified_from_header(c.header_str)
feed.update_from_feed(updated_feed)
responses[feed.feed_url] = feed
options[:on_success].call(feed) if options.has_key?(:on_success)
rescue Exception => e
options[:on_failure].call(feed, c.response_code, c.header_str, c.body_str) if options.has_key?(:on_failure)
end
end

curl.on_failure do |c|
Expand Down
14 changes: 14 additions & 0 deletions spec/sample_feeds/run_against_sample.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
require 'rubygems'
require File.dirname(__FILE__) + "/../../lib/feedzirra.rb"

# Take the first whitespace-delimited token (the URL) from each line of the
# sample feed list. compact drops the nils that blank lines would produce,
# which would otherwise be passed to fetch_and_parse as bogus URLs.
feed_urls = File.readlines(File.dirname(__FILE__) + "/top5kfeeds.dat").map { |line| line.split.first }.compact

# Callback invoked for every feed that is fetched and parsed successfully.
success = lambda do |url, feed|
  puts "SUCCESS - #{feed.title} - #{url}"
end

# Callback invoked when fetching or parsing a feed fails.
failure = lambda do |url, response_code, header, body|
  puts "*********** FAILED with #{response_code} on #{url}"
end

Feedzirra::Feed.fetch_and_parse(feed_urls, :on_success => success, :on_failure => failure)
Loading

0 comments on commit 6219578

Please sign in to comment.