Permalink
Browse files

fixed merge conflict to add itunes parser and googledocs

  • Loading branch information...
2 parents de97a1b + a7c8065 commit 6f165e827ae0ed2311da232b678097002af09ccc @pauldix pauldix committed Mar 3, 2012
View
@@ -4,3 +4,7 @@ TODO
Gemfile.lock
rdoc/
doc/
+bin
+.bundle
+*.swp
+*.swo
View
@@ -46,7 +46,7 @@ def self.add_feed_class(klass)
# === Returns
# An array of class names.
def self.feed_classes # returns the parser class list, built once and memoized in @feed_classes
- @feed_classes ||= [Feedzirra::Parser::RSSFeedBurner, Feedzirra::Parser::RSS, Feedzirra::Parser::AtomFeedBurner, Feedzirra::Parser::Atom, Feedzirra::Parser::ITunesRSS]
+ @feed_classes ||= [Feedzirra::Parser::RSSFeedBurner, Feedzirra::Parser::RSS, Feedzirra::Parser::GoogleDocsAtom, Feedzirra::Parser::AtomFeedBurner, Feedzirra::Parser::Atom, Feedzirra::Parser::ITunesRSS] # NOTE(review): GoogleDocsAtom is listed ahead of the generic Atom parsers; presumably list order is match priority -- confirm
end
# Makes all registered feeds types look for the passed in element to parse.
@@ -279,7 +279,7 @@ def self.add_url_to_multi(multi, url, url_queue, responses, options)
if klass
begin
- feed = klass.parse(xml, Proc.new{|message| puts "Error while parsing [#{url}] #{message}" })
+ feed = klass.parse(xml, Proc.new{|message| warn "Error while parsing [#{url}] #{message}" })
feed.feed_url = c.last_effective_url
feed.etag = etag_from_header(c.header_str)
feed.last_modified = last_modified_from_header(c.header_str)
@@ -332,7 +332,7 @@ def self.add_feed_to_multi(multi, feed, feed_queue, responses, options)
curl.on_success do |c|
begin
add_feed_to_multi(multi, feed_queue.shift, feed_queue, responses, options) unless feed_queue.empty?
- updated_feed = Feed.parse(c.body_str){ |message| puts "Error while parsing [#{feed.feed_url}] #{message}" }
+ updated_feed = Feed.parse(c.body_str){ |message| warn "Error while parsing [#{feed.feed_url}] #{message}" }
updated_feed.feed_url = c.last_effective_url
updated_feed.etag = etag_from_header(c.header_str)
updated_feed.last_modified = last_modified_from_header(c.header_str)
@@ -381,4 +381,4 @@ def self.last_modified_from_header(header)
Time.parse($1) if $1
end
end
-end
+end
@@ -11,7 +11,7 @@ def parse_datetime(string)
begin
DateTime.parse(string).feed_utils_to_gm_time
rescue
- puts "DATE CAN'T BE PARSED: [#{string}]"
+ warn "Failed to parse date #{string.inspect}"
nil
end
end
@@ -50,23 +50,12 @@ def sanitize_entries!
private
def find_new_entries_for(feed)
- # this implementation is a hack, which is why it's so ugly.
- # it's to get around the fact that not all feeds have a published date.
- # however, they're always ordered with the newest one first.
- # So we go through the entries just parsed and insert each one as a new entry
- # until we get to one that has the same url as the the newest for the feed
- return feed.entries if self.entries.length == 0
- latest_entry = self.entries.first
- found_new_entries = []
- feed.entries.each do |entry|
- break if entry.url == latest_entry.url
- found_new_entries << entry
- end
- found_new_entries
+ # this algorithm does not optimize based on publication date, but always finds new entries
+ feed.entries.reject {|entry| self.entries.any? {|e| e.url == entry.url} }
end
# Reports whether any entry already held by this feed has the same url as
# test_entry.
def existing_entry?(test_entry)
  entries.any? do |known|
    known.url == test_entry.url
  end
end
end
-end
+end
View
@@ -9,6 +9,9 @@ module Parser
autoload :ITunesRSSItem, 'feedzirra/parser/itunes_rss_item'
autoload :ITunesRSSOwner, 'feedzirra/parser/itunes_rss_owner'
+ autoload :GoogleDocsAtom, 'feedzirra/parser/google_docs_atom'
+ autoload :GoogleDocsAtomEntry, 'feedzirra/parser/google_docs_atom_entry'
+
autoload :Atom, 'feedzirra/parser/atom'
autoload :AtomEntry, 'feedzirra/parser/atom_entry'
autoload :AtomFeedBurner, 'feedzirra/parser/atom_feed_burner'
@@ -26,4 +26,4 @@ def feed_url
end
end
-end
+end
@@ -0,0 +1,28 @@
+require File.expand_path('./atom', File.dirname(__FILE__))
+
+module Feedzirra
+ module Parser
+ class GoogleDocsAtom
+ include SAXMachine
+ include FeedUtilities
+ element :title
+ element :subtitle, :as => :description
+ element :link, :as => :url, :value => :href, :with => {:type => "text/html"}
+ element :link, :as => :feed_url, :value => :href, :with => {:type => "application/atom+xml"}
+ elements :link, :as => :links, :value => :href
+ elements :entry, :as => :entries, :class => GoogleDocsAtomEntry
+
+ def url
+ @url ||= links.first
+ end
+
+ def self.able_to_parse?(xml) #:nodoc:
+ %r{<id>https?://docs.google.com/.*\</id\>} =~ xml
+ end
+
+ def feed_url
+ @feed_url ||= links.first
+ end
+ end
+ end
+end
@@ -0,0 +1,29 @@
+module Feedzirra
+  module Parser
+    class GoogleDocsAtomEntry # one <entry> of a Google Docs Atom feed; GoogleDocsAtom uses it as its entry class
+      include SAXMachine
+      include FeedEntryUtilities
+
+      element :title
+      element :link, :as => :url, :value => :href, :with => {:type => "text/html", :rel => "alternate"} # url is read from the href of the alternate text/html link
+      element :name, :as => :author # NOTE(review): presumably the <name> nested inside <author> -- confirm
+      element :content
+      element :summary
+      element :published
+      element :id, :as => :entry_id
+      element :created, :as => :published # alternate element name mapped onto published
+      element :issued, :as => :published # alternate element name mapped onto published
+      element :updated
+      element :modified, :as => :updated # alternate element name mapped onto updated
+      elements :category, :as => :categories, :value => :term # collects the term attribute of every category
+      elements :link, :as => :links, :value => :href # collects the href of every link
+      element :"docs:md5Checksum", :as => :checksum # docs:-prefixed elements below are the Google Docs additions
+      element :"docs:filename", :as => :original_filename
+      element :"docs:suggestedFilename", :as => :suggested_filename
+
+      def url
+        @url ||= links.first # fall back to the first collected link href when @url has not been set
+      end
+    end
+  end
+end
@@ -97,6 +97,10 @@
end
describe "#determine_feed_parser_for_xml" do
+ it 'should return the Feedzirra::Parser::GoogleDocsAtom calss for a Google Docs atom feed' do
+ Feedzirra::Feed.determine_feed_parser_for_xml(sample_google_docs_list_feed).should == Feedzirra::Parser::GoogleDocsAtom
+ end
+
it "should return the Feedzirra::Parser::Atom class for an atom feed" do
Feedzirra::Feed.determine_feed_parser_for_xml(sample_atom_feed).should == Feedzirra::Parser::Atom
end
@@ -590,4 +594,4 @@ def self.able_to_parse?(val)
it "should return an array of feed objects if multiple feeds are passed in"
end
end
-end
+end
@@ -0,0 +1,22 @@
+require File.join(File.dirname(__FILE__), %w[.. .. spec_helper])
+
+describe Feedzirra::Parser::GoogleDocsAtomEntry do
+ describe 'parsing' do
+ before do
+ @feed = Feedzirra::Parser::GoogleDocsAtom.parse(sample_google_docs_list_feed)
+ @entry = @feed.entries.first
+ end
+
+ it 'should have the custom checksum element' do
+ @entry.checksum.should eql '2b01142f7481c7b056c4b410d28f33cf'
+ end
+
+ it 'should have the custom filename element' do
+ @entry.original_filename.should eql "MyFile.pdf"
+ end
+
+ it 'should have the custom suggested filename element' do
+ @entry.suggested_filename.should eql "TaxDocument.pdf"
+ end
+ end
+end
@@ -0,0 +1,31 @@
+require File.join(File.dirname(__FILE__), %w[.. .. spec_helper])
+
+describe Feedzirra::Parser::GoogleDocsAtom do
+ describe '.able_to_parser?' do
+ it 'should return true for Google Docs feed' do
+ Feedzirra::Parser::GoogleDocsAtom.should be_able_to_parse(sample_google_docs_list_feed)
+ end
+
+ it 'should not be able to parse another Atom feed' do
+ Feedzirra::Parser::GoogleDocsAtom.should_not be_able_to_parse(sample_atom_feed)
+ end
+ end
+
+ describe 'parsing' do
+ before do
+ @feed = Feedzirra::Parser::GoogleDocsAtom.parse(sample_google_docs_list_feed)
+ end
+
+ it 'should return a bunch of objects' do
+ @feed.entries.should_not be_empty
+ end
+
+ it 'should populate a title, interhited from the Atom entry' do
+ @feed.title.should_not be_nil
+ end
+
+ it 'should return a bunch of entries of type GoogleDocsAtomEntry' do
+ @feed.entries.first.should be_a Feedzirra::Parser::GoogleDocsAtomEntry
+ end
+ end
+end
Oops, something went wrong.

0 comments on commit 6f165e8

Please sign in to comment.