Permalink
Browse files

Merge pull request #86 from bradhe/master

Use `warn` instead of `puts` when reporting errors.
  • Loading branch information...
2 parents 1858215 + 7568dd2 commit a7c806545dd60604e902fd0c52a39a447fa3908f @pauldix pauldix committed Mar 3, 2012
View
@@ -4,3 +4,7 @@ TODO
Gemfile.lock
rdoc/
doc/
+bin
+.bundle
+*.swp
+*.swo
View
@@ -26,7 +26,7 @@ Gem::Specification.new do |s|
s.add_runtime_dependency 'sax-machine', ['~> 0.1.0']
s.add_runtime_dependency 'curb', ['~> 0.7.15']
s.add_runtime_dependency 'builder', ['>= 2.1.2']
- s.add_runtime_dependency 'activesupport', ['~> 3.0.8']
+ s.add_runtime_dependency 'activesupport', ['~> 3.1.1']
s.add_runtime_dependency 'loofah', ['~> 1.2.0']
s.add_runtime_dependency 'rdoc', ['~> 3.8']
s.add_runtime_dependency 'rake', ['>= 0.8.7']
@@ -46,7 +46,7 @@ def self.add_feed_class(klass)
# === Returns
# An array of class names.
def self.feed_classes
- @feed_classes ||= [Feedzirra::Parser::RSSFeedBurner, Feedzirra::Parser::RSS, Feedzirra::Parser::AtomFeedBurner, Feedzirra::Parser::Atom]
+ @feed_classes ||= [Feedzirra::Parser::RSSFeedBurner, Feedzirra::Parser::RSS, Feedzirra::Parser::GoogleDocsAtom, Feedzirra::Parser::AtomFeedBurner, Feedzirra::Parser::Atom]
end
# Makes all registered feed types look for the passed-in element to parse.
@@ -279,7 +279,7 @@ def self.add_url_to_multi(multi, url, url_queue, responses, options)
if klass
begin
- feed = klass.parse(xml, Proc.new{|message| puts "Error while parsing [#{url}] #{message}" })
+ feed = klass.parse(xml, Proc.new{|message| warn "Error while parsing [#{url}] #{message}" })
feed.feed_url = c.last_effective_url
feed.etag = etag_from_header(c.header_str)
feed.last_modified = last_modified_from_header(c.header_str)
@@ -332,7 +332,7 @@ def self.add_feed_to_multi(multi, feed, feed_queue, responses, options)
curl.on_success do |c|
begin
add_feed_to_multi(multi, feed_queue.shift, feed_queue, responses, options) unless feed_queue.empty?
- updated_feed = Feed.parse(c.body_str){ |message| puts "Error while parsing [#{feed.feed_url}] #{message}" }
+ updated_feed = Feed.parse(c.body_str){ |message| warn "Error while parsing [#{feed.feed_url}] #{message}" }
updated_feed.feed_url = c.last_effective_url
updated_feed.etag = etag_from_header(c.header_str)
updated_feed.last_modified = last_modified_from_header(c.header_str)
@@ -381,4 +381,4 @@ def self.last_modified_from_header(header)
Time.parse($1) if $1
end
end
-end
+end
@@ -11,7 +11,7 @@ def parse_datetime(string)
begin
DateTime.parse(string).feed_utils_to_gm_time
rescue
- puts "DATE CAN'T BE PARSED: [#{string}]"
+ warn "Failed to parse date #{string.inspect}"
nil
end
end
@@ -9,6 +9,9 @@ module Parser
autoload :ITunesRSSItem, 'feedzirra/parser/itunes_rss_item'
autoload :ITunesRSSOwner, 'feedzirra/parser/itunes_rss_owner'
+ autoload :GoogleDocsAtom, 'feedzirra/parser/google_docs_atom'
+ autoload :GoogleDocsAtomEntry, 'feedzirra/parser/google_docs_atom_entry'
+
autoload :Atom, 'feedzirra/parser/atom'
autoload :AtomEntry, 'feedzirra/parser/atom_entry'
autoload :AtomFeedBurner, 'feedzirra/parser/atom_feed_burner'
@@ -26,4 +26,4 @@ def feed_url
end
end
-end
+end
@@ -0,0 +1,28 @@
+require File.expand_path('./atom', File.dirname(__FILE__))
+
+module Feedzirra
+ module Parser
+ class GoogleDocsAtom
+ include SAXMachine
+ include FeedUtilities
+ element :title
+ element :subtitle, :as => :description
+ element :link, :as => :url, :value => :href, :with => {:type => "text/html"}
+ element :link, :as => :feed_url, :value => :href, :with => {:type => "application/atom+xml"}
+ elements :link, :as => :links, :value => :href
+ elements :entry, :as => :entries, :class => GoogleDocsAtomEntry
+
+ def url
+ @url ||= links.first
+ end
+
+ def self.able_to_parse?(xml) #:nodoc:
+ %r{<id>https?://docs.google.com/.*\</id\>} =~ xml
+ end
+
+ def feed_url
+ @feed_url ||= links.first
+ end
+ end
+ end
+end
@@ -0,0 +1,29 @@
+module Feedzirra
+ module Parser
+ class GoogleDocsAtomEntry
+ include SAXMachine
+ include FeedEntryUtilities
+
+ element :title
+ element :link, :as => :url, :value => :href, :with => {:type => "text/html", :rel => "alternate"}
+ element :name, :as => :author
+ element :content
+ element :summary
+ element :published
+ element :id, :as => :entry_id
+ element :created, :as => :published
+ element :issued, :as => :published
+ element :updated
+ element :modified, :as => :updated
+ elements :category, :as => :categories, :value => :term
+ elements :link, :as => :links, :value => :href
+ element :"docs:md5Checksum", :as => :checksum
+ element :"docs:filename", :as => :original_filename
+ element :"docs:suggestedFilename", :as => :suggested_filename
+
+ def url
+ @url ||= links.first
+ end
+ end
+ end
+end
@@ -97,6 +97,10 @@
end
describe "#determine_feed_parser_for_xml" do
+ it 'should return the Feedzirra::Parser::GoogleDocsAtom calss for a Google Docs atom feed' do
+ Feedzirra::Feed.determine_feed_parser_for_xml(sample_google_docs_list_feed).should == Feedzirra::Parser::GoogleDocsAtom
+ end
+
it "should return the Feedzirra::Parser::Atom class for an atom feed" do
Feedzirra::Feed.determine_feed_parser_for_xml(sample_atom_feed).should == Feedzirra::Parser::Atom
end
@@ -590,4 +594,4 @@ def self.able_to_parse?(val)
it "should return an return an array of feed objects if multiple feeds are passed in"
end
end
-end
+end
@@ -0,0 +1,22 @@
+require File.join(File.dirname(__FILE__), %w[.. .. spec_helper])
+
+describe Feedzirra::Parser::GoogleDocsAtomEntry do
+ describe 'parsing' do
+ before do
+ @feed = Feedzirra::Parser::GoogleDocsAtom.parse(sample_google_docs_list_feed)
+ @entry = @feed.entries.first
+ end
+
+ it 'should have the custom checksum element' do
+ @entry.checksum.should eql '2b01142f7481c7b056c4b410d28f33cf'
+ end
+
+ it 'should have the custom filename element' do
+ @entry.original_filename.should eql "MyFile.pdf"
+ end
+
+ it 'should have the custom suggested filename element' do
+ @entry.suggested_filename.should eql "TaxDocument.pdf"
+ end
+ end
+end
@@ -0,0 +1,31 @@
+require File.join(File.dirname(__FILE__), %w[.. .. spec_helper])
+
+describe Feedzirra::Parser::GoogleDocsAtom do
+ describe '.able_to_parser?' do
+ it 'should return true for Google Docs feed' do
+ Feedzirra::Parser::GoogleDocsAtom.should be_able_to_parse(sample_google_docs_list_feed)
+ end
+
+ it 'should not be able to parse another Atom feed' do
+ Feedzirra::Parser::GoogleDocsAtom.should_not be_able_to_parse(sample_atom_feed)
+ end
+ end
+
+ describe 'parsing' do
+ before do
+ @feed = Feedzirra::Parser::GoogleDocsAtom.parse(sample_google_docs_list_feed)
+ end
+
+ it 'should return a bunch of objects' do
+ @feed.entries.should_not be_empty
+ end
+
+ it 'should populate a title, interhited from the Atom entry' do
+ @feed.title.should_not be_nil
+ end
+
+ it 'should return a bunch of entries of type GoogleDocsAtomEntry' do
+ @feed.entries.first.should be_a Feedzirra::Parser::GoogleDocsAtomEntry
+ end
+ end
+end
@@ -0,0 +1,187 @@
+<feed xmlns="http://www.w3.org/2005/Atom" xmlns:openSearch="http://a9.com/-/spec/opensearch/1.1/"
+ xmlns:docs="http://schemas.google.com/docs/2007" xmlns:batch="http://schemas.google.com/gdata/batch"
+ xmlns:gd="http://schemas.google.com/g/2005" gd:etag="W/&quot;DUMFR3YyfCt7ImA9WxNTFU0.&quot;">
+
+<!-- Unique identifier of this feed. Not unique between users. -->
+<id>https://docs.google.com/feeds/default/private/full</id>
+
+<!-- Date this feed was last updated. Do NOT use this, provided for Atom compliance only. -->
+<updated>2009-08-17T11:10:16.894Z</updated>
+
+<!-- Title of this feed result. -->
+<title>Available Documents - john.smith.example@gmail.com</title>
+
+<!-- Link at which a user could consume the same content given here, but in a web browser with a user interface. -->
+<link rel="alternate" type="text/html" href="https://docs.google.com"/>
+
+<!-- Link at which you can add documents or files using resumable upload. -->
+<link rel="http://schemas.google.com/g/2005#resumable-create-media" type="application/atom+xml"
+ href="https://docs.google.com/feeds/upload/create-session/default/private/full"/>
+
+<!-- Link at which you can fetch the next page of results from this feed. -->
+<link rel="next" type="application/atom+xml"
+ href="https://docs.google.com/feeds/default/private/full?start-key=EAEaFgoSCb2YGEPMAAACAG"/>
+
+<!-- Link at which you can fetch this same feed. -->
+<link rel="self" type="application/atom+xml"
+ href="https://docs.google.com/feeds/default/private/full/"/>
+
+<!-- Link at which you can fetch this same feed. -->
+<link rel="http://schemas.google.com/g/2005#feed" type="application/atom+xml"
+ href="https://docs.google.com/feeds/default/private/full"/>
+
+<!-- Deprecated. Use resumable-create-media instead. -->
+<!-- Link at which you can POST new entries with metadata only to this feed. -->
+<link rel="http://schemas.google.com/g/2005#post" type="application/atom+xml"
+ href="https://docs.google.com/feeds/default/private/full"/>
+
+<!-- Link at which you can send batch requests to this feed. -->
+<link rel="http://schemas.google.com/g/2005#batch" type="application/atom+xml"
+ href="https://docs.google.com/feeds/default/private/full/batch"/>
+
+<!-- Information about the user who authorized this request. -->
+<author>
+ <name>John Smith</name>
+ <email>john.smith.example@gmail.com</email>
+</author>
+
+<!-- NOT supported, provided for protocol compliance only. -->
+<openSearch:startIndex>1</openSearch:startIndex>
+
+<!-- The ETag here is used to identify the version of this entry. -->
+<entry gd:etag="'EVJVTBICRit7ImBq'">
+ <!-- A unique, permanent identifier for this entry. -->
+ <id>https://docs.google.com/feeds/id/document%3A12345</id>
+
+ <!-- Title of this resource. -->
+ <title>2010 Income Tax Policy</title>
+
+ <!-- Description of this resource (currently visible in the preview pane in the UI). -->
+ <docs:description>Describes how to file income tax for 2010.</docs:description>
+
+ <!-- Resource ID of this document. -->
+ <gd:resourceId>document:12345</gd:resourceId>
+
+ <!-- Date this document was created (the "published" name of this element is misleading, but this is the Atom standard). -->
+ <published>2009-07-22T19:02:57.616Z</published>
+
+ <!-- Information about the owner of this document (not necessarily the user authorizing this request). -->
+ <author>
+ <name>Jenna Dolsom</name>
+ <email>jenna.dolsom.example@gmail.com</email>
+ </author>
+
+ <!-- Date this entry was last updated (either by Google's systems, the API, or a user in a web browser). -->
+ <updated>2009-07-29T20:31:39.804Z</updated>
+
+ <!-- Date this document was last edited by a user in the document editor in a web browser. -->
+ <app:edited xmlns:app="http://www.w3.org/2007/app">2009-07-31T17:21:26.497Z</app:edited>
+
+ <!-- Information about the user who last modified this entry (not necessarily the user authorizing this request). -->
+ <gd:lastModifiedBy>
+ <name>Aaron Jensen</name>
+ <email>aaron.jensen.example@gmail.com</email>
+ </gd:lastModifiedBy>
+
+ <!-- Date this document was last viewed in a web browser by any user. -->
+ <gd:lastViewed>2009-07-31T17:21:26.273Z</gd:lastViewed>
+
+ <!-- The "kind" of this entry. In this case, a word processing document. -->
+ <category scheme="http://schemas.google.com/g/2005#kind" term="http://schemas.google.com/docs/2007#document" label="document"/>
+
+ <!-- This entry has been viewed by the user, so it has a "viewed" category. -->
+ <category scheme="http://schemas.google.com/g/2005/labels" term="http://schemas.google.com/g/2005/labels#viewed" label="viewed"/>
+
+ <!-- Link at which you can download the actual document this entry describes. -->
+ <content type="text/html" src="https://docs.google.com/feeds/download/documents/Export?docId=12345"/>
+
+ <!-- This document is in one collection, detailed here. -->
+ <link rel="http://schemas.google.com/docs/2007#parent" type="application/atom+xml"
+ href="https://docs.google.com/feeds/default/private/full/folder%3A12345" title="ACollectionName"/>
+
+ <!-- Link at which you can open this document in a web browser. -->
+ <link rel="alternate" type="text/html" href="https://docs.google.com/Doc?docid=12345&amp;hl=en"/>
+
+ <!-- Link at which you can fetch only this entry. -->
+ <link rel="self" type="application/atom+xml" href="https://docs.google.com/feeds/default/private/full/document%3A12345"/>
+
+ <!-- Link at which you can PUT updates to this entry. -->
+ <link rel="edit" type="application/atom+xml" href="https://docs.google.com/feeds/default/private/full/document%3A12345"/>
+
+ <!-- Link at which you can PUT updates to this entry's content (deprecated, use resumable below). -->
+ <link rel="edit-media" type="text/html" href="https://docs.google.com/feeds/default/media/document%3A12345"/>
+
+ <!-- Link at which you can PUT resumable updates to this entry's content. -->
+ <link rel="http://schemas.google.com/g/2005#resumable-edit-media" type="application/atom+xml"
+ href="https://docs.google.com/feeds/upload/create-session/default/private/full/document%3A12345"/>
+
+ <!-- Link at which you can fetch a thumbnail of this resource. -->
+ <link rel="http://schemas.google.com/docs/2007/thumbnail" type="image/jpeg" href="https://lh3.googleusercontent.com/TQRs812345=s220"/>
+
+ <!-- Link at which you can create, retrieve, update, and delete ACL entries for this document. -->
+ <gd:feedLink rel="http://schemas.google.com/acl/2007#accessControlList"
+ href="https://docs.google.com/feeds/default/private/full/document%3A12345/acl"/>
+
+ <!-- Link at which you can create, retrieve, update, and delete revisions of this document. -->
+ <gd:feedLink rel="http://schemas.google.com/docs/2007/revisions"
+ href="https://docs.google.com/feeds/default/private/full/document%3A12345/revisions"/>
+
+ <!-- Number of bytes of the owner's quota this document uses. Native Google Docs currently use 0 bytes. -->
+ <gd:quotaBytesUsed>0</gd:quotaBytesUsed>
+
+ <!-- "true" if writers can invite other users to view and edit this document. -->
+ <docs:writersCanInvite value="true"/>
+
+ <!-- Given for files only. An MD5 checksum used to verify the contents of this file. -->
+ <!-- Some old files are being processed. Those files will not have this element yet. -->
+ <docs:md5Checksum>2b01142f7481c7b056c4b410d28f33cf</docs:md5Checksum>
+
+ <!-- Original filename of file at time of upload, if available. -->
+ <!-- Only available for resources of type file or pdf. -->
+ <!-- Shown here as example only. This element is not given for resources of type document. -->
+ <docs:filename>MyFile.pdf</docs:filename>
+
+ <!-- Current name of resource, with file extension from docs:filename appended, if available. -->
+ <!-- If the current name already has an extension, then the extension from docs:filename is not appended. -->
+ <!-- If docs:filename does not have an extension, then the current name is given unaltered. -->
+ <!-- Only available for resources of type file or pdf. -->
+ <!-- Shown here as example only. This element is not given for resources of type document. -->
+ <docs:suggestedFilename>TaxDocument.pdf</docs:suggestedFilename>
+</entry>
+<entry xmlns:gd="http://schemas.google.com/g/2005" gd:etag="'HhJSFgpeRyt7ImBq'">
+ <id>https://docs.google.com/feeds/id/pdf%3A12345</id>
+ <published>2009-04-09T18:23:09.035Z</published>
+ <updated>2009-04-09T18:23:09.035Z</updated>
+ <app:edited xmlns:app="http://www.w3.org/2007/app">2009-06-18T22:16:02.388Z</app:edited>
+ <category scheme="http://schemas.google.com/g/2005/labels" term="http://schemas.google.com/g/2005/labels#starred" label="starred"/>
+ <category scheme="http://schemas.google.com/g/2005/labels" term="http://schemas.google.com/g/2005/labels#viewed" label="viewed"/>
+ <category scheme="http://schemas.google.com/g/2005/labels" term="http://schemas.google.com/g/2005/labels#hidden" label="hidden"/>
+ <category scheme="http://schemas.google.com/g/2005#kind" term="http://schemas.google.com/docs/2007#pdf" label="pdf"/>
+ <title>PDF's Title</title>
+ <content type="application/pdf"
+ src="https://doc-04-20-docs.googleusercontent.com/docs/secure/m71240...U1?h=1630126&amp;e=download&amp;gd=true"/>
+ <link rel="alternate" type="text/html" href="https://docs.google.com/fileview?id=12345&amp;hl=en"/>
+ <link rel="self" type="application/atom+xml" href="https://docs.google.com/feeds/default/private/full/pdf%3A12345"/>
+ <link rel="edit" type="application/atom+xml" href="https://docs.google.com/feeds/default/private/full/pdf%3A12345"/>
+ <link rel="edit-media" type="application/pdf" href="https://docs.google.com/feeds/default/media/pdf%3A12345"/>
+ <link rel="http://schemas.google.com/g/2005#resumable-edit-media" type="application/atom+xml"
+ href="https://docs.google.com/feeds/upload/create-session/default/private/full/pdf%3A12345"/>
+ <author>
+ <name>user</name>
+ <email>user@gmail.com</email>
+ </author>
+ <gd:resourceId>pdf:12345</gd:resourceId>
+ <gd:lastModifiedBy>
+ <name>user</name>
+ <email>user@gmail.com</email>
+ </gd:lastModifiedBy>
+ <gd:lastViewed>2009-06-18T22:16:02.384Z</gd:lastViewed>
+ <gd:quotaBytesUsed>108538</gd:quotaBytesUsed>
+ <docs:writersCanInvite value="false"/>
+ <docs:md5Checksum>2b01142f7481c7b056c4b410d28f33cf</docs:md5Checksum>
+ <gd:feedLink rel="http://schemas.google.com/acl/2007#accessControlList"
+ href="https://docs.google.com/feeds/default/private/full/pdf%3A12345/acl"/>
+ <gd:feedLink rel="http://schemas.google.com/docs/2007/revisions"
+ href="https://docs.google.com/feeds/default/private/full/document%3A12345/revisions"/>
+</entry>
+</feed>
Oops, something went wrong.

0 comments on commit a7c8065

Please sign in to comment.