Skip to content

Commit

Permalink
Merge pull request #86 from bradhe/master
Browse files Browse the repository at this point in the history
Use `warn` instead of `puts` when reporting errors.
  • Loading branch information
pauldix committed Mar 3, 2012
2 parents 1858215 + 7568dd2 commit a7c8065
Show file tree
Hide file tree
Showing 13 changed files with 321 additions and 9 deletions.
4 changes: 4 additions & 0 deletions .gitignore
Expand Up @@ -4,3 +4,7 @@ TODO
Gemfile.lock
rdoc/
doc/
bin
.bundle
*.swp
*.swo
2 changes: 1 addition & 1 deletion feedzirra.gemspec
Expand Up @@ -26,7 +26,7 @@ Gem::Specification.new do |s|
s.add_runtime_dependency 'sax-machine', ['~> 0.1.0']
s.add_runtime_dependency 'curb', ['~> 0.7.15']
s.add_runtime_dependency 'builder', ['>= 2.1.2']
s.add_runtime_dependency 'activesupport', ['~> 3.0.8']
s.add_runtime_dependency 'activesupport', ['~> 3.1.1']
s.add_runtime_dependency 'loofah', ['~> 1.2.0']
s.add_runtime_dependency 'rdoc', ['~> 3.8']
s.add_runtime_dependency 'rake', ['>= 0.8.7']
Expand Down
8 changes: 4 additions & 4 deletions lib/feedzirra/feed.rb
Expand Up @@ -46,7 +46,7 @@ def self.add_feed_class(klass)
# === Returns
# An array of the registered feed parser classes.
def self.feed_classes
@feed_classes ||= [Feedzirra::Parser::RSSFeedBurner, Feedzirra::Parser::RSS, Feedzirra::Parser::AtomFeedBurner, Feedzirra::Parser::Atom]
@feed_classes ||= [Feedzirra::Parser::RSSFeedBurner, Feedzirra::Parser::RSS, Feedzirra::Parser::GoogleDocsAtom, Feedzirra::Parser::AtomFeedBurner, Feedzirra::Parser::Atom]
end

# Makes all registered feeds types look for the passed in element to parse.
Expand Down Expand Up @@ -279,7 +279,7 @@ def self.add_url_to_multi(multi, url, url_queue, responses, options)

if klass
begin
feed = klass.parse(xml, Proc.new{|message| puts "Error while parsing [#{url}] #{message}" })
feed = klass.parse(xml, Proc.new{|message| warn "Error while parsing [#{url}] #{message}" })
feed.feed_url = c.last_effective_url
feed.etag = etag_from_header(c.header_str)
feed.last_modified = last_modified_from_header(c.header_str)
Expand Down Expand Up @@ -332,7 +332,7 @@ def self.add_feed_to_multi(multi, feed, feed_queue, responses, options)
curl.on_success do |c|
begin
add_feed_to_multi(multi, feed_queue.shift, feed_queue, responses, options) unless feed_queue.empty?
updated_feed = Feed.parse(c.body_str){ |message| puts "Error while parsing [#{feed.feed_url}] #{message}" }
updated_feed = Feed.parse(c.body_str){ |message| warn "Error while parsing [#{feed.feed_url}] #{message}" }
updated_feed.feed_url = c.last_effective_url
updated_feed.etag = etag_from_header(c.header_str)
updated_feed.last_modified = last_modified_from_header(c.header_str)
Expand Down Expand Up @@ -381,4 +381,4 @@ def self.last_modified_from_header(header)
Time.parse($1) if $1
end
end
end
end
2 changes: 1 addition & 1 deletion lib/feedzirra/feed_entry_utilities.rb
Expand Up @@ -11,7 +11,7 @@ def parse_datetime(string)
begin
DateTime.parse(string).feed_utils_to_gm_time
rescue
puts "DATE CAN'T BE PARSED: [#{string}]"
warn "Failed to parse date #{string.inspect}"
nil
end
end
Expand Down
3 changes: 3 additions & 0 deletions lib/feedzirra/parser.rb
Expand Up @@ -9,6 +9,9 @@ module Parser
autoload :ITunesRSSItem, 'feedzirra/parser/itunes_rss_item'
autoload :ITunesRSSOwner, 'feedzirra/parser/itunes_rss_owner'

autoload :GoogleDocsAtom, 'feedzirra/parser/google_docs_atom'
autoload :GoogleDocsAtomEntry, 'feedzirra/parser/google_docs_atom_entry'

autoload :Atom, 'feedzirra/parser/atom'
autoload :AtomEntry, 'feedzirra/parser/atom_entry'
autoload :AtomFeedBurner, 'feedzirra/parser/atom_feed_burner'
Expand Down
2 changes: 1 addition & 1 deletion lib/feedzirra/parser/atom.rb
Expand Up @@ -26,4 +26,4 @@ def feed_url
end
end

end
end
28 changes: 28 additions & 0 deletions lib/feedzirra/parser/google_docs_atom.rb
@@ -0,0 +1,28 @@
require File.expand_path('./atom', File.dirname(__FILE__))

module Feedzirra
  module Parser
    # Parser for Google Docs document-list feeds (an Atom dialect).
    #
    # Declares the feed-level SAXMachine mappings and yields entries as
    # GoogleDocsAtomEntry objects.
    class GoogleDocsAtom
      include SAXMachine
      include FeedUtilities
      element :title
      element :subtitle, :as => :description
      element :link, :as => :url, :value => :href, :with => {:type => "text/html"}
      element :link, :as => :feed_url, :value => :href, :with => {:type => "application/atom+xml"}
      elements :link, :as => :links, :value => :href
      elements :entry, :as => :entries, :class => GoogleDocsAtomEntry

      # Returns the feed's URL, falling back to the first collected link href
      # when @url has not been set.
      def url
        @url ||= links.first
      end

      # Reports whether +xml+ looks like a Google Docs feed, i.e. it contains
      # an <id> element whose value starts with http(s)://docs.google.com/.
      #
      # Returns the match offset (truthy) or nil, as produced by =~.
      def self.able_to_parse?(xml) #:nodoc:
        # Dots are escaped so that only the literal docs.google.com host
        # matches (an unescaped "." would accept any character there).
        %r{<id>https?://docs\.google\.com/.*</id>} =~ xml
      end

      # Returns the feed's own URL, falling back to the first collected link
      # href when @feed_url has not been set.
      def feed_url
        @feed_url ||= links.first
      end
    end
  end
end
29 changes: 29 additions & 0 deletions lib/feedzirra/parser/google_docs_atom_entry.rb
@@ -0,0 +1,29 @@
module Feedzirra
module Parser
# A single <entry> of a Google Docs list feed.
#
# Declares SAXMachine mappings for the usual Atom entry fields plus the
# Google Docs specific docs:md5Checksum, docs:filename and
# docs:suggestedFilename elements.
class GoogleDocsAtomEntry
include SAXMachine
include FeedEntryUtilities

element :title
element :link, :as => :url, :value => :href, :with => {:type => "text/html", :rel => "alternate"}
element :name, :as => :author
element :content
element :summary
# published/created/issued all feed the :published attribute, and
# updated/modified both feed :updated.
element :published
element :id, :as => :entry_id
element :created, :as => :published
element :issued, :as => :published
element :updated
element :modified, :as => :updated
elements :category, :as => :categories, :value => :term
elements :link, :as => :links, :value => :href
# Elements from the docs: namespace, exposed under friendlier names.
element :"docs:md5Checksum", :as => :checksum
element :"docs:filename", :as => :original_filename
element :"docs:suggestedFilename", :as => :suggested_filename

# Returns the entry URL, falling back to the first collected link href
# when @url is not set (presumably @url is filled by the :url mapping
# above during parsing -- confirm against SAXMachine).
def url
@url ||= links.first
end
end
end
end
6 changes: 5 additions & 1 deletion spec/feedzirra/feed_spec.rb
Expand Up @@ -97,6 +97,10 @@
end

describe "#determine_feed_parser_for_xml" do
# The Google Docs sample feed must resolve to its dedicated parser class.
it 'should return the Feedzirra::Parser::GoogleDocsAtom class for a Google Docs atom feed' do
  Feedzirra::Feed.determine_feed_parser_for_xml(sample_google_docs_list_feed).should == Feedzirra::Parser::GoogleDocsAtom
end

it "should return the Feedzirra::Parser::Atom class for an atom feed" do
Feedzirra::Feed.determine_feed_parser_for_xml(sample_atom_feed).should == Feedzirra::Parser::Atom
end
Expand Down Expand Up @@ -590,4 +594,4 @@ def self.able_to_parse?(val)
# Pending example (no body yet).
it "should return an array of feed objects if multiple feeds are passed in"
end
end
end
end
22 changes: 22 additions & 0 deletions spec/feedzirra/parser/google_docs_atom_entry_spec.rb
@@ -0,0 +1,22 @@
require File.join(File.dirname(__FILE__), %w[.. .. spec_helper])

# Checks the docs:-namespaced fields on the first entry of the sample
# Google Docs list feed.
describe Feedzirra::Parser::GoogleDocsAtomEntry do
  describe 'parsing' do
    before do
      parsed_feed = Feedzirra::Parser::GoogleDocsAtom.parse(sample_google_docs_list_feed)
      @entry = parsed_feed.entries.first
    end

    it 'should have the custom checksum element' do
      @entry.checksum.should eql('2b01142f7481c7b056c4b410d28f33cf')
    end

    it 'should have the custom filename element' do
      @entry.original_filename.should eql("MyFile.pdf")
    end

    it 'should have the custom suggested filename element' do
      @entry.suggested_filename.should eql("TaxDocument.pdf")
    end
  end
end
31 changes: 31 additions & 0 deletions spec/feedzirra/parser/google_docs_atom_spec.rb
@@ -0,0 +1,31 @@
require File.join(File.dirname(__FILE__), %w[.. .. spec_helper])

# Specs for the Google Docs feed parser: feed detection and parsing of the
# sample document-list feed.
describe Feedzirra::Parser::GoogleDocsAtom do
  describe '.able_to_parse?' do
    it 'should return true for Google Docs feed' do
      Feedzirra::Parser::GoogleDocsAtom.should be_able_to_parse(sample_google_docs_list_feed)
    end

    it 'should not be able to parse another Atom feed' do
      Feedzirra::Parser::GoogleDocsAtom.should_not be_able_to_parse(sample_atom_feed)
    end
  end

  describe 'parsing' do
    before do
      @feed = Feedzirra::Parser::GoogleDocsAtom.parse(sample_google_docs_list_feed)
    end

    it 'should return a bunch of objects' do
      @feed.entries.should_not be_empty
    end

    it 'should populate a title' do
      @feed.title.should_not be_nil
    end

    it 'should return a bunch of entries of type GoogleDocsAtomEntry' do
      @feed.entries.first.should be_a Feedzirra::Parser::GoogleDocsAtomEntry
    end
  end
end
187 changes: 187 additions & 0 deletions spec/sample_feeds/GoogleDocsList.xml
@@ -0,0 +1,187 @@
<feed xmlns="http://www.w3.org/2005/Atom" xmlns:openSearch="http://a9.com/-/spec/opensearch/1.1/"
xmlns:docs="http://schemas.google.com/docs/2007" xmlns:batch="http://schemas.google.com/gdata/batch"
xmlns:gd="http://schemas.google.com/g/2005" gd:etag="W/&quot;DUMFR3YyfCt7ImA9WxNTFU0.&quot;">

<!-- Unique identifier of this feed. Not unique between users. -->
<id>https://docs.google.com/feeds/default/private/full</id>

<!-- Date this feed was last updated. Do NOT use this, provided for Atom compliance only. -->
<updated>2009-08-17T11:10:16.894Z</updated>

<!-- Title of this feed result. -->
<title>Available Documents - john.smith.example@gmail.com</title>

<!-- Link at which a user could consume the same content given here, but in a web browser with a user interface. -->
<link rel="alternate" type="text/html" href="https://docs.google.com"/>

<!-- Link at which you can add documents or files using resumable upload. -->
<link rel="http://schemas.google.com/g/2005#resumable-create-media" type="application/atom+xml"
href="https://docs.google.com/feeds/upload/create-session/default/private/full"/>

<!-- Link at which you can fetch the next page of results from this feed. -->
<link rel="next" type="application/atom+xml"
href="https://docs.google.com/feeds/default/private/full?start-key=EAEaFgoSCb2YGEPMAAACAG"/>

<!-- Link at which you can fetch this same feed. -->
<link rel="self" type="application/atom+xml"
href="https://docs.google.com/feeds/default/private/full/"/>

<!-- Link at which you can fetch this same feed. -->
<link rel="http://schemas.google.com/g/2005#feed" type="application/atom+xml"
href="https://docs.google.com/feeds/default/private/full"/>

<!-- Deprecated. Use resumable-create-media instead. -->
<!-- Link at which you can POST new entries with metadata only to this feed. -->
<link rel="http://schemas.google.com/g/2005#post" type="application/atom+xml"
href="https://docs.google.com/feeds/default/private/full"/>

<!-- Link at which you can send batch requests to this feed. -->
<link rel="http://schemas.google.com/g/2005#batch" type="application/atom+xml"
href="https://docs.google.com/feeds/default/private/full/batch"/>

<!-- Information about the user who authorized this request. -->
<author>
<name>John Smith</name>
<email>john.smith.example@gmail.com</email>
</author>

<!-- NOT supported, provided for protocol compliance only. -->
<openSearch:startIndex>1</openSearch:startIndex>

<!-- The ETag here is used to identify the version of this entry. -->
<entry gd:etag="'EVJVTBICRit7ImBq'">
<!-- A unique, permanent identifier for this entry. -->
<id>https://docs.google.com/feeds/id/document%3A12345</id>

<!-- Title of this resource. -->
<title>2010 Income Tax Policy</title>

<!-- Description of this resource (currently visible in the preview pane in the UI). -->
<docs:description>Describes how to file income tax for 2010.</docs:description>

<!-- Resource ID of this document. -->
<gd:resourceId>document:12345</gd:resourceId>

<!-- Date this document was created (the "published" name of this element is mis-leading, but this is Atom standard). -->
<published>2009-07-22T19:02:57.616Z</published>

<!-- Information about the owner of this document (not necessarily the user authorizing this request). -->
<author>
<name>Jenna Dolsom</name>
<email>jenna.dolsom.example@gmail.com</email>
</author>

<!-- Date this entry was last updated (either by Google's systems, the API, or a user in a web browser). -->
<updated>2009-07-29T20:31:39.804Z</updated>

<!-- Date this document was last edited by a user in the document editor in a web browser. -->
<app:edited xmlns:app="http://www.w3.org/2007/app">2009-07-31T17:21:26.497Z</app:edited>

<!-- Information about the user who last modified this entry (not necessarily the user authorizing this request). -->
<gd:lastModifiedBy>
<name>Aaron Jensen</name>
<email>aaron.jensen.example@gmail.com</email>
</gd:lastModifiedBy>

<!-- Date this document was last viewed in a web browser by any user. -->
<gd:lastViewed>2009-07-31T17:21:26.273Z</gd:lastViewed>

<!-- The "kind" of this entry. In this case, a word processing document. -->
<category scheme="http://schemas.google.com/g/2005#kind" term="http://schemas.google.com/docs/2007#document" label="document"/>

<!-- This entry has been viewed by the user, so it has a "viewed" category. -->
<category scheme="http://schemas.google.com/g/2005/labels" term="http://schemas.google.com/g/2005/labels#viewed" label="viewed"/>

<!-- Link at which you can download the actual document this entry describes. -->
<content type="text/html" src="https://docs.google.com/feeds/download/documents/Export?docId=12345"/>

<!-- This document is in one collection, detailed here. -->
<link rel="http://schemas.google.com/docs/2007#parent" type="application/atom+xml"
href="https://docs.google.com/feeds/default/private/full/folder%3A12345" title="ACollectionName"/>

<!-- Link at which you can open this document in a web browser. -->
<link rel="alternate" type="text/html" href="https://docs.google.com/Doc?docid=12345&amp;hl=en"/>

<!-- Link at which you can fetch only this entry. -->
<link rel="self" type="application/atom+xml" href="https://docs.google.com/feeds/default/private/full/document%3A12345"/>

<!-- Link at which you can PUT updates to this entry. -->
<link rel="edit" type="application/atom+xml" href="https://docs.google.com/feeds/default/private/full/document%3A12345"/>

<!-- Link at which you can PUT updates to this entry's content (deprecated, use resumable below). -->
<link rel="edit-media" type="text/html" href="https://docs.google.com/feeds/default/media/document%3A12345"/>

<!-- Link at which you can PUT resumable updates to this entry's content. -->
<link rel="http://schemas.google.com/g/2005#resumable-edit-media" type="application/atom+xml"
href="https://docs.google.com/feeds/upload/create-session/default/private/full/document%3A12345"/>

<!-- Link at which you can fetch a thumbnail of this resource. -->
<link rel="http://schemas.google.com/docs/2007/thumbnail" type="image/jpeg" href="https://lh3.googleusercontent.com/TQRs812345=s220"/>

<!-- Link at which you can create, retrieve, update, and delete ACL entries for this document. -->
<gd:feedLink rel="http://schemas.google.com/acl/2007#accessControlList"
href="https://docs.google.com/feeds/default/private/full/document%3A12345/acl"/>

<!-- Link at which you can create, retrieve, update, and delete revisions of this document. -->
<gd:feedLink rel="http://schemas.google.com/docs/2007/revisions"
href="https://docs.google.com/feeds/default/private/full/document%3A12345/revisions"/>

<!-- Number of bytes of the owner's quota this document uses. Native Google Docs currently use 0 bytes. -->
<gd:quotaBytesUsed>0</gd:quotaBytesUsed>

<!-- "true" if writers can invite other users to view and edit this document. -->
<docs:writersCanInvite value="true"/>

<!-- Given for files only. An MD5 checksum used to verify the contents of this file. -->
<!-- Some old files are being processed. Those files will not have this element yet. -->
<docs:md5Checksum>2b01142f7481c7b056c4b410d28f33cf</docs:md5Checksum>

<!-- Original filename of file at time of upload, if available. -->
<!-- Only available for resources of type file or pdf. -->
<!-- Shown here as example only. This element is not given for resources of type document. -->
<docs:filename>MyFile.pdf</docs:filename>

<!-- Current name of resource, with file extension from docs:filename appended, if available. -->
<!-- If the current name already has an extension, then the extension from docs:filename is not appended. -->
<!-- If docs:filename does not have an extension, then the current name is given unaltered. -->
<!-- Only available for resources of type file or pdf. -->
<!-- Shown here as example only. This element is not given for resources of type document. -->
<docs:suggestedFilename>TaxDocument.pdf</docs:suggestedFilename>
</entry>
<entry xmlns:gd="http://schemas.google.com/g/2005" gd:etag="'HhJSFgpeRyt7ImBq'">
<id>https://docs.google.com/feeds/id/pdf%3A12345</id>
<published>2009-04-09T18:23:09.035Z</published>
<updated>2009-04-09T18:23:09.035Z</updated>
<app:edited xmlns:app="http://www.w3.org/2007/app">2009-06-18T22:16:02.388Z</app:edited>
<category scheme="http://schemas.google.com/g/2005/labels" term="http://schemas.google.com/g/2005/labels#starred" label="starred"/>
<category scheme="http://schemas.google.com/g/2005/labels" term="http://schemas.google.com/g/2005/labels#viewed" label="viewed"/>
<category scheme="http://schemas.google.com/g/2005/labels" term="http://schemas.google.com/g/2005/labels#hidden" label="hidden"/>
<category scheme="http://schemas.google.com/g/2005#kind" term="http://schemas.google.com/docs/2007#pdf" label="pdf"/>
<title>PDF's Title</title>
<content type="application/pdf"
src="https://doc-04-20-docs.googleusercontent.com/docs/secure/m71240...U1?h=1630126&amp;e=download&amp;gd=true"/>
<link rel="alternate" type="text/html" href="https://docs.google.com/fileview?id=12345&amp;hl=en"/>
<link rel="self" type="application/atom+xml" href="https://docs.google.com/feeds/default/private/full/pdf%3A12345"/>
<link rel="edit" type="application/atom+xml" href="https://docs.google.com/feeds/default/private/full/pdf%3A12345"/>
<link rel="edit-media" type="application/pdf" href="https://docs.google.com/feeds/default/media/pdf%3A12345"/>
<link rel="http://schemas.google.com/g/2005#resumable-edit-media" type="application/atom+xml"
href="https://docs.google.com/feeds/upload/create-session/default/private/full/pdf%3A12345"/>
<author>
<name>user</name>
<email>user@gmail.com</email>
</author>
<gd:resourceId>pdf:12345</gd:resourceId>
<gd:lastModifiedBy>
<name>user</name>
<email>user@gmail.com</email>
</gd:lastModifiedBy>
<gd:lastViewed>2009-06-18T22:16:02.384Z</gd:lastViewed>
<gd:quotaBytesUsed>108538</gd:quotaBytesUsed>
<docs:writersCanInvite value="false"/>
<docs:md5Checksum>2b01142f7481c7b056c4b410d28f33cf</docs:md5Checksum>
<gd:feedLink rel="http://schemas.google.com/acl/2007#accessControlList"
href="https://docs.google.com/feeds/default/private/full/pdf%3A12345/acl"/>
<gd:feedLink rel="http://schemas.google.com/docs/2007/revisions"
href="https://docs.google.com/feeds/default/private/full/document%3A12345/revisions"/>
</entry>
</feed>

0 comments on commit a7c8065

Please sign in to comment.