Permalink
Browse files

Merge branch 'with_facebook_parser_update'

* with_facebook_parser_update:
  Improve source_parser_facebook_spec to describe parsing of various URLs.
  Update SourceParser::Facebook to support new REST-style URLs.
  Improve SourceParser::Base::url_pattern docs, explain capture groups.
  • Loading branch information...
igal committed Mar 14, 2012
2 parents 6f2aed0 + b22c989 commit cdf9665305cb023814ce5fd965484796ddcced68
Showing with 78 additions and 28 deletions.
  1. +6 −0 app/models/source_parser.rb
  2. +17 −1 app/models/source_parser/facebook.rb
  3. +55 −27 spec/models/source_parser_facebook_spec.rb
@@ -79,6 +79,12 @@ def self.label(value=nil)
end
# Gets or sets the applicable URL pattern for this parser.
+ #
+ # This pattern must have the event identifier as the first capture group.
+ #
+ # Example:
+ # # The pattern below gets the event id as the first capture group:
+ # url_pattern %r{^https?://facebook\.com/events/([^/]+)}
def self.url_pattern(value=nil)
self._url_pattern = value if value
return self._url_pattern
@@ -1,7 +1,23 @@
class SourceParser # :nodoc:
class Facebook < Base
label :Facebook
- url_pattern %r{^http(?:s)?://(?:(?:www\.)?facebook\.com/event.php\?eid=|graph\.facebook\.com/)([^/]+)/?}
+ # NOTE: This pattern's goal is to get the Facebook event identifier in the first capture group, so the "(?:foo)" non-capturing group syntax is used to match but not capture the other groups -- search the web for "ruby regexp non-capturing group" for details.
+ #
+ url_pattern %r{(?x) # Ignore regexp whitespace and comments
+ ^
+ (?:https?://)? # Optional http URI prefix
+ (?:
+ (?:www\.)? # Optional 'www.' host prefix
+ (?:
+ facebook\.com/events/ # REST-style path
+ | # ...or....
+ facebook\.com/event\.php\?eid= # GET-style path
+ )
+ | # ...or...
+ graph\.facebook\.com/ # API path
+ )
+ ([^/]+) # Facebook event identifier to capture
+ }
def self.to_abstract_events(opts={})
self.to_abstract_events_api_helper(
@@ -2,39 +2,67 @@
describe SourceParser::Facebook do
- before(:each) do
- content = read_sample('facebook.json')
- parsed_content = MultiJson.decode(content)
- HTTParty.should_receive(:get).and_return(parsed_content)
- @events = SourceParser::Facebook.to_abstract_events(:url => 'http://facebook.com/event.php?eid=247619485255249')
- @event = @events.first
- end
+ describe "when importing an event" do
+ before(:each) do
+ content = read_sample('facebook.json')
+ parsed_content = MultiJson.decode(content)
+ HTTParty.should_receive(:get).and_return(parsed_content)
+ @events = SourceParser::Facebook.to_abstract_events(:url => 'http://facebook.com/event.php?eid=247619485255249')
+ @event = @events.first
+ end
- it "should find one event" do
- @events.size.should == 1
- end
+ it "should find one event" do
+ @events.size.should == 1
+ end
- it "should set event details" do
- @event.title.should == "Open Source Bridge 2012"
- @event.start_time.should == Time.zone.parse("26 Jun 2012 09:00:00 PDT -07:00")
- end
+ it "should set event details" do
+ @event.title.should == "Open Source Bridge 2012"
+ @event.start_time.should == Time.zone.parse("26 Jun 2012 09:00:00 PDT -07:00")
+ end
- it "should tag Facebook events with automagic machine tags" do
- @event.tags.should == ["facebook:event=247619485255249"]
- end
+ it "should tag Facebook events with automagic machine tags" do
+ @event.tags.should == ["facebook:event=247619485255249"]
+ end
+
+ it "should set the event url to the original import URL" do
+ @event.url.should == 'http://facebook.com/event.php?eid=247619485255249'
+ end
- it "should set the event url to the original import URL" do
- @event.url.should == 'http://facebook.com/event.php?eid=247619485255249'
+ it "should populate a venue when structured data is provided" do
+ @event.location.title.should == "Eliot Center"
+ @event.location.street_address.should == "1226 SW Salmon Street"
+ @event.location.locality.should == "Portland"
+ @event.location.region.should == "Oregon"
+ @event.location.country.should == "United States"
+ @event.location.latitude.to_s.should == "45.5236"
+ @event.location.longitude.to_s.should == "-122.675"
+ end
end
- it "should populate a venue when structured data is provided" do
- @event.location.title.should == "Eliot Center"
- @event.location.street_address.should == "1226 SW Salmon Street"
- @event.location.locality.should == "Portland"
- @event.location.region.should == "Oregon"
- @event.location.country.should == "United States"
- @event.location.latitude.to_s.should == "45.5236"
- @event.location.longitude.to_s.should == "-122.675"
+ describe "when parsing Facebook URLs" do
+ def should_parse(url)
+ url.match(SourceParser::Facebook.url_pattern)[1].should == "247619485255249"
+ end
+
+ it "should parse a GET-style URL" do
+ should_parse 'http://facebook.com/event.php?eid=247619485255249'
+ end
+
+ it "should parse a GET-style URL using HTTPS" do
+ should_parse 'https://facebook.com/event.php?eid=247619485255249'
+ end
+
+ it "should parse a REST-style URL" do
+ should_parse 'http://www.facebook.com/events/247619485255249'
+ end
+
+ it "should parse a GET-style URL with a 'www.' host prefix" do
+ should_parse 'http://www.facebook.com/event.php?eid=247619485255249'
+ end
+
+ it "should parse a API uri" do
+ should_parse 'http://graph.facebook.com/247619485255249'
+ end
end
end

0 comments on commit cdf9665

Please sign in to comment.