Skip to content

Commit

Permalink
Implement OpenGraph library
Browse files Browse the repository at this point in the history
  • Loading branch information
huyha85 committed Nov 12, 2012
1 parent 7068465 commit f602681
Show file tree
Hide file tree
Showing 14 changed files with 328 additions and 21 deletions.
10 changes: 5 additions & 5 deletions Gemfile
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,9 @@ source "http://rubygems.org"
# Add dependencies to develop your gem here.
# Include everything needed to run rake, tests, features, etc.
group :development do
gem "rspec", "~> 2.8.0"
gem "rdoc", "~> 3.12"
gem "bundler", "~> 1.0.0"
gem "jeweler", "~> 1.8.4"
gem "rcov", ">= 0"
gem "rspec"
gem "rdoc"
gem "bundler"
gem "jeweler"
gem "nokogiri"
end
33 changes: 33 additions & 0 deletions Gemfile.lock
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
GEM
remote: http://rubygems.org/
specs:
diff-lcs (1.1.3)
git (1.2.5)
jeweler (1.8.4)
bundler (~> 1.0)
git (>= 1.2.5)
rake
rdoc
json (1.7.5)
nokogiri (1.5.5)
rake (0.9.2.2)
rdoc (3.12)
json (~> 1.4)
rspec (2.11.0)
rspec-core (~> 2.11.0)
rspec-expectations (~> 2.11.0)
rspec-mocks (~> 2.11.0)
rspec-core (2.11.1)
rspec-expectations (2.11.3)
diff-lcs (~> 1.1.3)
rspec-mocks (2.11.3)

PLATFORMS
ruby

DEPENDENCIES
bundler
jeweler
nokogiri
rdoc
rspec
34 changes: 30 additions & 4 deletions README.rdoc
Original file line number Diff line number Diff line change
@@ -1,9 +1,36 @@
= opengraph_parser
= OpengraphParser

Description goes here.
OpengraphParser is a simple Ruby library for parsing Open Graph protocol information from a web site. Learn more about the protocol at:
http://ogp.me

== Installation
gem install opengraph_parser

or add to Gemfile

gem "opengraph_parser"

== Usage
og = OpenGraph.new("http://ogp.me")
og.title # => "Open Graph protocol"
og.type # => "website"
og.url # => "http://ogp.me/"
og.description # => "The Open Graph protocol enables any web page to become a rich object in a social graph."
og.images # => ["http://ogp.me/logo.png"]

You can also get other Open Graph metadata as:
og.metadata # => {"og:image:type"=>"image/png", "og:image:width"=>"300", "og:image:height"=>"300"}

If you try to parse Open Graph information for a website that doesn’t have any Open Graph metadata, the library falls back to other information on the page, using the following rules:
<title> for title
<meta name="description"> for description
<link rel="image_src"> or all <img> tags for images

You can disable this fallback lookup by passing false as the second argument to the constructor:
og = OpenGraph.new("http://ogp.me", false)

== Contributing to opengraph_parser

* Check out the latest master to make sure the feature hasn't been implemented or the bug hasn't been fixed yet.
* Check out the issue tracker to make sure someone already hasn't requested it and/or contributed it.
* Fork the project.
Expand All @@ -16,4 +43,3 @@ Description goes here.

Copyright (c) 2012 Huy Ha. See LICENSE.txt for
further details.

7 changes: 4 additions & 3 deletions Rakefile
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,12 @@ Jeweler::Tasks.new do |gem|
gem.name = "opengraph_parser"
gem.homepage = "http://github.com/huyha85/opengraph_parser"
gem.license = "MIT"
gem.summary = %Q{TODO: one-line summary of your gem}
gem.description = %Q{TODO: longer description of your gem}
gem.summary = %Q{A simple Ruby library for parsing Open Graph Protocol information from a website.}
gem.description = %Q{A simple Ruby library for parsing Open Graph Protocol information from a website. It also includes a fallback solution when the website has no Open Graph information.}
gem.email = "hhuy424@gmail.com"
gem.authors = ["Huy Ha"]
gem.authors = ["Huy Ha", "Duc Trinh"]
# dependencies defined in Gemfile
gem.files = Dir.glob('lib/**/*.rb')
end
Jeweler::RubygemsDotOrgTasks.new

Expand Down
1 change: 1 addition & 0 deletions VERSION
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
0.1.0
85 changes: 85 additions & 0 deletions lib/open_graph.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
require 'nokogiri'
require 'redirect_follower'

# Extracts Open Graph protocol data (http://ogp.me) from a web page.
#
# The page at +src+ is fetched through RedirectFollower and its <meta>
# tags are read with Nokogiri. When +fallback+ is true, missing values are
# filled in from <title>, <meta name="description">, <link rel="image_src">
# and <img> tags.
#
#   og = OpenGraph.new("http://ogp.me")
#   og.title   # => "Open Graph protocol"
#   og.images  # => ["http://ogp.me/logo.png"]
class OpenGraph
  # Matches an Open Graph property name; capture 1 is the name without the
  # "og:" prefix. The prefix is matched case-insensitively.
  OG_PROPERTY = /\Aog:(.+)\z/i

  # Open Graph properties that are stored in an attribute of the same name.
  SIMPLE_ATTRIBUTES = %w(title url type description).freeze

  attr_accessor :src, :url, :type, :title, :description, :images, :metadata, :response

  # src      - URL of the page to inspect.
  # fallback - when true (default), look up title/description/images in the
  #            regular HTML if the Open Graph tags did not provide them.
  def initialize(src, fallback = true)
    @src = src
    @images = []
    @metadata = {}
    parse_opengraph
    load_fallback if fallback
    check_images_path
  end

  # Fetches the page and reads every <meta property="og:..."> tag.
  #
  # title, url, type and description are stored in their attributes (empty
  # values are ignored), og:image values are collected in +images+, and any
  # other og: property is stored in +metadata+ under its full property name.
  # If the page cannot be fetched, title and url are set to +src+.
  def parse_opengraph
    begin
      @response = RedirectFollower.new(@src).resolve
    rescue
      # Best effort: an unreachable page still yields a usable object.
      @title = @url = @src
      return
    end

    doc = response_document
    return unless doc

    doc.css('meta').each do |meta|
      property = meta.attribute('property').to_s
      name = og_property_name(property)
      next unless name

      content = meta.attribute('content').to_s.strip
      case name
      when *SIMPLE_ATTRIBUTES
        instance_variable_set("@#{name}", content) unless content.empty?
      when "image"
        add_image(content)
      else
        @metadata[property] = content
      end
    end
  end

  # Fills in values the Open Graph tags did not provide, using the page's
  # <title>, <meta name="description">, <link rel="image_src"> and, as a last
  # resort, every <img>. Does nothing when there is no response body.
  def load_fallback
    doc = response_document
    return unless doc

    if @title.to_s.empty?
      title_node = doc.xpath("//head/title").first
      @title = title_node.text.to_s.strip if title_node
    end

    @url = @src if @url.to_s.empty?

    if @description.to_s.empty?
      description_meta = doc.xpath("//head/meta[@name='description']").first
      @description = description_meta.attribute("content").to_s.strip if description_meta
    end

    fetch_images(doc, "//head/link[@rel='image_src']", "href") if @images.empty?
    fetch_images(doc, "//img", "src") if @images.empty?
  end

  # Rewrites every collected image that has no host into an absolute URL of
  # the form "scheme://host:port/path", using the scheme, host and port of
  # +src+. Images that already carry a host are kept unchanged.
  def check_images_path
    base = URI.parse(escape_uri(@src))
    candidates = @images.dup
    @images = []
    candidates.each do |img|
      if URI.parse(escape_uri(img)).host.nil?
        # A path without a leading slash is treated as root-relative so the
        # port and the path never run together ("host:80images/a.jpg").
        separator = img.start_with?("/") ? "" : "/"
        add_image("#{base.scheme}://#{base.host}:#{base.port}#{separator}#{img}")
      else
        add_image(img)
      end
    end
  end

  private

  # Appends image_url to +images+ unless it is empty or already present.
  def add_image(image_url)
    @images << image_url unless @images.include?(image_url) || image_url.to_s.empty?
  end

  # Collects the +attr+ attribute of every node matching xpath_str as an image.
  def fetch_images(doc, xpath_str, attr)
    doc.xpath(xpath_str).each do |link|
      add_image(link.attribute(attr).to_s.strip)
    end
  end

  # Returns the parsed response body, or nil when there is no response/body.
  def response_document
    body = @response && @response.body
    Nokogiri.parse(body) if body
  end

  # Returns the lower-cased property name without its "og:" prefix, or nil
  # when the property is not an Open Graph one.
  def og_property_name(property)
    match = OG_PROPERTY.match(property.to_s)
    match && match[1].downcase
  end

  # Percent-encodes characters that are not allowed in a URI.
  # URI.escape was removed in Ruby 3.0; the parser method is its equivalent.
  def escape_uri(str)
    URI::DEFAULT_PARSER.escape(str.to_s)
  end
end
1 change: 1 addition & 0 deletions lib/opengraph_parser.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
require 'open_graph'
34 changes: 34 additions & 0 deletions lib/redirect_follower.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
require 'net/http'

# Performs an HTTP GET and follows redirects up to a fixed limit.
#
#   res = RedirectFollower.new("http://example.com").resolve
#   res.url   # => final URL
#   res.body  # => body of the final response
class RedirectFollower
  # Raised by #resolve when more redirects are encountered than the limit allows.
  class TooManyRedirects < StandardError; end

  attr_accessor :url, :body, :redirect_limit, :response

  # url   - URL to fetch.
  # limit - maximum number of redirects to follow (default 5).
  def initialize(url, limit = 5)
    @url, @redirect_limit = url, limit
  end

  # Fetches +url+, following redirects. On return +url+ holds the last URL
  # requested, +response+ the last response and +body+ its body.
  #
  # A redirect response that names no target (no Location header and no
  # anchor in the body) is returned as the final response.
  #
  # Returns self.
  # Raises TooManyRedirects when the redirect limit is exhausted.
  def resolve
    raise TooManyRedirects if redirect_limit < 0

    self.response = Net::HTTP.get_response(parse_uri(url))

    # kind_of? is used deliberately: it is the check the specs stub.
    if response.kind_of?(Net::HTTPRedirection) && (target = redirect_url)
      # The target may be relative ("/login"); resolve it against the
      # URL that produced the redirect.
      self.url = parse_uri(url).merge(parse_uri(target)).to_s
      self.redirect_limit -= 1
      return resolve
    end

    self.body = response.body
    self
  end

  # Returns the redirect target of the current response: the Location header
  # when present, otherwise the href of the first matching anchor in the
  # body. Returns nil when neither is available.
  def redirect_url
    location = response['location']
    return location unless location.nil?

    response.body.to_s[/<a href="([^">]+)"/i, 1]
  end

  private

  # Parses str as a URI. Strings that are not valid URIs as given (e.g. they
  # contain spaces) are percent-encoded first; valid ones are left untouched
  # so existing escapes such as "%20" are not encoded a second time.
  def parse_uri(str)
    URI.parse(str.to_s)
  rescue URI::InvalidURIError
    URI.parse(URI::DEFAULT_PARSER.escape(str.to_s))
  end
end
61 changes: 61 additions & 0 deletions spec/lib/open_graph_spec.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
require File.expand_path(File.dirname(__FILE__) + '/../spec_helper')

describe OpenGraph do
  # Makes RedirectFollower return a response whose body is the content of
  # the given fixture file under spec/view, so no HTTP request is made.
  def stub_page_with(fixture)
    html = File.read("#{File.dirname(__FILE__)}/../view/#{fixture}")
    response = double(body: html)
    RedirectFollower.stub(:new) { double(resolve: response) }
  end

  describe "#initialize" do
    context "with invalid src" do
      it "should set title and url the same as src" do
        og = OpenGraph.new("invalid")

        # An unreachable source is echoed back as both title and url.
        og.src.should == "invalid"
        og.title.should == "invalid"
        og.url.should == "invalid"
      end
    end

    context "with no fallback" do
      it "should get values from opengraph metadata" do
        stub_page_with("opengraph.html")

        og = OpenGraph.new("http://test.host", false)

        og.src.should == "http://test.host"
        og.title.should == "OpenGraph Title"
        og.type.should == "article"
        og.url.should == "http://test.host"
        og.description.should == "My OpenGraph sample site for Rspec"
        og.images.should == ["http://test.host/images/rock1.jpg", "http://test.host/images/rock2.jpg"]
      end
    end

    context "with fallback" do
      context "when website has opengraph metadata" do
        it "should get values from opengraph metadata" do
          stub_page_with("opengraph.html")

          og = OpenGraph.new("http://test.host")

          # Open Graph values win over the plain HTML ones.
          og.src.should == "http://test.host"
          og.title.should == "OpenGraph Title"
          og.type.should == "article"
          og.url.should == "http://test.host"
          og.description.should == "My OpenGraph sample site for Rspec"
          og.images.should == ["http://test.host/images/rock1.jpg", "http://test.host/images/rock2.jpg"]
        end
      end

      context "when website has no opengraph metadata" do
        it "should lookup for other data from website" do
          stub_page_with("opengraph_no_metadata.html")

          og = OpenGraph.new("http://test.host")

          og.src.should == "http://test.host"
          og.title.should == "OpenGraph Title Fallback"
          og.type.should be_nil
          og.url.should == "http://test.host"
          og.description.should == "Short Description Fallback"
          og.images.should == ["http://test.host:80/images/wall1.jpg", "http://test.host:80/images/wall2.jpg"]
        end
      end
    end
  end
end
43 changes: 43 additions & 0 deletions spec/lib/redirect_follower_spec.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
require File.expand_path(File.dirname(__FILE__) + '/../spec_helper')

describe RedirectFollower do
  describe "#resolve" do
    let(:url) { "http://test.host" }

    # A plain, final response.
    let(:mock_res) { double(body: "Body is here.") }

    # A redirect response without a Location header: the target has to be
    # taken from the anchor in the body.
    let(:mock_redirect) {
      m = double(body: %Q{<body><a href="http://new.test.host"></a></body>}, kind_of?: Net::HTTPRedirection)
      m.stub(:[]).and_return(nil)
      m
    }

    context "with no redirection" do
      it "should return the response" do
        Net::HTTP.should_receive(:get_response).and_return(mock_res)

        res = RedirectFollower.new(url).resolve
        res.body.should == "Body is here."
        res.redirect_limit.should == 5
      end
    end

    context "with redirection" do
      it "should follow the link in redirection" do
        # URI.escape was removed in Ruby 3.0. These literal URLs contain
        # nothing that needs escaping, so parsing them directly yields the
        # same URI objects.
        Net::HTTP.should_receive(:get_response).with(URI.parse(url)).and_return(mock_redirect)
        Net::HTTP.should_receive(:get_response).with(URI.parse("http://new.test.host")).and_return(mock_res)

        res = RedirectFollower.new(url).resolve
        res.body.should == "Body is here."
        res.redirect_limit.should == 4
      end
    end

    context "with unlimited redirection" do
      it "should raise TooManyRedirects error" do
        # Every request redirects again, so the limit must eventually trip.
        Net::HTTP.stub(:get_response).and_return(mock_redirect)
        lambda {
          RedirectFollower.new(url).resolve
        }.should raise_error(RedirectFollower::TooManyRedirects)
      end
    end
  end
end
7 changes: 0 additions & 7 deletions spec/opengraph_parser_spec.rb

This file was deleted.

5 changes: 3 additions & 2 deletions spec/spec_helper.rb
Original file line number Diff line number Diff line change
@@ -1,12 +1,13 @@
$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
$LOAD_PATH.unshift(File.dirname(__FILE__))
require 'rspec'
require 'opengraph_parser'
require 'open_graph'
require 'redirect_follower'

# Requires supporting files with custom matchers and macros, etc,
# in ./support/ and its subdirectories.
Dir["#{File.dirname(__FILE__)}/support/**/*.rb"].each {|f| require f}

RSpec.configure do |config|

end
16 changes: 16 additions & 0 deletions spec/view/opengraph.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
<html>
<head>
<title>OpenGraph Title Fallback</title>
<meta property="og:title" content="OpenGraph Title" />
<meta property="og:type" content="article" />
<meta property="og:url" content="http://test.host" />
<meta property="og:description" content="My OpenGraph sample site for Rspec" />
<meta property="og:image" content="http://test.host/images/rock1.jpg" />
<meta property="og:image" content="http://test.host/images/rock2.jpg" />
<meta name="description" content="Short Description Fallback" />
</head>
<body>
<img src="http://test.host/images/wall1.jpg" />
<img src="http://test.host/images/wall2.jpg" />
</body>
</html>
Loading

0 comments on commit f602681

Please sign in to comment.