Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

globalized wikipedia onebox #387

Merged
merged 1 commit into from Mar 7, 2013
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
11 changes: 5 additions & 6 deletions lib/oneboxer/wikipedia_onebox.rb
Expand Up @@ -3,19 +3,18 @@
module Oneboxer
class WikipediaOnebox < HandlebarsOnebox

# Handle http(s) URLs that contain "wikipedia.com/" or "wikipedia.org/".
# The dot before the TLD is escaped so that it only matches a literal "."
# (an unescaped "." would also accept strings such as "wikipediaXcom").
matcher /^https?:\/\/.*wikipedia\.(com|org)\/.*$/
favicon 'wikipedia.png'

# Returns whatever template_path yields for the 'simple_onebox' template.
def template
  template_name = 'simple_onebox'
  template_path(template_name)
end

# Builds the mobile-site URL for the Wikipedia article referenced by @url.
#
# The language subdomain of the original link is kept (e.g. "fr"); "en" is
# used when the link has none. The article title is percent-decoded and then
# re-encoded as a query-string value, so characters such as "&", "+" or "="
# cannot corrupt the "title" parameter (spaces are emitted as "+").
#
# Returns the translated URL String, or @url unchanged when it is not a
# "/wiki/<title>" link on a wikipedia.com / wikipedia.org host.
def translate_url
  # The subdomain is restricted to a single DNS label so that arbitrary text
  # (e.g. "evil.example/x") can never be spliced into the rebuilt host name.
  # "?" ends the title so a trailing query string is not treated as part of it.
  m = @url.match(%r{\Ahttps?://(?:(?<subdomain>[a-z0-9-]+)\.)?wikipedia\.(?:com|org)/wiki/(?<identifier>[^#/?]+)}i)
  return @url unless m

  subdomain = m[:subdomain] || "en"
  # CGI.unescape decodes "+" as a space; protect literal plus signs ("C++").
  article_id = CGI.unescape(m[:identifier].gsub("+", "%2B"))
  "http://#{subdomain}.m.wikipedia.org/w/index.php?title=#{URI.encode_www_form_component(article_id)}"
end

def parse(data)
Expand All @@ -25,7 +24,7 @@ def parse(data)
result = {}

title = html_doc.at('title').inner_html
result[:title] = title.gsub!(/ - Wikipedia, the free encyclopedia/, '') if title.present?
result[:title] = title.gsub!(/ - Wikipedia.*$/, '') if title.present?

# get the first image > 150 pix high
images = html_doc.search("img").select { |img| img['height'].to_i > 150 }
Expand Down
28 changes: 23 additions & 5 deletions spec/components/oneboxer/wikipedia_onebox_spec.rb
Expand Up @@ -5,14 +5,32 @@
require 'oneboxer/wikipedia_onebox'

describe Oneboxer::WikipediaOnebox do
before(:each) do
@o = Oneboxer::WikipediaOnebox.new("http://en.wikipedia.org/wiki/Ruby")
FakeWeb.register_uri(:get, @o.translate_url, :response => fixture_file('oneboxer/wikipedia.response'))

# Full render: both the translated URL and the URL it redirects to are stubbed
# with canned responses before the onebox is generated.
it "generates the expected onebox for Wikipedia" do
  onebox = Oneboxer::WikipediaOnebox.new('http://en.wikipedia.org/wiki/Ruby')
  translated_url = onebox.translate_url
  redirected_url = 'http://en.m.wikipedia.org/wiki/Ruby'

  FakeWeb.register_uri(:get, translated_url, :response => fixture_file('oneboxer/wikipedia.response'))
  FakeWeb.register_uri(:get, redirected_url, :response => fixture_file('oneboxer/wikipedia_redirected.response'))

  onebox.onebox.should == expected_wikipedia_result
end

it "generates the expected onebox for Wikipedia" do
@o.onebox.should == expected_wikipedia_result
# wikipedia.com links are translated to the wikipedia.org mobile host.
it "accepts .com extension" do
  o = Oneboxer::WikipediaOnebox.new('http://en.wikipedia.com/wiki/Postgres')
  o.translate_url.should == 'http://en.m.wikipedia.org/w/index.php?title=Postgres'
end

# Non-ASCII characters in the article title are percent-encoded.
it "encodes identifier" do
  onebox = Oneboxer::WikipediaOnebox.new('http://en.wikipedia.com/wiki/Café')
  expected_url = 'http://en.m.wikipedia.org/w/index.php?title=Caf%C3%A9'
  onebox.translate_url.should == expected_url
end

# A link without a language subdomain falls back to the "en" mobile site.
it "defaults to en locale" do
  onebox = Oneboxer::WikipediaOnebox.new('http://wikipedia.org/wiki/Ruby_on_rails')
  expected_url = 'http://en.m.wikipedia.org/w/index.php?title=Ruby_on_rails'
  onebox.translate_url.should == expected_url
end

# The language subdomain of the source link is carried over to the mobile URL.
it "generates localized url" do
  onebox = Oneboxer::WikipediaOnebox.new('http://fr.wikipedia.org/wiki/Ruby')
  expected_url = 'http://fr.m.wikipedia.org/w/index.php?title=Ruby'
  onebox.translate_url.should == expected_url
end

private
Expand Down