Permalink
Browse files

Fix absolutizing of links when they contain international chars

  • Loading branch information...
1 parent 3000890 commit 3288935da888783a13e46f54d73caec7eca013ff @jaimeiniesta committed Aug 13, 2012
Showing with 37 additions and 1 deletion.
  1. +1 −1 lib/meta_inspector/scraper.rb
  2. +26 −0 spec/fixtures/international.response
  3. +10 −0 spec/metainspector_spec.rb
@@ -170,7 +170,7 @@ def absolutify_url(url)
if url =~ /^\w*\:/i
url
else
- URI.parse(@root_url).merge(url).to_s
+ URI.parse(@root_url).merge(URI.encode(url)).to_s.gsub("%23", "#")
end
end
@@ -0,0 +1,26 @@
+HTTP/1.1 200 OK
+Date: Mon, 30 May 2011 09:58:20 GMT
+Server: Microsoft-IIS/6.0
+X-Powered-By: PleskWin
+X-Powered-By: ASP.NET
+Cache-Control: private
+Content-Length: 25902
+Content-Type: text/html
+Expires: Sun, 29 May 2011 09:58:18 GMT
+Set-Cookie: ASPSESSIONIDCSBSQADC=AHENHHKBGGDIFJLHHCCJBHMP; path=/
+Cache-control: private
+
+
+
+<html>
+<head>
+ <title>International chars</title>
+</head>
+<body>
+ <a href="/españa.asp">España</a>
+ <a href="/romanée">Romanée</a>
+ <a href="/faqs#camión">FAQs camión</a>
+ <a href="/search?q=camión">Search camión</a>
+ <a href="/search?q=españa#top">Search España at top</a>
+</body>
+</html>
View
@@ -18,6 +18,7 @@
FakeWeb.register_uri(:get, "http://w3clove.com/faqs", :response => fixture_file("w3clove_faqs.response"))
FakeWeb.register_uri(:get, "https://twitter.com/w3clove", :response => fixture_file("twitter_w3clove.response"))
FakeWeb.register_uri(:get, "https://example.com/empty", :response => fixture_file("empty_page.response"))
+ FakeWeb.register_uri(:get, "http://international.com", :response => fixture_file("international.response"))
describe 'Initialization' do
it 'should accept an URL with a scheme' do
@@ -173,6 +174,15 @@
"http://alazan.com/faqs.asp" ]
end
+ it "should get correct absolute links, encoding the URLs as needed but respecting # and ?" do
+ m = MetaInspector.new('http://international.com')
+ m.links.should == [ "http://international.com/espa%C3%B1a.asp",
+ "http://international.com/roman%C3%A9e",
+ "http://international.com/faqs#cami%C3%B3n",
+ "http://international.com/search?q=cami%C3%B3n",
+ "http://international.com/search?q=espa%C3%B1a#top"]
+ end
+
it "should return empty array if no links found" do
m = MetaInspector.new('http://example.com/empty')
m.links.should == []

0 comments on commit 3288935

Please sign in to comment.