Skip to content

Commit

Permalink
Update url, cached url, and similar url parsers
Browse files: browse the repository at this point in the history
  • Loading branch information
ezkl committed Apr 11, 2012
1 parent da93666 commit ce6d2b3
Showing 1 changed file with 7 additions and 5 deletions.
12 changes: 7 additions & 5 deletions lib/gscraper/search/web_query.rb
Expand Up @@ -419,7 +419,9 @@ def page(page_index)
rank = rank_offset + (index + 1)
link = result.at('.//h3[@class="r"]/a')
title = link.inner_text
url = URI(link.get_attribute('href'))
link_url = URI(link.get_attribute('href')).query_params['q']
url = URI(link_url)

summary_text = ''

if (content = (result.at('.//div[@class="s"]','.//td[@class="j"]//font')))
Expand All @@ -434,12 +436,12 @@ def page(page_index)
cached_url = nil
similar_url = nil

if (gl = result.at('.//span[@class="gl"]'))
if (cached_link = gl.at('a:first'))
cached_url = URI(cached_link.get_attribute('href'))
if (gl = result.at('.//div[@class="s"]'))
if (cached_link = gl.at('.//a[1]'))
cached_url = URI("http://#{search_host}" + cached_link.get_attribute('href'))
end

if (similar_link = gl.at('a:last'))
if (similar_link = gl.at('.//a[2]'))
similar_url = URI("http://#{search_host}" + similar_link.get_attribute('href'))
end
end
Expand Down

0 comments on commit ce6d2b3

Please sign in to comment.