Skip to content

Commit

Permalink
Update scraper.rb
Browse files Browse the repository at this point in the history
  • Loading branch information
anazamarron committed Apr 15, 2015
1 parent baa01cf commit 62d41be
Showing 1 changed file with 16 additions and 7 deletions.
23 changes: 16 additions & 7 deletions scraper.rb
Expand Up @@ -10,14 +10,23 @@
# Read in the first page of the deputies listing ("Diputados") on congreso.es.
# The query string selects the full search (`tipoBusqueda=completo`) for
# legislature 10 (`idLegislatura=10`).
page = agent.get("http://www.congreso.es/portal/page/portal/Congreso/Congreso/Diputados?_piref73_1333056_73_1333049_1333049.next_page=/wc/menuAbecedarioInicio&tipoBusqueda=completo&idLegislatura=10")

# NOTE(review): these seven lines look like the removed side of the diff hunk
# (the commit header reports 7 deletions); the loop's closing `end` is not part
# of this span. Confirm before treating this as live code.
# Loop over the listing pages, printing the text of each deputy link.
while true
# Deputy profile links are the ones whose href contains "fichaDiputado".
page.links_with(href: /fichaDiputado/).each do |link|
puts link.text
end
# Look for the pager link by matching "Siguiente" against its href.
next_page_link = page.link_with(href: /Siguiente/)
# No pager link found: stop looping.
break if next_page_link == nil
# NOTE(review): `next_page` is not assigned anywhere in the visible code; the
# link found above is stored in `next_page_link`, so this line presumably
# raises NameError when reached - confirm.
page = next_page.click
# Accumulates the profile link of every deputy found across the paginated listing.
diputados_link = []

# Walk the listing one page at a time until there is no "Siguiente" link.
# (A `while false` guard here would skip the body entirely and leave
# `diputados_link` empty.)
loop do
  # Deputy profile links are the ones whose href contains "fichaDiputado".
  diputados_link.concat(page.links_with(href: /fichaDiputado/))

  # The pager link is matched by its visible text, "Siguiente".
  next_page_link = page.link_with(text: /Siguiente/)
  break if next_page_link.nil?

  # Follow the pager link that was just looked up; `click` returns the next page.
  page = next_page_link.click
end

# Fetch a single deputy's profile page (idDiputado=268, idLegislatura=10) and
# extract a few fields from it.
page = agent.get("http://www.congreso.es/portal/page/portal/Congreso/Congreso/Diputados/BusqForm?_piref73_1333155_73_1333154_1333154.next_page=/wc/fichaDiputado?idDiputado=268&idLegislatura=10")

# Text of the <div class="nombredip"> element.
# NOTE(review): `nombe_dip` looks like a typo for `nombre_dip`; the name is kept
# because code further down the file (not visible here) may reference it.
nombe_dip = page.search('div.nombredip').text

# URL of the deputy's Twitter profile, or nil when the page has no such link.
# NOTE(review): matching on a twitter.com href is an assumption about the page
# markup - confirm against the real page.
twitter_link = page.link_with(href: /twitter\.com/)
twitter_dip = twitter_link ? twitter_link.href : nil

curriculum = page.search('div.curriculum')
# Nokogiri's `search` takes CSS/XPath strings, not a Mechanize-style criteria
# hash, so the mailto anchor is selected with a CSS attribute selector.
email = curriculum.search('a[href*="mailto"]').text
puts email

#
# # Find something on the page using CSS selectors
#p page.at('div.listado1')
Expand Down

0 comments on commit 62d41be

Please sign in to comment.