Skip to content

Commit

Permalink
added YAML dump
Browse files: browse the repository at this point in the history
  • Loading branch information
Devin Austin committed Aug 31, 2009
1 parent 692d511 commit 4059e4e
Showing 1 changed file with 9 additions and 3 deletions.
12 changes: 9 additions & 3 deletions scrape.pl
Expand Up @@ -4,9 +4,10 @@
use Web::Scraper;
use URI;
use Data::Dumper;
use YAML qw/ Dump /;
use WWW::Mechanize;


# Script-level setup: the search URL to start from and the browser object.
# NOTE(review): presumably meant to hold the serialized YAML; it is never
# assigned in the lines visible here -- confirm it is used elsewhere.
my $yaml;
# yellowpages.com.au search URL; the query string asks for the clue
# "electrical contractors" with the location clue "All States".
my $base_url = "http://yellowpages.com.au/search/postSearchEntry.do?clueType=0&clue=electrical+contractors&locationClue=All+States&x=0&y=0";
# WWW::Mechanize object; its follow_link() call is the condition of the
# "Next"-link while loop further down.
my $mech = WWW::Mechanize->new;
# Progress message on STDOUT.
print "mech object initiated\n";
Expand All @@ -15,9 +16,11 @@
# Assigned the return value of $want->scrape(...) further down.
my $names;
# Collects the { contractor => $names } hash refs pushed further down.
my @information;
# Progress message printed immediately before the "Next"-link while loop.
print "Entering link following loop\n";

while ( $mech->follow_link( text => "Next" ) ) {
print "Beginning scrape inside loop\n";
my $want = scraper {

my $want = scraper {
process "li.gold", "contractors[]" => scraper {
process ".omnitureListingNameLink", name => 'TEXT';
process ".address", address => 'TEXT'; # need to split this up into address, state, postcode,
Expand All @@ -30,11 +33,14 @@
$names = $want->scrape(
URI->new($base_url)
);

push @information, { contractor => $names };

print "Saving page info...\n";
print "Scrape successful\n";
print "Serializing -> YAML\n";
print "Dumping info:\n";
print Dumper(@information);
print Dump(@information);
warn "Page: $base_url\n";
print "Sleep for a bit\n";
sleep(1);
Expand Down

0 comments on commit 4059e4e

Please sign in to comment.