Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
branch: master
Fetching contributors…

Cannot retrieve contributors at this time

executable file 118 lines (88 sloc) 2.173 kb
#!/usr/bin/env perl
=head1 NAME
gethrefre
=head1 SYNOPSIS
gethrefre [ [ -h | --help ] | [ [ -r | --re ] pattern ] url ]
=head1 ARGUMENTS
-h --help Print this usage message and exit
-r --re The pattern to compare href attributes against
-p --print Just print the URLs, progress to stderr
=head1 DESCRIPTION
=cut
use strict;
use warnings;
use Pod::Usage;
use URI;
use URI::Escape qw(uri_unescape);
use LWP::UserAgent;
use HTML::TreeBuilder;
use Getopt::Long;
Getopt::Long::Configure qw(bundling no_ignore_case no_require_order);
my %opt;
Getopt::Long::GetOptions(
'h|help' => \&help,
'r|re=s' => \$opt{re},
'p|print' => \$opt{print},
);
my @url = @ARGV;
my $ua = LWP::UserAgent->new(
agent => '',
);
for my $url (@url) {
print STDERR "* Getting $url ...";
my $cont = getobj($url);
unless (defined $cont) {
print STDERR "failed\n";
next;
} else {
print STDERR "done\n";
my @href = gethrefs($url, $cont);
my @munge = map { s[/src\.cgi/][/cgi/]; $_ } @href;
for my $href (@munge) {
print STDERR "** Getting $href ...";
my $file = (split m!/!, $href)[-1];
if (-e $file) {
print STDERR "exists\n";
next;
}
if ($opt{print}) {
print "$href\n";
} else {
$ua->mirror($href, uri_unescape($file));
}
print STDERR "done\n"
}
print STDERR "done\n";
}
}
sub getobj
{
my $url = shift;
my $res = $ua->get($url);
$res->is_success ? $res->content : undef;
}
sub gethrefs
{
my ($url, $content) = @_;
my @ret;
my $tree = HTML::TreeBuilder->new_from_content($content);
my @food = $tree->look_down(
'_tag' => 'a',
sub {
my $a = shift;
my $href = $a->attr('href');
{
no warnings;
$href =~ $opt{re};
}
},
);
for my $munch (@food) {
push @ret, URI->new($munch->attr('href'))->abs($url);
}
@ret;
}
sub help
{
pod2usage(-verbose => 1);
}
Jump to Line
Something went wrong with that request. Please try again.