From 7574a0767fdd2bb8f607a6902189a3f0a068d944 Mon Sep 17 00:00:00 2001 From: Andy Lester Date: Tue, 22 May 2007 19:16:39 +0000 Subject: [PATCH] Import of PETDANCE/WWW-Mechanize-1.29_01 from CPAN. gitpan-cpan-distribution: WWW-Mechanize gitpan-cpan-version: 1.29_01 gitpan-cpan-path: PETDANCE/WWW-Mechanize-1.29_01.tar.gz gitpan-cpan-author: PETDANCE gitpan-cpan-maturity: developer --- Changes | 18 ++++++++++++-- MANIFEST | 2 ++ META.yml | 23 ++++++++++-------- Makefile.PL | 17 +++++--------- bin/mech-dump | 2 +- lib/WWW/Mechanize.pm | 56 +++++++++++++++++++++++++++++++++++++------- t/live/wikipedia.t | 33 ++++++++++++++++++++++++++ t/local/log-server | 1 + 8 files changed, 119 insertions(+), 33 deletions(-) create mode 100644 t/live/wikipedia.t diff --git a/Changes b/Changes index 4439be3..2dbd8f8 100644 --- a/Changes +++ b/Changes @@ -1,7 +1,22 @@ Revision history for WWW::Mechanize -1.26 +1.29_01 Tue May 22 14:02:55 CDT 2007 +======================================== +Kevin Falcone and I ask for your assistance in figuring out how to +handle the warnings thrown by the tests, other than hiding them. + +[FIXES] +* Overhauled how tainting was done. Stole code directly from + Test::Taint. +* Have LWP only handle decoding of Content-Encoding, not charset. + +[DOCUMENTATION] +* Fixed the docs for $mech->submit_form()'s with_fields arg. + Thanks, Peteris Krumins. + +1.26 Wed May 16 14:21:29 CDT 2007 +======================================== [FIXES] * Re-reversed the content decoding. This is critical for reading from sites with gzip on the fly, like Wikipedia. @@ -18,7 +33,6 @@ Revision history for WWW::Mechanize 1.24 Fri May 11 15:57:56 CDT 2007 ======================================== - NOTE: Version 1.24 will NOT automatically decode gzipped content for you any more. Consider it a "do not use" release. diff --git a/MANIFEST b/MANIFEST index 6517030..d231c14 100644 --- a/MANIFEST +++ b/MANIFEST @@ -55,6 +55,8 @@ t/upload.t t/warnings.t t/warn.t +t/live/wikipedia.t + t/local/LocalServer.pm t/local/back.t t/local/click.t diff --git a/META.yml b/META.yml index 6953b4a..c69dddb 100644 --- a/META.yml +++ b/META.yml @@ -1,10 +1,11 @@ -# http://module-build.sourceforge.net/META-spec.html -#XXXXXXX This is a prototype!!! It will change in the future!!! XXXXX# -name: WWW-Mechanize -version: 1.26 -version_from: lib/WWW/Mechanize.pm -installdirs: site -requires: +--- #YAML:1.0 +name: WWW-Mechanize +version: 1.29_01 +abstract: Handy web browsing in a Perl object +license: ~ +generated_by: ExtUtils::MakeMaker version 6.32 +distribution_type: module +requires: Carp: 0 File::Temp: 0 FindBin: 0 @@ -23,6 +24,8 @@ requires: URI: 1.25 URI::file: 0 URI::URL: 0 - -distribution_type: module -generated_by: ExtUtils::MakeMaker version 6.30 +meta-spec: + url: http://module-build.sourceforge.net/META-spec-v1.2.html + version: 1.2 +author: + - Andy Lester diff --git a/Makefile.PL b/Makefile.PL index 62667bb..35f2300 100644 --- a/Makefile.PL +++ b/Makefile.PL @@ -7,23 +7,18 @@ use ExtUtils::MakeMaker qw( WriteMakefile ); use File::Spec; use Getopt::Long; +my $live = 1; +my $local = 1; GetOptions( - 'local!' => \my $local, - 'all' => \my $all, + 'live!' => \$live, + 'local!' => \$local, + 'all' => sub { print "All tests are on by default. This option is deprecated.\n" }, 'mech-dump!' => sub { print "mech-dump is always installed now. This option is deprecated.\n" }, ); -if ( $all ) { - $local = 1; -} -else { - if ( not defined $local ) { - $local = 1; - } -} - my @tests = glob File::Spec->catfile( 't', '*.t' ); push( @tests, glob File::Spec->catfile( 't', 'local', '*.t' ) ) if $local; +push( @tests, glob File::Spec->catfile( 't', 'live', '*.t' ) ) if $live; push( @tests, glob File::Spec->catfile( 't', 'mech-dump', '*.t' ) ); my $parms = { diff --git a/bin/mech-dump b/bin/mech-dump index 1866b90..2534fc8 100755 --- a/bin/mech-dump +++ b/bin/mech-dump @@ -59,7 +59,7 @@ get repeated dumps. =cut -my $uri = shift or die "Must specify a URL or file to check\n"; +my $uri = shift or die "Must specify a URL or file to check. See --help for details.\n"; if ( -e $uri ) { require URI::file; $uri = URI::file->new_abs( $uri )->as_string; diff --git a/lib/WWW/Mechanize.pm b/lib/WWW/Mechanize.pm index 44e532c..94c66bb 100644 --- a/lib/WWW/Mechanize.pm +++ b/lib/WWW/Mechanize.pm @@ -6,11 +6,11 @@ WWW::Mechanize - Handy web browsing in a Perl object =head1 VERSION -Version 1.26 +Version 1.29_01 =cut -our $VERSION = '1.26'; +our $VERSION = '1.29_01'; =head1 SYNOPSIS @@ -1615,10 +1615,15 @@ are a list of key/value pairs, all of which are optional. =item * fields => \%fields +Specifies the fields to be filled in the current form. + +=item * with_fields => \%fields + Probably all you need for the common case. It combines a smart form selector and data setting in one operation. It selects the first form that contains all fields mentioned in C<\%fields>. This is nice because you don't need to know the name or number of the form to do this. + (calls C> and C>). If you choose this, the form_number, form_name and fields options will be ignored. @@ -2016,7 +2021,9 @@ sub _update_page { # Try to decode the content. Undef will be returned if there's nothing to decompress. # See docs in HTTP::Message for details. Do we need to expose the options there? - my $content = $res->decoded_content; + # use charset => 'none' because while we want LWP to handle Content-Encoding for + # the auto-gzipping with Compress::Zlib we don't want it messing with charset + my $content = $res->decoded_content( charset => 'none' ); $content = $res->content if (not defined $content); $content .= _taintedness(); @@ -2032,18 +2039,48 @@ sub _update_page { } # _update_page our $_taintbrush; + +# This is lifted wholesale from Test::Taint sub _taintedness { - if ( not defined $_taintbrush ) { - my $file = $0; - open( my $fh, '<', $file ) or die "Can't open $file: $!"; - sysread( $fh, $_taintbrush, 1 ) or die "Can't read from $file: $!"; - $_taintbrush = substr( $_taintbrush, 0, 0 ); - close $fh; + return $_taintbrush if defined $_taintbrush; + + # Somehow we need to get some taintedness into our $_taintbrush. + # Let's try the easy way first. Either of these should be + # tainted, unless somebody has untainted them, so this + # will almost always work on the first try. + # (Unless, of course, taint checking has been turned off!) + $_taintbrush = substr("$0$^X", 0, 0); + return $_taintbrush if _is_tainted( $_taintbrush ); + + # Let's try again. Maybe somebody cleaned those. + $_taintbrush = substr(join("", @ARGV, %ENV), 0, 0); + return $_taintbrush if _is_tainted( $_taintbrush ); + + # If those don't work, go try to open some file from some unsafe + # source and get data from them. That data is tainted. + # (Yes, even reading from /dev/null works!) + for my $filename ( qw(/dev/null / . ..), values %INC, $0, $^X ) { + if ( open my $fh, '<', $filename ) { + my $data; + if ( defined sysread $fh, $data, 1 ) { + $_taintbrush = substr( $data, 0, 0 ); + last if _is_tainted( $_taintbrush ); + } + } } + # Sanity check + die "Our taintbrush should have zero length!" if length $_taintbrush; + return $_taintbrush; } +sub _is_tainted { + no warnings qw(void uninitialized); + + return !eval { join('', shift), kill 0; 1 }; +} # _is_tainted + =head2 $mech->_modify_request( $req ) @@ -2524,6 +2561,7 @@ to read the FAQ if you have support requests. Thanks to the numerous people who have helped out on WWW::Mechanize in one way or another, including Kirrily Robert for the orignal C, +Peteris Krumins, Rafael Kitover, David Steinbrunner, Kevin Falcone, diff --git a/t/live/wikipedia.t b/t/live/wikipedia.t new file mode 100644 index 0000000..d564b50 --- /dev/null +++ b/t/live/wikipedia.t @@ -0,0 +1,33 @@ +#!perl -T + +use warnings; +use strict; + +use constant LANGUAGES => qw( en it ja es nl pl ); +use Test::More tests => 3 + (2 * scalar LANGUAGES); + +BEGIN { + use_ok( 'WWW::Mechanize' ); +} + +my $mech = WWW::Mechanize->new; +isa_ok( $mech, 'WWW::Mechanize', 'Created object' ); +$mech->agent_alias( 'Windows IE 6' ); # Wikipedia 403s out obvious bots + +for my $lang ( LANGUAGES ) { + my $start = "http://$lang.wikipedia.org/"; + + $mech->get( $start ); + + ok( $mech->success, "Got $start" ); + my @links = $mech->links(); + cmp_ok( scalar @links, '>', 50, "Over 50 links on $start" ); +} + +SKIP: { + eval "use Test::Memory::Cycle"; + skip "Test::Memory::Cycle not installed", 1 if $@; + + memory_cycle_ok( $mech, "No memory cycles found" ); +} + diff --git a/t/local/log-server b/t/local/log-server index 09b0a90..5dcec6a 100755 --- a/t/local/log-server +++ b/t/local/log-server @@ -1,3 +1,4 @@ +# vi: ft=perl # Thanks to merlyn for nudging me and giving me this snippet! use strict; use HTTP::Daemon;