Skip to content

Commit

Permalink
fix parsing header - release v1.121160
Browse files: browse the repository at this point in the history
  • Loading branch information
ajgb committed Apr 25, 2012
1 parent acf6686 commit 1bdc13a
Show file tree
Hide file tree
Showing 7 changed files with 36 additions and 15 deletions.
4 changes: 4 additions & 0 deletions Changes
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
Revision history for WWW-Sitemapper

1.121160 Wed Apr 25 23:18:10 2012
[rt#72738]: test fails with libwww-6.03 - fix parsing title from
response

1.110340 Thu Feb 3 14:51:45 2011
Bug fix: allow method modifiers to work with Hook'ed methods

Expand Down
16 changes: 14 additions & 2 deletions dist.ini
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,20 @@ name = WWW-Sitemapper
author = Alex J. G. Burzyński <ajgb@cpan.org>
license = Perl_5
copyright_holder = Alex J. G. Burzyński <ajgb@cpan.org>
copyright_year = 2010
copyright_year = 2012

[@Filter]
-bundle=@AJGB
-remove=CompileTests
-remove=PortabilityTests

[Prereqs]
HTTP::Message = 6.0
HTML::HeadParser = 3.40
LWP = 6.0

[@AJGB]
[AutoPrereqs]
[Test::Compile]
[Test::Portability]


15 changes: 9 additions & 6 deletions lib/WWW/Sitemapper.pm
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@ use WWW::Robot;
use WWW::Sitemap::XML;
use WWW::Sitemap::XML::URL;
use Storable qw( store retrieve );
use HTML::HeadParser;
use Encode ();

BEGIN {
extends qw( MooseX::MethodAttributes::Inheritable );
Expand Down Expand Up @@ -984,12 +986,13 @@ sub _set_page_data : Hook('invoke-after-get') {

if ( my $node = $self->tree->find_node( $url ) ) {

if ( $response->headers->title ) {
# HTTP::Headers decodes the content.
my ($title) = $response->content =~ m|<title>(.*?)</title>|is;
if ( $title ) {
$node->title( $title );
}
my $hp = HTML::HeadParser->new;
$hp->xml_mode(1) if $response->content_is_xhtml;
$hp->utf8_mode(1) if $] >= 5.008 && $HTML::Parser::VERSION >= 3.40;

$hp->parse($response->content);
if ( my $title = $hp->header('title') ) {
$node->title( Encode::decode($response->content_charset, $title) );
}
if ( my $last_modified = $response->headers->last_modified ) {
$node->last_modified( $last_modified );
Expand Down
3 changes: 2 additions & 1 deletion t/data/32.html
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,8 @@

<html>
<head>
<title>Child32</title>
<title>Dziecię32</title>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
</head>
<body>
<a href="/1.html">Child1</a><br>
Expand Down
2 changes: 1 addition & 1 deletion t/data/html_sitemap.html
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
<ul><li><a href="3.html">Child3</a>
<ul><li><a href="31.html">Child31</a>
</li>
<li><a href="32.html">Child32</a>
<li><a href="32.html">Dziecię32</a>
</li>
</ul>
</li>
Expand Down
2 changes: 1 addition & 1 deletion t/data/sitemap_with_id_with_title.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
[0:0:0] friendly_url.html FriendlyUrl
[0:0:0:0] 3.html Child3
[0:0:0:0:0] 31.html Child31
[0:0:0:0:1] 32.html Child32
[0:0:0:0:1] 32.html Dziecię32
[0:1] 2.html Child2
[0:1:0] 21.html Child21
[0:1:1] 22.html Child22
9 changes: 5 additions & 4 deletions t/sitemapper.t
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ use HTTP::Date qw( time2str );
use DateTime;
use HTTP::Status qw( HTTP_OK HTTP_NOT_FOUND );
use HTML::HeadParser;
use utf8;

BEGIN {
use_ok( 'WWW::Sitemapper' );
Expand Down Expand Up @@ -267,7 +268,7 @@ if ($is_test = fork ) {
);
is_deeply(
[ map { $_->title } $child3->children ],
[ qw( Child31 Child32 ) ],
[ qw( Child31 Dziecię32 ) ],
"child3 nodes have correct titles"
);

Expand All @@ -288,7 +289,7 @@ if ($is_test = fork ) {
"child32->uri is correct"
);
is($child32->id, '0:0:0:0:1', "child32->id is correct");
is($child32->title, 'Child32', "child32->title is correct");
is($child32->title, 'Dziecię32', "child32->title is correct");
is scalar @{$child32->nodes}, 0, 'child32 has no nodes';

# Child2 / 2.html
Expand Down Expand Up @@ -378,7 +379,7 @@ if ($is_test = fork ) {
} "txt_sitemap() called successfully";
{
local $/;
open( FILE, "t/data/sitemap_with_id_with_title.txt" )
open( FILE, "<:utf8", "t/data/sitemap_with_id_with_title.txt" )
or die "Cannot open sitemap_with_id_with_title.txt: $!\n";
$_txt_sitemap = <FILE>;
close( FILE );
Expand All @@ -395,7 +396,7 @@ if ($is_test = fork ) {
} "html_sitemap() called successfully";
{
local $/;
open( FILE, "t/data/html_sitemap.html" )
open( FILE, "<:utf8", "t/data/html_sitemap.html" )
or die "Cannot open html_sitemap.html: $!\n";
$_html_sitemap = <FILE>;
close( FILE );
Expand Down

0 comments on commit 1bdc13a

Please sign in to comment.