diff --git a/lib/Mojo/Feed.pm b/lib/Mojo/Feed.pm index 145dd51..8421802 100644 --- a/lib/Mojo/Feed.pm +++ b/lib/Mojo/Feed.pm @@ -82,7 +82,6 @@ sub dom { return Mojo::DOM->new($text); } - sub parse { my ($self, $xml) = @_; if ($xml) { @@ -107,15 +106,6 @@ sub parse_feed_dom { my ($self) = @_; my $dom = $self->dom; my $feed = $self->parse_feed_channel(); # Feed properties - my $items = $dom->find('item'); - my $entries = $dom->find('entry'); # Atom - my $res = []; - foreach my $item ($items->each, $entries->each) { - push @$res, parse_feed_item($item); - } - if (@$res) { - $feed->{'items'} = $res; - } $self->root($feed); return $feed; } @@ -171,105 +161,6 @@ sub parse_feed_channel { return \%info; } -sub parse_feed_item { - my ($item) = @_; - my %h; - foreach my $k ( - qw(title id summary guid content description content\:encoded xhtml\:body dc\:creator author), - @time_fields - ) - { - my $p = $item->at($k); - if ($p) { - - # skip namespaced items - like itunes:summary - unless explicitly - # searched: - next - if ($p->tag =~ /\:/ - && $k ne 'content\:encoded' - && $k ne 'xhtml\:body' - && $k ne 'dc\:date' - && $k ne 'dc\:creator'); - $h{$k} = $p->text || $p->content; - if ($k eq 'author' && $p->at('name')) { - $h{$k} = $p->at('name')->text; - } - if ($is_time_field{$k}) { - $h{$k} = str2time($h{$k}); - } - } - } - - $item->find('enclosure')->each( - sub { - push @{ $h{enclosures} }, shift->attr; - } - ); - - # let's handle links seperately, because ATOM loves these buggers: - $item->find('link')->each(sub { - my $l = shift; - if ($l->attr('href')) { - if ( $l->attr('rel' ) && $l->attr('rel') eq 'enclosure' ) { - push @{$h{enclosures}}, { - url => $l->attr('href'), - type => $l->attr('type'), - length => $l->attr('length') - }; - } - elsif (!$l->attr('rel') || $l->attr('rel') eq 'alternate') { - $h{'link'} = $l->attr('href'); - } - } - else { - if ($l->text =~ /\w+/) { - $h{'link'} = $l->text; # simple link - } - -# else { # we have an empty link element with no 
'href'. :-( -# $h{'link'} = $1 if ($l->next->text =~ m/^(http\S+)/); -# } - } - }); - - # find tags: - my @tags; - $item->find('category, dc\:subject') - ->each(sub { push @tags, $_[0]->text || $_[0]->attr('term') }); - if (@tags) { - $h{'tags'} = \@tags; - } - # - # normalize fields: - my @replace = ( - 'content\:encoded' => 'content', - 'xhtml\:body' => 'content', - 'summary' => 'description', - 'pubDate' => 'published', - 'dc\:date' => 'published', - 'created' => 'published', - 'issued' => 'published', - 'updated' => 'published', - 'modified' => 'published', - 'dc\:creator' => 'author' - - # 'guid' => 'link' - ); - while (my ($old, $new) = splice(@replace, 0, 2)) { - if ($h{$old} && !$h{$new}) { - $h{$new} = delete $h{$old}; - } - } - my %copy = ('description' => 'content', link => 'id', guid => 'id'); - while (my ($fill, $required) = each %copy) { - if ($h{$fill} && !$h{$required}) { - $h{$required} = $h{$fill}; - } - } - $h{"_raw"} = $item->to_string; - return \%h; -} - # discover - get RSS/Atom feed URL from argument. 
# Code adapted to use Mojolicious from Feed::Find by Benjamin Trott # Any stupid mistakes are my own @@ -278,17 +169,15 @@ sub discover { my $url = shift; # $self->ua->max_redirects(5)->connect_timeout(30); - return - $self->ua->get_p( $url ) - ->catch(sub { my ($err) = shift; die "Connection Error: $err" }) - ->then(sub { - my ($tx) = @_; - my @feeds; - if ($tx->success && $tx->res->code == 200) { - @feeds = _find_feed_links($self, $tx->req->url, $tx->res); - } - return (@feeds); - }); + return $self->ua->get_p($url) + ->catch(sub { my ($err) = shift; die "Connection Error: $err" })->then(sub { + my ($tx) = @_; + my @feeds; + if ($tx->success && $tx->res->code == 200) { + @feeds = _find_feed_links($self, $tx->req->url, $tx->res); + } + return (@feeds); + }); } sub _find_feed_links { @@ -368,16 +257,10 @@ sub parse_opml { } sub items { - my ($self) = shift; - return Mojo::Collection->new( - map { - # $_->{published} = Mojo::Date->new($_->{published}) if ($_->{published}); - Mojo::Feed::Item->new(%$_); - } @{$self->root->{'items'}} - ); + shift->dom->find('item, entry') + ->map(sub { Mojo::Feed::Item->new(dom => $_) }); } - sub title { return shift->root->{title} unless (@_ > 1); $_[0]->root->{title} = $_[1]; diff --git a/lib/Mojo/Feed/Item.pm b/lib/Mojo/Feed/Item.pm index 8af76f2..f233497 100644 --- a/lib/Mojo/Feed/Item.pm +++ b/lib/Mojo/Feed/Item.pm @@ -1,11 +1,102 @@ package Mojo::Feed::Item; use Mojo::Base '-base'; -has [qw(title link content id description guid published author _raw)]; -has tags => sub { [] }; - -sub summary { return shift->description } +use Mojo::Feed::Item::Enclosure; +use HTTP::Date 'str2time'; +has [qw(title link content id description guid published author)]; + +has tags => sub { + shift->dom->find('category, dc\:subject') + ->map(sub { $_[0]->text || $_[0]->attr('term') }); +}; + +has 'dom'; + +has summary => sub { shift->description }; + +my %selector = ( + content => ['content', 'content\:encoded', 'xhtml\:body', 'description'], + 
description => ['description', 'summary'], + published => [ + 'published', 'pubDate', 'dc\:date', 'created', + 'issued', 'updated', 'modified' + ], + author => ['author', 'dc\:creator'], + id => ['id', 'guid', 'link'], +); + +sub _at { + my ($self, $selector) = @_; + return $self->dom->find($selector)->first(sub { + my $tag = $_->tag; + $tag =~ s/:/\\:/; + return $tag eq $selector; + }); +} + +foreach my $k (qw(title link content id description guid published author)) { + has $k => sub { + my $self = shift; + for my $selector (@{$selector{$k} || [$k]}) { + if ( my $p = $self->_at($selector) ) { + if ($k eq 'author' && $p->at('name')) { + return $p->at('name')->text; + } + my $text = $p->text || $p->content; + if ($k eq 'published') { + return str2time($text); + } + return $text; + } + } + return; + }; +} + +has enclosures => sub { + my $self = shift; + my @enclosures; + $self->dom->find('enclosure')->each(sub { + push @enclosures, shift->attr; + }); + $self->dom->find('link')->each(sub { + my $l = shift; + if ($l->attr('href') && $l->attr('rel') && $l->attr('rel') eq 'enclosure') { + push @enclosures, + { + url => $l->attr('href'), + type => $l->attr('type'), + length => $l->attr('length') + }; + } + }); + return Mojo::Collection->new(map { Mojo::Feed::Item::Enclosure->new($_) } + @enclosures); +}; + +has link => sub { + + # let's handle links separately, because ATOM loves these buggers: + my $link; + shift->dom->find('link')->each(sub { + my $l = shift; + if ($l->attr('href') + && (!$l->attr('rel') || $l->attr('rel') eq 'alternate')) + { + $link = $l->attr('href'); + } + else { + if ($l->text =~ /\w+/) { + $link = $l->text; # simple link + } + } + }); + return $link; +}; + +has _raw => sub { shift->dom->to_string }; 1; + __END__ =encoding utf-8 diff --git a/lib/Mojo/Feed/Item/Enclosure.pm b/lib/Mojo/Feed/Item/Enclosure.pm new file mode 100644 index 0000000..6bd4f9b --- /dev/null +++ b/lib/Mojo/Feed/Item/Enclosure.pm @@ -0,0 +1,6 @@ +package 
Mojo::Feed::Item::Enclosure; +use Mojo::Base -base; + +has [qw( url type length )]; + +1; diff --git a/t/09-enclosures.t b/t/09-enclosures.t index 87965b8..d434b76 100644 --- a/t/09-enclosures.t +++ b/t/09-enclosures.t @@ -7,52 +7,48 @@ use Mojo::Feed; use FindBin; my %test_results = ( - 'rss20-multi-enclosure.xml' => [ - { - 'length' => '2478719', - 'type' => 'audio/mpeg', - 'url' => 'http://example.com/sample_podcast.mp3' - }, - { - 'length' => '8888', - 'type' => 'video/mpeg', - 'url' => 'http://example.com/sample_movie.mpg' - } - ], - 'atom-multi-enclosure.xml' => [ - { - 'length' => '2478719', - 'type' => 'audio/mpeg', - 'url' => 'http://example.com/sample_podcast.mp3' - }, - { - 'length' => '8888', - 'type' => 'video/mpeg', - 'url' => 'http://example.com/sample_movie.mpg' - } - ], - 'atom-enclosure.xml' => [ - { - 'length' => '2478719', - 'type' => 'audio/mpeg', - 'url' => 'http://example.com/sample_podcast.mp3' - } - ], - 'rss20-enclosure.xml' => [ - { - 'length' => '2478719', - 'type' => 'audio/mpeg', - 'url' => 'http://example.com/sample_podcast.mp3' - } - ], + 'rss20-multi-enclosure.xml' => [ + { + 'length' => '2478719', + 'type' => 'audio/mpeg', + 'url' => 'http://example.com/sample_podcast.mp3' + }, + { + 'length' => '8888', + 'type' => 'video/mpeg', + 'url' => 'http://example.com/sample_movie.mpg' + } + ], + 'atom-multi-enclosure.xml' => [ + { + 'length' => '2478719', + 'type' => 'audio/mpeg', + 'url' => 'http://example.com/sample_podcast.mp3' + }, + { + 'length' => '8888', + 'type' => 'video/mpeg', + 'url' => 'http://example.com/sample_movie.mpg' + } + ], + 'atom-enclosure.xml' => [{ + 'length' => '2478719', + 'type' => 'audio/mpeg', + 'url' => 'http://example.com/sample_podcast.mp3' + }], + 'rss20-enclosure.xml' => [{ + 'length' => '2478719', + 'type' => 'audio/mpeg', + 'url' => 'http://example.com/sample_podcast.mp3' + }], ); my $samples = path($FindBin::Bin)->child('samples'); -while ( my ( $file, $result ) = each %test_results ) { - my $feed = 
Mojo::Feed->new( $samples->child($file) ); - is_deeply( $feed->items->[0]->{enclosures}, $result ); +while (my ($file, $result) = each %test_results) { + my $feed = Mojo::Feed->new($samples->child($file)); + is_deeply($feed->items->[0]->enclosures, $result); } done_testing();