Skip to content

Commit

Permalink
Merge pull request #1 from mdom/lazy
Browse files Browse the repository at this point in the history
Lazy loading of attributes
  • Loading branch information
dotandimet committed Mar 18, 2018
2 parents 9e5d1e8 + 5b548be commit 2ca1186
Show file tree
Hide file tree
Showing 4 changed files with 149 additions and 173 deletions.
139 changes: 11 additions & 128 deletions lib/Mojo/Feed.pm
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,6 @@ sub dom {
return Mojo::DOM->new($text);
}


sub parse {
my ($self, $xml) = @_;
if ($xml) {
Expand All @@ -107,15 +106,6 @@ sub parse_feed_dom {
my ($self) = @_;
my $dom = $self->dom;
my $feed = $self->parse_feed_channel(); # Feed properties
my $items = $dom->find('item');
my $entries = $dom->find('entry'); # Atom
my $res = [];
foreach my $item ($items->each, $entries->each) {
push @$res, parse_feed_item($item);
}
if (@$res) {
$feed->{'items'} = $res;
}
$self->root($feed);
return $feed;
}
Expand Down Expand Up @@ -171,105 +161,6 @@ sub parse_feed_channel {
return \%info;
}

# parse_feed_item - turn one RSS <item> or Atom <entry> node into a hash ref.
#
# Called as a plain function (not a method) with a single DOM node, which is
# queried through at(), find() and to_string().
#
# Returns a hash reference which may hold:
#   - the scalar fields looked up in the first loop, after normalisation
#     (e.g. 'summary' is moved to 'description', date fields to 'published');
#   - 'link'       => href or text of the item's link;
#   - 'enclosures' => array ref of hash refs (attributes of <enclosure>
#                     elements, or url/type/length of rel="enclosure" links);
#   - 'tags'       => array ref of category / dc:subject strings;
#   - '_raw'       => the item serialised with to_string.
#
# Depends on @time_fields, %is_time_field and str2time(), all of which are
# defined outside this sub (str2time is presumably HTTP::Date's - confirm).
sub parse_feed_item {
  my ($item) = @_;
  my %h;
  foreach my $k (
    qw(title id summary guid content description content\:encoded xhtml\:body dc\:creator author),
    @time_fields
    )
  {
    my $p = $item->at($k);
    if ($p) {

      # skip namespaced items - like itunes:summary - unless explicitly
      # searched:
      next
        if ($p->tag =~ /\:/
        && $k ne 'content\:encoded'
        && $k ne 'xhtml\:body'
        && $k ne 'dc\:date'
        && $k ne 'dc\:creator');
      $h{$k} = $p->text || $p->content;

      # Atom authors nest the display name in a <name> child.
      if ($k eq 'author' && $p->at('name')) {
        $h{$k} = $p->at('name')->text;
      }
      if ($is_time_field{$k}) {
        $h{$k} = str2time($h{$k});
      }
    }
  }

  $item->find('enclosure')->each(
    sub {
      push @{$h{enclosures}}, shift->attr;
    }
  );

  # let's handle links separately, because ATOM loves these buggers:
  $item->find('link')->each(sub {
    my $l = shift;
    if ($l->attr('href')) {
      if ($l->attr('rel') && $l->attr('rel') eq 'enclosure') {
        push @{$h{enclosures}},
          {
          url    => $l->attr('href'),
          type   => $l->attr('type'),
          length => $l->attr('length')
          };
      }
      elsif (!$l->attr('rel') || $l->attr('rel') eq 'alternate') {
        $h{'link'} = $l->attr('href');
      }
    }
    else {
      if ($l->text =~ /\w+/) {
        $h{'link'} = $l->text;    # simple link
      }

      # else { # we have an empty link element with no 'href'. :-(
      #   $h{'link'} = $1 if ($l->next->text =~ m/^(http\S+)/);
      # }
    }
  });

  # find tags:
  my @tags;
  $item->find('category, dc\:subject')
    ->each(sub { push @tags, $_[0]->text || $_[0]->attr('term') });
  if (@tags) {
    $h{'tags'} = \@tags;
  }

  # normalize fields: move a value to its canonical key unless that key is
  # already set. The list is ordered, so earlier pairs take precedence.
  my @replace = (
    'content\:encoded' => 'content',
    'xhtml\:body'      => 'content',
    'summary'          => 'description',
    'pubDate'          => 'published',
    'dc\:date'         => 'published',
    'created'          => 'published',
    'issued'           => 'published',
    'updated'          => 'published',
    'modified'         => 'published',
    'dc\:creator'      => 'author'

      # 'guid' => 'link'
  );
  while (my ($old, $new) = splice(@replace, 0, 2)) {
    if ($h{$old} && !$h{$new}) {
      $h{$new} = delete $h{$old};
    }
  }

  # Copy (not move) fallback values into required fields. This is an ordered
  # list rather than a hash: two sources feed 'id', and walking a hash with
  # each() let Perl's randomised hash order decide which one won. Now guid is
  # always preferred over link.
  my @copy = ('description' => 'content', guid => 'id', link => 'id');
  while (my ($fill, $required) = splice(@copy, 0, 2)) {
    if ($h{$fill} && !$h{$required}) {
      $h{$required} = $h{$fill};
    }
  }
  $h{"_raw"} = $item->to_string;
  return \%h;
}

# discover - get RSS/Atom feed URL from argument.
# Code adapted to use Mojolicious from Feed::Find by Benjamin Trott
# Any stupid mistakes are my own
Expand All @@ -278,17 +169,15 @@ sub discover {
my $url = shift;

# $self->ua->max_redirects(5)->connect_timeout(30);
return
$self->ua->get_p( $url )
->catch(sub { my ($err) = shift; die "Connection Error: $err" })
->then(sub {
my ($tx) = @_;
my @feeds;
if ($tx->success && $tx->res->code == 200) {
@feeds = _find_feed_links($self, $tx->req->url, $tx->res);
}
return (@feeds);
});
return $self->ua->get_p($url)
->catch(sub { my ($err) = shift; die "Connection Error: $err" })->then(sub {
my ($tx) = @_;
my @feeds;
if ($tx->success && $tx->res->code == 200) {
@feeds = _find_feed_links($self, $tx->req->url, $tx->res);
}
return (@feeds);
});
}

sub _find_feed_links {
Expand Down Expand Up @@ -368,16 +257,10 @@ sub parse_opml {
}

sub items {
my ($self) = shift;
return Mojo::Collection->new(
map {
# $_->{published} = Mojo::Date->new($_->{published}) if ($_->{published});
Mojo::Feed::Item->new(%$_);
} @{$self->root->{'items'}}
);
shift->dom->find('item, entry')
->map(sub { Mojo::Feed::Item->new(dom => $_) });
}


sub title {
return shift->root->{title} unless (@_ > 1);
$_[0]->root->{title} = $_[1];
Expand Down
99 changes: 95 additions & 4 deletions lib/Mojo/Feed/Item.pm
Original file line number Diff line number Diff line change
@@ -1,11 +1,102 @@
package Mojo::Feed::Item;
use Mojo::Base '-base';
has [qw(title link content id description guid published author _raw)];
has tags => sub { [] };

sub summary { return shift->description }
use Mojo::Feed::Item::Enclosure;
use HTTP::Date 'str2time';
has [qw(title link content id description guid published author)];

# tags - default builder: one entry per <category> or <dc:subject> element,
# taking the element's text or, when that is empty, its 'term' attribute.
# The result is whatever collection find(...)->map(...) returns.
has tags => sub {
  my $self  = shift;
  my $nodes = $self->dom->find('category, dc\:subject');
  return $nodes->map(sub { $_[0]->text || $_[0]->attr('term') });
};

has 'dom';

# summary is an attribute in its own right; when it has not been set
# explicitly its default value is taken from the item's description.
has summary => sub { shift->description };

# Maps an attribute name to the element selectors that are tried, in order,
# when building that attribute's default value. Attributes without an entry
# here are looked up with a single selector equal to their own name.
# Namespace colons are backslash-escaped, which is the form _at() compares
# tag names against.
my %selector = (
content => ['content', 'content\:encoded', 'xhtml\:body', 'description'],
description => ['description', 'summary'],
published => [
'published', 'pubDate', 'dc\:date', 'created',
'issued', 'updated', 'modified'
],
author => ['author', 'dc\:creator'],
id => ['id', 'guid', 'link'],
);

# _at - return the first element found for $selector whose tag name, once its
# first colon is backslash-escaped, is exactly equal to $selector. This rejects
# hits that only share the local name (e.g. a prefixed element when an
# unprefixed selector was asked for). Returns whatever first() yields when
# nothing qualifies.
sub _at {
  my ($self, $selector) = @_;
  my $candidates = $self->dom->find($selector);
  return $candidates->first(sub {
    (my $escaped = $_->tag) =~ s/:/\\:/;
    return $escaped eq $selector;
  });
}

# Install a default builder for each simple text attribute. The builder walks
# the selectors registered for the attribute in %selector (or just the
# attribute's own name) and returns the value of the first element _at() finds:
#   - for 'author', the text of a nested <name> element when there is one;
#   - for 'published', the element's text/content run through str2time();
#   - otherwise the element's text, or its content when the text is false.
# When no selector finds an element the builder returns nothing.
foreach my $field (qw(title link content id description guid published author)) {
  has $field => sub {
    my $self       = shift;
    my @candidates = @{$selector{$field} || [$field]};

    for my $css (@candidates) {
      my $node = $self->_at($css) or next;

      if ($field eq 'author') {
        my $name = $node->at('name');
        return $name->text if $name;
      }

      my $value = $node->text || $node->content;
      return $field eq 'published' ? str2time($value) : $value;
    }

    return;
  };
}

# enclosures - default builder for the item's attachments.
#
# Collects the attribute hash of every <enclosure> element, then appends a
# url/type/length hash for every <link> that carries an href and has
# rel="enclosure". Each hash is wrapped in a Mojo::Feed::Item::Enclosure
# object and the objects are returned as a Mojo::Collection.
has enclosures => sub {
  my $self = shift;
  my @found;

  for my $node ($self->dom->find('enclosure')->each) {
    push @found, $node->attr;
  }

  for my $node ($self->dom->find('link')->each) {
    my $rel = $node->attr('rel');
    next unless $node->attr('href') && $rel && $rel eq 'enclosure';
    push @found,
      {
      url    => $node->attr('href'),
      type   => $node->attr('type'),
      length => $node->attr('length')
      };
  }

  my @objects = map { Mojo::Feed::Item::Enclosure->new($_) } @found;
  return Mojo::Collection->new(@objects);
};

# link - default builder for the item's URL.
#
# Links get their own builder because Atom entries can carry several of them.
# Every <link> element is visited in turn and the last one that qualifies
# wins:
#   - a link with an href and either no rel or rel="alternate" yields its href;
#   - any other link yields its text, provided the text has a word character.
has link => sub {
  my $self = shift;
  my $found;

  for my $node ($self->dom->find('link')->each) {
    my $href = $node->attr('href');
    my $rel  = $node->attr('rel');

    if ($href && (!$rel || $rel eq 'alternate')) {
      $found = $href;
      next;
    }

    my $text = $node->text;
    $found = $text if $text =~ /\w+/;    # simple link
  }

  return $found;
};

# _raw defaults to the item's DOM node serialised with to_string.
has _raw => sub { shift->dom->to_string };

1;

__END__
=encoding utf-8
Expand Down
6 changes: 6 additions & 0 deletions lib/Mojo/Feed/Item/Enclosure.pm
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
package Mojo::Feed::Item::Enclosure;
use Mojo::Base -base;

# One enclosure (attachment) of a feed item:
#   url    - location of the attached resource
#   type   - its media type
#   length - its size as given by the feed
# The third attribute used to be misspelled 'lentgh', so data supplied under
# the key 'length' had no matching accessor.
has [qw( url type length )];

# Backward-compatible alias for the old, misspelled accessor name.
sub lentgh { shift->length(@_) }

1;
78 changes: 37 additions & 41 deletions t/09-enclosures.t
Original file line number Diff line number Diff line change
Expand Up @@ -7,52 +7,48 @@ use Mojo::Feed;
use FindBin;

my %test_results = (
'rss20-multi-enclosure.xml' => [
{
'length' => '2478719',
'type' => 'audio/mpeg',
'url' => 'http://example.com/sample_podcast.mp3'
},
{
'length' => '8888',
'type' => 'video/mpeg',
'url' => 'http://example.com/sample_movie.mpg'
}
],
'atom-multi-enclosure.xml' => [
{
'length' => '2478719',
'type' => 'audio/mpeg',
'url' => 'http://example.com/sample_podcast.mp3'
},
{
'length' => '8888',
'type' => 'video/mpeg',
'url' => 'http://example.com/sample_movie.mpg'
}
],
'atom-enclosure.xml' => [
{
'length' => '2478719',
'type' => 'audio/mpeg',
'url' => 'http://example.com/sample_podcast.mp3'
}
],
'rss20-enclosure.xml' => [
{
'length' => '2478719',
'type' => 'audio/mpeg',
'url' => 'http://example.com/sample_podcast.mp3'
}
],
'rss20-multi-enclosure.xml' => [
{
'length' => '2478719',
'type' => 'audio/mpeg',
'url' => 'http://example.com/sample_podcast.mp3'
},
{
'length' => '8888',
'type' => 'video/mpeg',
'url' => 'http://example.com/sample_movie.mpg'
}
],
'atom-multi-enclosure.xml' => [
{
'length' => '2478719',
'type' => 'audio/mpeg',
'url' => 'http://example.com/sample_podcast.mp3'
},
{
'length' => '8888',
'type' => 'video/mpeg',
'url' => 'http://example.com/sample_movie.mpg'
}
],
'atom-enclosure.xml' => [{
'length' => '2478719',
'type' => 'audio/mpeg',
'url' => 'http://example.com/sample_podcast.mp3'
}],
'rss20-enclosure.xml' => [{
'length' => '2478719',
'type' => 'audio/mpeg',
'url' => 'http://example.com/sample_podcast.mp3'
}],
);

my $samples = path($FindBin::Bin)->child('samples');


while ( my ( $file, $result ) = each %test_results ) {
my $feed = Mojo::Feed->new( $samples->child($file) );
is_deeply( $feed->items->[0]->{enclosures}, $result );
while (my ($file, $result) = each %test_results) {
my $feed = Mojo::Feed->new($samples->child($file));
is_deeply($feed->items->[0]->enclosures, $result);
}

done_testing();

0 comments on commit 2ca1186

Please sign in to comment.