Skip to content

Commit

Permalink
Merge pull request #68 from jorol/dev
Browse files Browse the repository at this point in the history
Dev
  • Loading branch information
jorol committed Dec 18, 2019
2 parents 36cd67c + 6396aef commit bb94a2a
Show file tree
Hide file tree
Showing 5 changed files with 435 additions and 129 deletions.
1 change: 1 addition & 0 deletions Changes
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
Changelog for Catmandu-PICA

{{$NEXT}}
- align pica_map with marc_map

1.00 2019-10-01 19:57:50 CEST
- fix SRU test (#67)
Expand Down
4 changes: 2 additions & 2 deletions cpanfile
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ on 'test', sub {
};

requires 'perl', '5.12.0';
requires 'Catmandu', '1.0601';
requires 'PICA::Data', '1.00';
requires 'Catmandu', '1.20';
requires 'PICA::Data', '1.01';
requires 'Moo', '1.0';
requires 'Catmandu::SRU', '0.427';
185 changes: 59 additions & 126 deletions lib/Catmandu/Fix/pica_map.pm
Original file line number Diff line number Diff line change
Expand Up @@ -4,127 +4,51 @@ our $VERSION = '1.00';

use Catmandu::Sane;
use Moo;

use Catmandu::Util::Path qw(as_path);
use Catmandu::Fix::Has;
use PICA::Path;

has pica_path => ( fix_arg => 1 );
has path => ( fix_arg => 1 );
has record => ( fix_opt => 1 );
has split => ( fix_opt => 1 );
has join => ( fix_opt => 1 );
has value => ( fix_opt => 1 );
has pluck => ( fix_opt => 1 );

sub emit {
my ( $self, $fixer ) = @_;
my $path = $fixer->split_path( $self->path );
my $record_key = $fixer->emit_string( $self->record // 'record' );
my $join_char = $fixer->emit_string( $self->join // '' );
my $pica_path = PICA::Path->new($self->pica_path);

my ($field_regex, $occurrence_regex, $subfield_regex, $from, $length) = @$pica_path;

my $var = $fixer->var;
my $vals = $fixer->generate_var;
my $perl = $fixer->emit_declare_vars( $vals, '[]' );

my $field_regex_var = $fixer->generate_var;
$perl .= $fixer->emit_declare_vars( $field_regex_var, "qr{$field_regex}" );

my $subfield_regex_var = $fixer->generate_var;
$perl .= $fixer->emit_declare_vars( $subfield_regex_var, "qr{$subfield_regex}" );

my $occurrence_regex_var;
if (defined $occurrence_regex) {
$occurrence_regex_var = $fixer->generate_var;
$perl .= $fixer->emit_declare_vars( $occurrence_regex_var, "qr{$occurrence_regex}" );
}

$perl .= $fixer->emit_foreach(
"${var}->{${record_key}}",
sub {
my $var = shift;
my $v = $fixer->generate_var;
my $perl = "";

$perl .= "next if ${var}->[0] !~ ${field_regex_var};";

if (defined $occurrence_regex) {
$perl .= "next if (!defined ${var}->[1] || ${var}->[1] !~ ${occurrence_regex_var});";
}

if ( $self->value ) {
$perl .= $fixer->emit_declare_vars( $v,
$fixer->emit_string( $self->value ) );
}
else {
my $i = $fixer->generate_var;
my $add_subfields = sub {
my $start = shift;
if ($self->pluck) {
# Treat the subfield_regex as a hash index
my $pluck = $fixer->generate_var;
return
"my ${pluck} = {};" .
"for (my ${i} = ${start}; ${i} < \@{${var}}; ${i} += 2) {".
"push(\@{ ${pluck}->{ ${var}->[${i}] } }, ${var}->[${i} + 1]);" .
"}" .
"for my ${i} (split('','${subfield_regex}')) { " .
"push(\@{${v}}, \@{ ${pluck}->{${i}} }) if exists ${pluck}->{${i}};" .
"}";
}
else {
# Treat the subfield_regex as regex that needs to match the subfields
return
"for (my ${i} = ${start}; ${i} < \@{${var}}; ${i} += 2) {".
"if (${var}->[${i}] =~ ${subfield_regex_var}) {".
"push(\@{${v}}, ${var}->[${i} + 1]);".
"}".
"}";
}
};
$perl .= $fixer->emit_declare_vars( $v, "[]" );
$perl .= $add_subfields->(2);
$perl .= "if (\@{${v}}) {";
if ( !$self->split ) {
$perl .= "${v} = join(${join_char}, \@{${v}});";
if ( defined( my $off = $from ) ) {
$perl .= "if (eval { ${v} = substr(${v}, ${off}, ${length}); 1 }) {";
}
}
$perl .= $fixer->emit_create_path(
$fixer->var,
$path,
sub {
my $var = shift;
if ( $self->split ) {
"if (is_array_ref(${var})) {"
. "push \@{${var}}, ${v};"
. "} else {"
. "${var} = [${v}];" . "}";
}
else {
"if (is_string(${var})) {"
. "${var} = join(${join_char}, ${var}, ${v});"
. "} else {"
. "${var} = ${v};" . "}";
}
}
);
if ( defined($from) ) {
$perl .= "}";
}
$perl .= "}";
}
$perl;
}
use PICA::Data qw(pica_match);

with 'Catmandu::Fix::Builder';

has pica_path => ( fix_arg => 1 );
has path => ( fix_arg => 1 );
has split => ( fix_opt => 1 );
has join => ( fix_opt => 1 );
has value => ( fix_opt => 1 );
has pluck => ( fix_opt => 1 );
has nested_arrays => ( fix_opt => 1 );

sub _build_fixer {
my ($self) = @_;

my $path = as_path( $self->path );
my $key = $path->split_path->[-1];
my $creator = $path->creator;

my %opt = (
'join' => $self->join // '',
'split' => $self->split // 0,
'pluck' => $self->pluck // 0,
'nested_arrays' => $self->nested_arrays // 0,
'value' => $self->value,
'force_array' => ( $key =~ /^(\$.*|[0-9]+)$/ ) ? 1 : 0,
);

$perl;
sub {
my $data = $_[0];
my $matches = pica_match( $data, $self->pica_path, %opt );

$matches = [$matches] if !ref($matches) || ($opt{split} && !$opt{force_array});
while (@$matches) {
$data = $creator->( $data, shift @$matches );
}
return $data;
}

}

1;

__END__
=head1 NAME
Expand All @@ -133,24 +57,33 @@ Catmandu::Fix::pica_map - copy pica values of one field to a new field
=head1 SYNOPSIS
# Copy from field 003@ subfield 0 to dc.identifier hash
pica_map('003A0','dc.identifier');
# Copy from field 021A all subfields to field dc_title
pica_map(021A, dc_title);
# Copy from field 021A subfield a to field dc_title
pica_map(021Aa, dc_title);
# Copy from field 021A subfield a and d to field dc_title and join them
pica_map(021Aad, dc_title, join:' / ');
# Copy from field 021A subfield d and a in given order to field dc_title
pica_map(021Ada, dc_title, pluck:1);
# Copy from field 010@ subfield a to dc.language hash
pica_map('010@a','dc.language');
# Copy from field 021A subfield a and d to field dc_title and append them to an array
pica_map(021Ada, dc_title.$append);
# Copy from field 009Q subfield a to foaf.primaryTopicOf array
pica_map('009Qa','foaf.primaryTopicOf.$append');
# Copy from field 021A all subfields to field dc_title and split them to an array
pica_map(021Ada, dc_title, split:1);
# Copy from field 028A subfields a and d to dc.creator hash joining them by ' '
pica_map('028Aad','dcterms.creator', -join => ' ');
# Copy from all fields 005A all subfields to field bibo_issn and split them to an array of arrays
pica_map(005A, bibo_issn, split:1, nested_arrays:1);
# Copy from field 144Z with occurrence 01 subfield a to dc.subject
pica_map('144Z[01]a','dcterms.subject');
# Copy from field 144Z with occurrence 01 subfield a to dc_subject
pica_map('144Z[01]a','dc_subject');
=head1 SEE ALSO
See L<PICA::Path> for a definition of PICA path expressions and L<PICA::Data>
See L<PICA::Path> for a definition of PICA path expressions and mapping rules or test files for more examples. See L<PICA::Data>
for more methods to process parsed PICA+ records.
=cut
2 changes: 1 addition & 1 deletion t/08-pica-add.t
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ my $fixer = Catmandu::Fix->new(fixes => [
my $importer = Catmandu::Importer::PICA->new(file => "./t/files/plain.pica", type=> "plain");
my $records = $fixer->fix($importer)->to_array;

is_deeply $records->[0]->{'ids'}, [ ['1234', '4321'], ['5678'] ], '003@a added';
is_deeply $records->[0]->{'ids'}, ['1234', '4321', '5678'], '003@a added';
is $records->[0]->{'encoding'}, 'utf16', '201U0 added';
is $records->[0]->{'what'}, 'bar', '101U$0 set';
is $records->[0]->{'multi'}, 'barbaz', 'added multiple subfields to 001@';
Expand Down
Loading

0 comments on commit bb94a2a

Please sign in to comment.