Merge pull request #68 from jorol/dev

Dev
gbv · Dec 18, 2019 · bb94a2a · bb94a2a
2 parents 36cd67c + 6396aef
commit bb94a2a
Show file tree

Hide file tree

Showing 5 changed files with 435 additions and 129 deletions.
diff --git a/Changes b/Changes
@@ -1,6 +1,7 @@
 Changelog for Catmandu-PICA
 
 {{$NEXT}}
+ - align pica_map with marc_map
 
 1.00  2019-10-01 19:57:50 CEST
  - fix SRU test (#67)

diff --git a/cpanfile b/cpanfile
@@ -7,7 +7,7 @@ on 'test', sub {
 };
 
 requires 'perl', '5.12.0';
-requires 'Catmandu', '1.0601';
-requires 'PICA::Data', '1.00';
+requires 'Catmandu', '1.20';
+requires 'PICA::Data', '1.01';
 requires 'Moo', '1.0';
 requires 'Catmandu::SRU', '0.427';
diff --git a/lib/Catmandu/Fix/pica_map.pm b/lib/Catmandu/Fix/pica_map.pm
@@ -4,127 +4,51 @@ our $VERSION = '1.00';
 
 use Catmandu::Sane;
 use Moo;
-
+use Catmandu::Util::Path qw(as_path);
 use Catmandu::Fix::Has;
-use PICA::Path;
-
-has pica_path => ( fix_arg => 1 );
-has path      => ( fix_arg => 1 );
-has record    => ( fix_opt => 1 );
-has split     => ( fix_opt => 1 );
-has join      => ( fix_opt => 1 );
-has value     => ( fix_opt => 1 );
-has pluck     => ( fix_opt => 1 );
-
-sub emit {
-    my ( $self, $fixer ) = @_;
-    my $path       = $fixer->split_path( $self->path );
-    my $record_key = $fixer->emit_string( $self->record // 'record' );
-    my $join_char  = $fixer->emit_string( $self->join // '' );
-    my $pica_path  = PICA::Path->new($self->pica_path); 
-
-    my ($field_regex, $occurrence_regex, $subfield_regex, $from, $length) = @$pica_path;
-
-    my $var  = $fixer->var;
-    my $vals = $fixer->generate_var;
-    my $perl = $fixer->emit_declare_vars( $vals, '[]' );
-
-    my $field_regex_var = $fixer->generate_var;
-    $perl .= $fixer->emit_declare_vars( $field_regex_var, "qr{$field_regex}" );
-
-    my $subfield_regex_var = $fixer->generate_var;
-    $perl .= $fixer->emit_declare_vars( $subfield_regex_var, "qr{$subfield_regex}" );
-
-    my $occurrence_regex_var;
-    if (defined $occurrence_regex) {
-        $occurrence_regex_var = $fixer->generate_var;
-        $perl .= $fixer->emit_declare_vars( $occurrence_regex_var, "qr{$occurrence_regex}" );
-    }
-
-    $perl .= $fixer->emit_foreach(
-        "${var}->{${record_key}}",
-        sub {
-            my $var  = shift;
-            my $v    = $fixer->generate_var;
-            my $perl = "";
-
-            $perl .= "next if ${var}->[0] !~ ${field_regex_var};";
-
-            if (defined $occurrence_regex) {
-                $perl .= "next if (!defined ${var}->[1] || ${var}->[1] !~ ${occurrence_regex_var});";
-            }
-
-            if ( $self->value ) {
-                $perl .= $fixer->emit_declare_vars( $v,
-                    $fixer->emit_string( $self->value ) );
-            }
-            else {
-                my $i = $fixer->generate_var;
-                my $add_subfields = sub {
-                    my $start = shift;
-                    if ($self->pluck) {
-                        # Treat the subfield_regex as a hash index
-                        my $pluck = $fixer->generate_var;
-                        return 
-                        "my ${pluck}  = {};" .
-                        "for (my ${i} = ${start}; ${i} < \@{${var}}; ${i} += 2) {".
-                            "push(\@{ ${pluck}->{ ${var}->[${i}] } }, ${var}->[${i} + 1]);" .
-                        "}" .
-                        "for my ${i} (split('','${subfield_regex}')) { " .
-                            "push(\@{${v}}, \@{ ${pluck}->{${i}} }) if exists ${pluck}->{${i}};" .
-                        "}";
-                    }
-                    else {
-                        # Treat the subfield_regex as regex that needs to match the subfields
-                        return 
-                        "for (my ${i} = ${start}; ${i} < \@{${var}}; ${i} += 2) {".
-                            "if (${var}->[${i}] =~ ${subfield_regex_var}) {".
-                                "push(\@{${v}}, ${var}->[${i} + 1]);".
-                            "}".
-                        "}";
-                    }
-                };
-                $perl .= $fixer->emit_declare_vars( $v, "[]" );
-                $perl .= $add_subfields->(2);
-                $perl .= "if (\@{${v}}) {";
-                if ( !$self->split ) {
-                    $perl .= "${v} = join(${join_char}, \@{${v}});";
-                    if ( defined( my $off = $from ) ) {
-                        $perl .= "if (eval { ${v} = substr(${v}, ${off}, ${length}); 1 }) {";
-                    }
-                }
-                $perl .= $fixer->emit_create_path(
-                    $fixer->var,
-                    $path,
-                    sub {
-                        my $var = shift;
-                        if ( $self->split ) {
-                            "if (is_array_ref(${var})) {"
-                                . "push \@{${var}}, ${v};"
-                                . "} else {"
-                                . "${var} = [${v}];" . "}";
-                        }
-                        else {
-                            "if (is_string(${var})) {"
-                                . "${var} = join(${join_char}, ${var}, ${v});"
-                                . "} else {"
-                                . "${var} = ${v};" . "}";
-                        }
-                    }
-                );
-                if ( defined($from) ) {
-                    $perl .= "}";
-                }
-                $perl .= "}";
-            }
-            $perl;
-        }
+use PICA::Data qw(pica_match);
+
+with 'Catmandu::Fix::Builder';
+
+has pica_path     => ( fix_arg => 1 );
+has path          => ( fix_arg => 1 );
+has split         => ( fix_opt => 1 );
+has join          => ( fix_opt => 1 );
+has value         => ( fix_opt => 1 );
+has pluck         => ( fix_opt => 1 );
+has nested_arrays => ( fix_opt => 1 );
+
+sub _build_fixer {
+    my ($self) = @_;
+
+    my $path    = as_path( $self->path );
+    my $key     = $path->split_path->[-1];
+    my $creator = $path->creator;
+
+    my %opt = (
+        'join'          => $self->join          // '',
+        'split'         => $self->split         // 0,
+        'pluck'         => $self->pluck         // 0,
+        'nested_arrays' => $self->nested_arrays // 0,
+        'value'         => $self->value,
+        'force_array' => ( $key =~ /^(\$.*|[0-9]+)$/ ) ? 1 : 0,
     );
 
-    $perl;
+    sub {
+        my $data = $_[0];
+        my $matches = pica_match( $data, $self->pica_path, %opt );
+
+        $matches = [$matches] if !ref($matches) || ($opt{split} && !$opt{force_array});
+        while (@$matches) {
+            $data = $creator->( $data, shift @$matches );
+        }
+        return $data;
+        }
+
 }
 
 1;
+
 __END__
 
 =head1 NAME
@@ -133,24 +57,33 @@ Catmandu::Fix::pica_map - copy pica values of one field to a new field
 
 =head1 SYNOPSIS
 
-    # Copy from field 003@ subfield 0 to dc.identifier hash
-    pica_map('003A0','dc.identifier');
+    # Copy from field 021A all subfields to field dc_title
+    pica_map(021A, dc_title);
+
+    # Copy from field 021A subfield a to field dc_title
+    pica_map(021Aa, dc_title);
+
+    # Copy from field 021A subfield a and d to field dc_title and join them 
+    pica_map(021Aad, dc_title, join:' / ');
+ 
+    # Copy from field 021A subfield d and a in given order to field dc_title 
+    pica_map(021Ada, dc_title, pluck:1);
 
-    # Copy from field 010@ subfield a to dc.language hash
-    pica_map('010@a','dc.language');
+    # Copy from field 021A subfield a and d to field dc_title and append them to an array
+    pica_map(021Ada, dc_title.$append);
 
-    # Copy from field 009Q subfield a to foaf.primaryTopicOf array
-    pica_map('009Qa','foaf.primaryTopicOf.$append');
+    # Copy from field 021A all subfields to field dc_title and split them to an array
+    pica_map(021Ada, dc_title, split:1);
 
-    # Copy from field 028A subfields a and d to dc.creator hash joining them by ' '
-    pica_map('028Aad','dcterms.creator', -join => ' ');
+    # Copy from all fields 005A all subfields to field bibo_issn and split them to an array of arrays
+    pica_map(005A, bibo_issn, split:1, nested_arrays:1);
 
-    # Copy from field 144Z with occurrence 01 subfield a to dc.subject
-    pica_map('144Z[01]a','dcterms.subject');
+    # Copy from field 144Z with occurrence 01 subfield a to dc_subject
+    pica_map('144Z[01]a','dc_subject');
 
 =head1 SEE ALSO
 
-See L<PICA::Path> for a definition of PICA path expressions and L<PICA::Data>
+See L<PICA::Path> for a definition of PICA path expressions and mapping rules or test files for more examples. See L<PICA::Data>
 for more methods to process parsed PICA+ records.
 
 =cut
diff --git a/t/08-pica-add.t b/t/08-pica-add.t
@@ -41,7 +41,7 @@ my $fixer = Catmandu::Fix->new(fixes => [
 my $importer = Catmandu::Importer::PICA->new(file => "./t/files/plain.pica", type=> "plain");
 my $records = $fixer->fix($importer)->to_array;
 
-is_deeply $records->[0]->{'ids'}, [ ['1234', '4321'], ['5678'] ], '003@a added';
+is_deeply $records->[0]->{'ids'}, ['1234', '4321', '5678'], '003@a added';
 is $records->[0]->{'encoding'}, 'utf16', '201U0 added';
 is $records->[0]->{'what'}, 'bar', '101U$0 set';
 is $records->[0]->{'multi'}, 'barbaz', 'added multiple subfields to 001@';