Skip to content

Commit

Permalink
Merge pull request #3 from jorol/issue2
Browse files Browse the repository at this point in the history
This closes issue #2. I created issue #4 to track customization of PICA::Writer::Plus.
  • Loading branch information
nichtich committed Sep 23, 2013
2 parents 0aa54ea + 2361bee commit ec51df4
Show file tree
Hide file tree
Showing 9 changed files with 17 additions and 23 deletions.
6 changes: 0 additions & 6 deletions lib/Catmandu/Importer/PICA.pm
Original file line number Diff line number Diff line change
Expand Up @@ -71,24 +71,18 @@ Parse PICA XML into a native Perl hash containing two keys: '_id' and 'record'.
[
'001@',
'',
'_',
'',
'0',
'703'
],
[
'001A',
'',
'_',
'',
'0',
'2045:10-03-11'
],
[
'028B',
'01',
'_',
'',
'd',
'Thomas',
'a',
Expand Down
7 changes: 4 additions & 3 deletions lib/PICA/Parser/Plus.pm
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ sub new {
my $self = {
filename => undef,
rec_number => 0,
xml_reader => undef,
reader => undef,
};

# check for file or filehandle
Expand Down Expand Up @@ -87,7 +87,8 @@ sub _decode {
my @record;

if (length($fields[0]) == LEADER_LEN-1 && $fields[0] !~ m/.*SUBFIELD_INDICATOR/){
push( @record, [ 'LDR', undef, undef, shift(@fields) ] );
# drop leader because usage is unclear
shift(@fields);
}

for my $field (@fields) {
Expand All @@ -103,7 +104,7 @@ sub _decode {
}
my @subfields = map { substr( $_, 0, 1 ), substr( $_, 1 ) }
split( SUBFIELD_INDICATOR, substr( $data, 1 ) );
push( @record, [ $tag, $occurence, '_', '', @subfields ] );
push( @record, [ $tag, $occurence, @subfields ] );
}
return \@record;
}
Expand Down
2 changes: 1 addition & 1 deletion lib/PICA/Parser/XML.pm
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ sub _decode {
# get field tag number
my $tag = $field_node->getAttribute('tag');
my $occurrence = $field_node->getAttribute('occurrence') // '';
push(@field, ($tag, $occurrence, '_', ''));
push(@field, ($tag, $occurrence));

# get all subfield nodes
foreach my $subfield_node ( $field_node->getChildrenByTagName('*') ) {
Expand Down
3 changes: 1 addition & 2 deletions lib/PICA/Writer/Plain.pm
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,7 @@ sub _write_record {
print $fh "/".$field->[1]; # TODO: fix one-digit occ??
}
print $fh ' ';
# ignore $field->[2,3] ...
for (my $i=4; $i<scalar @$field; $i+=2) {
for (my $i=2; $i<scalar @$field; $i+=2) {
my $value = $field->[$i+1];
$value =~ s/\$/\$\$/g;
print $fh SUBFIELD_INDICATOR . $field->[$i] . $value;
Expand Down
4 changes: 2 additions & 2 deletions lib/PICA/Writer/Plus.pm
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ use strict;
use charnames qw(:full);
use constant SUBFIELD_INDICATOR => "\N{INFORMATION SEPARATOR ONE}";
use constant END_OF_FIELD => "\N{INFORMATION SEPARATOR TWO}";
use constant END_OF_RECORD => "\x1D\x1A"; # TODO: check
use constant END_OF_RECORD => "\N{LINE FEED}"; # or \N{INFORMATION SEPARATOR THREE}? I would prefer newline separated format

use Moo;
with 'PICA::Writer::Handle';
Expand All @@ -21,7 +21,7 @@ sub _write_record {
print $fh "/".$field->[1];
}
print $fh ' ';
for (my $i=4; $i<scalar @$field; $i+=2) {
for (my $i=2; $i<scalar @$field; $i+=2) {
print $fh SUBFIELD_INDICATOR . $field->[$i] . $field->[$i+1];
}
print $fh END_OF_FIELD;
Expand Down
2 changes: 1 addition & 1 deletion lib/PICA/Writer/XML.pm
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ sub _write_record {
defined $field->[1] && $field->[1] ne '' ?
" occurrence=\"$field->[1]\"" : ""
) . ">\n";
for (my $i=4; $i<scalar @$field; $i+=2) {
for (my $i=2; $i<scalar @$field; $i+=2) {
my $value = $field->[$i+1];
$value =~ s/</&lt;/g;
$value =~ s/&/&amp;/g;
Expand Down
6 changes: 3 additions & 3 deletions t/01-parser.t
Original file line number Diff line number Diff line change
Expand Up @@ -10,16 +10,16 @@ isa_ok( $parser, 'PICA::Parser::XML' );
my $record = $parser->next();
ok($record->{_id} eq '658700774', 'record _id' );
ok($record->{record}->[0][0] eq '001@', 'tag from first field' );
is_deeply($record->{record}->[1], ['001A', '', '_', '', '0', '2045:10-03-11'], 'second field');
is_deeply($record->{record}->[1], ['001A', '', '0', '2045:10-03-11'], 'second field');
ok($parser->next()->{_id} eq '65869538X', 'next record');

use PICA::Parser::Plus;
$parser = PICA::Parser::Plus->new( './t/picaplus.dat' );
isa_ok( $parser, 'PICA::Parser::Plus' );
$record = $parser->next();
ok($record->{_id} eq '1041318383', 'record _id' );
ok($record->{record}->[0][0] eq 'LDR', 'tag from first field' );
is_deeply($record->{record}->[1], ['001A', '', '_', '', '0', '1240:04-09-13'], 'second field');
ok($record->{record}->[0][0] eq '001A', 'tag from first field' );
is_deeply($record->{record}->[0], ['001A', '', '0', '1240:04-09-13'], 'first field');
ok($parser->next()->{_id} eq '1041318464', 'next record');

done_testing();
4 changes: 2 additions & 2 deletions t/02-importer.t
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ $importer->each(
);
ok(scalar @records == 5, 'records');
ok( $records[0]->{'_id'} eq '658700774', 'record _id' );
is_deeply( $records[0]->{'record'}->[7], ['003@', '', '_', '', '0', '658700774'],
is_deeply( $records[0]->{'record'}->[7], ['003@', '', '0', '658700774'],
'record field'
);

Expand All @@ -29,7 +29,7 @@ $importer->each(
);
ok(scalar @records == 10, 'records');
ok( $records[0]->{'_id'} eq '1041318383', 'record _id' );
is_deeply( $records[0]->{'record'}->[6], ['003@', '', '_', '', '0', '1041318383'],,
is_deeply( $records[0]->{'record'}->[5], ['003@', '', '0', '1041318383'],,
'record field'
);

Expand Down
6 changes: 3 additions & 3 deletions t/05-writer.t
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,12 @@ my $writer = PICA::Writer::Plain->new( fh => $fh );

my @pica_records = (
[
['003@', '', '_', '', '0', '1041318383'],
['021A', '', '_', '', 'a', encode('UTF-8',"Hello \$\N{U+00A5}!")],
['003@', '', '0', '1041318383'],
['021A', '', 'a', encode('UTF-8',"Hello \$\N{U+00A5}!")],
],
{
record => [
['028C', '01', '_', '', d => 'Emma', a => 'Goldman']
['028C', '01', d => 'Emma', a => 'Goldman']
]
}
);
Expand Down

0 comments on commit ec51df4

Please sign in to comment.