Skip to content

Commit

Permalink
Merge pull request #27 from jorol/xml_writer
Browse files Browse the repository at this point in the history
use XML::Writer to generate XML files
  • Loading branch information
jorol committed Nov 17, 2017
2 parents e122c5b + f617ebe commit fe5b58e
Show file tree
Hide file tree
Showing 3 changed files with 155 additions and 130 deletions.
2 changes: 2 additions & 0 deletions cpanfile
Expand Up @@ -7,6 +7,8 @@ requires 'IO::Handle', '0';
requires 'IO::File', '1.14';
requires 'Exporter', '0';
requires 'XML::LibXML', '2';
requires 'XML::Writer', 0;


# don't included here because Dist::Zilla::App::Command::listdeps would include it
# recommends 'Catmandu::PICA';
Expand Down
56 changes: 26 additions & 30 deletions lib/PICA/Writer/XML.pm
Expand Up @@ -5,26 +5,24 @@ use warnings;
our $VERSION = '0.33';

use Scalar::Util qw(reftype);
use XML::Writer;

use parent 'PICA::Writer::Base';

sub new {
my $self = PICA::Writer::Base::new(@_);
$self->{fh}->print("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n");
$self->start if $self->{start} // 1;
$self->{writer} = XML::Writer->new(OUTPUT => $self->{fh}, DATA_MODE => 1, DATA_INDENT => 2);
$self->{writer}->xmlDecl('UTF-8');
$self->{writer}->startTag('collection', xmlns => 'info:srw/schema/5/picaXML-v1.0');
$self;
}

sub start {
my $fh = $_[0]->{fh};
$fh->print("<collection xlmns=\"info:srw/schema/5/picaXML-v1.0\">\n");
}

sub write_record {
my ($self, $record) = @_;
$record = $record->{record} if reftype $record eq 'HASH';

my $fh = $self->{fh};
my $writer = $self->{writer};

my $i = 0;
my $pica_sort = sub {
Expand All @@ -35,28 +33,27 @@ sub write_record {
};

@$record = map $_->[1], sort { $a->[0] cmp $b->[0] } map { $pica_sort->($_) } @$record;
$fh->print("<record>\n");
$writer->startTag('record');
foreach my $field (@$record) {
# this may break on invalid tag/occurrence values
$fh->print(" <datafield tag=\"$field->[0]\"" . (
defined $field->[1] && $field->[1] ne '' ?
" occurrence=\"$field->[1]\"" : ""
) . ">\n");
for (my $i=2; $i<scalar @$field; $i+=2) {
my $value = $field->[$i+1];
# bug see https://github.com/gbv/Catmandu-PICA/issues/53
$value =~ s/</&lt;/g;
$value =~ s/&/&amp;/g;
# TODO: disallowed code points (?)
$fh->print(" <subfield code=\"$field->[$i]\">$value</subfield>\n");
}
$fh->print(" </datafield>\n");
if ( defined $field->[1] && $field->[1] ne '') {
$writer->startTag('datafield', tag => $field->[0], occurrence => $field->[1] );
}
else {
$writer->startTag('datafield', tag => $field->[0]);
}
for (my $i=2; $i<scalar @$field; $i+=2) {
my $value = $field->[$i+1];
$writer->dataElement('subfield', $value, code => $field->[$i]);
}
$writer->endTag('datafield');
}
$fh->print("</record>\n");
$writer->endTag('record');
}

sub end {
$_[0]->{fh}->print("</collection>\n");
my $self = shift;
$self->{writer}->endTag('collection');
$self->{writer}->end();
}

1;
Expand All @@ -74,13 +71,12 @@ The counterpart of this module is L<PICA::Parser::XML>.
=head2 METHODS
In addition to C<write>, this writer also contains methods C<start> and C<end>
to emit an XML header with start tag C<< <collection> >> or an end tag,
respectively. The start method is automatically called on construction, unless
suppressed with option C<< start => 0 >>:
In addition to C<write>, this writer also contains C<end> method to finish
creating the XML document und check for well-formedness.
my $writer = PICA::Writer::XML->new( fh => $file, start => 0 );
$writer->write( $record ); # no <collection> start tag
my $writer = PICA::Writer::XML->new( fh => $file );
$writer->write( $record );
$writer->end();
The C<end> method does not close the underlying file handle.
Expand Down
227 changes: 127 additions & 100 deletions t/30-writer.t
@@ -1,4 +1,7 @@
use strict;
use warnings;
use utf8;

use Test::More;
use Test::XML;

Expand All @@ -15,148 +18,172 @@ use Encode qw(encode);
use Scalar::Util qw(reftype);

my @pica_records = (
[ [ '003@', '', '0', '1041318383' ],
[ '021A', '', 'a', encode( 'UTF-8', "Hello \$\N{U+00A5}!" ) ],
[
['003@', '', '0', '1041318383'],
['021A', '', 'a', "Hello \$\N{U+00A5}!"],
],
{ record => [ [ '028C', '01', d => 'Emma', a => 'Goldman' ] ] }
{
record => [
['028C', '01', d => 'Emma', a => 'Goldman']
]
}
);

my ( $fh, $filename ) = tempfile();
my $writer = pica_writer( 'plain', fh => $fh );
foreach my $record (@pica_records) {
$writer->write($record);
}
close $fh;

my $PLAIN = <<'PLAIN';
note 'PICA::Writer::Plain';

{
my ($fh, $filename) = tempfile();
my $writer = pica_writer( 'plain', fh => $fh );
foreach my $record (@pica_records) {
$writer->write($record);
}
close $fh;

my $PLAIN = <<'PLAIN';
003@ $01041318383
021A $aHello $$¥!
028C/01 $dEmma$aGoldman
PLAIN

my $out = do { local ( @ARGV, $/ ) = $filename; <> };
is $out, $PLAIN, 'Plain writer';

( $fh, $filename ) = tempfile();
$writer = PICA::Writer::Plus->new( fh => $fh );
my $out = do { local (@ARGV,$/)=$filename; <> };
is $out, $PLAIN, 'Plain writer';

foreach my $record (@pica_records) {
$writer->write($record);
(undef, $filename) = tempfile(OPEN => 0);
pica_writer('plain', fh => $filename);
ok -e $filename, 'write to file';
}
close $fh;

$out = do { local ( @ARGV, $/ ) = $filename; <> };
is $out, <<'PLUS', 'Plus Writer';
note 'PICA::Writer::Plus';

{
my ($fh, $filename) = tempfile();
my $writer = PICA::Writer::Plus->new( fh => $fh );

foreach my $record (@pica_records) {
$writer->write($record);
}
close $fh;

my $out = do { local (@ARGV,$/)=$filename; <> };
my $PLUS = <<'PLUS';
003@ 01041318383021A aHello $¥!
028C/01 dEmmaaGoldman
PLUS

( $fh, $filename ) = tempfile();
$writer = PICA::Writer::XML->new( fh => $fh );

foreach my $record (@pica_records) {
$writer->write($record);
is $out, $PLUS, 'Plus Writer';
}
$writer->end;
close $fh;

$out = do { local ( @ARGV, $/ ) = $filename; <> };
note 'PICA::Writer::XML';

{
my ($fh, $filename) = tempfile();
my $writer = PICA::Writer::XML->new( fh => $fh );

foreach my $record (@pica_records) {
$writer->write($record);
}
$writer->end;
close $fh;

my $out = do { local (@ARGV,$/)=$filename; <> };

my $xml = <<'XML';
my $xml = <<'XML';
<?xml version="1.0" encoding="UTF-8"?>
<collection xlmns="info:srw/schema/5/picaXML-v1.0">
<record>
<datafield tag="003@">
<subfield code="0">1041318383</subfield>
</datafield>
<datafield tag="021A">
<subfield code="a">Hello $¥!</subfield>
</datafield>
</record>
<record>
<datafield tag="028C" occurrence="01">
<subfield code="d">Emma</subfield>
<subfield code="a">Goldman</subfield>
</datafield>
</record>
<collection xmlns="info:srw/schema/5/picaXML-v1.0">
<record>
<datafield tag="003@">
<subfield code="0">1041318383</subfield>
</datafield>
<datafield tag="021A">
<subfield code="a">Hello $¥!</subfield>
</datafield>
</record>
<record>
<datafield tag="028C" occurrence="01">
<subfield code="d">Emma</subfield>
<subfield code="a">Goldman</subfield>
</datafield>
</record>
</collection>
XML

is $out, $xml, 'XML writer';
is $out, $xml, 'XML writer';
}

my $append = "";
foreach my $record (@pica_records) {
bless $record, 'PICA::Data';
$record->write( plain => \$append );
note 'PICA::Writer::XML to object';

my $str = encode( 'UTF-8', $record->string );
my $r = pica_parser( 'plain', \$str )->next;
{
{
package MyStringWriter;
sub print { $_[0]->{out} .= $_[1] }
}

$record = $record->{record} if reftype $record eq 'HASH';
is_deeply $r->{record}, $record, 'record->string';
my $string = bless { }, 'MyStringWriter';

my $writer = PICA::Writer::XML->new( fh => $string );
$writer->write($_) for map { bless $_, 'PICA::Data' } @pica_records;
$writer->end;
like $string->{out}, qr{^<\?xml.+collection>$}sm, 'XML writer (to object)';
}
is $append, $PLAIN, 'record->write';

note 'PICA::Writer::PPXML';

{
my $parser = pica_parser( 'PPXML' => 't/files/slim_ppxml.xml' );
my $record;
my ($fh, $filename) = tempfile();
my $writer = PICA::Writer::PPXML->new( fh => $fh );
while($record = $parser->next){
$writer->write($record);
}
$writer->end;
close $fh;

package MyStringWriter;
sub print { $_[0]->{out} .= $_[1] }
}
my $out = do { local (@ARGV,$/)=$filename; <> };
my $in = do { local (@ARGV,$/)='t/files/slim_ppxml.xml'; <> };

my $string = bless {}, 'MyStringWriter';
is_xml($out, $in, 'PPXML writer');
}

$writer = PICA::Writer::XML->new( fh => $string, start => 0 );
$writer->write($_) for map { bless $_, 'PICA::Data' } @pica_records;
$writer->end;
like $string->{out}, qr{^<record.+record>}sm,
'XML writer (to object, no start)';
note 'PICA::Data';

my ( undef, $filename ) = tempfile( OPEN => 0 );
pica_writer( 'plain', fh => $filename );
ok -e $filename, 'write to file';
{
my $append = "";
foreach my $record (@pica_records) {
bless $record, 'PICA::Data';
$record->write( plain => \$append );

my $str = encode('UTF-8', $record->string);
my $r = pica_parser('plain', \$str)->next;

$record = $record->{record} if reftype $record eq 'HASH';
is_deeply $r->{record}, $record, 'record->string';
}
my $PLAIN = <<'PLAIN';
003@ $01041318383
021A $aHello $$¥!
eval { pica_writer( 'plain', fh => '' ) };
ok $@, 'invalid filename';
028C/01 $dEmma$aGoldman
eval { pica_writer( 'plain', fh => {} ) };
ok $@, 'invalid handle';
PLAIN

# PPXML
my $parser = pica_parser( 'PPXML' => 't/files/slim_ppxml.xml' );
my $record;
( $fh, $filename ) = tempfile();
$writer = PICA::Writer::PPXML->new( fh => $fh );
while ( $record = $parser->next ) {
$writer->write($record);
is $append, $PLAIN, 'record->write';
}
$writer->end;
close $fh;

$out = do { local ( @ARGV, $/ ) = $filename; <> };
my $in = do { local ( @ARGV, $/ ) = 't/files/slim_ppxml.xml'; <> };

is_xml( $out, $in, 'PPXML writer' );

note '3-digit occurrence';
note 'Exeptions';

{
my $record = {
'_id' => '12345',
'record' => [
[ '003@', '', '0', '12345' ],
[ '231@', '102', 'd', '10', 'j', '1966',
'0', '', 'd', '11', 'j', '1970'
]
]
};
my ( $fh, $filename ) = tempfile();
my $writer = PICA::Writer::Plus->new( fh => $fh );
$writer->write($record);
close $fh;
my $out = do { local ( @ARGV, $/ ) = $filename; <> };
is $out, "003@ 012345231@/102 d10j19660d11j1970\n", '3-digit occurrence';
}
eval { pica_writer('plain', fh => '') };
ok $@, 'invalid filename';

done_testing;
eval { pica_writer('plain', fh => {} ) };
ok $@, 'invalid handle';
}

done_testing;

0 comments on commit fe5b58e

Please sign in to comment.