-
Notifications
You must be signed in to change notification settings - Fork 5
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #33 from gbv/schema
Add PICA::Schema
- Loading branch information
Showing
4 changed files
with
206 additions
and
7 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,142 @@ | ||
package PICA::Schema; | ||
use strict; | ||
use warnings; | ||
|
||
our $VERSION = '0.34'; | ||
|
||
use Scalar::Util qw(reftype); | ||
|
||
sub new { | ||
my ($class, $schema) = @_; | ||
bless $schema, $class; | ||
} | ||
|
||
sub check { | ||
my ($self, $record, %options) = @_; | ||
|
||
$record = $record->{record} if reftype $record eq 'HASH'; | ||
|
||
$options{counter} = {}; | ||
return map { $self->check_field($_, %options) } @$record; | ||
} | ||
|
||
sub _error { | ||
my $field = shift; | ||
return { | ||
tag => $field->[0], | ||
($field->[1] ? (occurrence => $field->[1]) : ()), | ||
@_ | ||
} | ||
} | ||
|
||
sub check_field { | ||
my ($self, $field, %options) = @_; | ||
|
||
my $spec = $self->{fields}{$field->[0]}; | ||
|
||
if (!$spec) { | ||
if (!$options{ignore_unknown_fields}) { | ||
return _error($field, message => 'unknown field') | ||
} else { | ||
return () | ||
} | ||
} | ||
|
||
if ($options{counter} && $spec->{unique}) { | ||
my $tag_occ = join '/', grep { defined } @$field[0,1]; | ||
if ($options{counter}{$tag_occ}++) { | ||
return _error($field, unique => 1, message => 'field is not repeatable') | ||
} | ||
} | ||
|
||
my %errors; | ||
if ($spec->{subfields}) { | ||
my %sfcounter; | ||
my (undef, undef, @subfields) = @$field; | ||
while (@subfields) { | ||
my ($code, undef) = splice @subfields, 0, 2; | ||
my $sfspec = $spec->{subfields}{$code}; | ||
|
||
if ($sfspec) { | ||
if ($sfspec->{unique} && $sfcounter{$code}++) { | ||
$errors{$code} = { | ||
message => 'subfield is not repeatable', | ||
unique => 1 | ||
}; | ||
} | ||
} elsif (!$options{ignore_unknown_subfields}) { | ||
$errors{$code} = { message => 'unknown subfield' }; | ||
} | ||
} | ||
} | ||
|
||
return %errors ? _error($field, subfields => \%errors) : (); | ||
} | ||
|
||
1; | ||
__END__ | ||
=head1 NAME | ||
PICA::Schema - Specification of a PICA based format | ||
=head1 DESCRIPTION | ||
A PICA Schema defines a set of PICA+ fields and subfields to validate | ||
L<PICA::Data> records. A schema is given as hash reference such as: | ||
{ | ||
fields => { | ||
'021A' => { }, # field without additional information | ||
'003@' => { # field with additional constraints | ||
unique => 1, | ||
label => 'Pica-Produktionsnummer', | ||
subfields => { | ||
0 => { unique => 1 } | ||
} | ||
} | ||
} | ||
} | ||
=head1 METHODS | ||
=head2 check( $record [, %options ] ) | ||
Check whether a given L<PICA::Data> record confirms to the schema and return a | ||
list of detected violations. Possible options include: | ||
=over | ||
=item ignore_unknown_fields | ||
Don't report fields not included in the schema. | ||
=item ignore_unknown_subfields | ||
Don't report subfields not included in the schema. | ||
=back | ||
Errors are given as list of hash reference with keys C<tag> and C<occurrence> | ||
set to tag and (optional) ocurrence of the violated field. If key C<unique> is | ||
set, the field was repeated although not repeatable. Otherwise, if key | ||
C<subfields> is set, the field was defined but contained invalid subfields. | ||
Additional error field C<message> contains a human-readable error message which | ||
can also be derived from the rest of the error object. | ||
=head2 check_field( $field [, %options ] ) | ||
Check whether a PICA field confirms to the schema. Use same options as method C<check>. | ||
=head1 LIMITATIONS | ||
The current version can only validate records with tags on level 0. | ||
=head1 SEE ALSO | ||
L<PICA::Path> (support may be added in a future version) | ||
L<MARC::Lint> | ||
=cut |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,50 @@ | ||
use strict; | ||
use warnings; | ||
use utf8; | ||
use PICA::Data qw(pica_parser); | ||
use PICA::Schema; | ||
use Test::More; | ||
|
||
my $schema = PICA::Schema->new({ fields => { '021A' => { unique => 1 } } }); | ||
|
||
sub validate(@) { ## no critic | ||
my ($schema, $record, $errors, %options) = @_; | ||
|
||
use Data::Dumper; print Dumper([ $schema->check($record) ]); | ||
|
||
my ($message) = map { $_->{message} } @$errors; | ||
is_deeply $errors, [ $schema->check($record, %options) ], $message; | ||
} | ||
|
||
my $record = [ ['021A', undef, a => 'title'] ]; | ||
validate $schema, $record, []; | ||
|
||
push @$record, ['021A', undef, a => 'title']; | ||
validate $schema, $record, [ { tag => '021A', unique => 1, message => 'field is not repeatable' } ]; | ||
|
||
$record->[1] = ['003@', undef, 0 => '12345']; | ||
validate $schema, $record, [ { tag => '003@', message => 'unknown field' } ]; | ||
validate $schema, $record, [], ignore_unknown_fields => 1; | ||
|
||
$schema->{fields}{'003@'} = { unique => 1, subfields => { } }; | ||
validate $schema, $record, [ { | ||
tag => '003@', | ||
subfields => { | ||
0 => { message => 'unknown subfield' } | ||
} } ]; | ||
validate $schema, $record, [], ignore_unknown_subfields => 1; | ||
|
||
$schema->{fields}{'003@'} = { unique => 1, subfields => { 0 => { unique => 1 } } }; | ||
validate $schema, $record, []; | ||
|
||
$record->[1] = ['003@', undef, 0 => '12345', 0 => '6789']; | ||
validate $schema, $record, [ { | ||
tag => '003@', | ||
subfields => { | ||
0 => { message => 'subfield is not repeatable', unique => 1 } | ||
} } ]; | ||
|
||
# TODO: check fields in level 1 and level 2 | ||
# $record = pica_parser( 'Plain' => 't/files/bgb.example' )->next; | ||
|
||
done_testing; |