Permalink
Switch branches/tags
Nothing to show
Find file
Fetching contributors…
Cannot retrieve contributors at this time
172 lines (130 sloc) 4.24 KB
package DPLA;
use warnings;
use strict;
use MARC::Record;
use Data::Dump qw/dump/;
use JSON::XS;
use Encode;
use base 'Scraper';
# File-wide debug switch: takes the value of the DEBUG environment variable
# when it is true, otherwise 0. Guards the extra dump() tracing below.
my $debug = $ENV{DEBUG} || 0;
# Emit one diagnostic line through warn, prefixed with "# ".
#
# Arguments: a list of message fragments, concatenated without separators.
#
# The line is terminated with a literal newline instead of $/, so the output
# stays a clean single line (without warn's "at FILE line N" suffix) even when
# a caller has localised $/ -- for example to undef while slurping a file.
sub diag {
	warn "# ", @_, "\n";
}
# http://dp.la/dev/wiki/Item_API
#
# Base Fields: Mapping to a set of common terms
# Field name Field description
# dpla.keyword Almost all of a record's fields get copied to this field
# dpla.title The title and/or subtitle of the item. Exact matching.
# dpla.title_keyword The title and/or subtitle of the item. Keyword matching.
# dpla.creator The creator(s), contributor(s), editor(s), etc. of the item. Exact matching
# dpla.creator_keyword The creator(s), contributor(s), editor(s), etc. of the item. Keyword matching
# dpla.date The item's date of publication.
# dpla.description The item's description. This often includes the item's Table of Contents. Exact matching.
# dpla.description_keyword The item's description. This often includes the item's Table of Contents. Keyword matching.
# dpla.subject A catchall for subject information. LCSH, Dewey, and other tag related fields are copied to this field. Exact matching.
# dpla.subject_keyword A catchall for subject information. LCSH, Dewey, and other tag related fields are copied to this field. Keyword matching.
# dpla.publisher The name of the publisher. Exact matching.
# dpla.language The primary language of the item. Exact matching.
# dpla.isbn The item's ISBN. Exact matching.
# dpla.oclc The item's OCLC identifier. Exact matching.
# dpla.lccn The item's LCCN. Exact matching.
# dpla.call_num The item's call number. Exact matching.
# dpla.content_link A link to the item's content. Exact matching.
# dpla.contributor The contributing partner. Exact matching.
# dpla.resource_type The resource's type. Common values include item and collection. Exact matching.
# Koha Z39.50 query:
#
# Bib-1 @and @and @and @and @and @and @and @or
# @attr 1=4 title
# @attr 1=7 isbn
# @attr 1=8 issn
# @attr 1=1003 author
# @attr 1=16 dewey
# @attr 1=21 subject-holding
# @attr 1=12 control-no
# @attr 1=1007 standard-id
# @attr 1=1016 any
# Lookup table that translates Z39.50 Bib-1 "use" attribute numbers into DPLA
# item API search fields, together with the pieces needed to render a query.
#
# Takes no arguments and returns a freshly built hash reference on every call:
#   <number>    => DPLA field name for that Bib-1 use attribute
#   RPN         => hash mapping the boolean operator names And / Or to the
#                  string emitted for them (presumably the separator placed
#                  between query terms; the consumer lives outside this file)
#   prefix_term => code ref: ($prefix, $term) -> "filter=<prefix>:<term>"
#
# Use attributes 16 (dewey), 12 (control-no) and 1007 (standard-id) have no
# DPLA counterpart and are deliberately left unmapped.
sub usemap {
	my %operator_for = (
		And => '&',
		Or  => '&', # FIXME sigh, not really supported?
	);

	# Renders a single search term as a DPLA "filter=" URL parameter.
	my $render_filter = sub {
		my ( $field, $value ) = @_;
		return join( ':', 'filter=' . $field, $value );
	};

	return {
		4           => 'dpla.title_keyword',    # title
		7           => 'dpla.isbn',             # isbn
		8           => 'dpla.keyword',          # issn -- XXX fake, no ISSN field
		1003        => 'dpla.creator_keyword',  # author
		21          => 'dpla.subject',          # subject-holding
		1016        => 'dpla.keyword',          # any
		RPN         => \%operator_for,
		prefix_term => $render_filter,
	};
}
# Run a query against the DPLA item API and remember the decoded response.
#
# Arguments:
#   $query - URL query string, appended verbatim after "?" of the item
#            endpoint. Dies with "need query" when it is undefined.
#
# Returns the value of the response's num_found member, which is also stored
# in $self->{hits}; the decoded response is kept in $self->{_json}, which
# next_marc reads. Returns nothing (undef / empty list) and leaves both of
# those untouched when the response body cannot be parsed as JSON, is not a
# JSON object, or has no num_found member.
sub search {
	my ( $self, $query ) = @_;

	die "need query" unless defined $query;

	my $url = 'http://api.dp.la/v0.03/item/?' . $query;
	diag( "get $url" );

	my $mech = $self->mech;
	$mech->get( $url );
	my $content = $mech->content;

	# decode_json croaks on malformed input (an HTML error page, an empty
	# body, ...). Report that the same way as any other unusable response
	# instead of letting the exception kill the caller.
	my $json;
	if ( ! eval { $json = decode_json( $content ); 1 } ) {
		my $error = $@;
		$error =~ s/\s+\z//;
		diag( "can't decode response from $url: $error" );
		return;
	}
	diag( "# json = ", dump($json) ) if $debug;

	# A top-level array or scalar would blow up on the hash dereference
	# under strict refs, so check the shape before looking for num_found.
	if ( ref($json) ne 'HASH' || ! exists $json->{num_found} ) {
		diag( "can't find num_found in ", $content );
		return;
	}

	my $hits = $json->{num_found};
	diag( "got $hits results" );

	$self->{_json} = $json;
	return $self->{hits} = $hits;
}
# Turn the next search result into a MARC record and save it.
#
# Arguments:
#   $format - accepted for interface compatibility; not used by this method.
#
# Removes the first remaining entry from $self->{_json}->{docs}. Keys of the
# form <three digits><one word character> (e.g. "245a") are read as MARC tag
# plus subfield code; all other keys are reported on STDERR as IGNORED. A key
# whose value is an array produces one field per element, and subfields of the
# same tag are merged into those fields by position.
#
# Returns the item's dpla.id after handing "<id>.marc" and the USMARC blob to
# save_marc. Returns nothing (undef / empty list) when no results are left or
# when the item has no dpla.id.
sub next_marc {
	my ( $self, $format ) = @_;

	my $item = shift @{ $self->{_json}->{docs} };

	# Results exhausted (or search was never run): stop here rather than
	# building an empty record and calling add_fields with no arguments.
	return unless defined $item;

	my $marc = MARC::Record->new;
	$marc->encoding('utf-8');

	# Field specs ([ tag, ind1, ind2, code => value, ... ]) collected for the
	# tag currently being processed; undef while nothing is pending.
	my $fields;

	foreach my $key ( sort keys %$item ) {
		my $v = $item->{$key};
		warn "# item ",dump( $key, $v ) if $debug;

		if ( $key =~ m/^(\d\d\d)(\w)$/ ) {
			my ( $f, $sf ) = ( $1, $2 );

			# XXX do magic and unroll into proper MARC record
			$v = [ $v ] unless ref $v eq 'ARRAY';

			# Keys arrive sorted, so a different tag means every subfield of
			# the previous tag has been seen and its fields can be flushed.
			if ( $fields && $fields->[0]->[0] ne $f ) {
				$marc->add_fields( @$fields );
				warn "# add_fields ",dump($fields) if $debug;
				$fields = undef;
			}

			foreach my $i ( 0 .. $#$v ) {
				# first subfield for this occurrence: tag + blank indicators
				$fields->[$i] ||= [ $f, ' ', ' ' ];
				push @{ $fields->[$i] }, $sf, decode('utf-8', $v->[$i]); # decode here fixes API encoding error
			}
		} else {
			warn "# IGNORED: $key ", dump($item->{$key}), "\n";
		}
	}

	# Flush the last tag; skipped when the item had no MARC-style keys at all,
	# because add_fields must not be called with an empty argument list.
	$marc->add_fields( @$fields ) if $fields;

	diag( "# marc ", $marc->as_formatted );
	warn dump( $marc->as_usmarc );

	$self->mech->back; # return to search results for next page

	my $id = $item->{'dpla.id'};
	if ( ! $id ) {
		warn "no dpla.id in ",dump($item);
		return;
	}

	$self->save_marc( "$id.marc", $marc->as_usmarc );
	return $id;
}
1;