Skip to content
Permalink
3.3
Go to file
 
 
Cannot retrieve contributors at this time
940 lines (797 sloc) 21 KB
#!/usr/bin/perl
######################################################################
#
# EPrints OAI 2.0 Handler
#
# Responds to incoming OAI requests
#
######################################################################
#
# __COPYRIGHT__
#
# Copyright 2000-2008 University of Southampton. All Rights Reserved.
#
# __LICENSE__
#
######################################################################
use strict;
use warnings;
# Requirement levels for request arguments; stored as the first element of
# each argument entry in %GRAMMAR and compared against when a request is
# validated.
use constant {
# argument must be supplied (unless an exclusive argument is present)
OAI_REQUIRED => 1,
# argument may be supplied
OAI_OPTIONAL => 2,
# argument is meant to be used on its own (apart from 'verb')
OAI_EXCLUSIVE => 3,
};
use EPrints;
# Locate the repository serving this request; stop quietly if there is none.
my $eprints = EPrints->new;
my $repo = $eprints->current_repository;
exit( 0 ) unless defined $repo;
# Syntax of a metadataPrefix value: letters, digits and - _ . ! ~ * ' ( )
our $METADATAPREFIX_REGEXP = qr/^[A-Za-z0-9\-_\.!~\*'\(\)]+$/;
# Syntax of an identifier: a lower-case scheme, a colon, then anything
our $IDENTIFIER_REGEXP = qr/^[a-z]+:.*$/;
# Syntax of from/until: YYYY-MM-DD, optionally followed by Thh:mm:ssZ
our $DATETIME_REGEXP = qr/^\d\d\d\d-\d\d-\d\d(T\d\d:\d\d:\d\dZ)?$/;
# Datestamp field of the "eprint" dataset; used for date-range filtering and
# for the earliest datestamp reported by Identify.
our $DATESTAMP_FIELD = $repo->dataset( "eprint")->get_datestamp_field;
# Archive id of this repository; reported as repositoryIdentifier by Identify.
our $ARCHIVE_ID = EPrints::OpenArchives::archive_id( $repo );
# OAI-PMH grammar
#
# Maps each supported verb to the arguments it accepts:
#
#   verb => { argument => [ REQUIREMENT, SYNTAX, CHECK ] }
#
# REQUIREMENT is one of OAI_REQUIRED / OAI_OPTIONAL / OAI_EXCLUSIVE.
# SYNTAX is a regexp the raw value must match.
# CHECK is a code ref called as CHECK( $repo, $value ) once the syntax
# matched; it returns a (possibly empty) list of OAI error elements and may
# replace the value in place through $_[1]. "sub { () }" means there is no
# further checking for that argument.
#
# A verb that is not a key of this hash is rejected as illegal.
our %GRAMMAR = (
GetRecord => {
metadataPrefix => [ OAI_REQUIRED, $METADATAPREFIX_REGEXP, \&check_metadataprefix ],
identifier => [ OAI_REQUIRED, $IDENTIFIER_REGEXP, \&check_identifier ],
},
Identify => {},
ListIdentifiers => {
metadataPrefix => [ OAI_REQUIRED, $METADATAPREFIX_REGEXP, \&check_metadataprefix ],
from => [ OAI_OPTIONAL, $DATETIME_REGEXP, \&check_from_date ],
until => [ OAI_OPTIONAL, $DATETIME_REGEXP, \&check_until_date ],
set => [ OAI_OPTIONAL, qr/./, sub { () } ],
resumptionToken => [ OAI_EXCLUSIVE, qr/./, \&check_resumptiontoken ],
},
ListMetadataFormats => {
identifier => [ OAI_OPTIONAL, $IDENTIFIER_REGEXP, \&check_identifier ],
},
ListRecords => {
metadataPrefix => [ OAI_REQUIRED, $METADATAPREFIX_REGEXP, \&check_metadataprefix ],
from => [ OAI_OPTIONAL, $DATETIME_REGEXP, \&check_from_date ],
until => [ OAI_OPTIONAL, $DATETIME_REGEXP, \&check_until_date ],
set => [ OAI_OPTIONAL, qr/./, sub { () } ],
resumptionToken => [ OAI_EXCLUSIVE, qr/./, \&check_resumptiontoken ],
},
ListSets => {
# accepted syntactically; ListSets itself rejects any token it is given
resumptionToken => [ OAI_EXCLUSIVE, qr/./, sub { () } ],
},
);
# Request parsing and validation.
#
# @errors    - OAI error elements collected while checking the CGI parameters
# %arguments - validated arguments handed to the verb handler; the check
#              callbacks from %GRAMMAR may have replaced the raw values
# %params    - raw CGI parameters that were given exactly once with a
#              non-empty value ('verb' is removed from it below)
my @errors;
my %arguments;
my %params;
my @params = $repo->param;

# check whether any parameters were repeated
foreach my $name (@params)
{
    my @values = $repo->param( $name );
    if( scalar(@values) > 1 )
    {
        # OAI-PMH reserves badVerb for a repeated 'verb'; any other
        # repeated argument is a badArgument condition
        my $code = $name eq "verb" ? "badVerb" : "badArgument";
        push @errors,
            render_oai_error( $repo, $code, "Includes a repeated argument for '$name'" );
    }
    elsif( defined $values[0] && length $values[0] )
    {
        $params{$name} = $values[0];
    }
}

my $verb = delete $params{"verb"};
if( !defined $verb )
{
    # "error" is only used as the name of the (discarded) response element
    $verb = "error";
    unshift @errors,
        render_oai_error( $repo, "badVerb", "Requires verb argument" );
}
elsif( !exists $GRAMMAR{$verb} )
{
    $verb = "error";
    unshift @errors,
        render_oai_error( $repo, "badVerb", "The value of the verb argument is not a legal OAI-PMH verb" ),
        render_oai_error( $repo, "badArgument", "Bad verb argument" );
}
else
{
    my $grammar = $GRAMMAR{$verb};
    # sorted so that errors are always reported in the same order
    my @names = sort keys %$grammar;

    # check for unknown parameters
    foreach my $name (sort keys %params)
    {
        next if exists $grammar->{$name};
        push @errors,
            render_oai_error( $repo, "badArgument", "Unrecognised argument '$name'" );
    }

    # find out if we have an exclusive argument
    my $exclusive = undef;
    foreach my $name (@names)
    {
        next unless $grammar->{$name}->[0] == OAI_EXCLUSIVE;
        next unless defined $params{$name};
        $exclusive = $name;
        # 'verb' has already been removed from %params, so any key beyond
        # the exclusive argument itself is an illegal companion.
        # (%arguments cannot be used here: it is only filled in further down.)
        if( scalar(keys %params) > 1 )
        {
            push @errors,
                render_oai_error( $repo, "badArgument", "'$exclusive' is exclusive and can not be used with any other arguments (except for 'verb')" );
        }
    }

    # check required arguments are defined (unless we have an exclusive)
    if( !defined($exclusive) )
    {
        foreach my $name (@names)
        {
            next unless $grammar->{$name}->[0] == OAI_REQUIRED;
            next if defined $params{$name};
            push @errors,
                render_oai_error( $repo, "badArgument", "Missing required argument '$name'" );
        }
    }

    # check arguments are well-formed and, if so, retrieve their value
    foreach my $name (@names)
    {
        next unless defined $params{$name};
        my $props = $grammar->{$name};
        my $value = $params{$name};
        if( $value !~ /$props->[1]/ )
        {
            push @errors,
                render_oai_error( $repo, "badArgument", "'$name' doesn't match required syntax '$props->[1]'" );
        }
        else
        {
            # the callback receives $value by alias and may replace it
            # with the object the verb handler should work with
            push @errors, $props->[2]->( $repo, $value );
            $arguments{$name} = $value;
        }
    }
}
# Build the payload element named after the verb. The verb handler (a sub in
# this file with the same name as the verb) only runs when the argument
# checks were clean, and may report further errors of its own.
my $response = $repo->xml->create_element( $verb );
unless( @errors )
{
    no strict "refs";
    my $handler = \&{$verb};
    push @errors, $handler->( $repo, $response, \%arguments );
}

# OAI-PMH envelope
my $oai_pmh = $repo->xml->create_element(
    "OAI-PMH",
    "xmlns" => "http://www.openarchives.org/OAI/2.0/",
    "xmlns:xsi" => "http://www.w3.org/2001/XMLSchema-instance",
    "xsi:schemaLocation" => "http://www.openarchives.org/OAI/2.0/ http://www.openarchives.org/OAI/2.0/OAI-PMH.xsd"
);
$oai_pmh->appendChild(
    $repo->xhtml->data_element( "responseDate", EPrints::Time::get_iso_timestamp() )
);

# <request>: the request attributes are only echoed back on success
my $url = $repo->current_url( host => 1, path => "cgi" ) . "/oai2";
my $failed = scalar @errors;
my @request_attrs = $failed ? () : ( verb => $verb, %params );
$oai_pmh->appendChild(
    $repo->xhtml->data_element( "request", $url, @request_attrs )
);

if( $failed )
{
    # error response: list every error and throw the payload away
    $oai_pmh->appendChild( $_ ) for @errors;
    $repo->xml->dispose( $response );
}
else
{
    # normal response
    $oai_pmh->appendChild( $response );
}

# send the response
$repo->send_http_header( content_type=>"text/xml; charset=UTF-8" );
my $xslt = $repo->current_url( path => "static" ) . "/oai2.xsl";
binmode(STDOUT, ":utf8");
my @document = (
    "<?xml version='1.0' encoding='UTF-8'?>\n",
    "<?xml-stylesheet type='text/xsl' href='$xslt' ?>\n",
    $repo->xml->to_string( $oai_pmh, indent => 1 ),
);
print @document;
$repo->xml->dispose( $oai_pmh );
# ELEMENT = render_oai_error( REPO, CODE, MESSAGE )
#
# Returns an "error" element whose text is MESSAGE and whose "code"
# attribute is CODE.
sub render_oai_error
{
    my( $repo, $code, $message ) = @_;

    my $xhtml = $repo->xhtml;

    return $xhtml->data_element( "error", $message, code => $code );
}
# ELEMENT = render_text_url( REPO, NAME, TEXTURL )
#
# Creates an element called NAME. TEXTURL is a hash ref; when its "text"
# entry is defined a "text" child is added, and when its "url" entry is
# defined a "URL" child is added (in that order).
sub render_text_url
{
    my( $repo, $name, $texturl ) = @_;

    my $node = $repo->xml->create_element( $name );

    # [ hash key, child element name ] in output order
    foreach my $pair ( [ "text", "text" ], [ "url", "URL" ] )
    {
        my( $key, $tag ) = @$pair;
        next unless defined $texturl->{$key};
        $node->appendChild(
            $repo->xhtml->data_element( $tag, $texturl->{$key} )
        );
    }

    return $node;
}
# EPRINT = get_eprint_by_identifier( REPO, IDENTIFIER )
#
# Translates an OAI identifier into an eprint id and searches the "eprint"
# dataset for the item with that id whose eprint_status is either "archive"
# or "deletion". Returns the first match, or undef when the identifier does
# not yield an id or nothing matches.
sub get_eprint_by_identifier
{
    my( $repo, $identifier ) = @_;

    my $eprintid = EPrints::OpenArchives::from_oai_identifier( $repo, $identifier );
    if( !EPrints::Utils::is_set( $eprintid ) )
    {
        return undef;
    }

    my $eprint_ds = $repo->get_dataset( "eprint" );
    my $search = $eprint_ds->prepare_search;

    # only live or deleted items are visible through OAI
    $search->add_field( $eprint_ds->field( "eprint_status" ), "archive deletion", "EQ", "ANY" );
    $search->add_field( $eprint_ds->field( "eprintid" ), $eprintid );

    my $first_match = $search->perform_search->item( 0 );

    return $first_match;
}
# [@ERRORS] = check_*( SESSION, VALUE )
#
# These methods check an argument, VALUE, and may replace VALUE with
# the value that this script will want to use.
#
# If VALUE isn't correct returns a list of OAI error elements.
# Looks up the plugin registered for the metadataPrefix VALUE. On success the
# caller's VALUE is replaced (through $_[1]) by the first plugin returned; on
# failure VALUE is left alone and a cannotDisseminateFormat error is
# returned.
sub check_metadataprefix
{
    my( $repo, $value ) = @_;

    my @errors;

    my @candidates = $repo->get_plugins( metadataPrefix => $value );
    my $plugin = $candidates[0];

    if( !defined $plugin )
    {
        push @errors, render_oai_error(
            $repo,
            "cannotDisseminateFormat",
            "Record not available as metadata type: '$value'" );
    }
    else
    {
        $_[1] = $plugin;
    }

    return @errors;
}
# Resolves the OAI identifier VALUE to an eprint. The caller's VALUE is
# always replaced (through $_[1]) by the lookup result, which is undef when
# nothing was found; in that case an idDoesNotExist error is returned.
sub check_identifier
{
    my( $repo, $value ) = @_;

    my $eprint = get_eprint_by_identifier( $repo, $value );

    # $value is a private copy, so the message below still shows the
    # identifier that was asked for
    $_[1] = $eprint;

    my @errors;
    unless( defined $eprint )
    {
        push @errors, render_oai_error(
            $repo,
            "idDoesNotExist",
            "'$value' is not a valid item in this repository" );
    }

    return @errors;
}
# Normalises a "from" date in place (through $_[1]):
#   YYYY-MM-DD            becomes YYYY-MM-DDT00:00:00 (start of that day)
#   YYYY-MM-DDThh:mm:ssZ  becomes YYYY-MM-DDThh:mm:ss
# Anything else is left untouched. Never reports an error.
sub check_from_date
{
    my( $repo, $value ) = @_;

    my( $date, $time ) =
        $value =~ /^(\d\d\d\d-\d\d-\d\d)(?:T(\d\d:\d\d:\d\d)Z)?$/;

    if( defined $date )
    {
        $time = "00:00:00" unless defined $time;
        $_[1] = $date . "T" . $time;
    }

    return ();
}
# Normalises an "until" date in place (through $_[1]):
#   YYYY-MM-DD            becomes YYYY-MM-DDT23:59:59 (end of that day)
#   YYYY-MM-DDThh:mm:ssZ  becomes YYYY-MM-DDThh:mm:ss
# Anything else is left untouched. Never reports an error.
sub check_until_date
{
    my( $repo, $value ) = @_;

    my( $date, $time ) =
        $value =~ /^(\d\d\d\d-\d\d-\d\d)(?:T(\d\d:\d\d:\d\d)Z)?$/;

    if( defined $date )
    {
        $time = "23:59:59" unless defined $time;
        $_[1] = $date . "T" . $time;
    }

    return ();
}
# Decodes a resumption token into a hash ref of name => value pairs and
# hands that hash ref back to the caller through $_[1].
#
# The token is unescaped once as a whole, split into "name=value" pairs on
# "&", and each name and value is then unescaped individually. Pairs with a
# missing or empty value are ignored. Never reports an error: the content of
# the token is not validated here.
sub check_resumptiontoken
{
    my( $repo, $value ) = @_;

    my %token;

    foreach my $pair ( split /&/, URI::Escape::uri_unescape( $value ) )
    {
        # limit of 2: only the first "=" separates name from value, so a
        # value that itself contains "=" is kept whole instead of being
        # cut off at the second "="
        my( $name, $part ) = split /=/, $pair, 2;
        next unless defined $part && length $part;
        $token{ URI::Escape::uri_unescape( $name ) } = URI::Escape::uri_unescape( $part );
    }

    $_[1] = \%token;

    return ();
}
# Serialises the hash ref PARAMS into a string that can be used as a
# resumption token: every name and value is escaped, the pairs are joined as
# "name=value" with "&" in sorted name order, and the joined string is
# escaped once more as a whole.
sub serialise_resumptiontoken
{
    my( $repo, $params ) = @_;

    my @pairs;
    foreach my $name ( sort keys %$params )
    {
        push @pairs,
            URI::Escape::uri_escape( $name ) . "=" . URI::Escape::uri_escape( $params->{$name} );
    }

    return URI::Escape::uri_escape( join( "&", @pairs ) );
}
# Identify verb: fills RESPONSE with the repository description.
#
# The first "archive" eprint in datestamp order is used both as the sample
# identifier and as the source of the earliest datestamp. Returns an empty
# list (no errors).
sub Identify
{
    my( $repo, $response, $args ) = @_;

    my $dataset = $repo->dataset( "eprint" );

    # get a real identifier & datestamp
    my $order = $DATESTAMP_FIELD->get_name;
    my $searchexp = $dataset->prepare_search(
        custom_order => "$order",
        limit => 1,
    );
    $searchexp->add_field( $dataset->field( "eprint_status" ), "archive", "EQ", "ANY" );
    my $sample_eprint = $searchexp->perform_search->item( 0 );

    # fallback used when there is no sample item or it carries no datestamp
    my $earliest_datestamp = "0001-01-01T00:00:00Z";
    my $sample_identifier;
    if( defined $sample_eprint )
    {
        $sample_identifier = EPrints::OpenArchives::to_oai_identifier(
            EPrints::OpenArchives::archive_id( $repo ),
            $sample_eprint->id
        );
        if( $sample_eprint->is_set( $DATESTAMP_FIELD->get_name ) )
        {
            # stored value is split on a space into date and time;
            # a missing time part counts as midnight
            my $datetime = $DATESTAMP_FIELD->get_value( $sample_eprint );
            my( $date, $time ) = split / /, $datetime;
            $earliest_datestamp = $date . "T" . ( $time || "00:00:00" ) . "Z";
        }
    }

    # simple text elements, in output order
    # (earliestDatestamp may later be either calculated from the database,
    # or configurable)
    my @simple = (
        [ "repositoryName", $repo->phrase( "archive_name" ) ],
        [ "baseURL", $repo->config( "oai","v2","base_url" ) ],
        [ "protocolVersion", "2.0" ],
        [ "adminEmail", $repo->config( "adminemail" ) ],
        [ "earliestDatestamp", $earliest_datestamp ],
        [ "deletedRecord", "persistent" ],
        [ "granularity", "YYYY-MM-DDThh:mm:ssZ" ],
    );
    foreach my $row ( @simple )
    {
        $response->appendChild( $repo->xhtml->data_element( @$row ) );
    }

    # <description> holding the oai-identifier block
    my $oaiid = $repo->xml->create_element(
        "oai-identifier",
        "xmlns"=>"http://www.openarchives.org/OAI/2.0/oai-identifier",
        "xmlns:xsi"=>"http://www.w3.org/2001/XMLSchema-instance",
        "xsi:schemaLocation"=>"http://www.openarchives.org/OAI/2.0/oai-identifier http://www.openarchives.org/OAI/2.0/oai-identifier.xsd" );
    my $id_description = $repo->xml->create_element( "description" );
    $id_description->appendChild( $oaiid );
    $response->appendChild( $id_description );

    my @id_parts = (
        [ "scheme", "oai" ],
        [ "repositoryIdentifier", $ARCHIVE_ID ],
        [ "delimiter", ":" ],
        [ "sampleIdentifier", $sample_identifier ],
    );
    foreach my $row ( @id_parts )
    {
        $oaiid->appendChild( $repo->xhtml->data_element( @$row ) );
    }

    # <description> holding the eprints block
    my $eprints_el = $repo->xml->create_element(
        "eprints",
        "xmlns"=>"http://www.openarchives.org/OAI/1.1/eprints",
        "xmlns:xsi"=>"http://www.w3.org/2001/XMLSchema-instance",
        "xsi:schemaLocation"=>"http://www.openarchives.org/OAI/1.1/eprints http://www.openarchives.org/OAI/1.1/eprints.xsd" );
    my $eprints_description = $repo->xml->create_element( "description" );
    $eprints_description->appendChild( $eprints_el );
    $response->appendChild( $eprints_description );

    # [ element name, key below the "oai" configuration ] in output order
    my @policies = (
        [ "content", "content" ],
        [ "metadataPolicy", "metadata_policy" ],
        [ "dataPolicy", "data_policy" ],
        [ "submissionPolicy", "submission_policy" ],
    );
    foreach my $policy ( @policies )
    {
        my( $element, $config_key ) = @$policy;
        $eprints_el->appendChild( render_text_url(
            $repo,
            $element,
            $repo->config( "oai", $config_key ) ) );
    }

    foreach my $comment ( @{$repo->config( "oai","comments" )} )
    {
        $eprints_el->appendChild( $repo->xhtml->data_element( "comment", $comment ) );
    }

    return ();
}
# GetRecord verb: appends the record for the requested item to RESPONSE.
#
# By the time this runs the argument checks have already turned
# $args->{identifier} into the eprint and $args->{metadataPrefix} into the
# export plugin. Returns an empty list (no errors).
sub GetRecord
{
    my( $repo, $response, $args ) = @_;

    my $plugin = $args->{metadataPrefix};
    my $eprint = $args->{identifier};

    # The eprint exists, so write the record; if the metadata format isn't
    # available for this record, only the header will be output.
    $response->appendChild(
        EPrints::OpenArchives::make_record( $repo, $eprint, $plugin, 1 )
    );

    return ();
}
# ListMetadataFormats verb: appends one "metadataFormat" element per
# advertised plugin that has a metadataPrefix, ordered by prefix. When an
# item was named in the request, plugins whose xml_dataobj() returns undef
# for that item are left out. Returns an empty list (no errors).
sub ListMetadataFormats
{
    my( $repo, $response, $args ) = @_;

    my $eprint = $args->{identifier};

    my @plugins = sort {
        $a->param( "metadataPrefix" ) cmp $b->param( "metadataPrefix" )
    } $repo->get_plugins(
        metadataPrefix => "*",
        is_advertised => 1,
    );

    # [ child element name, plugin parameter ] in output order
    my @children = (
        [ "metadataPrefix", "metadataPrefix" ],
        [ "schema", "schemaLocation" ],
        [ "metadataNamespace", "xmlns" ],
    );

    PLUGIN: foreach my $plugin (@plugins)
    {
        if( defined($eprint) )
        {
            next PLUGIN if !defined $plugin->xml_dataobj( $eprint );
        }

        my $mdf = $repo->xml->create_element( "metadataFormat" );
        foreach my $child ( @children )
        {
            my( $element, $param ) = @$child;
            $mdf->appendChild( $repo->xhtml->data_element(
                $element,
                $plugin->param( $param )
            ));
        }
        $response->appendChild( $mdf );
    }

    return ();
}
# ListSets verb: appends one "set" element per value of every configured set
# (oai/sets), followed by the configured custom sets (oai/custom_sets).
#
# Resumption tokens are not supported here: if one is supplied a
# badResumptionToken error is returned. Otherwise returns an empty list.
sub ListSets
{
    my( $repo, $response, $args ) = @_;

    if( defined $args->{resumptionToken} )
    {
        return( render_oai_error(
            $repo,
            "badResumptionToken",
            "Resumption Tokens not supported for ListSets" ) );
    }

    my $ds = $repo->get_dataset( "archive" );
    my $ds_del = $repo->get_dataset( "deletion" );

    my $viewconf = $repo->config( "oai","sets" );
    foreach my $info ( @{$viewconf} )
    {
        # encoded set key => label shown in the set name
        my %label_for = ();

        foreach my $fieldname ( split( "/" , $info->{fields} ) )
        {
            my $field = EPrints::Utils::field_from_config_string( $ds, $fieldname );

            if( !$field->is_type( "subject" ) )
            {
                # values in use in the live dataset and in the deleted one
                my $live_values = $field->get_values( $repo, $ds );
                my $delfield = $field->clone();
                #cjg why clone with new style datasets?
                #$delfield->set_dataset( $ds_del );
                my $deleted_values = $delfield->get_values( $repo, $ds_del );

                foreach my $fieldvalue ( @{$live_values}, @{$deleted_values} )
                {
                    my $key = $field->get_id_from_value( $repo, $fieldvalue );
                    $key = EPrints::OpenArchives::encode_setspec( $key );
                    $label_for{$key} = EPrints::Utils::tree_to_utf8( $field->get_value_label( $repo, $fieldvalue ) );
                }
                next;
            }

            # subject field: one set per entry below the field's top subject
            my $topsubj = EPrints::DataObj::Subject->new(
                $repo,
                $field->get_property( "top" ) );
            foreach my $entry ( @{$topsubj->get_subjects( 0, 0, 1 )} )
            {
                # encode each ":"-separated part of the entry's key on its own
                my $key = join( ":",
                    map { scalar EPrints::OpenArchives::encode_setspec( $_ ) }
                    split( ":", $entry->[0] ) );
                $label_for{$key} = $entry->[1];
            }
        }

        delete $label_for{""} unless $info->{allow_null};

        foreach my $key ( keys %label_for )
        {
            my $set = $repo->xml->create_element( "set" );
            $response->appendChild( $set );

            my $spec = EPrints::OpenArchives::encode_setspec( $info->{id}."=" ).$key;
            $set->appendChild( $repo->xhtml->data_element( "setSpec", $spec ) );

            my $name = $repo->get_view_name( $ds, $info->{id} )." = ".$label_for{$key};
            $set->appendChild( $repo->xhtml->data_element( "setName", $name ) );
        }
    }

    # custom sets are listed as configured: spec and name are used verbatim
    my $custom_sets = $repo->config( "oai", "custom_sets");
    foreach my $info (@{$custom_sets||[]})
    {
        my $set = $repo->xml->create_element( "set" );
        $response->appendChild( $set );
        foreach my $child ( [ "setSpec", "spec" ], [ "setName", "name" ] )
        {
            my( $element, $key ) = @$child;
            $set->appendChild( $repo->xhtml->data_element(
                $element,
                $info->{$key} ) );
        }
    }

    return ();
}
# ListIdentifiers verb: same listing as ListRecords, but _list() is asked
# for headers only. Returns whatever _list() returns.
sub ListIdentifiers
{
    my( $repo, $response, $args ) = @_;

    my $show_metadata = 0;

    return _list( $repo, $response, $args, $show_metadata );
}
# ListRecords verb: same listing as ListIdentifiers, but _list() is asked
# for full records. Returns whatever _list() returns.
sub ListRecords
{
    my( $repo, $response, $args ) = @_;

    my $show_metadata = 1;

    return _list( $repo, $response, $args, $show_metadata );
}
# [@ERRORS] = _list( REPO, RESPONSE, ARGS, SHOW_METADATA )
#
# Shared implementation of ListIdentifiers (SHOW_METADATA false: headers
# only) and ListRecords (SHOW_METADATA true: full records).
#
# Appends up to one page of items to RESPONSE, ordered by eprintid, and adds
# a resumptionToken element when more items remain. The token carries the
# original metadataPrefix/from/until/set values plus the "offset" (eprint
# id) at which the next page starts.
#
# ARGS holds the validated request arguments; when a resumption token was
# supplied, ARGS is replaced by the content of the token.
#
# Returns a list of OAI error elements, empty on success.
sub _list
{
    my( $repo, $response, $args, $show_metadata ) = @_;

    my $PAGESIZE = 100;

    # different params depending if we have a resumptionToken or not
    my %token;
    # Test the validated argument rather than the raw CGI parameter: an
    # empty "resumptionToken=" never reaches ARGS, and dereferencing the
    # missing entry would abort the request.
    if( defined $args->{resumptionToken} )
    {
        %$args = %token = %{$args->{resumptionToken}};

        # the values inside the token have not been through the argument
        # checks yet, so run them now
        my @errors;
        if( defined $args->{metadataPrefix} )
        {
            push @errors,
                check_metadataprefix( $repo, $args->{metadataPrefix} );
        }
        else
        {
            push @errors, render_oai_error(
                $repo,
                "badArgument",
                "Expected metadata prefix as part of resumption token, token contains: ".join(',',%token) );
        }
        if( defined $args->{from} )
        {
            push @errors, check_from_date( $repo, $args->{from} );
        }
        if( defined $args->{until} )
        {
            push @errors, check_until_date( $repo, $args->{until} );
        }
        if( scalar(@errors) )
        {
            unshift @errors, render_oai_error(
                $repo,
                "badResumptionToken",
                "The value of the resumptionToken argument is invalid or expired." );
            return @errors;
        }
    }
    else
    {
        # granularity check
        if(
            $repo->param( "from" ) && $repo->param( "until" ) &&
            length($repo->param( "from" )) != length($repo->param( "until" ))
          )
        {
            return( render_oai_error(
                $repo,
                "badArgument",
                "from and until dates have different granularity" ) );
        }

        # populate the resumption token with original parameters
        foreach my $name (qw( metadataPrefix from until set ))
        {
            my $value = $repo->param( $name );
            if( defined $value && length($value) )
            {
                $token{$name} = $value;
            }
        }
    }

    my $ds = $repo->dataset( "eprint" );

    # Work on a private copy of the configured filters. Pushing onto the
    # array ref handed back by config() would modify that array itself;
    # NOTE(review): presumably it is the live configuration, in which case
    # the additions would pile up and affect later requests - confirm.
    my @filters = @{ $repo->config( "oai", "filters" ) || [] };
    push @filters, {
        meta_fields => [qw( datestamp )],
        match => "SET",
    };

    # custom sets
    if( defined $args->{set} )
    {
        my $custom_sets = $repo->config( "oai", "custom_sets" );
        my( $info ) = grep { $_->{spec} eq $args->{set} } @{$custom_sets||[]};
        if( defined $info )
        {
            # a custom set either lists its filters or is itself one filter
            my $set_filters = $info->{filters};
            $set_filters = [$info] if !defined $set_filters;
            push @filters, @$set_filters;
            # handled here, so skip the configured-set handling below
            delete $args->{set};
        }
    }

    # one more than a page, so we can tell whether another page follows
    my $searchexp = $ds->prepare_search(
        allow_blank => 1,
        filters => [ @filters ],
        custom_order => "eprintid",
        limit => ($PAGESIZE+1) );
    if( !defined $searchexp )
    {
        # something went wrong
        return( render_oai_error(
            $repo,
            "badArgument",
            "Could not make Search (system error)." ) );
    }

    if( defined $args->{offset} )
    {
        # continue from the eprint id stored in the resumption token
        my $offset = sprintf("%d..", $args->{offset});
        $searchexp->add_field( $ds->key_field, $offset );
    }

    # date range as "from..until", "from.." or "-until"
    my $date_range;
    if( defined $args->{from} )
    {
        $date_range = $args->{from}."..";
    }
    if( defined $args->{until} )
    {
        $date_range = "-" if( !defined $date_range );
        $date_range .= $args->{until};
    }
    if( defined $date_range )
    {
        $searchexp->add_field( $DATESTAMP_FIELD, $date_range );
    }

    if( defined $args->{set} )
    {
        my( $head , @tail ) = EPrints::OpenArchives::decode_setspec( $args->{set} );
        my( $key , $value ) = split( /=/ , $head );
        $value = pop @tail if( scalar @tail > 0 );

        my $views = $repo->config( "oai","sets" ); #cjg
        my $info;
        foreach my $view ( @{$views} )
        {
            $info = $view if( $view->{id} eq $key );
        }
        if( !defined $info )
        {
            return( render_oai_error(
                $repo,
                "badArgument",
                "Invalid set parameter; unknown key ( $key )" ) );
        }

        my @fields;
        my $match = "EX";
        foreach my $fieldname ( split( "/", $info->{fields} ) )
        {
            my $field = EPrints::Utils::field_from_config_string( $ds, $fieldname );
            unless( $field->is_browsable() )
            {
                # Eeep. This is really bad. Just die now.
                my $type = $field->get_type();
                EPrints::abort( <<END );
Cannot generate OAI set for field "$fieldname"
- Type "$type" cannot be browsed.
END
            }
            push @fields, $field;
            if( $field->is_type( "subject" ) )
            {
                $match = "EQ";
            }
        }
        $value = $fields[0]->get_value_from_id( $repo, $value );
        $searchexp->add_field( \@fields, $value, $match );
    }

    my $list = $searchexp->perform_search();
    my @records = $list->slice( 0, $PAGESIZE );
    if( @records == 0 )
    {
        # no items at all
        return( render_oai_error(
            $repo,
            "noRecordsMatch",
            "No items match. None. None at all. Not even deleted ones." ) );
    }

    my $plugin = $args->{metadataPrefix};

    # speed up header generation
    $repo->cache_subjects;

    foreach my $eprint ( @records )
    {
        $response->appendChild(
            $show_metadata
                ? EPrints::OpenArchives::make_record( $repo, $eprint, $plugin, 1 )
                : EPrints::OpenArchives::make_header( $repo, $eprint, 1 )
        );
        # the next page starts just after the last item written
        $token{offset} = $eprint->id + 1;
    }

    # the search asked for PAGESIZE+1 items, so a larger count means at
    # least one more item is waiting for the next page
    if( $list->count > $PAGESIZE )
    {
        # we'll give a fake expiration date to make DRIVER happy
        $response->appendChild( $repo->xhtml->data_element(
            "resumptionToken",
            serialise_resumptiontoken( $repo, \%token ),
            expirationDate => EPrints::Time::get_iso_timestamp( time + 86400 )
        ) );
    }

    return ();
}
1;