Permalink
Cannot retrieve contributors at this time
522 lines (421 sloc)
18.9 KB
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| ##################################################### | |
| # New architecture | |
| # for Eprint => datacite mapping | |
| ##################################################### | |
| #################################### | |
| # Mandatory fields for Datacite 4.0 | |
| # - identifier | |
| # - resourceType | |
| # - creators | |
| # - titles | |
| # - publisher | |
| # - publicationYear | |
| # ################################# | |
| # identifier: this is the DOI and is automatically generated, see EPrints::Plugin::Event::DataCiteEvent::coin_doi | |
| ################################################## | |
| # resourceType this is derived from the eprint.type and the datacitedoi->{typemap} in cfg/cfg.d/z_datacite.pl | |
| # https://schema.datacite.org/meta/kernel-4.0/metadata.xsd#resourceType | |
# Build the DataCite <resourceType> element for an eprint.
#
# Arguments: ($xml, $dataobj, $repo) - XML factory, the eprint, the repository.
# Returns:   a <resourceType> element, or undef when no valid mapping exists.
#
# The element is derived from the datacitedoi typemap entry for eprint.type
# (hash with keys 'v' = element text and 'a' = resourceTypeGeneral). When the
# "recollect" config is defined and eprint.data_type holds a valid general
# type, that value takes precedence and the element text is fixed to "Dataset".
$c->{datacite_mapping_type} = sub {
    my ($xml, $dataobj, $repo) = @_;

    # Values accepted here for the resourceTypeGeneral attribute.
    my @general_types = qw(
        Audiovisual
        Collection
        Dataset
        Event
        Image
        InteractiveResource
        Model
        PhysicalObject
        Service
        Software
        Sound
        Text
        Workflow
        Other
    );

    my $element = undef;

    if ($dataobj->exists_and_set("type")) {
        my $mapped = $repo->get_conf("datacitedoi", "typemap", $dataobj->value("type"));
        if (defined $mapped) {
            my $is_known = grep { $mapped->{'a'} eq $_ } @general_types;
            if ($is_known) {
                $element = $xml->create_data_element(
                    "resourceType", $mapped->{'v'},
                    resourceTypeGeneral => $mapped->{'a'},
                );
            }
        }
    }

    # We have the recollect plugin in play, so let's use the data_type if set.
    if (defined $repo->get_conf("recollect") && $dataobj->exists_and_set("data_type")) {
        my $data_type = $dataobj->value("data_type");
        my $is_known  = grep { $data_type eq $_ } @general_types;
        if ($is_known) {
            $element = $xml->create_data_element(
                "resourceType", "Dataset",
                resourceTypeGeneral => $data_type,
            );
        }
    }

    return $element;
};
| ############################################################### | |
| # creators this is derived from creators and/or corp_creators | |
| # https://schema.datacite.org/meta/kernel-4.0/metadata.xsd#creators | |
# Build the DataCite <creators> element from eprint.creators and/or
# eprint.corp_creators.
#
# Arguments: ($xml, $dataobj, $repo) - XML factory, the eprint, the repository.
# Returns:   a <creators> element, or undef when neither field is set.
#
# Each personal creator yields one <creator> containing, in this order and
# only when the value is non-empty: creatorName, givenName, familyName and
# (if eprint.creators_orcid is set) an ORCID nameIdentifier. Each corporate
# creator yields a <creator> holding just a creatorName.
$c->{datacite_mapping_creators} = sub {
    my ($xml, $dataobj, $repo) = @_;

    # True for a defined, non-empty string. Sub-fields of a compound value may
    # be undef, so a bare `eq ''` would raise "uninitialized" warnings.
    my $has_value = sub {
        my ($value) = @_;
        return defined $value && $value ne '';
    };

    my $creators = undef;

    if ($dataobj->exists_and_set("creators")) {
        $creators = $xml->create_element("creators");
        my $orcids_in_use = $dataobj->exists_and_set("creators_orcid");

        foreach my $creator (@{ $dataobj->value("creators") }) {
            my $author = $xml->create_element("creator");
            my $family = $creator->{name}->{family};
            my $given  = $creator->{name}->{given};
            my $orcid  = $creator->{orcid};

            # creatorName is emitted as soon as any part of the name is known.
            if ($has_value->($family) || $has_value->($given)) {
                my $name_str = EPrints::Utils::make_name_string($creator->{name});
                $author->appendChild($xml->create_data_element("creatorName", $name_str));
            }
            if ($has_value->($given)) {
                $author->appendChild($xml->create_data_element("givenName", $given));
            }
            if ($has_value->($family)) {
                $author->appendChild($xml->create_data_element("familyName", $family));
            }
            if ($orcids_in_use && $has_value->($orcid)) {
                $author->appendChild($xml->create_data_element(
                    "nameIdentifier", $orcid,
                    schemeURI            => "http://orcid.org/",
                    nameIdentifierScheme => "ORCID",
                ));
            }

            # Attach the creator exactly once, after all children are in place.
            # NOTE(review): a creator with no name parts is still emitted as an
            # empty <creator/>, as before - confirm whether it should be skipped.
            $creators->appendChild($author);
        }
    }

    if ($dataobj->exists_and_set("corp_creators")) {
        $creators = $xml->create_element("creators") if !defined $creators;
        # corp_creators is a multiple field.
        foreach my $corp (@{ $dataobj->get_value('corp_creators') }) {
            my $corp_creator = $xml->create_element('creator');
            $corp_creator->appendChild($xml->create_data_element("creatorName", $corp));
            $creators->appendChild($corp_creator);
        }
    }

    return $creators;
};
| ################################################## | |
| # titles this is derived from the eprint.title | |
| # https://schema.datacite.org/meta/kernel-4.0/metadata.xsd#titles | |
# Build the DataCite <titles> element from eprint.title.
#
# Arguments: ($xml, $dataobj, $repo) - XML factory, the eprint, the repository.
# Returns:   a <titles> element holding one <title> tagged with the current
#            repository language, or undef when the eprint has no title.
$c->{datacite_mapping_title} = sub {
    my ($xml, $dataobj, $repo) = @_;

    my $titles = undef;
    return $titles unless $dataobj->exists_and_set("title");

    my $title = $xml->create_data_element(
        "title", $dataobj->render_value("title"),
        "xml:lang" => $repo->get_language->get_id,
    );

    $titles = $xml->create_element("titles");
    $titles->appendChild($title);
    return $titles;
};
| ##################################################### | |
| # publisher this is derived from the eprint.publisher | |
| # https://schema.datacite.org/meta/kernel-4.0/metadata.xsd#publisher | |
# Build the DataCite <publisher> element.
#
# Arguments: ($xml, $dataobj, $repo) - XML factory, the eprint, the repository.
# Returns:   a <publisher> element. Its content is the rendered
#            eprint.publisher when that field is set, otherwise the
#            datacitedoi "publisher" value from the repository configuration.
$c->{datacite_mapping_publisher} = sub {
    my ($xml, $dataobj, $repo) = @_;

    my $content = $repo->get_conf("datacitedoi", "publisher");
    $content = $dataobj->render_value("publisher")
        if $dataobj->exists_and_set("publisher");

    return $xml->create_data_element("publisher", $content);
};
| ################################################## | |
| # publicationYear this is derived from the eprint.date (this will have the pub date if datesdatesdates is in play) | |
| # https://schema.datacite.org/meta/kernel-4.0/metadata.xsd#publicationYear | |
| # Year when the data is made publicly available. | |
| # If an embargo period has been in effect, use the date when the embargo period ends. | |
# Build the DataCite <publicationYear> element.
#
# Arguments: ($xml, $dataobj, $repo) - XML factory, the eprint, the repository.
# Returns:   a <publicationYear> element, or undef when no year can be found.
#
# The year is the leading four digits of eprint.date, raised to the latest
# embargo year found on any attached document (the data only becomes public
# once the last embargo has ended).
$c->{datacite_mapping_date} = sub {
    my ($xml, $dataobj, $repo) = @_;

    my $publicationYear = undef;
    my $pub_year        = undef;

    if ($dataobj->exists_and_set("date")) {
        # List-context match: $pub_year stays undef when the pattern fails,
        # instead of picking up a stale $1 from an earlier successful match.
        ($pub_year) = $dataobj->get_value("date") =~ /^([0-9]{4})/;
    }

    for my $doc ($dataobj->get_all_documents()) {
        next unless $doc->exists_and_set("date_embargo");

        my ($embargo_year) = $doc->get_value("date_embargo") =~ /^([0-9]{4})/;
        next unless defined $embargo_year;

        # Keep the highest year seen; also covers eprints without a date.
        if (!defined $pub_year || $embargo_year > $pub_year) {
            $pub_year = $embargo_year;
        }
    }

    $publicationYear = $xml->create_data_element("publicationYear", $pub_year)
        if defined $pub_year;

    return $publicationYear;
};
| ################################################################# | |
| # descriptions this is derived from the eprint.abstract | |
| # If recollect is in place from eprint.collection_method, eprint.provenance too | |
| # https://schema.datacite.org/meta/kernel-4.0/metadata.xsd#descriptions | |
| ##################### | |
| # descriptionTypes: | |
| # | |
| # Abstract | |
| # Methods | |
| # SeriesInformation | |
| # TableOfContents | |
| # TechnicalInfo | |
| # Other | |
| # | |
| ##################### | |
# Build the DataCite <descriptions> element.
#
# Arguments: ($xml, $dataobj, $repo) - XML factory, the eprint, the repository.
# Returns:   a <descriptions> element, or undef when none of the source
#            fields is set.
#
# Each field that is set contributes one <description>, tagged with the
# current repository language, in this order:
#   abstract          -> descriptionType "Abstract"
#   collection_method -> descriptionType "Methods"
#   provenance        -> descriptionType "TechnicalInfo"
$c->{datacite_mapping_abstract} = sub {
    my ($xml, $dataobj, $repo) = @_;

    # [ eprint field, DataCite descriptionType ], in output order.
    my @sources = (
        [ "abstract",          "Abstract" ],
        [ "collection_method", "Methods" ],
        [ "provenance",        "TechnicalInfo" ],
    );

    my $descriptions = undef;

    for my $source (@sources) {
        my ($field, $description_type) = @{$source};
        next unless $dataobj->exists_and_set($field);

        # The container is created lazily by the first field that is set.
        $descriptions = $xml->create_element("descriptions")
            if !defined $descriptions;

        $descriptions->appendChild($xml->create_data_element(
            "description", $dataobj->get_value($field),
            "xml:lang"      => $repo->get_language->get_id,
            descriptionType => $description_type,
        ));
    }

    return $descriptions;
};
| ################################################################# | |
| # subjects this is derived from the eprint.keywords | |
| # https://schema.datacite.org/meta/kernel-4.0/metadata.xsd#subjects | |
# Build the DataCite <subjects> element from eprint.keywords.
#
# Arguments: ($xml, $dataobj, $repo) - XML factory, the eprint, the repository.
# Returns:   a <subjects> element, or undef when the eprint has no keywords.
#
# keywords may be configured as a multiple field (array ref, one <subject>
# per entry) or as a single block of text (one <subject> holding all of it).
# Every <subject> is tagged with the current repository language.
$c->{datacite_mapping_keywords} = sub {
    my ($xml, $dataobj, $repo) = @_;

    my $subjects = undef;

    if ($dataobj->exists_and_set("keywords")) {
        # Assign to the outer variable: redeclaring it with `my` here would
        # shadow it and make the sub return undef regardless of the keywords.
        $subjects = $xml->create_element("subjects");

        my $keywords = $dataobj->get_value("keywords");
        my @entries  = ref($keywords) eq "ARRAY" ? @{$keywords} : ($keywords);

        foreach my $keyword (@entries) {
            $subjects->appendChild($xml->create_data_element(
                "subject", $keyword,
                "xml:lang" => $repo->get_language->get_id,
            ));
        }
    }

    return $subjects;
};
| ################################################################# | |
| # geoLocations this is derived from the eprint.geographic_cover | |
| # and/or eprint.bounding_box (requires recollect) | |
| # https://schema.datacite.org/meta/kernel-4.0/metadata.xsd#geoLocations | |
# Build the DataCite <geoLocations> element.
#
# Arguments: ($xml, $dataobj, $repo) - XML factory, the eprint, the repository.
# Returns:   a <geoLocations> element, or undef when there is nothing to emit.
#
# eprint.geographic_cover becomes a <geoLocation> with a <geoLocationPlace>.
# eprint.bounding_box becomes a separate <geoLocation> with a
# <geoLocationBox>, but only when all four edges are defined.
$c->{datacite_mapping_geographic_cover} = sub {
    my ($xml, $dataobj, $repo) = @_;

    my $geo_locations = undef;

    if ($dataobj->exists_and_set("geographic_cover")) {
        $geo_locations = $xml->create_element("geoLocations");
        $geo_locations->appendChild(my $place_location = $xml->create_element("geoLocation"));

        my $geographic_cover = $dataobj->get_value("geographic_cover");
        $place_location->appendChild(
            $xml->create_data_element("geoLocationPlace", $geographic_cover));
    }

    if ($dataobj->exists_and_set("bounding_box")) {
        my $west  = $dataobj->get_value("bounding_box_west_edge");
        my $east  = $dataobj->get_value("bounding_box_east_edge");
        my $south = $dataobj->get_value("bounding_box_south_edge");
        my $north = $dataobj->get_value("bounding_box_north_edge");

        # A partial box is meaningless, so require all four edges.
        if (defined $north && defined $south && defined $east && defined $west) {
            my $geo_location_box = $xml->create_element("geoLocationBox");
            $geo_location_box->appendChild($xml->create_data_element("westBoundLongitude", $west));
            $geo_location_box->appendChild($xml->create_data_element("eastBoundLongitude", $east));
            $geo_location_box->appendChild($xml->create_data_element("southBoundLatitude", $south));
            $geo_location_box->appendChild($xml->create_data_element("northBoundLatitude", $north));

            # Create the container only once there is a complete box to put
            # in it, so no empty <geoLocation/> is ever emitted.
            $geo_locations = $xml->create_element("geoLocations")
                if !defined $geo_locations;

            $geo_locations->appendChild(my $box_location = $xml->create_element("geoLocation"));
            $box_location->appendChild($geo_location_box);
        }
    }

    return $geo_locations;
};
| ################################################################# | |
| # fundingReferences this is derived from the eprint.funders and eprint.projects | |
| # Possibly also eprint.grant (recollect) or a compound eprint.project (rioxx2) | |
| # https://schema.datacite.org/meta/kernel-4.0/metadata.xsd#fundingReferences | |
# Build the DataCite <fundingReferences> element.
#
# Arguments: ($xml, $dataobj, $repo) - XML factory, the eprint, the repository.
# Returns:   a <fundingReferences> element, or undef when no funder data is set.
#
# Default mapping, one <fundingReference> per entry in eprint.funders:
#   funders  => funderName  [mandatory]
#   projects => awardTitle
#   grant    => awardNumber (field added by recollect)
# projects/grant are paired with funders by position. When there are fewer
# of them than funders, the last available value is reused.
#
# When eprint.rioxx2_project_input is set it takes precedence and replaces
# the result built above:
#   funder_name => funderName
#   project     => awardTitle
#   funder_id   => funderIdentifier (type "Crossref Funder")
$c->{datacite_mapping_funders} = sub {
    my ($xml, $dataobj, $repo) = @_;

    # Pick the value for position $index from a field that may be a multiple
    # (array ref) or single. For a multiple, fall back to the last entry
    # when nothing is defined at $index.
    my $value_at = sub {
        my ($values, $index) = @_;
        return $values unless ref($values) =~ /ARRAY/;
        return defined $values->[$index] ? $values->[$index] : $values->[-1];
    };

    my $fundingReferences = undef;

    if ($dataobj->exists_and_set("funders")) {
        my $funders  = $dataobj->get_value("funders");
        my $projects = $dataobj->exists_and_set("projects")
            ? $dataobj->get_value("projects") : undef;
        my $grants   = $dataobj->exists_and_set("grant")
            ? $dataobj->get_value("grant") : undef;

        $fundingReferences = $xml->create_element("fundingReferences");

        my $i = 0;
        foreach my $funderName (@{$funders}) {
            $fundingReferences->appendChild(
                my $fundingReference = $xml->create_element("fundingReference"));
            $fundingReference->appendChild(
                $xml->create_data_element("funderName", $funderName));

            if (defined $projects) {
                $fundingReference->appendChild($xml->create_data_element(
                    "awardTitle", $value_at->($projects, $i)));
            }
            if (defined $grants) {
                $fundingReference->appendChild($xml->create_data_element(
                    "awardNumber", $value_at->($grants, $i)));
            }

            # Advance the position so the next funder is paired with the next
            # project/grant rather than always with the first one.
            $i++;
        }
    }

    # Funder data in the rioxx2 format is preferred when present (it should
    # have been derived from the other fields anyway).
    # TODO keep grant if present?
    if ($dataobj->exists_and_set("rioxx2_project_input")) {
        $fundingReferences = $xml->create_element("fundingReferences");

        foreach my $project (@{ $dataobj->value("rioxx2_project_input") }) {
            $fundingReferences->appendChild(
                my $fundingReference = $xml->create_element("fundingReference"));
            $fundingReference->appendChild(
                $xml->create_data_element("funderName", $project->{funder_name}));

            # Optional parts are emitted only when they carry a value, to
            # avoid empty awardTitle / funderIdentifier elements.
            if (defined $project->{project} && $project->{project} ne '') {
                $fundingReference->appendChild(
                    $xml->create_data_element("awardTitle", $project->{project}));
            }
            if (defined $project->{funder_id} && $project->{funder_id} ne '') {
                $fundingReference->appendChild($xml->create_data_element(
                    "funderIdentifier", $project->{funder_id},
                    funderIdentifierType => "Crossref Funder",
                ));
            }
        }
    }

    return $fundingReferences;
};
| # TODO sort this one out too | |
# Build the DataCite <rightsList> element from the eprint's documents.
#
# Arguments: ($xml, $dataobj, $repo) - XML factory, the eprint, the repository.
# Returns:   a <rightsList> element; it is returned even when it has no
#            children.
#
# Each document contributes at most one <rights> entry, and each rightsURI is
# emitted only once. A document whose content is "licence" is itself the
# licence, so its own URI is used. Otherwise the URI and label come from the
# licenses_uri_* / licenses_typename_* phrases for the document's license
# value. Documents with neither are skipped.
$c->{datacite_mapping_rights_from_docs} = sub {
    my ($xml, $dataobj, $repo) = @_;

    my $rights_list = $xml->create_element("rightsList");
    my %emitted_uri;

    DOC:
    for my $doc ($dataobj->get_all_documents()) {
        my $license = $doc->get_value("license");
        my $content = $doc->get_value("content");

        my ($uri, $label);
        if (defined $content && $content eq "licence") {
            # This document is the licence (for docs with license == attached).
            $uri   = $doc->uri;
            $label = $repo->phrase("licenses_typename_attached");
        }
        elsif (defined $license) {
            $uri   = $repo->phrase("licenses_uri_$license");
            $label = $repo->phrase("licenses_typename_$license");
        }
        else {
            # No licence information for this file: emit no rights entry.
            next DOC;
        }

        # No duplicates.
        next DOC if $emitted_uri{$uri};
        $emitted_uri{$uri} = 1;

        if ($doc->exists_and_set("date_embargo")) {
            $label .= $repo->phrase("embargoed_until",
                embargo_date => $doc->value("date_embargo"));
        }

        $rights_list->appendChild(
            $xml->create_data_element("rights", $label, rightsURI => $uri));
    }

    return $rights_list;
};
# Placeholder mapping for <relatedIdentifiers>; currently always returns undef.
#
# NOTE(review): the argument names here differ from the ($xml, $dataobj, $repo)
# used by the other mappings in this file - confirm against the caller before
# enabling the reference code below.
$c->{datacite_mapping_repo_link} = sub {
    my ($xml, $entry, $dataobj) = @_;

    my $related_identifiers = undef;

    # Default code from the plugin (for reference):
    # my $theurls = $dataobj->get_value( "repo_link" );
    # my $relatedIdentifiers = $xml->create_element( "relatedIdentifiers" );
    # foreach my $theurl ( @$theurls ) {
    #     my $linkk = $theurl->{link};
    #     if (!$linkk eq ''){
    #         $relatedIdentifiers->appendChild( $xml->create_data_element( "relatedIdentifier", $linkk, relatedIdentifierType=>"URL", relationType=>"IsReferencedBy" ) );
    #     }
    # }

    return $related_identifiers;
};
# Check that an eprint carries the metadata needed for a DataCite record.
#
# Arguments: ($eprint, $repository).
# Returns:   a list of problem phrases (one per failed check), empty when
#            every check passes.
#
# Checks, in order: creators or corp_creators; title; publisher (the
# configured datacitedoi publisher is accepted unless it is unset or still
# "EPrints Repo"); a date whose date_type is "published"; and a type that has
# an entry in the datacitedoi typemap.
$c->{validate_datacite} = sub
{
    my ($eprint, $repository) = @_;

    my $xml = $repository->xml();

    # Marker <span> that ties a problem to the named field.
    my $field_marker = sub {
        my ($field) = @_;
        return $xml->create_element("span", class => "ep_problem_field:$field");
    };

    my @problems = ();

    # Need creators.
    unless ($eprint->is_set("creators") || $eprint->is_set("corp_creators")) {
        my $creators      = $field_marker->("creators");
        my $corp_creators = $field_marker->("corp_creators");
        push @problems, $repository->html_phrase(
            "datacite_validate:need_creators_or_corp_creators",
            creators      => $creators,
            corp_creators => $corp_creators,
        );
    }

    # Need title.
    unless ($eprint->is_set("title")) {
        my $title = $field_marker->("title");
        push @problems, $repository->html_phrase(
            "datacite_validate:need_title",
            title => $title,
        );
    }

    # The publisher set in config is accepted, as long as it has been set to
    # something other than the default.
    unless ($eprint->is_set("publisher")) {
        my $configured = $repository->get_conf("datacitedoi", "publisher");
        if (!EPrints::Utils::is_set($configured) || $configured eq "EPrints Repo") {
            my $publisher         = $field_marker->("publisher");
            my $default_publisher = $repository->make_text($configured);
            push @problems, $repository->html_phrase(
                "datacite_validate:need_publisher",
                publisher         => $publisher,
                default_publisher => $default_publisher,
            );
        }
    }

    # Need a date that is explicitly a publication date.
    my $has_published_date = $eprint->is_set("date")
        && $eprint->is_set("date_type")
        && $eprint->value("date_type") eq "published";
    unless ($has_published_date) {
        my $dates = $field_marker->("dates");
        push @problems, $repository->html_phrase(
            "datacite_validate:need_published_year",
            dates => $dates,
        );
    }

    # A missing type, or one that is not in the mapping, is a problem.
    my $has_mapped_type = $eprint->exists_and_set("type")
        && $repository->get_conf("datacitedoi", "typemap", $eprint->value("type"));
    unless ($has_mapped_type) {
        my $types = $field_marker->("type");
        push @problems, $repository->html_phrase(
            "datacite_validate:need_mapped_type",
            types => $types,
        );
    }

    return (@problems);
};