Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

5060 additional metadata to pid providers #5179

Merged
merged 10 commits into from
Oct 19, 2018
7 changes: 7 additions & 0 deletions doc/sphinx-guides/source/admin/dataverses-datasets.rst
Original file line number Diff line number Diff line change
Expand Up @@ -65,3 +65,10 @@ Mint new PID for a Dataset
Mints a new identifier for a dataset previously registered with a handle. Only accessible to superusers. ::

curl -H "X-Dataverse-key: $API_TOKEN" -X POST http://$SERVER/api/admin/$dataset-id/reregisterHDLToPID

Send Dataset metadata to PID provider
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

Forces an update of the metadata sent to the PID provider for a published dataset. Only accessible to superusers. ::

curl -H "X-Dataverse-key: $API_TOKEN" -X POST http://$SERVER/api/datasets/$dataset-id/modifyRegistrationMetadata
6 changes: 6 additions & 0 deletions src/main/java/Bundle.properties
Original file line number Diff line number Diff line change
Expand Up @@ -2073,3 +2073,9 @@ admin.api.migrateHDL.failure.must.be.hdl.dataset=Dataset was not registered as a
admin.api.migrateHDL.success=Dataset migrate HDL registration complete. Dataset re-registered successfully.
admin.api.migrateHDL.failure=Failed to migrate Dataset Handle id: {0}
admin.api.migrateHDL.failureWithException=Failed to migrate Dataset Handle id: {0} Unexpected exception: {1}

#Datasets.java
datasets.api.updatePIDMetadata.failure.dataset.must.be.released=Modify Registration Metadata must be run on a published dataset.
datasets.api.updatePIDMetadata.auth.mustBeSuperUser=Forbidden. You must be a superuser.
datasets.api.updatePIDMetadata.success.for.single.dataset=Dataset {0} PID Metadata updated successfully.
datasets.api.updatePIDMetadata.success.for.update.all=All Dataset PID Metadata update completed successfully.
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,16 @@

import edu.harvard.iq.dataverse.settings.SettingsServiceBean;
import edu.harvard.iq.dataverse.util.SystemConfig;
import java.io.InputStream;

import javax.ejb.EJB;
import java.util.*;
import java.util.logging.Level;
import java.util.logging.Logger;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

public abstract class AbstractGlobalIdServiceBean implements GlobalIdServiceBean {

Expand Down Expand Up @@ -65,6 +70,7 @@ protected Map<String, String> addBasicMetadata(DvObject dvObjectIn, Map<String,
metadata.put("datacite.creator", authorString);
metadata.put("datacite.title", dvObjectIn.getDisplayName());
metadata.put("datacite.publisher", producerString);
metadata.put("datacite.publicationyear", generateYear(dvObjectIn));
return metadata;
}

Expand Down Expand Up @@ -114,4 +120,313 @@ public DvObject generateIdentifier(DvObject dvObject) {
return dvObject;
}

class GlobalIdMetadataTemplate {


private String template;

public GlobalIdMetadataTemplate(){
try (InputStream in = GlobalIdMetadataTemplate.class.getResourceAsStream("datacite_metadata_template.xml")) {
template = Util.readAndClose(in, "utf-8");
} catch (Exception e) {
logger.log(Level.SEVERE, "datacite metadata template load error");
logger.log(Level.SEVERE, "String " + e.toString());
logger.log(Level.SEVERE, "localized message " + e.getLocalizedMessage());
logger.log(Level.SEVERE, "cause " + e.getCause());
logger.log(Level.SEVERE, "message " + e.getMessage());
}
}

private String xmlMetadata;
private String identifier;
private String datasetIdentifier;
private List<String> datafileIdentifiers;
private List<String> creators;
private String title;
private String publisher;
private String publisherYear;
private List<DatasetAuthor> authors;
private String description;
private List<String[]> contacts;
private List<String[]> producers;

public List<String[]> getProducers() {
return producers;
}

public void setProducers(List<String[]> producers) {
this.producers = producers;
}

public List<String[]> getContacts() {
return contacts;
}

public void setContacts(List<String[]> contacts) {
this.contacts = contacts;
}

public String getDescription() {
return description;
}

public void setDescription(String description) {
this.description = description;
}

public List<DatasetAuthor> getAuthors() {
return authors;
}

public void setAuthors(List<DatasetAuthor> authors) {
this.authors = authors;
}


public List<String> getDatafileIdentifiers() {
return datafileIdentifiers;
}

public void setDatafileIdentifiers(List<String> datafileIdentifiers) {
this.datafileIdentifiers = datafileIdentifiers;
}

public GlobalIdMetadataTemplate(String xmlMetaData) {
this.xmlMetadata = xmlMetaData;
Document doc = Jsoup.parseBodyFragment(xmlMetaData);
Elements identifierElements = doc.select("identifier");
if (identifierElements.size() > 0) {
identifier = identifierElements.get(0).html();
}
Elements creatorElements = doc.select("creatorName");
creators = new ArrayList<>();
for (Element creatorElement : creatorElements) {
creators.add(creatorElement.html());
}
Elements titleElements = doc.select("title");
if (titleElements.size() > 0) {
title = titleElements.get(0).html();
}
Elements publisherElements = doc.select("publisher");
if (publisherElements.size() > 0) {
publisher = publisherElements.get(0).html();
}
Elements publisherYearElements = doc.select("publicationYear");
if (publisherYearElements.size() > 0) {
publisherYear = publisherYearElements.get(0).html();
}
}

public String generateXML(DvObject dvObject) {
// Can't use "UNKNOWN" here because DataCite will respond with "[facet 'pattern'] the value 'unknown' is not accepted by the pattern '[\d]{4}'"
String publisherYearFinal = "9999";
// FIXME: Investigate why this.publisherYear is sometimes null now that pull request #4606 has been merged.
if (this.publisherYear != null) {
// Added to prevent a NullPointerException when trying to destroy datasets when using DataCite rather than EZID.
publisherYearFinal = this.publisherYear;
}
xmlMetadata = template.replace("${identifier}", this.identifier.trim())
.replace("${title}", this.title)
.replace("${publisher}", this.publisher)
.replace("${publisherYear}", publisherYearFinal)
.replace("${description}", this.description);
StringBuilder creatorsElement = new StringBuilder();
for (DatasetAuthor author : authors) {
creatorsElement.append("<creator><creatorName>");
creatorsElement.append(author.getName().getDisplayValue());
creatorsElement.append("</creatorName>");

if (author.getIdType() != null && author.getIdValue() != null && !author.getIdType().isEmpty() && !author.getIdValue().isEmpty() && author.getAffiliation() != null && !author.getAffiliation().getDisplayValue().isEmpty()) {

if (author.getIdType().equals("ORCID")) {
creatorsElement.append("<nameIdentifier schemeURI=\"https://orcid.org/\" nameIdentifierScheme=\"ORCID\">" + author.getIdValue() + "</nameIdentifier>");
}
if (author.getIdType().equals("ISNI")) {
creatorsElement.append("<nameIdentifier schemeURI=\"http://isni.org/isni/\" nameIdentifierScheme=\"ISNI\">" + author.getIdValue() + "</nameIdentifier>");
}
if (author.getIdType().equals("LCNA")) {
creatorsElement.append("<nameIdentifier schemeURI=\"http://id.loc.gov/authorities/names/\" nameIdentifierScheme=\"LCNA\">" + author.getIdValue() + "</nameIdentifier>");
}
}
if (author.getAffiliation() != null && !author.getAffiliation().getDisplayValue().isEmpty()) {
creatorsElement.append("<affiliation>" + author.getAffiliation().getDisplayValue() + "</affiliation>");
}
creatorsElement.append("</creator>");
}
xmlMetadata = xmlMetadata.replace("${creators}", creatorsElement.toString());

StringBuilder contributorsElement = new StringBuilder();
for (String[] contact : this.getContacts()) {
if (!contact[0].isEmpty()) {
contributorsElement.append("<contributor contributorType=\"ContactPerson\"><contributorName>" + contact[0] + "</contributorName>");
if (!contact[1].isEmpty()) {
contributorsElement.append("<affiliation>" + contact[1] + "</affiliation>");
}
contributorsElement.append("</contributor>");
}
}
for (String[] producer : this.getProducers()) {
contributorsElement.append("<contributor contributorType=\"Producer\"><contributorName>" + producer[0] + "</contributorName>");
if (!producer[1].isEmpty()) {
contributorsElement.append("<affiliation>" + producer[1] + "</affiliation>");
}
contributorsElement.append("</contributor>");
}

String relIdentifiers = generateRelatedIdentifiers(dvObject);

xmlMetadata = xmlMetadata.replace("${relatedIdentifiers}", relIdentifiers);

xmlMetadata = xmlMetadata.replace("{$contributors}", contributorsElement.toString());
return xmlMetadata;
}

private String generateRelatedIdentifiers(DvObject dvObject) {

StringBuilder sb = new StringBuilder();
if (dvObject.isInstanceofDataset()) {
Dataset dataset = (Dataset) dvObject;
if (!dataset.getFiles().isEmpty() && !(dataset.getFiles().get(0).getIdentifier() == null)) {

datafileIdentifiers = new ArrayList<>();
for (DataFile dataFile : dataset.getFiles()) {
if (!dataFile.getGlobalId().asString().isEmpty()) {
if (sb.toString().isEmpty()) {
sb.append("<relatedIdentifiers>");
}
sb.append("<relatedIdentifier relatedIdentifierType=\"DOI\" relationType=\"HasPart\">" + dataFile.getGlobalId() + "</relatedIdentifier>");
}
}

if (!sb.toString().isEmpty()) {
sb.append("</relatedIdentifiers>");
}
}
} else if (dvObject.isInstanceofDataFile()) {
DataFile df = (DataFile) dvObject;
sb.append("<relatedIdentifiers>");
sb.append("<relatedIdentifier relatedIdentifierType=\"DOI\" relationType=\"IsPartOf\""
+ ">" + df.getOwner().getGlobalId() + "</relatedIdentifier>");
sb.append("</relatedIdentifiers>");
}
return sb.toString();
}

public void generateFileIdentifiers(DvObject dvObject) {

if (dvObject.isInstanceofDataset()) {
Dataset dataset = (Dataset) dvObject;

if (!dataset.getFiles().isEmpty() && !(dataset.getFiles().get(0).getIdentifier() == null)) {

datafileIdentifiers = new ArrayList<>();
for (DataFile dataFile : dataset.getFiles()) {
datafileIdentifiers.add(dataFile.getIdentifier());
int x = xmlMetadata.indexOf("</relatedIdentifiers>") - 1;
xmlMetadata = xmlMetadata.replace("{relatedIdentifier}", dataFile.getIdentifier());
xmlMetadata = xmlMetadata.substring(0, x) + "<relatedIdentifier relatedIdentifierType=\"hasPart\" "
+ "relationType=\"doi\">${relatedIdentifier}</relatedIdentifier>" + template.substring(x, template.length() - 1);

}

} else {
xmlMetadata = xmlMetadata.replace("<relatedIdentifier relatedIdentifierType=\"hasPart\" relationType=\"doi\">${relatedIdentifier}</relatedIdentifier>", "");
}
}
}

public String getTemplate() {
return template;
}

public void setTemplate(String templateIn) {
template = templateIn;
}

public String getIdentifier() {
return identifier;
}

public void setIdentifier(String identifier) {
this.identifier = identifier;
}

public void setDatasetIdentifier(String datasetIdentifier) {
this.datasetIdentifier = datasetIdentifier;
}

public List<String> getCreators() {
return creators;
}

public void setCreators(List<String> creators) {
this.creators = creators;
}

public String getTitle() {
return title;
}

public void setTitle(String title) {
this.title = title;
}

public String getPublisher() {
return publisher;
}

public void setPublisher(String publisher) {
this.publisher = publisher;
}

public String getPublisherYear() {
return publisherYear;
}

public void setPublisherYear(String publisherYear) {
this.publisherYear = publisherYear;
}
}
/**
 * Builds the DataCite metadata XML for a dataset or data file.
 *
 * <p>Authors, contacts and producers are always taken from the latest
 * version of the dataset: the object itself when it is a dataset, otherwise
 * its owner. The publisher is the name of the root dataverse, or
 * {@code :unav} when that name is missing or empty.
 *
 * @param identifier the persistent identifier; everything up to and
 *                   including the first ':' is dropped before it is put
 *                   into the XML
 * @param metadata   metadata map; the {@code datacite.creator} and
 *                   {@code datacite.publicationyear} entries are read
 * @param dvObject   the dataset or data file to describe
 * @return the generated XML
 */
public String getMetadataFromDvObject(String identifier, Map<String, String> metadata, DvObject dvObject) {

    Dataset dataset;
    if (dvObject instanceof Dataset) {
        dataset = (Dataset) dvObject;
    } else {
        // Anything that is not a dataset is expected to be owned by one.
        dataset = (Dataset) dvObject.getOwner();
    }

    GlobalIdMetadataTemplate metadataTemplate = new GlobalIdMetadataTemplate();
    metadataTemplate.setIdentifier(identifier.substring(identifier.indexOf(':') + 1));
    metadataTemplate.setCreators(Util.getListFromStr(metadata.get("datacite.creator")));
    metadataTemplate.setAuthors(dataset.getLatestVersion().getDatasetAuthors());

    if (dvObject.isInstanceofDataset()) {
        // Guard against null the same way the file description is guarded below:
        // the value ends up as a String.replace replacement, which rejects null.
        String datasetDescription = dataset.getLatestVersion().getDescriptionPlainText();
        metadataTemplate.setDescription(datasetDescription == null ? "" : datasetDescription);
    }
    if (dvObject.isInstanceofDataFile()) {
        DataFile df = (DataFile) dvObject;
        String fileDescription = df.getDescription();
        metadataTemplate.setDescription(fileDescription == null ? "" : fileDescription);
        String datasetPid = df.getOwner().getGlobalId().asString();
        metadataTemplate.setDatasetIdentifier(datasetPid);
    } else {
        metadataTemplate.setDatasetIdentifier("");
    }

    metadataTemplate.setContacts(dataset.getLatestVersion().getDatasetContacts());
    metadataTemplate.setProducers(dataset.getLatestVersion().getDatasetProducers());
    metadataTemplate.setTitle(dvObject.getDisplayName());

    String producerString = dataverseService.findRootDataverse().getName();
    // A missing name is treated like an empty one instead of failing on isEmpty().
    if (producerString == null || producerString.isEmpty()) {
        producerString = ":unav";
    }
    metadataTemplate.setPublisher(producerString);
    metadataTemplate.setPublisherYear(metadata.get("datacite.publicationyear"));

    String xmlMetadata = metadataTemplate.generateXML(dvObject);
    logger.log(Level.FINE, "XML to send to DataCite: {0}", xmlMetadata);
    return xmlMetadata;
}

}