Skip to content

Commit

Permalink
make non-matches via plazi explicit; only include pub ids for treatme…
Browse files Browse the repository at this point in the history
…nts to avoid including entire taxonomic context of a single treatment; #23
  • Loading branch information
jhpoelen committed Oct 1, 2020
1 parent ef807fc commit 198e588
Show file tree
Hide file tree
Showing 5 changed files with 166 additions and 141 deletions.
Expand Up @@ -9,31 +9,34 @@
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.lucene.store.SimpleFSDirectory;
import org.eol.globi.domain.NameType;
import org.eol.globi.domain.PropertyAndValueDictionary;
import org.eol.globi.domain.Taxon;
import org.eol.globi.domain.TaxonomyProvider;
import org.eol.globi.domain.TaxonImpl;
import org.eol.globi.domain.Term;
import org.eol.globi.domain.TermImpl;
import org.eol.globi.service.PropertyEnricher;
import org.eol.globi.service.PropertyEnricherException;
import org.eol.globi.service.TaxonUtil;
import org.eol.globi.taxon.TaxonCacheListener;
import org.eol.globi.taxon.TaxonCacheService;
import org.eol.globi.taxon.TaxonLookupServiceImpl;
import org.eol.globi.util.ExternalIdUtil;
import org.eol.globi.taxon.TermMatchListener;
import org.eol.globi.taxon.TermMatcher;
import org.globalbioticinteractions.nomer.util.PropertyEnricherInfo;
import org.globalbioticinteractions.nomer.util.TermMatcherContext;

import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;
import java.util.concurrent.atomic.AtomicLong;

@PropertyEnricherInfo(name = "plazi", description = "Lookup Plazi taxon treatment by name or id using offline-enabled database dump")
public class PlaziService implements PropertyEnricher {
public class PlaziService implements TermMatcher {

private static final Log LOG = LogFactory.getLog(PlaziService.class);

Expand All @@ -46,32 +49,33 @@ public PlaziService(TermMatcherContext ctx) {
}

/**
 * Returns the first entry of the list produced by {@code enrichAllMatches} for the
 * given properties.
 *
 * @param properties the properties to enrich
 * @return element 0 of the list returned by {@code enrichAllMatches(properties)}
 * @throws PropertyEnricherException if {@code enrichAllMatches} throws it
 */
@Override
public Map<String, String> enrichFirstMatch(Map<String, String> properties) throws PropertyEnricherException {
    return enrichAllMatches(properties).get(0);
}

@Override
public List<Map<String, String>> enrichAllMatches(Map<String, String> properties) throws PropertyEnricherException {
Map<String, String> enriched = new TreeMap<>(properties);
Taxon[] taxa = lookupByName(properties);

List<Map<String, String>> enrichedList = new ArrayList<>();
if (taxa == null || taxa.length == 0) {
enrichedList.add(enriched);
} else {
for (Taxon taxon : taxa) {
enrichedList.add(new TreeMap<>(TaxonUtil.taxonToMap(taxon)));
/**
 * Looks up every given term and reports each outcome to the listener.
 *
 * A term with no lookup result is reported once with NameType.NONE; otherwise every
 * returned taxon is reported with NameType.SAME_AS.
 *
 * @param terms             the terms to look up, processed in iteration order
 * @param termMatchListener receives one foundTaxonForTerm call per non-match and one per matched taxon
 * @throws PropertyEnricherException propagated from lookupLinkedTerms
 */
public void match(List<Term> terms, TermMatchListener termMatchListener) throws PropertyEnricherException {
for (Term term : terms) {
Taxon[] taxons = lookupLinkedTerms(term);
if (taxons == null || taxons.length == 0) {
// Non-match: report explicitly, echoing the term's own name and id back as the taxon.
termMatchListener.foundTaxonForTerm(
null,
term,
new TaxonImpl(term.getName(), term.getId()),
NameType.NONE
);
} else {
for (Taxon taxon : taxons) {
Taxon taxonToBeSubmitted = taxon;
// Hits whose external id is a DOI or a Plazi treatment URI are replaced by a bare taxon
// built from that id only (same value for both constructor arguments and for the path),
// so none of the other fields of the looked-up taxon are forwarded.
if (StringUtils.startsWith(taxon.getExternalId(), "doi:")
|| StringUtils.startsWith(taxon.getExternalId(), "http://treatment.plazi.org/id/")) {
taxonToBeSubmitted = new TaxonImpl(taxon.getExternalId(), taxon.getExternalId());
taxonToBeSubmitted.setPath(taxon.getExternalId());
}
termMatchListener.foundTaxonForTerm(null, term, taxonToBeSubmitted, NameType.SAME_AS);
}
}
}
// NOTE(review): match is declared void and never declares enrichedList; the next line looks like
// a leftover of the pre-change enrichAllMatches shown in this diff view — confirm against the
// committed file.
return enrichedList;
}

private Taxon[] lookupByName(Map<String, String> properties) throws PropertyEnricherException {
private Taxon[] lookupLinkedTerms(Term term) throws PropertyEnricherException {
Taxon[] taxa = null;
String name = properties.get(PropertyAndValueDictionary.NAME);
String externalId = properties.get(PropertyAndValueDictionary.EXTERNAL_ID);
if (StringUtils.isNotBlank(name) || StringUtils.isNotBlank(externalId)) {
if (StringUtils.isNotBlank(term.getName()) || StringUtils.isNotBlank(term.getId())) {
if (needsInit()) {
if (ctx == null) {
throw new PropertyEnricherException("context needed to initialize");
Expand All @@ -80,17 +84,18 @@ private Taxon[] lookupByName(Map<String, String> properties) throws PropertyEnri
}

try {
if (StringUtils.isNotBlank(externalId)) {
if (StringUtils.startsWith(externalId, "PLAZI:")) {
externalId = StringUtils.replace(externalId, "PLAZI:", "http://treatment.plazi.org/id/");
String externalId = term.getId();
if (StringUtils.isNotBlank(term.getId())) {
if (StringUtils.startsWith(term.getId(), "PLAZI:")) {
externalId = StringUtils.replace(term.getId(), "PLAZI:", "http://treatment.plazi.org/id/");
}
taxa = taxonLookupService.lookupTermsById(externalId);
}
if ((taxa == null || taxa.length == 0) && StringUtils.isNotBlank(name)) {
taxa = taxonLookupService.lookupTermsByName(name);
if ((taxa == null || taxa.length == 0) && StringUtils.isNotBlank(term.getName())) {
taxa = taxonLookupService.lookupTermsByName(term.getName());
}
} catch (IOException e) {
throw new PropertyEnricherException("failed to lookup [" + name + "]", e);
throw new PropertyEnricherException("failed to lookup [" + term.getName() + "]", e);
}
}
return taxa;
Expand Down Expand Up @@ -173,12 +178,6 @@ private boolean needsInit() {
return taxonLookupService == null;
}


/** Does nothing: the body is empty. */
@Override
public void shutdown() {
    // intentionally empty
}

/**
 * Builds the location of the "plazi" entry underneath the cache directory
 * reported by the given context.
 *
 * @param ctx context supplying the parent cache directory
 * @return a {@link File} for the "plazi" child of {@code ctx.getCacheDir()}
 */
private File getCacheDir(TermMatcherContext ctx) {
    final String subdirectory = "plazi";
    return new File(ctx.getCacheDir(), subdirectory);
}
Expand Down
Expand Up @@ -12,7 +12,7 @@
import org.apache.commons.lang3.StringUtils;
import org.eol.globi.data.CharsetConstant;
import org.eol.globi.domain.PropertyAndValueDictionary;
import org.eol.globi.domain.TaxonImpl;
import org.eol.globi.domain.Taxon;
import org.eol.globi.service.TaxonUtil;
import org.eol.globi.taxon.TaxonCacheListener;
import org.globalbioticinteractions.doi.DOI;
Expand Down Expand Up @@ -60,13 +60,15 @@ public static void importTreatment(InputStream treatmentGraph, TaxonCacheListene
ResultSet rs = qexec.execSelect();
while (rs.hasNext()) {
final QuerySolution next = rs.next();
addTaxonByPlaziId(listener, next);
addTaxonByPublicationDoi(listener, next);
addTaxonConcept(listener, next);
Taxon addedTaxon = addTaxonConcept(listener, next);
if (addedTaxon != null) {
addTaxonByPlaziId(listener, next, addedTaxon);
addTaxonByPublicationDoi(listener, next, addedTaxon);
}
}
}

private static void addTaxonConcept(TaxonCacheListener listener, QuerySolution next) {
private static Taxon addTaxonConcept(TaxonCacheListener listener, QuerySolution next) {
List<String> taxonRanks = Arrays.asList(
"?subspecificEpithet",
"?specificEpithet",
Expand All @@ -90,41 +92,46 @@ private static void addTaxonConcept(TaxonCacheListener listener, QuerySolution n
.collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue));

populateTaxa(taxonRanks, taxonMap);
addTaxonByTaxonConcept(listener, next, taxonMap);
return addTaxonByTaxonConcept(listener, next, taxonMap);
}

/**
 * Registers a taxon entry keyed by the DOI bound to ?publication in the given query solution.
 *
 * Does nothing when ?publication is unbound, is not a URI resource, or is not a well-formed DOI.
 *
 * NOTE(review): this span comes from a rendered diff and contains two signatures. The body reads
 * some_name, which only the three-argument signature declares, so the two-argument line is
 * presumably the superseded one — confirm against the committed file.
 * NOTE(review): some_name does not follow Java lowerCamelCase parameter naming.
 */
private static void addTaxonByPublicationDoi(TaxonCacheListener listener, QuerySolution next) {
private static void addTaxonByPublicationDoi(TaxonCacheListener listener, QuerySolution next, Taxon some_name) {
RDFNode pubNode = next.get("?publication");
if (pubNode != null && pubNode.isURIResource()) {
try {
// NOTE(review): URI.create throws IllegalArgumentException on a syntactically invalid URI,
// and only MalformedDOIException is caught below — verify that cannot abort the import.
DOI pubDoi = DOI.create(URI.create(pubNode.asResource().getURI()));
String doiString = pubDoi.toPrintableDOI();
// NOTE(review): the String-argument call on the next line presumably belongs to the
// pre-change version; the three lines after it pass a Taxon instead.
addTermForPubId(listener, doiString);
// Copy the supplied taxon and re-key the copy by the printable DOI before registering it.
Taxon copy = TaxonUtil.copy(some_name);
copy.setExternalId(doiString);
addTermForPubId(listener, copy);
} catch (MalformedDOIException e) {
// ignore non-DOIs
}
}
}

/**
 * Hands a taxon to the listener via listener.addTaxon.
 *
 * NOTE(review): this span comes from a rendered diff and shows two versions stacked. The first
 * (String doiString) builds a TaxonImpl whose two constructor arguments and path are all the
 * given string, then adds it. The second (Taxon term) adds the supplied taxon unchanged. Only one
 * closing brace is rendered for the pair; presumably the String version is the superseded one —
 * confirm against the committed file.
 */
private static void addTermForPubId(TaxonCacheListener listener, String doiString) {
TaxonImpl taxon = new TaxonImpl(doiString, doiString);
taxon.setPath(doiString);
listener.addTaxon(taxon);
private static void addTermForPubId(TaxonCacheListener listener, Taxon term) {
listener.addTaxon(term);
}

/**
 * Registers a taxon keyed by the URI bound to ?tc in the given query solution.
 *
 * When ?tc is a URI resource, that URI is stored in taxonMap under
 * PropertyAndValueDictionary.EXTERNAL_ID (the caller's map is mutated), the map is converted with
 * TaxonUtil.mapToTaxon and the result is passed to listener.addTaxon.
 *
 * NOTE(review): this span comes from a rendered diff and contains a void and a Taxon-returning
 * signature. The body ends in "return taxonToBeAdded;", which only fits the Taxon-returning one,
 * so the void line and the single-statement listener.addTaxon(TaxonUtil.mapToTaxon(...)) call are
 * presumably the superseded lines — confirm against the committed file.
 *
 * @return the taxon that was added, or null when ?tc is unbound or not a URI resource
 */
private static void addTaxonByTaxonConcept(TaxonCacheListener listener, QuerySolution next, Map<String, String> taxonMap) {
private static Taxon addTaxonByTaxonConcept(TaxonCacheListener listener, QuerySolution next, Map<String, String> taxonMap) {
Taxon taxonToBeAdded = null;
// Despite its name, pubNode holds the ?tc binding here, not a publication.
RDFNode pubNode = next.get("?tc");
if (pubNode != null && pubNode.isURIResource()) {
taxonMap.put(PropertyAndValueDictionary.EXTERNAL_ID, pubNode.asResource().getURI());
listener.addTaxon(TaxonUtil.mapToTaxon(taxonMap));
taxonToBeAdded = TaxonUtil.mapToTaxon(taxonMap);
listener.addTaxon(taxonToBeAdded);
}
return taxonToBeAdded;
}

/**
 * Registers a taxon entry keyed by the URI bound to ?treatment in the given query solution.
 *
 * Does nothing when ?treatment is unbound or is not a URI resource.
 *
 * NOTE(review): this span comes from a rendered diff and contains two signatures. The body reads
 * name, which only the three-argument signature declares, so the two-argument line and the
 * addTermForPubId(listener, externalId) call are presumably the superseded lines — confirm
 * against the committed file.
 */
private static void addTaxonByPlaziId(TaxonCacheListener listener, QuerySolution next) {
private static void addTaxonByPlaziId(TaxonCacheListener listener, QuerySolution next, Taxon name) {
// Despite its name, pubNode1 holds the ?treatment binding.
RDFNode pubNode1 = next.get("?treatment");
if (pubNode1 != null && pubNode1.isURIResource()) {
String externalId = pubNode1.asResource().getURI();
addTermForPubId(listener, externalId);
// Copy the supplied taxon and re-key the copy by the treatment URI before registering it.
Taxon copy = TaxonUtil.copy(name);
copy.setExternalId(externalId);
addTermForPubId(listener, copy);
}
}

Expand Down

0 comments on commit 198e588

Please sign in to comment.