Skip to content

Commit

Permalink
allow for target/source taxon ids to be enriched their related occurr…
Browse files Browse the repository at this point in the history
…ence ids; allow to disable enriching individual occurrence ids from external resources using shouldResolveReferences = false; related to #427 ;
  • Loading branch information
Jorrit Poelen committed Oct 2, 2023
1 parent f215052 commit 917fc1e
Show file tree
Hide file tree
Showing 8 changed files with 258 additions and 22 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -118,18 +118,27 @@ public Map<String, String> enrich(final Map<String, String> properties) throws S
private void enrichFields(Map<String, String> enrichedProperties, String taxonNameField, String taxonIdField, String taxonRankField, String occurrenceIdField) throws StudyImporterException {
String occurrenceId = enrichedProperties.get(occurrenceIdField);
if (isINaturalistObservation(occurrenceId)) {
try (InputStream is = getResponse(createObservationUrl(occurrenceId))) {
enrichWithINaturalistObservation(is,
taxonNameField,
taxonIdField,
taxonRankField,
enrichedProperties);
} catch (IOException e) {
throw new StudyImporterException("failed to resolve [" + occurrenceId + "]");
if (isBlank(enrichedProperties, taxonNameField)
|| isBlank(enrichedProperties, taxonIdField)
|| isBlank(enrichedProperties, taxonRankField)) {
try (InputStream is = getResponse(createObservationUrl(occurrenceId))) {
enrichWithINaturalistObservation(is,
taxonNameField,
taxonIdField,
taxonRankField,
enrichedProperties);
} catch (IOException e) {
throw new StudyImporterException("failed to resolve [" + occurrenceId + "]");
}
}
}
}

/**
 * Tells whether the value stored under the given key is blank according to
 * {@code StringUtils.isBlank}; a missing entry yields {@code null} from the
 * lookup and is passed on as such.
 *
 * @param enrichedProperties properties to look the value up in
 * @param taxonNameField     key of the entry to inspect
 * @return the result of {@code StringUtils.isBlank} for the looked-up value
 */
private boolean isBlank(Map<String, String> enrichedProperties, String taxonNameField) {
    return StringUtils.isBlank(enrichedProperties.get(taxonNameField));
}

/**
 * Builds the URL of the JSON representation of an iNaturalist observation.
 *
 * @param sourceOccurrenceId occurrence id that is handed to {@code parseObservationId}
 * @return the observations base URL, followed by the parsed observation id and {@code .json}
 */
public String createObservationUrl(String sourceOccurrenceId) {
    final String baseUrl = "https://www.inaturalist.org/observations/";
    final String suffix = ".json";
    return baseUrl + parseObservationId(sourceOccurrenceId) + suffix;
}
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
package org.eol.globi.process;

import org.apache.commons.lang3.StringUtils;
import org.eol.globi.data.OccurrenceIdEnricherAtlasOfLivingAustralia;
import org.eol.globi.data.OccurrenceIdEnricherCaliforniaAcademyOfSciences;
import org.eol.globi.data.OccurrenceIdIdEnricherGenBank;
Expand All @@ -10,12 +11,15 @@
import org.eol.globi.data.OccurrenceIdEnricherFieldMuseum;
import org.eol.globi.data.SpecimenCitationEnricher;
import org.eol.globi.data.StudyImporterException;
import org.eol.globi.domain.PropertyAndValueDictionary;
import org.eol.globi.service.GeoNamesService;
import org.eol.globi.util.InteractUtil;
import org.globalbioticinteractions.dataset.Dataset;
import org.globalbioticinteractions.dataset.DatasetConstant;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.Map;

Expand All @@ -35,10 +39,22 @@ public InteractionListenerImpl(NodeFactory nodeFactory,
}
};

this.processors =
String shouldResolveReferences = dataset.getOrDefault(DatasetConstant.SHOULD_RESOLVE_REFERENCES, "false");


List<InteractionListener> resolvingEnrichers = Arrays.asList(
new OccurrenceIdIdEnricherINaturalist(queue, logger, dataset),
new OccurrenceIdIdEnricherGenBank(queue, logger, dataset)
);

List<InteractionListener> listeners = new ArrayList<>();

if (StringUtils.equalsIgnoreCase(shouldResolveReferences, "true")) {
listeners.addAll(resolvingEnrichers);
}

listeners.addAll(
Arrays.asList(
new OccurrenceIdIdEnricherINaturalist(queue, logger, dataset),
new OccurrenceIdIdEnricherGenBank(queue, logger, dataset),
new OccurrenceIdEnricherFieldMuseum(queue, logger),
new OccurrenceIdEnricherCaliforniaAcademyOfSciences(queue, logger),
new OccurrenceIdEnricherAtlasOfLivingAustralia(queue, logger),
Expand All @@ -49,7 +65,9 @@ public InteractionListenerImpl(NodeFactory nodeFactory,
new InteractionValidator(queue, logger),
new DOIReferenceExtractor(queue, logger),
new InteractionImporter(nodeFactory, logger, geoNamesService)
);
));

this.processors = Collections.unmodifiableList(listeners);
}

public InteractionListener createMappingListener(ImportLogger logger, Dataset dataset, InteractionListener queue) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,9 @@ public InteractionListenerCollectUnresolvedOccurrenceIds(Map<Pair<String, String
/**
 * Passes the interaction to each of the four {@code addUnresolved*} methods,
 * in this order: source occurrence id, source taxon id, target occurrence id,
 * target taxon id.
 *
 * @param interaction the interaction properties to inspect
 * @throws StudyImporterException declared by the listener contract
 */
@Override
public void on(Map<String, String> interaction) throws StudyImporterException {
addUnresolvedSourceOccurrenceId(interaction);
addUnresolvedSourceTaxonId(interaction);
addUnresolvedTargetOccurrenceId(interaction);
addUnresolvedTargetTaxonId(interaction);
}

public void addUnresolvedTargetOccurrenceId(Map<String, String> interaction) {
Expand All @@ -34,6 +36,15 @@ public void addUnresolvedTargetOccurrenceId(Map<String, String> interaction) {
}
}

/**
 * Registers the target taxon id of the interaction as unresolved, provided
 * {@link #hasUnresolvedTargetTaxonId(Map)} holds for it.
 *
 * <p>The entry is keyed by the pair (target taxon id field name, id value)
 * and maps to an empty map.
 *
 * @param interaction the interaction properties to inspect
 */
public void addUnresolvedTargetTaxonId(Map<String, String> interaction) {
    if (!hasUnresolvedTargetTaxonId(interaction)) {
        return;
    }
    final String targetTaxonId =
            InteractionListenerIndexing.getOccurrenceId(interaction, TaxonUtil.TARGET_TAXON_ID);
    final Pair<String, String> key = Pair.of(TaxonUtil.TARGET_TAXON_ID, targetTaxonId);
    interactionsWithUnresolvedOccurrenceIds.put(key, Collections.emptyMap());
}

public void addUnresolvedSourceOccurrenceId(Map<String, String> interaction) {
if (hasUnresolvedSourceOccurrenceId(interaction)) {
interactionsWithUnresolvedOccurrenceIds.put(
Expand All @@ -43,16 +54,37 @@ public void addUnresolvedSourceOccurrenceId(Map<String, String> interaction) {
}
}

/**
 * Registers the source taxon id of the interaction as unresolved, provided
 * {@link #hasUnresolvedSourceTaxonId(Map)} holds for it.
 *
 * <p>The entry is keyed by the pair (source taxon id field name, id value)
 * and maps to a copy of the interaction itself.
 *
 * @param interaction the interaction properties to inspect
 */
public void addUnresolvedSourceTaxonId(Map<String, String> interaction) {
    if (!hasUnresolvedSourceTaxonId(interaction)) {
        return;
    }
    final String sourceTaxonId =
            InteractionListenerIndexing.getOccurrenceId(interaction, TaxonUtil.SOURCE_TAXON_ID);
    final Pair<String, String> key = Pair.of(TaxonUtil.SOURCE_TAXON_ID, sourceTaxonId);
    // NOTE(review): the target taxon id variant stores Collections.emptyMap() here,
    // whereas this one stores a non-empty copy of the interaction - confirm this
    // asymmetry is intended.
    final Map<String, String> interactionCopy = new HashMap<>(interaction);
    interactionsWithUnresolvedOccurrenceIds.put(key, interactionCopy);
}

/**
 * Checks whether the interaction carries a source occurrence id without a
 * usable source taxon name.
 *
 * <p>The name considered is the {@code SOURCE_TAXON_NAME} entry or, when that
 * entry is {@code null}, the name produced by
 * {@code TaxonUtil.generateSourceTaxonName}.
 *
 * @param interaction the interaction properties to inspect
 * @return {@code true} if that name is blank and the interaction contains the
 *         {@code SOURCE_OCCURRENCE_ID} key
 */
public static boolean hasUnresolvedSourceOccurrenceId(Map<String, String> interaction) {
    final String sourceTaxonName = StringUtils.defaultString(
            interaction.get(TaxonUtil.SOURCE_TAXON_NAME),
            TaxonUtil.generateSourceTaxonName(interaction));
    final boolean nameMissing = StringUtils.isBlank(sourceTaxonName);
    return nameMissing && interaction.containsKey(DatasetImporterForTSV.SOURCE_OCCURRENCE_ID);
}

/**
 * Checks whether the interaction carries a source taxon id without a usable
 * source taxon name.
 *
 * <p>The name considered is the {@code SOURCE_TAXON_NAME} entry or, when that
 * entry is {@code null}, the name produced by
 * {@code TaxonUtil.generateSourceTaxonName}.
 *
 * @param interaction the interaction properties to inspect
 * @return {@code true} if that name is blank and the interaction contains the
 *         {@code SOURCE_TAXON_ID} key
 */
public static boolean hasUnresolvedSourceTaxonId(Map<String, String> interaction) {
    final String sourceTaxonName = StringUtils.defaultString(
            interaction.get(TaxonUtil.SOURCE_TAXON_NAME),
            TaxonUtil.generateSourceTaxonName(interaction));
    final boolean nameMissing = StringUtils.isBlank(sourceTaxonName);
    return nameMissing && interaction.containsKey(TaxonUtil.SOURCE_TAXON_ID);
}

/**
 * Checks whether the interaction carries a target occurrence id without a
 * usable target taxon name.
 *
 * <p>The name considered is the {@code TARGET_TAXON_NAME} entry or, when that
 * entry is {@code null}, the name produced by
 * {@code TaxonUtil.generateTargetTaxonName}.
 *
 * @param interaction the interaction properties to inspect
 * @return {@code true} if that name is blank and the interaction contains the
 *         {@code TARGET_OCCURRENCE_ID} key
 */
public static boolean hasUnresolvedTargetOccurrenceId(Map<String, String> interaction) {
    final String targetTaxonName = StringUtils.defaultString(
            interaction.get(TaxonUtil.TARGET_TAXON_NAME),
            TaxonUtil.generateTargetTaxonName(interaction));
    final boolean nameMissing = StringUtils.isBlank(targetTaxonName);
    return nameMissing && interaction.containsKey(DatasetImporterForTSV.TARGET_OCCURRENCE_ID);
}

/**
 * Checks whether the interaction carries a target taxon id without a usable
 * target taxon name.
 *
 * <p>The name considered is the {@code TARGET_TAXON_NAME} entry or, when that
 * entry is {@code null}, the name produced by
 * {@code TaxonUtil.generateTargetTaxonName}.
 *
 * @param interaction the interaction properties to inspect
 * @return {@code true} if that name is blank and the interaction contains the
 *         {@code TARGET_TAXON_ID} key
 */
public static boolean hasUnresolvedTargetTaxonId(Map<String, String> interaction) {
    final String targetTaxonName = StringUtils.defaultString(
            interaction.get(TaxonUtil.TARGET_TAXON_NAME),
            TaxonUtil.generateTargetTaxonName(interaction));
    final boolean nameMissing = StringUtils.isBlank(targetTaxonName);
    return nameMissing && interaction.containsKey(TaxonUtil.TARGET_TAXON_ID);
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import org.eol.globi.data.DatasetImporterForTSV;
import org.eol.globi.data.StudyImporterException;
import org.eol.globi.process.InteractionListener;
import org.eol.globi.service.TaxonUtil;

import java.util.ArrayList;
import java.util.List;
Expand All @@ -14,11 +15,11 @@
import static org.eol.globi.util.InteractionListenerIndexing.getOccurrenceId;

public class InteractionListenerResolving implements InteractionListener {
private final Map<Pair<String, String>, Map<String, String>> interactionsWithUnresolvedOccurrenceIds;
private final Map<Pair<String, String>, Map<String, String>> interactionsWithUnresolvedOccurrenceOrTaxonIds;
private final InteractionListener interactionListener;

public InteractionListenerResolving(Map<Pair<String, String>, Map<String, String>> interactionsWithUnresolvedOccurrenceIds, InteractionListener interactionListener) {
this.interactionsWithUnresolvedOccurrenceIds = interactionsWithUnresolvedOccurrenceIds;
public InteractionListenerResolving(Map<Pair<String, String>, Map<String, String>> interactionsWithUnresolvedOccurrenceOrTaxonIds, InteractionListener interactionListener) {
this.interactionsWithUnresolvedOccurrenceOrTaxonIds = interactionsWithUnresolvedOccurrenceOrTaxonIds;
this.interactionListener = interactionListener;
}

Expand All @@ -40,7 +41,16 @@ public List<Map<String, String>> resolveOccurrenceIdsIfPossible(Map<String, Stri

if (InteractionListenerCollectUnresolvedOccurrenceIds.hasUnresolvedTargetOccurrenceId(interaction)) {
String targetOccurrenceId = getOccurrenceId(interaction, DatasetImporterForTSV.TARGET_OCCURRENCE_ID);
Map<String, String> resolved = interactionsWithUnresolvedOccurrenceIds.get(Pair.of(DatasetImporterForTSV.TARGET_OCCURRENCE_ID, targetOccurrenceId));
Map<String, String> resolved = interactionsWithUnresolvedOccurrenceOrTaxonIds.get(Pair.of(DatasetImporterForTSV.TARGET_OCCURRENCE_ID, targetOccurrenceId));
if (resolved != null && !resolved.isEmpty()) {
enrichedProperties = new ArrayList<>();
enrichedProperties.add(resolved);
}
}

if (InteractionListenerCollectUnresolvedOccurrenceIds.hasUnresolvedTargetTaxonId(interaction)) {
String targetTaxonId = getOccurrenceId(interaction, TaxonUtil.TARGET_TAXON_ID);
Map<String, String> resolved = interactionsWithUnresolvedOccurrenceOrTaxonIds.get(Pair.of(TaxonUtil.TARGET_TAXON_ID, targetTaxonId));
if (resolved != null && !resolved.isEmpty()) {
enrichedProperties = new ArrayList<>();
enrichedProperties.add(resolved);
Expand All @@ -49,14 +59,26 @@ public List<Map<String, String>> resolveOccurrenceIdsIfPossible(Map<String, Stri

if (InteractionListenerCollectUnresolvedOccurrenceIds.hasUnresolvedSourceOccurrenceId(interaction)) {
String sourceOccurrenceId = getOccurrenceId(interaction, DatasetImporterForTSV.SOURCE_OCCURRENCE_ID);
Map<String, String> resolved = interactionsWithUnresolvedOccurrenceIds.get(Pair.of(DatasetImporterForTSV.SOURCE_OCCURRENCE_ID, sourceOccurrenceId));
Map<String, String> resolved = interactionsWithUnresolvedOccurrenceOrTaxonIds.get(Pair.of(DatasetImporterForTSV.SOURCE_OCCURRENCE_ID, sourceOccurrenceId));
if (resolved != null && !resolved.isEmpty()) {
if (enrichedProperties == null) {
enrichedProperties = new ArrayList<>();
}
enrichedProperties.add(resolved);
}
}

if (InteractionListenerCollectUnresolvedOccurrenceIds.hasUnresolvedSourceTaxonId(interaction)) {
String sourceTaxonId = getOccurrenceId(interaction, TaxonUtil.SOURCE_TAXON_ID);
Map<String, String> resolved = interactionsWithUnresolvedOccurrenceOrTaxonIds.get(Pair.of(TaxonUtil.SOURCE_TAXON_ID, sourceTaxonId));
if (resolved != null && !resolved.isEmpty()) {
if (enrichedProperties == null) {
enrichedProperties = new ArrayList<>();
}
enrichedProperties.add(resolved);
}
}

return enrichedProperties;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
import java.util.Map;
import java.util.TreeMap;

import static org.eol.globi.service.TaxonUtil.TARGET_TAXON_ID;
import static org.hamcrest.MatcherAssert.assertThat;
import static org.hamcrest.Matchers.containsString;
import static org.hamcrest.Matchers.not;
Expand Down Expand Up @@ -150,6 +151,20 @@ public void readRSSVertnetWithoutConfig() throws StudyImporterException, IOExcep
assertThat(datasets.get(0).getArchiveURI(), is(URI.create("http://ipt.vertnet.org:8080/ipt/archive.do?r=utep_mamm")));
}

/**
 * Reads the bundled iNaturalist RSS fixture and expects two datasets, each
 * without dependencies and each pointing at the expected archive location.
 */
@Test
public void iNaturalistRSSWithoutConfig() throws StudyImporterException, IOException {
    final Dataset feed = datasetFor("{ \"url\": \"classpath:/org/eol/globi/data/rss_inaturalist.xml\" }");
    final List<Dataset> datasets = DatasetImporterForRSS.getDatasetsForFeed(feed);

    final String[] expectedArchives = {
            "https://www.inaturalist.org/observations/globi-observations-resource-relationships-dwca.zip",
            "https://www.inaturalist.org/taxa/inaturalist-taxonomy.dwca.zip"
    };

    assertThat(datasets.size(), is(2));
    for (int i = 0; i < expectedArchives.length; i++) {
        final Dataset current = datasets.get(i);
        assertThat(current.getOrDefault("hasDependencies", null), is("false"));
        assertThat(current.getOrDefault("url", null), is(expectedArchives[i]));
        assertThat(current.getArchiveURI(), is(URI.create(expectedArchives[i])));
    }
}

@Test
public void readFieldMuseum() throws StudyImporterException, IOException {
String configJson = "{ \"url\": \"classpath:/org/eol/globi/data/rss_fieldmuseum.xml\" }";
Expand Down Expand Up @@ -245,7 +260,7 @@ public void on(Map<String, String> interaction) throws StudyImporterException {
put(DatasetImporterForTSV.TARGET_BODY_PART_NAME, "bodyPartName");
put(DatasetImporterForTSV.TARGET_BODY_PART_ID, "bodyPartId");
put(TaxonUtil.TARGET_TAXON_NAME, "taxonName");
put(TaxonUtil.TARGET_TAXON_ID, "taxonId");
put(TARGET_TAXON_ID, "taxonId");
}
});
}};
Expand All @@ -258,7 +273,7 @@ public void on(Map<String, String> interaction) throws StudyImporterException {
assertThat(receivedLinks.size(), is(1));
Map<String, String> received = receivedLinks.get(0);
assertThat(received.get(TaxonUtil.TARGET_TAXON_NAME), is("taxonName"));
assertThat(received.get(TaxonUtil.TARGET_TAXON_ID), is("taxonId"));
assertThat(received.get(TARGET_TAXON_ID), is("taxonId"));
assertThat(received.get(DatasetImporterForTSV.TARGET_BODY_PART_NAME), is("bodyPartName"));
assertThat(received.get(DatasetImporterForTSV.TARGET_BODY_PART_ID), is("bodyPartId"));
assertThat(received.get(DatasetImporterForTSV.TARGET_LIFE_STAGE_NAME), is("lifeStageName"));
Expand All @@ -267,7 +282,7 @@ public void on(Map<String, String> interaction) throws StudyImporterException {

}

@Test()
@Test
public void enrichingInteractionListenerSourceOccurrence() throws StudyImporterException {
DatasetImporterWithListener studyImporter = new DatasetImporterWithListener(new ParserFactory() {
@Override
Expand Down Expand Up @@ -299,7 +314,7 @@ public void on(Map<String, String> interaction) throws StudyImporterException {
put(DatasetImporterForTSV.TARGET_BODY_PART_NAME, "bodyPartName");
put(DatasetImporterForTSV.TARGET_BODY_PART_ID, "bodyPartId");
put(TaxonUtil.TARGET_TAXON_NAME, "taxonName");
put(TaxonUtil.TARGET_TAXON_ID, "taxonId");
put(TARGET_TAXON_ID, "taxonId");
}
});
}};
Expand All @@ -315,14 +330,62 @@ public void on(Map<String, String> interaction) throws StudyImporterException {
Map<String, String> received = receivedLinks.get(0);
assertThat(received.get(DatasetImporterForTSV.SOURCE_OCCURRENCE_ID), is("4567"));
assertThat(received.get(TaxonUtil.TARGET_TAXON_NAME), is("taxonName"));
assertThat(received.get(TaxonUtil.TARGET_TAXON_ID), is("taxonId"));
assertThat(received.get(TARGET_TAXON_ID), is("taxonId"));
assertThat(received.get(DatasetImporterForTSV.TARGET_OCCURRENCE_ID), is("1234"));
assertThat(received.get(DatasetImporterForTSV.TARGET_BODY_PART_NAME), is("bodyPartName"));
assertThat(received.get(DatasetImporterForTSV.TARGET_BODY_PART_ID), is("bodyPartId"));
assertThat(received.get(DatasetImporterForTSV.TARGET_LIFE_STAGE_NAME), is("lifeStageName"));
assertThat(received.get(DatasetImporterForTSV.TARGET_LIFE_STAGE_ID), is("lifeStageId"));


}
/**
 * An interaction with a source occurrence id and a target taxon id, but no
 * target taxon name, is expected to reach the downstream listener once,
 * enriched with the taxon name registered for that target taxon id.
 */
@Test
public void enrichingInteractionListenerSourceOccurrenceTargetTaxon() throws StudyImporterException {
    DatasetImporterWithListener studyImporter = new DatasetImporterWithListener(new ParserFactory() {
        @Override
        public LabeledCSVParser createParser(URI studyResource, String characterEncoding) throws IOException {
            return null;
        }
    }, new NodeFactoryNull()) {
        @Override
        public void importStudy() throws StudyImporterException {
            // intentionally empty
        }
    };

    // collect whatever the resolving listener passes downstream
    final List<Map<String, String>> receivedLinks = new ArrayList<>();
    studyImporter.setInteractionListener(new InteractionListener() {
        @Override
        public void on(Map<String, String> interaction) throws StudyImporterException {
            receivedLinks.add(interaction);
        }
    });

    // properties registered for target taxon id "1234"
    Map<String, String> knownTargetTaxon = new TreeMap<>();
    knownTargetTaxon.put(TaxonUtil.TARGET_TAXON_NAME, "taxonName");
    knownTargetTaxon.put(TARGET_TAXON_ID, "1234");

    Map<Pair<String, String>, Map<String, String>> interactionsWithUnresolvedOccurrenceIds = new TreeMap<>();
    interactionsWithUnresolvedOccurrenceIds.put(Pair.of(TARGET_TAXON_ID, "1234"), knownTargetTaxon);

    InteractionListenerResolving listener = new InteractionListenerResolving(
            interactionsWithUnresolvedOccurrenceIds,
            studyImporter.getInteractionListener());

    Map<String, String> incoming = new TreeMap<>();
    incoming.put(DatasetImporterForTSV.SOURCE_OCCURRENCE_ID, "4567");
    incoming.put(TARGET_TAXON_ID, "1234");
    listener.on(incoming);

    assertThat(receivedLinks.size(), is(1));
    Map<String, String> received = receivedLinks.get(0);
    assertThat(received.get(DatasetImporterForTSV.SOURCE_OCCURRENCE_ID), is("4567"));
    assertThat(received.get(TaxonUtil.TARGET_TAXON_NAME), is("taxonName"));
    assertThat(received.get(TARGET_TAXON_ID), is("1234"));
}

private DatasetImpl getDatasetGroup() throws IOException {
Expand Down

0 comments on commit 917fc1e

Please sign in to comment.