Skip to content

Commit

Permalink
first attempt at using resource relationships to capture inaturalist …
Browse files Browse the repository at this point in the history
…observation fields; related to #427
  • Loading branch information
jhpoelen committed Nov 7, 2019
1 parent ae9f3ad commit 7347c50
Show file tree
Hide file tree
Showing 3 changed files with 73 additions and 15 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import org.eol.globi.domain.InteractType;
import org.gbif.dwc.Archive;
import org.gbif.dwc.ArchiveFile;
import org.gbif.dwc.extensions.Extension;
import org.gbif.dwc.extensions.ExtensionProperty;
import org.gbif.dwc.record.Record;
import org.gbif.dwc.terms.DcTerm;
Expand All @@ -28,6 +29,7 @@
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.Set;
import java.util.TreeMap;
import java.util.stream.Collectors;
Expand Down Expand Up @@ -57,6 +59,7 @@
public class StudyImporterForDwCA extends StudyImporterWithListener {
public static final String EXTENSION_ASSOCIATED_TAXA = "http://purl.org/NET/aec/associatedTaxa";
public static final String EXTENSION_RESOURCE_RELATIONSHIP = "http://rs.tdwg.org/dwc/terms/ResourceRelationship";
public static final String EXTENSION_TAXON = "http://rs.tdwg.org/dwc/terms/Taxon";


public StudyImporterForDwCA(ParserFactory parserFactory, NodeFactory nodeFactory) {
Expand Down Expand Up @@ -350,8 +353,8 @@ static void importResourceRelationExtension(Archive archive, InteractionListener


for (Record record : resourceExtension) {
String sourceId = record.value(DwcTerm.relatedResourceID);
String targetId = record.value(DwcTerm.resourceID);
String targetId = record.value(DwcTerm.relatedResourceID);
String sourceId = record.value(DwcTerm.resourceID);
if (StringUtils.isNotBlank(sourceId)
&& StringUtils.isNotBlank(targetId)) {
referencedSourceIds.add(sourceId);
Expand All @@ -361,24 +364,41 @@ static void importResourceRelationExtension(Archive archive, InteractionListener
final List<DwcTerm> idTerms = Arrays.asList(
DwcTerm.occurrenceID, DwcTerm.taxonID);

ArchiveFile core = archive.getCore();
for (Record coreRecord : core) {
for (DwcTerm idTerm : idTerms) {
attemptLinkUsingTerm(termIdPropMap,
referencedSourceIds,
referencedTargetIds,
coreRecord,
idTerm);
List<ArchiveFile> archiveFiles = new ArrayList<>();
archiveFiles.add(archive.getCore());

ArchiveFile taxon = findResourceExtension(archive, EXTENSION_TAXON);
if (taxon != null) {
archiveFiles.add(taxon);
}

for (ArchiveFile archiveFile : archiveFiles) {
for (Record record : archiveFile) {
for (DwcTerm idTerm : idTerms) {
attemptLinkUsingTerm(termIdPropMap,
referencedSourceIds,
referencedTargetIds,
record,
idTerm);
}
}
}


for (Record record : resourceExtension) {
Map<String, String> props = new TreeMap<>();
String sourceId = record.value(DwcTerm.relatedResourceID);
String sourceId = record.value(DwcTerm.resourceID);
String relationship = record.value(DwcTerm.relationshipOfResource);
String targetId = record.value(DwcTerm.resourceID);

String relationshipTypeId = findRelationshipTypeIdByLabel(relationship);
Optional<Term> relationshipOfResourceIDTerm = record.terms().stream().filter(x -> StringUtils.equals(x.simpleName(), "relationshipOfResourceID")).findFirst();
String relationshipTypeIdValue = relationshipOfResourceIDTerm
.map(record::value)
.orElse(null);
String targetId = record.value(DwcTerm.relatedResourceID);

String relationshipTypeId = StringUtils.isBlank(relationshipTypeIdValue)
? findRelationshipTypeIdByLabel(relationship)
: relationshipTypeIdValue;

if (StringUtils.isNotBlank(sourceId)
&& StringUtils.isNotBlank(targetId)
Expand Down Expand Up @@ -442,6 +462,7 @@ private static String findRelationshipTypeIdByLabel(String relationship) {
put("parasite of", InteractType.PARASITE_OF.getIRI());
put("stomach contents of", InteractType.EATEN_BY.getIRI());
put("stomach contents", InteractType.ATE.getIRI());
put("eaten by", InteractType.EATEN_BY.getIRI());
}});

String relationshipKey = StringUtils.lowerCase(StringUtils.trim(relationship));
Expand Down Expand Up @@ -500,11 +521,15 @@ private static void putIfAbsentAndNotBlank(Map<String, String> props, String key
}

/**
 * Looks up the resource relationship extension of the given archive.
 *
 * Convenience wrapper that delegates to {@code findResourceExtension} using
 * the {@code EXTENSION_RESOURCE_RELATIONSHIP} row type.
 *
 * @param archive the Darwin Core archive whose extensions are searched
 * @return the extension file reported by {@code findResourceExtension};
 *         NOTE(review): presumably {@code null} when the archive has no such
 *         extension - confirm against {@code findResourceExtension}
 */
private static ArchiveFile findResourceRelationshipExtension(Archive archive) {
return findResourceExtension(archive, EXTENSION_RESOURCE_RELATIONSHIP);
}

private static ArchiveFile findResourceExtension(Archive archive, String extensionType) {
ArchiveFile resourceRelationExtension = null;
Set<ArchiveFile> extensions = archive.getExtensions();
for (ArchiveFile extension : extensions) {
if (StringUtils.equals(extension.getRowType().qualifiedName(),
EXTENSION_RESOURCE_RELATIONSHIP)) {
extensionType)) {
resourceRelationExtension = extension;
break;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -414,7 +414,7 @@ public void newLink(Map<String, String> properties) throws StudyImporterExceptio
}

@Test
public void hasResourceRelationships() throws IOException, URISyntaxException {
public void hasResourceRelationshipsOccurrenceToOccurrence() throws IOException, URISyntaxException {
URI sampleArchive = getClass().getResource("fmnh-rr-test.zip").toURI();

Archive archive = DwCAUtil.archiveFor(sampleArchive, "target/tmp");
Expand Down Expand Up @@ -463,5 +463,38 @@ public void newLink(Map<String, String> properties) throws StudyImporterExceptio
assertThat(numberOfFoundLinks.get(), is(7));
}

/**
 * Imports the resource relationships of the bundled iNaturalist sample archive
 * and checks that exactly one link is reported, relating an occurrence (source)
 * to a taxon (target), and that the link carries the expected properties.
 */
@Test
public void hasResourceRelationshipsOccurrenceToTaxa() throws IOException, URISyntaxException {
    final URI archiveLocation = getClass().getResource("inaturalist-dwca-rr.zip").toURI();
    final Archive dwca = DwCAUtil.archiveFor(archiveLocation, "target/tmp");

    final AtomicInteger linkCounter = new AtomicInteger(0);

    // Listener that counts every reported link and inspects the first one in detail.
    final InteractionListener verifyingListener = new InteractionListener() {

        @Override
        public void newLink(Map<String, String> properties) throws StudyImporterException {
            final int linkNumber = linkCounter.incrementAndGet();
            if (linkNumber == 1) {
                // source side: the observed organism and its observation record
                assertThat(properties.get(StudyImporterForTSV.SOURCE_TAXON_ID), is("http://www.inaturalist.org/taxa/465153"));
                assertThat(properties.get(StudyImporterForTSV.SOURCE_TAXON_NAME), is("Gorgonocephalus eucnemis"));
                assertThat(properties.get(StudyImporterForTSV.SOURCE_OCCURRENCE_ID), is("http://www.inaturalist.org/observations/2309983"));
                // interaction type: label and id as provided by the archive
                assertThat(properties.get(StudyImporterForTSV.INTERACTION_TYPE_NAME), is("Eaten by"));
                assertThat(properties.get(StudyImporterForTSV.INTERACTION_TYPE_ID), is("http://www.inaturalist.org/observation_fields/879"));
                assertThat(properties.get(StudyImporterForTSV.BASIS_OF_RECORD_NAME), is("HumanObservation"));
                // target side: the related taxon
                assertThat(properties.get(StudyImporterForTSV.TARGET_TAXON_ID), is("http://www.inaturalist.org/taxa/133061"));
                assertThat(properties.get(StudyImporterForTSV.TARGET_TAXON_NAME), is("Enhydra lutris kenyoni"));
                assertThat(properties.get(StudyImporterForTSV.REFERENCE_CITATION), is("https://www.inaturalist.org/users/dpom"));
            }

            // checks applied to every reported link
            assertThat(properties.get(StudyImporterForTSV.STUDY_SOURCE_CITATION), is("some citation"));
            assertThat(properties.get(StudyImporterForTSV.REFERENCE_CITATION), is(notNullValue()));
            assertThat(properties.get(StudyImporterForTSV.REFERENCE_ID), is("some citation"));
        }
    };

    StudyImporterForDwCA.importResourceRelationExtension(dwca, verifyingListener, "some citation");

    assertThat(linkCounter.get(), is(1));
}


}
Binary file not shown.

0 comments on commit 7347c50

Please sign in to comment.