Permalink
Browse files

add support for blank nodes

  • Loading branch information...
jhpoelen committed Sep 2, 2018
1 parent 201d8c3 commit e793d51c8a81e3b13692cfc53c6b75d9fec264a2
@@ -2,3 +2,4 @@
target/*
*.iml
data/*
datasets/*
@@ -39,7 +39,7 @@
add(Seeds.SEED_NODE_BIOCASE.getIRIString());
}};

@Parameter(names = {"-l", "--log",}, description = "select how toLiteral show the biodiversity graph", converter = LoggerConverter.class)
@Parameter(names = {"-l", "--log",}, description = "select how to show the biodiversity graph", converter = LoggerConverter.class)
private Logger logMode = Logger.tsv;

@Override
@@ -1,5 +1,6 @@
package org.globalbioticinteractions.preston.model;

import org.apache.commons.lang3.StringUtils;
import org.apache.commons.rdf.api.BlankNode;
import org.apache.commons.rdf.api.BlankNodeOrIRI;
import org.apache.commons.rdf.api.IRI;
@@ -62,4 +63,10 @@ public static BlankNode toBlank() {
return rdf.createBlankNode();
}

/**
 * Tells whether a term is a blank node, or an IRI that stands in for a blank node
 * (a "skolemized" blank node, recognized by the {@code /.well-known/genid/} path segment).
 *
 * @param iri the term to inspect; {@code null} is tolerated
 * @return {@code true} if {@code iri} is a {@link BlankNode} or its IRI contains
 *         {@code /.well-known/genid/}; {@code false} otherwise, including for {@code null}
 */
public static boolean isBlankOrSkolemizedBlank(BlankNodeOrIRI iri) {
    if (iri == null) {
        // nothing to inspect: report "not blank" rather than failing with a NullPointerException
        return false;
    }
    if (iri instanceof BlankNode) {
        return true;
    }
    //see https://www.w3.org/TR/rdf11-concepts/#section-skolemization
    // Prefer the plain IRI string; toString() is not guaranteed to return the bare IRI.
    String value = iri instanceof IRI
            ? ((IRI) iri).getIRIString()
            : iri.toString();
    return value.contains("/.well-known/genid/");
}

}
@@ -70,13 +70,19 @@ public void put(Triple statement) throws IOException {
&& object != null
&& object instanceof IRI) {

IRI mostRecent = findMostRecent((IRI) object);
if (mostRecent == null || !shouldResolveOnMissingOnly()) {
IRI keyForMostRecent = findMostRecent((IRI) object);
if (keyForMostRecent == null || !shouldResolveOnMissingOnly()) {
if (getDereferencer() != null) {
BlankNodeOrIRI derivedSubject = null;
try {
attemptUpdate((IRI) object, mostRecent);
derivedSubject = dereference((IRI) object);
} catch (IOException e) {
LOG.warn("failed to update [" + object.toString() + "]", e);
} finally {
if (derivedSubject == null) {
derivedSubject = toSkolemizedBlank((BlankNode) subj);
}
recordUpdate((IRI) object, keyForMostRecent, derivedSubject);
}
}
}
@@ -89,23 +95,31 @@ public void put(Triple statement) throws IOException {
}
}

private void attemptUpdate(IRI object, IRI mostRecent) throws IOException {
InputStream data = getDereferencer().dereference(object);
IRI derivedSubject = blobStore.putBlob(data);
if (null != mostRecent && !mostRecent.equals(derivedSubject)) {
/**
 * Turns a blank node into an IRI by appending the node's unique reference
 * to the {@code https://deeplinker.bio/.well-known/genid/} prefix.
 *
 * @param subj blank node to convert
 * @return IRI built from the genid prefix and {@code subj.uniqueReference()}
 */
public static IRI toSkolemizedBlank(BlankNode subj) {
    // see https://www.w3.org/TR/rdf11-concepts/#section-skolemization
    final String genidPrefix = "https://deeplinker.bio/.well-known/genid/";
    final String skolemizedIri = genidPrefix + subj.uniqueReference();
    return RefNodeFactory.toIRI(skolemizedIri);
}

/**
 * Records the outcome of an update attempt for {@code object}.
 *
 * If a previously recorded version exists ({@code keyForMostRecent} is non-null) and it differs
 * from {@code derivedSubject}, a generation time is recorded and a WAS_REVISION_OF relation
 * is stored and emitted.
 * If no previous version exists ({@code keyForMostRecent} is null), a generation time is recorded
 * and a WAS_DERIVED_FROM relation pointing at {@code object} is stored and emitted.
 * If the previous version equals {@code derivedSubject}, nothing is recorded.
 *
 * @param object           the IRI that was dereferenced
 * @param keyForMostRecent most recent known version for {@code object}, or null if none
 * @param derivedSubject   subject that resulted from the update attempt
 * @throws IOException declared because the recording and emitting calls below declare it
 */
private void recordUpdate(IRI object, IRI keyForMostRecent, BlankNodeOrIRI derivedSubject) throws IOException {
if (null != keyForMostRecent && !keyForMostRecent.equals(derivedSubject)) {
recordGenerationTime(derivedSubject);
put(Pair.of(Predicate.WAS_REVISION_OF, mostRecent), derivedSubject);
Triple of = RefNodeFactory.toStatement(derivedSubject, Predicate.WAS_REVISION_OF, mostRecent);
put(Pair.of(Predicate.WAS_REVISION_OF, keyForMostRecent), derivedSubject);
Triple of = RefNodeFactory.toStatement(derivedSubject, Predicate.WAS_REVISION_OF, keyForMostRecent);
emit(of);

} else if (null == mostRecent) {
} else if (null == keyForMostRecent) {
recordGenerationTime(derivedSubject);
put(Pair.of(Predicate.WAS_DERIVED_FROM, object), derivedSubject);
emit(RefNodeFactory.toStatement(derivedSubject, Predicate.WAS_DERIVED_FROM, object));
}
}

private void recordGenerationTime(IRI derivedSubject) throws IOException {
/**
 * Retrieves the content behind {@code object} and hands it to the blob store.
 *
 * @param object IRI to dereference
 * @return the IRI returned by the blob store for the stored content
 * @throws IOException if dereferencing, storing, or closing the content stream fails
 */
private IRI dereference(IRI object) throws IOException {
    // Release the dereferenced stream once the blob store is done with it;
    // try-with-resources skips the close if the dereferencer returns null.
    try (InputStream data = getDereferencer().dereference(object)) {
        return blobStore.putBlob(data);
    }
}

private void recordGenerationTime(BlankNodeOrIRI derivedSubject) throws IOException {
String value = RefNodeFactory.toDateTime(DateUtil.now()).getLexicalForm();
blobStore.putBlob(IOUtils.toInputStream(value, StandardCharsets.UTF_8));
IRI value1 = Hasher.calcSHA256(value);
@@ -127,7 +141,7 @@ public void setResolveOnMissingOnly(boolean resolveOnMissingOnly) {
}

private IRI findMostRecent(IRI obj) throws IOException {
IRI existingId = findKey(Pair.of(Predicate.WAS_DERIVED_FROM, obj));
IRI existingId = get(Pair.of(Predicate.WAS_DERIVED_FROM, obj));

if (existingId != null) {
emitExistingVersion(existingId, Predicate.WAS_DERIVED_FROM, obj);
@@ -139,15 +153,15 @@ private IRI findMostRecent(IRI obj) throws IOException {
/**
 * Follows the chain of WAS_REVISION_OF links starting at {@code existingId}:
 * each newer version that is found is emitted via {@code emitExistingVersion} and
 * becomes the starting point for the next lookup.
 *
 * @param existingId id from which to start looking for newer versions
 * @return the last id in the chain; {@code existingId} itself if no newer version is found
 * @throws IOException declared because the lookup and emit calls below declare it
 */
private IRI findLastVersionId(IRI existingId) throws IOException {
IRI lastVersionId = existingId;
IRI newerVersionId;
while ((newerVersionId = findKey(Pair.of(Predicate.WAS_REVISION_OF, lastVersionId))) != null) {
while ((newerVersionId = get(Pair.of(Predicate.WAS_REVISION_OF, lastVersionId))) != null) {
emitExistingVersion(newerVersionId, Predicate.WAS_REVISION_OF, lastVersionId);
lastVersionId = newerVersionId;
}
return lastVersionId;
}

private void emitExistingVersion(IRI subj, IRI predicate, RDFTerm obj) throws IOException {
IRI timeKey = findKey(Pair.of(subj, Predicate.GENERATED_AT_TIME));
IRI timeKey = get(Pair.of(subj, Predicate.GENERATED_AT_TIME));
if (timeKey != null) {
InputStream input = blobStore.get(timeKey);
if (input != null) {
@@ -170,17 +184,17 @@ public void put(Pair<RDFTerm, RDFTerm> partialStatement, RDFTerm value) throws I
}

/**
 * Derives the lookup key for a pair of terms: each side of the pair is hashed on its own,
 * then {@code Hasher.calcSHA256} is applied to the concatenation of the two hashes' IRI strings
 * (left first, then right).
 *
 * @param unhashedKeyPair pair of terms to derive a key for
 * @return the hash IRI computed from both sides of the pair
 */
private IRI calculateKeyFor(Pair<RDFTerm, RDFTerm> unhashedKeyPair) {
IRI left = hashRDFTerm(unhashedKeyPair.getLeft());
IRI right = hashRDFTerm(unhashedKeyPair.getRight());
IRI left = calculateHashFor(unhashedKeyPair.getLeft());
IRI right = calculateHashFor(unhashedKeyPair.getRight());
return Hasher.calcSHA256(left.getIRIString() + right.getIRIString());
}

private IRI hashRDFTerm(RDFTerm left1) {
/**
 * Hashes a single RDF term: the term's value, as given by {@code RDFUtil.getValueFor},
 * is passed to {@code Hasher.calcSHA256}.
 *
 * @param term term to hash
 * @return hash IRI for the term's value
 */
private IRI calculateHashFor(RDFTerm term) {
    final String termValue = RDFUtil.getValueFor(term);
    return Hasher.calcSHA256(termValue);
}

@Override
public IRI findKey(Pair<RDFTerm, RDFTerm> partialStatement) throws IOException {
public IRI get(Pair<RDFTerm, RDFTerm> partialStatement) throws IOException {
InputStream inputStream = persistence.get(calculateKeyFor(partialStatement).getIRIString());
return inputStream == null
? null
@@ -12,6 +12,6 @@

void put(Triple statement) throws IOException;

IRI findKey(Pair<RDFTerm, RDFTerm> partialStatement) throws IOException;
IRI get(Pair<RDFTerm, RDFTerm> partialStatement) throws IOException;

}
@@ -8,52 +8,63 @@
import org.globalbioticinteractions.preston.Hasher;
import org.globalbioticinteractions.preston.model.RefNodeFactory;
import org.hamcrest.core.Is;
import org.junit.Ignore;
import org.junit.Test;

import java.io.IOException;
import java.io.InputStream;
import java.net.URI;
import java.nio.charset.StandardCharsets;

import static junit.framework.TestCase.assertTrue;
import static org.hamcrest.CoreMatchers.not;
import static org.hamcrest.MatcherAssert.assertThat;
import static org.junit.Assert.assertNotNull;

public class AppendOnlyStatementStoreTest {

public static final IRI SOME_IRI = RefNodeFactory.toIRI(URI.create("http://some"));
private static final IRI SOME_IRI = RefNodeFactory.toIRI(URI.create("http://some"));

@Ignore("store only dereferenced content and timestamps for now")
@Test
public void putImmutableStatement() throws IOException {
IRI GBIF = RefNodeFactory.toIRI(URI.create("http://gbif.org"));
IRI GBIF_REGISTRY = RefNodeFactory.toIRI(URI.create("https://api.gbif.org/v1/registry"));
public void putContentThatFailsToDownload() throws IOException {
BlankNode blank = RefNodeFactory.toBlank();
Triple statement
= RefNodeFactory.toStatement(GBIF_REGISTRY, Predicate.WAS_DERIVED_FROM, GBIF);
= RefNodeFactory.toStatement(blank,
Predicate.WAS_DERIVED_FROM,
RefNodeFactory.toIRI(URI.create("http://some")));

Dereferencer dereferencer = new DereferenceTest("deref@");
AppendOnlyBlobStore blobStore1 = new AppendOnlyBlobStore(TestUtil.getTestPersistence());
StatementStore blobStore = getAppendOnlyRelationStore(dereferencer, blobStore1, TestUtil.getTestPersistence());
Dereferencer dereferencer = uri -> {
throw new IOException("fails to dereference");
};

blobStore.put(statement);
Persistence testPersistence = TestUtil.getTestPersistence();

AppendOnlyStatementStore relationStore = new AppendOnlyStatementStore(
new AppendOnlyBlobStore(testPersistence),
testPersistence,
dereferencer);

IRI key = blobStore.findKey(Pair.of(Predicate.WAS_DERIVED_FROM, GBIF));
relationStore.put(statement);

assertThat(key.toString(), Is.is("hash://sha256/809f41e24585d47dd30008e11d3848aec67065135042a28847b357af3ccf84e4"));
// dereference subject

InputStream URIString = blobStore1.get(key);
IRI contentHash = relationStore.get(
Pair.of(Predicate.WAS_DERIVED_FROM,
RefNodeFactory.toIRI(URI.create("http://some"))));

assertThat(toUTF8(URIString), Is.is("https://api.gbif.org/v1/registry"));
assertTrue(RefNodeFactory.isBlankOrSkolemizedBlank(contentHash));
}

@Test
public void putContentThatNeedsDownload() throws IOException {
BlankNode blank = RefNodeFactory.toBlank();
Triple statement
= RefNodeFactory.toStatement(RefNodeFactory.toBlank(), Predicate.WAS_DERIVED_FROM, RefNodeFactory.toIRI(URI.create("http://some")));
= RefNodeFactory.toStatement(blank,
Predicate.WAS_DERIVED_FROM,
RefNodeFactory.toIRI(URI.create("http://some")));

Dereferencer dereferencer = new DereferenceTest("derefData@");
Persistence testPersistence = TestUtil.getTestPersistence();

AppendOnlyStatementStore relationStore = new AppendOnlyStatementStore(
new AppendOnlyBlobStore(testPersistence),
testPersistence,
@@ -65,19 +76,17 @@ public void putContentThatNeedsDownload() throws IOException {

// dereference subject

IRI contentHash = relationStore.findKey(Pair.of(Predicate.WAS_DERIVED_FROM, RefNodeFactory.toIRI(URI.create("http://some"))));
IRI contentHash = relationStore.get(
Pair.of(Predicate.WAS_DERIVED_FROM,
RefNodeFactory.toIRI(URI.create("http://some"))));
InputStream content = blobStore.get(contentHash);

assertNotNull(contentHash);
InputStream otherContent = blobStore.get(contentHash);
String actualOtherContent = toUTF8(otherContent);

String expectedContent = "derefData@http://some";

String actualContent = toUTF8(content);
assertThat(actualContent, Is.is(expectedContent));
assertThat(contentHash, Is.is(Hasher.calcSHA256(expectedContent)));
assertThat(actualContent, Is.is(actualOtherContent));
}

private AppendOnlyStatementStore getAppendOnlyRelationStore(Dereferencer dereferencer, BlobStore blobStore, Persistence testPersistencetence) {
@@ -105,19 +114,19 @@ public void putNewVersionOfContent() throws IOException {

relationstore.put(statement);

IRI contentHash = relationstore.findKey(Pair.of(Predicate.WAS_DERIVED_FROM, SOME_IRI));
IRI contentHash = relationstore.get(Pair.of(Predicate.WAS_DERIVED_FROM, SOME_IRI));
assertNotNull(contentHash);

Dereferencer dereferencer = new DereferenceTest("derefData2@");
relationstore.setDereferencer(dereferencer);
relationstore.put(statement);

IRI contentHash2 = relationstore.findKey(Pair.of(Predicate.WAS_DERIVED_FROM, SOME_IRI));
IRI contentHash2 = relationstore.get(Pair.of(Predicate.WAS_DERIVED_FROM, SOME_IRI));


assertThat(contentHash, Is.is(contentHash2));

IRI newContentHash = relationstore.findKey(Pair.of(Predicate.WAS_REVISION_OF, contentHash));
IRI newContentHash = relationstore.get(Pair.of(Predicate.WAS_REVISION_OF, contentHash));
InputStream newContent = blogStore.get(newContentHash);

assertThat(contentHash, not(Is.is(newContentHash)));
@@ -128,7 +137,7 @@ public void putNewVersionOfContent() throws IOException {
relationstore.setDereferencer(new DereferenceTest("derefData3@"));
relationstore.put(statement);

IRI newerContentHash = relationstore.findKey(Pair.of(Predicate.WAS_REVISION_OF, newContentHash));
IRI newerContentHash = relationstore.get(Pair.of(Predicate.WAS_REVISION_OF, newContentHash));
InputStream newerContent = blogStore.get(newerContentHash);

assertThat(newerContentHash.getIRIString(), Is.is("hash://sha256/7e66eac09d137afe06dd73614e966a417260a111208dabe7225b05f02ce380fd"));

0 comments on commit e793d51

Please sign in to comment.