Skip to content
Permalink
Browse files

feat(rdf-api): Add a general-purpose SHACL validation utility (DSP-930)…

… (#1762)
  • Loading branch information
benjamingeer committed Dec 1, 2020
1 parent 346873d commit bfd3192ea04d5f42d79836cf3b8fbf17007bab71
Showing with 848 additions and 110 deletions.
  1. +13 −2 docs/05-internals/design/principles/rdf-api.md
  2. +13 −0 test_data/shacl/test/person.ttl
  3. +3 −1 third_party/dependencies.bzl
  4. +1 −0 webapi/BUILD.bazel
  5. +4 −1 webapi/src/it/scala/org/knora/webapi/ITKnoraFakeSpec.scala
  6. +2 −1 webapi/src/it/scala/org/knora/webapi/ITKnoraLiveSpec.scala
  7. +35 −29 webapi/src/main/resources/application.conf
  8. +4 −2 webapi/src/main/scala/org/knora/webapi/app/LiveCore.scala
  9. +1 −0 webapi/src/main/scala/org/knora/webapi/messages/BUILD.bazel
  10. +10 −0 webapi/src/main/scala/org/knora/webapi/messages/OntologyConstants.scala
  11. +0 −12 webapi/src/main/scala/org/knora/webapi/messages/StringFormatter.scala
  12. +97 −0 webapi/src/main/scala/org/knora/webapi/messages/util/rdf/AbstractShaclValidator.scala
  13. +44 −4 webapi/src/main/scala/org/knora/webapi/messages/util/rdf/RdfFeatureFactory.scala
  14. +57 −5 webapi/src/main/scala/org/knora/webapi/messages/util/rdf/RdfFormatUtil.scala
  15. +67 −0 webapi/src/main/scala/org/knora/webapi/messages/util/rdf/RdfModel.scala
  16. +53 −0 webapi/src/main/scala/org/knora/webapi/messages/util/rdf/ShaclValidator.scala
  17. +69 −0 webapi/src/main/scala/org/knora/webapi/messages/util/rdf/jenaimpl/JenaShaclValidator.scala
  18. +106 −0 webapi/src/main/scala/org/knora/webapi/messages/util/rdf/rdf4jimpl/RDF4JShaclValidator.scala
  19. +3 −1 webapi/src/main/scala/org/knora/webapi/settings/KnoraSettings.scala
  20. +7 −42 webapi/src/main/scala/org/knora/webapi/store/triplestore/upgrade/RepositoryUpdater.scala
  21. +2 −0 webapi/src/test/scala/org/knora/webapi/CoreSpec.scala
  22. +1 −0 webapi/src/test/scala/org/knora/webapi/E2ESpec.scala
  23. +3 −2 webapi/src/test/scala/org/knora/webapi/R2RSpec.scala
  24. +1 −0 webapi/src/test/scala/org/knora/webapi/messages/util/rdf/BUILD.bazel
  25. +0 −3 webapi/src/test/scala/org/knora/webapi/messages/util/rdf/JsonLDUtilSpec.scala
  26. +3 −5 webapi/src/test/scala/org/knora/webapi/messages/util/rdf/RdfFormatUtilSpec.scala
  27. +154 −0 webapi/src/test/scala/org/knora/webapi/messages/util/rdf/ShaclValidatorSpec.scala
  28. +19 −0 webapi/src/test/scala/org/knora/webapi/messages/util/rdf/jenaimpl/BUILD.bazel
  29. +28 −0 webapi/src/test/scala/org/knora/webapi/messages/util/rdf/jenaimpl/JenaShaclValidatorSpec.scala
  30. +20 −0 webapi/src/test/scala/org/knora/webapi/messages/util/rdf/rdf4jimpl/BUILD.bazel
  31. +28 −0 webapi/src/test/scala/org/knora/webapi/messages/util/rdf/rdf4jimpl/RDF4JShaclValidatorSpec.scala
@@ -49,10 +49,12 @@ The API is in the package `org.knora.webapi.messages.util.rdf`. It includes:
- `JsonLDUtil`, which provides specialised functionality for working
with RDF in JSON-LD format, and for converting between RDF models
and JSON-LD documents. `RdfFormatUtil` uses `JsonLDUtil` when appropriate.

- `ShaclValidator`, which validates RDF models using SHACL shapes.

To work with RDF models, start with `RdfFeatureFactory`, which returns instances
of `RdfNodeFactory`, `RdfModelFactory`, `RdfFormatUtil`, and `ShaclValidator`,
using feature toggle configuration. `JsonLDUtil` does not need a feature factory.

To iterate efficiently over the statements in an `RdfModel`, use its `iterator` method.
An `RdfModel` cannot be modified while you are iterating over it.
@@ -85,6 +87,15 @@ an `RdfModel`. To do this, use the `RdfModel.asRepository` method, which
returns an `RdfRepository` that can run `SELECT` queries.


## SHACL validation

On startup, graphs of SHACL shapes are loaded from Turtle files (`*.ttl`) in the directory
specified by `app.shacl.shapes-dir` in `application.conf`, and in its subdirectories.
To validate the default graph of an `RdfModel` using a graph of SHACL shapes, call
`ShaclValidator.validate`, specifying the path of the Turtle file containing the graph
of shapes, relative to that directory (for example, `test/person.ttl` for a file at
`<shapes-dir>/test/person.ttl`).


## Implementations

- The Jena-based implementation, in package `org.knora.webapi.messages.util.rdf.jenaimpl`.
@@ -0,0 +1,13 @@
@prefix ex: <http://example.com/ns#> .
@prefix sh: <http://www.w3.org/ns/shacl#> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
@prefix foaf: <http://xmlns.com/foaf/0.1/> .

# A node shape whose target is every instance of foaf:Person.
ex:PersonShape a sh:NodeShape ;
sh:targetClass foaf:Person ;
sh:property ex:PersonShapeProperty .

# Constraint on foaf:age: each targeted node must have exactly one value
# (sh:minCount 1, sh:maxCount 1), and that value must have datatype xsd:int.
ex:PersonShapeProperty sh:path foaf:age ;
sh:datatype xsd:int ;
sh:maxCount 1 ;
sh:minCount 1 .
@@ -84,7 +84,9 @@ def dependencies():
"org.xmlunit:xmlunit-core:2.1.1",

# other
"org.eclipse.rdf4j:rdf4j-runtime:3.0.0",
"org.eclipse.rdf4j:rdf4j-runtime:3.4.4",
"org.eclipse.rdf4j:rdf4j-client:3.4.4",
"org.eclipse.rdf4j:rdf4j-shacl:3.4.4",
"org.rogach:scallop_2.12:3.2.0",
"com.google.gwt:gwt-servlet:2.8.0",
"net.sf.saxon:Saxon-HE:9.9.0-2",
@@ -212,6 +212,7 @@ scala_library(
"//webapi/src/main/scala/org/knora/webapi/app",
"//webapi/src/main/scala/org/knora/webapi/core",
"//webapi/src/main/scala/org/knora/webapi/exceptions",
"//webapi/src/main/scala/org/knora/webapi/feature",
"//webapi/src/main/scala/org/knora/webapi/instrumentation",
"//webapi/src/main/scala/org/knora/webapi/messages",
"//webapi/src/main/scala/org/knora/webapi/routing",
@@ -20,6 +20,7 @@
package org.knora.webapi

import akka.actor.ActorSystem
import akka.event.LoggingAdapter
import akka.http.scaladsl.Http
import akka.http.scaladsl.client.RequestBuilding
import akka.http.scaladsl.model.{HttpRequest, HttpResponse, StatusCodes}
@@ -33,6 +34,7 @@ import org.scalatest.wordspec.AnyWordSpecLike
import org.scalatest.{BeforeAndAfterAll, Suite}
import spray.json.{JsObject, _}
import org.knora.webapi.messages.StringFormatter
import org.knora.webapi.messages.util.rdf.RdfFeatureFactory

import scala.concurrent.duration.{Duration, _}
import scala.concurrent.{Await, ExecutionContext}
@@ -63,8 +65,9 @@ class ITKnoraFakeSpec(_system: ActorSystem) extends Core with KnoraFakeCore with

/* Needs to be initialized before any responders */
StringFormatter.initForTest()
RdfFeatureFactory.init(settings)

val log = akka.event.Logging(system, this.getClass)
val log: LoggingAdapter = akka.event.Logging(system, this.getClass)

protected val baseApiUrl: String = settings.internalKnoraApiBaseUrl
protected val baseInternalSipiUrl: String = settings.internalSipiBaseUrl
@@ -36,7 +36,7 @@ import org.knora.webapi.exceptions.AssertionException
import org.knora.webapi.messages.StringFormatter
import org.knora.webapi.messages.app.appmessages.{AppStart, AppStop, SetAllowReloadOverHTTPState}
import org.knora.webapi.messages.store.triplestoremessages.{RdfDataObject, TriplestoreJsonProtocol}
import org.knora.webapi.messages.util.rdf.{JsonLDDocument, JsonLDUtil}
import org.knora.webapi.messages.util.rdf.{JsonLDDocument, JsonLDUtil, RdfFeatureFactory}
import org.knora.webapi.settings._
import org.knora.webapi.util.StartupUtils
import org.scalatest.matchers.should.Matchers
@@ -75,6 +75,7 @@ class ITKnoraLiveSpec(_system: ActorSystem) extends Core with StartupUtils with

/* Needs to be initialized before any responders */
StringFormatter.initForTest()
RdfFeatureFactory.init(settings)

val log: LoggingAdapter = akka.event.Logging(system, this.getClass)

@@ -264,35 +264,41 @@ akka-http-cors {
}

app {
feature-toggles {
new-list-admin-routes {
description = "Replace the old list admin routes with new ones."

available-versions = [ 1 ]
default-version = 1
enabled-by-default = no
override-allowed = yes
expiration-date = "2021-12-01T00:00:00Z"

developer-emails = [
"Sepideh Alassi <sepideh.alassi@dasch.swiss>"
"Benjamin Geer <benjamin.geer@dasch.swiss>"
]
}

jena-rdf-library {
description = "Use the Jena API for RDF processing. If turned off, use the RDF4J API."

available-versions = [ 1 ]
default-version = 1
enabled-by-default = no
override-allowed = yes

developer-emails = [
"Benjamin Geer <benjamin.geer@dasch.swiss>"
]
}
}
feature-toggles {
new-list-admin-routes {
description = "Replace the old list admin routes with new ones."

available-versions = [ 1 ]
default-version = 1
enabled-by-default = no
override-allowed = yes
expiration-date = "2021-12-01T00:00:00Z"

developer-emails = [
"Sepideh Alassi <sepideh.alassi@dasch.swiss>"
"Benjamin Geer <benjamin.geer@dasch.swiss>"
]
}

jena-rdf-library {
description = "Use the Jena API for RDF processing. If turned off, use the RDF4J API."

available-versions = [ 1 ]
default-version = 1
enabled-by-default = no
override-allowed = yes

developer-emails = [
"Benjamin Geer <benjamin.geer@dasch.swiss>"
]
}
}

shacl {
# The directory that SHACL shapes are loaded from.
shapes-dir = "shacl"
# Optional override: because of the `${?...}` form, this second assignment only takes effect
# if KNORA_WEBAPI_SHACLE_SHAPES_DIR is defined (e.g. as an environment variable); otherwise
# the default above is kept.
# NOTE(review): the variable name is spelled "SHACLE", not "SHACL" - confirm this is intended.
shapes-dir = ${?KNORA_WEBAPI_SHACLE_SHAPES_DIR}
}

print-extended-config = false // If true, an extended list of configuration parameters will be printed out at startup.
print-extended-config = ${?KNORA_WEBAPI_PRINT_EXTENDED_CONFIG}
@@ -27,8 +27,8 @@ import org.knora.webapi.settings.{KnoraDispatchers, KnoraSettings, KnoraSettings
import scala.concurrent.ExecutionContext
import scala.language.postfixOps
import scala.languageFeature.postfixOps

import org.knora.webapi.core.Core
import org.knora.webapi.messages.util.rdf.RdfFeatureFactory


/**
@@ -57,8 +57,10 @@ trait LiveCore extends Core {
implicit val executionContext: ExecutionContext = system.dispatchers.lookup(KnoraDispatchers.KnoraActorDispatcher)


// Initialise StringFormatter with the system settings. This must happen before any responders are constructed.
// Initialise StringFormatter and RdfFeatureFactory with the system settings.
// This must happen before any responders are constructed.
StringFormatter.init(settings)
RdfFeatureFactory.init(settings)

/**
* The main application supervisor actor which is at the top of the actor
@@ -37,6 +37,7 @@ scala_library(
"@maven//:org_eclipse_rdf4j_rdf4j_repository_sail",
"@maven//:org_eclipse_rdf4j_rdf4j_sail_api",
"@maven//:org_eclipse_rdf4j_rdf4j_sail_memory",
"@maven//:org_eclipse_rdf4j_rdf4j_shacl",
"@maven//:org_jodd_jodd",
"@maven//:org_scala_lang_modules_scala_xml_2_12",
"@maven//:org_scala_lang_scala_library",
@@ -125,6 +125,16 @@ object OntologyConstants {
val DateTimeStamp: IRI = XsdPrefixExpansion + "dateTimeStamp"
}

/**
 * IRIs from the SHACL vocabulary, all built on the namespace `http://www.w3.org/ns/shacl#`.
 */
object Shacl {
    // The namespace that every IRI below is appended to.
    val ShaclPrefixExpansion: IRI = "http://www.w3.org/ns/shacl#"

    // Terms whose local name starts with a lower-case letter.
    val Conforms: IRI = s"${ShaclPrefixExpansion}conforms"
    val Result: IRI = s"${ShaclPrefixExpansion}result"
    val SourceConstraintComponent: IRI = s"${ShaclPrefixExpansion}sourceConstraintComponent"

    // Constraint component IRIs.
    val DatatypeConstraintComponent: IRI = s"${ShaclPrefixExpansion}DatatypeConstraintComponent"
    val MaxCountConstraintComponent: IRI = s"${ShaclPrefixExpansion}MaxCountConstraintComponent"
}

/**
* http://schema.org
*/
@@ -318,18 +318,6 @@ object StringFormatter {
}
}

/**
 * Initialises the singleton instance of [[StringFormatter]] for use in a client program that will connect to Knora.
 * If the singleton has already been initialised, this method leaves it unchanged.
 *
 * @param knoraHostAndPort passed to the [[StringFormatter]] constructor as `maybeKnoraHostAndPort`.
 */
def initForClient(knoraHostAndPort: String): Unit = this.synchronized {
    // Only the first caller creates the instance; later calls are no-ops.
    if (generalInstance.isEmpty) {
        generalInstance = Some(new StringFormatter(maybeKnoraHostAndPort = Some(knoraHostAndPort)))
    }
}

/**
* Initialises the singleton instance of [[StringFormatter]] for a test.
*/
@@ -0,0 +1,97 @@
/*
* Copyright © 2015-2019 the contributors (see Contributors.md).
*
* This file is part of Knora.
*
* Knora is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published
* by the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Knora is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public
* License along with Knora. If not, see <http://www.gnu.org/licenses/>.
*/

package org.knora.webapi.messages.util.rdf

import java.nio.file.attribute.BasicFileAttributes
import java.nio.file._

import org.knora.webapi.exceptions.AssertionException

/**
 * An abstract base class for classes that validate RDF models using SHACL shapes.
 *
 * When an instance is constructed, the file tree under `baseDir` is walked. Every file whose
 * name ends in `.ttl` is parsed as Turtle, converted to a [[ShaclGraphT]], and cached under its
 * path relative to `baseDir`. If `baseDir` does not exist, no graphs are loaded.
 *
 * Note for implementers: [[rdfModelToShaclGraph]] is called while this base class is being
 * constructed, i.e. before the body of the subclass constructor has run. Implementations must
 * therefore not depend on fields that are initialised in the subclass body.
 *
 * @param baseDir       the base directory that SHACL graphs are loaded from.
 * @param rdfFormatUtil an [[RdfFormatUtil]].
 * @tparam ShaclGraphT an implementation-specific representation of a graph of SHACL shapes.
 */
abstract class AbstractShaclValidator[ShaclGraphT](baseDir: Path, private val rdfFormatUtil: RdfFormatUtil) extends ShaclValidator {

    /**
     * A map of relative paths to objects representing graphs of SHACL shapes.
     * The keys are paths relative to `baseDir`.
     */
    private val shaclGraphs: Map[Path, ShaclGraphT] = if (Files.exists(baseDir)) {
        val fileVisitor = new ShaclGraphCollectingFileVisitor
        Files.walkFileTree(baseDir, fileVisitor)
        fileVisitor.visitedShaclGraphs.toMap
    } else {
        Map.empty
    }

    /**
     * Validates the default graph of an [[RdfModel]] using a graph of SHACL shapes that was
     * loaded when this validator was constructed.
     *
     * Throws an [[AssertionException]] if no graph of shapes was loaded from `shaclPath`.
     *
     * @param rdfModel  the [[RdfModel]] to be validated.
     * @param shaclPath the path of the Turtle file containing the graph of SHACL shapes,
     *                  relative to `baseDir`. The path is normalised before it is looked up,
     *                  so that, for example, `./test/person.ttl` and `test/person.ttl` refer
     *                  to the same graph.
     * @return the validation result.
     */
    def validate(rdfModel: RdfModel, shaclPath: Path): ShaclValidationResult = {
        // The keys of shaclGraphs come from baseDir.relativize and contain no "." or ".."
        // elements, so normalise the caller's path to make equivalent spellings match.
        val shaclGraph: ShaclGraphT = shaclGraphs.getOrElse(
            shaclPath.normalize,
            throw AssertionException(s"SHACL graph $shaclPath not found")
        )

        validateWithShaclGraph(
            rdfModel = rdfModel,
            shaclGraph = shaclGraph
        )
    }

    /**
     * A [[FileVisitor]] that loads graphs of SHACL shapes while walking a file tree.
     */
    private class ShaclGraphCollectingFileVisitor extends SimpleFileVisitor[Path] {
        // A collection of the graphs that have been loaded so far.
        val visitedShaclGraphs: collection.mutable.Map[Path, ShaclGraphT] = collection.mutable.Map.empty

        override def visitFile(file: Path, attrs: BasicFileAttributes): FileVisitResult = {
            // Is this a Turtle file?
            if (file.getFileName.toString.endsWith(".ttl")) {
                // Yes. Parse it.
                val shaclModel: RdfModel = rdfFormatUtil.fileToRdfModel(file = file.toFile, rdfFormat = Turtle)

                // Convert it to a ShaclGraphT.
                val shaclGraph: ShaclGraphT = rdfModelToShaclGraph(shaclModel)

                // Get its path relative to baseDir.
                val relativePath: Path = baseDir.relativize(file)

                // Add it to the collection.
                visitedShaclGraphs += relativePath -> shaclGraph
            }

            // Files that are not Turtle files are skipped; the walk always continues.
            FileVisitResult.CONTINUE
        }
    }

    /**
     * Validates the default graph of an [[RdfModel]] using a graph of SHACL shapes.
     *
     * @param rdfModel   the [[RdfModel]] to be validated.
     * @param shaclGraph a graph of SHACL shapes.
     * @return the validation result.
     */
    protected def validateWithShaclGraph(rdfModel: RdfModel, shaclGraph: ShaclGraphT): ShaclValidationResult

    /**
     * Converts the default graph of an [[RdfModel]] to a [[ShaclGraphT]].
     *
     * This method is called during construction of [[AbstractShaclValidator]], once for each
     * Turtle file that is loaded.
     *
     * @param rdfModel an [[RdfModel]] whose default graph contains SHACL shapes.
     * @return a [[ShaclGraphT]] representing the SHACL shapes.
     */
    protected def rdfModelToShaclGraph(rdfModel: RdfModel): ShaclGraphT
}

0 comments on commit bfd3192

Please sign in to comment.