From cd1615229abcf5f1673a027192085278cc848861 Mon Sep 17 00:00:00 2001 From: Jesse Hatfield Date: Thu, 21 Dec 2017 17:32:47 -0500 Subject: [PATCH 1/2] RYA-416 A new query node type to represent a MongoDB aggregation pipeline whose results can be converted to binding sets, and tools for optionally transforming some SPARQL expressions into such a node. --- dao/mongodb.rya/pom.xml | 5 + ...bstractMongoDBRdfConfigurationBuilder.java | 16 + .../rya/mongodb/MongoDBRdfConfiguration.java | 40 +- .../AggregationPipelineQueryNode.java | 856 ++++++++++++++++++ .../AggregationPipelineQueryOptimizer.java | 73 ++ .../aggregation/PipelineResultIteration.java | 135 +++ .../SparqlToPipelineTransformVisitor.java | 196 ++++ .../dao/SimpleMongoDBStorageStrategy.java | 21 +- .../AggregationPipelineQueryNodeTest.java | 331 +++++++ .../mongodb/aggregation/PipelineQueryIT.java | 421 +++++++++ .../PipelineResultIterationTest.java | 152 ++++ .../SparqlToPipelineTransformVisitorTest.java | 207 +++++ 12 files changed, 2446 insertions(+), 7 deletions(-) create mode 100644 dao/mongodb.rya/src/main/java/org/apache/rya/mongodb/aggregation/AggregationPipelineQueryNode.java create mode 100644 dao/mongodb.rya/src/main/java/org/apache/rya/mongodb/aggregation/AggregationPipelineQueryOptimizer.java create mode 100644 dao/mongodb.rya/src/main/java/org/apache/rya/mongodb/aggregation/PipelineResultIteration.java create mode 100644 dao/mongodb.rya/src/main/java/org/apache/rya/mongodb/aggregation/SparqlToPipelineTransformVisitor.java create mode 100644 dao/mongodb.rya/src/test/java/org/apache/rya/mongodb/aggregation/AggregationPipelineQueryNodeTest.java create mode 100644 dao/mongodb.rya/src/test/java/org/apache/rya/mongodb/aggregation/PipelineQueryIT.java create mode 100644 dao/mongodb.rya/src/test/java/org/apache/rya/mongodb/aggregation/PipelineResultIterationTest.java create mode 100644 dao/mongodb.rya/src/test/java/org/apache/rya/mongodb/aggregation/SparqlToPipelineTransformVisitorTest.java diff --git a/dao/mongodb.rya/pom.xml b/dao/mongodb.rya/pom.xml index 0803aa8d1..0afac815f 100644 --- a/dao/mongodb.rya/pom.xml +++ b/dao/mongodb.rya/pom.xml @@ -86,5 +86,10 @@ Tests will fail with the following error when using 32bit JVM on either Linux or junit test + + org.mockito + mockito-all + test + diff --git a/dao/mongodb.rya/src/main/java/org/apache/rya/mongodb/AbstractMongoDBRdfConfigurationBuilder.java b/dao/mongodb.rya/src/main/java/org/apache/rya/mongodb/AbstractMongoDBRdfConfigurationBuilder.java index bb14a39c1..369f7a0d8 100644 --- a/dao/mongodb.rya/src/main/java/org/apache/rya/mongodb/AbstractMongoDBRdfConfigurationBuilder.java +++ b/dao/mongodb.rya/src/main/java/org/apache/rya/mongodb/AbstractMongoDBRdfConfigurationBuilder.java @@ -43,6 +43,7 @@ public abstract class AbstractMongoDBRdfConfigurationBuilder> getOptimizers() { + List> optimizers = super.getOptimizers(); + if (getUseAggregationPipeline()) { + Class cl = AggregationPipelineQueryOptimizer.class; + @SuppressWarnings("unchecked") + Class optCl = (Class) cl; + optimizers.add(optCl); + } + return optimizers; + } +} diff --git a/dao/mongodb.rya/src/main/java/org/apache/rya/mongodb/aggregation/AggregationPipelineQueryNode.java b/dao/mongodb.rya/src/main/java/org/apache/rya/mongodb/aggregation/AggregationPipelineQueryNode.java new file mode 100644 index 000000000..7a84f5def --- /dev/null +++ b/dao/mongodb.rya/src/main/java/org/apache/rya/mongodb/aggregation/AggregationPipelineQueryNode.java @@ -0,0 +1,856 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under 
one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.rya.mongodb.aggregation; + +import static org.apache.rya.mongodb.dao.SimpleMongoDBStorageStrategy.CONTEXT; +import static org.apache.rya.mongodb.dao.SimpleMongoDBStorageStrategy.DOCUMENT_VISIBILITY; +import static org.apache.rya.mongodb.dao.SimpleMongoDBStorageStrategy.OBJECT; +import static org.apache.rya.mongodb.dao.SimpleMongoDBStorageStrategy.OBJECT_HASH; +import static org.apache.rya.mongodb.dao.SimpleMongoDBStorageStrategy.OBJECT_TYPE; +import static org.apache.rya.mongodb.dao.SimpleMongoDBStorageStrategy.PREDICATE; +import static org.apache.rya.mongodb.dao.SimpleMongoDBStorageStrategy.PREDICATE_HASH; +import static org.apache.rya.mongodb.dao.SimpleMongoDBStorageStrategy.STATEMENT_METADATA; +import static org.apache.rya.mongodb.dao.SimpleMongoDBStorageStrategy.SUBJECT; +import static org.apache.rya.mongodb.dao.SimpleMongoDBStorageStrategy.SUBJECT_HASH; +import static org.apache.rya.mongodb.dao.SimpleMongoDBStorageStrategy.TIMESTAMP; + +import java.util.Arrays; +import java.util.HashMap; +import java.util.HashSet; +import java.util.LinkedList; +import java.util.List; +import java.util.Map; +import java.util.NavigableSet; +import java.util.Set; +import java.util.UUID; +import java.util.concurrent.ConcurrentSkipListSet; +import java.util.function.Function; + +import org.apache.rya.api.domain.RyaStatement; +import org.apache.rya.api.domain.RyaType; +import org.apache.rya.api.domain.RyaURI; +import org.apache.rya.api.domain.StatementMetadata; +import org.apache.rya.api.resolver.RdfToRyaConversions; +import org.apache.rya.mongodb.MongoDbRdfConstants; +import org.apache.rya.mongodb.dao.MongoDBStorageStrategy; +import org.apache.rya.mongodb.dao.SimpleMongoDBStorageStrategy; +import org.apache.rya.mongodb.document.operators.query.ConditionalOperators; +import org.apache.rya.mongodb.document.visibility.DocumentVisibilityAdapter; +import org.bson.Document; +import org.bson.conversions.Bson; +import org.openrdf.model.Literal; +import org.openrdf.model.Resource; +import org.openrdf.model.URI; +import org.openrdf.model.Value; +import org.openrdf.model.vocabulary.XMLSchema; +import org.openrdf.query.BindingSet; +import org.openrdf.query.QueryEvaluationException; +import org.openrdf.query.algebra.Compare; +import org.openrdf.query.algebra.ExtensionElem; +import org.openrdf.query.algebra.ProjectionElem; +import org.openrdf.query.algebra.ProjectionElemList; +import org.openrdf.query.algebra.StatementPattern; +import org.openrdf.query.algebra.ValueConstant; +import org.openrdf.query.algebra.ValueExpr; +import org.openrdf.query.algebra.Var; +import org.openrdf.query.algebra.evaluation.impl.ExternalSet; + +import com.google.common.base.Objects; +import com.google.common.base.Preconditions; +import com.google.common.collect.BiMap; +import 
com.google.common.collect.HashBiMap; +import com.mongodb.BasicDBObject; +import com.mongodb.DBObject; +import com.mongodb.client.MongoCollection; +import com.mongodb.client.model.Aggregates; +import com.mongodb.client.model.BsonField; +import com.mongodb.client.model.Filters; +import com.mongodb.client.model.Projections; + +import info.aduna.iteration.CloseableIteration; + +/** + * Represents a portion of a query tree as MongoDB aggregation pipeline. Should + * be built bottom-up: start with a statement pattern implemented as a $match + * step, then add steps to the pipeline to handle higher levels of the query + * tree. Methods are provided to add certain supported query operations to the + * end of the internal pipeline. In some cases, specific arguments may be + * unsupported, in which case the pipeline is unchanged and the method returns + * false. + */ +public class AggregationPipelineQueryNode extends ExternalSet { + /** + * An aggregation result corresponding to a solution should map this key + * to an object which itself maps variable names to variable values. + */ + static final String VALUES = ""; + + /** + * An aggregation result corresponding to a solution should map this key + * to an object which itself maps variable names to the corresponding hashes + * of their values. + */ + static final String HASHES = ""; + + /** + * An aggregation result corresponding to a solution should map this key + * to an object which itself maps variable names to their datatypes, if any. + */ + static final String TYPES = ""; + + private static final String LEVEL = "derivation_level"; + private static final String[] FIELDS = { VALUES, HASHES, TYPES, LEVEL, TIMESTAMP }; + + private static final String JOINED_TRIPLE = ""; + private static final String FIELDS_MATCH = ""; + + private static final MongoDBStorageStrategy strategy = new SimpleMongoDBStorageStrategy(); + + private static final Bson DEFAULT_TYPE = new Document("$literal", XMLSchema.ANYURI.stringValue()); + private static final Bson DEFAULT_CONTEXT = new Document("$literal", ""); + private static final Bson DEFAULT_DV = DocumentVisibilityAdapter.toDBObject(MongoDbRdfConstants.EMPTY_DV); + private static final Bson DEFAULT_METADATA = new Document("$literal", + StatementMetadata.EMPTY_METADATA.toString()); + + private static boolean isValidFieldName(String name) { + return !(name == null || name.contains(".") || name.contains("$") + || name.equals("_id")); + } + + /** + * For a given statement pattern, represents a mapping from query variables + * to their corresponding parts of matching triples. If necessary, also + * substitute variable names including invalid characters with temporary + * replacements, while producing a map back to the original names. 
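+ * <p>
+ * An illustrative sketch (variable names hypothetical): for the pattern
+ * {@code ?student <urn:takes> ?course}, the mapping resolves "student" to
+ * the triple's subject field and "course" to its object field:
+ * <pre>{@code
+ * StatementVarMapping m = new StatementVarMapping(sp, HashBiMap.create());
+ * String subjField = m.valueField("student"); // the SUBJECT field
+ * String objField = m.valueField("course");   // the OBJECT field
+ * }</pre>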
+ */ + private static class StatementVarMapping { + private final Map varToTripleValue = new HashMap<>(); + private final Map varToTripleHash = new HashMap<>(); + private final Map varToTripleType = new HashMap<>(); + private final BiMap varToOriginalName; + + String valueField(String varName) { + return varToTripleValue.get(varName); + } + String hashField(String varName) { + return varToTripleHash.get(varName); + } + String typeField(String varName) { + return varToTripleType.get(varName); + } + + Set varNames() { + return varToTripleValue.keySet(); + } + + private String replace(String original) { + if (varToOriginalName.containsValue(original)) { + return varToOriginalName.inverse().get(original); + } + else { + String replacement = "field-" + UUID.randomUUID(); + varToOriginalName.put(replacement, original); + return replacement; + } + } + + private String sanitize(String name) { + if (varToOriginalName.containsValue(name)) { + return varToOriginalName.inverse().get(name); + } + else if (name != null && !isValidFieldName(name)) { + return replace(name); + } + return name; + } + + StatementVarMapping(StatementPattern sp, BiMap varToOriginalName) { + this.varToOriginalName = varToOriginalName; + if (sp.getSubjectVar() != null && !sp.getSubjectVar().hasValue()) { + String name = sanitize(sp.getSubjectVar().getName()); + varToTripleValue.put(name, SUBJECT); + varToTripleHash.put(name, SUBJECT_HASH); + } + if (sp.getPredicateVar() != null && !sp.getPredicateVar().hasValue()) { + String name = sanitize(sp.getPredicateVar().getName()); + varToTripleValue.put(name, PREDICATE); + varToTripleHash.put(name, PREDICATE_HASH); + } + if (sp.getObjectVar() != null && !sp.getObjectVar().hasValue()) { + String name = sanitize(sp.getObjectVar().getName()); + varToTripleValue.put(name, OBJECT); + varToTripleHash.put(name, OBJECT_HASH); + varToTripleType.put(name, OBJECT_TYPE); + } + if (sp.getContextVar() != null && !sp.getContextVar().hasValue()) { + String name = sanitize(sp.getContextVar().getName()); + varToTripleValue.put(name, CONTEXT); + } + } + + Bson getProjectExpression() { + return getProjectExpression(new LinkedList<>(), str -> "$" + str); + } + + Bson getProjectExpression(Iterable alsoInclude, + Function getFieldExpr) { + Document values = new Document(); + Document hashes = new Document(); + Document types = new Document(); + for (String varName : varNames()) { + values.append(varName, getFieldExpr.apply(valueField(varName))); + if (varToTripleHash.containsKey(varName)) { + hashes.append(varName, getFieldExpr.apply(hashField(varName))); + } + if (varToTripleType.containsKey(varName)) { + types.append(varName, getFieldExpr.apply(typeField(varName))); + } + } + for (String varName : alsoInclude) { + values.append(varName, 1); + hashes.append(varName, 1); + types.append(varName, 1); + } + List fields = new LinkedList<>(); + fields.add(Projections.excludeId()); + fields.add(Projections.computed(VALUES, values)); + fields.add(Projections.computed(HASHES, hashes)); + if (!types.isEmpty()) { + fields.add(Projections.computed(TYPES, types)); + } + fields.add(Projections.computed(LEVEL, new Document("$max", + Arrays.asList("$" + LEVEL, getFieldExpr.apply(LEVEL), 0)))); + fields.add(Projections.computed(TIMESTAMP, new Document("$max", + Arrays.asList("$" + TIMESTAMP, getFieldExpr.apply(TIMESTAMP), 0)))); + return Projections.fields(fields); + } + } + + /** + * Given a StatementPattern, generate an object representing the arguments + * to a "$match" command that will find matching triples. 
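+ * <p>
+ * Rough sketch of the path behavior (field names are placeholders): each
+ * field in the generated condition is rewritten with a dotted prefix, as is
+ * done when matching joined triples via {@code getMatchExpression(sp, JOINED_TRIPLE)}:
+ * <pre>{@code
+ * // no path:   { someHashField : (hash value) }
+ * // path "x":  { "x.someHashField" : (hash value) }
+ * }</pre>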
+ * @param sp The StatementPattern to search for
+ * @param path If given, specify the field that should be matched against
+ * the statement pattern, using an ordered list of field names for a nested
+ * field. E.g. to match records { "x": { "y": <statement pattern> } },
+ * pass "x" followed by "y".
+ * @return The argument of a "$match" query
+ */
+ private static BasicDBObject getMatchExpression(StatementPattern sp, String ... path) {
+ final Var subjVar = sp.getSubjectVar();
+ final Var predVar = sp.getPredicateVar();
+ final Var objVar = sp.getObjectVar();
+ final Var contextVar = sp.getContextVar();
+ RyaURI s = null;
+ RyaURI p = null;
+ RyaType o = null;
+ RyaURI c = null;
+ if (subjVar != null && subjVar.getValue() instanceof Resource) {
+ s = RdfToRyaConversions.convertResource((Resource) subjVar.getValue());
+ }
+ if (predVar != null && predVar.getValue() instanceof URI) {
+ p = RdfToRyaConversions.convertURI((URI) predVar.getValue());
+ }
+ if (objVar != null && objVar.getValue() != null) {
+ o = RdfToRyaConversions.convertValue(objVar.getValue());
+ }
+ if (contextVar != null && contextVar.getValue() instanceof URI) {
+ c = RdfToRyaConversions.convertURI((URI) contextVar.getValue());
+ }
+ RyaStatement rs = new RyaStatement(s, p, o, c);
+ DBObject obj = strategy.getQuery(rs);
+ // Add the path prefix, if given
+ if (path.length > 0) {
+ StringBuilder sb = new StringBuilder();
+ for (String str : path) {
+ sb.append(str).append(".");
+ }
+ String prefix = sb.toString();
+ Set<String> originalKeys = new HashSet<>(obj.keySet());
+ originalKeys.forEach(key -> {
+ Object value = obj.removeField(key);
+ obj.put(prefix + key, value);
+ });
+ }
+ return (BasicDBObject) obj;
+ }
+
+ private static String valueFieldExpr(String varName) {
+ return "$" + VALUES + "." + varName;
+ }
+ private static String hashFieldExpr(String varName) {
+ return "$" + HASHES + "." + varName;
+ }
+ private static String typeFieldExpr(String varName) {
+ return "$" + TYPES + "." + varName;
+ }
+ private static String joinFieldExpr(String triplePart) {
+ return "$" + JOINED_TRIPLE + "." + triplePart;
+ }
+
+ /**
+ * Get an object representing the value field of some value expression, or
+ * return null if the expression isn't supported.
+ */
+ private Object valueFieldExpr(ValueExpr expr) {
+ if (expr instanceof Var) {
+ return valueFieldExpr(((Var) expr).getName());
+ }
+ else if (expr instanceof ValueConstant) {
+ return new Document("$literal", ((ValueConstant) expr).getValue().stringValue());
+ }
+ else {
+ return null;
+ }
+ }
+
+ private final List<Bson> pipeline;
+ private final MongoCollection<Document> collection;
+ private final Set<String> assuredBindingNames;
+ private final Set<String> bindingNames;
+ private final BiMap<String, String> varToOriginalName;
+
+ private String replace(String original) {
+ if (varToOriginalName.containsValue(original)) {
+ return varToOriginalName.inverse().get(original);
+ }
+ else {
+ String replacement = "field-" + UUID.randomUUID();
+ varToOriginalName.put(replacement, original);
+ return replacement;
+ }
+ }
+
+ /**
+ * Create a pipeline query node based on a StatementPattern.
+ * @param collection The collection of triples to query.
+ * @param baseSP The leaf node in the query tree.
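+ * <p>
+ * A minimal bottom-up sketch; {@code tripleCollection} is hypothetical and
+ * {@code constant(...)} denotes a Var wrapping a fixed Value, as in the
+ * unit tests:
+ * <pre>{@code
+ * StatementPattern sp = new StatementPattern(
+ *         new Var("student"), constant(RDF.TYPE), constant(UNDERGRAD));
+ * AggregationPipelineQueryNode node =
+ *         new AggregationPipelineQueryNode(tripleCollection, sp);
+ * node.joinWith(new StatementPattern(
+ *         new Var("student"), constant(TAKES), new Var("course")));
+ * }</pre>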
+ */ + public AggregationPipelineQueryNode(MongoCollection collection, StatementPattern baseSP) { + this.collection = Preconditions.checkNotNull(collection); + Preconditions.checkNotNull(baseSP); + this.varToOriginalName = HashBiMap.create(); + StatementVarMapping mapping = new StatementVarMapping(baseSP, varToOriginalName); + this.assuredBindingNames = new HashSet<>(mapping.varNames()); + this.bindingNames = new HashSet<>(mapping.varNames()); + this.pipeline = new LinkedList<>(); + this.pipeline.add(Aggregates.match(getMatchExpression(baseSP))); + this.pipeline.add(Aggregates.project(mapping.getProjectExpression())); + } + + AggregationPipelineQueryNode(MongoCollection collection, + List pipeline, Set assuredBindingNames, + Set bindingNames, BiMap varToOriginalName) { + this.collection = Preconditions.checkNotNull(collection); + this.pipeline = Preconditions.checkNotNull(pipeline); + this.assuredBindingNames = Preconditions.checkNotNull(assuredBindingNames); + this.bindingNames = Preconditions.checkNotNull(bindingNames); + this.varToOriginalName = Preconditions.checkNotNull(varToOriginalName); + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (o instanceof AggregationPipelineQueryNode) { + AggregationPipelineQueryNode other = (AggregationPipelineQueryNode) o; + if (this.collection.equals(other.collection) + && this.assuredBindingNames.equals(other.assuredBindingNames) + && this.bindingNames.equals(other.bindingNames) + && this.varToOriginalName.equals(other.varToOriginalName) + && this.pipeline.size() == other.pipeline.size()) { + // Check pipeline steps for equality -- underlying types don't + // have well-behaved equals methods, so check for equivalent + // string representations. + for (int i = 0; i < this.pipeline.size(); i++) { + Bson doc1 = this.pipeline.get(i); + Bson doc2 = other.pipeline.get(i); + if (!doc1.toString().equals(doc2.toString())) { + return false; + } + } + return true; + } + } + return false; + } + + @Override + public int hashCode() { + return Objects.hashCode(collection, pipeline, assuredBindingNames, + bindingNames, varToOriginalName); + } + + @Override + public CloseableIteration evaluate(BindingSet bindings) + throws QueryEvaluationException { + return new PipelineResultIteration(collection.aggregate(pipeline), varToOriginalName, bindings); + } + + @Override + public Set getAssuredBindingNames() { + Set names = new HashSet<>(); + for (String name : assuredBindingNames) { + names.add(varToOriginalName.getOrDefault(name, name)); + } + return names; + } + + @Override + public Set getBindingNames() { + Set names = new HashSet<>(); + for (String name : bindingNames) { + names.add(varToOriginalName.getOrDefault(name, name)); + } + return names; + } + + @Override + public AggregationPipelineQueryNode clone() { + return new AggregationPipelineQueryNode(collection, + new LinkedList<>(pipeline), + new HashSet<>(assuredBindingNames), + new HashSet<>(bindingNames), + HashBiMap.create(varToOriginalName)); + } + + @Override + public String getSignature() { + super.getSignature(); + Set assured = getAssuredBindingNames(); + Set any = getBindingNames(); + StringBuilder sb = new StringBuilder("AggregationPipelineQueryNode (binds: "); + sb.append(String.join(", ", assured)); + if (any.size() > assured.size()) { + Set optionalBindingNames = any; + optionalBindingNames.removeAll(assured); + sb.append(" [") + .append(String.join(", ", optionalBindingNames)) + .append("]"); + } + sb.append(")\n"); + for (Bson doc : pipeline) { + 
sb.append(doc.toString()).append("\n"); + } + return sb.toString(); + } + + /** + * Get the internal list of aggregation pipeline steps. Note that documents + * resulting from this pipeline will be structured using an internal + * intermediate representation. For documents representing triples, see + * {@link #getTriplePipeline}, and for query solutions, see + * {@link #evaluate}. + * @return The current internal pipeline. + */ + List getPipeline() { + return pipeline; + } + + /** + * Add a join with an individual {@link StatementPattern} to the pipeline. + * @param sp The statement pattern to join with + * @return true if the join was successfully added to the pipeline. + */ + public boolean joinWith(StatementPattern sp) { + Preconditions.checkNotNull(sp); + // 1. Determine shared variables and new variables + StatementVarMapping spMap = new StatementVarMapping(sp, varToOriginalName); + NavigableSet sharedVars = new ConcurrentSkipListSet<>(spMap.varNames()); + sharedVars.retainAll(assuredBindingNames); + // 2. Join on one shared variable + String joinKey = sharedVars.pollFirst(); + String collectionName = collection.getNamespace().getCollectionName(); + Bson join; + if (joinKey == null) { + return false; + } + else { + join = Aggregates.lookup(collectionName, + HASHES + "." + joinKey, + spMap.hashField(joinKey), + JOINED_TRIPLE); + } + pipeline.add(join); + // 3. Unwind the joined triples so each document represents a binding + // set (solution) from the base branch and a triple that may match. + pipeline.add(Aggregates.unwind("$" + JOINED_TRIPLE)); + // 4. (Optional) If there are any shared variables that weren't used as + // the join key, project all existing fields plus a new field that + // tests the equality of those shared variables. + BasicDBObject matchOpts = getMatchExpression(sp, JOINED_TRIPLE); + if (!sharedVars.isEmpty()) { + List eqTests = new LinkedList<>(); + for (String varName : sharedVars) { + String oldField = valueFieldExpr(varName); + String newField = joinFieldExpr(spMap.valueField(varName)); + Bson eqTest = new Document("$eq", Arrays.asList(oldField, newField)); + eqTests.add(eqTest); + } + Bson eqProjectOpts = Projections.fields( + Projections.computed(FIELDS_MATCH, Filters.and(eqTests)), + Projections.include(JOINED_TRIPLE, VALUES, HASHES, TYPES, LEVEL, TIMESTAMP)); + pipeline.add(Aggregates.project(eqProjectOpts)); + matchOpts.put(FIELDS_MATCH, true); + } + // 5. Filter for solutions whose triples match the joined statement + // pattern, and, if applicable, whose additional shared variables + // match the current solution. + pipeline.add(Aggregates.match(matchOpts)); + // 6. Project the results to include variables from the new SP (with + // appropriate renaming) and variables referenced only in the base + // pipeline (with previous names). + Bson finalProjectOpts = new StatementVarMapping(sp, varToOriginalName) + .getProjectExpression(assuredBindingNames, + str -> joinFieldExpr(str)); + assuredBindingNames.addAll(spMap.varNames()); + bindingNames.addAll(spMap.varNames()); + pipeline.add(Aggregates.project(finalProjectOpts)); + return true; + } + + /** + * Add a SPARQL projection or multi-projection operation to the pipeline. + * The number of documents produced by the pipeline after this operation + * will be the number of documents entering this stage (the number of + * intermediate results) multiplied by the number of + * {@link ProjectionElemList}s supplied here. + * @param projections One or more projections, i.e. 
mappings from the result + * at this stage of the query into a set of variables. + * @return true if the projection(s) were added to the pipeline. + */ + public boolean project(Iterable projections) { + if (projections == null || !projections.iterator().hasNext()) { + return false; + } + List projectOpts = new LinkedList<>(); + Set bindingNamesUnion = new HashSet<>(); + Set bindingNamesIntersection = null; + for (ProjectionElemList projection : projections) { + Document valueDoc = new Document(); + Document hashDoc = new Document(); + Document typeDoc = new Document(); + Set projectionBindingNames = new HashSet<>(); + for (ProjectionElem elem : projection.getElements()) { + String to = elem.getTargetName(); + // If the 'to' name is invalid, replace it internally + if (!isValidFieldName(to)) { + to = replace(to); + } + String from = elem.getSourceName(); + // If the 'from' name is invalid, use the internal substitute + if (varToOriginalName.containsValue(from)) { + from = varToOriginalName.inverse().get(from); + } + projectionBindingNames.add(to); + if (to.equals(from)) { + valueDoc.append(to, 1); + hashDoc.append(to, 1); + typeDoc.append(to, 1); + } + else { + valueDoc.append(to, valueFieldExpr(from)); + hashDoc.append(to, hashFieldExpr(from)); + typeDoc.append(to, typeFieldExpr(from)); + } + } + bindingNamesUnion.addAll(projectionBindingNames); + if (bindingNamesIntersection == null) { + bindingNamesIntersection = new HashSet<>(projectionBindingNames); + } + else { + bindingNamesIntersection.retainAll(projectionBindingNames); + } + projectOpts.add(new Document() + .append(VALUES, valueDoc) + .append(HASHES, hashDoc) + .append(TYPES, typeDoc) + .append(LEVEL, "$" + LEVEL) + .append(TIMESTAMP, "$" + TIMESTAMP)); + } + if (projectOpts.size() == 1) { + pipeline.add(Aggregates.project(projectOpts.get(0))); + } + else { + String listKey = "PROJECTIONS"; + Bson projectIndividual = Projections.fields( + Projections.computed(VALUES, "$" + listKey + "." + VALUES), + Projections.computed(HASHES, "$" + listKey + "." + HASHES), + Projections.computed(TYPES, "$" + listKey + "." + TYPES), + Projections.include(LEVEL), + Projections.include(TIMESTAMP)); + pipeline.add(Aggregates.project(Projections.computed(listKey, projectOpts))); + pipeline.add(Aggregates.unwind("$" + listKey)); + pipeline.add(Aggregates.project(projectIndividual)); + } + assuredBindingNames.clear(); + bindingNames.clear(); + assuredBindingNames.addAll(bindingNamesIntersection); + bindingNames.addAll(bindingNamesUnion); + return true; + } + + /** + * Add a SPARQL extension to the pipeline, if possible. An extension adds + * some number of variables to the result. Adds a "$project" step to the + * pipeline, but differs from the SPARQL project operation in that + * 1) pre-existing variables are always kept, and 2) values of new variables + * are defined by expressions, which may be more complex than simply + * variable names. Not all expressions are supported. If unsupported + * expression types are used in the extension, the pipeline will remain + * unchanged and this method will return false. + * @param extensionElements A list of new variables and their expressions + * @return True if the extension was successfully converted into a pipeline + * step, false otherwise. 
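+ * <p>
+ * Sketch mirroring the accompanying unit tests: bind "subject" to an
+ * existing variable and "predicate" to a constant:
+ * <pre>{@code
+ * boolean ok = node.extend(Arrays.asList(
+ *         new ExtensionElem(new Var("x"), "subject"),
+ *         new ExtensionElem(new ValueConstant(RDF.TYPE), "predicate")));
+ * }</pre>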
+ */ + public boolean extend(Iterable extensionElements) { + List valueFields = new LinkedList<>(); + List hashFields = new LinkedList<>(); + List typeFields = new LinkedList<>(); + for (String varName : bindingNames) { + valueFields.add(Projections.include(varName)); + hashFields.add(Projections.include(varName)); + typeFields.add(Projections.include(varName)); + } + Set newVarNames = new HashSet<>(); + for (ExtensionElem elem : extensionElements) { + String name = elem.getName(); + if (!isValidFieldName(name)) { + // If the field name is invalid, replace it internally + name = replace(name); + } + // We can only handle certain kinds of value expressions; return + // failure for any others. + ValueExpr expr = elem.getExpr(); + final Object valueField; + final Object hashField; + final Object typeField; + if (expr instanceof Var) { + String varName = ((Var) expr).getName(); + valueField = "$" + varName; + hashField = "$" + varName; + typeField = "$" + varName; + } + else if (expr instanceof ValueConstant) { + Value val = ((ValueConstant) expr).getValue(); + valueField = new Document("$literal", val.stringValue()); + hashField = new Document("$literal", SimpleMongoDBStorageStrategy.hash(val.stringValue())); + if (val instanceof Literal) { + typeField = new Document("$literal", ((Literal) val).getDatatype().stringValue()); + } + else { + typeField = null; + } + } + else { + // if not understood, return failure + return false; + } + valueFields.add(Projections.computed(name, valueField)); + hashFields.add(Projections.computed(name, hashField)); + if (typeField != null) { + typeFields.add(Projections.computed(name, typeField)); + } + newVarNames.add(name); + } + assuredBindingNames.addAll(newVarNames); + bindingNames.addAll(newVarNames); + Bson projectOpts = Projections.fields( + Projections.computed(VALUES, Projections.fields(valueFields)), + Projections.computed(HASHES, Projections.fields(hashFields)), + Projections.computed(TYPES, Projections.fields(typeFields)), + Projections.include(LEVEL), + Projections.include(TIMESTAMP)); + pipeline.add(Aggregates.project(projectOpts)); + return true; + } + + /** + * Add a SPARQL filter to the pipeline, if possible. A filter eliminates + * results that don't satisfy a given condition. Not all conditional + * expressions are supported. If unsupported expressions are used in the + * filter, the pipeline will remain unchanged and this method will return + * false. Currently only supports binary {@link Compare} conditions among + * variables and/or literals. + * @param condition The filter condition + * @return True if the filter was successfully converted into a pipeline + * step, false otherwise. 
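+ * <p>
+ * Sketch, as exercised in the unit tests: a variable-to-variable comparison
+ * converts, while an unsupported condition leaves the pipeline untouched:
+ * <pre>{@code
+ * node.filter(new Compare(new Var("x"), new Var("y"),
+ *         Compare.CompareOp.EQ));           // true, stages appended
+ * node.filter(new IsLiteral(new Var("x"))); // false, pipeline unchanged
+ * }</pre>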
+ */
+ public boolean filter(ValueExpr condition) {
+ if (condition instanceof Compare) {
+ Compare compare = (Compare) condition;
+ Compare.CompareOp operator = compare.getOperator();
+ Object leftArg = valueFieldExpr(compare.getLeftArg());
+ Object rightArg = valueFieldExpr(compare.getRightArg());
+ if (leftArg == null || rightArg == null) {
+ // unsupported value expression, can't convert filter
+ return false;
+ }
+ final String opFunc;
+ switch (operator) {
+ case EQ:
+ opFunc = "$eq";
+ break;
+ case NE:
+ opFunc = "$ne";
+ break;
+ case LT:
+ opFunc = "$lt";
+ break;
+ case LE:
+ opFunc = "$lte";
+ break;
+ case GT:
+ opFunc = "$gt";
+ break;
+ case GE:
+ opFunc = "$gte";
+ break;
+ default:
+ // unrecognized comparison operator, can't convert filter
+ return false;
+ }
+ Document compareDoc = new Document(opFunc, Arrays.asList(leftArg, rightArg));
+ pipeline.add(Aggregates.project(Projections.fields(
+ Projections.computed("FILTER", compareDoc),
+ Projections.include(VALUES, HASHES, TYPES, LEVEL, TIMESTAMP))));
+ pipeline.add(Aggregates.match(new Document("FILTER", true)));
+ pipeline.add(Aggregates.project(Projections.fields(
+ Projections.include(VALUES, HASHES, TYPES, LEVEL, TIMESTAMP))));
+ return true;
+ }
+ return false;
+ }
+
+ /**
+ * Add a $group step to filter out redundant solutions.
+ * @return True if the distinct operation was successfully appended.
+ */
+ public boolean distinct() {
+ List<String> key = new LinkedList<>();
+ for (String varName : bindingNames) {
+ key.add(hashFieldExpr(varName));
+ }
+ List<BsonField> reduceOps = new LinkedList<>();
+ for (String field : FIELDS) {
+ reduceOps.add(new BsonField(field, new Document("$first", "$" + field)));
+ }
+ pipeline.add(Aggregates.group(new Document("$concat", key), reduceOps));
+ return true;
+ }
+
+ /**
+ * Add a step to the end of the current pipeline which prunes the results
+ * according to the recorded derivation level of their sources. At least one
+ * triple that was used to construct the result must have a derivation level
+ * at least as high as the parameter, indicating that it was derived via
+ * that many steps from the original data. (A value of zero is equivalent to
+ * input data that was not derived at all.) Use in conjunction with
+ * getTriplePipeline (which sets source level for generated triples) to
+ * avoid repeatedly deriving the same results.
+ * @param requiredLevel Required derivation depth. Reject a solution to the
+ * query if all of the triples involved in producing that solution have a
+ * lower derivation depth than this. If zero, does nothing.
+ */
+ public void requireSourceDerivationDepth(int requiredLevel) {
+ if (requiredLevel > 0) {
+ pipeline.add(Aggregates.match(new Document(LEVEL,
+ new Document("$gte", requiredLevel))));
+ }
+ }
+
+ /**
+ * Add a step to the end of the current pipeline which prunes the results
+ * according to the timestamps of their sources. At least one triple that
+ * was used to construct the result must have a timestamp at least as
+ * recent as the parameter. Use in iterative applications to avoid deriving
+ * solutions that would have been generated in an earlier iteration.
+ * @param t Minimum required timestamp. Reject a solution to the query if
+ * all of the triples involved in producing that solution have an earlier
+ * timestamp than this.
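+ * <p>
+ * Sketch of iterative use (the timestamp bookkeeping is hypothetical):
+ * <pre>{@code
+ * long previousPass = System.currentTimeMillis(); // before deriving
+ * // ... derive and insert triples ...
+ * node.requireSourceTimestamp(previousPass); // next pass: require newer sources
+ * }</pre>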
+ */ + public void requireSourceTimestamp(long t) { + pipeline.add(Aggregates.match(new Document(TIMESTAMP, + new Document("$gte", t)))); + } + + /** + * Given that the current state of the pipeline produces data that can be + * interpreted as triples, add a project step to map each result from the + * intermediate result structure to a structure that can be stored in the + * triple store. Does not modify the internal pipeline, which will still + * produce intermediate results suitable for query evaluation. + * @param timestamp Attach this timestamp to the resulting triples. + * @param requireNew If true, add an additional step to check constructed + * triples against existing triples and only include new ones in the + * result. Adds a potentially expensive $lookup step. + * @throws IllegalStateException if the results produced by the current + * pipeline do not have variable names allowing them to be interpreted as + * triples (i.e. "subject", "predicate", and "object"). + */ + public List getTriplePipeline(long timestamp, boolean requireNew) { + if (!assuredBindingNames.contains(SUBJECT) + || !assuredBindingNames.contains(PREDICATE) + || !assuredBindingNames.contains(OBJECT)) { + throw new IllegalStateException("Current pipeline does not produce " + + "records that can be converted into triples.\n" + + "Required variable names: <" + SUBJECT + ", " + PREDICATE + + ", " + OBJECT + ">\nCurrent variable names: " + + assuredBindingNames); + } + List triplePipeline = new LinkedList<>(pipeline); + List fields = new LinkedList<>(); + fields.add(Projections.computed(SUBJECT, valueFieldExpr(SUBJECT))); + fields.add(Projections.computed(SUBJECT_HASH, hashFieldExpr(SUBJECT))); + fields.add(Projections.computed(PREDICATE, valueFieldExpr(PREDICATE))); + fields.add(Projections.computed(PREDICATE_HASH, hashFieldExpr(PREDICATE))); + fields.add(Projections.computed(OBJECT, valueFieldExpr(OBJECT))); + fields.add(Projections.computed(OBJECT_HASH, hashFieldExpr(OBJECT))); + fields.add(Projections.computed(OBJECT_TYPE, + ConditionalOperators.ifNull(typeFieldExpr(OBJECT), DEFAULT_TYPE))); + fields.add(Projections.computed(CONTEXT, DEFAULT_CONTEXT)); + fields.add(Projections.computed(STATEMENT_METADATA, DEFAULT_METADATA)); + fields.add(DEFAULT_DV); + fields.add(Projections.computed(TIMESTAMP, new Document("$literal", timestamp))); + fields.add(Projections.computed(LEVEL, new Document("$add", Arrays.asList("$" + LEVEL, 1)))); + triplePipeline.add(Aggregates.project(Projections.fields(fields))); + if (requireNew) { + // Prune any triples that already exist in the data store + String collectionName = collection.getNamespace().getCollectionName(); + Bson includeAll = Projections.include(SUBJECT, SUBJECT_HASH, + PREDICATE, PREDICATE_HASH, OBJECT, OBJECT_HASH, + OBJECT_TYPE, CONTEXT, STATEMENT_METADATA, + DOCUMENT_VISIBILITY, TIMESTAMP, LEVEL); + List eqTests = new LinkedList<>(); + eqTests.add(new Document("$eq", Arrays.asList("$$this." + PREDICATE_HASH, "$" + PREDICATE_HASH))); + eqTests.add(new Document("$eq", Arrays.asList("$$this." 
+ OBJECT_HASH, "$" + OBJECT_HASH))); + Bson redundantFilter = new Document("$filter", new Document("input", "$" + JOINED_TRIPLE) + .append("as", "this").append("cond", new Document("$and", eqTests))); + triplePipeline.add(Aggregates.lookup(collectionName, SUBJECT_HASH, + SUBJECT_HASH, JOINED_TRIPLE)); + String numRedundant = "REDUNDANT"; + triplePipeline.add(Aggregates.project(Projections.fields(includeAll, + Projections.computed(numRedundant, new Document("$size", redundantFilter))))); + triplePipeline.add(Aggregates.match(Filters.eq(numRedundant, 0))); + triplePipeline.add(Aggregates.project(Projections.fields(includeAll))); + } + return triplePipeline; + } +} diff --git a/dao/mongodb.rya/src/main/java/org/apache/rya/mongodb/aggregation/AggregationPipelineQueryOptimizer.java b/dao/mongodb.rya/src/main/java/org/apache/rya/mongodb/aggregation/AggregationPipelineQueryOptimizer.java new file mode 100644 index 000000000..fb1f558ba --- /dev/null +++ b/dao/mongodb.rya/src/main/java/org/apache/rya/mongodb/aggregation/AggregationPipelineQueryOptimizer.java @@ -0,0 +1,73 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.rya.mongodb.aggregation; + +import org.apache.hadoop.conf.Configurable; +import org.apache.hadoop.conf.Configuration; +import org.apache.rya.mongodb.StatefulMongoDBRdfConfiguration; +import org.openrdf.query.BindingSet; +import org.openrdf.query.Dataset; +import org.openrdf.query.algebra.TupleExpr; +import org.openrdf.query.algebra.evaluation.QueryOptimizer; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.google.common.base.Preconditions; + +/** + * MongoDB-specific query optimizer that replaces part or all of a SPARQL query + * tree with a MongoDB aggregation pipeline. + *
<p>
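+ * A minimal manual-application sketch; {@code statefulConf}, {@code tupleExpr},
+ * {@code dataset}, and {@code bindings} are hypothetical placeholders:
+ * <pre>{@code
+ * AggregationPipelineQueryOptimizer optimizer = new AggregationPipelineQueryOptimizer();
+ * optimizer.setConf(statefulConf); // must be a StatefulMongoDBRdfConfiguration
+ * optimizer.optimize(tupleExpr, dataset, bindings);
+ * }</pre>
+ * <p>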
+ * Transforms query trees using {@link SparqlToPipelineTransformVisitor}. If + * possible, this visitor will replace portions of the query tree, or the entire + * query, with an equivalent aggregation pipeline (contained in an + * {@link AggregationPipelineQueryNode}), thereby allowing query logic to be + * evaluated by the MongoDB server rather than by the client. + */ +public class AggregationPipelineQueryOptimizer implements QueryOptimizer, Configurable { + private StatefulMongoDBRdfConfiguration conf; + private Logger logger = LoggerFactory.getLogger(getClass()); + + @Override + public void optimize(TupleExpr tupleExpr, Dataset dataset, BindingSet bindings) { + SparqlToPipelineTransformVisitor pipelineVisitor = new SparqlToPipelineTransformVisitor(conf); + try { + tupleExpr.visit(pipelineVisitor); + } catch (Exception e) { + logger.error("Error attempting to transform query using the aggregation pipeline", e); + } + } + + /** + * @throws IllegalArgumentException if conf is not a {@link StatefulMongoDBRdfConfiguration}. + */ + @Override + public void setConf(Configuration conf) { + Preconditions.checkNotNull(conf); + Preconditions.checkArgument(conf instanceof StatefulMongoDBRdfConfiguration, + "Expected an instance of %s; received %s", + StatefulMongoDBRdfConfiguration.class.getName(), conf.getClass().getName()); + this.conf = (StatefulMongoDBRdfConfiguration) conf; + } + + @Override + public StatefulMongoDBRdfConfiguration getConf() { + return conf; + } +} diff --git a/dao/mongodb.rya/src/main/java/org/apache/rya/mongodb/aggregation/PipelineResultIteration.java b/dao/mongodb.rya/src/main/java/org/apache/rya/mongodb/aggregation/PipelineResultIteration.java new file mode 100644 index 000000000..c533efce6 --- /dev/null +++ b/dao/mongodb.rya/src/main/java/org/apache/rya/mongodb/aggregation/PipelineResultIteration.java @@ -0,0 +1,135 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.rya.mongodb.aggregation; + +import java.util.Map; + +import org.bson.Document; +import org.openrdf.model.Value; +import org.openrdf.model.ValueFactory; +import org.openrdf.model.impl.ValueFactoryImpl; +import org.openrdf.model.vocabulary.XMLSchema; +import org.openrdf.query.Binding; +import org.openrdf.query.BindingSet; +import org.openrdf.query.QueryEvaluationException; +import org.openrdf.query.algebra.evaluation.QueryBindingSet; + +import com.google.common.base.Preconditions; +import com.mongodb.client.AggregateIterable; +import com.mongodb.client.MongoCursor; + +import info.aduna.iteration.CloseableIteration; + +/** + * An iterator that converts the documents resulting from an + * {@link AggregationPipelineQueryNode} into {@link BindingSet}s. 
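+ * <p>
+ * Normally obtained through {@link AggregationPipelineQueryNode#evaluate};
+ * a direct-construction sketch with hypothetical arguments:
+ * <pre>{@code
+ * CloseableIteration<BindingSet, QueryEvaluationException> iter =
+ *         new PipelineResultIteration(collection.aggregate(pipeline),
+ *                 varToOriginalName, new QueryBindingSet());
+ * while (iter.hasNext()) {
+ *     BindingSet solution = iter.next();
+ * }
+ * iter.close();
+ * }</pre>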
+ */ +public class PipelineResultIteration implements CloseableIteration { + private static final int BATCH_SIZE = 1000; + private static final ValueFactory VF = ValueFactoryImpl.getInstance(); + + private final MongoCursor cursor; + private final Map varToOriginalName; + private final BindingSet bindings; + private BindingSet nextSolution = null; + + /** + * Constructor. + * @param aggIter Iterator of documents in AggregationPipelineQueryNode's + * intermediate solution representation. + * @param varToOriginalName A mapping from field names in the pipeline + * result documents to equivalent variable names in the original query. + * Where an entry does not exist for a field, the field name and variable + * name are assumed to be the same. + * @param bindings A partial solution. May be empty. + */ + public PipelineResultIteration(AggregateIterable aggIter, + Map varToOriginalName, + BindingSet bindings) { + this.varToOriginalName = Preconditions.checkNotNull(varToOriginalName); + this.bindings = Preconditions.checkNotNull(bindings); + Preconditions.checkNotNull(aggIter); + aggIter.batchSize(BATCH_SIZE); + this.cursor = aggIter.iterator(); + } + + private void lookahead() { + while (nextSolution == null && cursor.hasNext()) { + nextSolution = docToBindingSet(cursor.next()); + } + } + + @Override + public boolean hasNext() throws QueryEvaluationException { + lookahead(); + return nextSolution != null; + } + + @Override + public BindingSet next() throws QueryEvaluationException { + lookahead(); + BindingSet solution = nextSolution; + nextSolution = null; + return solution; + } + + /** + * @throws UnsupportedOperationException always. + */ + @Override + public void remove() throws QueryEvaluationException { + throw new UnsupportedOperationException("remove() undefined for query result iteration"); + } + + @Override + public void close() throws QueryEvaluationException { + cursor.close(); + } + + private QueryBindingSet docToBindingSet(Document result) { + QueryBindingSet bindingSet = new QueryBindingSet(bindings); + Document valueSet = result.get(AggregationPipelineQueryNode.VALUES, Document.class); + Document typeSet = result.get(AggregationPipelineQueryNode.TYPES, Document.class); + if (valueSet != null) { + for (Map.Entry entry : valueSet.entrySet()) { + String fieldName = entry.getKey(); + String valueString = entry.getValue().toString(); + String typeString = typeSet == null ? null : typeSet.getString(fieldName); + String varName = varToOriginalName.getOrDefault(fieldName, fieldName); + Value varValue; + if (typeString == null || typeString.equals(XMLSchema.ANYURI.stringValue())) { + varValue = VF.createURI(valueString); + } + else { + varValue = VF.createLiteral(valueString, VF.createURI(typeString)); + } + Binding existingBinding = bindingSet.getBinding(varName); + // If this variable is not already bound, add it. + if (existingBinding == null) { + bindingSet.addBinding(varName, varValue); + } + // If it's bound to something else, the solutions are incompatible. 
+ else if (!existingBinding.getValue().equals(varValue)) { + return null; + } + } + } + return bindingSet; + } +} diff --git a/dao/mongodb.rya/src/main/java/org/apache/rya/mongodb/aggregation/SparqlToPipelineTransformVisitor.java b/dao/mongodb.rya/src/main/java/org/apache/rya/mongodb/aggregation/SparqlToPipelineTransformVisitor.java new file mode 100644 index 000000000..b7f5a67b8 --- /dev/null +++ b/dao/mongodb.rya/src/main/java/org/apache/rya/mongodb/aggregation/SparqlToPipelineTransformVisitor.java @@ -0,0 +1,196 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.rya.mongodb.aggregation; + +import java.util.Arrays; + +import org.apache.rya.mongodb.StatefulMongoDBRdfConfiguration; +import org.bson.Document; +import org.openrdf.query.algebra.Distinct; +import org.openrdf.query.algebra.Extension; +import org.openrdf.query.algebra.Filter; +import org.openrdf.query.algebra.Join; +import org.openrdf.query.algebra.MultiProjection; +import org.openrdf.query.algebra.Projection; +import org.openrdf.query.algebra.Reduced; +import org.openrdf.query.algebra.StatementPattern; +import org.openrdf.query.algebra.helpers.QueryModelVisitorBase; + +import com.google.common.base.Preconditions; +import com.mongodb.MongoClient; +import com.mongodb.client.MongoCollection; +import com.mongodb.client.MongoDatabase; + +/** + * Visitor that transforms a SPARQL query tree by replacing as much of the tree + * as possible with one or more {@code AggregationPipelineQueryNode}s. + *
<p>
+ * Each {@link AggregationPipelineQueryNode} contains a MongoDB aggregation + * pipeline which is equivalent to the replaced portion of the original query. + * Evaluating this node executes the pipeline and converts the results into + * query solutions. If only part of the query was transformed, the remaining + * query logic (higher up in the query tree) can be applied to those + * intermediate solutions as normal. + *
<p>
+ * In general, processes the tree in bottom-up order: A leaf node + * ({@link StatementPattern}) is replaced with a pipeline that matches the + * corresponding statements. Then, if the parent node's semantics are supported + * by the visitor, stages are appended to the pipeline and the subtree at the + * parent node is replaced with the extended pipeline. This continues up the + * tree until reaching a node that cannot be transformed, in which case that + * node's child is now a single {@code AggregationPipelineQueryNode} (a leaf + * node) instead of the previous subtree, or until the entire tree has been + * subsumed into a single pipeline node. + *
<p>
+ * Nodes which are transformed into pipeline stages:
+ * <ul>
+ * <li>A {@code StatementPattern} node forms the beginning of each pipeline.
+ * <li>Single-argument operations {@link Projection}, {@link MultiProjection},
+ * {@link Extension}, {@link Distinct}, and {@link Reduced} will be transformed
+ * into pipeline stages whenever the child {@link TupleExpr} represents a
+ * pipeline.
+ * <li>A {@link Filter} operation will be appended to the pipeline when its
+ * child {@code TupleExpr} represents a pipeline and the filter condition is a
+ * type of {@link ValueExpr} understood by {@code AggregationPipelineQueryNode}.
+ * <li>A {@link Join} operation will be appended to the pipeline when one child
+ * is a {@code StatementPattern} and the other is an
+ * {@code AggregationPipelineQueryNode}.
+ * </ul>
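+ * <p>
+ * A minimal usage sketch (the query string and triple collection are
+ * hypothetical):
+ * <pre>{@code
+ * TupleExpr tree = new SPARQLParser().parseQuery(sparql, null).getTupleExpr();
+ * tree.visit(new SparqlToPipelineTransformVisitor(tripleCollection));
+ * // tree now contains pipeline nodes wherever conversion succeeded
+ * }</pre>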
+ */ +public class SparqlToPipelineTransformVisitor extends QueryModelVisitorBase { + private final MongoCollection inputCollection; + + /** + * Instantiate a visitor directly from a {@link MongoCollection}. + * @param inputCollection Stores triples. + */ + public SparqlToPipelineTransformVisitor(MongoCollection inputCollection) { + this.inputCollection = Preconditions.checkNotNull(inputCollection); + } + + /** + * Instantiate a visitor from a {@link MongoDBRdfConfiguration}. + * @param conf Contains database connection information. + */ + public SparqlToPipelineTransformVisitor(StatefulMongoDBRdfConfiguration conf) { + Preconditions.checkNotNull(conf); + MongoClient mongo = conf.getMongoClient(); + MongoDatabase db = mongo.getDatabase(conf.getMongoDBName()); + this.inputCollection = db.getCollection(conf.getTriplesCollectionName()); + } + + @Override + public void meet(StatementPattern sp) { + sp.replaceWith(new AggregationPipelineQueryNode(inputCollection, sp)); + } + + @Override + public void meet(Join join) throws Exception { + // If one branch is a single statement pattern, then try replacing the + // other with a pipeline. + AggregationPipelineQueryNode pipelineNode = null; + StatementPattern joinWithSP = null; + if (join.getRightArg() instanceof StatementPattern) { + join.getLeftArg().visit(this); + if (join.getLeftArg() instanceof AggregationPipelineQueryNode) { + pipelineNode = (AggregationPipelineQueryNode) join.getLeftArg(); + joinWithSP = (StatementPattern) join.getRightArg(); + } + } + else if (join.getLeftArg() instanceof StatementPattern) { + join.getRightArg().visit(this); + if (join.getRightArg() instanceof AggregationPipelineQueryNode) { + pipelineNode = (AggregationPipelineQueryNode) join.getRightArg(); + joinWithSP = (StatementPattern) join.getLeftArg(); + } + } + else { + // Otherwise, visit the children to try to replace smaller subtrees + join.visitChildren(this); + } + // If this is now a join between a pipeline node and a statement + // pattern, add the join step at the end of the pipeline, and replace + // this node with the extended pipeline node. 
+ if (pipelineNode != null && joinWithSP != null && pipelineNode.joinWith(joinWithSP)) { + join.replaceWith(pipelineNode); + } + } + + @Override + public void meet(Projection projectionNode) throws Exception { + projectionNode.visitChildren(this); + if (projectionNode.getArg() instanceof AggregationPipelineQueryNode && projectionNode.getParentNode() != null) { + AggregationPipelineQueryNode pipelineNode = (AggregationPipelineQueryNode) projectionNode.getArg(); + if (pipelineNode.project(Arrays.asList(projectionNode.getProjectionElemList()))) { + projectionNode.replaceWith(pipelineNode); + } + } + } + + @Override + public void meet(MultiProjection projectionNode) throws Exception { + projectionNode.visitChildren(this); + if (projectionNode.getArg() instanceof AggregationPipelineQueryNode && projectionNode.getParentNode() != null) { + AggregationPipelineQueryNode pipelineNode = (AggregationPipelineQueryNode) projectionNode.getArg(); + if (pipelineNode.project(projectionNode.getProjections())) { + projectionNode.replaceWith(pipelineNode); + } + } + } + + @Override + public void meet(Extension extensionNode) throws Exception { + extensionNode.visitChildren(this); + if (extensionNode.getArg() instanceof AggregationPipelineQueryNode && extensionNode.getParentNode() != null) { + AggregationPipelineQueryNode pipelineNode = (AggregationPipelineQueryNode) extensionNode.getArg(); + if (pipelineNode.extend(extensionNode.getElements())) { + extensionNode.replaceWith(pipelineNode); + } + } + } + + @Override + public void meet(Reduced reducedNode) throws Exception { + reducedNode.visitChildren(this); + if (reducedNode.getArg() instanceof AggregationPipelineQueryNode && reducedNode.getParentNode() != null) { + reducedNode.replaceWith(reducedNode.getArg()); + } + } + + @Override + public void meet(Distinct distinctNode) throws Exception { + distinctNode.visitChildren(this); + if (distinctNode.getArg() instanceof AggregationPipelineQueryNode && distinctNode.getParentNode() != null) { + AggregationPipelineQueryNode pipelineNode = (AggregationPipelineQueryNode) distinctNode.getArg(); + pipelineNode.distinct(); + distinctNode.replaceWith(pipelineNode); + } + } + + @Override + public void meet(Filter filterNode) throws Exception { + filterNode.visitChildren(this); + if (filterNode.getArg() instanceof AggregationPipelineQueryNode && filterNode.getParentNode() != null) { + AggregationPipelineQueryNode pipelineNode = (AggregationPipelineQueryNode) filterNode.getArg(); + if (pipelineNode.filter(filterNode.getCondition())) { + filterNode.replaceWith(pipelineNode); + } + } + } +} diff --git a/dao/mongodb.rya/src/main/java/org/apache/rya/mongodb/dao/SimpleMongoDBStorageStrategy.java b/dao/mongodb.rya/src/main/java/org/apache/rya/mongodb/dao/SimpleMongoDBStorageStrategy.java index db331817b..ecad9c686 100644 --- a/dao/mongodb.rya/src/main/java/org/apache/rya/mongodb/dao/SimpleMongoDBStorageStrategy.java +++ b/dao/mongodb.rya/src/main/java/org/apache/rya/mongodb/dao/SimpleMongoDBStorageStrategy.java @@ -63,6 +63,15 @@ public class SimpleMongoDBStorageStrategy implements MongoDBStorageStrategy collection; + + @Before + @SuppressWarnings("unchecked") + public void setUp() { + collection = Mockito.mock(MongoCollection.class); + Mockito.when(collection.getNamespace()).thenReturn(new MongoNamespace("db", "collection")); + } + + @Test + public void testEquals() { + final AggregationPipelineQueryNode node1 = new AggregationPipelineQueryNode( + collection, + new LinkedList<>(), + Sets.newHashSet("x", "y"), + 
Sets.newHashSet("x", "y", "opt"), + HashBiMap.create()); + final AggregationPipelineQueryNode node2 = new AggregationPipelineQueryNode( + collection, + new LinkedList<>(), + Sets.newHashSet("x", "y"), + Sets.newHashSet("x", "y", "opt"), + HashBiMap.create()); + Assert.assertEquals(node1, node2); + Assert.assertEquals(node1.hashCode(), node2.hashCode()); + final AggregationPipelineQueryNode diff1 = new AggregationPipelineQueryNode( + collection, + new LinkedList<>(), + Sets.newHashSet("x", "y"), + Sets.newHashSet("x", "y"), + HashBiMap.create()); + final AggregationPipelineQueryNode diff2 = new AggregationPipelineQueryNode( + collection, + Arrays.asList(new Document()), + Sets.newHashSet("x", "y"), + Sets.newHashSet("x", "y", "opt"), + HashBiMap.create()); + HashBiMap varMapping = HashBiMap.create(); + varMapping.put("field-x", "x"); + final AggregationPipelineQueryNode diff3 = new AggregationPipelineQueryNode( + collection, + Arrays.asList(new Document()), + Sets.newHashSet("x", "y"), + Sets.newHashSet("x", "y", "opt"), + varMapping); + Assert.assertNotEquals(diff1, node1); + Assert.assertNotEquals(diff2, node1); + Assert.assertNotEquals(diff3, node1); + node1.joinWith(new StatementPattern(new Var("x"), constant(TAKES), new Var("c"))); + node2.joinWith(new StatementPattern(new Var("x"), constant(TAKES), new Var("c"))); + Assert.assertEquals(node1, node2); + node2.joinWith(new StatementPattern(new Var("x"), constant(TAKES), new Var("c"))); + Assert.assertNotEquals(node1, node2); + } + + @Test + public void testClone() { + final AggregationPipelineQueryNode base = new AggregationPipelineQueryNode( + collection, + new LinkedList<>(), + Sets.newHashSet("x", "y"), + Sets.newHashSet("x", "y", "opt"), + HashBiMap.create()); + final AggregationPipelineQueryNode copy = base.clone(); + Assert.assertEquals(base, copy); + copy.getPipeline().add(new Document("$project", new Document())); + Assert.assertNotEquals(base, copy); + base.getPipeline().add(new Document("$project", new Document())); + Assert.assertEquals(base, copy); + } + + @Test + public void testStatementPattern() throws Exception { + // All variables + StatementPattern sp = new StatementPattern(new Var("s"), new Var("p"), new Var("o")); + AggregationPipelineQueryNode node = new AggregationPipelineQueryNode(collection, sp); + Assert.assertEquals(Sets.newHashSet("s", "p", "o"), node.getBindingNames()); + Assert.assertEquals(Sets.newHashSet("s", "p", "o"), node.getAssuredBindingNames()); + Assert.assertEquals(2, node.getPipeline().size()); + // All constants + sp = new StatementPattern(constant(VF.createURI("urn:Alice")), constant(RDF.TYPE), constant(UNDERGRAD)); + node = new AggregationPipelineQueryNode(collection, sp); + Assert.assertEquals(Sets.newHashSet(), node.getBindingNames()); + Assert.assertEquals(Sets.newHashSet(), node.getAssuredBindingNames()); + Assert.assertEquals(2, node.getPipeline().size()); + // Mixture + sp = new StatementPattern(new Var("student"), constant(RDF.TYPE), constant(UNDERGRAD)); + node = new AggregationPipelineQueryNode(collection, sp); + Assert.assertEquals(Sets.newHashSet("student"), node.getBindingNames()); + Assert.assertEquals(Sets.newHashSet("student"), node.getAssuredBindingNames()); + Assert.assertEquals(2, node.getPipeline().size()); + } + + @Test + public void testJoin() throws Exception { + final AggregationPipelineQueryNode base = new AggregationPipelineQueryNode( + collection, + new LinkedList<>(), + Sets.newHashSet("x", "y"), + Sets.newHashSet("x", "y", "opt"), + HashBiMap.create()); + // Join on 
one shared variable + AggregationPipelineQueryNode node = base.clone(); + boolean success = node.joinWith(new StatementPattern(new Var("x"), constant(TAKES), new Var("c"))); + Assert.assertTrue(success); + Assert.assertEquals(Sets.newHashSet("x", "y", "c", "opt"), node.getBindingNames()); + Assert.assertEquals(Sets.newHashSet("x", "y", "c"), node.getAssuredBindingNames()); + Assert.assertEquals(4, node.getPipeline().size()); + // Join on multiple shared variables + node = base.clone(); + success = node.joinWith(new StatementPattern(new Var("x"), constant(TAKES), new Var("y"))); + Assert.assertTrue(success); + Assert.assertEquals(Sets.newHashSet("x", "y", "opt"), node.getBindingNames()); + Assert.assertEquals(Sets.newHashSet("x", "y"), node.getAssuredBindingNames()); + Assert.assertEquals(5, node.getPipeline().size()); + } + + @Test + public void testProject() { + final AggregationPipelineQueryNode base = new AggregationPipelineQueryNode( + collection, + new LinkedList<>(), + Sets.newHashSet("x", "y"), + Sets.newHashSet("x", "y", "opt"), + HashBiMap.create()); + // Add a single projection + ProjectionElemList singleProjection = new ProjectionElemList(); + singleProjection.addElement(new ProjectionElem("x", "z")); + singleProjection.addElement(new ProjectionElem("y", "y")); + List projections = Arrays.asList(singleProjection); + AggregationPipelineQueryNode node = base.clone(); + boolean success = node.project(projections); + Assert.assertTrue(success); + Assert.assertEquals(1, node.getPipeline().size()); + Assert.assertEquals(Sets.newHashSet("z", "y"), + node.getAssuredBindingNames()); + Assert.assertEquals(Sets.newHashSet("z", "y"), + node.getBindingNames()); + // Add a multi-projection + ProjectionElemList p1 = new ProjectionElemList(); + p1.addElement(new ProjectionElem("x", "solution")); + ProjectionElemList p2 = new ProjectionElemList(); + p2.addElement(new ProjectionElem("y", "solution")); + ProjectionElemList p3 = new ProjectionElemList(); + p3.addElement(new ProjectionElem("x", "x")); + p3.addElement(new ProjectionElem("x", "solution")); + p3.addElement(new ProjectionElem("y", "y")); + projections = Arrays.asList(p1, p2, p3); + node = base.clone(); + success = node.project(projections); + Assert.assertTrue(success); + Assert.assertEquals(3, node.getPipeline().size()); + Assert.assertEquals(Sets.newHashSet("solution"), + node.getAssuredBindingNames()); + Assert.assertEquals(Sets.newHashSet("x", "y", "solution"), + node.getBindingNames()); + // Add no projections + node = base.clone(); + success = node.project(Arrays.asList()); + Assert.assertFalse(success); + Assert.assertEquals(base, node); + } + + @Test + public void testExtend() { + final AggregationPipelineQueryNode base = new AggregationPipelineQueryNode( + collection, + new LinkedList<>(), + Sets.newHashSet("x", "y"), + Sets.newHashSet("x", "y", "opt"), + HashBiMap.create()); + // Extend with a mix of variables and constants + List extensionElements = Arrays.asList( + new ExtensionElem(new Var("x"), "subject"), + new ExtensionElem(new ValueConstant(RDF.TYPE), "predicate"), + new ExtensionElem(new Var("y"), "object")); + AggregationPipelineQueryNode node = base.clone(); + boolean success = node.extend(extensionElements); + Assert.assertTrue(success); + Assert.assertEquals(1, node.getPipeline().size()); + Assert.assertEquals(Sets.newHashSet("x", "y", "subject", "predicate", "object"), + node.getAssuredBindingNames()); + Assert.assertEquals(Sets.newHashSet("x", "y", "subject", "predicate", "object", "opt"), + 
node.getBindingNames());
+        // Attempt to extend with an unsupported expression
+        extensionElements = Arrays.asList(
+                new ExtensionElem(new Var("x"), "subject"),
+                new ExtensionElem(new Not(new ValueConstant(VF.createLiteral(true))), "notTrue"));
+        node = base.clone();
+        success = node.extend(extensionElements);
+        Assert.assertFalse(success);
+        Assert.assertEquals(base, node);
+    }
+
+    @Test
+    public void testDistinct() {
+        final AggregationPipelineQueryNode base = new AggregationPipelineQueryNode(
+                collection,
+                new LinkedList<>(),
+                Sets.newHashSet("x", "y"),
+                Sets.newHashSet("x", "y", "opt"),
+                HashBiMap.create());
+        AggregationPipelineQueryNode node = base.clone();
+        boolean success = node.distinct();
+        Assert.assertTrue(success);
+        Assert.assertEquals(Sets.newHashSet("x", "y", "opt"), node.getBindingNames());
+        Assert.assertEquals(Sets.newHashSet("x", "y"), node.getAssuredBindingNames());
+        Assert.assertEquals(1, node.getPipeline().size());
+    }
+
+    @Test
+    public void testFilter() {
+        final AggregationPipelineQueryNode base = new AggregationPipelineQueryNode(
+                collection,
+                new LinkedList<>(),
+                Sets.newHashSet("x", "y"),
+                Sets.newHashSet("x", "y", "opt"),
+                HashBiMap.create());
+        // Extend with a supported filter
+        AggregationPipelineQueryNode node = base.clone();
+        boolean success = node.filter(new Compare(new Var("x"), new Var("y"), Compare.CompareOp.EQ));
+        Assert.assertTrue(success);
+        Assert.assertEquals(Sets.newHashSet("x", "y", "opt"), node.getBindingNames());
+        Assert.assertEquals(Sets.newHashSet("x", "y"), node.getAssuredBindingNames());
+        Assert.assertEquals(3, node.getPipeline().size());
+        // Extend with an unsupported filter
+        node = base.clone();
+        success = node.filter(new IsLiteral(new Var("opt")));
+        Assert.assertFalse(success);
+        Assert.assertEquals(Sets.newHashSet("x", "y", "opt"), node.getBindingNames());
+        Assert.assertEquals(Sets.newHashSet("x", "y"), node.getAssuredBindingNames());
+        Assert.assertEquals(0, node.getPipeline().size());
+    }
+
+    @Test
+    public void testRequireSourceDerivationLevel() throws Exception {
+        final AggregationPipelineQueryNode base = new AggregationPipelineQueryNode(
+                collection,
+                new LinkedList<>(),
+                Sets.newHashSet("x", "y"),
+                Sets.newHashSet("x", "y", "opt"),
+                HashBiMap.create());
+        // Require a derivation level greater than zero
+        AggregationPipelineQueryNode node = base.clone();
+        node.requireSourceDerivationDepth(3);
+        Assert.assertEquals(Sets.newHashSet("x", "y", "opt"), node.getBindingNames());
+        Assert.assertEquals(Sets.newHashSet("x", "y"), node.getAssuredBindingNames());
+        Assert.assertEquals(1, node.getPipeline().size());
+        // Require a derivation level of zero (no effect)
+        node = base.clone();
+        node.requireSourceDerivationDepth(0);
+        Assert.assertEquals(Sets.newHashSet("x", "y", "opt"), node.getBindingNames());
+        Assert.assertEquals(Sets.newHashSet("x", "y"), node.getAssuredBindingNames());
+        Assert.assertEquals(0, node.getPipeline().size());
+    }
+
+    @Test
+    public void testRequireSourceTimestamp() {
+        final AggregationPipelineQueryNode base = new AggregationPipelineQueryNode(
+                collection,
+                new LinkedList<>(),
+                Sets.newHashSet("x", "y"),
+                Sets.newHashSet("x", "y", "opt"),
+                HashBiMap.create());
+        // Require a source timestamp
+        AggregationPipelineQueryNode node = base.clone();
+        node.requireSourceTimestamp(System.currentTimeMillis());
+        Assert.assertEquals(Sets.newHashSet("x", "y", "opt"), node.getBindingNames());
+        Assert.assertEquals(Sets.newHashSet("x", "y"), node.getAssuredBindingNames());
+ 
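// (Illustrative only -- the exact document layout is internal to
+        // AggregationPipelineQueryNode, but the call above should append a single
+        // $match stage, conceptually along the lines of
+        //   new Document("$match", new Document(timestampField, new Document("$gte", t)))
+        // where timestampField is a placeholder name; hence the size check below.)
+ 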
Assert.assertEquals(1, node.getPipeline().size()); + } +} diff --git a/dao/mongodb.rya/src/test/java/org/apache/rya/mongodb/aggregation/PipelineQueryIT.java b/dao/mongodb.rya/src/test/java/org/apache/rya/mongodb/aggregation/PipelineQueryIT.java new file mode 100644 index 000000000..45855a0eb --- /dev/null +++ b/dao/mongodb.rya/src/test/java/org/apache/rya/mongodb/aggregation/PipelineQueryIT.java @@ -0,0 +1,421 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.rya.mongodb.aggregation; + +import java.util.Arrays; +import java.util.LinkedList; +import java.util.List; + +import org.apache.rya.api.domain.RyaStatement; +import org.apache.rya.api.domain.RyaStatement.RyaStatementBuilder; +import org.apache.rya.api.persist.RyaDAOException; +import org.apache.rya.api.resolver.RdfToRyaConversions; +import org.apache.rya.api.resolver.RyaToRdfConversions; +import org.apache.rya.mongodb.MongoDBRyaDAO; +import org.apache.rya.mongodb.MongoITBase; +import org.apache.rya.mongodb.dao.SimpleMongoDBStorageStrategy; +import org.bson.Document; +import org.bson.conversions.Bson; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; +import org.openrdf.model.Literal; +import org.openrdf.model.Resource; +import org.openrdf.model.Statement; +import org.openrdf.model.URI; +import org.openrdf.model.Value; +import org.openrdf.model.ValueFactory; +import org.openrdf.model.impl.ValueFactoryImpl; +import org.openrdf.model.vocabulary.FOAF; +import org.openrdf.model.vocabulary.OWL; +import org.openrdf.model.vocabulary.RDF; +import org.openrdf.model.vocabulary.RDFS; +import org.openrdf.model.vocabulary.XMLSchema; +import org.openrdf.query.BindingSet; +import org.openrdf.query.QueryEvaluationException; +import org.openrdf.query.algebra.QueryRoot; +import org.openrdf.query.algebra.evaluation.QueryBindingSet; +import org.openrdf.query.impl.ListBindingSet; +import org.openrdf.query.parser.sparql.SPARQLParser; + +import com.google.common.collect.HashMultiset; +import com.google.common.collect.Multiset; +import com.mongodb.DBObject; +import com.mongodb.util.JSON; + +import info.aduna.iteration.CloseableIteration; + +public class PipelineQueryIT extends MongoITBase { + + private static ValueFactory VF = ValueFactoryImpl.getInstance(); + private static SPARQLParser PARSER = new SPARQLParser(); + + private MongoDBRyaDAO dao; + + @Before + @Override + public void setupTest() throws Exception { + super.setupTest(); + dao = new MongoDBRyaDAO(); + dao.setConf(conf); + dao.init(); + } + + private void insert(Resource subject, URI predicate, Value object) throws RyaDAOException { + insert(subject, predicate, object, 0); + } + + private void insert(Resource subject, URI predicate, Value object, int derivationLevel) throws RyaDAOException { + 
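// This helper bypasses the DAO for statements marked as derived: they are
+        // serialized directly and tagged with a "derivation_level" field, which the
+        // derivation-level and timestamp tests below depend on. Base statements
+        // (level 0) go through the DAO as usual.
+ 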
final RyaStatementBuilder builder = new RyaStatementBuilder();
+        builder.setSubject(RdfToRyaConversions.convertResource(subject));
+        builder.setPredicate(RdfToRyaConversions.convertURI(predicate));
+        builder.setObject(RdfToRyaConversions.convertValue(object));
+        final RyaStatement rstmt = builder.build();
+        if (derivationLevel > 0) {
+            DBObject obj = new SimpleMongoDBStorageStrategy().serialize(rstmt);
+            obj.put("derivation_level", derivationLevel);
+            getRyaDbCollection().insert(obj);
+        }
+        else {
+            dao.add(rstmt);
+        }
+    }
+
+    private void testPipelineQuery(String query, Multiset<BindingSet> expectedSolutions) throws Exception {
+        // Prepare query and convert to pipeline
+        QueryRoot queryTree = new QueryRoot(PARSER.parseQuery(query, null).getTupleExpr());
+        SparqlToPipelineTransformVisitor visitor = new SparqlToPipelineTransformVisitor(getRyaCollection());
+        queryTree.visit(visitor);
+        // Execute pipeline and verify results
+        Assert.assertTrue(queryTree.getArg() instanceof AggregationPipelineQueryNode);
+        AggregationPipelineQueryNode pipelineNode = (AggregationPipelineQueryNode) queryTree.getArg();
+        Multiset<BindingSet> solutions = HashMultiset.create();
+        CloseableIteration<BindingSet, QueryEvaluationException> iter = pipelineNode.evaluate(new QueryBindingSet());
+        while (iter.hasNext()) {
+            solutions.add(iter.next());
+        }
+        Assert.assertEquals(expectedSolutions, solutions);
+    }
+
+    @Test
+    public void testSingleStatementPattern() throws Exception {
+        // Insert data
+        insert(OWL.THING, RDF.TYPE, OWL.CLASS);
+        insert(FOAF.PERSON, RDF.TYPE, OWL.CLASS, 1);
+        insert(FOAF.PERSON, RDFS.SUBCLASSOF, OWL.THING);
+        insert(VF.createURI("urn:Alice"), RDF.TYPE, FOAF.PERSON);
+        dao.flush();
+        // Define query and expected results
+        final String query = "SELECT * WHERE {\n"
+                + "  ?individual a ?type .\n"
+                + "}";
+        List<String> varNames = Arrays.asList("individual", "type");
+        Multiset<BindingSet> expectedSolutions = HashMultiset.create();
+        expectedSolutions.add(new ListBindingSet(varNames, OWL.THING, OWL.CLASS));
+        expectedSolutions.add(new ListBindingSet(varNames, FOAF.PERSON, OWL.CLASS));
+        expectedSolutions.add(new ListBindingSet(varNames, VF.createURI("urn:Alice"), FOAF.PERSON));
+        // Execute pipeline and verify results
+        testPipelineQuery(query, expectedSolutions);
+    }
+
+    @Test
+    public void testJoinTwoSharedVariables() throws Exception {
+        // Insert data
+        URI person = VF.createURI("urn:Person");
+        URI livingThing = VF.createURI("urn:LivingThing");
+        URI human = VF.createURI("urn:Human");
+        URI programmer = VF.createURI("urn:Programmer");
+        URI thing = VF.createURI("urn:Thing");
+        insert(programmer, RDFS.SUBCLASSOF, person);
+        insert(person, RDFS.SUBCLASSOF, FOAF.PERSON);
+        insert(FOAF.PERSON, RDFS.SUBCLASSOF, person);
+        insert(person, OWL.EQUIVALENTCLASS, human);
+        insert(person, RDFS.SUBCLASSOF, livingThing);
+        insert(livingThing, RDFS.SUBCLASSOF, thing);
+        insert(thing, RDFS.SUBCLASSOF, OWL.THING);
+        insert(OWL.THING, RDFS.SUBCLASSOF, thing);
+        dao.flush();
+        // Define query and expected results
+        final String query = "SELECT ?A ?B WHERE {\n"
+                + "  ?A rdfs:subClassOf ?B .\n"
+                + "  ?B rdfs:subClassOf ?A .\n"
+                + "}";
+        List<String> varNames = Arrays.asList("A", "B");
+        Multiset<BindingSet> expectedSolutions = HashMultiset.create();
+        expectedSolutions.add(new ListBindingSet(varNames, person, FOAF.PERSON));
+        expectedSolutions.add(new ListBindingSet(varNames, FOAF.PERSON, person));
+        expectedSolutions.add(new ListBindingSet(varNames, thing, OWL.THING));
+        expectedSolutions.add(new ListBindingSet(varNames, OWL.THING, thing));
+        // Execute query and verify results
+ 
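// (Solutions are collected in a Multiset rather than a Set: without
+        // DISTINCT, SPARQL results are bags and may contain duplicate rows, as
+        // testVariableRename below demonstrates.)
+ 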
testPipelineQuery(query, expectedSolutions);
+    }
+
+    @Test
+    public void testVariableRename() throws Exception {
+        // Insert data
+        URI alice = VF.createURI("urn:Alice");
+        URI bob = VF.createURI("urn:Bob");
+        URI carol = VF.createURI("urn:Carol");
+        URI dan = VF.createURI("urn:Dan");
+        URI eve = VF.createURI("urn:Eve");
+        URI friend = VF.createURI("urn:friend");
+        insert(alice, friend, bob);
+        insert(alice, friend, carol);
+        insert(bob, friend, eve);
+        insert(carol, friend, eve);
+        insert(dan, friend, carol);
+        insert(eve, friend, alice);
+        // Define non-distinct query and expected results
+        final String query1 = "SELECT ?x (?z as ?friendOfFriend) WHERE {\n"
+                + "  ?x <urn:friend> ?y .\n"
+                + "  ?y <urn:friend> ?z .\n"
+                + "}";
+        Multiset<BindingSet> expectedSolutions1 = HashMultiset.create();
+        List<String> varNames = Arrays.asList("x", "friendOfFriend");
+        expectedSolutions1.add(new ListBindingSet(varNames, alice, eve));
+        expectedSolutions1.add(new ListBindingSet(varNames, alice, eve));
+        expectedSolutions1.add(new ListBindingSet(varNames, bob, alice));
+        expectedSolutions1.add(new ListBindingSet(varNames, carol, alice));
+        expectedSolutions1.add(new ListBindingSet(varNames, dan, eve));
+        expectedSolutions1.add(new ListBindingSet(varNames, eve, bob));
+        expectedSolutions1.add(new ListBindingSet(varNames, eve, carol));
+        // Define distinct query and expected results
+        final String query2 = "SELECT DISTINCT ?x (?z as ?friendOfFriend) WHERE {\n"
+                + "  ?x <urn:friend> ?y .\n"
+                + "  ?y <urn:friend> ?z .\n"
+                + "}";
+        Multiset<BindingSet> expectedSolutions2 = HashMultiset.create();
+        expectedSolutions2.add(new ListBindingSet(varNames, alice, eve));
+        expectedSolutions2.add(new ListBindingSet(varNames, bob, alice));
+        expectedSolutions2.add(new ListBindingSet(varNames, carol, alice));
+        expectedSolutions2.add(new ListBindingSet(varNames, dan, eve));
+        expectedSolutions2.add(new ListBindingSet(varNames, eve, bob));
+        expectedSolutions2.add(new ListBindingSet(varNames, eve, carol));
+        // Execute and verify results
+        testPipelineQuery(query1, expectedSolutions1);
+        testPipelineQuery(query2, expectedSolutions2);
+    }
+
+    @Test
+    public void testFilterQuery() throws Exception {
+        // Insert data
+        URI alice = VF.createURI("urn:Alice");
+        URI bob = VF.createURI("urn:Bob");
+        URI eve = VF.createURI("urn:Eve");
+        URI relatedTo = VF.createURI("urn:relatedTo");
+        insert(alice, FOAF.KNOWS, bob);
+        insert(alice, FOAF.KNOWS, alice);
+        insert(alice, FOAF.KNOWS, eve);
+        insert(alice, relatedTo, bob);
+        insert(bob, FOAF.KNOWS, eve);
+        insert(bob, relatedTo, bob);
+        dao.flush();
+        // Define query 1 and expected results
+        final String query1 = "SELECT * WHERE {\n"
+                + "  ?x <" + FOAF.KNOWS.stringValue() + "> ?y1 .\n"
+                + "  ?x <" + relatedTo.stringValue() + "> ?y2 .\n"
+                + "  FILTER (?y1 != ?y2) .\n"
+                + "}";
+        final List<String> varNames = Arrays.asList("x", "y1", "y2");
+        final Multiset<BindingSet> expected1 = HashMultiset.create();
+        expected1.add(new ListBindingSet(varNames, alice, alice, bob));
+        expected1.add(new ListBindingSet(varNames, alice, eve, bob));
+        expected1.add(new ListBindingSet(varNames, bob, eve, bob));
+        // Define query 2 and expected results
+        final String query2 = "SELECT * WHERE {\n"
+                + "  ?x <" + FOAF.KNOWS.stringValue() + "> ?y1 .\n"
+                + "  ?x <" + relatedTo.stringValue() + "> ?y2 .\n"
+                + "  FILTER (?y1 = ?y2) .\n"
+                + "}";
+        final Multiset<BindingSet> expected2 = HashMultiset.create();
+        expected2.add(new ListBindingSet(varNames, alice, bob, bob));
+        // Execute and verify results
+        testPipelineQuery(query1, expected1);
+        testPipelineQuery(query2, expected2);
+    }
+
+    @Test
+    public void 
testMultiConstruct() throws Exception {
+        // Insert data
+        URI alice = VF.createURI("urn:Alice");
+        URI bob = VF.createURI("urn:Bob");
+        URI eve = VF.createURI("urn:Eve");
+        URI friend = VF.createURI("urn:friend");
+        URI knows = VF.createURI("urn:knows");
+        URI person = VF.createURI("urn:Person");
+        insert(alice, friend, bob);
+        insert(bob, knows, eve);
+        insert(eve, knows, alice);
+        // Define query and expected results
+        final String query = "CONSTRUCT {\n"
+                + "  ?x rdf:type owl:Thing .\n"
+                + "  ?x rdf:type <urn:Person> .\n"
+                + "} WHERE { ?x <urn:knows> ?y }";
+        final Multiset<BindingSet> expected = HashMultiset.create();
+        List<String> varNames = Arrays.asList("subject", "predicate", "object");
+        expected.add(new ListBindingSet(varNames, bob, RDF.TYPE, OWL.THING));
+        expected.add(new ListBindingSet(varNames, bob, RDF.TYPE, person));
+        expected.add(new ListBindingSet(varNames, eve, RDF.TYPE, OWL.THING));
+        expected.add(new ListBindingSet(varNames, eve, RDF.TYPE, person));
+        // Test query
+        testPipelineQuery(query, expected);
+    }
+
+    @Test
+    public void testTriplePipeline() throws Exception {
+        URI alice = VF.createURI("urn:Alice");
+        URI bob = VF.createURI("urn:Bob");
+        URI eve = VF.createURI("urn:Eve");
+        URI friend = VF.createURI("urn:friend");
+        URI knows = VF.createURI("urn:knows");
+        URI year = VF.createURI("urn:year");
+        Literal yearLiteral = VF.createLiteral("2017", XMLSchema.GYEAR);
+        final String query = "CONSTRUCT {\n"
+                + "  ?x <urn:knows> ?y .\n"
+                + "  ?x <urn:year> \"2017\"^^<" + XMLSchema.GYEAR + "> .\n"
+                + "} WHERE { ?x <urn:friend> ?y }";
+        insert(alice, friend, bob);
+        insert(bob, knows, eve);
+        insert(eve, knows, alice);
+        // Prepare query and convert to pipeline
+        QueryRoot queryTree = new QueryRoot(PARSER.parseQuery(query, null).getTupleExpr());
+        SparqlToPipelineTransformVisitor visitor = new SparqlToPipelineTransformVisitor(getRyaCollection());
+        queryTree.visit(visitor);
+        // Get pipeline, add triple conversion, and verify that the result is a
+        // properly serialized statement
+        Assert.assertTrue(queryTree.getArg() instanceof AggregationPipelineQueryNode);
+        AggregationPipelineQueryNode pipelineNode = (AggregationPipelineQueryNode) queryTree.getArg();
+        List<Bson> triplePipeline = pipelineNode.getTriplePipeline(System.currentTimeMillis(), false);
+        SimpleMongoDBStorageStrategy strategy = new SimpleMongoDBStorageStrategy();
+        List<Statement> results = new LinkedList<>();
+        for (Document doc : getRyaCollection().aggregate(triplePipeline)) {
+            final DBObject dbo = (DBObject) JSON.parse(doc.toJson());
+            RyaStatement rstmt = strategy.deserializeDBObject(dbo);
+            Statement stmt = RyaToRdfConversions.convertStatement(rstmt);
+            results.add(stmt);
+        }
+        Assert.assertEquals(2, results.size());
+        Assert.assertTrue(results.contains(VF.createStatement(alice, knows, bob)));
+        Assert.assertTrue(results.contains(VF.createStatement(alice, year, yearLiteral)));
+    }
+
+    @Test
+    public void testRequiredDerivationLevel() throws Exception {
+        // Insert data
+        URI person = VF.createURI("urn:Person");
+        URI livingThing = VF.createURI("urn:LivingThing");
+        URI human = VF.createURI("urn:Human");
+        URI programmer = VF.createURI("urn:Programmer");
+        URI thing = VF.createURI("urn:Thing");
+        insert(programmer, RDFS.SUBCLASSOF, person);
+        insert(person, RDFS.SUBCLASSOF, FOAF.PERSON);
+        insert(FOAF.PERSON, RDFS.SUBCLASSOF, person);
+        insert(person, OWL.EQUIVALENTCLASS, human);
+        insert(person, RDFS.SUBCLASSOF, livingThing);
+        insert(livingThing, RDFS.SUBCLASSOF, thing);
+        insert(thing, RDFS.SUBCLASSOF, OWL.THING, 1);
+        insert(OWL.THING, RDFS.SUBCLASSOF, thing);
+        dao.flush();
+        // Define 
query and expected results
+        final String query = "SELECT ?A ?B WHERE {\n"
+                + "  ?A rdfs:subClassOf ?B .\n"
+                + "  ?B rdfs:subClassOf ?A .\n"
+                + "}";
+        List<String> varNames = Arrays.asList("A", "B");
+        Multiset<BindingSet> expectedSolutions = HashMultiset.create();
+        expectedSolutions.add(new ListBindingSet(varNames, person, FOAF.PERSON));
+        expectedSolutions.add(new ListBindingSet(varNames, FOAF.PERSON, person));
+        expectedSolutions.add(new ListBindingSet(varNames, thing, OWL.THING));
+        expectedSolutions.add(new ListBindingSet(varNames, OWL.THING, thing));
+        // Prepare query and convert to pipeline
+        QueryRoot queryTree = new QueryRoot(PARSER.parseQuery(query, null).getTupleExpr());
+        SparqlToPipelineTransformVisitor visitor = new SparqlToPipelineTransformVisitor(getRyaCollection());
+        queryTree.visit(visitor);
+        Assert.assertTrue(queryTree.getArg() instanceof AggregationPipelineQueryNode);
+        AggregationPipelineQueryNode pipelineNode = (AggregationPipelineQueryNode) queryTree.getArg();
+        // Extend the pipeline by requiring a derivation level of zero (should have no effect)
+        pipelineNode.requireSourceDerivationDepth(0);
+        Multiset<BindingSet> solutions = HashMultiset.create();
+        CloseableIteration<BindingSet, QueryEvaluationException> iter = pipelineNode.evaluate(new QueryBindingSet());
+        while (iter.hasNext()) {
+            solutions.add(iter.next());
+        }
+        Assert.assertEquals(expectedSolutions, solutions);
+        // Extend the pipeline by requiring a derivation level of one (should produce the thing/thing pair)
+        expectedSolutions = HashMultiset.create();
+        expectedSolutions.add(new ListBindingSet(varNames, thing, OWL.THING));
+        expectedSolutions.add(new ListBindingSet(varNames, OWL.THING, thing));
+        pipelineNode.requireSourceDerivationDepth(1);
+        solutions = HashMultiset.create();
+        iter = pipelineNode.evaluate(new QueryBindingSet());
+        while (iter.hasNext()) {
+            solutions.add(iter.next());
+        }
+        Assert.assertEquals(expectedSolutions, solutions);
+    }
+
+    @Test
+    public void testRequiredTimestamp() throws Exception {
+        // Insert data
+        URI person = VF.createURI("urn:Person");
+        URI livingThing = VF.createURI("urn:LivingThing");
+        URI human = VF.createURI("urn:Human");
+        URI programmer = VF.createURI("urn:Programmer");
+        URI thing = VF.createURI("urn:Thing");
+        insert(programmer, RDFS.SUBCLASSOF, person);
+        insert(person, RDFS.SUBCLASSOF, FOAF.PERSON, 2);
+        insert(FOAF.PERSON, RDFS.SUBCLASSOF, person);
+        insert(person, OWL.EQUIVALENTCLASS, human);
+        insert(person, RDFS.SUBCLASSOF, livingThing);
+        insert(livingThing, RDFS.SUBCLASSOF, thing);
+        insert(thing, RDFS.SUBCLASSOF, OWL.THING);
+        insert(OWL.THING, RDFS.SUBCLASSOF, thing);
+        dao.flush();
+        // Define query and expected results
+        final String query = "SELECT ?A ?B WHERE {\n"
+                + "  ?A rdfs:subClassOf ?B .\n"
+                + "  ?B rdfs:subClassOf ?A .\n"
+                + "}";
+        List<String> varNames = Arrays.asList("A", "B");
+        Multiset<BindingSet> expectedSolutions = HashMultiset.create();
+        expectedSolutions.add(new ListBindingSet(varNames, person, FOAF.PERSON));
+        expectedSolutions.add(new ListBindingSet(varNames, FOAF.PERSON, person));
+        expectedSolutions.add(new ListBindingSet(varNames, thing, OWL.THING));
+        expectedSolutions.add(new ListBindingSet(varNames, OWL.THING, thing));
+        // Prepare query and convert to pipeline
+        QueryRoot queryTree = new QueryRoot(PARSER.parseQuery(query, null).getTupleExpr());
+        SparqlToPipelineTransformVisitor visitor = new SparqlToPipelineTransformVisitor(getRyaCollection());
+        queryTree.visit(visitor);
+        Assert.assertTrue(queryTree.getArg() instanceof AggregationPipelineQueryNode);
+        AggregationPipelineQueryNode pipelineNode = 
(AggregationPipelineQueryNode) queryTree.getArg();
+        // Extend the pipeline by requiring a timestamp of zero (should have no effect)
+        pipelineNode.requireSourceTimestamp(0);
+        Multiset<BindingSet> solutions = HashMultiset.create();
+        CloseableIteration<BindingSet, QueryEvaluationException> iter = pipelineNode.evaluate(new QueryBindingSet());
+        while (iter.hasNext()) {
+            solutions.add(iter.next());
+        }
+        Assert.assertEquals(expectedSolutions, solutions);
+        // Extend the pipeline by requiring a future timestamp (should produce no results)
+        long delta = 1000 * 60 * 60 * 24;
+        pipelineNode.requireSourceTimestamp(System.currentTimeMillis() + delta);
+        iter = pipelineNode.evaluate(new QueryBindingSet());
+        Assert.assertFalse(iter.hasNext());
+    }
+}
diff --git a/dao/mongodb.rya/src/test/java/org/apache/rya/mongodb/aggregation/PipelineResultIterationTest.java b/dao/mongodb.rya/src/test/java/org/apache/rya/mongodb/aggregation/PipelineResultIterationTest.java
new file mode 100644
index 000000000..67752352e
--- /dev/null
+++ b/dao/mongodb.rya/src/test/java/org/apache/rya/mongodb/aggregation/PipelineResultIterationTest.java
@@ -0,0 +1,152 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.rya.mongodb.aggregation;
+
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.Iterator;
+
+import org.bson.Document;
+import org.junit.Assert;
+import org.junit.Test;
+import org.mockito.Mockito;
+import org.mockito.invocation.InvocationOnMock;
+import org.mockito.stubbing.Answer;
+import org.openrdf.model.ValueFactory;
+import org.openrdf.model.impl.ValueFactoryImpl;
+import org.openrdf.query.BindingSet;
+import org.openrdf.query.QueryEvaluationException;
+import org.openrdf.query.algebra.evaluation.QueryBindingSet;
+import org.openrdf.query.impl.ListBindingSet;
+
+import com.google.common.collect.Sets;
+import com.mongodb.client.AggregateIterable;
+import com.mongodb.client.MongoCursor;
+
+public class PipelineResultIterationTest {
+    ValueFactory VF = ValueFactoryImpl.getInstance();
+
+    @SuppressWarnings("unchecked")
+    private AggregateIterable<Document> documentIterator(Document ... 
documents) {
+        Iterator<Document> docIter = Arrays.asList(documents).iterator();
+        MongoCursor<Document> cursor = Mockito.mock(MongoCursor.class);
+        Mockito.when(cursor.hasNext()).thenAnswer(new Answer<Boolean>() {
+            @Override
+            public Boolean answer(InvocationOnMock invocation) throws Throwable {
+                return docIter.hasNext();
+            }
+        });
+        Mockito.when(cursor.next()).thenAnswer(new Answer<Document>() {
+            @Override
+            public Document answer(InvocationOnMock invocation) throws Throwable {
+                return docIter.next();
+            }
+        });
+        AggregateIterable<Document> aggIter = Mockito.mock(AggregateIterable.class);
+        Mockito.when(aggIter.iterator()).thenReturn(cursor);
+        return aggIter;
+    }
+
+    @Test
+    public void testIteration() throws QueryEvaluationException {
+        HashMap<String, String> nameMap = new HashMap<>();
+        nameMap.put("bName", "b");
+        nameMap.put("eName", "e");
+        PipelineResultIteration iter = new PipelineResultIteration(
+                documentIterator(
+                        new Document("<VALUES>", new Document("a", "urn:Alice").append("b", "urn:Bob")),
+                        new Document("<VALUES>", new Document("a", "urn:Alice").append("b", "urn:Beth")),
+                        new Document("<VALUES>", new Document("a", "urn:Alice").append("bName", "urn:Bob")),
+                        new Document("<VALUES>", new Document("a", "urn:Alice").append("c", "urn:Carol")),
+                        new Document("<VALUES>", new Document("cName", "urn:Carol").append("d", "urn:Dan"))),
+                nameMap,
+                new QueryBindingSet());
+        Assert.assertTrue(iter.hasNext());
+        BindingSet bs = iter.next();
+        Assert.assertEquals(Sets.newHashSet("a", "b"), bs.getBindingNames());
+        Assert.assertEquals("urn:Alice", bs.getBinding("a").getValue().stringValue());
+        Assert.assertEquals("urn:Bob", bs.getBinding("b").getValue().stringValue());
+        Assert.assertTrue(iter.hasNext());
+        bs = iter.next();
+        Assert.assertEquals(Sets.newHashSet("a", "b"), bs.getBindingNames());
+        Assert.assertEquals("urn:Alice", bs.getBinding("a").getValue().stringValue());
+        Assert.assertEquals("urn:Beth", bs.getBinding("b").getValue().stringValue());
+        Assert.assertTrue(iter.hasNext());
+        bs = iter.next();
+        Assert.assertEquals(Sets.newHashSet("a", "b"), bs.getBindingNames());
+        Assert.assertEquals("urn:Alice", bs.getBinding("a").getValue().stringValue());
+        Assert.assertEquals("urn:Bob", bs.getBinding("b").getValue().stringValue());
+        Assert.assertTrue(iter.hasNext());
+        bs = iter.next();
+        Assert.assertEquals(Sets.newHashSet("a", "c"), bs.getBindingNames());
+        Assert.assertEquals("urn:Alice", bs.getBinding("a").getValue().stringValue());
+        Assert.assertEquals("urn:Carol", bs.getBinding("c").getValue().stringValue());
+        bs = iter.next();
+        Assert.assertEquals(Sets.newHashSet("cName", "d"), bs.getBindingNames());
+        Assert.assertEquals("urn:Carol", bs.getBinding("cName").getValue().stringValue());
+        Assert.assertEquals("urn:Dan", bs.getBinding("d").getValue().stringValue());
+        Assert.assertFalse(iter.hasNext());
+    }
+
+    @Test
+    public void testIterationGivenBindingSet() throws QueryEvaluationException {
+        BindingSet solution = new ListBindingSet(Arrays.asList("b", "c"),
+                VF.createURI("urn:Bob"), VF.createURI("urn:Charlie"));
+        HashMap<String, String> nameMap = new HashMap<>();
+        nameMap.put("bName", "b");
+        nameMap.put("cName", "c");
+        nameMap.put("c", "cName");
+        PipelineResultIteration iter = new PipelineResultIteration(
+                documentIterator(
+                        new Document("<VALUES>", new Document("a", "urn:Alice").append("b", "urn:Bob")),
+                        new Document("<VALUES>", new Document("a", "urn:Alice").append("b", "urn:Beth")),
+                        new Document("<VALUES>", new Document("a", "urn:Alice").append("bName", "urn:Bob")),
+                        new Document("<VALUES>", new Document("a", "urn:Alice").append("bName", "urn:Beth")),
+                        new Document("<VALUES>", new Document("a", "urn:Alice").append("cName", "urn:Carol")),
+                        new Document("<VALUES>", new Document("c", "urn:Carol").append("d", "urn:Dan"))),
+                nameMap,
+                solution);
+        Assert.assertTrue(iter.hasNext());
+        BindingSet bs = iter.next();
+        // Add 'c=Charlie' to first result ('b=Bob' matches)
+        Assert.assertEquals(Sets.newHashSet("a", "b", "c"), bs.getBindingNames());
+        Assert.assertEquals("urn:Alice", bs.getBinding("a").getValue().stringValue());
+        Assert.assertEquals("urn:Bob", bs.getBinding("b").getValue().stringValue());
+        Assert.assertEquals("urn:Charlie", bs.getBinding("c").getValue().stringValue());
+        Assert.assertTrue(iter.hasNext());
+        bs = iter.next();
+        // Skip second result ('b=Beth' incompatible with 'b=Bob')
+        // Add 'c=Charlie' to third result ('bName=Bob' resolves to 'b=Bob', matches)
+        Assert.assertEquals(Sets.newHashSet("a", "b", "c"), bs.getBindingNames());
+        Assert.assertEquals("urn:Alice", bs.getBinding("a").getValue().stringValue());
+        Assert.assertEquals("urn:Bob", bs.getBinding("b").getValue().stringValue());
+        Assert.assertEquals("urn:Charlie", bs.getBinding("c").getValue().stringValue());
+        Assert.assertTrue(iter.hasNext());
+        bs = iter.next();
+        // Skip fourth result ('bName=Beth' resolves to 'b=Beth', incompatible)
+        // Skip fifth result ('cName=Carol' resolves to 'c=Carol', incompatible with 'c=Charlie')
+        // Add 'b=Bob' and 'c=Charlie' to sixth result ('c=Carol' resolves to 'cName=Carol', compatible)
+        Assert.assertEquals(Sets.newHashSet("b", "c", "cName", "d"), bs.getBindingNames());
+        Assert.assertEquals("urn:Bob", bs.getBinding("b").getValue().stringValue());
+        Assert.assertEquals("urn:Charlie", bs.getBinding("c").getValue().stringValue());
+        Assert.assertEquals("urn:Carol", bs.getBinding("cName").getValue().stringValue());
+        Assert.assertEquals("urn:Dan", bs.getBinding("d").getValue().stringValue());
+        Assert.assertFalse(iter.hasNext());
+    }
+}
diff --git a/dao/mongodb.rya/src/test/java/org/apache/rya/mongodb/aggregation/SparqlToPipelineTransformVisitorTest.java b/dao/mongodb.rya/src/test/java/org/apache/rya/mongodb/aggregation/SparqlToPipelineTransformVisitorTest.java
new file mode 100644
index 000000000..cc9349b45
--- /dev/null
+++ b/dao/mongodb.rya/src/test/java/org/apache/rya/mongodb/aggregation/SparqlToPipelineTransformVisitorTest.java
@@ -0,0 +1,207 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License. 
+ */ +package org.apache.rya.mongodb.aggregation; + +import java.util.Arrays; +import java.util.List; + +import org.bson.Document; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; +import org.mockito.Mockito; +import org.openrdf.model.URI; +import org.openrdf.model.ValueFactory; +import org.openrdf.model.impl.ValueFactoryImpl; +import org.openrdf.model.vocabulary.RDF; +import org.openrdf.query.algebra.Extension; +import org.openrdf.query.algebra.ExtensionElem; +import org.openrdf.query.algebra.Join; +import org.openrdf.query.algebra.MultiProjection; +import org.openrdf.query.algebra.Not; +import org.openrdf.query.algebra.Projection; +import org.openrdf.query.algebra.ProjectionElem; +import org.openrdf.query.algebra.ProjectionElemList; +import org.openrdf.query.algebra.QueryRoot; +import org.openrdf.query.algebra.StatementPattern; +import org.openrdf.query.algebra.TupleExpr; +import org.openrdf.query.algebra.ValueConstant; +import org.openrdf.query.algebra.Var; + +import com.google.common.collect.Sets; +import com.mongodb.MongoNamespace; +import com.mongodb.client.MongoCollection; + +public class SparqlToPipelineTransformVisitorTest { + + private static final ValueFactory VF = ValueFactoryImpl.getInstance(); + + private static final String LUBM = "urn:lubm"; + private static final URI UNDERGRAD = VF.createURI(LUBM, "UndergraduateStudent"); + private static final URI PROFESSOR = VF.createURI(LUBM, "Professor"); + private static final URI COURSE = VF.createURI(LUBM, "Course"); + private static final URI TAKES = VF.createURI(LUBM, "takesCourse"); + private static final URI TEACHES = VF.createURI(LUBM, "teachesCourse"); + + private static Var constant(URI value) { + return new Var(value.stringValue(), value); + } + + MongoCollection collection; + + @Before + @SuppressWarnings("unchecked") + public void setUp() { + collection = Mockito.mock(MongoCollection.class); + Mockito.when(collection.getNamespace()).thenReturn(new MongoNamespace("db", "collection")); + } + + @Test + public void testStatementPattern() throws Exception { + QueryRoot query = new QueryRoot(new StatementPattern( + new Var("x"), constant(RDF.TYPE), constant(UNDERGRAD))); + SparqlToPipelineTransformVisitor visitor = new SparqlToPipelineTransformVisitor(collection); + query.visit(visitor); + Assert.assertTrue(query.getArg() instanceof AggregationPipelineQueryNode); + AggregationPipelineQueryNode pipelineNode = (AggregationPipelineQueryNode) query.getArg(); + Assert.assertEquals(Sets.newHashSet("x"), pipelineNode.getAssuredBindingNames()); + } + + @Test + public void testJoin() throws Exception { + QueryRoot query = new QueryRoot(new Join( + new StatementPattern(new Var("x"), constant(RDF.TYPE), constant(UNDERGRAD)), + new StatementPattern(new Var("x"), constant(TAKES), new Var("course")))); + SparqlToPipelineTransformVisitor visitor = new SparqlToPipelineTransformVisitor(collection); + query.visit(visitor); + Assert.assertTrue(query.getArg() instanceof AggregationPipelineQueryNode); + AggregationPipelineQueryNode pipelineNode = (AggregationPipelineQueryNode) query.getArg(); + Assert.assertEquals(Sets.newHashSet("x", "course"), pipelineNode.getAssuredBindingNames()); + } + + @Test + public void testNestedJoins() throws Exception { + StatementPattern isUndergrad = new StatementPattern(new Var("x"), constant(RDF.TYPE), constant(UNDERGRAD)); + StatementPattern isProfessor = new StatementPattern(new Var("y"), constant(RDF.TYPE), constant(PROFESSOR)); + StatementPattern takesCourse = new StatementPattern(new 
Var("x"), constant(TAKES), new Var("c")); + StatementPattern teachesCourse = new StatementPattern(new Var("y"), constant(TEACHES), new Var("c")); + QueryRoot queryTree = new QueryRoot(new Join( + isProfessor, + new Join( + new Join(isUndergrad, takesCourse), + teachesCourse))); + SparqlToPipelineTransformVisitor visitor = new SparqlToPipelineTransformVisitor(collection); + queryTree.visit(visitor); + Assert.assertTrue(queryTree.getArg() instanceof AggregationPipelineQueryNode); + AggregationPipelineQueryNode pipelineNode = (AggregationPipelineQueryNode) queryTree.getArg(); + Assert.assertEquals(Sets.newHashSet("x", "y", "c"), pipelineNode.getAssuredBindingNames()); + } + + @Test + public void testComplexJoin() throws Exception { + StatementPattern isUndergrad = new StatementPattern(new Var("x"), constant(RDF.TYPE), constant(UNDERGRAD)); + StatementPattern isProfessor = new StatementPattern(new Var("y"), constant(RDF.TYPE), constant(PROFESSOR)); + StatementPattern takesCourse = new StatementPattern(new Var("x"), constant(TAKES), new Var("c")); + StatementPattern teachesCourse = new StatementPattern(new Var("y"), constant(TEACHES), new Var("c")); + QueryRoot queryTree = new QueryRoot(new Join( + new Join(isUndergrad, takesCourse), + new Join(isProfessor, teachesCourse))); + SparqlToPipelineTransformVisitor visitor = new SparqlToPipelineTransformVisitor(collection); + queryTree.visit(visitor); + Assert.assertTrue(queryTree.getArg() instanceof Join); + Join topJoin = (Join) queryTree.getArg(); + Assert.assertTrue(topJoin.getLeftArg() instanceof AggregationPipelineQueryNode); + Assert.assertTrue(topJoin.getRightArg() instanceof AggregationPipelineQueryNode); + AggregationPipelineQueryNode leftPipeline = (AggregationPipelineQueryNode) topJoin.getLeftArg(); + AggregationPipelineQueryNode rightPipeline = (AggregationPipelineQueryNode) topJoin.getRightArg(); + Assert.assertEquals(Sets.newHashSet("x", "c"), leftPipeline.getAssuredBindingNames()); + Assert.assertEquals(Sets.newHashSet("y", "c"), rightPipeline.getAssuredBindingNames()); + } + + @Test + public void testProjection() throws Exception { + StatementPattern isUndergrad = new StatementPattern(new Var("x"), constant(RDF.TYPE), constant(UNDERGRAD)); + StatementPattern isCourse = new StatementPattern(new Var("course"), constant(RDF.TYPE), constant(COURSE)); + StatementPattern hasEdge = new StatementPattern(new Var("x"), new Var("p"), new Var("course")); + ProjectionElemList projectionElements = new ProjectionElemList( + new ProjectionElem("p", "relation"), + new ProjectionElem("course")); + QueryRoot queryTree = new QueryRoot(new Projection( + new Join(new Join(isCourse, hasEdge), isUndergrad), + projectionElements)); + SparqlToPipelineTransformVisitor visitor = new SparqlToPipelineTransformVisitor(collection); + queryTree.visit(visitor); + Assert.assertTrue(queryTree.getArg() instanceof AggregationPipelineQueryNode); + AggregationPipelineQueryNode pipelineNode = (AggregationPipelineQueryNode) queryTree.getArg(); + Assert.assertEquals(Sets.newHashSet("relation", "course"), pipelineNode.getAssuredBindingNames()); + } + + @Test + public void testMultiProjection() throws Exception { + StatementPattern isUndergrad = new StatementPattern(new Var("x"), constant(RDF.TYPE), constant(UNDERGRAD)); + StatementPattern isCourse = new StatementPattern(new Var("course"), constant(RDF.TYPE), constant(COURSE)); + StatementPattern hasEdge = new StatementPattern(new Var("x"), new Var("p"), new Var("course")); + ProjectionElemList courseHasRelation = new 
ProjectionElemList( + new ProjectionElem("p", "relation"), + new ProjectionElem("course")); + ProjectionElemList studentHasRelation = new ProjectionElemList( + new ProjectionElem("p", "relation"), + new ProjectionElem("x", "student")); + QueryRoot queryTree = new QueryRoot(new MultiProjection( + new Join(new Join(isCourse, hasEdge), isUndergrad), + Arrays.asList(courseHasRelation, studentHasRelation))); + SparqlToPipelineTransformVisitor visitor = new SparqlToPipelineTransformVisitor(collection); + queryTree.visit(visitor); + Assert.assertTrue(queryTree.getArg() instanceof AggregationPipelineQueryNode); + AggregationPipelineQueryNode pipelineNode = (AggregationPipelineQueryNode) queryTree.getArg(); + Assert.assertEquals(Sets.newHashSet("relation"), pipelineNode.getAssuredBindingNames()); + Assert.assertEquals(Sets.newHashSet("relation", "course", "student"), pipelineNode.getBindingNames()); + } + + @Test + public void testExtension() throws Exception { + QueryRoot queryTree = new QueryRoot(new Extension( + new StatementPattern(new Var("x"), constant(TAKES), new Var("c")), + new ExtensionElem(new Var("x"), "renamed"), + new ExtensionElem(new ValueConstant(TAKES), "constant"))); + SparqlToPipelineTransformVisitor visitor = new SparqlToPipelineTransformVisitor(collection); + queryTree.visit(visitor); + Assert.assertTrue(queryTree.getArg() instanceof AggregationPipelineQueryNode); + AggregationPipelineQueryNode pipelineNode = (AggregationPipelineQueryNode) queryTree.getArg(); + Assert.assertEquals(Sets.newHashSet("x", "c", "renamed", "constant"), pipelineNode.getAssuredBindingNames()); + } + + @Test + public void testUnsupportedExtension() throws Exception { + StatementPattern sp = new StatementPattern(new Var("x"), constant(TAKES), new Var("c")); + List elements = Arrays.asList(new ExtensionElem(new Var("x"), "renamed"), + new ExtensionElem(new Not(new ValueConstant(VF.createLiteral(true))), "notTrue"), + new ExtensionElem(new ValueConstant(TAKES), "constant")); + Extension extensionNode = new Extension(sp, elements); + QueryRoot queryTree = new QueryRoot(extensionNode); + SparqlToPipelineTransformVisitor visitor = new SparqlToPipelineTransformVisitor(collection); + queryTree.visit(visitor); + Assert.assertTrue(queryTree.getArg() instanceof Extension); + Assert.assertEquals(elements, ((Extension) queryTree.getArg()).getElements()); + TupleExpr innerQuery = ((Extension) queryTree.getArg()).getArg(); + Assert.assertTrue(innerQuery instanceof AggregationPipelineQueryNode); + AggregationPipelineQueryNode pipelineNode = (AggregationPipelineQueryNode) innerQuery; + Assert.assertEquals(Sets.newHashSet("x", "c"), pipelineNode.getAssuredBindingNames()); + } +} From d06ea41d43fa451e3095deba7f82f9192f19b297 Mon Sep 17 00:00:00 2001 From: Jesse Hatfield Date: Fri, 22 Dec 2017 12:02:33 -0500 Subject: [PATCH 2/2] RYA-417 Batch forward-chaining rules engine --- .../AggregationPipelineQueryNode.java | 8 +- .../mongodb/aggregation/PipelineQueryIT.java | 32 ++ .../SparqlToPipelineTransformVisitorTest.java | 14 + .../rya/sail/config/RyaSailFactory.java | 64 ++-- extras/pom.xml | 1 + extras/rya.forwardchain/pom.xml | 119 ++++++ .../forwardchain/ForwardChainConstants.java | 37 ++ .../forwardchain/ForwardChainException.java | 54 +++ .../batch/AbstractForwardChainTool.java | 148 ++++++++ .../batch/ForwardChainSpinTool.java | 77 ++++ .../rule/AbstractConstructRule.java | 65 ++++ .../rule/AbstractInconsistencyRule.java | 51 +++ .../forwardchain/rule/AbstractUpdateRule.java | 34 ++ 
.../forwardchain/rule/AntecedentVisitor.java | 51 +++ .../rule/ConstructConsequentVisitor.java | 138 +++++++ .../apache/rya/forwardchain/rule/Rule.java | 75 ++++ .../apache/rya/forwardchain/rule/Ruleset.java | 166 +++++++++ .../forwardchain/rule/SpinConstructRule.java | 344 ++++++++++++++++++ .../AbstractForwardChainStrategy.java | 82 +++++ .../AbstractRuleExecutionStrategy.java | 108 ++++++ .../strategy/MongoPipelineStrategy.java | 276 ++++++++++++++ .../strategy/RoundRobinStrategy.java | 212 +++++++++++ .../strategy/SailExecutionStrategy.java | 223 ++++++++++++ .../rya/forwardchain/batch/MongoSpinIT.java | 169 +++++++++ .../rule/AntecedentVisitorTest.java | 156 ++++++++ .../rule/ConstructConsequentVisitorTest.java | 164 +++++++++ .../rya/forwardchain/rule/RulesetTest.java | 137 +++++++ .../rule/SpinConstructRuleTest.java | 213 +++++++++++ .../src/test/resources/data.ttl | 56 +++ .../src/test/resources/owlrl.ttl | 106 ++++++ .../src/test/resources/query.sparql | 32 ++ .../src/test/resources/university.ttl | 58 +++ 32 files changed, 3442 insertions(+), 28 deletions(-) create mode 100644 extras/rya.forwardchain/pom.xml create mode 100644 extras/rya.forwardchain/src/main/java/org/apache/rya/forwardchain/ForwardChainConstants.java create mode 100644 extras/rya.forwardchain/src/main/java/org/apache/rya/forwardchain/ForwardChainException.java create mode 100644 extras/rya.forwardchain/src/main/java/org/apache/rya/forwardchain/batch/AbstractForwardChainTool.java create mode 100644 extras/rya.forwardchain/src/main/java/org/apache/rya/forwardchain/batch/ForwardChainSpinTool.java create mode 100644 extras/rya.forwardchain/src/main/java/org/apache/rya/forwardchain/rule/AbstractConstructRule.java create mode 100644 extras/rya.forwardchain/src/main/java/org/apache/rya/forwardchain/rule/AbstractInconsistencyRule.java create mode 100644 extras/rya.forwardchain/src/main/java/org/apache/rya/forwardchain/rule/AbstractUpdateRule.java create mode 100644 extras/rya.forwardchain/src/main/java/org/apache/rya/forwardchain/rule/AntecedentVisitor.java create mode 100644 extras/rya.forwardchain/src/main/java/org/apache/rya/forwardchain/rule/ConstructConsequentVisitor.java create mode 100644 extras/rya.forwardchain/src/main/java/org/apache/rya/forwardchain/rule/Rule.java create mode 100644 extras/rya.forwardchain/src/main/java/org/apache/rya/forwardchain/rule/Ruleset.java create mode 100644 extras/rya.forwardchain/src/main/java/org/apache/rya/forwardchain/rule/SpinConstructRule.java create mode 100644 extras/rya.forwardchain/src/main/java/org/apache/rya/forwardchain/strategy/AbstractForwardChainStrategy.java create mode 100644 extras/rya.forwardchain/src/main/java/org/apache/rya/forwardchain/strategy/AbstractRuleExecutionStrategy.java create mode 100644 extras/rya.forwardchain/src/main/java/org/apache/rya/forwardchain/strategy/MongoPipelineStrategy.java create mode 100644 extras/rya.forwardchain/src/main/java/org/apache/rya/forwardchain/strategy/RoundRobinStrategy.java create mode 100644 extras/rya.forwardchain/src/main/java/org/apache/rya/forwardchain/strategy/SailExecutionStrategy.java create mode 100644 extras/rya.forwardchain/src/test/java/org/apache/rya/forwardchain/batch/MongoSpinIT.java create mode 100644 extras/rya.forwardchain/src/test/java/org/apache/rya/forwardchain/rule/AntecedentVisitorTest.java create mode 100644 extras/rya.forwardchain/src/test/java/org/apache/rya/forwardchain/rule/ConstructConsequentVisitorTest.java create mode 100644 
extras/rya.forwardchain/src/test/java/org/apache/rya/forwardchain/rule/RulesetTest.java
create mode 100644 extras/rya.forwardchain/src/test/java/org/apache/rya/forwardchain/rule/SpinConstructRuleTest.java
create mode 100644 extras/rya.forwardchain/src/test/resources/data.ttl
create mode 100644 extras/rya.forwardchain/src/test/resources/owlrl.ttl
create mode 100644 extras/rya.forwardchain/src/test/resources/query.sparql
create mode 100644 extras/rya.forwardchain/src/test/resources/university.ttl
diff --git a/dao/mongodb.rya/src/main/java/org/apache/rya/mongodb/aggregation/AggregationPipelineQueryNode.java b/dao/mongodb.rya/src/main/java/org/apache/rya/mongodb/aggregation/AggregationPipelineQueryNode.java
index 7a84f5def..45092e4e6 100644
--- a/dao/mongodb.rya/src/main/java/org/apache/rya/mongodb/aggregation/AggregationPipelineQueryNode.java
+++ b/dao/mongodb.rya/src/main/java/org/apache/rya/mongodb/aggregation/AggregationPipelineQueryNode.java
@@ -531,7 +531,9 @@ public boolean joinWith(StatementPattern sp) {
      * The number of documents produced by the pipeline after this operation
      * will be the number of documents entering this stage (the number of
      * intermediate results) multiplied by the number of
-     * {@link ProjectionElemList}s supplied here.
+     * {@link ProjectionElemList}s supplied here. Empty projections are
+     * unsupported; if any of the given projections binds zero variables, then
+     * the pipeline will be left unchanged and the method will return false.
      * @param projections One or more projections, i.e. mappings from the result
      *  at this stage of the query into a set of variables.
      * @return true if the projection(s) were added to the pipeline.
@@ -544,6 +546,10 @@ public boolean project(Iterable<ProjectionElemList> projections) {
         Set<String> bindingNamesUnion = new HashSet<>();
         Set<String> bindingNamesIntersection = null;
         for (ProjectionElemList projection : projections) {
+            if (projection.getElements().isEmpty()) {
+                // Empty projections are unsupported -- fail when seen
+                return false;
+            }
             Document valueDoc = new Document();
             Document hashDoc = new Document();
             Document typeDoc = new Document();
diff --git a/dao/mongodb.rya/src/test/java/org/apache/rya/mongodb/aggregation/PipelineQueryIT.java b/dao/mongodb.rya/src/test/java/org/apache/rya/mongodb/aggregation/PipelineQueryIT.java
index 45855a0eb..0552ac099 100644
--- a/dao/mongodb.rya/src/test/java/org/apache/rya/mongodb/aggregation/PipelineQueryIT.java
+++ b/dao/mongodb.rya/src/test/java/org/apache/rya/mongodb/aggregation/PipelineQueryIT.java
@@ -49,8 +49,10 @@ import org.openrdf.model.vocabulary.XMLSchema;
 import org.openrdf.query.BindingSet;
 import org.openrdf.query.QueryEvaluationException;
+import org.openrdf.query.algebra.Projection;
 import org.openrdf.query.algebra.QueryRoot;
 import org.openrdf.query.algebra.evaluation.QueryBindingSet;
+import org.openrdf.query.impl.EmptyBindingSet;
 import org.openrdf.query.impl.ListBindingSet;
 import org.openrdf.query.parser.sparql.SPARQLParser;
@@ -134,6 +136,36 @@ public void testSingleStatementPattern() throws Exception {
         testPipelineQuery(query, expectedSolutions);
     }
 
+    @Test
+    public void testNoVariableSP() throws Exception {
+        // Insert data
+        insert(OWL.THING, RDF.TYPE, OWL.CLASS);
+        insert(FOAF.PERSON, RDF.TYPE, OWL.CLASS, 1);
+        insert(FOAF.PERSON, RDFS.SUBCLASSOF, OWL.THING);
+        insert(VF.createURI("urn:Alice"), RDF.TYPE, FOAF.PERSON);
+        dao.flush();
+        // Define query and expected results
+        final String query = "SELECT * WHERE {\n"
+                + "  owl:Thing a owl:Class .\n"
+                + "}";
+        Multiset<BindingSet> expectedSolutions = HashMultiset.create();
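+        // A statement pattern with no variables binds nothing, so each match is
+        // expected to contribute a single empty binding set.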
+        expectedSolutions.add(new EmptyBindingSet());
+        // Execute pipeline and verify results
+        QueryRoot queryTree = new QueryRoot(PARSER.parseQuery(query, null).getTupleExpr());
+        SparqlToPipelineTransformVisitor visitor = new SparqlToPipelineTransformVisitor(getRyaCollection());
+        queryTree.visit(visitor);
+        Assert.assertTrue(queryTree.getArg() instanceof Projection);
+        Projection projection = (Projection) queryTree.getArg();
+        Assert.assertTrue(projection.getArg() instanceof AggregationPipelineQueryNode);
+        AggregationPipelineQueryNode pipelineNode = (AggregationPipelineQueryNode) projection.getArg();
+        Multiset<BindingSet> solutions = HashMultiset.create();
+        CloseableIteration<BindingSet, QueryEvaluationException> iter = pipelineNode.evaluate(new QueryBindingSet());
+        while (iter.hasNext()) {
+            solutions.add(iter.next());
+        }
+        Assert.assertEquals(expectedSolutions, solutions);
+    }
+
     @Test
     public void testJoinTwoSharedVariables() throws Exception {
         // Insert data
diff --git a/dao/mongodb.rya/src/test/java/org/apache/rya/mongodb/aggregation/SparqlToPipelineTransformVisitorTest.java b/dao/mongodb.rya/src/test/java/org/apache/rya/mongodb/aggregation/SparqlToPipelineTransformVisitorTest.java
index cc9349b45..506b8afed 100644
--- a/dao/mongodb.rya/src/test/java/org/apache/rya/mongodb/aggregation/SparqlToPipelineTransformVisitorTest.java
+++ b/dao/mongodb.rya/src/test/java/org/apache/rya/mongodb/aggregation/SparqlToPipelineTransformVisitorTest.java
@@ -29,6 +29,7 @@ import org.openrdf.model.URI;
 import org.openrdf.model.ValueFactory;
 import org.openrdf.model.impl.ValueFactoryImpl;
+import org.openrdf.model.vocabulary.OWL;
 import org.openrdf.model.vocabulary.RDF;
 import org.openrdf.query.algebra.Extension;
 import org.openrdf.query.algebra.ExtensionElem;
@@ -152,6 +153,19 @@ public void testProjection() throws Exception {
         Assert.assertEquals(Sets.newHashSet("relation", "course"), pipelineNode.getAssuredBindingNames());
     }
 
+    @Test
+    public void testEmptyProjection() throws Exception {
+        StatementPattern isClass = new StatementPattern(constant(UNDERGRAD), constant(RDF.TYPE), constant(OWL.CLASS));
+        QueryRoot queryTree = new QueryRoot(new Projection(isClass, new ProjectionElemList()));
+        SparqlToPipelineTransformVisitor visitor = new SparqlToPipelineTransformVisitor(collection);
+        queryTree.visit(visitor);
+        Assert.assertTrue(queryTree.getArg() instanceof Projection);
+        Projection projectNode = (Projection) queryTree.getArg();
+        Assert.assertTrue(projectNode.getArg() instanceof AggregationPipelineQueryNode);
+        AggregationPipelineQueryNode pipelineNode = (AggregationPipelineQueryNode) projectNode.getArg();
+        Assert.assertEquals(Sets.newHashSet(), pipelineNode.getAssuredBindingNames());
+    }
+
     @Test
     public void testMultiProjection() throws Exception {
         StatementPattern isUndergrad = new StatementPattern(new Var("x"), constant(RDF.TYPE), constant(UNDERGRAD));
diff --git a/extras/indexing/src/main/java/org/apache/rya/sail/config/RyaSailFactory.java b/extras/indexing/src/main/java/org/apache/rya/sail/config/RyaSailFactory.java
index b5adee380..56af9b4bc 100644
--- a/extras/indexing/src/main/java/org/apache/rya/sail/config/RyaSailFactory.java
+++ b/extras/indexing/src/main/java/org/apache/rya/sail/config/RyaSailFactory.java
@@ -88,33 +88,10 @@ private static Sail getRyaSail(final Configuration config) throws InferenceEngin
             // Get a reference to a Mongo DB configuration object.
             final MongoDBRdfConfiguration mongoConfig = (config instanceof MongoDBRdfConfiguration) ? 
(MongoDBRdfConfiguration)config : new MongoDBRdfConfiguration(config);
-
-            // Create the MongoClient that will be used by the Sail object's components.
-            final MongoClient client = createMongoClient(mongoConfig);
-
-            // Add the Indexer and Optimizer names to the configuration object that are configured to be used.
-            ConfigUtils.setIndexers(mongoConfig);
-
-            // Populate the configuration using previously stored Rya Details if this instance uses them.
-            try {
-                final MongoRyaInstanceDetailsRepository ryaDetailsRepo = new MongoRyaInstanceDetailsRepository(client, mongoConfig.getRyaInstanceName());
-                RyaDetailsToConfiguration.addRyaDetailsToConfiguration(ryaDetailsRepo.getRyaInstanceDetails(), mongoConfig);
-            } catch (final RyaDetailsRepositoryException e) {
-                LOG.info("Instance does not have a rya details collection, skipping.");
-            }
-
-            // Set the configuration to the stateful configuration that is used to pass the constructed objects around.
-            final StatefulMongoDBRdfConfiguration statefulConfig = new StatefulMongoDBRdfConfiguration(mongoConfig, client);
-            final List<MongoSecondaryIndex> indexers = statefulConfig.getInstances(AccumuloRdfConfiguration.CONF_ADDITIONAL_INDEXERS, MongoSecondaryIndex.class);
-            statefulConfig.setIndexers(indexers);
-            rdfConfig = statefulConfig;
-
-            // Create the DAO that is able to interact with MongoDB.
-            final MongoDBRyaDAO mongoDao = new MongoDBRyaDAO();
-            mongoDao.setConf(statefulConfig);
-            mongoDao.init();
-            dao = mongoDao;
-
+            // Instantiate a Mongo client and Mongo DAO.
+            dao = getMongoDAO(mongoConfig);
+            // Then use the DAO's newly-created stateful conf in place of the original
+            rdfConfig = dao.getConf();
         } else {
             rdfConfig = new AccumuloRdfConfiguration(config);
             user = rdfConfig.get(ConfigUtils.CLOUDBASE_USER);
@@ -237,4 +214,37 @@ public static void updateAccumuloConfig(final AccumuloRdfConfiguration config, f
             LOG.info("Instance does not have a rya details collection, skipping.");
         }
     }
+
+    /**
+     * Connects to MongoDB and creates a MongoDBRyaDAO.
+     * @param mongoConfig - user configuration
+     * @return - MongoDBRyaDAO with Indexers configured according to user's specification
+     * @throws RyaDAOException if the DAO can't be initialized
+     */
+    public static MongoDBRyaDAO getMongoDAO(MongoDBRdfConfiguration mongoConfig) throws RyaDAOException {
+        // Create the MongoClient that will be used by the Sail object's components.
+        final MongoClient client = createMongoClient(mongoConfig);
+
+        // Add the Indexer and Optimizer names to the configuration object that are configured to be used.
+        ConfigUtils.setIndexers(mongoConfig);
+
+        // Populate the configuration using previously stored Rya Details if this instance uses them.
+        try {
+            final MongoRyaInstanceDetailsRepository ryaDetailsRepo = new MongoRyaInstanceDetailsRepository(client, mongoConfig.getRyaInstanceName());
+            RyaDetailsToConfiguration.addRyaDetailsToConfiguration(ryaDetailsRepo.getRyaInstanceDetails(), mongoConfig);
+        } catch (final RyaDetailsRepositoryException e) {
+            LOG.info("Instance does not have a rya details collection, skipping.");
+        }
+
+        // Set the configuration to the stateful configuration that is used to pass the constructed objects around.
+        final StatefulMongoDBRdfConfiguration statefulConfig = new StatefulMongoDBRdfConfiguration(mongoConfig, client);
+        final List<MongoSecondaryIndex> indexers = statefulConfig.getInstances(AccumuloRdfConfiguration.CONF_ADDITIONAL_INDEXERS, MongoSecondaryIndex.class);
+        statefulConfig.setIndexers(indexers);
+
+        // Create the DAO that is able to interact with MongoDB. 
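+        // (The returned DAO keeps the stateful configuration, so callers such as
+        // getRyaSail above can recover it afterwards via dao.getConf().)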
+ final MongoDBRyaDAO mongoDao = new MongoDBRyaDAO(); + mongoDao.setConf(statefulConfig); + mongoDao.init(); + return mongoDao; + } } \ No newline at end of file diff --git a/extras/pom.xml b/extras/pom.xml index 62220ca45..4ebcb823a 100644 --- a/extras/pom.xml +++ b/extras/pom.xml @@ -45,6 +45,7 @@ under the License. rya.merger rya.giraph rya.streams + rya.forwardchain diff --git a/extras/rya.forwardchain/pom.xml b/extras/rya.forwardchain/pom.xml new file mode 100644 index 000000000..7acabcaea --- /dev/null +++ b/extras/rya.forwardchain/pom.xml @@ -0,0 +1,119 @@ + + + + + 4.0.0 + + org.apache.rya + rya.extras + 3.2.12-incubating-SNAPSHOT + + + rya.forwardchain + Apache Rya Forward Chaining Inference + + + + org.openrdf.sesame + sesame-runtime + + + org.apache.rya + rya.api + + + org.apache.rya + rya.sail + + + org.apache.rya + rya.indexing + + + org.apache.rya + mongodb.rya + + + + + junit + junit + test + + + + + + + org.apache.maven.plugins + maven-compiler-plugin + + 1.8 + 1.8 + + + + org.apache.maven.plugins + maven-jar-plugin + + + true + + true + org.apache.rya.forwardchain.batch.ForwardChainSpinTool + + + + + + org.apache.maven.plugins + maven-shade-plugin + + true + + + *:* + + META-INF/*.SF + META-INF/*.DSA + META-INF/*.RSA + + + + + + + package + + shade + + + true + shaded + + + + + + + + + + diff --git a/extras/rya.forwardchain/src/main/java/org/apache/rya/forwardchain/ForwardChainConstants.java b/extras/rya.forwardchain/src/main/java/org/apache/rya/forwardchain/ForwardChainConstants.java new file mode 100644 index 000000000..f1fe8b34d --- /dev/null +++ b/extras/rya.forwardchain/src/main/java/org/apache/rya/forwardchain/ForwardChainConstants.java @@ -0,0 +1,37 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.rya.forwardchain; + +import org.apache.rya.api.RdfCloudTripleStoreConstants; +import org.apache.rya.api.domain.RyaSchema; +import org.apache.rya.api.domain.RyaURI; +import org.apache.rya.api.resolver.RdfToRyaConversions; +import org.openrdf.model.URI; +import org.openrdf.model.ValueFactory; + +public class ForwardChainConstants { + private static final ValueFactory VF = RdfCloudTripleStoreConstants.VALUE_FACTORY; + private static final String NAMESPACE = RyaSchema.NAMESPACE; + + public static final URI DERIVATION_TIME = VF.createURI(NAMESPACE, "forwardChainIteration"); + public static final URI DERIVATION_RULE = VF.createURI(NAMESPACE, "forwardChainRule"); + + public static final RyaURI RYA_DERIVATION_RULE = RdfToRyaConversions.convertURI(DERIVATION_RULE); + public static final RyaURI RYA_DERIVATION_TIME = RdfToRyaConversions.convertURI(DERIVATION_TIME); +} diff --git a/extras/rya.forwardchain/src/main/java/org/apache/rya/forwardchain/ForwardChainException.java b/extras/rya.forwardchain/src/main/java/org/apache/rya/forwardchain/ForwardChainException.java new file mode 100644 index 000000000..64b05a4cc --- /dev/null +++ b/extras/rya.forwardchain/src/main/java/org/apache/rya/forwardchain/ForwardChainException.java @@ -0,0 +1,54 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.rya.forwardchain; + +/** + * Broad exception representing an error during forward chaining. Useful for + * wrapping the diverse kinds of exceptions that may be thrown by + * implementations of reasoning logic. + */ +public class ForwardChainException extends Exception { + private static final long serialVersionUID = 1L; + + /** + * Constructs a new ForwardChainException with a message and a cause. + * @param string Detail message + * @param e Underlying cause + */ + public ForwardChainException(String string, Exception e) { + super(string , e); + } + + /** + * Constructs a new ForwardChainException with a message only. + * @param string Detail message + */ + public ForwardChainException(String string) { + super(string); + } + + /** + * Constructs a new ForwardChainException with a root cause and no + * additional message. 
+ * @param e Underlying cause + */ + public ForwardChainException(Exception e) { + super(e); + } +} diff --git a/extras/rya.forwardchain/src/main/java/org/apache/rya/forwardchain/batch/AbstractForwardChainTool.java b/extras/rya.forwardchain/src/main/java/org/apache/rya/forwardchain/batch/AbstractForwardChainTool.java new file mode 100644 index 000000000..db08407be --- /dev/null +++ b/extras/rya.forwardchain/src/main/java/org/apache/rya/forwardchain/batch/AbstractForwardChainTool.java @@ -0,0 +1,148 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.rya.forwardchain.batch; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.util.Tool; +import org.apache.log4j.Logger; +import org.apache.rya.accumulo.AccumuloRdfConfiguration; +import org.apache.rya.api.RdfCloudTripleStoreConfiguration; +import org.apache.rya.forwardchain.ForwardChainException; +import org.apache.rya.forwardchain.rule.Ruleset; +import org.apache.rya.forwardchain.strategy.AbstractForwardChainStrategy; +import org.apache.rya.forwardchain.strategy.AbstractRuleExecutionStrategy; +import org.apache.rya.forwardchain.strategy.MongoPipelineStrategy; +import org.apache.rya.forwardchain.strategy.RoundRobinStrategy; +import org.apache.rya.forwardchain.strategy.SailExecutionStrategy; +import org.apache.rya.indexing.accumulo.ConfigUtils; +import org.apache.rya.mongodb.MongoDBRdfConfiguration; +import com.google.common.base.Preconditions; + +/** + * Base class for a {@link Tool} that executes forward-chaining rules until + * completion (when no more new information can be derived). + *
<p>
+ * Subclasses must implement {@link #getRuleset()} to yield the specific set of + * {@link Rule}s to materialize. + *
<p>
+ * Subclasses may additionally override {@link #getStrategy()} and/or
+ * {@link #getRuleStrategy()} to provide specific forward chaining execution
+ * logic.
+ */
+public abstract class AbstractForwardChainTool implements Tool {
+ private static final Logger logger = Logger.getLogger(AbstractForwardChainTool.class);
+
+ private RdfCloudTripleStoreConfiguration conf;
+
+ private long numInferences = 0;
+
+ /**
+ * Set the {@link Configuration} for this tool, which will be converted to
+ * an {@link RdfCloudTripleStoreConfiguration}.
+ * @param conf Configuration object that specifies Rya connection details.
+ * Should not be null.
+ */
+ @Override
+ public void setConf(Configuration conf) {
+ Preconditions.checkNotNull(conf);
+ if (conf.getBoolean(ConfigUtils.USE_MONGO, false)) {
+ this.conf = new MongoDBRdfConfiguration(conf);
+ }
+ else {
+ this.conf = new AccumuloRdfConfiguration(conf);
+ }
+ }
+
+ /**
+ * Get the RdfCloudTripleStoreConfiguration used by this tool.
+ * @return Rya configuration object.
+ */
+ @Override
+ public RdfCloudTripleStoreConfiguration getConf() {
+ return conf;
+ }
+
+ @Override
+ public int run(String[] args) throws Exception {
+ numInferences = getStrategy().executeAll(getRuleset());
+ logger.info("Forward chaining complete; made " + numInferences + " inferences.");
+ return 0;
+ }
+
+ /**
+ * Gets the number of inferences that have been made.
+ * @return zero before forward chaining, or the total number of inferences
+ * after.
+ */
+ public long getNumInferences() {
+ return numInferences;
+ }
+
+ /**
+ * Get the high-level {@link AbstractForwardChainStrategy} that governs how
+ * reasoning will proceed. By default, returns a {@link RoundRobinStrategy}
+ * which executes each relevant rule one-by-one, then moves to the next
+ * iteration and repeats, until no rules are still relevant. Subclasses may
+ * override this method to provide alternative strategies.
+ * @return The high-level forward chaining logic.
+ * @throws ForwardChainException if the strategy can't be instantiated.
+ */
+ protected AbstractForwardChainStrategy getStrategy() throws ForwardChainException {
+ return new RoundRobinStrategy(getRuleStrategy());
+ }
+
+ /**
+ * Get the low-level {@link AbstractRuleExecutionStrategy} that governs the
+ * application of rules on an individual basis. This is used by the default
+ * ForwardChainStrategy (RoundRobinStrategy) and may be used by any
+ * high-level strategy that executes rules individually. By default, returns
+ * a {@link MongoPipelineStrategy} if the configuration object specifies a
+ * MongoDB connection with aggregation pipelines enabled, and a
+ * {@link SailExecutionStrategy} otherwise. Subclasses may override this
+ * method to provide alternative strategies.
+ * @return The low-level rule execution logic.
+ * @throws ForwardChainException if the strategy can't be instantiated.
+ */
+ protected AbstractRuleExecutionStrategy getRuleStrategy() throws ForwardChainException {
+ if (ConfigUtils.getUseMongo(conf)) {
+ final MongoDBRdfConfiguration mongoConf;
+ if (conf instanceof MongoDBRdfConfiguration) {
+ mongoConf = (MongoDBRdfConfiguration) conf;
+ }
+ else {
+ mongoConf = new MongoDBRdfConfiguration(conf);
+ }
+ if (mongoConf.getUseAggregationPipeline()) {
+ return new MongoPipelineStrategy(mongoConf);
+ }
+ }
+ return new SailExecutionStrategy(conf);
+ }
+
+ /**
+ * Get the set of rules for this tool to apply. Subclasses should implement
+ * this for their specific domains.
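For illustration, a minimal concrete tool might look like the following sketch. It is not part of this patch; HardcodedRule stands in for any domain-specific Rule implementation, and the default getStrategy()/getRuleStrategy() behavior is inherited unchanged.

    import java.util.Collections;

    import org.apache.hadoop.util.ToolRunner;
    import org.apache.rya.forwardchain.ForwardChainException;
    import org.apache.rya.forwardchain.rule.Ruleset;

    public class StaticRulesetTool extends AbstractForwardChainTool {
        @Override
        protected Ruleset getRuleset() throws ForwardChainException {
            // A fixed, domain-specific ruleset; HardcodedRule is hypothetical.
            return new Ruleset(Collections.singletonList(new HardcodedRule()));
        }

        public static void main(String[] args) throws Exception {
            // ToolRunner applies standard Hadoop -D options before run() executes.
            System.exit(ToolRunner.run(new StaticRulesetTool(), args));
        }
    }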
The subclass should ensure that the + * ruleset returned only contains rules whose types are supported by the + * forward chaining strategy. The default strategy supports only CONSTRUCT + * rules, so the ruleset should only contain {@link AbstractConstructRule}s. + * @return A set of forward-chaining rules. + * @throws ForwardChainException if rules couldn't be retrieved. + */ + protected abstract Ruleset getRuleset() throws ForwardChainException; +} diff --git a/extras/rya.forwardchain/src/main/java/org/apache/rya/forwardchain/batch/ForwardChainSpinTool.java b/extras/rya.forwardchain/src/main/java/org/apache/rya/forwardchain/batch/ForwardChainSpinTool.java new file mode 100644 index 000000000..c35f37eee --- /dev/null +++ b/extras/rya.forwardchain/src/main/java/org/apache/rya/forwardchain/batch/ForwardChainSpinTool.java @@ -0,0 +1,77 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.rya.forwardchain.batch; + +import org.apache.hadoop.util.Tool; +import org.apache.hadoop.util.ToolRunner; +import org.apache.rya.api.RdfCloudTripleStoreConfiguration; +import org.apache.rya.forwardchain.ForwardChainException; +import org.apache.rya.forwardchain.rule.Ruleset; +import org.apache.rya.forwardchain.rule.SpinConstructRule; + +/** + * {@link Tool} to load SPIN Construct rules from a Rya data store, then apply + * those rules to the same store using forward-chaining inference + * (materialization), adding triples back to Rya until no more information can + * be derived. + */ +public class ForwardChainSpinTool extends AbstractForwardChainTool { + private Ruleset ruleset; + + /** + * Constructor that takes in an {@link RdfCloudTripleStoreConfiguration}. + * @param conf Configuration object containing Rya connection information. + */ + public ForwardChainSpinTool(RdfCloudTripleStoreConfiguration conf) { + setConf(conf); + } + + /** + * Default constructor that does not take in a configuration object. Rya + * connection details should be provided via an + * RdfCloudTripleStoreConfiguration, either using + * {@link AbstractForwardChainTool#setConf} or a {@link ToolRunner}. + */ + public ForwardChainSpinTool() { } + + /** + * Load SPIN Construct rules from Rya. + * @return A set of construct query rules. + * @throws ForwardChainException if loading rules from Rya fails. 
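A usage sketch for the tool, assuming a reachable MongoDB-backed Rya instance; the setUseAggregationPipeline setter is assumed from this patch's configuration additions, and connection details (host, port, instance name) are omitted:

    MongoDBRdfConfiguration conf = new MongoDBRdfConfiguration();
    conf.setBoolean(ConfigUtils.USE_MONGO, true);
    conf.setUseAggregationPipeline(true); // assumed setter; selects MongoPipelineStrategy

    ForwardChainSpinTool tool = new ForwardChainSpinTool(conf);
    int exitCode = ToolRunner.run(tool, new String[0]);
    System.out.println(tool.getNumInferences() + " inferences; exit code " + exitCode);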
+ */ + @Override + protected Ruleset getRuleset() throws ForwardChainException { + if (ruleset == null) { + ruleset = SpinConstructRule.loadSpinRules(getConf()); + } + return ruleset; + } + + public static void main(String[] args) throws Exception { + long start = System.currentTimeMillis(); + ForwardChainSpinTool tool = new ForwardChainSpinTool(); + ToolRunner.run(tool, args); + long end = System.currentTimeMillis(); + double seconds = (end - start) / 1000.0; + long inferences = tool.getNumInferences(); + long rules = tool.getRuleset().getRules().size(); + System.out.println(String.format("ForwardChainSpinTool: %d rules, %d inferences, %.3f seconds", + rules, inferences, seconds)); + } +} diff --git a/extras/rya.forwardchain/src/main/java/org/apache/rya/forwardchain/rule/AbstractConstructRule.java b/extras/rya.forwardchain/src/main/java/org/apache/rya/forwardchain/rule/AbstractConstructRule.java new file mode 100644 index 000000000..c4c12c72d --- /dev/null +++ b/extras/rya.forwardchain/src/main/java/org/apache/rya/forwardchain/rule/AbstractConstructRule.java @@ -0,0 +1,65 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.rya.forwardchain.rule; + +import org.apache.rya.api.domain.StatementMetadata; +import org.apache.rya.forwardchain.ForwardChainException; +import org.apache.rya.forwardchain.strategy.AbstractRuleExecutionStrategy; +import org.openrdf.query.algebra.StatementPattern; +import org.openrdf.query.parser.ParsedGraphQuery; + +import com.google.common.base.Preconditions; + +/** + * A rule that produces new triples, and can be expressed as a graph query + * (SPARQL "CONSTRUCT"). Should not modify existing triples. + */ +public abstract class AbstractConstructRule implements Rule { + /** + * Get the query tree corresponding to this construct rule. + * @return The query algebra representation of this rule. + */ + public abstract ParsedGraphQuery getQuery(); + + @Override + public long execute(AbstractRuleExecutionStrategy strategy, + StatementMetadata metadata) throws ForwardChainException { + Preconditions.checkNotNull(strategy); + Preconditions.checkNotNull(metadata); + return strategy.executeConstructRule(this, metadata); + } + + /** + * Whether any of the possible consequents of this rule include anonymous + * variables. Care should be taken when executing such rules, so that + * repeated application doesn't continually produce new bnodes. + * @return true if any subject, predicate, or object variable involved in a + * consequent is flagged as anonymous. 
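To make the bnode hazard concrete, a rule like the following (illustrative only, with made-up urn:ex names) would mint a fresh blank node on every application, so repeated forward chaining would never reach a fixed point:

    // hasAnonymousConsequent() returns true for this rule's query tree,
    // letting callers skip it rather than recurse indefinitely.
    String recursiveRule =
            "CONSTRUCT { ?this <urn:ex:hasShadow> _:shadow . }\n" +
            "WHERE { ?this a <urn:ex:Thing> . }";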
+ */ + public boolean hasAnonymousConsequent() { + for (StatementPattern sp : getConsequentPatterns()) { + if (sp.getSubjectVar().isAnonymous() + || sp.getPredicateVar().isAnonymous() + || sp.getObjectVar().isAnonymous()) { + return true; + } + } + return false; + } +} diff --git a/extras/rya.forwardchain/src/main/java/org/apache/rya/forwardchain/rule/AbstractInconsistencyRule.java b/extras/rya.forwardchain/src/main/java/org/apache/rya/forwardchain/rule/AbstractInconsistencyRule.java new file mode 100644 index 000000000..451c5e449 --- /dev/null +++ b/extras/rya.forwardchain/src/main/java/org/apache/rya/forwardchain/rule/AbstractInconsistencyRule.java @@ -0,0 +1,51 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.rya.forwardchain.rule; + +import java.util.Arrays; +import java.util.Collection; + +import org.apache.rya.api.domain.StatementMetadata; +import org.apache.rya.forwardchain.ForwardChainException; +import org.apache.rya.forwardchain.strategy.AbstractRuleExecutionStrategy; +import org.openrdf.query.algebra.StatementPattern; + +/** + * A rule that identifies an inconsistency in the data, but does not add or + * modify any triples. + */ +public abstract class AbstractInconsistencyRule implements Rule { + + @Override + public boolean canConclude(StatementPattern sp) { + return false; + } + + @Override + public Collection getConsequentPatterns() { + return Arrays.asList(); + } + + @Override + public long execute(AbstractRuleExecutionStrategy strategy, + StatementMetadata metadata) throws ForwardChainException { + return strategy.executeInconsistencyRule(this, metadata); + } + +} diff --git a/extras/rya.forwardchain/src/main/java/org/apache/rya/forwardchain/rule/AbstractUpdateRule.java b/extras/rya.forwardchain/src/main/java/org/apache/rya/forwardchain/rule/AbstractUpdateRule.java new file mode 100644 index 000000000..d87aeae6b --- /dev/null +++ b/extras/rya.forwardchain/src/main/java/org/apache/rya/forwardchain/rule/AbstractUpdateRule.java @@ -0,0 +1,34 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.rya.forwardchain.rule; + +import org.apache.rya.api.domain.StatementMetadata; +import org.apache.rya.forwardchain.ForwardChainException; +import org.apache.rya.forwardchain.strategy.AbstractRuleExecutionStrategy; + +/** + * A rule that modifies existing data. + */ +public abstract class AbstractUpdateRule implements Rule { + @Override + public long execute(AbstractRuleExecutionStrategy strategy, + StatementMetadata metadata) throws ForwardChainException { + return strategy.executeUpdateRule(this, metadata); + } +} diff --git a/extras/rya.forwardchain/src/main/java/org/apache/rya/forwardchain/rule/AntecedentVisitor.java b/extras/rya.forwardchain/src/main/java/org/apache/rya/forwardchain/rule/AntecedentVisitor.java new file mode 100644 index 000000000..1f2cbba2c --- /dev/null +++ b/extras/rya.forwardchain/src/main/java/org/apache/rya/forwardchain/rule/AntecedentVisitor.java @@ -0,0 +1,51 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.rya.forwardchain.rule; + +import java.util.HashSet; +import java.util.Set; + +import org.openrdf.query.algebra.StatementPattern; +import org.openrdf.query.algebra.helpers.QueryModelVisitorBase; + +/** + * Query visitor that identifies all triple patterns represented as + * {@link StatementPattern}s in a query, which therefore represent triples + * that could potentially contribute to a solution. Considers only the statement + * patterns themselves, i.e. the leaves of the query tree, and does not consider + * other constraints that may restrict the set of triples that may be relevant. + * This means relying on this analysis to determine whether a fact can be part + * of a solution can yield false positives, but not false negatives. + */ +class AntecedentVisitor extends QueryModelVisitorBase { + private Set antecedentStatementPatterns = new HashSet<>(); + + /** + * Get the StatementPatterns used by this query. + * @return A set of patterns that can contribute to query solutions. 
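A sketch of the intended use, from within the same package since the class is package-private; the query text and URIs are made up, and the parser's checked MalformedQueryException is left to the caller:

    SPARQLParser parser = new SPARQLParser();
    ParsedQuery parsed = parser.parseQuery(
            "SELECT ?x WHERE { ?x a <urn:ex:Student> . ?x <urn:ex:takes> ?course }",
            null);
    AntecedentVisitor visitor = new AntecedentVisitor();
    parsed.getTupleExpr().visit(visitor);
    // Yields two patterns: (?x rdf:type urn:ex:Student) and (?x urn:ex:takes ?course)
    Set<StatementPattern> triggers = visitor.getAntecedents();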
+ */ + public Set getAntecedents() { + return antecedentStatementPatterns; + } + + @Override + public void meet(StatementPattern sp) { + antecedentStatementPatterns.add(sp.clone()); + } +} diff --git a/extras/rya.forwardchain/src/main/java/org/apache/rya/forwardchain/rule/ConstructConsequentVisitor.java b/extras/rya.forwardchain/src/main/java/org/apache/rya/forwardchain/rule/ConstructConsequentVisitor.java new file mode 100644 index 000000000..e28dbe3b6 --- /dev/null +++ b/extras/rya.forwardchain/src/main/java/org/apache/rya/forwardchain/rule/ConstructConsequentVisitor.java @@ -0,0 +1,138 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.rya.forwardchain.rule; + +import java.util.Arrays; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.concurrent.ConcurrentHashMap; + +import org.openrdf.model.Value; +import org.openrdf.query.algebra.BNodeGenerator; +import org.openrdf.query.algebra.Extension; +import org.openrdf.query.algebra.ExtensionElem; +import org.openrdf.query.algebra.MultiProjection; +import org.openrdf.query.algebra.Projection; +import org.openrdf.query.algebra.ProjectionElem; +import org.openrdf.query.algebra.ProjectionElemList; +import org.openrdf.query.algebra.StatementPattern; +import org.openrdf.query.algebra.ValueConstant; +import org.openrdf.query.algebra.Var; +import org.openrdf.query.algebra.helpers.QueryModelVisitorBase; + +/** + * Query visitor that identifies all triple patterns produced by a "CONSTRUCT" + * query. Finds the topmost instance of a {@link Projection} or + * {@link MultiProjection}, and expects the variables projected to include + * "subject", "predicate", and "object". Each projection is converted to a + * {@link StatementPattern}, where any constant values are expected to be + * provided by an Extension directly underneath the projection, if applicable. + *
<p>
+ * Undefined behavior if applied to a query other than a CONSTRUCT query. + *
<p>
+ * Does not report any constraints on possible consequent triples beyond the
+ * constant values, where appropriate, of each part of the triple. Therefore,
+ * this analysis may produce an overly broad set of possible consequents
+ * compared to some more sophisticated method.
+ */
+public class ConstructConsequentVisitor extends QueryModelVisitorBase<RuntimeException> {
+ private Set<StatementPattern> consequentStatementPatterns = new HashSet<>();
+ private Set<String> bnodeNames = new HashSet<>();
+
+ private static final String SUBJECT_VAR_NAME = "subject";
+ private static final String PREDICATE_VAR_NAME = "predicate";
+ private static final String OBJECT_VAR_NAME = "object";
+
+ /**
+ * Get the possible conclusions of this construct rule.
+ * @return StatementPatterns representing the possible triple patterns that
+ * can be inferred.
+ */
+ public Set<StatementPattern> getConsequents() {
+ return consequentStatementPatterns;
+ }
+
+ /**
+ * Get the names of any bnodes generated by this construct rule.
+ * @return Variable names corresponding to new entities.
+ */
+ public Set<String> getBnodes() {
+ return bnodeNames;
+ }
+
+ @Override
+ public void meet(Projection projection) {
+ if (projection.getArg() instanceof Extension) {
+ recordConsequent(projection.getProjectionElemList(),
+ ((Extension) projection.getArg()).getElements());
+ }
+ else {
+ recordConsequent(projection.getProjectionElemList(), Arrays.asList());
+ }
+ }
+
+ @Override
+ public void meet(MultiProjection projection) {
+ List<ExtensionElem> bindings;
+ if (projection.getArg() instanceof Extension) {
+ bindings = ((Extension) projection.getArg()).getElements();
+ }
+ else {
+ bindings = Arrays.asList();
+ }
+ for (ProjectionElemList template : projection.getProjections()) {
+ recordConsequent(template, bindings);
+ }
+ }
+
+ private void recordConsequent(ProjectionElemList variables, List<ExtensionElem> extensionElements) {
+ Map<String, Value> bindings = new ConcurrentHashMap<>();
+ Map<String, Value> values = new ConcurrentHashMap<>();
+ Set<String> queryBnodes = new HashSet<>();
+ Set<String> projectedBnodes = new HashSet<>();
+ for (ExtensionElem ee : extensionElements) {
+ if (ee.getExpr() instanceof ValueConstant) {
+ bindings.put(ee.getName(), ((ValueConstant) ee.getExpr()).getValue());
+ }
+ else if (ee.getExpr() instanceof BNodeGenerator) {
+ queryBnodes.add(ee.getName());
+ }
+ }
+ for (ProjectionElem var : variables.getElements()) {
+ String sourceName = var.getSourceName();
+ String targetName = var.getTargetName();
+ Value constValue = bindings.get(sourceName);
+ if (constValue != null) {
+ values.put(targetName, constValue);
+ }
+ else if (queryBnodes.contains(sourceName)) {
+ projectedBnodes.add(targetName);
+ }
+ }
+ // Remember which projected names are bnodes so getBnodes() can report them.
+ bnodeNames.addAll(projectedBnodes);
+ Var subjVar = new Var(SUBJECT_VAR_NAME, values.get(SUBJECT_VAR_NAME));
+ Var predVar = new Var(PREDICATE_VAR_NAME, values.get(PREDICATE_VAR_NAME));
+ Var objVar = new Var(OBJECT_VAR_NAME, values.get(OBJECT_VAR_NAME));
+ subjVar.setAnonymous(projectedBnodes.contains(SUBJECT_VAR_NAME));
+ predVar.setAnonymous(projectedBnodes.contains(PREDICATE_VAR_NAME));
+ objVar.setAnonymous(projectedBnodes.contains(OBJECT_VAR_NAME));
+ StatementPattern sp = new StatementPattern(subjVar, predVar, objVar);
+ consequentStatementPatterns.add(sp);
+ }
+}
diff --git a/extras/rya.forwardchain/src/main/java/org/apache/rya/forwardchain/rule/Rule.java b/extras/rya.forwardchain/src/main/java/org/apache/rya/forwardchain/rule/Rule.java
new file mode 100644
index 000000000..74004b9c1
--- /dev/null
+++ b/extras/rya.forwardchain/src/main/java/org/apache/rya/forwardchain/rule/Rule.java
@@ -0,0 +1,75 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more
contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.rya.forwardchain.rule; + +import java.util.Collection; + +import org.apache.rya.api.domain.StatementMetadata; +import org.apache.rya.forwardchain.ForwardChainException; +import org.apache.rya.forwardchain.strategy.AbstractRuleExecutionStrategy; +import org.openrdf.query.algebra.StatementPattern; + +/** + * Represents a forward-chaining inference rule. A rule is triggered by some + * combination of triples, and may produce some combination of triples when + * applied. Potential triggers (antecedents) and potential results (consequents) + * are represented in a general form as {@link StatementPattern}s and can be + * used to determine relationships between rules. + */ +public interface Rule { + /** + * Whether this rule, if applied, could produce triples of a given form. + * @param sp A statement pattern describing a possible inferred triple; + * assumed not null. + * @return true if a consequent of this rule could match the pattern. + */ + abstract public boolean canConclude(StatementPattern sp); + + /** + * All {@link StatementPattern}s that can, in some combination, trigger this + * rule. Should be a complete set, such that if no statements matching any + * of the patterns exist, the rule cannot derive any new information. + * @return Any number of statement patterns. + */ + abstract public Collection getAntecedentPatterns(); + + /** + * {@link StatementPattern}s completely describing the possible conclusions + * of this rule. Any derived statement should match one of these patterns. + * @return Any number of statement patterns. + */ + abstract public Collection getConsequentPatterns(); + + /** + * Given an {@link AbstractRuleExecutionStrategy}, executes this rule. + * Associates any new or modified triples with the specified statement + * metadata. + * @param strategy A strategy capable of applying individual rules; should + * not be null. + * @param metadata StatementMetadata to add to any results. Can be used to + * record the circumstances of the derivation. Should not be null; use + * {@link StatementMetadata#EMPTY_METADATA} to add none. Implementing + * classes may add additional metadata specific to the rule. + * @return The number of new inferences made during rule execution. + * @throws ForwardChainException if an error was encountered during + * rule application. 
+ */ + abstract public long execute(AbstractRuleExecutionStrategy strategy, + StatementMetadata metadata) throws ForwardChainException; +} diff --git a/extras/rya.forwardchain/src/main/java/org/apache/rya/forwardchain/rule/Ruleset.java b/extras/rya.forwardchain/src/main/java/org/apache/rya/forwardchain/rule/Ruleset.java new file mode 100644 index 000000000..965d2d39b --- /dev/null +++ b/extras/rya.forwardchain/src/main/java/org/apache/rya/forwardchain/rule/Ruleset.java @@ -0,0 +1,166 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.rya.forwardchain.rule; + +import java.util.Collection; +import java.util.HashSet; +import java.util.Map; +import java.util.Set; +import java.util.concurrent.ConcurrentHashMap; + +import org.apache.log4j.Logger; +import org.openrdf.query.algebra.StatementPattern; + +import com.google.common.base.Preconditions; + +/** + * Represents a set of forward-chaining {@link Rule}s and their relationships. + */ +public class Ruleset { + private final Set rules; + private final Map> successors; + private final Map> predecessors; + + private final Logger logger = Logger.getLogger(this.getClass()); + + /** + * Constructor. Takes in a set of rules and determines their dependencies. + * @param rules The complete set of rules to process; should not be null. + */ + public Ruleset(Collection rules) { + Preconditions.checkNotNull(rules); + this.rules = new HashSet<>(); + for (Rule rule : rules) { + if (rule != null) { + this.rules.add(rule); + } + } + successors = new ConcurrentHashMap<>(); + predecessors = new ConcurrentHashMap<>(); + // Build the dependency graph of all the rules, in both directions + for (Rule rule : rules) { + successors.put(rule, new HashSet<>()); + predecessors.put(rule, new HashSet<>()); + } + for (Rule rule1 : rules) { + for (Rule rule2 : rules) { + if (canTrigger(rule1, rule2)) { + logger.trace("\t" + rule1.toString() + " can trigger " + rule2.toString()); + successors.get(rule1).add(rule2); + predecessors.get(rule2).add(rule1); + } + } + } + } + + /** + * Get the rules associated with this ruleset. + * @return The complete set of rules. + */ + public Set getRules() { + return rules; + } + + /** + * Given a rule, return the set of all rules that it may trigger. That is, + * if the rule were to produce inferences, those inferences might directly + * cause other rules to apply in turn. + * @param precedingRule The potentially triggering rule; not null. + * @return All rules that could be triggered by the given rule. + */ + public Collection getSuccessorsOf(Rule precedingRule) { + Preconditions.checkNotNull(precedingRule); + return successors.get(precedingRule); + } + + /** + * Given a rule, return the set of all rules that could trigger it. 
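As a usage sketch (ruleA and ruleB stand for any Rule implementations), the dependency graph built here supports both direct lookups and the transitive reachability check pathExists defined below:

    Ruleset ruleset = new Ruleset(Arrays.asList(ruleA, ruleB));
    for (Rule successor : ruleset.getSuccessorsOf(ruleA)) {
        System.out.println(ruleA + " can directly trigger " + successor);
    }
    // True if ruleA precedes ruleB directly or through intermediate rules.
    boolean reachable = ruleset.pathExists(ruleA, ruleB);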
That is, + * if any one of those rules were applied, their potential conclusions could + * directly cause the specified rule to apply in turn. + * @param dependentRule The potentially triggered rule; not null. + * @return All rules that could trigger the given rule. + */ + public Collection getPredecessorsOf(Rule dependentRule) { + Preconditions.checkNotNull(dependentRule); + return predecessors.get(dependentRule); + } + + /** + * Given a pair of rules, determine whether a path exists from the first to + * the second. That is, whether the first rule precedes the second rule + * either directly or transitively. If either rule is null, no path exists. + * @param r1 The start of the path + * @param r2 The end of the path + * @return whether a forward path exists. + */ + public boolean pathExists(Rule r1, Rule r2) { + if (r1 == null || r2 == null) { + return false; + } + Set forwardFrontier = new HashSet<>(); + Set backwardFrontier = new HashSet<>(); + Set visitedForward = new HashSet<>(); + Set visitedBackward = new HashSet<>(); + forwardFrontier.addAll(getSuccessorsOf(r1)); + backwardFrontier.add(r2); + while (!forwardFrontier.isEmpty() && !backwardFrontier.isEmpty()) { + Set currentGoals = new HashSet<>(backwardFrontier); + for (Rule goal : currentGoals) { + if (forwardFrontier.contains(goal)) { + return true; + } + else { + visitedBackward.add(goal); + backwardFrontier.addAll(getPredecessorsOf(goal)); + } + } + backwardFrontier.removeAll(visitedBackward); + Set currentSources = new HashSet<>(forwardFrontier); + for (Rule source : currentSources) { + if (backwardFrontier.contains(source)) { + return true; + } + else { + visitedForward.add(source); + forwardFrontier.addAll(getSuccessorsOf(source)); + } + } + forwardFrontier.removeAll(visitedForward); + } + return false; + } + + /** + * Whether the first rule can, in any circumstance, directly trigger the second. + * @param rule1 The first rule, which may produce some inferences + * @param rule2 The second rule, which may use the first rule's conclusions + * @return True if the first rule's conclusions could be used by the second. + */ + private boolean canTrigger(Rule rule1, Rule rule2) { + if (rule1 == null || rule2 == null) { + return false; + } + for (StatementPattern antecedent : rule2.getAntecedentPatterns()) { + if (rule1.canConclude(antecedent)) { + return true; + } + } + return false; + } +} diff --git a/extras/rya.forwardchain/src/main/java/org/apache/rya/forwardchain/rule/SpinConstructRule.java b/extras/rya.forwardchain/src/main/java/org/apache/rya/forwardchain/rule/SpinConstructRule.java new file mode 100644 index 000000000..44e15e62c --- /dev/null +++ b/extras/rya.forwardchain/src/main/java/org/apache/rya/forwardchain/rule/SpinConstructRule.java @@ -0,0 +1,344 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.rya.forwardchain.rule; + +import java.util.Collection; +import java.util.Map; +import java.util.Set; +import java.util.concurrent.ConcurrentHashMap; + +import org.apache.log4j.Logger; +import org.apache.rya.api.RdfCloudTripleStoreConfiguration; +import org.apache.rya.api.domain.StatementMetadata; +import org.apache.rya.api.resolver.RdfToRyaConversions; +import org.apache.rya.forwardchain.ForwardChainConstants; +import org.apache.rya.forwardchain.ForwardChainException; +import org.apache.rya.forwardchain.strategy.AbstractRuleExecutionStrategy; +import org.apache.rya.sail.config.RyaSailFactory; +import org.openrdf.model.Literal; +import org.openrdf.model.Resource; +import org.openrdf.model.Value; +import org.openrdf.model.vocabulary.OWL; +import org.openrdf.model.vocabulary.RDF; +import org.openrdf.model.vocabulary.RDFS; +import org.openrdf.model.vocabulary.SP; +import org.openrdf.model.vocabulary.SPIN; +import org.openrdf.query.BindingSet; +import org.openrdf.query.MalformedQueryException; +import org.openrdf.query.QueryEvaluationException; +import org.openrdf.query.QueryLanguage; +import org.openrdf.query.TupleQuery; +import org.openrdf.query.TupleQueryResultHandlerBase; +import org.openrdf.query.TupleQueryResultHandlerException; +import org.openrdf.query.algebra.Extension; +import org.openrdf.query.algebra.Join; +import org.openrdf.query.algebra.QueryModelNode; +import org.openrdf.query.algebra.SingletonSet; +import org.openrdf.query.algebra.StatementPattern; +import org.openrdf.query.algebra.TupleExpr; +import org.openrdf.query.algebra.UnaryTupleOperator; +import org.openrdf.query.algebra.ValueExpr; +import org.openrdf.query.algebra.Var; +import org.openrdf.query.algebra.helpers.QueryModelVisitorBase; +import org.openrdf.query.parser.ParsedGraphQuery; +import org.openrdf.query.parser.ParsedQuery; +import org.openrdf.query.parser.sparql.SPARQLParser; +import org.openrdf.repository.RepositoryException; +import org.openrdf.repository.sail.SailRepository; +import org.openrdf.repository.sail.SailRepositoryConnection; + +import com.google.common.base.Preconditions; +import com.google.common.collect.Sets; + +/** + * Represents a SPIN Construct rule extracted from the data store, providing + * access to its associated query tree and providing methods to apply the rule. + */ +public class SpinConstructRule extends AbstractConstructRule { + private static Logger logger = Logger.getLogger(SpinConstructRule.class); + + private final Resource ruleId; + private final ParsedGraphQuery graphQuery; + private Set antecedentStatementPatterns = null; + private Set consequentStatementPatterns = null; + + /** + * Instantiate a SPIN construct rule given its associated type, URI or bnode + * identifier, and construct query tree. Modifies the query tree to + * incorporate the fact that ?this must belong to the associated type, and + * traverses the modified tree to find antecedent and consequent triple + * patterns. + * @param type This rule applies to objects of this type. Should not be + * null. If the type is owl:Thing or rdfs:Resource, it will be applied to + * any objects. Otherwise, a statement pattern will be added that + * effectively binds ?this to members of the type. Therefore, passing + * owl:Thing or rdfs:Resource yields the intended behavior of + * sp:thisUnbound. + * @param ruleId The Resource representing this rule in the RDF data; + * should not be null. 
+ * @param graphQuery The query tree corresponding to the "construct" text; + * should not be null. + */ + public SpinConstructRule(Resource type, Resource ruleId, + ParsedGraphQuery graphQuery) { + Preconditions.checkNotNull(type); + Preconditions.checkNotNull(ruleId); + Preconditions.checkNotNull(graphQuery); + this.ruleId = ruleId; + this.graphQuery = graphQuery; + // Add the type requirement: ?this must belong to the type + graphQuery.getTupleExpr().visit(new TypeRequirementVisitor("this", type)); + // Find all statement patterns that could trigger this rule + AntecedentVisitor aVisitor = new AntecedentVisitor(); + graphQuery.getTupleExpr().visit(aVisitor); + antecedentStatementPatterns = aVisitor.getAntecedents(); + // Construct statement patterns for all possible conclusions of this rule + ConstructConsequentVisitor cVisitor = new ConstructConsequentVisitor(); + graphQuery.getTupleExpr().visit(cVisitor); + consequentStatementPatterns = cVisitor.getConsequents(); + } + + /** + * Get the URI or bnode associated with this rule in the data. + * @return The rule's identifier. + */ + public Resource getId() { + return ruleId; + } + + @Override + public String toString() { + return "SpinConstructRule{" + ruleId.stringValue() + "}"; + } + + @Override + public ParsedGraphQuery getQuery() { + return graphQuery; + } + + @Override + public boolean canConclude(StatementPattern sp) { + Preconditions.checkNotNull(sp); + Value s1 = getVarValue(sp.getSubjectVar()); + Value p1 = getVarValue(sp.getPredicateVar()); + Value o1 = getVarValue(sp.getObjectVar()); + Value c1 = getVarValue(sp.getContextVar()); + for (StatementPattern consequent : consequentStatementPatterns) { + Value s2 = getVarValue(consequent.getSubjectVar()); + Value p2 = getVarValue(consequent.getPredicateVar()); + Value o2 = getVarValue(consequent.getObjectVar()); + Value c2 = getVarValue(consequent.getContextVar()); + if ((s1 == null || s2 == null || s1.equals(s2)) + && (p1 == null || p2 == null || p1.equals(p2)) + && (o1 == null || o2 == null || o1.equals(o2)) + && (c1 == null || c2 == null || c1.equals(c2))) { + return true; + } + } + return false; + } + + @Override + public Collection getAntecedentPatterns() { + return antecedentStatementPatterns; + } + + @Override + public Collection getConsequentPatterns() { + return consequentStatementPatterns; + } + + @Override + public long execute(AbstractRuleExecutionStrategy strategy, + StatementMetadata metadata) throws ForwardChainException { + metadata.addMetadata(ForwardChainConstants.RYA_DERIVATION_RULE, + RdfToRyaConversions.convertResource(ruleId)); + return super.execute(strategy, metadata); + } + + private static Value getVarValue(Var var) { + return var == null ? 
null : var.getValue(); + } + + private static class TypeRequirementVisitor extends QueryModelVisitorBase { + private static final Var RDF_TYPE_VAR = new Var("-const-" + RDF.TYPE.stringValue(), RDF.TYPE); + private static final Set BASE_TYPES = Sets.newHashSet(RDFS.RESOURCE, OWL.THING); + static { + RDF_TYPE_VAR.setConstant(true); + } + + private final String varName; + private final StatementPattern typeRequirement; + public TypeRequirementVisitor(String varName, Resource requiredType) { + final Var typeVar = new Var("-const-" + requiredType.stringValue(), requiredType); + typeVar.setConstant(true); + this.varName = varName; + if (BASE_TYPES.contains(requiredType)) { + this.typeRequirement = null; + } + else { + this.typeRequirement = new StatementPattern(new Var(varName), RDF_TYPE_VAR, typeVar); + } + } + @Override + public void meet(SingletonSet node) { + if (typeRequirement != null) { + node.replaceWith(typeRequirement); + } + } + @Override + public void meet(Extension node) { + Set argBindings = node.getArg().getBindingNames(); + if (typeRequirement != null) { + node.getElements().removeIf(elem -> { + if (varName.equals(elem.getName())) { + ValueExpr expr = elem.getExpr(); + if (expr == null) { + return true; + } + else if (expr instanceof Var) { + String fromName = ((Var) expr).getName(); + if (getVarValue((Var) expr) == null && !argBindings.contains(fromName)) { + return true; + } + } + } + return false; + }); + meetUnaryTupleOperator(node); + } + } + @Override + public void meetNode(QueryModelNode node) { + if (typeRequirement != null) { + if (node instanceof TupleExpr && ((TupleExpr) node).getBindingNames().contains(varName)) { + final Join withType = new Join((TupleExpr) node.clone(), typeRequirement); + node.replaceWith(withType); + } + else { + node.visitChildren(this); + } + } + } + @Override + public void meetUnaryTupleOperator(UnaryTupleOperator node) { + if (typeRequirement != null) { + if (node.getArg().getBindingNames().contains(varName)) { + node.visitChildren(this); + } + else { + meetNode(node); + } + } + } + } + + /** + * Load a set of SPIN rules from a data store. + * @param conf Contains the connection information. Not null. + * @return A map of rule identifiers to rule objects. + * @throws ForwardChainException if connecting, querying for rules, or + * parsing rules fails. + */ + public static Ruleset loadSpinRules(RdfCloudTripleStoreConfiguration conf) + throws ForwardChainException { + Preconditions.checkNotNull(conf); + Map rules = new ConcurrentHashMap<>(); + // Connect to Rya + SailRepository repository = null; + SailRepositoryConnection conn = null; + try { + repository = new SailRepository(RyaSailFactory.getInstance(conf)); + } catch (Exception e) { + throw new ForwardChainException("Couldn't initialize SAIL from configuration", e); + } + // Load and parse the individual SPIN rules from the data store + String ruleQueryString = "SELECT ?type ?rule ?text WHERE {\n" + + " ?type <" + SPIN.RULE_PROPERTY.stringValue() + "> ?rule .\n" + + " {\n" + + " ?rule a <" + SP.CONSTRUCT_CLASS.stringValue() + "> .\n" + + " ?rule <" + SP.TEXT_PROPERTY.stringValue() + "> ?text .\n" + + " } UNION {\n" + + " ?rule a ?template .\n" + + " ?template <" + SPIN.BODY_PROPERTY + ">? 
?body .\n" + + " ?body a <" + SP.CONSTRUCT_CLASS.stringValue() + "> .\n" + + " ?body <" + SP.TEXT_PROPERTY.stringValue() + "> ?text .\n" + + " }\n" + + "}"; + SPARQLParser parser = new SPARQLParser(); + try { + conn = repository.getConnection(); + TupleQuery ruleQuery = conn.prepareTupleQuery(QueryLanguage.SPARQL, ruleQueryString); + ruleQuery.evaluate(new TupleQueryResultHandlerBase() { + @Override + public void handleSolution(BindingSet bs) throws TupleQueryResultHandlerException { + // For each rule identifier found, instantiate a SpinRule + Value requiredType = bs.getValue("type"); + Value ruleIdentifier = bs.getValue("rule"); + Value ruleText = bs.getValue("text"); + if (requiredType instanceof Resource + && ruleIdentifier instanceof Resource + && ruleText instanceof Literal) { + ParsedQuery parsedRule; + try { + parsedRule = parser.parseQuery(ruleText.stringValue(), null); + if (parsedRule instanceof ParsedGraphQuery) { + SpinConstructRule rule = new SpinConstructRule( + (Resource) requiredType, + (Resource) ruleIdentifier, + (ParsedGraphQuery) parsedRule); + if (rule.hasAnonymousConsequent()) { + logger.error("Skipping unsupported rule " + ruleIdentifier + + " -- consequent refers to bnode, which is not" + + " currently supported (creating new bnodes at each" + + " application could lead to infinite recursion)."); + } + else { + rules.put((Resource) ruleIdentifier, rule); + } + } + } catch (Exception e) { + throw new TupleQueryResultHandlerException(e); + } + } + } + }); + } catch (TupleQueryResultHandlerException | QueryEvaluationException + | MalformedQueryException | RepositoryException e) { + throw new ForwardChainException("Couldn't retrieve SPIN rules", e); + } + finally { + if (conn != null) { + try { + conn.close(); + } catch (RepositoryException e) { + logger.warn("Error closing repository connection", e); + } + } + if (repository.isInitialized()) { + try { + repository.shutDown(); + } catch (RepositoryException e) { + logger.warn("Error shutting down repository", e); + } + } + } + return new Ruleset(rules.values()); + } +} diff --git a/extras/rya.forwardchain/src/main/java/org/apache/rya/forwardchain/strategy/AbstractForwardChainStrategy.java b/extras/rya.forwardchain/src/main/java/org/apache/rya/forwardchain/strategy/AbstractForwardChainStrategy.java new file mode 100644 index 000000000..fb0314e0a --- /dev/null +++ b/extras/rya.forwardchain/src/main/java/org/apache/rya/forwardchain/strategy/AbstractForwardChainStrategy.java @@ -0,0 +1,82 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
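Putting the pieces together, a caller could materialize all SPIN construct rules roughly as follows; this is a sketch, where conf is an RdfCloudTripleStoreConfiguration pointing at a store that already holds SPIN rule definitions:

    Ruleset rules = SpinConstructRule.loadSpinRules(conf);
    AbstractRuleExecutionStrategy perRule = new SailExecutionStrategy(conf);
    AbstractForwardChainStrategy strategy = new RoundRobinStrategy(perRule);
    long inferences = strategy.executeAll(rules); // repeats until no rule yields new triples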
+ */ +package org.apache.rya.forwardchain.strategy; + +import org.apache.rya.forwardchain.ForwardChainException; +import org.apache.rya.forwardchain.rule.Ruleset; + +/** + * Base class for high-level strategies which define how to conduct + * forward-chaining reasoning (materialization). + */ +public abstract class AbstractForwardChainStrategy { + /** + * A running count of new inferences so far. + */ + protected long totalInferences; + + /** + * Initializes reasoning with respect to a given ruleset. + * @param ruleset The complete set of rules to materialize. Should not be + * null. + * @throws ForwardChainException if initialization fails. + */ + abstract public void initialize(Ruleset ruleset) throws ForwardChainException; + + /** + * Whether forward chaining is both initialized and yet to finish. + * @return true if a ruleset has been provided and some rules may still + * yield new information. + */ + abstract protected boolean isActive(); + + /** + * Execute the next step of reasoning, such as a single rule if the strategy + * proceeds one rule at a time. + * @return The number of inferences made during this step. + * @throws ForwardChainException if any error is encountered during rule + * application. + */ + abstract protected long executeNext() throws ForwardChainException; + + /** + * Execute an entire ruleset until no new rules can be derived. Initializes + * strategy and proceeds until completion. + * @param rules The complete set of rules; not null. + * @return The number of total inferences made. + * @throws ForwardChainException if any error is encountered during + * initialization or application. + */ + public long executeAll(Ruleset rules) throws ForwardChainException { + initialize(rules); + totalInferences = 0; + while (isActive()) { + totalInferences += executeNext(); + } + return totalInferences; + } + + /** + * Get the running total of inferences made so far. + * @return The number of inferences made since initialization. + */ + public long getNumInferences() { + return totalInferences; + } +} diff --git a/extras/rya.forwardchain/src/main/java/org/apache/rya/forwardchain/strategy/AbstractRuleExecutionStrategy.java b/extras/rya.forwardchain/src/main/java/org/apache/rya/forwardchain/strategy/AbstractRuleExecutionStrategy.java new file mode 100644 index 000000000..24c8de941 --- /dev/null +++ b/extras/rya.forwardchain/src/main/java/org/apache/rya/forwardchain/strategy/AbstractRuleExecutionStrategy.java @@ -0,0 +1,108 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
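To illustrate the initialize/isActive/executeNext contract driven by executeAll, here is a deliberately simple strategy, not part of the patch, that applies every rule exactly once using the per-rule strategy type defined next:

    import java.util.Iterator;

    import org.apache.rya.api.domain.StatementMetadata;
    import org.apache.rya.forwardchain.ForwardChainException;
    import org.apache.rya.forwardchain.rule.Rule;
    import org.apache.rya.forwardchain.rule.Ruleset;

    public class SinglePassStrategy extends AbstractForwardChainStrategy {
        private final AbstractRuleExecutionStrategy ruleStrategy;
        private Iterator<Rule> remaining;

        public SinglePassStrategy(AbstractRuleExecutionStrategy ruleStrategy) {
            this.ruleStrategy = ruleStrategy;
        }

        @Override
        public void initialize(Ruleset ruleset) throws ForwardChainException {
            remaining = ruleset.getRules().iterator();
        }

        @Override
        protected boolean isActive() {
            return remaining != null && remaining.hasNext();
        }

        @Override
        protected long executeNext() throws ForwardChainException {
            // Apply one rule and report how many inferences it produced.
            return remaining.next().execute(ruleStrategy, StatementMetadata.EMPTY_METADATA);
        }
    }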
+ */
+package org.apache.rya.forwardchain.strategy;
+
+import org.apache.rya.api.domain.StatementMetadata;
+import org.apache.rya.forwardchain.ForwardChainException;
+import org.apache.rya.forwardchain.rule.AbstractConstructRule;
+import org.apache.rya.forwardchain.rule.AbstractInconsistencyRule;
+import org.apache.rya.forwardchain.rule.AbstractUpdateRule;
+
+/**
+ * Base class for rule application strategies, which can execute a single
+ * forward chaining (materialization) rule at a time. Subclasses may provide
+ * implementations of methods to execute whichever they support of construct
+ * rules, update rules, and inconsistency rules. The default behavior for all
+ * kinds is to throw an {@link UnsupportedOperationException}.
+ */
+public abstract class AbstractRuleExecutionStrategy {
+ protected int requiredLevel = 0;
+
+ /**
+ * Execute a rule corresponding to a "CONSTRUCT" query. Throws an
+ * UnsupportedOperationException if not explicitly overridden.
+ * @param rule The construct rule to apply; assumed not null.
+ * @param metadata Additional metadata to add to any inferred triples;
+ * assumed not null.
+ * @return The number of inferred triples. Higher-level forward chaining
+ * strategies may rely on the accuracy of this number.
+ * @throws ForwardChainException if execution failed.
+ */
+ public long executeConstructRule(AbstractConstructRule rule,
+ StatementMetadata metadata) throws ForwardChainException {
+ throw new UnsupportedOperationException("Rule execution strategy does not support construct rules.");
+ }
+
+ /**
+ * Execute a rule corresponding to an update query. Throws an
+ * UnsupportedOperationException if not explicitly overridden.
+ * @param rule The update rule to apply; assumed not null.
+ * @param metadata Additional metadata to add to any updated triples;
+ * assumed not null.
+ * @return The number of inferences made. Higher-level forward chaining
+ * strategies may rely on the accuracy of this number.
+ * @throws ForwardChainException if execution failed.
+ */
+ public long executeUpdateRule(AbstractUpdateRule rule,
+ StatementMetadata metadata) throws ForwardChainException {
+ throw new UnsupportedOperationException("Rule execution strategy does not support update rules.");
+ }
+
+ /**
+ * Execute a rule capable of detecting inconsistencies. Throws an
+ * UnsupportedOperationException if not explicitly overridden.
+ * @param rule The inconsistency rule to apply; assumed not null.
+ * @param metadata Additional metadata associated with inconsistencies;
+ * assumed not null.
+ * @return The number of inconsistencies found.
+ * @throws ForwardChainException if execution failed.
+ */
+ public long executeInconsistencyRule(AbstractInconsistencyRule rule,
+ StatementMetadata metadata) throws ForwardChainException {
+ throw new UnsupportedOperationException("Rule execution strategy does not perform inconsistency detection.");
+ }
+
+ /**
+ * Initialize the strategy and make any preparations for executing rules.
+ * Does nothing by default; subclasses should override if necessary.
+ * @throws ForwardChainException if initialization fails.
+ */
+ public void initialize() throws ForwardChainException { }
+
+ /**
+ * Shut down the strategy and perform any appropriate cleanup. Does nothing
+ * by default; subclasses should override if necessary.
+ * @throws ForwardChainException if cleanup fails.
+ */
+ public void shutDown() throws ForwardChainException { }
+
+ /**
+ * Indicate that a rule need only be applied if one of the source statements
+ * is at least this derivation level, i.e.
took this many steps to derive + * itself. Subclasses may use this for optimization, but are not guaranteed + * to. + * @param derivationLevel Forward chaining level of statements that should + * be used to trigger rules. If not set, defaults to zero which should have + * no effect. + */ + public void setRequiredLevel(int derivationLevel) { + this.requiredLevel = derivationLevel; + }; +} diff --git a/extras/rya.forwardchain/src/main/java/org/apache/rya/forwardchain/strategy/MongoPipelineStrategy.java b/extras/rya.forwardchain/src/main/java/org/apache/rya/forwardchain/strategy/MongoPipelineStrategy.java new file mode 100644 index 000000000..c09512250 --- /dev/null +++ b/extras/rya.forwardchain/src/main/java/org/apache/rya/forwardchain/strategy/MongoPipelineStrategy.java @@ -0,0 +1,276 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.rya.forwardchain.strategy; + +import java.util.List; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.atomic.LongAdder; + +import org.apache.log4j.Logger; +import org.apache.rya.api.domain.RyaStatement; +import org.apache.rya.api.domain.StatementMetadata; +import org.apache.rya.api.persist.RyaDAOException; +import org.apache.rya.api.persist.query.RyaQuery; +import org.apache.rya.api.persist.query.RyaQueryEngine; +import org.apache.rya.forwardchain.ForwardChainException; +import org.apache.rya.forwardchain.rule.AbstractConstructRule; +import org.apache.rya.forwardchain.rule.Rule; +import org.apache.rya.mongodb.MongoDBRdfConfiguration; +import org.apache.rya.mongodb.MongoDBRyaDAO; +import org.apache.rya.mongodb.StatefulMongoDBRdfConfiguration; +import org.apache.rya.mongodb.aggregation.AggregationPipelineQueryNode; +import org.apache.rya.mongodb.aggregation.SparqlToPipelineTransformVisitor; +import org.apache.rya.mongodb.batch.MongoDbBatchWriter; +import org.apache.rya.mongodb.batch.MongoDbBatchWriterConfig; +import org.apache.rya.mongodb.batch.MongoDbBatchWriterException; +import org.apache.rya.mongodb.batch.MongoDbBatchWriterUtils; +import org.apache.rya.mongodb.batch.collection.CollectionType; +import org.apache.rya.mongodb.batch.collection.MongoCollectionType; +import org.apache.rya.mongodb.dao.SimpleMongoDBStorageStrategy; +import org.apache.rya.sail.config.RyaSailFactory; +import org.bson.Document; +import org.bson.conversions.Bson; +import org.openrdf.query.algebra.QueryRoot; +import org.openrdf.query.algebra.TupleExpr; + +import com.google.common.base.Preconditions; +import com.mongodb.Block; +import com.mongodb.DBObject; +import com.mongodb.MongoClient; +import com.mongodb.client.MongoCollection; +import com.mongodb.client.MongoDatabase; +import com.mongodb.util.JSON; + +/** + * A rule execution strategy for MongoDB Rya that converts a single rule 
into an + * aggregation pipeline whenever possible. Falls back on an internal + * {@link SailExecutionStrategy} to handle any rules that can't be converted. + */ +public class MongoPipelineStrategy extends AbstractRuleExecutionStrategy { + private static final Logger logger = Logger.getLogger(MongoPipelineStrategy.class); + + private static final int PIPELINE_BATCH_SIZE = 1000; + + private final SparqlToPipelineTransformVisitor pipelineVisitor; + private final MongoCollection baseCollection; + private final MongoDbBatchWriter batchWriter; + private final MongoDBRyaDAO dao; + private final SimpleMongoDBStorageStrategy storageStrategy = new SimpleMongoDBStorageStrategy(); + private final ConcurrentHashMap executionTimes = new ConcurrentHashMap<>(); + private final AbstractRuleExecutionStrategy backup; + private final RyaQueryEngine engine; + private boolean usedBackup = false; + + /** + * Initialize based on a configuration. + * @param mongoConf Should contain database information; cannot be null. If + * passed a stateful configuration, uses the existing mongo client, + * otherwise creates one. + */ + public MongoPipelineStrategy(MongoDBRdfConfiguration mongoConf) throws ForwardChainException { + Preconditions.checkNotNull(mongoConf); + final String mongoDBName = mongoConf.getMongoDBName(); + final String collectionName = mongoConf.getTriplesCollectionName(); + mongoConf.setFlush(false); + final StatefulMongoDBRdfConfiguration statefulConf; + try { + if (mongoConf instanceof StatefulMongoDBRdfConfiguration) { + statefulConf = (StatefulMongoDBRdfConfiguration) mongoConf; + this.dao = new MongoDBRyaDAO(); + this.dao.setConf(statefulConf); + this.dao.init(); + } + else { + this.dao = RyaSailFactory.getMongoDAO(mongoConf); + statefulConf = this.dao.getConf(); + } + } catch (RyaDAOException e) { + throw new ForwardChainException("Can't connect to Rya.", e); + } + final MongoClient mongoClient = statefulConf.getMongoClient(); + final MongoDatabase mongoDB = mongoClient.getDatabase(mongoDBName); + this.baseCollection = mongoDB.getCollection(collectionName); + this.pipelineVisitor = new SparqlToPipelineTransformVisitor(this.baseCollection); + this.engine = this.dao.getQueryEngine(); + this.backup = new SailExecutionStrategy(statefulConf); + final MongoDbBatchWriterConfig writerConfig = MongoDbBatchWriterUtils.getMongoDbBatchWriterConfig(statefulConf); + final CollectionType ct = new MongoCollectionType(baseCollection); + this.batchWriter = new MongoDbBatchWriter<>(ct, writerConfig); + try { + this.batchWriter.start(); + } catch (final MongoDbBatchWriterException e) { + throw new ForwardChainException("Error starting MongoDB batch writer", e); + } + } + + /** + * Execute a CONSTRUCT rule by converting it into a pipeline, iterating + * through the resulting documents, and inserting them back to the data + * store as new triples. If pipeline conversion fails, falls back on + * default execution strategy. + * @param rule A construct query rule; not null. + * @param metadata StatementMetadata to attach to new triples; not null. + * @return The number of new triples inferred. + * @throws ForwardChainException if execution fails. 
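+ *
+ * A minimal illustrative call sequence ({@code mongoConf}, {@code rule}, and
+ * {@code metadata} stand for objects constructed elsewhere):
+ * <pre>{@code
+ * MongoPipelineStrategy strategy = new MongoPipelineStrategy(mongoConf);
+ * long inferred = strategy.executeConstructRule(rule, metadata);
+ * strategy.shutDown(); // stops the batch writer and the fallback strategy
+ * }</pre>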
+ */ + @Override + public long executeConstructRule(AbstractConstructRule rule, + StatementMetadata metadata) throws ForwardChainException { + Preconditions.checkNotNull(rule); + logger.info("Applying inference rule " + rule + "..."); + long timestamp = System.currentTimeMillis(); + // Get a pipeline that turns individual matches into triples + List pipeline = null; + try { + int requireSourceLevel = 0; + if (!usedBackup) { + // If we can assume derivation levels are set properly, we can optimize by + // pruning any derived fact whose sources are all old information. (i.e. we can + // infer that the pruned fact would have already been derived in a previous + // step.) But if the backup strategy has ever been used, the source triples aren't + // guaranteed to have derivation level set. + requireSourceLevel = requiredLevel; + } + pipeline = toPipeline(rule, requireSourceLevel, timestamp); + } + catch (ForwardChainException e) { + logger.error(e); + } + if (pipeline == null) { + if (backup == null) { + logger.error("Couldn't convert " + rule + " to pipeline:"); + for (String line : rule.getQuery().toString().split("\n")) { + logger.error("\t" + line); + } + throw new UnsupportedOperationException("Couldn't convert query to pipeline."); + } + else { + logger.debug("Couldn't convert " + rule + " to pipeline:"); + for (String line : rule.getQuery().toString().split("\n")) { + logger.debug("\t" + line); + } + logger.debug("Using fallback strategy."); + usedBackup = true; + return backup.executeConstructRule(rule, metadata); + } + } + // Execute the pipeline + for (Bson step : pipeline) { + logger.debug("\t" + step.toString()); + } + LongAdder count = new LongAdder(); + baseCollection.aggregate(pipeline) + .allowDiskUse(true) + .batchSize(PIPELINE_BATCH_SIZE) + .forEach(new Block() { + @Override + public void apply(Document doc) { + final DBObject dbo = (DBObject) JSON.parse(doc.toJson()); + RyaStatement rstmt = storageStrategy.deserializeDBObject(dbo); + if (!statementExists(rstmt)) { + count.increment(); + doc.replace(SimpleMongoDBStorageStrategy.STATEMENT_METADATA, metadata.toString()); + try { + batchWriter.addObjectToQueue(doc); + } catch (MongoDbBatchWriterException e) { + logger.error("Couldn't insert " + rstmt, e); + } + } + } + }); + try { + batchWriter.flush(); + } catch (MongoDbBatchWriterException e) { + throw new ForwardChainException("Error writing to Mongo", e); + } + logger.info("Added " + count + " new statements."); + executionTimes.compute(rule, (r, previous) -> { + if (previous != null && previous > timestamp) { + return previous; + } + else { + return timestamp; + } + }); + return count.longValue(); + } + + private boolean statementExists(RyaStatement rstmt) { + try { + return engine.query(new RyaQuery(rstmt)).iterator().hasNext(); + } catch (RyaDAOException e) { + logger.error("Error querying for " + rstmt, e); + return false; + } + } + + /** + * Flush and close the batch writer, and shut down the backup + * SailExecutionStrategy. + * @throws ForwardChainException if the batch writer or backup strategy + * throw any errors. + */ + @Override + public void shutDown() throws ForwardChainException { + backup.shutDown(); + try { + batchWriter.shutdown(); + } catch (MongoDbBatchWriterException e) { + throw new ForwardChainException("Error shutting down batch writer", e); + } + } + + /** + * Converts a construct rule into a series of documents representing + * aggregation pipeline steps. + * @param rule A construct query rule. 
+ * @param sourceLevel Only make derivations whose source triples have this + * derivation level or higher, i.e. took some number of forward chaining + * steps to infer. Set to zero to skip this check. + * @param timestamp Timestamp to be set for all inferred triples. + * @return An aggregation pipeline. + * @throws ForwardChainException if pipeline construction fails. + */ + private List toPipeline(AbstractConstructRule rule, int sourceLevel, + long timestamp) throws ForwardChainException { + TupleExpr tupleExpr = rule.getQuery().getTupleExpr(); + if (!(tupleExpr instanceof QueryRoot)) { + tupleExpr = new QueryRoot(tupleExpr); + } + try { + tupleExpr.visit(pipelineVisitor); + } catch (Exception e) { + throw new ForwardChainException("Error converting construct rule to an aggregation pipeline", e); + } + if (tupleExpr instanceof QueryRoot) { + QueryRoot root = (QueryRoot) tupleExpr; + if (root.getArg() instanceof AggregationPipelineQueryNode) { + AggregationPipelineQueryNode pipelineNode = (AggregationPipelineQueryNode) root.getArg(); + pipelineNode.distinct(); // require distinct triples + pipelineNode.requireSourceDerivationDepth(sourceLevel); + long latestTime = executionTimes.getOrDefault(rule, 0L); + if (latestTime > 0) { + pipelineNode.requireSourceTimestamp(latestTime); + } + return pipelineNode.getTriplePipeline(timestamp, false); + } + } + return null; + } +} diff --git a/extras/rya.forwardchain/src/main/java/org/apache/rya/forwardchain/strategy/RoundRobinStrategy.java b/extras/rya.forwardchain/src/main/java/org/apache/rya/forwardchain/strategy/RoundRobinStrategy.java new file mode 100644 index 000000000..eb044fcf1 --- /dev/null +++ b/extras/rya.forwardchain/src/main/java/org/apache/rya/forwardchain/strategy/RoundRobinStrategy.java @@ -0,0 +1,212 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.rya.forwardchain.strategy; + +import java.util.Comparator; +import java.util.HashSet; +import java.util.Set; +import java.util.SortedSet; +import java.util.TreeSet; +import java.util.concurrent.atomic.AtomicBoolean; + +import org.apache.log4j.Logger; +import org.apache.rya.api.domain.RyaType; +import org.apache.rya.api.domain.StatementMetadata; +import org.apache.rya.forwardchain.ForwardChainConstants; +import org.apache.rya.forwardchain.ForwardChainException; +import org.apache.rya.forwardchain.rule.Rule; +import org.apache.rya.forwardchain.rule.Ruleset; +import org.openrdf.model.vocabulary.XMLSchema; + +import com.google.common.base.Preconditions; + +/** + * A simple {@link AbstractForwardChainStrategy} that iterates over every + * relevant rule once, and repeats until no rules are relevant. + *
+ * Initially, all rules are considered relevant. Iteration 1 executes each rule + * once. + *
+ * When a rule produces inferences, all rules that are marked as that rule's + * successors according to the {@link Ruleset} are triggered as potentially + * relevant for future execution. If a triggered rule is scheduled to be + * executed during the current iteration, nothing changes. If a triggered rule + * has already been executed once during the current iteration, or was not + * activated for the current iteration at all, it is flagged to be executed + * during the next iteration. + *
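+ * For example, if rule A can conclude {@code rdfs:subClassOf} triples and
+ * rule B matches {@code rdfs:subClassOf} patterns in its body, then a
+ * productive application of A flags B for later execution (rule names here
+ * are purely illustrative).
+ *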
+ * When an iteration concludes, a new iteration begins with the relevant set of + * rules having been determined during the previous iteration. If there are no + * such rules, forward chaining ends. + *
+ * Within each iteration, rules are processed such that a rule which may trigger + * many other rules is given priority over a rule that may be triggered by many + * other rules. + *
+ * The observation that one rule may trigger another is based on the + * relationships between triple patterns produced and consumed by the rules in + * general, not based on any triples that were actually generated. Therefore, + * there may be false positives but not false negatives: Rules triggered by the + * current rule may or may not produce more triples in response, but any rule + * that could produce triples in response will be triggered. + *
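+ * A minimal usage sketch ({@code conf} and {@code ruleset} stand for a
+ * previously built configuration and rule set; any
+ * {@link AbstractRuleExecutionStrategy} implementation works):
+ * <pre>{@code
+ * AbstractRuleExecutionStrategy perRule = new SailExecutionStrategy(conf);
+ * AbstractForwardChainStrategy chainer = new RoundRobinStrategy(perRule);
+ * long total = chainer.executeAll(ruleset); // repeats until no rule is relevant
+ * }</pre>
+ *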
+ * The procedure for executing the individual rules is governed by the
+ * {@link AbstractRuleExecutionStrategy}. This class uses the strategy's
+ * reported counts to determine whether or not a rule has produced inferences.
+ */
+public class RoundRobinStrategy extends AbstractForwardChainStrategy {
+    private static final Logger logger = Logger.getLogger(RoundRobinStrategy.class);
+
+    private final AbstractRuleExecutionStrategy ruleStrategy;
+    private int iteration;
+    private Ruleset ruleset;
+    private Set<Rule> activeNow;
+    private Set<Rule> activeNextIteration;
+    private long inferencesThisIteration;
+    private AtomicBoolean initialized = new AtomicBoolean(false);
+
+    /**
+     * Instantiate a RoundRobinStrategy by providing the rule execution
+     * strategy.
+     * @param ruleStrategy Defines how to execute individual rules; not null.
+     */
+    public RoundRobinStrategy(AbstractRuleExecutionStrategy ruleStrategy) {
+        Preconditions.checkNotNull(ruleStrategy);
+        this.ruleStrategy = ruleStrategy;
+    }
+
+    @Override
+    public void initialize(Ruleset withRuleset) throws ForwardChainException {
+        Preconditions.checkNotNull(withRuleset);
+        iteration = 0;
+        ruleset = withRuleset;
+        activeNow = new HashSet<>();
+        activeNextIteration = new HashSet<>(ruleset.getRules());
+        logger.info("Initializing round robin forward chaining, with "
+                + activeNextIteration.size() + " rules.");
+        initialized.set(true);
+        prepareQueue();
+    }
+
+    private void prepareQueue() throws ForwardChainException {
+        if (initialized.get()) {
+            if (activeNow.isEmpty()) {
+                if (iteration > 0) {
+                    logger.info("Finished iteration " + iteration + "; made "
+                            + inferencesThisIteration + " inferences.");
+                }
+                if (activeNextIteration.isEmpty()) {
+                    logger.info("Finished forward chaining after " + iteration + " iterations.");
+                    setDone();
+                }
+                else {
+                    ruleStrategy.setRequiredLevel(iteration);
+                    iteration++;
+                    inferencesThisIteration = 0;
+                    activeNow.addAll(activeNextIteration);
+                    activeNextIteration.clear();
+                    logger.info("Beginning iteration " + iteration + ", with "
+                            + activeNow.size() + " rules to execute...");
+                }
+            }
+        }
+    }
+
+    private void setDone() throws ForwardChainException {
+        initialized.set(false);
+        if (ruleStrategy != null) {
+            ruleStrategy.shutDown();
+        }
+    }
+
+    @Override
+    public boolean isActive() {
+        return initialized.get();
+    }
+
+    @Override
+    public long executeNext() throws ForwardChainException {
+        if (!initialized.get()) {
+            return 0;
+        }
+        Rule rule = getNextRule();
+        if (rule == null) {
+            return 0;
+        }
+        StatementMetadata metadata = new StatementMetadata();
+        metadata.addMetadata(ForwardChainConstants.RYA_DERIVATION_TIME,
+                new RyaType(XMLSchema.INT, Integer.toString(iteration)));
+        long inferences = rule.execute(ruleStrategy, metadata);
+        inferencesThisIteration += inferences;
+        if (inferences > 0) {
+            for (Rule successor : ruleset.getSuccessorsOf(rule)) {
+                // If we'll handle the triggered rule in the current iteration,
+                // it may not need to be checked in the next one.
+                if (!activeNow.contains(successor)) {
+                    activeNextIteration.add(successor);
+                }
+            }
+        }
+        prepareQueue();
+        return inferences;
+    }
+
+    private Rule getNextRule() {
+        if (activeNow.isEmpty()) {
+            return null;
+        }
+        Ruleset subset = new Ruleset(activeNow);
+        SortedSet<Rule> sorted = new TreeSet<>(new Comparator<Rule>() {
+            @Override
+            public int compare(Rule r1, Rule r2) {
+                // If one rule triggers the other (directly or indirectly) but
+                // not the other way around, the one that triggers the other
+                // should come first.
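+                // (pathExists checks reachability in the dependency graph of
+                // the remaining active rules; rules that trigger each other,
+                // or neither one, compare as equal here and fall through to
+                // the next comparison.)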
+ boolean forwardPath = subset.pathExists(r1, r2); + boolean backwardPath = subset.pathExists(r2, r1); + if (forwardPath && !backwardPath) { + return -1; + } + if (backwardPath && !forwardPath) { + return 1; + } + return 0; + } + }.thenComparingInt(rule -> { + // Otherwise, prioritize rules that trigger many remaining rules, + // and defer rules that can be triggered by many remaining rules. + return remainingPredecessors(rule).size() - remainingSuccessors(rule).size(); + }).thenComparing(Rule::toString)); // Fall back on string comparison + sorted.addAll(activeNow); + Rule next = sorted.first(); + activeNow.remove(next); + return next; + } + + private Set remainingSuccessors(Rule rule) { + Set successors = new HashSet<>(ruleset.getSuccessorsOf(rule)); + successors.retainAll(activeNow); + return successors; + } + + private Set remainingPredecessors(Rule rule) { + Set predecessors = new HashSet<>(ruleset.getPredecessorsOf(rule)); + predecessors.retainAll(activeNow); + return predecessors; + } +} diff --git a/extras/rya.forwardchain/src/main/java/org/apache/rya/forwardchain/strategy/SailExecutionStrategy.java b/extras/rya.forwardchain/src/main/java/org/apache/rya/forwardchain/strategy/SailExecutionStrategy.java new file mode 100644 index 000000000..d09c50c81 --- /dev/null +++ b/extras/rya.forwardchain/src/main/java/org/apache/rya/forwardchain/strategy/SailExecutionStrategy.java @@ -0,0 +1,223 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.rya.forwardchain.strategy; + +import org.apache.accumulo.core.client.AccumuloException; +import org.apache.accumulo.core.client.AccumuloSecurityException; +import org.apache.log4j.Logger; +import org.apache.rya.accumulo.AccumuloRdfConfiguration; +import org.apache.rya.api.RdfCloudTripleStoreConfiguration; +import org.apache.rya.api.domain.RyaStatement; +import org.apache.rya.api.domain.StatementMetadata; +import org.apache.rya.api.persist.RyaDAO; +import org.apache.rya.api.persist.RyaDAOException; +import org.apache.rya.api.persist.query.RyaQuery; +import org.apache.rya.api.persist.query.RyaQueryEngine; +import org.apache.rya.api.resolver.RdfToRyaConversions; +import org.apache.rya.forwardchain.ForwardChainException; +import org.apache.rya.forwardchain.rule.AbstractConstructRule; +import org.apache.rya.indexing.accumulo.ConfigUtils; +import org.apache.rya.mongodb.MongoDBRdfConfiguration; +import org.apache.rya.sail.config.RyaSailFactory; +import org.calrissian.mango.collect.CloseableIterable; +import org.openrdf.model.Statement; +import org.openrdf.query.GraphQuery; +import org.openrdf.query.QueryEvaluationException; +import org.openrdf.query.parser.ParsedGraphQuery; +import org.openrdf.repository.RepositoryException; +import org.openrdf.repository.sail.SailGraphQuery; +import org.openrdf.repository.sail.SailRepository; +import org.openrdf.repository.sail.SailRepositoryConnection; +import org.openrdf.rio.RDFHandlerException; +import org.openrdf.rio.helpers.RDFHandlerBase; + +import com.google.common.base.Preconditions; + +/** + * A naive but back-end-agnostic rule execution strategy that applies a + * construct rule by submitting the associated query to a Rya SAIL, then + * converting the resulting bindings (expecting variables "subject", + * "predicate", and "object") into triples and inserting them into a Rya DAO. + */ +public class SailExecutionStrategy extends AbstractRuleExecutionStrategy { + private static final Logger logger = Logger.getLogger(SailExecutionStrategy.class); + + private final RdfCloudTripleStoreConfiguration conf; + + private SailRepository repo = null; + private SailRepositoryConnection conn = null; + private RyaDAO dao = null; + private boolean initialized = false; + + /** + * Initialize a SailExecutionStrategy with the given configuration. + * @param conf Defines Rya connection and query parameters; not null. + */ + public SailExecutionStrategy(RdfCloudTripleStoreConfiguration conf) { + Preconditions.checkNotNull(conf); + this.conf = conf; + } + + /** + * Executes a CONSTRUCT query through the SAIL and inserts the results into + * the DAO. + * @param rule A construct query; not null. + * @param metadata Metadata to add to any inferred triples; not null. + * @return The number of inferred triples. + * @throws ForwardChainException if query execution or data insert fails. 
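+ *
+ * Illustrative use ({@code conf} and {@code rule} are assumed to be in scope;
+ * the strategy initializes itself on first use if necessary):
+ * <pre>{@code
+ * SailExecutionStrategy strategy = new SailExecutionStrategy(conf);
+ * long added = strategy.executeConstructRule(rule, new StatementMetadata());
+ * strategy.shutDown(); // closes the connection and flushes the DAO
+ * }</pre>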
+ */ + @Override + public long executeConstructRule(AbstractConstructRule rule, + StatementMetadata metadata) throws ForwardChainException { + Preconditions.checkNotNull(rule); + Preconditions.checkNotNull(metadata); + if (!initialized) { + initialize(); + } + ParsedGraphQuery graphQuery = rule.getQuery(); + long statementsAdded = 0; + logger.info("Applying inference rule " + rule + "..."); + for (String line : graphQuery.getTupleExpr().toString().split("\n")) { + logger.debug("\t" + line); + } + InferredStatementHandler handler = new InferredStatementHandler<>(dao, metadata); + try { + GraphQuery executableQuery = new SailGraphQuery(graphQuery, conn) { }; + executableQuery.evaluate(handler); + statementsAdded = handler.getNumStatementsAdded(); + logger.info("Added " + statementsAdded + " inferred statements."); + return statementsAdded; + } catch (QueryEvaluationException e) { + throw new ForwardChainException("Error evaluating query portion of construct rule", e); + } catch (RDFHandlerException e) { + throw new ForwardChainException("Error processing results of construct rule", e); + } + } + + /** + * Connect to the Rya SAIL. If a DAO wasn't provided, instantiate one from + * the configuration. + * @throws ForwardChainException if connecting fails. + */ + @Override + public void initialize() throws ForwardChainException { + try { + if (dao == null) { + dao = getDAO(); + } + repo = new SailRepository(RyaSailFactory.getInstance(conf)); + conn = repo.getConnection(); + initialized = true; + } catch (Exception e) { + shutDown(); + throw new ForwardChainException("Error connecting to SAIL", e); + } + } + + private RyaDAO getDAO() throws RyaDAOException, ForwardChainException { + if (ConfigUtils.getUseMongo(conf)) { + MongoDBRdfConfiguration mongoConf; + if (conf instanceof MongoDBRdfConfiguration) { + mongoConf = (MongoDBRdfConfiguration) conf; + } + else { + mongoConf = new MongoDBRdfConfiguration(conf); + } + return RyaSailFactory.getMongoDAO(mongoConf); + } + else { + AccumuloRdfConfiguration accumuloConf; + if (conf instanceof AccumuloRdfConfiguration) { + accumuloConf = (AccumuloRdfConfiguration) conf; + } + else { + accumuloConf = new AccumuloRdfConfiguration(conf); + } + try { + return RyaSailFactory.getAccumuloDAO(accumuloConf); + } catch (AccumuloException | AccumuloSecurityException e) { + throw new ForwardChainException(e); + } + } + } + + /** + * Shut down the SAIL connection objects. 
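+ * Also flushes the underlying DAO, if it was initialized, so that pending
+ * inferred statements are persisted.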
+ */ + @Override + public void shutDown() { + initialized = false; + if (conn != null) { + try { + conn.close(); + } catch (RepositoryException e) { + logger.warn("Error closing SailRepositoryConnection", e); + } + } + if (repo != null && repo.isInitialized()) { + try { + repo.shutDown(); + } catch (RepositoryException e) { + logger.warn("Error shutting down SailRepository", e); + } + } + try { + if (dao != null && dao.isInitialized()) { + dao.flush(); + } + } catch (RyaDAOException e) { + logger.warn("Error flushing DAO", e); + } + } + + private static class InferredStatementHandler extends RDFHandlerBase { + private RyaDAO dao; + private RyaQueryEngine engine; + private long numStatementsAdded = 0; + private StatementMetadata metadata; + + InferredStatementHandler(RyaDAO dao, StatementMetadata metadata) { + this.dao = dao; + this.engine = dao.getQueryEngine(); + this.metadata = metadata; + this.engine.setConf(dao.getConf()); + } + + @Override + public void handleStatement(Statement statement) { + RyaStatement ryaStatement = RdfToRyaConversions.convertStatement(statement); + ryaStatement.setStatementMetadata(metadata); + try { + // Need to check whether the statement already exists, because + // we need an accurate count of newly added statements. + CloseableIterable iter = engine.query(new RyaQuery(ryaStatement)); + if (!iter.iterator().hasNext()) { + dao.add(ryaStatement); + numStatementsAdded++; + } + } catch (RyaDAOException e) { + logger.error("Error handling inferred statement", e); + } + } + + public long getNumStatementsAdded() { + return numStatementsAdded; + } + } +} diff --git a/extras/rya.forwardchain/src/test/java/org/apache/rya/forwardchain/batch/MongoSpinIT.java b/extras/rya.forwardchain/src/test/java/org/apache/rya/forwardchain/batch/MongoSpinIT.java new file mode 100644 index 000000000..c70a025ec --- /dev/null +++ b/extras/rya.forwardchain/src/test/java/org/apache/rya/forwardchain/batch/MongoSpinIT.java @@ -0,0 +1,169 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.rya.forwardchain.batch; + +import java.io.BufferedReader; +import java.io.InputStream; +import java.net.URL; +import java.util.Arrays; +import java.util.HashSet; +import java.util.Set; +import java.util.stream.Collectors; + +import org.apache.hadoop.util.ToolRunner; +import org.apache.log4j.Level; +import org.apache.log4j.Logger; +import org.apache.rya.indexing.mongodb.MongoIndexingConfiguration; +import org.apache.rya.indexing.mongodb.MongoIndexingConfiguration.MongoDBIndexingConfigBuilder; +import org.apache.rya.mongodb.EmbeddedMongoFactory; +import org.apache.rya.mongodb.MongoDBRdfConfiguration; +import org.apache.rya.sail.config.RyaSailFactory; +import org.junit.After; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; +import org.openrdf.model.ValueFactory; +import org.openrdf.model.impl.ValueFactoryImpl; +import org.openrdf.query.BindingSet; +import org.openrdf.query.QueryLanguage; +import org.openrdf.query.TupleQuery; +import org.openrdf.query.TupleQueryResult; +import org.openrdf.query.impl.ListBindingSet; +import org.openrdf.repository.RepositoryException; +import org.openrdf.repository.sail.SailRepository; +import org.openrdf.repository.sail.SailRepositoryConnection; +import org.openrdf.rio.RDFFormat; +import org.openrdf.rio.Rio; + +import com.google.common.io.Resources; +import com.mongodb.MongoClient; +import com.mongodb.ServerAddress; + +public class MongoSpinIT { + private static final ValueFactory VF = ValueFactoryImpl.getInstance(); + private static final String EX = "http://example.org/"; + + private MongoDBRdfConfiguration conf; + private SailRepository repository; + + @Before + public void setup() throws Exception { + Logger.getLogger("org.apache.rya.mongodb").setLevel(Level.WARN); + Logger.getLogger("org.apache.rya.forwardchain").setLevel(Level.INFO); + conf = getConf(); + repository = new SailRepository(RyaSailFactory.getInstance(conf)); + } + + @After + public void tearDown() throws Exception { + if (repository != null) { + try { + repository.shutDown(); + } catch (final RepositoryException e) { + // quietly absorb this exception + } + } + } + + @Test + public void testSailStrategy() throws Exception { + insertDataFile(Resources.getResource("data.ttl"), "http://example.org#"); + insertDataFile(Resources.getResource("university.ttl"), "http://example.org#"); + insertDataFile(Resources.getResource("owlrl.ttl"), "http://example.org#"); + Set solutions = executeQuery(Resources.getResource("query.sparql")); + Set expected = new HashSet<>(); + Assert.assertEquals(expected, solutions); + conf.setUseAggregationPipeline(false); + ForwardChainSpinTool tool = new ForwardChainSpinTool(); + ToolRunner.run(conf, tool, new String[] {}); + solutions = executeQuery(Resources.getResource("query.sparql")); + expected.add(new ListBindingSet(Arrays.asList("X", "Y"), + VF.createURI(EX, "Alice"), VF.createURI(EX, "Department1"))); + Assert.assertEquals(expected, solutions); + Assert.assertEquals(24, tool.getNumInferences()); + } + + @Test + public void testPipelineStrategy() throws Exception { + insertDataFile(Resources.getResource("data.ttl"), "http://example.org#"); + insertDataFile(Resources.getResource("university.ttl"), "http://example.org#"); + insertDataFile(Resources.getResource("owlrl.ttl"), "http://example.org#"); + Set solutions = executeQuery(Resources.getResource("query.sparql")); + Set expected = new HashSet<>(); + Assert.assertEquals(expected, solutions); + conf.setUseAggregationPipeline(true); + ForwardChainSpinTool tool = 
new ForwardChainSpinTool(); + ToolRunner.run(conf, tool, new String[] {}); + solutions = executeQuery(Resources.getResource("query.sparql")); + expected.add(new ListBindingSet(Arrays.asList("X", "Y"), + VF.createURI(EX, "Alice"), VF.createURI(EX, "Department1"))); + Assert.assertEquals(expected, solutions); + Assert.assertEquals(24, tool.getNumInferences()); + } + + private void insertDataFile(URL dataFile, String defaultNamespace) throws Exception { + RDFFormat format = Rio.getParserFormatForFileName(dataFile.getFile()); + SailRepositoryConnection conn = repository.getConnection(); + try { + conn.add(dataFile, defaultNamespace, format); + } finally { + closeQuietly(conn); + } + } + + Set executeQuery(URL queryFile) throws Exception { + SailRepositoryConnection conn = repository.getConnection(); + try { + InputStream queryIS = queryFile.openStream(); + BufferedReader br = new BufferedReader(new java.io.InputStreamReader(queryIS, "UTF-8")); + String query = br.lines().collect(Collectors.joining("\n")); + br.close(); + TupleQuery tupleQuery = conn.prepareTupleQuery(QueryLanguage.SPARQL, query); + TupleQueryResult result = tupleQuery.evaluate(); + Set solutions = new HashSet<>(); + while (result.hasNext()) { + solutions.add(result.next()); + } + return solutions; + } finally { + closeQuietly(conn); + } + } + + private static MongoDBRdfConfiguration getConf() throws Exception { + MongoDBIndexingConfigBuilder builder = MongoIndexingConfiguration.builder().setUseMockMongo(true); + final MongoClient c = EmbeddedMongoFactory.newFactory().newMongoClient(); + final ServerAddress address = c.getAddress(); + builder.setMongoHost(address.getHost()); + builder.setMongoPort(Integer.toString(address.getPort())); + builder.setUseInference(false); + c.close(); + return builder.build(); + } + + private static void closeQuietly(final SailRepositoryConnection conn) { + if (conn != null) { + try { + conn.close(); + } catch (final RepositoryException e) { + // quietly absorb this exception + } + } + } +} diff --git a/extras/rya.forwardchain/src/test/java/org/apache/rya/forwardchain/rule/AntecedentVisitorTest.java b/extras/rya.forwardchain/src/test/java/org/apache/rya/forwardchain/rule/AntecedentVisitorTest.java new file mode 100644 index 000000000..7761a1a55 --- /dev/null +++ b/extras/rya.forwardchain/src/test/java/org/apache/rya/forwardchain/rule/AntecedentVisitorTest.java @@ -0,0 +1,156 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.rya.forwardchain.rule; + +import java.util.Set; + +import org.junit.Assert; +import org.junit.Test; +import org.openrdf.model.URI; +import org.openrdf.model.Value; +import org.openrdf.model.ValueFactory; +import org.openrdf.model.impl.ValueFactoryImpl; +import org.openrdf.model.vocabulary.FOAF; +import org.openrdf.model.vocabulary.RDF; +import org.openrdf.model.vocabulary.RDFS; +import org.openrdf.query.algebra.StatementPattern; +import org.openrdf.query.algebra.Var; +import org.openrdf.query.algebra.StatementPattern.Scope; +import org.openrdf.query.parser.ParsedQuery; +import org.openrdf.query.parser.sparql.SPARQLParser; + +import com.google.common.collect.Sets; + +public class AntecedentVisitorTest { + private static Var c(Value val) { + Var v = new Var("-const-" + val.stringValue(), val); + v.setAnonymous(true); + return v; + } + + private static ValueFactory VF = ValueFactoryImpl.getInstance(); + private static String EX = "http://example.org/"; + private static URI G1 = VF.createURI(EX, "Graph1"); + private static URI G2 = VF.createURI(EX, "Graph2"); + + @Test + public void testSelectQuery() throws Exception { + String text = "PREFIX foaf: <" + FOAF.NAMESPACE + ">\n" + + "SELECT * WHERE {\n" + + " ?x a foaf:Person .\n" + + " ?y a foaf:Person .\n" + + " ?x foaf:knows ?y .\n" + + "}"; + ParsedQuery query = new SPARQLParser().parseQuery(text, null); + AntecedentVisitor visitor = new AntecedentVisitor(); + query.getTupleExpr().visit(visitor); + Set expected = Sets.newHashSet( + new StatementPattern(new Var("x"), c(RDF.TYPE), c(FOAF.PERSON)), + new StatementPattern(new Var("y"), c(RDF.TYPE), c(FOAF.PERSON)), + new StatementPattern(new Var("x"), c(FOAF.KNOWS), new Var("y"))); + Assert.assertEquals(expected, visitor.getAntecedents()); + } + + @Test + public void testConstructQuery() throws Exception { + String text = "PREFIX foaf: <" + FOAF.NAMESPACE + ">\n" + + "CONSTRUCT {\n" + + " ?y foaf:knows ?x .\n" + + " ?y ?x .\n" + + " ?x ?y .\n" + + "} WHERE {\n" + + " ?x a foaf:Person .\n" + + " ?y a foaf:Person .\n" + + " ?x foaf:knows ?y .\n" + + "}"; + ParsedQuery query = new SPARQLParser().parseQuery(text, null); + AntecedentVisitor visitor = new AntecedentVisitor(); + query.getTupleExpr().visit(visitor); + Set expected = Sets.newHashSet( + new StatementPattern(new Var("x"), c(RDF.TYPE), c(FOAF.PERSON)), + new StatementPattern(new Var("y"), c(RDF.TYPE), c(FOAF.PERSON)), + new StatementPattern(new Var("x"), c(FOAF.KNOWS), new Var("y"))); + Assert.assertEquals(expected, visitor.getAntecedents()); + } + + @Test + public void testComplexQuery() throws Exception { + String text = "PREFIX foaf: <" + FOAF.NAMESPACE + ">\n" + + "PREFIX ex: <" + EX + ">\n" + + "SELECT * WHERE {\n" + + " { ?x a foaf:Person } UNION {\n" + + " GRAPH ex:Graph1 { ?y a foaf:Person }\n" + + " } .\n" + + " GRAPH ex:Graph2 {\n" + + " ?x foaf:knows ?y .\n" + + " }\n ." 
+ + " OPTIONAL { ?x foaf:mbox ?m } .\n" + + " FILTER (?x != ?y) .\n" + + "}"; + ParsedQuery query = new SPARQLParser().parseQuery(text, null); + AntecedentVisitor visitor = new AntecedentVisitor(); + query.getTupleExpr().visit(visitor); + Set expected = Sets.newHashSet( + new StatementPattern(Scope.NAMED_CONTEXTS, new Var("y"), c(RDF.TYPE), c(FOAF.PERSON), c(G1)), + new StatementPattern(new Var("x"), c(RDF.TYPE), c(FOAF.PERSON)), + new StatementPattern(Scope.NAMED_CONTEXTS, new Var("x"), c(FOAF.KNOWS), new Var("y"), c(G2)), + new StatementPattern(new Var("x"), c(FOAF.MBOX), new Var("m"))); + Assert.assertEquals(expected, visitor.getAntecedents()); + } + + @Test + public void testBNodeQuery() throws Exception { + String text = "PREFIX foaf: <" + FOAF.NAMESPACE + ">\n" + + "SELECT * WHERE {\n" + + " ?x a [ rdfs:subClassOf foaf:Person ] .\n" + + " ?x foaf:knows ?y .\n" + + "}"; + ParsedQuery query = new SPARQLParser().parseQuery(text, null); + AntecedentVisitor visitor = new AntecedentVisitor(); + query.getTupleExpr().visit(visitor); + Set actual = visitor.getAntecedents(); + Assert.assertEquals(3, actual.size()); + StatementPattern knows = new StatementPattern(new Var("x"), c(FOAF.KNOWS), new Var("y")); + Assert.assertTrue(actual.remove(knows)); + Assert.assertTrue(actual.removeIf(sp -> { + return sp.getSubjectVar().equals(new Var("x")) + && RDF.TYPE.equals(sp.getPredicateVar().getValue()) + && sp.getObjectVar().getValue() == null; + })); + Assert.assertTrue(actual.removeIf(sp -> { + return sp.getSubjectVar().getValue() == null + && RDFS.SUBCLASSOF.equals(sp.getPredicateVar().getValue()) + && FOAF.PERSON.equals(sp.getObjectVar().getValue()); + })); + } + + @Test + public void testNoSP() throws Exception { + String text = "CONSTRUCT {\n" + + " owl:Thing a owl:Class ." + + " owl:Nothing a owl:Class ." + + " owl:Nothing rdfs:subClassOf owl:Thing ." + + "} WHERE { }"; + ParsedQuery query = new SPARQLParser().parseQuery(text, null); + AntecedentVisitor visitor = new AntecedentVisitor(); + query.getTupleExpr().visit(visitor); + Set expected = Sets.newHashSet(); + Assert.assertEquals(expected, visitor.getAntecedents()); + } +} diff --git a/extras/rya.forwardchain/src/test/java/org/apache/rya/forwardchain/rule/ConstructConsequentVisitorTest.java b/extras/rya.forwardchain/src/test/java/org/apache/rya/forwardchain/rule/ConstructConsequentVisitorTest.java new file mode 100644 index 000000000..0865ef8b8 --- /dev/null +++ b/extras/rya.forwardchain/src/test/java/org/apache/rya/forwardchain/rule/ConstructConsequentVisitorTest.java @@ -0,0 +1,164 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.rya.forwardchain.rule; + +import java.util.Arrays; +import java.util.Set; + +import org.junit.Assert; +import org.junit.Test; +import org.openrdf.model.Value; +import org.openrdf.model.vocabulary.FOAF; +import org.openrdf.model.vocabulary.OWL; +import org.openrdf.model.vocabulary.RDF; +import org.openrdf.query.algebra.BNodeGenerator; +import org.openrdf.query.algebra.Extension; +import org.openrdf.query.algebra.ExtensionElem; +import org.openrdf.query.algebra.MultiProjection; +import org.openrdf.query.algebra.Projection; +import org.openrdf.query.algebra.ProjectionElem; +import org.openrdf.query.algebra.ProjectionElemList; +import org.openrdf.query.algebra.SingletonSet; +import org.openrdf.query.algebra.StatementPattern; +import org.openrdf.query.algebra.ValueConstant; +import org.openrdf.query.algebra.Var; + +import com.google.common.collect.Sets; + +public class ConstructConsequentVisitorTest { + private static Var s(Value val) { + return new Var("subject", val); + } + private static Var p(Value val) { + return new Var("predicate", val); + } + private static Var o(Value val) { + return new Var("object", val); + } + private static Var anon(Var var) { + var.setAnonymous(true); + return var; + } + + @Test + public void testGenericSP() { + Extension extension = new Extension(new SingletonSet(), + new ExtensionElem(new Var("z"), "z")); + Projection projection = new Projection(extension, new ProjectionElemList( + new ProjectionElem("x", "subject"), + new ProjectionElem("y", "predicate"), + new ProjectionElem("z", "object"))); + ConstructConsequentVisitor visitor = new ConstructConsequentVisitor(); + projection.visit(visitor); + Set expected = Sets.newHashSet( + new StatementPattern(s(null), p(null), o(null))); + Assert.assertEquals(expected, visitor.getConsequents()); + } + + @Test + public void testConcreteSP() { + Extension extension = new Extension(new SingletonSet(), + new ExtensionElem(new ValueConstant(FOAF.PERSON), "x"), + new ExtensionElem(new ValueConstant(RDF.TYPE), "y"), + new ExtensionElem(new ValueConstant(OWL.CLASS), "z")); + Projection projection = new Projection(extension, new ProjectionElemList( + new ProjectionElem("x", "subject"), + new ProjectionElem("y", "predicate"), + new ProjectionElem("z", "object"))); + ConstructConsequentVisitor visitor = new ConstructConsequentVisitor(); + projection.visit(visitor); + Set expected = Sets.newHashSet( + new StatementPattern(s(FOAF.PERSON), p(RDF.TYPE), o(OWL.CLASS))); + Assert.assertEquals(expected, visitor.getConsequents()); + } + + @Test + public void testMissingVariables() { + Extension extension = new Extension(new SingletonSet(), + new ExtensionElem(new ValueConstant(FOAF.PERSON), "x"), + new ExtensionElem(new ValueConstant(RDF.TYPE), "y")); + Projection projection = new Projection(extension, new ProjectionElemList( + new ProjectionElem("x", "s"), + new ProjectionElem("y", "predicate"), + new ProjectionElem("z", "object"))); + ConstructConsequentVisitor visitor = new ConstructConsequentVisitor(); + projection.visit(visitor); + Set expected = Sets.newHashSet( + new StatementPattern(s(null), p(RDF.TYPE), o(null))); + Assert.assertEquals(expected, visitor.getConsequents()); + } + + @Test + public void testMultiProjection() { + Extension extension = new Extension(new SingletonSet(), + new ExtensionElem(new ValueConstant(RDF.TYPE), "rdftype"), + new ExtensionElem(new ValueConstant(OWL.OBJECTPROPERTY), "owlprop"), + new ExtensionElem(new ValueConstant(OWL.EQUIVALENTCLASS), "owleqcls"), + new ExtensionElem(new 
ValueConstant(OWL.CLASS), "owlclass")); + MultiProjection projection = new MultiProjection(extension, Arrays.asList( + new ProjectionElemList( + new ProjectionElem("cls", "subject"), + new ProjectionElem("rdftype", "predicate"), + new ProjectionElem("owlclass", "object")), + new ProjectionElemList( + new ProjectionElem("prop", "subject"), + new ProjectionElem("rdftype", "predicate"), + new ProjectionElem("owlprop", "object")), + new ProjectionElemList( + new ProjectionElem("owleqcls", "predicate"), + new ProjectionElem("cls", "object")))); + ConstructConsequentVisitor visitor = new ConstructConsequentVisitor(); + projection.visit(visitor); + Set expected = Sets.newHashSet( + new StatementPattern(s(null), p(RDF.TYPE), o(OWL.CLASS)), + new StatementPattern(s(null), p(RDF.TYPE), o(OWL.OBJECTPROPERTY)), + new StatementPattern(s(null), p(OWL.EQUIVALENTCLASS), o(null))); + Assert.assertEquals(expected, visitor.getConsequents()); + } + + @Test + public void testNoExtension() { + StatementPattern sp = new StatementPattern(new Var("x"), new Var("y"), new Var("z")); + Projection projection = new Projection(sp, new ProjectionElemList( + new ProjectionElem("x", "subject"), + new ProjectionElem("y", "predicate"), + new ProjectionElem("z", "object"))); + ConstructConsequentVisitor visitor = new ConstructConsequentVisitor(); + projection.visit(visitor); + Set expected = Sets.newHashSet( + new StatementPattern(s(null), p(null), o(null))); + Assert.assertEquals(expected, visitor.getConsequents()); + } + + @Test + public void testBNode() { + Extension extension = new Extension(new SingletonSet(), + new ExtensionElem(new Var("x"), "x"), + new ExtensionElem(new BNodeGenerator(), "z")); + Projection projection = new Projection(extension, new ProjectionElemList( + new ProjectionElem("x", "subject"), + new ProjectionElem("y", "predicate"), + new ProjectionElem("z", "object"))); + ConstructConsequentVisitor visitor = new ConstructConsequentVisitor(); + projection.visit(visitor); + Set expected = Sets.newHashSet( + new StatementPattern(s(null), p(null), anon(o(null)))); + Assert.assertEquals(expected, visitor.getConsequents()); + } +} diff --git a/extras/rya.forwardchain/src/test/java/org/apache/rya/forwardchain/rule/RulesetTest.java b/extras/rya.forwardchain/src/test/java/org/apache/rya/forwardchain/rule/RulesetTest.java new file mode 100644 index 000000000..adb851bf4 --- /dev/null +++ b/extras/rya.forwardchain/src/test/java/org/apache/rya/forwardchain/rule/RulesetTest.java @@ -0,0 +1,137 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.rya.forwardchain.rule; + +import java.util.Collection; +import java.util.Set; + +import org.apache.rya.api.domain.StatementMetadata; +import org.apache.rya.forwardchain.ForwardChainException; +import org.apache.rya.forwardchain.strategy.AbstractRuleExecutionStrategy; +import org.junit.Assert; +import org.junit.Test; +import org.openrdf.model.Value; +import org.openrdf.model.vocabulary.RDF; +import org.openrdf.model.vocabulary.RDFS; +import org.openrdf.query.algebra.StatementPattern; +import org.openrdf.query.algebra.Var; + +import com.google.common.collect.Sets; + +public class RulesetTest { + private static Var c(Value val) { + Var v = new Var("-const-" + val.stringValue(), val); + v.setAnonymous(true); + return v; + } + + private static class TestRule implements Rule { + private final Collection consume; + private final Collection produce; + TestRule(Collection consume, Collection produce) { + this.consume = consume; + this.produce = produce; + } + @Override + public boolean canConclude(StatementPattern sp) { + return produce.contains(sp); + } + @Override + public Collection getAntecedentPatterns() { + return consume; + } + @Override + public Collection getConsequentPatterns() { + return produce; + } + @Override + public long execute(AbstractRuleExecutionStrategy strategy, + StatementMetadata metadata) throws ForwardChainException { + return 0; + } + } + + @Test + public void testDependencies() { + StatementPattern genericSP = new StatementPattern(new Var("a"), new Var("b"), new Var("c")); + StatementPattern typeSP = new StatementPattern(new Var("x"), c(RDF.TYPE), new Var("t")); + StatementPattern scoSP = new StatementPattern(new Var("x"), c(RDFS.SUBCLASSOF), new Var("y")); + Rule typeTriggersAny = new TestRule( + Sets.newHashSet(typeSP), + Sets.newHashSet(genericSP, typeSP, scoSP)); + Rule subclassTriggersType = new TestRule( + Sets.newHashSet(scoSP), + Sets.newHashSet(genericSP, typeSP)); + Rule anyTriggersNothing = new TestRule( + Sets.newHashSet(genericSP), + Sets.newHashSet()); + Set allRules = Sets.newHashSet(anyTriggersNothing, subclassTriggersType, typeTriggersAny); + Set noRules = Sets.newHashSet(); + Set produceType = Sets.newHashSet(subclassTriggersType, typeTriggersAny); + Set produceSubclass = Sets.newHashSet(typeTriggersAny); + Set produceAny = Sets.newHashSet(subclassTriggersType, typeTriggersAny); + Set consumeType = Sets.newHashSet(anyTriggersNothing, typeTriggersAny); + Ruleset ruleset = new Ruleset(allRules); + Assert.assertEquals(produceType, ruleset.getPredecessorsOf(typeTriggersAny)); + Assert.assertEquals(allRules, ruleset.getSuccessorsOf(typeTriggersAny)); + Assert.assertEquals(produceSubclass, ruleset.getPredecessorsOf(subclassTriggersType)); + Assert.assertEquals(consumeType, ruleset.getSuccessorsOf(subclassTriggersType)); + Assert.assertEquals(produceAny, ruleset.getPredecessorsOf(anyTriggersNothing)); + Assert.assertEquals(noRules, ruleset.getSuccessorsOf(anyTriggersNothing)); + } + + @Test + public void testIndirectDependencies() { + StatementPattern genericSP = new StatementPattern(new Var("a"), new Var("b"), new Var("c")); + StatementPattern typeSP = new StatementPattern(new Var("x"), c(RDF.TYPE), new Var("t")); + StatementPattern scoSP = new StatementPattern(new Var("x"), c(RDFS.SUBCLASSOF), new Var("y")); + StatementPattern spoSP = new StatementPattern(new Var("x"), c(RDFS.SUBPROPERTYOF), new Var("y")); + Rule typeTriggersAny = new TestRule( + Sets.newHashSet(typeSP), + Sets.newHashSet(genericSP, typeSP, scoSP)); + Rule 
subclassTriggersType = new TestRule( + Sets.newHashSet(scoSP), + Sets.newHashSet(genericSP, typeSP)); + Rule anyTriggersNothing = new TestRule( + Sets.newHashSet(genericSP), + Sets.newHashSet()); + Rule typeTriggersSubprop = new TestRule( + Sets.newHashSet(typeSP), + Sets.newHashSet(genericSP, spoSP)); + Set allRules = Sets.newHashSet(anyTriggersNothing, subclassTriggersType, + typeTriggersAny, typeTriggersSubprop); + Ruleset ruleset = new Ruleset(allRules); + Assert.assertTrue(ruleset.pathExists(typeTriggersAny, typeTriggersAny)); + Assert.assertTrue(ruleset.pathExists(typeTriggersAny, subclassTriggersType)); + Assert.assertTrue(ruleset.pathExists(typeTriggersAny, anyTriggersNothing)); + Assert.assertTrue(ruleset.pathExists(typeTriggersAny, typeTriggersSubprop)); + Assert.assertTrue(ruleset.pathExists(subclassTriggersType, typeTriggersAny)); + Assert.assertTrue(ruleset.pathExists(subclassTriggersType, subclassTriggersType)); + Assert.assertTrue(ruleset.pathExists(subclassTriggersType, anyTriggersNothing)); + Assert.assertTrue(ruleset.pathExists(subclassTriggersType, typeTriggersSubprop)); + Assert.assertFalse(ruleset.pathExists(anyTriggersNothing, typeTriggersAny)); + Assert.assertFalse(ruleset.pathExists(anyTriggersNothing, subclassTriggersType)); + Assert.assertFalse(ruleset.pathExists(anyTriggersNothing, anyTriggersNothing)); + Assert.assertFalse(ruleset.pathExists(anyTriggersNothing, typeTriggersSubprop)); + Assert.assertFalse(ruleset.pathExists(typeTriggersSubprop, typeTriggersAny)); + Assert.assertFalse(ruleset.pathExists(typeTriggersSubprop, subclassTriggersType)); + Assert.assertTrue(ruleset.pathExists(typeTriggersSubprop, anyTriggersNothing)); + Assert.assertFalse(ruleset.pathExists(typeTriggersSubprop, typeTriggersSubprop)); + } +} diff --git a/extras/rya.forwardchain/src/test/java/org/apache/rya/forwardchain/rule/SpinConstructRuleTest.java b/extras/rya.forwardchain/src/test/java/org/apache/rya/forwardchain/rule/SpinConstructRuleTest.java new file mode 100644 index 000000000..9bbcce09a --- /dev/null +++ b/extras/rya.forwardchain/src/test/java/org/apache/rya/forwardchain/rule/SpinConstructRuleTest.java @@ -0,0 +1,213 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.rya.forwardchain.rule; + +import java.util.Arrays; + +import org.junit.Assert; +import org.junit.Test; +import org.openrdf.model.URI; +import org.openrdf.model.Value; +import org.openrdf.model.ValueFactory; +import org.openrdf.model.impl.ValueFactoryImpl; +import org.openrdf.model.vocabulary.FOAF; +import org.openrdf.model.vocabulary.OWL; +import org.openrdf.model.vocabulary.RDF; +import org.openrdf.model.vocabulary.RDFS; +import org.openrdf.query.algebra.StatementPattern; +import org.openrdf.query.algebra.Var; +import org.openrdf.query.parser.ParsedGraphQuery; +import org.openrdf.query.parser.sparql.SPARQLParser; + +import com.google.common.collect.HashMultiset; +import com.google.common.collect.Multiset; + +public class SpinConstructRuleTest { + private static ValueFactory VF = ValueFactoryImpl.getInstance(); + private static SPARQLParser PARSER = new SPARQLParser(); + + private static URI RL_CAX_SCO = VF.createURI("http://example.org/rl/cax-sco"); + private static URI RL_SCM_CLS = VF.createURI("http://example.org/rl/scm-cls"); + private static URI RL_PRP_SPO1 = VF.createURI("http://example.org/rl/prp-spo"); + private static URI LIVING_THING = VF.createURI("http://example.org/LivingThing"); + + private static Var c(Value val) { + return new Var("-const-" + val.stringValue(), val); + } + private static Var ac(Value val) { + Var v = c(val); + v.setAnonymous(true); + return v; + } + + @Test + public void testEmptyWhere() throws Exception { + String text = "CONSTRUCT {\n" + + " ?this a <" + LIVING_THING.stringValue() + "> .\n" + + "} WHERE { }"; + ParsedGraphQuery query = (ParsedGraphQuery) PARSER.parseQuery(text, null); + SpinConstructRule rule = new SpinConstructRule(FOAF.PERSON, VF.createURI("urn:person-is-living"), query); + Multiset expectedAntecedents = HashMultiset.create(Arrays.asList( + new StatementPattern(new Var("this"), c(RDF.TYPE), c(FOAF.PERSON)))); + Multiset expectedConsequents = HashMultiset.create(Arrays.asList( + new StatementPattern(new Var("subject"), new Var("predicate", RDF.TYPE), new Var("object", LIVING_THING)))); + Assert.assertEquals(expectedAntecedents, HashMultiset.create(rule.getAntecedentPatterns())); + Assert.assertEquals(expectedConsequents, HashMultiset.create(rule.getConsequentPatterns())); + Assert.assertFalse(rule.hasAnonymousConsequent()); + // Basic pattern matches + Assert.assertTrue(rule.canConclude(new StatementPattern(new Var("x"), c(RDF.TYPE), c(LIVING_THING)))); + // Broader patterns match (variables in place of constants) + Assert.assertTrue(rule.canConclude(new StatementPattern(new Var("x"), c(RDF.TYPE), new Var("y")))); + Assert.assertTrue(rule.canConclude(new StatementPattern(new Var("x"), new Var("y"), c(LIVING_THING)))); + Assert.assertTrue(rule.canConclude(new StatementPattern(new Var("a"), new Var("b"), new Var("c")))); + // Narrower patterns match (constants in place of variables) + Assert.assertTrue(rule.canConclude(new StatementPattern(c(RDF.TYPE), c(RDF.TYPE), c(LIVING_THING)))); + Assert.assertTrue(rule.canConclude(new StatementPattern(c(FOAF.MBOX), c(RDF.TYPE), new Var("y")))); + Assert.assertTrue(rule.canConclude(new StatementPattern(c(RDF.ALT), new Var("y"), c(LIVING_THING)))); + Assert.assertTrue(rule.canConclude(new StatementPattern(c(RDF.BAG), new Var("b"), new Var("c")))); + // Incompatible patterns don't match (different constants) + Assert.assertFalse(rule.canConclude(new StatementPattern(new Var("x"), c(RDFS.SUBCLASSOF), new Var("y")))); + Assert.assertFalse(rule.canConclude(new StatementPattern(new 
Var("x"), new Var("y"), c(FOAF.PERSON)))); + Assert.assertFalse(rule.canConclude(new StatementPattern(c(RDF.TYPE), c(RDF.TYPE), c(RDF.TYPE)))); + } + + @Test + public void testThisUnbound() throws Exception { + String text = "CONSTRUCT {\n" + + " ?ind a ?superclass .\n" + + "} WHERE {\n" + + " ?ind a ?subclass .\n" + + " ?subclass rdfs:subClassOf ?superclass .\n" + + "}"; + ParsedGraphQuery query = (ParsedGraphQuery) PARSER.parseQuery(text, null); + SpinConstructRule rule = new SpinConstructRule(OWL.THING, RL_CAX_SCO, query); + Multiset expectedAntecedents = HashMultiset.create(Arrays.asList( + new StatementPattern(new Var("subclass"), ac(RDFS.SUBCLASSOF), new Var("superclass")), + new StatementPattern(new Var("ind"), ac(RDF.TYPE), new Var("subclass")))); + Multiset expectedConsequents = HashMultiset.create(Arrays.asList( + new StatementPattern(new Var("subject"), new Var("predicate", RDF.TYPE), new Var("object")))); + Assert.assertEquals(expectedAntecedents, HashMultiset.create(rule.getAntecedentPatterns())); + Assert.assertEquals(expectedConsequents, HashMultiset.create(rule.getConsequentPatterns())); + Assert.assertFalse(rule.hasAnonymousConsequent()); + // Basic pattern matches + Assert.assertTrue(rule.canConclude(new StatementPattern(new Var("x"), c(RDF.TYPE), new Var("y")))); + // Broader patterns match (variables in place of constants) + Assert.assertTrue(rule.canConclude(new StatementPattern(new Var("a"), new Var("b"), new Var("c")))); + // Narrower patterns match (constants in place of variables) + Assert.assertTrue(rule.canConclude(new StatementPattern(c(RDF.TYPE), c(RDF.TYPE), c(RDF.TYPE)))); + // Incompatible patterns don't match (different constants) + Assert.assertFalse(rule.canConclude(new StatementPattern(new Var("x"), c(RDFS.SUBCLASSOF), new Var("y")))); + } + + @Test + public void testMultipleConsequents() throws Exception { + String text = "CONSTRUCT {\n" + // actual rule is "?this subClassOf ?this", but reflexive construct patterns produce + // bnodes due to an openrdf bug, resulting in incorrect matches + + " ?this rdfs:subClassOf ?something .\n" + + " ?this owl:equivalentClass ?something .\n" + + " ?this rdfs:subClassOf owl:Thing .\n" + + " owl:Nothing rdfs:subClassOf ?this .\n" + + "} WHERE { }"; + ParsedGraphQuery query = (ParsedGraphQuery) PARSER.parseQuery(text, null); + SpinConstructRule rule = new SpinConstructRule(OWL.CLASS, RL_SCM_CLS, query); + Multiset expectedAntecedents = HashMultiset.create(Arrays.asList( + new StatementPattern(new Var("this"), c(RDF.TYPE), c(OWL.CLASS)))); + Multiset expectedConsequents = HashMultiset.create(Arrays.asList( + new StatementPattern(new Var("subject"), new Var("predicate", RDFS.SUBCLASSOF), new Var("object")), + new StatementPattern(new Var("subject"), new Var("predicate", OWL.EQUIVALENTCLASS), new Var("object")), + new StatementPattern(new Var("subject"), new Var("predicate", RDFS.SUBCLASSOF), new Var("object", OWL.THING)), + new StatementPattern(new Var("subject", OWL.NOTHING), new Var("predicate", RDFS.SUBCLASSOF), new Var("object")))); + Assert.assertEquals(expectedAntecedents, HashMultiset.create(rule.getAntecedentPatterns())); + Assert.assertEquals(expectedConsequents, HashMultiset.create(rule.getConsequentPatterns())); + // Basic pattern matches + Assert.assertTrue(rule.canConclude(new StatementPattern(new Var("x"), c(RDFS.SUBCLASSOF), new Var("y")))); + Assert.assertTrue(rule.canConclude(new StatementPattern(new Var("x"), c(OWL.EQUIVALENTCLASS), new Var("y")))); + Assert.assertTrue(rule.canConclude(new 
StatementPattern(new Var("x"), c(RDFS.SUBCLASSOF), c(OWL.THING)))); + Assert.assertTrue(rule.canConclude(new StatementPattern(c(OWL.NOTHING), c(RDFS.SUBCLASSOF), new Var("y")))); + // Broader patterns match (variables in place of constants) + Assert.assertTrue(rule.canConclude(new StatementPattern(new Var("a"), new Var("b"), new Var("c")))); + Assert.assertTrue(rule.canConclude(new StatementPattern(new Var("a"), new Var("b"), c(OWL.THING)))); + Assert.assertTrue(rule.canConclude(new StatementPattern(c(OWL.NOTHING), new Var("b"), new Var("c")))); + // Narrower patterns match (constants in place of variables) + Assert.assertTrue(rule.canConclude(new StatementPattern(c(FOAF.PERSON), c(RDFS.SUBCLASSOF), new Var("x")))); + Assert.assertTrue(rule.canConclude(new StatementPattern(c(FOAF.PERSON), c(OWL.EQUIVALENTCLASS), c(FOAF.PERSON)))); + Assert.assertTrue(rule.canConclude(new StatementPattern(c(OWL.NOTHING), c(RDFS.SUBCLASSOF), c(FOAF.PERSON)))); + Assert.assertTrue(rule.canConclude(new StatementPattern(c(OWL.NOTHING), c(OWL.EQUIVALENTCLASS), c(FOAF.PERSON)))); + Assert.assertTrue(rule.canConclude(new StatementPattern(c(OWL.NOTHING), c(OWL.EQUIVALENTCLASS), c(OWL.THING)))); + // Incompatible patterns don't match (different constants) + Assert.assertFalse(rule.canConclude(new StatementPattern(new Var("x"), c(RDFS.SUBPROPERTYOF), c(OWL.THING)))); + } + + @Test + public void testGeneralConsequent() throws Exception { + String text = "CONSTRUCT {\n" + + " ?x ?p2 ?y" + + "} WHERE {\n" + + " ?x ?p1 ?y .\n" + + " ?p1 rdfs:subPropertyOf ?p2 .\n" + + "}"; + ParsedGraphQuery query = (ParsedGraphQuery) PARSER.parseQuery(text, null); + SpinConstructRule rule = new SpinConstructRule(OWL.THING, RL_PRP_SPO1, query); + Multiset expectedAntecedents = HashMultiset.create(Arrays.asList( + new StatementPattern(new Var("p1"), ac(RDFS.SUBPROPERTYOF), new Var("p2")), + new StatementPattern(new Var("x"), new Var("p1"), new Var("y")))); + Multiset expectedConsequents = HashMultiset.create(Arrays.asList( + new StatementPattern(new Var("subject"), new Var("predicate"), new Var("object")))); + Assert.assertEquals(expectedAntecedents, HashMultiset.create(rule.getAntecedentPatterns())); + Assert.assertEquals(expectedConsequents, HashMultiset.create(rule.getConsequentPatterns())); + Assert.assertFalse(rule.hasAnonymousConsequent()); + // Basic pattern matches + Assert.assertTrue(rule.canConclude(new StatementPattern(new Var("a"), new Var("b"), new Var("c")))); + // Narrower patterns match (constants in place of variables) + Assert.assertTrue(rule.canConclude(new StatementPattern(new Var("x"), c(RDFS.SUBPROPERTYOF), c(OWL.THING)))); + Assert.assertTrue(rule.canConclude(new StatementPattern(c(OWL.NOTHING), new Var("prop"), c(OWL.THING)))); + Assert.assertTrue(rule.canConclude(new StatementPattern(c(FOAF.PERSON), c(RDFS.SUBCLASSOF), new Var("x")))); + Assert.assertTrue(rule.canConclude(new StatementPattern(c(OWL.NOTHING), c(RDFS.SUBCLASSOF), c(FOAF.PERSON)))); + } + + @Test + public void testAnonymousConsequent() throws Exception { + String text = "CONSTRUCT {\n" + + " ?x ?p2 _:something" + + "} WHERE {\n" + + " ?x ?p1 ?y .\n" + + " ?p1 rdfs:subPropertyOf ?p2 .\n" + + "}"; + ParsedGraphQuery query = (ParsedGraphQuery) PARSER.parseQuery(text, null); + SpinConstructRule rule = new SpinConstructRule(OWL.THING, RL_PRP_SPO1, query); + Multiset expectedAntecedents = HashMultiset.create(Arrays.asList( + new StatementPattern(new Var("p1"), ac(RDFS.SUBPROPERTYOF), new Var("p2")), + new StatementPattern(new Var("x"), new Var("p1"), new 
Var("y")))); + Assert.assertEquals(expectedAntecedents, HashMultiset.create(rule.getAntecedentPatterns())); + // should have detected anonymous node + Assert.assertTrue(rule.hasAnonymousConsequent()); + Var anonymousObject = new Var("object"); + anonymousObject.setAnonymous(true); + Multiset expectedConsequents = HashMultiset.create(Arrays.asList( + new StatementPattern(new Var("subject"), new Var("predicate"), anonymousObject))); + Assert.assertEquals(expectedConsequents, HashMultiset.create(rule.getConsequentPatterns())); + // Pattern matches should be unaffected by anonymous node status + Assert.assertTrue(rule.canConclude(new StatementPattern(new Var("a"), new Var("b"), new Var("c")))); + Assert.assertTrue(rule.canConclude(new StatementPattern(new Var("x"), c(RDFS.SUBPROPERTYOF), c(OWL.THING)))); + Assert.assertTrue(rule.canConclude(new StatementPattern(c(OWL.NOTHING), new Var("prop"), c(OWL.THING)))); + Assert.assertTrue(rule.canConclude(new StatementPattern(c(FOAF.PERSON), c(RDFS.SUBCLASSOF), new Var("x")))); + Assert.assertTrue(rule.canConclude(new StatementPattern(c(OWL.NOTHING), c(RDFS.SUBCLASSOF), c(FOAF.PERSON)))); + } +} diff --git a/extras/rya.forwardchain/src/test/resources/data.ttl b/extras/rya.forwardchain/src/test/resources/data.ttl new file mode 100644 index 000000000..f026409a0 --- /dev/null +++ b/extras/rya.forwardchain/src/test/resources/data.ttl @@ -0,0 +1,56 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# Sample data similar to LUBM + +@prefix rdf: . +@prefix rdfs: . +@prefix owl: . +@prefix xsd: . +@prefix lubm: . +@prefix ex: . + +ex:Department0 lubm:subOrganizationOf ex:College0 . +ex:Department1 lubm:subOrganizationOf ex:College1 . +ex:Department2 lubm:subOrganizationOf ex:College2 . +ex:Department3 lubm:subOrganizationOf ex:College2 . + +ex:College0 a lubm:Organization ; lubm:subOrganizationOf ex:University0 . +ex:College1 a lubm:Organization ; lubm:subOrganizationOf ex:University0 . +ex:College2 lubm:subOrganizationOf ex:University1 . + +ex:Department0 a lubm:Department . +ex:Department1 a lubm:Department . +ex:Department2 a lubm:Department . +ex:Department3 a lubm:Department . + +# Professors -- infer Faculty and therefore Person +ex:Alice a lubm:Professor . +ex:Bob a lubm:Professor . +ex:Carol a lubm:Professor . +ex:Dan a lubm:Professor . +ex:Eve a lubm:Professor . + +# Can infer Organization via rdfs:range +ex:Alice lubm:worksFor ex:Department2 . +ex:Carol lubm:worksFor ex:Department0 . +ex:Dan lubm:worksFor ex:Department2 . +ex:Eve lubm:worksFor ex:Department1 . + +ex:Alice lubm:headOf ex:Department1 . # infer Chair and worksFor +ex:Dan lubm:headOf ex:Department2 . # infer Chair, already have worksFor +ex:Eve lubm:headOf ex:ResearchGroup3 . 
diff --git a/extras/rya.forwardchain/src/test/resources/owlrl.ttl b/extras/rya.forwardchain/src/test/resources/owlrl.ttl
new file mode 100644
index 000000000..b9e67ebac
--- /dev/null
+++ b/extras/rya.forwardchain/src/test/resources/owlrl.ttl
@@ -0,0 +1,106 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# Expresses a fragment of OWL RL in SPIN rules
+
+@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
+@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
+@prefix owl: <http://www.w3.org/2002/07/owl#> .
+@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
+@prefix lubm: .
+@prefix spin: <http://spinrdf.org/spin#> .
+@prefix sp: <http://spinrdf.org/sp#> .
+@prefix rl: <http://example.org/rl/> .
+
+owl:Thing spin:rule rl:cls-svf1 ,
+        rl:cax-sco ,
+        rl:prp-spo1 ,
+        rl:prp-dom ,
+        rl:prp-rng .
+
+owl:Class spin:rule rl:scm-cls .
+
+rl:cls-svf1 a sp:Construct;
+    spin:thisUnbound "true"^^xsd:boolean ;
+    sp:text """
+        CONSTRUCT {
+            ?u a ?x .
+        }
+        WHERE {
+            ?x owl:someValuesFrom ?y .
+            ?x owl:onProperty ?p .
+            ?u ?p ?v .
+            ?v a ?y .
+        } """ .
+
+rl:cax-sco a sp:Construct;
+    spin:thisUnbound "true"^^xsd:boolean ;
+    sp:text """
+        CONSTRUCT {
+            ?this a ?super .
+        }
+        WHERE {
+            ?this a ?sub .
+            ?sub rdfs:subClassOf ?super .
+        } """ .
+
+rl:prp-spo1 a sp:Construct;
+    spin:thisUnbound "true"^^xsd:boolean ;
+    sp:text """
+        CONSTRUCT {
+            ?x ?super ?y .
+        }
+        WHERE {
+            ?sub rdfs:subPropertyOf ?super .
+            ?x ?sub ?y .
+        } """ .
+
+rl:prp-dom a sp:Construct;
+    spin:thisUnbound "true"^^xsd:boolean ;
+    sp:text """
+        CONSTRUCT {
+            ?s a ?c .
+        }
+        WHERE {
+            ?p rdfs:domain ?c .
+            ?s ?p ?o .
+        } """ .
+
+rl:prp-rng a rl:prp-rng-template .
+rl:prp-rng-template a sp:Template;
+    spin:body [
+        a sp:Construct ;
+        sp:text """
+            CONSTRUCT {
+                ?o a ?c .
+            }
+            WHERE {
+                ?p rdfs:range ?c .
+                ?s ?p ?o .
+            } """ ] .
+
+rl:scm-cls a sp:Construct;
+    sp:text """
+        CONSTRUCT {
+            ?this rdfs:subClassOf ?this .
+            ?this owl:equivalentClass ?this .
+            ?this rdfs:subClassOf owl:Thing .
+            owl:Nothing rdfs:subClassOf ?this .
+        }
+        WHERE { } """ .
+rdfs:subClassOf rdfs:domain owl:Class .
+rdfs:subClassOf rdfs:range owl:Class .
\ No newline at end of file
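Note the two scoping styles above, which the tests in SpinConstructRuleTest exercise: a rule attached via spin:rule to a class narrower than owl:Thing gets an implicit instance-of antecedent on ?this, which is why testEmptyWhere expects "?this a foaf:Person" and testMultipleConsequents expects "?this a owl:Class" among the antecedents, while the rules here that are attached to owl:Thing and flagged spin:thisUnbound "true" (like rl:cax-sco) contribute only their WHERE patterns, as testThisUnbound asserts. rl:prp-rng is deliberately declared through a sp:Template with a spin:body, rather than directly as a sp:Construct, so that template-defined rules are covered as well.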
diff --git a/extras/rya.forwardchain/src/test/resources/query.sparql b/extras/rya.forwardchain/src/test/resources/query.sparql
new file mode 100644
index 000000000..3b93cc89e
--- /dev/null
+++ b/extras/rya.forwardchain/src/test/resources/query.sparql
@@ -0,0 +1,32 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# LUBM query #12
+
+PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
+PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
+PREFIX owl: <http://www.w3.org/2002/07/owl#>
+PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
+PREFIX lubm:
+PREFIX ex:
+
+SELECT ?X ?Y WHERE {
+    ?X a lubm:Chair .
+    ?Y a lubm:Department .
+    ?X lubm:worksFor ?Y .
+    ?Y lubm:subOrganizationOf ex:University0 .
+}
\ No newline at end of file
diff --git a/extras/rya.forwardchain/src/test/resources/university.ttl b/extras/rya.forwardchain/src/test/resources/university.ttl
new file mode 100644
index 000000000..e195606bb
--- /dev/null
+++ b/extras/rya.forwardchain/src/test/resources/university.ttl
@@ -0,0 +1,58 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# Expresses a fragment of the LUBM ontology in a mixture of OWL and SPIN
+
+@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
+@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
+@prefix owl: <http://www.w3.org/2002/07/owl#> .
+@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
+@prefix lubm: .
+@prefix spin: <http://spinrdf.org/spin#> .
+@prefix sp: <http://spinrdf.org/sp#> .
+@prefix lr: .
+
+lubm:Person spin:rule lr:department-head-is-chair .
+lr:department-head-is-chair a sp:Construct ;
+    sp:text """
+        PREFIX lubm:
+        CONSTRUCT {
+            ?this a lubm:Chair .
+        }
+        WHERE {
+            ?this lubm:headOf [ a lubm:Department ] .
+        }
+        """ .
+
+lubm:Organization spin:rule lr:suborganization-transitivity .
+lr:suborganization-transitivity a sp:Construct ;
+    sp:text """
+        PREFIX lubm:
+        CONSTRUCT {
+            ?this lubm:subOrganizationOf ?parent .
+        }
+        WHERE {
+            ?this lubm:subOrganizationOf ?child .
+            ?child lubm:subOrganizationOf ?parent .
+        }
+        """ .
+
+lubm:Professor rdfs:subClassOf lubm:Faculty .
+lubm:Faculty rdfs:subClassOf lubm:Person .
+
+lubm:worksFor rdfs:range lubm:Organization .
+lubm:headOf rdfs:subPropertyOf lubm:worksFor .
\ No newline at end of file
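Putting the four resources together, and assuming the forward-chaining engine applies these rules to a fixpoint, query.sparql should return exactly one row. ex:Alice and ex:Dan become lubm:Chair, since each heads a lubm:Department; Alice works for ex:Department1 (inferred from headOf via rl:prp-spo1) and ex:Department2, while Dan works only for ex:Department2. Of those, only ex:Department1 ends up lubm:subOrganizationOf ex:University0, via ex:College1 and lr:suborganization-transitivity once rl:prp-rng has established that ex:Department1 is a lubm:Organization. The expected binding is therefore ?X = ex:Alice, ?Y = ex:Department1.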