From b041143ceb663875bd41bcd232921babdac9876a Mon Sep 17 00:00:00 2001 From: Kevin Chilton Date: Wed, 1 Mar 2017 17:42:08 -0500 Subject: [PATCH] RYA-253 Converted the rya.prospector class from Groovy to Java. Also added documentation to the project. --- .../rya/api/persist/RdfEvalStatsDAO.java | 8 +- .../accumulo/entity/EntityOptimizer.java | 25 +- .../accumulo/entity/EntityTupleSet.java | 64 ++--- extras/rya.prospector/pom.xml | 69 ----- .../rya/prospector/domain/IndexEntry.groovy | 76 ----- .../domain/IntermediateProspect.groovy | 70 ----- .../prospector/domain/TripleValueType.java | 26 -- .../rya/prospector/mr/Prospector.groovy | 108 -------- .../prospector/mr/ProspectorCombiner.groovy | 61 ---- .../rya/prospector/mr/ProspectorMapper.groovy | 75 ----- .../prospector/mr/ProspectorReducer.groovy | 57 ---- .../rya/prospector/plans/IndexWorkPlan.groovy | 51 ---- .../prospector/plans/impl/CountPlan.groovy | 220 --------------- .../ServicesBackedIndexWorkPlanManager.groovy | 38 --- .../service/ProspectorService.groovy | 126 --------- .../ProspectorServiceEvalStatsDAO.groovy | 122 -------- .../utils/ProspectorConstants.groovy | 41 --- .../prospector/utils/ProspectorUtils.groovy | 138 --------- .../rya/prospector/domain/IndexEntry.java | 241 ++++++++++++++++ .../domain/IntermediateProspect.java | 213 ++++++++++++++ .../prospector/domain/TripleValueType.java | 101 +++++++ .../apache/rya/prospector/mr/Prospector.java | 113 ++++++++ .../rya/prospector/mr/ProspectorCombiner.java | 61 ++++ .../rya/prospector/mr/ProspectorMapper.java | 83 ++++++ .../rya/prospector/mr/ProspectorReducer.java | 65 +++++ .../rya/prospector/plans/IndexWorkPlan.java | 115 ++++++++ .../plans/IndexWorkPlanManager.java} | 19 +- .../rya/prospector/plans/impl/CountPlan.java | 262 ++++++++++++++++++ .../ServicesBackedIndexWorkPlanManager.java | 49 ++++ .../prospector/service/ProspectorService.java | 162 +++++++++++ .../ProspectorServiceEvalStatsDAO.java | 143 ++++++++++ 
.../rya/prospector/utils/CustomEntry.java} | 48 ++-- .../prospector/utils/ProspectorConstants.java | 52 ++++ .../rya/prospector/utils/ProspectorUtils.java | 147 ++++++++++ .../rya/prospector/mr/ProspectorTest.groovy | 178 ------------ .../ProspectorServiceEvalStatsDAOTest.groovy | 182 ------------ .../rya/prospector/mr/ProspectorTest.java | 248 +++++++++++++++++ .../ProspectorServiceEvalStatsDAOTest.java | 181 ++++++++++++ pom.xml | 55 ---- .../QueryJoinSelectOptimizerTest.java | 4 + 40 files changed, 2329 insertions(+), 1768 deletions(-) delete mode 100644 extras/rya.prospector/src/main/groovy/org/apache/rya/prospector/domain/IndexEntry.groovy delete mode 100644 extras/rya.prospector/src/main/groovy/org/apache/rya/prospector/domain/IntermediateProspect.groovy delete mode 100644 extras/rya.prospector/src/main/groovy/org/apache/rya/prospector/domain/TripleValueType.java delete mode 100644 extras/rya.prospector/src/main/groovy/org/apache/rya/prospector/mr/Prospector.groovy delete mode 100644 extras/rya.prospector/src/main/groovy/org/apache/rya/prospector/mr/ProspectorCombiner.groovy delete mode 100644 extras/rya.prospector/src/main/groovy/org/apache/rya/prospector/mr/ProspectorMapper.groovy delete mode 100644 extras/rya.prospector/src/main/groovy/org/apache/rya/prospector/mr/ProspectorReducer.groovy delete mode 100644 extras/rya.prospector/src/main/groovy/org/apache/rya/prospector/plans/IndexWorkPlan.groovy delete mode 100644 extras/rya.prospector/src/main/groovy/org/apache/rya/prospector/plans/impl/CountPlan.groovy delete mode 100644 extras/rya.prospector/src/main/groovy/org/apache/rya/prospector/plans/impl/ServicesBackedIndexWorkPlanManager.groovy delete mode 100644 extras/rya.prospector/src/main/groovy/org/apache/rya/prospector/service/ProspectorService.groovy delete mode 100644 extras/rya.prospector/src/main/groovy/org/apache/rya/prospector/service/ProspectorServiceEvalStatsDAO.groovy delete mode 100644 
extras/rya.prospector/src/main/groovy/org/apache/rya/prospector/utils/ProspectorConstants.groovy delete mode 100644 extras/rya.prospector/src/main/groovy/org/apache/rya/prospector/utils/ProspectorUtils.groovy create mode 100644 extras/rya.prospector/src/main/java/org/apache/rya/prospector/domain/IndexEntry.java create mode 100644 extras/rya.prospector/src/main/java/org/apache/rya/prospector/domain/IntermediateProspect.java create mode 100644 extras/rya.prospector/src/main/java/org/apache/rya/prospector/domain/TripleValueType.java create mode 100644 extras/rya.prospector/src/main/java/org/apache/rya/prospector/mr/Prospector.java create mode 100644 extras/rya.prospector/src/main/java/org/apache/rya/prospector/mr/ProspectorCombiner.java create mode 100644 extras/rya.prospector/src/main/java/org/apache/rya/prospector/mr/ProspectorMapper.java create mode 100644 extras/rya.prospector/src/main/java/org/apache/rya/prospector/mr/ProspectorReducer.java create mode 100644 extras/rya.prospector/src/main/java/org/apache/rya/prospector/plans/IndexWorkPlan.java rename extras/rya.prospector/src/main/{groovy/org/apache/rya/prospector/plans/IndexWorkPlanManager.groovy => java/org/apache/rya/prospector/plans/IndexWorkPlanManager.java} (71%) create mode 100644 extras/rya.prospector/src/main/java/org/apache/rya/prospector/plans/impl/CountPlan.java create mode 100644 extras/rya.prospector/src/main/java/org/apache/rya/prospector/plans/impl/ServicesBackedIndexWorkPlanManager.java create mode 100644 extras/rya.prospector/src/main/java/org/apache/rya/prospector/service/ProspectorService.java create mode 100644 extras/rya.prospector/src/main/java/org/apache/rya/prospector/service/ProspectorServiceEvalStatsDAO.java rename extras/rya.prospector/src/main/{groovy/org/apache/rya/prospector/utils/CustomEntry.groovy => java/org/apache/rya/prospector/utils/CustomEntry.java} (55%) create mode 100644 extras/rya.prospector/src/main/java/org/apache/rya/prospector/utils/ProspectorConstants.java create 
mode 100644 extras/rya.prospector/src/main/java/org/apache/rya/prospector/utils/ProspectorUtils.java delete mode 100644 extras/rya.prospector/src/test/groovy/org/apache/rya/prospector/mr/ProspectorTest.groovy delete mode 100644 extras/rya.prospector/src/test/groovy/org/apache/rya/prospector/service/ProspectorServiceEvalStatsDAOTest.groovy create mode 100644 extras/rya.prospector/src/test/java/org/apache/rya/prospector/mr/ProspectorTest.java create mode 100644 extras/rya.prospector/src/test/java/org/apache/rya/prospector/service/ProspectorServiceEvalStatsDAOTest.java diff --git a/common/rya.api/src/main/java/org/apache/rya/api/persist/RdfEvalStatsDAO.java b/common/rya.api/src/main/java/org/apache/rya/api/persist/RdfEvalStatsDAO.java index b1d46c38e..0b63d5813 100644 --- a/common/rya.api/src/main/java/org/apache/rya/api/persist/RdfEvalStatsDAO.java +++ b/common/rya.api/src/main/java/org/apache/rya/api/persist/RdfEvalStatsDAO.java @@ -8,9 +8,9 @@ * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at - * + * * http://www.apache.org/licenses/LICENSE-2.0 - * + * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY @@ -24,7 +24,6 @@ import java.util.List; import org.apache.rya.api.RdfCloudTripleStoreConfiguration; - import org.openrdf.model.Resource; import org.openrdf.model.Value; @@ -44,9 +43,10 @@ public enum CARDINALITY_OF { public void destroy() throws RdfDAOException; + // XXX returns -1 if no cardinality could be found. 
public double getCardinality(C conf, CARDINALITY_OF card, List val) throws RdfDAOException; public double getCardinality(C conf, CARDINALITY_OF card, List val, Resource context) throws RdfDAOException; - + public void setConf(C conf); public C getConf(); diff --git a/extras/indexing/src/main/java/org/apache/rya/indexing/accumulo/entity/EntityOptimizer.java b/extras/indexing/src/main/java/org/apache/rya/indexing/accumulo/entity/EntityOptimizer.java index f3b7183b0..244493adf 100644 --- a/extras/indexing/src/main/java/org/apache/rya/indexing/accumulo/entity/EntityOptimizer.java +++ b/extras/indexing/src/main/java/org/apache/rya/indexing/accumulo/entity/EntityOptimizer.java @@ -25,6 +25,11 @@ import java.util.List; import java.util.Set; +import org.apache.accumulo.core.client.AccumuloException; +import org.apache.accumulo.core.client.AccumuloSecurityException; +import org.apache.accumulo.core.client.TableExistsException; +import org.apache.hadoop.conf.Configurable; +import org.apache.hadoop.conf.Configuration; import org.apache.rya.accumulo.AccumuloRdfConfiguration; import org.apache.rya.api.RdfCloudTripleStoreConfiguration; import org.apache.rya.api.persist.joinselect.SelectivityEvalDAO; @@ -33,11 +38,6 @@ import org.apache.rya.prospector.service.ProspectorServiceEvalStatsDAO; import org.apache.rya.rdftriplestore.inference.DoNotExpandSP; import org.apache.rya.rdftriplestore.utils.FixedStatementPattern; - -import org.apache.accumulo.core.client.AccumuloException; -import org.apache.accumulo.core.client.AccumuloSecurityException; -import org.apache.hadoop.conf.Configurable; -import org.apache.hadoop.conf.Configuration; import org.openrdf.query.BindingSet; import org.openrdf.query.Dataset; import org.openrdf.query.algebra.Filter; @@ -47,12 +47,15 @@ import org.openrdf.query.algebra.TupleExpr; import org.openrdf.query.algebra.evaluation.QueryOptimizer; import org.openrdf.query.algebra.helpers.QueryModelVisitorBase; +import org.slf4j.Logger; +import 
org.slf4j.LoggerFactory; import com.google.common.collect.HashMultimap; import com.google.common.collect.Lists; import com.google.common.collect.Sets; public class EntityOptimizer implements QueryOptimizer, Configurable { + private static final Logger LOG = LoggerFactory.getLogger(EntityOptimizer.class); private SelectivityEvalDAO eval; private RdfCloudTripleStoreConfiguration conf; @@ -69,10 +72,8 @@ public EntityOptimizer(RdfCloudTripleStoreConfiguration conf) { eval = new AccumuloSelectivityEvalDAO(conf, ConfigUtils.getConnector(conf)); ((AccumuloSelectivityEvalDAO)eval).setRdfEvalDAO(new ProspectorServiceEvalStatsDAO(ConfigUtils.getConnector(conf), conf)); eval.init(); - } catch (AccumuloException e) { - e.printStackTrace(); - } catch (AccumuloSecurityException e) { - e.printStackTrace(); + } catch (final AccumuloException | AccumuloSecurityException | TableExistsException e) { + LOG.warn("A problem was encountered while constructing the EntityOptimizer.", e); } isEvalDaoSet = true; @@ -103,10 +104,8 @@ public void setConf(Configuration conf) { eval = new AccumuloSelectivityEvalDAO(this.conf, ConfigUtils.getConnector(this.conf)); ((AccumuloSelectivityEvalDAO)eval).setRdfEvalDAO(new ProspectorServiceEvalStatsDAO(ConfigUtils.getConnector(this.conf), this.conf)); eval.init(); - } catch (AccumuloException e) { - e.printStackTrace(); - } catch (AccumuloSecurityException e) { - e.printStackTrace(); + } catch (final AccumuloException | AccumuloSecurityException | TableExistsException e) { + LOG.warn("A problem was encountered while setting the Configuration for the EntityOptimizer.", e); } isEvalDaoSet = true; diff --git a/extras/indexing/src/main/java/org/apache/rya/indexing/accumulo/entity/EntityTupleSet.java b/extras/indexing/src/main/java/org/apache/rya/indexing/accumulo/entity/EntityTupleSet.java index d829a29d5..42b7bb07a 100644 --- a/extras/indexing/src/main/java/org/apache/rya/indexing/accumulo/entity/EntityTupleSet.java +++ 
b/extras/indexing/src/main/java/org/apache/rya/indexing/accumulo/entity/EntityTupleSet.java @@ -1,5 +1,3 @@ -package org.apache.rya.indexing.accumulo.entity; - /* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file @@ -18,15 +16,17 @@ * specific language governing permissions and limitations * under the License. */ - - -import info.aduna.iteration.CloseableIteration; +package org.apache.rya.indexing.accumulo.entity; import java.util.Collection; import java.util.Collections; import java.util.List; import java.util.Set; +import org.apache.accumulo.core.client.AccumuloException; +import org.apache.accumulo.core.client.AccumuloSecurityException; +import org.apache.accumulo.core.client.Connector; +import org.apache.commons.io.IOUtils; import org.apache.rya.accumulo.AccumuloRdfConfiguration; import org.apache.rya.accumulo.AccumuloRyaDAO; import org.apache.rya.api.RdfCloudTripleStoreConfiguration; @@ -37,11 +37,6 @@ import org.apache.rya.rdftriplestore.RdfCloudTripleStore; import org.apache.rya.rdftriplestore.RdfCloudTripleStoreConnection; import org.apache.rya.rdftriplestore.evaluation.ExternalBatchingIterator; - -import org.apache.accumulo.core.client.AccumuloException; -import org.apache.accumulo.core.client.AccumuloSecurityException; -import org.apache.accumulo.core.client.Connector; -import org.apache.commons.io.IOUtils; import org.openrdf.query.BindingSet; import org.openrdf.query.QueryEvaluationException; import org.openrdf.query.algebra.StatementPattern; @@ -49,12 +44,16 @@ import org.openrdf.query.algebra.evaluation.QueryBindingSet; import org.openrdf.query.algebra.evaluation.impl.ExternalSet; import org.openrdf.sail.SailException; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import com.beust.jcommander.internal.Sets; import com.google.common.base.Joiner; -public class EntityTupleSet extends ExternalSet implements ExternalBatchingIterator { +import 
info.aduna.iteration.CloseableIteration; +public class EntityTupleSet extends ExternalSet implements ExternalBatchingIterator { + private static final Logger LOG = LoggerFactory.getLogger(EntityTupleSet.class); private StarQuery starQuery; private RdfCloudTripleStoreConfiguration conf; @@ -97,26 +96,29 @@ private void init() { } catch (AccumuloSecurityException e) { e.printStackTrace(); } - if (conf.isUseStats() && conf.isUseSelectivity()) { - ProspectorServiceEvalStatsDAO evalDao = new ProspectorServiceEvalStatsDAO(accCon, conf); - evalDao.init(); - AccumuloSelectivityEvalDAO ase = new AccumuloSelectivityEvalDAO(conf, accCon); - ase.setRdfEvalDAO(evalDao); - ase.init(); - - cardinality = starQuery.getCardinality(ase); - CardinalityStatementPattern csp = starQuery.getMinCardSp(ase); - - minCard = csp.getCardinality(); - minSp = csp.getSp(); - } else { - // TODO come up with a better default if cardinality is not - // initialized - cardinality = minCard = 1; - minSp = starQuery.getNodes().get(0); + try { + if (conf.isUseStats() && conf.isUseSelectivity()) { + ProspectorServiceEvalStatsDAO evalDao = new ProspectorServiceEvalStatsDAO(accCon, conf); + evalDao.init(); + AccumuloSelectivityEvalDAO ase = new AccumuloSelectivityEvalDAO(conf, accCon); + ase.setRdfEvalDAO(evalDao); + ase.init(); + + cardinality = starQuery.getCardinality(ase); + CardinalityStatementPattern csp = starQuery.getMinCardSp(ase); + + minCard = csp.getCardinality(); + minSp = csp.getSp(); + } else { + // TODO come up with a better default if cardinality is not + // initialized + cardinality = minCard = 1; + minSp = starQuery.getNodes().get(0); + } + } catch(final Exception e) { + LOG.warn("A problem was encountered while initializing the EntityTupleSet.", e); } - } @Override @@ -224,7 +226,7 @@ private int numberOfSpVars(StatementPattern sp) { @Override - public CloseableIteration evaluate(final Collection bindingset) throws QueryEvaluationException { + public CloseableIteration evaluate(Collection 
bindingset) throws QueryEvaluationException { if(bindingset.size() < 2 && !this.evalOptUsed) { BindingSet bs = new QueryBindingSet(); @@ -248,7 +250,7 @@ public CloseableIteration evaluate(final Co private RdfCloudTripleStoreConnection getRyaSailConnection() throws AccumuloException, AccumuloSecurityException, SailException { - final RdfCloudTripleStore store = new RdfCloudTripleStore(); + RdfCloudTripleStore store = new RdfCloudTripleStore(); AccumuloRyaDAO crdfdao = new AccumuloRyaDAO(); crdfdao.setConnector(accCon); AccumuloRdfConfiguration acc = new AccumuloRdfConfiguration(conf); diff --git a/extras/rya.prospector/pom.xml b/extras/rya.prospector/pom.xml index 952ab947b..35a9f67ac 100644 --- a/extras/rya.prospector/pom.xml +++ b/extras/rya.prospector/pom.xml @@ -48,10 +48,6 @@ under the License. com.google.guava guava - - org.codehaus.groovy - groovy-all - org.apache.mrunit @@ -75,74 +71,9 @@ under the License. - - - org.eclipse.m2e - lifecycle-mapping - 1.0.0 - - - - - - org.apache.maven.plugins - maven-compiler-plugin - [3.2,) - - compile - testCompile - - - - - - - - - org.codehaus.groovy - groovy-eclipse-compiler - [2.9.1-01,) - - add-groovy-build-paths - - - - - - - - - - - - maven-compiler-plugin - - groovy-eclipse-compiler - - - - org.codehaus.groovy - groovy-eclipse-compiler - 2.9.1-01 - - - - org.codehaus.groovy - groovy-eclipse-batch - 2.3.7-01 - - - - - org.codehaus.groovy - groovy-eclipse-compiler - 2.9.1-01 - true - org.apache.maven.plugins maven-shade-plugin diff --git a/extras/rya.prospector/src/main/groovy/org/apache/rya/prospector/domain/IndexEntry.groovy b/extras/rya.prospector/src/main/groovy/org/apache/rya/prospector/domain/IndexEntry.groovy deleted file mode 100644 index 8b0b670c7..000000000 --- a/extras/rya.prospector/src/main/groovy/org/apache/rya/prospector/domain/IndexEntry.groovy +++ /dev/null @@ -1,76 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. 
See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.rya.prospector.domain - -/** - * Date: 12/5/12 - * Time: 11:33 AM - */ -class IndexEntry { - def String index - def String data - def String dataType - def String tripleValueType - def String visibility - def Long count - def Long timestamp - - @Override - public String toString() { - return "IndexEntry{" + - "index='" + index + '\'' + - ", data='" + data + '\'' + - ", dataType='" + dataType + '\'' + - ", tripleValueType=" + tripleValueType + - ", visibility='" + visibility + '\'' + - ", timestamp='" + timestamp + '\'' + - ", count=" + count + - '}'; - } - - boolean equals(o) { - if (this.is(o)) return true - if (getClass() != o.class) return false - - IndexEntry that = (IndexEntry) o - - if (count != that.count) return false - if (timestamp != that.timestamp) return false - if (data != that.data) return false - if (dataType != that.dataType) return false - if (index != that.index) return false - if (tripleValueType != that.tripleValueType) return false - if (visibility != that.visibility) return false - - return true - } - - int hashCode() { - int result - result = (index != null ? index.hashCode() : 0) - result = 31 * result + (data != null ? data.hashCode() : 0) - result = 31 * result + (dataType != null ? 
dataType.hashCode() : 0) - result = 31 * result + (tripleValueType != null ? tripleValueType.hashCode() : 0) - result = 31 * result + (visibility != null ? visibility.hashCode() : 0) - result = 31 * result + (count != null ? count.hashCode() : 0) - result = 31 * result + (timestamp != null ? timestamp.hashCode() : 0) - return result - } -} diff --git a/extras/rya.prospector/src/main/groovy/org/apache/rya/prospector/domain/IntermediateProspect.groovy b/extras/rya.prospector/src/main/groovy/org/apache/rya/prospector/domain/IntermediateProspect.groovy deleted file mode 100644 index c5e34c05d..000000000 --- a/extras/rya.prospector/src/main/groovy/org/apache/rya/prospector/domain/IntermediateProspect.groovy +++ /dev/null @@ -1,70 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.rya.prospector.domain - -import org.apache.hadoop.io.WritableComparable - -import static org.apache.rya.prospector.domain.TripleValueType.* - -/** - * Date: 12/3/12 - * Time: 11:15 AM - */ -class IntermediateProspect implements WritableComparable { - - def String index - def String data - def String dataType - def TripleValueType tripleValueType - def String visibility - - @Override - int compareTo(IntermediateProspect t) { - if(!index.equals(t.index)) - return index.compareTo(t.index); - if(!data.equals(t.data)) - return data.compareTo(t.data); - if(!dataType.equals(t.dataType)) - return dataType.compareTo(t.dataType); - if(!tripleValueType.equals(t.tripleValueType)) - return tripleValueType.compareTo(t.tripleValueType); - if(!visibility.equals(t.visibility)) - return visibility.compareTo(t.visibility); - return 0 - } - - @Override - void write(DataOutput dataOutput) { - dataOutput.writeUTF(index); - dataOutput.writeUTF(data); - dataOutput.writeUTF(dataType); - dataOutput.writeUTF(tripleValueType.name()); - dataOutput.writeUTF(visibility); - } - - @Override - void readFields(DataInput dataInput) { - index = dataInput.readUTF() - data = dataInput.readUTF() - dataType = dataInput.readUTF() - tripleValueType = TripleValueType.valueOf(dataInput.readUTF()) - visibility = dataInput.readUTF() - } -} diff --git a/extras/rya.prospector/src/main/groovy/org/apache/rya/prospector/domain/TripleValueType.java b/extras/rya.prospector/src/main/groovy/org/apache/rya/prospector/domain/TripleValueType.java deleted file mode 100644 index 0c5307628..000000000 --- a/extras/rya.prospector/src/main/groovy/org/apache/rya/prospector/domain/TripleValueType.java +++ /dev/null @@ -1,26 +0,0 @@ -package org.apache.rya.prospector.domain; - -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - - -public enum TripleValueType { - - subject, predicate, object, entity, subjectpredicate, predicateobject, subjectobject -} diff --git a/extras/rya.prospector/src/main/groovy/org/apache/rya/prospector/mr/Prospector.groovy b/extras/rya.prospector/src/main/groovy/org/apache/rya/prospector/mr/Prospector.groovy deleted file mode 100644 index c51ecef69..000000000 --- a/extras/rya.prospector/src/main/groovy/org/apache/rya/prospector/mr/Prospector.groovy +++ /dev/null @@ -1,108 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.rya.prospector.mr - -import org.apache.rya.prospector.utils.ProspectorUtils -import org.apache.accumulo.core.data.Mutation -import org.apache.accumulo.core.data.Value -import org.apache.accumulo.core.security.ColumnVisibility -import org.apache.hadoop.conf.Configured -import org.apache.hadoop.util.Tool -import org.apache.hadoop.util.ToolRunner -import org.apache.hadoop.conf.Configuration -import org.apache.hadoop.fs.Path -import org.apache.hadoop.mapreduce.Job - -import org.apache.hadoop.io.LongWritable -import org.apache.commons.lang.time.DateUtils - -import org.apache.rya.prospector.domain.IntermediateProspect - -import com.google.common.collect.Lists - -import static org.apache.rya.prospector.utils.ProspectorConstants.* -import static org.apache.rya.prospector.utils.ProspectorUtils.* - -/** - * Date: 12/3/12 - * Time: 10:57 AM - */ -class Prospector extends Configured implements Tool { - - private static long NOW = System.currentTimeMillis(); - - private Date truncatedDate; - - public static void main(String[] args) { - int res = ToolRunner.run(new Prospector(), args); - System.exit(res); - } - - @Override - int run(String[] args) { - Configuration conf = getConf(); - - truncatedDate = DateUtils.truncate(new Date(NOW), Calendar.MINUTE); - - Path configurationPath = new Path(args[0]); - conf.addResource(configurationPath); - - def inTable = conf.get("prospector.intable") - def outTable = conf.get("prospector.outtable") - def auths_str = conf.get("prospector.auths") - assert inTable != null - assert outTable != null - assert auths_str != null - - Job job = new Job(getConf(), this.getClass().getSimpleName() + "_" + System.currentTimeMillis()); - job.setJarByClass(this.getClass()); - - String[] auths = auths_str.split(",") - ProspectorUtils.initMRJob(job, inTable, outTable, auths) - - job.getConfiguration().setLong("DATE", NOW); - - def performant = conf.get(PERFORMANT) - if (Boolean.parseBoolean(performant)) { - /** - * Apply some 
performance tuning - */ - ProspectorUtils.addMRPerformance(job.configuration) - } - - job.setMapOutputKeyClass(IntermediateProspect.class); - job.setMapOutputValueClass(LongWritable.class); - - job.setMapperClass(ProspectorMapper.class); - job.setCombinerClass(ProspectorCombiner.class); - job.setReducerClass(ProspectorReducer.class); - job.waitForCompletion(true); - - int success = job.isSuccessful() ? 0 : 1; - - if (success == 0) { - Mutation m = new Mutation(METADATA) - m.put(PROSPECT_TIME, getReverseIndexDateTime(truncatedDate), new ColumnVisibility(DEFAULT_VIS), truncatedDate.time, new Value(EMPTY)) - writeMutations(connector(instance(conf), conf), outTable, [m]) - } - - return success - } -} diff --git a/extras/rya.prospector/src/main/groovy/org/apache/rya/prospector/mr/ProspectorCombiner.groovy b/extras/rya.prospector/src/main/groovy/org/apache/rya/prospector/mr/ProspectorCombiner.groovy deleted file mode 100644 index 784ffd2da..000000000 --- a/extras/rya.prospector/src/main/groovy/org/apache/rya/prospector/mr/ProspectorCombiner.groovy +++ /dev/null @@ -1,61 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.rya.prospector.mr - -import org.apache.rya.prospector.plans.IndexWorkPlan -import org.apache.rya.prospector.plans.IndexWorkPlanManager -import org.apache.rya.prospector.plans.impl.ServicesBackedIndexWorkPlanManager -import org.apache.commons.lang.time.DateUtils -import org.apache.hadoop.mapreduce.Reducer -import org.apache.rya.prospector.utils.ProspectorUtils - -/** - * Date: 12/3/12 - * Time: 11:06 AM - */ -class ProspectorCombiner extends Reducer { - - private Date truncatedDate; - private IndexWorkPlanManager manager = new ServicesBackedIndexWorkPlanManager() - Map plans - - @Override - public void setup(Reducer.Context context) throws IOException, InterruptedException { - super.setup(context); - - long now = context.getConfiguration().getLong("DATE", System.currentTimeMillis()); - truncatedDate = DateUtils.truncate(new Date(now), Calendar.MINUTE); - - this.plans = ProspectorUtils.planMap(manager.plans) - } - - @Override - protected void reduce(def prospect, Iterable values, Reducer.Context context) { - def plan = plans.get(prospect.index) - if (plan != null) { - def coll = plan.combine(prospect, values) - if (coll != null) { - coll.each { entry -> - context.write(entry.key, entry.value) - } - } - } - } -} diff --git a/extras/rya.prospector/src/main/groovy/org/apache/rya/prospector/mr/ProspectorMapper.groovy b/extras/rya.prospector/src/main/groovy/org/apache/rya/prospector/mr/ProspectorMapper.groovy deleted file mode 100644 index 36eab604d..000000000 --- a/extras/rya.prospector/src/main/groovy/org/apache/rya/prospector/mr/ProspectorMapper.groovy +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.rya.prospector.mr - -import org.apache.rya.accumulo.AccumuloRdfConfiguration -import org.apache.rya.api.RdfCloudTripleStoreConstants -import org.apache.rya.api.domain.RyaStatement -import org.apache.rya.api.resolver.RyaTripleContext -import org.apache.rya.api.resolver.triple.TripleRow -import org.apache.rya.prospector.plans.IndexWorkPlan -import org.apache.rya.prospector.plans.IndexWorkPlanManager -import org.apache.rya.prospector.plans.impl.ServicesBackedIndexWorkPlanManager - -import org.apache.commons.lang.time.DateUtils -import org.apache.hadoop.mapreduce.Mapper - -/** - * Date: 12/3/12 - * Time: 11:06 AM - */ -class ProspectorMapper extends Mapper { - - private Date truncatedDate; - private RyaTripleContext ryaContext; - private IndexWorkPlanManager manager = new ServicesBackedIndexWorkPlanManager() - private Collection plans = manager.plans - - @Override - public void setup(Mapper.Context context) throws IOException, InterruptedException { - super.setup(context); - - long now = context.getConfiguration().getLong("DATE", System.currentTimeMillis()); - ryaContext = RyaTripleContext.getInstance(new AccumuloRdfConfiguration(context.getConfiguration())); - truncatedDate = DateUtils.truncate(new Date(now), Calendar.MINUTE); - } - - @Override - public void map(def row, def data, Mapper.Context context) { - RyaStatement ryaStatement = 
ryaContext.deserializeTriple(RdfCloudTripleStoreConstants.TABLE_LAYOUT.SPO, - new TripleRow( - row.row.bytes, - row.columnFamily.bytes, - row.columnQualifier.bytes, - row.timestamp, - row.columnVisibility.bytes, - data.get() - ) - ) - plans.each { plan -> - def coll = plan.map(ryaStatement) - if (coll != null) { - coll.each { entry -> - context.write(entry.key, entry.value) - } - } - } - } -} diff --git a/extras/rya.prospector/src/main/groovy/org/apache/rya/prospector/mr/ProspectorReducer.groovy b/extras/rya.prospector/src/main/groovy/org/apache/rya/prospector/mr/ProspectorReducer.groovy deleted file mode 100644 index 1f4352b84..000000000 --- a/extras/rya.prospector/src/main/groovy/org/apache/rya/prospector/mr/ProspectorReducer.groovy +++ /dev/null @@ -1,57 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.rya.prospector.mr - -import org.apache.rya.prospector.plans.IndexWorkPlan -import org.apache.rya.prospector.plans.IndexWorkPlanManager -import org.apache.rya.prospector.plans.impl.ServicesBackedIndexWorkPlanManager -import org.apache.commons.lang.time.DateUtils -import org.apache.hadoop.mapreduce.Reducer -import org.apache.rya.prospector.utils.ProspectorUtils - -/** - * Date: 12/3/12 - * Time: 11:06 AM - */ -class ProspectorReducer extends Reducer { - - private Date truncatedDate; - private IndexWorkPlanManager manager = new ServicesBackedIndexWorkPlanManager() - Map plans - - @Override - public void setup(Reducer.Context context) throws IOException, InterruptedException { - super.setup(context); - - def conf = context.getConfiguration() - long now = conf.getLong("DATE", System.currentTimeMillis()); - truncatedDate = DateUtils.truncate(new Date(now), Calendar.MINUTE); - - this.plans = ProspectorUtils.planMap(manager.plans) - } - - @Override - protected void reduce(def prospect, Iterable values, Reducer.Context context) { - def plan = plans.get(prospect.index) - if (plan != null) { - plan.reduce(prospect, values, truncatedDate, context) - } - } -} diff --git a/extras/rya.prospector/src/main/groovy/org/apache/rya/prospector/plans/IndexWorkPlan.groovy b/extras/rya.prospector/src/main/groovy/org/apache/rya/prospector/plans/IndexWorkPlan.groovy deleted file mode 100644 index 80316ea1c..000000000 --- a/extras/rya.prospector/src/main/groovy/org/apache/rya/prospector/plans/IndexWorkPlan.groovy +++ /dev/null @@ -1,51 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.rya.prospector.plans - -import org.apache.rya.api.domain.RyaStatement -import org.apache.rya.prospector.domain.IntermediateProspect -import org.apache.hadoop.io.LongWritable -import org.apache.hadoop.mapreduce.Reducer -import org.openrdf.model.vocabulary.XMLSchema -import org.apache.rya.prospector.domain.IndexEntry - -/** - * Date: 12/3/12 - * Time: 11:12 AM - */ -public interface IndexWorkPlan { - - public static final String URITYPE = XMLSchema.ANYURI.stringValue() - public static final LongWritable ONE = new LongWritable(1) - public static final String DELIM = "\u0000"; - - public Collection> map(RyaStatement ryaStatement) - - public Collection> combine(IntermediateProspect prospect, Iterable counts); - - public void reduce(IntermediateProspect prospect, Iterable counts, Date timestamp, Reducer.Context context) - - public String getIndexType() - - public String getCompositeValue(List indices) - - public List query(def connector, String tableName, List prospectTimes, String type, String index, String dataType, String[] auths) - -} diff --git a/extras/rya.prospector/src/main/groovy/org/apache/rya/prospector/plans/impl/CountPlan.groovy b/extras/rya.prospector/src/main/groovy/org/apache/rya/prospector/plans/impl/CountPlan.groovy deleted file mode 100644 index 51527a50a..000000000 --- a/extras/rya.prospector/src/main/groovy/org/apache/rya/prospector/plans/impl/CountPlan.groovy +++ /dev/null @@ -1,220 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. 
See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.rya.prospector.plans.impl - -import org.apache.rya.api.domain.RyaStatement -import org.apache.rya.prospector.domain.IndexEntry -import org.apache.rya.prospector.domain.IntermediateProspect -import org.apache.rya.prospector.domain.TripleValueType -import org.apache.rya.prospector.plans.IndexWorkPlan -import org.apache.rya.prospector.utils.CustomEntry -import org.apache.rya.prospector.utils.ProspectorUtils - -import org.apache.accumulo.core.data.Mutation -import org.apache.accumulo.core.data.Range -import org.apache.accumulo.core.data.Value -import org.apache.accumulo.core.security.Authorizations -import org.apache.accumulo.core.security.ColumnVisibility -import org.apache.hadoop.io.LongWritable -import org.apache.hadoop.io.Text -import org.apache.hadoop.mapreduce.Reducer -import org.openrdf.model.util.URIUtil -import org.openrdf.model.vocabulary.XMLSchema; - -import static org.apache.rya.prospector.utils.ProspectorConstants.COUNT; -import org.apache.rya.api.RdfCloudTripleStoreConstants - -/** - * Date: 12/3/12 - * Time: 12:28 PM - */ -class CountPlan implements IndexWorkPlan { - - @Override - Collection> map(RyaStatement ryaStatement) { - def subject = ryaStatement.getSubject() - def predicate = ryaStatement.getPredicate() - def subjpred = 
ryaStatement.getSubject().data + DELIM + ryaStatement.getPredicate().data - def predobj = ryaStatement.getPredicate().data + DELIM + ryaStatement.getObject().data - def subjobj = ryaStatement.getSubject().data + DELIM + ryaStatement.getObject().data - def object = ryaStatement.getObject() - def localIndex = URIUtil.getLocalNameIndex(subject.data) - def namespace = subject.data.substring(0, localIndex - 1) - def visibility = new String(ryaStatement.columnVisibility) - return [ - new CustomEntry( - new IntermediateProspect(index: COUNT, - data: subject.data, - dataType: URITYPE, - tripleValueType: TripleValueType.subject, - visibility: visibility), - ONE), - new CustomEntry( - new IntermediateProspect(index: COUNT, - data: predicate.data, - dataType: URITYPE, - tripleValueType: TripleValueType.predicate, - visibility: visibility - ), ONE), - new CustomEntry( - new IntermediateProspect(index: COUNT, - data: object.data, - dataType: object.dataType.stringValue(), - tripleValueType: TripleValueType.object, - visibility: visibility - ), ONE), - new CustomEntry( - new IntermediateProspect(index: COUNT, - data: subjpred, - dataType: XMLSchema.STRING, - tripleValueType: TripleValueType.subjectpredicate, - visibility: visibility - ), ONE), - new CustomEntry( - new IntermediateProspect(index: COUNT, - data: subjobj, - dataType: XMLSchema.STRING, - tripleValueType: TripleValueType.subjectobject, - visibility: visibility - ), ONE), - new CustomEntry( - new IntermediateProspect(index: COUNT, - data: predobj, - dataType: XMLSchema.STRING, - tripleValueType: TripleValueType.predicateobject, - visibility: visibility - ), ONE), - new CustomEntry( - new IntermediateProspect(index: COUNT, - data: namespace, - dataType: URITYPE, - tripleValueType: TripleValueType.entity, - visibility: visibility - ), ONE), - ] - } - - @Override - Collection> combine(IntermediateProspect prospect, Iterable counts) { - - def iter = counts.iterator() - long sum = 0; - iter.each { lw -> - sum += lw.get() - 
} - - return [new CustomEntry(prospect, new LongWritable(sum))] - } - - @Override - void reduce(IntermediateProspect prospect, Iterable counts, Date timestamp, Reducer.Context context) { - def iter = counts.iterator() - long sum = 0; - iter.each { lw -> - sum += lw.get() - } - - def indexType = prospect.tripleValueType.name() - - // not sure if this is the best idea.. - if ((sum >= 0) || - indexType.equals(TripleValueType.predicate.toString())) { - - Mutation m = new Mutation(indexType + DELIM + prospect.data + DELIM + ProspectorUtils.getReverseIndexDateTime(timestamp)) - m.put(COUNT, prospect.dataType, new ColumnVisibility(prospect.visibility), timestamp.getTime(), new Value("${sum}".getBytes())); - - context.write(null, m); - } - } - - @Override - String getIndexType() { - return COUNT - } - - @Override - String getCompositeValue(List indices){ - Iterator indexIt = indices.iterator(); - String compositeIndex = indexIt.next(); - while (indexIt.hasNext()){ - String value = indexIt.next(); - compositeIndex += DELIM + value; - } - return compositeIndex; - } - - @Override - List query(def connector, String tableName, List prospectTimes, String type, String compositeIndex, String dataType, String[] auths) { - - assert connector != null && tableName != null && type != null && compositeIndex != null - - def bs = connector.createBatchScanner(tableName, new Authorizations(auths), 4) - def ranges = [] - int max = 1000; //by default only return 1000 prospects maximum - if (prospectTimes != null) { - prospectTimes.each { prospect -> - ranges.add( - new Range(type + DELIM + compositeIndex + DELIM + ProspectorUtils.getReverseIndexDateTime(new Date(prospect)))) - } - } else { - max = 1; //only return the latest if no prospectTimes given - def prefix = type + DELIM + compositeIndex + DELIM; - ranges.add(new Range(prefix, prefix + RdfCloudTripleStoreConstants.LAST)) - } - bs.ranges = ranges - if (dataType != null) { - bs.fetchColumn(new Text(COUNT), new Text(dataType)) - } else { 
- bs.fetchColumnFamily(new Text(COUNT)) - } - - List indexEntries = new ArrayList() - def iter = bs.iterator() - - while (iter.hasNext() && indexEntries.size() <= max) { - def entry = iter.next() - def k = entry.key - def v = entry.value - - def rowArr = k.row.toString().split(DELIM) - String values = ""; - // if it is a composite index, then return the type as a composite index - if (type.equalsIgnoreCase(TripleValueType.subjectpredicate.toString()) || - type.equalsIgnoreCase(TripleValueType.subjectobject.toString()) || - type.equalsIgnoreCase(TripleValueType.predicateobject.toString())){ - values =rowArr[1] + DELIM + rowArr[2] - } - else values = rowArr[1] - - indexEntries.add(new IndexEntry(data: values, - tripleValueType: rowArr[0], - index: COUNT, - dataType: k.columnQualifier.toString(), - visibility: k.columnVisibility.toString(), - count: Long.parseLong(new String(v.get())), - timestamp: k.timestamp - )) - } - bs.close() - - return indexEntries - } - -} diff --git a/extras/rya.prospector/src/main/groovy/org/apache/rya/prospector/plans/impl/ServicesBackedIndexWorkPlanManager.groovy b/extras/rya.prospector/src/main/groovy/org/apache/rya/prospector/plans/impl/ServicesBackedIndexWorkPlanManager.groovy deleted file mode 100644 index 07c81af7d..000000000 --- a/extras/rya.prospector/src/main/groovy/org/apache/rya/prospector/plans/impl/ServicesBackedIndexWorkPlanManager.groovy +++ /dev/null @@ -1,38 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.rya.prospector.plans.impl - -import org.apache.rya.prospector.plans.IndexWorkPlan -import com.google.common.collect.Lists -import org.apache.rya.prospector.plans.IndexWorkPlanManager - -/** - * Date: 12/3/12 - * Time: 11:24 AM - */ -class ServicesBackedIndexWorkPlanManager implements IndexWorkPlanManager { - - def Collection plans - - ServicesBackedIndexWorkPlanManager() { - def iterator = ServiceLoader.load(IndexWorkPlan.class).iterator(); - plans = Lists.newArrayList(iterator) - } -} diff --git a/extras/rya.prospector/src/main/groovy/org/apache/rya/prospector/service/ProspectorService.groovy b/extras/rya.prospector/src/main/groovy/org/apache/rya/prospector/service/ProspectorService.groovy deleted file mode 100644 index d72e0e02b..000000000 --- a/extras/rya.prospector/src/main/groovy/org/apache/rya/prospector/service/ProspectorService.groovy +++ /dev/null @@ -1,126 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.rya.prospector.service - -import org.apache.rya.prospector.utils.ProspectorUtils -import org.apache.accumulo.core.data.Key -import org.apache.accumulo.core.data.Range -import org.apache.accumulo.core.security.Authorizations -import org.apache.hadoop.io.Text - -import static org.apache.rya.prospector.utils.ProspectorConstants.METADATA -import static org.apache.rya.prospector.utils.ProspectorConstants.PROSPECT_TIME -import org.apache.rya.prospector.plans.IndexWorkPlanManager -import org.apache.rya.prospector.plans.impl.ServicesBackedIndexWorkPlanManager -import org.apache.rya.prospector.plans.IndexWorkPlan -import org.apache.rya.prospector.domain.IndexEntry - -/** - * Date: 12/5/12 - * Time: 12:28 PM - */ -class ProspectorService { - - def connector - String tableName - - IndexWorkPlanManager manager = new ServicesBackedIndexWorkPlanManager() - Map plans - - ProspectorService(def connector, String tableName) { - this.connector = connector - this.tableName = tableName - this.plans = ProspectorUtils.planMap(manager.plans) - - //init - def tos = connector.tableOperations() - if(!tos.exists(tableName)) { - tos.create(tableName) - } - } - - public Iterator getProspects(String[] auths) { - - def scanner = connector.createScanner(tableName, new Authorizations(auths)) - scanner.setRange(Range.exact(METADATA)); - scanner.fetchColumnFamily(new Text(PROSPECT_TIME)); - - def iterator = scanner.iterator(); - - return new Iterator() { - - - @Override - public boolean hasNext() { - return iterator.hasNext(); - } - - @Override - public 
Long next() { - return iterator.next().getKey().getTimestamp(); - } - - @Override - public void remove() { - iterator.remove(); - } - }; - - } - - public Iterator getProspectsInRange(long beginTime, long endTime, String[] auths) { - - def scanner = connector.createScanner(tableName, new Authorizations(auths)) - scanner.setRange(new Range( - new Key(METADATA, PROSPECT_TIME, ProspectorUtils.getReverseIndexDateTime(new Date(endTime)), "", Long.MAX_VALUE), - new Key(METADATA, PROSPECT_TIME, ProspectorUtils.getReverseIndexDateTime(new Date(beginTime)), "", 0l) - )) - def iterator = scanner.iterator(); - - return new Iterator() { - - @Override - public boolean hasNext() { - return iterator.hasNext(); - } - - @Override - public Long next() { - return iterator.next().getKey().getTimestamp(); - } - - @Override - public void remove() { - iterator.remove(); - } - }; - - } - - public List query(List prospectTimes, String indexType, String type, List index, String dataType, String[] auths) { - assert indexType != null - - def plan = plans.get(indexType) - assert plan != null: "Index Type: ${indexType} does not exist" - String compositeIndex = plan.getCompositeValue(index); - - return plan.query(connector, tableName, prospectTimes, type, compositeIndex, dataType, auths) - } -} diff --git a/extras/rya.prospector/src/main/groovy/org/apache/rya/prospector/service/ProspectorServiceEvalStatsDAO.groovy b/extras/rya.prospector/src/main/groovy/org/apache/rya/prospector/service/ProspectorServiceEvalStatsDAO.groovy deleted file mode 100644 index 2c2b1539c..000000000 --- a/extras/rya.prospector/src/main/groovy/org/apache/rya/prospector/service/ProspectorServiceEvalStatsDAO.groovy +++ /dev/null @@ -1,122 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.rya.prospector.service - -import org.apache.rya.api.RdfCloudTripleStoreConfiguration -import org.apache.rya.api.persist.RdfEvalStatsDAO -import org.apache.rya.prospector.domain.TripleValueType -import org.apache.rya.prospector.utils.ProspectorConstants -import org.apache.hadoop.conf.Configuration -import org.openrdf.model.Resource -import org.openrdf.model.Value - -import org.apache.rya.api.persist.RdfEvalStatsDAO.CARDINALITY_OF - -/** - * An ${@link org.apache.rya.api.persist.RdfEvalStatsDAO} that uses the Prospector Service underneath return counts. 
- */ -class ProspectorServiceEvalStatsDAO implements RdfEvalStatsDAO { - - def ProspectorService prospectorService - - ProspectorServiceEvalStatsDAO() { - } - - ProspectorServiceEvalStatsDAO(ProspectorService prospectorService, RdfCloudTripleStoreConfiguration conf) { - this.prospectorService = prospectorService - } - - public ProspectorServiceEvalStatsDAO(def connector, RdfCloudTripleStoreConfiguration conf) { - this.prospectorService = new ProspectorService(connector, getProspectTableName(conf)) - } - - @Override - void init() { - assert prospectorService != null - } - - @Override - boolean isInitialized() { - return prospectorService != null - } - - @Override - void destroy() { - - } - - @Override - public double getCardinality(RdfCloudTripleStoreConfiguration conf, CARDINALITY_OF card, List val) { - - assert conf != null && card != null && val != null - String triplePart = null; - switch (card) { - case (CARDINALITY_OF.SUBJECT): - triplePart = TripleValueType.subject - break; - case (CARDINALITY_OF.PREDICATE): - triplePart = TripleValueType.predicate - break; - case (CARDINALITY_OF.OBJECT): - triplePart = TripleValueType.object - break; - case (CARDINALITY_OF.SUBJECTPREDICATE): - triplePart = TripleValueType.subjectpredicate - break; - case (CARDINALITY_OF.SUBJECTOBJECT): - triplePart = TripleValueType.subjectobject - break; - case (CARDINALITY_OF.PREDICATEOBJECT): - triplePart = TripleValueType.predicateobject - break; - } - - String[] auths = conf.getAuths() - List indexedValues = new ArrayList(); - Iterator valueIt = val.iterator(); - while (valueIt.hasNext()){ - indexedValues.add(valueIt.next().stringValue()); - } - - def indexEntries = prospectorService.query(null, ProspectorConstants.COUNT, triplePart, indexedValues, null /** what is the datatype here? */, - auths) - - return indexEntries.size() > 0 ? 
indexEntries.head().count : -1 - } - - @Override - double getCardinality(RdfCloudTripleStoreConfiguration conf, CARDINALITY_OF card, List val, Resource context) { - return getCardinality(conf, card, val) //TODO: Not sure about the context yet - } - - @Override - public void setConf(RdfCloudTripleStoreConfiguration conf) { - - } - - @Override - RdfCloudTripleStoreConfiguration getConf() { - return null - } - - public static String getProspectTableName(RdfCloudTripleStoreConfiguration conf) { - return conf.getTablePrefix() + "prospects"; - } -} diff --git a/extras/rya.prospector/src/main/groovy/org/apache/rya/prospector/utils/ProspectorConstants.groovy b/extras/rya.prospector/src/main/groovy/org/apache/rya/prospector/utils/ProspectorConstants.groovy deleted file mode 100644 index 29eac3799..000000000 --- a/extras/rya.prospector/src/main/groovy/org/apache/rya/prospector/utils/ProspectorConstants.groovy +++ /dev/null @@ -1,41 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.rya.prospector.utils - -/** - * Date: 12/5/12 - * Time: 10:57 AM - */ -class ProspectorConstants { - public static final String COUNT = "count" - public static final String METADATA = "metadata" - public static final String PROSPECT_TIME = "prospectTime" - public static final String DEFAULT_VIS = "U&FOUO" - public static final byte[] EMPTY = new byte [0]; - - //config properties - public static final String PERFORMANT = "performant" - - public static final String USERNAME = "username" - public static final String PASSWORD = "password" - public static final String INSTANCE = "instance" - public static final String ZOOKEEPERS = "zookeepers" - public static final String MOCK = "mock" -} diff --git a/extras/rya.prospector/src/main/groovy/org/apache/rya/prospector/utils/ProspectorUtils.groovy b/extras/rya.prospector/src/main/groovy/org/apache/rya/prospector/utils/ProspectorUtils.groovy deleted file mode 100644 index e4142d9bd..000000000 --- a/extras/rya.prospector/src/main/groovy/org/apache/rya/prospector/utils/ProspectorUtils.groovy +++ /dev/null @@ -1,138 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.rya.prospector.utils - -import org.apache.accumulo.core.client.Connector -import org.apache.accumulo.core.client.Instance -import org.apache.accumulo.core.client.ZooKeeperInstance -import org.apache.accumulo.core.client.mapreduce.AccumuloInputFormat -import org.apache.accumulo.core.client.mapreduce.AccumuloOutputFormat -import org.apache.accumulo.core.client.mock.MockInstance -import org.apache.accumulo.core.data.Mutation -import org.apache.accumulo.core.security.Authorizations -import org.apache.commons.lang.Validate -import org.apache.hadoop.conf.Configuration -import org.apache.hadoop.io.Text -import org.apache.hadoop.io.compress.GzipCodec -import org.apache.hadoop.mapreduce.Job - -import java.text.SimpleDateFormat -import org.apache.rya.prospector.plans.IndexWorkPlan -import org.apache.accumulo.core.client.security.tokens.PasswordToken - -import static org.apache.rya.prospector.utils.ProspectorConstants.* - -/** - * Date: 12/4/12 - * Time: 4:24 PM - */ -class ProspectorUtils { - - public static final long INDEXED_DATE_SORT_VAL = 999999999999999999L; // 18 char long, same length as date format pattern below - public static final String INDEXED_DATE_FORMAT = "yyyyMMddHHmmsssSSS"; - - public static String getReverseIndexDateTime(Date date) { - Validate.notNull(date); - String formattedDateString = new SimpleDateFormat(INDEXED_DATE_FORMAT).format(date); - long diff = INDEXED_DATE_SORT_VAL - Long.valueOf(formattedDateString); - - return Long.toString(diff); - } - - public static Map planMap(def plans) { - plans.inject([:]) { map, plan -> - map.putAt(plan.indexType, plan) - map - } - } - - public static void initMRJob(Job job, String table, String outtable, String[] auths) { - Configuration conf = job.configuration - String username = conf.get(USERNAME) - String password = conf.get(PASSWORD) - String instance = conf.get(INSTANCE) - String zookeepers = conf.get(ZOOKEEPERS) - String mock = conf.get(MOCK) - - //input - if 
(Boolean.parseBoolean(mock)) { - AccumuloInputFormat.setMockInstance(job, instance) - AccumuloOutputFormat.setMockInstance(job, instance) - } else if (zookeepers != null) { - AccumuloInputFormat.setZooKeeperInstance(job, instance, zookeepers) - AccumuloOutputFormat.setZooKeeperInstance(job, instance, zookeepers) - } else { - throw new IllegalArgumentException("Must specify either mock or zookeepers"); - } - - AccumuloInputFormat.setConnectorInfo(job, username, new PasswordToken(password.getBytes())) - AccumuloInputFormat.setInputTableName(job, table) - job.setInputFormatClass(AccumuloInputFormat.class); - AccumuloInputFormat.setScanAuthorizations(job, new Authorizations(auths)) - - // OUTPUT - job.setOutputFormatClass(AccumuloOutputFormat.class); - job.setOutputKeyClass(Text.class); - job.setOutputValueClass(Mutation.class); - AccumuloOutputFormat.setConnectorInfo(job, username, new PasswordToken(password.getBytes())) - AccumuloOutputFormat.setDefaultTableName(job, outtable) - } - - public static void addMRPerformance(Configuration conf) { - conf.setBoolean("mapred.map.tasks.speculative.execution", false); - conf.setBoolean("mapred.reduce.tasks.speculative.execution", false); - conf.set("io.sort.mb", "256"); - conf.setBoolean("mapred.compress.map.output", true); - conf.set("mapred.map.output.compression.codec", GzipCodec.class.getName()); - } - - public static Instance instance(Configuration conf) { - assert conf != null - - String instance_str = conf.get(INSTANCE) - String zookeepers = conf.get(ZOOKEEPERS) - String mock = conf.get(MOCK) - if (Boolean.parseBoolean(mock)) { - return new MockInstance(instance_str) - } else if (zookeepers != null) { - return new ZooKeeperInstance(instance_str, zookeepers) - } else { - throw new IllegalArgumentException("Must specify either mock or zookeepers"); - } - } - - public static Connector connector(Instance instance, Configuration conf) { - String username = conf.get(USERNAME) - String password = conf.get(PASSWORD) - if 
(instance == null) - instance = instance(conf) - return instance.getConnector(username, password) - } - - public static void writeMutations(Connector connector, String tableName, def mutations) { - def bw = connector.createBatchWriter(tableName, 10000l, 10000l, 4); - mutations.each { m -> - bw.addMutation(m) - } - bw.flush() - bw.close() - } - -} diff --git a/extras/rya.prospector/src/main/java/org/apache/rya/prospector/domain/IndexEntry.java b/extras/rya.prospector/src/main/java/org/apache/rya/prospector/domain/IndexEntry.java new file mode 100644 index 000000000..4d4dfc8d9 --- /dev/null +++ b/extras/rya.prospector/src/main/java/org/apache/rya/prospector/domain/IndexEntry.java @@ -0,0 +1,241 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.rya.prospector.domain; + +import java.util.Objects; + +import org.apache.rya.prospector.mr.Prospector; +import org.apache.rya.prospector.plans.IndexWorkPlan; + +/** + * Represents a count that was the result of a {@link Prospector} run. 
+ */ +public class IndexEntry { + + private final String index; + private final String data; + private final String dataType; + private final String tripleValueType; + private final String visibility; + private final Long count; + private final Long timestamp; + + /** + * Constructs an instance of {@link IndexEntry}. The arguments are stored as provided; + * none of them are checked for {@code null}. + * + * @param index - Indicates which {@link IndexWorkPlan} the data came from. + * @param data - The information that is being counted. + * @param dataType - The data type of {@code data}. + * @param tripleValueType - Indicates which parts of the RDF Statement are included in {@code data}. + * @param visibility - The visibility of this entry. + * @param count - The number of times the {@code data} appeared within Rya. + * @param timestamp - Identifies which Prospect run this entry belongs to. + */ + public IndexEntry( + final String index, + final String data, + final String dataType, + final String tripleValueType, + final String visibility, + final Long count, + final Long timestamp) { + this.index = index; + this.data = data; + this.dataType = dataType; + this.tripleValueType = tripleValueType; + this.visibility = visibility; + this.count = count; + this.timestamp = timestamp; + } + + /** + * @return Indicates which {@link IndexWorkPlan} the data came from. + */ + public String getIndex() { + return index; + } + + /** + * @return The information that is being counted. + */ + public String getData() { + return data; + } + + /** + * @return The data type of {@code data}. + */ + public String getDataType() { + return dataType; + } + + /** + * @return Indicates which parts of the RDF Statement are included in {@code data}. + */ + public String getTripleValueType() { + return tripleValueType; + } + + /** + * @return The visibility of this entry. + */ + public String getVisibility() { + return visibility; + } + + /** + * @return The number of times the {@code data} appeared within Rya. 
 + */ + public Long getCount() { + return count; + } + + /** + * @return Identifies which Prospect run this entry belongs to. + */ + public Long getTimestamp() { + return timestamp; + } + + /** + * @return A string that lists the value of every field of this entry. + */ + @Override + public String toString() { + return "IndexEntry{" + + "index='" + index + '\'' + + ", data='" + data + '\'' + + ", dataType='" + dataType + '\'' + + ", tripleValueType=" + tripleValueType + + ", visibility='" + visibility + '\'' + + ", timestamp='" + timestamp + '\'' + + ", count=" + count + + '}'; + } + + /** + * @return A hash computed from all seven fields, which are the same fields + * compared by {@link #equals(Object)}. + */ + @Override + public int hashCode() { + return Objects.hash(index, data, dataType, tripleValueType, visibility, count, timestamp); + } + + /** + * @param o - The object to compare against. + * @return {@code true} if {@code o} is this entry, or is an {@link IndexEntry} whose + * seven fields are all equal to this entry's fields (two {@code null} fields + * are considered equal); otherwise {@code false}. + */ + @Override + public boolean equals(Object o) { + if(this == o) { + return true; + } + if(o instanceof IndexEntry) { + final IndexEntry entry = (IndexEntry) o; + return Objects.equals(index, entry.index) && + Objects.equals(data, entry.data) && + Objects.equals(dataType, entry.dataType) && + Objects.equals(tripleValueType, entry.tripleValueType) && + Objects.equals(visibility, entry.visibility) && + Objects.equals(count, entry.count) && + Objects.equals(timestamp, entry.timestamp); + } + return false; + } + + /** + * @return An empty instance of {@link Builder}. + */ + public static Builder builder() { + return new Builder(); + } + + /** + * Builds instances of {@link IndexEntry}. 
+ */ + public static final class Builder { + private String index; + private String data; + private String dataType; + private String tripleValueType; + private String visibility; + private Long count; + private Long timestamp; + + /** + * @param index - Indicates which {@link IndexWorkPlan} the data came from. + * @return This {@link Builder} so that method invocations may be chained. + */ + public Builder setIndex(String index) { + this.index = index; + return this; + } + + /** + * @param data - The information that is being counted. + * @return This {@link Builder} so that method invocations may be chained. 
 + */ + public Builder setData(String data) { + this.data = data; + return this; + } + + /** + * @param dataType - The data type of {@code data}. + * @return This {@link Builder} so that method invocations may be chained. + */ + public Builder setDataType(String dataType) { + this.dataType = dataType; + return this; + } + + /** + * @param tripleValueType - Indicates which parts of the RDF Statement are included in {@code data}. + * @return This {@link Builder} so that method invocations may be chained. + */ + public Builder setTripleValueType(String tripleValueType) { + this.tripleValueType = tripleValueType; + return this; + } + + /** + * @param visibility - The visibility of this entry. + * @return This {@link Builder} so that method invocations may be chained. + */ + public Builder setVisibility(String visibility) { + this.visibility = visibility; + return this; + } + + /** + * @param count - The number of times the {@code data} appeared within Rya. + * @return This {@link Builder} so that method invocations may be chained. + */ + public Builder setCount(Long count) { + this.count = count; + return this; + } + + /** + * @param timestamp - Identifies which Prospect run this entry belongs to. + * @return This {@link Builder} so that method invocations may be chained. + */ + public Builder setTimestamp(Long timestamp) { + this.timestamp = timestamp; + return this; + } + + /** + * @return Constructs an instance of {@link IndexEntry} built using this builder's values. 
 + */ + public IndexEntry build() { + return new IndexEntry(index, data, dataType, tripleValueType, visibility, count, timestamp); + } + } +} \ No newline at end of file diff --git a/extras/rya.prospector/src/main/java/org/apache/rya/prospector/domain/IntermediateProspect.java b/extras/rya.prospector/src/main/java/org/apache/rya/prospector/domain/IntermediateProspect.java new file mode 100644 index 000000000..8c523787a --- /dev/null +++ b/extras/rya.prospector/src/main/java/org/apache/rya/prospector/domain/IntermediateProspect.java @@ -0,0 +1,213 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.rya.prospector.domain; + +import java.io.DataInput; +import java.io.DataOutput; +import java.io.IOException; + +import org.apache.hadoop.io.WritableComparable; +import org.apache.rya.prospector.mr.Prospector; +import org.apache.rya.prospector.plans.IndexWorkPlan; + +/** + * Represents a piece of information that is being counted during the process + * of running a {@link Prospector} job. 
+ */ +public class IntermediateProspect implements WritableComparable<IntermediateProspect> { + + private String index; + private String data; + private String dataType; + private TripleValueType tripleValueType; + private String visibility; + + /** + * Constructs an uninitialized instance of {@link IntermediateProspect}. + * This constructor is required to integrate with Map Reduce's + * {@link WritableComparable} interface. + */ + public IntermediateProspect() { } + + /** + * Constructs an instance of {@link IntermediateProspect}. + * + * @param index - Indicates which {@link IndexWorkPlan} the data is part of. + * @param data - The information that is being counted. + * @param dataType - The data type of {@code data}. + * @param tripleValueType - Indicates which parts of the RDF Statement are included in {@code data}. + * @param visibility - The visibility of this entry. + */ + public IntermediateProspect( + final String index, + final String data, + final String dataType, + final TripleValueType tripleValueType, + final String visibility) { + this.index = index; + this.data = data; + this.dataType = dataType; + this.tripleValueType = tripleValueType; + this.visibility = visibility; + } + + /** + * @return Indicates which {@link IndexWorkPlan} the data is part of. + */ + public String getIndex() { + return index; + } + + /** + * @return The information that is being counted. + */ + public String getData() { + return data; + } + + /** + * @return The data type of {@code data}. + */ + public String getDataType() { + return dataType; + } + + /** + * @return Indicates which parts of the RDF Statement are included in {@code data}. + */ + public TripleValueType getTripleValueType() { + return tripleValueType; + } + + /** + * @return The visibility of this entry. 
 + */ + public String getVisibility() { + return visibility; + } + + /** + * Orders prospects by index, then data, then data type, then triple value type + * and finally visibility. All five fields of both objects must be non-null. + * + * @param t - The prospect to compare against. + * @return A negative value, zero, or a positive value when this prospect sorts + * before, the same as, or after {@code t}. + */ + @Override + public int compareTo(IntermediateProspect t) { + if(!index.equals(t.index)) { + return index.compareTo(t.index); + } + if(!data.equals(t.data)) { + return data.compareTo(t.data); + } + if(!dataType.equals(t.dataType)) { + return dataType.compareTo(t.dataType); + } + if(!tripleValueType.equals(t.tripleValueType)) { + return tripleValueType.compareTo(t.tripleValueType); + } + if(!visibility.equals(t.visibility)) { + return visibility.compareTo(t.visibility); + } + return 0; + } + + /** + * Hashes the same five fields that {@link #compareTo(IntermediateProspect)} compares so + * that equal keys produced by different map tasks are given the same hash. The triple + * value type contributes its name because an enum's own hash code is not stable from + * one JVM to the next. + * + * @return A hash of this prospect's fields. + */ + @Override + public int hashCode() { + final String typeName = (tripleValueType == null) ? null : tripleValueType.name(); + return java.util.Objects.hash(index, data, dataType, typeName, visibility); + } + + /** + * @param o - The object to compare against. + * @return {@code true} if {@code o} is an {@link IntermediateProspect} whose five fields + * all equal this prospect's fields; otherwise {@code false}. 
+ */ + @Override + public boolean equals(Object o) { + if(this == o) { + return true; + } + if(o instanceof IntermediateProspect) { + final IntermediateProspect other = (IntermediateProspect) o; + return java.util.Objects.equals(index, other.index) && + java.util.Objects.equals(data, other.data) && + java.util.Objects.equals(dataType, other.dataType) && + tripleValueType == other.tripleValueType && + java.util.Objects.equals(visibility, other.visibility); + } + return false; + } + + /** + * Writes the five fields as UTF strings in the order index, data, data type, + * triple value type (by enum name) and visibility. + * + * @param dataOutput - Where the fields are written. + * @throws IOException The fields could not be written. + */ + @Override + public void write(DataOutput dataOutput) throws IOException { + dataOutput.writeUTF(index); + dataOutput.writeUTF(data); + dataOutput.writeUTF(dataType); + dataOutput.writeUTF(tripleValueType.name()); + dataOutput.writeUTF(visibility); + } + + /** + * Replaces this object's fields with values read in the order used by + * {@link #write(DataOutput)}. + * + * @param dataInput - Where the fields are read from. + * @throws IOException The fields could not be read. + */ + @Override + public void readFields(DataInput dataInput) throws IOException { + index = dataInput.readUTF(); + data = dataInput.readUTF(); + dataType = dataInput.readUTF(); + tripleValueType = TripleValueType.valueOf(dataInput.readUTF()); + visibility = dataInput.readUTF(); + } + + /** + * @return An empty instance of {@link Builder}. + */ + public static Builder builder() { + return new Builder(); + } + + /** + * Builds instances of {@link IntermediateProspect}. 
+ */ + public static final class Builder { + + private String index; + private String data; + private String dataType; + private TripleValueType tripleValueType; + private String visibility; + + /** + * @param index - Indicates which {@link IndexWorkPlan} the data is part of. + * @return This {@link Builder} so that method invocations may be chained. + */ + public Builder setIndex(String index) { + this.index = index; + return this; + } + + /** + * @param data - The information that is being counted. + * @return This {@link Builder} so that method invocations may be chained. 
 + */ + public Builder setData(String data) { + this.data = data; + return this; + } + + /** + * @param dataType - The data type of {@code data}. + * @return This {@link Builder} so that method invocations may be chained. + */ + public Builder setDataType(String dataType) { + this.dataType = dataType; + return this; + } + + /** + * @param tripleValueType - Indicates which parts of the RDF Statement are included in {@code data}. + * @return This {@link Builder} so that method invocations may be chained. + */ + public Builder setTripleValueType(TripleValueType tripleValueType) { + this.tripleValueType = tripleValueType; + return this; + } + + /** + * @param visibility - The visibility of this entry. + * @return This {@link Builder} so that method invocations may be chained. + */ + public Builder setVisibility(String visibility) { + this.visibility = visibility; + return this; + } + + /** + * @return Constructs an instance of {@link IntermediateProspect} built using this builder's values. + */ + public IntermediateProspect build() { + return new IntermediateProspect(index, data, dataType, tripleValueType, visibility); + } + } +} \ No newline at end of file diff --git a/extras/rya.prospector/src/main/java/org/apache/rya/prospector/domain/TripleValueType.java b/extras/rya.prospector/src/main/java/org/apache/rya/prospector/domain/TripleValueType.java new file mode 100644 index 000000000..16e7916b7 --- /dev/null +++ b/extras/rya.prospector/src/main/java/org/apache/rya/prospector/domain/TripleValueType.java @@ -0,0 +1,101 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.rya.prospector.domain; + +import static java.util.Objects.requireNonNull; + +import com.google.common.collect.ImmutableMap; + +/** + * Enumerates the different types of counts that are performed over a Rya instance's + * Statements as part of a Prospector run. + */ +public enum TripleValueType { + /** + * The data portion of an {@link IndexEntry} contains a unique Subject that + * appears within a Rya instance's Statements. + */ + SUBJECT("subject"), + + /** + * The data portion of an {@link IndexEntry} contains a unique Predicate that + * appears within a Rya instance's Statements. + */ + PREDICATE("predicate"), + + /** + * The data portion of an {@link IndexEntry} contains a unique Object that + * appears within a Rya instance's Statements. + */ + OBJECT("object"), + + /** + * The data portion of an {@link IndexEntrY} contains a unique Namespace from + * the Subjects that appear within a Rya instance. + */ + ENTITY("entity"), + + /** + * The data portion of an {@link IndexEntry} contains a unique Subject and Predicate + * pair that appears within a Rya instance's Statements. + */ + SUBJECT_PREDICATE("subjectpredicate"), + + /** + * The data portion of an {@link IndexEntry} contains a unique Predicate and Object + * pair that appears within a Rya instance's Statements. + */ + PREDICATE_OBJECT("predicateobject"), + + /** + * The data portion of an {@link IndexEntry} contains a unique Subject and Object + * pair that appears within a Rya instance's Statements. 
+ */ + SUBJECT_OBJECT("subjectobject"); + + private final String indexType; + + private TripleValueType(String indexType) { + this.indexType = requireNonNull(indexType); + } + + /** + * @return The Prospector Index Type represented by the enum value. + */ + public String getIndexType() { + return indexType; + } + + private static final ImmutableMap lookup; + static { + ImmutableMap.Builder builder = ImmutableMap.builder(); + for(TripleValueType type : TripleValueType.values()) { + builder.put(type.getIndexType(), type); + } + lookup = builder.build(); + } + + /** + * @param indexType - The index name to lookup. + * @return The enum value that represents the index name. + */ + public TripleValueType fromIndexType(String indexType) { + return lookup.get(indexType); + } +} \ No newline at end of file diff --git a/extras/rya.prospector/src/main/java/org/apache/rya/prospector/mr/Prospector.java b/extras/rya.prospector/src/main/java/org/apache/rya/prospector/mr/Prospector.java new file mode 100644 index 000000000..78ea37132 --- /dev/null +++ b/extras/rya.prospector/src/main/java/org/apache/rya/prospector/mr/Prospector.java @@ -0,0 +1,113 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.rya.prospector.mr; + +import static org.apache.rya.prospector.utils.ProspectorConstants.DEFAULT_VIS; +import static org.apache.rya.prospector.utils.ProspectorConstants.EMPTY; +import static org.apache.rya.prospector.utils.ProspectorConstants.METADATA; +import static org.apache.rya.prospector.utils.ProspectorConstants.PERFORMANT; +import static org.apache.rya.prospector.utils.ProspectorConstants.PROSPECT_TIME; +import static org.apache.rya.prospector.utils.ProspectorUtils.connector; +import static org.apache.rya.prospector.utils.ProspectorUtils.getReverseIndexDateTime; +import static org.apache.rya.prospector.utils.ProspectorUtils.instance; +import static org.apache.rya.prospector.utils.ProspectorUtils.writeMutations; + +import java.util.Calendar; +import java.util.Collections; +import java.util.Date; + +import org.apache.accumulo.core.data.Mutation; +import org.apache.accumulo.core.data.Value; +import org.apache.accumulo.core.security.ColumnVisibility; +import org.apache.commons.lang.time.DateUtils; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.conf.Configured; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.io.LongWritable; +import org.apache.hadoop.mapreduce.Job; +import org.apache.hadoop.util.Tool; +import org.apache.hadoop.util.ToolRunner; +import org.apache.rya.prospector.domain.IntermediateProspect; +import org.apache.rya.prospector.utils.ProspectorUtils; + +/** + * Configures and runs the Hadoop Map Reduce job that executes the Prospector's work. 
+ */ +public class Prospector extends Configured implements Tool { + + /** + * The wall clock time captured when this class was loaded. It identifies the + * Prospector run and is handed to the job through the "DATE" setting. + */ + private static final long NOW = System.currentTimeMillis(); + + private Date truncatedDate; + + /** + * Runs the Prospector through {@link ToolRunner} and exits the JVM with the tool's result. + * + * @param args - The first element must be the path to the Prospector configuration file. + * @throws Exception The tool failed to run. + */ + public static void main(String[] args) throws Exception { + final int res = ToolRunner.run(new Prospector(), args); + System.exit(res); + } + + /** + * Configures the Map Reduce job from the configuration file named by {@code args[0]}, + * waits for it to finish and, when it succeeds, records the run's time in the + * metadata row of the output table. + * + * @param args - The first element must be the path to the Prospector configuration file. + * @return 0 if the job was successful; otherwise 1. + * @throws IllegalArgumentException The configuration path was not provided, or the configuration + * is missing "prospector.intable", "prospector.outtable" or "prospector.auths". + * @throws Exception The job could not be run or the metadata could not be written. 
+ */ + @Override + public int run(String[] args) throws Exception { + if (args == null || args.length < 1) { + throw new IllegalArgumentException("Expected the path to the Prospector configuration file as the first argument."); + } + + final Configuration conf = getConf(); + + truncatedDate = DateUtils.truncate(new Date(NOW), Calendar.MINUTE); + + final Path configurationPath = new Path(args[0]); + conf.addResource(configurationPath); + + /* Java assertions are disabled unless the JVM is started with -ea, so the required settings are checked explicitly. */ + final String inTable = requiredSetting(conf, "prospector.intable"); + final String outTable = requiredSetting(conf, "prospector.outtable"); + final String auths_str = requiredSetting(conf, "prospector.auths"); + + final Job job = new Job(getConf(), this.getClass().getSimpleName() + "_" + System.currentTimeMillis()); + job.setJarByClass(this.getClass()); + + final String[] auths = auths_str.split(","); + ProspectorUtils.initMRJob(job, inTable, outTable, auths); + + job.getConfiguration().setLong("DATE", NOW); + + final String performant = conf.get(PERFORMANT); + if (Boolean.parseBoolean(performant)) { + /** + * Apply some performance tuning + */ + ProspectorUtils.addMRPerformance(job.getConfiguration()); + } + + job.setMapOutputKeyClass(IntermediateProspect.class); + job.setMapOutputValueClass(LongWritable.class); + + job.setMapperClass(ProspectorMapper.class); + job.setCombinerClass(ProspectorCombiner.class); + job.setReducerClass(ProspectorReducer.class); + job.waitForCompletion(true); + + final int success = job.isSuccessful() ? 
0 : 1; + + if (success == 0) { + /* Record the time of this successful run in the metadata row of the output table. */ + final Mutation m = new Mutation(METADATA); + m.put(PROSPECT_TIME, getReverseIndexDateTime(truncatedDate), new ColumnVisibility(DEFAULT_VIS), truncatedDate.getTime(), new Value(EMPTY)); + writeMutations(connector(instance(conf), conf), outTable, Collections.singleton(m)); + } + + return success; + } + + /** + * Reads a setting that the job can not run without. + * + * @param conf - The configuration to read from. + * @param name - The name of the setting. + * @return The value of the setting. + * @throws IllegalArgumentException The configuration has no value for {@code name}. + */ + private static String requiredSetting(Configuration conf, String name) { + final String value = conf.get(name); + if (value == null) { + throw new IllegalArgumentException("The Prospector configuration is missing a value for '" + name + "'."); + } + return value; + } +} \ No newline at end of file diff --git a/extras/rya.prospector/src/main/java/org/apache/rya/prospector/mr/ProspectorCombiner.java b/extras/rya.prospector/src/main/java/org/apache/rya/prospector/mr/ProspectorCombiner.java new file mode 100644 index 000000000..bc3c1eba6 --- /dev/null +++ b/extras/rya.prospector/src/main/java/org/apache/rya/prospector/mr/ProspectorCombiner.java @@ -0,0 +1,61 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.rya.prospector.mr; + +import java.io.IOException; +import java.util.Collection; +import java.util.Map; +import java.util.Map.Entry; + +import org.apache.hadoop.io.LongWritable; +import org.apache.hadoop.mapreduce.Reducer; +import org.apache.rya.prospector.domain.IntermediateProspect; +import org.apache.rya.prospector.plans.IndexWorkPlan; +import org.apache.rya.prospector.plans.IndexWorkPlanManager; +import org.apache.rya.prospector.plans.impl.ServicesBackedIndexWorkPlanManager; +import org.apache.rya.prospector.utils.ProspectorUtils; + +/** + * Used to combine intermediate Prospect job results after {@link ProspectorMapper}, + * but before the shuffle operation of the Hadoop Map Reduce framework. + */ +public class ProspectorCombiner extends Reducer<IntermediateProspect, LongWritable, IntermediateProspect, LongWritable> { + + private final IndexWorkPlanManager manager = new ServicesBackedIndexWorkPlanManager(); + + /** + * The available plans, looked up with the value of {@link IntermediateProspect#getIndex()}. + */ + private Map<String, IndexWorkPlan> plans; + + /** + * Builds the plan lookup from the plans provided by the manager. + */ + @Override + public void setup(Context context) throws IOException, InterruptedException { + super.setup(context); + this.plans = ProspectorUtils.planMap(manager.getPlans()); + } + + /** + * Hands the counts for {@code prospect} to the plan named by its index and writes every + * entry the plan returns. Nothing is written when no plan matches the index or when the + * plan returns {@code null}. + * + * @param prospect - The key whose counts are being combined. + * @param values - The counts emitted for {@code prospect}. + * @param context - Where the combined entries are written. + * @throws IOException A problem was encountered while writing to the context. + * @throws InterruptedException Writes to the context were interrupted. 
+ */ + @Override + protected void reduce(IntermediateProspect prospect, Iterable<LongWritable> values, Context context) throws IOException, InterruptedException { + final IndexWorkPlan plan = plans.get(prospect.getIndex()); + if (plan != null) { + final Collection<Entry<IntermediateProspect, LongWritable>> coll = plan.combine(prospect, values); + if (coll != null) { + for(final Entry<IntermediateProspect, LongWritable> entry : coll) { + context.write(entry.getKey(), entry.getValue()); + } + } + } + } +} \ No newline at end of file diff --git a/extras/rya.prospector/src/main/java/org/apache/rya/prospector/mr/ProspectorMapper.java b/extras/rya.prospector/src/main/java/org/apache/rya/prospector/mr/ProspectorMapper.java new file mode 100644 index 000000000..ff4c30fc7 --- /dev/null +++ b/extras/rya.prospector/src/main/java/org/apache/rya/prospector/mr/ProspectorMapper.java @@ -0,0 +1,83 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.rya.prospector.mr; + +import java.io.IOException; +import java.util.Collection; +import java.util.Map.Entry; + +import org.apache.accumulo.core.data.Key; +import org.apache.accumulo.core.data.Value; +import org.apache.hadoop.io.LongWritable; +import org.apache.hadoop.mapreduce.Mapper; +import org.apache.rya.accumulo.AccumuloRdfConfiguration; +import org.apache.rya.api.RdfCloudTripleStoreConstants; +import org.apache.rya.api.domain.RyaStatement; +import org.apache.rya.api.resolver.RyaTripleContext; +import org.apache.rya.api.resolver.triple.TripleRow; +import org.apache.rya.api.resolver.triple.TripleRowResolverException; +import org.apache.rya.prospector.domain.IntermediateProspect; +import org.apache.rya.prospector.plans.IndexWorkPlan; +import org.apache.rya.prospector.plans.IndexWorkPlanManager; +import org.apache.rya.prospector.plans.impl.ServicesBackedIndexWorkPlanManager; + +/** + * Loads {@link RyaStatement}s from Accumulo and maps them into {@link IntermediateProspect}s + * paired with count information during the Map portion of the Hadoop Map Reduce framework. 
+ */ +public class ProspectorMapper extends Mapper<Key, Value, IntermediateProspect, LongWritable> { + + private RyaTripleContext ryaContext; + private final IndexWorkPlanManager manager = new ServicesBackedIndexWorkPlanManager(); + private final Collection<IndexWorkPlan> plans = manager.getPlans(); + + /** + * Creates the {@link RyaTripleContext} used to deserialize rows, using the job's configuration. + */ + @Override + public void setup(Context context) throws IOException, InterruptedException { + super.setup(context); + ryaContext = RyaTripleContext.getInstance(new AccumuloRdfConfiguration(context.getConfiguration())); + } + + /** + * Deserializes the row as an SPO layout {@link RyaStatement} and writes every entry that + * each plan derives from it. Rows that can not be deserialized are skipped. + * + * @param row - The Accumulo key of the row being mapped. + * @param data - The Accumulo value of the row being mapped. + * @param context - Where the derived entries are written. + * @throws IOException A problem was encountered while writing to the context. + * @throws InterruptedException Writes to the context were interrupted. + */ + @Override + public void map(Key row, Value data, Context context) throws IOException, InterruptedException { + RyaStatement ryaStatement = null; + try { + ryaStatement = ryaContext.deserializeTriple(RdfCloudTripleStoreConstants.TABLE_LAYOUT.SPO, + new TripleRow( + row.getRow().getBytes(), + row.getColumnFamily().getBytes(), + row.getColumnQualifier().getBytes(), + row.getTimestamp(), + row.getColumnVisibility().getBytes(), + data.get() + ) + ); + } catch (final TripleRowResolverException e) { + /* Do nothing. The row didn't contain a Rya Statement. 
*/ + } + + if(ryaStatement != null) { + for(final IndexWorkPlan plan : plans) { + final Collection<Entry<IntermediateProspect, LongWritable>> coll = plan.map(ryaStatement); + /* A plan may have nothing to emit for a statement; the combiner guards its plan call the same way. */ + if(coll != null) { + for(final Entry<IntermediateProspect, LongWritable> entry : coll) { + context.write(entry.getKey(), entry.getValue()); + } + } + } + } + } +} \ No newline at end of file diff --git a/extras/rya.prospector/src/main/java/org/apache/rya/prospector/mr/ProspectorReducer.java b/extras/rya.prospector/src/main/java/org/apache/rya/prospector/mr/ProspectorReducer.java new file mode 100644 index 000000000..5247b5b7a --- /dev/null +++ b/extras/rya.prospector/src/main/java/org/apache/rya/prospector/mr/ProspectorReducer.java @@ -0,0 +1,65 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.rya.prospector.mr; + +import java.io.IOException; +import java.util.Calendar; +import java.util.Date; +import java.util.Map; + +import org.apache.commons.lang.time.DateUtils; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.io.LongWritable; +import org.apache.hadoop.mapreduce.Reducer; +import org.apache.rya.prospector.domain.IntermediateProspect; +import org.apache.rya.prospector.plans.IndexWorkPlan; +import org.apache.rya.prospector.plans.IndexWorkPlanManager; +import org.apache.rya.prospector.plans.impl.ServicesBackedIndexWorkPlanManager; +import org.apache.rya.prospector.utils.ProspectorUtils; + +/** + * Reduces the {@link IntermediateProspect} counts into their final values and + * writes them to their final storage location during the Reduce step of the + * Hadoop Map Reduce framework. 
+ */ +public class ProspectorReducer extends Reducer<IntermediateProspect, LongWritable, IntermediateProspect, LongWritable> { + + /* + * NOTE(review): the input key and value types are fixed by reduce(...) below. The output + * type arguments are presumed from the types this file imports; the plans write to the + * context themselves, so confirm the output types against the job's output format. + */ + + private Date truncatedDate; + private final IndexWorkPlanManager manager = new ServicesBackedIndexWorkPlanManager(); + + /** + * The available plans, looked up with the value of {@link IntermediateProspect#getIndex()}. + */ + private Map<String, IndexWorkPlan> plans; + + /** + * Reads the run's time from the job's "DATE" setting (the current time is used when it + * is not set), truncates it to the minute, and builds the plan lookup. + */ + @Override + public void setup(Context context) throws IOException, InterruptedException { + super.setup(context); + + final Configuration conf = context.getConfiguration(); + final long now = conf.getLong("DATE", System.currentTimeMillis()); + truncatedDate = DateUtils.truncate(new Date(now), Calendar.MINUTE); + + this.plans = ProspectorUtils.planMap(manager.getPlans()); + } + + /** + * Delegates the reduction of {@code prospect} to the plan named by its index, which writes + * its results to {@code context}. Nothing happens when no plan matches the index. + * + * @param prospect - The key whose counts are being reduced. + * @param values - The counts emitted for {@code prospect}. + * @param context - The context handed to the plan. + * @throws IOException A problem was encountered while writing to the context. + * @throws InterruptedException Writes to the context were interrupted. 
+ */ + @Override + protected void reduce(IntermediateProspect prospect, Iterable<LongWritable> values, Context context) throws IOException, InterruptedException { + final IndexWorkPlan plan = plans.get(prospect.getIndex()); + if (plan != null) { + plan.reduce(prospect, values, truncatedDate, context); + } + } +} \ No newline at end of file diff --git a/extras/rya.prospector/src/main/java/org/apache/rya/prospector/plans/IndexWorkPlan.java b/extras/rya.prospector/src/main/java/org/apache/rya/prospector/plans/IndexWorkPlan.java new file mode 100644 index 000000000..77955e4ec --- /dev/null +++ b/extras/rya.prospector/src/main/java/org/apache/rya/prospector/plans/IndexWorkPlan.java @@ -0,0 +1,115 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.rya.prospector.plans; + +import java.io.IOException; +import java.util.Collection; +import java.util.Date; +import java.util.List; +import java.util.Map; + +import org.apache.accumulo.core.client.Connector; +import org.apache.accumulo.core.client.TableNotFoundException; +import org.apache.hadoop.io.LongWritable; +import org.apache.hadoop.mapreduce.Reducer; +import org.apache.rya.api.domain.RyaStatement; +import org.apache.rya.prospector.domain.IndexEntry; +import org.apache.rya.prospector.domain.IntermediateProspect; +import org.apache.rya.prospector.mr.ProspectorCombiner; +import org.apache.rya.prospector.mr.ProspectorMapper; +import org.openrdf.model.vocabulary.XMLSchema; + +/** + * Contains the methods that perform each of the Map Reduce functions that result + * in the final {@link IndexEntry} values as well as a way to query those values + * once they have been written. + */ +public interface IndexWorkPlan { + + public static final String URITYPE = XMLSchema.ANYURI.stringValue(); + public static final LongWritable ONE = new LongWritable(1); + public static final String DELIM = "\u0000"; + + /** + * This method is invoked by {@link ProspectorMapper}. It's used to pull + * input from an Accumulo Rya instance into the Map Reduce framework. + *

 + * It must use the values of a {@link RyaStatement} to derive a bunch of + * {@link IntermediateProspect} and {@code LongWritable} pairs. This is only + * useful for prospecting jobs that count things. The {@link IntermediateProspect} + * value will be used as the key within {@link #combine(IntermediateProspect, Iterable)} and + * {@link #reduce(IntermediateProspect, Iterable, Date, org.apache.hadoop.mapreduce.Reducer.Context)}. + * + * @param ryaStatement - The RDF Statement that needs to be mapped. + * @return A collection of intermediate keys and counts. + */ + public Collection<Map.Entry<IntermediateProspect, LongWritable>> map(RyaStatement ryaStatement); + + /** + * This method is invoked by {@link ProspectorCombiner}. It is used to + * combine the results of {@link ProspectorMapper} before the shuffle operation + * of the Map Reduce framework. + * + * @param prospect - The intermediate prospect that is being combined. + * @param counts - The counts that need to be combined together. + * @return A collection containing the combined results. + */ + public Collection<Map.Entry<IntermediateProspect, LongWritable>> combine(IntermediateProspect prospect, Iterable<LongWritable> counts); + + /** + * This method is invoked by {@link org.apache.rya.prospector.mr.ProspectorReducer}. It is used to reduce + * the counts to their final states and write them to output via the + * {@code context}. + * + * @param prospect - The intermediate prospect that is being reduced. + * @param counts - The counts that need to be reduced. + * @param timestamp - The timestamp that identifies this Prospector run. + * @param context - The reducer context the reduced values will be written to. + * @throws IOException A problem was encountered while writing to the context. + * @throws InterruptedException Writes to the context were interrupted. 
+ */ + public void reduce(IntermediateProspect prospect, Iterable<LongWritable> counts, Date timestamp, Reducer.Context context) throws IOException, InterruptedException; + + /** + * @return A unique name that indicates which {@link IndexEntry}s came from this plan. 
 + */ + public String getIndexType(); + + /** + * TODO Not sure what this generically is for. It is used by the count job to + * place a null delimiter between any {@link IndexEntry}s whose data + * section is two different pieces of information together. + */ + public String getCompositeValue(List<String> indices); + + /** + * Search for {@link IndexEntry}s that have values matching the provided parameters. + * + * @param connector - The Accumulo Connector used to find the table holding the data. + * @param tableName - The name of the table the Prospector results are stored within. + * @param prospectTimes - Indicates which Prospect runs will be part of the query. + * @param type - The name of the index the {@link IndexEntry}s are stored within. + * @param index - The data portion of the {@link IndexEntry}s that may be returned. + * @param dataType - The data type of the {@link IndexEntry}s that may be returned. + * @param auths - The authorizations used to search for the entries. + * @return The {@link IndexEntry}s that match the provided values. + * @throws TableNotFoundException No table exists for {@code tableName}. 
 + */ + public List<IndexEntry> query(Connector connector, String tableName, List<Long> prospectTimes, String type, String index, String dataType, String[] auths) throws TableNotFoundException; +} \ No newline at end of file diff --git a/extras/rya.prospector/src/main/groovy/org/apache/rya/prospector/plans/IndexWorkPlanManager.groovy b/extras/rya.prospector/src/main/java/org/apache/rya/prospector/plans/IndexWorkPlanManager.java similarity index 71% rename from extras/rya.prospector/src/main/groovy/org/apache/rya/prospector/plans/IndexWorkPlanManager.groovy rename to extras/rya.prospector/src/main/java/org/apache/rya/prospector/plans/IndexWorkPlanManager.java index f1029dc8c..1b7cf3b9c 100644 --- a/extras/rya.prospector/src/main/groovy/org/apache/rya/prospector/plans/IndexWorkPlanManager.groovy +++ b/extras/rya.prospector/src/main/java/org/apache/rya/prospector/plans/IndexWorkPlanManager.java @@ -6,9 +6,9 @@ * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at - * + * * http://www.apache.org/licenses/LICENSE-2.0 - * + * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY @@ -16,14 +16,21 @@ * specific language governing permissions and limitations * under the License. */ +package org.apache.rya.prospector.plans; -package org.apache.rya.prospector.plans +import java.util.Collection; + +import org.apache.rya.prospector.mr.Prospector; /** - * Date: 12/3/12 - * Time: 11:24 AM + * Provides access to the {@link IndexWorkPlan}s that will be executed as part + * of a {@link Prospector} run. */ public interface IndexWorkPlanManager { + /** + * @return The {@link IndexWorkPlan}s that will be executed as part of a + * {@link Prospector} run. 
+ */ public Collection getPlans(); -} +} \ No newline at end of file diff --git a/extras/rya.prospector/src/main/java/org/apache/rya/prospector/plans/impl/CountPlan.java b/extras/rya.prospector/src/main/java/org/apache/rya/prospector/plans/impl/CountPlan.java new file mode 100644 index 000000000..ebcf6c35d --- /dev/null +++ b/extras/rya.prospector/src/main/java/org/apache/rya/prospector/plans/impl/CountPlan.java @@ -0,0 +1,262 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.rya.prospector.plans.impl; + +import static org.apache.rya.prospector.utils.ProspectorConstants.COUNT; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; +import java.util.Date; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.Map.Entry; + +import org.apache.accumulo.core.client.BatchScanner; +import org.apache.accumulo.core.client.Connector; +import org.apache.accumulo.core.client.TableNotFoundException; +import org.apache.accumulo.core.data.Key; +import org.apache.accumulo.core.data.Mutation; +import org.apache.accumulo.core.data.Range; +import org.apache.accumulo.core.data.Value; +import org.apache.accumulo.core.security.Authorizations; +import org.apache.accumulo.core.security.ColumnVisibility; +import org.apache.hadoop.io.LongWritable; +import org.apache.hadoop.io.Text; +import org.apache.hadoop.mapreduce.Reducer; +import org.apache.rya.api.RdfCloudTripleStoreConstants; +import org.apache.rya.api.domain.RyaStatement; +import org.apache.rya.api.domain.RyaType; +import org.apache.rya.api.domain.RyaURI; +import org.apache.rya.prospector.domain.IndexEntry; +import org.apache.rya.prospector.domain.IntermediateProspect; +import org.apache.rya.prospector.domain.TripleValueType; +import org.apache.rya.prospector.plans.IndexWorkPlan; +import org.apache.rya.prospector.utils.CustomEntry; +import org.apache.rya.prospector.utils.ProspectorUtils; +import org.openrdf.model.util.URIUtil; +import org.openrdf.model.vocabulary.XMLSchema; + +/** + * An implementation of {@link IndexWorkPlan} that counts the number of times + * a piece of data appears within a Rya Instance for every {@link TripleValueType}. 
+ */ +public class CountPlan implements IndexWorkPlan { + + @Override + public Collection> map(RyaStatement ryaStatement) { + final RyaURI subject = ryaStatement.getSubject(); + final RyaURI predicate = ryaStatement.getPredicate(); + final String subjpred = ryaStatement.getSubject().getData() + DELIM + ryaStatement.getPredicate().getData(); + final String predobj = ryaStatement.getPredicate().getData() + DELIM + ryaStatement.getObject().getData(); + final String subjobj = ryaStatement.getSubject().getData() + DELIM + ryaStatement.getObject().getData(); + final RyaType object = ryaStatement.getObject(); + final int localIndex = URIUtil.getLocalNameIndex(subject.getData()); + final String namespace = subject.getData().substring(0, localIndex - 1); + final String visibility = new String(ryaStatement.getColumnVisibility()); + + final List> entries = new ArrayList<>(7); + + // Create an entry for each TripleValueType type. + entries.add(new CustomEntry( + IntermediateProspect.builder() + .setIndex(COUNT) + .setData(subject.getData()) + .setDataType(URITYPE) + .setTripleValueType( TripleValueType.SUBJECT ) + .setVisibility(visibility) + .build() + , ONE)); + + entries.add(new CustomEntry( + IntermediateProspect.builder() + .setIndex(COUNT) + .setData(predicate.getData()) + .setDataType(URITYPE) + .setTripleValueType( TripleValueType.PREDICATE ) + .setVisibility(visibility) + .build() + , ONE)); + + entries.add(new CustomEntry( + IntermediateProspect.builder() + .setIndex(COUNT) + .setData(object.getData()) + .setDataType(object.getDataType().stringValue()) + .setTripleValueType( TripleValueType.OBJECT ) + .setVisibility(visibility) + .build() + , ONE)); + + entries.add(new CustomEntry( + IntermediateProspect.builder() + .setIndex(COUNT) + .setData(subjpred) + .setDataType(XMLSchema.STRING.toString()) + .setTripleValueType( TripleValueType.SUBJECT_PREDICATE ) + .setVisibility(visibility) + .build() + , ONE)); + + entries.add(new CustomEntry( + 
IntermediateProspect.builder() + .setIndex(COUNT) + .setData(subjobj) + .setDataType(XMLSchema.STRING.toString()) + .setTripleValueType(TripleValueType.SUBJECT_OBJECT) + .setVisibility(visibility) + .build() + , ONE)); + + entries.add(new CustomEntry( + IntermediateProspect.builder() + .setIndex(COUNT) + .setData(predobj) + .setDataType(XMLSchema.STRING.toString()) + .setTripleValueType(TripleValueType.PREDICATE_OBJECT) + .setVisibility(visibility) + .build() + , ONE)); + + entries.add(new CustomEntry( + IntermediateProspect.builder() + .setIndex(COUNT) + .setData(namespace) + .setDataType(URITYPE) + .setTripleValueType(TripleValueType.ENTITY) + .setVisibility(visibility) + .build() + , ONE)); + return entries; + } + + @Override + public Collection> combine(IntermediateProspect prospect, Iterable counts) { + long sum = 0; + for(final LongWritable count : counts) { + sum += count.get(); + } + return Collections.singleton( new CustomEntry(prospect, new LongWritable(sum)) ); + } + + @Override + public void reduce(IntermediateProspect prospect, Iterable counts, Date timestamp, Reducer.Context context) throws IOException, InterruptedException { + long sum = 0; + for(final LongWritable count : counts) { + sum += count.get(); + } + + final String indexType = prospect.getTripleValueType().getIndexType(); + + // not sure if this is the best idea.. 
+ if ((sum >= 0) || indexType.equals(TripleValueType.PREDICATE.getIndexType())) { + final Mutation m = new Mutation(indexType + DELIM + prospect.getData() + DELIM + ProspectorUtils.getReverseIndexDateTime(timestamp)); + + final String dataType = prospect.getDataType(); + final ColumnVisibility visibility = new ColumnVisibility(prospect.getVisibility()); + final Value sumValue = new Value(("" + sum).getBytes()); + m.put(COUNT, prospect.getDataType(), visibility, timestamp.getTime(), sumValue); + + context.write(null, m); + } + } + + @Override + public String getIndexType() { + return COUNT; + } + + @Override + public String getCompositeValue(List indices){ + final Iterator indexIt = indices.iterator(); + String compositeIndex = indexIt.next(); + while (indexIt.hasNext()){ + final String value = indexIt.next(); + compositeIndex += DELIM + value; + } + return compositeIndex; + } + + @Override + public List query(Connector connector, String tableName, List prospectTimes, String type, String compositeIndex, String dataType, String[] auths) throws TableNotFoundException { + assert connector != null && tableName != null && type != null && compositeIndex != null; + + final BatchScanner bs = connector.createBatchScanner(tableName, new Authorizations(auths), 4); + final List ranges = new ArrayList<>(); + int max = 1000; //by default only return 1000 prospects maximum + if (prospectTimes != null) { + for(final Long prospectTime : prospectTimes) { + ranges.add(new Range(type + DELIM + compositeIndex + DELIM + ProspectorUtils.getReverseIndexDateTime(new Date(prospectTime)))); + } + } else { + max = 1; //only return the latest if no prospectTimes given + final String prefix = type + DELIM + compositeIndex + DELIM; + ranges.add(new Range(prefix, prefix + RdfCloudTripleStoreConstants.LAST)); + } + + bs.setRanges(ranges); + if (dataType != null) { + bs.fetchColumn(new Text(COUNT), new Text(dataType)); + } else { + bs.fetchColumnFamily(new Text(COUNT)); + } + + final List 
indexEntries = new ArrayList(); + try { + final Iterator> iter = bs.iterator(); + + // Stop as soon as max entries have been collected. + while (iter.hasNext() && indexEntries.size() < max) { + final Entry entry = iter.next(); + final Key k = entry.getKey(); + final Value v = entry.getValue(); + + final String[] rowArr = k.getRow().toString().split(DELIM); + String values = ""; + // if it is a composite index, then return the type as a composite index + if (type.equalsIgnoreCase(TripleValueType.SUBJECT_PREDICATE.getIndexType()) || + type.equalsIgnoreCase(TripleValueType.SUBJECT_OBJECT.getIndexType()) || + type.equalsIgnoreCase(TripleValueType.PREDICATE_OBJECT.getIndexType())) { + values =rowArr[1] + DELIM + rowArr[2]; + } + else { + values = rowArr[1]; + } + + // Create an entry using the values that were found. + final String entryDataType = k.getColumnQualifier().toString(); + final String entryVisibility = k.getColumnVisibility().toString(); + final Long entryCount = Long.parseLong(new String(v.get())); + + indexEntries.add( + IndexEntry.builder() + .setData(values) + .setTripleValueType(rowArr[0]) + .setIndex(COUNT) + .setDataType(entryDataType) + .setVisibility(entryVisibility) + .setCount(entryCount) + .setTimestamp(k.getTimestamp()) + .build()); + } + } finally { + // Release the scanner's resources even if a row could not be parsed. + bs.close(); + } + + return indexEntries; + } +} \ No newline at end of file diff --git a/extras/rya.prospector/src/main/java/org/apache/rya/prospector/plans/impl/ServicesBackedIndexWorkPlanManager.java b/extras/rya.prospector/src/main/java/org/apache/rya/prospector/plans/impl/ServicesBackedIndexWorkPlanManager.java new file mode 100644 index 000000000..b5d2320c1 --- /dev/null +++ b/extras/rya.prospector/src/main/java/org/apache/rya/prospector/plans/impl/ServicesBackedIndexWorkPlanManager.java @@ -0,0 +1,49 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership.
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.rya.prospector.plans.impl; + +import java.util.Collection; +import java.util.Iterator; +import java.util.ServiceLoader; + +import org.apache.rya.prospector.plans.IndexWorkPlan; +import org.apache.rya.prospector.plans.IndexWorkPlanManager; + +import com.google.common.collect.Lists; + +/** + * Searches the classpath for any {@link IndexWorkPlan}s that are able to be service loaded. + */ +public class ServicesBackedIndexWorkPlanManager implements IndexWorkPlanManager { + + private final Collection plans; + + /** + * Constructs an instance of {@link ServicesBackedIndexWorkPlanManager}. + */ + public ServicesBackedIndexWorkPlanManager() { + final Iterator iterator = ServiceLoader.load(IndexWorkPlan.class).iterator(); + plans = Lists.newArrayList(iterator); + } + + @Override + public Collection getPlans() { + return plans; + } +} \ No newline at end of file diff --git a/extras/rya.prospector/src/main/java/org/apache/rya/prospector/service/ProspectorService.java b/extras/rya.prospector/src/main/java/org/apache/rya/prospector/service/ProspectorService.java new file mode 100644 index 000000000..205d4fc67 --- /dev/null +++ b/extras/rya.prospector/src/main/java/org/apache/rya/prospector/service/ProspectorService.java @@ -0,0 +1,162 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.rya.prospector.service; + +import static java.util.Objects.requireNonNull; +import static org.apache.rya.prospector.utils.ProspectorConstants.METADATA; +import static org.apache.rya.prospector.utils.ProspectorConstants.PROSPECT_TIME; + +import java.util.Date; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.Map.Entry; + +import org.apache.accumulo.core.client.AccumuloException; +import org.apache.accumulo.core.client.AccumuloSecurityException; +import org.apache.accumulo.core.client.Connector; +import org.apache.accumulo.core.client.Scanner; +import org.apache.accumulo.core.client.TableExistsException; +import org.apache.accumulo.core.client.TableNotFoundException; +import org.apache.accumulo.core.client.admin.TableOperations; +import org.apache.accumulo.core.data.Key; +import org.apache.accumulo.core.data.Range; +import org.apache.accumulo.core.data.Value; +import org.apache.accumulo.core.security.Authorizations; +import org.apache.hadoop.io.Text; +import org.apache.rya.prospector.domain.IndexEntry; +import org.apache.rya.prospector.plans.IndexWorkPlan; +import org.apache.rya.prospector.plans.IndexWorkPlanManager; +import org.apache.rya.prospector.plans.impl.ServicesBackedIndexWorkPlanManager; +import 
org.apache.rya.prospector.utils.ProspectorUtils; + +/** + * Provides access to the Prospect results that have been stored within a specific Accumulo table. + */ +public class ProspectorService { + + private final Connector connector; + private final String tableName; + + private final IndexWorkPlanManager manager = new ServicesBackedIndexWorkPlanManager(); + private final Map plans; + + /** + * Constructs an instance of {@link ProspectorService}. + * + * @param connector - The Accumulo connector used to communicate with the table. (not null) + * @param tableName - The name of the Accumulo table that will be queried for Prospect results. (not null) + * @throws AccumuloException A problem occurred while creating the table. + * @throws AccumuloSecurityException A problem occurred while creating the table. + * @throws TableExistsException A problem occurred while creating the table. + */ + public ProspectorService(Connector connector, String tableName) throws AccumuloException, AccumuloSecurityException, TableExistsException { + this.connector = requireNonNull(connector); + this.tableName = requireNonNull(tableName); + + this.plans = ProspectorUtils.planMap(manager.getPlans()); + + // Create the table if it doesn't already exist. + final TableOperations tos = connector.tableOperations(); + if(!tos.exists(tableName)) { + tos.create(tableName); + } + } + + /** + * Get a list of timestamps that represents all of the Prospect runs that have + * ever been performed. + * + * @param auths - The authorizations used to scan the table for prospects. + * @return A list of timestamps representing each Prospect run that was found. + * @throws TableNotFoundException The table name that was provided when this + * class was constructed does not match a table that the connector has access to. 
+ */ + public Iterator getProspects(String[] auths) throws TableNotFoundException { + final Scanner scanner = connector.createScanner(tableName, new Authorizations(auths)); + scanner.setRange(Range.exact(METADATA)); + scanner.fetchColumnFamily(new Text(PROSPECT_TIME)); + + return new ProspectTimestampIterator( scanner.iterator() ); + } + + /** + * Get a list of timestamps that represents all of the Prospect runs that + * have been performed inclusively between two timestamps. + * + * @param beginTime - The start of the time range. + * @param endTime - The end of the time range. + * @param auths - The authorizations used to scan the table for prospects. + * @return A list of timestamps representing each Prospect run that was found. + * @throws TableNotFoundException The table name that was provided when this + * class was constructed does not match a table that the connector has access to. + */ + public Iterator getProspectsInRange(long beginTime, long endTime, String[] auths) throws TableNotFoundException { + final Scanner scanner = connector.createScanner(tableName, new Authorizations(auths)); + scanner.setRange(new Range( + new Key(METADATA, PROSPECT_TIME, ProspectorUtils.getReverseIndexDateTime(new Date(endTime)), "", Long.MAX_VALUE), + new Key(METADATA, PROSPECT_TIME, ProspectorUtils.getReverseIndexDateTime(new Date(beginTime)), "", 0l) + )); + + return new ProspectTimestampIterator( scanner.iterator() ); + } + + /** + * Iterates over the results of a {@link Scanner} and interprets their keys + * as containing Prospect run timestamps.
+ */ + private static final class ProspectTimestampIterator implements Iterator { + private final Iterator> it; + + public ProspectTimestampIterator(Iterator> it) { + this.it = requireNonNull(it); + } + + @Override + public boolean hasNext() { + return it.hasNext(); + } + + @Override + public Long next() { + return it.next().getKey().getTimestamp(); + } + } + + /** + * Search for {@link IndexEntry}s that have values matching the provided parameters. + * + * @param prospectTimes - Indicates which Prospect runs will be part of the query. + * @param indexType - The name of the index the {@link IndexEntry}s are stored within. + * @param type - The type of entries to search for within that index, such as a + * {@code TripleValueType} index type. It is passed through to the plan's query. + * @param index - The data portion of the {@link IndexEntry}s that may be returned. + * @param dataType - The data type of the {@link IndexEntry}s that may be returned. + * @param auths - The authorizations used to search for the entries. + * @return The {@link IndexEntry}s that match the provided values. + * @throws TableNotFoundException No table exists for {@code tableName}.
+ */ + public List query(List prospectTimes, String indexType, String type, List index, String dataType, String[] auths) throws TableNotFoundException { + assert indexType != null; + + // Look up the plan registered for the index type; the assertion message names the type that is missing. + final IndexWorkPlan plan = plans.get(indexType); + assert plan != null : "Index Type: " + indexType + " does not exist"; + final String compositeIndex = plan.getCompositeValue(index); + + return plan.query(connector, tableName, prospectTimes, type, compositeIndex, dataType, auths); + } +} \ No newline at end of file diff --git a/extras/rya.prospector/src/main/java/org/apache/rya/prospector/service/ProspectorServiceEvalStatsDAO.java b/extras/rya.prospector/src/main/java/org/apache/rya/prospector/service/ProspectorServiceEvalStatsDAO.java new file mode 100644 index 000000000..3bb3b2688 --- /dev/null +++ b/extras/rya.prospector/src/main/java/org/apache/rya/prospector/service/ProspectorServiceEvalStatsDAO.java @@ -0,0 +1,143 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License.
+ */ +package org.apache.rya.prospector.service; + +import java.util.ArrayList; +import java.util.Iterator; +import java.util.List; + +import org.apache.accumulo.core.client.AccumuloException; +import org.apache.accumulo.core.client.AccumuloSecurityException; +import org.apache.accumulo.core.client.Connector; +import org.apache.accumulo.core.client.TableExistsException; +import org.apache.accumulo.core.client.TableNotFoundException; +import org.apache.rya.api.RdfCloudTripleStoreConfiguration; +import org.apache.rya.api.persist.RdfDAOException; +import org.apache.rya.api.persist.RdfEvalStatsDAO; +import org.apache.rya.prospector.domain.IndexEntry; +import org.apache.rya.prospector.domain.TripleValueType; +import org.apache.rya.prospector.utils.ProspectorConstants; +import org.openrdf.model.Resource; +import org.openrdf.model.Value; + +/** + * An {@link org.apache.rya.api.persist.RdfEvalStatsDAO} that uses the Prospector Service underneath to return counts. + */ +public class ProspectorServiceEvalStatsDAO implements RdfEvalStatsDAO { + + private ProspectorService prospectorService; + + public ProspectorServiceEvalStatsDAO() { + } + + public ProspectorServiceEvalStatsDAO(ProspectorService prospectorService, RdfCloudTripleStoreConfiguration conf) { + this.prospectorService = prospectorService; + } + + public ProspectorServiceEvalStatsDAO(Connector connector, RdfCloudTripleStoreConfiguration conf) throws AccumuloException, AccumuloSecurityException, TableExistsException { + this.prospectorService = new ProspectorService(connector, getProspectTableName(conf)); + } + + @Override + public void init() { + assert prospectorService != null; + } + + @Override + public boolean isInitialized() { + return prospectorService != null; + } + + @Override + public void destroy() { + } + + @Override + public double getCardinality(RdfCloudTripleStoreConfiguration conf, CARDINALITY_OF card, List val) throws RdfDAOException { + assert conf != null && card != null && val != null; + + String
triplePart = null; + switch (card) { + case SUBJECT: + triplePart = TripleValueType.SUBJECT.getIndexType(); + break; + case PREDICATE: + triplePart = TripleValueType.PREDICATE.getIndexType(); + break; + case OBJECT: + triplePart = TripleValueType.OBJECT.getIndexType(); + break; + case SUBJECTPREDICATE: + triplePart = TripleValueType.SUBJECT_PREDICATE.getIndexType(); + break; + case SUBJECTOBJECT: + triplePart = TripleValueType.SUBJECT_OBJECT.getIndexType(); + break; + case PREDICATEOBJECT: + triplePart = TripleValueType.PREDICATE_OBJECT.getIndexType(); + break; + } + + final String[] auths = conf.getAuths(); + final List indexedValues = new ArrayList<>(); + final Iterator valueIt = val.iterator(); + while (valueIt.hasNext()){ + indexedValues.add(valueIt.next().stringValue()); + } + + double cardinality = -1; + try { + final List entries = prospectorService.query(null, ProspectorConstants.COUNT, triplePart, indexedValues, null, auths); + if(!entries.isEmpty()) { + cardinality = entries.iterator().next().getCount(); + } + } catch (final TableNotFoundException e) { + throw new RdfDAOException(e); + } + return cardinality; + } + + @Override + public double getCardinality(RdfCloudTripleStoreConfiguration conf, CARDINALITY_OF card, List val, Resource context) { + return getCardinality(conf, card, val); //TODO: Not sure about the context yet + } + + @Override + public void setConf(RdfCloudTripleStoreConfiguration conf) { + } + + @Override + public RdfCloudTripleStoreConfiguration getConf() { + return null; + } + + public static String getProspectTableName(RdfCloudTripleStoreConfiguration conf) { + return conf.getTablePrefix() + "prospects"; + } + + /** + * This method exists so that the Rya Web project may autowrire itself together + * using the Spring framework. + * + * @param prospectorService - The {@link ProspectorService} that will be used by this DAO. 
+ */ + public void setProspectorService(ProspectorService prospectorService) { + this.prospectorService = prospectorService; + } +} \ No newline at end of file diff --git a/extras/rya.prospector/src/main/groovy/org/apache/rya/prospector/utils/CustomEntry.groovy b/extras/rya.prospector/src/main/java/org/apache/rya/prospector/utils/CustomEntry.java similarity index 55% rename from extras/rya.prospector/src/main/groovy/org/apache/rya/prospector/utils/CustomEntry.groovy rename to extras/rya.prospector/src/main/java/org/apache/rya/prospector/utils/CustomEntry.java index 9f23c4836..bff2c4297 100644 --- a/extras/rya.prospector/src/main/groovy/org/apache/rya/prospector/utils/CustomEntry.groovy +++ b/extras/rya.prospector/src/main/java/org/apache/rya/prospector/utils/CustomEntry.java @@ -6,9 +6,9 @@ * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at - * + * * http://www.apache.org/licenses/LICENSE-2.0 - * + * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY @@ -16,37 +16,43 @@ * specific language governing permissions and limitations * under the License. */ +package org.apache.rya.prospector.utils; -package org.apache.rya.prospector.utils +import java.util.Map; /** - * Date: 12/3/12 - * Time: 12:33 PM + * A convenience class that implements {@link Map.Entry}. + * + * @param - The type of the Key. + * @param - The type of the Value. 
*/ -class CustomEntry implements Map.Entry { +public class CustomEntry implements Map.Entry { - K key; - V value; + private K key; + private V value; - CustomEntry(K key, V value) { - this.key = key - this.value = value + public CustomEntry(K key, V value) { + this.key = key; + this.value = value; } - K getKey() { - return key + @Override + public K getKey() { + return key; } - void setKey(K key) { - this.key = key + public void setKey(K key) { + this.key = key; } - V getValue() { - return value + @Override + public V getValue() { + return value; } - V setValue(V value) { - this.value = value - this.value + @Override + public V setValue(V value) { + // Map.Entry#setValue must return the value that was replaced, not the new one. + final V previous = this.value; + this.value = value; + return previous; } -} +} \ No newline at end of file diff --git a/extras/rya.prospector/src/main/java/org/apache/rya/prospector/utils/ProspectorConstants.java b/extras/rya.prospector/src/main/java/org/apache/rya/prospector/utils/ProspectorConstants.java new file mode 100644 index 000000000..790134330 --- /dev/null +++ b/extras/rya.prospector/src/main/java/org/apache/rya/prospector/utils/ProspectorConstants.java @@ -0,0 +1,52 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License.
+ */ +package org.apache.rya.prospector.utils; + +/** + * Constants used by the Prospector project. + */ +public class ProspectorConstants { + /** + * The name of the Count index. + */ + public static final String COUNT = "count"; + + /** + * The Row ID within Accumulo for any metadata entry related to a Prospect run. + */ + public static final String METADATA = "metadata"; + + /** + * This is the name of a Column Family within Accumulo that represents when + * a Prospect run was performed. + */ + public static final String PROSPECT_TIME = "prospectTime"; + + public static final String DEFAULT_VIS = "U&FOUO"; + public static final byte[] EMPTY = new byte [0]; + + //config properties + public static final String PERFORMANT = "performant"; + + public static final String USERNAME = "username"; + public static final String PASSWORD = "password"; + public static final String INSTANCE = "instance"; + public static final String ZOOKEEPERS = "zookeepers"; + public static final String MOCK = "mock"; +} \ No newline at end of file diff --git a/extras/rya.prospector/src/main/java/org/apache/rya/prospector/utils/ProspectorUtils.java b/extras/rya.prospector/src/main/java/org/apache/rya/prospector/utils/ProspectorUtils.java new file mode 100644 index 000000000..4dc92533a --- /dev/null +++ b/extras/rya.prospector/src/main/java/org/apache/rya/prospector/utils/ProspectorUtils.java @@ -0,0 +1,147 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.rya.prospector.utils; + +import static org.apache.rya.prospector.utils.ProspectorConstants.INSTANCE; +import static org.apache.rya.prospector.utils.ProspectorConstants.MOCK; +import static org.apache.rya.prospector.utils.ProspectorConstants.PASSWORD; +import static org.apache.rya.prospector.utils.ProspectorConstants.USERNAME; +import static org.apache.rya.prospector.utils.ProspectorConstants.ZOOKEEPERS; + +import java.text.SimpleDateFormat; +import java.util.Collection; +import java.util.Date; +import java.util.HashMap; +import java.util.Map; + +import org.apache.accumulo.core.client.AccumuloException; +import org.apache.accumulo.core.client.AccumuloSecurityException; +import org.apache.accumulo.core.client.BatchWriter; +import org.apache.accumulo.core.client.Connector; +import org.apache.accumulo.core.client.Instance; +import org.apache.accumulo.core.client.MutationsRejectedException; +import org.apache.accumulo.core.client.TableNotFoundException; +import org.apache.accumulo.core.client.ZooKeeperInstance; +import org.apache.accumulo.core.client.mapreduce.AccumuloInputFormat; +import org.apache.accumulo.core.client.mapreduce.AccumuloOutputFormat; +import org.apache.accumulo.core.client.mock.MockInstance; +import org.apache.accumulo.core.client.security.tokens.PasswordToken; +import org.apache.accumulo.core.data.Mutation; +import org.apache.accumulo.core.security.Authorizations; +import org.apache.commons.lang.Validate; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.io.Text; +import 
org.apache.hadoop.io.compress.GzipCodec; +import org.apache.hadoop.mapreduce.Job; +import org.apache.rya.prospector.plans.IndexWorkPlan; + +public class ProspectorUtils { + + public static final long INDEXED_DATE_SORT_VAL = 999999999999999999L; // 18 char long, same length as date format pattern below + public static final String INDEXED_DATE_FORMAT = "yyyyMMddHHmmsssSSS"; + + public static String getReverseIndexDateTime(Date date) { + Validate.notNull(date); + final String formattedDateString = new SimpleDateFormat(INDEXED_DATE_FORMAT).format(date); + final long diff = INDEXED_DATE_SORT_VAL - Long.valueOf(formattedDateString); + + return Long.toString(diff); + } + + public static Map planMap(Collection plans) { + final Map planMap = new HashMap<>(); + for(final IndexWorkPlan plan : plans) { + planMap.put(plan.getIndexType(), plan); + } + return planMap; + } + + public static void initMRJob(Job job, String table, String outtable, String[] auths) throws AccumuloSecurityException { + final Configuration conf = job.getConfiguration(); + final String username = conf.get(USERNAME); + final String password = conf.get(PASSWORD); + final String instance = conf.get(INSTANCE); + final String zookeepers = conf.get(ZOOKEEPERS); + final String mock = conf.get(MOCK); + + //input + if (Boolean.parseBoolean(mock)) { + AccumuloInputFormat.setMockInstance(job, instance); + AccumuloOutputFormat.setMockInstance(job, instance); + } else if (zookeepers != null) { + AccumuloInputFormat.setZooKeeperInstance(job, instance, zookeepers); + AccumuloOutputFormat.setZooKeeperInstance(job, instance, zookeepers); + } else { + throw new IllegalArgumentException("Must specify either mock or zookeepers"); + } + + AccumuloInputFormat.setConnectorInfo(job, username, new PasswordToken(password.getBytes())); + AccumuloInputFormat.setInputTableName(job, table); + job.setInputFormatClass(AccumuloInputFormat.class); + AccumuloInputFormat.setScanAuthorizations(job, new Authorizations(auths)); + + // OUTPUT 
+ job.setOutputFormatClass(AccumuloOutputFormat.class); + job.setOutputKeyClass(Text.class); + job.setOutputValueClass(Mutation.class); + AccumuloOutputFormat.setConnectorInfo(job, username, new PasswordToken(password.getBytes())); + AccumuloOutputFormat.setDefaultTableName(job, outtable); + } + + public static void addMRPerformance(Configuration conf) { + conf.setBoolean("mapred.map.tasks.speculative.execution", false); + conf.setBoolean("mapred.reduce.tasks.speculative.execution", false); + conf.set("io.sort.mb", "256"); + conf.setBoolean("mapred.compress.map.output", true); + conf.set("mapred.map.output.compression.codec", GzipCodec.class.getName()); + } + + public static Instance instance(Configuration conf) { + assert conf != null; + + final String instance_str = conf.get(INSTANCE); + final String zookeepers = conf.get(ZOOKEEPERS); + final String mock = conf.get(MOCK); + if (Boolean.parseBoolean(mock)) { + return new MockInstance(instance_str); + } else if (zookeepers != null) { + return new ZooKeeperInstance(instance_str, zookeepers); + } else { + throw new IllegalArgumentException("Must specify either mock or zookeepers"); + } + } + + public static Connector connector(Instance instance, Configuration conf) throws AccumuloException, AccumuloSecurityException { + final String username = conf.get(USERNAME); + final String password = conf.get(PASSWORD); + if (instance == null) { + instance = instance(conf); + } + return instance.getConnector(username, new PasswordToken(password)); + } + + public static void writeMutations(Connector connector, String tableName, Collection mutations) throws TableNotFoundException, MutationsRejectedException { + final BatchWriter bw = connector.createBatchWriter(tableName, 10000l, 10000l, 4); + for(final Mutation mutation : mutations) { + bw.addMutation(mutation); + } + bw.flush(); + bw.close(); + } +} \ No newline at end of file diff --git a/extras/rya.prospector/src/test/groovy/org/apache/rya/prospector/mr/ProspectorTest.groovy 
b/extras/rya.prospector/src/test/groovy/org/apache/rya/prospector/mr/ProspectorTest.groovy deleted file mode 100644 index 02cbcd1fe..000000000 --- a/extras/rya.prospector/src/test/groovy/org/apache/rya/prospector/mr/ProspectorTest.groovy +++ /dev/null @@ -1,178 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.rya.prospector.mr - -import com.google.common.collect.Iterators -import com.google.common.collect.Lists -import org.apache.rya.accumulo.AccumuloRyaDAO -import org.apache.rya.accumulo.AccumuloRdfConfiguration -import org.apache.rya.api.persist.RdfEvalStatsDAO -import org.apache.rya.api.domain.RyaStatement -import org.apache.rya.api.domain.RyaType -import org.apache.rya.api.domain.RyaURI -import org.apache.rya.prospector.domain.IndexEntry -import org.apache.rya.prospector.domain.TripleValueType -import org.apache.rya.prospector.service.ProspectorService -import org.apache.rya.prospector.service.ProspectorServiceEvalStatsDAO -import org.apache.rya.prospector.utils.ProspectorConstants -import org.apache.accumulo.core.client.Instance -import org.apache.accumulo.core.client.mock.MockInstance -import org.apache.accumulo.core.security.Authorizations -import org.apache.hadoop.conf.Configuration -import org.apache.hadoop.fs.Path -import org.apache.hadoop.util.ToolRunner -import org.junit.Test -import org.openrdf.model.vocabulary.XMLSchema -import org.openrdf.model.impl.URIImpl - -import static org.junit.Assert.assertEquals -import org.openrdf.model.impl.LiteralImpl -import org.openrdf.model.Value - -/** - * Date: 12/4/12 - * Time: 4:33 PM - */ -class ProspectorTest { - - @Test - public void testCount() throws Exception { - - Instance mock = new MockInstance("accumulo"); - - def connector = mock.getConnector("user", "pass".bytes) - def intable = "rya_spo" - def outtable = "rya_prospects" - if (connector.tableOperations().exists(outtable)) - connector.tableOperations().delete(outtable) - connector.tableOperations().create(outtable) - - AccumuloRyaDAO ryaDAO = new AccumuloRyaDAO(); - ryaDAO.setConnector(connector); - ryaDAO.init() - - ryaDAO.add(new RyaStatement(new RyaURI("urn:gem:etype#1234"), new RyaURI("urn:gem#pred"), new RyaType("mydata1"))) - ryaDAO.add(new RyaStatement(new RyaURI("urn:gem:etype#1234"), new RyaURI("urn:gem#pred"), new 
RyaType("mydata2"))) - ryaDAO.add(new RyaStatement(new RyaURI("urn:gem:etype#1234"), new RyaURI("urn:gem#pred"), new RyaType("12"))) - ryaDAO.add(new RyaStatement(new RyaURI("urn:gem:etype#1235"), new RyaURI("urn:gem#pred"), new RyaType(XMLSchema.INTEGER, "12"))) - ryaDAO.add(new RyaStatement(new RyaURI("urn:gem:etype#1235"), new RyaURI("urn:gem#pred1"), new RyaType("12"))) - - def confFile = "stats_cluster_config.xml" - def confPath = new Path(getClass().getClassLoader().getResource(confFile).toString()) - def args = (String[]) [confPath]; - ToolRunner.run(new Prospector(), args); - debugTable(connector, outtable) - - def scanner = connector.createScanner(outtable, new Authorizations("U", "FOUO")) - def iter = scanner.iterator() -// assertEquals(11, Iterators.size(iter)) - - ryaDAO.destroy() - - def conf = new Configuration() - conf.addResource(confPath) - // debugTable(mrInfo, outtable) - - def service = new ProspectorService(connector, outtable) - def auths = (String[]) ["U", "FOUO"] - def prospects = service.getProspects(auths) - def plist = Lists.newArrayList(prospects) - assertEquals(1, plist.size()) - - def rdfConf = new AccumuloRdfConfiguration(conf) - rdfConf.setAuths("U","FOUO") - - prospects = service.getProspectsInRange(System.currentTimeMillis() - 100000, System.currentTimeMillis() + 10000, auths) - plist = Lists.newArrayList(prospects) - assertEquals(1, plist.size()) - - List queryTerms = new ArrayList(); - queryTerms.add("urn:gem:etype"); - def query = service.query(plist, ProspectorConstants.COUNT, TripleValueType.entity.name(), queryTerms, XMLSchema.ANYURI.stringValue(), auths) - assertEquals(1, query.size()) -// assertEquals( -// new IndexEntry(index: ProspectorConstants.COUNT, data: "urn:gem:etype", dataType: XMLSchema.ANYURI.stringValue(), -// tripleValueType: TripleValueType.entity, visibility: "", count: -1, timestamp: plist.get(0)), -// query.get(0)) - - queryTerms = new ArrayList(); - queryTerms.add("urn:gem:etype#1234"); - query = 
service.query(plist, ProspectorConstants.COUNT, TripleValueType.subject.name(), queryTerms, XMLSchema.ANYURI.stringValue(), auths) - assertEquals(1, query.size()) - - queryTerms = new ArrayList(); - queryTerms.add("urn:gem#pred"); - query = service.query(plist, ProspectorConstants.COUNT, TripleValueType.predicate.name(), queryTerms, XMLSchema.ANYURI.stringValue(), auths) - assertEquals(1, query.size()) - assertEquals( - new IndexEntry(index: ProspectorConstants.COUNT, data: "urn:gem#pred", dataType: XMLSchema.ANYURI.stringValue(), - tripleValueType: TripleValueType.predicate, visibility: "", count: 4l, timestamp: plist.get(0)), - query.get(0)) - - queryTerms = new ArrayList(); - queryTerms.add("mydata1"); - query = service.query(plist, ProspectorConstants.COUNT, TripleValueType.object.name(), queryTerms, XMLSchema.STRING.stringValue(), auths) - assertEquals(1, query.size()) -// assertEquals( -// new IndexEntry(index: ProspectorConstants.COUNT, data: "mydata1", dataType: XMLSchema.STRING.stringValue(), -// tripleValueType: TripleValueType.object, visibility: "", count: -1, timestamp: plist.get(0)), -// query.get(0)) - - queryTerms = new ArrayList(); - queryTerms.add("urn:gem:etype#1234"); - queryTerms.add("urn:gem#pred"); - query = service.query(plist, ProspectorConstants.COUNT, TripleValueType.subjectpredicate.name(), queryTerms, XMLSchema.STRING.stringValue(), auths) - assertEquals(1, query.size()) -// assertEquals( -// new IndexEntry(index: ProspectorConstants.COUNT, data: "urn:gem:etype#1234" + "\u0000" + "urn:gem#pred", dataType: XMLSchema.STRING.stringValue(), -// tripleValueType: TripleValueType.subjectpredicate, visibility: "", count: -1, timestamp: plist.get(0)), -// query.get(0)) - - queryTerms = new ArrayList(); - queryTerms.add("urn:gem#pred"); - queryTerms.add("12"); - query = service.query(plist, ProspectorConstants.COUNT, TripleValueType.predicateobject.name(), queryTerms, XMLSchema.STRING.stringValue(), auths) - assertEquals(1, query.size()) -// 
assertEquals( -// new IndexEntry(index: ProspectorConstants.COUNT, data: "urn:gem#pred" + "\u0000" + "12", dataType: XMLSchema.STRING.stringValue(), -// tripleValueType: TripleValueType.predicateobject, visibility: "", count: -1, timestamp: plist.get(0)), -// query.get(0)) - - queryTerms = new ArrayList(); - queryTerms.add("urn:gem:etype#1234"); - queryTerms.add("mydata1"); - query = service.query(plist, ProspectorConstants.COUNT, TripleValueType.subjectobject.name(), queryTerms, XMLSchema.STRING.stringValue(), auths) - - assertEquals(1, query.size()) -// assertEquals( -// new IndexEntry(index: ProspectorConstants.COUNT, data: "urn:gem:etype#1234" + "\u0000" + "mydata1", dataType: XMLSchema.STRING.stringValue(), -// tripleValueType: TripleValueType.subjectobject, visibility: "", count: -1, timestamp: plist.get(0)), -// query.get(0)) - - //should be in a teardown method - connector.tableOperations().delete(outtable) - } - - private void debugTable(def connector, String table) { - connector.createScanner(table, new Authorizations((String[]) ["U", "FOUO"])).iterator().each { - println it - } - } -} diff --git a/extras/rya.prospector/src/test/groovy/org/apache/rya/prospector/service/ProspectorServiceEvalStatsDAOTest.groovy b/extras/rya.prospector/src/test/groovy/org/apache/rya/prospector/service/ProspectorServiceEvalStatsDAOTest.groovy deleted file mode 100644 index e518ca89a..000000000 --- a/extras/rya.prospector/src/test/groovy/org/apache/rya/prospector/service/ProspectorServiceEvalStatsDAOTest.groovy +++ /dev/null @@ -1,182 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.rya.prospector.service - -import com.google.common.collect.Iterators -import org.apache.rya.accumulo.AccumuloRdfConfiguration -import org.apache.rya.accumulo.AccumuloRyaDAO -import org.apache.rya.api.domain.RyaStatement -import org.apache.rya.api.domain.RyaType -import org.apache.rya.api.domain.RyaURI -import org.apache.rya.api.persist.RdfEvalStatsDAO -import org.apache.rya.prospector.mr.Prospector -import org.apache.accumulo.core.client.Instance -import org.apache.accumulo.core.client.mock.MockInstance -import org.apache.accumulo.core.security.Authorizations -import org.apache.hadoop.conf.Configuration -import org.apache.hadoop.fs.Path -import org.apache.hadoop.util.ToolRunner -import org.junit.Test -import org.openrdf.model.impl.URIImpl -import org.openrdf.model.vocabulary.XMLSchema - -import static org.junit.Assert.assertEquals -import org.openrdf.model.impl.LiteralImpl -import org.openrdf.model.Value - -/** - * Date: 1/26/13 - * Time: 3:00 PM - */ -class ProspectorServiceEvalStatsDAOTest { - - @Test - public void testCount() throws Exception { - - Instance mock = new MockInstance("accumulo"); - - def connector = mock.getConnector("user", "pass".bytes) - def intable = "rya_spo" - def outtable = "rya_prospects" - if (connector.tableOperations().exists(outtable)) - connector.tableOperations().delete(outtable) - connector.tableOperations().create(outtable) - - AccumuloRyaDAO ryaDAO = new AccumuloRyaDAO(); - ryaDAO.setConnector(connector); - ryaDAO.init() - - ryaDAO.add(new RyaStatement(new RyaURI("urn:gem:etype#1234"), 
new RyaURI("urn:gem#pred"), new RyaType("mydata1"))) - ryaDAO.add(new RyaStatement(new RyaURI("urn:gem:etype#1234"), new RyaURI("urn:gem#pred"), new RyaType("mydata2"))) - ryaDAO.add(new RyaStatement(new RyaURI("urn:gem:etype#1234"), new RyaURI("urn:gem#pred"), new RyaType("12"))) - ryaDAO.add(new RyaStatement(new RyaURI("urn:gem:etype#1235"), new RyaURI("urn:gem#pred"), new RyaType(XMLSchema.INTEGER, "12"))) - ryaDAO.add(new RyaStatement(new RyaURI("urn:gem:etype#1235"), new RyaURI("urn:gem#pred1"), new RyaType("12"))) - - def confFile = "stats_cluster_config.xml" - def confPath = new Path(getClass().getClassLoader().getResource(confFile).toString()) - def args = (String[]) [confPath]; - ToolRunner.run(new Prospector(), args); - debugTable(connector, outtable) - - def scanner = connector.createScanner(outtable, new Authorizations("U", "FOUO")) - def iter = scanner.iterator() -// assertEquals(11, Iterators.size(iter)) - - ryaDAO.destroy() - - def conf = new Configuration() - conf.addResource(confPath) -// debugTable(connector, outtable) - - def rdfConf = new AccumuloRdfConfiguration(conf) - rdfConf.setAuths("U","FOUO") - def evalDao = new ProspectorServiceEvalStatsDAO(connector, rdfConf) - evalDao.init() - - List values = new ArrayList(); - values.add( new URIImpl("urn:gem#pred")); - - def count = evalDao.getCardinality(rdfConf, RdfEvalStatsDAO.CARDINALITY_OF.PREDICATE, values) - assertEquals(4.0, count, 0.001); - - values = new ArrayList(); - values.add( new LiteralImpl("mydata1")); - - count = evalDao.getCardinality(rdfConf, RdfEvalStatsDAO.CARDINALITY_OF.OBJECT, values); - assertEquals(1.0, count, 0.001); - - values = new ArrayList(); - values.add( new LiteralImpl("mydata3")); - - count = evalDao.getCardinality(rdfConf, RdfEvalStatsDAO.CARDINALITY_OF.OBJECT, values); - assertEquals(-1.0, count, 0.001); - - //should be in a teardown method - connector.tableOperations().delete(outtable) - } - - @Test - public void testNoAuthsCount() throws Exception { - - Instance 
mock = new MockInstance("accumulo"); - def connector = mock.getConnector("user", "pass".bytes) - def intable = "rya_spo" - def outtable = "rya_prospects" - if (connector.tableOperations().exists(outtable)) - connector.tableOperations().delete(outtable) - connector.tableOperations().create(outtable) - connector.securityOperations().createUser("user", "pass".bytes, new Authorizations("U", "FOUO")) - - AccumuloRyaDAO ryaDAO = new AccumuloRyaDAO(); - ryaDAO.setConnector(connector); - ryaDAO.init() - - ryaDAO.add(new RyaStatement(new RyaURI("urn:gem:etype#1234"), new RyaURI("urn:gem#pred"), new RyaType("mydata1"))) - ryaDAO.add(new RyaStatement(new RyaURI("urn:gem:etype#1234"), new RyaURI("urn:gem#pred"), new RyaType("mydata2"))) - ryaDAO.add(new RyaStatement(new RyaURI("urn:gem:etype#1234"), new RyaURI("urn:gem#pred"), new RyaType("12"))) - ryaDAO.add(new RyaStatement(new RyaURI("urn:gem:etype#1235"), new RyaURI("urn:gem#pred"), new RyaType(XMLSchema.INTEGER, "12"))) - ryaDAO.add(new RyaStatement(new RyaURI("urn:gem:etype#1235"), new RyaURI("urn:gem#pred1"), new RyaType("12"))) - - def confFile = "stats_cluster_config.xml" - def confPath = new Path(getClass().getClassLoader().getResource(confFile).toString()) - def args = (String[]) [confPath]; - ToolRunner.run(new Prospector(), args); - - def scanner = connector.createScanner(outtable, new Authorizations("U", "FOUO")) - def iter = scanner.iterator() -// assertEquals(11, Iterators.size(iter)) - - ryaDAO.destroy() - - def conf = new Configuration() - conf.addResource(confPath) - - def rdfConf = new AccumuloRdfConfiguration(conf) -// rdfConf.setAuths("U","FOUO") - def evalDao = new ProspectorServiceEvalStatsDAO(connector, rdfConf) - evalDao.init() - - - List values = new ArrayList(); - values.add( new URIImpl("urn:gem#pred")); - def count = evalDao.getCardinality(rdfConf, RdfEvalStatsDAO.CARDINALITY_OF.PREDICATE, values) - assertEquals(4.0, count, 0.001); - - values = new ArrayList(); - values.add( new 
LiteralImpl("mydata1")); - count = evalDao.getCardinality(rdfConf, RdfEvalStatsDAO.CARDINALITY_OF.OBJECT, values); - assertEquals(1.0, count, 0.001); - - values = new ArrayList(); - values.add( new LiteralImpl("mydata3")); - - count = evalDao.getCardinality(rdfConf, RdfEvalStatsDAO.CARDINALITY_OF.OBJECT, values); - assertEquals(-1.0, count, 0.001); - - //should be in a teardown method - connector.tableOperations().delete(outtable) - } - - private void debugTable(def connector, String table) { - connector.createScanner(table, new Authorizations((String[]) ["U", "FOUO"])).iterator().each { - println it - } - } -} diff --git a/extras/rya.prospector/src/test/java/org/apache/rya/prospector/mr/ProspectorTest.java b/extras/rya.prospector/src/test/java/org/apache/rya/prospector/mr/ProspectorTest.java new file mode 100644 index 000000000..eac7aab76 --- /dev/null +++ b/extras/rya.prospector/src/test/java/org/apache/rya/prospector/mr/ProspectorTest.java @@ -0,0 +1,248 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.rya.prospector.mr; + +import static org.junit.Assert.assertEquals; + +import java.util.ArrayList; +import java.util.Iterator; +import java.util.List; +import java.util.Map.Entry; + +import org.apache.accumulo.core.client.Connector; +import org.apache.accumulo.core.client.Instance; +import org.apache.accumulo.core.client.TableNotFoundException; +import org.apache.accumulo.core.client.mock.MockInstance; +import org.apache.accumulo.core.client.security.tokens.PasswordToken; +import org.apache.accumulo.core.data.Key; +import org.apache.accumulo.core.data.Value; +import org.apache.accumulo.core.security.Authorizations; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.util.ToolRunner; +import org.apache.rya.accumulo.AccumuloRdfConfiguration; +import org.apache.rya.accumulo.AccumuloRyaDAO; +import org.apache.rya.api.domain.RyaStatement; +import org.apache.rya.api.domain.RyaType; +import org.apache.rya.api.domain.RyaURI; +import org.apache.rya.prospector.domain.IndexEntry; +import org.apache.rya.prospector.domain.TripleValueType; +import org.apache.rya.prospector.service.ProspectorService; +import org.apache.rya.prospector.utils.ProspectorConstants; +import org.junit.Test; +import org.openrdf.model.vocabulary.XMLSchema; + +import com.google.common.collect.Lists; + +/** + * Tests that show when the {@link Prospector} job is run, it creates a table + * containing the correct count information derived from the statements that + * have been stored within a Rya instance. + */ +public class ProspectorTest { + + @Test + public void testCount() throws Exception { + // Load some data into a mock Accumulo and run the Prospector MapReduce job. 
+ final Instance mock = new MockInstance("accumulo"); + + final Connector connector = mock.getConnector("user", new PasswordToken("pass")); + final String outtable = "rya_prospects"; + if (connector.tableOperations().exists(outtable)) { + connector.tableOperations().delete(outtable); + } + connector.tableOperations().create(outtable); + + final AccumuloRyaDAO ryaDAO = new AccumuloRyaDAO(); + ryaDAO.setConnector(connector); + ryaDAO.init(); + + ryaDAO.add(new RyaStatement(new RyaURI("urn:gem:etype#1234"), new RyaURI("urn:gem#pred"), new RyaType("mydata1"))); + ryaDAO.add(new RyaStatement(new RyaURI("urn:gem:etype#1234"), new RyaURI("urn:gem#pred"), new RyaType("mydata2"))); + ryaDAO.add(new RyaStatement(new RyaURI("urn:gem:etype#1234"), new RyaURI("urn:gem#pred"), new RyaType("12"))); + ryaDAO.add(new RyaStatement(new RyaURI("urn:gem:etype#1235"), new RyaURI("urn:gem#pred"), new RyaType(XMLSchema.INTEGER, "12"))); + ryaDAO.add(new RyaStatement(new RyaURI("urn:gem:etype#1235"), new RyaURI("urn:gem#pred1"), new RyaType("12"))); + + final String confFile = "stats_cluster_config.xml"; + final Path confPath = new Path(getClass().getClassLoader().getResource(confFile).toString()); + final String[] args = { confPath.toString() }; + ToolRunner.run(new Prospector(), args); + ryaDAO.destroy(); + + // Interrogate the results of the Prospect job to ensure the correct results were created. 
+ final Configuration conf = new Configuration(); + conf.addResource(confPath); + + final ProspectorService service = new ProspectorService(connector, outtable); + final String[] auths = {"U", "FOUO"}; + Iterator prospects = service.getProspects(auths); + List plist = Lists.newArrayList(prospects); + assertEquals(1, plist.size()); + + final Long prospectTimestamp = plist.iterator().next(); + + final AccumuloRdfConfiguration rdfConf = new AccumuloRdfConfiguration(conf); + rdfConf.setAuths("U","FOUO"); + + prospects = service.getProspectsInRange(System.currentTimeMillis() - 100000, System.currentTimeMillis() + 10000, auths); + plist = Lists.newArrayList(prospects); + assertEquals(1, plist.size()); + + // Ensure one of the correct "entity" counts was created. + List queryTerms = new ArrayList<>(); + queryTerms.add("urn:gem:etype"); + final List entityEntries = service.query(plist, ProspectorConstants.COUNT, TripleValueType.ENTITY.getIndexType(), queryTerms, XMLSchema.ANYURI.stringValue(), auths); + + final List expectedEntityEntries = Lists.newArrayList( + IndexEntry.builder() + .setIndex(ProspectorConstants.COUNT) + .setData("urn:gem:etype") + .setDataType(XMLSchema.ANYURI.stringValue()) + .setTripleValueType( TripleValueType.ENTITY.getIndexType() ) + .setVisibility("") + .setTimestamp(prospectTimestamp) + .setCount(new Long(5)) + .build()); + + assertEquals(expectedEntityEntries, entityEntries); + + // Ensure one of the correct "subject" counts was created. 
+ queryTerms = new ArrayList(); + queryTerms.add("urn:gem:etype#1234"); + final List subjectEntries = service.query(plist, ProspectorConstants.COUNT, TripleValueType.SUBJECT.getIndexType(), queryTerms, XMLSchema.ANYURI.stringValue(), auths); + + final List expectedSubjectEntries = Lists.newArrayList( + IndexEntry.builder() + .setIndex(ProspectorConstants.COUNT) + .setData("urn:gem:etype#1234") + .setDataType(XMLSchema.ANYURI.stringValue()) + .setTripleValueType( TripleValueType.SUBJECT.getIndexType() ) + .setVisibility("") + .setTimestamp(prospectTimestamp) + .setCount(new Long(3)) + .build()); + + assertEquals(expectedSubjectEntries, subjectEntries); + + // Ensure one of the correct "predicate" counts was created. + queryTerms = new ArrayList(); + queryTerms.add("urn:gem#pred"); + final List predicateEntries = service.query(plist, ProspectorConstants.COUNT, TripleValueType.PREDICATE.getIndexType(), queryTerms, XMLSchema.ANYURI.stringValue(), auths); + + final List expectedPredicateEntries = Lists.newArrayList( + IndexEntry.builder() + .setIndex(ProspectorConstants.COUNT) + .setData("urn:gem#pred") + .setDataType(XMLSchema.ANYURI.stringValue()) + .setTripleValueType( TripleValueType.PREDICATE.getIndexType() ) + .setVisibility("") + .setTimestamp(prospectTimestamp) + .setCount(new Long(4)) + .build()); + + assertEquals(expectedPredicateEntries, predicateEntries); + + // Ensure one of the correct "object" counts was created. 
+ queryTerms = new ArrayList(); + queryTerms.add("mydata1"); + final List objectEntries = service.query(plist, ProspectorConstants.COUNT, TripleValueType.OBJECT.getIndexType(), queryTerms, XMLSchema.STRING.stringValue(), auths); + + final List expectedObjectEntries = Lists.newArrayList( + IndexEntry.builder() + .setIndex(ProspectorConstants.COUNT) + .setData("mydata1") + .setDataType(XMLSchema.STRING.stringValue()) + .setTripleValueType( TripleValueType.OBJECT.getIndexType() ) + .setVisibility("") + .setTimestamp(prospectTimestamp) + .setCount(new Long(1)) + .build()); + + assertEquals(expectedObjectEntries, objectEntries); + + // Ensure one of the correct "subjectpredicate" counts was created. + queryTerms = new ArrayList(); + queryTerms.add("urn:gem:etype#1234"); + queryTerms.add("urn:gem#pred"); + final List subjectPredicateEntries = service.query(plist, ProspectorConstants.COUNT, TripleValueType.SUBJECT_PREDICATE.getIndexType(), queryTerms, XMLSchema.STRING.stringValue(), auths); + + final List expectedSubjectPredicateEntries = Lists.newArrayList( + IndexEntry.builder() + .setIndex(ProspectorConstants.COUNT) + .setData("urn:gem:etype#1234"+ "\u0000" + "urn:gem#pred") + .setDataType(XMLSchema.STRING.stringValue()) + .setTripleValueType( TripleValueType.SUBJECT_PREDICATE.getIndexType() ) + .setVisibility("") + .setTimestamp(prospectTimestamp) + .setCount(new Long(3)) + .build()); + + assertEquals(expectedSubjectPredicateEntries, subjectPredicateEntries); + + // Ensure one of the correct "predicateobject" counts was created. 
+ queryTerms = new ArrayList(); + queryTerms.add("urn:gem#pred"); + queryTerms.add("12"); + final List predicateObjectEntries = service.query(plist, ProspectorConstants.COUNT, TripleValueType.PREDICATE_OBJECT.getIndexType(), queryTerms, XMLSchema.STRING.stringValue(), auths); + + final List expectedPredicateObjectEntries = Lists.newArrayList( + IndexEntry.builder() + .setIndex(ProspectorConstants.COUNT) + .setData("urn:gem#pred" + "\u0000" + "12") + .setDataType(XMLSchema.STRING.stringValue()) + .setTripleValueType( TripleValueType.PREDICATE_OBJECT.getIndexType() ) + .setVisibility("") + .setTimestamp(prospectTimestamp) + .setCount(new Long(2)) // XXX This might be a bug. The object matching doesn't care about type. + .build()); + + assertEquals(expectedPredicateObjectEntries, predicateObjectEntries); + + // Ensure one of the correct "" counts was created. + queryTerms = new ArrayList(); + queryTerms.add("urn:gem:etype#1234"); + queryTerms.add("mydata1"); + final List subjectObjectEntries = service.query(plist, ProspectorConstants.COUNT, TripleValueType.SUBJECT_OBJECT.getIndexType(), queryTerms, XMLSchema.STRING.stringValue(), auths); + + final List expectedSubjectObjectEntries = Lists.newArrayList( + IndexEntry.builder() + .setIndex(ProspectorConstants.COUNT) + .setData("urn:gem:etype#1234" + "\u0000" + "mydata1") + .setDataType(XMLSchema.STRING.stringValue()) + .setTripleValueType( TripleValueType.SUBJECT_OBJECT.getIndexType() ) + .setVisibility("") + .setTimestamp(prospectTimestamp) + .setCount(new Long(1)) + .build()); + + assertEquals(expectedSubjectObjectEntries, subjectObjectEntries); + } + + /** + * Prints the content of an Accumulo table to standard out. Only use then when + * debugging the test. 
+ */ + private void debugTable(Connector connector, String table) throws TableNotFoundException { + final Iterator> it = connector.createScanner(table, new Authorizations(new String[]{"U", "FOUO"})).iterator(); + while(it.hasNext()) { + final Entry entry = it.next(); + System.out.println( entry ); + } + } +} \ No newline at end of file diff --git a/extras/rya.prospector/src/test/java/org/apache/rya/prospector/service/ProspectorServiceEvalStatsDAOTest.java b/extras/rya.prospector/src/test/java/org/apache/rya/prospector/service/ProspectorServiceEvalStatsDAOTest.java new file mode 100644 index 000000000..f04874212 --- /dev/null +++ b/extras/rya.prospector/src/test/java/org/apache/rya/prospector/service/ProspectorServiceEvalStatsDAOTest.java @@ -0,0 +1,181 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.rya.prospector.service; + +import static org.junit.Assert.assertEquals; + +import java.util.ArrayList; +import java.util.Iterator; +import java.util.List; +import java.util.Map.Entry; + +import org.apache.accumulo.core.client.Connector; +import org.apache.accumulo.core.client.Instance; +import org.apache.accumulo.core.client.TableNotFoundException; +import org.apache.accumulo.core.client.mock.MockInstance; +import org.apache.accumulo.core.client.security.tokens.PasswordToken; +import org.apache.accumulo.core.data.Key; +import org.apache.accumulo.core.security.Authorizations; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.util.ToolRunner; +import org.apache.rya.accumulo.AccumuloRdfConfiguration; +import org.apache.rya.accumulo.AccumuloRyaDAO; +import org.apache.rya.api.domain.RyaStatement; +import org.apache.rya.api.domain.RyaType; +import org.apache.rya.api.domain.RyaURI; +import org.apache.rya.api.persist.RdfEvalStatsDAO; +import org.apache.rya.api.persist.RdfEvalStatsDAO.CARDINALITY_OF; +import org.apache.rya.prospector.mr.Prospector; +import org.junit.Test; +import org.openrdf.model.Value; +import org.openrdf.model.impl.LiteralImpl; +import org.openrdf.model.impl.URIImpl; +import org.openrdf.model.vocabulary.XMLSchema; + +/** + * Tests that show when the {@link Prospector} job is run, the + * {@link ProspectorServiceEvalStatsDAO} may be used to fetch cardinality + * information from the prospect table. + */ +public class ProspectorServiceEvalStatsDAOTest { + + /** + * Loads five statements, runs the {@link Prospector} job and then queries the + * cardinalities with the 'U' and 'FOUO' authorizations set on the query + * configuration. Expects 4 for the 'urn:gem#pred' predicate, 1 for the + * 'mydata1' object and -1 for the 'mydata3' object, which was never loaded. + */ + @Test + public void testCount() throws Exception { + // Load some data into a mock Accumulo and run the Prospector MapReduce job.
+ final Instance mock = new MockInstance("accumulo"); + + final Connector connector = mock.getConnector("user", new PasswordToken("pass")); + final String outtable = "rya_prospects"; + if (connector.tableOperations().exists(outtable)) { + connector.tableOperations().delete(outtable); + } + connector.tableOperations().create(outtable); + + final AccumuloRyaDAO ryaDAO = new AccumuloRyaDAO(); + ryaDAO.setConnector(connector); + ryaDAO.init(); + + ryaDAO.add(new RyaStatement(new RyaURI("urn:gem:etype#1234"), new RyaURI("urn:gem#pred"), new RyaType("mydata1"))); + ryaDAO.add(new RyaStatement(new RyaURI("urn:gem:etype#1234"), new RyaURI("urn:gem#pred"), new RyaType("mydata2"))); + ryaDAO.add(new RyaStatement(new RyaURI("urn:gem:etype#1234"), new RyaURI("urn:gem#pred"), new RyaType("12"))); + ryaDAO.add(new RyaStatement(new RyaURI("urn:gem:etype#1235"), new RyaURI("urn:gem#pred"), new RyaType(XMLSchema.INTEGER, "12"))); + ryaDAO.add(new RyaStatement(new RyaURI("urn:gem:etype#1235"), new RyaURI("urn:gem#pred1"), new RyaType("12"))); + + final String confFile = "stats_cluster_config.xml"; + final Path confPath = new Path(getClass().getClassLoader().getResource(confFile).toString()); + final String[] args = { confPath.toString() }; + ToolRunner.run(new Prospector(), args); + + ryaDAO.destroy(); + + final Configuration conf = new Configuration(); + conf.addResource(confPath); + + final AccumuloRdfConfiguration rdfConf = new AccumuloRdfConfiguration(conf); + rdfConf.setAuths("U","FOUO"); + final ProspectorServiceEvalStatsDAO evalDao = new ProspectorServiceEvalStatsDAO(connector, rdfConf); + evalDao.init(); + + // Get the cardinality of the 'urn:gem#pred' predicate. + List values = new ArrayList(); + values.add( new URIImpl("urn:gem#pred") ); + double count = evalDao.getCardinality(rdfConf, CARDINALITY_OF.PREDICATE, values); + assertEquals(4.0, count, 0.001); + + // Get the cardinality of the 'mydata1' object.
+ values = new ArrayList(); + values.add( new LiteralImpl("mydata1")); + count = evalDao.getCardinality(rdfConf, RdfEvalStatsDAO.CARDINALITY_OF.OBJECT, values); + assertEquals(1.0, count, 0.001); + + // Get the cardinality of the 'mydata3' object, which was never loaded. + values = new ArrayList(); + values.add( new LiteralImpl("mydata3")); + count = evalDao.getCardinality(rdfConf, RdfEvalStatsDAO.CARDINALITY_OF.OBJECT, values); + assertEquals(-1.0, count, 0.001); + } + + /** + * Loads the same five statements as {@link #testCount()} and runs the + * {@link Prospector} job, but queries the cardinalities without the test + * calling setAuths on the query configuration. The 'U' and 'FOUO' + * authorizations are instead passed when the "user" account is created. + * Expects the same cardinalities: 4, 1 and -1. + */ + @Test + public void testNoAuthsCount() throws Exception { + // Load some data into a mock Accumulo and run the Prospector MapReduce job. + final Instance mock = new MockInstance("accumulo"); + + final Connector connector = mock.getConnector("user", new PasswordToken("pass")); + final String outtable = "rya_prospects"; + if (connector.tableOperations().exists(outtable)) { + connector.tableOperations().delete(outtable); + } + connector.tableOperations().create(outtable); + connector.securityOperations().createUser("user", "pass".getBytes(), new Authorizations("U", "FOUO")); + + final AccumuloRyaDAO ryaDAO = new AccumuloRyaDAO(); + ryaDAO.setConnector(connector); + ryaDAO.init(); + + ryaDAO.add(new RyaStatement(new RyaURI("urn:gem:etype#1234"), new RyaURI("urn:gem#pred"), new RyaType("mydata1"))); + ryaDAO.add(new RyaStatement(new RyaURI("urn:gem:etype#1234"), new RyaURI("urn:gem#pred"), new RyaType("mydata2"))); + ryaDAO.add(new RyaStatement(new RyaURI("urn:gem:etype#1234"), new RyaURI("urn:gem#pred"), new RyaType("12"))); + ryaDAO.add(new RyaStatement(new RyaURI("urn:gem:etype#1235"), new RyaURI("urn:gem#pred"), new RyaType(XMLSchema.INTEGER, "12"))); + ryaDAO.add(new RyaStatement(new RyaURI("urn:gem:etype#1235"), new RyaURI("urn:gem#pred1"), new RyaType("12"))); + + final String confFile = "stats_cluster_config.xml"; + final Path confPath = new Path(getClass().getClassLoader().getResource(confFile).toString()); + final String[] args = { confPath.toString() }; + ToolRunner.run(new Prospector(), args); + +
ryaDAO.destroy(); + + final Configuration conf = new Configuration(); + conf.addResource(confPath); + + final AccumuloRdfConfiguration rdfConf = new AccumuloRdfConfiguration(conf); + final ProspectorServiceEvalStatsDAO evalDao = new ProspectorServiceEvalStatsDAO(connector, rdfConf); + evalDao.init(); + + // Get the cardinality of the 'urn:gem#pred' predicate. + List values = new ArrayList(); + values.add( new URIImpl("urn:gem#pred")); + double count = evalDao.getCardinality(rdfConf, RdfEvalStatsDAO.CARDINALITY_OF.PREDICATE, values); + assertEquals(4.0, count, 0.001); + + // Get the cardinality of the 'mydata1' object. + values = new ArrayList(); + values.add( new LiteralImpl("mydata1")); + count = evalDao.getCardinality(rdfConf, RdfEvalStatsDAO.CARDINALITY_OF.OBJECT, values); + assertEquals(1.0, count, 0.001); + + // Get the cardinality of the 'mydata3' object, which was never loaded. + values = new ArrayList(); + values.add( new LiteralImpl("mydata3")); + count = evalDao.getCardinality(rdfConf, RdfEvalStatsDAO.CARDINALITY_OF.OBJECT, values); + assertEquals(-1.0, count, 0.001); + } + + /** + * Prints every entry of an Accumulo table to standard out. Only use this when + * debugging the test; none of the tests in this class call it. The table is + * scanned with the 'U' and 'FOUO' authorizations. + * + * @param connector - Creates the scanner that reads the table. + * @param table - The name of the table whose entries will be printed. + * @throws TableNotFoundException Declared for the scanner creation; presumably + * raised when {@code table} does not exist (confirm against the Accumulo API). + */ + private void debugTable(Connector connector, String table) throws TableNotFoundException { + final Iterator> it = connector.createScanner(table, new Authorizations(new String[]{"U", "FOUO"})).iterator(); + while(it.hasNext()) { + System.out.println( it.next() ); + } + } +} \ No newline at end of file diff --git a/pom.xml b/pom.xml index ef2e7d877..4f7148a57 100644 --- a/pom.xml +++ b/pom.xml @@ -94,7 +94,6 @@ under the License. 1.6 1.3 - 2.3.11 14.0.1 @@ -400,24 +399,6 @@ under the License. ${embed.mongo.version}
- - - org.codehaus.groovy - groovy-all - ${groovy.version} - - - org.codehaus.gmaven.runtime - gmaven-runtime-1.7 - ${gmaven.version} - - - org.codehaus.groovy - groovy-all - - - - org.springframework.shell @@ -774,42 +755,6 @@ under the License. v@{project.version} - - org.codehaus.gmaven - gmaven-plugin - ${gmaven.version} - - - org.codehaus.groovy - groovy-all - ${groovy.version} - - - org.codehaus.gmaven.runtime - gmaven-runtime-1.7 - ${gmaven.version} - - - org.codehaus.groovy - groovy-all - - - - - - - - 1.7 - - - generateStubs - compile - generateTestStubs - testCompile - - - - org.apache.maven.plugins maven-shade-plugin diff --git a/sail/src/test/java/org/apache/rya/rdftriplestore/evaluation/QueryJoinSelectOptimizerTest.java b/sail/src/test/java/org/apache/rya/rdftriplestore/evaluation/QueryJoinSelectOptimizerTest.java index daf446c90..63c5d134e 100644 --- a/sail/src/test/java/org/apache/rya/rdftriplestore/evaluation/QueryJoinSelectOptimizerTest.java +++ b/sail/src/test/java/org/apache/rya/rdftriplestore/evaluation/QueryJoinSelectOptimizerTest.java @@ -642,6 +642,10 @@ public void testOptimizeQ4() throws Exception { QueryJoinSelectOptimizer qjs = new QueryJoinSelectOptimizer(ars, accc); System.out.println("Originial query is " + te); qjs.optimize(te, null, null); + + TupleExpr what = getTupleExpr(Q4); + System.out.println("lolol: \n" + what); + Assert.assertTrue(te.equals(getTupleExpr(Q4))); System.out.print("Optimized query is " + te);