From b041143ceb663875bd41bcd232921babdac9876a Mon Sep 17 00:00:00 2001
From: Kevin Chilton
Date: Wed, 1 Mar 2017 17:42:08 -0500
Subject: [PATCH] RYA-253 Converted the rya.prospector class from Groovy to
Java. Also added documentation to the project.
---
.../rya/api/persist/RdfEvalStatsDAO.java | 8 +-
.../accumulo/entity/EntityOptimizer.java | 25 +-
.../accumulo/entity/EntityTupleSet.java | 64 ++---
extras/rya.prospector/pom.xml | 69 -----
.../rya/prospector/domain/IndexEntry.groovy | 76 -----
.../domain/IntermediateProspect.groovy | 70 -----
.../prospector/domain/TripleValueType.java | 26 --
.../rya/prospector/mr/Prospector.groovy | 108 --------
.../prospector/mr/ProspectorCombiner.groovy | 61 ----
.../rya/prospector/mr/ProspectorMapper.groovy | 75 -----
.../prospector/mr/ProspectorReducer.groovy | 57 ----
.../rya/prospector/plans/IndexWorkPlan.groovy | 51 ----
.../prospector/plans/impl/CountPlan.groovy | 220 ---------------
.../ServicesBackedIndexWorkPlanManager.groovy | 38 ---
.../service/ProspectorService.groovy | 126 ---------
.../ProspectorServiceEvalStatsDAO.groovy | 122 --------
.../utils/ProspectorConstants.groovy | 41 ---
.../prospector/utils/ProspectorUtils.groovy | 138 ---------
.../rya/prospector/domain/IndexEntry.java | 241 ++++++++++++++++
.../domain/IntermediateProspect.java | 213 ++++++++++++++
.../prospector/domain/TripleValueType.java | 101 +++++++
.../apache/rya/prospector/mr/Prospector.java | 113 ++++++++
.../rya/prospector/mr/ProspectorCombiner.java | 61 ++++
.../rya/prospector/mr/ProspectorMapper.java | 83 ++++++
.../rya/prospector/mr/ProspectorReducer.java | 65 +++++
.../rya/prospector/plans/IndexWorkPlan.java | 115 ++++++++
.../plans/IndexWorkPlanManager.java} | 19 +-
.../rya/prospector/plans/impl/CountPlan.java | 262 ++++++++++++++++++
.../ServicesBackedIndexWorkPlanManager.java | 49 ++++
.../prospector/service/ProspectorService.java | 162 +++++++++++
.../ProspectorServiceEvalStatsDAO.java | 143 ++++++++++
.../rya/prospector/utils/CustomEntry.java} | 48 ++--
.../prospector/utils/ProspectorConstants.java | 52 ++++
.../rya/prospector/utils/ProspectorUtils.java | 147 ++++++++++
.../rya/prospector/mr/ProspectorTest.groovy | 178 ------------
.../ProspectorServiceEvalStatsDAOTest.groovy | 182 ------------
.../rya/prospector/mr/ProspectorTest.java | 248 +++++++++++++++++
.../ProspectorServiceEvalStatsDAOTest.java | 181 ++++++++++++
pom.xml | 55 ----
.../QueryJoinSelectOptimizerTest.java | 4 +
40 files changed, 2329 insertions(+), 1768 deletions(-)
delete mode 100644 extras/rya.prospector/src/main/groovy/org/apache/rya/prospector/domain/IndexEntry.groovy
delete mode 100644 extras/rya.prospector/src/main/groovy/org/apache/rya/prospector/domain/IntermediateProspect.groovy
delete mode 100644 extras/rya.prospector/src/main/groovy/org/apache/rya/prospector/domain/TripleValueType.java
delete mode 100644 extras/rya.prospector/src/main/groovy/org/apache/rya/prospector/mr/Prospector.groovy
delete mode 100644 extras/rya.prospector/src/main/groovy/org/apache/rya/prospector/mr/ProspectorCombiner.groovy
delete mode 100644 extras/rya.prospector/src/main/groovy/org/apache/rya/prospector/mr/ProspectorMapper.groovy
delete mode 100644 extras/rya.prospector/src/main/groovy/org/apache/rya/prospector/mr/ProspectorReducer.groovy
delete mode 100644 extras/rya.prospector/src/main/groovy/org/apache/rya/prospector/plans/IndexWorkPlan.groovy
delete mode 100644 extras/rya.prospector/src/main/groovy/org/apache/rya/prospector/plans/impl/CountPlan.groovy
delete mode 100644 extras/rya.prospector/src/main/groovy/org/apache/rya/prospector/plans/impl/ServicesBackedIndexWorkPlanManager.groovy
delete mode 100644 extras/rya.prospector/src/main/groovy/org/apache/rya/prospector/service/ProspectorService.groovy
delete mode 100644 extras/rya.prospector/src/main/groovy/org/apache/rya/prospector/service/ProspectorServiceEvalStatsDAO.groovy
delete mode 100644 extras/rya.prospector/src/main/groovy/org/apache/rya/prospector/utils/ProspectorConstants.groovy
delete mode 100644 extras/rya.prospector/src/main/groovy/org/apache/rya/prospector/utils/ProspectorUtils.groovy
create mode 100644 extras/rya.prospector/src/main/java/org/apache/rya/prospector/domain/IndexEntry.java
create mode 100644 extras/rya.prospector/src/main/java/org/apache/rya/prospector/domain/IntermediateProspect.java
create mode 100644 extras/rya.prospector/src/main/java/org/apache/rya/prospector/domain/TripleValueType.java
create mode 100644 extras/rya.prospector/src/main/java/org/apache/rya/prospector/mr/Prospector.java
create mode 100644 extras/rya.prospector/src/main/java/org/apache/rya/prospector/mr/ProspectorCombiner.java
create mode 100644 extras/rya.prospector/src/main/java/org/apache/rya/prospector/mr/ProspectorMapper.java
create mode 100644 extras/rya.prospector/src/main/java/org/apache/rya/prospector/mr/ProspectorReducer.java
create mode 100644 extras/rya.prospector/src/main/java/org/apache/rya/prospector/plans/IndexWorkPlan.java
rename extras/rya.prospector/src/main/{groovy/org/apache/rya/prospector/plans/IndexWorkPlanManager.groovy => java/org/apache/rya/prospector/plans/IndexWorkPlanManager.java} (71%)
create mode 100644 extras/rya.prospector/src/main/java/org/apache/rya/prospector/plans/impl/CountPlan.java
create mode 100644 extras/rya.prospector/src/main/java/org/apache/rya/prospector/plans/impl/ServicesBackedIndexWorkPlanManager.java
create mode 100644 extras/rya.prospector/src/main/java/org/apache/rya/prospector/service/ProspectorService.java
create mode 100644 extras/rya.prospector/src/main/java/org/apache/rya/prospector/service/ProspectorServiceEvalStatsDAO.java
rename extras/rya.prospector/src/main/{groovy/org/apache/rya/prospector/utils/CustomEntry.groovy => java/org/apache/rya/prospector/utils/CustomEntry.java} (55%)
create mode 100644 extras/rya.prospector/src/main/java/org/apache/rya/prospector/utils/ProspectorConstants.java
create mode 100644 extras/rya.prospector/src/main/java/org/apache/rya/prospector/utils/ProspectorUtils.java
delete mode 100644 extras/rya.prospector/src/test/groovy/org/apache/rya/prospector/mr/ProspectorTest.groovy
delete mode 100644 extras/rya.prospector/src/test/groovy/org/apache/rya/prospector/service/ProspectorServiceEvalStatsDAOTest.groovy
create mode 100644 extras/rya.prospector/src/test/java/org/apache/rya/prospector/mr/ProspectorTest.java
create mode 100644 extras/rya.prospector/src/test/java/org/apache/rya/prospector/service/ProspectorServiceEvalStatsDAOTest.java
diff --git a/common/rya.api/src/main/java/org/apache/rya/api/persist/RdfEvalStatsDAO.java b/common/rya.api/src/main/java/org/apache/rya/api/persist/RdfEvalStatsDAO.java
index b1d46c38e..0b63d5813 100644
--- a/common/rya.api/src/main/java/org/apache/rya/api/persist/RdfEvalStatsDAO.java
+++ b/common/rya.api/src/main/java/org/apache/rya/api/persist/RdfEvalStatsDAO.java
@@ -8,9 +8,9 @@
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
- *
+ *
* http://www.apache.org/licenses/LICENSE-2.0
- *
+ *
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
@@ -24,7 +24,6 @@
import java.util.List;
import org.apache.rya.api.RdfCloudTripleStoreConfiguration;
-
import org.openrdf.model.Resource;
import org.openrdf.model.Value;
@@ -44,9 +43,10 @@ public enum CARDINALITY_OF {
public void destroy() throws RdfDAOException;
+ // XXX returns -1 if no cardinality could be found.
public double getCardinality(C conf, CARDINALITY_OF card, List val) throws RdfDAOException;
public double getCardinality(C conf, CARDINALITY_OF card, List val, Resource context) throws RdfDAOException;
-
+
public void setConf(C conf);
public C getConf();
diff --git a/extras/indexing/src/main/java/org/apache/rya/indexing/accumulo/entity/EntityOptimizer.java b/extras/indexing/src/main/java/org/apache/rya/indexing/accumulo/entity/EntityOptimizer.java
index f3b7183b0..244493adf 100644
--- a/extras/indexing/src/main/java/org/apache/rya/indexing/accumulo/entity/EntityOptimizer.java
+++ b/extras/indexing/src/main/java/org/apache/rya/indexing/accumulo/entity/EntityOptimizer.java
@@ -25,6 +25,11 @@
import java.util.List;
import java.util.Set;
+import org.apache.accumulo.core.client.AccumuloException;
+import org.apache.accumulo.core.client.AccumuloSecurityException;
+import org.apache.accumulo.core.client.TableExistsException;
+import org.apache.hadoop.conf.Configurable;
+import org.apache.hadoop.conf.Configuration;
import org.apache.rya.accumulo.AccumuloRdfConfiguration;
import org.apache.rya.api.RdfCloudTripleStoreConfiguration;
import org.apache.rya.api.persist.joinselect.SelectivityEvalDAO;
@@ -33,11 +38,6 @@
import org.apache.rya.prospector.service.ProspectorServiceEvalStatsDAO;
import org.apache.rya.rdftriplestore.inference.DoNotExpandSP;
import org.apache.rya.rdftriplestore.utils.FixedStatementPattern;
-
-import org.apache.accumulo.core.client.AccumuloException;
-import org.apache.accumulo.core.client.AccumuloSecurityException;
-import org.apache.hadoop.conf.Configurable;
-import org.apache.hadoop.conf.Configuration;
import org.openrdf.query.BindingSet;
import org.openrdf.query.Dataset;
import org.openrdf.query.algebra.Filter;
@@ -47,12 +47,15 @@
import org.openrdf.query.algebra.TupleExpr;
import org.openrdf.query.algebra.evaluation.QueryOptimizer;
import org.openrdf.query.algebra.helpers.QueryModelVisitorBase;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
import com.google.common.collect.HashMultimap;
import com.google.common.collect.Lists;
import com.google.common.collect.Sets;
public class EntityOptimizer implements QueryOptimizer, Configurable {
+ private static final Logger LOG = LoggerFactory.getLogger(EntityOptimizer.class);
private SelectivityEvalDAO eval;
private RdfCloudTripleStoreConfiguration conf;
@@ -69,10 +72,8 @@ public EntityOptimizer(RdfCloudTripleStoreConfiguration conf) {
eval = new AccumuloSelectivityEvalDAO(conf, ConfigUtils.getConnector(conf));
((AccumuloSelectivityEvalDAO)eval).setRdfEvalDAO(new ProspectorServiceEvalStatsDAO(ConfigUtils.getConnector(conf), conf));
eval.init();
- } catch (AccumuloException e) {
- e.printStackTrace();
- } catch (AccumuloSecurityException e) {
- e.printStackTrace();
+ } catch (final AccumuloException | AccumuloSecurityException | TableExistsException e) {
+ LOG.warn("A problem was encountered while constructing the EntityOptimizer.", e);
}
isEvalDaoSet = true;
@@ -103,10 +104,8 @@ public void setConf(Configuration conf) {
eval = new AccumuloSelectivityEvalDAO(this.conf, ConfigUtils.getConnector(this.conf));
((AccumuloSelectivityEvalDAO)eval).setRdfEvalDAO(new ProspectorServiceEvalStatsDAO(ConfigUtils.getConnector(this.conf), this.conf));
eval.init();
- } catch (AccumuloException e) {
- e.printStackTrace();
- } catch (AccumuloSecurityException e) {
- e.printStackTrace();
+ } catch (final AccumuloException | AccumuloSecurityException | TableExistsException e) {
+ LOG.warn("A problem was encountered while setting the Configuration for the EntityOptimizer.", e);
}
isEvalDaoSet = true;
diff --git a/extras/indexing/src/main/java/org/apache/rya/indexing/accumulo/entity/EntityTupleSet.java b/extras/indexing/src/main/java/org/apache/rya/indexing/accumulo/entity/EntityTupleSet.java
index d829a29d5..42b7bb07a 100644
--- a/extras/indexing/src/main/java/org/apache/rya/indexing/accumulo/entity/EntityTupleSet.java
+++ b/extras/indexing/src/main/java/org/apache/rya/indexing/accumulo/entity/EntityTupleSet.java
@@ -1,5 +1,3 @@
-package org.apache.rya.indexing.accumulo.entity;
-
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
@@ -18,15 +16,17 @@
* specific language governing permissions and limitations
* under the License.
*/
-
-
-import info.aduna.iteration.CloseableIteration;
+package org.apache.rya.indexing.accumulo.entity;
import java.util.Collection;
import java.util.Collections;
import java.util.List;
import java.util.Set;
+import org.apache.accumulo.core.client.AccumuloException;
+import org.apache.accumulo.core.client.AccumuloSecurityException;
+import org.apache.accumulo.core.client.Connector;
+import org.apache.commons.io.IOUtils;
import org.apache.rya.accumulo.AccumuloRdfConfiguration;
import org.apache.rya.accumulo.AccumuloRyaDAO;
import org.apache.rya.api.RdfCloudTripleStoreConfiguration;
@@ -37,11 +37,6 @@
import org.apache.rya.rdftriplestore.RdfCloudTripleStore;
import org.apache.rya.rdftriplestore.RdfCloudTripleStoreConnection;
import org.apache.rya.rdftriplestore.evaluation.ExternalBatchingIterator;
-
-import org.apache.accumulo.core.client.AccumuloException;
-import org.apache.accumulo.core.client.AccumuloSecurityException;
-import org.apache.accumulo.core.client.Connector;
-import org.apache.commons.io.IOUtils;
import org.openrdf.query.BindingSet;
import org.openrdf.query.QueryEvaluationException;
import org.openrdf.query.algebra.StatementPattern;
@@ -49,12 +44,16 @@
import org.openrdf.query.algebra.evaluation.QueryBindingSet;
import org.openrdf.query.algebra.evaluation.impl.ExternalSet;
import org.openrdf.sail.SailException;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
import com.beust.jcommander.internal.Sets;
import com.google.common.base.Joiner;
-public class EntityTupleSet extends ExternalSet implements ExternalBatchingIterator {
+import info.aduna.iteration.CloseableIteration;
+public class EntityTupleSet extends ExternalSet implements ExternalBatchingIterator {
+ private static final Logger LOG = LoggerFactory.getLogger(EntityTupleSet.class);
private StarQuery starQuery;
private RdfCloudTripleStoreConfiguration conf;
@@ -97,26 +96,29 @@ private void init() {
} catch (AccumuloSecurityException e) {
e.printStackTrace();
}
- if (conf.isUseStats() && conf.isUseSelectivity()) {
- ProspectorServiceEvalStatsDAO evalDao = new ProspectorServiceEvalStatsDAO(accCon, conf);
- evalDao.init();
- AccumuloSelectivityEvalDAO ase = new AccumuloSelectivityEvalDAO(conf, accCon);
- ase.setRdfEvalDAO(evalDao);
- ase.init();
-
- cardinality = starQuery.getCardinality(ase);
- CardinalityStatementPattern csp = starQuery.getMinCardSp(ase);
-
- minCard = csp.getCardinality();
- minSp = csp.getSp();
- } else {
- // TODO come up with a better default if cardinality is not
- // initialized
- cardinality = minCard = 1;
- minSp = starQuery.getNodes().get(0);
+ try {
+ if (conf.isUseStats() && conf.isUseSelectivity()) {
+ ProspectorServiceEvalStatsDAO evalDao = new ProspectorServiceEvalStatsDAO(accCon, conf);
+ evalDao.init();
+ AccumuloSelectivityEvalDAO ase = new AccumuloSelectivityEvalDAO(conf, accCon);
+ ase.setRdfEvalDAO(evalDao);
+ ase.init();
+
+ cardinality = starQuery.getCardinality(ase);
+ CardinalityStatementPattern csp = starQuery.getMinCardSp(ase);
+
+ minCard = csp.getCardinality();
+ minSp = csp.getSp();
+ } else {
+ // TODO come up with a better default if cardinality is not
+ // initialized
+ cardinality = minCard = 1;
+ minSp = starQuery.getNodes().get(0);
+ }
+ } catch(final Exception e) {
+ LOG.warn("A problem was encountered while initializing the EntityTupleSet.", e);
}
-
}
@Override
@@ -224,7 +226,7 @@ private int numberOfSpVars(StatementPattern sp) {
@Override
- public CloseableIteration evaluate(final Collection bindingset) throws QueryEvaluationException {
+ public CloseableIteration evaluate(Collection bindingset) throws QueryEvaluationException {
if(bindingset.size() < 2 && !this.evalOptUsed) {
BindingSet bs = new QueryBindingSet();
@@ -248,7 +250,7 @@ public CloseableIteration evaluate(final Co
private RdfCloudTripleStoreConnection getRyaSailConnection() throws AccumuloException,
AccumuloSecurityException, SailException {
- final RdfCloudTripleStore store = new RdfCloudTripleStore();
+ RdfCloudTripleStore store = new RdfCloudTripleStore();
AccumuloRyaDAO crdfdao = new AccumuloRyaDAO();
crdfdao.setConnector(accCon);
AccumuloRdfConfiguration acc = new AccumuloRdfConfiguration(conf);
diff --git a/extras/rya.prospector/pom.xml b/extras/rya.prospector/pom.xml
index 952ab947b..35a9f67ac 100644
--- a/extras/rya.prospector/pom.xml
+++ b/extras/rya.prospector/pom.xml
@@ -48,10 +48,6 @@ under the License.
com.google.guava
guava
-
- org.codehaus.groovy
- groovy-all
-
org.apache.mrunit
@@ -75,74 +71,9 @@ under the License.
-
-
- org.eclipse.m2e
- lifecycle-mapping
- 1.0.0
-
-
-
-
-
- org.apache.maven.plugins
- maven-compiler-plugin
- [3.2,)
-
- compile
- testCompile
-
-
-
-
-
-
-
-
- org.codehaus.groovy
- groovy-eclipse-compiler
- [2.9.1-01,)
-
- add-groovy-build-paths
-
-
-
-
-
-
-
-
-
-
-
- maven-compiler-plugin
-
- groovy-eclipse-compiler
-
-
-
- org.codehaus.groovy
- groovy-eclipse-compiler
- 2.9.1-01
-
-
-
- org.codehaus.groovy
- groovy-eclipse-batch
- 2.3.7-01
-
-
-
-
- org.codehaus.groovy
- groovy-eclipse-compiler
- 2.9.1-01
- true
-
org.apache.maven.plugins
maven-shade-plugin
diff --git a/extras/rya.prospector/src/main/groovy/org/apache/rya/prospector/domain/IndexEntry.groovy b/extras/rya.prospector/src/main/groovy/org/apache/rya/prospector/domain/IndexEntry.groovy
deleted file mode 100644
index 8b0b670c7..000000000
--- a/extras/rya.prospector/src/main/groovy/org/apache/rya/prospector/domain/IndexEntry.groovy
+++ /dev/null
@@ -1,76 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.rya.prospector.domain
-
-/**
- * Date: 12/5/12
- * Time: 11:33 AM
- */
-class IndexEntry {
- def String index
- def String data
- def String dataType
- def String tripleValueType
- def String visibility
- def Long count
- def Long timestamp
-
- @Override
- public String toString() {
- return "IndexEntry{" +
- "index='" + index + '\'' +
- ", data='" + data + '\'' +
- ", dataType='" + dataType + '\'' +
- ", tripleValueType=" + tripleValueType +
- ", visibility='" + visibility + '\'' +
- ", timestamp='" + timestamp + '\'' +
- ", count=" + count +
- '}';
- }
-
- boolean equals(o) {
- if (this.is(o)) return true
- if (getClass() != o.class) return false
-
- IndexEntry that = (IndexEntry) o
-
- if (count != that.count) return false
- if (timestamp != that.timestamp) return false
- if (data != that.data) return false
- if (dataType != that.dataType) return false
- if (index != that.index) return false
- if (tripleValueType != that.tripleValueType) return false
- if (visibility != that.visibility) return false
-
- return true
- }
-
- int hashCode() {
- int result
- result = (index != null ? index.hashCode() : 0)
- result = 31 * result + (data != null ? data.hashCode() : 0)
- result = 31 * result + (dataType != null ? dataType.hashCode() : 0)
- result = 31 * result + (tripleValueType != null ? tripleValueType.hashCode() : 0)
- result = 31 * result + (visibility != null ? visibility.hashCode() : 0)
- result = 31 * result + (count != null ? count.hashCode() : 0)
- result = 31 * result + (timestamp != null ? timestamp.hashCode() : 0)
- return result
- }
-}
diff --git a/extras/rya.prospector/src/main/groovy/org/apache/rya/prospector/domain/IntermediateProspect.groovy b/extras/rya.prospector/src/main/groovy/org/apache/rya/prospector/domain/IntermediateProspect.groovy
deleted file mode 100644
index c5e34c05d..000000000
--- a/extras/rya.prospector/src/main/groovy/org/apache/rya/prospector/domain/IntermediateProspect.groovy
+++ /dev/null
@@ -1,70 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.rya.prospector.domain
-
-import org.apache.hadoop.io.WritableComparable
-
-import static org.apache.rya.prospector.domain.TripleValueType.*
-
-/**
- * Date: 12/3/12
- * Time: 11:15 AM
- */
-class IntermediateProspect implements WritableComparable {
-
- def String index
- def String data
- def String dataType
- def TripleValueType tripleValueType
- def String visibility
-
- @Override
- int compareTo(IntermediateProspect t) {
- if(!index.equals(t.index))
- return index.compareTo(t.index);
- if(!data.equals(t.data))
- return data.compareTo(t.data);
- if(!dataType.equals(t.dataType))
- return dataType.compareTo(t.dataType);
- if(!tripleValueType.equals(t.tripleValueType))
- return tripleValueType.compareTo(t.tripleValueType);
- if(!visibility.equals(t.visibility))
- return visibility.compareTo(t.visibility);
- return 0
- }
-
- @Override
- void write(DataOutput dataOutput) {
- dataOutput.writeUTF(index);
- dataOutput.writeUTF(data);
- dataOutput.writeUTF(dataType);
- dataOutput.writeUTF(tripleValueType.name());
- dataOutput.writeUTF(visibility);
- }
-
- @Override
- void readFields(DataInput dataInput) {
- index = dataInput.readUTF()
- data = dataInput.readUTF()
- dataType = dataInput.readUTF()
- tripleValueType = TripleValueType.valueOf(dataInput.readUTF())
- visibility = dataInput.readUTF()
- }
-}
diff --git a/extras/rya.prospector/src/main/groovy/org/apache/rya/prospector/domain/TripleValueType.java b/extras/rya.prospector/src/main/groovy/org/apache/rya/prospector/domain/TripleValueType.java
deleted file mode 100644
index 0c5307628..000000000
--- a/extras/rya.prospector/src/main/groovy/org/apache/rya/prospector/domain/TripleValueType.java
+++ /dev/null
@@ -1,26 +0,0 @@
-package org.apache.rya.prospector.domain;
-
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-
-public enum TripleValueType {
-
- subject, predicate, object, entity, subjectpredicate, predicateobject, subjectobject
-}
diff --git a/extras/rya.prospector/src/main/groovy/org/apache/rya/prospector/mr/Prospector.groovy b/extras/rya.prospector/src/main/groovy/org/apache/rya/prospector/mr/Prospector.groovy
deleted file mode 100644
index c51ecef69..000000000
--- a/extras/rya.prospector/src/main/groovy/org/apache/rya/prospector/mr/Prospector.groovy
+++ /dev/null
@@ -1,108 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.rya.prospector.mr
-
-import org.apache.rya.prospector.utils.ProspectorUtils
-import org.apache.accumulo.core.data.Mutation
-import org.apache.accumulo.core.data.Value
-import org.apache.accumulo.core.security.ColumnVisibility
-import org.apache.hadoop.conf.Configured
-import org.apache.hadoop.util.Tool
-import org.apache.hadoop.util.ToolRunner
-import org.apache.hadoop.conf.Configuration
-import org.apache.hadoop.fs.Path
-import org.apache.hadoop.mapreduce.Job
-
-import org.apache.hadoop.io.LongWritable
-import org.apache.commons.lang.time.DateUtils
-
-import org.apache.rya.prospector.domain.IntermediateProspect
-
-import com.google.common.collect.Lists
-
-import static org.apache.rya.prospector.utils.ProspectorConstants.*
-import static org.apache.rya.prospector.utils.ProspectorUtils.*
-
-/**
- * Date: 12/3/12
- * Time: 10:57 AM
- */
-class Prospector extends Configured implements Tool {
-
- private static long NOW = System.currentTimeMillis();
-
- private Date truncatedDate;
-
- public static void main(String[] args) {
- int res = ToolRunner.run(new Prospector(), args);
- System.exit(res);
- }
-
- @Override
- int run(String[] args) {
- Configuration conf = getConf();
-
- truncatedDate = DateUtils.truncate(new Date(NOW), Calendar.MINUTE);
-
- Path configurationPath = new Path(args[0]);
- conf.addResource(configurationPath);
-
- def inTable = conf.get("prospector.intable")
- def outTable = conf.get("prospector.outtable")
- def auths_str = conf.get("prospector.auths")
- assert inTable != null
- assert outTable != null
- assert auths_str != null
-
- Job job = new Job(getConf(), this.getClass().getSimpleName() + "_" + System.currentTimeMillis());
- job.setJarByClass(this.getClass());
-
- String[] auths = auths_str.split(",")
- ProspectorUtils.initMRJob(job, inTable, outTable, auths)
-
- job.getConfiguration().setLong("DATE", NOW);
-
- def performant = conf.get(PERFORMANT)
- if (Boolean.parseBoolean(performant)) {
- /**
- * Apply some performance tuning
- */
- ProspectorUtils.addMRPerformance(job.configuration)
- }
-
- job.setMapOutputKeyClass(IntermediateProspect.class);
- job.setMapOutputValueClass(LongWritable.class);
-
- job.setMapperClass(ProspectorMapper.class);
- job.setCombinerClass(ProspectorCombiner.class);
- job.setReducerClass(ProspectorReducer.class);
- job.waitForCompletion(true);
-
- int success = job.isSuccessful() ? 0 : 1;
-
- if (success == 0) {
- Mutation m = new Mutation(METADATA)
- m.put(PROSPECT_TIME, getReverseIndexDateTime(truncatedDate), new ColumnVisibility(DEFAULT_VIS), truncatedDate.time, new Value(EMPTY))
- writeMutations(connector(instance(conf), conf), outTable, [m])
- }
-
- return success
- }
-}
diff --git a/extras/rya.prospector/src/main/groovy/org/apache/rya/prospector/mr/ProspectorCombiner.groovy b/extras/rya.prospector/src/main/groovy/org/apache/rya/prospector/mr/ProspectorCombiner.groovy
deleted file mode 100644
index 784ffd2da..000000000
--- a/extras/rya.prospector/src/main/groovy/org/apache/rya/prospector/mr/ProspectorCombiner.groovy
+++ /dev/null
@@ -1,61 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.rya.prospector.mr
-
-import org.apache.rya.prospector.plans.IndexWorkPlan
-import org.apache.rya.prospector.plans.IndexWorkPlanManager
-import org.apache.rya.prospector.plans.impl.ServicesBackedIndexWorkPlanManager
-import org.apache.commons.lang.time.DateUtils
-import org.apache.hadoop.mapreduce.Reducer
-import org.apache.rya.prospector.utils.ProspectorUtils
-
-/**
- * Date: 12/3/12
- * Time: 11:06 AM
- */
-class ProspectorCombiner extends Reducer {
-
- private Date truncatedDate;
- private IndexWorkPlanManager manager = new ServicesBackedIndexWorkPlanManager()
- Map plans
-
- @Override
- public void setup(Reducer.Context context) throws IOException, InterruptedException {
- super.setup(context);
-
- long now = context.getConfiguration().getLong("DATE", System.currentTimeMillis());
- truncatedDate = DateUtils.truncate(new Date(now), Calendar.MINUTE);
-
- this.plans = ProspectorUtils.planMap(manager.plans)
- }
-
- @Override
- protected void reduce(def prospect, Iterable values, Reducer.Context context) {
- def plan = plans.get(prospect.index)
- if (plan != null) {
- def coll = plan.combine(prospect, values)
- if (coll != null) {
- coll.each { entry ->
- context.write(entry.key, entry.value)
- }
- }
- }
- }
-}
diff --git a/extras/rya.prospector/src/main/groovy/org/apache/rya/prospector/mr/ProspectorMapper.groovy b/extras/rya.prospector/src/main/groovy/org/apache/rya/prospector/mr/ProspectorMapper.groovy
deleted file mode 100644
index 36eab604d..000000000
--- a/extras/rya.prospector/src/main/groovy/org/apache/rya/prospector/mr/ProspectorMapper.groovy
+++ /dev/null
@@ -1,75 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.rya.prospector.mr
-
-import org.apache.rya.accumulo.AccumuloRdfConfiguration
-import org.apache.rya.api.RdfCloudTripleStoreConstants
-import org.apache.rya.api.domain.RyaStatement
-import org.apache.rya.api.resolver.RyaTripleContext
-import org.apache.rya.api.resolver.triple.TripleRow
-import org.apache.rya.prospector.plans.IndexWorkPlan
-import org.apache.rya.prospector.plans.IndexWorkPlanManager
-import org.apache.rya.prospector.plans.impl.ServicesBackedIndexWorkPlanManager
-
-import org.apache.commons.lang.time.DateUtils
-import org.apache.hadoop.mapreduce.Mapper
-
-/**
- * Date: 12/3/12
- * Time: 11:06 AM
- */
-class ProspectorMapper extends Mapper {
-
- private Date truncatedDate;
- private RyaTripleContext ryaContext;
- private IndexWorkPlanManager manager = new ServicesBackedIndexWorkPlanManager()
- private Collection plans = manager.plans
-
- @Override
- public void setup(Mapper.Context context) throws IOException, InterruptedException {
- super.setup(context);
-
- long now = context.getConfiguration().getLong("DATE", System.currentTimeMillis());
- ryaContext = RyaTripleContext.getInstance(new AccumuloRdfConfiguration(context.getConfiguration()));
- truncatedDate = DateUtils.truncate(new Date(now), Calendar.MINUTE);
- }
-
- @Override
- public void map(def row, def data, Mapper.Context context) {
- RyaStatement ryaStatement = ryaContext.deserializeTriple(RdfCloudTripleStoreConstants.TABLE_LAYOUT.SPO,
- new TripleRow(
- row.row.bytes,
- row.columnFamily.bytes,
- row.columnQualifier.bytes,
- row.timestamp,
- row.columnVisibility.bytes,
- data.get()
- )
- )
- plans.each { plan ->
- def coll = plan.map(ryaStatement)
- if (coll != null) {
- coll.each { entry ->
- context.write(entry.key, entry.value)
- }
- }
- }
- }
-}
diff --git a/extras/rya.prospector/src/main/groovy/org/apache/rya/prospector/mr/ProspectorReducer.groovy b/extras/rya.prospector/src/main/groovy/org/apache/rya/prospector/mr/ProspectorReducer.groovy
deleted file mode 100644
index 1f4352b84..000000000
--- a/extras/rya.prospector/src/main/groovy/org/apache/rya/prospector/mr/ProspectorReducer.groovy
+++ /dev/null
@@ -1,57 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.rya.prospector.mr
-
-import org.apache.rya.prospector.plans.IndexWorkPlan
-import org.apache.rya.prospector.plans.IndexWorkPlanManager
-import org.apache.rya.prospector.plans.impl.ServicesBackedIndexWorkPlanManager
-import org.apache.commons.lang.time.DateUtils
-import org.apache.hadoop.mapreduce.Reducer
-import org.apache.rya.prospector.utils.ProspectorUtils
-
-/**
- * Date: 12/3/12
- * Time: 11:06 AM
- */
-class ProspectorReducer extends Reducer {
-
- private Date truncatedDate;
- private IndexWorkPlanManager manager = new ServicesBackedIndexWorkPlanManager()
- Map plans
-
- @Override
- public void setup(Reducer.Context context) throws IOException, InterruptedException {
- super.setup(context);
-
- def conf = context.getConfiguration()
- long now = conf.getLong("DATE", System.currentTimeMillis());
- truncatedDate = DateUtils.truncate(new Date(now), Calendar.MINUTE);
-
- this.plans = ProspectorUtils.planMap(manager.plans)
- }
-
- @Override
- protected void reduce(def prospect, Iterable values, Reducer.Context context) {
- def plan = plans.get(prospect.index)
- if (plan != null) {
- plan.reduce(prospect, values, truncatedDate, context)
- }
- }
-}
diff --git a/extras/rya.prospector/src/main/groovy/org/apache/rya/prospector/plans/IndexWorkPlan.groovy b/extras/rya.prospector/src/main/groovy/org/apache/rya/prospector/plans/IndexWorkPlan.groovy
deleted file mode 100644
index 80316ea1c..000000000
--- a/extras/rya.prospector/src/main/groovy/org/apache/rya/prospector/plans/IndexWorkPlan.groovy
+++ /dev/null
@@ -1,51 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.rya.prospector.plans
-
-import org.apache.rya.api.domain.RyaStatement
-import org.apache.rya.prospector.domain.IntermediateProspect
-import org.apache.hadoop.io.LongWritable
-import org.apache.hadoop.mapreduce.Reducer
-import org.openrdf.model.vocabulary.XMLSchema
-import org.apache.rya.prospector.domain.IndexEntry
-
-/**
- * Date: 12/3/12
- * Time: 11:12 AM
- */
-public interface IndexWorkPlan {
-
- public static final String URITYPE = XMLSchema.ANYURI.stringValue()
- public static final LongWritable ONE = new LongWritable(1)
- public static final String DELIM = "\u0000";
-
- public Collection> map(RyaStatement ryaStatement)
-
- public Collection> combine(IntermediateProspect prospect, Iterable counts);
-
- public void reduce(IntermediateProspect prospect, Iterable counts, Date timestamp, Reducer.Context context)
-
- public String getIndexType()
-
- public String getCompositeValue(List indices)
-
- public List query(def connector, String tableName, List prospectTimes, String type, String index, String dataType, String[] auths)
-
-}
diff --git a/extras/rya.prospector/src/main/groovy/org/apache/rya/prospector/plans/impl/CountPlan.groovy b/extras/rya.prospector/src/main/groovy/org/apache/rya/prospector/plans/impl/CountPlan.groovy
deleted file mode 100644
index 51527a50a..000000000
--- a/extras/rya.prospector/src/main/groovy/org/apache/rya/prospector/plans/impl/CountPlan.groovy
+++ /dev/null
@@ -1,220 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.rya.prospector.plans.impl
-
-import org.apache.rya.api.domain.RyaStatement
-import org.apache.rya.prospector.domain.IndexEntry
-import org.apache.rya.prospector.domain.IntermediateProspect
-import org.apache.rya.prospector.domain.TripleValueType
-import org.apache.rya.prospector.plans.IndexWorkPlan
-import org.apache.rya.prospector.utils.CustomEntry
-import org.apache.rya.prospector.utils.ProspectorUtils
-
-import org.apache.accumulo.core.data.Mutation
-import org.apache.accumulo.core.data.Range
-import org.apache.accumulo.core.data.Value
-import org.apache.accumulo.core.security.Authorizations
-import org.apache.accumulo.core.security.ColumnVisibility
-import org.apache.hadoop.io.LongWritable
-import org.apache.hadoop.io.Text
-import org.apache.hadoop.mapreduce.Reducer
-import org.openrdf.model.util.URIUtil
-import org.openrdf.model.vocabulary.XMLSchema;
-
-import static org.apache.rya.prospector.utils.ProspectorConstants.COUNT;
-import org.apache.rya.api.RdfCloudTripleStoreConstants
-
-/**
- * Date: 12/3/12
- * Time: 12:28 PM
- */
-class CountPlan implements IndexWorkPlan {
-
- @Override
- Collection> map(RyaStatement ryaStatement) {
- def subject = ryaStatement.getSubject()
- def predicate = ryaStatement.getPredicate()
- def subjpred = ryaStatement.getSubject().data + DELIM + ryaStatement.getPredicate().data
- def predobj = ryaStatement.getPredicate().data + DELIM + ryaStatement.getObject().data
- def subjobj = ryaStatement.getSubject().data + DELIM + ryaStatement.getObject().data
- def object = ryaStatement.getObject()
- def localIndex = URIUtil.getLocalNameIndex(subject.data)
- def namespace = subject.data.substring(0, localIndex - 1)
- def visibility = new String(ryaStatement.columnVisibility)
- return [
- new CustomEntry(
- new IntermediateProspect(index: COUNT,
- data: subject.data,
- dataType: URITYPE,
- tripleValueType: TripleValueType.subject,
- visibility: visibility),
- ONE),
- new CustomEntry(
- new IntermediateProspect(index: COUNT,
- data: predicate.data,
- dataType: URITYPE,
- tripleValueType: TripleValueType.predicate,
- visibility: visibility
- ), ONE),
- new CustomEntry(
- new IntermediateProspect(index: COUNT,
- data: object.data,
- dataType: object.dataType.stringValue(),
- tripleValueType: TripleValueType.object,
- visibility: visibility
- ), ONE),
- new CustomEntry(
- new IntermediateProspect(index: COUNT,
- data: subjpred,
- dataType: XMLSchema.STRING,
- tripleValueType: TripleValueType.subjectpredicate,
- visibility: visibility
- ), ONE),
- new CustomEntry(
- new IntermediateProspect(index: COUNT,
- data: subjobj,
- dataType: XMLSchema.STRING,
- tripleValueType: TripleValueType.subjectobject,
- visibility: visibility
- ), ONE),
- new CustomEntry(
- new IntermediateProspect(index: COUNT,
- data: predobj,
- dataType: XMLSchema.STRING,
- tripleValueType: TripleValueType.predicateobject,
- visibility: visibility
- ), ONE),
- new CustomEntry(
- new IntermediateProspect(index: COUNT,
- data: namespace,
- dataType: URITYPE,
- tripleValueType: TripleValueType.entity,
- visibility: visibility
- ), ONE),
- ]
- }
-
- @Override
- Collection> combine(IntermediateProspect prospect, Iterable counts) {
-
- def iter = counts.iterator()
- long sum = 0;
- iter.each { lw ->
- sum += lw.get()
- }
-
- return [new CustomEntry(prospect, new LongWritable(sum))]
- }
-
- @Override
- void reduce(IntermediateProspect prospect, Iterable counts, Date timestamp, Reducer.Context context) {
- def iter = counts.iterator()
- long sum = 0;
- iter.each { lw ->
- sum += lw.get()
- }
-
- def indexType = prospect.tripleValueType.name()
-
- // not sure if this is the best idea..
- if ((sum >= 0) ||
- indexType.equals(TripleValueType.predicate.toString())) {
-
- Mutation m = new Mutation(indexType + DELIM + prospect.data + DELIM + ProspectorUtils.getReverseIndexDateTime(timestamp))
- m.put(COUNT, prospect.dataType, new ColumnVisibility(prospect.visibility), timestamp.getTime(), new Value("${sum}".getBytes()));
-
- context.write(null, m);
- }
- }
-
- @Override
- String getIndexType() {
- return COUNT
- }
-
- @Override
- String getCompositeValue(List indices){
- Iterator indexIt = indices.iterator();
- String compositeIndex = indexIt.next();
- while (indexIt.hasNext()){
- String value = indexIt.next();
- compositeIndex += DELIM + value;
- }
- return compositeIndex;
- }
-
- @Override
- List query(def connector, String tableName, List prospectTimes, String type, String compositeIndex, String dataType, String[] auths) {
-
- assert connector != null && tableName != null && type != null && compositeIndex != null
-
- def bs = connector.createBatchScanner(tableName, new Authorizations(auths), 4)
- def ranges = []
- int max = 1000; //by default only return 1000 prospects maximum
- if (prospectTimes != null) {
- prospectTimes.each { prospect ->
- ranges.add(
- new Range(type + DELIM + compositeIndex + DELIM + ProspectorUtils.getReverseIndexDateTime(new Date(prospect))))
- }
- } else {
- max = 1; //only return the latest if no prospectTimes given
- def prefix = type + DELIM + compositeIndex + DELIM;
- ranges.add(new Range(prefix, prefix + RdfCloudTripleStoreConstants.LAST))
- }
- bs.ranges = ranges
- if (dataType != null) {
- bs.fetchColumn(new Text(COUNT), new Text(dataType))
- } else {
- bs.fetchColumnFamily(new Text(COUNT))
- }
-
- List indexEntries = new ArrayList()
- def iter = bs.iterator()
-
- while (iter.hasNext() && indexEntries.size() <= max) {
- def entry = iter.next()
- def k = entry.key
- def v = entry.value
-
- def rowArr = k.row.toString().split(DELIM)
- String values = "";
- // if it is a composite index, then return the type as a composite index
- if (type.equalsIgnoreCase(TripleValueType.subjectpredicate.toString()) ||
- type.equalsIgnoreCase(TripleValueType.subjectobject.toString()) ||
- type.equalsIgnoreCase(TripleValueType.predicateobject.toString())){
- values =rowArr[1] + DELIM + rowArr[2]
- }
- else values = rowArr[1]
-
- indexEntries.add(new IndexEntry(data: values,
- tripleValueType: rowArr[0],
- index: COUNT,
- dataType: k.columnQualifier.toString(),
- visibility: k.columnVisibility.toString(),
- count: Long.parseLong(new String(v.get())),
- timestamp: k.timestamp
- ))
- }
- bs.close()
-
- return indexEntries
- }
-
-}
diff --git a/extras/rya.prospector/src/main/groovy/org/apache/rya/prospector/plans/impl/ServicesBackedIndexWorkPlanManager.groovy b/extras/rya.prospector/src/main/groovy/org/apache/rya/prospector/plans/impl/ServicesBackedIndexWorkPlanManager.groovy
deleted file mode 100644
index 07c81af7d..000000000
--- a/extras/rya.prospector/src/main/groovy/org/apache/rya/prospector/plans/impl/ServicesBackedIndexWorkPlanManager.groovy
+++ /dev/null
@@ -1,38 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.rya.prospector.plans.impl
-
-import org.apache.rya.prospector.plans.IndexWorkPlan
-import com.google.common.collect.Lists
-import org.apache.rya.prospector.plans.IndexWorkPlanManager
-
-/**
- * Date: 12/3/12
- * Time: 11:24 AM
- */
-class ServicesBackedIndexWorkPlanManager implements IndexWorkPlanManager {
-
- def Collection plans
-
- ServicesBackedIndexWorkPlanManager() {
- def iterator = ServiceLoader.load(IndexWorkPlan.class).iterator();
- plans = Lists.newArrayList(iterator)
- }
-}
diff --git a/extras/rya.prospector/src/main/groovy/org/apache/rya/prospector/service/ProspectorService.groovy b/extras/rya.prospector/src/main/groovy/org/apache/rya/prospector/service/ProspectorService.groovy
deleted file mode 100644
index d72e0e02b..000000000
--- a/extras/rya.prospector/src/main/groovy/org/apache/rya/prospector/service/ProspectorService.groovy
+++ /dev/null
@@ -1,126 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.rya.prospector.service
-
-import org.apache.rya.prospector.utils.ProspectorUtils
-import org.apache.accumulo.core.data.Key
-import org.apache.accumulo.core.data.Range
-import org.apache.accumulo.core.security.Authorizations
-import org.apache.hadoop.io.Text
-
-import static org.apache.rya.prospector.utils.ProspectorConstants.METADATA
-import static org.apache.rya.prospector.utils.ProspectorConstants.PROSPECT_TIME
-import org.apache.rya.prospector.plans.IndexWorkPlanManager
-import org.apache.rya.prospector.plans.impl.ServicesBackedIndexWorkPlanManager
-import org.apache.rya.prospector.plans.IndexWorkPlan
-import org.apache.rya.prospector.domain.IndexEntry
-
-/**
- * Date: 12/5/12
- * Time: 12:28 PM
- */
-class ProspectorService {
-
- def connector
- String tableName
-
- IndexWorkPlanManager manager = new ServicesBackedIndexWorkPlanManager()
- Map plans
-
- ProspectorService(def connector, String tableName) {
- this.connector = connector
- this.tableName = tableName
- this.plans = ProspectorUtils.planMap(manager.plans)
-
- //init
- def tos = connector.tableOperations()
- if(!tos.exists(tableName)) {
- tos.create(tableName)
- }
- }
-
- public Iterator getProspects(String[] auths) {
-
- def scanner = connector.createScanner(tableName, new Authorizations(auths))
- scanner.setRange(Range.exact(METADATA));
- scanner.fetchColumnFamily(new Text(PROSPECT_TIME));
-
- def iterator = scanner.iterator();
-
- return new Iterator() {
-
-
- @Override
- public boolean hasNext() {
- return iterator.hasNext();
- }
-
- @Override
- public Long next() {
- return iterator.next().getKey().getTimestamp();
- }
-
- @Override
- public void remove() {
- iterator.remove();
- }
- };
-
- }
-
- public Iterator getProspectsInRange(long beginTime, long endTime, String[] auths) {
-
- def scanner = connector.createScanner(tableName, new Authorizations(auths))
- scanner.setRange(new Range(
- new Key(METADATA, PROSPECT_TIME, ProspectorUtils.getReverseIndexDateTime(new Date(endTime)), "", Long.MAX_VALUE),
- new Key(METADATA, PROSPECT_TIME, ProspectorUtils.getReverseIndexDateTime(new Date(beginTime)), "", 0l)
- ))
- def iterator = scanner.iterator();
-
- return new Iterator() {
-
- @Override
- public boolean hasNext() {
- return iterator.hasNext();
- }
-
- @Override
- public Long next() {
- return iterator.next().getKey().getTimestamp();
- }
-
- @Override
- public void remove() {
- iterator.remove();
- }
- };
-
- }
-
- public List query(List prospectTimes, String indexType, String type, List index, String dataType, String[] auths) {
- assert indexType != null
-
- def plan = plans.get(indexType)
- assert plan != null: "Index Type: ${indexType} does not exist"
- String compositeIndex = plan.getCompositeValue(index);
-
- return plan.query(connector, tableName, prospectTimes, type, compositeIndex, dataType, auths)
- }
-}
diff --git a/extras/rya.prospector/src/main/groovy/org/apache/rya/prospector/service/ProspectorServiceEvalStatsDAO.groovy b/extras/rya.prospector/src/main/groovy/org/apache/rya/prospector/service/ProspectorServiceEvalStatsDAO.groovy
deleted file mode 100644
index 2c2b1539c..000000000
--- a/extras/rya.prospector/src/main/groovy/org/apache/rya/prospector/service/ProspectorServiceEvalStatsDAO.groovy
+++ /dev/null
@@ -1,122 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.rya.prospector.service
-
-import org.apache.rya.api.RdfCloudTripleStoreConfiguration
-import org.apache.rya.api.persist.RdfEvalStatsDAO
-import org.apache.rya.prospector.domain.TripleValueType
-import org.apache.rya.prospector.utils.ProspectorConstants
-import org.apache.hadoop.conf.Configuration
-import org.openrdf.model.Resource
-import org.openrdf.model.Value
-
-import org.apache.rya.api.persist.RdfEvalStatsDAO.CARDINALITY_OF
-
-/**
- * An ${@link org.apache.rya.api.persist.RdfEvalStatsDAO} that uses the Prospector Service underneath return counts.
- */
-class ProspectorServiceEvalStatsDAO implements RdfEvalStatsDAO {
-
- def ProspectorService prospectorService
-
- ProspectorServiceEvalStatsDAO() {
- }
-
- ProspectorServiceEvalStatsDAO(ProspectorService prospectorService, RdfCloudTripleStoreConfiguration conf) {
- this.prospectorService = prospectorService
- }
-
- public ProspectorServiceEvalStatsDAO(def connector, RdfCloudTripleStoreConfiguration conf) {
- this.prospectorService = new ProspectorService(connector, getProspectTableName(conf))
- }
-
- @Override
- void init() {
- assert prospectorService != null
- }
-
- @Override
- boolean isInitialized() {
- return prospectorService != null
- }
-
- @Override
- void destroy() {
-
- }
-
- @Override
- public double getCardinality(RdfCloudTripleStoreConfiguration conf, CARDINALITY_OF card, List val) {
-
- assert conf != null && card != null && val != null
- String triplePart = null;
- switch (card) {
- case (CARDINALITY_OF.SUBJECT):
- triplePart = TripleValueType.subject
- break;
- case (CARDINALITY_OF.PREDICATE):
- triplePart = TripleValueType.predicate
- break;
- case (CARDINALITY_OF.OBJECT):
- triplePart = TripleValueType.object
- break;
- case (CARDINALITY_OF.SUBJECTPREDICATE):
- triplePart = TripleValueType.subjectpredicate
- break;
- case (CARDINALITY_OF.SUBJECTOBJECT):
- triplePart = TripleValueType.subjectobject
- break;
- case (CARDINALITY_OF.PREDICATEOBJECT):
- triplePart = TripleValueType.predicateobject
- break;
- }
-
- String[] auths = conf.getAuths()
- List indexedValues = new ArrayList();
- Iterator valueIt = val.iterator();
- while (valueIt.hasNext()){
- indexedValues.add(valueIt.next().stringValue());
- }
-
- def indexEntries = prospectorService.query(null, ProspectorConstants.COUNT, triplePart, indexedValues, null /** what is the datatype here? */,
- auths)
-
- return indexEntries.size() > 0 ? indexEntries.head().count : -1
- }
-
- @Override
- double getCardinality(RdfCloudTripleStoreConfiguration conf, CARDINALITY_OF card, List val, Resource context) {
- return getCardinality(conf, card, val) //TODO: Not sure about the context yet
- }
-
- @Override
- public void setConf(RdfCloudTripleStoreConfiguration conf) {
-
- }
-
- @Override
- RdfCloudTripleStoreConfiguration getConf() {
- return null
- }
-
- public static String getProspectTableName(RdfCloudTripleStoreConfiguration conf) {
- return conf.getTablePrefix() + "prospects";
- }
-}
diff --git a/extras/rya.prospector/src/main/groovy/org/apache/rya/prospector/utils/ProspectorConstants.groovy b/extras/rya.prospector/src/main/groovy/org/apache/rya/prospector/utils/ProspectorConstants.groovy
deleted file mode 100644
index 29eac3799..000000000
--- a/extras/rya.prospector/src/main/groovy/org/apache/rya/prospector/utils/ProspectorConstants.groovy
+++ /dev/null
@@ -1,41 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.rya.prospector.utils
-
-/**
- * Date: 12/5/12
- * Time: 10:57 AM
- */
-class ProspectorConstants {
- public static final String COUNT = "count"
- public static final String METADATA = "metadata"
- public static final String PROSPECT_TIME = "prospectTime"
- public static final String DEFAULT_VIS = "U&FOUO"
- public static final byte[] EMPTY = new byte [0];
-
- //config properties
- public static final String PERFORMANT = "performant"
-
- public static final String USERNAME = "username"
- public static final String PASSWORD = "password"
- public static final String INSTANCE = "instance"
- public static final String ZOOKEEPERS = "zookeepers"
- public static final String MOCK = "mock"
-}
diff --git a/extras/rya.prospector/src/main/groovy/org/apache/rya/prospector/utils/ProspectorUtils.groovy b/extras/rya.prospector/src/main/groovy/org/apache/rya/prospector/utils/ProspectorUtils.groovy
deleted file mode 100644
index e4142d9bd..000000000
--- a/extras/rya.prospector/src/main/groovy/org/apache/rya/prospector/utils/ProspectorUtils.groovy
+++ /dev/null
@@ -1,138 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.rya.prospector.utils
-
-import org.apache.accumulo.core.client.Connector
-import org.apache.accumulo.core.client.Instance
-import org.apache.accumulo.core.client.ZooKeeperInstance
-import org.apache.accumulo.core.client.mapreduce.AccumuloInputFormat
-import org.apache.accumulo.core.client.mapreduce.AccumuloOutputFormat
-import org.apache.accumulo.core.client.mock.MockInstance
-import org.apache.accumulo.core.data.Mutation
-import org.apache.accumulo.core.security.Authorizations
-import org.apache.commons.lang.Validate
-import org.apache.hadoop.conf.Configuration
-import org.apache.hadoop.io.Text
-import org.apache.hadoop.io.compress.GzipCodec
-import org.apache.hadoop.mapreduce.Job
-
-import java.text.SimpleDateFormat
-import org.apache.rya.prospector.plans.IndexWorkPlan
-import org.apache.accumulo.core.client.security.tokens.PasswordToken
-
-import static org.apache.rya.prospector.utils.ProspectorConstants.*
-
-/**
- * Date: 12/4/12
- * Time: 4:24 PM
- */
-class ProspectorUtils {
-
- public static final long INDEXED_DATE_SORT_VAL = 999999999999999999L; // 18 char long, same length as date format pattern below
- public static final String INDEXED_DATE_FORMAT = "yyyyMMddHHmmsssSSS";
-
- public static String getReverseIndexDateTime(Date date) {
- Validate.notNull(date);
- String formattedDateString = new SimpleDateFormat(INDEXED_DATE_FORMAT).format(date);
- long diff = INDEXED_DATE_SORT_VAL - Long.valueOf(formattedDateString);
-
- return Long.toString(diff);
- }
-
- public static Map planMap(def plans) {
- plans.inject([:]) { map, plan ->
- map.putAt(plan.indexType, plan)
- map
- }
- }
-
- public static void initMRJob(Job job, String table, String outtable, String[] auths) {
- Configuration conf = job.configuration
- String username = conf.get(USERNAME)
- String password = conf.get(PASSWORD)
- String instance = conf.get(INSTANCE)
- String zookeepers = conf.get(ZOOKEEPERS)
- String mock = conf.get(MOCK)
-
- //input
- if (Boolean.parseBoolean(mock)) {
- AccumuloInputFormat.setMockInstance(job, instance)
- AccumuloOutputFormat.setMockInstance(job, instance)
- } else if (zookeepers != null) {
- AccumuloInputFormat.setZooKeeperInstance(job, instance, zookeepers)
- AccumuloOutputFormat.setZooKeeperInstance(job, instance, zookeepers)
- } else {
- throw new IllegalArgumentException("Must specify either mock or zookeepers");
- }
-
- AccumuloInputFormat.setConnectorInfo(job, username, new PasswordToken(password.getBytes()))
- AccumuloInputFormat.setInputTableName(job, table)
- job.setInputFormatClass(AccumuloInputFormat.class);
- AccumuloInputFormat.setScanAuthorizations(job, new Authorizations(auths))
-
- // OUTPUT
- job.setOutputFormatClass(AccumuloOutputFormat.class);
- job.setOutputKeyClass(Text.class);
- job.setOutputValueClass(Mutation.class);
- AccumuloOutputFormat.setConnectorInfo(job, username, new PasswordToken(password.getBytes()))
- AccumuloOutputFormat.setDefaultTableName(job, outtable)
- }
-
- public static void addMRPerformance(Configuration conf) {
- conf.setBoolean("mapred.map.tasks.speculative.execution", false);
- conf.setBoolean("mapred.reduce.tasks.speculative.execution", false);
- conf.set("io.sort.mb", "256");
- conf.setBoolean("mapred.compress.map.output", true);
- conf.set("mapred.map.output.compression.codec", GzipCodec.class.getName());
- }
-
- public static Instance instance(Configuration conf) {
- assert conf != null
-
- String instance_str = conf.get(INSTANCE)
- String zookeepers = conf.get(ZOOKEEPERS)
- String mock = conf.get(MOCK)
- if (Boolean.parseBoolean(mock)) {
- return new MockInstance(instance_str)
- } else if (zookeepers != null) {
- return new ZooKeeperInstance(instance_str, zookeepers)
- } else {
- throw new IllegalArgumentException("Must specify either mock or zookeepers");
- }
- }
-
- public static Connector connector(Instance instance, Configuration conf) {
- String username = conf.get(USERNAME)
- String password = conf.get(PASSWORD)
- if (instance == null)
- instance = instance(conf)
- return instance.getConnector(username, password)
- }
-
- public static void writeMutations(Connector connector, String tableName, def mutations) {
- def bw = connector.createBatchWriter(tableName, 10000l, 10000l, 4);
- mutations.each { m ->
- bw.addMutation(m)
- }
- bw.flush()
- bw.close()
- }
-
-}
diff --git a/extras/rya.prospector/src/main/java/org/apache/rya/prospector/domain/IndexEntry.java b/extras/rya.prospector/src/main/java/org/apache/rya/prospector/domain/IndexEntry.java
new file mode 100644
index 000000000..4d4dfc8d9
--- /dev/null
+++ b/extras/rya.prospector/src/main/java/org/apache/rya/prospector/domain/IndexEntry.java
@@ -0,0 +1,241 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.rya.prospector.domain;
+
+import java.util.Objects;
+
+import org.apache.rya.prospector.mr.Prospector;
+import org.apache.rya.prospector.plans.IndexWorkPlan;
+
+/**
+ * Represents a count that was the result of a {@link Prospector} run.
+ */
+public class IndexEntry {
+
+ private final String index;
+ private final String data;
+ private final String dataType;
+ private final String tripleValueType;
+ private final String visibility;
+ private final Long count;
+ private final Long timestamp;
+
+ /**
+ * Constructs an instance of {@link IndexEntry}.
+ *
+ * @param index - Indicates which {@link IndexWorkPlan} the data came from.
+ * @param data - The information that is being counted.
+ * @param dataType - The data type of {@code data}.
+ * @param tripleValueType - Indicates which parts of the RDF Statement are included in {@code data}.
+ * @param visibility - The visibility of this entry.
+ * @param count - The number of times the {@code data} appeared within Rya.
+ * @param timestamp - Identifies which Prospect run this entry belongs to.
+ */
+ public IndexEntry(
+ final String index,
+ final String data,
+ final String dataType,
+ final String tripleValueType,
+ final String visibility,
+ final Long count,
+ final Long timestamp) {
+ this.index = index;
+ this.data = data;
+ this.dataType = dataType;
+ this.tripleValueType = tripleValueType;
+ this.visibility = visibility;
+ this.count = count;
+ this.timestamp = timestamp;
+ }
+
+ /**
+ * @return Indicates which {@link IndexWorkPlan} the data came from.
+ */
+ public String getIndex() {
+ return index;
+ }
+
+ /**
+ * @return The information that is being counted.
+ */
+ public String getData() {
+ return data;
+ }
+
+ /**
+ * @return The data type of {@code data}.
+ */
+ public String getDataType() {
+ return dataType;
+ }
+
+ /**
+ * @return Indicates which parts of the RDF Statement are included in {@code data}.
+ */
+ public String getTripleValueType() {
+ return tripleValueType;
+ }
+
+ /**
+ * @return The visibility of this entry.
+ */
+ public String getVisibility() {
+ return visibility;
+ }
+
+ /**
+ * @return The number of times the {@code data} appeared within Rya.
+ */
+ public Long getCount() {
+ return count;
+ }
+
+ /**
+ * @return Identifies which Prospect run this entry belongs to.
+ */
+ public Long getTimestamp() {
+ return timestamp;
+ }
+
+ @Override
+ public String toString() {
+ return "IndexEntry{" +
+ "index='" + index + '\'' +
+ ", data='" + data + '\'' +
+ ", dataType='" + dataType + '\'' +
+ ", tripleValueType=" + tripleValueType +
+ ", visibility='" + visibility + '\'' +
+ ", timestamp='" + timestamp + '\'' +
+ ", count=" + count +
+ '}';
+ }
+
+ @Override
+ public int hashCode() {
+ return Objects.hash(index, data, dataType, tripleValueType, visibility, count, timestamp);
+ }
+
+ @Override
+ public boolean equals(Object o) {
+ if(this == o) {
+ return true;
+ }
+ if(o instanceof IndexEntry) {
+ final IndexEntry entry = (IndexEntry) o;
+ return Objects.equals(index, entry.index) &&
+ Objects.equals(data, entry.data) &&
+ Objects.equals(dataType, entry.dataType) &&
+ Objects.equals(tripleValueType, entry.tripleValueType) &&
+ Objects.equals(visibility, entry.visibility) &&
+ Objects.equals(count, entry.count) &&
+ Objects.equals(timestamp, entry.timestamp);
+ }
+ return false;
+ }
+
+ /**
+ * @return An empty instance of {@link Builder}.
+ */
+ public static Builder builder() {
+ return new Builder();
+ }
+
+ /**
+ * Builds instances of {@link IndexEntry}.
+ */
+ public static final class Builder {
+ private String index;
+ private String data;
+ private String dataType;
+ private String tripleValueType;
+ private String visibility;
+ private Long count;
+ private Long timestamp;
+
+ /**
+ * @param index - Indicates which {@link IndexWorkPlan} the data came from.
+ * @return This {@link Builder} so that method invocations may be chained.
+ */
+ public Builder setIndex(String index) {
+ this.index = index;
+ return this;
+ }
+
+ /**
+ * @param data - The information that is being counted.
+ * @return This {@link Builder} so that method invocations may be chained.
+ */
+ public Builder setData(String data) {
+ this.data = data;
+ return this;
+ }
+
+ /**
+ * @param dataType - The data type of {@code data}.
+ * @return This {@link Builder} so that method invocations may be chained.
+ */
+ public Builder setDataType(String dataType) {
+ this.dataType = dataType;
+ return this;
+ }
+
+ /**
+ * @param tripleValueType - Indicates which parts of the RDF Statement are included in {@code data}.
+ * @return This {@link Builder} so that method invocations may be chained.
+ */
+ public Builder setTripleValueType(String tripleValueType) {
+ this.tripleValueType = tripleValueType;
+ return this;
+ }
+
+ /**
+ * @param visibility - The visibility of this entry.
+ * @return This {@link Builder} so that method invocations may be chained.
+ */
+ public Builder setVisibility(String visibility) {
+ this.visibility = visibility;
+ return this;
+ }
+
+ /**
+ * @param count - The number of times the {@code data} appeared within Rya.
+ * @return This {@link Builder} so that method invocations may be chained.
+ */
+ public Builder setCount(Long count) {
+ this.count = count;
+ return this;
+ }
+
+ /**
+ * @param timestamp - Identifies which Prospect run this entry belongs to.
+ * @return This {@link Builder} so that method invocations may be chained.
+ */
+ public Builder setTimestamp(Long timestamp) {
+ this.timestamp = timestamp;
+ return this;
+ }
+
+ /**
+ * @return Constructs an instance of {@link IndexEntry} built using this builder's values.
+ */
+ public IndexEntry build() {
+ return new IndexEntry(index, data, dataType, tripleValueType, visibility, count, timestamp);
+ }
+ }
+}
\ No newline at end of file
diff --git a/extras/rya.prospector/src/main/java/org/apache/rya/prospector/domain/IntermediateProspect.java b/extras/rya.prospector/src/main/java/org/apache/rya/prospector/domain/IntermediateProspect.java
new file mode 100644
index 000000000..8c523787a
--- /dev/null
+++ b/extras/rya.prospector/src/main/java/org/apache/rya/prospector/domain/IntermediateProspect.java
@@ -0,0 +1,213 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.rya.prospector.domain;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+
+import org.apache.hadoop.io.WritableComparable;
+import org.apache.rya.prospector.mr.Prospector;
+import org.apache.rya.prospector.plans.IndexWorkPlan;
+
+/**
+ * Represents a piece of information that is being counted during the process
+ * of running a {@link Prospector} job.
+ */
+public class IntermediateProspect implements WritableComparable {
+
+ private String index;
+ private String data;
+ private String dataType;
+ private TripleValueType tripleValueType;
+ private String visibility;
+
+ /**
+ * Constructs an uninitialized instance of {@link IntermediateProspect}.
+ * This constructor is required to integration with Map Reduce's
+ * {@link WritableComparable} interface.
+ */
+ public IntermediateProspect() { }
+
+ /**
+ * Constructs an instance of {@link IntermediateProspect}.
+ *
+ * @param index - Indicates which {@link IndexWorkPlan} the data is part of.
+ * @param data - The information that is being counted.
+ * @param dataType - The data type of {@code data}.
+ * @param tripleValueType - Indicates which parts of the RDF Statement are included in {@code data}.
+ * @param visibility - The visibility of this entry.
+ */
+ public IntermediateProspect(
+ final String index,
+ final String data,
+ final String dataType,
+ final TripleValueType tripleValueType,
+ final String visibility) {
+ this.index = index;
+ this.data = data;
+ this.dataType = dataType;
+ this.tripleValueType = tripleValueType;
+ this.visibility = visibility;
+ }
+
+ /**
+ * @return Indicates which {@link IndexWorkPlan} the data is part of.
+ */
+ public String getIndex() {
+ return index;
+ }
+
+ /**
+ * @return The information that is being counted.
+ */
+ public String getData() {
+ return data;
+ }
+
+ /**
+ * @return The data type of {@code data}.
+ */
+ public String getDataType() {
+ return dataType;
+ }
+
+ /**
+ * @return Indicates which parts of the RDF Statement are included in {@code data}.
+ */
+ public TripleValueType getTripleValueType() {
+ return tripleValueType;
+ }
+
+ /**
+ * @return The visibility of this entry.
+ */
+ public String getVisibility() {
+ return visibility;
+ }
+
+ @Override
+ public int compareTo(IntermediateProspect t) {
+ if(!index.equals(t.index)) {
+ return index.compareTo(t.index);
+ }
+ if(!data.equals(t.data)) {
+ return data.compareTo(t.data);
+ }
+ if(!dataType.equals(t.dataType)) {
+ return dataType.compareTo(t.dataType);
+ }
+ if(!tripleValueType.equals(t.tripleValueType)) {
+ return tripleValueType.compareTo(t.tripleValueType);
+ }
+ if(!visibility.equals(t.visibility)) {
+ return visibility.compareTo(t.visibility);
+ }
+ return 0;
+ }
+
+ @Override
+ public void write(DataOutput dataOutput) throws IOException {
+ dataOutput.writeUTF(index);
+ dataOutput.writeUTF(data);
+ dataOutput.writeUTF(dataType);
+ dataOutput.writeUTF(tripleValueType.name());
+ dataOutput.writeUTF(visibility);
+ }
+
+ @Override
+ public void readFields(DataInput dataInput) throws IOException {
+ index = dataInput.readUTF();
+ data = dataInput.readUTF();
+ dataType = dataInput.readUTF();
+ tripleValueType = TripleValueType.valueOf(dataInput.readUTF());
+ visibility = dataInput.readUTF();
+ }
+
+ /**
+ * @return An empty instance of {@link Builder}.
+ */
+ public static Builder builder() {
+ return new Builder();
+ }
+
+ /**
+ * Builds instances of {@link IntermediateProspect}.
+ */
+ public static final class Builder {
+
+ private String index;
+ private String data;
+ private String dataType;
+ private TripleValueType tripleValueType;
+ private String visibility;
+
+ /**
+ * @param index - Indicates which {@link IndexWorkPlan} the data is part of.
+ * @return This {@link Builder} so that method invocations may be chained.
+ */
+ public Builder setIndex(String index) {
+ this.index = index;
+ return this;
+ }
+
+ /**
+ * @param data - The information that is being counted.
+ * @return This {@link Builder} so that method invocations may be chained.
+ */
+ public Builder setData(String data) {
+ this.data = data;
+ return this;
+ }
+
+ /**
+ * @param dataType - The data type of {@code data}.
+ * @return This {@link Builder} so that method invocations may be chained.
+ */
+ public Builder setDataType(String dataType) {
+ this.dataType = dataType;
+ return this;
+ }
+
+ /**
+ * @param tripleValueType - Indicates which parts of the RDF Statement are included in {@code data}.
+ * @return This {@link Builder} so that method invocations may be chained.
+ */
+ public Builder setTripleValueType(TripleValueType tripleValueType) {
+ this.tripleValueType = tripleValueType;
+ return this;
+ }
+
+ /**
+ * @param visibility - The visibility of this entry.
+ * @return This {@link Builder} so that method invocations may be chained.
+ */
+ public Builder setVisibility(String visibility) {
+ this.visibility = visibility;
+ return this;
+ }
+
+ /**
+ * @return Constructs an instance of {@link IntermediateProspect} built using this builder's values.
+ */
+ public IntermediateProspect build() {
+ return new IntermediateProspect(index, data, dataType, tripleValueType, visibility);
+ }
+ }
+}
\ No newline at end of file
diff --git a/extras/rya.prospector/src/main/java/org/apache/rya/prospector/domain/TripleValueType.java b/extras/rya.prospector/src/main/java/org/apache/rya/prospector/domain/TripleValueType.java
new file mode 100644
index 000000000..16e7916b7
--- /dev/null
+++ b/extras/rya.prospector/src/main/java/org/apache/rya/prospector/domain/TripleValueType.java
@@ -0,0 +1,101 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.rya.prospector.domain;
+
+import static java.util.Objects.requireNonNull;
+
+import com.google.common.collect.ImmutableMap;
+
+/**
+ * Enumerates the different types of counts that are performed over a Rya instance's
+ * Statements as part of a Prospector run.
+ */
+public enum TripleValueType {
+    /**
+     * The data portion of an {@link IndexEntry} contains a unique Subject that
+     * appears within a Rya instance's Statements.
+     */
+    SUBJECT("subject"),
+
+    /**
+     * The data portion of an {@link IndexEntry} contains a unique Predicate that
+     * appears within a Rya instance's Statements.
+     */
+    PREDICATE("predicate"),
+
+    /**
+     * The data portion of an {@link IndexEntry} contains a unique Object that
+     * appears within a Rya instance's Statements.
+     */
+    OBJECT("object"),
+
+    /**
+     * The data portion of an {@link IndexEntry} contains a unique Namespace from
+     * the Subjects that appear within a Rya instance.
+     */
+    ENTITY("entity"),
+
+    /**
+     * The data portion of an {@link IndexEntry} contains a unique Subject and Predicate
+     * pair that appears within a Rya instance's Statements.
+     */
+    SUBJECT_PREDICATE("subjectpredicate"),
+
+    /**
+     * The data portion of an {@link IndexEntry} contains a unique Predicate and Object
+     * pair that appears within a Rya instance's Statements.
+     */
+    PREDICATE_OBJECT("predicateobject"),
+
+    /**
+     * The data portion of an {@link IndexEntry} contains a unique Subject and Object
+     * pair that appears within a Rya instance's Statements.
+     */
+    SUBJECT_OBJECT("subjectobject");
+
+    private final String indexType;
+
+    private TripleValueType(String indexType) {
+        this.indexType = requireNonNull(indexType);
+    }
+
+    /**
+     * @return The Prospector Index Type represented by the enum value.
+     */
+    public String getIndexType() {
+        return indexType;
+    }
+
+    private static final ImmutableMap<String, TripleValueType> lookup;
+    static {
+        ImmutableMap.Builder<String, TripleValueType> builder = ImmutableMap.builder();
+        for(TripleValueType type : TripleValueType.values()) {
+            builder.put(type.getIndexType(), type);
+        }
+        lookup = builder.build();
+    }
+
+    /**
+     * @param indexType - The index name to lookup.
+     * @return The enum value that represents the index name, or {@code null} if no value uses that name.
+     */
+    public static TripleValueType fromIndexType(String indexType) {
+        return lookup.get(indexType);
+    }
+}
\ No newline at end of file
diff --git a/extras/rya.prospector/src/main/java/org/apache/rya/prospector/mr/Prospector.java b/extras/rya.prospector/src/main/java/org/apache/rya/prospector/mr/Prospector.java
new file mode 100644
index 000000000..78ea37132
--- /dev/null
+++ b/extras/rya.prospector/src/main/java/org/apache/rya/prospector/mr/Prospector.java
@@ -0,0 +1,113 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.rya.prospector.mr;
+
+import static org.apache.rya.prospector.utils.ProspectorConstants.DEFAULT_VIS;
+import static org.apache.rya.prospector.utils.ProspectorConstants.EMPTY;
+import static org.apache.rya.prospector.utils.ProspectorConstants.METADATA;
+import static org.apache.rya.prospector.utils.ProspectorConstants.PERFORMANT;
+import static org.apache.rya.prospector.utils.ProspectorConstants.PROSPECT_TIME;
+import static org.apache.rya.prospector.utils.ProspectorUtils.connector;
+import static org.apache.rya.prospector.utils.ProspectorUtils.getReverseIndexDateTime;
+import static org.apache.rya.prospector.utils.ProspectorUtils.instance;
+import static org.apache.rya.prospector.utils.ProspectorUtils.writeMutations;
+
+import java.util.Calendar;
+import java.util.Collections;
+import java.util.Date;
+
+import org.apache.accumulo.core.data.Mutation;
+import org.apache.accumulo.core.data.Value;
+import org.apache.accumulo.core.security.ColumnVisibility;
+import org.apache.commons.lang.time.DateUtils;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.util.Tool;
+import org.apache.hadoop.util.ToolRunner;
+import org.apache.rya.prospector.domain.IntermediateProspect;
+import org.apache.rya.prospector.utils.ProspectorUtils;
+
+/**
+ * Configures and runs the Hadoop Map Reduce job that executes the Prospector's work.
+ */
+public class Prospector extends Configured implements Tool {
+
+    /** Captured once when the class loads; handed to the job's tasks through the "DATE" setting. */
+    private static final long NOW = System.currentTimeMillis();
+
+    private Date truncatedDate;
+
+    /** Command line entry point; exits with the status returned by {@link #run(String[])}. */
+    public static void main(String[] args) throws Exception {
+        final int res = ToolRunner.run(new Prospector(), args);
+        System.exit(res);
+    }
+
+    /** Runs the job described by the configuration file at {@code args[0]}; returns 0 on success, 1 otherwise. */
+    @Override
+    public int run(String[] args) throws Exception {
+        final Configuration conf = getConf();
+
+        truncatedDate = DateUtils.truncate(new Date(NOW), Calendar.MINUTE);
+
+        final Path configurationPath = new Path(args[0]);
+        conf.addResource(configurationPath);
+
+        // Java skips assert statements unless the JVM is started with -ea, so the required
+        // settings are checked explicitly and a missing one fails fast with a clear message.
+        final String inTable = java.util.Objects.requireNonNull(conf.get("prospector.intable"), "prospector.intable must be configured");
+        final String outTable = java.util.Objects.requireNonNull(conf.get("prospector.outtable"), "prospector.outtable must be configured");
+        final String auths_str = java.util.Objects.requireNonNull(conf.get("prospector.auths"), "prospector.auths must be configured");
+
+        final Job job = new Job(getConf(), this.getClass().getSimpleName() + "_" + System.currentTimeMillis());
+        job.setJarByClass(this.getClass());
+
+        final String[] auths = auths_str.split(",");
+        ProspectorUtils.initMRJob(job, inTable, outTable, auths);
+
+        job.getConfiguration().setLong("DATE", NOW);
+
+        final String performant = conf.get(PERFORMANT);
+        if (Boolean.parseBoolean(performant)) {
+            // Apply some performance tuning
+            ProspectorUtils.addMRPerformance(job.getConfiguration());
+        }
+
+        job.setMapOutputKeyClass(IntermediateProspect.class);
+        job.setMapOutputValueClass(LongWritable.class);
+
+        job.setMapperClass(ProspectorMapper.class);
+        job.setCombinerClass(ProspectorCombiner.class);
+        job.setReducerClass(ProspectorReducer.class);
+        job.waitForCompletion(true);
+
+        final int success = job.isSuccessful() ? 0 : 1;
+
+        if (success == 0) {
+            final Mutation m = new Mutation(METADATA);
+            m.put(PROSPECT_TIME, getReverseIndexDateTime(truncatedDate), new ColumnVisibility(DEFAULT_VIS), truncatedDate.getTime(), new Value(EMPTY));
+            writeMutations(connector(instance(conf), conf), outTable, Collections.singleton(m));
+        }
+
+        return success;
+    }
+}
\ No newline at end of file
diff --git a/extras/rya.prospector/src/main/java/org/apache/rya/prospector/mr/ProspectorCombiner.java b/extras/rya.prospector/src/main/java/org/apache/rya/prospector/mr/ProspectorCombiner.java
new file mode 100644
index 000000000..bc3c1eba6
--- /dev/null
+++ b/extras/rya.prospector/src/main/java/org/apache/rya/prospector/mr/ProspectorCombiner.java
@@ -0,0 +1,61 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.rya.prospector.mr;
+
+import java.io.IOException;
+import java.util.Collection;
+import java.util.Map;
+import java.util.Map.Entry;
+
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.mapreduce.Reducer;
+import org.apache.rya.prospector.domain.IntermediateProspect;
+import org.apache.rya.prospector.plans.IndexWorkPlan;
+import org.apache.rya.prospector.plans.IndexWorkPlanManager;
+import org.apache.rya.prospector.plans.impl.ServicesBackedIndexWorkPlanManager;
+import org.apache.rya.prospector.utils.ProspectorUtils;
+
+/**
+ * Used to combine intermediate Prospect job results after {@link ProspectorMapper},
+ * but before the shuffle operation of the Hadoop Map Reduce framework.
+ */
+public class ProspectorCombiner extends Reducer<IntermediateProspect, LongWritable, IntermediateProspect, LongWritable> {
+
+    private final IndexWorkPlanManager manager = new ServicesBackedIndexWorkPlanManager();
+    private Map<String, IndexWorkPlan> plans;
+
+    @Override
+    public void setup(Context context) throws IOException, InterruptedException {
+        super.setup(context);
+        this.plans = ProspectorUtils.planMap(manager.getPlans());
+    }
+
+    @Override
+    protected void reduce(IntermediateProspect prospect, Iterable<LongWritable> values, Context context) throws IOException, InterruptedException {
+        final IndexWorkPlan plan = plans.get(prospect.getIndex());
+        if (plan != null) {
+            final Collection<Entry<IntermediateProspect, LongWritable>> coll = plan.combine(prospect, values);
+            if (coll != null) {
+                for(final Entry<IntermediateProspect, LongWritable> entry : coll) {
+                    context.write(entry.getKey(), entry.getValue());
+                }
+            }
+        }
+    }
+}
\ No newline at end of file
diff --git a/extras/rya.prospector/src/main/java/org/apache/rya/prospector/mr/ProspectorMapper.java b/extras/rya.prospector/src/main/java/org/apache/rya/prospector/mr/ProspectorMapper.java
new file mode 100644
index 000000000..ff4c30fc7
--- /dev/null
+++ b/extras/rya.prospector/src/main/java/org/apache/rya/prospector/mr/ProspectorMapper.java
@@ -0,0 +1,83 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.rya.prospector.mr;
+
+import java.io.IOException;
+import java.util.Collection;
+import java.util.Map.Entry;
+
+import org.apache.accumulo.core.data.Key;
+import org.apache.accumulo.core.data.Value;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.mapreduce.Mapper;
+import org.apache.rya.accumulo.AccumuloRdfConfiguration;
+import org.apache.rya.api.RdfCloudTripleStoreConstants;
+import org.apache.rya.api.domain.RyaStatement;
+import org.apache.rya.api.resolver.RyaTripleContext;
+import org.apache.rya.api.resolver.triple.TripleRow;
+import org.apache.rya.api.resolver.triple.TripleRowResolverException;
+import org.apache.rya.prospector.domain.IntermediateProspect;
+import org.apache.rya.prospector.plans.IndexWorkPlan;
+import org.apache.rya.prospector.plans.IndexWorkPlanManager;
+import org.apache.rya.prospector.plans.impl.ServicesBackedIndexWorkPlanManager;
+
+/**
+ * Loads {@link RyaStatement}s from Accumulo and maps them into {@link IntermediateProspect}s
+ * paired with count information during the Map portion of the Hadoop Map Reduce framework.
+ */
+public class ProspectorMapper extends Mapper<Key, Value, IntermediateProspect, LongWritable> {
+
+    private RyaTripleContext ryaContext;
+    private final IndexWorkPlanManager manager = new ServicesBackedIndexWorkPlanManager();
+    private final Collection<IndexWorkPlan> plans = manager.getPlans();
+
+    @Override
+    public void setup(Context context) throws IOException, InterruptedException {
+        super.setup(context);
+        ryaContext = RyaTripleContext.getInstance(new AccumuloRdfConfiguration(context.getConfiguration()));
+    }
+
+    @Override
+    public void map(Key row, Value data, Context context) throws IOException, InterruptedException {
+        RyaStatement ryaStatement = null;
+        try {
+            ryaStatement = ryaContext.deserializeTriple(RdfCloudTripleStoreConstants.TABLE_LAYOUT.SPO,
+                    new TripleRow(
+                            row.getRow().getBytes(),
+                            row.getColumnFamily().getBytes(),
+                            row.getColumnQualifier().getBytes(),
+                            row.getTimestamp(),
+                            row.getColumnVisibility().getBytes(),
+                            data.get()));
+        } catch (final TripleRowResolverException e) {
+            // The row didn't contain a Rya Statement; ryaStatement stays null and the row is skipped.
+        }
+
+        if(ryaStatement != null) {
+            for(final IndexWorkPlan plan : plans) {
+                final Collection<Entry<IntermediateProspect, LongWritable>> coll = plan.map(ryaStatement);
+                if(coll != null) {
+                    for(final Entry<IntermediateProspect, LongWritable> entry : coll) {
+                        context.write(entry.getKey(), entry.getValue());
+                    }
+                }
+            }
+        }
+    }
+}
\ No newline at end of file
diff --git a/extras/rya.prospector/src/main/java/org/apache/rya/prospector/mr/ProspectorReducer.java b/extras/rya.prospector/src/main/java/org/apache/rya/prospector/mr/ProspectorReducer.java
new file mode 100644
index 000000000..5247b5b7a
--- /dev/null
+++ b/extras/rya.prospector/src/main/java/org/apache/rya/prospector/mr/ProspectorReducer.java
@@ -0,0 +1,65 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.rya.prospector.mr;
+
+import java.io.IOException;
+import java.util.Calendar;
+import java.util.Date;
+import java.util.Map;
+
+import org.apache.commons.lang.time.DateUtils;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.mapreduce.Reducer;
+import org.apache.rya.prospector.domain.IntermediateProspect;
+import org.apache.rya.prospector.plans.IndexWorkPlan;
+import org.apache.rya.prospector.plans.IndexWorkPlanManager;
+import org.apache.rya.prospector.plans.impl.ServicesBackedIndexWorkPlanManager;
+import org.apache.rya.prospector.utils.ProspectorUtils;
+
+/**
+ * Reduce step of the Prospector job: hands each {@link IntermediateProspect} and its values to the
+ * {@link IndexWorkPlan} found in the plan map under the prospect's index, together with the run's
+ * minute-truncated date. NOTE(review): Reducer/Map read as raw types here - confirm the type arguments.
+ */
+public class ProspectorReducer extends Reducer {
+
+ private Date truncatedDate;
+ private final IndexWorkPlanManager manager = new ServicesBackedIndexWorkPlanManager();
+ private Map plans;
+
+ @Override
+ public void setup(Context context) throws IOException, InterruptedException {
+ super.setup(context);
+
+ final Configuration conf = context.getConfiguration();
+ final long now = conf.getLong("DATE", System.currentTimeMillis());
+ truncatedDate = DateUtils.truncate(new Date(now), Calendar.MINUTE);
+
+ this.plans = ProspectorUtils.planMap(manager.getPlans());
+ }
+
+ @Override
+ protected void reduce(IntermediateProspect prospect, Iterable values, Context context) throws IOException, InterruptedException {
+ final IndexWorkPlan plan = plans.get(prospect.getIndex());
+ if (plan != null) {
+ plan.reduce(prospect, values, truncatedDate, context);
+ }
+ }
+}
\ No newline at end of file
diff --git a/extras/rya.prospector/src/main/java/org/apache/rya/prospector/plans/IndexWorkPlan.java b/extras/rya.prospector/src/main/java/org/apache/rya/prospector/plans/IndexWorkPlan.java
new file mode 100644
index 000000000..77955e4ec
--- /dev/null
+++ b/extras/rya.prospector/src/main/java/org/apache/rya/prospector/plans/IndexWorkPlan.java
@@ -0,0 +1,115 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.rya.prospector.plans;
+
+import java.io.IOException;
+import java.util.Collection;
+import java.util.Date;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.accumulo.core.client.Connector;
+import org.apache.accumulo.core.client.TableNotFoundException;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.mapreduce.Reducer;
+import org.apache.rya.api.domain.RyaStatement;
+import org.apache.rya.prospector.domain.IndexEntry;
+import org.apache.rya.prospector.domain.IntermediateProspect;
+import org.apache.rya.prospector.mr.ProspectorCombiner;
+import org.apache.rya.prospector.mr.ProspectorMapper;
+import org.openrdf.model.vocabulary.XMLSchema;
+
+/**
+ * Contains the methods that perform each of the Map Reduce functions that result
+ * in the final {@link IndexEntry} values as well as a way to query those values
+ * once they have been written.
+ */
+public interface IndexWorkPlan {
+    public static final String URITYPE = XMLSchema.ANYURI.stringValue();
+    public static final LongWritable ONE = new LongWritable(1);
+    public static final String DELIM = "\u0000";
+
+    /**
+     * This method is invoked by {@link ProspectorMapper}. It's used to pull input from an
+     * Accumulo Rya instance into the Map Reduce framework. It must use the values of a
+     * {@link RyaStatement} to derive a bunch of {@link IntermediateProspect} and
+     * {@code LongWritable} pairs. This is only useful for prospecting jobs that count things.
+     * The {@link IntermediateProspect} value will be used as the key within
+     * {@link #combine(IntermediateProspect, Iterable)} and
+     * {@link #reduce(IntermediateProspect, Iterable, Date, org.apache.hadoop.mapreduce.Reducer.Context)}.
+     *
+     * @param ryaStatement - The RDF Statement that needs to be mapped.
+     * @return A collection of intermediate keys and counts.
+     */
+    public Collection<Map.Entry<IntermediateProspect, LongWritable>> map(RyaStatement ryaStatement);
+
+    /**
+     * This method is invoked by {@link ProspectorCombiner}. It is used to
+     * combine the results of {@link ProspectorMapper} before the shuffle operation
+     * of the Map Reduce framework.
+     *
+     * @param prospect - The intermediate prospect that is being combined.
+     * @param counts - The counts that need to be combined together.
+     * @return A collection containing the combined results.
+     */
+    public Collection<Map.Entry<IntermediateProspect, LongWritable>> combine(IntermediateProspect prospect, Iterable<LongWritable> counts);
+
+    /**
+     * This method is invoked by {@link org.apache.rya.prospector.mr.ProspectorReducer}.
+     * It is used to reduce the counts to their final states and write them to
+     * output via the {@code context}.
+     *
+     * @param prospect - The intermediate prospect that is being reduced.
+     * @param counts - The counts that need to be reduced.
+     * @param timestamp - The timestamp that identifies this Prospector run.
+     * @param context - The reducer context the reduced values will be written to.
+     * @throws IOException A problem was encountered while writing to the context.
+     * @throws InterruptedException Writes to the context were interrupted.
+     */
+    public void reduce(IntermediateProspect prospect, Iterable<LongWritable> counts, Date timestamp, Reducer.Context context) throws IOException, InterruptedException;
+
+    /**
+     * @return A unique name that indicates which {@link IndexEntry}s came from this plan.
+     */
+    public String getIndexType();
+
+    /**
+     * Builds the single string form of an index value that is made of several pieces.
+     * The count job uses it to place a null delimiter between the parts of any
+     * {@link IndexEntry} whose data section is two different pieces of information.
+     * @param indices - The pieces of the index value, in order.
+     * @return The composite value.
+     */
+    public String getCompositeValue(List<String> indices);
+
+    /**
+     * Search for {@link IndexEntry}s that have values matching the provided parameters.
+     *
+     * @param connector - The Accumulo Connector used to find the table holding the data.
+     * @param tableName - The name of the table the Prospector results are stored within.
+     * @param prospectTimes - Indicates which Prospect runs will be part of the query.
+     * @param type - The name of the index the {@link IndexEntry}s are stored within.
+     * @param index - The data portion of the {@link IndexEntry}s that may be returned.
+     * @param dataType - The data type of the {@link IndexEntry}s that may be returned.
+     * @param auths - The authorizations used to search for the entries.
+     * @return The {@link IndexEntry}s that match the provided values.
+     * @throws TableNotFoundException No table exists for {@code tableName}.
+     */
+    public List<IndexEntry> query(Connector connector, String tableName, List<Long> prospectTimes, String type, String index, String dataType, String[] auths) throws TableNotFoundException;
+}
\ No newline at end of file
diff --git a/extras/rya.prospector/src/main/groovy/org/apache/rya/prospector/plans/IndexWorkPlanManager.groovy b/extras/rya.prospector/src/main/java/org/apache/rya/prospector/plans/IndexWorkPlanManager.java
similarity index 71%
rename from extras/rya.prospector/src/main/groovy/org/apache/rya/prospector/plans/IndexWorkPlanManager.groovy
rename to extras/rya.prospector/src/main/java/org/apache/rya/prospector/plans/IndexWorkPlanManager.java
index f1029dc8c..1b7cf3b9c 100644
--- a/extras/rya.prospector/src/main/groovy/org/apache/rya/prospector/plans/IndexWorkPlanManager.groovy
+++ b/extras/rya.prospector/src/main/java/org/apache/rya/prospector/plans/IndexWorkPlanManager.java
@@ -6,9 +6,9 @@
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
- *
+ *
* http://www.apache.org/licenses/LICENSE-2.0
- *
+ *
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
@@ -16,14 +16,21 @@
* specific language governing permissions and limitations
* under the License.
*/
+package org.apache.rya.prospector.plans;
-package org.apache.rya.prospector.plans
+import java.util.Collection;
+
+import org.apache.rya.prospector.mr.Prospector;
/**
- * Date: 12/3/12
- * Time: 11:24 AM
+ * Provides access to the {@link IndexWorkPlan}s that will be executed as part
+ * of a {@link Prospector} run.
*/
public interface IndexWorkPlanManager {
+ /**
+ * @return The {@link IndexWorkPlan}s that will be executed as part of a
+ * {@link Prospector} run.
+ */
public Collection getPlans();
-}
+}
\ No newline at end of file
diff --git a/extras/rya.prospector/src/main/java/org/apache/rya/prospector/plans/impl/CountPlan.java b/extras/rya.prospector/src/main/java/org/apache/rya/prospector/plans/impl/CountPlan.java
new file mode 100644
index 000000000..ebcf6c35d
--- /dev/null
+++ b/extras/rya.prospector/src/main/java/org/apache/rya/prospector/plans/impl/CountPlan.java
@@ -0,0 +1,262 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.rya.prospector.plans.impl;
+
+import static org.apache.rya.prospector.utils.ProspectorConstants.COUNT;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.Date;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.Map.Entry;
+
+import org.apache.accumulo.core.client.BatchScanner;
+import org.apache.accumulo.core.client.Connector;
+import org.apache.accumulo.core.client.TableNotFoundException;
+import org.apache.accumulo.core.data.Key;
+import org.apache.accumulo.core.data.Mutation;
+import org.apache.accumulo.core.data.Range;
+import org.apache.accumulo.core.data.Value;
+import org.apache.accumulo.core.security.Authorizations;
+import org.apache.accumulo.core.security.ColumnVisibility;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapreduce.Reducer;
+import org.apache.rya.api.RdfCloudTripleStoreConstants;
+import org.apache.rya.api.domain.RyaStatement;
+import org.apache.rya.api.domain.RyaType;
+import org.apache.rya.api.domain.RyaURI;
+import org.apache.rya.prospector.domain.IndexEntry;
+import org.apache.rya.prospector.domain.IntermediateProspect;
+import org.apache.rya.prospector.domain.TripleValueType;
+import org.apache.rya.prospector.plans.IndexWorkPlan;
+import org.apache.rya.prospector.utils.CustomEntry;
+import org.apache.rya.prospector.utils.ProspectorUtils;
+import org.openrdf.model.util.URIUtil;
+import org.openrdf.model.vocabulary.XMLSchema;
+
+/**
+ * An implementation of {@link IndexWorkPlan} that counts the number of times
+ * a piece of data appears within a Rya Instance for every {@link TripleValueType}.
+ */
+public class CountPlan implements IndexWorkPlan {
+
+ @Override
+ public Collection> map(RyaStatement ryaStatement) {
+ final RyaURI subject = ryaStatement.getSubject();
+ final RyaURI predicate = ryaStatement.getPredicate();
+ final String subjpred = ryaStatement.getSubject().getData() + DELIM + ryaStatement.getPredicate().getData();
+ final String predobj = ryaStatement.getPredicate().getData() + DELIM + ryaStatement.getObject().getData();
+ final String subjobj = ryaStatement.getSubject().getData() + DELIM + ryaStatement.getObject().getData();
+ final RyaType object = ryaStatement.getObject();
+ final int localIndex = URIUtil.getLocalNameIndex(subject.getData());
+ final String namespace = subject.getData().substring(0, localIndex - 1);
+ final String visibility = new String(ryaStatement.getColumnVisibility());
+
+ final List> entries = new ArrayList<>(7);
+
+ // Create an entry for each TripleValueType type.
+ entries.add(new CustomEntry(
+ IntermediateProspect.builder()
+ .setIndex(COUNT)
+ .setData(subject.getData())
+ .setDataType(URITYPE)
+ .setTripleValueType( TripleValueType.SUBJECT )
+ .setVisibility(visibility)
+ .build()
+ , ONE));
+
+ entries.add(new CustomEntry(
+ IntermediateProspect.builder()
+ .setIndex(COUNT)
+ .setData(predicate.getData())
+ .setDataType(URITYPE)
+ .setTripleValueType( TripleValueType.PREDICATE )
+ .setVisibility(visibility)
+ .build()
+ , ONE));
+
+ entries.add(new CustomEntry(
+ IntermediateProspect.builder()
+ .setIndex(COUNT)
+ .setData(object.getData())
+ .setDataType(object.getDataType().stringValue())
+ .setTripleValueType( TripleValueType.OBJECT )
+ .setVisibility(visibility)
+ .build()
+ , ONE));
+
+ entries.add(new CustomEntry(
+ IntermediateProspect.builder()
+ .setIndex(COUNT)
+ .setData(subjpred)
+ .setDataType(XMLSchema.STRING.toString())
+ .setTripleValueType( TripleValueType.SUBJECT_PREDICATE )
+ .setVisibility(visibility)
+ .build()
+ , ONE));
+
+ entries.add(new CustomEntry(
+ IntermediateProspect.builder()
+ .setIndex(COUNT)
+ .setData(subjobj)
+ .setDataType(XMLSchema.STRING.toString())
+ .setTripleValueType(TripleValueType.SUBJECT_OBJECT)
+ .setVisibility(visibility)
+ .build()
+ , ONE));
+
+ entries.add(new CustomEntry(
+ IntermediateProspect.builder()
+ .setIndex(COUNT)
+ .setData(predobj)
+ .setDataType(XMLSchema.STRING.toString())
+ .setTripleValueType(TripleValueType.PREDICATE_OBJECT)
+ .setVisibility(visibility)
+ .build()
+ , ONE));
+
+ entries.add(new CustomEntry(
+ IntermediateProspect.builder()
+ .setIndex(COUNT)
+ .setData(namespace)
+ .setDataType(URITYPE)
+ .setTripleValueType(TripleValueType.ENTITY)
+ .setVisibility(visibility)
+ .build()
+ , ONE));
+ return entries;
+ }
+
+ @Override
+ public Collection> combine(IntermediateProspect prospect, Iterable counts) {
+ long sum = 0;
+ for(final LongWritable count : counts) {
+ sum += count.get();
+ }
+ return Collections.singleton( new CustomEntry(prospect, new LongWritable(sum)) );
+ }
+
+    @Override
+    public void reduce(IntermediateProspect prospect, Iterable<LongWritable> counts, Date timestamp, Reducer.Context context) throws IOException, InterruptedException {
+        long sum = 0;
+        for (final LongWritable count : counts) {
+            sum += count.get();
+        }
+
+        final String indexType = prospect.getTripleValueType().getIndexType();
+
+        // Negative sums are only written for predicates. (Original note: not sure if this is the best idea..)
+        if ((sum >= 0) || indexType.equals(TripleValueType.PREDICATE.getIndexType())) {
+            // Row layout: index type, DELIM, data, DELIM, reverse-index date/time of this run.
+            final Mutation m = new Mutation(indexType + DELIM + prospect.getData() + DELIM + ProspectorUtils.getReverseIndexDateTime(timestamp));
+            final ColumnVisibility visibility = new ColumnVisibility(prospect.getVisibility());
+            // The count is stored as decimal text; encode it with a fixed charset.
+            final Value sumValue = new Value(Long.toString(sum).getBytes(java.nio.charset.StandardCharsets.UTF_8));
+            m.put(COUNT, prospect.getDataType(), visibility, timestamp.getTime(), sumValue);
+
+            context.write(null, m);
+        }
+    }
+
+ @Override
+ public String getIndexType() {
+ return COUNT; // ProspectorConstants.COUNT; the same name is the column family used by reduce() and query().
+ }
+
+    /**
+     * Joins the pieces of a composite index value with {@link #DELIM}.
+     *
+     * @param indices - The pieces to join, in order.
+     * @return The joined value, or an empty string when {@code indices} is empty.
+     */
+    @Override
+    public String getCompositeValue(List<String> indices) {
+        return String.join(DELIM, indices);
+    }
+
+ @Override
+ public List query(Connector connector, String tableName, List prospectTimes, String type, String compositeIndex, String dataType, String[] auths) throws TableNotFoundException {
+ assert connector != null && tableName != null && type != null && compositeIndex != null;
+
+ final BatchScanner bs = connector.createBatchScanner(tableName, new Authorizations(auths), 4);
+ final List ranges = new ArrayList<>();
+ int max = 1000; //by default only return 1000 prospects maximum
+ if (prospectTimes != null) {
+ for(final Long prospectTime : prospectTimes) {
+ ranges.add(new Range(type + DELIM + compositeIndex + DELIM + ProspectorUtils.getReverseIndexDateTime(new Date(prospectTime))));
+ }
+ } else {
+ max = 1; //only return the latest if no prospectTimes given
+ final String prefix = type + DELIM + compositeIndex + DELIM;
+ ranges.add(new Range(prefix, prefix + RdfCloudTripleStoreConstants.LAST));
+ }
+
+ bs.setRanges(ranges);
+ if (dataType != null) {
+ bs.fetchColumn(new Text(COUNT), new Text(dataType));
+ } else {
+ bs.fetchColumnFamily(new Text(COUNT));
+ }
+
+ final List indexEntries = new ArrayList();
+ final Iterator> iter = bs.iterator();
+
+ while (iter.hasNext() && indexEntries.size() <= max) {
+ final Entry entry = iter.next();
+ final Key k = entry.getKey();
+ final Value v = entry.getValue();
+
+ final String[] rowArr = k.getRow().toString().split(DELIM);
+ String values = "";
+ // if it is a composite index, then return the type as a composite index
+ if (type.equalsIgnoreCase(TripleValueType.SUBJECT_PREDICATE.getIndexType()) ||
+ type.equalsIgnoreCase(TripleValueType.SUBJECT_OBJECT.getIndexType()) ||
+ type.equalsIgnoreCase(TripleValueType.PREDICATE_OBJECT.getIndexType())) {
+ values =rowArr[1] + DELIM + rowArr[2];
+ }
+ else {
+ values = rowArr[1];
+ }
+
+ // Create an entry using the values that were found.
+ final String entryDataType = k.getColumnQualifier().toString();
+ final String entryVisibility = k.getColumnVisibility().toString();
+ final Long entryCount = Long.parseLong(new String(v.get()));
+
+ indexEntries.add(
+ IndexEntry.builder()
+ .setData(values)
+ .setTripleValueType(rowArr[0])
+ .setIndex(COUNT)
+ .setDataType(entryDataType)
+ .setVisibility(entryVisibility)
+ .setCount(entryCount)
+ .setTimestamp(k.getTimestamp())
+ .build());
+ }
+ bs.close();
+
+ return indexEntries;
+ }
+}
\ No newline at end of file
diff --git a/extras/rya.prospector/src/main/java/org/apache/rya/prospector/plans/impl/ServicesBackedIndexWorkPlanManager.java b/extras/rya.prospector/src/main/java/org/apache/rya/prospector/plans/impl/ServicesBackedIndexWorkPlanManager.java
new file mode 100644
index 000000000..b5d2320c1
--- /dev/null
+++ b/extras/rya.prospector/src/main/java/org/apache/rya/prospector/plans/impl/ServicesBackedIndexWorkPlanManager.java
@@ -0,0 +1,49 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.rya.prospector.plans.impl;
+
+import java.util.Collection;
+import java.util.Iterator;
+import java.util.ServiceLoader;
+
+import org.apache.rya.prospector.plans.IndexWorkPlan;
+import org.apache.rya.prospector.plans.IndexWorkPlanManager;
+
+import com.google.common.collect.Lists;
+
+/**
+ * Searches the classpath for any {@link IndexWorkPlan}s that are able to be service loaded.
+ */
+public class ServicesBackedIndexWorkPlanManager implements IndexWorkPlanManager {
+
+    private final Collection<IndexWorkPlan> plans; // Filled once, by the constructor.
+
+    /**
+     * Constructs an instance of {@link ServicesBackedIndexWorkPlanManager} from every plan the {@link ServiceLoader} finds.
+     */
+    public ServicesBackedIndexWorkPlanManager() {
+        final Iterator<IndexWorkPlan> iterator = ServiceLoader.load(IndexWorkPlan.class).iterator();
+        plans = Lists.newArrayList(iterator);
+    }
+
+    @Override
+    public Collection<IndexWorkPlan> getPlans() {
+        return plans;
+    }
+}
\ No newline at end of file
diff --git a/extras/rya.prospector/src/main/java/org/apache/rya/prospector/service/ProspectorService.java b/extras/rya.prospector/src/main/java/org/apache/rya/prospector/service/ProspectorService.java
new file mode 100644
index 000000000..205d4fc67
--- /dev/null
+++ b/extras/rya.prospector/src/main/java/org/apache/rya/prospector/service/ProspectorService.java
@@ -0,0 +1,162 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.rya.prospector.service;
+
+import static java.util.Objects.requireNonNull;
+import static org.apache.rya.prospector.utils.ProspectorConstants.METADATA;
+import static org.apache.rya.prospector.utils.ProspectorConstants.PROSPECT_TIME;
+
+import java.util.Date;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.Map.Entry;
+
+import org.apache.accumulo.core.client.AccumuloException;
+import org.apache.accumulo.core.client.AccumuloSecurityException;
+import org.apache.accumulo.core.client.Connector;
+import org.apache.accumulo.core.client.Scanner;
+import org.apache.accumulo.core.client.TableExistsException;
+import org.apache.accumulo.core.client.TableNotFoundException;
+import org.apache.accumulo.core.client.admin.TableOperations;
+import org.apache.accumulo.core.data.Key;
+import org.apache.accumulo.core.data.Range;
+import org.apache.accumulo.core.data.Value;
+import org.apache.accumulo.core.security.Authorizations;
+import org.apache.hadoop.io.Text;
+import org.apache.rya.prospector.domain.IndexEntry;
+import org.apache.rya.prospector.plans.IndexWorkPlan;
+import org.apache.rya.prospector.plans.IndexWorkPlanManager;
+import org.apache.rya.prospector.plans.impl.ServicesBackedIndexWorkPlanManager;
+import org.apache.rya.prospector.utils.ProspectorUtils;
+
+/**
+ * Provides access to the Prospect results that have been stored within a specific Accumulo table.
+ */
+public class ProspectorService {
+
+    private final Connector connector;
+    private final String tableName;
+
+    private final IndexWorkPlanManager manager = new ServicesBackedIndexWorkPlanManager();
+    private final Map<String, IndexWorkPlan> plans; // From ProspectorUtils.planMap(); query() looks plans up by index type.
+
+    /**
+     * Constructs an instance of {@link ProspectorService}, creating the table when it does not exist yet.
+     *
+     * @param connector - The Accumulo connector used to communicate with the table. (not null)
+     * @param tableName - The name of the Accumulo table that will be queried for Prospect results. (not null)
+     * @throws AccumuloException A problem occurred while creating the table.
+     * @throws AccumuloSecurityException A problem occurred while creating the table.
+     * @throws TableExistsException A problem occurred while creating the table.
+     */
+    public ProspectorService(Connector connector, String tableName) throws AccumuloException, AccumuloSecurityException, TableExistsException {
+        this.connector = requireNonNull(connector);
+        this.tableName = requireNonNull(tableName);
+
+        this.plans = ProspectorUtils.planMap(manager.getPlans());
+
+        // Create the table if it doesn't already exist.
+        final TableOperations tos = connector.tableOperations();
+        if(!tos.exists(tableName)) {
+            tos.create(tableName);
+        }
+    }
+
+ /**
+ * Get a list of timestamps that represents all of the Prospect runs that have
+ * ever been performed.
+ *
+ * @param auths - The authorizations used to scan the table for prospects.
+ * @return A list of timestamps representing each Prospect run that was found.
+ * @throws TableNotFoundException The table name that was provided when this
+ * class was constructed does not match a table that the connector has access to.
+ */
+    public Iterator<Long> getProspects(String[] auths) throws TableNotFoundException {
+        final Scanner scanner = connector.createScanner(tableName, new Authorizations(auths));
+        scanner.setRange(Range.exact(METADATA));
+        scanner.fetchColumnFamily(new Text(PROSPECT_TIME));
+        // The scanner is not closed here because the returned iterator wraps scanner.iterator().
+        return new ProspectTimestampIterator( scanner.iterator() );
+    }
+
+ /**
+ * Get a list of timestamps that represents all of the Prospect runs that
+ * have been performed inclusively between two timestamps.
+ *
+ * @param beginTime - The start of the time range.
+ * @param endTime - The end of the time range.
+ * @param auths - The authorizations used to scan the table for prospects.
+ * @return A list of timestamps representing each Prospect run that was found.
+ * @throws TableNotFoundException The table name that was provided when this
+ * class was constructed does not match a table that the connector has access to.
+ */
+    public Iterator<Long> getProspectsInRange(long beginTime, long endTime, String[] auths) throws TableNotFoundException {
+        final Scanner scanner = connector.createScanner(tableName, new Authorizations(auths));
+        // The start key is built from endTime and the end key from beginTime: reverse-index times presumably sort newest first.
+        scanner.setRange(new Range(
+                new Key(METADATA, PROSPECT_TIME, ProspectorUtils.getReverseIndexDateTime(new Date(endTime)), "", Long.MAX_VALUE),
+                new Key(METADATA, PROSPECT_TIME, ProspectorUtils.getReverseIndexDateTime(new Date(beginTime)), "", 0L)
+        ));
+        return new ProspectTimestampIterator( scanner.iterator() );
+    }
+
+    /**
+     * Iterates over the results of a {@link Scanner} and interprets their keys
+     * as containing Prospect run timestamps.
+     */
+    private static final class ProspectTimestampIterator implements Iterator<Long> {
+        private final Iterator<Entry<Key, Value>> it; // The scan results being adapted.
+
+        public ProspectTimestampIterator(Iterator<Entry<Key, Value>> it) {
+            this.it = requireNonNull(it);
+        }
+
+        @Override
+        public boolean hasNext() {
+            return it.hasNext();
+        }
+
+        @Override
+        public Long next() {
+            return it.next().getKey().getTimestamp(); // Only the key's timestamp is reported; the value is ignored.
+        }
+    }
+
+    /**
+     * Search for {@link IndexEntry}s that have values matching the provided parameters.
+     *
+     * @param prospectTimes - Indicates which Prospect runs will be part of the query.
+     * @param indexType - The name of the index the {@link IndexEntry}s are stored within. (not null)
+     * @param type - The triple value type of the entries; handed to the plan's query unchanged.
+     * @param index - The pieces of the data portion of the {@link IndexEntry}s that may be returned.
+     * @param dataType - The data type of the {@link IndexEntry}s that may be returned.
+     * @param auths - The authorizations used to search for the entries.
+     * @return The {@link IndexEntry}s that match the provided values.
+     * @throws TableNotFoundException No table exists for {@code tableName}.
+     * @throws IllegalArgumentException No plan is registered for {@code indexType}.
+     */
+    public List<IndexEntry> query(List<Long> prospectTimes, String indexType, String type, List<String> index, String dataType, String[] auths) throws TableNotFoundException {
+        final IndexWorkPlan plan = plans.get(requireNonNull(indexType));
+        if (plan == null) {
+            throw new IllegalArgumentException("Index Type: " + indexType + " does not exist");
+        }
+        return plan.query(connector, tableName, prospectTimes, type, plan.getCompositeValue(index), dataType, auths);
+    }
+}
\ No newline at end of file
diff --git a/extras/rya.prospector/src/main/java/org/apache/rya/prospector/service/ProspectorServiceEvalStatsDAO.java b/extras/rya.prospector/src/main/java/org/apache/rya/prospector/service/ProspectorServiceEvalStatsDAO.java
new file mode 100644
index 000000000..3bb3b2688
--- /dev/null
+++ b/extras/rya.prospector/src/main/java/org/apache/rya/prospector/service/ProspectorServiceEvalStatsDAO.java
@@ -0,0 +1,143 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.rya.prospector.service;
+
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.List;
+
+import org.apache.accumulo.core.client.AccumuloException;
+import org.apache.accumulo.core.client.AccumuloSecurityException;
+import org.apache.accumulo.core.client.Connector;
+import org.apache.accumulo.core.client.TableExistsException;
+import org.apache.accumulo.core.client.TableNotFoundException;
+import org.apache.rya.api.RdfCloudTripleStoreConfiguration;
+import org.apache.rya.api.persist.RdfDAOException;
+import org.apache.rya.api.persist.RdfEvalStatsDAO;
+import org.apache.rya.prospector.domain.IndexEntry;
+import org.apache.rya.prospector.domain.TripleValueType;
+import org.apache.rya.prospector.utils.ProspectorConstants;
+import org.openrdf.model.Resource;
+import org.openrdf.model.Value;
+
+/**
+ * An {@link org.apache.rya.api.persist.RdfEvalStatsDAO} that uses the Prospector Service underneath to return counts.
+ */
+public class ProspectorServiceEvalStatsDAO implements RdfEvalStatsDAO<RdfCloudTripleStoreConfiguration> {
+
+    private ProspectorService prospectorService;
+
+    public ProspectorServiceEvalStatsDAO() {
+    }
+
+    public ProspectorServiceEvalStatsDAO(ProspectorService prospectorService, RdfCloudTripleStoreConfiguration conf) {
+        this.prospectorService = prospectorService;
+    }
+
+    public ProspectorServiceEvalStatsDAO(Connector connector, RdfCloudTripleStoreConfiguration conf) throws AccumuloException, AccumuloSecurityException, TableExistsException {
+        this.prospectorService = new ProspectorService(connector, getProspectTableName(conf));
+    }
+
+    @Override
+    public void init() {
+        assert prospectorService != null;
+    }
+
+    @Override
+    public boolean isInitialized() {
+        return prospectorService != null;
+    }
+
+    @Override
+    public void destroy() {
+    }
+
+    /**
+     * Asks the Prospector Service for the count recorded for the given values and returns the first one found.
+     * @return The count, or -1 when the Prospector Service returns no entry for the values.
+     */
+    @Override
+    public double getCardinality(RdfCloudTripleStoreConfiguration conf, CARDINALITY_OF card, List<Value> val) throws RdfDAOException {
+        assert conf != null && card != null && val != null;
+        String triplePart = null;
+        switch (card) {
+            case SUBJECT:
+                triplePart = TripleValueType.SUBJECT.getIndexType();
+                break;
+            case PREDICATE:
+                triplePart = TripleValueType.PREDICATE.getIndexType();
+                break;
+            case OBJECT:
+                triplePart = TripleValueType.OBJECT.getIndexType();
+                break;
+            case SUBJECTPREDICATE:
+                triplePart = TripleValueType.SUBJECT_PREDICATE.getIndexType();
+                break;
+            case SUBJECTOBJECT:
+                triplePart = TripleValueType.SUBJECT_OBJECT.getIndexType();
+                break;
+            case PREDICATEOBJECT:
+                triplePart = TripleValueType.PREDICATE_OBJECT.getIndexType();
+                break;
+        }
+
+        final List<String> indexedValues = new ArrayList<>();
+        for (final Value value : val) {
+            indexedValues.add(value.stringValue());
+        }
+
+        double cardinality = -1; // Stays -1 when the query returns nothing.
+        try {
+            final List<IndexEntry> entries = prospectorService.query(null, ProspectorConstants.COUNT, triplePart, indexedValues, null, conf.getAuths());
+            if(!entries.isEmpty()) {
+                cardinality = entries.iterator().next().getCount();
+            }
+        } catch (final TableNotFoundException e) {
+            throw new RdfDAOException(e);
+        }
+        return cardinality;
+    }
+
+    @Override
+    public double getCardinality(RdfCloudTripleStoreConfiguration conf, CARDINALITY_OF card, List<Value> val, Resource context) {
+        return getCardinality(conf, card, val); //TODO: Not sure about the context yet
+    }
+
+    @Override
+    public void setConf(RdfCloudTripleStoreConfiguration conf) {
+    }
+
+    @Override
+    public RdfCloudTripleStoreConfiguration getConf() {
+        return null;
+    }
+
+    public static String getProspectTableName(RdfCloudTripleStoreConfiguration conf) {
+        return conf.getTablePrefix() + "prospects";
+    }
+
+    /**
+     * This method exists so that the Rya Web project may autowire itself together
+     * using the Spring framework.
+     * @param prospectorService - The {@link ProspectorService} that will be used by this DAO.
+     */
+    public void setProspectorService(ProspectorService prospectorService) {
+        this.prospectorService = prospectorService;
+    }
+}
\ No newline at end of file
diff --git a/extras/rya.prospector/src/main/groovy/org/apache/rya/prospector/utils/CustomEntry.groovy b/extras/rya.prospector/src/main/java/org/apache/rya/prospector/utils/CustomEntry.java
similarity index 55%
rename from extras/rya.prospector/src/main/groovy/org/apache/rya/prospector/utils/CustomEntry.groovy
rename to extras/rya.prospector/src/main/java/org/apache/rya/prospector/utils/CustomEntry.java
index 9f23c4836..bff2c4297 100644
--- a/extras/rya.prospector/src/main/groovy/org/apache/rya/prospector/utils/CustomEntry.groovy
+++ b/extras/rya.prospector/src/main/java/org/apache/rya/prospector/utils/CustomEntry.java
@@ -6,9 +6,9 @@
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
- *
+ *
* http://www.apache.org/licenses/LICENSE-2.0
- *
+ *
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
@@ -16,37 +16,62 @@
* specific language governing permissions and limitations
* under the License.
*/
+package org.apache.rya.prospector.utils;
-package org.apache.rya.prospector.utils
+import java.util.Map;
/**
- * Date: 12/3/12
- * Time: 12:33 PM
+ * A convenience class that implements {@link Map.Entry}.
+ *
+ * @param <K> - The type of the Key.
+ * @param <V> - The type of the Value.
*/
-class CustomEntry implements Map.Entry {
+public class CustomEntry<K, V> implements Map.Entry<K, V> {
- K key;
- V value;
+    private K key;
+    private V value;
- CustomEntry(K key, V value) {
- this.key = key
- this.value = value
+    /**
+     * Constructs an entry that holds the provided key and value.
+     *
+     * @param key - The entry's key.
+     * @param value - The entry's value.
+     */
+    public CustomEntry(K key, V value) {
+        this.key = key;
+        this.value = value;
}
- K getKey() {
- return key
+    @Override
+    public K getKey() {
+        return key;
+    }
- void setKey(K key) {
- this.key = key
+    /**
+     * Replaces the key of this entry.
+     *
+     * @param key - The new key.
+     */
+    public void setKey(K key) {
+        this.key = key;
}
- V getValue() {
- return value
+    @Override
+    public V getValue() {
+        return value;
}
- V setValue(V value) {
- this.value = value
- this.value
+    /**
+     * Replaces the value of this entry.
+     *
+     * @param value - The new value.
+     * @return The value this entry held before the call, which is what the
+     *   {@link Map.Entry#setValue(Object)} contract requires.
+     */
+    @Override
+    public V setValue(V value) {
+        final V previous = this.value;
+        this.value = value;
+        return previous;
}
-}
+}
\ No newline at end of file
diff --git a/extras/rya.prospector/src/main/java/org/apache/rya/prospector/utils/ProspectorConstants.java b/extras/rya.prospector/src/main/java/org/apache/rya/prospector/utils/ProspectorConstants.java
new file mode 100644
index 000000000..790134330
--- /dev/null
+++ b/extras/rya.prospector/src/main/java/org/apache/rya/prospector/utils/ProspectorConstants.java
@@ -0,0 +1,94 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.rya.prospector.utils;
+
+/**
+ * Constants used by the Prospector project.
+ *
+ * The class only holds constants, so it can be neither instantiated nor extended.
+ */
+public final class ProspectorConstants {
+    /**
+     * The name of the Count index.
+     */
+    public static final String COUNT = "count";
+
+    /**
+     * The Row ID within Accumulo for any metadata entry related to a Prospect run.
+     */
+    public static final String METADATA = "metadata";
+
+    /**
+     * This is the name of a Column Family within Accumulo that represents when
+     * a Prospect run was performed.
+     */
+    public static final String PROSPECT_TIME = "prospectTime";
+
+    /**
+     * NOTE(review): looks like the default Accumulo visibility expression for prospect
+     * entries (readable only with both the "U" and "FOUO" authorizations) - confirm.
+     */
+    public static final String DEFAULT_VIS = "U&FOUO";
+
+    /**
+     * A zero length byte array.
+     */
+    public static final byte[] EMPTY = new byte[0];
+
+    // Names of the Hadoop Configuration properties read by the Prospector.
+
+    /**
+     * NOTE(review): presumably the property that turns on the tuning applied by
+     * {@link ProspectorUtils#addMRPerformance} - confirm against the job driver.
+     */
+    public static final String PERFORMANT = "performant";
+
+    /**
+     * The property holding the Accumulo user name {@link ProspectorUtils} connects as.
+     */
+    public static final String USERNAME = "username";
+
+    /**
+     * The property holding the password of the {@link #USERNAME} user.
+     */
+    public static final String PASSWORD = "password";
+
+    /**
+     * The property holding the name of the Accumulo instance to connect to.
+     */
+    public static final String INSTANCE = "instance";
+
+    /**
+     * The property holding the ZooKeeper servers of the Accumulo instance. It is required
+     * unless {@link #MOCK} is set to true.
+     */
+    public static final String ZOOKEEPERS = "zookeepers";
+
+    /**
+     * The property that, when it parses as {@code true}, makes {@link ProspectorUtils} use a
+     * mock Accumulo instance instead of a ZooKeeper backed one.
+     */
+    public static final String MOCK = "mock";
+
+    /**
+     * Not meant to be instantiated.
+     */
+    private ProspectorConstants() {
+    }
+}
\ No newline at end of file
diff --git a/extras/rya.prospector/src/main/java/org/apache/rya/prospector/utils/ProspectorUtils.java b/extras/rya.prospector/src/main/java/org/apache/rya/prospector/utils/ProspectorUtils.java
new file mode 100644
index 000000000..4dc92533a
--- /dev/null
+++ b/extras/rya.prospector/src/main/java/org/apache/rya/prospector/utils/ProspectorUtils.java
@@ -0,0 +1,228 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.rya.prospector.utils;
+
+import static org.apache.rya.prospector.utils.ProspectorConstants.INSTANCE;
+import static org.apache.rya.prospector.utils.ProspectorConstants.MOCK;
+import static org.apache.rya.prospector.utils.ProspectorConstants.PASSWORD;
+import static org.apache.rya.prospector.utils.ProspectorConstants.USERNAME;
+import static org.apache.rya.prospector.utils.ProspectorConstants.ZOOKEEPERS;
+
+import java.text.SimpleDateFormat;
+import java.util.Collection;
+import java.util.Date;
+import java.util.HashMap;
+import java.util.Map;
+
+import org.apache.accumulo.core.client.AccumuloException;
+import org.apache.accumulo.core.client.AccumuloSecurityException;
+import org.apache.accumulo.core.client.BatchWriter;
+import org.apache.accumulo.core.client.Connector;
+import org.apache.accumulo.core.client.Instance;
+import org.apache.accumulo.core.client.MutationsRejectedException;
+import org.apache.accumulo.core.client.TableNotFoundException;
+import org.apache.accumulo.core.client.ZooKeeperInstance;
+import org.apache.accumulo.core.client.mapreduce.AccumuloInputFormat;
+import org.apache.accumulo.core.client.mapreduce.AccumuloOutputFormat;
+import org.apache.accumulo.core.client.mock.MockInstance;
+import org.apache.accumulo.core.client.security.tokens.PasswordToken;
+import org.apache.accumulo.core.data.Mutation;
+import org.apache.accumulo.core.security.Authorizations;
+import org.apache.commons.lang.Validate;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.io.compress.GzipCodec;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.rya.prospector.plans.IndexWorkPlan;
+
+/**
+ * Static helper functions for configuring the Prospector's MapReduce job and for connecting
+ * and writing to Accumulo.
+ */
+public class ProspectorUtils {
+
+    /**
+     * The number a formatted date is subtracted from by {@link #getReverseIndexDateTime(Date)}.
+     */
+    public static final long INDEXED_DATE_SORT_VAL = 999999999999999999L; // 18 char long, same length as date format pattern below
+
+    /**
+     * The {@link SimpleDateFormat} pattern used by {@link #getReverseIndexDateTime(Date)}.
+     * NOTE(review): "sss" pads the seconds to three digits, which looks unintentional, but the
+     * 18 digit width of {@link #INDEXED_DATE_SORT_VAL} matches it, so the pattern is left as is.
+     */
+    public static final String INDEXED_DATE_FORMAT = "yyyyMMddHHmmsssSSS";
+
+    /**
+     * Converts a date into a decimal string that gets smaller as the date gets later.
+     *
+     * @param date - The date to convert. (not null)
+     * @return {@link #INDEXED_DATE_SORT_VAL} minus the number produced by formatting
+     *   {@code date} with {@link #INDEXED_DATE_FORMAT}.
+     */
+    public static String getReverseIndexDateTime(Date date) {
+        Validate.notNull(date);
+        // SimpleDateFormat is not thread safe, so a fresh instance is created for every call.
+        final String formattedDateString = new SimpleDateFormat(INDEXED_DATE_FORMAT).format(date);
+        final long diff = INDEXED_DATE_SORT_VAL - Long.parseLong(formattedDateString);
+
+        return Long.toString(diff);
+    }
+
+    /**
+     * Indexes a collection of plans by the type of index each one builds.
+     *
+     * @param plans - The plans to index. (not null)
+     * @return A map from {@link IndexWorkPlan#getIndexType()} to its plan. When several plans
+     *   report the same type, the one iterated last is kept.
+     */
+    public static Map<String, IndexWorkPlan> planMap(Collection<IndexWorkPlan> plans) {
+        final Map<String, IndexWorkPlan> planMap = new HashMap<>();
+        for (final IndexWorkPlan plan : plans) {
+            planMap.put(plan.getIndexType(), plan);
+        }
+        return planMap;
+    }
+
+    /**
+     * Configures a job to read from one Accumulo table and write {@link Mutation}s to another,
+     * using the connection properties stored in the job's {@link Configuration}.
+     *
+     * @param job - The job to configure. (not null)
+     * @param table - The name of the table the job scans.
+     * @param outtable - The name of the table mutations are written to by default.
+     * @param auths - The authorizations used while scanning {@code table}.
+     * @throws AccumuloSecurityException The connector information could not be set.
+     * @throws IllegalArgumentException Neither a mock instance nor zookeepers were configured.
+     */
+    public static void initMRJob(Job job, String table, String outtable, String[] auths) throws AccumuloSecurityException {
+        final Configuration conf = job.getConfiguration();
+        final String username = conf.get(USERNAME);
+        final String password = conf.get(PASSWORD);
+        final String instance = conf.get(INSTANCE);
+        final String zookeepers = conf.get(ZOOKEEPERS);
+        final String mock = conf.get(MOCK);
+
+        // The input and the output format are pointed at the same Accumulo instance.
+        if (Boolean.parseBoolean(mock)) {
+            AccumuloInputFormat.setMockInstance(job, instance);
+            AccumuloOutputFormat.setMockInstance(job, instance);
+        } else if (zookeepers != null) {
+            AccumuloInputFormat.setZooKeeperInstance(job, instance, zookeepers);
+            AccumuloOutputFormat.setZooKeeperInstance(job, instance, zookeepers);
+        } else {
+            throw new IllegalArgumentException("Must specify either mock or zookeepers");
+        }
+
+        // INPUT
+        // PasswordToken(CharSequence) does the encoding itself, so the token no longer depends on the
+        // platform's default charset and matches the one built by connector(...).
+        AccumuloInputFormat.setConnectorInfo(job, username, new PasswordToken(password));
+        AccumuloInputFormat.setInputTableName(job, table);
+        job.setInputFormatClass(AccumuloInputFormat.class);
+        AccumuloInputFormat.setScanAuthorizations(job, new Authorizations(auths));
+
+        // OUTPUT
+        job.setOutputFormatClass(AccumuloOutputFormat.class);
+        job.setOutputKeyClass(Text.class);
+        job.setOutputValueClass(Mutation.class);
+        AccumuloOutputFormat.setConnectorInfo(job, username, new PasswordToken(password));
+        AccumuloOutputFormat.setDefaultTableName(job, outtable);
+    }
+
+    /**
+     * Applies MapReduce tuning to a configuration: speculative execution is turned off for map
+     * and reduce tasks, "io.sort.mb" is set to 256 and map output is compressed with gzip.
+     *
+     * @param conf - The configuration to update. (not null)
+     */
+    public static void addMRPerformance(Configuration conf) {
+        conf.setBoolean("mapred.map.tasks.speculative.execution", false);
+        conf.setBoolean("mapred.reduce.tasks.speculative.execution", false);
+        conf.set("io.sort.mb", "256");
+        conf.setBoolean("mapred.compress.map.output", true);
+        conf.set("mapred.map.output.compression.codec", GzipCodec.class.getName());
+    }
+
+    /**
+     * Creates the Accumulo {@link Instance} described by a configuration.
+     *
+     * @param conf - Holds the instance name, zookeepers and mock properties. (not null)
+     * @return A {@link MockInstance} when the mock property parses as true, otherwise a
+     *   {@link ZooKeeperInstance} for the configured zookeepers.
+     * @throws IllegalArgumentException {@code conf} is null, or it configures neither a mock
+     *   instance nor zookeepers.
+     */
+    public static Instance instance(Configuration conf) {
+        // A plain assert is skipped unless the JVM runs with -ea, so validate explicitly.
+        Validate.notNull(conf);
+
+        final String instance_str = conf.get(INSTANCE);
+        final String zookeepers = conf.get(ZOOKEEPERS);
+        final String mock = conf.get(MOCK);
+        if (Boolean.parseBoolean(mock)) {
+            return new MockInstance(instance_str);
+        } else if (zookeepers != null) {
+            return new ZooKeeperInstance(instance_str, zookeepers);
+        } else {
+            throw new IllegalArgumentException("Must specify either mock or zookeepers");
+        }
+    }
+
+    /**
+     * Connects to Accumulo with the user name and password stored in a configuration.
+     *
+     * @param instance - The instance to connect to. When null, one is created from
+     *   {@code conf} using {@link #instance(Configuration)}.
+     * @param conf - Holds the user name and password properties. (not null)
+     * @return A connector for the configured user.
+     * @throws AccumuloException The connector could not be created.
+     * @throws AccumuloSecurityException The connector could not be created.
+     */
+    public static Connector connector(Instance instance, Configuration conf) throws AccumuloException, AccumuloSecurityException {
+        final String username = conf.get(USERNAME);
+        final String password = conf.get(PASSWORD);
+        if (instance == null) {
+            instance = instance(conf);
+        }
+        return instance.getConnector(username, new PasswordToken(password));
+    }
+
+    /**
+     * Writes a collection of mutations to a table, closing the writer even when a write fails.
+     *
+     * @param connector - Used to create the writer. (not null)
+     * @param tableName - The name of the table the mutations are written to.
+     * @param mutations - The mutations to write. (not null)
+     * @throws TableNotFoundException The table does not exist.
+     * @throws MutationsRejectedException A mutation could not be written.
+     */
+    public static void writeMutations(Connector connector, String tableName, Collection<Mutation> mutations) throws TableNotFoundException, MutationsRejectedException {
+        final BatchWriter bw = connector.createBatchWriter(tableName, 10000L, 10000L, 4);
+        try {
+            for (final Mutation mutation : mutations) {
+                bw.addMutation(mutation);
+            }
+            bw.flush();
+        } finally {
+            // Previously the writer was never closed when addMutation or flush threw.
+            bw.close();
+        }
+    }
+}
\ No newline at end of file
diff --git a/extras/rya.prospector/src/test/groovy/org/apache/rya/prospector/mr/ProspectorTest.groovy b/extras/rya.prospector/src/test/groovy/org/apache/rya/prospector/mr/ProspectorTest.groovy
deleted file mode 100644
index 02cbcd1fe..000000000
--- a/extras/rya.prospector/src/test/groovy/org/apache/rya/prospector/mr/ProspectorTest.groovy
+++ /dev/null
@@ -1,178 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.rya.prospector.mr
-
-import com.google.common.collect.Iterators
-import com.google.common.collect.Lists
-import org.apache.rya.accumulo.AccumuloRyaDAO
-import org.apache.rya.accumulo.AccumuloRdfConfiguration
-import org.apache.rya.api.persist.RdfEvalStatsDAO
-import org.apache.rya.api.domain.RyaStatement
-import org.apache.rya.api.domain.RyaType
-import org.apache.rya.api.domain.RyaURI
-import org.apache.rya.prospector.domain.IndexEntry
-import org.apache.rya.prospector.domain.TripleValueType
-import org.apache.rya.prospector.service.ProspectorService
-import org.apache.rya.prospector.service.ProspectorServiceEvalStatsDAO
-import org.apache.rya.prospector.utils.ProspectorConstants
-import org.apache.accumulo.core.client.Instance
-import org.apache.accumulo.core.client.mock.MockInstance
-import org.apache.accumulo.core.security.Authorizations
-import org.apache.hadoop.conf.Configuration
-import org.apache.hadoop.fs.Path
-import org.apache.hadoop.util.ToolRunner
-import org.junit.Test
-import org.openrdf.model.vocabulary.XMLSchema
-import org.openrdf.model.impl.URIImpl
-
-import static org.junit.Assert.assertEquals
-import org.openrdf.model.impl.LiteralImpl
-import org.openrdf.model.Value
-
-/**
- * Date: 12/4/12
- * Time: 4:33 PM
- */
-class ProspectorTest {
-
- @Test
- public void testCount() throws Exception {
-
- Instance mock = new MockInstance("accumulo");
-
- def connector = mock.getConnector("user", "pass".bytes)
- def intable = "rya_spo"
- def outtable = "rya_prospects"
- if (connector.tableOperations().exists(outtable))
- connector.tableOperations().delete(outtable)
- connector.tableOperations().create(outtable)
-
- AccumuloRyaDAO ryaDAO = new AccumuloRyaDAO();
- ryaDAO.setConnector(connector);
- ryaDAO.init()
-
- ryaDAO.add(new RyaStatement(new RyaURI("urn:gem:etype#1234"), new RyaURI("urn:gem#pred"), new RyaType("mydata1")))
- ryaDAO.add(new RyaStatement(new RyaURI("urn:gem:etype#1234"), new RyaURI("urn:gem#pred"), new RyaType("mydata2")))
- ryaDAO.add(new RyaStatement(new RyaURI("urn:gem:etype#1234"), new RyaURI("urn:gem#pred"), new RyaType("12")))
- ryaDAO.add(new RyaStatement(new RyaURI("urn:gem:etype#1235"), new RyaURI("urn:gem#pred"), new RyaType(XMLSchema.INTEGER, "12")))
- ryaDAO.add(new RyaStatement(new RyaURI("urn:gem:etype#1235"), new RyaURI("urn:gem#pred1"), new RyaType("12")))
-
- def confFile = "stats_cluster_config.xml"
- def confPath = new Path(getClass().getClassLoader().getResource(confFile).toString())
- def args = (String[]) [confPath];
- ToolRunner.run(new Prospector(), args);
- debugTable(connector, outtable)
-
- def scanner = connector.createScanner(outtable, new Authorizations("U", "FOUO"))
- def iter = scanner.iterator()
-// assertEquals(11, Iterators.size(iter))
-
- ryaDAO.destroy()
-
- def conf = new Configuration()
- conf.addResource(confPath)
- // debugTable(mrInfo, outtable)
-
- def service = new ProspectorService(connector, outtable)
- def auths = (String[]) ["U", "FOUO"]
- def prospects = service.getProspects(auths)
- def plist = Lists.newArrayList(prospects)
- assertEquals(1, plist.size())
-
- def rdfConf = new AccumuloRdfConfiguration(conf)
- rdfConf.setAuths("U","FOUO")
-
- prospects = service.getProspectsInRange(System.currentTimeMillis() - 100000, System.currentTimeMillis() + 10000, auths)
- plist = Lists.newArrayList(prospects)
- assertEquals(1, plist.size())
-
- List queryTerms = new ArrayList();
- queryTerms.add("urn:gem:etype");
- def query = service.query(plist, ProspectorConstants.COUNT, TripleValueType.entity.name(), queryTerms, XMLSchema.ANYURI.stringValue(), auths)
- assertEquals(1, query.size())
-// assertEquals(
-// new IndexEntry(index: ProspectorConstants.COUNT, data: "urn:gem:etype", dataType: XMLSchema.ANYURI.stringValue(),
-// tripleValueType: TripleValueType.entity, visibility: "", count: -1, timestamp: plist.get(0)),
-// query.get(0))
-
- queryTerms = new ArrayList();
- queryTerms.add("urn:gem:etype#1234");
- query = service.query(plist, ProspectorConstants.COUNT, TripleValueType.subject.name(), queryTerms, XMLSchema.ANYURI.stringValue(), auths)
- assertEquals(1, query.size())
-
- queryTerms = new ArrayList();
- queryTerms.add("urn:gem#pred");
- query = service.query(plist, ProspectorConstants.COUNT, TripleValueType.predicate.name(), queryTerms, XMLSchema.ANYURI.stringValue(), auths)
- assertEquals(1, query.size())
- assertEquals(
- new IndexEntry(index: ProspectorConstants.COUNT, data: "urn:gem#pred", dataType: XMLSchema.ANYURI.stringValue(),
- tripleValueType: TripleValueType.predicate, visibility: "", count: 4l, timestamp: plist.get(0)),
- query.get(0))
-
- queryTerms = new ArrayList();
- queryTerms.add("mydata1");
- query = service.query(plist, ProspectorConstants.COUNT, TripleValueType.object.name(), queryTerms, XMLSchema.STRING.stringValue(), auths)
- assertEquals(1, query.size())
-// assertEquals(
-// new IndexEntry(index: ProspectorConstants.COUNT, data: "mydata1", dataType: XMLSchema.STRING.stringValue(),
-// tripleValueType: TripleValueType.object, visibility: "", count: -1, timestamp: plist.get(0)),
-// query.get(0))
-
- queryTerms = new ArrayList();
- queryTerms.add("urn:gem:etype#1234");
- queryTerms.add("urn:gem#pred");
- query = service.query(plist, ProspectorConstants.COUNT, TripleValueType.subjectpredicate.name(), queryTerms, XMLSchema.STRING.stringValue(), auths)
- assertEquals(1, query.size())
-// assertEquals(
-// new IndexEntry(index: ProspectorConstants.COUNT, data: "urn:gem:etype#1234" + "\u0000" + "urn:gem#pred", dataType: XMLSchema.STRING.stringValue(),
-// tripleValueType: TripleValueType.subjectpredicate, visibility: "", count: -1, timestamp: plist.get(0)),
-// query.get(0))
-
- queryTerms = new ArrayList();
- queryTerms.add("urn:gem#pred");
- queryTerms.add("12");
- query = service.query(plist, ProspectorConstants.COUNT, TripleValueType.predicateobject.name(), queryTerms, XMLSchema.STRING.stringValue(), auths)
- assertEquals(1, query.size())
-// assertEquals(
-// new IndexEntry(index: ProspectorConstants.COUNT, data: "urn:gem#pred" + "\u0000" + "12", dataType: XMLSchema.STRING.stringValue(),
-// tripleValueType: TripleValueType.predicateobject, visibility: "", count: -1, timestamp: plist.get(0)),
-// query.get(0))
-
- queryTerms = new ArrayList();
- queryTerms.add("urn:gem:etype#1234");
- queryTerms.add("mydata1");
- query = service.query(plist, ProspectorConstants.COUNT, TripleValueType.subjectobject.name(), queryTerms, XMLSchema.STRING.stringValue(), auths)
-
- assertEquals(1, query.size())
-// assertEquals(
-// new IndexEntry(index: ProspectorConstants.COUNT, data: "urn:gem:etype#1234" + "\u0000" + "mydata1", dataType: XMLSchema.STRING.stringValue(),
-// tripleValueType: TripleValueType.subjectobject, visibility: "", count: -1, timestamp: plist.get(0)),
-// query.get(0))
-
- //should be in a teardown method
- connector.tableOperations().delete(outtable)
- }
-
- private void debugTable(def connector, String table) {
- connector.createScanner(table, new Authorizations((String[]) ["U", "FOUO"])).iterator().each {
- println it
- }
- }
-}
diff --git a/extras/rya.prospector/src/test/groovy/org/apache/rya/prospector/service/ProspectorServiceEvalStatsDAOTest.groovy b/extras/rya.prospector/src/test/groovy/org/apache/rya/prospector/service/ProspectorServiceEvalStatsDAOTest.groovy
deleted file mode 100644
index e518ca89a..000000000
--- a/extras/rya.prospector/src/test/groovy/org/apache/rya/prospector/service/ProspectorServiceEvalStatsDAOTest.groovy
+++ /dev/null
@@ -1,182 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.rya.prospector.service
-
-import com.google.common.collect.Iterators
-import org.apache.rya.accumulo.AccumuloRdfConfiguration
-import org.apache.rya.accumulo.AccumuloRyaDAO
-import org.apache.rya.api.domain.RyaStatement
-import org.apache.rya.api.domain.RyaType
-import org.apache.rya.api.domain.RyaURI
-import org.apache.rya.api.persist.RdfEvalStatsDAO
-import org.apache.rya.prospector.mr.Prospector
-import org.apache.accumulo.core.client.Instance
-import org.apache.accumulo.core.client.mock.MockInstance
-import org.apache.accumulo.core.security.Authorizations
-import org.apache.hadoop.conf.Configuration
-import org.apache.hadoop.fs.Path
-import org.apache.hadoop.util.ToolRunner
-import org.junit.Test
-import org.openrdf.model.impl.URIImpl
-import org.openrdf.model.vocabulary.XMLSchema
-
-import static org.junit.Assert.assertEquals
-import org.openrdf.model.impl.LiteralImpl
-import org.openrdf.model.Value
-
-/**
- * Date: 1/26/13
- * Time: 3:00 PM
- */
-class ProspectorServiceEvalStatsDAOTest {
-
- @Test
- public void testCount() throws Exception {
-
- Instance mock = new MockInstance("accumulo");
-
- def connector = mock.getConnector("user", "pass".bytes)
- def intable = "rya_spo"
- def outtable = "rya_prospects"
- if (connector.tableOperations().exists(outtable))
- connector.tableOperations().delete(outtable)
- connector.tableOperations().create(outtable)
-
- AccumuloRyaDAO ryaDAO = new AccumuloRyaDAO();
- ryaDAO.setConnector(connector);
- ryaDAO.init()
-
- ryaDAO.add(new RyaStatement(new RyaURI("urn:gem:etype#1234"), new RyaURI("urn:gem#pred"), new RyaType("mydata1")))
- ryaDAO.add(new RyaStatement(new RyaURI("urn:gem:etype#1234"), new RyaURI("urn:gem#pred"), new RyaType("mydata2")))
- ryaDAO.add(new RyaStatement(new RyaURI("urn:gem:etype#1234"), new RyaURI("urn:gem#pred"), new RyaType("12")))
- ryaDAO.add(new RyaStatement(new RyaURI("urn:gem:etype#1235"), new RyaURI("urn:gem#pred"), new RyaType(XMLSchema.INTEGER, "12")))
- ryaDAO.add(new RyaStatement(new RyaURI("urn:gem:etype#1235"), new RyaURI("urn:gem#pred1"), new RyaType("12")))
-
- def confFile = "stats_cluster_config.xml"
- def confPath = new Path(getClass().getClassLoader().getResource(confFile).toString())
- def args = (String[]) [confPath];
- ToolRunner.run(new Prospector(), args);
- debugTable(connector, outtable)
-
- def scanner = connector.createScanner(outtable, new Authorizations("U", "FOUO"))
- def iter = scanner.iterator()
-// assertEquals(11, Iterators.size(iter))
-
- ryaDAO.destroy()
-
- def conf = new Configuration()
- conf.addResource(confPath)
-// debugTable(connector, outtable)
-
- def rdfConf = new AccumuloRdfConfiguration(conf)
- rdfConf.setAuths("U","FOUO")
- def evalDao = new ProspectorServiceEvalStatsDAO(connector, rdfConf)
- evalDao.init()
-
- List values = new ArrayList();
- values.add( new URIImpl("urn:gem#pred"));
-
- def count = evalDao.getCardinality(rdfConf, RdfEvalStatsDAO.CARDINALITY_OF.PREDICATE, values)
- assertEquals(4.0, count, 0.001);
-
- values = new ArrayList();
- values.add( new LiteralImpl("mydata1"));
-
- count = evalDao.getCardinality(rdfConf, RdfEvalStatsDAO.CARDINALITY_OF.OBJECT, values);
- assertEquals(1.0, count, 0.001);
-
- values = new ArrayList();
- values.add( new LiteralImpl("mydata3"));
-
- count = evalDao.getCardinality(rdfConf, RdfEvalStatsDAO.CARDINALITY_OF.OBJECT, values);
- assertEquals(-1.0, count, 0.001);
-
- //should be in a teardown method
- connector.tableOperations().delete(outtable)
- }
-
- @Test
- public void testNoAuthsCount() throws Exception {
-
- Instance mock = new MockInstance("accumulo");
- def connector = mock.getConnector("user", "pass".bytes)
- def intable = "rya_spo"
- def outtable = "rya_prospects"
- if (connector.tableOperations().exists(outtable))
- connector.tableOperations().delete(outtable)
- connector.tableOperations().create(outtable)
- connector.securityOperations().createUser("user", "pass".bytes, new Authorizations("U", "FOUO"))
-
- AccumuloRyaDAO ryaDAO = new AccumuloRyaDAO();
- ryaDAO.setConnector(connector);
- ryaDAO.init()
-
- ryaDAO.add(new RyaStatement(new RyaURI("urn:gem:etype#1234"), new RyaURI("urn:gem#pred"), new RyaType("mydata1")))
- ryaDAO.add(new RyaStatement(new RyaURI("urn:gem:etype#1234"), new RyaURI("urn:gem#pred"), new RyaType("mydata2")))
- ryaDAO.add(new RyaStatement(new RyaURI("urn:gem:etype#1234"), new RyaURI("urn:gem#pred"), new RyaType("12")))
- ryaDAO.add(new RyaStatement(new RyaURI("urn:gem:etype#1235"), new RyaURI("urn:gem#pred"), new RyaType(XMLSchema.INTEGER, "12")))
- ryaDAO.add(new RyaStatement(new RyaURI("urn:gem:etype#1235"), new RyaURI("urn:gem#pred1"), new RyaType("12")))
-
- def confFile = "stats_cluster_config.xml"
- def confPath = new Path(getClass().getClassLoader().getResource(confFile).toString())
- def args = (String[]) [confPath];
- ToolRunner.run(new Prospector(), args);
-
- def scanner = connector.createScanner(outtable, new Authorizations("U", "FOUO"))
- def iter = scanner.iterator()
-// assertEquals(11, Iterators.size(iter))
-
- ryaDAO.destroy()
-
- def conf = new Configuration()
- conf.addResource(confPath)
-
- def rdfConf = new AccumuloRdfConfiguration(conf)
-// rdfConf.setAuths("U","FOUO")
- def evalDao = new ProspectorServiceEvalStatsDAO(connector, rdfConf)
- evalDao.init()
-
-
- List values = new ArrayList();
- values.add( new URIImpl("urn:gem#pred"));
- def count = evalDao.getCardinality(rdfConf, RdfEvalStatsDAO.CARDINALITY_OF.PREDICATE, values)
- assertEquals(4.0, count, 0.001);
-
- values = new ArrayList();
- values.add( new LiteralImpl("mydata1"));
- count = evalDao.getCardinality(rdfConf, RdfEvalStatsDAO.CARDINALITY_OF.OBJECT, values);
- assertEquals(1.0, count, 0.001);
-
- values = new ArrayList();
- values.add( new LiteralImpl("mydata3"));
-
- count = evalDao.getCardinality(rdfConf, RdfEvalStatsDAO.CARDINALITY_OF.OBJECT, values);
- assertEquals(-1.0, count, 0.001);
-
- //should be in a teardown method
- connector.tableOperations().delete(outtable)
- }
-
- private void debugTable(def connector, String table) {
- connector.createScanner(table, new Authorizations((String[]) ["U", "FOUO"])).iterator().each {
- println it
- }
- }
-}
diff --git a/extras/rya.prospector/src/test/java/org/apache/rya/prospector/mr/ProspectorTest.java b/extras/rya.prospector/src/test/java/org/apache/rya/prospector/mr/ProspectorTest.java
new file mode 100644
index 000000000..eac7aab76
--- /dev/null
+++ b/extras/rya.prospector/src/test/java/org/apache/rya/prospector/mr/ProspectorTest.java
@@ -0,0 +1,248 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.rya.prospector.mr;
+
+import static org.junit.Assert.assertEquals;
+
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map.Entry;
+
+import org.apache.accumulo.core.client.Connector;
+import org.apache.accumulo.core.client.Instance;
+import org.apache.accumulo.core.client.TableNotFoundException;
+import org.apache.accumulo.core.client.mock.MockInstance;
+import org.apache.accumulo.core.client.security.tokens.PasswordToken;
+import org.apache.accumulo.core.data.Key;
+import org.apache.accumulo.core.data.Value;
+import org.apache.accumulo.core.security.Authorizations;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.util.ToolRunner;
+import org.apache.rya.accumulo.AccumuloRdfConfiguration;
+import org.apache.rya.accumulo.AccumuloRyaDAO;
+import org.apache.rya.api.domain.RyaStatement;
+import org.apache.rya.api.domain.RyaType;
+import org.apache.rya.api.domain.RyaURI;
+import org.apache.rya.prospector.domain.IndexEntry;
+import org.apache.rya.prospector.domain.TripleValueType;
+import org.apache.rya.prospector.service.ProspectorService;
+import org.apache.rya.prospector.utils.ProspectorConstants;
+import org.junit.Test;
+import org.openrdf.model.vocabulary.XMLSchema;
+
+import com.google.common.collect.Lists;
+
+/**
+ * Tests that show when the {@link Prospector} job is run, it creates a table
+ * containing the correct count information derived from the statements that
+ * have been stored within a Rya instance.
+ */
+public class ProspectorTest {
+
+ @Test
+ public void testCount() throws Exception {
+ // Load some data into a mock Accumulo and run the Prospector MapReduce job.
+ final Instance mock = new MockInstance("accumulo");
+
+ final Connector connector = mock.getConnector("user", new PasswordToken("pass"));
+ final String outtable = "rya_prospects";
+ if (connector.tableOperations().exists(outtable)) {
+ connector.tableOperations().delete(outtable);
+ }
+ connector.tableOperations().create(outtable);
+
+ final AccumuloRyaDAO ryaDAO = new AccumuloRyaDAO();
+ ryaDAO.setConnector(connector);
+ ryaDAO.init();
+
+ ryaDAO.add(new RyaStatement(new RyaURI("urn:gem:etype#1234"), new RyaURI("urn:gem#pred"), new RyaType("mydata1")));
+ ryaDAO.add(new RyaStatement(new RyaURI("urn:gem:etype#1234"), new RyaURI("urn:gem#pred"), new RyaType("mydata2")));
+ ryaDAO.add(new RyaStatement(new RyaURI("urn:gem:etype#1234"), new RyaURI("urn:gem#pred"), new RyaType("12")));
+ ryaDAO.add(new RyaStatement(new RyaURI("urn:gem:etype#1235"), new RyaURI("urn:gem#pred"), new RyaType(XMLSchema.INTEGER, "12")));
+ ryaDAO.add(new RyaStatement(new RyaURI("urn:gem:etype#1235"), new RyaURI("urn:gem#pred1"), new RyaType("12")));
+
+ final String confFile = "stats_cluster_config.xml";
+ final Path confPath = new Path(getClass().getClassLoader().getResource(confFile).toString());
+ final String[] args = { confPath.toString() };
+ ToolRunner.run(new Prospector(), args);
+ ryaDAO.destroy();
+
+ // Interrogate the results of the Prospect job to ensure the correct results were created.
+ final Configuration conf = new Configuration();
+ conf.addResource(confPath);
+
+ final ProspectorService service = new ProspectorService(connector, outtable);
+ final String[] auths = {"U", "FOUO"};
+ Iterator prospects = service.getProspects(auths);
+ List plist = Lists.newArrayList(prospects);
+ assertEquals(1, plist.size());
+
+ final Long prospectTimestamp = plist.iterator().next();
+
+ final AccumuloRdfConfiguration rdfConf = new AccumuloRdfConfiguration(conf);
+ rdfConf.setAuths("U","FOUO");
+
+ prospects = service.getProspectsInRange(System.currentTimeMillis() - 100000, System.currentTimeMillis() + 10000, auths);
+ plist = Lists.newArrayList(prospects);
+ assertEquals(1, plist.size());
+
+ // Ensure one of the correct "entity" counts was created.
+ List queryTerms = new ArrayList<>();
+ queryTerms.add("urn:gem:etype");
+ final List entityEntries = service.query(plist, ProspectorConstants.COUNT, TripleValueType.ENTITY.getIndexType(), queryTerms, XMLSchema.ANYURI.stringValue(), auths);
+
+ final List expectedEntityEntries = Lists.newArrayList(
+ IndexEntry.builder()
+ .setIndex(ProspectorConstants.COUNT)
+ .setData("urn:gem:etype")
+ .setDataType(XMLSchema.ANYURI.stringValue())
+ .setTripleValueType( TripleValueType.ENTITY.getIndexType() )
+ .setVisibility("")
+ .setTimestamp(prospectTimestamp)
+ .setCount(new Long(5))
+ .build());
+
+ assertEquals(expectedEntityEntries, entityEntries);
+
+ // Ensure one of the correct "subject" counts was created.
+ queryTerms = new ArrayList();
+ queryTerms.add("urn:gem:etype#1234");
+ final List subjectEntries = service.query(plist, ProspectorConstants.COUNT, TripleValueType.SUBJECT.getIndexType(), queryTerms, XMLSchema.ANYURI.stringValue(), auths);
+
+ final List expectedSubjectEntries = Lists.newArrayList(
+ IndexEntry.builder()
+ .setIndex(ProspectorConstants.COUNT)
+ .setData("urn:gem:etype#1234")
+ .setDataType(XMLSchema.ANYURI.stringValue())
+ .setTripleValueType( TripleValueType.SUBJECT.getIndexType() )
+ .setVisibility("")
+ .setTimestamp(prospectTimestamp)
+ .setCount(new Long(3))
+ .build());
+
+ assertEquals(expectedSubjectEntries, subjectEntries);
+
+ // Ensure one of the correct "predicate" counts was created.
+ queryTerms = new ArrayList();
+ queryTerms.add("urn:gem#pred");
+ final List predicateEntries = service.query(plist, ProspectorConstants.COUNT, TripleValueType.PREDICATE.getIndexType(), queryTerms, XMLSchema.ANYURI.stringValue(), auths);
+
+ final List expectedPredicateEntries = Lists.newArrayList(
+ IndexEntry.builder()
+ .setIndex(ProspectorConstants.COUNT)
+ .setData("urn:gem#pred")
+ .setDataType(XMLSchema.ANYURI.stringValue())
+ .setTripleValueType( TripleValueType.PREDICATE.getIndexType() )
+ .setVisibility("")
+ .setTimestamp(prospectTimestamp)
+ .setCount(new Long(4))
+ .build());
+
+ assertEquals(expectedPredicateEntries, predicateEntries);
+
+ // Ensure one of the correct "object" counts was created.
+ queryTerms = new ArrayList();
+ queryTerms.add("mydata1");
+ final List objectEntries = service.query(plist, ProspectorConstants.COUNT, TripleValueType.OBJECT.getIndexType(), queryTerms, XMLSchema.STRING.stringValue(), auths);
+
+ final List expectedObjectEntries = Lists.newArrayList(
+ IndexEntry.builder()
+ .setIndex(ProspectorConstants.COUNT)
+ .setData("mydata1")
+ .setDataType(XMLSchema.STRING.stringValue())
+ .setTripleValueType( TripleValueType.OBJECT.getIndexType() )
+ .setVisibility("")
+ .setTimestamp(prospectTimestamp)
+ .setCount(new Long(1))
+ .build());
+
+ assertEquals(expectedObjectEntries, objectEntries);
+
+ // Ensure one of the correct "subjectpredicate" counts was created.
+ queryTerms = new ArrayList();
+ queryTerms.add("urn:gem:etype#1234");
+ queryTerms.add("urn:gem#pred");
+ final List subjectPredicateEntries = service.query(plist, ProspectorConstants.COUNT, TripleValueType.SUBJECT_PREDICATE.getIndexType(), queryTerms, XMLSchema.STRING.stringValue(), auths);
+
+ final List expectedSubjectPredicateEntries = Lists.newArrayList(
+ IndexEntry.builder()
+ .setIndex(ProspectorConstants.COUNT)
+ .setData("urn:gem:etype#1234"+ "\u0000" + "urn:gem#pred")
+ .setDataType(XMLSchema.STRING.stringValue())
+ .setTripleValueType( TripleValueType.SUBJECT_PREDICATE.getIndexType() )
+ .setVisibility("")
+ .setTimestamp(prospectTimestamp)
+ .setCount(new Long(3))
+ .build());
+
+ assertEquals(expectedSubjectPredicateEntries, subjectPredicateEntries);
+
+ // Ensure one of the correct "predicateobject" counts was created.
+ queryTerms = new ArrayList();
+ queryTerms.add("urn:gem#pred");
+ queryTerms.add("12");
+ final List predicateObjectEntries = service.query(plist, ProspectorConstants.COUNT, TripleValueType.PREDICATE_OBJECT.getIndexType(), queryTerms, XMLSchema.STRING.stringValue(), auths);
+
+ final List expectedPredicateObjectEntries = Lists.newArrayList(
+ IndexEntry.builder()
+ .setIndex(ProspectorConstants.COUNT)
+ .setData("urn:gem#pred" + "\u0000" + "12")
+ .setDataType(XMLSchema.STRING.stringValue())
+ .setTripleValueType( TripleValueType.PREDICATE_OBJECT.getIndexType() )
+ .setVisibility("")
+ .setTimestamp(prospectTimestamp)
+ .setCount(new Long(2)) // XXX This might be a bug. The object matching doesn't care about type.
+ .build());
+
+ assertEquals(expectedPredicateObjectEntries, predicateObjectEntries);
+
+ // Ensure one of the correct "subjectobject" counts was created.
+ queryTerms = new ArrayList();
+ queryTerms.add("urn:gem:etype#1234");
+ queryTerms.add("mydata1");
+ final List subjectObjectEntries = service.query(plist, ProspectorConstants.COUNT, TripleValueType.SUBJECT_OBJECT.getIndexType(), queryTerms, XMLSchema.STRING.stringValue(), auths);
+
+ final List expectedSubjectObjectEntries = Lists.newArrayList(
+ IndexEntry.builder()
+ .setIndex(ProspectorConstants.COUNT)
+ .setData("urn:gem:etype#1234" + "\u0000" + "mydata1")
+ .setDataType(XMLSchema.STRING.stringValue())
+ .setTripleValueType( TripleValueType.SUBJECT_OBJECT.getIndexType() )
+ .setVisibility("")
+ .setTimestamp(prospectTimestamp)
+ .setCount(new Long(1))
+ .build());
+
+ assertEquals(expectedSubjectObjectEntries, subjectObjectEntries);
+ }
+
+ /**
+ * Prints the content of an Accumulo table to standard out. Only use this when
+ * debugging the test.
+ */
+ private void debugTable(Connector connector, String table) throws TableNotFoundException {
+     final Authorizations auths = new Authorizations(new String[]{"U", "FOUO"});
+     // Iterate the scanner directly; every entry it yields is printed unchanged.
+     for (final Entry<?, ?> entry : connector.createScanner(table, auths)) {
+         System.out.println( entry );
+     }
+ }
+}
\ No newline at end of file
diff --git a/extras/rya.prospector/src/test/java/org/apache/rya/prospector/service/ProspectorServiceEvalStatsDAOTest.java b/extras/rya.prospector/src/test/java/org/apache/rya/prospector/service/ProspectorServiceEvalStatsDAOTest.java
new file mode 100644
index 000000000..f04874212
--- /dev/null
+++ b/extras/rya.prospector/src/test/java/org/apache/rya/prospector/service/ProspectorServiceEvalStatsDAOTest.java
@@ -0,0 +1,181 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.rya.prospector.service;
+
+import static org.junit.Assert.assertEquals;
+
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map.Entry;
+
+import org.apache.accumulo.core.client.Connector;
+import org.apache.accumulo.core.client.Instance;
+import org.apache.accumulo.core.client.TableNotFoundException;
+import org.apache.accumulo.core.client.mock.MockInstance;
+import org.apache.accumulo.core.client.security.tokens.PasswordToken;
+import org.apache.accumulo.core.data.Key;
+import org.apache.accumulo.core.security.Authorizations;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.util.ToolRunner;
+import org.apache.rya.accumulo.AccumuloRdfConfiguration;
+import org.apache.rya.accumulo.AccumuloRyaDAO;
+import org.apache.rya.api.domain.RyaStatement;
+import org.apache.rya.api.domain.RyaType;
+import org.apache.rya.api.domain.RyaURI;
+import org.apache.rya.api.persist.RdfEvalStatsDAO;
+import org.apache.rya.api.persist.RdfEvalStatsDAO.CARDINALITY_OF;
+import org.apache.rya.prospector.mr.Prospector;
+import org.junit.Test;
+import org.openrdf.model.Value;
+import org.openrdf.model.impl.LiteralImpl;
+import org.openrdf.model.impl.URIImpl;
+import org.openrdf.model.vocabulary.XMLSchema;
+
+/**
+ * Tests that show when the {@link Prospector} job is run, the
+ * {@link ProspectorServiceEvalStatsDAO} may be used to fetch cardinality
+ * information from the prospect table.
+ */
+public class ProspectorServiceEvalStatsDAOTest {
+
+    /**
+     * Checks the cardinalities reported when the "U" and "FOUO" authorizations
+     * are set on the {@link AccumuloRdfConfiguration} handed to the DAO.
+     */
+    @Test
+    public void testCount() throws Exception {
+        final Connector connector = createConnectorWithEmptyProspectTable();
+        final Configuration conf = loadStatementsAndRunProspector(connector);
+
+        final AccumuloRdfConfiguration rdfConf = new AccumuloRdfConfiguration(conf);
+        rdfConf.setAuths("U","FOUO");
+        final ProspectorServiceEvalStatsDAO evalDao = new ProspectorServiceEvalStatsDAO(connector, rdfConf);
+        evalDao.init();
+
+        assertExpectedCardinalities(evalDao, rdfConf);
+    }
+
+    /**
+     * Checks the same cardinalities as {@link #testCount()} without calling
+     * {@code setAuths} on the configuration; the "U" and "FOUO" authorizations
+     * are given to the Accumulo user instead.
+     */
+    @Test
+    public void testNoAuthsCount() throws Exception {
+        final Connector connector = createConnectorWithEmptyProspectTable();
+        connector.securityOperations().createUser("user", "pass".getBytes(), new Authorizations("U", "FOUO"));
+        final Configuration conf = loadStatementsAndRunProspector(connector);
+
+        final AccumuloRdfConfiguration rdfConf = new AccumuloRdfConfiguration(conf);
+        final ProspectorServiceEvalStatsDAO evalDao = new ProspectorServiceEvalStatsDAO(connector, rdfConf);
+        evalDao.init();
+
+        assertExpectedCardinalities(evalDao, rdfConf);
+    }
+
+    /**
+     * Connects to the mock Accumulo instance as "user" and makes sure the
+     * "rya_prospects" table exists and is empty.
+     *
+     * @return the connector used to talk to the mock instance
+     */
+    private Connector createConnectorWithEmptyProspectTable() throws Exception {
+        final Instance mock = new MockInstance("accumulo");
+        final Connector connector = mock.getConnector("user", new PasswordToken("pass"));
+
+        // Drop the table if it already exists so that a fresh one can be created.
+        final String outtable = "rya_prospects";
+        if (connector.tableOperations().exists(outtable)) {
+            connector.tableOperations().delete(outtable);
+        }
+        connector.tableOperations().create(outtable);
+        return connector;
+    }
+
+    /**
+     * Writes five statements through an {@link AccumuloRyaDAO}, runs the
+     * {@link Prospector} tool with "stats_cluster_config.xml" and returns a
+     * {@link Configuration} loaded from that same resource.
+     *
+     * @param connector the connector the statements are written with
+     * @return the configuration read from "stats_cluster_config.xml"
+     */
+    private Configuration loadStatementsAndRunProspector(final Connector connector) throws Exception {
+        final AccumuloRyaDAO ryaDAO = new AccumuloRyaDAO();
+        ryaDAO.setConnector(connector);
+        ryaDAO.init();
+
+        ryaDAO.add(new RyaStatement(new RyaURI("urn:gem:etype#1234"), new RyaURI("urn:gem#pred"), new RyaType("mydata1")));
+        ryaDAO.add(new RyaStatement(new RyaURI("urn:gem:etype#1234"), new RyaURI("urn:gem#pred"), new RyaType("mydata2")));
+        ryaDAO.add(new RyaStatement(new RyaURI("urn:gem:etype#1234"), new RyaURI("urn:gem#pred"), new RyaType("12")));
+        ryaDAO.add(new RyaStatement(new RyaURI("urn:gem:etype#1235"), new RyaURI("urn:gem#pred"), new RyaType(XMLSchema.INTEGER, "12")));
+        ryaDAO.add(new RyaStatement(new RyaURI("urn:gem:etype#1235"), new RyaURI("urn:gem#pred1"), new RyaType("12")));
+
+        final String confFile = "stats_cluster_config.xml";
+        final Path confPath = new Path(getClass().getClassLoader().getResource(confFile).toString());
+        final String[] args = { confPath.toString() };
+        ToolRunner.run(new Prospector(), args);
+
+        ryaDAO.destroy();
+
+        final Configuration conf = new Configuration();
+        conf.addResource(confPath);
+        return conf;
+    }
+
+    /**
+     * Asserts the cardinalities expected for the statements written by
+     * {@link #loadStatementsAndRunProspector(Connector)}.
+     *
+     * @param evalDao the initialized {@link RdfEvalStatsDAO} to query
+     * @param rdfConf the configuration passed along with every query
+     */
+    private void assertExpectedCardinalities(final ProspectorServiceEvalStatsDAO evalDao, final AccumuloRdfConfiguration rdfConf) throws Exception {
+        // Four of the five statements use the 'urn:gem#pred' predicate.
+        List<Value> values = new ArrayList<>();
+        values.add( new URIImpl("urn:gem#pred") );
+        double count = evalDao.getCardinality(rdfConf, CARDINALITY_OF.PREDICATE, values);
+        assertEquals(4.0, count, 0.001);
+
+        // Exactly one statement has the 'mydata1' object.
+        values = new ArrayList<>();
+        values.add( new LiteralImpl("mydata1") );
+        count = evalDao.getCardinality(rdfConf, CARDINALITY_OF.OBJECT, values);
+        assertEquals(1.0, count, 0.001);
+
+        // No statement uses 'mydata3'; the test expects -1 for that case.
+        values = new ArrayList<>();
+        values.add( new LiteralImpl("mydata3") );
+        count = evalDao.getCardinality(rdfConf, CARDINALITY_OF.OBJECT, values);
+        assertEquals(-1.0, count, 0.001);
+    }
+
+    /**
+     * Prints the content of an Accumulo table to standard out. Only use this when
+     * debugging the test.
+     */
+    private void debugTable(Connector connector, String table) throws TableNotFoundException {
+        final Iterator<Entry<Key, org.apache.accumulo.core.data.Value>> it = connector.createScanner(table, new Authorizations(new String[]{"U", "FOUO"})).iterator();
+        while(it.hasNext()) {
+            System.out.println( it.next() );
+        }
+    }
+}
\ No newline at end of file
diff --git a/pom.xml b/pom.xml
index ef2e7d877..4f7148a57 100644
--- a/pom.xml
+++ b/pom.xml
@@ -94,7 +94,6 @@ under the License.
1.6
1.3
- 2.3.11
14.0.1
@@ -400,24 +399,6 @@ under the License.
${embed.mongo.version}
-
-
- org.codehaus.groovy
- groovy-all
- ${groovy.version}
-
-
- org.codehaus.gmaven.runtime
- gmaven-runtime-1.7
- ${gmaven.version}
-
-
- org.codehaus.groovy
- groovy-all
-
-
-
-
org.springframework.shell
@@ -774,42 +755,6 @@ under the License.
v@{project.version}
-
- org.codehaus.gmaven
- gmaven-plugin
- ${gmaven.version}
-
-
- org.codehaus.groovy
- groovy-all
- ${groovy.version}
-
-
- org.codehaus.gmaven.runtime
- gmaven-runtime-1.7
- ${gmaven.version}
-
-
- org.codehaus.groovy
- groovy-all
-
-
-
-
-
-
-
- 1.7
-
-
- generateStubs
- compile
- generateTestStubs
- testCompile
-
-
-
-
org.apache.maven.plugins
maven-shade-plugin
diff --git a/sail/src/test/java/org/apache/rya/rdftriplestore/evaluation/QueryJoinSelectOptimizerTest.java b/sail/src/test/java/org/apache/rya/rdftriplestore/evaluation/QueryJoinSelectOptimizerTest.java
index daf446c90..63c5d134e 100644
--- a/sail/src/test/java/org/apache/rya/rdftriplestore/evaluation/QueryJoinSelectOptimizerTest.java
+++ b/sail/src/test/java/org/apache/rya/rdftriplestore/evaluation/QueryJoinSelectOptimizerTest.java
@@ -642,6 +642,10 @@ public void testOptimizeQ4() throws Exception {
QueryJoinSelectOptimizer qjs = new QueryJoinSelectOptimizer(ars, accc);
System.out.println("Originial query is " + te);
qjs.optimize(te, null, null);
+
+ TupleExpr what = getTupleExpr(Q4);
+ System.out.println("lolol: \n" + what);
+
Assert.assertTrue(te.equals(getTupleExpr(Q4)));
System.out.print("Optimized query is " + te);