Skip to content
Permalink
Browse files
Merge branch 'trunk' into parallel-merge
  • Loading branch information
Ewocker committed Mar 21, 2022
2 parents 5b06983 + a316bb3 commit 635df524282498ab2e6bf9a8efe71d730c3c54bd
Showing 37 changed files with 1,885 additions and 137 deletions.
@@ -57,6 +57,11 @@ public class IndexImporter {
* Symbolic name used to indicate sync indexes
*/
static final String ASYNC_LANE_SYNC = "sync";
/*
* System property name of the flag that preserves the checkpoint. If this is set to true, then checkpoint cleanup will be skipped.
* Default is set to false.
*/
public static final String OAK_INDEX_IMPORTER_PRESERVE_CHECKPOINT = "oak.index.importer.preserveCheckpoint";

private final Logger log = LoggerFactory.getLogger(getClass());
private final NodeStore nodeStore;
@@ -69,6 +74,7 @@ public class IndexImporter {
private final IndexEditorProvider indexEditorProvider;
private final AsyncIndexerLock indexerLock;
private final IndexDefinitionUpdater indexDefinitionUpdater;
private final boolean preserveCheckpoint = Boolean.getBoolean(OAK_INDEX_IMPORTER_PRESERVE_CHECKPOINT);

public IndexImporter(NodeStore nodeStore, File indexDir, IndexEditorProvider indexEditorProvider,
AsyncIndexerLock indexerLock) throws IOException {
@@ -290,8 +296,14 @@ static String getAsyncLaneName(String indexPath, NodeState indexState) {
}

/**
 * Releases the checkpoint referenced by {@code indexerInfo}, unless the
 * {@code preserveCheckpoint} flag is set. When the flag is set the checkpoint
 * is left untouched and only a log entry is written.
 */
private void releaseCheckpoint() {
    if (preserveCheckpoint) {
        // Nothing must be released on this path; an unconditional release
        // ahead of this check would make the flag ineffective.
        log.info("Preserving the referred checkpoint [{}]. This could have been done in case this checkpoint is needed by a process later on." +
                " Please make sure to remove the checkpoint once it's no longer needed.", indexerInfo.checkpoint);
        return;
    }
    nodeStore.release(indexerInfo.checkpoint);
    log.info("Released the referred checkpoint [{}]", indexerInfo.checkpoint);
}

private void incrementReIndexCount(NodeBuilder definition) {
@@ -14,7 +14,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
@Version("0.1.0")
@Version("0.2.0")
package org.apache.jackrabbit.oak.plugins.index.importer;

import org.osgi.annotation.versioning.Version;
@@ -536,7 +536,7 @@ Clients wanting to obtain spellchecks could use the following JCR code:
RowIterator it = result.getRows();
String spellchecks = "";
if (it.hasNext()) {
    // RowIterator exposes rows via nextRow(); values are read from the Row.
    spellchecks = it.nextRow().getValue("rep:spellcheck()").getString();
}

The `spellchecks` String would have the following pattern `\[[\w|\W]+(\,\s[\w|\W]+)*\]`, e.g.:
@@ -551,7 +551,7 @@ The `spellchecks` String would be have the following pattern `\[[\w|\W]+(\,\s[\w
RowIterator it = result.getRows();
List<String> spellchecks = new LinkedList<String>();
while (it.hasNext()) {
    // nextRow() advances the iterator; without it the loop would never terminate.
    spellchecks.add(it.nextRow().getValue("rep:spellcheck()").getString());
}

If either Lucene or Solr were configured to provide the spellcheck feature, see
@@ -587,7 +587,7 @@ Clients wanting to obtain suggestions could use the following JCR code:
RowIterator it = result.getRows();
String suggestions = "";
if (it.hasNext()) {
    // RowIterator exposes rows via nextRow(); values are read from the Row.
    suggestions = it.nextRow().getValue("rep:suggest()").getString();
}

The `suggestions` String would have the following pattern
@@ -605,7 +605,7 @@ The `suggestions` String would be have the following pattern
RowIterator it = result.getRows();
List<String> suggestions = new LinkedList<String>();
while (it.hasNext()) {
    // nextRow() advances the iterator; without it the loop would never terminate.
    suggestions.add(it.nextRow().getValue("rep:suggest()").getString());
}

If either Lucene or Solr were configured to provide the suggestions feature,
@@ -778,4 +778,4 @@ In this case, no escaping is needed. Example patterns are:
Patterns are evaluated in alphabetical order.
They are only read once, at startup.

See also [OAK-8294](https://issues.apache.org/jira/browse/OAK-8294)
See also [OAK-8294](https://issues.apache.org/jira/browse/OAK-8294)
@@ -159,6 +159,31 @@ under the License.
<item href="http://jackrabbit.apache.org/oak" name="Apache Jackrabbit Oak" />
<item href="http://jackrabbit.apache.org/" name="Apache Jackrabbit" />
</menu>

<head><![CDATA[
<!-- Matomo -->
<script>
var _paq = window._paq = window._paq || [];
/* tracker methods like "setCustomDimension" should be called before "trackPageView" */
/* We explicitly disable cookie tracking to avoid privacy issues */
_paq.push(['disableCookies']);
_paq.push(['trackPageView']);
_paq.push(['enableLinkTracking']);
(function() {
var u="https://matomo.privacy.apache.org/";
_paq.push(['setTrackerUrl', u+'matomo.php']);
_paq.push(['setSiteId', '4']);
var d=document, g=d.createElement('script'), s=d.getElementsByTagName('script')[0];
g.async=true; g.src=u+'matomo.js'; s.parentNode.insertBefore(g,s);
})();
</script>
<!-- End Matomo Code -->
]]></head>
<footer><![CDATA[
<p>&#169; 2012-$date.get('yyyy')
<a href="https://www.apache.org/">The Apache Software Foundation</a> &vert; <a href="https://privacy.apache.org/policies/privacy-policy-public.html">Privacy Policy</a>
</p>
]]></footer>
</body>

<skin>
@@ -56,9 +56,15 @@ public class LuceneDocumentMaker extends FulltextDocumentMaker<Document> {
private static final Logger log = LoggerFactory.getLogger(LuceneDocumentMaker.class);

private static final String DYNAMIC_BOOST_SPLIT_REGEX = "[:/]";

// warn once every 10 seconds at most
private static final long DUPLICATE_WARNING_INTERVAL_MS = 10 * 1000;

private final FacetsConfigProvider facetsConfigProvider;
private final IndexAugmentorFactory augmentorFactory;

// when did we warn (static, as we construct new objects quite often)
private static long lastDuplicateWarning;

public LuceneDocumentMaker(IndexDefinition definition,
IndexDefinition.IndexingRule indexingRule,
@@ -283,8 +289,16 @@ protected boolean indexTypeOrderedFields(Document doc, String pname, int tag, Pr
}

if (f != null && includePropertyValue(property, 0, pd)) {
doc.add(f);
fieldAdded = true;
if (doc.getField(f.name()) == null) {
doc.add(f);
fieldAdded = true;
} else {
long now = System.currentTimeMillis();
if (now > lastDuplicateWarning + DUPLICATE_WARNING_INTERVAL_MS) {
log.warn("Duplicate value for ordered field {}; ignoring. Possibly duplicate index definition.", f.name());
lastDuplicateWarning = now;
}
}
}
} catch (Exception e) {
log.warn(
@@ -1117,6 +1117,56 @@ public void sameOrderableRelPropWithAndWithoutFunc_checkOrdering() throws Except

}

/**
 * Index definition in which four property definitions use the very same
 * function expression; the first two of them are additionally marked as
 * ordered. Ordering by that function must still be answered by the index,
 * both ascending and descending.
 */
@Test
public void duplicateFunctionInIndex() throws Exception {
    final String upperCaseOfFoo = "fn:upper-case(jcr:content/n/@foo)";

    Tree indexTree = createIndex("upper", Collections.<String>emptySet());
    Tree properties = indexTree.addChild(FulltextIndexConstants.INDEX_RULES)
            .addChild("nt:base")
            .addChild(FulltextIndexConstants.PROP_NODE);

    // upper1 and upper2: ordered + function; upper3 and upper4: function only
    String[] definitionNames = {"upper1", "upper2", "upper3", "upper4"};
    for (int k = 0; k < definitionNames.length; k++) {
        Tree definition = properties.addChild(definitionNames[k]);
        if (k < 2) {
            definition.setProperty(FulltextIndexConstants.PROP_ORDERED, true);
        }
        definition.setProperty(FulltextIndexConstants.PROP_FUNCTION, upperCaseOfFoo);
    }

    root.commit();

    // Content covered by the index: /<name>/jcr:content/n with foo = "bar1" .. "bar5",
    // numbered in creation order.
    String[] nodeNames = {"a", "c", "b", "e", "d"};
    for (int k = 0; k < nodeNames.length; k++) {
        Tree top = root.getTree("/").addChild(nodeNames[k]);
        top.setProperty("jcr:primaryType", "nt:unstructured", Type.NAME);

        Tree content = top.addChild("jcr:content");
        content.setProperty("jcr:primaryType", "nt:unstructured", Type.NAME);

        Tree leaf = content.addChild("n");
        leaf.setProperty("jcr:primaryType", "nt:unstructured", Type.NAME);
        leaf.setProperty("foo", "bar" + (k + 1));
    }

    root.commit();

    // Ascending order follows the creation order, because foo grows with it.
    assertOrderedPlanAndQuery(
            "select * from [nt:base] order by upper([jcr:content/n/foo])",
            "lucene:upper(/oak:index/upper)",
            asList("/a", "/c", "/b", "/e", "/d"));

    // Descending order is the exact reverse.
    assertOrderedPlanAndQuery(
            "select * from [nt:base] order by upper([jcr:content/n/foo]) DESC",
            "lucene:upper(/oak:index/upper)",
            asList("/d", "/e", "/b", "/c", "/a"));
}

/*
Given an index def with 2 orderable property definitions(non-relative) for same property - one with function and one without
Indexer should index any changes properly and ordering should work as expected.
@@ -44,7 +44,7 @@
<project.reporting.outputEncoding>
${project.build.sourceEncoding}
</project.reporting.outputEncoding>
<jackrabbit.version>2.20.4</jackrabbit.version>
<jackrabbit.version>2.20.5</jackrabbit.version>
<mongo.host>127.0.0.1</mongo.host>
<mongo.port>27017</mongo.port>
<mongo.db>MongoMKDB</mongo.db>
@@ -60,6 +60,11 @@
<groupId>org.apache.jackrabbit</groupId>
<artifactId>oak-segment-tar</artifactId>
<version>${project.version}</version>
</dependency>
<dependency>
<groupId>org.apache.jackrabbit</groupId>
<artifactId>oak-search</artifactId>
<version>${project.version}</version>
</dependency>
<dependency>
<groupId>org.apache.jackrabbit</groupId>
@@ -54,6 +54,7 @@ public class IndexOptions implements OptionsBean {
private final OptionSpec<Void> importIndex;
private final OptionSpec<Void> docTraversal;
private final OptionSpec<Void> enableCowCor;
private final OptionSpec<Void> buildFlatFileStoreSeparately;
private final OptionSpec<Integer> consistencyCheck;
private final OptionSpec<Long> asyncDelay;
protected OptionSet options;
@@ -108,6 +109,7 @@ public IndexOptions(OptionParser parser){
docTraversal = parser.accepts("doc-traversal-mode", "Use Document traversal mode for reindex in " +
"DocumentNodeStore setups. This may provide better performance in some cases (experimental)");
enableCowCor = parser.accepts("enable-cow-cor", "Enables COW/COR during async indexing using oak-run");
buildFlatFileStoreSeparately = parser.accepts("build-flatfilestore-separately", "Builds FlatFileStore as a separate step and then uses it as part of the doc-traversal-mode for reindexing");

indexImportDir = parser.accepts("index-import-dir", "Directory containing index files. This " +
"is required when --index-import operation is selected")
@@ -221,6 +223,10 @@ public boolean isCowCorEnabled() {
return options.has(enableCowCor);
}

/**
 * Tells whether the {@code build-flatfilestore-separately} option is present
 * in the parsed options.
 *
 * @return {@code true} if the option was given
 */
public boolean buildFlatFileStoreSeparately() {
    final boolean requested = options.has(buildFlatFileStoreSeparately);
    return requested;
}

/**
 * Looks up the value of the {@code checkpoint} option spec in the parsed options.
 *
 * @return whatever the option spec yields for the parsed options
 */
public String getCheckpoint() {
    final String value = checkpoint.value(options);
    return value;
}
@@ -113,7 +113,7 @@ public NodeState retrieveNodeStateForCheckpoint() {
return checkpointedState;
}

private void updateIndexDefinitions(NodeBuilder rootBuilder) throws IOException, CommitFailedException {
public void updateIndexDefinitions(NodeBuilder rootBuilder) throws IOException, CommitFailedException {
if (indexDefinitions != null) {
new IndexDefinitionUpdater(indexDefinitions).apply(rootBuilder);
}
@@ -27,6 +27,7 @@
import java.util.List;
import java.util.Set;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.function.Predicate;

import com.codahale.metrics.MetricRegistry;
import com.google.common.base.Stopwatch;
@@ -51,10 +52,12 @@
import org.apache.jackrabbit.oak.plugins.index.NodeTraversalCallback;
import org.apache.jackrabbit.oak.plugins.index.progress.IndexingProgressReporter;
import org.apache.jackrabbit.oak.plugins.index.progress.MetricRateEstimator;
import org.apache.jackrabbit.oak.plugins.index.search.IndexDefinition;
import org.apache.jackrabbit.oak.plugins.memory.MemoryNodeStore;
import org.apache.jackrabbit.oak.plugins.metric.MetricStatisticsProvider;
import org.apache.jackrabbit.oak.spi.commit.CommitInfo;
import org.apache.jackrabbit.oak.spi.commit.EmptyHook;
import org.apache.jackrabbit.oak.spi.filter.PathFilter;
import org.apache.jackrabbit.oak.spi.state.NodeBuilder;
import org.apache.jackrabbit.oak.spi.state.NodeState;
import org.apache.jackrabbit.oak.spi.state.NodeStateUtils;
@@ -64,6 +67,7 @@
import org.slf4j.LoggerFactory;

import static com.google.common.base.Preconditions.checkNotNull;
import static org.apache.jackrabbit.oak.index.indexer.document.flatfile.FlatFileNodeStoreBuilder.OAK_INDEXER_SORTED_FILE_PATH;
import static org.apache.jackrabbit.oak.plugins.index.IndexConstants.TYPE_PROPERTY_NAME;

public abstract class DocumentStoreIndexerBase implements Closeable{
@@ -101,15 +105,23 @@ private static class MongoNodeStateEntryTraverserFactory implements NodeStateEnt
private final MongoDocumentStore documentStore;
private final Logger traversalLogger;
private final CompositeIndexer indexer;
private final Predicate<String> pathPredicate;


/**
 * Creates a factory without an explicit path predicate; delegates to the
 * six-argument constructor with {@code null} as the predicate.
 */
private MongoNodeStateEntryTraverserFactory(
        RevisionVector rootRevision,
        DocumentNodeStore documentNodeStore,
        MongoDocumentStore documentStore,
        Logger traversalLogger,
        CompositeIndexer indexer) {
    this(
            rootRevision,
            documentNodeStore,
            documentStore,
            traversalLogger,
            indexer,
            null);
}

/**
 * Creates a factory and stores every argument in the field of the same name.
 *
 * @param pathPredicate optional path predicate; may be {@code null}
 *                      (the five-argument constructor passes {@code null})
 */
private MongoNodeStateEntryTraverserFactory(
        RevisionVector rootRevision,
        DocumentNodeStore documentNodeStore,
        MongoDocumentStore documentStore,
        Logger traversalLogger,
        CompositeIndexer indexer,
        Predicate<String> pathPredicate) {
    // traversal behaviour
    this.pathPredicate = pathPredicate;
    this.indexer = indexer;
    this.traversalLogger = traversalLogger;
    // store access
    this.documentStore = documentStore;
    this.documentNodeStore = documentNodeStore;
    this.rootRevision = rootRevision;
}

@Override
@@ -129,11 +141,15 @@ public NodeStateEntryTraverser create(MongoDocumentTraverser.TraversingRange tra
}
traversalLogger.trace(id);
})
.withPathPredicate(indexer::shouldInclude);
.withPathPredicate((pathPredicate != null) ? pathPredicate : indexer::shouldInclude);
}
}

/**
 * Convenience overload: builds the FlatFileStore without an explicit path
 * predicate and without explicit preferred path elements (both passed as
 * {@code null} to the four-argument variant).
 */
private FlatFileStore buildFlatFileStore(NodeState checkpointedState, CompositeIndexer indexer) throws IOException {
    final Predicate<String> noPathPredicate = null;
    final Set<String> noPreferredPathElements = null;
    return buildFlatFileStore(checkpointedState, indexer, noPathPredicate, noPreferredPathElements);
}

private FlatFileStore buildFlatFileStore(NodeState checkpointedState, CompositeIndexer indexer, Predicate<String> pathPredicate, Set<String> preferredPathElements) throws IOException {

Stopwatch flatFileStoreWatch = Stopwatch.createStarted();
int executionCount = 1;
@@ -154,10 +170,10 @@ private FlatFileStore buildFlatFileStore(NodeState checkpointedState, CompositeI
builder = new FlatFileNodeStoreBuilder(indexHelper.getWorkDir(), memoryManager)
.withLastModifiedBreakPoints(lastModifiedBreakPoints)
.withBlobStore(indexHelper.getGCBlobStore())
.withPreferredPathElements(indexer.getRelativeIndexedNodeNames())
.withPreferredPathElements((preferredPathElements != null) ? preferredPathElements : indexer.getRelativeIndexedNodeNames())
.addExistingDataDumpDir(indexerSupport.getExistingDataDumpDir())
.withNodeStateEntryTraverserFactory(new MongoNodeStateEntryTraverserFactory(rootDocumentState.getRootRevision(),
nodeStore, getMongoDocumentStore(), traversalLog, indexer));
nodeStore, getMongoDocumentStore(), traversalLog, indexer, pathPredicate));
for (File dir : previousDownloadDirs) {
builder.addExistingDataDumpDir(dir);
}
@@ -188,6 +204,36 @@ private FlatFileStore buildFlatFileStore(NodeState checkpointedState, CompositeI
return flatFileStore;
}

/**
 * Builds a FlatFileStore for the indexes listed by {@code indexHelper.getIndexPaths()}.
 * <p>
 * The index definitions are applied to a builder of an in-memory node store
 * seeded with the checkpointed state. For every index path an
 * {@link IndexDefinition} is created; their relative node names become the
 * preferred path elements, and a path is accepted as soon as the path filter
 * of at least one definition does not answer {@code EXCLUDE}.
 *
 * @return an instance of FlatFileStore, whose getFlatFileStorePath() method can be used to get the path to this store
 * @throws IOException propagated from updating the index definitions or from building the store
 * @throws CommitFailedException propagated from updating the index definitions
 */
public FlatFileStore buildFlatFileStore() throws IOException, CommitFailedException {
    NodeState checkpointedState = indexerSupport.retrieveNodeStateForCheckpoint();
    NodeBuilder rootBuilder = new MemoryNodeStore(checkpointedState).getRoot().builder();
    // The root state is captured before the index definitions are updated.
    NodeState root = rootBuilder.getNodeState();
    indexerSupport.updateIndexDefinitions(rootBuilder);

    IndexDefinition.Builder definitionFactory = new IndexDefinition.Builder();
    Set<IndexDefinition> definitions = new HashSet<>();
    Set<String> preferredPathElements = new HashSet<>();
    for (String indexPath : indexHelper.getIndexPaths()) {
        NodeState indexState = IndexerSupport.childBuilder(rootBuilder, indexPath, false).getNodeState();
        IndexDefinition definition = definitionFactory
                .defn(indexState)
                .indexPath(indexPath)
                .root(root)
                .build();
        preferredPathElements.addAll(definition.getRelativeNodeNames());
        definitions.add(definition);
    }

    // Accept a path when any definition's path filter does not exclude it.
    Predicate<String> notExcludedEverywhere = path -> {
        for (IndexDefinition definition : definitions) {
            if (definition.getPathFilter().filter(path) != PathFilter.Result.EXCLUDE) {
                return true;
            }
        }
        return false;
    };

    // No indexer is available here, hence null; predicate and path elements are supplied explicitly.
    FlatFileStore store = buildFlatFileStore(checkpointedState, null, notExcludedEverywhere, preferredPathElements);
    log.info("FlatFileStore built at {}. To use this flatFileStore in a reindex step, set System Property-{} with value {}",
            store.getFlatFileStorePath(), OAK_INDEXER_SORTED_FILE_PATH, store.getFlatFileStorePath());
    return store;
}

public void reindex() throws CommitFailedException, IOException {
IndexingProgressReporter progressReporter =
new IndexingProgressReporter(IndexUpdateCallback.NOOP, NodeTraversalCallback.NOOP);
@@ -55,7 +55,6 @@ public class FlatFileNodeStoreBuilder {
public static final String OAK_INDEXER_SORT_STRATEGY_TYPE = "oak.indexer.sortStrategyType";
/**
 * Name of the system property {@code oak.indexer.sortedFilePath}.
 * Public because DocumentStoreIndexerBase, which lives in another package,
 * references this constant through a static import; a private constant
 * cannot be imported that way.
 */
public static final String OAK_INDEXER_SORTED_FILE_PATH = "oak.indexer.sortedFilePath";


/**
* Default value for {@link #PROP_THREAD_POOL_SIZE}
*/

0 comments on commit 635df52

Please sign in to comment.