diff --git a/oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/tree/TreeStoreNodeState.java b/oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/tree/TreeStoreNodeState.java index 9e58b60b9e5..cb4dbe796f5 100644 --- a/oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/tree/TreeStoreNodeState.java +++ b/oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/tree/TreeStoreNodeState.java @@ -34,15 +34,54 @@ import org.apache.jackrabbit.oak.spi.state.NodeStateDiff; import org.jetbrains.annotations.NotNull; import org.jetbrains.annotations.Nullable; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; /** * A node state of an Oak node that is stored in a tree store. * - * This is mostly a wrapper. It allows iterating over the children and reading + *

This is mostly a wrapper. It allows iterating over the children and reading * children directly. + * + *

<h2>Storage Layout</h2>

+ * + *

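The underlying key-value store holds two kinds of entries: + * + * <ul>
+ * <li>a <em>node entry</em>: the key is the node path and the value is the node data as JSON;</li>
+ * <li>a <em>child-reference entry</em>: the key is the parent path, a tab character, and the child name, and the value is empty.</li>
+ * </ul> + * + *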

For a tree that contains the nodes {@code /}, {@code /content}, + * {@code /content/dam}, and {@code /content/site}, the store holds these entries + * (shown here in sort order): + * + * <pre>
+ *   /               → {"jcr:primaryType":"rep:root", ...}   (node entry)
+ *   /\tcontent      → ""                                     (child-reference entry)
+ *   /content        → {"jcr:primaryType":"nt:folder", ...}  (node entry)
+ *   /content\tdam   → ""                                     (child-reference entry)
+ *   /content\tsite  → ""                                     (child-reference entry)
+ *   /content/dam    → {"jcr:primaryType":"sling:Folder",...} (node entry)
+ *   /content/site   → {"jcr:primaryType":"sling:Folder",...} (node entry)
+ * </pre>
+ * + *

The tab character ({@code \t}, U+0009) sorts before the slash ({@code /}, + * U+002F) and before any letter, so a node's child-reference entries always sort + * immediately after the node entry itself and before any of its descendants. + * This makes it efficient to list a node's direct children: start the scan just after + * the node entry, consume entries that have an empty value and a key starting with + * this node's path followed by a tab (each is one child name), and stop at the first + * entry that is anything else: either a non-empty value (the start of the next node's + * data) or a child-reference entry of a different node (this happens when a node + * entry is missing from the store). + * + *

See {@link TreeStore#toChildNodeEntry(String)} for the method that converts a + * node path to its corresponding child-reference key. */ public class TreeStoreNodeState implements NodeState, MemoryObject { + private static final Logger LOG = LoggerFactory.getLogger(TreeStoreNodeState.class); + private final NodeState delegate; private final String path; private final TreeStore treeStore; @@ -180,6 +219,15 @@ private Iterator getChildNodeIterator() { s -> treeStore.getNodeStateEntry(PathUtils.concat(path, s))); } + /** + * Returns an iterator over the names of this node's direct children. + * + *

The iterator relies on the child-reference entries described in the + * class-level documentation: it scans the entries that immediately follow + * this node's own entry in the store, collecting the child names encoded + * in those keys, and stops as soon as it reaches a non-empty value (which + * indicates the start of the next node's data rather than a child reference). + */ Iterator getChildNodeNamesIterator() { Iterator> it = treeStore.getSession().iterator(path); return new Iterator() { @@ -201,7 +249,15 @@ private void fetch() { if (index < 0) { throw new IllegalArgumentException(key); } - current = key.substring(index + 1); + if (!key.startsWith(path + "\t")) { + // this is the child of a _different_ node: + // that means this node doesn't have a child + String missingChild = key.substring(0, index); + LOG.warn("Missing node {} when listing children of {}", missingChild, path); + current = null; + } else { + current = key.substring(index + 1); + } } } } diff --git a/oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/tree/store/TreeSession.java b/oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/tree/store/TreeSession.java index eb724c631aa..783cb08bdbd 100644 --- a/oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/tree/store/TreeSession.java +++ b/oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/tree/store/TreeSession.java @@ -536,7 +536,7 @@ public void flush() { // this is fast: internally, a stack of Position object is kept /** - * Get all entries. Do not add or move entries while + * Get all entries in key order. Do not add or move entries while * iterating. * * @return the result @@ -546,7 +546,7 @@ public Iterator> iterator() { } /** - * Get all entries. Do not add or move entries while iterating. + * Get all entries in key order. Do not add or move entries while iterating. 
* * @param largerThan all returned keys are larger than this; null to start at * the beginning diff --git a/oak-run-commons/src/test/java/org/apache/jackrabbit/oak/index/indexer/document/tree/TreeStoreIterateTest.java b/oak-run-commons/src/test/java/org/apache/jackrabbit/oak/index/indexer/document/tree/TreeStoreIterateTest.java new file mode 100644 index 00000000000..6a984a80b04 --- /dev/null +++ b/oak-run-commons/src/test/java/org/apache/jackrabbit/oak/index/indexer/document/tree/TreeStoreIterateTest.java @@ -0,0 +1,74 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.jackrabbit.oak.index.indexer.document.tree; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; + +import java.io.File; +import java.io.IOException; +import java.util.Iterator; + +import org.apache.jackrabbit.oak.index.indexer.document.NodeStateEntry; +import org.apache.jackrabbit.oak.index.indexer.document.flatfile.NodeStateEntryReader; +import org.apache.jackrabbit.oak.spi.blob.MemoryBlobStore; +import org.junit.ClassRule; +import org.junit.Test; +import org.junit.rules.TemporaryFolder; + +/** Checks iteration over a {@link TreeStore} and the child names reported by the returned node states when an intermediate node was never stored. */ public class TreeStoreIterateTest { + + /* temporary folders for this test class are created below the "target" directory */ @ClassRule + public static TemporaryFolder temporaryFolder = new TemporaryFolder(new File("target")); + + /** Stores five nodes, then walks the store iterator and asserts the path of every entry and whether selected entries report child names. */ @Test + public void buildAndIterateTest() throws IOException { + File testFolder = temporaryFolder.newFolder(); + TreeStore store = new TreeStore("test", testFolder, + new NodeStateEntryReader(new MemoryBlobStore()), 1); + try { + store.getSession().init(); + store.putNode("/test", "{}"); + store.putNode("/test-node", "{}"); + /* note: the parent path /test-node/child itself is never stored in this test */ store.putNode("/test-node/child/node", "{}"); + store.putNode("/test-node/child/node/test", "{}"); + store.putNode("/test/child", "{}"); + Iterator it = store.iterator(); + NodeStateEntry e = it.next(); + assertEquals("/test", e.getPath()); + e = it.next(); + assertEquals("/test-node", e.getPath()); + Iterator it2 = e.getNodeState().getChildNodeNames().iterator(); + /* /test-node is expected to report no child names */ assertFalse(it2.hasNext()); + e = it.next(); + assertEquals("/test-node/child/node", e.getPath()); + it2 = e.getNodeState().getChildNodeNames().iterator(); + /* /test-node/child/node is expected to report at least one child name */ assertTrue(it2.hasNext()); + e = it.next(); + assertEquals("/test-node/child/node/test", e.getPath()); + e = it.next(); + assertEquals("/test/child", e.getPath()); + /* no further entries are expected after the five stored nodes */ assertFalse(it.hasNext()); + } finally { + store.close(); + } + } + +}