diff --git a/oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/tree/TreeStoreNodeState.java b/oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/tree/TreeStoreNodeState.java
index 9e58b60b9e5..cb4dbe796f5 100644
--- a/oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/tree/TreeStoreNodeState.java
+++ b/oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/tree/TreeStoreNodeState.java
@@ -34,15 +34,54 @@
import org.apache.jackrabbit.oak.spi.state.NodeStateDiff;
import org.jetbrains.annotations.NotNull;
import org.jetbrains.annotations.Nullable;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
/**
* A node state of an Oak node that is stored in a tree store.
*
- * This is mostly a wrapper. It allows iterating over the children and reading
+ * <p>This is mostly a wrapper. It allows iterating over the children and reading
* children directly.
+ *
+ * <p><b>Storage Layout</b>
+ *
+ * <p>The underlying key-value store holds two kinds of entries:
+ *
+ * <ul>
+ *   <li><b>Node entries</b>: key = node path, value = JSON-encoded node properties.</li>
+ *   <li><b>Child-reference entries</b>: key = parent path + {@code \t} + child name,
+ *       value = empty string. These represent membership in a parent's child list
+ *       without duplicating any node data.</li>
+ * </ul>
+ *
+ * For a tree that contains the nodes {@code /}, {@code /content},
+ * {@code /content/dam}, and {@code /content/site}, the store holds these entries
+ * (shown here in sort order):
+ *
+ * <pre>
+ * /               → {"jcr:primaryType":"rep:root", ...}     (node entry)
+ * /\tcontent      → ""                                      (child-reference entry)
+ * /content        → {"jcr:primaryType":"nt:folder", ...}    (node entry)
+ * /content\tdam   → ""                                      (child-reference entry)
+ * /content\tsite  → ""                                      (child-reference entry)
+ * /content/dam    → {"jcr:primaryType":"sling:Folder",...}  (node entry)
+ * /content/site   → {"jcr:primaryType":"sling:Folder",...}  (node entry)
+ * </pre>
+ *
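+ * The tab character ({@code \t}, U+0009) sorts before the slash ({@code /},
+ * U+002F) and before any letter, so a node's child-reference entries always sort
+ * immediately after the node entry itself and before any of its descendants.
+ * This makes it efficient to list a node's direct children: start the scan just after
+ * the node entry, consume entries with an empty value whose key starts with the node
+ * path followed by a tab (each is one child name), and stop at the first entry that
+ * either has a non-empty value (the start of the next node's data) or is a
+ * child-reference entry of a different node (for example {@code /a/b\tc} directly
+ * after {@code /a}, which happens when the node {@code /a/b} is missing from the store).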
+ *
+ * <p>See {@link TreeStore#toChildNodeEntry(String)} for the method that converts a
+ * node path to its corresponding child-reference key.
*/
public class TreeStoreNodeState implements NodeState, MemoryObject {
+ private static final Logger LOG = LoggerFactory.getLogger(TreeStoreNodeState.class);
+
private final NodeState delegate;
private final String path;
private final TreeStore treeStore;
@@ -180,6 +219,15 @@ private Iterator getChildNodeIterator() {
s -> treeStore.getNodeStateEntry(PathUtils.concat(path, s)));
}
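+ /**
+ * Returns an iterator over the names of this node's direct children.
+ *
+ * The iterator relies on the child-reference entries described in the
+ * class-level documentation: it scans the entries that immediately follow
+ * this node's own entry in the store, collecting the child names encoded
+ * in those keys, and stops as soon as it reaches a non-empty value (which
+ * indicates the start of the next node's data rather than a child reference).
+ * A child-reference key that does not start with this node's path followed by
+ * a tab belongs to a different node: in that case a warning about the missing
+ * node is logged and the entry is not reported as a child of this node.
+ */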
Iterator getChildNodeNamesIterator() {
Iterator> it = treeStore.getSession().iterator(path);
return new Iterator() {
@@ -201,7 +249,15 @@ private void fetch() {
if (index < 0) {
throw new IllegalArgumentException(key);
}
- current = key.substring(index + 1);
+ if (!key.startsWith(path + "\t")) {
+ // this is the child of a _different_ node:
+ // that means this node doesn't have a child
+ String missingChild = key.substring(0, index);
+ LOG.warn("Missing node {} when listing children of {}", missingChild, path);
+ current = null;
+ } else {
+ current = key.substring(index + 1);
+ }
}
}
}
diff --git a/oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/tree/store/TreeSession.java b/oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/tree/store/TreeSession.java
index eb724c631aa..783cb08bdbd 100644
--- a/oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/tree/store/TreeSession.java
+++ b/oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/tree/store/TreeSession.java
@@ -536,7 +536,7 @@ public void flush() {
// this is fast: internally, a stack of Position object is kept
/**
- * Get all entries. Do not add or move entries while
+ * Get all entries in key order. Do not add or move entries while
* iterating.
*
* @return the result
@@ -546,7 +546,7 @@ public Iterator> iterator() {
}
/**
- * Get all entries. Do not add or move entries while iterating.
+ * Get all entries in key order. Do not add or move entries while iterating.
*
* @param largerThan all returned keys are larger than this; null to start at
* the beginning
diff --git a/oak-run-commons/src/test/java/org/apache/jackrabbit/oak/index/indexer/document/tree/TreeStoreIterateTest.java b/oak-run-commons/src/test/java/org/apache/jackrabbit/oak/index/indexer/document/tree/TreeStoreIterateTest.java
new file mode 100644
index 00000000000..6a984a80b04
--- /dev/null
+++ b/oak-run-commons/src/test/java/org/apache/jackrabbit/oak/index/indexer/document/tree/TreeStoreIterateTest.java
@@ -0,0 +1,90 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.jackrabbit.oak.index.indexer.document.tree;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.Iterator;
+
+import org.apache.jackrabbit.oak.index.indexer.document.NodeStateEntry;
+import org.apache.jackrabbit.oak.index.indexer.document.flatfile.NodeStateEntryReader;
+import org.apache.jackrabbit.oak.spi.blob.MemoryBlobStore;
+import org.junit.ClassRule;
+import org.junit.Test;
+import org.junit.rules.TemporaryFolder;
+
+/**
+ * Verifies iteration over a {@link TreeStore} in which an intermediate node
+ * ({@code /test-node/child}) has descendants but is itself never stored.
+ */
+public class TreeStoreIterateTest {
+
+    @ClassRule
+    public static TemporaryFolder temporaryFolder = new TemporaryFolder(new File("target"));
+
+    /**
+     * Stores a handful of nodes, then checks the order in which the node entries
+     * are returned and the child names reported for two of them.
+     */
+    @Test
+    public void buildAndIterateTest() throws IOException {
+        File testFolder = temporaryFolder.newFolder();
+        TreeStore store = new TreeStore("test", testFolder,
+                new NodeStateEntryReader(new MemoryBlobStore()), 1);
+        try {
+            store.getSession().init();
+            store.putNode("/test", "{}");
+            store.putNode("/test-node", "{}");
+            // note: the intermediate node "/test-node/child" is not stored
+            store.putNode("/test-node/child/node", "{}");
+            store.putNode("/test-node/child/node/test", "{}");
+            store.putNode("/test/child", "{}");
+
+            Iterator<NodeStateEntry> it = store.iterator();
+            NodeStateEntry e = it.next();
+            assertEquals("/test", e.getPath());
+
+            e = it.next();
+            assertEquals("/test-node", e.getPath());
+            // "/test-node" has no stored direct child, so no name may be reported
+            Iterator<String> it2 = e.getNodeState().getChildNodeNames().iterator();
+            assertFalse(it2.hasNext());
+
+            e = it.next();
+            assertEquals("/test-node/child/node", e.getPath());
+            it2 = e.getNodeState().getChildNodeNames().iterator();
+            assertTrue(it2.hasNext());
+            assertEquals("test", it2.next());
+
+            e = it.next();
+            assertEquals("/test-node/child/node/test", e.getPath());
+
+            e = it.next();
+            assertEquals("/test/child", e.getPath());
+            assertFalse(it.hasNext());
+        } finally {
+            store.close();
+        }
+    }
+
+}