Skip to content

Commit

Permalink
JCR-1337: Optimize first execution queries for DescendantSelfAxisWeig…
Browse files Browse the repository at this point in the history
…ht/ChildAxisQuery

JCR-1884: CachingIndexReader.initializeParents() does not scale well with large indexes

git-svn-id: https://svn.apache.org/repos/asf/jackrabbit/branches/1.4/jackrabbit-core@748135 13f79535-47bb-0310-9956-ffa450edef68
  • Loading branch information
mreutegg committed Feb 26, 2009
1 parent df2558a commit 02304c4
Show file tree
Hide file tree
Showing 4 changed files with 297 additions and 9 deletions.
Expand Up @@ -232,10 +232,12 @@ protected synchronized CommittableIndexReader getIndexReader() throws IOExceptio
* read-only, that is, any attempt to delete a document from the index
* will throw an <code>UnsupportedOperationException</code>.
*
* @param initCache if the caches in the index reader should be initialized
* before the index reader is returned.
* @return a read-only index reader.
* @throws IOException if an error occurs while obtaining the index reader.
*/
synchronized ReadOnlyIndexReader getReadOnlyIndexReader()
synchronized ReadOnlyIndexReader getReadOnlyIndexReader(boolean initCache)
throws IOException {
// get current modifiable index reader
CommittableIndexReader modifiableReader = getIndexReader();
Expand Down Expand Up @@ -271,14 +273,29 @@ synchronized ReadOnlyIndexReader getReadOnlyIndexReader()
}
if (sharedReader == null) {
// create new shared reader
CachingIndexReader cr = new CachingIndexReader(IndexReader.open(getDirectory()), cache);
CachingIndexReader cr = new CachingIndexReader(
IndexReader.open(getDirectory()), cache, initCache);
sharedReader = new SharedIndexReader(cr);
}
readOnlyReader = new ReadOnlyIndexReader(sharedReader, deleted, modCount);
readOnlyReader.incrementRefCount();
return readOnlyReader;
}

/**
* Returns a read-only index reader, that can be used concurrently with
* other threads writing to this index. The returned index reader is
* read-only, that is, any attempt to delete a document from the index
* will throw an <code>UnsupportedOperationException</code>.
*
* @return a read-only index reader.
* @throws IOException if an error occurs while obtaining the index reader.
*/
protected ReadOnlyIndexReader getReadOnlyIndexReader()
throws IOException {
return getReadOnlyIndexReader(false);
}

/**
* Returns an <code>IndexWriter</code> on this index.
* @return an <code>IndexWriter</code> on this index.
Expand Down
Expand Up @@ -22,11 +22,20 @@
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermDocs;
import org.apache.lucene.index.TermEnum;
import org.apache.jackrabbit.uuid.UUID;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.util.BitSet;
import java.util.Map;
import java.util.HashMap;
import java.util.Iterator;
import java.text.NumberFormat;

import EDU.oswego.cs.dl.util.concurrent.Executor;
import EDU.oswego.cs.dl.util.concurrent.PooledExecutor;
import EDU.oswego.cs.dl.util.concurrent.LinkedQueue;

/**
* Implements an <code>IndexReader</code> that maintains caches to resolve
Expand All @@ -40,6 +49,17 @@ class CachingIndexReader extends FilterIndexReader {
*/
private static final Logger log = LoggerFactory.getLogger(CachingIndexReader.class);

/**
* The single thread of this executor initializes the
* {@link #parents} when background initialization is requested.
*/
private static final Executor SERIAL_EXECUTOR = new PooledExecutor(
new LinkedQueue(), 1) {
{
setKeepAliveTime(500);
}
};

/**
* The current value of the global creation tick counter.
*/
Expand All @@ -52,6 +72,11 @@ class CachingIndexReader extends FilterIndexReader {
*/
private final DocId[] parents;

/**
* Initializes the {@link #parents} cache.
*/
private CacheInitializer cacheInitializer;

/**
* Tick when this index reader was created.
*/
Expand All @@ -69,11 +94,26 @@ class CachingIndexReader extends FilterIndexReader {
* @param delegatee the base <code>IndexReader</code>.
* @param cache a document number cache, or <code>null</code> if not
* available to this reader.
* @param initCache if the {@link #parents} cache should be initialized
* when this index reader is constructed. Otherwise
* initialization happens in a background thread.
*/
CachingIndexReader(IndexReader delegatee, DocNumberCache cache) {
CachingIndexReader(IndexReader delegatee,
DocNumberCache cache,
boolean initCache) {
super(delegatee);
this.cache = cache;
parents = new DocId[delegatee.maxDoc()];
this.cacheInitializer = new CacheInitializer(delegatee);
if (initCache) {
cacheInitializer.run();
} else {
try {
SERIAL_EXECUTOR.execute(cacheInitializer);
} catch (InterruptedException e) {
// ignore
}
}
}

/**
Expand Down Expand Up @@ -200,6 +240,14 @@ public TermDocs termDocs(Term term) throws IOException {
return super.termDocs(term);
}

protected void doClose() throws IOException {
try {
cacheInitializer.waitUntilStopped();
} catch (InterruptedException e) {
// ignore
}
super.doClose();
}

//----------------------< internal >----------------------------------------

Expand All @@ -214,6 +262,216 @@ private static long getNextCreationTick() {
}
}

/**
* Initializes the {@link CachingIndexReader#parents} cache.
*/
private class CacheInitializer implements Runnable {

/**
* From where to read.
*/
private final IndexReader reader;

/**
* Set to <code>true</code> while this initializer does its work.
*/
private boolean running = false;

/**
* Set to <code>true</code> when this index reader is about to be closed.
*/
private volatile boolean stopRequested = false;

/**
* Creates a new initializer with the given <code>reader</code>.
*
* @param reader an index reader.
*/
public CacheInitializer(IndexReader reader) {
this.reader = reader;
}

/**
* Initializes the cache.
*/
public void run() {
synchronized (this) {
running = true;
}
try {
if (stopRequested) {
// immediately return when stop is requested
return;
}
initializeParents(reader);
} catch (Exception e) {
// only log warn message during regular operation
if (!stopRequested) {
log.warn("Error initializing parents cache.", e);
}
} finally {
synchronized (this) {
running = false;
notifyAll();
}
}
}

/**
* Waits until this cache initializer is stopped.
*
* @throws InterruptedException if the current thread is interrupted.
*/
public void waitUntilStopped() throws InterruptedException {
stopRequested = true;
synchronized (this) {
while (running) {
wait();
}
}
}

/**
* Initializes the {@link CachingIndexReader#parents} <code>DocId</code>
* array.
*
* @param reader the underlying index reader.
* @throws IOException if an error occurs while reading from the index.
*/
private void initializeParents(IndexReader reader) throws IOException {
long time = System.currentTimeMillis();
final Map docs = new HashMap();
// read UUIDs
collectTermDocs(reader, new Term(FieldNames.UUID, ""), new TermDocsCollector() {
public void collect(Term term, TermDocs tDocs) throws IOException {
UUID uuid = UUID.fromString(term.text());
if (tDocs.next()) {
NodeInfo info = new NodeInfo(tDocs.doc(), uuid);
docs.put(new Integer(info.docId), info);
}
}
});

// read PARENTs
collectTermDocs(reader, new Term(FieldNames.PARENT, "0"), new TermDocsCollector() {
public void collect(Term term, TermDocs tDocs) throws IOException {
while (tDocs.next()) {
UUID uuid = UUID.fromString(term.text());
Integer docId = new Integer(tDocs.doc());
NodeInfo info = (NodeInfo) docs.get(docId);
info.parent = uuid;
docs.remove(docId);
docs.put(info.uuid, info);
}
}
});

if (stopRequested) {
return;
}

double foreignParents = 0;
Iterator it = docs.values().iterator();
while (it.hasNext()) {
NodeInfo info = (NodeInfo) it.next();
NodeInfo parent = (NodeInfo) docs.get(info.parent);
if (parent != null) {
parents[info.docId] = DocId.create(parent.docId);
} else if (info.parent != null) {
foreignParents++;
parents[info.docId] = DocId.create(info.parent);
} else {
// no parent -> root node
parents[info.docId] = DocId.NULL;
}
}
if (log.isDebugEnabled()) {
NumberFormat nf = NumberFormat.getPercentInstance();
nf.setMaximumFractionDigits(1);
time = System.currentTimeMillis() - time;
if (parents.length > 0) {
foreignParents /= parents.length;
}
log.debug("initialized {} DocIds in {} ms, {} foreign parents",
new Object[]{
new Integer(parents.length),
new Long(time),
nf.format(foreignParents)
});
}
}

/**
* Collects term docs for a given start term. All terms with the same
* field as <code>start</code> are enumerated.
*
* @param reader the index reader.
* @param start the term where to start the term enumeration.
* @param collector collects the term docs for each term.
* @throws IOException if an error occurs while reading from the index.
*/
private void collectTermDocs(IndexReader reader,
Term start,
TermDocsCollector collector)
throws IOException {
TermDocs tDocs = reader.termDocs();
try {
TermEnum terms = reader.terms(start);
try {
int count = 0;
do {
Term t = terms.term();
if (t != null && t.field() == start.field()) {
tDocs.seek(terms);
collector.collect(t, tDocs);
} else {
break;
}
// once in a while check if we should quit
if (++count % 10000 == 0) {
if (stopRequested) {
break;
}
}
} while (terms.next());
} finally {
terms.close();
}
} finally {
tDocs.close();
}
}
}

/**
* Simple interface to collect a term and its term docs.
*/
private interface TermDocsCollector {

/**
* Called for each term encountered.
*
* @param term the term.
* @param tDocs the term docs of <code>term</code>.
* @throws IOException if an error occurs while reading from the index.
*/
void collect(Term term, TermDocs tDocs) throws IOException;
}

private static class NodeInfo {

final int docId;

final UUID uuid;

UUID parent;

public NodeInfo(int docId, UUID uuid) {
this.docId = docId;
this.uuid = uuid;
}
}

/**
* Implements an empty TermDocs.
*/
Expand Down
19 changes: 13 additions & 6 deletions src/main/java/org/apache/jackrabbit/core/query/lucene/DocId.java
Expand Up @@ -109,6 +109,16 @@ static DocId create(int docNumber) {
* @return a <code>DocId</code> based on a node UUID.
*/
static DocId create(String uuid) {
return new UUIDDocId(UUID.fromString(uuid));
}

/**
* Creates a <code>DocId</code> based on a node UUID.
*
* @param uuid the node uuid.
* @return a <code>DocId</code> based on a node UUID.
*/
static DocId create(UUID uuid) {
return new UUIDDocId(uuid);
}

Expand Down Expand Up @@ -188,13 +198,10 @@ private static final class UUIDDocId extends DocId {
* Creates a <code>DocId</code> based on a Node uuid.
*
* @param uuid the Node uuid.
* @throws IllegalArgumentException if the <code>uuid</code> is
* malformed.
*/
UUIDDocId(String uuid) {
UUID tmp = UUID.fromString(uuid);
this.lsb = tmp.getLeastSignificantBits();
this.msb = tmp.getMostSignificantBits();
UUIDDocId(UUID uuid) {
this.lsb = uuid.getLeastSignificantBits();
this.msb = uuid.getMostSignificantBits();
}

/**
Expand Down

0 comments on commit 02304c4

Please sign in to comment.