Skip to content
Permalink
Browse files
Initial attempt at getting index functions to work in elastic indexes
  • Loading branch information
klcodanr committed Mar 17, 2022
1 parent 5284b4e commit 8892ff15477cd2d1a021ce7632222f79a953ba27
Show file tree
Hide file tree
Showing 6 changed files with 302 additions and 151 deletions.
@@ -18,21 +18,23 @@
*/
package org.apache.jackrabbit.oak.plugins.index.elastic;

import org.apache.jackrabbit.oak.api.Type;
import org.apache.jackrabbit.oak.plugins.index.search.IndexDefinition;
import org.apache.jackrabbit.oak.plugins.index.search.PropertyDefinition;
import org.apache.jackrabbit.oak.spi.state.NodeState;
import org.jetbrains.annotations.NotNull;
import static org.apache.jackrabbit.oak.plugins.index.search.util.ConfigUtil.getOptionalValue;
import static org.apache.jackrabbit.oak.plugins.index.search.util.ConfigUtil.getOptionalValues;

import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.function.Function;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import java.util.stream.StreamSupport;

import static org.apache.jackrabbit.oak.plugins.index.search.util.ConfigUtil.getOptionalValue;
import static org.apache.jackrabbit.oak.plugins.index.search.util.ConfigUtil.getOptionalValues;
import org.apache.jackrabbit.oak.api.Type;
import org.apache.jackrabbit.oak.plugins.index.search.FieldNames;
import org.apache.jackrabbit.oak.plugins.index.search.IndexDefinition;
import org.apache.jackrabbit.oak.plugins.index.search.PropertyDefinition;
import org.apache.jackrabbit.oak.spi.state.NodeState;
import org.jetbrains.annotations.NotNull;

public class ElasticIndexDefinition extends IndexDefinition {

@@ -133,9 +135,16 @@ public ElasticIndexDefinition(NodeState root, NodeState defn, String indexPath,

this.propertiesByName = getDefinedRules()
.stream()
.flatMap(rule -> StreamSupport.stream(rule.getProperties().spliterator(), false))
.flatMap(rule -> Stream.concat(StreamSupport.stream(rule.getProperties().spliterator(), false),
rule.getFunctionRestrictions().stream()))
.filter(pd -> pd.index) // keep only properties that can be indexed
.collect(Collectors.groupingBy(pd -> pd.name));
.collect(Collectors.groupingBy(pd -> {
if(pd.function != null){
return pd.function;
} else {
return pd.name;
}
}));

this.dynamicBoostProperties = getDefinedRules()
.stream()
@@ -24,7 +24,7 @@
* {@link IndexEditor} implementation that is responsible for keeping the
* corresponding Elasticsearch index up to date
*/
class ElasticIndexEditor extends FulltextIndexEditor<ElasticDocument> {
public class ElasticIndexEditor extends FulltextIndexEditor<ElasticDocument> {
ElasticIndexEditor(FulltextIndexEditorContext<ElasticDocument> context) {
super(context);
}
@@ -57,14 +57,12 @@ protected List<QueryIndex.OrderEntry> createSortOrder(IndexDefinition.IndexingRu
// support for path ordering in both directions
orderEntries.add(o);
}
// TODO: add support for function-based sorting
// for (PropertyDefinition functionIndex : rule.getFunctionRestrictions()) {
// if (functionIndex.ordered && o.getPropertyName().equals(functionIndex.function)) {
// // can manage any order desc/asc
// orderEntries.add(o);
// result.sortedProperties.add(functionIndex);
// }
// }
for (PropertyDefinition functionIndex : rule.getFunctionRestrictions()) {
if (functionIndex.ordered && o.getPropertyName().equals(functionIndex.function)) {
// can manage any order desc/asc
orderEntries.add(o);
}
}
}

//TODO Should we return order entries only when all order clauses are satisfied
@@ -16,42 +16,180 @@
*/
package org.apache.jackrabbit.oak.plugins.index.elastic;

import static java.util.Arrays.asList;
import static org.apache.jackrabbit.oak.plugins.index.IndexConstants.INDEX_DEFINITIONS_NAME;
import static org.apache.jackrabbit.oak.plugins.index.IndexConstants.INDEX_DEFINITIONS_NODE_TYPE;
import static org.apache.jackrabbit.oak.plugins.index.IndexConstants.REINDEX_PROPERTY_NAME;
import static org.apache.jackrabbit.oak.plugins.index.IndexConstants.TYPE_PROPERTY_NAME;
import static org.hamcrest.CoreMatchers.containsString;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertThat;

import java.util.Collections;
import java.util.List;
import java.util.Set;
import java.util.concurrent.TimeUnit;

import org.apache.jackrabbit.JcrConstants;
import org.apache.jackrabbit.oak.InitialContentHelper;
import org.apache.jackrabbit.oak.api.ContentRepository;
import org.apache.jackrabbit.oak.api.Tree;
import org.apache.jackrabbit.oak.api.Type;
import org.apache.jackrabbit.oak.plugins.index.FunctionIndexCommonTest;
import org.apache.jackrabbit.oak.plugins.index.elastic.index.ElasticIndexEditor;
import org.apache.jackrabbit.oak.plugins.index.search.FulltextIndexConstants;
import org.apache.jackrabbit.oak.plugins.memory.MemoryNodeStore;
import org.apache.jackrabbit.oak.plugins.memory.PropertyStates;
import org.junit.Before;
import org.junit.ClassRule;
import org.junit.Ignore;

import java.util.Set;

import static org.apache.jackrabbit.oak.plugins.index.IndexConstants.INDEX_DEFINITIONS_NAME;
import static org.apache.jackrabbit.oak.plugins.index.IndexConstants.INDEX_DEFINITIONS_NODE_TYPE;
import static org.apache.jackrabbit.oak.plugins.index.IndexConstants.REINDEX_PROPERTY_NAME;
import static org.apache.jackrabbit.oak.plugins.index.IndexConstants.TYPE_PROPERTY_NAME;
import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.TestName;

@Ignore
public class ElasticFunctionIndexCommonTest extends FunctionIndexCommonTest {
/**
 * Class-level JUnit rule providing the Elasticsearch connection used by every
 * test in this class. It is configured from
 * {@code ElasticTestUtils.ELASTIC_CONNECTION_STRING} and declared exactly once.
 */
@ClassRule
public static final ElasticConnectionRule elasticRule = new ElasticConnectionRule(
        ElasticTestUtils.ELASTIC_CONNECTION_STRING);

@Rule
public TestName name = new TestName();

/**
 * Creates the test and selects the Elasticsearch flavour of the index options.
 */
public ElasticFunctionIndexCommonTest() {
    this.indexOptions = new ElasticIndexOptions();
}

/**
 * JUnit {@code @Before} hook that currently performs no work.
 * <p>
 * The commented-out assumption below, if re-enabled, would skip every test
 * whose method name (taken from the {@code name} rule) is not
 * {@code "lengthName"}. Presumably kept as a debugging aid for running a single
 * test in isolation -- confirm before removing.
 */
@Before
public void filter() {
// assumeTrue(name.getMethodName().equals("lengthName"));
}

/**
 * Checks that ordering works on the same relative property both when it is
 * indexed directly ({@code jcr:content/n/foo}) and when it is indexed through a
 * function ({@code fn:upper-case(jcr:content/n/@foo)}), in ascending and
 * descending direction.
 * <p>
 * Nodes {@code a, c, b, e, d} first receive {@code foo = bar1 .. bar5} in that
 * order; afterwards the values are rewritten to {@code bar5 .. bar1} so the
 * expected ordering flips, which verifies that updates to already indexed
 * nodes are picked up.
 *
 * @throws Exception if committing the index definition or the content fails
 */
@Test
public void sameOrderableRelPropWithAndWithoutFunc_checkOrdering() throws Exception {
    final List<String> nodeNames = asList("a", "c", "b", "e", "d");
    final List<String> creationOrder = asList("/a", "/c", "/b", "/e", "/d");
    final List<String> reverseOrder = asList("/d", "/e", "/b", "/c", "/a");

    // One index definition holding the property twice: once plain, once wrapped in a function.
    final Set<String> noPropNames = Collections.<String>emptySet();
    final Tree indexDef = createIndex("upper", noPropNames);

    final Tree rules = indexDef.addChild(FulltextIndexConstants.INDEX_RULES);
    final Tree ntBaseRule = rules.addChild("nt:base");
    final Tree ruleProps = ntBaseRule.addChild(FulltextIndexConstants.PROP_NODE);
    final Tree plainProp = ruleProps.addChild("foo");
    plainProp.setProperty(FulltextIndexConstants.PROP_PROPERTY_INDEX, true);
    plainProp.setProperty(FulltextIndexConstants.PROP_ORDERED, true);
    plainProp.setProperty("name", "jcr:content/n/foo");

    final Tree functionProp = indexDef.getChild(FulltextIndexConstants.INDEX_RULES)
            .getChild("nt:base")
            .getChild(FulltextIndexConstants.PROP_NODE)
            .addChild("testOak");
    functionProp.setProperty(FulltextIndexConstants.PROP_ORDERED, true);
    functionProp.setProperty(FulltextIndexConstants.PROP_FUNCTION, "fn:upper-case(jcr:content/n/@foo)");

    root.commit();

    // Content served by the index: /<node>/jcr:content/n with foo = bar1 .. bar5.
    for (int idx = 0; idx < nodeNames.size(); idx++) {
        final Tree top = root.getTree("/").addChild(nodeNames.get(idx));
        top.setProperty("jcr:primaryType", "nt:unstructured", Type.NAME);

        final Tree content = top.addChild("jcr:content");
        content.setProperty("jcr:primaryType", "nt:unstructured", Type.NAME);

        final Tree leaf = content.addChild("n");
        leaf.setProperty("jcr:primaryType", "nt:unstructured", Type.NAME);
        leaf.setProperty("foo", "bar" + (idx + 1));
    }

    root.commit();
    postCommitHook();

    final String expectedPlan = getIndexProvider() + "upper(/oak:index/upper)";

    // Ordering must work for both the function and the plain property.
    assertOrderedPlanAndQuery(
            "select * from [nt:base] order by upper([jcr:content/n/foo])",
            expectedPlan, creationOrder);
    assertOrderedPlanAndQuery(
            "select * from [nt:base] order by [jcr:content/n/foo]",
            expectedPlan, creationOrder);
    assertOrderedPlanAndQuery(
            "select * from [nt:base] order by upper([jcr:content/n/foo]) DESC",
            expectedPlan, reverseOrder);
    assertOrderedPlanAndQuery(
            "select * from [nt:base] order by [jcr:content/n/foo] DESC",
            expectedPlan, reverseOrder);

    // Rewrite foo on the already indexed nodes (bar5 .. bar1) so the expected order flips.
    for (int idx = 0; idx < nodeNames.size(); idx++) {
        final Tree leaf = root.getTree("/")
                .getChild(nodeNames.get(idx))
                .getChild("jcr:content")
                .getChild("n");
        leaf.setProperty("foo", "bar" + (5 - idx));
    }
    root.commit();
    postCommitHook();

    assertOrderedPlanAndQuery(
            "select * from [nt:base] order by upper([jcr:content/n/foo])",
            expectedPlan, reverseOrder);
    assertOrderedPlanAndQuery(
            "select * from [nt:base] order by [jcr:content/n/foo]",
            expectedPlan, reverseOrder);
    assertOrderedPlanAndQuery(
            "select * from [nt:base] order by upper([jcr:content/n/foo]) DESC",
            expectedPlan, creationOrder);
    assertOrderedPlanAndQuery(
            "select * from [nt:base] order by [jcr:content/n/foo] DESC",
            expectedPlan, creationOrder);
}

/**
 * Asserts that the query plan contains {@code planExpectation} and that the
 * query returns exactly {@code paths} in the given order.
 *
 * @param query           the query to explain and execute
 * @param planExpectation text that must appear in the explained plan
 * @param paths           expected result paths, in expected order
 */
private void assertOrderedPlanAndQuery(String query, String planExpectation, List<String> paths) {
    assertEquals("Ordering doesn't match", paths, assertPlanAndQuery(query, planExpectation, paths));
}

/**
 * Asserts that the explained plan of {@code query} contains
 * {@code planExpectation}, then runs the query through {@code assertQuery}.
 *
 * @param query           the query to explain and execute
 * @param planExpectation text that must appear in the explained plan
 * @param paths           expected result paths handed to {@code assertQuery}
 * @return the list returned by {@code assertQuery}
 */
private List<String> assertPlanAndQuery(String query, String planExpectation, List<String> paths) {
    final String plan = explain(query);
    assertThat(plan, containsString(planExpectation));

    final List<String> result = assertQuery(query, paths);
    return result;
}

/**
 * Returns the provider prefix that this test prepends to the index name when
 * building the expected query plan text.
 *
 * @return the literal {@code "elasticsearch:"}
 */
@Override
protected String getIndexProvider() {
    final String providerPrefix = "elasticsearch:";
    return providerPrefix;
}

/**
 * Pauses the calling thread for two seconds after a commit.
 * <p>
 * Presumably this gives indexing time to catch up before queries are asserted
 * -- confirm against the repository builder's indexing interval. If the thread
 * is interrupted while waiting, the interrupt flag is restored and the method
 * returns immediately.
 */
@Override
protected void postCommitHook() {
    final long pauseMillis = TimeUnit.SECONDS.toMillis(2);
    try {
        Thread.sleep(pauseMillis);
    } catch (InterruptedException interrupted) {
        // Preserve the interrupt status for callers instead of swallowing it.
        Thread.currentThread().interrupt();
    }
}

/**
 * Creates the content repository used by the tests.
 * <p>
 * The repository is assembled by an {@code ElasticTestRepositoryBuilder} bound
 * to the shared {@code elasticRule} and backed by a {@code MemoryNodeStore}
 * initialised with {@code InitialContentHelper.INITIAL_CONTENT}. The built
 * options are kept in {@code repositoryOptionsUtil}.
 *
 * @return the content repository created from the built Oak instance
 */
@Override
protected ContentRepository createRepository() {
    ElasticTestRepositoryBuilder builder = new ElasticTestRepositoryBuilder(elasticRule);
    builder.setNodeStore(new MemoryNodeStore(InitialContentHelper.INITIAL_CONTENT));
    // Build exactly once, after the node store is configured; an earlier build
    // would only create a throwaway repository that is immediately overwritten.
    repositoryOptionsUtil = builder.build();

    return repositoryOptionsUtil.getOak().createContentRepository();
}

/**
 * Test-index setup hook. As shown here, its only active statement is
 * {@code setTraversalEnabled(false)}.
 * <p>
 * NOTE(review): an identical commented-out copy of the call directly follows
 * the active one -- confirm whether the call is meant to stay active or to be
 * disabled, and drop the leftover line.
 */
@Override
protected void createTestIndexNode() {
setTraversalEnabled(false);
// setTraversalEnabled(false);
}

protected Tree createIndex(String name, Set<String> propNames) {
@@ -66,13 +204,14 @@ protected Tree createIndex(Tree index, String name, Set<String> propNames) {
def.setProperty(TYPE_PROPERTY_NAME, indexOptions.getIndexType());
def.setProperty(REINDEX_PROPERTY_NAME, true);
def.setProperty(FulltextIndexConstants.FULL_TEXT_ENABLED, false);
def.setProperty(PropertyStates.createProperty(FulltextIndexConstants.INCLUDE_PROPERTY_NAMES, propNames, Type.STRINGS));
//def.setProperty(LuceneIndexConstants.SAVE_DIR_LISTING, true);
def.setProperty(
PropertyStates.createProperty(FulltextIndexConstants.INCLUDE_PROPERTY_NAMES, propNames, Type.STRINGS));
// def.setProperty(LuceneIndexConstants.SAVE_DIR_LISTING, true);
return index.getChild(INDEX_DEFINITIONS_NAME).getChild(name);
}

/**
 * Returns the name of the logger associated with these tests.
 *
 * @return the fully qualified class name of {@code ElasticIndexEditor}
 */
@Override
protected String getLoggerName() {
    // A single return: a preceding "return null" would make this statement
    // unreachable and would hide the logger name from the caller.
    return ElasticIndexEditor.class.getName();
}
}
@@ -29,7 +29,6 @@
import org.apache.jackrabbit.oak.plugins.index.elastic.query.ElasticIndexProvider;
import org.apache.jackrabbit.oak.plugins.index.search.ExtractedTextCache;
import org.apache.jackrabbit.oak.query.QueryEngineSettings;
import org.apache.jackrabbit.oak.spi.commit.Observer;
import org.apache.jackrabbit.oak.stats.StatisticsProvider;

import static com.google.common.collect.Lists.newArrayList;
@@ -39,7 +38,7 @@ public class ElasticTestRepositoryBuilder extends TestRepositoryBuilder {

private final ElasticConnection esConnection;
private final ElasticIndexTracker indexTracker;
private final int asyncIndexingTimeInSeconds = 5;
private final int asyncIndexingTimeInSeconds = 1;

public ElasticTestRepositoryBuilder(ElasticConnectionRule elasticRule) {
this.esConnection = elasticRule.useDocker() ? elasticRule.getElasticConnectionForDocker() :

0 comments on commit 8892ff1

Please sign in to comment.