Skip to content
Permalink
Browse files
OAK-9696 Improve query syntax support for dynamicBoost in ElasticSearch (#494)

* OAK-9696 Improve query syntax support for dynamicBoost in ElasticSearch

* OAK-9696 Improve query syntax support for dynamicBoost in ElasticSearch: improvement for code review

* OAK-9696 Improve query syntax support for dynamicBoost in ElasticSearch: change the index field to fixed one

* OAK-9696 Improve query syntax support for dynamicBoost in ElasticSearch: fixed the document storing proper outside the loop

* OAK-9696 Improve query syntax support for dynamicBoost in ElasticSearch: add tests to cover multiple boost fields query

* OAK-9696 Improve query syntax support for dynamicBoost in ElasticSearch: use fulltext field to store dynamic boost tags
  • Loading branch information
FrancoisZhang committed Feb 18, 2022
1 parent c86c8ec commit 4b285d291bcad561720ee2653334239c2741a152
Show file tree
Hide file tree
Showing 3 changed files with 155 additions and 40 deletions.
@@ -113,6 +113,18 @@ public String build() {
builder.startObject();
{
builder.field(FieldNames.PATH, path);
for (Map.Entry<String, Map<String, Double>> f : dynamicBoostFields.entrySet()) {
builder.startArray(f.getKey());
for (Map.Entry<String, Double> v : f.getValue().entrySet()) {
builder.startObject();
builder.field("value", v.getKey());
builder.field("boost", v.getValue());
builder.endObject();
// also add into fulltext field
addFulltext(v.getKey());
}
builder.endArray();
}
if (fulltext.size() > 0) {
builder.field(FieldNames.FULLTEXT, fulltext);
}
@@ -132,16 +144,6 @@ public String build() {
for (Map.Entry<String, List<Object>> prop : properties.entrySet()) {
builder.field(prop.getKey(), prop.getValue().size() == 1 ? prop.getValue().get(0) : prop.getValue());
}
for (Map.Entry<String, Map<String, Double>> f : dynamicBoostFields.entrySet()) {
builder.startArray(f.getKey());
for (Map.Entry<String, Double> v : f.getValue().entrySet()) {
builder.startObject();
builder.field("value", v.getKey());
builder.field("boost", v.getValue());
builder.endObject();
}
builder.endArray();
}
if (!similarityTags.isEmpty()) {
builder.field(ElasticIndexDefinition.SIMILARITY_TAGS, similarityTags);
}
@@ -568,11 +568,9 @@ private boolean visitTerm(String propertyName, String text, String boost, boolea
if (boost != null) {
fullTextQuery.boost(Float.parseFloat(boost));
}
BoolQueryBuilder shouldBoolQueryWrapper = boolQuery().should(fullTextQuery);
// add dynamic boosts in OR if available
BoolQueryBuilder boolQueryBuilder = boolQuery().must(fullTextQuery);
Stream<QueryBuilder> dynamicScoreQueries = dynamicScoreQueries(text);
dynamicScoreQueries.forEach(shouldBoolQueryWrapper::should);
BoolQueryBuilder boolQueryBuilder = boolQuery().must(shouldBoolQueryWrapper);
dynamicScoreQueries.forEach(boolQueryBuilder::should);

if (not) {
BoolQueryBuilder bq = boolQuery().mustNot(boolQueryBuilder);
@@ -28,7 +28,6 @@
import org.apache.jackrabbit.oak.api.Type;
import org.apache.jackrabbit.oak.plugins.index.search.util.IndexDefinitionBuilder;
import org.apache.jackrabbit.oak.plugins.nodetype.write.NodeTypeRegistry;
import org.junit.Ignore;
import org.junit.Test;

import static org.apache.jackrabbit.JcrConstants.NT_UNSTRUCTURED;
@@ -38,7 +37,7 @@ public class ElasticDynamicBoostQueryTest extends ElasticAbstractQueryTest {

@Test
public void dynamicBoost() throws CommitFailedException {
configureIndex();
configureIndex(false);

Tree test = createNodeWithType(root.getTree("/"), "test", NT_UNSTRUCTURED);
Tree item1Metadata = createNodeWithMetadata(test, "item1", "flower with a lot of red and a bit of blue");
@@ -55,7 +54,7 @@ public void dynamicBoost() throws CommitFailedException {
root.commit();

assertEventually(() -> {
assertQuery("//element(*, dam:Asset)[jcr:contains(@title, 'flower')]",
assertQuery("//element(*, dam:Asset)[jcr:contains(., 'flower')]",
XPATH, Arrays.asList("/test/item1", "/test/item2"));
assertOrderedQuery("select [jcr:path] from [dam:Asset] where contains(title, 'red flower')",
Arrays.asList("/test/item1", "/test/item2"));
@@ -66,7 +65,7 @@ public void dynamicBoost() throws CommitFailedException {

@Test
public void dynamicBoostAnalyzed() throws CommitFailedException {
configureIndex();
configureIndex(false);

Tree test = createNodeWithType(root.getTree("/"), "test", NT_UNSTRUCTURED);
Tree item1Metadata = createNodeWithMetadata(test, "item1", "flower with a lot of red and a bit of blue");
@@ -84,7 +83,7 @@ public void dynamicBoostAnalyzed() throws CommitFailedException {
root.commit();

assertEventually(() -> {
assertQuery("//element(*, dam:Asset)[jcr:contains(@title, 'flower')]",
assertQuery("//element(*, dam:Asset)[jcr:contains(., 'flower')]",
XPATH, Arrays.asList("/test/item1", "/test/item2"));
assertOrderedQuery("select [jcr:path] from [dam:Asset] where contains(title, 'red-flower')",
Arrays.asList("/test/item1", "/test/item2"));
@@ -95,7 +94,7 @@ public void dynamicBoostAnalyzed() throws CommitFailedException {

@Test
public void dynamicBoostWithAdditionalTags() throws CommitFailedException {
configureIndex();
configureIndex(false);

Tree test = createNodeWithType(root.getTree("/"), "test", NT_UNSTRUCTURED);
Tree item1Metadata = createNodeWithMetadata(test, "item1", "flower with a lot of colors");
@@ -112,58 +111,138 @@ public void dynamicBoostWithAdditionalTags() throws CommitFailedException {
root.commit();

assertEventually(() -> {
assertQuery("//element(*, dam:Asset)[jcr:contains(@title, 'flower')]",
assertQuery("//element(*, dam:Asset)[jcr:contains(., 'flower')]",
XPATH, Arrays.asList("/test/item1", "/test/item2"));
assertOrderedQuery("select [jcr:path] from [dam:Asset] where contains(title, 'red flower')",
assertOrderedQuery("select [jcr:path] from [dam:Asset] where contains(*, 'red flower')",
Arrays.asList("/test/item1", "/test/item2"));
assertOrderedQuery("select [jcr:path] from [dam:Asset] where contains(title, 'blue flower')",
assertOrderedQuery("select [jcr:path] from [dam:Asset] where contains(*, 'blue flower')",
Arrays.asList("/test/item2", "/test/item1"));
});
}

@Test
public void testQueryDynamicBoostBasic() throws CommitFailedException {
configureIndex();
configureIndex(false);
prepareTestAssets();
assertEventually(() -> {
assertQuery("//element(*, dam:Asset)[jcr:contains(@title, 'plant')]", XPATH,
assertQuery("//element(*, dam:Asset)[jcr:contains(., 'plant')]", XPATH,
Arrays.asList("/test/asset1", "/test/asset2", "/test/asset3"));
assertQuery("//element(*, dam:Asset)[jcr:contains(@title, 'flower')]", XPATH, Arrays.asList("/test/asset1", "/test/asset2"));
assertQuery("//element(*, dam:Asset)[jcr:contains(., 'flower')]", XPATH, Arrays.asList("/test/asset1", "/test/asset2"));
});
}

@Test
public void testQueryDynamicBoostCaseInsensitive() throws Exception {
configureIndex();
configureIndex(false);
prepareTestAssets();
assertEventually(() -> {
assertQuery("//element(*, dam:Asset)[jcr:contains(@title, 'FLOWER')]", XPATH, Arrays.asList("/test/asset1", "/test/asset2"));
assertQuery("//element(*, dam:Asset)[jcr:contains(., 'FLOWER')]", XPATH, Arrays.asList("/test/asset1", "/test/asset2"));
});
}

@Test
public void testQueryDynamicBoostOrder() throws Exception {
configureIndex();
configureIndex(false);
prepareTestAssets();

assertEventually(() -> {
assertOrderedQuery("select [jcr:path] from [dam:Asset] where contains(@title, 'plant')",
assertOrderedQuery("select [jcr:path] from [dam:Asset] where contains(*, 'plant')",
Arrays.asList("/test/asset2", "/test/asset3", "/test/asset1"));
});
}

// dynamic boost: a space between terms is treated as AND (see testQueryDynamicBoostSpace); use an explicit OR to match either term
/**
 * Checks full-text wildcard terms against the assets created by {@code prepareTestAssets()}:
 * every {@code ?} pattern is expected to return only {@code /test/asset3} and every
 * {@code *} pattern only {@code /test/asset2}.
 */
@Test
public void testQueryDynamicBoostWildcard() throws Exception {
    configureIndex(false);
    prepareTestAssets();

    // The wildcard is placed at the end, in the middle and at the start of the term.
    final String[] questionMarkTerms = {"blu?", "bl?e", "?lue"};
    final String[] asteriskTerms = {"coff*", "co*ee", "*ffee"};

    assertEventually(() -> {
        for (String term : questionMarkTerms) {
            assertQuery("select [jcr:path] from [dam:Asset] where contains(*, '" + term + "')", SQL2,
                    Arrays.asList("/test/asset3"));
        }
        for (String term : asteriskTerms) {
            assertQuery("select [jcr:path] from [dam:Asset] where contains(*, '" + term + "')", SQL2,
                    Arrays.asList("/test/asset2"));
        }
    });
}

@Test
public void testQueryDynamicBoostSpace() throws Exception {
configureIndex();
configureIndex(false);
prepareTestAssets();

assertEventually(() -> {
assertQuery("select [jcr:path] from [dam:Asset] where contains(*, 'coffee flower')", SQL2, Arrays.asList("/test/asset2"));
assertQuery("select [jcr:path] from [dam:Asset] where contains(*, 'blue plant')", SQL2, Arrays.asList("/test/asset3"));
});
}

@Test
public void testQueryDynamicBoostExplicitOr() throws Exception {
configureIndex(false);
prepareTestAssets();

assertEventually(() -> {
assertQuery("select [jcr:path] from [dam:Asset] where contains(@title, 'blue flower')", SQL2,
assertQuery("select [jcr:path] from [dam:Asset] where contains(*, 'blue OR flower')", SQL2,
Arrays.asList("/test/asset1", "/test/asset2", "/test/asset3"));
assertQuery("select [jcr:path] from [dam:Asset] where contains(*, 'blue OR coffee')", SQL2,
Arrays.asList("/test/asset2", "/test/asset3"));
});
}

/**
 * Checks that a term prefixed with {@code -} excludes matching assets: the two queries
 * below are expected to return only {@code /test/asset3} and only {@code /test/asset1}.
 */
@Test
public void testQueryDynamicBoostMinus() throws Exception {
    configureIndex(false);
    prepareTestAssets();

    final String plantWithoutFlower =
            "select [jcr:path] from [dam:Asset] where contains(*, 'plant -flower')";
    final String flowerWithoutCoffee =
            "select [jcr:path] from [dam:Asset] where contains(*, 'flower -coffee')";

    assertEventually(() -> {
        assertQuery(plantWithoutFlower, SQL2, Arrays.asList("/test/asset3"));
        assertQuery(flowerWithoutCoffee, SQL2, Arrays.asList("/test/asset1"));
    });
}

/**
 * Runs the dynamic-boost query scenarios (basic match, case-insensitivity, ordering,
 * wildcards, space-separated terms, explicit OR and minus exclusion) against an index
 * configured with {@code configureIndex(true)} and the assets created by
 * {@code prepareTestAssetsForBothBoostFields()}.
 */
@Test
public void testQueryDynamicBoostForBothBoostFields() throws CommitFailedException {
    configureIndex(true);
    prepareTestAssetsForBothBoostFields();

    // Every query below differs only in the full-text expression between these fragments.
    final String xpathHead = "//element(*, dam:Asset)[jcr:contains(., '";
    final String xpathTail = "')]";
    final String sql2Head = "select [jcr:path] from [dam:Asset] where contains(*, '";
    final String sql2Tail = "')";

    final String[] questionMarkTerms = {"blu?", "bl?e", "?lue"};
    final String[] asteriskTerms = {"coff*", "co*ee", "*ffee"};

    assertEventually(() -> {
        // basic test
        assertQuery(xpathHead + "plant" + xpathTail, XPATH,
                Arrays.asList("/test/asset1", "/test/asset2", "/test/asset3"));
        assertQuery(xpathHead + "flower" + xpathTail, XPATH,
                Arrays.asList("/test/asset1", "/test/asset2"));

        // case insensitive
        assertQuery(xpathHead + "FLOWER" + xpathTail, XPATH,
                Arrays.asList("/test/asset1", "/test/asset2"));

        // test order
        assertOrderedQuery(sql2Head + "plant" + sql2Tail,
                Arrays.asList("/test/asset2", "/test/asset3", "/test/asset1"));

        // test wildcard
        for (String term : questionMarkTerms) {
            assertQuery(sql2Head + term + sql2Tail, SQL2, Arrays.asList("/test/asset3"));
        }
        for (String term : asteriskTerms) {
            assertQuery(sql2Head + term + sql2Tail, SQL2, Arrays.asList("/test/asset2"));
        }

        // test space as AND
        assertQuery(sql2Head + "coffee flower" + sql2Tail, SQL2, Arrays.asList("/test/asset2"));
        assertQuery(sql2Head + "blue plant" + sql2Tail, SQL2, Arrays.asList("/test/asset3"));

        // explicit OR
        assertQuery(sql2Head + "blue OR flower" + sql2Tail, SQL2,
                Arrays.asList("/test/asset1", "/test/asset2", "/test/asset3"));
        assertQuery(sql2Head + "blue OR coffee" + sql2Tail, SQL2,
                Arrays.asList("/test/asset2", "/test/asset3"));

        // exclude with minus
        assertQuery(sql2Head + "plant -flower" + sql2Tail, SQL2, Arrays.asList("/test/asset3"));
        assertQuery(sql2Head + "flower -coffee" + sql2Tail, SQL2, Arrays.asList("/test/asset1"));
    });
}


// utils
private void prepareTestAssets() throws CommitFailedException {
Tree test = createNodeWithType(root.getTree("/"), "test", NT_UNSTRUCTURED);
@@ -183,6 +262,27 @@ private void prepareTestAssets() throws CommitFailedException {
root.commit();
}

/**
 * Creates {@code /test/asset1..3}, each with a {@code metadata} node and a
 * {@code metadataNew} node under its content node, adds predicted tags to both,
 * and commits the changes.
 *
 * <pre>
 * asset1: metadata plant=0.1 | metadataNew flower=0.1
 * asset2: metadata plant=0.9 | metadataNew flower=0.1, coffee=0.5
 * asset3: metadata plant=0.5 | metadataNew blue=0.5
 * </pre>
 */
private void prepareTestAssetsForBothBoostFields() throws CommitFailedException {
    Tree testRoot = createNodeWithType(root.getTree("/"), "test", NT_UNSTRUCTURED);

    // asset1
    Tree firstMetadata = createNodeWithMetadata(testRoot, "asset1", "titleone long");
    Tree firstMetadataNew = addNewMetadata(testRoot, "asset1");
    createPredictedTag(firstMetadata, "plant", 0.1);
    createPredictedTag(firstMetadataNew, "flower", 0.1);

    // asset2
    Tree secondMetadata = createNodeWithMetadata(testRoot, "asset2", "titletwo long");
    Tree secondMetadataNew = addNewMetadata(testRoot, "asset2");
    createPredictedTag(secondMetadata, "plant", 0.9);
    createPredictedTag(secondMetadataNew, "flower", 0.1);
    createPredictedTag(secondMetadataNew, "coffee", 0.5);

    // asset3
    Tree thirdMetadata = createNodeWithMetadata(testRoot, "asset3", "titletwo long");
    Tree thirdMetadataNew = addNewMetadata(testRoot, "asset3");
    createPredictedTag(thirdMetadata, "plant", 0.5);
    createPredictedTag(thirdMetadataNew, "blue", 0.5);

    root.commit();
}

private void createPredictedTag(Tree parent, String tagName, double confidence) {
Tree node = createNodeWithType(parent, tagName, NT_UNSTRUCTURED);
configureBoostedField(node, tagName, confidence);
@@ -194,30 +294,45 @@ private void configureBoostedField(Tree node, String name, double confidence) {
node.setProperty("confidence", confidence);
}

private void configureIndex() throws CommitFailedException {
private void configureIndex(boolean bothBoostFields) throws CommitFailedException {
NodeTypeRegistry.register(root, new ByteArrayInputStream(ASSET_NODE_TYPE.getBytes()), "test nodeType");
IndexDefinitionBuilder builder = createIndex(true, "dam:Asset", "title", "dynamicBoost");
IndexDefinitionBuilder.PropertyRule title = builder.indexRule("dam:Asset")
.property("title")
.analyzed();
.analyzed().nodeScopeIndex();
title.getBuilderTree().setProperty(JcrConstants.JCR_PRIMARYTYPE, NT_UNSTRUCTURED, Type.NAME);
IndexDefinitionBuilder.PropertyRule db = builder.indexRule("dam:Asset").property("dynamicBoost");
Tree dbTree = db.getBuilderTree();
IndexDefinitionBuilder.IndexRule assetIndexRule = builder.indexRule("dam:Asset");

IndexDefinitionBuilder.PropertyRule dynamicBoostPropertyRule = assetIndexRule.property("dynamicBoost");
Tree dbTree = dynamicBoostPropertyRule.getBuilderTree();
dbTree.setProperty(JcrConstants.JCR_PRIMARYTYPE, NT_UNSTRUCTURED, Type.NAME);
dbTree.setProperty("name", "jcr:content/metadata/.*");
dbTree.setProperty("isRegexp", true);
dbTree.setProperty("dynamicBoost", true);

if (bothBoostFields) {
IndexDefinitionBuilder.PropertyRule newDynamicBoostPropertyRule = assetIndexRule.property("dynamicBoostNew");
Tree anotherDbTree = newDynamicBoostPropertyRule.getBuilderTree();
anotherDbTree.setProperty(JcrConstants.JCR_PRIMARYTYPE, NT_UNSTRUCTURED, Type.NAME);
anotherDbTree.setProperty("name", "jcr:content/metadataNew/.*");
anotherDbTree.setProperty("isRegexp", true);
anotherDbTree.setProperty("dynamicBoost", true);
}

setIndex("damAsset_" + UUID.randomUUID(), builder);
root.commit();
}

/**
 * Creates a {@code dam:Asset} child of {@code parent} with the given title and a
 * content node beneath it holding a {@code metadata} node.
 *
 * @param parent   tree under which the asset is created
 * @param nodeName name of the asset node
 * @param title    value stored in the asset's {@code title} property
 * @return the newly created {@code metadata} tree
 */
private Tree createNodeWithMetadata(Tree parent, String nodeName, String title) {
    Tree asset = createNodeWithType(parent, nodeName, "dam:Asset");
    asset.setProperty("title", title);
    return createNodeWithType(
            createNodeWithType(asset, JcrConstants.JCR_CONTENT, NT_UNSTRUCTURED),
            "metadata", NT_UNSTRUCTURED);
}

return createNodeWithType(
createNodeWithType(item, JcrConstants.JCR_CONTENT, NT_UNSTRUCTURED),
"metadata", NT_UNSTRUCTURED);
/**
 * Adds a {@code metadataNew} node under the content node of the child of
 * {@code parent} named {@code nodeName}.
 *
 * @param parent   tree containing the asset
 * @param nodeName name of the asset node
 * @return the newly created {@code metadataNew} tree
 */
private Tree addNewMetadata(Tree parent, String nodeName) {
    Tree asset = parent.getChild(nodeName);
    Tree contentNode = asset.getChild(JcrConstants.JCR_CONTENT);
    return createNodeWithType(contentNode, "metadataNew", NT_UNSTRUCTURED);
}

private Tree createNodeWithType(Tree t, String nodeName, String typeName){

0 comments on commit 4b285d2

Please sign in to comment.