Skip to content

Commit

Permalink
QL: Optimize regexes without patterns as equality (#63216)
Browse files Browse the repository at this point in the history
If a QL regex doesn't contain any pattern, convert it to Equals.

Close #63196

(cherry picked from commit e22a843)
  • Loading branch information
costin committed Oct 5, 2020
1 parent efd2243 commit b67d227
Show file tree
Hide file tree
Showing 10 changed files with 174 additions and 45 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@
import org.elasticsearch.xpack.ql.optimizer.OptimizerRules.OptimizerRule;
import org.elasticsearch.xpack.ql.optimizer.OptimizerRules.PropagateEquals;
import org.elasticsearch.xpack.ql.optimizer.OptimizerRules.PruneLiteralsInOrderBy;
import org.elasticsearch.xpack.ql.optimizer.OptimizerRules.ReplaceMatchAll;
import org.elasticsearch.xpack.ql.optimizer.OptimizerRules.ReplaceRegexMatch;
import org.elasticsearch.xpack.ql.optimizer.OptimizerRules.ReplaceSurrogateFunction;
import org.elasticsearch.xpack.ql.optimizer.OptimizerRules.SetAsOptimized;
import org.elasticsearch.xpack.ql.optimizer.OptimizerRules.TransformDirection;
Expand Down Expand Up @@ -72,7 +72,7 @@ protected Iterable<RuleExecutor<LogicalPlan>.Batch> batches() {
Batch substitutions = new Batch("Substitution", Limiter.ONCE,
new ReplaceWildcards(),
new ReplaceSurrogateFunction(),
new ReplaceMatchAll());
new ReplaceRegexMatch());

Batch operators = new Batch("Operator Optimization",
new ConstantFolding(),
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/

package org.elasticsearch.xpack.ql.expression.predicate.regex;

import org.apache.lucene.util.automaton.Automaton;
import org.apache.lucene.util.automaton.Operations;

/**
 * Skeleton {@link StringPattern} that answers the {@code matchesAll} and
 * {@code isExactMatch} hints by inspecting an {@link Automaton} supplied by
 * the subclass. The automaton is built on first use and then reused.
 */
abstract class AbstractStringPattern implements StringPattern {

    // Result of createAutomaton(); remains null until one of the hint methods needs it.
    private Automaton automaton;

    /**
     * Builds the automaton representing this pattern. Called lazily from the
     * hint methods below rather than at construction time.
     */
    abstract Automaton createAutomaton();

    // Returns the cached automaton, creating and storing it when none is cached yet.
    private Automaton automaton() {
        Automaton cached = automaton;
        if (cached != null) {
            return cached;
        }
        cached = createAutomaton();
        automaton = cached;
        return cached;
    }

    /**
     * Reports whether the pattern's automaton is total according to
     * {@link Operations#isTotal(Automaton)}.
     */
    @Override
    public boolean matchesAll() {
        final Automaton current = automaton();
        return Operations.isTotal(current);
    }

    /**
     * Reports whether the common prefix of the pattern's automaton is equal to
     * the pattern's {@link #asString() string form}.
     */
    @Override
    public boolean isExactMatch() {
        final Automaton current = automaton();
        return Operations.getCommonPrefix(current).equals(asString());
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -22,14 +22,15 @@
*
* To prevent conflicts with ES, the string and char must be validated to not contain '*'.
*/
public class LikePattern implements StringPattern {
public class LikePattern extends AbstractStringPattern {

private final String pattern;
private final char escape;

private final String regex;
private final String wildcard;
private final String indexNameWildcard;
private final String string;

public LikePattern(String pattern, char escape) {
this.pattern = pattern;
Expand All @@ -38,6 +39,7 @@ public LikePattern(String pattern, char escape) {
this.regex = StringUtils.likeToJavaPattern(pattern, escape);
this.wildcard = StringUtils.likeToLuceneWildcard(pattern, escape);
this.indexNameWildcard = StringUtils.likeToIndexWildcard(pattern, escape);
this.string = pattern.replace(Character.toString(escape), StringUtils.EMPTY);
}

public String pattern() {
Expand All @@ -48,15 +50,20 @@ public char escape() {
return escape;
}

@Override
Automaton createAutomaton() {
Automaton automaton = WildcardQuery.toAutomaton(new Term(null, wildcard));
return MinimizationOperations.minimize(automaton, Operations.DEFAULT_MAX_DETERMINIZED_STATES);
}

@Override
public String asJavaRegex() {
return regex;
}

@Override
public boolean matchesAll() {
Automaton automaton = WildcardQuery.toAutomaton(new Term(null, wildcard));
return Operations.isTotal(MinimizationOperations.minimize(automaton, Operations.DEFAULT_MAX_DETERMINIZED_STATES));
public String asString() {
return string;
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,10 @@
*/
package org.elasticsearch.xpack.ql.expression.predicate.regex;

import org.apache.lucene.util.automaton.Operations;
import org.apache.lucene.util.automaton.Automaton;
import org.apache.lucene.util.automaton.RegExp;

public class RLikePattern implements StringPattern {
public class RLikePattern extends AbstractStringPattern {

private final String regexpPattern;

Expand All @@ -17,12 +17,17 @@ public RLikePattern(String regexpPattern) {
}

@Override
public String asJavaRegex() {
Automaton createAutomaton() {
return new RegExp(regexpPattern).toAutomaton();
}

@Override
public String asString() {
return regexpPattern;
}

@Override
public boolean matchesAll() {
return Operations.isTotal(new RegExp(regexpPattern).toAutomaton());
public String asJavaRegex() {
return regexpPattern;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -65,10 +65,6 @@ public Boolean fold() {
return RegexProcessor.RegexOperation.match(val, pattern().asJavaRegex());
}

public boolean matchesAll() {
return pattern.matchesAll();
}

@Override
protected Processor makeProcessor() {
return new RegexProcessor(pattern().asJavaRegex());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,16 +5,29 @@
*/
package org.elasticsearch.xpack.ql.expression.predicate.regex;

interface StringPattern {
public interface StringPattern {
/**
* Returns the pattern in (Java) regex format.
*/
String asJavaRegex();

/**
* Returns the pattern as a string. Should handle escaping.
*/
String asString();

/**
* Hint method on whether this pattern matches everything or not.
*/
default boolean matchesAll() {
return false;
}

/**
* Hint method on whether this pattern is exact, that is, whether it has no
* wildcards or other pattern constructs inside.
*/
default boolean isExactMatch() {
return false;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
import org.elasticsearch.xpack.ql.expression.predicate.operator.comparison.NotEquals;
import org.elasticsearch.xpack.ql.expression.predicate.operator.comparison.NullEquals;
import org.elasticsearch.xpack.ql.expression.predicate.regex.RegexMatch;
import org.elasticsearch.xpack.ql.expression.predicate.regex.StringPattern;
import org.elasticsearch.xpack.ql.plan.logical.Filter;
import org.elasticsearch.xpack.ql.plan.logical.Limit;
import org.elasticsearch.xpack.ql.plan.logical.LogicalPlan;
Expand Down Expand Up @@ -1138,17 +1139,22 @@ protected LogicalPlan rule(Limit limit) {
protected abstract LogicalPlan skipPlan(Limit limit);
}

public static class ReplaceMatchAll extends OptimizerExpressionRule {
public static class ReplaceRegexMatch extends OptimizerExpressionRule {

public ReplaceMatchAll() {
public ReplaceRegexMatch() {
super(TransformDirection.DOWN);
}

protected Expression rule(Expression e) {
if (e instanceof RegexMatch) {
RegexMatch<?> regexMatch = (RegexMatch<?>) e;
if (regexMatch.matchesAll()) {
return new IsNotNull(e.source(), regexMatch.field());
StringPattern pattern = regexMatch.pattern();
if (pattern.matchesAll()) {
e = new IsNotNull(e.source(), regexMatch.field());
}
else if (pattern.isExactMatch()) {
Literal literal = new Literal(regexMatch.source(), regexMatch.pattern().asString(), DataTypes.KEYWORD);
e = new Equals(e.source(), regexMatch.field(), literal);
}
}
return e;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,34 +10,77 @@

public class StringPatternTests extends ESTestCase {

private boolean isTotalWildcard(String pattern, char escape) {
return new LikePattern(pattern, escape).matchesAll();
private LikePattern like(String pattern, char escape) {
return new LikePattern(pattern, escape);
}

private boolean isTotalRegex(String pattern) {
return new RLikePattern(pattern).matchesAll();
private RLikePattern rlike(String pattern) {
return new RLikePattern(pattern);
}

private boolean matchesAll(String pattern, char escape) {
return like(pattern, escape).matchesAll();
}

private boolean exactMatch(String pattern, char escape) {
return like(pattern, escape).isExactMatch();
}

private boolean matchesAll(String pattern) {
return rlike(pattern).matchesAll();
}

private boolean exactMatch(String pattern) {
return rlike(pattern).isExactMatch();
}

public void testWildcardMatchAll() throws Exception {
assertTrue(isTotalWildcard("%", '0'));
assertTrue(isTotalWildcard("%%", '0'));
assertTrue(matchesAll("%", '0'));
assertTrue(matchesAll("%%", '0'));

assertFalse(isTotalWildcard("a%", '0'));
assertFalse(isTotalWildcard("%_", '0'));
assertFalse(isTotalWildcard("%_%_%", '0'));
assertFalse(isTotalWildcard("_%", '0'));
assertFalse(isTotalWildcard("0%", '0'));
assertFalse(matchesAll("a%", '0'));
assertFalse(matchesAll("%_", '0'));
assertFalse(matchesAll("%_%_%", '0'));
assertFalse(matchesAll("_%", '0'));
assertFalse(matchesAll("0%", '0'));
}

public void testRegexMatchAll() throws Exception {
assertTrue(isTotalRegex(".*"));
assertTrue(isTotalRegex(".*.*"));
assertTrue(isTotalRegex(".*.?"));
assertTrue(isTotalRegex(".?.*"));
assertTrue(isTotalRegex(".*.?.*"));

assertFalse(isTotalRegex("..*"));
assertFalse(isTotalRegex("ab."));
assertFalse(isTotalRegex("..?"));
assertTrue(matchesAll(".*"));
assertTrue(matchesAll(".*.*"));
assertTrue(matchesAll(".*.?"));
assertTrue(matchesAll(".?.*"));
assertTrue(matchesAll(".*.?.*"));

assertFalse(matchesAll("..*"));
assertFalse(matchesAll("ab."));
assertFalse(matchesAll("..?"));
}

public void testWildcardExactMatch() throws Exception {
assertTrue(exactMatch("0%", '0'));
assertTrue(exactMatch("0_", '0'));
assertTrue(exactMatch("123", '0'));
assertTrue(exactMatch("1230_", '0'));
assertTrue(exactMatch("1230_321", '0'));

assertFalse(exactMatch("%", '0'));
assertFalse(exactMatch("%%", '0'));
assertFalse(exactMatch("a%", '0'));
assertFalse(exactMatch("a_", '0'));
}

public void testRegexExactMatch() throws Exception {
assertFalse(exactMatch(".*"));
assertFalse(exactMatch(".*.*"));
assertFalse(exactMatch(".*.?"));
assertFalse(exactMatch(".?.*"));
assertFalse(exactMatch(".*.?.*"));
assertFalse(exactMatch("..*"));
assertFalse(exactMatch("ab."));
assertFalse(exactMatch("..?"));

assertTrue(exactMatch("abc"));
assertTrue(exactMatch("12345"));
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@
import org.elasticsearch.xpack.ql.tree.Source;
import org.elasticsearch.xpack.ql.type.DataType;
import org.elasticsearch.xpack.ql.type.EsField;
import org.elasticsearch.xpack.ql.util.StringUtils;

import java.time.ZoneId;
import java.util.Arrays;
Expand Down Expand Up @@ -1321,14 +1322,14 @@ public void testPropagateEquals_VarEq2OrVarRangeGt3Lt4OrVarGt2OrVarNe2() {
}

//
//
// Like / Regex
//
public void testMatchAllLikeToExist() throws Exception {
for (String s : Arrays.asList("%", "%%", "%%%")) {
LikePattern pattern = new LikePattern(s, (char) 0);
FieldAttribute fa = getFieldAttribute();
Like l = new Like(EMPTY, fa, pattern);
Expression e = new OptimizerRules.ReplaceMatchAll().rule(l);
Expression e = new OptimizerRules.ReplaceRegexMatch().rule(l);
assertEquals(IsNotNull.class, e.getClass());
IsNotNull inn = (IsNotNull) e;
assertEquals(fa, inn.field());
Expand All @@ -1339,9 +1340,33 @@ public void testMatchAllRLikeToExist() throws Exception {
RLikePattern pattern = new RLikePattern(".*");
FieldAttribute fa = getFieldAttribute();
RLike l = new RLike(EMPTY, fa, pattern);
Expression e = new OptimizerRules.ReplaceMatchAll().rule(l);
Expression e = new OptimizerRules.ReplaceRegexMatch().rule(l);
assertEquals(IsNotNull.class, e.getClass());
IsNotNull inn = (IsNotNull) e;
assertEquals(fa, inn.field());
}

public void testExactMatchLike() throws Exception {
for (String s : Arrays.asList("ab", "ab0%", "ab0_c")) {
LikePattern pattern = new LikePattern(s, '0');
FieldAttribute fa = getFieldAttribute();
Like l = new Like(EMPTY, fa, pattern);
Expression e = new OptimizerRules.ReplaceRegexMatch().rule(l);
assertEquals(Equals.class, e.getClass());
Equals eq = (Equals) e;
assertEquals(fa, eq.left());
assertEquals(s.replace("0", StringUtils.EMPTY), eq.right().fold());
}
}

public void testExactMatchRLike() throws Exception {
RLikePattern pattern = new RLikePattern("abc");
FieldAttribute fa = getFieldAttribute();
RLike l = new RLike(EMPTY, fa, pattern);
Expression e = new OptimizerRules.ReplaceRegexMatch().rule(l);
assertEquals(Equals.class, e.getClass());
Equals eq = (Equals) e;
assertEquals(fa, eq.left());
assertEquals("abc", eq.right().fold());
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@
import org.elasticsearch.xpack.ql.expression.predicate.operator.comparison.LessThanOrEqual;
import org.elasticsearch.xpack.ql.expression.predicate.operator.comparison.NotEquals;
import org.elasticsearch.xpack.ql.expression.predicate.operator.comparison.NullEquals;
import org.elasticsearch.xpack.ql.optimizer.OptimizerRules.ReplaceMatchAll;
import org.elasticsearch.xpack.ql.optimizer.OptimizerRules.ReplaceRegexMatch;
import org.elasticsearch.xpack.ql.optimizer.OptimizerRules.BooleanLiteralsOnTheRight;
import org.elasticsearch.xpack.ql.optimizer.OptimizerRules.BooleanSimplification;
import org.elasticsearch.xpack.ql.optimizer.OptimizerRules.CombineBinaryComparisons;
Expand Down Expand Up @@ -119,7 +119,7 @@ public LogicalPlan optimize(LogicalPlan verified) {
protected Iterable<RuleExecutor<LogicalPlan>.Batch> batches() {
Batch substitutions = new Batch("Substitutions", Limiter.ONCE,
new RewritePivot(),
new ReplaceMatchAll());
new ReplaceRegexMatch());

Batch refs = new Batch("Replace References", Limiter.ONCE,
new ReplaceReferenceAttributeWithSource(),
Expand Down

0 comments on commit b67d227

Please sign in to comment.