Skip to content

Commit

Permalink
ESQL: Fix Analyzer to not interpret escaped * as a pattern
Browse files Browse the repository at this point in the history
Fix a bug in the Analyzer which always considered `*` (match any string)
 in field names even when escaped (back quoted). This caused the clause
 to be too greedy and either keep or drop too many fields.

Fix elastic#104955
  • Loading branch information
costin committed Feb 9, 2024
1 parent dd51f6b commit 5137ce7
Show file tree
Hide file tree
Showing 8 changed files with 300 additions and 76 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,12 @@
package org.elasticsearch.xpack.esql.analysis;

import org.elasticsearch.common.logging.HeaderWarning;
import org.elasticsearch.common.regex.Regex;
import org.elasticsearch.common.util.set.Sets;
import org.elasticsearch.index.mapper.DateFieldMapper;
import org.elasticsearch.xpack.core.enrich.EnrichPolicy;
import org.elasticsearch.xpack.esql.EsqlIllegalArgumentException;
import org.elasticsearch.xpack.esql.VerificationException;
import org.elasticsearch.xpack.esql.expression.UnresolvedNamePattern;
import org.elasticsearch.xpack.esql.expression.function.UnsupportedAttribute;
import org.elasticsearch.xpack.esql.plan.logical.Drop;
import org.elasticsearch.xpack.esql.plan.logical.Enrich;
Expand Down Expand Up @@ -73,6 +74,7 @@
import java.util.Map;
import java.util.Set;
import java.util.function.Function;
import java.util.function.Predicate;
import java.util.stream.Collectors;

import static java.util.Collections.singletonList;
Expand Down Expand Up @@ -429,11 +431,6 @@ private LogicalPlan resolveEval(Eval eval, List<Attribute> childOutput) {
* row foo = 1, bar = 2 | keep *, foo -> bar, foo
* row foo = 1, bar = 2 | keep foo, * -> foo, bar
* row foo = 1, bar = 2 | keep bar*, foo, * -> bar, foo
*
*
* @param p
* @param childOutput
* @return
*/
private LogicalPlan resolveKeep(Project p, List<Attribute> childOutput) {
List<NamedExpression> resolvedProjections = new ArrayList<>();
Expand All @@ -453,12 +450,14 @@ private LogicalPlan resolveKeep(Project p, List<Attribute> childOutput) {
if (proj instanceof UnresolvedStar) {
resolved = childOutput;
priority = 2;
} else if (proj instanceof UnresolvedNamePattern up) {
resolved = resolveAgainstList(up, childOutput);
priority = 1;
} else if (proj instanceof UnresolvedAttribute ua) {
resolved = resolveAgainstList(ua, childOutput);
priority = Regex.isSimpleMatchPattern(ua.name()) ? 1 : 0;
priority = 0;
} else {
assert false : "unexpected projection: " + proj;
throw new IllegalStateException("unexpected projection: " + proj);
throw new EsqlIllegalArgumentException("unexpected projection: " + proj);
}
for (Attribute attr : resolved) {
Integer previousPrio = priorities.get(attr);
Expand All @@ -478,7 +477,16 @@ private LogicalPlan resolveDrop(Drop drop, List<Attribute> childOutput) {
List<NamedExpression> resolvedProjections = new ArrayList<>(childOutput);

for (var ne : drop.removals()) {
var resolved = ne instanceof UnresolvedAttribute ua ? resolveAgainstList(ua, childOutput) : singletonList(ne);
List<? extends NamedExpression> resolved;

if (ne instanceof UnresolvedNamePattern np) {
resolved = resolveAgainstList(np, childOutput);
} else if (ne instanceof UnresolvedAttribute ua) {
resolved = resolveAgainstList(ua, childOutput);
} else {
resolved = singletonList(ne);
}

// the return list might contain either resolved elements or unresolved ones.
// if things are resolved, remove them - if not add them to the list to trip the Verifier;
// thus make sure to remove the intersection but add the unresolved difference (if any).
Expand Down Expand Up @@ -523,7 +531,7 @@ private LogicalPlan resolveRename(Rename rename, List<Attribute> childrenOutput)
if (li.next() instanceof Alias a && a.name().equals(resolved.name())) {
reverseAliasing.put(resolved.name(), alias.name());
// update aliased projection in place
li.set((NamedExpression) alias.replaceChildren(a.children()));
li.set(alias.replaceChildren(a.children()));
updated = true;
break;
}
Expand Down Expand Up @@ -581,28 +589,37 @@ private LogicalPlan resolveEnrich(Enrich enrich, List<Attribute> childrenOutput)
}
}

private static List<Attribute> resolveAgainstList(UnresolvedAttribute u, Collection<Attribute> attrList) {
var matches = AnalyzerRules.maybeResolveAgainstList(u, attrList, false, true, Analyzer::handleSpecialFields);
private static List<Attribute> resolveAgainstList(UnresolvedNamePattern up, Collection<Attribute> attrList) {
UnresolvedAttribute ua = new UnresolvedAttribute(up.source(), up.pattern(), null);
Predicate<Attribute> matcher = a -> up.match(a.name()) || up.match(a.qualifiedName());
var matches = AnalyzerRules.maybeResolveAgainstList(matcher, () -> ua, attrList, true, a -> Analyzer.handleSpecialFields(ua, a));
return potentialCandidatesIfNoMatchesFound(ua, matches, attrList, list -> UnresolvedNamePattern.errorMessage(up.pattern(), list));
}

private static List<Attribute> resolveAgainstList(UnresolvedAttribute ua, Collection<Attribute> attrList) {
var matches = AnalyzerRules.maybeResolveAgainstList(ua, attrList, a -> Analyzer.handleSpecialFields(ua, a));
return potentialCandidatesIfNoMatchesFound(ua, matches, attrList, list -> UnresolvedAttribute.errorMessage(ua.name(), list));
}

private static List<Attribute> potentialCandidatesIfNoMatchesFound(
UnresolvedAttribute ua,
List<Attribute> matches,
Collection<Attribute> attrList,
java.util.function.Function<List<String>, String> messageProducer
) {
// none found - add error message
if (matches.isEmpty()) {
UnresolvedAttribute unresolved;
var name = u.name();
if (Regex.isSimpleMatchPattern(name)) {
unresolved = u.withUnresolvedMessage(format(null, "No match found for [{}]", name));
} else {
Set<String> names = new HashSet<>(attrList.size());
for (var a : attrList) {
String nameCandidate = a.name();
if (EsqlDataTypes.isPrimitive(a.dataType())) {
names.add(nameCandidate);
}
Set<String> names = new HashSet<>(attrList.size());
for (var a : attrList) {
String nameCandidate = a.name();
if (EsqlDataTypes.isPrimitive(a.dataType())) {
names.add(nameCandidate);
}
unresolved = u.withUnresolvedMessage(UnresolvedAttribute.errorMessage(name, StringUtils.findSimilar(name, names)));
}
return singletonList(unresolved);
var name = ua.name();
UnresolvedAttribute unresolved = ua.withUnresolvedMessage(messageProducer.apply(StringUtils.findSimilar(name, names)));
matches = singletonList(unresolved);
}

return matches;
}

Expand All @@ -615,7 +632,7 @@ private static Attribute handleSpecialFields(UnresolvedAttribute u, Attribute na
}
}

return named;
return named.withLocation(u.source());
}

private static class ResolveFunctions extends ParameterizedAnalyzerRule<LogicalPlan, AnalyzerContext> {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/

package org.elasticsearch.xpack.esql.expression;

import org.apache.lucene.util.automaton.CharacterRunAutomaton;
import org.elasticsearch.xpack.ql.capabilities.UnresolvedException;
import org.elasticsearch.xpack.ql.expression.Expression;
import org.elasticsearch.xpack.ql.expression.Nullability;
import org.elasticsearch.xpack.ql.expression.UnresolvedNamedExpression;
import org.elasticsearch.xpack.ql.tree.NodeInfo;
import org.elasticsearch.xpack.ql.tree.Source;
import org.elasticsearch.xpack.ql.util.CollectionUtils;

import java.util.List;
import java.util.Objects;

import static java.util.Collections.emptyList;

/**
* Unresolved expression for encapsulating a pattern:
* KEEP `a*`, b*, `c*`*`d*`
* a* is an actual name (UnresolvedAttribute)
* b* is a name pattern (this class)
* `c*`*`d*` is a name pattern
*/
public class UnresolvedNamePattern extends UnresolvedNamedExpression {

private final CharacterRunAutomaton automaton;
private final String pattern;

public UnresolvedNamePattern(Source source, CharacterRunAutomaton automaton, String patternString) {
this(source, automaton, patternString, null);
}

public UnresolvedNamePattern(Source source, CharacterRunAutomaton automaton, String patternString, String qualifier) {
super(source, emptyList());
this.automaton = automaton;
this.pattern = patternString;
}

public boolean match(String string) {
return automaton.run(string);
}

public String pattern() {
return pattern;
}

@Override
public Expression replaceChildren(List<Expression> newChildren) {
throw new UnsupportedOperationException("this type of node doesn't have any children to replace");
}

@Override
protected NodeInfo<UnresolvedNamePattern> info() {
return NodeInfo.create(this, UnresolvedNamePattern::new, automaton, pattern);
}

@Override
public String unresolvedMessage() {
return "Unresolved pattern [" + pattern + "]";
}

public static String errorMessage(String pattern, List<String> potentialMatches) {
String msg = "No matches found for pattern [" + pattern + "]";
if (CollectionUtils.isEmpty(potentialMatches) == false) {
msg += ", did you mean to match "
+ (potentialMatches.size() == 1 ? "[" + potentialMatches.get(0) + "]" : "any of " + potentialMatches.toString())
+ "?";
}
return msg;
}

@Override
public Nullability nullable() {
throw new UnresolvedException("nullable", this);
}

@Override
public int hashCode() {
return Objects.hash(super.hashCode(), pattern);
}

@Override
public boolean equals(Object obj) {
if (super.equals(obj)) {
UnresolvedNamePattern ua = (UnresolvedNamePattern) obj;
return Objects.equals(pattern, ua.pattern);
}
return false;
}

@Override
public String nodeString() {
return toString();
}

@Override
public String toString() {
return UNRESOLVED_PREFIX + pattern;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,12 @@
import org.antlr.v4.runtime.Token;
import org.antlr.v4.runtime.tree.ParseTree;
import org.antlr.v4.runtime.tree.TerminalNode;
import org.apache.lucene.util.automaton.Automata;
import org.apache.lucene.util.automaton.Automaton;
import org.apache.lucene.util.automaton.CharacterRunAutomaton;
import org.apache.lucene.util.automaton.Operations;
import org.elasticsearch.common.Strings;
import org.elasticsearch.common.regex.Regex;
import org.elasticsearch.xpack.esql.evaluator.predicate.operator.comparison.Equals;
import org.elasticsearch.xpack.esql.evaluator.predicate.operator.comparison.GreaterThan;
import org.elasticsearch.xpack.esql.evaluator.predicate.operator.comparison.GreaterThanOrEqual;
Expand All @@ -21,6 +26,7 @@
import org.elasticsearch.xpack.esql.evaluator.predicate.operator.regex.RLike;
import org.elasticsearch.xpack.esql.evaluator.predicate.operator.regex.WildcardLike;
import org.elasticsearch.xpack.esql.expression.Order;
import org.elasticsearch.xpack.esql.expression.UnresolvedNamePattern;
import org.elasticsearch.xpack.esql.expression.function.EsqlFunctionRegistry;
import org.elasticsearch.xpack.esql.expression.predicate.operator.arithmetic.Add;
import org.elasticsearch.xpack.esql.expression.predicate.operator.arithmetic.Div;
Expand Down Expand Up @@ -219,11 +225,88 @@ public NamedExpression visitQualifiedNamePattern(EsqlBaseParser.QualifiedNamePat
return null;
}

List<String> strings = visitList(this, ctx.identifierPattern(), String.class);
var src = source(ctx);
return strings.size() == 1 && strings.get(0).equals(WILDCARD)
? new UnresolvedStar(src, null)
: new UnresolvedAttribute(src, Strings.collectionToDelimitedString(strings, "."));
StringBuilder sb = new StringBuilder();
var patterns = ctx.identifierPattern();

boolean hasPattern = false;
// Builds a list of either strings (which map verbatim) or Automatons which match any string
List<Object> objects = new ArrayList<>(patterns.size());
for (int i = 0, s = patterns.size(); i < s; i++) {
var idCtx = patterns.get(i);
String name;
if (i > 0) {
sb.append(".");
objects.add(".");
}
// check for pattern
if (idCtx.UNQUOTED_ID_PATTERN() != null) {
name = idCtx.UNQUOTED_ID_PATTERN().getText();
sb.append(name);
// check special wildcard case
if (patterns.size() == 1 && name.equals(WILDCARD)) {
return new UnresolvedStar(src, null);
}
// loop the string itself to extract any * and make them an automaton directly
// the code is somewhat messy but doesn't invoke the full blown Regex engine either
if (Regex.isSimpleMatchPattern(name)) {
hasPattern = true;
String str = name;
boolean keepGoing = false;
do {
int localIndex = str.indexOf('*');
// in case of match
if (localIndex != -1) {
keepGoing = true;
// copy any prefix string
if (localIndex > 0) {
objects.add(str.substring(0, localIndex));
}
objects.add(Automata.makeAnyString());
localIndex++;
// trim the string
if (localIndex < str.length()) {
str = str.substring(localIndex);
} else {
keepGoing = false;
}
}
// no more matches, copy leftovers and end the loop
else {
keepGoing = false;
if (str.length() > 0) {
objects.add(str);
}
}
} while (keepGoing);
} else {
objects.add(name);
}
}
// quoted - definitely no pattern
else {
sb.append(idCtx.QUOTED_IDENTIFIER().getText());
if (i > 0) {
objects.add(".");
}
objects.add(unquoteIdentifier(idCtx.QUOTED_IDENTIFIER(), null));
}
}

NamedExpression result;
// need to combine automaton
if (hasPattern) {
// add . as optional matching
List<Automaton> list = new ArrayList<>(objects.size());
for (var o : objects) {
list.add(o instanceof Automaton a ? a : Automata.makeString(o.toString()));
}
// use the fast run variant
result = new UnresolvedNamePattern(src, new CharacterRunAutomaton(Operations.concatenate(list)), sb.toString());
} else {
result = new UnresolvedAttribute(src, Strings.collectionToDelimitedString(objects, ""));
}
return result;
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,11 +23,6 @@ public String visitIdentifier(IdentifierContext ctx) {
return ctx == null ? null : unquoteIdentifier(ctx.QUOTED_IDENTIFIER(), ctx.UNQUOTED_IDENTIFIER());
}

@Override
public String visitIdentifierPattern(EsqlBaseParser.IdentifierPatternContext ctx) {
return unquoteIdentifier(ctx.QUOTED_IDENTIFIER(), ctx.UNQUOTED_ID_PATTERN());
}

@Override
public String visitFromIdentifier(FromIdentifierContext ctx) {
return ctx == null ? null : unquoteIdentifier(ctx.QUOTED_IDENTIFIER(), ctx.FROM_UNQUOTED_IDENTIFIER());
Expand Down
Loading

0 comments on commit 5137ce7

Please sign in to comment.