Skip to content

Commit

Permalink
Improve FilterPathBasedFilter performance (#79826)
Browse files Browse the repository at this point in the history
FilterPathBasedFilter is used to filter XContent. The old FilterPath
is implemented as a linked list, so when there are many patterns the
filter performance is poor.

I implemented a new FilterPath. It uses a tree to filter XContent. Each
node of the FilterPath tree has two kinds of children:
- One is a hashmap for term patterns. It can find the matching field in O(1).
- The other is a hashmap for wildcard patterns. All of its nodes are checked.

The benchmark I added in #80069 shows runtime halved in the non-wildcard cases.
The wildcard cases have pretty similar runtime. There is an increase in runtime
when newly building the filter each time. This is acceptable because, in the cases
where we have to be as fast as possible, we're already reusing the filter.
```
Benchmark                 (fieldCount)         (data)  Mode  Cnt Before ns/op After ns/op
NewParserConfig               10_field  cluster_stats  avgt    3   120303.585  120724.363
NewParserConfig               10_field    index_stats  avgt    3    22163.350   25700.478
NewParserConfig               10_field     node_stats  avgt    3    25838.040   25455.610
NewParserConfig             half_field  cluster_stats  avgt    3  1052058.673  551504.323
NewParserConfig             half_field    index_stats  avgt    3    99443.623   64639.860
NewParserConfig             half_field     node_stats  avgt    3   119383.384   78739.087
NewParserConfig              all_field  cluster_stats  avgt    3  1850317.142  949238.665
NewParserConfig              all_field    index_stats  avgt    3   135923.737  102391.586
NewParserConfig              all_field     node_stats  avgt    3   177303.659  134091.199
NewParserConfig         wildcard_field  cluster_stats  avgt    3   242419.000  158402.969
NewParserConfig         wildcard_field    index_stats  avgt    3    19677.926   16604.226
NewParserConfig         wildcard_field     node_stats  avgt    3    21932.954   20135.669
NewParserConfig      10_wildcard_field  cluster_stats  avgt    3   217022.657  186846.661
NewParserConfig      10_wildcard_field    index_stats  avgt    3    33292.234   27895.644
NewParserConfig      10_wildcard_field     node_stats  avgt    3    41748.139   31325.693
ParserConfigReused            10_field  cluster_stats  avgt    3   120882.036   94980.708
ParserConfigReused            10_field    index_stats  avgt    3    16474.120   12325.762
ParserConfigReused            10_field     node_stats  avgt    3    20947.328   14945.972
ParserConfigReused          half_field  cluster_stats  avgt    3   393069.494  182547.734
ParserConfigReused          half_field    index_stats  avgt    3    28891.619   17536.035
ParserConfigReused          half_field     node_stats  avgt    3    32433.385   22170.274
ParserConfigReused           all_field  cluster_stats  avgt    3   551217.667  210148.246
ParserConfigReused           all_field    index_stats  avgt    3    45367.850   20630.002
ParserConfigReused           all_field     node_stats  avgt    3    55899.647   24953.435
ParserConfigReused      wildcard_field  cluster_stats  avgt    3   159949.383  173188.846
ParserConfigReused      wildcard_field    index_stats  avgt    3    15713.951   17187.918
ParserConfigReused      wildcard_field     node_stats  avgt    3    19171.149   19932.483
ParserConfigReused   10_wildcard_field  cluster_stats  avgt    3   172137.559  173138.035
ParserConfigReused   10_wildcard_field    index_stats  avgt    3    24635.608   21296.013
ParserConfigReused   10_wildcard_field     node_stats  avgt    3    28866.104   26567.965
```
  • Loading branch information
weizijun committed Nov 19, 2021
1 parent add386d commit c758d1c
Show file tree
Hide file tree
Showing 4 changed files with 327 additions and 249 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -11,102 +11,183 @@
import org.elasticsearch.core.Glob;

import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;

public class FilterPath {

public static final FilterPath EMPTY = new FilterPath();

private final String filter;
private final String segment;
private final FilterPath next;
private final boolean simpleWildcard;
private final boolean doubleWildcard;

protected FilterPath(String filter, String segment, FilterPath next) {
this.filter = filter;
this.segment = segment;
this.next = next;
this.simpleWildcard = (segment != null) && (segment.length() == 1) && (segment.charAt(0) == '*');
this.doubleWildcard = (segment != null) && (segment.length() == 2) && (segment.charAt(0) == '*') && (segment.charAt(1) == '*');
private static final String WILDCARD = "*";
private static final String DOUBLE_WILDCARD = "**";

private final Map<String, FilterPath> termsChildren;
private final FilterPath[] wildcardChildren;
private final String pattern;
private final boolean isDoubleWildcard;
private final boolean isFinalNode;

/**
 * Creates one node of the filter tree.
 *
 * @param pattern the path segment this node stands for; must not be null because it is compared against "**" below
 * @param isFinalNode whether a filter expression ends at this node
 * @param termsChildren children keyed by their exact segment name; stored behind an unmodifiable view
 * @param wildcardChildren children whose segment contains a wildcard; the array is stored as passed in, without copying
 */
private FilterPath(String pattern, boolean isFinalNode, Map<String, FilterPath> termsChildren, FilterPath[] wildcardChildren) {
this.pattern = pattern;
this.isFinalNode = isFinalNode;
this.termsChildren = Collections.unmodifiableMap(termsChildren);
this.wildcardChildren = wildcardChildren;
// Cached once so later checks do not need to compare strings again.
this.isDoubleWildcard = pattern.equals(DOUBLE_WILDCARD);
}

private FilterPath() {
this("<empty>", "", null);
}

public FilterPath matchProperty(String name) {
if ((next != null) && (simpleWildcard || doubleWildcard || Glob.globMatch(segment, name))) {
return next;
public boolean hasDoubleWildcard() {
if (isDoubleWildcard || pattern.contains(DOUBLE_WILDCARD)) {
return true;
}
return null;
for (FilterPath filterPath : wildcardChildren) {
if (filterPath.hasDoubleWildcard()) {
return true;
}
}
for (FilterPath filterPath : termsChildren.values()) {
if (filterPath.hasDoubleWildcard()) {
return true;
}
}
return false;
}

public boolean matches() {
return next == null;
/**
 * @return the path segment this node was built for
 */
private String getPattern() {
return pattern;
}

public boolean isDoubleWildcard() {
return doubleWildcard;
/**
 * @return the final-node flag this node was constructed with
 */
private boolean isFinalNode() {
return isFinalNode;
}

public boolean hasDoubleWildcard() {
if (filter == null) {
/**
 * Checks whether the name matches one of the child filter nodes of this node.
 * A child matches if its term equals the name or if its wildcard pattern globs the name.
 * If a matching child is a final node, the name matches a complete filter and true is returned.
 * If a matching child is not a final node, it is added to nextFilters so that the inner
 * properties of name can be checked against it.
 * If no final node matched and the current node is a double wildcard node, the current node
 * is also added to nextFilters.
 * @param name the xcontent property name
 * @param nextFilters a List that collects the filter nodes used to check the inner properties of name
 * @return true if the name matches a final node, otherwise false
 */
boolean matches(String name, List<FilterPath> nextFilters) {
if (nextFilters == null) {
return false;
}
return filter.indexOf("**") >= 0;
}

public boolean isSimpleWildcard() {
return simpleWildcard;
}
FilterPath termNode = termsChildren.get(name);
if (termNode != null) {
if (termNode.isFinalNode()) {
return true;
} else {
nextFilters.add(termNode);
}
}

for (FilterPath wildcardNode : wildcardChildren) {
String wildcardPattern = wildcardNode.getPattern();
if (Glob.globMatch(wildcardPattern, name)) {
if (wildcardNode.isFinalNode()) {
return true;
} else {
nextFilters.add(wildcardNode);
}
}
}

public String getSegment() {
return segment;
if (isDoubleWildcard) {
nextFilters.add(this);
}

return false;
}

public FilterPath getNext() {
return next;
/**
 * Collects filter expressions into a temporary tree and converts that tree into
 * an immutable {@link FilterPath} tree.
 * <p>
 * A filter expression is split into segments on unescaped dots; a dot preceded by
 * a backslash is kept as part of the segment (with the backslash removed).
 */
private static class FilterPathBuilder {

    /**
     * Mutable tree node used only while filters are being inserted.
     * Declared static because it never needs the enclosing builder instance.
     */
    private static class BuildNode {
        private final Map<String, BuildNode> children = new HashMap<>();
        private final boolean isFinalNode;

        BuildNode(boolean isFinalNode) {
            this.isFinalNode = isFinalNode;
        }
    }

    private final BuildNode root = new BuildNode(false);

    /**
     * Adds one filter expression to the tree.
     *
     * @param filter the filter expression, e.g. {@code a.b.*}
     */
    void insert(String filter) {
        insertNode(filter, root);
    }

    /**
     * Converts everything inserted so far into a {@link FilterPath} tree.
     *
     * @return the root node; its own pattern is the empty string
     */
    FilterPath build() {
        return buildPath("", root);
    }

    /**
     * Inserts the first segment of {@code filter} below {@code node} and recurses
     * for the remainder of the expression.
     *
     * @param filter the not yet inserted rest of a filter expression
     * @param node the node that receives the next segment as a child
     */
    void insertNode(String filter, BuildNode node) {
        int end = filter.length();
        int splitPosition = -1;
        boolean findEscapes = false;
        for (int i = 0; i < end; i++) {
            char c = filter.charAt(i);
            if (c == '.') {
                splitPosition = i;
                break;
            } else if ((c == '\\') && (i + 1 < end) && (filter.charAt(i + 1) == '.')) {
                // Escaped dot: skip it so it is not taken for a separator.
                ++i;
                findEscapes = true;
            }
        }

        // Only a separator after at least one character splits the expression;
        // with a leading dot (position 0) the whole rest is inserted as one final segment.
        if (splitPosition > 0) {
            String head = filter.substring(0, splitPosition);
            String field = findEscapes ? unescapeDots(head) : head;
            BuildNode child = node.children.computeIfAbsent(field, key -> new BuildNode(false));
            // A final child is left untouched: matching stops at a final node,
            // so anything inserted below it would never be consulted.
            if (false == child.isFinalNode) {
                insertNode(filter.substring(splitPosition + 1), child);
            }
        } else {
            String field = findEscapes ? unescapeDots(filter) : filter;
            // Replaces any existing child of the same name, dropping its subtree.
            node.children.put(field, new BuildNode(true));
        }
    }

    /**
     * Turns every escaped dot (backslash followed by a dot) into a plain dot.
     * <p>
     * Uses a literal replacement on purpose: the regex {@code \\.} means
     * "backslash followed by any character" and would also rewrite other
     * backslash sequences in the segment.
     */
    private static String unescapeDots(String segment) {
        return segment.replace("\\.", ".");
    }

    /**
     * Recursively converts a build node and its subtree into a {@link FilterPath}.
     * Children whose name contains a wildcard character go into the wildcard array,
     * all others into the map of exact terms.
     *
     * @param segment the segment name of {@code node}
     * @param node the build node to convert
     * @return the corresponding {@link FilterPath} node
     */
    FilterPath buildPath(String segment, BuildNode node) {
        Map<String, FilterPath> termsChildren = new HashMap<>();
        List<FilterPath> wildcardChildren = new ArrayList<>();
        for (Map.Entry<String, BuildNode> entry : node.children.entrySet()) {
            String childName = entry.getKey();
            FilterPath childFilterPath = buildPath(childName, entry.getValue());
            if (childName.contains(WILDCARD)) {
                wildcardChildren.add(childFilterPath);
            } else {
                termsChildren.put(childName, childFilterPath);
            }
        }
        return new FilterPath(segment, node.isFinalNode, termsChildren, wildcardChildren.toArray(new FilterPath[0]));
    }
}

public static FilterPath[] compile(Set<String> filters) {
if (filters == null || filters.isEmpty()) {
return null;
}

List<FilterPath> paths = new ArrayList<>();
FilterPathBuilder builder = new FilterPathBuilder();
for (String filter : filters) {
if (filter != null) {
filter = filter.trim();
if (filter.length() > 0) {
paths.add(parse(filter, filter));
builder.insert(filter);
}
}
}
return paths.toArray(new FilterPath[paths.size()]);
}

private static FilterPath parse(final String filter, final String segment) {
int end = segment.length();

for (int i = 0; i < end;) {
char c = segment.charAt(i);

if (c == '.') {
String current = segment.substring(0, i).replaceAll("\\\\.", ".");
return new FilterPath(filter, current, parse(filter, segment.substring(i + 1)));
}
++i;
if ((c == '\\') && (i < end) && (segment.charAt(i) == '.')) {
++i;
}
}
return new FilterPath(filter, segment.replaceAll("\\\\.", "."), EMPTY);
}

@Override
public String toString() {
return "FilterPath [filter=" + filter + ", segment=" + segment + "]";
FilterPath filterPath = builder.build();
return Collections.singletonList(filterPath).toArray(new FilterPath[0]);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -59,26 +59,15 @@ public FilterPathBasedFilter(Set<String> filters, boolean inclusive) {
*/
private TokenFilter evaluate(String name, FilterPath[] filterPaths) {
if (filterPaths != null) {
List<FilterPath> nextFilters = null;

List<FilterPath> nextFilters = new ArrayList<>();
for (FilterPath filter : filterPaths) {
FilterPath next = filter.matchProperty(name);
if (next != null) {
if (next.matches()) {
return MATCHING;
} else {
if (nextFilters == null) {
nextFilters = new ArrayList<>();
}
if (filter.isDoubleWildcard()) {
nextFilters.add(filter);
}
nextFilters.add(next);
}
boolean matches = filter.matches(name, nextFilters);
if (matches) {
return MATCHING;
}
}

if ((nextFilters != null) && (nextFilters.isEmpty() == false)) {
if (nextFilters.isEmpty() == false) {
return new FilterPathBasedFilter(nextFilters.toArray(new FilterPath[nextFilters.size()]), inclusive);
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,8 @@
import org.elasticsearch.common.io.stream.BytesStreamOutput;
import org.elasticsearch.test.ESTestCase;

import java.util.Collections;
import java.util.Arrays;
import java.util.stream.Collectors;

import static org.hamcrest.Matchers.equalTo;

Expand Down Expand Up @@ -67,6 +68,11 @@ public void testInclusiveFilters() throws Exception {
assertResult(SAMPLE, "**.l", true, "{'h':{'i':{'j':{'k':{'l':'l_value'}}}}}");

assertResult(SAMPLE, "**.*2", true, "{'e':[{'f2':'f2_value'},{'g2':'g2_value'}]}");

assertResult(SAMPLE, "h.i.j.k.l,h.i.j.k.l.m", true, "{'h':{'i':{'j':{'k':{'l':'l_value'}}}}}");
assertResult(SAMPLE, "a,b,c,d,e.f1,e.f2,e.g1,e.g2,h.i.j.k.l", true, SAMPLE);
assertResult(SAMPLE, "", true, "");
assertResult(SAMPLE, "h.", true, "");
}

public void testExclusiveFilters() throws Exception {
Expand Down Expand Up @@ -278,6 +284,16 @@ public void testExclusiveFilters() throws Exception {
+ "{'g1':'g1_value'}],'h':{'i':{'j':{'k':{'l':'l_value'}}}}}"
);

assertResult(
SAMPLE,
"h.i.j.k.l,h.i.j.k.l.m",
false,
"{'a':0,'b':true,'c':'c_value','d':[0,1,2],'e':[{'f1':'f1_value','f2':'f2_value'}," + "{'g1':'g1_value','g2':'g2_value'}]}"
);

assertResult(SAMPLE, "a,b,c,d,e.f1,e.f2,e.g1,e.g2,h.i.j.k.l", false, "");
assertResult(SAMPLE, "", false, SAMPLE);
assertResult(SAMPLE, "h.", false, SAMPLE);
}

public void testInclusiveFiltersWithDots() throws Exception {
Expand All @@ -295,7 +311,7 @@ private void assertResult(String input, String filter, boolean inclusive, String
try (
FilteringGeneratorDelegate generator = new FilteringGeneratorDelegate(
JSON_FACTORY.createGenerator(os),
new FilterPathBasedFilter(Collections.singleton(filter), inclusive),
new FilterPathBasedFilter(Arrays.asList(filter.split(",")).stream().collect(Collectors.toSet()), inclusive),
true,
true
)
Expand Down

0 comments on commit c758d1c

Please sign in to comment.