-
Notifications
You must be signed in to change notification settings - Fork 25.5k
ESQL: Limit when we push topn to lucene #134497
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
080c1a1
1ee7f0a
59f663a
0da61e0
b894cb7
387d49d
18e2f0a
665df91
b7184b0
6fa73c3
cfa0bdc
b31bbae
f18dfa1
07e37f3
a9bd848
c6d05da
e829fa4
b7d4d0d
82b4f25
e63d68a
e4bc310
bfc240a
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
pr: 134497 | ||
summary: Limit when we push topn to lucene | ||
area: ES|QL | ||
type: bug | ||
issues: [] |
Original file line number | Diff line number | Diff line change | ||||||||||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
@@ -18,6 +18,7 @@ | |||||||||||||||||||
import org.elasticsearch.xpack.esql.core.expression.MetadataAttribute; | ||||||||||||||||||||
import org.elasticsearch.xpack.esql.core.expression.NameId; | ||||||||||||||||||||
import org.elasticsearch.xpack.esql.core.expression.ReferenceAttribute; | ||||||||||||||||||||
import org.elasticsearch.xpack.esql.expression.Foldables; | ||||||||||||||||||||
import org.elasticsearch.xpack.esql.expression.Order; | ||||||||||||||||||||
import org.elasticsearch.xpack.esql.expression.function.scalar.spatial.BinarySpatialFunction; | ||||||||||||||||||||
import org.elasticsearch.xpack.esql.expression.function.scalar.spatial.SpatialRelatesUtils; | ||||||||||||||||||||
|
@@ -28,6 +29,7 @@ | |||||||||||||||||||
import org.elasticsearch.xpack.esql.plan.physical.EvalExec; | ||||||||||||||||||||
import org.elasticsearch.xpack.esql.plan.physical.PhysicalPlan; | ||||||||||||||||||||
import org.elasticsearch.xpack.esql.plan.physical.TopNExec; | ||||||||||||||||||||
import org.elasticsearch.xpack.esql.planner.PhysicalSettings; | ||||||||||||||||||||
|
||||||||||||||||||||
import java.util.ArrayList; | ||||||||||||||||||||
import java.util.LinkedHashMap; | ||||||||||||||||||||
|
@@ -63,7 +65,12 @@ public class PushTopNToSource extends PhysicalOptimizerRules.ParameterizedOptimi | |||||||||||||||||||
|
||||||||||||||||||||
@Override | ||||||||||||||||||||
protected PhysicalPlan rule(TopNExec topNExec, LocalPhysicalOptimizerContext ctx) { | ||||||||||||||||||||
Pushable pushable = evaluatePushable(ctx.foldCtx(), topNExec, LucenePushdownPredicates.from(ctx.searchStats(), ctx.flags())); | ||||||||||||||||||||
Pushable pushable = evaluatePushable( | ||||||||||||||||||||
ctx.physicalSettings(), | ||||||||||||||||||||
ctx.foldCtx(), | ||||||||||||||||||||
topNExec, | ||||||||||||||||||||
LucenePushdownPredicates.from(ctx.searchStats(), ctx.flags()) | ||||||||||||||||||||
); | ||||||||||||||||||||
return pushable.rewrite(topNExec); | ||||||||||||||||||||
} | ||||||||||||||||||||
|
||||||||||||||||||||
|
@@ -124,15 +131,24 @@ public PhysicalPlan rewrite(TopNExec topNExec) { | |||||||||||||||||||
} | ||||||||||||||||||||
} | ||||||||||||||||||||
|
||||||||||||||||||||
private static Pushable evaluatePushable(FoldContext ctx, TopNExec topNExec, LucenePushdownPredicates lucenePushdownPredicates) { | ||||||||||||||||||||
private static Pushable evaluatePushable( | ||||||||||||||||||||
PhysicalSettings physicalSettings, | ||||||||||||||||||||
FoldContext ctx, | ||||||||||||||||||||
TopNExec topNExec, | ||||||||||||||||||||
LucenePushdownPredicates lucenePushdownPredicates | ||||||||||||||||||||
) { | ||||||||||||||||||||
PhysicalPlan child = topNExec.child(); | ||||||||||||||||||||
if (child instanceof EsQueryExec queryExec | ||||||||||||||||||||
&& queryExec.canPushSorts() | ||||||||||||||||||||
&& canPushDownOrders(topNExec.order(), lucenePushdownPredicates)) { | ||||||||||||||||||||
&& canPushDownOrders(topNExec.order(), lucenePushdownPredicates) | ||||||||||||||||||||
&& canPushLimit(topNExec, physicalSettings)) { | ||||||||||||||||||||
// With the simplest case of `FROM index | SORT ...` we only allow pushing down if the sort is on a field | ||||||||||||||||||||
return new PushableQueryExec(queryExec); | ||||||||||||||||||||
} | ||||||||||||||||||||
if (child instanceof EvalExec evalExec && evalExec.child() instanceof EsQueryExec queryExec && queryExec.canPushSorts()) { | ||||||||||||||||||||
if (child instanceof EvalExec evalExec | ||||||||||||||||||||
&& evalExec.child() instanceof EsQueryExec queryExec | ||||||||||||||||||||
&& queryExec.canPushSorts() | ||||||||||||||||||||
&& canPushLimit(topNExec, physicalSettings)) { | ||||||||||||||||||||
Comment on lines
+148
to
+151
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Looks right to me, but I think @craigtaverner should have a look, too. I don't remember if it's bad when we somehow cannot push a There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Correct. Failing to push down ST_DISTANCE will not cause the query to fail, just run slow. And as I understand it this PR will only block pushdown for extremely large LIMIT values, which are an extreme case anyway. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||||||||||||||||||||
// When we have an EVAL between the FROM and the SORT, we consider pushing down if the sort is on a field and/or | ||||||||||||||||||||
// a distance function defined in the EVAL. We also move the EVAL to after the SORT. | ||||||||||||||||||||
List<Order> orders = topNExec.order(); | ||||||||||||||||||||
|
@@ -204,6 +220,10 @@ private static boolean canPushDownOrders(List<Order> orders, LucenePushdownPredi | |||||||||||||||||||
return orders.stream().allMatch(o -> isSortableAttribute.apply(o.child(), lucenePushdownPredicates)); | ||||||||||||||||||||
} | ||||||||||||||||||||
|
||||||||||||||||||||
private static boolean canPushLimit(TopNExec topn, PhysicalSettings physicalSettings) { | ||||||||||||||||||||
return Foldables.limitValue(topn.limit(), topn.sourceText()) <= physicalSettings.luceneTopNLimit(); | ||||||||||||||||||||
} | ||||||||||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. NIT: this could be simplified with Lines 99 to 107 in 2a1176a
|
||||||||||||||||||||
|
||||||||||||||||||||
private static List<EsQueryExec.Sort> buildFieldSorts(List<Order> orders) { | ||||||||||||||||||||
List<EsQueryExec.Sort> sorts = new ArrayList<>(orders.size()); | ||||||||||||||||||||
for (Order o : orders) { | ||||||||||||||||||||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -9,9 +9,11 @@ | |
|
||
import org.elasticsearch.cluster.service.ClusterService; | ||
import org.elasticsearch.common.settings.Setting; | ||
import org.elasticsearch.common.settings.Settings; | ||
import org.elasticsearch.common.unit.ByteSizeValue; | ||
import org.elasticsearch.common.unit.MemorySizeValue; | ||
import org.elasticsearch.compute.lucene.DataPartitioning; | ||
import org.elasticsearch.index.IndexSettings; | ||
import org.elasticsearch.monitor.jvm.JvmInfo; | ||
|
||
/** | ||
|
@@ -35,23 +37,34 @@ public class PhysicalSettings { | |
Setting.Property.Dynamic | ||
); | ||
|
||
public static final Setting<Integer> LUCENE_TOPN_LIMIT = Setting.intSetting( | ||
"esql.lucene_topn_limit", | ||
IndexSettings.MAX_RESULT_WINDOW_SETTING.getDefault(Settings.EMPTY), | ||
-1, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We do not limit by default? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Uh, just realized There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Right! We default to the window's default. |
||
Setting.Property.NodeScope, | ||
Setting.Property.Dynamic | ||
); | ||
|
||
private volatile DataPartitioning defaultDataPartitioning; | ||
private volatile ByteSizeValue valuesLoadingJumboSize; | ||
private volatile int luceneTopNLimit; | ||
|
||
/** | ||
* Ctor for prod that listens for updates from the {@link ClusterService}. | ||
*/ | ||
public PhysicalSettings(ClusterService clusterService) { | ||
clusterService.getClusterSettings().initializeAndWatch(DEFAULT_DATA_PARTITIONING, v -> this.defaultDataPartitioning = v); | ||
clusterService.getClusterSettings().initializeAndWatch(VALUES_LOADING_JUMBO_SIZE, v -> this.valuesLoadingJumboSize = v); | ||
clusterService.getClusterSettings().initializeAndWatch(LUCENE_TOPN_LIMIT, v -> this.luceneTopNLimit = v); | ||
} | ||
|
||
/** | ||
* Ctor for testing. | ||
*/ | ||
public PhysicalSettings(DataPartitioning defaultDataPartitioning, ByteSizeValue valuesLoadingJumboSize) { | ||
public PhysicalSettings(DataPartitioning defaultDataPartitioning, ByteSizeValue valuesLoadingJumboSize, int luceneTopNLimit) { | ||
this.defaultDataPartitioning = defaultDataPartitioning; | ||
this.valuesLoadingJumboSize = valuesLoadingJumboSize; | ||
this.luceneTopNLimit = luceneTopNLimit; | ||
} | ||
|
||
public DataPartitioning defaultDataPartitioning() { | ||
|
@@ -61,4 +74,22 @@ public DataPartitioning defaultDataPartitioning() { | |
public ByteSizeValue valuesLoadingJumboSize() { | ||
return valuesLoadingJumboSize; | ||
} | ||
|
||
/** | ||
* Maximum {@code LIMIT} that we're willing to push to Lucene's topn. | ||
* <p> | ||
* Lucene's topn code was designed for <strong>search</strong> | ||
* which typically fetches 10 or 30 or 50 or 100 or 1000 documents. | ||
* That's as many you want on a page, and that's what it's designed for. | ||
* But if you go to, say, page 10, Lucene implements this as a search | ||
* for {@code page_size * page_number} docs and then materializes only | ||
* the last {@code page_size} documents. Traditionally, Elasticsearch | ||
* limits that {@code page_size * page_number} which it calls the | ||
* {@link IndexSettings#MAX_RESULT_WINDOW_SETTING "result window"}. | ||
* So! ESQL defaults to the same default - {@code 10,000}. | ||
* </p> | ||
*/ | ||
public int luceneTopNLimit() { | ||
return luceneTopNLimit; | ||
} | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Looks correct.