Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -99,12 +99,19 @@ public FilterEstimation(boolean isOnBaseTable) {
/**
* This method will update the stats according to the selectivity.
*/
public Statistics estimate(Expression expression, Statistics statistics) {
// For a comparison predicate, only when it's left side is a slot and right side is a literal, we would
// consider is a valid predicate.
Statistics stats = expression.accept(this, new EstimationContext(statistics));
stats.enforceValid();
return stats;
public Statistics estimate(Expression expression, Statistics inputStats) {
Statistics outputStats = expression.accept(this, new EstimationContext(inputStats));
if (outputStats.getRowCount() == 0 && inputStats.getDeltaRowCount() > 0) {
StatisticsBuilder deltaStats = new StatisticsBuilder();
deltaStats.setDeltaRowCount(0);
deltaStats.setRowCount(inputStats.getDeltaRowCount());
for (Expression expr : inputStats.columnStatistics().keySet()) {
deltaStats.putColumnStatistics(expr, ColumnStatistic.UNKNOWN);
}
outputStats = expression.accept(this, new EstimationContext(deltaStats.build()));
}
outputStats.enforceValid();
return outputStats;
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -834,14 +834,6 @@ private Statistics computeCatalogRelation(CatalogRelation catalogRelation) {
hasUnknownCol = true;
}
if (ConnectContext.get() != null && ConnectContext.get().getSessionVariable().enableStats) {
if (deltaRowCount > 0) {
// clear min-max to avoid error estimation
// for example, after yesterday data loaded, user send query about yesterday immediately.
// since yesterday data are not analyzed, the max date is before yesterday, and hence optimizer
// estimates the filter result is zero
colStatsBuilder.setMinExpr(null).setMinValue(Double.NEGATIVE_INFINITY)
.setMaxExpr(null).setMaxValue(Double.POSITIVE_INFINITY);
}
columnStatisticBuilderMap.put(slotReference, colStatsBuilder);
} else {
columnStatisticBuilderMap.put(slotReference, new ColumnStatisticBuilder(ColumnStatistic.UNKNOWN));
Expand All @@ -851,17 +843,18 @@ private Statistics computeCatalogRelation(CatalogRelation catalogRelation) {
if (hasUnknownCol && ConnectContext.get() != null && ConnectContext.get().getStatementContext() != null) {
ConnectContext.get().getStatementContext().setHasUnknownColStats(true);
}
return normalizeCatalogRelationColumnStatsRowCount(rowCount, columnStatisticBuilderMap);
return normalizeCatalogRelationColumnStatsRowCount(rowCount, columnStatisticBuilderMap, deltaRowCount);
}

private Statistics normalizeCatalogRelationColumnStatsRowCount(double rowCount,
Map<Expression, ColumnStatisticBuilder> columnStatisticBuilderMap) {
Map<Expression, ColumnStatisticBuilder> columnStatisticBuilderMap,
long deltaRowCount) {
Map<Expression, ColumnStatistic> columnStatisticMap = new HashMap<>();
for (Expression slot : columnStatisticBuilderMap.keySet()) {
columnStatisticMap.put(slot,
columnStatisticBuilderMap.get(slot).setCount(rowCount).build());
}
return new Statistics(rowCount, columnStatisticMap);
return new Statistics(rowCount, 1, columnStatisticMap, deltaRowCount);
}

private Statistics computeTopN(TopN topN) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,15 +38,32 @@ public class Statistics {
// the byte size of one tuple
private double tupleSize;

private double deltaRowCount = 0.0;

public Statistics(Statistics another) {
this.rowCount = another.rowCount;
this.widthInJoinCluster = another.widthInJoinCluster;
this.expressionToColumnStats = new HashMap<>(another.expressionToColumnStats);
this.tupleSize = another.tupleSize;
this.deltaRowCount = another.getDeltaRowCount();
}

public Statistics(double rowCount, Map<Expression, ColumnStatistic> expressionToColumnStats) {
this(rowCount, 1, expressionToColumnStats);
this(rowCount, 1, expressionToColumnStats, 0);
}

public Statistics(double rowCount, int widthInJoinCluster,
Map<Expression, ColumnStatistic> expressionToColumnStats) {
Map<Expression, ColumnStatistic> expressionToColumnStats) {
this(rowCount, widthInJoinCluster, expressionToColumnStats, 0);
}

public Statistics(double rowCount, int widthInJoinCluster,
Map<Expression, ColumnStatistic> expressionToColumnStats,
double deltaRowCount) {
this.rowCount = rowCount;
this.widthInJoinCluster = widthInJoinCluster;
this.expressionToColumnStats = expressionToColumnStats;
this.deltaRowCount = deltaRowCount;
}

public ColumnStatistic findColumnStatistics(Expression expression) {
Expand All @@ -62,18 +79,18 @@ public double getRowCount() {
}

public Statistics withRowCount(double rowCount) {
return new Statistics(rowCount, widthInJoinCluster, new HashMap<>(expressionToColumnStats));
return new Statistics(rowCount, widthInJoinCluster, new HashMap<>(expressionToColumnStats), deltaRowCount);
}

public Statistics withExpressionToColumnStats(Map<Expression, ColumnStatistic> expressionToColumnStats) {
return new Statistics(rowCount, widthInJoinCluster, expressionToColumnStats);
return new Statistics(rowCount, widthInJoinCluster, expressionToColumnStats, deltaRowCount);
}

/**
* Update by count.
*/
public Statistics withRowCountAndEnforceValid(double rowCount) {
Statistics statistics = new Statistics(rowCount, widthInJoinCluster, expressionToColumnStats);
Statistics statistics = new Statistics(rowCount, widthInJoinCluster, expressionToColumnStats, deltaRowCount);
statistics.enforceValid();
return statistics;
}
Expand Down Expand Up @@ -155,7 +172,11 @@ public String toString() {
return "-Infinite";
}
DecimalFormat format = new DecimalFormat("#,###.##");
return format.format(rowCount);
String rows = format.format(rowCount);
if (deltaRowCount > 0) {
rows = rows + "(" + format.format(deltaRowCount) + ")";
}
return rows;
}

public int getBENumber() {
Expand Down Expand Up @@ -222,4 +243,8 @@ public Statistics normalizeByRatio(double originRowCount) {
}
return builder.build();
}

public double getDeltaRowCount() {
return deltaRowCount;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@ public class StatisticsBuilder {
private int widthInJoinCluster;
private final Map<Expression, ColumnStatistic> expressionToColumnStats;

private double deltaRowCount = 0.0;

public StatisticsBuilder() {
expressionToColumnStats = new HashMap<>();
}
Expand All @@ -37,6 +39,7 @@ public StatisticsBuilder(Statistics statistics) {
this.widthInJoinCluster = statistics.getWidthInJoinCluster();
expressionToColumnStats = new HashMap<>();
expressionToColumnStats.putAll(statistics.columnStatistics());
this.deltaRowCount = statistics.getDeltaRowCount();
}

public StatisticsBuilder setRowCount(double rowCount) {
Expand All @@ -49,6 +52,11 @@ public StatisticsBuilder setWidthInJoinCluster(int widthInJoinCluster) {
return this;
}

public StatisticsBuilder setDeltaRowCount(double deltaRowCount) {
this.deltaRowCount = deltaRowCount;
return this;
}

public StatisticsBuilder putColumnStatistics(
Map<Expression, ColumnStatistic> expressionToColumnStats) {
this.expressionToColumnStats.putAll(expressionToColumnStats);
Expand All @@ -61,6 +69,6 @@ public StatisticsBuilder putColumnStatistics(Expression expression, ColumnStatis
}

public Statistics build() {
return new Statistics(rowCount, widthInJoinCluster, expressionToColumnStats);
return new Statistics(rowCount, widthInJoinCluster, expressionToColumnStats, deltaRowCount);
}
}
55 changes: 55 additions & 0 deletions regression-test/suites/nereids_p0/delta_row/delta_row.groovy
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

suite("delta_row") {
String database = context.config.getDbNameByFile(context.file)
sql """
drop database if exists ${database};
create database ${database};
use ${database};
CREATE TABLE IF NOT EXISTS t (
k int(11) null comment "",
v string replace null comment "",
) engine=olap
DISTRIBUTED BY HASH(k) BUCKETS 5 properties("replication_num" = "1");

insert into t values (1, "a"),(2, "b"),(3, 'c'),(4,'d');
analyze table t with sync;
"""
explain {
sql "physical plan select * from t where k > 6"
contains("stats=0,")
contains("stats=4 ")
// PhysicalResultSink[75] ( outputExprs=[k#0, v#1] )
// +--PhysicalFilter[72]@1 ( stats=0, predicates=(k#0 > 6) )
// +--PhysicalOlapScan[t]@0 ( stats=4 )
}

sql "set global enable_auto_analyze=false;"

sql "insert into t values (10, 'c');"
explain {
sql "physical plan select * from t where k > 6"
contains("stats=0.5,")
contains("stats=5(1)")
notContains("stats=0,")
notContains("stats=4 ")
// PhysicalResultSink[75] ( outputExprs=[k#0, v#1] )
// +--PhysicalFilter[72]@1 ( stats=0.5, predicates=(k#0 > 6) )
// +--PhysicalOlapScan[t]@0 ( stats=5(1) )
}
}