Skip to content

Commit

Permalink
[refactor][backport](stats) Migrate stats framework from master to br…
Browse files Browse the repository at this point in the history
…anch 2.0 (#25119)

This PR is composed of belowing commits which has been merged to Doirs master:

* #24769
* #24672
* #24599
* #24521
* #24405
* #24237
* #24135
* #24074
* #24026
* #23992
* #23978
* #23622
* #23507
* #23354
* #23103
* #22963
* #22896
* #22775
* #22773
  • Loading branch information
Kikyou1997 authored Oct 13, 2023
1 parent b9fba6a commit 4b6ff32
Show file tree
Hide file tree
Showing 102 changed files with 3,860 additions and 2,724 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -2178,6 +2178,13 @@ public class Config extends ConfigBase {
})
public static int autobucket_min_buckets = 1;

@ConfField
public static int full_auto_analyze_simultaneously_running_task_num = 1;

@ConfField
public static final int period_analyze_simultaneously_running_task_num = 1;


@ConfField(mutable = true, description = {
"Doris 为了兼用 mysql 周边工具生态,会内置一个名为 mysql 的数据库,如果该数据库与用户自建数据库冲突,"
+ "请修改这个字段,为 doris 内置的 mysql database 更换一个名字",
Expand Down
12 changes: 8 additions & 4 deletions fe/fe-core/src/main/cup/sql_parser.cup
Original file line number Diff line number Diff line change
Expand Up @@ -4134,13 +4134,17 @@ show_param ::=
RESULT = new ShowCreateMaterializedViewStmt(mvName, tableName);
:}
/* show analyze job */
| KW_ANALYZE opt_table_name:tbl opt_wild_where order_by_clause:orderByClause limit_clause:limitClause
| KW_ANALYZE opt_table_name:tbl opt_wild_where
{:
RESULT = new ShowAnalyzeStmt(tbl, parser.where, orderByClause, limitClause);
RESULT = new ShowAnalyzeStmt(tbl, parser.where, false);
:}
| KW_ANALYZE INTEGER_LITERAL:jobId opt_wild_where order_by_clause:orderByClause limit_clause:limitClause
| KW_ANALYZE INTEGER_LITERAL:jobId opt_wild_where
{:
RESULT = new ShowAnalyzeStmt(jobId, parser.where, orderByClause, limitClause);
RESULT = new ShowAnalyzeStmt(jobId, parser.where);
:}
| KW_AUTO KW_ANALYZE opt_table_name:tbl opt_wild_where
{:
RESULT = new ShowAnalyzeStmt(tbl, parser.where, true);
:}
| KW_ANALYZE KW_TASK KW_STATUS INTEGER_LITERAL:jobId
{:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,16 +22,18 @@
import org.apache.doris.statistics.AnalysisInfo.AnalysisType;

import com.google.common.collect.ImmutableSet;
import com.google.gson.annotations.SerializedName;
import org.apache.commons.lang3.StringUtils;
import org.apache.logging.log4j.core.util.CronExpression;

import java.util.HashMap;
import java.util.Map;
import java.util.Optional;
import java.util.concurrent.TimeUnit;

// TODO: Remove map
public class AnalyzeProperties {

private final Map<String, String> properties;

public static final String PROPERTY_SYNC = "sync";
public static final String PROPERTY_INCREMENTAL = "incremental";
public static final String PROPERTY_AUTOMATIC = "automatic";
Expand All @@ -41,6 +43,23 @@ public class AnalyzeProperties {
public static final String PROPERTY_ANALYSIS_TYPE = "analysis.type";
public static final String PROPERTY_PERIOD_SECONDS = "period.seconds";

public static final String PROPERTY_FORCE_FULL = "force.full";

public static final AnalyzeProperties DEFAULT_PROP = new AnalyzeProperties(new HashMap<String, String>() {
{
put(AnalyzeProperties.PROPERTY_SYNC, "false");
put(AnalyzeProperties.PROPERTY_AUTOMATIC, "false");
put(AnalyzeProperties.PROPERTY_ANALYSIS_TYPE, AnalysisType.FUNDAMENTALS.toString());
}
});

public static final String PROPERTY_PERIOD_CRON = "period.cron";

private CronExpression cronExpression;

@SerializedName("analyzeProperties")
private final Map<String, String> properties;

private static final ImmutableSet<String> PROPERTIES_SET = new ImmutableSet.Builder<String>()
.add(PROPERTY_SYNC)
.add(PROPERTY_INCREMENTAL)
Expand All @@ -50,6 +69,8 @@ public class AnalyzeProperties {
.add(PROPERTY_NUM_BUCKETS)
.add(PROPERTY_ANALYSIS_TYPE)
.add(PROPERTY_PERIOD_SECONDS)
.add(PROPERTY_PERIOD_CRON)
.add(PROPERTY_FORCE_FULL)
.build();

public AnalyzeProperties(Map<String, String> properties) {
Expand All @@ -72,6 +93,7 @@ public void check() throws AnalysisException {
checkAnalysisMode(msgTemplate);
checkAnalysisType(msgTemplate);
checkScheduleType(msgTemplate);
checkPeriod();
}

public boolean isSync() {
Expand Down Expand Up @@ -115,6 +137,10 @@ public long getPeriodTimeInMs() {
return TimeUnit.SECONDS.toMillis(minutes);
}

public CronExpression getCron() {
return cronExpression;
}

private void checkPeriodSeconds() throws AnalysisException {
if (properties.containsKey(PROPERTY_PERIOD_SECONDS)) {
checkNumericProperty(PROPERTY_PERIOD_SECONDS, properties.get(PROPERTY_PERIOD_SECONDS),
Expand Down Expand Up @@ -207,6 +233,22 @@ private void checkScheduleType(String msgTemplate) throws AnalysisException {
}
}

private void checkPeriod() throws AnalysisException {
if (properties.containsKey(PROPERTY_PERIOD_SECONDS)
&& properties.containsKey(PROPERTY_PERIOD_CRON)) {
throw new AnalysisException(PROPERTY_PERIOD_SECONDS + " and " + PROPERTY_PERIOD_CRON
+ " couldn't be set simultaneously");
}
String cronExprStr = properties.get(PROPERTY_PERIOD_CRON);
if (cronExprStr != null) {
try {
cronExpression = new CronExpression(cronExprStr);
} catch (java.text.ParseException e) {
throw new AnalysisException("Invalid cron expression: " + cronExprStr);
}
}
}

private void checkNumericProperty(String key, String value, int lowerBound, int upperBound,
boolean includeBoundary, String errorMsg) throws AnalysisException {
if (!StringUtils.isNumeric(value)) {
Expand All @@ -226,6 +268,14 @@ public boolean isSample() {
|| properties.containsKey(PROPERTY_SAMPLE_ROWS);
}

public boolean forceFull() {
return properties.containsKey(PROPERTY_FORCE_FULL);
}

public boolean isSampleRows() {
return properties.containsKey(PROPERTY_SAMPLE_ROWS);
}

public String toSQL() {
StringBuilder sb = new StringBuilder();
sb.append("PROPERTIES(");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@
import org.apache.doris.statistics.AnalysisInfo.AnalysisType;
import org.apache.doris.statistics.AnalysisInfo.ScheduleType;

import org.apache.logging.log4j.core.util.CronExpression;

import java.util.Map;

public class AnalyzeStmt extends StatementBase {
Expand Down Expand Up @@ -55,7 +57,8 @@ public ScheduleType getScheduleType() {
if (analyzeProperties.isAutomatic()) {
return ScheduleType.AUTOMATIC;
}
return analyzeProperties.getPeriodTimeInMs() > 0 ? ScheduleType.PERIOD : ScheduleType.ONCE;
return analyzeProperties.getPeriodTimeInMs() > 0 || analyzeProperties.getCron() != null
? ScheduleType.PERIOD : ScheduleType.ONCE;
}

public boolean isSync() {
Expand Down Expand Up @@ -86,4 +89,12 @@ public AnalyzeProperties getAnalyzeProperties() {
public RedirectStatus getRedirectStatus() {
return RedirectStatus.FORWARD_WITH_SYNC;
}

public CronExpression getCron() {
return analyzeProperties.getCron();
}

public boolean forceFull() {
return analyzeProperties.forceFull();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
import org.apache.doris.catalog.OlapTable;
import org.apache.doris.catalog.TableIf;
import org.apache.doris.catalog.View;
import org.apache.doris.catalog.external.ExternalTable;
import org.apache.doris.catalog.external.HMSExternalTable;
import org.apache.doris.common.AnalysisException;
import org.apache.doris.common.Config;
Expand All @@ -41,6 +42,7 @@
import com.google.common.collect.Sets;
import org.apache.commons.lang3.StringUtils;

import java.util.Collections;
import java.util.List;
import java.util.Optional;
import java.util.Set;
Expand Down Expand Up @@ -84,7 +86,7 @@ public class AnalyzeTblStmt extends AnalyzeStmt {

private final TableName tableName;
private List<String> columnNames;
private List<String> partitionNames;
private PartitionNames partitionNames;
private boolean isAllColumns;

// after analyzed
Expand All @@ -97,7 +99,7 @@ public AnalyzeTblStmt(TableName tableName,
AnalyzeProperties properties) {
super(properties);
this.tableName = tableName;
this.partitionNames = partitionNames == null ? null : partitionNames.getPartitionNames();
this.partitionNames = partitionNames;
this.columnNames = columnNames;
this.analyzeProperties = properties;
this.isAllColumns = columnNames == null;
Expand Down Expand Up @@ -166,11 +168,9 @@ public void check() throws AnalysisException {
analyzeProperties.check();

// TODO support external table
if (analyzeProperties.isSample()) {
if (!(table instanceof OlapTable)) {
throw new AnalysisException("Sampling statistics "
+ "collection of external tables is not supported");
}
if (analyzeProperties.isSampleRows() && !(table instanceof OlapTable)) {
throw new AnalysisException("Sampling statistics "
+ "collection of external tables is not supported with rows, use percent instead.");
}
if (analyzeProperties.isSync()
&& (analyzeProperties.isAutomatic() || analyzeProperties.getPeriodTimeInMs() != 0)) {
Expand All @@ -181,6 +181,9 @@ public void check() throws AnalysisException {
throw new AnalysisException("Automatic collection "
+ "and period statistics collection cannot be set at same time");
}
if (analyzeProperties.isSample() && analyzeProperties.forceFull()) {
throw new AnalysisException("Impossible to analyze with sample and full simultaneously");
}
}

private void checkColumn() throws AnalysisException {
Expand All @@ -196,7 +199,8 @@ private void checkColumn() throws AnalysisException {
}
}
if (containsUnsupportedTytpe) {
if (!ConnectContext.get().getSessionVariable().enableAnalyzeComplexTypeColumn) {
if (ConnectContext.get() == null
|| !ConnectContext.get().getSessionVariable().enableAnalyzeComplexTypeColumn) {
columnNames = columnNames.stream()
.filter(c -> !StatisticsUtil.isUnsupportedType(table.getColumn(c).getType()))
.collect(Collectors.toList());
Expand Down Expand Up @@ -236,14 +240,33 @@ public Set<String> getColumnNames() {
}

public Set<String> getPartitionNames() {
Set<String> partitions = partitionNames == null ? table.getPartitionNames() : Sets.newHashSet(partitionNames);
if (isSamplingPartition()) {
int partNum = ConnectContext.get().getSessionVariable().getExternalTableAnalyzePartNum();
partitions = partitions.stream().limit(partNum).collect(Collectors.toSet());
if (partitionNames == null || partitionNames.getPartitionNames() == null) {
if (table instanceof ExternalTable) {
// External table couldn't return all partitions when partitionNames is not set.
// Because Analyze Table command for external table could specify partition names.
return Collections.emptySet();
}
return table.getPartitionNames();
}
Set<String> partitions = Sets.newHashSet();
partitions.addAll(partitionNames.getPartitionNames());
return partitions;
}

public boolean isAllPartitions() {
if (partitionNames == null) {
return false;
}
return partitionNames.isAllPartitions();
}

public long getPartitionCount() {
if (partitionNames == null) {
return 0;
}
return partitionNames.getCount();
}

public boolean isPartitionOnly() {
return partitionNames != null;
}
Expand All @@ -260,8 +283,13 @@ public boolean isSamplingPartition() {
}

private void checkAnalyzePriv(String dbName, String tblName) throws AnalysisException {
ConnectContext ctx = ConnectContext.get();
// means it a system analyze
if (ctx == null) {
return;
}
if (!Env.getCurrentEnv().getAccessManager()
.checkTblPriv(ConnectContext.get(), dbName, tblName, PrivPredicate.SELECT)) {
.checkTblPriv(ctx, dbName, tblName, PrivPredicate.SELECT)) {
ErrorReport.reportAnalysisException(
ErrorCode.ERR_TABLEACCESS_DENIED_ERROR,
"ANALYZE",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -48,15 +48,37 @@ public class PartitionNames implements ParseNode, Writable {
// true if these partitions are temp partitions
@SerializedName(value = "isTemp")
private final boolean isTemp;
private final boolean allPartitions;
private final long count;
// Default partition count to collect statistic for external table.
private static final long DEFAULT_PARTITION_COUNT = 100;

public PartitionNames(boolean isTemp, List<String> partitionNames) {
this.partitionNames = partitionNames;
this.isTemp = isTemp;
this.allPartitions = false;
this.count = 0;
}

public PartitionNames(PartitionNames other) {
this.partitionNames = Lists.newArrayList(other.partitionNames);
this.isTemp = other.isTemp;
this.allPartitions = other.allPartitions;
this.count = 0;
}

public PartitionNames(boolean allPartitions) {
this.partitionNames = null;
this.isTemp = false;
this.allPartitions = allPartitions;
this.count = 0;
}

public PartitionNames(long partitionCount) {
this.partitionNames = null;
this.isTemp = false;
this.allPartitions = false;
this.count = partitionCount;
}

public List<String> getPartitionNames() {
Expand All @@ -67,9 +89,23 @@ public boolean isTemp() {
return isTemp;
}

public boolean isAllPartitions() {
return allPartitions;
}

public long getCount() {
return count;
}

@Override
public void analyze(Analyzer analyzer) throws AnalysisException {
if (partitionNames.isEmpty()) {
if (allPartitions && count > 0) {
throw new AnalysisException("All partition and partition count couldn't be set at the same time.");
}
if (allPartitions || count > 0) {
return;
}
if (partitionNames == null || partitionNames.isEmpty()) {
throw new AnalysisException("No partition specified in partition lists");
}
// check if partition name is not empty string
Expand Down
Loading

0 comments on commit 4b6ff32

Please sign in to comment.