diff --git a/fe/fe-common/src/main/java/org/apache/doris/common/Config.java b/fe/fe-common/src/main/java/org/apache/doris/common/Config.java index f9be5d8755920f..514e28b722cc62 100644 --- a/fe/fe-common/src/main/java/org/apache/doris/common/Config.java +++ b/fe/fe-common/src/main/java/org/apache/doris/common/Config.java @@ -1521,7 +1521,7 @@ public class Config extends ConfigBase { * The number is determined by "start" and "end" in the dynamic partition parameters. */ @ConfField(mutable = true, masterOnly = true) - public static int max_dynamic_partition_num = 500; + public static int max_dynamic_partition_num = 20000; /** * Used to limit the maximum number of partitions that can be created when creating multi partition, @@ -2690,8 +2690,8 @@ public class Config extends ConfigBase { @ConfField(mutable = true, masterOnly = true, description = { "For auto-partitioned tables to prevent users from accidentally creating a large number of partitions, " - + "the number of partitions allowed per OLAP table is `max_auto_partition_num`. Default 2000."}) - public static int max_auto_partition_num = 2000; + + "the number of partitions allowed per OLAP table is `max_auto_partition_num`. Default 20000."}) + public static int max_auto_partition_num = 20000; @ConfField(mutable = true, masterOnly = true, description = { "The maximum difference in the number of tablets of each BE in partition rebalance mode. 
" diff --git a/fe/fe-core/src/main/java/org/apache/doris/common/util/DynamicPartitionUtil.java b/fe/fe-core/src/main/java/org/apache/doris/common/util/DynamicPartitionUtil.java index db12f6266ea8c0..516d6942478f17 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/common/util/DynamicPartitionUtil.java +++ b/fe/fe-core/src/main/java/org/apache/doris/common/util/DynamicPartitionUtil.java @@ -42,6 +42,7 @@ import org.apache.doris.common.FeConstants; import org.apache.doris.common.FeNameFormat; import org.apache.doris.common.UserException; +import org.apache.doris.metric.MetricRepo; import org.apache.doris.policy.StoragePolicy; import org.apache.doris.resource.Tag; import org.apache.doris.thrift.TStorageMedium; @@ -641,10 +642,20 @@ public static Map analyzeDynamicPartition(Map pr } expectCreatePartitionNum = (long) end - start; - if (!isReplay && hasEnd && (expectCreatePartitionNum > Config.max_dynamic_partition_num) + int dynamicPartitionLimit = Config.max_dynamic_partition_num; + if (!isReplay && hasEnd && Boolean.parseBoolean(analyzedProperties.getOrDefault(DynamicPartitionProperty.ENABLE, "true"))) { - throw new DdlException("Too many dynamic partitions: " - + expectCreatePartitionNum + ". Limit: " + Config.max_dynamic_partition_num); + if (expectCreatePartitionNum > dynamicPartitionLimit) { + throw new DdlException("Too many dynamic partitions: " + + expectCreatePartitionNum + ". Limit: " + dynamicPartitionLimit); + } else if (expectCreatePartitionNum > dynamicPartitionLimit * 8L / 10) { + LOG.warn("Dynamic partition count {} is approaching limit {} (>80%)." 
+ + " Consider increasing max_dynamic_partition_num.", + expectCreatePartitionNum, dynamicPartitionLimit); + if (MetricRepo.isInit) { + MetricRepo.COUNTER_DYNAMIC_PARTITION_NEAR_LIMIT.increase(1L); + } + } } if (properties.containsKey(DynamicPartitionProperty.START_DAY_OF_MONTH)) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/metric/MetricRepo.java b/fe/fe-core/src/main/java/org/apache/doris/metric/MetricRepo.java index cd0755fd9da494..c28c2aeb99f207 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/metric/MetricRepo.java +++ b/fe/fe-core/src/main/java/org/apache/doris/metric/MetricRepo.java @@ -256,6 +256,10 @@ public final class MetricRepo { public static GaugeMetricImpl GAUGE_AVG_PARTITION_SIZE_BYTES; public static GaugeMetricImpl GAUGE_AVG_TABLET_SIZE_BYTES; + // Partition near-limit warnings + public static LongCounterMetric COUNTER_AUTO_PARTITION_NEAR_LIMIT; + public static LongCounterMetric COUNTER_DYNAMIC_PARTITION_NEAR_LIMIT; + // Agent task public static LongCounterMetric COUNTER_AGENT_TASK_REQUEST_TOTAL; public static AutoMappedMetric COUNTER_AGENT_TASK_TOTAL; @@ -1040,6 +1044,16 @@ public Integer getValue() { GAUGE_AVG_TABLET_SIZE_BYTES = new GaugeMetricImpl<>("avg_tablet_size_bytes", MetricUnit.BYTES, "", 0L); DORIS_METRIC_REGISTER.addMetrics(GAUGE_AVG_TABLET_SIZE_BYTES); + // Partition near-limit warning counters + COUNTER_AUTO_PARTITION_NEAR_LIMIT = new LongCounterMetric("auto_partition_near_limit_count", + MetricUnit.NOUNIT, + "number of times auto partition count exceeded 80% of max_auto_partition_num"); + DORIS_METRIC_REGISTER.addMetrics(COUNTER_AUTO_PARTITION_NEAR_LIMIT); + COUNTER_DYNAMIC_PARTITION_NEAR_LIMIT = new LongCounterMetric("dynamic_partition_near_limit_count", + MetricUnit.NOUNIT, + "number of times dynamic partition count exceeded 80% of max_dynamic_partition_num"); + DORIS_METRIC_REGISTER.addMetrics(COUNTER_DYNAMIC_PARTITION_NEAR_LIMIT); + COUNTER_AGENT_TASK_REQUEST_TOTAL = new 
LongCounterMetric("agent_task_request_total", MetricUnit.NOUNIT, "total agent batch task request send to BE"); DORIS_METRIC_REGISTER.addMetrics(COUNTER_AGENT_TASK_REQUEST_TOTAL); diff --git a/fe/fe-core/src/main/java/org/apache/doris/service/FrontendServiceImpl.java b/fe/fe-core/src/main/java/org/apache/doris/service/FrontendServiceImpl.java index f074711854c296..44410af0163e5b 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/service/FrontendServiceImpl.java +++ b/fe/fe-core/src/main/java/org/apache/doris/service/FrontendServiceImpl.java @@ -96,6 +96,7 @@ import org.apache.doris.load.routineload.RoutineLoadManager; import org.apache.doris.master.MasterImpl; import org.apache.doris.meta.MetaContext; +import org.apache.doris.metric.MetricRepo; import org.apache.doris.mysql.privilege.AccessControllerManager; import org.apache.doris.mysql.privilege.PrivPredicate; import org.apache.doris.nereids.trees.plans.PlanNodeAndHash; @@ -4391,15 +4392,24 @@ public TCreatePartitionResult createPartition(TCreatePartitionRequest request) t // check partition's number limit. because partitions in addPartitionClauseMap may be duplicated with existing // partitions, which would lead to false positive. so we should check the partition number AFTER adding new // partitions using its ACTUAL NUMBER, rather than the sum of existing and requested partitions. 
-        if (olapTable.getPartitionNum() > Config.max_auto_partition_num) {
+        int partitionNum = olapTable.getPartitionNum();
+        int autoPartitionLimit = Config.max_auto_partition_num;
+        if (partitionNum > autoPartitionLimit) {
             String errorMessage = String.format(
                     "partition numbers %d exceeded limit of variable max_auto_partition_num %d",
-                    olapTable.getPartitionNum(), Config.max_auto_partition_num);
+                    partitionNum, autoPartitionLimit);
             LOG.warn(errorMessage);
             errorStatus.setErrorMsgs(Lists.newArrayList(errorMessage));
             result.setStatus(errorStatus);
             LOG.warn("send create partition error status: {}", result);
             return result;
+        } else if (partitionNum > autoPartitionLimit * 8L / 10) { // 8L: long math, no int overflow
+            LOG.warn("Table {}.{} auto partition count {} is approaching limit {} (>80%)."
+                    + " Consider increasing max_auto_partition_num.",
+                    db.getFullName(), olapTable.getName(), partitionNum, autoPartitionLimit);
+            if (MetricRepo.isInit) {
+                MetricRepo.COUNTER_AUTO_PARTITION_NEAR_LIMIT.increase(1L);
+            }
         }
 
         // build partition & tablets