From afdbac882fcf0ff9f7002544d2b9bc91ddbd7891 Mon Sep 17 00:00:00 2001 From: Ryan19929 Date: Wed, 6 May 2026 09:59:59 +0800 Subject: [PATCH 1/2] [feature](catalog) support medium allocation mode Introduce MediumAllocationMode as the table and partition metadata foundation for storage medium allocation semantics. CREATE TABLE keeps the existing user syntax: explicitly specifying storage_medium maps to STRICT, while omitting it maps to ADAPTIVE. Persist the mode in DataProperty, rebuild the table-level cache in TableProperty, and migrate SystemInfoService and its callers from the old boolean flag to the enum so STRICT placement cannot silently fall back. Old images that do not contain the new field default to ADAPTIVE, which matches the previous behavior because storageMediumSpecified was not persisted. Add FE unit coverage for enum parsing, DataProperty compatibility, TableProperty cache behavior, and STRICT/ADAPTIVE backend selection, plus a regression test covering persistence across FE restart. Co-authored-by: Cursor --- .../org/apache/doris/backup/RestoreJob.java | 4 +- .../apache/doris/catalog/DataProperty.java | 59 ++++- .../doris/catalog/MediumAllocationMode.java | 83 +++++++ .../org/apache/doris/catalog/OlapTable.java | 2 +- .../apache/doris/catalog/TableProperty.java | 31 +++ .../datasource/CloudInternalCatalog.java | 3 +- .../cloud/system/CloudSystemInfoService.java | 3 +- .../common/util/DynamicPartitionUtil.java | 7 +- .../doris/common/util/PropertyAnalyzer.java | 11 +- .../doris/datasource/InternalCatalog.java | 20 +- .../doris/system/SystemInfoService.java | 19 +- .../apache/doris/backup/RestoreJobTest.java | 3 +- .../doris/catalog/DataPropertyTest.java | 77 +++++++ .../catalog/MediumAllocationModeTest.java | 58 +++++ .../doris/catalog/ReplicaAllocationTest.java | 2 +- .../doris/catalog/TablePropertyTest.java | 64 ++++++ .../RoundRobinCreateTabletTest.java | 3 +- .../doris/system/SystemInfoServiceTest.java | 53 ++++- .../test_medium_allocation_mode_compat.groovy | 203 ++++++++++++++++++ 19 files changed, 668 insertions(+), 37 deletions(-) create mode 100644 fe/fe-core/src/main/java/org/apache/doris/catalog/MediumAllocationMode.java create mode 100644 fe/fe-core/src/test/java/org/apache/doris/catalog/MediumAllocationModeTest.java create mode 100644 fe/fe-core/src/test/java/org/apache/doris/catalog/TablePropertyTest.java create mode 100644 regression-test/suites/storage_medium_p0/test_medium_allocation_mode_compat.groovy diff --git a/fe/fe-core/src/main/java/org/apache/doris/backup/RestoreJob.java b/fe/fe-core/src/main/java/org/apache/doris/backup/RestoreJob.java index 29ae36d53765fc..531176ad50c2d8 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/backup/RestoreJob.java +++ b/fe/fe-core/src/main/java/org/apache/doris/backup/RestoreJob.java @@ -34,6 +34,7 @@ import org.apache.doris.catalog.MaterializedIndex; import org.apache.doris.catalog.MaterializedIndex.IndexExtState; import org.apache.doris.catalog.MaterializedIndexMeta; +import org.apache.doris.catalog.MediumAllocationMode; import org.apache.doris.catalog.OdbcCatalogResource; import org.apache.doris.catalog.OdbcTable; import org.apache.doris.catalog.OlapTable; @@ -1539,7 +1540,8 @@ protected Partition resetTabletForRestore(OlapTable localTbl, OlapTable remoteTb // replicas try { Pair>, TStorageMedium> beIdsAndMedium = Env.getCurrentSystemInfo() - .selectBackendIdsForReplicaCreation(replicaAlloc, nextIndexes, null, false, false); + .selectBackendIdsForReplicaCreation(replicaAlloc, nextIndexes, null, + MediumAllocationMode.ADAPTIVE, false); Map> beIds = beIdsAndMedium.first; for (Map.Entry> entry : beIds.entrySet()) { for (Long beId : entry.getValue()) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/DataProperty.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/DataProperty.java index 9d17e7f4dee857..dad7cf6c863750 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/DataProperty.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/DataProperty.java @@ -43,7 +43,17 @@ public class DataProperty implements GsonPostProcessable { private String storagePolicy; @SerializedName(value = "isMutable") private boolean isMutable = true; - private boolean storageMediumSpecified; + // Whether the medium was explicitly requested by the user at CREATE TABLE time. + // ADAPTIVE: default / auto - placement may pick any available medium + // STRICT : user explicitly asked for a specific medium (e.g. storage_medium=ssd) + // + // NOTE: on master this signal lived in a transient `storageMediumSpecified` boolean + // that was never persisted (missing @SerializedName), so it silently reverted to false + // after an FE restart. By persisting it here we both (a) carry the signal across + // restarts correctly and (b) get a typed home for the upcoming restore-side medium + // decision logic. See MediumAllocationMode for semantics. + @SerializedName(value = "mediumAllocationMode") + private MediumAllocationMode mediumAllocationMode = MediumAllocationMode.ADAPTIVE; private DataProperty() { // for persist @@ -53,6 +63,7 @@ public DataProperty(TStorageMedium medium) { this.storageMedium = medium; this.cooldownTimeMs = MAX_COOLDOWN_TIME_MS; this.storagePolicy = ""; + this.mediumAllocationMode = MediumAllocationMode.ADAPTIVE; } public DataProperty(DataProperty other) { @@ -60,6 +71,7 @@ public DataProperty(DataProperty other) { this.cooldownTimeMs = other.cooldownTimeMs; this.storagePolicy = other.storagePolicy; this.isMutable = other.isMutable; + this.mediumAllocationMode = other.mediumAllocationMode; } /** @@ -78,6 +90,7 @@ public DataProperty(TStorageMedium medium, long cooldown, String storagePolicy, this.cooldownTimeMs = cooldown; this.storagePolicy = storagePolicy; this.isMutable = isMutable; + this.mediumAllocationMode = MediumAllocationMode.ADAPTIVE; } public TStorageMedium getStorageMedium() { @@ -96,8 +109,35 @@ public void setStoragePolicy(String storagePolicy) { this.storagePolicy = storagePolicy; } + public MediumAllocationMode getMediumAllocationMode() { + return mediumAllocationMode; + } + + public void setMediumAllocationMode(MediumAllocationMode mode) { + this.mediumAllocationMode = (mode == null ? MediumAllocationMode.ADAPTIVE : mode); + } + + /** + * Legacy alias kept so that callers migrating in follow-up commits do not all + * need to change at once. Prefer {@link #getMediumAllocationMode()} in new code. + * + * @deprecated use {@link #getMediumAllocationMode()} and compare with + * {@link MediumAllocationMode#STRICT}. + */ + @Deprecated public boolean isStorageMediumSpecified() { - return storageMediumSpecified; + return mediumAllocationMode == MediumAllocationMode.STRICT; + } + + /** + * Legacy setter kept so that callers migrating in follow-up commits do not all + * need to change at once. Prefer {@link #setMediumAllocationMode(MediumAllocationMode)}. + * + * @deprecated use {@link #setMediumAllocationMode(MediumAllocationMode)}. + */ + @Deprecated + public void setStorageMediumSpecified(boolean isSpecified) { + this.mediumAllocationMode = isSpecified ? MediumAllocationMode.STRICT : MediumAllocationMode.ADAPTIVE; } public boolean isMutable() { @@ -108,17 +148,13 @@ public void setMutable(boolean mutable) { isMutable = mutable; } - public void setStorageMediumSpecified(boolean isSpecified) { - storageMediumSpecified = isSpecified; - } - public void setStorageMedium(TStorageMedium medium) { this.storageMedium = medium; } @Override public int hashCode() { - return Objects.hash(storageMedium, cooldownTimeMs, storagePolicy); + return Objects.hash(storageMedium, cooldownTimeMs, storagePolicy, mediumAllocationMode); } @Override @@ -136,7 +172,8 @@ public boolean equals(Object obj) { return this.storageMedium == other.storageMedium && this.cooldownTimeMs == other.cooldownTimeMs && Strings.nullToEmpty(this.storagePolicy).equals(Strings.nullToEmpty(other.storagePolicy)) - && this.isMutable == other.isMutable; + && this.isMutable == other.isMutable + && this.mediumAllocationMode == other.mediumAllocationMode; } @Override @@ -145,6 +182,7 @@ public String toString() { sb.append("Storage medium[").append(this.storageMedium).append("]. "); sb.append("cool down[").append(TimeUtils.longToTimeString(cooldownTimeMs)).append("]. "); sb.append("remote storage policy[").append(this.storagePolicy).append("]. "); + sb.append("medium allocation mode[").append(this.mediumAllocationMode).append("]. "); return sb.toString(); } @@ -152,6 +190,9 @@ public String toString() { public void gsonPostProcess() throws IOException { // storagePolicy is a newly added field, it may be null when replaying from old version. this.storagePolicy = Strings.nullToEmpty(this.storagePolicy); + // mediumAllocationMode is a newly added field; old images won't contain it, so default to ADAPTIVE. + if (this.mediumAllocationMode == null) { + this.mediumAllocationMode = MediumAllocationMode.ADAPTIVE; + } } - } diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/MediumAllocationMode.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/MediumAllocationMode.java new file mode 100644 index 00000000000000..7bb93f9566cebf --- /dev/null +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/MediumAllocationMode.java @@ -0,0 +1,83 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.catalog; + +import org.apache.doris.common.AnalysisException; + +import com.google.common.base.Strings; + +/** + * Defines how Doris decides the storage medium of a data property + * (partition-level / table-level). + * + *

Hard-binding semantics with CREATE TABLE: + *

    + *
  • {@code PROPERTIES("storage_medium"="ssd|hdd")} -> {@link #STRICT}
  • + *
  • no {@code storage_medium} property -> {@link #ADAPTIVE}
  • + *
+ * + *

The mode drives replica placement: + *

    + *
  • {@link #STRICT}: user explicitly requested a medium; placement must + * honour it and fail if the requested medium cannot be satisfied.
  • + *
  • {@link #ADAPTIVE}: medium is a hint; placement may pick any available + * medium according to cluster capacity.
  • + *
+ */ +public enum MediumAllocationMode { + STRICT("strict"), + ADAPTIVE("adaptive"); + + private final String value; + + MediumAllocationMode(String value) { + this.value = value; + } + + public String getValue() { + return value; + } + + public boolean isStrict() { + return this == STRICT; + } + + public boolean isAdaptive() { + return this == ADAPTIVE; + } + + /** + * Parse a user-provided string (case-insensitive, trimmed) into the enum. + * + * @throws AnalysisException if the value is null, blank or unrecognised. + */ + public static MediumAllocationMode fromString(String value) throws AnalysisException { + String trimmed = Strings.nullToEmpty(value).trim(); + if (trimmed.isEmpty()) { + throw new AnalysisException("medium_allocation_mode cannot be null or empty"); + } + for (MediumAllocationMode mode : values()) { + if (mode.value.equalsIgnoreCase(trimmed)) { + return mode; + } + } + throw new AnalysisException(String.format( + "Invalid medium_allocation_mode value: '%s'. Valid options are: 'strict', 'adaptive'", + value)); + } +} diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java index e93e521f1516f7..6dca34474f758a 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java @@ -970,7 +970,7 @@ && getTableProperty().getDynamicPartitionProperty().getBuckets() Pair>, TStorageMedium> tag2beIdsAndMedium = Env.getCurrentSystemInfo().selectBackendIdsForReplicaCreation( replicaAlloc, nextIndexes, null, - false, false); + MediumAllocationMode.ADAPTIVE, false); tag2beIds = tag2beIdsAndMedium.first; } for (Map.Entry> entry3 : tag2beIds.entrySet()) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/TableProperty.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/TableProperty.java index 2221a8f8fb7340..8eb70a8db0ba0d 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/TableProperty.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/TableProperty.java @@ -73,6 +73,17 @@ public class TableProperty implements GsonPostProcessable { private TStorageMedium storageMedium = null; + // Table-level cache of the medium allocation mode derived from the table's + // raw PROPERTIES map. Like {@link #storageMedium} this is rebuilt from + // {@code properties} on load rather than persisted as its own field, so + // {@link DataProperty#mediumAllocationMode} (the partition-level source of + // truth) remains the only persisted copy. + // + // Hard-binding at CREATE TABLE time: if the user specifies + // {@code PROPERTIES("storage_medium"=...)} the mode is STRICT, otherwise + // ADAPTIVE. See {@link MediumAllocationMode}. + private MediumAllocationMode mediumAllocationMode = MediumAllocationMode.ADAPTIVE; + // which columns stored in RowStore column private List rowStoreColumns; @@ -163,6 +174,7 @@ public TableProperty buildProperty(short opCode) { buildInMemory(); buildMinLoadReplicaNum(); buildStorageMedium(); + buildMediumAllocationMode(); buildStoragePolicy(); buildIsBeingSynced(); buildCompactionPolicy(); @@ -535,6 +547,24 @@ public TStorageMedium getStorageMedium() { return storageMedium; } + /** + * Derive the table-level {@link MediumAllocationMode} from the raw properties + * map. Mirrors the hard-binding used by {@link DataProperty}: the user + * explicitly asked for a medium iff {@link PropertyAnalyzer#PROPERTIES_STORAGE_MEDIUM} + * is present and non-empty. + */ + public TableProperty buildMediumAllocationMode() { + String storageMediumStr = properties.get(PropertyAnalyzer.PROPERTIES_STORAGE_MEDIUM); + mediumAllocationMode = Strings.isNullOrEmpty(storageMediumStr) + ? MediumAllocationMode.ADAPTIVE + : MediumAllocationMode.STRICT; + return this; + } + + public MediumAllocationMode getMediumAllocationMode() { + return mediumAllocationMode; + } + public TableProperty buildStoragePolicy() { storagePolicy = properties.getOrDefault(PropertyAnalyzer.PROPERTIES_STORAGE_POLICY, ""); return this; @@ -910,6 +940,7 @@ public void gsonPostProcess() throws IOException { buildInMemory(); buildMinLoadReplicaNum(); buildStorageMedium(); + buildMediumAllocationMode(); buildStorageFormat(); buildInvertedIndexFileStorageFormat(); buildDataSortInfo(); diff --git a/fe/fe-core/src/main/java/org/apache/doris/cloud/datasource/CloudInternalCatalog.java b/fe/fe-core/src/main/java/org/apache/doris/cloud/datasource/CloudInternalCatalog.java index 5bf096bc60e0cd..bc10b38f5f600c 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/cloud/datasource/CloudInternalCatalog.java +++ b/fe/fe-core/src/main/java/org/apache/doris/cloud/datasource/CloudInternalCatalog.java @@ -33,6 +33,7 @@ import org.apache.doris.catalog.MaterializedIndex.IndexExtState; import org.apache.doris.catalog.MaterializedIndex.IndexState; import org.apache.doris.catalog.MaterializedIndexMeta; +import org.apache.doris.catalog.MediumAllocationMode; import org.apache.doris.catalog.MetaIdGenerator.IdGeneratorBuffer; import org.apache.doris.catalog.OlapTable; import org.apache.doris.catalog.Partition; @@ -108,7 +109,7 @@ protected Partition createPartitionWithIndices(long dbId, OlapTable tbl, long pa String storagePolicy, IdGeneratorBuffer idGeneratorBuffer, BinlogConfig binlogConfig, - boolean isStorageMediumSpecified) + MediumAllocationMode mediumAllocationMode) throws DdlException { // create base index first. Preconditions.checkArgument(tbl.getBaseIndexId() != -1); diff --git a/fe/fe-core/src/main/java/org/apache/doris/cloud/system/CloudSystemInfoService.java b/fe/fe-core/src/main/java/org/apache/doris/cloud/system/CloudSystemInfoService.java index 34865f06e61432..06d0d8f24d1522 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/cloud/system/CloudSystemInfoService.java +++ b/fe/fe-core/src/main/java/org/apache/doris/cloud/system/CloudSystemInfoService.java @@ -18,6 +18,7 @@ package org.apache.doris.cloud.system; import org.apache.doris.catalog.Env; +import org.apache.doris.catalog.MediumAllocationMode; import org.apache.doris.catalog.ReplicaAllocation; import org.apache.doris.cloud.catalog.CloudEnv; import org.apache.doris.cloud.catalog.ComputeGroup; @@ -220,7 +221,7 @@ public void renameVirtualComputeGroup(String computeGroupId, String oldComputeGr @Override public Pair>, TStorageMedium> selectBackendIdsForReplicaCreation( ReplicaAllocation replicaAlloc, Map nextIndexs, - TStorageMedium storageMedium, boolean isStorageMediumSpecified, + TStorageMedium storageMedium, MediumAllocationMode mediumAllocationMode, boolean isOnlyForCheck) throws DdlException { return Pair.of(Maps.newHashMap(), storageMedium); diff --git a/fe/fe-core/src/main/java/org/apache/doris/common/util/DynamicPartitionUtil.java b/fe/fe-core/src/main/java/org/apache/doris/common/util/DynamicPartitionUtil.java index 09b4a9f18e815e..e0b2f76e242f31 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/common/util/DynamicPartitionUtil.java +++ b/fe/fe-core/src/main/java/org/apache/doris/common/util/DynamicPartitionUtil.java @@ -26,6 +26,7 @@ import org.apache.doris.catalog.DistributionInfo; import org.apache.doris.catalog.DynamicPartitionProperty; import org.apache.doris.catalog.Env; +import org.apache.doris.catalog.MediumAllocationMode; import org.apache.doris.catalog.OlapTable; import org.apache.doris.catalog.PartitionInfo; import org.apache.doris.catalog.PartitionType; @@ -237,7 +238,7 @@ private static void checkReplicationNum(String val, Database db) throws DdlExcep } ReplicaAllocation replicaAlloc = new ReplicaAllocation(Short.valueOf(val)); Env.getCurrentSystemInfo().selectBackendIdsForReplicaCreation(replicaAlloc, Maps.newHashMap(), - null, false, true); + null, MediumAllocationMode.ADAPTIVE, true); } private static void checkReplicaAllocation(ReplicaAllocation replicaAlloc, int hotPartitionNum) @@ -248,14 +249,14 @@ private static void checkReplicaAllocation(ReplicaAllocation replicaAlloc, int h Map nextIndexs = Maps.newHashMap(); Env.getCurrentSystemInfo().selectBackendIdsForReplicaCreation(replicaAlloc, nextIndexs, null, - false, true); + MediumAllocationMode.ADAPTIVE, true); if (hotPartitionNum <= 0) { return; } try { Env.getCurrentSystemInfo().selectBackendIdsForReplicaCreation(replicaAlloc, nextIndexs, - TStorageMedium.SSD, false, true); + TStorageMedium.SSD, MediumAllocationMode.ADAPTIVE, true); } catch (DdlException e) { throw new DdlException("Failed to find enough backend for ssd storage medium. When setting " + DynamicPartitionProperty.HOT_PARTITION_NUM + " > 0, the hot partitions will store " diff --git a/fe/fe-core/src/main/java/org/apache/doris/common/util/PropertyAnalyzer.java b/fe/fe-core/src/main/java/org/apache/doris/common/util/PropertyAnalyzer.java index 2800ea1157f886..d03a7020807dd1 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/common/util/PropertyAnalyzer.java +++ b/fe/fe-core/src/main/java/org/apache/doris/common/util/PropertyAnalyzer.java @@ -27,6 +27,7 @@ import org.apache.doris.catalog.Env; import org.apache.doris.catalog.EnvFactory; import org.apache.doris.catalog.KeysType; +import org.apache.doris.catalog.MediumAllocationMode; import org.apache.doris.catalog.Partition; import org.apache.doris.catalog.PrimitiveType; import org.apache.doris.catalog.ReplicaAllocation; @@ -393,6 +394,7 @@ public static DataProperty analyzeDataProperty(Map properties, f String newStoragePolicy = oldStoragePolicy; boolean hasStoragePolicy = false; boolean storageMediumSpecified = false; + MediumAllocationMode mediumAllocationMode = oldDataProperty.getMediumAllocationMode(); boolean isBeingSynced = false; for (Map.Entry entry : properties.entrySet()) { @@ -505,9 +507,12 @@ public static DataProperty analyzeDataProperty(Map properties, f properties.remove(PROPERTIES_MUTABLE); DataProperty dataProperty = new DataProperty(storageMedium, cooldownTimestamp, newStoragePolicy, mutable); - // check the state of data property + dataProperty.setMediumAllocationMode(mediumAllocationMode); + // Hard-binding semantics: PROPERTIES("storage_medium"=...) -> STRICT. + // If storage_medium is absent, preserve the old mode across unrelated + // data property changes such as cooldown time or storage policy. if (storageMediumSpecified) { - dataProperty.setStorageMediumSpecified(true); + dataProperty.setMediumAllocationMode(MediumAllocationMode.STRICT); } return dataProperty; } @@ -1642,7 +1647,7 @@ private static ReplicaAllocation analyzeReplicaAllocationImpl(Map tabletIdSet, IdGeneratorBuffer idGeneratorBuffer, boolean isStorageMediumSpecified) + Set tabletIdSet, IdGeneratorBuffer idGeneratorBuffer, + MediumAllocationMode mediumAllocationMode) throws DdlException { ColocateTableIndex colocateIndex = Env.getCurrentColocateIndex(); SystemInfoService systemInfoService = Env.getCurrentSystemInfo(); @@ -3363,7 +3365,7 @@ public TStorageMedium createTablets(MaterializedIndex index, ReplicaState replic startPos = 0; } else { startPos = systemInfoService.getStartPosOfRoundRobin(tag, storageMedium, - isStorageMediumSpecified); + mediumAllocationMode); } nextIndexs.put(tag, startPos); } @@ -3385,7 +3387,7 @@ public TStorageMedium createTablets(MaterializedIndex index, ReplicaState replic Pair>, TStorageMedium> chosenBackendIdsAndMedium = systemInfoService.selectBackendIdsForReplicaCreation( replicaAlloc, nextIndexs, - storageMedium, isStorageMediumSpecified, false); + storageMedium, mediumAllocationMode, false); chosenBackendIds = chosenBackendIdsAndMedium.first; storageMedium = chosenBackendIdsAndMedium.second; for (Map.Entry> entry : chosenBackendIds.entrySet()) { @@ -3598,7 +3600,7 @@ public void truncateTable(String dbName, String tableName, PartitionNamesInfo pa copiedTbl.getPartitionInfo().getTabletType(oldPartitionId), olapTable.getPartitionInfo().getDataProperty(oldPartitionId).getStoragePolicy(), idGeneratorBuffer, binlogConfig, - copiedTbl.getPartitionInfo().getDataProperty(oldPartitionId).isStorageMediumSpecified()); + copiedTbl.getPartitionInfo().getDataProperty(oldPartitionId).getMediumAllocationMode()); newPartitions.add(newPartition); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/system/SystemInfoService.java b/fe/fe-core/src/main/java/org/apache/doris/system/SystemInfoService.java index 2ef193cb158cfc..ba2dc50b2aa266 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/system/SystemInfoService.java +++ b/fe/fe-core/src/main/java/org/apache/doris/system/SystemInfoService.java @@ -19,6 +19,7 @@ import org.apache.doris.catalog.DiskInfo; import org.apache.doris.catalog.Env; +import org.apache.doris.catalog.MediumAllocationMode; import org.apache.doris.catalog.ReplicaAllocation; import org.apache.doris.cloud.qe.ComputeGroupException; import org.apache.doris.cluster.ClusterGuard; @@ -490,7 +491,8 @@ public int compare(Backend a, Backend b) { // Select the smallest number of tablets as the starting position of // round robin in the BE that match the policy - public int getStartPosOfRoundRobin(Tag tag, TStorageMedium storageMedium, boolean isStorageMediumSpecified) { + public int getStartPosOfRoundRobin(Tag tag, TStorageMedium storageMedium, + MediumAllocationMode mediumAllocationMode) { BeSelectionPolicy.Builder builder = new BeSelectionPolicy.Builder() .needScheduleAvailable() .needCheckDiskUsage() @@ -502,7 +504,9 @@ public int getStartPosOfRoundRobin(Tag tag, TStorageMedium storageMedium, boolea BeSelectionPolicy policy = builder.build(); List beIds = selectBackendIdsByPolicy(policy, -1); - if (beIds.isEmpty() && storageMedium != null && !isStorageMediumSpecified) { + // ADAPTIVE: medium is a hint; fall back to the other medium if none of the BEs can host it. + // STRICT : medium is required; do not fall back (caller will surface an error). + if (beIds.isEmpty() && storageMedium != null && mediumAllocationMode != MediumAllocationMode.STRICT) { storageMedium = (storageMedium == TStorageMedium.HDD) ? TStorageMedium.SSD : TStorageMedium.HDD; policy = builder.setStorageMedium(storageMedium).build(); beIds = selectBackendIdsByPolicy(policy, -1); @@ -527,14 +531,15 @@ public int getStartPosOfRoundRobin(Tag tag, TStorageMedium storageMedium, boolea * @param replicaAlloc * @param nextIndexs create tablet round robin next be index, when enable_round_robin_create_tablet * @param storageMedium - * @param isStorageMediumSpecified + * @param mediumAllocationMode controls whether the storage medium may fall back when no BE + * can host the requested medium (ADAPTIVE falls back, STRICT does not) * @param isOnlyForCheck set true if only used for check available backend * @return return the selected backend ids group by tag. * @throws DdlException */ public Pair>, TStorageMedium> selectBackendIdsForReplicaCreation( ReplicaAllocation replicaAlloc, Map nextIndexs, - TStorageMedium storageMedium, boolean isStorageMediumSpecified, + TStorageMedium storageMedium, MediumAllocationMode mediumAllocationMode, boolean isOnlyForCheck) throws DdlException { Map copiedBackends = Maps.newHashMap(getAllClusterBackendsNoException()); @@ -568,7 +573,11 @@ public Pair>, TStorageMedium> selectBackendIdsForReplicaCrea // first time empty, retry with different storage medium // if only for check, no need to retry different storage medium to get backend TStorageMedium originalStorageMedium = storageMedium; - if (beIds.isEmpty() && storageMedium != null && !isStorageMediumSpecified && !isOnlyForCheck) { + // ADAPTIVE: retry with the opposite medium if no BE can host the requested one. + // STRICT: caller explicitly asked for this medium, do not fall back. + if (beIds.isEmpty() && storageMedium != null + && mediumAllocationMode != MediumAllocationMode.STRICT + && !isOnlyForCheck) { storageMedium = (storageMedium == TStorageMedium.HDD) ? TStorageMedium.SSD : TStorageMedium.HDD; builder.setStorageMedium(storageMedium); if (Config.enable_round_robin_create_tablet) { diff --git a/fe/fe-core/src/test/java/org/apache/doris/backup/RestoreJobTest.java b/fe/fe-core/src/test/java/org/apache/doris/backup/RestoreJobTest.java index d03d1c2e5f0300..03dadf59c4ed72 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/backup/RestoreJobTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/backup/RestoreJobTest.java @@ -26,6 +26,7 @@ import org.apache.doris.catalog.HashDistributionInfo; import org.apache.doris.catalog.MaterializedIndex; import org.apache.doris.catalog.MaterializedIndex.IndexExtState; +import org.apache.doris.catalog.MediumAllocationMode; import org.apache.doris.catalog.OlapTable; import org.apache.doris.catalog.Partition; import org.apache.doris.catalog.PartitionInfo; @@ -151,7 +152,7 @@ public void setUp() throws Exception { Mockito.any(ReplicaAllocation.class), Mockito.anyMap(), Mockito.any(TStorageMedium.class), - Mockito.eq(false), + Mockito.eq(MediumAllocationMode.ADAPTIVE), Mockito.eq(true)); Mockito.doAnswer(inv -> { diff --git a/fe/fe-core/src/test/java/org/apache/doris/catalog/DataPropertyTest.java b/fe/fe-core/src/test/java/org/apache/doris/catalog/DataPropertyTest.java index a53c18680af97a..4d4f0b6b2e8a41 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/catalog/DataPropertyTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/catalog/DataPropertyTest.java @@ -18,11 +18,16 @@ package org.apache.doris.catalog; import org.apache.doris.common.Config; +import org.apache.doris.common.util.PropertyAnalyzer; +import org.apache.doris.persist.gson.GsonUtils; import org.apache.doris.thrift.TStorageMedium; import org.junit.Assert; import org.junit.Test; +import java.util.HashMap; +import java.util.Map; + public class DataPropertyTest { @Test @@ -41,4 +46,76 @@ public void testCooldownTimeMs() throws Exception { dataProperty = new DataProperty(TStorageMedium.HDD); Assert.assertEquals(DataProperty.MAX_COOLDOWN_TIME_MS, dataProperty.getCooldownTimeMs()); } + + @Test + public void testDefaultMediumAllocationMode() { + DataProperty dataProperty = new DataProperty(TStorageMedium.HDD); + Assert.assertEquals(MediumAllocationMode.ADAPTIVE, dataProperty.getMediumAllocationMode()); + + DataProperty dataProperty2 = new DataProperty(TStorageMedium.SSD, + DataProperty.MAX_COOLDOWN_TIME_MS, ""); + Assert.assertEquals(MediumAllocationMode.ADAPTIVE, dataProperty2.getMediumAllocationMode()); + } + + @Test + public void testStrictFlagRoundTrip() { + DataProperty dataProperty = new DataProperty(TStorageMedium.SSD); + dataProperty.setMediumAllocationMode(MediumAllocationMode.STRICT); + + String json = GsonUtils.GSON.toJson(dataProperty); + Assert.assertTrue("new field must be persisted", json.contains("mediumAllocationMode")); + Assert.assertTrue(json.contains("STRICT") || json.contains("strict")); + + DataProperty restored = GsonUtils.GSON.fromJson(json, DataProperty.class); + Assert.assertEquals(MediumAllocationMode.STRICT, restored.getMediumAllocationMode()); + Assert.assertTrue("legacy shim must agree with enum", restored.isStorageMediumSpecified()); + } + + /** + * On master the old {@code storageMediumSpecified} boolean had no + * {@code @SerializedName} annotation, so it was never written to the + * image at all. Simulate replaying such an old image (no mediumAllocationMode + * key) and verify we fall back to ADAPTIVE rather than NPE / STRICT. + */ + @Test + public void testOldImageDeserialisesToAdaptive() throws Exception { + String legacyJson = "{" + + "\"storageMedium\":\"HDD\"," + + "\"cooldownTimeMs\":" + DataProperty.MAX_COOLDOWN_TIME_MS + "," + + "\"storagePolicy\":\"\"," + + "\"isMutable\":true" + + "}"; + + DataProperty restored = GsonUtils.GSON.fromJson(legacyJson, DataProperty.class); + restored.gsonPostProcess(); + + Assert.assertEquals(TStorageMedium.HDD, restored.getStorageMedium()); + Assert.assertEquals(MediumAllocationMode.ADAPTIVE, restored.getMediumAllocationMode()); + Assert.assertFalse(restored.isStorageMediumSpecified()); + } + + @Test + public void testLegacySetterMapsToEnum() { + DataProperty dataProperty = new DataProperty(TStorageMedium.SSD); + Assert.assertEquals(MediumAllocationMode.ADAPTIVE, dataProperty.getMediumAllocationMode()); + + dataProperty.setStorageMediumSpecified(true); + Assert.assertEquals(MediumAllocationMode.STRICT, dataProperty.getMediumAllocationMode()); + + dataProperty.setStorageMediumSpecified(false); + Assert.assertEquals(MediumAllocationMode.ADAPTIVE, dataProperty.getMediumAllocationMode()); + } + + @Test + public void testAnalyzeDataPropertyPreservesMediumAllocationModeWhenMediumUnspecified() throws Exception { + DataProperty oldDataProperty = new DataProperty(TStorageMedium.SSD); + oldDataProperty.setMediumAllocationMode(MediumAllocationMode.STRICT); + + Map properties = new HashMap<>(); + properties.put(PropertyAnalyzer.PROPERTIES_MUTABLE, "false"); + + DataProperty updated = PropertyAnalyzer.analyzeDataProperty(properties, oldDataProperty); + Assert.assertEquals(MediumAllocationMode.STRICT, updated.getMediumAllocationMode()); + Assert.assertFalse(updated.isMutable()); + } } diff --git a/fe/fe-core/src/test/java/org/apache/doris/catalog/MediumAllocationModeTest.java b/fe/fe-core/src/test/java/org/apache/doris/catalog/MediumAllocationModeTest.java new file mode 100644 index 00000000000000..fa28d0301fe63a --- /dev/null +++ b/fe/fe-core/src/test/java/org/apache/doris/catalog/MediumAllocationModeTest.java @@ -0,0 +1,58 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.catalog; + +import org.apache.doris.common.AnalysisException; + +import org.junit.Assert; +import org.junit.Test; + +public class MediumAllocationModeTest { + + @Test + public void testFromStringCaseInsensitive() throws AnalysisException { + Assert.assertEquals(MediumAllocationMode.STRICT, MediumAllocationMode.fromString("strict")); + Assert.assertEquals(MediumAllocationMode.STRICT, MediumAllocationMode.fromString("STRICT")); + Assert.assertEquals(MediumAllocationMode.STRICT, MediumAllocationMode.fromString(" Strict ")); + Assert.assertEquals(MediumAllocationMode.ADAPTIVE, MediumAllocationMode.fromString("adaptive")); + Assert.assertEquals(MediumAllocationMode.ADAPTIVE, MediumAllocationMode.fromString("ADAPTIVE")); + } + + @Test(expected = AnalysisException.class) + public void testFromStringNullThrows() throws AnalysisException { + MediumAllocationMode.fromString(null); + } + + @Test(expected = AnalysisException.class) + public void testFromStringBlankThrows() throws AnalysisException { + MediumAllocationMode.fromString(" "); + } + + @Test(expected = AnalysisException.class) + public void testFromStringUnknownThrows() throws AnalysisException { + MediumAllocationMode.fromString("lax"); + } + + @Test + public void testIsStrictIsAdaptive() { + Assert.assertTrue(MediumAllocationMode.STRICT.isStrict()); + Assert.assertFalse(MediumAllocationMode.STRICT.isAdaptive()); + Assert.assertTrue(MediumAllocationMode.ADAPTIVE.isAdaptive()); + Assert.assertFalse(MediumAllocationMode.ADAPTIVE.isStrict()); + } +} diff --git a/fe/fe-core/src/test/java/org/apache/doris/catalog/ReplicaAllocationTest.java b/fe/fe-core/src/test/java/org/apache/doris/catalog/ReplicaAllocationTest.java index 550b2c7d6630a8..85d0ea860698a6 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/catalog/ReplicaAllocationTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/catalog/ReplicaAllocationTest.java @@ -58,7 +58,7 @@ public void setUp() throws DdlException { Mockito.any(ReplicaAllocation.class), Mockito.anyMap(), Mockito.nullable(TStorageMedium.class), - Mockito.eq(false), + Mockito.eq(MediumAllocationMode.ADAPTIVE), Mockito.eq(true)); } diff --git a/fe/fe-core/src/test/java/org/apache/doris/catalog/TablePropertyTest.java b/fe/fe-core/src/test/java/org/apache/doris/catalog/TablePropertyTest.java new file mode 100644 index 00000000000000..8db534b5334045 --- /dev/null +++ b/fe/fe-core/src/test/java/org/apache/doris/catalog/TablePropertyTest.java @@ -0,0 +1,64 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.catalog; + +import org.apache.doris.common.util.PropertyAnalyzer; + +import com.google.common.collect.Maps; +import org.junit.Assert; +import org.junit.Test; + +import java.util.Map; + +public class TablePropertyTest { + + @Test + public void testMediumAllocationModeAdaptiveWhenUnspecified() { + Map properties = Maps.newHashMap(); + TableProperty tableProperty = new TableProperty(properties); + tableProperty.buildMediumAllocationMode(); + Assert.assertEquals(MediumAllocationMode.ADAPTIVE, tableProperty.getMediumAllocationMode()); + } + + @Test + public void testMediumAllocationModeStrictWhenSsdSpecified() { + Map properties = Maps.newHashMap(); + properties.put(PropertyAnalyzer.PROPERTIES_STORAGE_MEDIUM, "SSD"); + TableProperty tableProperty = new TableProperty(properties); + tableProperty.buildMediumAllocationMode(); + Assert.assertEquals(MediumAllocationMode.STRICT, tableProperty.getMediumAllocationMode()); + } + + @Test + public void testMediumAllocationModeStrictWhenHddSpecified() { + Map properties = Maps.newHashMap(); + properties.put(PropertyAnalyzer.PROPERTIES_STORAGE_MEDIUM, "HDD"); + TableProperty tableProperty = new TableProperty(properties); + tableProperty.buildMediumAllocationMode(); + Assert.assertEquals(MediumAllocationMode.STRICT, tableProperty.getMediumAllocationMode()); + } + + @Test + public void testMediumAllocationModeAdaptiveWhenEmptyString() { + Map properties = Maps.newHashMap(); + properties.put(PropertyAnalyzer.PROPERTIES_STORAGE_MEDIUM, ""); + TableProperty tableProperty = new TableProperty(properties); + tableProperty.buildMediumAllocationMode(); + Assert.assertEquals(MediumAllocationMode.ADAPTIVE, tableProperty.getMediumAllocationMode()); + } +} diff --git a/fe/fe-core/src/test/java/org/apache/doris/datasource/RoundRobinCreateTabletTest.java b/fe/fe-core/src/test/java/org/apache/doris/datasource/RoundRobinCreateTabletTest.java index b76c49081d89d1..fbe7a6541c90d2 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/datasource/RoundRobinCreateTabletTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/datasource/RoundRobinCreateTabletTest.java @@ -21,6 +21,7 @@ import org.apache.doris.catalog.Env; import org.apache.doris.catalog.HashDistributionInfo; import org.apache.doris.catalog.MaterializedIndex; +import org.apache.doris.catalog.MediumAllocationMode; import org.apache.doris.catalog.MetaIdGenerator.IdGeneratorBuffer; import org.apache.doris.catalog.Replica; import org.apache.doris.catalog.Replica.ReplicaState; @@ -112,7 +113,7 @@ public void testCreateTablets() { try { Env.getCurrentEnv().getInternalCatalog().createTablets(index, ReplicaState.NORMAL, distributionInfo, 0, replicaAlloc, tabletMeta, - tabletIdSet, idGeneratorBuffer, false); + tabletIdSet, idGeneratorBuffer, MediumAllocationMode.ADAPTIVE); } catch (Exception e) { System.out.println("failed to create tablets " + e.getMessage()); } diff --git a/fe/fe-core/src/test/java/org/apache/doris/system/SystemInfoServiceTest.java b/fe/fe-core/src/test/java/org/apache/doris/system/SystemInfoServiceTest.java index 033568017d90f3..99e2d2e1e0cdd4 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/system/SystemInfoServiceTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/system/SystemInfoServiceTest.java @@ -19,9 +19,11 @@ import org.apache.doris.catalog.DiskInfo; import org.apache.doris.catalog.Env; +import org.apache.doris.catalog.MediumAllocationMode; import org.apache.doris.catalog.ReplicaAllocation; import org.apache.doris.common.AnalysisException; import org.apache.doris.common.Config; +import org.apache.doris.common.DdlException; import org.apache.doris.common.FeMetaVersion; import org.apache.doris.common.Pair; import org.apache.doris.meta.MetaContext; @@ -409,7 +411,7 @@ public void testSelectBackendIdsForReplicaCreation() throws Exception { Map beCounterMap = Maps.newHashMap(); for (int i = 0; i < 30000; ++i) { Pair>, TStorageMedium> ret = infoService.selectBackendIdsForReplicaCreation(replicaAlloc, - Maps.newHashMap(), TStorageMedium.HDD, false, false); + Maps.newHashMap(), TStorageMedium.HDD, MediumAllocationMode.ADAPTIVE, false); Map> res = ret.first; Assert.assertEquals(3, res.get(Tag.DEFAULT_BACKEND_TAG).size()); for (Long beId : res.get(Tag.DEFAULT_BACKEND_TAG)) { @@ -428,6 +430,55 @@ public void testSelectBackendIdsForReplicaCreation() throws Exception { Assert.assertTrue((diff * 1.0 / max) < 0.3); } + /** + * STRICT mode must refuse to fall back to another medium when the requested + * one is missing. Pair with {@link #testAdaptiveFallsBackOnSingleMediumCluster()} + * for the ADAPTIVE behaviour on the same fixture. + */ + @Test(expected = DdlException.class) + public void testStrictThrowsOnSingleMediumCluster() throws Exception { + addBackend(20001, "10.0.0.1", 9050); + Backend be1 = infoService.getBackend(20001); + addDisk(be1, "path1", TStorageMedium.HDD, 200 * 1024 * 1024L, 150 * 1024 * 1024L); + be1.setAlive(true); + addBackend(20002, "10.0.0.2", 9050); + Backend be2 = infoService.getBackend(20002); + addDisk(be2, "path1", TStorageMedium.HDD, 200 * 1024 * 1024L, 150 * 1024 * 1024L); + be2.setAlive(true); + addBackend(20003, "10.0.0.3", 9050); + Backend be3 = infoService.getBackend(20003); + addDisk(be3, "path1", TStorageMedium.HDD, 200 * 1024 * 1024L, 150 * 1024 * 1024L); + be3.setAlive(true); + + ReplicaAllocation replicaAlloc = ReplicaAllocation.DEFAULT_ALLOCATION; + infoService.selectBackendIdsForReplicaCreation(replicaAlloc, Maps.newHashMap(), + TStorageMedium.SSD, MediumAllocationMode.STRICT, false); + } + + @Test + public void testAdaptiveFallsBackOnSingleMediumCluster() throws Exception { + addBackend(30001, "10.0.1.1", 9050); + Backend be1 = infoService.getBackend(30001); + addDisk(be1, "path1", TStorageMedium.HDD, 200 * 1024 * 1024L, 150 * 1024 * 1024L); + be1.setAlive(true); + addBackend(30002, "10.0.1.2", 9050); + Backend be2 = infoService.getBackend(30002); + addDisk(be2, "path1", TStorageMedium.HDD, 200 * 1024 * 1024L, 150 * 1024 * 1024L); + be2.setAlive(true); + addBackend(30003, "10.0.1.3", 9050); + Backend be3 = infoService.getBackend(30003); + addDisk(be3, "path1", TStorageMedium.HDD, 200 * 1024 * 1024L, 150 * 1024 * 1024L); + be3.setAlive(true); + + ReplicaAllocation replicaAlloc = ReplicaAllocation.DEFAULT_ALLOCATION; + Pair>, TStorageMedium> ret = infoService.selectBackendIdsForReplicaCreation( + replicaAlloc, Maps.newHashMap(), + TStorageMedium.SSD, MediumAllocationMode.ADAPTIVE, false); + Assert.assertEquals("ADAPTIVE must fall back to HDD on a HDD-only cluster", + TStorageMedium.HDD, ret.second); + Assert.assertEquals(3, ret.first.get(Tag.DEFAULT_BACKEND_TAG).size()); + } + private void addDisk(Backend be, String path, TStorageMedium medium, long totalB, long availB) { DiskInfo diskInfo1 = new DiskInfo(path); diskInfo1.setTotalCapacityB(totalB); diff --git a/regression-test/suites/storage_medium_p0/test_medium_allocation_mode_compat.groovy b/regression-test/suites/storage_medium_p0/test_medium_allocation_mode_compat.groovy new file mode 100644 index 00000000000000..50522f5973ca6d --- /dev/null +++ b/regression-test/suites/storage_medium_p0/test_medium_allocation_mode_compat.groovy @@ -0,0 +1,203 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. +import org.apache.doris.regression.suite.ClusterOptions + +// PR-1 compatibility check: the MediumAllocationMode enum replaces the old +// non-persisted storageMediumSpecified boolean on DataProperty. Verify that +// it does not regress user-visible behaviour across a FE restart: +// - explicit storage_medium=SSD -> stays on SSD after restart (STRICT persists) +// - explicit storage_medium=HDD -> stays on HDD after restart (STRICT persists) +// - no storage_medium specified -> default behaviour (ADAPTIVE) preserved +suite("test_medium_allocation_mode_compat", 'docker') { + def options = new ClusterOptions() + options.feConfigs += [ + 'default_storage_medium=HDD', + ] + // mixed cluster: both SSD and HDD are available so STRICT is actually honoured. + options.beDisks = ['SSD=2', 'HDD=2'] + + def collectPartitionMediums = { tbl -> + def partitions = sql_return_maparray "SHOW PARTITIONS FROM ${tbl};" + def media = [] + partitions.each { media << it.StorageMedium } + return media + } + + def assertAllEqual = { mediums, expected, tbl -> + log.info("${tbl} storage mediums: ${mediums}") + assertTrue(!mediums.isEmpty(), "${tbl} must have at least one partition") + mediums.each { + assertEquals(expected, it, "${tbl}: expected ${expected}, got ${it}") + } + } + + docker(options) { + def tblExplicitSsd = "medium_mode_explicit_ssd" + def tblExplicitHdd = "medium_mode_explicit_hdd" + def tblImplicit = "medium_mode_implicit" + + [tblExplicitSsd, tblExplicitHdd, tblImplicit].each { + sql "drop table if exists ${it}" + } + + // (1) explicit storage_medium=SSD -> STRICT + sql """ + CREATE TABLE ${tblExplicitSsd} ( + k1 BIGINT, + v1 VARCHAR(64) + ) + DUPLICATE KEY(k1) + DISTRIBUTED BY HASH(k1) BUCKETS 2 + PROPERTIES ( + "storage_medium" = "SSD", + "replication_num" = "1" + ); + """ + + // (2) explicit storage_medium=HDD -> STRICT + sql """ + CREATE TABLE ${tblExplicitHdd} ( + k1 BIGINT, + v1 VARCHAR(64) + ) + DUPLICATE KEY(k1) + DISTRIBUTED BY HASH(k1) BUCKETS 2 + PROPERTIES ( + "storage_medium" = "HDD", + "replication_num" = "1" + ); + """ + + // (3) no storage_medium specified -> ADAPTIVE (falls back to default HDD) + sql """ + CREATE TABLE ${tblImplicit} ( + k1 BIGINT, + v1 VARCHAR(64) + ) + DUPLICATE KEY(k1) + DISTRIBUTED BY HASH(k1) BUCKETS 2 + PROPERTIES ( + "replication_num" = "1" + ); + """ + + sleep 1000 + + assertAllEqual(collectPartitionMediums(tblExplicitSsd), "SSD", tblExplicitSsd) + assertAllEqual(collectPartitionMediums(tblExplicitHdd), "HDD", tblExplicitHdd) + assertAllEqual(collectPartitionMediums(tblImplicit), "HDD", tblImplicit) + + // Restart the FE to exercise the new serialized mediumAllocationMode + // field. The legacy storageMediumSpecified boolean was never persisted, + // so this step also covers the bug-fix aspect of PR-1: after a restart + // the STRICT intent must still be remembered. + cluster.restartFrontends() + sleep 5000 + context.reconnectFe() + + assertAllEqual(collectPartitionMediums(tblExplicitSsd), "SSD", tblExplicitSsd) + assertAllEqual(collectPartitionMediums(tblExplicitHdd), "HDD", tblExplicitHdd) + assertAllEqual(collectPartitionMediums(tblImplicit), "HDD", tblImplicit) + } +} + +// Non-docker coverage for the same PR-1 user-visible semantics. The docker +// suite above owns mixed HDD/SSD and FE restart coverage; this suite is meant +// to run against an already-started regression cluster and only asserts the +// behaviours that can be verified from SQL metadata in that environment. +suite("test_medium_allocation_mode_compat_non_docker", 'p0') { + if (isCloudMode()) { + return + } + + def collectAvailableMediums = { -> + def mediums = [] as Set + def backends = sql_return_maparray "SHOW PROC '/backends'" + backends.each { be -> + def disks = sql_return_maparray "SHOW PROC '/backends/${be.BackendId}'" + disks.each { disk -> + if (disk.State == 'ONLINE' && (disk.StorageMedium == 'SSD' || disk.StorageMedium == 'HDD')) { + mediums << disk.StorageMedium + } + } + } + return mediums + } + + def collectPartitionMediums = { tbl -> + def partitions = sql_return_maparray "SHOW PARTITIONS FROM ${tbl};" + def media = [] + partitions.each { media << it.StorageMedium } + return media + } + + def assertAllEqual = { mediums, expected, tbl -> + log.info("${tbl} storage mediums: ${mediums}") + assertTrue(!mediums.isEmpty(), "${tbl} must have at least one partition") + mediums.each { + assertEquals(expected, it, "${tbl}: expected ${expected}, got ${it}") + } + } + + def availableMediums = collectAvailableMediums() + log.info("available storage mediums in current non-docker cluster: ${availableMediums}") + assertTrue(!availableMediums.isEmpty(), "current cluster must have at least one HDD or SSD disk") + + // Explicit storage_medium is the user-visible trigger for STRICT. Only test + // mediums that are physically available in the current non-docker cluster so + // the case is stable across developers' local environments. + availableMediums.each { medium -> + def tbl = "medium_mode_non_docker_${medium.toLowerCase()}" + sql "drop table if exists ${tbl}" + sql """ + CREATE TABLE ${tbl} ( + k1 BIGINT, + v1 VARCHAR(64) + ) + DUPLICATE KEY(k1) + DISTRIBUTED BY HASH(k1) BUCKETS 2 + PROPERTIES ( + "storage_medium" = "${medium}", + "replication_num" = "1" + ); + """ + assertAllEqual(collectPartitionMediums(tbl), medium, tbl) + } + + // No storage_medium specified keeps the existing CREATE TABLE syntax and + // maps internally to ADAPTIVE. The realized medium depends on the current + // cluster, so only assert that it is a real medium available on this cluster. + def tblImplicit = "medium_mode_non_docker_implicit" + sql "drop table if exists ${tblImplicit}" + sql """ + CREATE TABLE ${tblImplicit} ( + k1 BIGINT, + v1 VARCHAR(64) + ) + DUPLICATE KEY(k1) + DISTRIBUTED BY HASH(k1) BUCKETS 2 + PROPERTIES ( + "replication_num" = "1" + ); + """ + def implicitMediums = collectPartitionMediums(tblImplicit) + log.info("${tblImplicit} storage mediums: ${implicitMediums}") + assertTrue(!implicitMediums.isEmpty(), "${tblImplicit} must have at least one partition") + implicitMediums.each { + assertTrue(availableMediums.contains(it), "${tblImplicit}: unexpected medium ${it}") + } +} From 99aa83f4c4480126ed1003ad9b03f5c7d7d55d09 Mon Sep 17 00:00:00 2001 From: Ryan19929 Date: Wed, 13 May 2026 20:07:33 +0800 Subject: [PATCH 2/2] [test](fe) Improve medium allocation coverage ### What problem does this PR solve? Issue Number: close #xxx Related PR: None Problem Summary: Improve coverage for the medium allocation mode metadata change by testing DataProperty's persisted mode behavior, keeping the enum API scoped to PR-1, and adding a regression case for ADAPTIVE fallback versus STRICT failure. ### Release note None ### Check List (For Author) - Test: Unit Test attempted, but local Maven dependency resolution failed for missing FE 1.2-SNAPSHOT modules; `ReadLints` and `git diff --check` passed for changed files. - Behavior changed: No - Does this need documentation: No Co-authored-by: Cursor --- .../doris/catalog/MediumAllocationMode.java | 38 +---------- .../doris/catalog/DataPropertyTest.java | 41 ++++++++++++ .../catalog/MediumAllocationModeTest.java | 26 -------- .../test_medium_allocation_mode_compat.groovy | 65 +++++++++++++++++++ 4 files changed, 108 insertions(+), 62 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/MediumAllocationMode.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/MediumAllocationMode.java index 7bb93f9566cebf..ca9708a7f25f87 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/MediumAllocationMode.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/MediumAllocationMode.java @@ -17,10 +17,6 @@ package org.apache.doris.catalog; -import org.apache.doris.common.AnalysisException; - -import com.google.common.base.Strings; - /** * Defines how Doris decides the storage medium of a data property * (partition-level / table-level). @@ -40,18 +36,8 @@ * */ public enum MediumAllocationMode { - STRICT("strict"), - ADAPTIVE("adaptive"); - - private final String value; - - MediumAllocationMode(String value) { - this.value = value; - } - - public String getValue() { - return value; - } + STRICT, + ADAPTIVE; public boolean isStrict() { return this == STRICT; @@ -60,24 +46,4 @@ public boolean isStrict() { public boolean isAdaptive() { return this == ADAPTIVE; } - - /** - * Parse a user-provided string (case-insensitive, trimmed) into the enum. - * - * @throws AnalysisException if the value is null, blank or unrecognised. - */ - public static MediumAllocationMode fromString(String value) throws AnalysisException { - String trimmed = Strings.nullToEmpty(value).trim(); - if (trimmed.isEmpty()) { - throw new AnalysisException("medium_allocation_mode cannot be null or empty"); - } - for (MediumAllocationMode mode : values()) { - if (mode.value.equalsIgnoreCase(trimmed)) { - return mode; - } - } - throw new AnalysisException(String.format( - "Invalid medium_allocation_mode value: '%s'. Valid options are: 'strict', 'adaptive'", - value)); - } } diff --git a/fe/fe-core/src/test/java/org/apache/doris/catalog/DataPropertyTest.java b/fe/fe-core/src/test/java/org/apache/doris/catalog/DataPropertyTest.java index 4d4f0b6b2e8a41..0b21b81eb01486 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/catalog/DataPropertyTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/catalog/DataPropertyTest.java @@ -25,6 +25,7 @@ import org.junit.Assert; import org.junit.Test; +import java.lang.reflect.Field; import java.util.HashMap; import java.util.Map; @@ -94,6 +95,46 @@ public void testOldImageDeserialisesToAdaptive() throws Exception { Assert.assertFalse(restored.isStorageMediumSpecified()); } + @Test + public void testPostProcessDefaultsNullMediumAllocationMode() throws Exception { + DataProperty dataProperty = new DataProperty(TStorageMedium.HDD); + Field modeField = DataProperty.class.getDeclaredField("mediumAllocationMode"); + modeField.setAccessible(true); + modeField.set(dataProperty, null); + + dataProperty.gsonPostProcess(); + + Assert.assertEquals(MediumAllocationMode.ADAPTIVE, dataProperty.getMediumAllocationMode()); + } + + @Test + public void testCopyConstructorPreservesMediumAllocationMode() { + DataProperty source = new DataProperty(TStorageMedium.SSD); + source.setMediumAllocationMode(MediumAllocationMode.STRICT); + + DataProperty copied = new DataProperty(source); + + Assert.assertEquals(MediumAllocationMode.STRICT, copied.getMediumAllocationMode()); + } + + @Test + public void testHashCodeConsistentForSameMediumAllocationMode() { + DataProperty strict = new DataProperty(TStorageMedium.SSD); + strict.setMediumAllocationMode(MediumAllocationMode.STRICT); + DataProperty copied = new DataProperty(strict); + + Assert.assertEquals(strict, copied); + Assert.assertEquals(strict.hashCode(), copied.hashCode()); + } + + @Test + public void testToStringIncludesMediumAllocationMode() { + DataProperty dataProperty = new DataProperty(TStorageMedium.SSD); + dataProperty.setMediumAllocationMode(MediumAllocationMode.STRICT); + + Assert.assertTrue(dataProperty.toString().contains("medium allocation mode[STRICT]")); + } + @Test public void testLegacySetterMapsToEnum() { DataProperty dataProperty = new DataProperty(TStorageMedium.SSD); diff --git a/fe/fe-core/src/test/java/org/apache/doris/catalog/MediumAllocationModeTest.java b/fe/fe-core/src/test/java/org/apache/doris/catalog/MediumAllocationModeTest.java index fa28d0301fe63a..913ca613781ba2 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/catalog/MediumAllocationModeTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/catalog/MediumAllocationModeTest.java @@ -17,37 +17,11 @@ package org.apache.doris.catalog; -import org.apache.doris.common.AnalysisException; - import org.junit.Assert; import org.junit.Test; public class MediumAllocationModeTest { - @Test - public void testFromStringCaseInsensitive() throws AnalysisException { - Assert.assertEquals(MediumAllocationMode.STRICT, MediumAllocationMode.fromString("strict")); - Assert.assertEquals(MediumAllocationMode.STRICT, MediumAllocationMode.fromString("STRICT")); - Assert.assertEquals(MediumAllocationMode.STRICT, MediumAllocationMode.fromString(" Strict ")); - Assert.assertEquals(MediumAllocationMode.ADAPTIVE, MediumAllocationMode.fromString("adaptive")); - Assert.assertEquals(MediumAllocationMode.ADAPTIVE, MediumAllocationMode.fromString("ADAPTIVE")); - } - - @Test(expected = AnalysisException.class) - public void testFromStringNullThrows() throws AnalysisException { - MediumAllocationMode.fromString(null); - } - - @Test(expected = AnalysisException.class) - public void testFromStringBlankThrows() throws AnalysisException { - MediumAllocationMode.fromString(" "); - } - - @Test(expected = AnalysisException.class) - public void testFromStringUnknownThrows() throws AnalysisException { - MediumAllocationMode.fromString("lax"); - } - @Test public void testIsStrictIsAdaptive() { Assert.assertTrue(MediumAllocationMode.STRICT.isStrict()); diff --git a/regression-test/suites/storage_medium_p0/test_medium_allocation_mode_compat.groovy b/regression-test/suites/storage_medium_p0/test_medium_allocation_mode_compat.groovy index 50522f5973ca6d..88e93b7361feb2 100644 --- a/regression-test/suites/storage_medium_p0/test_medium_allocation_mode_compat.groovy +++ b/regression-test/suites/storage_medium_p0/test_medium_allocation_mode_compat.groovy @@ -115,6 +115,71 @@ suite("test_medium_allocation_mode_compat", 'docker') { } } +// ADAPTIVE means the configured/default medium is only a hint. On a HDD-only +// cluster with default_storage_medium=SSD, implicit tables should still be +// creatable and should land on HDD, while explicit SSD remains STRICT and fails. +suite("test_medium_allocation_mode_adaptive_fallback", 'docker') { + def options = new ClusterOptions() + options.feConfigs += [ + 'default_storage_medium=SSD', + ] + options.beDisks = ['HDD=1'] + + def collectPartitionMediums = { tbl -> + def partitions = sql_return_maparray "SHOW PARTITIONS FROM ${tbl};" + def media = [] + partitions.each { media << it.StorageMedium } + return media + } + + def assertAllEqual = { mediums, expected, tbl -> + log.info("${tbl} storage mediums: ${mediums}") + assertTrue(!mediums.isEmpty(), "${tbl} must have at least one partition") + mediums.each { + assertEquals(expected, it, "${tbl}: expected ${expected}, got ${it}") + } + } + + docker(options) { + def tblImplicit = "medium_mode_adaptive_fallback" + def tblStrictSsd = "medium_mode_strict_ssd_no_disk" + + [tblImplicit, tblStrictSsd].each { + sql "drop table if exists ${it}" + } + + sql """ + CREATE TABLE ${tblImplicit} ( + k1 BIGINT, + v1 VARCHAR(64) + ) + DUPLICATE KEY(k1) + DISTRIBUTED BY HASH(k1) BUCKETS 2 + PROPERTIES ( + "replication_num" = "1" + ); + """ + sleep 1000 + assertAllEqual(collectPartitionMediums(tblImplicit), "HDD", tblImplicit) + + test { + sql """ + CREATE TABLE ${tblStrictSsd} ( + k1 BIGINT, + v1 VARCHAR(64) + ) + DUPLICATE KEY(k1) + DISTRIBUTED BY HASH(k1) BUCKETS 2 + PROPERTIES ( + "storage_medium" = "SSD", + "replication_num" = "1" + ); + """ + exception "Failed to find enough backend" + } + } +} + // Non-docker coverage for the same PR-1 user-visible semantics. The docker // suite above owns mixed HDD/SSD and FE restart coverage; this suite is meant // to run against an already-started regression cluster and only asserts the