From fb1839fa9ffb3147c6064dc75589f1cc9ce01174 Mon Sep 17 00:00:00 2001 From: deardeng Date: Thu, 9 Apr 2026 15:10:14 +0800 Subject: [PATCH] [fix](balance) Fix PartitionRebalancer generating invalid moves to BEs without required storage medium (#62206) When tablet_rebalancer_type=Partition, adding a new BE with only HDD disks to a cluster where tables use SSD storage medium causes the PartitionRebalancer to generate invalid moves (SSD tablets -> HDD-only BE), resulting in infinite "paths has no available balance slot: []" scheduling failures. Root cause: 1. In LoadStatisticForTag.init(), beByTotalReplicaCount for each medium includes ALL available BEs without checking hasMedium(). This causes the greedy algorithm to consider HDD-only BEs as valid destinations for SSD tablets. 2. In LocalTabletInvertedIndex.buildPartitionInfoBySkew(), the countMap initialization uses all availableBeIds without medium filtering, so HDD-only BEs get counted with 0 replicas for SSD partitions, making them appear as the "least loaded" and preferred move target. Fix: 1. Add hasMedium() filter in LoadStatisticForTag.init() when building beByTotalReplicaCount, so only BEs that actually have the required medium are considered for balancing. 2. Add availableBeIdsByMedium parameter to buildPartitionInfoBySkew() and use it to initialize countMap with only medium-matching BEs, preventing BEs without the required medium from appearing in the skew calculation. --- docker/runtime/doris-compose/command.py | 15 +- .../catalog/LocalTabletInvertedIndex.java | 13 +- .../doris/catalog/TabletInvertedIndex.java | 4 +- .../doris/clone/LoadStatisticForTag.java | 13 +- .../doris/clone/PartitionRebalancer.java | 2 +- .../org/apache/doris/clone/RebalanceTest.java | 81 ++++++++ .../doris/clone/RebalancerTestUtil.java | 16 ++ .../regression/suite/SuiteCluster.groovy | 31 +-- ...artition_rebalancer_medium_mismatch.groovy | 178 ++++++++++++++++++ 9 files changed, 332 insertions(+), 21 deletions(-) create mode 100644 regression-test/suites/storage_medium_p0/test_partition_rebalancer_medium_mismatch.groovy diff --git a/docker/runtime/doris-compose/command.py b/docker/runtime/doris-compose/command.py index 6acbe4795321f4..c7883454899af1 100644 --- a/docker/runtime/doris-compose/command.py +++ b/docker/runtime/doris-compose/command.py @@ -391,7 +391,7 @@ def add_parser(self, args_parsers): ) group1.add_argument("--be-disks", nargs="*", - default=["HDD=1"], + default=None, type=str, help="Specify each be disks, each group is \"disk_type=disk_num[,disk_capactity]\", "\ "disk_type is HDD or SSD, disk_capactity is capactity limit in gb. default: HDD=1. "\ @@ -628,7 +628,8 @@ def run(self, args): args.NAME, args.IMAGE, args.cloud, args.root, args.fe_config, args.be_config, args.ms_config, args.recycle_config, args.remote_master_fe, args.local_network_ip, args.fe_follower, - args.be_disks, args.be_cluster, args.reg_be, args.extra_hosts, + args.be_disks if args.be_disks is not None else ["HDD=1"], + args.be_cluster, args.reg_be, args.extra_hosts, args.coverage_dir, cloud_store_config, args.sql_mode_node_mgr, args.be_metaservice_endpoint, args.be_cluster_id, args.tde_ak, args.tde_sk) LOG.info("Create new cluster {} succ, cluster path is {}".format( @@ -669,9 +670,19 @@ def do_add_node(node_type, add_num, add_ids): related_nodes.append(node) add_ids.append(node.id) + # If --be-disks is explicitly provided for an existing cluster, + # temporarily override cluster.be_disks so newly added BEs use + # the specified disk config instead of the original cluster config. + saved_be_disks = cluster.be_disks + if args.be_disks is not None: + cluster.be_disks = args.be_disks + for node_type, add_num, add_ids in add_type_nums: do_add_node(node_type, add_num, add_ids) + # Restore original be_disks to avoid side effects + cluster.be_disks = saved_be_disks + if args.IMAGE: for node in related_nodes: node.set_image(args.IMAGE) diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/LocalTabletInvertedIndex.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/LocalTabletInvertedIndex.java index 27d1b6ba6755e1..44a143222430ec 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/LocalTabletInvertedIndex.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/LocalTabletInvertedIndex.java @@ -925,7 +925,9 @@ public void setPartitionCollectInfoMap(ImmutableMap // Only build from available bes, exclude colocate tables @Override public Map> buildPartitionInfoBySkew( - List availableBeIds, Map> movesInProgress) { + List availableBeIds, + Map> availableBeIdsByMedium, + Map> movesInProgress) { Set dbIds = Sets.newHashSet(); Set tableIds = Sets.newHashSet(); Set partitionIds = Sets.newHashSet(); @@ -986,11 +988,14 @@ public Map> buildPartit Map countMap = partitionReplicasInfo.get( tabletMeta.getPartitionId(), tabletMeta.getIndexId()); if (countMap == null) { - // If one be doesn't have any replica of one partition, it should be counted too. - countMap = availableBeIds.stream().collect(Collectors.toMap(i -> i, i -> 0L)); + // If one be doesn't have any replica of one partition, + // it should be counted too. + List availableBeIdsForMedium = availableBeIdsByMedium.getOrDefault( + medium, Lists.newArrayList()); + countMap = availableBeIdsForMedium.stream().collect(Collectors.toMap(i -> i, i -> 0L)); } - Long count = countMap.get(beId); + Long count = countMap.getOrDefault(beId, 0L); countMap.put(beId, count + 1L); partitionReplicasInfo.put(tabletMeta.getPartitionId(), tabletMeta.getIndexId(), countMap); partitionReplicasInfoMaps.put(medium, partitionReplicasInfo); diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/TabletInvertedIndex.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/TabletInvertedIndex.java index 420c1087066761..0e1e68ef9a785a 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/TabletInvertedIndex.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/TabletInvertedIndex.java @@ -199,7 +199,9 @@ public void setPartitionCollectInfoMap(ImmutableMap } public Map> buildPartitionInfoBySkew( - List availableBeIds, Map> movesInProgress) { + List availableBeIds, + Map> availableBeIdsByMedium, + Map> movesInProgress) { throw new UnsupportedOperationException("buildPartitionInfoBySkew is not supported in TabletInvertedIndex"); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/clone/LoadStatisticForTag.java b/fe/fe-core/src/main/java/org/apache/doris/clone/LoadStatisticForTag.java index 60a0d147917f6f..e731d0701e013b 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/clone/LoadStatisticForTag.java +++ b/fe/fe-core/src/main/java/org/apache/doris/clone/LoadStatisticForTag.java @@ -159,7 +159,8 @@ public void init() { // Only count the available be for (TStorageMedium medium : TStorageMedium.values()) { TreeMultimap beByTotalReplicaCount = TreeMultimap.create(); - beLoadStatistics.stream().filter(BackendLoadStatistic::isAvailable).forEach(beStat -> + beLoadStatistics.stream().filter(BackendLoadStatistic::isAvailable) + .filter(beStat -> beStat.hasMedium(medium)).forEach(beStat -> beByTotalReplicaCount.put(beStat.getReplicaNum(medium), beStat.getBeId())); beByTotalReplicaCountMaps.put(medium, beByTotalReplicaCount); } @@ -173,9 +174,17 @@ public void init() { .filter(BackendLoadStatistic::isAvailable) .map(BackendLoadStatistic::getBeId) .collect(Collectors.toList()); + Map> availableBeIdsByMedium = Maps.newHashMap(); + for (TStorageMedium medium : TStorageMedium.values()) { + availableBeIdsByMedium.put(medium, beLoadStatistics.stream() + .filter(BackendLoadStatistic::isAvailable) + .filter(be -> be.hasMedium(medium)) + .map(BackendLoadStatistic::getBeId) + .collect(Collectors.toList())); + } Map> movesInProgress = rebalancer == null ? Maps.newHashMap() : ((PartitionRebalancer) rebalancer).getMovesInProgress(); - skewMaps = invertedIndex.buildPartitionInfoBySkew(availableBeIds, movesInProgress); + skewMaps = invertedIndex.buildPartitionInfoBySkew(availableBeIds, availableBeIdsByMedium, movesInProgress); } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/clone/PartitionRebalancer.java b/fe/fe-core/src/main/java/org/apache/doris/clone/PartitionRebalancer.java index 30a7a76b920328..96db70cf76f294 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/clone/PartitionRebalancer.java +++ b/fe/fe-core/src/main/java/org/apache/doris/clone/PartitionRebalancer.java @@ -305,7 +305,7 @@ protected void completeSchedCtx(TabletSchedCtx tabletCtx) List paths = beStat.getPathStatistics(); List availPath = paths.stream().filter(path -> path.getStorageMedium() == tabletCtx.getStorageMedium() - && path.isFit(tabletCtx.getTabletSize(), false) == BalanceStatus.OK) + && path.isFit(tabletCtx.getTabletSize(), false).ok()) .map(RootPathLoadStatistic::getPathHash).collect(Collectors.toList()); long pathHash = slot.takeAnAvailBalanceSlotFrom(availPath, tabletCtx.getTag(), tabletCtx.getStorageMedium()); diff --git a/fe/fe-core/src/test/java/org/apache/doris/clone/RebalanceTest.java b/fe/fe-core/src/test/java/org/apache/doris/clone/RebalanceTest.java index 4e91ec5be3b8b8..87ece62b7572bf 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/clone/RebalanceTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/clone/RebalanceTest.java @@ -323,6 +323,87 @@ public void testPartitionRebalancer() { Assert.assertEquals(needCheckTablets.size(), succeeded.get()); } + // Test for OPENSOURCE-192: PartitionRebalancer should not generate moves + // targeting a BE that lacks the required storage medium. + // Scenario: SSD tablets on BE 20001/20002, new BE 20003 has only HDD. + // Without the fix, the algorithm would pick BE 20003 (0 SSD replicas) as the + // "least loaded" destination for SSD tablets, causing infinite scheduling failures. + @Test + public void testPartitionRebalancerSkipBEWithoutMedium() { + Configurator.setLevel("org.apache.doris.clone.PartitionRebalancer", Level.DEBUG); + + // Add backends: 20001, 20002 have SSD; 20003 has only HDD + systemInfoService.addBackend( + RebalancerTestUtil.createBackend(20001L, 2048, 0, TStorageMedium.SSD)); + systemInfoService.addBackend( + RebalancerTestUtil.createBackend(20002L, 2048, 0, TStorageMedium.SSD)); + systemInfoService.addBackend( + RebalancerTestUtil.createBackend(20003L, 2048, 0, TStorageMedium.HDD)); + + // Create a table with SSD partition + OlapTable ssdTable = new OlapTable(3, "ssd table", new ArrayList<>(), + KeysType.DUP_KEYS, new RangePartitionInfo(), new HashDistributionInfo()); + db.registerTable(ssdTable); + + MaterializedIndex ssdIndex = new MaterializedIndex(ssdTable.getId(), null); + long partId = 41; + Partition partition = new Partition(partId, "p0", ssdIndex, new HashDistributionInfo()); + ssdTable.addPartition(partition); + ssdTable.getPartitionInfo().addPartition(partId, new DataProperty(TStorageMedium.SSD), + ReplicaAllocation.DEFAULT_ALLOCATION, false, true); + ssdTable.setIndexMeta(ssdIndex.getId(), "ssd index", Lists.newArrayList(new Column()), + 0, 0, (short) 0, TStorageType.COLUMN, KeysType.DUP_KEYS); + + // Create SSD tablets: 3 replicas on BE 20001, 1 on BE 20002 + // This creates skew = 3 - 1 = 2 among SSD BEs (with fix), + // or skew = 3 - 0 = 3 counting HDD-only BEs (without fix) + RebalancerTestUtil.createTablet(invertedIndex, db, ssdTable, "p0", TStorageMedium.SSD, + 80001, Lists.newArrayList(20001L)); + RebalancerTestUtil.createTablet(invertedIndex, db, ssdTable, "p0", TStorageMedium.SSD, + 80002, Lists.newArrayList(20001L)); + RebalancerTestUtil.createTablet(invertedIndex, db, ssdTable, "p0", TStorageMedium.SSD, + 80003, Lists.newArrayList(20001L)); + RebalancerTestUtil.createTablet(invertedIndex, db, ssdTable, "p0", TStorageMedium.SSD, + 80004, Lists.newArrayList(20002L)); + + // Regenerate statistics with partition rebalancer + Config.tablet_rebalancer_type = "partition"; + LoadStatisticForTag loadStatistic = new LoadStatisticForTag( + Tag.DEFAULT_BACKEND_TAG, systemInfoService, invertedIndex, null); + loadStatistic.init(); + Map ssdStatMap = Maps.newHashMap(); + ssdStatMap.put(Tag.DEFAULT_BACKEND_TAG, loadStatistic); + + PartitionRebalancer rebalancer = new PartitionRebalancer(Env.getCurrentSystemInfo(), + Env.getCurrentInvertedIndex(), null); + rebalancer.updateLoadStatistic(ssdStatMap); + rebalancer.selectAlternativeTablets(); + + // Verify: moves were generated (test is meaningful) + Map> moves = rebalancer.getMovesInProgress(); + Assert.assertFalse("Should generate moves for skewed SSD partition", moves.isEmpty()); + + // Verify: no move targets BE 20003 (HDD-only) or any of the HDD BEs from setUp (10001-10004) + for (Map.Entry> entry : moves.entrySet()) { + PartitionRebalancer.TabletMove move = entry.getValue().first; + Assert.assertNotEquals("Move should not target HDD-only BE for SSD tablet", + Long.valueOf(20003L), move.toBe); + Assert.assertFalse("Move should not target any BE without SSD", + move.toBe == 10001L || move.toBe == 10002L + || move.toBe == 10003L || move.toBe == 10004L); + } + + // Verify: all moves go from BE 20001 (most loaded) to BE 20002 (least loaded with SSD) + for (Map.Entry> entry : moves.entrySet()) { + PartitionRebalancer.TabletMove move = entry.getValue().first; + Assert.assertEquals("Source should be the most loaded SSD BE", + Long.valueOf(20001L), move.fromBe); + Assert.assertEquals("Dest should be the least loaded SSD BE", + Long.valueOf(20002L), move.toBe); + } + LOG.info("testPartitionRebalancerSkipBEWithoutMedium success"); + } + @Test public void testMoveInProgressMap() { Configurator.setLevel("org.apache.doris.clone.MovesInProgressCache", Level.DEBUG); diff --git a/fe/fe-core/src/test/java/org/apache/doris/clone/RebalancerTestUtil.java b/fe/fe-core/src/test/java/org/apache/doris/clone/RebalancerTestUtil.java index 6f1d14ebf59cce..0864a996ee8d90 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/clone/RebalancerTestUtil.java +++ b/fe/fe-core/src/test/java/org/apache/doris/clone/RebalancerTestUtil.java @@ -51,6 +51,22 @@ public static Backend createBackend(long id, long totalCap, long usedCap) { return createBackend(id, totalCap, Lists.newArrayList(usedCap), 1); } + // Add only one path with specified storage medium, PathHash:id + public static Backend createBackend(long id, long totalCap, long usedCap, TStorageMedium medium) { + Backend be = new Backend(id, "192.168.0." + id, 9051); + Map disks = Maps.newHashMap(); + DiskInfo diskInfo = new DiskInfo("/path1"); + diskInfo.setPathHash(id); + diskInfo.setTotalCapacityB(totalCap); + diskInfo.setDataUsedCapacityB(usedCap); + diskInfo.setAvailableCapacityB(totalCap - usedCap); + diskInfo.setStorageMedium(medium); + disks.put(diskInfo.getRootPath(), diskInfo); + be.setDisks(ImmutableMap.copyOf(disks)); + be.setAlive(true); + return be; + } + /** * size of usedCaps should equal to diskNum. */ diff --git a/regression-test/framework/src/main/groovy/org/apache/doris/regression/suite/SuiteCluster.groovy b/regression-test/framework/src/main/groovy/org/apache/doris/regression/suite/SuiteCluster.groovy index dd1c2b8f2fddbe..da4617ff5d36ff 100644 --- a/regression-test/framework/src/main/groovy/org/apache/doris/regression/suite/SuiteCluster.groovy +++ b/regression-test/framework/src/main/groovy/org/apache/doris/regression/suite/SuiteCluster.groovy @@ -524,10 +524,15 @@ class SuiteCluster { } List addFrontend(int num, boolean followerMode=false) throws Exception { - def result = add(num, 0, null, followerMode) + def result = add(0, num, '', false, null) return result.first } + List addBackend(int num, List beDisks) throws Exception { + def result = add(0, num, '', false, beDisks) + return result.second + } + List addBackend(int num, String ClusterName='') throws Exception { def result = add(0, num, ClusterName) return result.second @@ -535,29 +540,33 @@ class SuiteCluster { // ATTN: clusterName just used for cloud mode, 1 cluster has n bes // ATTN: followerMode just used for cloud mode - Tuple2, List> add(int feNum, int beNum, String clusterName, boolean followerMode=false) throws Exception { + // ATTN: beDisks just used for not cloud mode + Tuple2, List> add(int feNum, int beNum, String clusterName, boolean followerMode=false, List beDisks=null) throws Exception { assert feNum > 0 || beNum > 0 - def sb = new StringBuilder() - sb.append('up ' + name + ' ') + def cmd = ['up', name] if (feNum > 0) { - sb.append('--add-fe-num ' + feNum + ' ') + cmd += ['--add-fe-num', String.valueOf(feNum)] if (followerMode) { - sb.append('--fe-follower' + ' ') + cmd += ['--fe-follower'] } if (sqlModeNodeMgr) { - sb.append('--sql-mode-node-mgr' + ' ') + cmd += ['--sql-mode-node-mgr'] } } if (beNum > 0) { - sb.append('--add-be-num ' + beNum + ' ') + cmd += ['--add-be-num', String.valueOf(beNum)] if (clusterName != null && !clusterName.isEmpty()) { - sb.append(' --be-cluster ' + clusterName + ' ') + cmd += ['--be-cluster', clusterName] } } - sb.append('--wait-timeout 60') + if (beDisks != null && !beDisks.isEmpty()) { + cmd += ['--be-disks'] + cmd += beDisks + } + cmd += ['--wait-timeout', '60'] - def data = (Map>) runCmd(sb.toString(), 180) + def data = (Map>) runCmdList(cmd, 180) def newFrontends = (List) data.get('fe').get('add_list') def newBackends = (List) data.get('be').get('add_list') diff --git a/regression-test/suites/storage_medium_p0/test_partition_rebalancer_medium_mismatch.groovy b/regression-test/suites/storage_medium_p0/test_partition_rebalancer_medium_mismatch.groovy new file mode 100644 index 00000000000000..5aceb14891431e --- /dev/null +++ b/regression-test/suites/storage_medium_p0/test_partition_rebalancer_medium_mismatch.groovy @@ -0,0 +1,178 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +import org.apache.doris.regression.suite.ClusterOptions + +/** + * Reproduce OPENSOURCE-192: + * When tablet_rebalancer_type=Partition, adding a new BE with only HDD disks + * to a cluster where tables are created with storage_medium=SSD causes + * the PartitionRebalancer to generate invalid moves (SSD tablets -> HDD-only BE), + * resulting in infinite "paths has no available balance slot: []" errors. + * + * Root cause: In LoadStatisticForTag.init(), the beByTotalReplicaCount map + * for each medium includes ALL available BEs without checking hasMedium(). + * Similarly, TabletInvertedIndex.buildPartitionInfoBySkew() includes all + * availableBeIds in countMap without medium filtering. This causes the + * greedy algorithm to generate moves targeting BEs that lack the required + * storage medium. + * + * Setup: + * - 3 initial BEs with SSD + HDD disks + * - Table created with storage_medium = SSD (explicitly specified) + * - Add 1 new BE with HDD only (via addBackend with custom beDisks) + * - PartitionRebalancer generates invalid moves to the HDD-only BE + */ +suite('test_partition_rebalancer_medium_mismatch', 'docker') { + if (isCloudMode()) { + return + } + + def options = new ClusterOptions() + options.feConfigs += [ + 'tablet_rebalancer_type=Partition', + 'schedule_slot_num_per_hdd_path=8', + 'balance_slot_num_per_path=2', + 'disable_balance=false', + 'disable_disk_balance=true', + 'tablet_checker_interval_ms=2000', + 'schedule_batch_size=1000', + ] + options.beConfigs += [ + 'report_disk_state_interval_seconds=2', + 'report_tablet_interval_seconds=3', + ] + // Initial 3 BEs: each has 1 SSD + 1 HDD + options.beDisks = ['SSD=1', 'HDD=1'] + options.beNum = 3 + + docker(options) { + // Step 1: Create table explicitly with SSD medium + def table = 'tbl_ssd_balance' + sql "DROP TABLE IF EXISTS ${table} FORCE" + sql """ + CREATE TABLE ${table} ( + k1 INT, + k2 VARCHAR(100), + v1 INT + ) + DISTRIBUTED BY HASH(k1) BUCKETS 10 + PROPERTIES ( + 'replication_num' = '1', + 'storage_medium' = 'SSD' + ) + """ + + // Verify partition medium is SSD + def partitions = sql_return_maparray "SHOW PARTITIONS FROM ${table}" + assertTrue(partitions.size() > 0) + partitions.each { + assertEquals('SSD', it.StorageMedium) + } + log.info("Table created with SSD medium, partitions: ${partitions.size()}") + + // Step 2: Insert data to distribute tablets across existing BEs + for (int i = 0; i < 100; i++) { + sql "INSERT INTO ${table} VALUES (${i}, 'value_${i}', ${i * 10})" + } + + def count = sql "SELECT COUNT(*) FROM ${table}" + assertEquals(100, count[0][0] as int) + + // Record tablet distribution before expansion + def tabletsBefore = sql_return_maparray "SHOW TABLETS FROM ${table}" + log.info("Tablets before expansion: ${tabletsBefore.size()}") + def beIdsBefore = tabletsBefore.collect { it.BackendId }.unique() + log.info("Tablets on BEs: ${beIdsBefore}") + + // Let scheduler settle + sleep(10000) + + // Step 3: Add a new BE with HDD only (different disk config from initial BEs) + log.info("Adding new BE with HDD-only disks...") + def newBeIndices = cluster.addBackend(1, ['HDD=1']) + log.info("New BE added with indices: ${newBeIndices}") + + // Wait for new BE heartbeat and disk report + sleep(8000) + + // Verify all backends + def backends = sql_return_maparray "SHOW BACKENDS" + log.info("Total backends after expansion: ${backends.size()}") + assertEquals(4, backends.size()) + + // Find the new BE + def newBeId = null + for (def be : backends) { + if (!(be.BackendId in beIdsBefore.collect { it as String })) { + newBeId = be.BackendId + break + } + } + assertNotNull(newBeId, "Should find new BE") + log.info("New BE id: ${newBeId}") + + // Verify new BE has only HDD + def newBeDisks = sql_return_maparray "SHOW PROC '/backends/${newBeId}'" + log.info("New BE disks: ${newBeDisks}") + def hasSSD = newBeDisks.any { it.StorageMedium == 'SSD' } + def hasHDD = newBeDisks.any { it.StorageMedium == 'HDD' } + assertTrue(hasHDD, "New BE should have HDD disk") + assertFalse(hasSSD, "New BE should NOT have SSD disk") + + // Step 4: Wait for PartitionRebalancer to attempt balance scheduling + // The bug: algorithm generates moves targeting the HDD-only BE for SSD tablets + log.info("Waiting for PartitionRebalancer to run (60s)...") + sleep(60000) + + // Step 5: Check balance history for the bug signature + def schedHistory = sql_return_maparray "SHOW PROC '/cluster_balance/history_tablets'" + def failedWithEmptySlot = schedHistory.findAll { + it.ErrMsg != null && it.ErrMsg.contains('paths has no available balance slot: []') + } + + log.info("Total history entries: ${schedHistory.size()}") + log.info("Entries with 'empty slot' error: ${failedWithEmptySlot.size()}") + + if (failedWithEmptySlot.size() > 0) { + log.warn("BUG REPRODUCED (OPENSOURCE-192)! " + + "Found ${failedWithEmptySlot.size()} balance tasks " + + "failed with 'paths has no available balance slot: []'") + failedWithEmptySlot.take(5).each { task -> + log.warn(" tablet=${task.TabletId}, dest=${task.DestBe}, err=${task.ErrMsg}") + } + // This assertion will fail when the bug is present, and pass after fix + fail("BUG: PartitionRebalancer generated invalid moves to HDD-only BE for SSD tablets") + } else { + log.info("No 'empty slot' failures. Bug not triggered or already fixed.") + } + + // Step 6: Check that no tablets moved to the new BE + // (since it has no SSD, SSD tablets should NOT be relocated there) + def tabletsAfter = sql_return_maparray "SHOW TABLETS FROM ${table}" + def tabletsOnNewBe = tabletsAfter.findAll { it.BackendId == newBeId } + log.info("Tablets on new HDD-only BE: ${tabletsOnNewBe.size()}") + assertEquals(0, tabletsOnNewBe.size()) + + // Step 7: Verify data integrity + def countAfter = sql "SELECT COUNT(*) FROM ${table}" + assertEquals(100, countAfter[0][0] as int) + + // Cleanup + sql "DROP TABLE IF EXISTS ${table} FORCE" + } +}