Skip to content

Commit a84c837

Browse files
mreddy7alexeyserbin
authored andcommitted
KUDU-3532: Fix range aware replica placement bug
An implicit conversion from unsigned long to int caused an std::length_error to be thrown when a vector tried to reserve a a size greater than the max size. This happens when a negative number is converted. This bug was introduced by changelist [1]. I also added tests with tablet servers in multiple locations. This omission caused this bug to go unnoticed until now. [1] 10fdaf6a9 Change-Id: Id5d696d58965590a9f91f8b1b59f23225bbad8ee Reviewed-on: http://gerrit.cloudera.org:8080/20781 Tested-by: Kudu Jenkins Reviewed-by: Alexey Serbin <alexey@apache.org>
1 parent 6681514 commit a84c837

File tree

2 files changed

+170
-56
lines changed

2 files changed

+170
-56
lines changed

src/kudu/master/placement_policy-test.cc

Lines changed: 166 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -1085,6 +1085,17 @@ TEST_F(PlacementPolicyTest, PlaceTabletReplicasWithRangesExtremeCase) {
10851085
ASSERT_EQ(500, stats["ts0"]);
10861086
ASSERT_EQ(500, stats["ts1"]);
10871087
}
1088+
1089+
{
1090+
ASSERT_OK(Prepare(cluster_info));
1091+
const auto& all = descriptors();
1092+
PlacementPolicy policy(all, rng());
1093+
const auto existing = GetDescriptors({"ts1"});
1094+
shared_ptr<TSDescriptor> extra_ts;
1095+
ASSERT_OK(policy.PlaceExtraTabletReplica(existing, nullopt, "a1", "t1", &extra_ts));
1096+
ASSERT_TRUE(extra_ts);
1097+
ASSERT_TRUE(extra_ts->permanent_uuid() == "ts0") << extra_ts->permanent_uuid();
1098+
}
10881099
}
10891100

10901101
// RF is equal to the number of tablet servers, so replica placement will be same with ranges.
@@ -1142,6 +1153,18 @@ TEST_F(PlacementPolicyTest, PlaceTabletReplicasWithRanges) {
11421153
ASSERT_EQ(1000, stats["ts1"]);
11431154
ASSERT_EQ(1000, stats["ts2"]);
11441155
}
1156+
1157+
{
1158+
ASSERT_OK(Prepare(cluster_info));
1159+
const auto& all = descriptors();
1160+
PlacementPolicy policy(all, rng());
1161+
const auto existing = GetDescriptors({"ts2"});
1162+
shared_ptr<TSDescriptor> extra_ts;
1163+
ASSERT_OK(policy.PlaceExtraTabletReplica(existing, nullopt, "a1", "t1", &extra_ts));
1164+
ASSERT_TRUE(extra_ts);
1165+
const auto& ts_uuid = extra_ts->permanent_uuid();
1166+
ASSERT_TRUE(ts_uuid == "ts0" || ts_uuid == "ts1") << extra_ts->permanent_uuid();
1167+
}
11451168
}
11461169

11471170
// RF is 3 while number of tablet servers is 4 with 2 of them empty and the other 2 loaded with
@@ -1209,6 +1232,18 @@ TEST_F(PlacementPolicyTest, PlaceTabletReplicasWithRangesAndTables) {
12091232
ASSERT_LE(368, stats["ts3"]);
12101233
ASSERT_GE(378, stats["ts3"]);
12111234
}
1235+
1236+
{
1237+
ASSERT_OK(Prepare(cluster_info));
1238+
const auto& all = descriptors();
1239+
PlacementPolicy policy(all, rng());
1240+
const auto existing = GetDescriptors({"ts3"});
1241+
shared_ptr<TSDescriptor> extra_ts;
1242+
ASSERT_OK(policy.PlaceExtraTabletReplica(existing, nullopt, "a1", "t1", &extra_ts));
1243+
ASSERT_TRUE(extra_ts);
1244+
const auto& ts_uuid = extra_ts->permanent_uuid();
1245+
ASSERT_TRUE(ts_uuid == "ts0" || ts_uuid == "ts1") << extra_ts->permanent_uuid();
1246+
}
12121247
}
12131248

12141249
// RF is 3 while number of tablet servers is 6. Half of them are empty while the other half contain
@@ -1288,6 +1323,20 @@ TEST_F(PlacementPolicyTest, PlaceTabletReplicasWithRangesAndTablesWithDoubleTSer
12881323
ASSERT_LE(137, stats["ts5"]);
12891324
ASSERT_GE(147, stats["ts5"]);
12901325
}
1326+
1327+
{
1328+
ASSERT_OK(Prepare(cluster_info));
1329+
const auto& all = descriptors();
1330+
PlacementPolicy policy(all, rng());
1331+
const auto existing = GetDescriptors({"ts4", "ts5"});
1332+
shared_ptr<TSDescriptor> extra_ts;
1333+
ASSERT_OK(policy.PlaceExtraTabletReplica(existing, nullopt, "a1", "t1", &extra_ts));
1334+
ASSERT_TRUE(extra_ts);
1335+
const auto& ts_uuid = extra_ts->permanent_uuid();
1336+
ASSERT_TRUE(ts_uuid == "ts0" ||
1337+
ts_uuid == "ts1" ||
1338+
ts_uuid == "ts2") << extra_ts->permanent_uuid();
1339+
}
12911340
}
12921341

12931342
// With RF = 3, 6 tablet servers, and a well-balanced cluster initially, this test case adds
@@ -1377,68 +1426,132 @@ TEST_F(PlacementPolicyTest, PlaceTabletReplicasWithImbalanceByTable) {
13771426
},
13781427
};
13791428

1380-
ASSERT_OK(Prepare(cluster_info));
1381-
const auto& all = descriptors();
1382-
PlacementPolicy policy(all, rng());
1383-
typedef std::unordered_map<string, std::unordered_map<string, int>> replicas_per_range_per_table;
1384-
std::unordered_map<string, replicas_per_range_per_table> stats;
1385-
vector<string> ranges = {"a1", "b1", "c1", "d1", "e1", "f1"};
1386-
vector<string> tables = {"t1", "t2"};
1387-
for (const auto& table : tables) {
1388-
for (const auto& range : ranges) {
1389-
for (auto i = 0; i < 500; ++i) {
1390-
TSDescriptorVector result;
1391-
ASSERT_OK(policy.PlaceTabletReplicas(nReplicationFactor, nullopt, range, table, &result));
1392-
ASSERT_EQ(nReplicationFactor, result.size());
1393-
for (const auto& ts : result) {
1394-
const auto& ts_uuid = ts->permanent_uuid();
1395-
++stats[ts_uuid][table][range];
1429+
{
1430+
ASSERT_OK(Prepare(cluster_info));
1431+
const auto& all = descriptors();
1432+
PlacementPolicy policy(all, rng());
1433+
typedef std::unordered_map<string, std::unordered_map<string, int>>
1434+
replicas_per_range_per_table;
1435+
std::unordered_map<string, replicas_per_range_per_table> stats;
1436+
vector<string> ranges = {"a1", "b1", "c1", "d1", "e1", "f1"};
1437+
vector<string> tables = {"t1", "t2"};
1438+
for (const auto& table : tables) {
1439+
for (const auto& range : ranges) {
1440+
for (auto i = 0; i < 500; ++i) {
1441+
TSDescriptorVector result;
1442+
ASSERT_OK(policy.PlaceTabletReplicas(nReplicationFactor, nullopt, range, table, &result));
1443+
ASSERT_EQ(nReplicationFactor, result.size());
1444+
for (const auto& ts : result) {
1445+
const auto& ts_uuid = ts->permanent_uuid();
1446+
++stats[ts_uuid][table][range];
1447+
}
13961448
}
13971449
}
13981450
}
1399-
}
14001451

1401-
ASSERT_EQ(kNumServers, stats.size());
1402-
vector<string> balanced_ranges = {"c1", "d1", "e1", "f1"};
1403-
vector<string> tservers1 = {"ts0", "ts2", "ts4"};
1404-
for (const auto& stat : stats) {
1405-
int tserver_replicas = 0;
1406-
// Verifies that two tables exist on each tserver.
1407-
ASSERT_EQ(2, stat.second.size());
1408-
for (const auto& table : stat.second) {
1409-
// Verifies that the six ranges exist for each table on each tserver.
1410-
ASSERT_EQ(6, table.second.size());
1411-
for (const auto& ranges : table.second) {
1412-
if (std::find(balanced_ranges.begin(),
1413-
balanced_ranges.end(), ranges.first) != balanced_ranges.end()) {
1414-
ASSERT_LE(245, ranges.second);
1415-
ASSERT_GE(255, ranges.second);
1416-
} else if (std::find(tservers1.begin(),
1417-
tservers1.end(), stat.first) != tservers1.end()) {
1418-
// Only range of "a1" or "b1" now. Checks which tservers and for table "t1" or "t2".
1419-
if (table.first == "t1") {
1420-
ASSERT_LE(120, ranges.second);
1421-
ASSERT_GE(130, ranges.second);
1422-
} else {
1423-
ASSERT_LE(370, ranges.second);
1424-
ASSERT_GE(380, ranges.second);
1425-
}
1426-
} else {
1427-
// Ranges "a1" or "b1" for tservers "ts1", "ts3", "ts5". Checks for table "t1" or "t2".
1428-
if (table.first == "t1") {
1429-
ASSERT_LE(370, ranges.second);
1430-
ASSERT_GE(380, ranges.second);
1452+
ASSERT_EQ(kNumServers, stats.size());
1453+
vector<string> balanced_ranges = {"c1", "d1", "e1", "f1"};
1454+
vector<string> tservers1 = {"ts0", "ts2", "ts4"};
1455+
for (const auto& stat : stats) {
1456+
int tserver_replicas = 0;
1457+
// Verifies that two tables exist on each tserver.
1458+
ASSERT_EQ(2, stat.second.size());
1459+
for (const auto& table : stat.second) {
1460+
// Verifies that the six ranges exist for each table on each tserver.
1461+
ASSERT_EQ(6, table.second.size());
1462+
for (const auto& ranges : table.second) {
1463+
if (std::find(balanced_ranges.begin(),
1464+
balanced_ranges.end(), ranges.first) != balanced_ranges.end()) {
1465+
ASSERT_LE(245, ranges.second);
1466+
ASSERT_GE(255, ranges.second);
1467+
} else if (std::find(tservers1.begin(),
1468+
tservers1.end(), stat.first) != tservers1.end()) {
1469+
// Only range of "a1" or "b1" now. Checks which tservers and for table "t1" or "t2".
1470+
if (table.first == "t1") {
1471+
ASSERT_LE(120, ranges.second);
1472+
ASSERT_GE(130, ranges.second);
1473+
} else {
1474+
ASSERT_LE(370, ranges.second);
1475+
ASSERT_GE(380, ranges.second);
1476+
}
14311477
} else {
1432-
ASSERT_LE(120, ranges.second);
1433-
ASSERT_GE(130, ranges.second);
1478+
// Ranges "a1" or "b1" for tservers "ts1", "ts3", "ts5". Checks for table "t1" or "t2".
1479+
if (table.first == "t1") {
1480+
ASSERT_LE(370, ranges.second);
1481+
ASSERT_GE(380, ranges.second);
1482+
} else {
1483+
ASSERT_LE(120, ranges.second);
1484+
ASSERT_GE(130, ranges.second);
1485+
}
14341486
}
1487+
tserver_replicas += ranges.second;
14351488
}
1436-
tserver_replicas += ranges.second;
14371489
}
1490+
// Verifies that around 3000 replicas are placed on each tserver.
1491+
ASSERT_LE(2990, tserver_replicas);
1492+
ASSERT_GE(3110, tserver_replicas);
14381493
}
1439-
// Verifies that around 3000 replicas are placed on each tserver.
1440-
ASSERT_LE(2990, tserver_replicas);
1441-
ASSERT_GE(3110, tserver_replicas);
1494+
}
1495+
1496+
{
1497+
ASSERT_OK(Prepare(cluster_info));
1498+
const auto& all = descriptors();
1499+
PlacementPolicy policy(all, rng());
1500+
const auto existing = GetDescriptors({"ts0", "ts2", "ts4"});
1501+
shared_ptr<TSDescriptor> extra_ts;
1502+
ASSERT_OK(policy.PlaceExtraTabletReplica(existing, nullopt, "a1", "t1", &extra_ts));
1503+
ASSERT_TRUE(extra_ts);
1504+
const auto& ts_uuid = extra_ts->permanent_uuid();
1505+
ASSERT_TRUE(ts_uuid == "ts1" ||
1506+
ts_uuid == "ts3" ||
1507+
ts_uuid == "ts5") << extra_ts->permanent_uuid();
1508+
}
1509+
}
1510+
1511+
TEST_F(PlacementPolicyTest, PlaceRangeAwareTabletReplicasWithLocations) {
1512+
TabletNumByRangeMap range_a({ {"a1", 2}});
1513+
TabletNumByRangeMap range_double_a({ {"a1", 4}});
1514+
TabletNumByRangeMap range_b({ {"b1", 2}});
1515+
TabletNumByRangeMap range_double_b({ {"b1", 2}});
1516+
TabletNumByRangeMap ranges_a_and_b({ {"a1", 2}, {"b1", 2} });
1517+
const vector<LocationInfo> cluster_info = {
1518+
{ "A", { { "A_ts0", 4, { }, { {"t1", range_double_a }, } },
1519+
{ "A_ts1", 4, { }, { {"t1", range_double_a }, } },
1520+
{ "A_ts2", 4, { }, { {"t1", range_double_b }, } }, } },
1521+
{ "B", { { "B_ts0", 4, { }, { {"t1", ranges_a_and_b }, } },
1522+
{ "B_ts1", 4, { }, { {"t1", range_a}, {"t2", range_a} } }, } },
1523+
{ "C", { { "C_ts0", 6, { }, { {"t1", range_a }, {"t2", ranges_a_and_b }, } },
1524+
{ "C_ts1", 4, { }, { {"t1", range_a }, {"t2", range_b} } }, } },
1525+
};
1526+
1527+
// To avoid placing a majority of replicas in one location,
1528+
// exactly one replica will be placed in each location.
1529+
// "A_ts2" is chosen because it has the least replicas per range "a1" in table "t1".
1530+
// "B_ts1" is chosen because it wins the replicas per table tiebreaker with "B_ts0".
1531+
// "C_ts1" is chosen because it wins the total replicas tiebreaker.
1532+
{
1533+
ASSERT_OK(Prepare(cluster_info));
1534+
const auto& all = descriptors();
1535+
PlacementPolicy policy(all, rng());
1536+
TSDescriptorVector result;
1537+
ASSERT_OK(policy.PlaceTabletReplicas(3, nullopt, "a1", "t1", &result));
1538+
ASSERT_EQ(3, result.size());
1539+
TSDescriptorsMap m;
1540+
ASSERT_OK(TSDescriptorVectorToMap(result, &m));
1541+
ASSERT_EQ(1, m.count("A_ts2"));
1542+
ASSERT_EQ(1, m.count("B_ts1"));
1543+
ASSERT_EQ(1, m.count("C_ts1"));
1544+
}
1545+
1546+
{
1547+
ASSERT_OK(Prepare(cluster_info));
1548+
const auto& all = descriptors();
1549+
PlacementPolicy policy(all, rng());
1550+
const auto existing = GetDescriptors({"A_ts0", "A_ts1", "B_ts0", "B_ts1", "C_ts0", "C_ts1"});
1551+
shared_ptr<TSDescriptor> extra_ts;
1552+
ASSERT_OK(policy.PlaceExtraTabletReplica(existing, nullopt, "a1", "t1", &extra_ts));
1553+
ASSERT_TRUE(extra_ts);
1554+
ASSERT_TRUE(extra_ts->permanent_uuid() == "A_ts2") << extra_ts->permanent_uuid();
14421555
}
14431556
}
14441557

src/kudu/master/placement_policy.cc

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -381,9 +381,10 @@ shared_ptr<TSDescriptor> PlacementPolicy::SelectReplica(
381381
const set<shared_ptr<TSDescriptor>>& excluded) const {
382382
if (range_key_start && table_id) {
383383
TSDescriptorVector ts_choices;
384-
auto choices_size = ts_descs.size() - excluded.size();
385-
ReservoirSample(ts_descs, choices_size, excluded, rng_, &ts_choices);
386-
DCHECK_EQ(ts_choices.size(), choices_size);
384+
const int ts_size = static_cast<int>(ts_descs.size());
385+
CHECK_GE(ts_size, 0);
386+
ReservoirSample(ts_descs, ts_size, excluded, rng_, &ts_choices);
387+
DCHECK_LE(ts_choices.size(), ts_size);
387388
if (ts_choices.size() > 1) {
388389
return PickTabletServer(
389390
ts_choices, range_key_start.value(), table_id.value(), dimension, rng_);

0 commit comments

Comments
 (0)