@@ -40,6 +40,7 @@
#include "kudu/client/shared_ptr.h" // IWYU pragma: keep
#include "kudu/common/common.pb.h"
#include "kudu/common/partial_row.h"
+#include "kudu/common/partition.h"
#include "kudu/common/schema.h"
#include "kudu/common/wire_protocol-test-util.h"
#include "kudu/gutil/mathlimits.h"
@@ -53,7 +54,9 @@
#include "kudu/mini-cluster/external_mini_cluster.h"
#include "kudu/mini-cluster/mini_cluster.h"
#include "kudu/rpc/rpc_controller.h"
+#include "kudu/tablet/tablet.pb.h"
#include "kudu/tools/tool_test_util.h"
+#include "kudu/tserver/tserver.pb.h"
#include "kudu/util/atomic.h"
#include "kudu/util/metrics.h"
#include "kudu/util/monotime.h"
@@ -199,7 +202,10 @@ TEST_F(CreateTableITest, TestCreateWhenMajorityOfReplicasFailCreation) {
TEST_F(CreateTableITest, TestSpreadReplicasEvenly) {
  const int kNumServers = 10;
  const int kNumTablets = 20;
-  NO_FATALS(StartCluster({}, {}, kNumServers));
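+  // Judging by the flag name, --enable_range_replica_placement toggles range-aware
+  // replica placement; it is pinned off so this test keeps exercising the original
+  // even-spread policy (the range-aware path is covered by TestSpreadReplicas below).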
+  vector<string> master_flags = {
+    "--enable_range_replica_placement=false",
+  };
+  NO_FATALS(StartCluster({}, master_flags, kNumServers));

  unique_ptr<client::KuduTableCreator> table_creator(client_->NewTableCreator());
  auto client_schema = KuduSchema::FromSchema(GetSimpleTestSchema());
@@ -399,6 +405,99 @@ TEST_F(CreateTableITest, TestSpreadReplicasEvenlyWithDimension) {
  }
}

+// Tests the range-aware replica placement by adding multiple tables with multiple
+// ranges and checking the replica distribution.
+TEST_F(CreateTableITest, TestSpreadReplicas) {
+  const int kNumServers = 5;
+  const int kNumReplicas = 3;
+  NO_FATALS(StartCluster({}, {}, kNumServers));
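+  // Unlike TestSpreadReplicasEvenly above, no master flags are overridden here:
+  // this test relies on range-aware replica placement, which appears to be
+  // enabled by default.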
+
+  Schema schema = Schema({ ColumnSchema("key1", INT32),
+                           ColumnSchema("key2", INT32),
+                           ColumnSchema("int_val", INT32),
+                           ColumnSchema("string_val", STRING, true) }, 2);
+  auto client_schema = KuduSchema::FromSchema(schema);
+
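+  // Helper that creates a table with 'num_buckets' hash buckets on "key1" and one
+  // range partition on "key2" per pair of bounds, each with kNumReplicas replicas.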
+  auto create_table_func = [](KuduClient* client,
+                              KuduSchema* client_schema,
+                              const string& table_name,
+                              const vector<std::pair<int32_t, int32_t>>& range_bounds,
+                              const int num_buckets) {
+    unique_ptr<client::KuduTableCreator> table_creator(client->NewTableCreator());
+    table_creator->table_name(table_name)
+        .schema(client_schema)
+        .add_hash_partitions({ "key1" }, num_buckets)
+        .set_range_partition_columns({ "key2" })
+        .num_replicas(kNumReplicas);
+    for (const auto& range_bound : range_bounds) {
+      unique_ptr<KuduPartialRow> lower_bound(client_schema->NewRow());
+      RETURN_NOT_OK(lower_bound->SetInt32("key2", range_bound.first));
+      unique_ptr<KuduPartialRow> upper_bound(client_schema->NewRow());
+      RETURN_NOT_OK(upper_bound->SetInt32("key2", range_bound.second));
+      table_creator->add_range_partition(lower_bound.release(), upper_bound.release());
+    }
+    return table_creator->Create();
+  };
+
+  vector<string> tables = { "table1", "table2", "table3", "table4" };
+  vector<std::pair<int32_t, int32_t>> range_bounds =
+      { { 0, 100 }, { 100, 200 }, { 200, 300 }, { 300, 400 } };
+  const int doubleNumBuckets = 10;
+  const int numBuckets = 5;
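+  // "table1" is created with twice as many hash buckets per range as the other
+  // tables, so its per-range replica counts are expected to be twice as large.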
+  for (const auto& table : tables) {
+    if (table == "table1") {
+      ASSERT_OK(create_table_func(
+          client_.get(), &client_schema, table, range_bounds, doubleNumBuckets));
+    } else {
+      ASSERT_OK(create_table_func(
+          client_.get(), &client_schema, table, range_bounds, numBuckets));
+    }
+  }
+
+  // Stats on the number of replicas per range per table per tserver.
+  typedef std::unordered_map<string, std::unordered_map<string, int>> replicas_per_range_per_table;
+  std::unordered_map<int, replicas_per_range_per_table> stats;
+  for (int ts_idx = 0; ts_idx < kNumServers; ts_idx++) {
+    rpc::RpcController rpc;
+    tserver::ListTabletsRequestPB req;
+    tserver::ListTabletsResponsePB resp;
+    ASSERT_OK(cluster_->tserver_proxy(ts_idx)->ListTablets(req, &resp, &rpc));
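+    // Key the stats by the range start key: tablets of the same range (i.e. its
+    // different hash buckets) share the same range start key.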
+    for (auto i = 0; i < resp.status_and_schema_size(); ++i) {
+      auto tablet_status = resp.status_and_schema(i).tablet_status();
+      if (tablet_status.has_partition()) {
+        Partition partition;
+        Partition::FromPB(tablet_status.partition(), &partition);
+        auto range_start_key = partition.begin().range_key();
+        auto table_name = tablet_status.table_name();
+        ++stats[ts_idx][table_name][range_start_key];
+      }
+    }
+  }
+
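+  // Expected totals: "table1" has 10 buckets x 4 ranges x 3 replicas = 120 replicas,
+  // and each of the other three tables has 5 x 4 x 3 = 60, i.e. 300 replicas overall;
+  // spread evenly across 5 tservers, that is 60 replicas per tserver.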
+  ASSERT_EQ(kNumServers, stats.size());
+  for (const auto& stat : stats) {
+    int tserver_replicas = 0;
+    // Verifies that all four tables have replicas on each tserver.
+    ASSERT_EQ(tables.size(), stat.second.size());
+    for (const auto& table : stat.second) {
+      // Verifies that all four ranges of each table are present on each tserver.
+      ASSERT_EQ(range_bounds.size(), table.second.size());
+      for (const auto& ranges : table.second) {
+        // Since "table1" has ten hash buckets instead of five, we expect twice as
+        // many replicas per range (6 instead of 3).
+        if (table.first == "table1") {
+          ASSERT_EQ(doubleNumBuckets * kNumReplicas / kNumServers, ranges.second);
+        } else {
+          ASSERT_EQ(numBuckets * kNumReplicas / kNumServers, ranges.second);
+        }
+        tserver_replicas += ranges.second;
+      }
+    }
+    // Verifies that 60 replicas are placed on each tserver, 300 in total across 5 tservers.
+    ASSERT_EQ(60, tserver_replicas);
+  }
+}
+
static void LookUpRandomKeysLoop(const std::shared_ptr<master::MasterServiceProxy>& master,
                                 const char* table_name,
                                 AtomicBool* quit) {
@@ -472,7 +571,7 @@ TEST_F(CreateTableITest, TestCreateTableWithDeadTServers) {
  unique_ptr<client::KuduTableCreator> table_creator(client_->NewTableCreator());

  // Don't bother waiting for table creation to finish; it'll never happen
-  // because all of the tservers are dead.
+  // because all the tservers are dead.
  CHECK_OK(table_creator->table_name(kTableName)
           .schema(&client_schema)
           .set_range_partition_columns({ "key" })
@@ -688,7 +787,7 @@ TEST_P(NotEnoughHealthyTServersTest, TestNotEnoughHealthyTServers) {
  }
  // Wait for the 3 tablet servers' heartbeat timeout and unresponsive timeout. Then the catalog
  // manager will take them as unavailable tablet servers. KSCK gets the status of a tablet
-  // server from tablet serve interface. Here must wait the caltalog manager to take the
+  // server from the tablet server interface. Here we must wait for the catalog manager to take them
  // as unavailable.
  SleepFor(MonoDelta::FromMilliseconds(3 * (kTSUnresponsiveTimeoutMs + kHeartbeatIntervalMs)));
}
@@ -703,7 +802,7 @@ TEST_P(NotEnoughHealthyTServersTest, TestNotEnoughHealthyTServers) {
  {
    // Restart the first tablet server.
    NO_FATALS(cluster_->tablet_server(0)->Restart());
-    // Wait the restarted tablet server to send a heartbeat and be registered in catalog manaager.
+    // Wait for the restarted tablet server to heartbeat and register with the catalog manager.
    SleepFor(MonoDelta::FromMilliseconds(kHeartbeatIntervalMs));
  }

@@ -713,7 +812,7 @@ TEST_P(NotEnoughHealthyTServersTest, TestNotEnoughHealthyTServers) {
  {
    // Restart the second tablet server.
    NO_FATALS(cluster_->tablet_server(1)->Restart());
-    // Wait the restarted tablet server to send a heartbeat and be registered in catalog manaager.
+    // Wait for the restarted tablet server to heartbeat and register with the catalog manager.
    SleepFor(MonoDelta::FromMilliseconds(kHeartbeatIntervalMs));
  }

@@ -755,7 +854,7 @@ TEST_P(NotEnoughHealthyTServersTest, TestNotEnoughHealthyTServers) {
    // Add one new tablet server.
    NO_FATALS(cluster_->AddTabletServer());
  } else {
-    // Restart the stopped tablet server
+    // Restart the stopped tablet server.
    NO_FATALS(cluster_->tablet_server(2)->Restart());
  }
