Skip to content
Permalink
Browse files

#824: Only count votes from VOTERS in the active config for determini…

…ng replicated OpIds

Summary: When determining which OpIds have been replicated, we now only accept votes from peers that are VOTERS in the active config. Before this change we were accepting votes from any peer that was a member of the active config.

Test Plan: New TestChangeConfigBasedOnJepsen

Reviewers: timur, mikhail, amitanand, kannan

Reviewed By: amitanand, kannan

Subscribers: ybase, bharat

Differential Revision: https://phabricator.dev.yugabyte.com/D6098
  • Loading branch information...
hectorgcr committed Feb 4, 2019
1 parent 26c875f commit 7a1bf5b1a9d56e5b991db881ec04e63d5a5e31cb
@@ -85,6 +85,8 @@ DEFINE_test_flag(bool, enable_remote_bootstrap, true,
"detects that a follower is out of date or does not have a tablet "
"replica.");

DECLARE_int32(log_change_config_every_n);

namespace yb {
namespace consensus {

@@ -257,12 +259,12 @@ void Peer::SendNextRequest(RequestTriggerMode trigger_mode) {
boost::optional<tserver::TabletServerErrorPB::Code> error_code;

// If another ChangeConfig is being processed, our request will be rejected.
LOG(INFO) << "Sending ChangeConfig request";
YB_LOG_EVERY_N(INFO, FLAGS_log_change_config_every_n)
<< "Sending ChangeConfig request to promote peer";
auto status = consensus_->ChangeConfig(req, &DoNothingStatusCB, &error_code);
if (PREDICT_FALSE(!status.ok())) {
LOG(WARNING) << "Unable to change role for peer "
<< uuid << ": "
<< status;
YB_LOG_EVERY_N(INFO, FLAGS_log_change_config_every_n)
<< "Unable to change role for peer " << uuid << ": " << status;
// Since we released the semaphore, we need to call SignalRequest again to send a message
status = SignalRequest(RequestTriggerMode::kAlwaysSend);
if (PREDICT_FALSE(!status.ok())) {
@@ -588,6 +588,10 @@ typename Policy::result_type PeerMessageQueue::GetWatermark() {
// value of the watermark.
continue;
}
if (!IsRaftConfigVoter(peer.uuid, *queue_state_.active_config)) {
// Only votes from VOTERs in the active config should be taken into consideration
continue;
}
if (peer.is_last_exchange_successful) {
watermarks.push_back(Policy::ExtractValue(peer));
}
@@ -176,6 +176,10 @@ DEFINE_test_flag(bool, pause_update_replica, false,
DEFINE_test_flag(bool, pause_update_majority_replicated, false,
"Pause RaftConsensus::UpdateMajorityReplicated.");

DEFINE_test_flag(int32, log_change_config_every_n, 1, "How often to log change config information. "
"Used to reduce the number of lines being printed for change config requests "
"when a test simulates a failure that would generate a log of these requests.");

namespace yb {
namespace consensus {

@@ -1965,7 +1969,8 @@ Status RaftConsensus::ChangeConfig(const ChangeConfigRequestPB& req,
return STATUS(InvalidArgument, "Must specify 'server' argument to ChangeConfig()",
req.ShortDebugString());
}
LOG(INFO) << "Received ChangeConfig request " << req.ShortDebugString();
YB_LOG_EVERY_N(INFO, FLAGS_log_change_config_every_n)
<< "Received ChangeConfig request " << req.ShortDebugString();
ChangeConfigType type = req.type();
bool use_hostport = req.has_use_host() && req.use_host();

@@ -2000,8 +2005,9 @@ Status RaftConsensus::ChangeConfig(const ChangeConfigRequestPB& req,
const string& server_uuid = server.has_permanent_uuid() ? server.permanent_uuid() : "";
s = IsLeaderReadyForChangeConfigUnlocked(type, server_uuid);
if (!s.ok()) {
LOG(INFO) << "Returning not ready for " << ChangeConfigType_Name(type)
<< " due to error " << s.ToString();
YB_LOG_EVERY_N(INFO, FLAGS_log_change_config_every_n)
<< "Returning not ready for " << ChangeConfigType_Name(type)
<< " due to error " << s.ToString();
*error_code = TabletServerErrorPB::LEADER_NOT_READY_CHANGE_CONFIG;
return s;
}
@@ -2732,7 +2732,6 @@ TEST_F(RaftConsensusITest, TestChangeConfigRejectedUnlessNoopReplicated) {
// because the followers are rejecting UpdateConsensus, and the leader needs to majority-replicate
// a lease expiration that is in the future in order to establish a leader lease.
ASSERT_OK(WaitUntilLeader(leader_ts, tablet_id_, timeout, LeaderLeaseCheckMode::DONT_NEED_LEASE));

// Now attempt to do a config change. It should be rejected because there have not been any ops
// (notably the initial NO_OP) from the leader's term that have been committed yet.
Status s = itest::RemoveServer(leader_ts,
@@ -2747,6 +2746,87 @@ TEST_F(RaftConsensusITest, TestChangeConfigRejectedUnlessNoopReplicated) {
"Leader not yet replicated NoOp to be ready to serve requests");
}

TEST_F(RaftConsensusITest, TestChangeConfigBasedOnJepsen) {
FLAGS_num_tablet_servers = 4;
vector<string> ts_flags = { "--enable_leader_failure_detection=false",
"--log_change_config_every_n=3000" };
vector<string> master_flags = { "--catalog_manager_wait_for_new_tablets_to_elect_leader=false" };
ASSERT_NO_FATALS(BuildAndStart(ts_flags, master_flags));
vector<TServerDetails*> tservers = TServerDetailsVector(tablet_servers_);
ASSERT_EQ(FLAGS_num_tablet_servers, tservers.size());

int leader_idx = -1;
int new_node_idx = -1;
for (int i = 0; i < 4; i++) {
auto& uuid = cluster_->tablet_server(i)->uuid();
if (nullptr == GetReplicaWithUuidOrNull(tablet_id_, uuid)) {
new_node_idx = i;
continue;
} else if (leader_idx == -1) {
leader_idx = i;
continue;
}
}

CHECK_NE(new_node_idx, -1) << "Could not find the node not having tablet_id_";

TServerDetails* leader_ts = tablet_servers_[cluster_->tablet_server(leader_idx)->uuid()].get();
MonoDelta timeout = MonoDelta::FromSeconds(30);
ASSERT_OK(StartElection(leader_ts, tablet_id_, timeout));

ASSERT_OK(WaitUntilLeader(leader_ts, tablet_id_, timeout, LeaderLeaseCheckMode::NEED_LEASE));

vector<TServerDetails*> tservers_list(4);
int ts_idx = 0;
tservers_list[ts_idx++] = tablet_servers_[cluster_->tablet_server(leader_idx)->uuid()].get();
for (int i = 0; i < 4; i++) {
if (i == leader_idx || i == new_node_idx) {
continue;
}
ASSERT_OK(cluster_->SetFlag(cluster_->tablet_server(i),
"follower_reject_update_consensus_requests", "true"));
tservers_list[ts_idx++] = tablet_servers_[cluster_->tablet_server(i)->uuid()].get();
}
tservers_list[ts_idx++] = tablet_servers_[cluster_->tablet_server(new_node_idx)->uuid()].get();

// Now attempt to do a config change. It should be rejected because there have not been any ops
// (notably the initial NO_OP) from the leader's term that have been committed yet.
Status s = itest::AddServer(leader_ts,
tablet_id_,
tablet_servers_[cluster_->tablet_server(new_node_idx)->uuid()].get(),
RaftPeerPB::PRE_VOTER,
boost::none,
timeout,
nullptr /* error_code */,
false /* retry */);
LOG(INFO) << "Got status " << yb::ToString(s);
const double kSleepDelaySec = 50;
SleepFor(MonoDelta::FromSeconds(kSleepDelaySec));
LOG(INFO) << "Done Sleeping";

vector<OpId> committed_op_ids, received_op_ids;
GetLastOpIdForEachReplica(tablet_id_, tservers_list, consensus::OpIdType::COMMITTED_OPID,
timeout, &committed_op_ids);
GetLastOpIdForEachReplica(tablet_id_, tservers_list, consensus::OpIdType::RECEIVED_OPID,
timeout, &received_op_ids);

for(int i = 0; i < 4; i++) {
LOG(INFO) << "i = " << i << " Peer " << tservers_list[i]->uuid()
<< " Committed op id " << yb::ToString(committed_op_ids[i])
<< " Last received op id " << yb::ToString(received_op_ids[i]);
}

const OpId kLeaderCommittedOpId = committed_op_ids[0];
int num_voters_who_received_committed_op_id = 0;
for(int i = 0; i < 3; i++) {
if (yb::consensus::OpIdCompare(kLeaderCommittedOpId, received_op_ids[i]) <= 0) {
num_voters_who_received_committed_op_id++;
}
}
CHECK_GE(num_voters_who_received_committed_op_id, 2)
<< "At least 2 voters should have received the op id";
}

// Test that if for some reason none of the transactions can be prepared, that it will come back as
// an error in UpdateConsensus().
TEST_F(RaftConsensusITest, TestUpdateConsensusErrorNonePrepared) {

1 comment on commit 7a1bf5b

@mbautin

This comment has been minimized.

Copy link
Collaborator

commented on 7a1bf5b Feb 28, 2019

To clarify, in the description to this commit we should have said "AppendEntries acknowledgments" instead of "votes".

Please sign in to comment.
You can’t perform that action at this time.