Skip to content

Commit

Permalink
Merge pull request #5014 from sfc-gh-jslocum/tss_delay_fix
Browse files Browse the repository at this point in the history
Don't inject TSS faults if speedUpSimulation is set
  • Loading branch information
sfc-gh-jslocum committed Jun 18, 2021
2 parents b53b165 + d1d2ca9 commit df0219e
Show file tree
Hide file tree
Showing 3 changed files with 15 additions and 12 deletions.
6 changes: 1 addition & 5 deletions fdbserver/QuietDatabase.actor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -358,11 +358,7 @@ ACTOR Future<int64_t> getMaxStorageServerQueueSize(Database cx, Reference<AsyncV
.detail("SS", servers[i].id());
throw attribute_not_found();
}
// Ignore TSS in add delay mode since it can purposefully freeze forever
if (!servers[i].isTss() || !g_network->isSimulated() ||
g_simulator.tssMode != ISimulator::TSSMode::EnabledAddDelay) {
messages.push_back(getStorageMetricsTimeout(servers[i].id(), itr->second));
}
messages.push_back(getStorageMetricsTimeout(servers[i].id(), itr->second));
}

wait(waitForAll(messages));
Expand Down
16 changes: 13 additions & 3 deletions fdbserver/storageserver.actor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3446,6 +3446,15 @@ class StorageUpdater {
}
};

// Simulation-only stall used by TSS fault injection: update() waits on this actor where it
// previously waited on Never(). Unlike Never(), this actor can complete: it finishes the first
// time g_simulator.speedUpSimulation is observed set, so an injected "delay forever" no longer
// blocks the storage server once the simulation is being sped up.
// Returns Void() on completion; it does not otherwise produce a value.
ACTOR Future<Void> tssDelayForever() {
loop {
// Poll rather than block indefinitely. The flag is only sampled after each delay(5.0)
// (presumably seconds -- confirm against delay()'s contract), so the stall always lasts
// at least one full delay period even if the flag is already set on entry.
wait(delay(5.0));
if (g_simulator.speedUpSimulation) {
return Void();
}
}
}

ACTOR Future<Void> update(StorageServer* data, bool* pReceivedUpdate) {
state double start;
try {
Expand All @@ -3466,12 +3475,13 @@ ACTOR Future<Void> update(StorageServer* data, bool* pReceivedUpdate) {
}

if (g_network->isSimulated() && data->isTss() && g_simulator.tssMode == ISimulator::TSSMode::EnabledAddDelay &&
data->tssFaultInjectTime.present() && data->tssFaultInjectTime.get() < now()) {
!g_simulator.speedUpSimulation && data->tssFaultInjectTime.present() &&
data->tssFaultInjectTime.get() < now()) {
if (deterministicRandom()->random01() < 0.01) {
TraceEvent(SevWarnAlways, "TSSInjectDelayForever", data->thisServerID);
// small random chance to just completely get stuck here, each tss should eventually hit this in this
// mode
wait(Never());
wait(tssDelayForever());
} else {
// otherwise pause for part of a second
double delayTime = deterministicRandom()->random01();
Expand Down Expand Up @@ -3666,7 +3676,7 @@ ACTOR Future<Void> update(StorageServer* data, bool* pReceivedUpdate) {

// Drop non-private mutations if TSS fault injection is enabled in simulation, or if this is a TSS in
// quarantine.
if (g_network->isSimulated() && data->isTss() &&
if (g_network->isSimulated() && data->isTss() && !g_simulator.speedUpSimulation &&
g_simulator.tssMode == ISimulator::TSSMode::EnabledDropMutations &&
data->tssFaultInjectTime.present() && data->tssFaultInjectTime.get() < now() &&
(msg.type == MutationRef::SetValue || msg.type == MutationRef::ClearRange) &&
Expand Down
5 changes: 1 addition & 4 deletions fdbserver/workloads/ConsistencyCheck.actor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1498,10 +1498,7 @@ struct ConsistencyCheckWorkload : TestWorkload {
.error(e);

// All shards should be available in quiescence
if (self->performQuiescentChecks &&
((g_network->isSimulated() &&
g_simulator.tssMode != ISimulator::TSSMode::EnabledAddDelay) ||
!storageServerInterfaces[j].isTss())) {
if (self->performQuiescentChecks) {
self->testFailure("Storage server unavailable");
return false;
}
Expand Down

0 comments on commit df0219e

Please sign in to comment.