Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 17 additions & 7 deletions src/proxy/http/remap/NextHopHealthStatus.cc
Original file line number Diff line number Diff line change
Expand Up @@ -120,8 +120,12 @@ NextHopHealthStatus::markNextHop(TSHttpTxn txn, const char *hostname, const int
new_fail_count = h->failCount = 1;
}
} else if (result.retry == true) {
h->failedAt = _now;
new_fail_count = h->failCount += 1;
h->failedAt = _now;
if (h->failCount < UINT32_MAX) {
new_fail_count = ++h->failCount;
} else {
new_fail_count = UINT32_MAX;
}
}
} // end lock guard

Expand All @@ -132,19 +136,25 @@ NextHopHealthStatus::markNextHop(TSHttpTxn txn, const char *hostname, const int
{ // lock guard
std::lock_guard<std::mutex> lock(h->_mutex);
if ((h->failedAt.load() + retry_time) < _now) {
h->failedAt = _now;
new_fail_count = h->failCount = 1;
h->failedAt = _now;
} else {
new_fail_count = h->failCount += 1;
if (h->failCount < UINT32_MAX) {
new_fail_count = ++h->failCount;
} else {
new_fail_count = UINT32_MAX;
}
}
} // end of lock_guard
NH_Dbg(NH_DBG_CTL, "[%" PRId64 "] Parent fail count increased to %d for %s", sm_id, new_fail_count, h->hostname.c_str());
NH_Dbg(NH_DBG_CTL, "[%" PRId64 "] Parent fail count increased to %" PRIu32 " for %s", sm_id, new_fail_count,
h->hostname.c_str());
}

if (new_fail_count >= fail_threshold) {
h->set_unavailable();
NH_Note("[%" PRId64 "] Failure threshold met failcount:%d >= threshold:%" PRId64 ", http parent proxy %s marked down", sm_id,
new_fail_count, fail_threshold, h->hostname.c_str());
NH_Note("[%" PRId64 "] Failure threshold met failcount:%" PRIu32 " >= threshold:%" PRId64
", http parent proxy %s marked down",
sm_id, new_fail_count, fail_threshold, h->hostname.c_str());
NH_Dbg(NH_DBG_CTL, "[%" PRId64 "] NextHop %s marked unavailable, h->available=%s", sm_id, h->hostname.c_str(),
(h->available.load()) ? "true" : "false");
}
Expand Down
69 changes: 69 additions & 0 deletions src/proxy/http/remap/unit-tests/test_NextHopRoundRobin.cc
Original file line number Diff line number Diff line change
Expand Up @@ -381,3 +381,72 @@ SCENARIO("Testing NextHopRoundRobin class, using policy 'latched'", "[NextHopRou
}
}
}

SCENARIO("Testing NextHopHealthStatus failCount overflow saturation", "[NextHopHealthStatus]")
{
// No thread setup, forbid use of thread local allocators.
cmd_disable_pfreelist = true;
http_init();

GIVEN("Loading the round-robin-tests.yaml config for overflow test.")
{
NextHopStrategyFactory nhf(TS_SRC_DIR "/round-robin-tests.yaml");
NextHopSelectionStrategy *const strategy = nhf.strategyInstance("rr-strict-exhaust-ring");

REQUIRE(nhf.strategies_loaded == true);
REQUIRE(strategy != nullptr);

WHEN("failCount is near UINT32_MAX and markNextHop is called")
{
HttpSM sm;
ParentResult *result = &sm.t_state.parent_result;
TSHttpTxn txnp = reinterpret_cast<TSHttpTxn>(&sm);

// Select a host so result is populated.
build_request(20001, &sm, nullptr, "rabbit.net", nullptr);
strategy->findNextHop(txnp);
REQUIRE(result->result == ParentResultType::SPECIFIED);
REQUIRE(result->hostname != nullptr);

// Get the HostRecord for the selected host.
std::shared_ptr<HostRecord> host_rec;
for (auto &group : strategy->host_groups) {
for (auto &h : group) {
if (h->hostname == result->hostname) {
host_rec = h;
break;
}
}
if (host_rec) {
break;
}
}
REQUIRE(host_rec != nullptr);

// Set failCount to UINT32_MAX - 1 to test saturation.
host_rec->failCount = UINT32_MAX - 1;
host_rec->failedAt = time(nullptr);

// Set fail_threshold very high so set_unavailable doesn't interfere.
extern char _my_txn_conf[];
auto *oride = reinterpret_cast<OverridableHttpConfigParams *>(_my_txn_conf);
oride->parent_fail_threshold = static_cast<int64_t>(UINT32_MAX) + 1;
oride->parent_retry_time = 30; // large retry window so we stay in the increment path

THEN("failCount saturates at UINT32_MAX and does not wrap to 0")
{
// First markNextHop: UINT32_MAX-1 -> UINT32_MAX
strategy->markNextHop(txnp, result->hostname, result->port, NHCmd::MARK_DOWN);
CHECK(host_rec->failCount.load() == UINT32_MAX);

// Second markNextHop: should stay at UINT32_MAX (saturated)
strategy->markNextHop(txnp, result->hostname, result->port, NHCmd::MARK_DOWN);
CHECK(host_rec->failCount.load() == UINT32_MAX);

// Verify host is still available (threshold not met since threshold > UINT32_MAX)
CHECK(host_rec->available.load() == true);
}
br_destroy(sm);
}
}
}