From b501fb7cbcd5fa3247750db0e67227b6e8ff0fbc Mon Sep 17 00:00:00 2001 From: Terry Wilson Date: Wed, 24 Aug 2022 13:10:17 -0700 Subject: [PATCH] core: Update outlier detection max ejection logic. Instead of strictly enforcing the max ejection percentage setting, allow one additional ejection past the maximum. This quarantees at least one ejection and matches Envoy proxy behavior. --- .../util/OutlierDetectionLoadBalancer.java | 19 +++++++++++-------- .../OutlierDetectionLoadBalancerTest.java | 13 ++++++------- 2 files changed, 17 insertions(+), 15 deletions(-) diff --git a/core/src/main/java/io/grpc/util/OutlierDetectionLoadBalancer.java b/core/src/main/java/io/grpc/util/OutlierDetectionLoadBalancer.java index 068bfb4de6d..12d71ba2152 100644 --- a/core/src/main/java/io/grpc/util/OutlierDetectionLoadBalancer.java +++ b/core/src/main/java/io/grpc/util/OutlierDetectionLoadBalancer.java @@ -654,10 +654,9 @@ void maybeUnejectOutliers(Long detectionTimerStartNanos) { } /** - * How many percent of the addresses would have their subchannels ejected if we proceeded - * with the next ejection. + * How many percent of the addresses have been ejected. */ - double nextEjectionPercentage() { + double ejectionPercentage() { if (trackerMap.isEmpty()) { return 0; } @@ -669,7 +668,7 @@ void maybeUnejectOutliers(Long detectionTimerStartNanos) { ejectedAddresses++; } } - return ((double)(ejectedAddresses + 1) / totalAddresses) * 100; + return ((double)ejectedAddresses / totalAddresses) * 100; } } @@ -733,8 +732,10 @@ public void ejectOutliers(AddressTrackerMap trackerMap, long ejectionTimeNanos) mean - stdev * (config.successRateEjection.stdevFactor / 1000f); for (AddressTracker tracker : trackersWithVolume) { - // If an ejection now would take us past the max configured ejection percentage, stop here. - if (trackerMap.nextEjectionPercentage() > config.maxEjectionPercent) { + // If we are above the max ejection percentage, don't eject any more. This will allow the + // total ejections to go one above the max, but at the same time it assures at least one + // ejection, which the spec calls for. This behavior matches what Envoy proxy does. + if (trackerMap.ejectionPercentage() > config.maxEjectionPercent) { return; } @@ -803,8 +804,10 @@ public void ejectOutliers(AddressTrackerMap trackerMap, long ejectionTimeNanos) // If this address does not have enough volume to be considered, skip to the next one. for (AddressTracker tracker : trackerMap.values()) { - // If an ejection now would take us past the max configured ejection percentage stop here. - if (trackerMap.nextEjectionPercentage() > config.maxEjectionPercent) { + // If we are above the max ejection percentage, don't eject any more. This will allow the + // total ejections to go one above the max, but at the same time it assures at least one + // ejection, which the spec calls for. This behavior matches what Envoy proxy does. + if (trackerMap.ejectionPercentage() > config.maxEjectionPercent) { return; } diff --git a/core/src/test/java/io/grpc/util/OutlierDetectionLoadBalancerTest.java b/core/src/test/java/io/grpc/util/OutlierDetectionLoadBalancerTest.java index cbd32c91904..94426d2c9de 100644 --- a/core/src/test/java/io/grpc/util/OutlierDetectionLoadBalancerTest.java +++ b/core/src/test/java/io/grpc/util/OutlierDetectionLoadBalancerTest.java @@ -574,10 +574,11 @@ public void successRateTwoOutliers() { } /** - * Two outliers. but only one gets ejected because we have reached the max ejection percentage. + * Three outliers, second one ejected even if ejecting it goes above the max ejection percentage, + * as this matches Envoy behavior. The third one should not get ejected. */ @Test - public void successRateTwoOutliers_maxEjectionPercentage() { + public void successRateThreeOutliers_maxEjectionPercentage() { OutlierDetectionLoadBalancerConfig config = new OutlierDetectionLoadBalancerConfig.Builder() .setMaxEjectionPercent(20) .setSuccessRateEjection( @@ -591,7 +592,8 @@ public void successRateTwoOutliers_maxEjectionPercentage() { generateLoad(ImmutableMap.of( subchannel1, Status.DEADLINE_EXCEEDED, - subchannel2, Status.DEADLINE_EXCEEDED), 7); + subchannel2, Status.DEADLINE_EXCEEDED, + subchannel3, Status.DEADLINE_EXCEEDED), 7); // Move forward in time to a point where the detection timer has fired. forwardTime(config); @@ -603,10 +605,7 @@ public void successRateTwoOutliers_maxEjectionPercentage() { : 0; } - // Even if all subchannels were failing, we should have not ejected more than the configured - // maximum percentage. - assertThat((double) totalEjected / servers.size()).isAtMost( - (double) config.maxEjectionPercent / 100); + assertThat(totalEjected).isEqualTo(2); }