Skip to content

Commit

Permalink
Also check young GC counts.
Browse files Browse the repository at this point in the history
  • Loading branch information
henningandersen committed Jul 7, 2020
1 parent addeede commit 110925d
Show file tree
Hide file tree
Showing 2 changed files with 73 additions and 9 deletions.
Expand Up @@ -32,8 +32,10 @@
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.unit.ByteSizeUnit;
import org.elasticsearch.common.unit.ByteSizeValue;
import org.elasticsearch.monitor.jvm.GcNames;
import org.elasticsearch.monitor.jvm.JvmInfo;

import java.lang.management.GarbageCollectorMXBean;
import java.lang.management.ManagementFactory;
import java.lang.management.MemoryMXBean;
import java.util.ArrayList;
Expand Down Expand Up @@ -112,7 +114,8 @@ public class HierarchyCircuitBreakerService extends CircuitBreakerService {
/**
 * Creates the service with the default over-limit strategy.
 * <p>
 * Delegates to the constructor that accepts an explicit strategy, handing it the result of
 * {@code createDoubleCheckStrategy}. That strategy is built from the running JVM's
 * {@link JvmInfo}, the {@code realMemoryUsage} supplier, {@link System#currentTimeMillis()}
 * as the clock and a fixed minimum interval of 5000 (presumably milliseconds, given the clock).
 *
 * @param settings        forwarded unchanged to the delegated constructor
 * @param customBreakers  forwarded unchanged to the delegated constructor
 * @param clusterSettings forwarded unchanged to the delegated constructor
 */
public HierarchyCircuitBreakerService(Settings settings, List<BreakerSettings> customBreakers, ClusterSettings clusterSettings) {
this(settings, customBreakers, clusterSettings,
// hardcode interval, do not want any tuning of it outside code changes.
createDoubleCheckStrategy(JvmInfo.jvmInfo(), HierarchyCircuitBreakerService::realMemoryUsage, System::currentTimeMillis, 5000));
createDoubleCheckStrategy(JvmInfo.jvmInfo(), HierarchyCircuitBreakerService::realMemoryUsage, createYoungGcCountSupplier(),
System::currentTimeMillis, 5000));
}

HierarchyCircuitBreakerService(Settings settings, List<BreakerSettings> customBreakers, ClusterSettings clusterSettings,
Expand Down Expand Up @@ -362,23 +365,41 @@ private CircuitBreaker validateAndCreateBreaker(BreakerSettings breakerSettings)
}

/**
 * Selects the strategy that is consulted when memory usage is over the limit.
 * <p>
 * A {@code G1OverLimitStrategy} is returned only when {@code jvmInfo.useG1GC()} equals
 * {@code "true"} and the system property
 * {@code es.real_memory_circuit_breaker.g1.double_check.enabled} parses as {@code true}
 * (it defaults to {@code "true"} when unset). In every other case the returned strategy
 * hands back the {@code MemoryUsage} it was given, unchanged.
 *
 * @param jvmInfo                    JVM information used to detect G1 and passed on to the G1 strategy
 * @param currentMemoryUsageSupplier passed on to the G1 strategy
 * @param gcCountSupplier            passed on to the G1 strategy
 * @param timeSupplier               passed on to the G1 strategy
 * @param minimumInterval            passed on to the G1 strategy
 * @return the G1 strategy, or a pass-through strategy
 */
private static OverLimitStrategy createDoubleCheckStrategy(JvmInfo jvmInfo, LongSupplier currentMemoryUsageSupplier,
LongSupplier timeSupplier, long minimumInterval) {
LongSupplier gcCountSupplier,
LongSupplier timeSupplier,long minimumInterval) {
if (jvmInfo.useG1GC().equals("true")
// messing with GC is "dangerous" so we apply an escape hatch. Not intended to be used.
&& Boolean.parseBoolean(System.getProperty("es.real_memory_circuit_breaker.g1.double_check.enabled", "true"))) {
return new G1OverLimitStrategy(jvmInfo, currentMemoryUsageSupplier, timeSupplier, minimumInterval);
return new G1OverLimitStrategy(jvmInfo, currentMemoryUsageSupplier, gcCountSupplier,
timeSupplier, minimumInterval);
} else {
// pass-through: the usage is returned exactly as it was received
return memoryUsed -> memoryUsed;
}
}

/**
 * Builds a supplier for the collection count of the young generation collector.
 * <p>
 * All garbage collector MX beans of this JVM are inspected and those whose name
 * {@code GcNames} classifies as {@code GcNames.YOUNG} are kept. When exactly one such bean
 * exists, the supplier reads its collection count on every call. Otherwise a warning is
 * logged and the supplier always yields {@code -1}.
 *
 * @return supplier of the young collection count, or a constant {@code -1} supplier
 */
static LongSupplier createYoungGcCountSupplier() {
    List<GarbageCollectorMXBean> youngCollectors = new ArrayList<>();
    for (GarbageCollectorMXBean candidate : ManagementFactory.getGarbageCollectorMXBeans()) {
        String collectorName = candidate.getName();
        if (GcNames.getByGcName(collectorName, collectorName).equals(GcNames.YOUNG)) {
            youngCollectors.add(candidate);
        }
    }

    // with assertions enabled an unexpected collector layout fails fast; without them we fall through
    assert youngCollectors.size() == 1;
    assert youngCollectors.get(0).getCollectionCount() != -1 : "G1 must support getting collection count";

    if (youngCollectors.size() != 1) {
        logger.warn("Unable to find young generation collector, G1 over limit strategy might be impacted [{}]", youngCollectors);
        return () -> -1;
    }

    GarbageCollectorMXBean youngCollector = youngCollectors.get(0);
    return youngCollector::getCollectionCount;
}

/**
 * Strategy applied to a memory usage measurement that is over the limit.
 * <p>
 * Implementations receive the measured usage and return the usage that should be used from
 * then on; returning the argument itself is a valid implementation. The interface is
 * annotated as functional because it is implemented with a lambda, so it must keep exactly
 * one abstract method.
 */
@FunctionalInterface
interface OverLimitStrategy {
    /**
     * @param memoryUsed the usage that was measured
     * @return the usage to continue with, possibly the same instance
     */
    MemoryUsage overLimit(MemoryUsage memoryUsed);
}

static class G1OverLimitStrategy implements OverLimitStrategy {
private final long g1RegionSize;
private final LongSupplier currentMemoryUsageSupplier;
private final LongSupplier gcCountSupplier;
private final LongSupplier timeSupplier;

private long lastCheckTime = Long.MIN_VALUE;
Expand All @@ -389,9 +410,11 @@ static class G1OverLimitStrategy implements OverLimitStrategy {
private final long maxHeap;

G1OverLimitStrategy(JvmInfo jvmInfo, LongSupplier currentMemoryUsageSupplier,
LongSupplier gcCountSupplier,
LongSupplier timeSupplier, long minimumInterval) {
assert minimumInterval > 0;
this.currentMemoryUsageSupplier = currentMemoryUsageSupplier;
this.gcCountSupplier = gcCountSupplier;
this.timeSupplier = timeSupplier;
this.minimumInterval = minimumInterval;
long g1RegionSize = jvmInfo.getG1RegionSize();
Expand All @@ -402,6 +425,7 @@ static class G1OverLimitStrategy implements OverLimitStrategy {
this.g1RegionSize = g1RegionSize;
}
maxHeap = jvmInfo.getMem().getHeapMax().getBytes();

}

static long fallbackRegionSize(JvmInfo jvmInfo) {
Expand Down Expand Up @@ -432,6 +456,7 @@ public MemoryUsage overLimit(MemoryUsage memoryUsed) {
leader = begin >= lastCheckTime + minimumInterval;
overLimitTriggered(leader);
if (leader) {
long initialCollectionCount = gcCountSupplier.getAsLong();
logger.info("attempting to trigger G1GC due to high heap usage [{}]", memoryUsed.baseUsage);
long localBlackHole = 0;
// number of allocations, corresponding to (approximately) number of free regions + 1
Expand All @@ -447,6 +472,9 @@ public MemoryUsage overLimit(MemoryUsage memoryUsed) {
// we observed a memory drop, so some GC must have occurred
break;
}
if (initialCollectionCount != gcCountSupplier.getAsLong()) {
break;
}
localBlackHole += new byte[allocationSize].hashCode();
}

Expand Down
Expand Up @@ -45,6 +45,7 @@
import java.util.concurrent.atomic.AtomicLong;
import java.util.concurrent.atomic.AtomicReference;
import java.util.function.Consumer;
import java.util.function.LongSupplier;
import java.util.stream.Collectors;
import java.util.stream.IntStream;

Expand Down Expand Up @@ -305,7 +306,7 @@ public void testParentTriggersG1GCBeforeBreaking() throws InterruptedException,
Collections.emptyList(),
new ClusterSettings(clusterSettings, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS),
new HierarchyCircuitBreakerService.G1OverLimitStrategy(JvmInfo.jvmInfo(), HierarchyCircuitBreakerService::realMemoryUsage,
time::get, interval) {
HierarchyCircuitBreakerService.createYoungGcCountSupplier(), time::get, interval) {

@Override
void overLimitTriggered(boolean leader) {
Expand Down Expand Up @@ -417,16 +418,15 @@ public void testFallbackG1RegionSize() {
equalTo(JvmInfo.jvmInfo().getG1RegionSize()));
}

public void testG1OverLimitStrategy() {
public void testG1OverLimitStrategyBreakOnMemory() {
AtomicLong time = new AtomicLong(randomLongBetween(Long.MIN_VALUE/2, Long.MAX_VALUE/2));
AtomicInteger leaderTriggerCount = new AtomicInteger();
AtomicInteger nonLeaderTriggerCount = new AtomicInteger();
long interval = randomLongBetween(1, 1000);
AtomicLong memoryUsage = new AtomicLong();
HierarchyCircuitBreakerService.G1OverLimitStrategy strategy =
new HierarchyCircuitBreakerService.G1OverLimitStrategy(JvmInfo.jvmInfo(), memoryUsage::get,
time::get, interval) {

HierarchyCircuitBreakerService.G1OverLimitStrategy strategy =
new HierarchyCircuitBreakerService.G1OverLimitStrategy(JvmInfo.jvmInfo(), memoryUsage::get, () -> 0, time::get, interval) {
@Override
void overLimitTriggered(boolean leader) {
if (leader) {
Expand Down Expand Up @@ -463,13 +463,49 @@ void overLimitTriggered(boolean leader) {
assertThat(leaderTriggerCount.get(), equalTo(2));
}

/**
 * Checks that the G1 over limit strategy stops as soon as the GC count changes.
 * <p>
 * The GC count supplier returns a new value on every call, so the count read after the
 * initial one always differs. The memory supplier only ever reports values in [100, 110]
 * and counts how often it is asked.
 */
public void testG1OverLimitStrategyBreakOnGcCount() {
    final AtomicLong clock = new AtomicLong(randomLongBetween(Long.MIN_VALUE/2, Long.MAX_VALUE/2));
    final AtomicInteger leaderCalls = new AtomicInteger();
    final AtomicInteger followerCalls = new AtomicInteger();
    final long minimumInterval = randomLongBetween(1, 1000);
    final AtomicLong memoryReads = new AtomicLong();
    final AtomicLong gcCount = new AtomicLong();

    final LongSupplier countingMemoryUsage = () -> {
        memoryReads.incrementAndGet();
        return randomLongBetween(100, 110);
    };
    // every read advances the counter, so two consecutive reads never agree
    final LongSupplier advancingGcCount = () -> gcCount.incrementAndGet();

    final HierarchyCircuitBreakerService.G1OverLimitStrategy strategy =
        new HierarchyCircuitBreakerService.G1OverLimitStrategy(
            JvmInfo.jvmInfo(), countingMemoryUsage, advancingGcCount, clock::get, minimumInterval) {

            @Override
            void overLimitTriggered(boolean leader) {
                (leader ? leaderCalls : followerCalls).incrementAndGet();
            }
        };

    final long firstRandom = randomLongBetween(100, 110);
    final long secondRandom = randomLongBetween(0, 50);
    final long thirdRandom = randomLongBetween(0, 50);
    final HierarchyCircuitBreakerService.MemoryUsage input =
        new HierarchyCircuitBreakerService.MemoryUsage(100, firstRandom, secondRandom, thirdRandom);

    final HierarchyCircuitBreakerService.MemoryUsage result = strategy.overLimit(input);

    assertThat(result, sameInstance(input));
    assertThat(leaderCalls.get(), equalTo(1));
    assertThat(gcCount.get(), equalTo(2L));
    // 1 before gc count break and 1 to get resulting memory usage.
    assertThat(memoryReads.get(), equalTo(2L));
}

public void testG1OverLimitStrategyThrottling() throws InterruptedException, BrokenBarrierException, TimeoutException {
AtomicLong time = new AtomicLong(randomLongBetween(Long.MIN_VALUE/2, Long.MAX_VALUE/2));
AtomicInteger leaderTriggerCount = new AtomicInteger();
long interval = randomLongBetween(1, 1000);
AtomicLong memoryUsage = new AtomicLong();
HierarchyCircuitBreakerService.G1OverLimitStrategy strategy =
new HierarchyCircuitBreakerService.G1OverLimitStrategy(JvmInfo.jvmInfo(), memoryUsage::get,
new HierarchyCircuitBreakerService.G1OverLimitStrategy(JvmInfo.jvmInfo(), memoryUsage::get, () -> 0,
time::get, interval) {

@Override
Expand Down

0 comments on commit 110925d

Please sign in to comment.