Skip to content

Commit

Permalink
interop-testing: Use separate event loops in RetryTest
Browse files Browse the repository at this point in the history
The RetryTest was flaky, and it seems to have been caused by the client
and server getting assigned to the same event loop. Separating the two
reduces the flake rate from ~3% to less than 0.1% (no flakes in 1,000 runs).

While I was here fixing the executors, I reduced the number of threads
created and shut down the threads after they are no longer used. This
had no impact on the flake rate (no flakes in 1,000 runs).
  • Loading branch information
ejona86 committed Feb 8, 2024
1 parent 7ba0718 commit 95b847e
Showing 1 changed file with 14 additions and 5 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicBoolean;
import org.junit.After;
import org.junit.Rule;
import org.junit.Test;
import org.junit.runner.RunWith;
Expand Down Expand Up @@ -110,7 +111,7 @@ public class RetryTest {
mock(ClientCall.Listener.class, delegatesTo(testCallListener));

private CountDownLatch backoffLatch = new CountDownLatch(1);
private final EventLoopGroup group = new DefaultEventLoopGroup() {
private final EventLoopGroup clientGroup = new DefaultEventLoopGroup(1) {
@SuppressWarnings("FutureReturnValueIgnored")
@Override
public ScheduledFuture<?> schedule(
Expand All @@ -122,7 +123,7 @@ public ScheduledFuture<?> schedule(
new Runnable() {
@Override
public void run() {
group.execute(command);
clientGroup.execute(command);
}
},
delay,
Expand All @@ -137,6 +138,7 @@ public void run() {} // no-op
TimeUnit.NANOSECONDS);
}
};
private final EventLoopGroup serverGroup = new DefaultEventLoopGroup(1);
private final FakeStatsRecorder clientStatsRecorder = new FakeStatsRecorder();
private final ClientInterceptor statsInterceptor =
InternalCensusStatsAccessor.getClientInterceptor(
Expand Down Expand Up @@ -173,11 +175,18 @@ public Listener<String> startCall(ServerCall<String, Integer> call, Metadata hea
private Map<String, Object> retryPolicy = null;
private long bufferLimit = 1L << 20; // 1M

@After
@SuppressWarnings("FutureReturnValueIgnored")
public void tearDown() {
  // Wind down both per-test event loops so their threads do not outlive the test.
  // shutdownGracefully() is asynchronous; its returned future is deliberately ignored.
  for (EventLoopGroup group : new EventLoopGroup[] {clientGroup, serverGroup}) {
    group.shutdownGracefully();
  }
}

private void startNewServer() throws Exception {
localServer = cleanupRule.register(NettyServerBuilder.forAddress(localAddress)
.channelType(LocalServerChannel.class)
.bossEventLoopGroup(group)
.workerEventLoopGroup(group)
.bossEventLoopGroup(serverGroup)
.workerEventLoopGroup(serverGroup)
.addService(serviceDefinition)
.build());
localServer.start();
Expand All @@ -196,7 +205,7 @@ private void createNewChannel() {
channel = cleanupRule.register(
NettyChannelBuilder.forAddress(localAddress)
.channelType(LocalChannel.class, LocalAddress.class)
.eventLoopGroup(group)
.eventLoopGroup(clientGroup)
.usePlaintext()
.enableRetry()
.perRpcBufferLimit(bufferLimit)
Expand Down

0 comments on commit 95b847e

Please sign in to comment.