From 95b847e7995d8c9fed96fcad2cb39e882da917fc Mon Sep 17 00:00:00 2001 From: Eric Anderson Date: Wed, 7 Feb 2024 17:48:07 -0800 Subject: [PATCH] interop-testing: Use separate event loops in RetryTest The RetryTest was flaky, and it seems to have been caused by the client and server getting assigned to the same event loop. Separating the two reduces the flake rate from ~3% to less than 0.1% (no flakes in 1000 runs). While I was here fixing the executors, I reduced the number of threads created and shut down the threads after they are no longer used. This had no impact on the flake rate (no flakes in 1000). --- .../grpc/testing/integration/RetryTest.java | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/interop-testing/src/test/java/io/grpc/testing/integration/RetryTest.java b/interop-testing/src/test/java/io/grpc/testing/integration/RetryTest.java index dbbcf39d0ac..edd2a57ab9d 100644 --- a/interop-testing/src/test/java/io/grpc/testing/integration/RetryTest.java +++ b/interop-testing/src/test/java/io/grpc/testing/integration/RetryTest.java @@ -77,6 +77,7 @@ import java.util.concurrent.LinkedBlockingQueue; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicBoolean; +import org.junit.After; import org.junit.Rule; import org.junit.Test; import org.junit.runner.RunWith; @@ -110,7 +111,7 @@ public class RetryTest { mock(ClientCall.Listener.class, delegatesTo(testCallListener)); private CountDownLatch backoffLatch = new CountDownLatch(1); - private final EventLoopGroup group = new DefaultEventLoopGroup() { + private final EventLoopGroup clientGroup = new DefaultEventLoopGroup(1) { @SuppressWarnings("FutureReturnValueIgnored") @Override public ScheduledFuture schedule( @@ -122,7 +123,7 @@ public ScheduledFuture schedule( new Runnable() { @Override public void run() { - group.execute(command); + clientGroup.execute(command); } }, delay, @@ -137,6 +138,7 @@ public void run() {} // no-op TimeUnit.NANOSECONDS); } }; 
+ private final EventLoopGroup serverGroup = new DefaultEventLoopGroup(1); private final FakeStatsRecorder clientStatsRecorder = new FakeStatsRecorder(); private final ClientInterceptor statsInterceptor = InternalCensusStatsAccessor.getClientInterceptor( @@ -173,11 +175,18 @@ public Listener startCall(ServerCall call, Metadata hea private Map retryPolicy = null; private long bufferLimit = 1L << 20; // 1M + @After + @SuppressWarnings("FutureReturnValueIgnored") + public void tearDown() { + clientGroup.shutdownGracefully(); + serverGroup.shutdownGracefully(); + } + private void startNewServer() throws Exception { localServer = cleanupRule.register(NettyServerBuilder.forAddress(localAddress) .channelType(LocalServerChannel.class) - .bossEventLoopGroup(group) - .workerEventLoopGroup(group) + .bossEventLoopGroup(serverGroup) + .workerEventLoopGroup(serverGroup) .addService(serviceDefinition) .build()); localServer.start(); @@ -196,7 +205,7 @@ private void createNewChannel() { channel = cleanupRule.register( NettyChannelBuilder.forAddress(localAddress) .channelType(LocalChannel.class, LocalAddress.class) - .eventLoopGroup(group) + .eventLoopGroup(clientGroup) .usePlaintext() .enableRetry() .perRpcBufferLimit(bufferLimit)