From 7df2d5feebf8bc5ecfcea3edba290db500382dcf Mon Sep 17 00:00:00 2001 From: Fabio Kung Date: Mon, 18 Mar 2019 12:38:54 -0700 Subject: [PATCH] netty: default grace time for RPCs can leak memory Using Long.MAX_VALUE as the delay for Netty's NioEventLoop#schedule can cause (long) deadlines for scheduled tasks to wrap into negative values, which is unspecified behavior. Recent versions of netty are guarding against overflows, but not all versions of grpc-java are using a recent enough netty. When connections are gracefully closed, Netty's Http2ConnectionHandler sets up a timeout task forcing resources to be freed after a grace period. When their deadlines wrap into negative values, a race was observed in production systems (with grpc-java <= 1.12.1) causing Netty to never properly release buffers associated with active streams in the connection being (gracefully) closed. --- .../io/grpc/netty/NettyServerHandler.java | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/netty/src/main/java/io/grpc/netty/NettyServerHandler.java b/netty/src/main/java/io/grpc/netty/NettyServerHandler.java index d7a9aec2e2f..41a630eac4c 100644 --- a/netty/src/main/java/io/grpc/netty/NettyServerHandler.java +++ b/netty/src/main/java/io/grpc/netty/NettyServerHandler.java @@ -19,6 +19,7 @@ import static com.google.common.base.Preconditions.checkArgument; import static com.google.common.base.Preconditions.checkNotNull; import static io.grpc.internal.GrpcUtil.SERVER_KEEPALIVE_TIME_NANOS_DISABLED; +import static io.grpc.netty.NettyServerBuilder.MAX_CONNECTION_AGE_GRACE_NANOS_INFINITE; import static io.grpc.netty.NettyServerBuilder.MAX_CONNECTION_AGE_NANOS_DISABLED; import static io.grpc.netty.NettyServerBuilder.MAX_CONNECTION_IDLE_NANOS_DISABLED; import static io.grpc.netty.Utils.CONTENT_TYPE_HEADER; @@ -910,12 +911,9 @@ void secondGoAwayAndClose(ChannelHandlerContext ctx) { // gracefully shutdown with specified grace time long savedGracefulShutdownTimeMillis = gracefulShutdownTimeoutMillis(); - long gracefulShutdownTimeoutMillis = savedGracefulShutdownTimeMillis; - if (graceTimeInNanos != null) { - gracefulShutdownTimeoutMillis = TimeUnit.NANOSECONDS.toMillis(graceTimeInNanos); - } + long overriddenGraceTime = graceTimeOverrideMillis(savedGracefulShutdownTimeMillis); try { - gracefulShutdownTimeoutMillis(gracefulShutdownTimeoutMillis); + gracefulShutdownTimeoutMillis(overriddenGraceTime); NettyServerHandler.super.close(ctx, ctx.newPromise()); } catch (Exception e) { onError(ctx, /* outbound= */ true, e); @@ -923,6 +921,17 @@ void secondGoAwayAndClose(ChannelHandlerContext ctx) { gracefulShutdownTimeoutMillis(savedGracefulShutdownTimeMillis); } } + + private long graceTimeOverrideMillis(long originalMillis) { + if (graceTimeInNanos == null) { + return originalMillis; + } + if (graceTimeInNanos == MAX_CONNECTION_AGE_GRACE_NANOS_INFINITE) { + // netty treats -1 as "no timeout" + return -1L; + } + return TimeUnit.NANOSECONDS.toMillis(graceTimeInNanos); + } } // Use a frame writer so that we know when frames are through flow control and actually being