Skip to content

Commit

Permalink
fix: avoid unbalanced session pool creation (#2442)
Browse files Browse the repository at this point in the history
* fix: avoid unbalanced session pool creation

A query storm at the startup of the client library before the session
pool had initialized could cause the creation of an unbalanced session
pool. This again would put a large batch of sessions using the same gRPC
channel at the head of the pool, which could then continously be used by
the application.

* fix: automatically balance pool

* fix: skip empty pool

* fix: shuffle if unbalanced

* fix: only reset randomness if actually randomized

* test: randomize if many sessions are checked out

* 🦉 Updates from OwlBot post-processor

See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md

* test: try with more channels

* 🦉 Updates from OwlBot post-processor

See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md

* 🦉 Updates from OwlBot post-processor

See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md

* fix: also consider checked out sessions for unbalanced pool

* 🦉 Updates from OwlBot post-processor

See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md

* docs: add javadoc for property

* 🦉 Updates from OwlBot post-processor

See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md

* perf: optimize low-QPS workloads

* test: only randomize if more than 2 sessions are checked out

* 🦉 Updates from OwlBot post-processor

See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md

* test: only skip randomization for existing sessions

* 🦉 Updates from OwlBot post-processor

See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md

* chore: run formatter

* chore: address review comments

* docs: update comment on how session is added to the pool

---------

Co-authored-by: Owl Bot <gcf-owl-bot[bot]@users.noreply.github.com>
  • Loading branch information
olavloite and gcf-owl-bot[bot] committed Sep 11, 2023
1 parent d855ebb commit db751ce
Show file tree
Hide file tree
Showing 6 changed files with 439 additions and 35 deletions.
Expand Up @@ -54,6 +54,7 @@
import com.google.cloud.spanner.SessionPoolOptions.InactiveTransactionRemovalOptions;
import com.google.cloud.spanner.SpannerException.ResourceNotFoundException;
import com.google.cloud.spanner.SpannerImpl.ClosedException;
import com.google.cloud.spanner.spi.v1.SpannerRpc;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Function;
import com.google.common.base.MoreObjects;
Expand Down Expand Up @@ -1366,12 +1367,19 @@ PooledSession get(final boolean eligibleForLongRunning) {
}
}

final class PooledSession implements Session {
class PooledSession implements Session {
@VisibleForTesting SessionImpl delegate;
private volatile Instant lastUseTime;
private volatile SpannerException lastException;
private volatile boolean allowReplacing = true;

/**
* This ensures that the session is added at a random position in the pool the first time it is
* actually added to the pool.
*/
@GuardedBy("lock")
private Position releaseToPosition = initialReleasePosition;

/**
* Property to mark if the session is eligible to be long-running. This can only be true if the
* session is executing certain types of transactions (for ex - Partitioned DML) which can be
Expand Down Expand Up @@ -1403,6 +1411,13 @@ private PooledSession(SessionImpl delegate) {
this.lastUseTime = clock.instant();
}

int getChannel() {
Long channelHint = (Long) delegate.getOptions().get(SpannerRpc.Option.CHANNEL_HINT);
return channelHint == null
? 0
: (int) (channelHint % sessionClient.getSpanner().getOptions().getNumChannels());
}

@Override
public String toString() {
return getName();
Expand Down Expand Up @@ -1536,7 +1551,7 @@ public void close() {
if (state != SessionState.CLOSING) {
state = SessionState.AVAILABLE;
}
releaseSession(this, Position.FIRST);
releaseSession(this, false);
}
}

Expand Down Expand Up @@ -1576,7 +1591,7 @@ private void determineDialectAsync(final SettableFuture<Dialect> dialect) {
// in the database dialect, and there's nothing sensible that we can do with it here.
dialect.setException(t);
} finally {
releaseSession(this, Position.FIRST);
releaseSession(this, false);
}
});
}
Expand Down Expand Up @@ -1830,7 +1845,7 @@ private void keepAliveSessions(Instant currTime) {
logger.log(Level.FINE, "Keeping alive session " + sessionToKeepAlive.getName());
numSessionsToKeepAlive--;
sessionToKeepAlive.keepAlive();
releaseSession(sessionToKeepAlive, Position.FIRST);
releaseSession(sessionToKeepAlive, false);
} catch (SpannerException e) {
handleException(e, sessionToKeepAlive);
}
Expand Down Expand Up @@ -1929,7 +1944,7 @@ private void removeLongRunningSessions(
}
}

private enum Position {
enum Position {
FIRST,
RANDOM
}
Expand Down Expand Up @@ -1962,6 +1977,15 @@ private enum Position {

final PoolMaintainer poolMaintainer;
private final Clock clock;
/**
* initialReleasePosition determines where in the pool sessions are added when they are released
* into the pool the first time. This is always RANDOM in production, but some tests use FIRST to
* be able to verify the order of sessions in the pool. Using RANDOM ensures that we do not get an
* unbalanced session pool where all sessions belonging to one gRPC channel are added to the same
* region in the pool.
*/
private final Position initialReleasePosition;

private final Object lock = new Object();
private final Random random = new Random();

Expand Down Expand Up @@ -2045,6 +2069,7 @@ static SessionPool createPool(
((GrpcTransportOptions) spannerOptions.getTransportOptions()).getExecutorFactory(),
sessionClient,
poolMaintainerClock == null ? new Clock() : poolMaintainerClock,
Position.RANDOM,
Metrics.getMetricRegistry(),
labelValues);
}
Expand All @@ -2053,20 +2078,22 @@ static SessionPool createPool(
SessionPoolOptions poolOptions,
ExecutorFactory<ScheduledExecutorService> executorFactory,
SessionClient sessionClient) {
return createPool(poolOptions, executorFactory, sessionClient, new Clock());
return createPool(poolOptions, executorFactory, sessionClient, new Clock(), Position.RANDOM);
}

static SessionPool createPool(
SessionPoolOptions poolOptions,
ExecutorFactory<ScheduledExecutorService> executorFactory,
SessionClient sessionClient,
Clock clock) {
Clock clock,
Position initialReleasePosition) {
return createPool(
poolOptions,
null,
executorFactory,
sessionClient,
clock,
initialReleasePosition,
Metrics.getMetricRegistry(),
SPANNER_DEFAULT_LABEL_VALUES);
}
Expand All @@ -2077,6 +2104,7 @@ static SessionPool createPool(
ExecutorFactory<ScheduledExecutorService> executorFactory,
SessionClient sessionClient,
Clock clock,
Position initialReleasePosition,
MetricRegistry metricRegistry,
List<LabelValue> labelValues) {
SessionPool pool =
Expand All @@ -2087,6 +2115,7 @@ static SessionPool createPool(
executorFactory.get(),
sessionClient,
clock,
initialReleasePosition,
metricRegistry,
labelValues);
pool.initPool();
Expand All @@ -2100,6 +2129,7 @@ private SessionPool(
ScheduledExecutorService executor,
SessionClient sessionClient,
Clock clock,
Position initialReleasePosition,
MetricRegistry metricRegistry,
List<LabelValue> labelValues) {
this.options = options;
Expand All @@ -2108,6 +2138,7 @@ private SessionPool(
this.executor = executor;
this.sessionClient = sessionClient;
this.clock = clock;
this.initialReleasePosition = initialReleasePosition;
this.poolMaintainer = new PoolMaintainer();
this.initMetricsCollection(metricRegistry, labelValues);
this.waitOnMinSessionsLatch =
Expand Down Expand Up @@ -2233,7 +2264,7 @@ private void handleException(SpannerException e, PooledSession session) {
if (isSessionNotFound(e)) {
invalidateSession(session);
} else {
releaseSession(session, Position.FIRST);
releaseSession(session, false);
}
}

Expand Down Expand Up @@ -2396,33 +2427,128 @@ private void maybeCreateSession() {
}
}
}

/** Releases a session back to the pool. This might cause one of the waiters to be unblocked. */
private void releaseSession(PooledSession session, Position position) {
private void releaseSession(PooledSession session, boolean isNewSession) {
Preconditions.checkNotNull(session);
synchronized (lock) {
if (closureFuture != null) {
return;
}
if (waiters.size() == 0) {
// No pending waiters
switch (position) {
case RANDOM:
if (!sessions.isEmpty()) {
int pos = random.nextInt(sessions.size() + 1);
sessions.add(pos, session);
break;
}
// fallthrough
case FIRST:
default:
sessions.addFirst(session);
// There are no pending waiters.
// Add to a random position if the head of the session pool already contains many sessions
// with the same channel as this one.
if (session.releaseToPosition == Position.FIRST && isUnbalanced(session)) {
session.releaseToPosition = Position.RANDOM;
} else if (session.releaseToPosition == Position.RANDOM
&& !isNewSession
&& checkedOutSessions.size() <= 2) {
// Do not randomize if there are few other sessions checked out and this session has been
// used. This ensures that this session will be re-used for the next transaction, which is
// more efficient.
session.releaseToPosition = Position.FIRST;
}
if (session.releaseToPosition == Position.RANDOM && !sessions.isEmpty()) {
// A session should only be added at a random position the first time it is added to
// the pool or if the pool was deemed unbalanced. All following releases into the pool
// should normally happen at the front of the pool (unless the pool is again deemed to be
// unbalanced).
session.releaseToPosition = Position.FIRST;
int pos = random.nextInt(sessions.size() + 1);
sessions.add(pos, session);
} else {
sessions.addFirst(session);
}
} else {
waiters.poll().put(session);
}
}
}

private boolean isUnbalanced(PooledSession session) {
int channel = session.getChannel();
int numChannels = sessionClient.getSpanner().getOptions().getNumChannels();
return isUnbalanced(channel, this.sessions, this.checkedOutSessions, numChannels);
}

/**
* Returns true if the given list of sessions is considered unbalanced when compared to the
* sessionChannel that is about to be added to the pool.
*
* <p>The method returns true if all the following is true:
*
* <ol>
* <li>The list of sessions is not empty.
* <li>The number of checked out sessions is > 2.
* <li>The number of channels being used by the pool is > 1.
* <li>And at least one of the following is true:
* <ol>
* <li>The first numChannels sessions in the list of sessions contains more than 2
* sessions that use the same channel as the one being added.
* <li>The list of currently checked out sessions contains more than 2 times the the
* number of sessions with the same channel as the one being added than it should in
* order for it to be perfectly balanced. Perfectly balanced in this case means that
* the list should preferably contain size/numChannels sessions of each channel.
* </ol>
* </ol>
*
* @param channelOfSessionBeingAdded the channel number being used by the session that is about to
* be released into the pool
* @param sessions the list of all sessions in the pool
* @param checkedOutSessions the currently checked out sessions of the pool
* @param numChannels the number of channels in use
* @return true if the pool is considered unbalanced, and false otherwise
*/
@VisibleForTesting
static boolean isUnbalanced(
int channelOfSessionBeingAdded,
List<PooledSession> sessions,
Set<PooledSessionFuture> checkedOutSessions,
int numChannels) {
// Do not re-balance the pool if the number of checked out sessions is low, as it is
// better to re-use sessions as much as possible in a low-QPS scenario.
if (sessions.isEmpty() || checkedOutSessions.size() <= 2) {
return false;
}
if (numChannels == 1) {
return false;
}

// Ideally, the first numChannels sessions in the pool should contain exactly one session for
// each channel.
// Check if the first numChannels sessions at the head of the pool already contain more than 2
// sessions that use the same channel as this one. If so, we re-balance.
// We also re-balance the pool in the specific case that the pool uses 2 channels and the first
// two sessions use those two channels.
int maxSessionsAtHeadOfPool = Math.min(numChannels, 3);
int count = 0;
for (int i = 0; i < Math.min(numChannels, sessions.size()); i++) {
PooledSession otherSession = sessions.get(i);
if (channelOfSessionBeingAdded == otherSession.getChannel()) {
count++;
if (count >= maxSessionsAtHeadOfPool) {
return true;
}
}
}
// Ideally, the use of a channel in the checked out sessions is exactly
// numCheckedOut / numChannels
// We check whether we are more than a factor two away from that perfect distribution.
// If we are, then we re-balance.
count = 0;
int checkedOutThreshold = Math.max(2, 2 * checkedOutSessions.size() / numChannels);
for (PooledSessionFuture otherSession : checkedOutSessions) {
if (otherSession.isDone() && channelOfSessionBeingAdded == otherSession.get().getChannel()) {
count++;
if (count > checkedOutThreshold) {
return true;
}
}
}
return false;
}

private void handleCreateSessionsFailure(SpannerException e, int count) {
synchronized (lock) {
for (int i = 0; i < count; i++) {
Expand Down Expand Up @@ -2622,7 +2748,7 @@ public void onSessionReady(SessionImpl session) {
// Release the session to a random position in the pool to prevent the case that a batch
// of sessions that are affiliated with the same channel are all placed sequentially in
// the pool.
releaseSession(pooledSession, Position.RANDOM);
releaseSession(pooledSession, true);
}
}
}
Expand Down
Expand Up @@ -26,16 +26,21 @@
import com.google.api.core.ApiFutures;
import com.google.cloud.grpc.GrpcTransportOptions.ExecutorFactory;
import com.google.cloud.spanner.SessionPool.Clock;
import com.google.cloud.spanner.spi.v1.SpannerRpc.Option;
import com.google.protobuf.Empty;
import java.util.HashMap;
import java.util.Map;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.ScheduledFuture;
import java.util.concurrent.ScheduledThreadPoolExecutor;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicLong;
import org.threeten.bp.Instant;

abstract class BaseSessionPoolTest {
ScheduledExecutorService mockExecutor;
int sessionIndex;
AtomicLong channelHint = new AtomicLong(0L);

final class TestExecutorFactory implements ExecutorFactory<ScheduledExecutorService> {

Expand Down Expand Up @@ -64,6 +69,9 @@ public void release(ScheduledExecutorService executor) {
@SuppressWarnings("unchecked")
SessionImpl mockSession() {
final SessionImpl session = mock(SessionImpl.class);
Map options = new HashMap<>();
options.put(Option.CHANNEL_HINT, channelHint.getAndIncrement());
when(session.getOptions()).thenReturn(options);
when(session.getName())
.thenReturn(
"projects/dummy/instances/dummy/database/dummy/sessions/session" + sessionIndex);
Expand Down
Expand Up @@ -26,6 +26,7 @@
import com.google.cloud.spanner.SessionClient.SessionConsumer;
import com.google.cloud.spanner.SessionPool.PooledSession;
import com.google.cloud.spanner.SessionPool.PooledSessionFuture;
import com.google.cloud.spanner.SessionPool.Position;
import com.google.cloud.spanner.SessionPool.SessionConsumerImpl;
import java.util.ArrayList;
import java.util.HashMap;
Expand Down Expand Up @@ -58,6 +59,7 @@ public void setUp() {
initMocks(this);
when(client.getOptions()).thenReturn(spannerOptions);
when(client.getSessionClient(db)).thenReturn(sessionClient);
when(sessionClient.getSpanner()).thenReturn(client);
when(spannerOptions.getNumChannels()).thenReturn(4);
when(spannerOptions.getDatabaseRole()).thenReturn("role");
setupMockSessionCreation();
Expand Down Expand Up @@ -111,9 +113,11 @@ private SessionImpl setupMockSession(final SessionImpl session) {
}

private SessionPool createPool() throws Exception {
// Allow sessions to be added to the head of the pool in all cases in this test, as it is
// otherwise impossible to know which session exactly is getting pinged at what point in time.
SessionPool pool =
SessionPool.createPool(
options, new TestExecutorFactory(), client.getSessionClient(db), clock);
options, new TestExecutorFactory(), client.getSessionClient(db), clock, Position.FIRST);
pool.idleSessionRemovedListener =
input -> {
idledSessions.add(input);
Expand Down

0 comments on commit db751ce

Please sign in to comment.