Skip to content

Commit

Permalink
Merge pull request #790 from jsanda/hwkmetrics-637
Browse files Browse the repository at this point in the history
[HWKMETRICS-637] Wait for entire C* cluster to be up for schema updates
  • Loading branch information
stefannegrea committed Mar 31, 2017
2 parents 9d4b233 + 1159882 commit 61e9102
Show file tree
Hide file tree
Showing 3 changed files with 111 additions and 1 deletion.
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@

import static org.hawkular.metrics.api.jaxrs.config.ConfigurationKey.ADMIN_TENANT;
import static org.hawkular.metrics.api.jaxrs.config.ConfigurationKey.ADMIN_TOKEN;
import static org.hawkular.metrics.api.jaxrs.config.ConfigurationKey.CASSANDRA_CLUSTER_CONNECTION_ATTEMPTS;
import static org.hawkular.metrics.api.jaxrs.config.ConfigurationKey.CASSANDRA_CLUSTER_CONNECTION_MAX_DELAY;
import static org.hawkular.metrics.api.jaxrs.config.ConfigurationKey.CASSANDRA_CONNECTION_TIMEOUT;
import static org.hawkular.metrics.api.jaxrs.config.ConfigurationKey.CASSANDRA_CQL_PORT;
import static org.hawkular.metrics.api.jaxrs.config.ConfigurationKey.CASSANDRA_KEYSPACE;
Expand Down Expand Up @@ -83,6 +85,7 @@
import org.hawkular.metrics.api.jaxrs.config.ConfigurationProperty;
import org.hawkular.metrics.api.jaxrs.log.RestLogger;
import org.hawkular.metrics.api.jaxrs.log.RestLogging;
import org.hawkular.metrics.api.jaxrs.util.CassandraClusterNotUpException;
import org.hawkular.metrics.api.jaxrs.util.JobSchedulerFactory;
import org.hawkular.metrics.api.jaxrs.util.ManifestInformation;
import org.hawkular.metrics.api.jaxrs.util.MetricRegistryProvider;
Expand Down Expand Up @@ -168,6 +171,16 @@ public enum State {
@ConfigurationProperty(CASSANDRA_KEYSPACE)
private String keyspace;

/**
 * Raw configured value for {@code CASSANDRA_CLUSTER_CONNECTION_ATTEMPTS}. It is kept as a string here and parsed
 * as an int by {@code waitForAllNodesToBeUp()}, which falls back to the configuration key's default value when
 * this string is not a valid integer.
 */
@Inject
@Configurable
@ConfigurationProperty(CASSANDRA_CLUSTER_CONNECTION_ATTEMPTS)
private String clusterConnectionAttempts;

/**
 * Raw configured value for {@code CASSANDRA_CLUSTER_CONNECTION_MAX_DELAY}. It is parsed as a long by
 * {@code waitForAllNodesToBeUp()} and used as the upper bound for the wait between node status checks; that wait
 * is handed to {@code Thread.sleep}, so the unit is milliseconds. The configuration key's default value is used
 * when this string is not a valid long.
 */
// NOTE(review): the ConfigurationKey entry for this setting passes "CASSANDR_CLUSTER_CONNECTION_DELAY" (missing
// an 'A') as its third argument - presumably the environment variable name; confirm whether that is a typo.
@Inject
@Configurable
@ConfigurationProperty(CASSANDRA_CLUSTER_CONNECTION_MAX_DELAY)
private String clusterConnectionDelay;

@Inject
@Configurable
@ConfigurationProperty(CASSANDRA_REPLICATION_FACTOR)
Expand Down Expand Up @@ -372,7 +385,7 @@ private void startMetricsService() {
connectionAttempts++;
try {
session = createSession();
} catch (Exception t) {
} catch (Exception t) {
Throwable rootCause = Throwables.getRootCause(t);

// to get around HWKMETRICS-415
Expand All @@ -389,6 +402,8 @@ private void startMetricsService() {
return;
}
try {
waitForAllNodesToBeUp();

initSchema();
dataAcces = new DataAccessImpl(session);

Expand Down Expand Up @@ -438,6 +453,10 @@ private void startMetricsService() {
state = State.STARTED;
log.infoServiceStarted();

} catch (CassandraClusterNotUpException e) {
log.fatal("It appears that some nodes in the Cassandra cluster are not up. Start up cannot proceed");
state = State.FAILED;

} catch (Exception e) {
log.fatalCannotConnectToCassandra(e);
state = State.FAILED;
Expand Down Expand Up @@ -565,6 +584,46 @@ private Session createSession() {
}
}

/**
 * Blocks until every host known to the session's cluster metadata reports itself as up, checking a bounded number
 * of times and sleeping between checks with a doubling delay that is capped at the configured maximum.
 * <p>
 * The number of checks comes from {@code clusterConnectionAttempts} and the delay cap (in milliseconds) from
 * {@code clusterConnectionDelay}; when either is not a valid number the default of the corresponding
 * configuration key is used. At least one check is always made, and a negative delay cap is treated as zero.
 * If the thread is interrupted, the interrupt flag is restored and waiting stops.
 *
 * @throws CassandraClusterNotUpException if at least one node is still down after the last check, or if the
 *                                        thread was interrupted before all nodes were seen up
 */
private void waitForAllNodesToBeUp() throws CassandraClusterNotUpException {
    int maxChecks = Integer.parseInt(CASSANDRA_CLUSTER_CONNECTION_ATTEMPTS.defaultValue());
    long maxDelay = Long.parseLong(CASSANDRA_CLUSTER_CONNECTION_MAX_DELAY.defaultValue());
    try {
        maxChecks = Integer.parseInt(clusterConnectionAttempts);
    } catch (NumberFormatException e) {
        log.infof("Invalid value for %s. Using default of %d", CASSANDRA_CLUSTER_CONNECTION_ATTEMPTS.name(),
                maxChecks);
    }
    try {
        maxDelay = Long.parseLong(clusterConnectionDelay);
    } catch (NumberFormatException e) {
        // Report the default that is actually used (the max delay), not the initial back-off delay.
        log.infof("Invalid value for %s. Using default of %d", CASSANDRA_CLUSTER_CONNECTION_MAX_DELAY.name(),
                maxDelay);
    }

    // Never declare the cluster down without looking at it at least once, and never hand a negative
    // value to Thread.sleep (it would throw IllegalArgumentException).
    maxChecks = Math.max(1, maxChecks);
    maxDelay = Math.max(0L, maxDelay);

    // Start at 2 seconds, but honour a configured cap that is smaller than that.
    long delay = Math.min(2000L, maxDelay);
    int checks = 0;

    while (checks < maxChecks && !Thread.currentThread().isInterrupted()) {
        checks++;

        Host downHost = null;
        for (Host host : session.getCluster().getMetadata().getAllHosts()) {
            if (!host.isUp()) {
                downHost = host;
                break;
            }
        }
        if (downHost == null) {
            return;
        }

        if (checks == maxChecks) {
            // No further check will follow, so sleeping again would only delay the failure.
            log.warnf("Cassandra node %s is still not up after %d checks", downHost, checks);
            break;
        }

        log.warnf("Cassandra node %s may not be up yet. Waiting %s ms for node to come up", downHost, delay);
        try {
            Thread.sleep(delay);
        } catch (InterruptedException e) {
            // Restore the flag; the loop condition then ends the wait.
            Thread.currentThread().interrupt();
        }
        delay = Math.min(delay * 2, maxDelay);
    }

    // Report the number of checks actually performed rather than a decremented loop counter.
    throw new CassandraClusterNotUpException("It appears that not all nodes in the Cassandra cluster are up " +
            "after " + checks + " checks. Schema updates cannot proceed without all nodes being up.");
}

private void initSchema() {
AtomicReference<Integer> replicationFactor = new AtomicReference<>();
try {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
/*
* Copyright 2014-2017 Red Hat, Inc. and/or its affiliates
* and other contributors as indicated by the @author tags.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.hawkular.metrics.api.jaxrs.util;

/**
 * Checked exception raised once a session has been initialized and at least one Cassandra node in the cluster is
 * found to be down. Background is in https://issues.jboss.org/browse/HWKMETRICS-637.
 *
 * @author jsanda
 */
public class CassandraClusterNotUpException extends Exception {

    /**
     * Creates the exception with no detail message and no cause.
     */
    public CassandraClusterNotUpException() {
    }

    /**
     * Creates the exception wrapping another throwable.
     *
     * @param cause the underlying failure; may be {@code null}
     */
    public CassandraClusterNotUpException(Throwable cause) {
        super(cause);
    }

    /**
     * Creates the exception with a detail message.
     *
     * @param message text describing the failure
     */
    public CassandraClusterNotUpException(String message) {
        super(message);
    }

    /**
     * Creates the exception with a detail message and the throwable that triggered it.
     *
     * @param message text describing the failure
     * @param cause   the underlying failure; may be {@code null}
     */
    public CassandraClusterNotUpException(String message, Throwable cause) {
        super(message, cause);
    }

    /**
     * Creates the exception with full control over suppression and stack trace capture; the arguments are passed
     * straight through to {@link Exception#Exception(String, Throwable, boolean, boolean)}.
     *
     * @param message            text describing the failure
     * @param cause              the underlying failure; may be {@code null}
     * @param enableSuppression  whether suppressed exceptions may be recorded
     * @param writableStackTrace whether the stack trace is writable
     */
    protected CassandraClusterNotUpException(String message, Throwable cause, boolean enableSuppression,
            boolean writableStackTrace) {
        super(message, cause, enableSuppression, writableStackTrace);
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,10 @@ public enum ConfigurationKey {
CASSANDRA_KEYSPACE("hawkular.metrics.cassandra.keyspace", "hawkular_metrics", null, false),
CASSANDRA_REPLICATION_FACTOR("hawkular.metrics.cassandra.replication-factor", "1", "CASSANDRA_REPLICATION_FACTOR",
false),
CASSANDRA_CLUSTER_CONNECTION_ATTEMPTS("hawkular.metrics.cassandra.cluster.connection-attempts", "5",
"CASSANDRA_CLUSTER_CONNECTION_ATTEMPTS", false),
CASSANDRA_CLUSTER_CONNECTION_MAX_DELAY("hawkular.metrics.cassandra.cluster.connection-delay", "30000",
"CASSANDR_CLUSTER_CONNECTION_DELAY", false),
CASSANDRA_RESETDB("hawkular.metrics.cassandra.resetdb", null, null, true),
CASSANDRA_USESSL("hawkular.metrics.cassandra.use-ssl", "false", "CASSANDRA_USESSL", false),
CASSANDRA_MAX_CONN_HOST("hawkular.metrics.cassandra.max-connections-per-host", "10", "CASSANDRA_MAX_CONN_HOST",
Expand Down

0 comments on commit 61e9102

Please sign in to comment.