Skip to content

Commit

Permalink
(2.8) webadmin: fix exit logic in billing refresh loop
Browse files Browse the repository at this point in the history
Currently the thread which refreshes the billing plots
will exit if it encounters an unspecified error from
the billing service. But this behavior does not
take into account slow start-up of domains (i.e.,
the billing service may not be there yet but will
be eventually).

The patch fixes the logic to treat NoRouteToCell
exceptions differently by waiting for a short time
and retrying.

Testing done: On deployed service without billing,
and then with billing booted.

Target: 2.8
Patch: https://rb.dcache.org/r/7306
Acked-by: Gerd
Committed: b315fc0
Require-note: yes
Require-book: no

RELEASE NOTES:
Fixes a bug where a simple timeout (NoRouteToCell) causes an
exit from the billing service refresh loop, requiring a restart
of the domain in order to reconnect.
  • Loading branch information
alrossi committed Oct 16, 2014
1 parent e8c474d commit 5f575c7
Show file tree
Hide file tree
Showing 2 changed files with 94 additions and 55 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,10 @@ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING

import java.io.File;

import diskCacheV111.util.ServiceUnavailableException;

import dmg.cells.nucleus.NoRouteToCellException;

/**
* Provides plot images to billing page.
*
Expand All @@ -83,5 +87,5 @@ public interface IBillingService {

void initialize();

void refresh();
void refresh() throws NoRouteToCellException, ServiceUnavailableException;
}
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,9 @@ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
*/
package org.dcache.webadmin.controller.impl;

import com.google.common.base.Throwables;
import com.google.common.util.concurrent.RateLimiter;
import org.apache.wicket.util.lang.Exceptions;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

Expand All @@ -77,6 +80,8 @@ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING

import diskCacheV111.util.ServiceUnavailableException;

import dmg.cells.nucleus.NoRouteToCellException;

import org.dcache.cells.CellStub;
import org.dcache.services.billing.histograms.ITimeFrameHistogramFactory;
import org.dcache.services.billing.histograms.ITimeFrameHistogramFactory.Style;
Expand Down Expand Up @@ -106,6 +111,7 @@ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
*/
public final class StandardBillingService implements IBillingService, Runnable {
private static final Logger logger = LoggerFactory.getLogger(StandardBillingService.class);
private static final double ERRORS_PER_SECOND = 1.0 / 120.0;

/**
* injected
Expand Down Expand Up @@ -135,48 +141,78 @@ public final class StandardBillingService implements IBillingService, Runnable {
* refreshing can be done periodically by the daemon, or forced
* through the web interface directly
*/
private final RateLimiter rate = RateLimiter.create(ERRORS_PER_SECOND);

private long timeout;
private int popupWidth;
private int popupHeight;
private long lastUpdate = System.currentTimeMillis();
private Thread refresher;

// NOTE(review): this span comes from a rendered diff without +/- markers, so
// it appears to contain both the pre-patch and post-patch lines of the method
// (two signatures, two switch statements) -- confirm against the repository.
/**
 * Requests from {@code client} the histogram(s) that belong to the given
 * plot type for the given time frame and collects them, via {@code add},
 * into a newly created list.
 *
 * @param plotType selects which client histogram call(s) are made
 * @param timeFrame passed unchanged to every client call
 * @return the list the fetched histograms were added to
 * @throws NoRouteToCellException if one is found in the cause chain of an
 *         {@code UndeclaredThrowableException} raised by a client call
 * @throws ServiceUnavailableException if one is found in the cause chain of
 *         an {@code UndeclaredThrowableException} raised by a client call
 */
public List<TimeFrameHistogramData> load(PlotType plotType,
TimeFrame timeFrame) {
TimeFrame timeFrame) throws NoRouteToCellException,
ServiceUnavailableException {
logger.debug("remote fetch of {} {}", plotType, timeFrame);
List<TimeFrameHistogramData> histograms = new ArrayList<>();
switch (plotType) {
case BYTES_READ:
add(client.getDcBytesHistogram(timeFrame, false), histograms);
add(client.getHsmBytesHistogram(timeFrame, false), histograms);
break;
case BYTES_WRITTEN:
add(client.getDcBytesHistogram(timeFrame, true), histograms);
add(client.getHsmBytesHistogram(timeFrame, true), histograms);
break;
case BYTES_P2P:
add(client.getP2pBytesHistogram(timeFrame), histograms);
break;
case TRANSFERS_READ:
add(client.getDcTransfersHistogram(timeFrame, false),
histograms);
add(client.getHsmTransfersHistogram(timeFrame, false),
histograms);
break;
case TRANSFERS_WRITTEN:
add(client.getDcTransfersHistogram(timeFrame, true), histograms);
add(client.getHsmTransfersHistogram(timeFrame, true),
histograms);
break;
case TRANSFERS_P2P:
add(client.getP2pTransfersHistogram(timeFrame), histograms);
break;
case CONNECTION_TIME:
add(client.getDcConnectTimeHistograms(timeFrame), histograms);
break;
case CACHE_HITS:
add(client.getHitHistograms(timeFrame), histograms);
break;
// Same dispatch as above, now wrapped in a try so that an
// UndeclaredThrowableException from a client call can be unwrapped below.
try {
switch (plotType) {
// For the Dc/Hsm calls the boolean argument is false in the *_READ cases
// and true in the *_WRITTEN cases.
case BYTES_READ:
add(client.getDcBytesHistogram(timeFrame, false),
histograms);
add(client.getHsmBytesHistogram(timeFrame, false),
histograms);
break;
case BYTES_WRITTEN:
add(client.getDcBytesHistogram(timeFrame, true),
histograms);
add(client.getHsmBytesHistogram(timeFrame, true),
histograms);
break;
case BYTES_P2P:
add(client.getP2pBytesHistogram(timeFrame),
histograms);
break;
case TRANSFERS_READ:
add(client.getDcTransfersHistogram(timeFrame, false),
histograms);
add(client.getHsmTransfersHistogram(timeFrame, false),
histograms);
break;
case TRANSFERS_WRITTEN:
add(client.getDcTransfersHistogram(timeFrame, true),
histograms);
add(client.getHsmTransfersHistogram(timeFrame, true),
histograms);
break;
case TRANSFERS_P2P:
add(client.getP2pTransfersHistogram(timeFrame),
histograms);
break;
case CONNECTION_TIME:
add(client.getDcConnectTimeHistograms(timeFrame),
histograms);
break;
case CACHE_HITS:
add(client.getHitHistograms(timeFrame),
histograms);
break;
}
} catch (UndeclaredThrowableException ute) {
// First preference: rethrow a ServiceUnavailableException if
// Exceptions.findCause locates one for this exception.
Throwable cause
= Exceptions.findCause(ute, ServiceUnavailableException.class);
if (cause != null) {
throw (ServiceUnavailableException)cause;
}
// Second preference: rethrow a NoRouteToCellException the same way.
cause = Exceptions.findCause(ute, NoRouteToCellException.class);
if (cause != null) {
throw (NoRouteToCellException)cause;
}
// Anything else: Guava's propagateIfPossible rethrows the direct cause
// unchanged when it is a RuntimeException or an Error; otherwise
// execution continues and the cause is wrapped in a RuntimeException.
cause = ute.getCause();
Throwables.propagateIfPossible(cause);
throw new RuntimeException("Unexpected error: "
+ "this is probably a bug. Please report "
+ "to the dCache team.",
cause);
}
return histograms;
}
Expand Down Expand Up @@ -288,7 +324,8 @@ public void initialize() {
}

@Override
public void refresh() {
public void refresh() throws NoRouteToCellException,
ServiceUnavailableException{
TimeFrame[] timeFrames = generateTimeFrames();
for (int tFrame = 0; tFrame < timeFrames.length; tFrame++) {
Date low = timeFrames[tFrame].getLow();
Expand All @@ -305,27 +342,24 @@ public void refresh() {
/*
 * Refresh loop of this Runnable: calls refresh() and then sleeps for
 * `timeout` milliseconds, over and over, until the thread is interrupted
 * or an exception handler below ends the loop.
 *
 * NOTE(review): this span comes from a rendered diff without +/- markers, so
 * it appears to contain both the pre-patch lines (bare refresh()/sleep and the
 * outer UndeclaredThrowableException handler) and the post-patch lines (the
 * inner try/catch) -- confirm against the repository.
 */
public void run() {
try {
while (true) {
refresh();
Thread.sleep(timeout);
try {
refresh();
Thread.sleep(timeout);
} catch (ServiceUnavailableException e) {
// Not retried: the error is logged and the loop is left via break.
logger.error("The billing database has been disabled."
+ " To generate plots, please restart the service when"
+ " the billing database is once again available");
break;
} catch (NoRouteToCellException e) {
// Retried: the warning is emitted only when the RateLimiter `rate`
// grants a permit, then the loop sleeps 10 seconds and continues.
if (rate.tryAcquire()) {
logger.warn("No route to the billing service yet; "
+ "retrying every 10 seconds");
}
Thread.sleep(TimeUnit.SECONDS.toMillis(10));
}
}
} catch (InterruptedException interrupted) {
// Interruption ends the loop; it is only traced here and the thread's
// interrupt flag is not restored.
logger.trace("{} interrupted; exiting ...", refresher);
} catch (UndeclaredThrowableException ute) {
// A ServiceUnavailableException cause gets its own error message; an
// Error cause rethrows the wrapper; every other path (including the
// ServiceUnavailableException one) continues to the logging below.
Throwable cause = ute.getCause();
if (cause instanceof ServiceUnavailableException) {
logger.error("The billing database has been disabled."
+ " To generate plots, please restart the service when"
+ " the billing database is once again available");
} else if (cause instanceof Error) {
throw ute;
}
/*
* if the service can't handle the client's requests, then we
* back out here because there is nothing we can do
*/
logger.error("fatal billing request exception; "
+ "client loop is exiting");
logger.debug("refresh", ute);
}
}

Expand Down Expand Up @@ -360,7 +394,8 @@ public void shutDown() {
}

private void generatePlot(PlotType type, TimeFrame timeFrame,
String fileName, String title) {
String fileName, String title) throws ServiceUnavailableException,
NoRouteToCellException {
List<TimeFrameHistogramData> data = load(type, timeFrame);
List<HistogramWrapper<?>> config = new ArrayList<>();
int i = 0;
Expand Down Expand Up @@ -454,4 +489,4 @@ private void synchronizeTimeFramePlotProperties() {

logger.debug("plot properties are {}", properties.toJavaProperties());
}
}
}

0 comments on commit 5f575c7

Please sign in to comment.