Skip to content

Commit

Permalink
Improve logging on ledger dirs monitor to avoid log flooding
Browse files Browse the repository at this point in the history
Descriptions of the changes in this PR:

*Problem*

When a bookie is in readonly mode, the ledger dirs monitor keeps checking the disk usage and generates tons of logs even if the disk usage is unchanged. This makes debugging much more difficult.

*Solution*

- Improve the logging logic in the ledger dirs monitor to log only when the disk usage has changed.
- Disable logging when checking the threshold for high priority writes. Only log when the availability of high priority writes changes.

*Result*

This reduces the logging when a bookie is stuck in readonly mode during an outage.

Author: Sijie Guo <sijie@apache.org>

Reviewers: Enrico Olivelli <eolivelli@gmail.com>, Yiming Zang <yzang2016@gmail.com>, Matteo Merli <mmerli@apache.org>

This closes #1322 from sijie/improve_monitor_logging
  • Loading branch information
sijie committed Apr 9, 2018
1 parent 4aec3ce commit c760c56
Show file tree
Hide file tree
Showing 3 changed files with 31 additions and 10 deletions.
Expand Up @@ -31,6 +31,7 @@
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.atomic.AtomicBoolean;
import lombok.extern.slf4j.Slf4j;
import org.apache.bookkeeper.conf.ServerConfiguration;
import org.apache.bookkeeper.meta.MetadataBookieDriver;
import org.apache.bookkeeper.stats.Gauge;
Expand All @@ -43,6 +44,7 @@
/**
* An implementation of StateManager.
*/
@Slf4j
public class BookieStateManager implements StateManager {
private static final Logger LOG = LoggerFactory.getLogger(BookieStateManager.class);
private final ServerConfiguration conf;
Expand Down Expand Up @@ -142,6 +144,11 @@ public boolean isAvailableForHighPriorityWrites() {

/**
 * Updates the flag that tracks whether high priority writes are allowed.
 *
 * <p>A message is logged only when the new value differs from the stored one, so
 * repeated calls with an unchanged value produce no log output.
 *
 * @param available the new availability of high priority writes
 */
@Override
public void setHighPriorityWritesAvailability(boolean available) {
    final boolean wasAvailable = this.availableForHighPriorityWrites;
    if (wasAvailable != available) {
        // Only a transition is reported; the direction of the change picks the message.
        log.info(available
                ? "Enable high priority writes on readonly bookie."
                : "Disable high priority writes on readonly bookie.");
    }
    this.availableForHighPriorityWrites = available;
}

Expand Down
Expand Up @@ -178,10 +178,11 @@ public List<File> getWritableLedgerDirsForNewLog() throws NoWritableLedgerDirExc
// We don't have writable Ledger Dirs. But we are still okay to create new entry log files if we have enough
// disk space. This allows the bookie to keep functioning in readonly mode, because compaction and journal
// replays can still write data to disks.
return getDirsAboveUsableThresholdSize(minUsableSizeForEntryLogCreation);
return getDirsAboveUsableThresholdSize(minUsableSizeForEntryLogCreation, true);
}

List<File> getDirsAboveUsableThresholdSize(long thresholdSize) throws NoWritableLedgerDirException {
List<File> getDirsAboveUsableThresholdSize(long thresholdSize, boolean loggingNoWritable)
throws NoWritableLedgerDirException {
List<File> fullLedgerDirsToAccomodate = new ArrayList<File>();
for (File dir: this.ledgerDirectories) {
// Pick dirs which can accommodate little more than thresholdSize
Expand All @@ -191,16 +192,20 @@ List<File> getDirsAboveUsableThresholdSize(long thresholdSize) throws NoWritable
}

if (!fullLedgerDirsToAccomodate.isEmpty()) {
LOG.info("No writable ledger dirs below diskUsageThreshold. "
if (loggingNoWritable) {
LOG.info("No writable ledger dirs below diskUsageThreshold. "
+ "But Dirs that can accommodate {} are: {}", thresholdSize, fullLedgerDirsToAccomodate);
}
return fullLedgerDirsToAccomodate;
}

// We will reach here when we find no ledgerDir which has at least
// thresholdSize usable space
String errMsg = "All ledger directories are non writable and no reserved space (" + thresholdSize + ") left.";
NoWritableLedgerDirException e = new NoWritableLedgerDirException(errMsg);
LOG.error(errMsg, e);
if (loggingNoWritable) {
LOG.error(errMsg, e);
}
throw e;
}

Expand Down Expand Up @@ -306,7 +311,7 @@ File pickRandomWritableDirForNewIndexFile(File excludedDir) throws NoWritableLed
// That means we must have turned readonly. But
// during the Bookie restart, while replaying the journal there might be a need
// to create new Index file and it should proceed.
writableDirsForNewIndexFile = getDirsAboveUsableThresholdSize(minUsableSizeForIndexFileCreation);
writableDirsForNewIndexFile = getDirsAboveUsableThresholdSize(minUsableSizeForIndexFileCreation, true);
}
return pickRandomDir(writableDirsForNewIndexFile, excludedDir);
}
Expand Down
Expand Up @@ -82,14 +82,22 @@ private void check() {
listener.diskFailed(dir);
}
} catch (DiskWarnThresholdException e) {
LOG.warn("Ledger directory {} is almost full.", dir);
diskUsages.put(dir, e.getUsage());
diskUsages.compute(dir, (d, prevUsage) -> {
if (null == prevUsage || e.getUsage() != prevUsage) {
LOG.warn("Ledger directory {} is almost full : usage {}", dir, e.getUsage());
}
return e.getUsage();
});
for (LedgerDirsListener listener : ldm.getListeners()) {
listener.diskAlmostFull(dir);
}
} catch (DiskOutOfSpaceException e) {
LOG.error("Ledger directory {} is out-of-space.", dir);
diskUsages.put(dir, e.getUsage());
diskUsages.compute(dir, (d, prevUsage) -> {
if (null == prevUsage || e.getUsage() != prevUsage) {
LOG.error("Ledger directory {} is out-of-space : usage {}", dir, e.getUsage());
}
return e.getUsage();
});
// Notify disk full to all listeners
ldm.addToFilledDirs(dir);
}
Expand All @@ -102,7 +110,8 @@ private void check() {
} catch (NoWritableLedgerDirException e) {
boolean highPriorityWritesAllowed = true;
try {
ldm.getDirsAboveUsableThresholdSize(minUsableSizeForHighPriorityWrites);
// disk check can be frequent, so disable 'loggingNoWritable' to avoid log flooding.
ldm.getDirsAboveUsableThresholdSize(minUsableSizeForHighPriorityWrites, false);
} catch (NoWritableLedgerDirException e1) {
highPriorityWritesAllowed = false;
}
Expand Down

0 comments on commit c760c56

Please sign in to comment.