Skip to content

Commit

Permalink
BOOKKEEPER-1034: Bookie start in RO when diskfull
Browse files Browse the repository at this point in the history
When disk usage is above the threshold, the Bookie goes into read-only (RO) mode. If we have to restart the
bookie, on the way back up it tries to create a new entry log and other files,
which fails because disk usage is above the threshold,
so the bookie refuses to come up. With this fix, the bookie will try to start in RO
mode if RO mode is enabled.

Also, if the bookie has died abruptly, it may have missed flushing the EntryMemtable and
IndexInMemoryPageManager. So the next time it starts while the disk is full,
it fails to create an index file and shuts down, even though we expect it
to start in read-only mode. The Bookie should therefore be able to create an index file
even though it has reached the disk usage threshold, while starting in
read-only mode. Of course, there should be a config setting as a safeguard for when
the usable disk space is too low.

Minor fixes in shutdown logic of Bookie and Bookieserver.

Author: Charan Reddy Guttapalem <cguttapalem@salesforce.com>

Reviewers: Jia Zhai <None>, Sijie Guo <sijie@apache.org>

This closes #190 from reddycharan/bookiestartinreadonlywhendiskfull
  • Loading branch information
reddycharan authored and sijie committed Jun 28, 2017
1 parent fe8ded5 commit 9ddd9e6
Show file tree
Hide file tree
Showing 7 changed files with 293 additions and 58 deletions.
4 changes: 4 additions & 0 deletions bookkeeper-server/conf/bk_server.conf
Original file line number Diff line number Diff line change
Expand Up @@ -317,3 +317,7 @@ zkTimeout=10000

# Stats Provider Class
#statsProviderClass=org.apache.bookkeeper.stats.CodahaleMetricsProvider

# Minimum usable space (in bytes) that must be available in an index directory for the Bookie to create
# an index file while replaying the journal when it starts in read-only mode.
# If this setting is absent, the code falls back to 104857600 (100 * 1024 * 1024).
# minUsableSizeForIndexFileCreation=1073741824
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,17 @@
package org.apache.bookkeeper.bookie;

import static com.google.common.base.Charsets.UTF_8;

import com.google.common.util.concurrent.SettableFuture;
import io.netty.buffer.ByteBuf;
import io.netty.buffer.Unpooled;
import static org.apache.bookkeeper.bookie.BookKeeperServerStats.BOOKIE_ADD_ENTRY;
import static org.apache.bookkeeper.bookie.BookKeeperServerStats.BOOKIE_ADD_ENTRY_BYTES;
import static org.apache.bookkeeper.bookie.BookKeeperServerStats.BOOKIE_READ_ENTRY;
import static org.apache.bookkeeper.bookie.BookKeeperServerStats.BOOKIE_READ_ENTRY_BYTES;
import static org.apache.bookkeeper.bookie.BookKeeperServerStats.BOOKIE_RECOVERY_ADD_ENTRY;
import static org.apache.bookkeeper.bookie.BookKeeperServerStats.JOURNAL_SCOPE;
import static org.apache.bookkeeper.bookie.BookKeeperServerStats.LD_INDEX_SCOPE;
import static org.apache.bookkeeper.bookie.BookKeeperServerStats.LD_LEDGER_SCOPE;
import static org.apache.bookkeeper.bookie.BookKeeperServerStats.READ_BYTES;
import static org.apache.bookkeeper.bookie.BookKeeperServerStats.SERVER_STATUS;
import static org.apache.bookkeeper.bookie.BookKeeperServerStats.WRITE_BYTES;

import java.io.File;
import java.io.FileNotFoundException;
Expand All @@ -34,21 +41,20 @@
import java.net.InetSocketAddress;
import java.net.UnknownHostException;
import java.nio.ByteBuffer;
import java.util.*;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.concurrent.Callable;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.stream.Collectors;

import com.google.common.annotations.VisibleForTesting;
import com.google.common.collect.Lists;
import com.google.common.collect.Sets;
import com.google.common.util.concurrent.ThreadFactoryBuilder;
import org.apache.bookkeeper.bookie.Journal.JournalScanner;
import org.apache.bookkeeper.bookie.LedgerDirsManager.LedgerDirsListener;
import org.apache.bookkeeper.bookie.LedgerDirsManager.NoWritableLedgerDirException;
Expand All @@ -67,6 +73,7 @@
import org.apache.bookkeeper.util.DiskChecker;
import org.apache.bookkeeper.util.IOUtils;
import org.apache.bookkeeper.util.MathUtils;
import org.apache.bookkeeper.util.ZkUtils;
import org.apache.bookkeeper.util.collections.ConcurrentLongHashMap;
import org.apache.bookkeeper.versioning.Version;
import org.apache.bookkeeper.versioning.Versioned;
Expand All @@ -81,23 +88,19 @@
import org.apache.zookeeper.Watcher.Event.EventType;
import org.apache.zookeeper.Watcher.Event.KeeperState;
import org.apache.zookeeper.ZooKeeper;
import org.apache.zookeeper.data.ACL;
import org.apache.zookeeper.data.Stat;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import static org.apache.bookkeeper.bookie.BookKeeperServerStats.BOOKIE_ADD_ENTRY;
import static org.apache.bookkeeper.bookie.BookKeeperServerStats.BOOKIE_ADD_ENTRY_BYTES;
import static org.apache.bookkeeper.bookie.BookKeeperServerStats.BOOKIE_READ_ENTRY;
import static org.apache.bookkeeper.bookie.BookKeeperServerStats.BOOKIE_READ_ENTRY_BYTES;
import static org.apache.bookkeeper.bookie.BookKeeperServerStats.BOOKIE_RECOVERY_ADD_ENTRY;
import static org.apache.bookkeeper.bookie.BookKeeperServerStats.LD_LEDGER_SCOPE;
import static org.apache.bookkeeper.bookie.BookKeeperServerStats.LD_INDEX_SCOPE;
import static org.apache.bookkeeper.bookie.BookKeeperServerStats.READ_BYTES;
import static org.apache.bookkeeper.bookie.BookKeeperServerStats.SERVER_STATUS;
import static org.apache.bookkeeper.bookie.BookKeeperServerStats.WRITE_BYTES;
import static org.apache.bookkeeper.bookie.BookKeeperServerStats.JOURNAL_SCOPE;
import org.apache.bookkeeper.util.ZkUtils;
import org.apache.zookeeper.data.ACL;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.collect.Lists;
import com.google.common.collect.Sets;
import com.google.common.util.concurrent.SettableFuture;
import com.google.common.util.concurrent.ThreadFactoryBuilder;

import io.netty.buffer.ByteBuf;
import io.netty.buffer.Unpooled;

/**
* Implements a bookie.
Expand Down Expand Up @@ -613,15 +616,33 @@ public Bookie(ServerConfiguration conf, StatsLogger statsLogger)
this.ledgerMonitor = new LedgerDirsMonitor(conf,
new DiskChecker(conf.getDiskUsageThreshold(), conf.getDiskUsageWarnThreshold()),
ledgerDirsManager);
this.ledgerMonitor.init();

try {
this.ledgerMonitor.init();
} catch (NoWritableLedgerDirException nle) {
// start in read-only mode if no writable dirs and read-only allowed
if(!conf.isReadOnlyModeEnabled()) {
throw nle;
} else {
this.transitionToReadOnlyMode();
}
}

if (null == idxDirs) {
this.idxMonitor = this.ledgerMonitor;
} else {
this.idxMonitor = new LedgerDirsMonitor(conf,
new DiskChecker(conf.getDiskUsageThreshold(), conf.getDiskUsageWarnThreshold()),
indexDirsManager);
this.idxMonitor.init();
try {
this.idxMonitor.init();
} catch (NoWritableLedgerDirException nle) {
// start in read-only mode if no writable dirs and read-only allowed
if(!conf.isReadOnlyModeEnabled()) {
throw nle;
} else {
this.transitionToReadOnlyMode();
}
}
}

// ZK ephemeral node for this Bookie.
Expand Down Expand Up @@ -1225,9 +1246,10 @@ synchronized int shutdown(int exitCode) {
if (indexDirsManager != ledgerDirsManager) {
idxMonitor.shutdown();
}

// Shutdown the ZK client
if(zk != null) zk.close();
}
// Shutdown the ZK client
if (zk != null) {
zk.close();
}
} catch (InterruptedException ie) {
LOG.error("Interrupted during shutting down bookie : ", ie);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -177,7 +177,7 @@ private FileInfo putFileInfo(Long ledger, byte masterKey[], File lf, boolean cre
*/
private File getNewLedgerIndexFile(Long ledger, File excludedDir)
throws NoWritableLedgerDirException {
File dir = ledgerDirsManager.pickRandomWritableDir(excludedDir);
File dir = ledgerDirsManager.pickRandomWritableDirForNewIndexFile(excludedDir);
String ledgerName = getLedgerName(ledger);
return new File(dir, ledgerName);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ public class LedgerDirsManager {
new ConcurrentHashMap<File, Float>();
private final long entryLogSize;
private boolean forceGCAllowWhenNoSpace;
private long minUsableSizeForIndexFileCreation;

public LedgerDirsManager(ServerConfiguration conf, File[] dirs) {
this(conf, dirs, NullStatsLogger.INSTANCE);
Expand All @@ -70,6 +71,7 @@ public LedgerDirsManager(ServerConfiguration conf, File[] dirs) {
this.listeners = new ArrayList<LedgerDirsListener>();
this.forceGCAllowWhenNoSpace = conf.getIsForceGCAllowWhenNoSpace();
this.entryLogSize = conf.getEntryLogSizeLimit();
this.minUsableSizeForIndexFileCreation = conf.getMinUsableSizeForIndexFileCreation();
for (File dir : dirs) {
diskUsages.put(dir, 0f);
String statName = "dir_" + dir.getPath().replace('/', '_') + "_usage";
Expand Down Expand Up @@ -172,9 +174,7 @@ public boolean hasWritableLedgerDirs() {
return !writableLedgerDirectories.isEmpty();
}

public List<File> getWritableLedgerDirsForNewLog()
throws NoWritableLedgerDirException {

public List<File> getWritableLedgerDirsForNewLog() throws NoWritableLedgerDirException {
if (!writableLedgerDirectories.isEmpty()) {
return writableLedgerDirectories;
}
Expand All @@ -191,27 +191,32 @@ public List<File> getWritableLedgerDirsForNewLog()
// That means we must have turned readonly but the compaction
// must have started running and it needs to allocate
// a new log file to move forward with the compaction.
List<File> fullLedgerDirsToAccomodateNewEntryLog = new ArrayList<File>();
return getDirsAboveUsableThresholdSize((long) (this.entryLogSize * 1.2));
}

List<File> getDirsAboveUsableThresholdSize(long thresholdSize) throws NoWritableLedgerDirException {
List<File> fullLedgerDirsToAccomodate = new ArrayList<File>();
for (File dir: this.ledgerDirectories) {
// Pick dirs which can accommodate little more than an entry log.
if (dir.getUsableSpace() > (this.entryLogSize * 1.2) ) {
fullLedgerDirsToAccomodateNewEntryLog.add(dir);
// Pick dirs which can accommodate little more than thresholdSize
if (dir.getUsableSpace() > (thresholdSize) ) {
fullLedgerDirsToAccomodate.add(dir);
}
}

if (!fullLedgerDirsToAccomodateNewEntryLog.isEmpty()) {
LOG.info("No writable ledger dirs. Trying to go beyond to accomodate compaction."
+ "Dirs that can accomodate new entryLog are: {}", fullLedgerDirsToAccomodateNewEntryLog);
return fullLedgerDirsToAccomodateNewEntryLog;
if (!fullLedgerDirsToAccomodate.isEmpty()) {
LOG.info("No writable ledger dirs below diskUsageThreshold. "
+ "But Dirs that can accomodate {} are: {}", thresholdSize, fullLedgerDirsToAccomodate);
return fullLedgerDirsToAccomodate;
}

// We will reach here when we have no option of creating a new log file for compaction
String errMsg = "All ledger directories are non writable and no reserved space left for creating entry log file.";
// We will reach here when we find no ledgerDir which has atleast
// thresholdSize usable space
String errMsg = "All ledger directories are non writable and no reserved space (" + thresholdSize + ") left.";
NoWritableLedgerDirException e = new NoWritableLedgerDirException(errMsg);
LOG.error(errMsg, e);
throw e;
}

/**
* @return full-filled ledger dirs.
*/
Expand Down Expand Up @@ -294,20 +299,47 @@ File pickRandomWritableDir() throws NoWritableLedgerDirException {
*/
File pickRandomWritableDir(File excludedDir) throws NoWritableLedgerDirException {
List<File> writableDirs = getWritableLedgerDirs();
return pickRandomDir(writableDirs, excludedDir);
}

final int start = rand.nextInt(writableDirs.size());
/**
 * Chooses, at random, a directory in which a new index file may be created.
 *
 * <p>If writableLedgerDirectories is not empty, the pick is made from it. Otherwise the
 * candidates are the dirs returned by getDirsAboveUsableThresholdSize for
 * minUsableSizeForIndexFileCreation, so that an index file can still be created
 * while the journal is replayed during a Bookie restart in read-only mode.
 *
 * @param excludedDir
 *          The directory to exclude during pickup.
 * @return the directory that was picked
 * @throws NoWritableLedgerDirException if there is no dir available.
 */
File pickRandomWritableDirForNewIndexFile(File excludedDir) throws NoWritableLedgerDirException {
    if (writableLedgerDirectories.isEmpty()) {
        // No writable index dirs, so we must have turned read-only. Journal replay
        // during a Bookie restart may still need a new index file, so fall back to
        // the dirs that have enough usable space left.
        List<File> dirsWithEnoughUsableSpace =
                getDirsAboveUsableThresholdSize(minUsableSizeForIndexFileCreation);
        return pickRandomDir(dirsWithEnoughUsableSpace, excludedDir);
    }
    return pickRandomDir(writableLedgerDirectories, excludedDir);
}

File pickRandomDir(List<File> dirs, File excludedDir) throws NoWritableLedgerDirException{
final int start = rand.nextInt(dirs.size());
int idx = start;
File candidate = writableDirs.get(idx);
File candidate = dirs.get(idx);
while (null != excludedDir && excludedDir.equals(candidate)) {
idx = (idx + 1) % writableDirs.size();
idx = (idx + 1) % dirs.size();
if (idx == start) {
// after searching all available dirs,
// no writable dir is found
throw new NoWritableLedgerDirException("No writable directories found from "
+ " available writable dirs (" + writableDirs + ") : exclude dir "
+ " available writable dirs (" + dirs + ") : exclude dir "
+ excludedDir);
}
candidate = writableDirs.get(idx);
candidate = dirs.get(idx);
}
return candidate;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,8 @@ public class ServerConfiguration extends AbstractConfiguration {

// Bookie auth provider factory class name
protected final static String BOOKIE_AUTH_PROVIDER_FACTORY_CLASS = "bookieAuthProviderFactoryClass";

protected final static String MIN_USABLESIZE_FOR_INDEXFILE_CREATION = "minUsableSizeForIndexFileCreation";

/**
* Construct a default configuration object
Expand Down Expand Up @@ -1939,4 +1941,26 @@ public ServerConfiguration setNettyMaxFrameSizeBytes(int maxSize) {
super.setNettyMaxFrameSizeBytes(maxSize);
return this;
}

/**
 * Returns the minimum usable space, in bytes, that must be available in an index directory
 * for the Bookie to create an index file while replaying the journal when it starts in
 * read-only mode.
 *
 * @return the configured value, or 100 * 1024 * 1024 bytes when the setting is absent
 */
public long getMinUsableSizeForIndexFileCreation() {
    // Fallback used when the key is not present in the configuration.
    final long defaultMinUsableSize = 100L * 1024 * 1024;
    return getLong(MIN_USABLESIZE_FOR_INDEXFILE_CREATION, defaultMinUsableSize);
}

/**
 * Stores the minimum usable space, in bytes, that must be available in an index directory
 * for the Bookie to create an index file while replaying the journal when it starts in
 * read-only mode.
 *
 * @param minUsableSizeForIndexFileCreation
 *          the minimum usable size in bytes; stored as its decimal string form
 * @return this configuration object
 */
public ServerConfiguration setMinUsableSizeForIndexFileCreation(long minUsableSizeForIndexFileCreation) {
    final String sizeAsText = String.valueOf(minUsableSizeForIndexFileCreation);
    setProperty(MIN_USABLESIZE_FOR_INDEXFILE_CREATION, sizeAsText);
    return this;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -161,11 +161,11 @@ public void resumeProcessing() {
}

public synchronized void shutdown() {
LOG.info("Shutting down BookieServer");
this.nettyServer.shutdown();
if (!running) {
return;
}
LOG.info("Shutting down BookieServer");
this.nettyServer.shutdown();
exitCode = bookie.shutdown();
if (isAutoRecoveryDaemonEnabled && this.autoRecoveryMain != null) {
this.autoRecoveryMain.shutdown();
Expand Down
Loading

0 comments on commit 9ddd9e6

Please sign in to comment.