Skip to content

Commit

Permalink
ZOOKEEPER-2332: Fix server failed to start for empty txn log
Browse files Browse the repository at this point in the history
If the server fails right after creating a new txn log but before writing
the file header, the txn log file is left empty. When the server starts up
again, it reads the file and fails to load the database because the header
is not recognized.

The patch detects whether the unreadable txn log is the last one in the
database. If it is, and the file is empty, the patch deletes the file and
rethrows the error, so the server has to be restarted to reload the database.

When one of the txn logs in the middle of the database is empty, the server
will still refuse to start up, since that is a real corruption.
  • Loading branch information
fanyang89 committed Feb 20, 2024
1 parent 7074448 commit 2a515d4
Show file tree
Hide file tree
Showing 2 changed files with 95 additions and 3 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -227,6 +227,13 @@ public synchronized long getTotalLogSize() {
return prevLogsRunningTotal + getCurrentLogSize();
}

/**
* Returns the transaction log size limit currently stored in the static
* {@code txnLogSizeLimit} field.
*
* NOTE(review): the unit is presumably bytes — confirm against the setter.
*
* @return the current value of {@code txnLogSizeLimit}
*/
public static long getTxnLogSizeLimit() {
return txnLogSizeLimit;
}

/**
* creates a checksum algorithm to be used
* @return the checksum used for this txnlog
Expand Down Expand Up @@ -701,14 +708,26 @@ public long getStorageSize() {

/**
* go to the next logfile
*
* @return true if there is one and false if there is no
* new file to be read
* @throws IOException
*/
private boolean goToNextLog() throws IOException {
if (storedFiles.size() > 0) {
if (!storedFiles.isEmpty()) {
this.logFile = storedFiles.remove(storedFiles.size() - 1);
ia = createInputArchive(this.logFile);
try {
ia = createInputArchive(this.logFile);
} catch (EOFException ex) {
// If this file is the last log file in the database and is empty,
// it means the file was created during the previous run, but the
// server failed before the header was written.
if (storedFiles.isEmpty() && this.logFile.length() == 0) {
boolean deleted = this.logFile.delete();
LOG.warn("Delete empty log file at the tail to recover from corruption. file: {}, deleted: {}",
this.logFile.getName(), deleted);
}
throw ex;
}
return true;
}
return false;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,11 +25,17 @@
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertTrue;
import static org.mockito.Mockito.mock;
import java.io.EOFException;
import java.io.File;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.Arrays;
import java.util.Comparator;
import java.util.HashSet;
import java.util.List;
import java.util.Objects;
import java.util.Random;
import java.util.stream.Collectors;
import org.apache.jute.Record;
import org.apache.zookeeper.CreateMode;
import org.apache.zookeeper.DummyWatcher;
Expand Down Expand Up @@ -296,6 +302,73 @@ public void testLogSizeLimit(@TempDir File tmpDir) throws Exception {
}
}

/**
 * Writes {@code n} create transactions, with zxids 1 through {@code n}, into a
 * {@link FileTxnLog} rooted at {@code dir}, committing after every append.
 *
 * <p>Sets the static txn log size limit to 1 before writing and does not
 * restore it; callers that care about the previous limit must save and
 * restore it themselves.
 * NOTE(review): the limit of 1 presumably forces a new log file to be rolled
 * for each transaction — confirm against FileTxnLog.
 *
 * @param dir directory in which the txn log files are created
 * @param n number of transactions to append
 * @throws IOException if appending, committing or closing the log fails
 */
private void prepareTxnLogs(File dir, int n) throws IOException {
    FileTxnLog.setTxnLogSizeLimit(1);
    FileTxnLog log = new FileTxnLog(dir);
    try {
        CreateRequest record = new CreateRequest(null, new byte[NODE_SIZE],
            ZooDefs.Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT.toFlag());
        for (int zxid = 1; zxid <= n; zxid++) {
            log.append(new Request(0, 0, 0, new TxnHeader(0, 0, zxid, 0, -1), record, zxid));
            log.commit();
        }
    } finally {
        // Close the log even when an append or commit fails, so no file handle is left open.
        log.close();
    }
}

/**
 * Builds a small txn log database, truncates one of its log files to zero
 * bytes and then reads the database back through
 * {@link FileTxnLog.FileTxnIterator}.
 *
 * <p>When {@code clearTail} is true the last log file (by file name) is
 * emptied, and a second read pass must complete without throwing. When it is
 * false the second-to-last file is emptied, and the first read pass must fail
 * with an {@link EOFException}.
 *
 * @param clearTail true to empty the last log file, false to empty the one before it
 * @throws IOException if preparing or truncating the logs fails, or if reading
 *         them fails in a way the scenario does not expect
 */
public void testEmptyTxnLog(boolean clearTail) throws IOException {
    // prepare a database with logs
    File tmpDir = ClientBase.createTmpDir();
    LOG.info("tmp dir: {}", tmpDir.getPath());
    ClientBase.setupTestEnv();
    prepareTxnLogs(tmpDir, 4);

    // collect the log files, ordered by file name
    List<File> files = Arrays
        .stream(Objects.requireNonNull(tmpDir.listFiles((File f, String name) -> name.startsWith("log."))))
        .sorted(Comparator.comparing(File::getName))
        .collect(Collectors.toList());
    // Both scenarios index from the end of the list; fail with a clear message
    // instead of an IndexOutOfBoundsException if too few files were produced.
    assertTrue(files.size() >= 2, "Expected at least 2 txn log files but found " + files.size());

    // clear the log
    File toClear = files.get(files.size() - (clearTail ? 1 : 2));
    // Opening a PrintWriter on an existing file truncates it to zero length.
    new PrintWriter(toClear).close();
    LOG.info("Txn log file {} cleared", toClear.getName());

    // open txn log; the iterator is closed even when reading fails
    boolean isEof = false;
    try (FileTxnLog.FileTxnIterator itr = new FileTxnLog.FileTxnIterator(tmpDir, 0x0, false)) {
        while (itr.next()) {
            // drain every transaction
        }
    } catch (EOFException ex) {
        isEof = true;
    }

    if (clearTail) {
        // A second pass over the database must now succeed.
        try (FileTxnLog.FileTxnIterator itr = new FileTxnLog.FileTxnIterator(tmpDir, 0x0, false)) {
            while (itr.next()) {
                // drain every transaction
            }
        }
    } else {
        assertTrue(isEof, "Mid txn log file empty should throw Exception");
    }
}

/**
 * Runs the empty-txn-log scenario with the last log file emptied.
 *
 * @throws IOException if the scenario fails with an unexpected I/O error
 */
@Test
public void testEmptyTailTxnLog() throws IOException {
    long limit = FileTxnLog.getTxnLogSizeLimit();
    try {
        testEmptyTxnLog(true);
    } finally {
        // Restore the static limit even when the scenario fails, so the
        // changed value does not leak into other tests.
        FileTxnLog.setTxnLogSizeLimit(limit);
    }
}

/**
 * Runs the empty-txn-log scenario with the second-to-last log file emptied.
 *
 * @throws IOException if the scenario fails with an unexpected I/O error
 */
@Test
public void testEmptyMidTxnLog() throws IOException {
    long limit = FileTxnLog.getTxnLogSizeLimit();
    try {
        testEmptyTxnLog(false);
    } finally {
        // Restore the static limit even when the scenario fails, so the
        // changed value does not leak into other tests.
        FileTxnLog.setTxnLogSizeLimit(limit);
    }
}

private int calculateSingleRecordLength(TxnHeader txnHeader, Record record) throws IOException {
int crcLength = 8;
int dataLength = 4;
Expand Down

0 comments on commit 2a515d4

Please sign in to comment.