From 3c3280689546f977cf803a797280a6c05da213a9 Mon Sep 17 00:00:00 2001 From: Yi WU Date: Thu, 2 Apr 2026 07:25:39 -0700 Subject: [PATCH] [fix](editlog) Fix BDBJEJournal.write() advancing journal ID before successful write Both single-item write() and batch write() call nextJournalId.getAndIncrement() / getAndAdd() before the actual BDB put/commit. If the write fails (e.g. ReplicaWriteException, InsufficientAcksException), the journal IDs are permanently consumed but no journal entry exists for them. On retry, the write uses a new ID, creating gaps in the journal ID sequence. These gaps can confuse the replayer which expects contiguous journal IDs, potentially causing it to skip entries or fail to find expected journals. Fix: read the current ID with nextJournalId.get() before writing, and only advance the ID with incrementAndGet()/addAndGet() after the write succeeds. Both write methods are synchronized, so the get-then-advance pattern is safe. --- .../org/apache/doris/journal/bdbje/BDBJEJournal.java | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/journal/bdbje/BDBJEJournal.java b/fe/fe-core/src/main/java/org/apache/doris/journal/bdbje/BDBJEJournal.java index f4234cc462a172..a573251aadca4a 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/journal/bdbje/BDBJEJournal.java +++ b/fe/fe-core/src/main/java/org/apache/doris/journal/bdbje/BDBJEJournal.java @@ -131,7 +131,8 @@ public synchronized long write(JournalBatch batch) throws IOException { List entities = batch.getJournalEntities(); int entitySize = entities.size(); long dataSize = 0; - long firstId = nextJournalId.getAndAdd(entitySize); + // Reserve IDs only after successful commit to avoid burning IDs on write failure. + long firstId = nextJournalId.get(); // Write the journals to bdb. for (int i = 0; i < RETRY_TIME; i++) { @@ -155,6 +156,7 @@ public synchronized long write(JournalBatch batch) throws IOException { txn.commit(); txn = null; + nextJournalId.addAndGet(entitySize); if (MetricRepo.isInit) { MetricRepo.COUNTER_EDIT_LOG_SIZE_BYTES.increase(dataSize); @@ -237,8 +239,9 @@ public synchronized long write(short op, Writable writable) throws IOException { entity.setOpCode(op); entity.setData(writable); - // id is the key - long id = nextJournalId.getAndIncrement(); + // id is the key. Reserve ID only after successful write to avoid burning IDs on failure. + // This is safe because the method is synchronized. + long id = nextJournalId.get(); DatabaseEntry theKey = idToKey(id); // entity is the value @@ -273,6 +276,7 @@ public synchronized long write(short op, Writable writable) throws IOException { // Parameter null means auto commit if (currentJournalDB.put(null, theKey, theData) == OperationStatus.SUCCESS) { writeSucceed = true; + nextJournalId.incrementAndGet(); if (LOG.isDebugEnabled()) { LOG.debug("master write journal {} finished. db name {}, current time {}", id, currentJournalDB.getDatabaseName(), System.currentTimeMillis());