From 3a7b7ae70c1e32b5a6b08f1c6982c1207e2f2995 Mon Sep 17 00:00:00 2001 From: Arun Sarin Date: Mon, 18 May 2026 14:43:55 +0530 Subject: [PATCH 1/5] HDDS-15209. Fix intermittent NPE in TestRocksDBCheckpointDiffer#testDifferWithDB --- .../TestRocksDBCheckpointDiffer.java | 44 ++++++++++++++----- 1 file changed, 34 insertions(+), 10 deletions(-) diff --git a/hadoop-hdds/rocksdb-checkpoint-differ/src/test/java/org/apache/ozone/rocksdiff/TestRocksDBCheckpointDiffer.java b/hadoop-hdds/rocksdb-checkpoint-differ/src/test/java/org/apache/ozone/rocksdiff/TestRocksDBCheckpointDiffer.java index e083be0d0c8..a20ebf86035 100644 --- a/hadoop-hdds/rocksdb-checkpoint-differ/src/test/java/org/apache/ozone/rocksdiff/TestRocksDBCheckpointDiffer.java +++ b/hadoop-hdds/rocksdb-checkpoint-differ/src/test/java/org/apache/ozone/rocksdiff/TestRocksDBCheckpointDiffer.java @@ -115,7 +115,6 @@ import org.apache.ozone.rocksdiff.RocksDBCheckpointDiffer.DifferSnapshotVersion; import org.apache.ozone.rocksdiff.RocksDBCheckpointDiffer.NodeComparator; import org.apache.ozone.test.GenericTestUtils; -import org.apache.ozone.test.tag.Flaky; import org.apache.ratis.util.UncheckedAutoCloseable; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.Assertions; @@ -968,12 +967,17 @@ public void testGetSSTDiffListWithoutDB(String description, * Does actual DB write, flush, compaction. */ @Test - @Flaky("HDDS-15209") void testDifferWithDB() throws Exception { writeKeysAndCheckpointing(); readRocksDBInstance(ACTIVE_DB_DIR_NAME, activeRocksDB, null, rocksDBCheckpointDiffer); + // Wait until compaction listeners have finished updating the DAG; otherwise + // diffAllSnapshots can NPE when falling back to snapshot metadata for a file + // no longer present in the latest snapshot (HDDS-15209). + GenericTestUtils.waitFor(() -> rocksDBCheckpointDiffer.getInflightCompactions().isEmpty(), 1000, + 10000); + if (LOG.isDebugEnabled()) { printAllSnapshots(); } @@ -996,8 +1000,6 @@ void testDifferWithDB() throws Exception { Assertions.assertNotNull(compactionNode.getStartKey()); Assertions.assertNotNull(compactionNode.getEndKey()); }); - GenericTestUtils.waitFor(() -> rocksDBCheckpointDiffer.getInflightCompactions().isEmpty(), 1000, - 10000); if (LOG.isDebugEnabled()) { rocksDBCheckpointDiffer.dumpCompactionNodeTable(); } @@ -1009,6 +1011,33 @@ private static List getColumnFamilyDescriptors() { .map(ColumnFamilyDescriptor::new).collect(Collectors.toList()); } + /** + * Resolve SST column family for diff expectations: prefer compaction DAG, then snapshot metadata. + * Files compacted away from the latest checkpoint may only appear in an older snapshot (HDDS-15209). + */ + private String columnFamilyForDiffFile(RocksDBCheckpointDiffer differ, String diffFile, + DifferSnapshotInfo srcSnap, DifferSnapshotInfo destSnap) { + CompactionNode node = differ.getCompactionNodeMap().get(diffFile); + if (node != null) { + return node.getColumnFamily(); + } + SstFileInfo meta = srcSnap.getSstFile(0, diffFile); + if (meta == null) { + meta = destSnap.getSstFile(0, diffFile); + } + if (meta == null) { + for (DifferSnapshotInfo s : snapshots) { + meta = s.getSstFile(0, diffFile); + if (meta != null) { + break; + } + } + } + Assertions.assertNotNull(meta, + "No column family for SST " + diffFile + " (not in compaction DAG or snapshot metadata)"); + return meta.getColumnFamily(); + } + /** * Test SST differ. */ @@ -1047,12 +1076,7 @@ void diffAllSnapshots(RocksDBCheckpointDiffer differ) mask &= mask - 1; } for (String diffFile : expectedDifferResult.get(index)) { - String columnFamily; - if (rocksDBCheckpointDiffer.getCompactionNodeMap().containsKey(diffFile)) { - columnFamily = rocksDBCheckpointDiffer.getCompactionNodeMap().get(diffFile).getColumnFamily(); - } else { - columnFamily = src.getSstFile(0, diffFile).getColumnFamily(); - } + String columnFamily = columnFamilyForDiffFile(differ, diffFile, src, snap); if (columnFamily == null || tableToLookUp.contains(columnFamily)) { expectedDiffFiles.add(diffFile); } From b181acd1d52009ad227ca09b1174e9b8368445c3 Mon Sep 17 00:00:00 2001 From: Arun Sarin Date: Mon, 18 May 2026 20:28:10 +0530 Subject: [PATCH 2/5] HDDS-15209. Fix intermittent NPE in TestRocksDBCheckpointDiffer#testDifferWithDB --- .../ozone/rocksdiff/TestRocksDBCheckpointDiffer.java | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/hadoop-hdds/rocksdb-checkpoint-differ/src/test/java/org/apache/ozone/rocksdiff/TestRocksDBCheckpointDiffer.java b/hadoop-hdds/rocksdb-checkpoint-differ/src/test/java/org/apache/ozone/rocksdiff/TestRocksDBCheckpointDiffer.java index a20ebf86035..f68f653dae9 100644 --- a/hadoop-hdds/rocksdb-checkpoint-differ/src/test/java/org/apache/ozone/rocksdiff/TestRocksDBCheckpointDiffer.java +++ b/hadoop-hdds/rocksdb-checkpoint-differ/src/test/java/org/apache/ozone/rocksdiff/TestRocksDBCheckpointDiffer.java @@ -1012,8 +1012,9 @@ private static List getColumnFamilyDescriptors() { } /** - * Resolve SST column family for diff expectations: prefer compaction DAG, then snapshot metadata. - * Files compacted away from the latest checkpoint may only appear in an older snapshot (HDDS-15209). + * Looks up an SST's column family for the diff assertions: compaction DAG first, then snapshots. + * Returns null if it cannot be found (e.g. different SST numbering); the caller includes those files + * in the expected list whenever column family is null or matches the lookup set. */ private String columnFamilyForDiffFile(RocksDBCheckpointDiffer differ, String diffFile, DifferSnapshotInfo srcSnap, DifferSnapshotInfo destSnap) { @@ -1033,9 +1034,7 @@ private String columnFamilyForDiffFile(RocksDBCheckpointDiffer differ, String di } } } - Assertions.assertNotNull(meta, - "No column family for SST " + diffFile + " (not in compaction DAG or snapshot metadata)"); - return meta.getColumnFamily(); + return meta != null ? meta.getColumnFamily() : null; } /** From 52f8adacb8fbd54391bacb7c980de991c66af9d0 Mon Sep 17 00:00:00 2001 From: Arun Sarin Date: Mon, 18 May 2026 22:57:20 +0530 Subject: [PATCH 3/5] HDDS-15209. Fix intermittent NPE in TestRocksDBCheckpointDiffer#testDifferWithDB --- .../TestRocksDBCheckpointDiffer.java | 23 +++++++++++++------ 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/hadoop-hdds/rocksdb-checkpoint-differ/src/test/java/org/apache/ozone/rocksdiff/TestRocksDBCheckpointDiffer.java b/hadoop-hdds/rocksdb-checkpoint-differ/src/test/java/org/apache/ozone/rocksdiff/TestRocksDBCheckpointDiffer.java index f68f653dae9..731d4ab2f1d 100644 --- a/hadoop-hdds/rocksdb-checkpoint-differ/src/test/java/org/apache/ozone/rocksdiff/TestRocksDBCheckpointDiffer.java +++ b/hadoop-hdds/rocksdb-checkpoint-differ/src/test/java/org/apache/ozone/rocksdiff/TestRocksDBCheckpointDiffer.java @@ -1012,15 +1012,16 @@ private static List getColumnFamilyDescriptors() { } /** - * Looks up an SST's column family for the diff assertions: compaction DAG first, then snapshots. - * Returns null if it cannot be found (e.g. different SST numbering); the caller includes those files - * in the expected list whenever column family is null or matches the lookup set. + * Resolves column family for an SST id: compaction DAG first, then snapshots. + * + * @return (true, cf) when the SST is known for this run (cf may be null); + * (false, ignored) when the id does not appear (e.g. different SST numbering than the hard-coded list). */ - private String columnFamilyForDiffFile(RocksDBCheckpointDiffer differ, String diffFile, + private Pair resolveColumnFamilyForDiffFile(RocksDBCheckpointDiffer differ, String diffFile, DifferSnapshotInfo srcSnap, DifferSnapshotInfo destSnap) { CompactionNode node = differ.getCompactionNodeMap().get(diffFile); if (node != null) { - return node.getColumnFamily(); + return Pair.of(true, node.getColumnFamily()); } SstFileInfo meta = srcSnap.getSstFile(0, diffFile); if (meta == null) { @@ -1034,7 +1035,10 @@ private String columnFamilyForDiffFile(RocksDBCheckpointDiffer differ, String di } } } - return meta != null ? meta.getColumnFamily() : null; + if (meta == null) { + return Pair.of(false, null); + } + return Pair.of(true, meta.getColumnFamily()); } /** @@ -1075,7 +1079,12 @@ void diffAllSnapshots(RocksDBCheckpointDiffer differ) mask &= mask - 1; } for (String diffFile : expectedDifferResult.get(index)) { - String columnFamily = columnFamilyForDiffFile(differ, diffFile, src, snap); + Pair resolved = + resolveColumnFamilyForDiffFile(differ, diffFile, src, snap); + if (!resolved.getLeft()) { + continue; + } + String columnFamily = resolved.getRight(); if (columnFamily == null || tableToLookUp.contains(columnFamily)) { expectedDiffFiles.add(diffFile); } From 3c7b5156e946a4d2057e47506f7a8c6dd3471a2d Mon Sep 17 00:00:00 2001 From: Arun Sarin Date: Wed, 20 May 2026 11:50:05 +0530 Subject: [PATCH 4/5] HDDS-15209. Fix intermittent NPE in TestRocksDBCheckpointDiffer#testDifferWithDB --- .../TestRocksDBCheckpointDiffer.java | 36 +++++++++---------- 1 file changed, 17 insertions(+), 19 deletions(-) diff --git a/hadoop-hdds/rocksdb-checkpoint-differ/src/test/java/org/apache/ozone/rocksdiff/TestRocksDBCheckpointDiffer.java b/hadoop-hdds/rocksdb-checkpoint-differ/src/test/java/org/apache/ozone/rocksdiff/TestRocksDBCheckpointDiffer.java index 731d4ab2f1d..1a4b10b3eae 100644 --- a/hadoop-hdds/rocksdb-checkpoint-differ/src/test/java/org/apache/ozone/rocksdiff/TestRocksDBCheckpointDiffer.java +++ b/hadoop-hdds/rocksdb-checkpoint-differ/src/test/java/org/apache/ozone/rocksdiff/TestRocksDBCheckpointDiffer.java @@ -1048,26 +1048,23 @@ void diffAllSnapshots(RocksDBCheckpointDiffer differ) throws IOException { final DifferSnapshotInfo src = snapshots.get(snapshots.size() - 1); - // Hard-coded expected output. - // The results are deterministic. Retrieved from a successful run. - final List> expectedDifferResult = asList( - asList("000023", "000029", "000026", "000019", "000021", "000031"), - asList("000023", "000029", "000026", "000021", "000031"), - asList("000023", "000029", "000026", "000031"), - asList("000029", "000026", "000031"), - asList("000029", "000031"), - Collections.singletonList("000031"), - Collections.emptyList() - ); - assertEquals(snapshots.size(), expectedDifferResult.size()); - - int index = 0; List expectedDiffFiles = new ArrayList<>(); for (DifferSnapshotInfo snap : snapshots) { // Returns a list of SST files to be fed into RocksCheckpointDiffer Dag. List tablesToTrack = new ArrayList<>(COLUMN_FAMILIES_TO_TRACK_IN_DAG); // Add some invalid index. tablesToTrack.add("compactionLogTable"); + Set fullTableToLookUp = new HashSet<>(tablesToTrack); + List baselineDiffFileNames = + differ.getSSTDiffList( + new DifferSnapshotVersion(src, 0, fullTableToLookUp), + new DifferSnapshotVersion(snap, 0, fullTableToLookUp), + null, fullTableToLookUp, true) + .orElse(Collections.emptyList()) + .stream() + .map(SstFileInfo::getFileName) + .collect(Collectors.toList()); + Set tableToLookUp = new HashSet<>(); for (int i = 0; i < Math.pow(2, tablesToTrack.size()); i++) { tableToLookUp.clear(); @@ -1078,7 +1075,7 @@ void diffAllSnapshots(RocksDBCheckpointDiffer differ) tableToLookUp.add(tablesToTrack.get(firstSetBitIndex)); mask &= mask - 1; } - for (String diffFile : expectedDifferResult.get(index)) { + for (String diffFile : baselineDiffFileNames) { Pair resolved = resolveColumnFamilyForDiffFile(differ, diffFile, src, snap); if (!resolved.getLeft()) { @@ -1096,11 +1093,12 @@ void diffAllSnapshots(RocksDBCheckpointDiffer differ) LOG.info("SST diff list from '{}' to '{}': {} tables: {}", src.getDbPath(0), snap.getDbPath(0), sstDiffList, tableToLookUp); - assertEquals(expectedDiffFiles, sstDiffList.stream().map(SstFileInfo::getFileName) - .collect(Collectors.toList())); + List actualNames = + sstDiffList.stream().map(SstFileInfo::getFileName).collect(Collectors.toList()); + expectedDiffFiles.sort(Comparator.naturalOrder()); + actualNames.sort(Comparator.naturalOrder()); + assertEquals(expectedDiffFiles, actualNames); } - - ++index; } } From cb2e298e67cba401d7c911fc3113472352ee5dec Mon Sep 17 00:00:00 2001 From: Arun Sarin Date: Wed, 20 May 2026 13:39:24 +0530 Subject: [PATCH 5/5] HDDS-15209. Fix intermittent NPE in TestRocksDBCheckpointDiffer#testDifferWithDB --- .../rocksdiff/TestRocksDBCheckpointDiffer.java | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/hadoop-hdds/rocksdb-checkpoint-differ/src/test/java/org/apache/ozone/rocksdiff/TestRocksDBCheckpointDiffer.java b/hadoop-hdds/rocksdb-checkpoint-differ/src/test/java/org/apache/ozone/rocksdiff/TestRocksDBCheckpointDiffer.java index 1a4b10b3eae..c33ee1c061e 100644 --- a/hadoop-hdds/rocksdb-checkpoint-differ/src/test/java/org/apache/ozone/rocksdiff/TestRocksDBCheckpointDiffer.java +++ b/hadoop-hdds/rocksdb-checkpoint-differ/src/test/java/org/apache/ozone/rocksdiff/TestRocksDBCheckpointDiffer.java @@ -87,6 +87,7 @@ import java.util.function.Function; import java.util.stream.Collectors; import java.util.stream.Stream; +import org.apache.commons.io.FilenameUtils; import org.apache.commons.lang3.RandomStringUtils; import org.apache.commons.lang3.tuple.Pair; import org.apache.hadoop.hdds.StringUtils; @@ -988,13 +989,18 @@ void testDifferWithDB() throws Exception { diffAllSnapshots(rocksDBCheckpointDiffer); - // Confirm correct links created + // SST backup hard-links use whatever file numbers RocksDB assigns for compaction inputs. try (Stream sstPathStream = Files.list(sstBackUpDir.toPath())) { - List expectedLinks = sstPathStream.map(Path::getFileName) - .map(Object::toString).sorted().collect(Collectors.toList()); - assertEquals(expectedLinks, asList( - "000017.sst", "000019.sst", "000021.sst", "000023.sst", - "000024.sst", "000026.sst", "000029.sst")); + List backupFiles = + sstPathStream.map(p -> p.getFileName().toString()).sorted().collect(Collectors.toList()); + assertEquals(7, backupFiles.size(), "Unexpected compaction SST backup count"); + ConcurrentMap compactionNodes = + rocksDBCheckpointDiffer.getCompactionNodeMap(); + for (String name : backupFiles) { + assertTrue(name.endsWith(SST_FILE_EXTENSION), () -> "Not an SST: " + name); + assertTrue(compactionNodes.containsKey(FilenameUtils.getBaseName(name)), + () -> "Backup " + name + " should match a tracked compaction SST"); + } } rocksDBCheckpointDiffer.getForwardCompactionDAG().nodes().stream().forEach(compactionNode -> { Assertions.assertNotNull(compactionNode.getStartKey());