Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -284,12 +284,23 @@ public OmSnapshotManager(OzoneManager ozoneManager) throws IOException {

public static boolean isSnapshotPurged(SnapshotChainManager chainManager, OMMetadataManager omMetadataManager,
UUID snapshotId, TransactionInfo transactionInfo) throws IOException {
boolean purgeFlushed = transactionInfo != null &&
isTransactionFlushedToDisk(omMetadataManager, transactionInfo);
String tableKey = chainManager.getTableKey(snapshotId);
if (tableKey == null) {
return true;
// Snapshot chain is rebuilt from DB on every OM restart (loadFromSnapshotInfoTable),
// but entries committed to the Raft log (but not yet flushed) after the restart
// are applied in addToDBBatch (skipping validateAndUpdateCache), so they never call
// addSnapshot(). This can affect any OM (leader or follower) after a restart.
//
// Need to fall back to transactionInfo. null means no purge has been recorded. Treat as active.
LOG.debug("snapshotId {} has null tableKey in SnapshotChainManager. "
+ "transactionInfo={} purgeFlushed={}. Returning {}",
snapshotId, transactionInfo, purgeFlushed, purgeFlushed);
return purgeFlushed;
}
return !omMetadataManager.getSnapshotInfoTable().isExist(tableKey) && transactionInfo != null &&
isTransactionFlushedToDisk(omMetadataManager, transactionInfo);
boolean inDB = omMetadataManager.getSnapshotInfoTable().isExist(tableKey);
return !inDB && purgeFlushed;
}

/**
Expand Down Expand Up @@ -371,8 +382,12 @@ public OmSnapshot load(@Nonnull UUID snapshotId) throws IOException {
}
try (OmSnapshotLocalDataManager.ReadableOmSnapshotLocalDataMetaProvider snapshotLocalDataProvider =
snapshotLocalDataManager.getOmSnapshotLocalDataMeta(snapshotInfo)) {
final OmSnapshotLocalDataManager.SnapshotVersionsMeta snapshotMeta = snapshotLocalDataProvider.getMeta();
if (snapshotMeta == null) {
throw new OMException("Snapshot local metadata is missing for snapshotId: " + snapshotId, FILE_NOT_FOUND);
}
snapshotMetadataManager = getSnapshotOmMetadataManager(snapshotInfo,
snapshotLocalDataProvider.getMeta().getVersion(), maxOpenSstFilesInSnapshotDb, conf);
snapshotMeta.getVersion(), maxOpenSstFilesInSnapshotDb, conf);
}
} catch (IOException e) {
LOG.error("Failed to retrieve snapshot: {}", snapshotTableKey, e);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -296,6 +296,10 @@ private boolean loadFromSnapshotInfoTable(OMMetadataManager metadataManager) {
globalSnapshotChain.clear();
snapshotChainByPath.clear();
latestSnapshotIdByPath.clear();
if (!snapshotIdToTableKey.isEmpty()) {
LOG.debug("Clearing snapshotIdToTableKey (had {} entries) on thread {}",
snapshotIdToTableKey.size(), Thread.currentThread().getName());
}
snapshotIdToTableKey.clear();

while (keyIter.hasNext()) {
Expand Down Expand Up @@ -353,6 +357,8 @@ public synchronized void addSnapshot(SnapshotInfo snapshotInfo)
// store snapshot ID to snapshot DB table key in the map
snapshotIdToTableKey.put(snapshotInfo.getSnapshotId(),
snapshotInfo.getTableKey());
LOG.debug("Added to snapshotIdToTableKey: snapshotId={} tableKey={}",
snapshotInfo.getSnapshotId(), snapshotInfo.getTableKey());
}

/**
Expand All @@ -378,7 +384,8 @@ public synchronized boolean deleteSnapshot(SnapshotInfo snapshotInfo)
*/
public synchronized void removeFromSnapshotIdToTable(UUID snapshotId) throws IOException {
validateSnapshotChain();
snapshotIdToTableKey.remove(snapshotId);
String tableKey = snapshotIdToTableKey.remove(snapshotId);
LOG.debug("Removed from snapshotIdToTableKey: snapshotId={} tableKey={}", snapshotId, tableKey);
}

/**
Expand Down Expand Up @@ -570,7 +577,12 @@ public UUID previousPathSnapshot(String snapshotPath,
}

public String getTableKey(UUID snapshotId) {
return snapshotIdToTableKey.get(snapshotId);
String tableKey = snapshotIdToTableKey.get(snapshotId);
if (tableKey == null) {
LOG.debug("getTableKey returned null for snapshotId={}. snapshotIdToTableKey has {} entries",
snapshotId, snapshotIdToTableKey.size());
}
return tableKey;
}

public LinkedHashMap<UUID, SnapshotChainInfo> getSnapshotChainPath(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,7 @@
import org.apache.hadoop.ozone.om.OmSnapshotLocalData;
import org.apache.hadoop.ozone.om.OmSnapshotLocalDataYaml;
import org.apache.hadoop.ozone.om.OmSnapshotManager;
import org.apache.hadoop.ozone.om.SnapshotChainManager;
import org.apache.hadoop.ozone.om.helpers.SnapshotInfo;
import org.apache.hadoop.ozone.om.lock.DAGLeveledResource;
import org.apache.hadoop.ozone.om.lock.HierarchicalResourceLockManager;
Expand Down Expand Up @@ -1027,6 +1028,48 @@ public void testInitWithMissingYamlFiles(boolean needsUpgrade) throws IOExceptio
}
}

/**
* Regression test for NullPointerException in OmSnapshotManager#createCacheLoader.
* <p>
* isSnapshotPurged() now falls back to transactionInfo when getTableKey() returns null.
* A null transactionInfo means no purge was ever recorded for this snapshot in its YAML,
* so the snapshot is treated as active and the orphan check should correctly skip it.
*/
@Test
public void testCheckOrphanSnapshotVersionsWithStaleSnapshotChain() throws IOException {
localDataManager = getNewOmSnapshotLocalDataManager();
UUID snapshotId = createSnapshotLocalData(localDataManager, 1).get(0);

// Snapshot must be in versionNodeMap before the orphan check.
assertNotNull(localDataManager.getVersionNodeMapUnmodifiable().get(snapshotId));

// Use the real isSnapshotPurged
snapshotUtilMock.when(() -> OmSnapshotManager.isSnapshotPurged(any(), any(), any(), any()))
.thenCallRealMethod();

// Simulate a stale SnapshotChainManager: getTableKey returns null for the
// snapshot because the snapshot chain has not been correctly updated
SnapshotChainManager staleChain = mock(SnapshotChainManager.class);
when(staleChain.getTableKey(snapshotId)).thenReturn(null);

localDataManager.checkOrphanSnapshotVersions(omMetadataManager, staleChain, snapshotId);

// Before the fix: isSnapshotPurged returned true for any null tableKey, so the snapshot
// was removed from versionNodeMap. getMeta() then returned null, causing NullPointerException

// After the fix: null transactionInfo means no purge has been recorded, assuming active snapshot.
// versionNodeMap entry will survive the orphan check. getMeta() will be non-null.

assertNotNull(localDataManager.getVersionNodeMapUnmodifiable().get(snapshotId),
"Active snapshot was removed erroneously from versionNodeMap due to stale SnapshotChainManager");

try (OmSnapshotLocalDataManager.ReadableOmSnapshotLocalDataMetaProvider provider =
localDataManager.getOmSnapshotLocalDataMeta(snapshotId)) {
assertNotNull(provider.getMeta(),
"getMeta() returned null. Calling getVersion() on it throws NullPointerException");
}
}

@Test
public void testInitWithInvalidPathThrowsException() throws IOException {
UUID snapshotId = UUID.randomUUID();
Expand Down