Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

OAK-10253: Option to only collect references when calling checkConsis… #947

Merged
merged 2 commits into from
May 23, 2023
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,15 @@ public interface BlobGarbageCollector {
*/
long checkConsistency() throws Exception;

/**
 * Collects the blob references and consolidates references from other repositories if available in the DataStore.
 * Adds relevant metrics.
 * <p>
 * The {@code markOnly} flag selects how far the check goes: when it is {@code true} only the
 * references are collected; when it is {@code false} the collected references are additionally
 * compared against the blobs available in the blob store to find missing blobs.
 *
 * @param markOnly {@code true} to only collect the blob references and skip the comparison
 *                 against the blobs available in the blob store, {@code false} to run the
 *                 complete consistency check
 * @return the number of referenced blobs found to be missing; no comparison is performed when
 *         {@code markOnly} is {@code true}, so nothing is reported as missing in that case
 * @throws Exception if the references cannot be collected or the check cannot be completed
 */
long checkConsistency(boolean markOnly) throws Exception;

/**
* Returns operation statistics
*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -685,15 +685,9 @@ public int compare(String s1, String s2) {
closeQuietly(writer);
}
}

/**
* Checks for the DataStore consistency and reports the number of missing blobs still referenced.
*
* @return the missing blobs
* @throws Exception
*/

@Override
public long checkConsistency() throws Exception {
public long checkConsistency(boolean markOnly) throws Exception {
consistencyStatsCollector.start();
Stopwatch sw = Stopwatch.createStarted();

Expand All @@ -702,47 +696,36 @@ public long checkConsistency() throws Exception {
long candidates = 0;

try {
LOG.info("Starting blob consistency check");

// Find all blobs available in the blob store
ListenableFutureTask<Integer> blobIdRetriever = ListenableFutureTask.create(new BlobIdRetriever(fs,
true));
executor.execute(blobIdRetriever);
LOG.info("Starting blob consistency check with markOnly = {}", markOnly);

// Mark all used blob references
// Create a time marker in the data store if applicable
String uniqueSuffix = UUID.randomUUID().toString();
GarbageCollectionType.get(blobStore).addMarkedStartMarker(blobStore, repoId, uniqueSuffix);
iterateNodeTree(fs, true);
// Move the marked references file to the data store meta area if applicable
String uniqueSuffix = UUID.randomUUID().toString();
GarbageCollectionType.get(blobStore).addMarked(blobStore, fs, repoId, uniqueSuffix);
consistencyStatsCollector.updateMarkDuration(sw.elapsed(TimeUnit.MILLISECONDS), TimeUnit.MILLISECONDS);

try {
blobIdRetriever.get();
} catch (ExecutionException e) {
LOG.warn("Error occurred while fetching all the blobIds from the BlobStore");
threw = false;
throw e;
}

if (SharedDataStoreUtils.isShared(blobStore)) {
// Retrieve all other marked present in the datastore
List<DataRecord> refFiles =
((SharedDataStore) blobStore).getAllMetadataRecords(SharedStoreRecordType.REFERENCES.getType());
((SharedDataStore) blobStore).getAllMetadataRecords(SharedStoreRecordType.REFERENCES.getType());

// Get all the repositories registered
List<DataRecord> repoFiles =
((SharedDataStore) blobStore).getAllMetadataRecords(SharedStoreRecordType.REPOSITORY.getType());
((SharedDataStore) blobStore).getAllMetadataRecords(SharedStoreRecordType.REPOSITORY.getType());
LOG.info("Repositories registered {}", repoFiles);

// Retrieve repos for which reference files have not been created
Set<String> unAvailRepos =
SharedDataStoreUtils.refsNotAvailableFromRepos(repoFiles, refFiles);
SharedDataStoreUtils.refsNotAvailableFromRepos(repoFiles, refFiles);
LOG.info("Repositories with unavailable references {}", unAvailRepos);

if (!unAvailRepos.isEmpty()) {
throw new NotAllRepositoryMarkedException("Not all repositories have marked references available");
}

if (refFiles.size() > 0) {
File temp = new File(root, repoId + UUID.randomUUID().toString());
copyFile(fs.getMarkedRefs(), temp);
Expand All @@ -760,30 +743,45 @@ public long checkConsistency() throws Exception {

// Get size
getBlobReferencesSize(fs, consistencyStats);

if (!markOnly) {
// Find all blobs available in the blob store
ListenableFutureTask<Integer> blobIdRetriever = ListenableFutureTask.create(new BlobIdRetriever(fs,
true));
executor.execute(blobIdRetriever);

LOG.trace("Starting difference phase of the consistency check");
FileLineDifferenceIterator iter = new FileLineDifferenceIterator(
fs.getAvailableRefs(),
fs.getMarkedRefs(),
transformer);
// If tracking then also filter ids being tracked which are active deletions for lucene
candidates = BlobCollectionType.get(blobStore).filter(blobStore, iter, fs);
try {
blobIdRetriever.get();
} catch (ExecutionException e) {
LOG.warn("Error occurred while fetching all the blobIds from the BlobStore");
threw = false;
throw e;
}

LOG.trace("Starting difference phase of the consistency check");
FileLineDifferenceIterator iter = new FileLineDifferenceIterator(
fs.getAvailableRefs(),
fs.getMarkedRefs(),
transformer);
// If tracking then also filter ids being tracked which are active deletions for lucene
candidates = BlobCollectionType.get(blobStore).filter(blobStore, iter, fs);

GarbageCollectionType.get(blobStore).removeAllMarkedReferences(blobStore);
GarbageCollectionType.get(blobStore).removeAllMarkedReferences(blobStore);

LOG.trace("Ending difference phase of the consistency check");
LOG.info("Consistency check found [{}] missing blobs", candidates);
LOG.trace("Ending difference phase of the consistency check");
LOG.info("Consistency check found [{}] missing blobs", candidates);

if (candidates > 0) {
try (LineIterator lineIterator = new LineIterator(new FileReader(fs.getGcCandidates()))) {
while(lineIterator.hasNext()) {
LOG.warn("Missing Blob [{}]", lineIterator.nextLine());
if (candidates > 0) {
try (LineIterator lineIterator = new LineIterator(new FileReader(fs.getGcCandidates()))) {
while (lineIterator.hasNext()) {
LOG.warn("Missing Blob [{}]", lineIterator.nextLine());
}
}
}
LOG.warn("Consistency check failure in the the blob store : {}, check missing candidates in file {}",
LOG.warn("Consistency check failure in the the blob store : {}, check missing candidates in file {}",
blobStore, fs.getGcCandidates().getAbsolutePath());
consistencyStatsCollector.finishFailure();
consistencyStatsCollector.updateNumDeleted(candidates);
consistencyStatsCollector.finishFailure();
consistencyStatsCollector.updateNumDeleted(candidates);
}
}
} finally {
if (!traceOutput && (!LOG.isTraceEnabled() && candidates == 0)) {
Expand All @@ -795,6 +793,17 @@ public long checkConsistency() throws Exception {
return candidates;
}

/**
 * Runs the complete DataStore consistency check and reports how many blobs that are still
 * referenced are missing. Delegates to {@link #checkConsistency(boolean)} with
 * {@code markOnly} disabled.
 *
 * @return the number of missing blobs
 * @throws Exception if the delegated consistency check fails
 */
@Override
public long checkConsistency() throws Exception {
    // Full check: collect the references and compare them with the available blobs.
    final boolean markOnly = false;
    return checkConsistency(markOnly);
}

/**
 * Sets the {@code traceOutput} flag of this collector.
 *
 * @param trace the new value of the flag
 */
public void setTraceOutput(boolean trace) {
    this.traceOutput = trace;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -416,6 +416,22 @@ public void checkConsistency() throws Exception {
assertStatsBean(collector.getConsistencyOperationStats(), 1, 0, 0);
}

/**
 * Runs the consistency check in mark-only mode on the first collector of the cluster and
 * verifies that no blob is reported missing and that the consistency stats match the
 * expected values.
 */
@Test
public void checkConsistencyMarkOnly() throws Exception {
    log.info("Starting checkConsistencyMarkOnly()");

    long setupCompletedAt = clock.getTime();
    log.info("after setup time {}", setupCompletedAt);

    MarkSweepGarbageCollector gc = cluster.getCollector(0);
    // markOnly = true: only the references are collected, nothing can be reported missing
    long missingBlobs = gc.checkConsistency(true);
    assertEquals(0, missingBlobs);

    assertStats(
        cluster.statsProvider,
        1, 0, 0, 0,
        cluster.blobStoreState.blobsPresent.size(),
        cluster.blobSize,
        CONSISTENCY_NAME);
    assertStatsBean(gc.getConsistencyOperationStats(), 1, 0, 0);
}

@Test
public void checkConsistencyFailure() throws Exception {
log.info("Starting checkConsistencyFailure()");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -269,7 +269,7 @@ private void execute(NodeStoreFixture fixture, DataStoreOptions dataStoreOpts, O
} else {
MarkSweepGarbageCollector collector = getCollector(fixture, dataStoreOpts, opts, closer);
if (dataStoreOpts.checkConsistency()) {
long missing = collector.checkConsistency();
long missing = collector.checkConsistency(dataStoreOpts.consistencyCheckMarkOnly());
log.warn("Found {} missing blobs", missing);

if (dataStoreOpts.isVerbose()) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ public class DataStoreOptions implements OptionsBean {
private final OptionSpec<File> workDirOpt;
private final OptionSpec<File> outputDirOpt;
private final OptionSpec<Boolean> collectGarbage;
private final OptionSpec<Void> consistencyCheck;
private final OptionSpec<Boolean> consistencyCheck;
private final OptionSpec<Void> refOp;
private final OptionSpec<Void> idOp;
private final OptionSpec<Boolean> checkConsistencyAfterGC;
Expand Down Expand Up @@ -74,7 +74,9 @@ public DataStoreOptions(OptionParser parser) {
.withOptionalArg().ofType(Boolean.class).defaultsTo(Boolean.FALSE);

consistencyCheck =
parser.accepts("check-consistency", "Performs a consistency check on the repository/datastore defined");
parser.accepts("check-consistency", "Performs a consistency check on the repository/datastore defined. An optional boolean specifying "
+ "'markOnly' required if only collecting references")
.withOptionalArg().ofType(Boolean.class).defaultsTo(Boolean.FALSE);

refOp = parser.accepts("dump-ref", "Gets a dump of Blob References");

Expand Down Expand Up @@ -193,6 +195,10 @@ public boolean markOnly() {
return collectGarbage.value(options);
}

/**
 * Returns the optional boolean argument of the {@code check-consistency} option, which
 * selects the mark-only mode of the consistency check.
 *
 * @return the value parsed for the {@code consistencyCheck} option spec
 */
public boolean consistencyCheckMarkOnly() {
    final Boolean markOnly = consistencyCheck.value(options);
    return markOnly;
}

/**
 * Returns the value parsed for the {@code blobGcMaxAgeInSecs} option spec.
 *
 * @return the configured value (presumably a maximum blob age in seconds, going by the
 *         name; confirm against the option definition)
 */
public long getBlobGcMaxAgeInSecs() {
    final long maxAge = blobGcMaxAgeInSecs.value(options);
    return maxAge;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -348,9 +348,9 @@ public void testTarNoDS() throws Exception {

File dump = temporaryFolder.newFolder();
List<String> argsList = Lists
.newArrayList("--check-consistency", storeFixture.getConnectionString(),
.newArrayList(storeFixture.getConnectionString(),
"--out-dir", dump.getAbsolutePath(), "--reset-log-config", "false", "--work-dir",
temporaryFolder.newFolder().getAbsolutePath());
temporaryFolder.newFolder().getAbsolutePath(), "--check-consistency");
if (!Strings.isNullOrEmpty(additionalParams)) {
argsList.add(additionalParams);
}
Expand All @@ -364,7 +364,7 @@ public void testConsistencyMissing() throws Exception {
Data data = prepareData(storeFixture, blobFixture, 10, 5, 1);
storeFixture.close();

testConsistency(dump, data, false);
testConsistency(dump, data, false, false);
}

@Test
Expand All @@ -373,7 +373,7 @@ public void testConsistencyVerbose() throws Exception {
Data data = prepareData(storeFixture, blobFixture, 10, 5, 1);
storeFixture.close();

testConsistency(dump, data, true);
testConsistency(dump, data, true, false);
}

@Test
Expand Down Expand Up @@ -553,7 +553,16 @@ public void testConsistencyNoMissing() throws Exception {
Data data = prepareData(storeFixture, blobFixture, 10, 5, 0);
storeFixture.close();

testConsistency(dump, data, false);
testConsistency(dump, data, false, false);
}

/**
 * Prepares the test data, closes the store fixture and runs the consistency command in
 * mark-only mode through the shared {@code testConsistency} helper.
 */
@Test
public void testConsistencyMarkOnly() throws Exception {
    File outputDir = temporaryFolder.newFolder();
    Data prepared = prepareData(storeFixture, blobFixture, 10, 5, 0);
    storeFixture.close();

    // Name the flags passed to the helper so the call site is self-explanatory.
    final boolean verbose = false;
    final boolean verboseRootPath = false;
    final boolean markOnly = true;
    testConsistency(outputDir, prepared, verbose, verboseRootPath, markOnly);
}

@Test
Expand Down Expand Up @@ -714,13 +723,13 @@ public void testConsistencyFakeDS() throws Exception {
}


private void testConsistency(File dump, Data data, boolean verbose) throws Exception {
testConsistency(dump, data, verbose, false);
private void testConsistency(File dump, Data data, boolean verbose, boolean verboseRootPath) throws Exception {
testConsistency(dump, data, verbose, verboseRootPath, false);
}

private void testConsistency(File dump, Data data, boolean verbose, boolean verboseRootPath) throws Exception {
private void testConsistency(File dump, Data data, boolean verbose, boolean verboseRootPath, boolean markOnly) throws Exception {
List<String> argsList = Lists
.newArrayList("--check-consistency", "--" + getOption(blobFixture.getType()), blobFixture.getConfigPath(),
.newArrayList("--check-consistency", String.valueOf(markOnly), "--" + getOption(blobFixture.getType()), blobFixture.getConfigPath(),
storeFixture.getConnectionString(), "--out-dir", dump.getAbsolutePath(), "--work-dir",
temporaryFolder.newFolder().getAbsolutePath());
if (!Strings.isNullOrEmpty(additionalParams)) {
Expand All @@ -732,8 +741,12 @@ private void testConsistency(File dump, Data data, boolean verbose, boolean verb
}
DataStoreCommand cmd = new DataStoreCommand();
cmd.execute(argsList.toArray(new String[0]));

assertFileEquals(dump, "avail-", Sets.difference(data.added, data.missingDataStore));

if (!markOnly) {
assertFileEquals(dump, "avail-", Sets.difference(data.added, data.missingDataStore));
} else {
assertFileNull(dump, "avail-");
}

// Verbose would have paths as well as ids changed but normally only DocumentNS would have paths suffixed
assertFileEquals(dump, "marked-", verbose ?
Expand All @@ -742,14 +755,18 @@ private void testConsistency(File dump, Data data, boolean verbose, boolean verb
(storeFixture instanceof StoreFixture.MongoStoreFixture) ?
encodedIdsAndPath(Sets.difference(data.added, data.deleted), blobFixture.getType(), data.idToPath, false) :
Sets.difference(data.added, data.deleted));

// Verbose would have paths as well as ids changed but normally only DocumentNS would have paths suffixed
assertFileEquals(dump, "gccand-", verbose ?
encodedIdsAndPath(verboseRootPath ? Sets.intersection(data.addedSubset, data.missingDataStore) :
data.missingDataStore, blobFixture.getType(), data.idToPath, true) :
(storeFixture instanceof StoreFixture.MongoStoreFixture) ?
encodedIdsAndPath(data.missingDataStore, blobFixture.getType(), data.idToPath, false) :
data.missingDataStore);

if (!markOnly) {
// Verbose would have paths as well as ids changed but normally only DocumentNS would have paths suffixed
assertFileEquals(dump, "gccand-", verbose ?
encodedIdsAndPath(verboseRootPath ? Sets.intersection(data.addedSubset, data.missingDataStore) :
data.missingDataStore, blobFixture.getType(), data.idToPath, true) :
(storeFixture instanceof StoreFixture.MongoStoreFixture) ?
encodedIdsAndPath(data.missingDataStore, blobFixture.getType(), data.idToPath, false) :
data.missingDataStore);
} else {
assertFileNull(dump, "gccand-");
}
}

private void testDumpRef(File dump, Data data, boolean verbose, boolean verboseRootPath) throws Exception {
Expand Down